Lines 1-179
Link Here
|
1 |
# https://github.com/ProgVal/Limnoria/pull/1371 |
|
|
2 |
# https://github.com/ProgVal/Limnoria/issues/1362 |
3 |
# https://github.com/ProgVal/Limnoria/issues/1359 |
4 |
|
5 |
From 7cf91ad703ab324e38c37fa2976626505f1d569a Mon Sep 17 00:00:00 2001 |
6 |
From: Rodrigo <rodrigo.freebsd@minasambiente.com.br> |
7 |
Date: Fri, 27 Sep 2019 01:48:36 -0300 |
8 |
Subject: [PATCH 1/3] Log clearly when the charade dependency is missing; behave the same |
9 |
way in py3 or py2 without errors in any case (e.g. without charade installed) |
10 |
|
11 |
--- |
12 |
plugins/Web/plugin.py | 44 +++++++++++++++++-------------------------- |
13 |
1 file changed, 17 insertions(+), 27 deletions(-) |
14 |
|
15 |
diff --git plugins/Web/plugin.py plugins/Web/plugin.py |
16 |
index 1ac362714..0bdb68a4a 100644 |
17 |
--- plugins/Web/plugin.py |
18 |
+++ plugins/Web/plugin.py |
19 |
@@ -150,37 +150,27 @@ def getTitle(self, irc, url, raiseErrors): |
20 |
size = conf.supybot.protocols.http.peekSize() |
21 |
timeout = self.registryValue('timeout') |
22 |
(target, text) = utils.web.getUrlTargetAndContent(url, size=size, |
23 |
- timeout=timeout) |
24 |
- try: |
25 |
- text = text.decode(utils.web.getEncoding(text) or 'utf8', |
26 |
- 'replace') |
27 |
- except UnicodeDecodeError: |
28 |
- pass |
29 |
- if minisix.PY3 and isinstance(text, bytes): |
30 |
- if raiseErrors: |
31 |
- irc.error(_('Could not guess the page\'s encoding. (Try ' |
32 |
- 'installing python-charade.)'), Raise=True) |
33 |
- else: |
34 |
- return None |
35 |
+ timeout=timeout) |
36 |
+ encoding = utils.web.getEncoding(text) |
37 |
+ if encoding is None: # Condition if charade not installed |
38 |
+ self.log.info('Web plugin TitleSnarfer: Could not guess the page\'s' |
39 |
+ ' encoding. (Try installing python-charade.)') |
40 |
+ encoding = 'utf-8' # Assume UTF-8 and replace unknown chars to the UTF-8 codec for U+FFFD in the next hop |
41 |
try: |
42 |
+ text = text.decode(utils.web.getEncoding(text) or 'utf-8', 'replace') |
43 |
parser = Title() |
44 |
parser.feed(text) |
45 |
- except UnicodeDecodeError: |
46 |
- # Workaround for Python 2 |
47 |
- # https://github.com/ProgVal/Limnoria/issues/1359 |
48 |
- parser = Title() |
49 |
- parser.feed(text.encode('utf8')) |
50 |
- parser.close() |
51 |
- title = utils.str.normalizeWhitespace(''.join(parser.data).strip()) |
52 |
- if title: |
53 |
- return (target, title) |
54 |
- elif raiseErrors: |
55 |
- if len(text) < size: |
56 |
- irc.error(_('That URL appears to have no HTML title.'), |
57 |
- Raise=True) |
58 |
+ parser.close() |
59 |
+ title = utils.str.normalizeWhitespace(''.join(parser.data).strip()) |
60 |
+ if title: |
61 |
+ return (target, title) |
62 |
else: |
63 |
- irc.error(format(_('That URL appears to have no HTML title ' |
64 |
- 'within the first %S.'), size), Raise=True) |
65 |
+ if len(text) < size: |
66 |
+ self.log.info('Web plugin TitleSnarfer: ' |
67 |
+ 'That URL appears to have no HTML title.') |
68 |
+ except raiseErrors: # Can use raiseErrors here ? |
69 |
+ irc.error(_('Web plugin TitleSnarfer encoding errors'), |
70 |
+ Raise=True) |
71 |
|
72 |
@fetch_sandbox |
73 |
def titleSnarfer(self, irc, msg, match): |
74 |
|
75 |
From 16247c5caf59f307b3d6910e31832fe30d0d583a Mon Sep 17 00:00:00 2001 |
76 |
From: Rodrigo <rodrigo.freebsd@minasambiente.com.br> |
77 |
Date: Fri, 27 Sep 2019 20:12:00 -0300 |
78 |
Subject: [PATCH 2/3] more python2 compatibility |
79 |
|
80 |
--- |
81 |
plugins/Web/plugin.py | 18 +++++++++++------- |
82 |
1 file changed, 11 insertions(+), 7 deletions(-) |
83 |
|
84 |
diff --git plugins/Web/plugin.py plugins/Web/plugin.py |
85 |
index 0bdb68a4a..85c129de3 100644 |
86 |
--- plugins/Web/plugin.py |
87 |
+++ plugins/Web/plugin.py |
88 |
@@ -150,16 +150,20 @@ def getTitle(self, irc, url, raiseErrors): |
89 |
size = conf.supybot.protocols.http.peekSize() |
90 |
timeout = self.registryValue('timeout') |
91 |
(target, text) = utils.web.getUrlTargetAndContent(url, size=size, |
92 |
- timeout=timeout) |
93 |
+ timeout=timeout) |
94 |
encoding = utils.web.getEncoding(text) |
95 |
- if encoding is None: # Condition if charade not installed |
96 |
+ if encoding is None: # Condition if charade not installed |
97 |
self.log.info('Web plugin TitleSnarfer: Could not guess the page\'s' |
98 |
' encoding. (Try installing python-charade.)') |
99 |
- encoding = 'utf-8' # Assume UTF-8 and replace unknown chars to the UTF-8 codec for U+FFFD in the next hop |
100 |
+ encoding = 'utf-8' # Assume UTF-8 and replace unknown chars to the UTF-8 codec for U+FFFD in the next hop |
101 |
try: |
102 |
- text = text.decode(utils.web.getEncoding(text) or 'utf-8', 'replace') |
103 |
+ text = text.decode(utils.web.getEncoding(text) or 'utf-8','replace') |
104 |
parser = Title() |
105 |
- parser.feed(text) |
106 |
+ try: |
107 |
+ parser.feed(text) |
108 |
+ except: |
109 |
+ parser = Title() |
110 |
+ parser.feed(bytes(text)) # Explicitly pack to bytes in encoding errors for (more) python2 compatibility |
111 |
parser.close() |
112 |
title = utils.str.normalizeWhitespace(''.join(parser.data).strip()) |
113 |
if title: |
114 |
@@ -168,9 +172,9 @@ def getTitle(self, irc, url, raiseErrors): |
115 |
if len(text) < size: |
116 |
self.log.info('Web plugin TitleSnarfer: ' |
117 |
'That URL appears to have no HTML title.') |
118 |
- except raiseErrors: # Can use raiseErrors here ? |
119 |
+ except: |
120 |
irc.error(_('Web plugin TitleSnarfer encoding errors'), |
121 |
- Raise=True) |
122 |
+ Raise=True) |
123 |
|
124 |
@fetch_sandbox |
125 |
def titleSnarfer(self, irc, msg, match): |
126 |
|
127 |
From 5cac4a3cbda26186a330709901da2633914de415 Mon Sep 17 00:00:00 2001 |
128 |
From: Rodrigo <rodrigo.freebsd@minasambiente.com.br> |
129 |
Date: Sat, 28 Sep 2019 23:40:44 -0300 |
130 |
Subject: [PATCH 3/3] Include '(target, text) = |
131 |
utils.web.getUrlTargetAndContent(url, size=size,timeout=timeout)' in |
132 |
try/except block to avoid thread blocking; send HTTP errors to the log |
133 |
|
134 |
--- |
135 |
plugins/Web/plugin.py | 23 +++++++++++------------ |
136 |
1 file changed, 11 insertions(+), 12 deletions(-) |
137 |
|
138 |
diff --git plugins/Web/plugin.py plugins/Web/plugin.py |
139 |
index 85c129de3..e67944f29 100644 |
140 |
--- plugins/Web/plugin.py |
141 |
+++ plugins/Web/plugin.py |
142 |
@@ -149,14 +149,13 @@ def noIgnore(self, irc, msg): |
143 |
def getTitle(self, irc, url, raiseErrors): |
144 |
size = conf.supybot.protocols.http.peekSize() |
145 |
timeout = self.registryValue('timeout') |
146 |
- (target, text) = utils.web.getUrlTargetAndContent(url, size=size, |
147 |
- timeout=timeout) |
148 |
- encoding = utils.web.getEncoding(text) |
149 |
- if encoding is None: # Condition if charade not installed |
150 |
- self.log.info('Web plugin TitleSnarfer: Could not guess the page\'s' |
151 |
- ' encoding. (Try installing python-charade.)') |
152 |
- encoding = 'utf-8' # Assume UTF-8 and replace unknown chars to the UTF-8 codec for U+FFFD in the next hop |
153 |
try: |
154 |
+ (target, text) = utils.web.getUrlTargetAndContent(url, size=size,timeout=timeout) |
155 |
+ encoding = utils.web.getEncoding(text) |
156 |
+ if encoding is None: # Condition if charade not installed |
157 |
+ self.log.info('Web plugin TitleSnarfer: Could not guess the page\'s' |
158 |
+ ' encoding. (Try installing python-charade.)') |
159 |
+ encoding = 'utf-8' # Assume UTF-8 and replace unknown chars to the UTF-8 codec for U+FFFD in the next hop |
160 |
text = text.decode(utils.web.getEncoding(text) or 'utf-8','replace') |
161 |
parser = Title() |
162 |
try: |
163 |
@@ -170,11 +169,11 @@ def getTitle(self, irc, url, raiseErrors): |
164 |
return (target, title) |
165 |
else: |
166 |
if len(text) < size: |
167 |
- self.log.info('Web plugin TitleSnarfer: ' |
168 |
- 'That URL appears to have no HTML title.') |
169 |
- except: |
170 |
- irc.error(_('Web plugin TitleSnarfer encoding errors'), |
171 |
- Raise=True) |
172 |
+ self.log.info('Web plugin TitleSnarfer: <' + url + '> appears to have no HTML title.') |
173 |
+ else: |
174 |
+ self.log.info('Web plugin TitleSnarfer: Could not retrieve title of <' + url + '>') |
175 |
+ except Exception as e: |
176 |
+ self.log.info('Web plugin TitleSnarfer: <' + str(e) + '> while trying to process <' + url +'>') |
177 |
|
178 |
@fetch_sandbox |
179 |
def titleSnarfer(self, irc, msg, match): |