FreeBSD Bugzilla – Attachment 224133 Details for
Bug 250380
textproc/py-feedparser: fails to import with lang/python39
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
use new devel/py-sgmllib3k as run dependency in textproc/py-feedparser
py-feedparser-sgmllib3k.diff (text/plain), 20.71 KB, created by
Sofian Brabez
on 2021-04-15 12:58:49 UTC
(
hide
)
Description:
use new devel/py-sgmllib3k as run dependency in textproc/py-feedparser
Filename:
MIME Type:
Creator:
Sofian Brabez
Created:
2021-04-15 12:58:49 UTC
Size:
20.71 KB
patch
obsolete
>diff --git a/devel/py-sgmllib3k/Makefile b/devel/py-sgmllib3k/Makefile >new file mode 100644 >index 000000000000..0f72372591b1 >--- /dev/null >+++ b/devel/py-sgmllib3k/Makefile >@@ -0,0 +1,17 @@ >+PORTNAME= sgmllib3k >+PORTVERSION= 1.0.0 >+CATEGORIES= devel python >+MASTER_SITES= CHEESESHOP >+PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} >+ >+MAINTAINER= python@FreeBSD.org >+COMMENT= Python 3 port of sgmllib >+ >+LICENSE= BSD >+ >+USES= python:3.6+ >+USE_PYTHON= autoplist distutils >+ >+NO_ARCH= yes >+ >+.include <bsd.port.mk> >diff --git a/devel/py-sgmllib3k/distinfo b/devel/py-sgmllib3k/distinfo >new file mode 100644 >index 000000000000..7e29331d5fbc >--- /dev/null >+++ b/devel/py-sgmllib3k/distinfo >@@ -0,0 +1,3 @@ >+TIMESTAMP = 1618485290 >+SHA256 (sgmllib3k-1.0.0.tar.gz) = 7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9 >+SIZE (sgmllib3k-1.0.0.tar.gz) = 5750 >diff --git a/devel/py-sgmllib3k/pkg-descr b/devel/py-sgmllib3k/pkg-descr >new file mode 100644 >index 000000000000..2696896f0268 >--- /dev/null >+++ b/devel/py-sgmllib3k/pkg-descr >@@ -0,0 +1,6 @@ >+sgmllib3k is a Python 3 port of the old Python 2's sgmllib stdlib module. >+ >+sgmllib was dropped from the Python standard library in Python 3. This package >+provides a port of the library to Python 3. 
>+ >+WWW: https://pypi.org/project/sgmllib3k/ >diff --git a/textproc/py-feedparser/Makefile b/textproc/py-feedparser/Makefile >index 692d5a3c458a..ddcdeaad2d82 100644 >--- a/textproc/py-feedparser/Makefile >+++ b/textproc/py-feedparser/Makefile >@@ -2,17 +2,18 @@ > > PORTNAME= feedparser > PORTVERSION= 6.0.2 >-PORTREVISION= 1 >+PORTREVISION= 2 > CATEGORIES= textproc python > PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} > > MAINTAINER= sbz@FreeBSD.org > COMMENT= Universal feed parser written in Python > >-LICENSE= BSD2CLAUSE PSFL >-LICENSE_COMB= multi >+LICENSE= BSD2CLAUSE > LICENSE_FILE= ${WRKSRC}/LICENSE > >+RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}sgmllib3k>=0:devel/py-sgmllib3k@${PY_FLAVOR} >+ > USES= python:3.6+ tar:bzip2 > USE_PYTHON= autoplist distutils > USE_GITHUB= yes >@@ -25,18 +26,6 @@ PORTDOCS= NEWS README.rst > > OPTIONS_DEFINE= DOCS > >-.include <bsd.port.pre.mk> >- >-.if ${PYTHON_REL} > 3700 >-PLIST_FILES+= ${PYTHON_SITELIBDIR}/sgmllib.py >- >-post-install: >- ${INSTALL_DATA} ${FILESDIR}/sgmllib.py ${STAGEDIR}/${PYTHON_SITELIBDIR}/sgmllib.py >-.endif >- >-post-patch: >- @${REINPLACE_CMD} '/sgmllib3k/d' ${WRKSRC}/setup.py >- > post-install-DOCS-on: > @${MKDIR} ${STAGEDIR}${DOCSDIR} > ${INSTALL_DATA} ${PORTDOCS:S,^,${WRKSRC}/,} ${STAGEDIR}${DOCSDIR} >@@ -44,4 +33,4 @@ post-install-DOCS-on: > do-test: > @(cd ${WRKSRC}; ${PYTHON_CMD} tests/runtests.py) > >-.include <bsd.port.post.mk> >+.include <bsd.port.mk> >diff --git a/textproc/py-feedparser/files/sgmllib.py b/textproc/py-feedparser/files/sgmllib.py >deleted file mode 100644 >index 88a02a307f40..000000000000 >--- a/textproc/py-feedparser/files/sgmllib.py >+++ /dev/null >@@ -1,547 +0,0 @@ >-"""A parser for SGML, using the derived class as a static DTD.""" >- >-# XXX This only supports those SGML features used by HTML. 
>- >-# XXX There should be a way to distinguish between PCDATA (parsed >-# character data -- the normal case), RCDATA (replaceable character >-# data -- only char and entity references and end tags are special) >-# and CDATA (character data -- only end tags are special). RCDATA is >-# not supported at all. >- >-import _markupbase >-import re >- >-__all__ = ["SGMLParser", "SGMLParseError"] >- >-# Regular expressions used for parsing >- >-interesting = re.compile('[&<]') >-incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' >- '<([a-zA-Z][^<>]*|' >- '/([a-zA-Z][^<>]*)?|' >- '![^<>]*)?') >- >-entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') >-charref = re.compile('&#([0-9]+)[^0-9]') >- >-starttagopen = re.compile('<[>a-zA-Z]') >-shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') >-shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') >-piclose = re.compile('>') >-endbracket = re.compile('[<>]') >-tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') >-attrfind = re.compile( >- r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' >- r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?') >- >- >-class SGMLParseError(RuntimeError): >- """Exception raised for all parse errors.""" >- pass >- >- >-# SGML parser base class -- find tags and call handler functions. >-# Usage: p = SGMLParser(); p.feed(data); ...; p.close(). >-# The dtd is defined by deriving a class which defines methods >-# with special names to handle tags: start_foo and end_foo to handle >-# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself. >-# (Tags are converted to lower case for this purpose.) The data >-# between tags is passed to the parser by calling self.handle_data() >-# with some data as argument (the data may be split up in arbitrary >-# chunks). Entity references are passed by calling >-# self.handle_entityref() with the entity reference as argument. 
>- >-class SGMLParser(_markupbase.ParserBase): >- # Definition of entities -- derived classes may override >- entity_or_charref = re.compile('&(?:' >- '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)' >- ')(;?)') >- >- def __init__(self, verbose=0): >- """Initialize and reset this instance.""" >- self.verbose = verbose >- self.reset() >- >- def reset(self): >- """Reset this instance. Loses all unprocessed data.""" >- self.__starttag_text = None >- self.rawdata = '' >- self.stack = [] >- self.lasttag = '???' >- self.nomoretags = 0 >- self.literal = 0 >- _markupbase.ParserBase.reset(self) >- >- def setnomoretags(self): >- """Enter literal mode (CDATA) till EOF. >- >- Intended for derived classes only. >- """ >- self.nomoretags = self.literal = 1 >- >- def setliteral(self, *args): >- """Enter literal mode (CDATA). >- >- Intended for derived classes only. >- """ >- self.literal = 1 >- >- def feed(self, data): >- """Feed some data to the parser. >- >- Call this as often as you want, with as little or as much text >- as you want (may include '\n'). (This just saves the text, >- all the processing is done by goahead().) >- """ >- >- self.rawdata = self.rawdata + data >- self.goahead(0) >- >- def close(self): >- """Handle the remaining data.""" >- self.goahead(1) >- >- def error(self, message): >- raise SGMLParseError(message) >- >- # Internal -- handle data as far as reasonable. May leave state >- # and data to be processed by a subsequent call. If 'end' is >- # true, force handling all data as if followed by EOF marker. 
>- def goahead(self, end): >- rawdata = self.rawdata >- i = 0 >- n = len(rawdata) >- while i < n: >- if self.nomoretags: >- self.handle_data(rawdata[i:n]) >- i = n >- break >- match = interesting.search(rawdata, i) >- if match: j = match.start() >- else: j = n >- if i < j: >- self.handle_data(rawdata[i:j]) >- i = j >- if i == n: break >- if rawdata[i] == '<': >- if starttagopen.match(rawdata, i): >- if self.literal: >- self.handle_data(rawdata[i]) >- i = i+1 >- continue >- k = self.parse_starttag(i) >- if k < 0: break >- i = k >- continue >- if rawdata.startswith("</", i): >- k = self.parse_endtag(i) >- if k < 0: break >- i = k >- self.literal = 0 >- continue >- if self.literal: >- if n > (i + 1): >- self.handle_data("<") >- i = i+1 >- else: >- # incomplete >- break >- continue >- if rawdata.startswith("<!--", i): >- # Strictly speaking, a comment is --.*-- >- # within a declaration tag <!...>. >- # This should be removed, >- # and comments handled only in parse_declaration. >- k = self.parse_comment(i) >- if k < 0: break >- i = k >- continue >- if rawdata.startswith("<?", i): >- k = self.parse_pi(i) >- if k < 0: break >- i = i+k >- continue >- if rawdata.startswith("<!", i): >- # This is some sort of declaration; in "HTML as >- # deployed," this should only be the document type >- # declaration ("<!DOCTYPE html...>"). 
>- k = self.parse_declaration(i) >- if k < 0: break >- i = k >- continue >- elif rawdata[i] == '&': >- if self.literal: >- self.handle_data(rawdata[i]) >- i = i+1 >- continue >- match = charref.match(rawdata, i) >- if match: >- name = match.group(1) >- self.handle_charref(name) >- i = match.end(0) >- if rawdata[i-1] != ';': i = i-1 >- continue >- match = entityref.match(rawdata, i) >- if match: >- name = match.group(1) >- self.handle_entityref(name) >- i = match.end(0) >- if rawdata[i-1] != ';': i = i-1 >- continue >- else: >- self.error('neither < nor & ??') >- # We get here only if incomplete matches but >- # nothing else >- match = incomplete.match(rawdata, i) >- if not match: >- self.handle_data(rawdata[i]) >- i = i+1 >- continue >- j = match.end(0) >- if j == n: >- break # Really incomplete >- self.handle_data(rawdata[i:j]) >- i = j >- # end while >- if end and i < n: >- self.handle_data(rawdata[i:n]) >- i = n >- self.rawdata = rawdata[i:] >- # XXX if end: check for empty stack >- >- # Extensions for the DOCTYPE scanner: >- _decl_otherchars = '=' >- >- # Internal -- parse processing instr, return length or -1 if not terminated >- def parse_pi(self, i): >- rawdata = self.rawdata >- if rawdata[i:i+2] != '<?': >- self.error('unexpected call to parse_pi()') >- match = piclose.search(rawdata, i+2) >- if not match: >- return -1 >- j = match.start(0) >- self.handle_pi(rawdata[i+2: j]) >- j = match.end(0) >- return j-i >- >- def get_starttag_text(self): >- return self.__starttag_text >- >- # Internal -- handle starttag, return length or -1 if not terminated >- def parse_starttag(self, i): >- self.__starttag_text = None >- start_pos = i >- rawdata = self.rawdata >- if shorttagopen.match(rawdata, i): >- # SGML shorthand: <tag/data/ == <tag>data</tag> >- # XXX Can data contain &... (entity or char refs)? >- # XXX Can data contain < or > (tag characters)? >- # XXX Can there be whitespace before the first /? 
>- match = shorttag.match(rawdata, i) >- if not match: >- return -1 >- tag, data = match.group(1, 2) >- self.__starttag_text = '<%s/' % tag >- tag = tag.lower() >- k = match.end(0) >- self.finish_shorttag(tag, data) >- self.__starttag_text = rawdata[start_pos:match.end(1) + 1] >- return k >- # XXX The following should skip matching quotes (' or ") >- # As a shortcut way to exit, this isn't so bad, but shouldn't >- # be used to locate the actual end of the start tag since the >- # < or > characters may be embedded in an attribute value. >- match = endbracket.search(rawdata, i+1) >- if not match: >- return -1 >- j = match.start(0) >- # Now parse the data between i+1 and j into a tag and attrs >- attrs = [] >- if rawdata[i:i+2] == '<>': >- # SGML shorthand: <> == <last open tag seen> >- k = j >- tag = self.lasttag >- else: >- match = tagfind.match(rawdata, i+1) >- if not match: >- self.error('unexpected call to parse_starttag') >- k = match.end(0) >- tag = rawdata[i+1:k].lower() >- self.lasttag = tag >- while k < j: >- match = attrfind.match(rawdata, k) >- if not match: break >- attrname, rest, attrvalue = match.group(1, 2, 3) >- if not rest: >- attrvalue = attrname >- else: >- if (attrvalue[:1] == "'" == attrvalue[-1:] or >- attrvalue[:1] == '"' == attrvalue[-1:]): >- # strip quotes >- attrvalue = attrvalue[1:-1] >- attrvalue = self.entity_or_charref.sub( >- self._convert_ref, attrvalue) >- attrs.append((attrname.lower(), attrvalue)) >- k = match.end(0) >- if rawdata[j] == '>': >- j = j+1 >- self.__starttag_text = rawdata[start_pos:j] >- self.finish_starttag(tag, attrs) >- return j >- >- # Internal -- convert entity or character reference >- def _convert_ref(self, match): >- if match.group(2): >- return self.convert_charref(match.group(2)) or \ >- '&#%s%s' % match.groups()[1:] >- elif match.group(3): >- return self.convert_entityref(match.group(1)) or \ >- '&%s;' % match.group(1) >- else: >- return '&%s' % match.group(1) >- >- # Internal -- parse endtag >- def 
parse_endtag(self, i): >- rawdata = self.rawdata >- match = endbracket.search(rawdata, i+1) >- if not match: >- return -1 >- j = match.start(0) >- tag = rawdata[i+2:j].strip().lower() >- if rawdata[j] == '>': >- j = j+1 >- self.finish_endtag(tag) >- return j >- >- # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>) >- def finish_shorttag(self, tag, data): >- self.finish_starttag(tag, []) >- self.handle_data(data) >- self.finish_endtag(tag) >- >- # Internal -- finish processing of start tag >- # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag >- def finish_starttag(self, tag, attrs): >- try: >- method = getattr(self, 'start_' + tag) >- except AttributeError: >- try: >- method = getattr(self, 'do_' + tag) >- except AttributeError: >- self.unknown_starttag(tag, attrs) >- return -1 >- else: >- self.handle_starttag(tag, method, attrs) >- return 0 >- else: >- self.stack.append(tag) >- self.handle_starttag(tag, method, attrs) >- return 1 >- >- # Internal -- finish processing of end tag >- def finish_endtag(self, tag): >- if not tag: >- found = len(self.stack) - 1 >- if found < 0: >- self.unknown_endtag(tag) >- return >- else: >- if tag not in self.stack: >- try: >- method = getattr(self, 'end_' + tag) >- except AttributeError: >- self.unknown_endtag(tag) >- else: >- self.report_unbalanced(tag) >- return >- found = len(self.stack) >- for i in range(found): >- if self.stack[i] == tag: found = i >- while len(self.stack) > found: >- tag = self.stack[-1] >- try: >- method = getattr(self, 'end_' + tag) >- except AttributeError: >- method = None >- if method: >- self.handle_endtag(tag, method) >- else: >- self.unknown_endtag(tag) >- del self.stack[-1] >- >- # Overridable -- handle start tag >- def handle_starttag(self, tag, method, attrs): >- method(attrs) >- >- # Overridable -- handle end tag >- def handle_endtag(self, tag, method): >- method() >- >- # Example -- report an unbalanced </...> tag. 
>- def report_unbalanced(self, tag): >- if self.verbose: >- print('*** Unbalanced </' + tag + '>') >- print('*** Stack:', self.stack) >- >- def convert_charref(self, name): >- """Convert character reference, may be overridden.""" >- try: >- n = int(name) >- except ValueError: >- return >- if not 0 <= n <= 127: >- return >- return self.convert_codepoint(n) >- >- def convert_codepoint(self, codepoint): >- return chr(codepoint) >- >- def handle_charref(self, name): >- """Handle character reference, no need to override.""" >- replacement = self.convert_charref(name) >- if replacement is None: >- self.unknown_charref(name) >- else: >- self.handle_data(replacement) >- >- # Definition of entities -- derived classes may override >- entitydefs = \ >- {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''} >- >- def convert_entityref(self, name): >- """Convert entity references. >- >- As an alternative to overriding this method; one can tailor the >- results by setting up the self.entitydefs mapping appropriately. 
>- """ >- table = self.entitydefs >- if name in table: >- return table[name] >- else: >- return >- >- def handle_entityref(self, name): >- """Handle entity references, no need to override.""" >- replacement = self.convert_entityref(name) >- if replacement is None: >- self.unknown_entityref(name) >- else: >- self.handle_data(replacement) >- >- # Example -- handle data, should be overridden >- def handle_data(self, data): >- pass >- >- # Example -- handle comment, could be overridden >- def handle_comment(self, data): >- pass >- >- # Example -- handle declaration, could be overridden >- def handle_decl(self, decl): >- pass >- >- # Example -- handle processing instruction, could be overridden >- def handle_pi(self, data): >- pass >- >- # To be overridden -- handlers for unknown objects >- def unknown_starttag(self, tag, attrs): pass >- def unknown_endtag(self, tag): pass >- def unknown_charref(self, ref): pass >- def unknown_entityref(self, ref): pass >- >- >-class TestSGMLParser(SGMLParser): >- >- def __init__(self, verbose=0): >- self.testdata = "" >- SGMLParser.__init__(self, verbose) >- >- def handle_data(self, data): >- self.testdata = self.testdata + data >- if len(repr(self.testdata)) >= 70: >- self.flush() >- >- def flush(self): >- data = self.testdata >- if data: >- self.testdata = "" >- print('data:', repr(data)) >- >- def handle_comment(self, data): >- self.flush() >- r = repr(data) >- if len(r) > 68: >- r = r[:32] + '...' 
+ r[-32:] >- print('comment:', r) >- >- def unknown_starttag(self, tag, attrs): >- self.flush() >- if not attrs: >- print('start tag: <' + tag + '>') >- else: >- print('start tag: <' + tag, end=' ') >- for name, value in attrs: >- print(name + '=' + '"' + value + '"', end=' ') >- print('>') >- >- def unknown_endtag(self, tag): >- self.flush() >- print('end tag: </' + tag + '>') >- >- def unknown_entityref(self, ref): >- self.flush() >- print('*** unknown entity ref: &' + ref + ';') >- >- def unknown_charref(self, ref): >- self.flush() >- print('*** unknown char ref: &#' + ref + ';') >- >- def unknown_decl(self, data): >- self.flush() >- print('*** unknown decl: [' + data + ']') >- >- def close(self): >- SGMLParser.close(self) >- self.flush() >- >- >-def test(args = None): >- import sys >- >- if args is None: >- args = sys.argv[1:] >- >- if args and args[0] == '-s': >- args = args[1:] >- klass = SGMLParser >- else: >- klass = TestSGMLParser >- >- if args: >- file = args[0] >- else: >- file = 'test.html' >- >- if file == '-': >- f = sys.stdin >- else: >- try: >- f = open(file, 'r') >- except IOError as msg: >- print(file, ":", msg) >- sys.exit(1) >- >- data = f.read() >- if f is not sys.stdin: >- f.close() >- >- x = klass() >- for c in data: >- x.feed(c) >- x.close() >- >- >-if __name__ == '__main__': >- test()
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 250380
:
218819
|
218820
|
218823
|
218824
|
219157
|
219162
|
224091
| 224133