--- b/textproc/py-feedparser/Makefile +++ b/textproc/py-feedparser/Makefile @@ -2,7 +2,7 @@ # $FreeBSD$ PORTNAME= feedparser -PORTVERSION= 5.2.1 +PORTVERSION= 6.0.1 CATEGORIES= textproc python MASTER_SITES= CHEESESHOP PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} @@ -10,10 +10,11 @@ PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} MAINTAINER= sbz@FreeBSD.org COMMENT= Universal feed parser written in Python -LICENSE= BSD2CLAUSE +LICENSE= BSD2CLAUSE PSFL +LICENSE_COMB= multi LICENSE_FILE= ${WRKSRC}/LICENSE -USES= python tar:bzip2 +USES= python:3.6+ USE_PYTHON= distutils autoplist NO_ARCH= yes @@ -22,6 +23,11 @@ OPTIONS_DEFINE= DOCS PORTDOCS= NEWS README.rst +PLIST_FILES+= ${PYTHON_SITELIBDIR}/sgmllib.py + +post-install: + ${INSTALL_DATA} ${FILESDIR}/sgmllib.py ${STAGEDIR}/${PYTHON_SITELIBDIR}/sgmllib.py + post-install-DOCS-on: ${MKDIR} ${STAGEDIR}${DOCSDIR} ${INSTALL_DATA} ${PORTDOCS:S,^,${WRKSRC}/,} ${STAGEDIR}${DOCSDIR} --- b/textproc/py-feedparser/distinfo +++ b/textproc/py-feedparser/distinfo @@ -1,3 +1,3 @@ -TIMESTAMP = 1464128973 -SHA256 (feedparser-5.2.1.tar.bz2) = ce875495c90ebd74b179855449040003a1beb40cd13d5f037a0654251e260b02 -SIZE (feedparser-5.2.1.tar.bz2) = 192328 +TIMESTAMP = 1602900188 +SHA256 (feedparser-6.0.1.tar.gz) = 6ca88edcaa43f428345968df903a87f020843eda5e28d7ea24a612158d61e74c +SIZE (feedparser-6.0.1.tar.gz) = 284620 --- /dev/null +++ b/textproc/py-feedparser/files/sgmllib.py @@ -0,0 +1,547 @@ +"""A parser for SGML, using the derived class as a static DTD.""" + +# XXX This only supports those SGML features used by HTML. + +# XXX There should be a way to distinguish between PCDATA (parsed +# character data -- the normal case), RCDATA (replaceable character +# data -- only char and entity references and end tags are special) +# and CDATA (character data -- only end tags are special). RCDATA is +# not supported at all. + +import _markupbase +import re + +__all__ = ["SGMLParser", "SGMLParseError"] + +# Regular expressions used for parsing + +interesting = re.compile('[&<]') +incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' + '<([a-zA-Z][^<>]*|' + '/([a-zA-Z][^<>]*)?|' + '![^<>]*)?') + +entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') +charref = re.compile('&#([0-9]+)[^0-9]') + +starttagopen = re.compile('<[>a-zA-Z]') +shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') +shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') +piclose = re.compile('>') +endbracket = re.compile('[<>]') +tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') +attrfind = re.compile( + r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' + r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?') + + +class SGMLParseError(RuntimeError): + """Exception raised for all parse errors.""" + pass + + +# SGML parser base class -- find tags and call handler functions. +# Usage: p = SGMLParser(); p.feed(data); ...; p.close(). +# The dtd is defined by deriving a class which defines methods +# with special names to handle tags: start_foo and end_foo to handle +# and , respectively, or do_foo to handle by itself. +# (Tags are converted to lower case for this purpose.) The data +# between tags is passed to the parser by calling self.handle_data() +# with some data as argument (the data may be split up in arbitrary +# chunks). Entity references are passed by calling +# self.handle_entityref() with the entity reference as argument. + +class SGMLParser(_markupbase.ParserBase): + # Definition of entities -- derived classes may override + entity_or_charref = re.compile('&(?:' + '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)' + ')(;?)') + + def __init__(self, verbose=0): + """Initialize and reset this instance.""" + self.verbose = verbose + self.reset() + + def reset(self): + """Reset this instance. Loses all unprocessed data.""" + self.__starttag_text = None + self.rawdata = '' + self.stack = [] + self.lasttag = '???' + self.nomoretags = 0 + self.literal = 0 + _markupbase.ParserBase.reset(self) + + def setnomoretags(self): + """Enter literal mode (CDATA) till EOF. + + Intended for derived classes only. + """ + self.nomoretags = self.literal = 1 + + def setliteral(self, *args): + """Enter literal mode (CDATA). + + Intended for derived classes only. + """ + self.literal = 1 + + def feed(self, data): + """Feed some data to the parser. + + Call this as often as you want, with as little or as much text + as you want (may include '\n'). (This just saves the text, + all the processing is done by goahead().) + """ + + self.rawdata = self.rawdata + data + self.goahead(0) + + def close(self): + """Handle the remaining data.""" + self.goahead(1) + + def error(self, message): + raise SGMLParseError(message) + + # Internal -- handle data as far as reasonable. May leave state + # and data to be processed by a subsequent call. If 'end' is + # true, force handling all data as if followed by EOF marker. + def goahead(self, end): + rawdata = self.rawdata + i = 0 + n = len(rawdata) + while i < n: + if self.nomoretags: + self.handle_data(rawdata[i:n]) + i = n + break + match = interesting.search(rawdata, i) + if match: j = match.start() + else: j = n + if i < j: + self.handle_data(rawdata[i:j]) + i = j + if i == n: break + if rawdata[i] == '<': + if starttagopen.match(rawdata, i): + if self.literal: + self.handle_data(rawdata[i]) + i = i+1 + continue + k = self.parse_starttag(i) + if k < 0: break + i = k + continue + if rawdata.startswith(" (i + 1): + self.handle_data("<") + i = i+1 + else: + # incomplete + break + continue + if rawdata.startswith("