# Port identity. The package name is PORTNAME prefixed with the
# flavor-specific ${PYTHON_PKGNAMEPREFIX}.
PORTNAME=	textract
PORTVERSION=	1.6.5
CATEGORIES=	textproc python
MASTER_SITES=	CHEESESHOP
PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}

MAINTAINER=	DtxdF@riseup.net
COMMENT=	Extract text from any document

LICENSE=	MIT
LICENSE_FILE=	${WRKSRC}/LICENSE

# Unconditional runtime dependencies; format-specific ones are attached to
# options further down.  Every version floor is inclusive (>=): a strict
# ">" on six would reject 1.12.0 itself, which is the stated minimum.
RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}argcomplete>=1.10.0:devel/py-argcomplete@${PY_FLAVOR} \
		${PYTHON_PKGNAMEPREFIX}chardet>=3:textproc/py-chardet@${PY_FLAVOR} \
		${PYTHON_PKGNAMEPREFIX}six>=1.12.0:devel/py-six@${PY_FLAVOR}

USES=		python:3.8+
USE_PYTHON=	autoplist distutils

# Stand-alone options (not part of any OPTIONS_GROUP), alphabetical.
OPTIONS_DEFINE=		ANTIWORD BEAUTIFULSOUP DOCX2TXT LIBXML2 LIBXSLT MSG \
			PPTX PS SPREADSHEET UNRTF
# Enabled out of the box.  Of all options declared in this Makefile only
# PDFMINER and POCKETSPHINX are left off by default.
OPTIONS_DEFAULT=	ANTIWORD BEAUTIFULSOUP DOCX2TXT FFMPEG FLAC \
			JPEG_TURBO LAME LIBXML2 LIBXSLT MSG PDFTOTEXT \
			PPTX PS SOX SPEECH_RECOGNITION SPREADSHEET \
			TESSERACT UNRTF

# Human-readable option descriptions, alphabetical by option name.
# FFMPEG, FLAC, LAME and PS have no description in this file; presumably
# they rely on descriptions supplied by the ports framework -- confirm.
ANTIWORD_DESC=			DOC document support
BEAUTIFULSOUP_DESC=		HTML parsing library
DOCX2TXT_DESC=			DOCX document support
JPEG_TURBO_DESC=		SIMD-accelerated JPEG codec
LIBXML2_DESC=			Python interface for XML parser library
LIBXSLT_DESC=			XML stylesheet transformation library
MSG_DESC=			MS Outlook MSG file format support
PDFMINER_DESC=			PDF parser and analyzer
PDFTOTEXT_DESC=			Extract text from a PDF document
POCKETSPHINX_DESC=		Interface to CMU Sphinxbase and Pocketsphinx
PPTX_DESC=			MS PowerPoint PPTX presentations support
SOX_DESC=			Command-line audio processing tool
SPEECH_RECOGNITION_DESC=	Python library for performing speech recognition
SPREADSHEET_DESC=		XLS and XLSX spreadsheet support
TESSERACT_DESC=			Commercial quality open source OCR engine
UNRTF_DESC=			RTF document support

# Per-option runtime dependencies.  Each <OPT>_RUN_DEPENDS entry is only
# pulled in when the corresponding option is enabled.

# Office documents: DOC, DOCX, PPTX, XLS/XLSX, RTF, Outlook MSG
ANTIWORD_RUN_DEPENDS=		antiword>0:textproc/antiword
DOCX2TXT_RUN_DEPENDS=		${PYTHON_PKGNAMEPREFIX}docx2txt>=0.8:textproc/py-docx2txt@${PY_FLAVOR}
PPTX_RUN_DEPENDS=		${PYTHON_PKGNAMEPREFIX}python-pptx>=0.6.18:textproc/py-python-pptx@${PY_FLAVOR}
SPREADSHEET_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}xlrd>=1.2.0:textproc/py-xlrd@${PY_FLAVOR}
UNRTF_RUN_DEPENDS=		unrtf>0:textproc/unrtf
MSG_RUN_DEPENDS=		${PYTHON_PKGNAMEPREFIX}extract-msg>=0.29:textproc/py-extract-msg@${PY_FLAVOR}

# Markup: HTML, EPUB, etc., plus the miscellaneous XML/XSLT helpers
BEAUTIFULSOUP_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.8.0:www/py-beautifulsoup@${PY_FLAVOR}
LIBXML2_RUN_DEPENDS=		${PYTHON_PKGNAMEPREFIX}libxml2>0:textproc/py-libxml2@${PY_FLAVOR}
LIBXSLT_RUN_DEPENDS=		libxslt>=1.1.15:textproc/libxslt

# Page description formats: PDF and PS
PDFMINER_RUN_DEPENDS=		${PYTHON_PKGNAMEPREFIX}pdfminer.six>=20191110:textproc/py-pdfminer.six@${PY_FLAVOR}
PDFTOTEXT_RUN_DEPENDS=		poppler-utils>0:graphics/poppler-utils
PS_RUN_DEPENDS=			pstotext>0:print/pstotext

# OCR
JPEG_TURBO_RUN_DEPENDS=		jpeg-turbo>0:graphics/jpeg-turbo
TESSERACT_RUN_DEPENDS=		tesseract>0:graphics/tesseract

# Audio
FFMPEG_RUN_DEPENDS=		ffmpeg>0:multimedia/ffmpeg
FLAC_RUN_DEPENDS=		flac>0:audio/flac
LAME_RUN_DEPENDS=		lame>0:audio/lame
POCKETSPHINX_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}pocketsphinx>0:audio/py-pocketsphinx@${PY_FLAVOR}
SOX_RUN_DEPENDS=		sox>0:audio/sox
SPEECH_RECOGNITION_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}SpeechRecognition>=3.8.1:audio/py-speechrecognition@${PY_FLAVOR}

# Option groups.  Only groups that have a member list are declared: there
# is no OPTIONS_GROUP_RTF, and UNRTF is already a stand-alone option in
# OPTIONS_DEFINE, so no RTF group is listed here.
OPTIONS_GROUP=		AUDIO OCR PDF
OPTIONS_GROUP_AUDIO=	FFMPEG FLAC LAME POCKETSPHINX SOX SPEECH_RECOGNITION
OPTIONS_GROUP_OCR=	JPEG_TURBO TESSERACT
OPTIONS_GROUP_PDF=	PDFMINER PDFTOTEXT

.include <bsd.port.mk>