View | Details | Raw Unified | Return to bug 164813
Collapse All | Expand All

(-)Makefile (-18 / +9 lines)
Lines 6-12 Link Here
6
#
6
#
7
7
8
PORTNAME=	jericho-html
8
PORTNAME=	jericho-html
9
PORTVERSION=	2.6
9
PORTVERSION=	3.2
10
CATEGORIES=	www java
10
CATEGORIES=	www java
11
MASTER_SITES=	SF/jerichohtml/${PORTNAME}/${PORTVERSION}
11
MASTER_SITES=	SF/jerichohtml/${PORTNAME}/${PORTVERSION}
12
12
Lines 14-51 Link Here
14
COMMENT=	A java library to analyse and manipulate HTML
14
COMMENT=	A java library to analyse and manipulate HTML
15
15
16
USE_ZIP=	yes
16
USE_ZIP=	yes
17
USE_JAVA=	1.3+
17
USE_JAVA=	1.5+
18
18
19
INTERFACES:=	"compile-time-dependencies/slf4j-api-1.5.2.jar:\
19
INTERFACES:=	"compile-time-dependencies/slf4j-api-1.6.1.jar:\
20
		compile-time-dependencies/commons-logging-api-1.1.1.jar:\
20
		compile-time-dependencies/commons-logging-api-1.1.1.jar:\
21
		compile-time-dependencies/log4j-api-1.2.15.jar"
21
		compile-time-dependencies/log4j-1.2.16.jar"
22
22
23
PORTDOCS=	api
23
PORTDOCS=	*
24
PLIST_FILES+=	%%JAVAJARDIR%%/${PORTNAME}.jar
24
PLIST_FILES+=	%%JAVAJARDIR%%/${PORTNAME}.jar
25
25
26
do-build:
26
do-build:
27
	(cd ${WRKSRC} &&  ${RM} -rf classes/* && ${JAVAC}         \
27
	(cd ${WRKSRC} &&  ${RM} -rf classes/* && ${JAVAC}         \
28
		-classpath ${INTERFACES:S, ,,g}                   \
28
		-classpath ${INTERFACES:S, ,,g}                   \
29
		-d classes src/java/au/id/jericho/lib/html/*.java \
29
		-d classes src/java/net/htmlparser/jericho/*.java \
30
		src/java/au/id/jericho/lib/html/nodoc/*.java)
30
		src/java/net/htmlparser/jericho/nodoc/*.java)
31
	${MKDIR} ${WRKSRC}/lib
31
	${JAR} -cf ${WRKSRC}/lib/${PORTNAME}.jar                  \
32
	${JAR} -cf ${WRKSRC}/lib/${PORTNAME}.jar                  \
32
		 -C ${WRKSRC}/classes .
33
		 -C ${WRKSRC}/classes .
33
.if !defined(NOPORTDOCS)
34
	(cd ${WRKSRC} && ${RM} -rf doc/* && ${JAVADOC} -quiet     \
35
		-windowtitle "Jericho HTML Parser ${PORTVERSION}" \
36
		-classpath ${INTERFACES:S, ,,g}:src/java:classes  \
37
		-use -d ${WRKSRC}/doc/api                         \
38
		-subpackages au.id.jericho.lib.html               \
39
		-exclude au.id.jericho.lib.html.nodoc             \
40
		-noqualifier au.id.jericho.lib.html               \
41
		-group "Core package" au.id.jericho.lib.html)
42
.endif
43
34
44
do-install:
35
do-install:
45
	${INSTALL_DATA} ${WRKSRC}/lib/${PORTNAME}.jar ${JAVAJARDIR}
36
	${INSTALL_DATA} ${WRKSRC}/lib/${PORTNAME}.jar ${JAVAJARDIR}
46
.if !defined(NOPORTDOCS)
37
.if !defined(NOPORTDOCS)
47
	${MKDIR} ${DOCSDIR}
38
	${MKDIR} ${DOCSDIR}
48
	(cd ${WRKSRC}/doc && ${FIND} api | ${CPIO} -pdmu ${DOCSDIR})
39
	(cd ${WRKSRC}/docs && ${COPYTREE_SHARE} . ${DOCSDIR})
49
.endif
40
.endif
50
41
51
.include <bsd.port.mk>
42
.include <bsd.port.mk>
(-)distinfo (-2 / +2 lines)
Lines 1-2 Link Here
1
SHA256 (jericho-html-2.6.zip) = 6968459488579d17c88d2cbd7f46d6e07ea1bd086caac1015a1845975a550f68
1
SHA256 (jericho-html-3.2.zip) = 35787b825bd2fbf78d7c521e27a1ca164caaa01426ac38ae8f3c9697efc0dc13
2
SIZE (jericho-html-2.6.zip) = 1522427
2
SIZE (jericho-html-3.2.zip) = 2396280
(-)files/patch-encoding (-15 lines)
Removed Link Here
1
--- src/java/au/id/jericho/lib/html/StreamEncodingDetector.java.orig	2008-06-17 21:01:53.890292905 +0200
2
+++ src/java/au/id/jericho/lib/html/StreamEncodingDetector.java	2008-06-17 21:02:43.940300330 +0200
3
@@ -203,9 +203,9 @@
4
 		// Assume the more likely case of four 8-bit characters <= U+00FF.
5
 		// Check whether it fits some common EBCDIC strings that might be found at the start of a document:
6
 		if (b1==0x4C) { // first character is EBCDIC '<' (ASCII 'L'), check a couple more characters before assuming EBCDIC encoding:
7
-			if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC ("Lo§”" in Windows-1252)
8
-			if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC ("LZÄÖ" in Windows-1252)
9
-			if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" ("LÈãÔ" in Windows-1252), or "<htm" ("Lˆ£”" in Windows-1252)
10
+			if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC 
11
+			if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC 
12
+			if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" or "<htm" 
13
 			// although this is not an exhaustive check for EBCDIC, it is safer to assume a more common preliminary encoding if none of these conditions are met.
14
 		}
15
 		// Now confident that it is not EBCDIC, but some other 8-bit encoding.

Return to bug 164813