View | Details | Raw Unified | Return to bug 72776
Collapse All | Expand All

(-)webalizer/Makefile (-1 / +17 lines)
Lines 9-16 Link Here
9
9
10
MAINTAINER=	dinoex@FreeBSD.org
10
MAINTAINER=	dinoex@FreeBSD.org
11
11
12
MASTERDIR=	${.CURDIR}/../../www/webalizer
12
WEBALIZER_LANG=	japanese
13
WEBALIZER_LANG=	japanese
13
MASTERDIR?=	${.CURDIR}/../../www/webalizer
14
15
# The patch file is written by URASHIMA Akira
16
#       http://tyche.pu-toyama.ac.jp/~a-urasim/webalizer/webalizer-a-urasim_2.patch
17
OPTIONS=                WEBALIZER_CONV "Use character code convert patch" off
18
19
.if defined(WITH_WEBALIZER_CONV)
20
CONFIGURE_ARGS+=	--enable-mininls
21
CONFIGURE_ENV+=		LIBS="-L${LOCALBASE}/lib -liconv"
22
CFLAGS+=		-I${PREFIX}/include
23
.endif
24
25
post-patch:
26
.if defined(WITH_WEBALIZER_CONV)
27
	@cd ${WRKSRC} && ${PATCH} < ${.CURDIR}/files/extra-webalizer-a-urasim_2.patch
28
.endif
29
	@cd ${WRKSRC} && ${PATCH} < ${.CURDIR}/files/extra-ja-webalizer.conf-dist.patch
14
30
15
.if exists(${.CURDIR}/Makefile.local)
31
.if exists(${.CURDIR}/Makefile.local)
16
.include "${.CURDIR}/Makefile.local"
32
.include "${.CURDIR}/Makefile.local"
(-)webalizer/files/extra-ja-webalizer.conf-dist.patch (+67 lines)
Line 0 Link Here
1
--- sample.conf.orig	Fri Sep 29 12:51:42 2000
2
+++ sample.conf	Thu Oct 14 11:48:21 2004
3
@@ -107,9 +107,12 @@
4
 
5
 PageType	htm*
6
 PageType	cgi
7
+#PageType	shtml
8
 #PageType	phtml
9
 #PageType	php3
10
+#PageType	php
11
 #PageType	pl
12
+#PageType	rb
13
 
14
 # UseHTTPS should be used if the analysis is being run on a
15
 # secure server, and links to urls should use 'https://' instead
16
@@ -153,6 +156,7 @@
17
 # is 80 characters, so use multiple lines if needed.
18
 
19
 #HTMLHead <META NAME="author" CONTENT="The Webalizer">
20
+HTMLHead <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=x-euc-jp">
21
 
22
 # HTMLBody defined the HTML code to be inserted, starting with the
23
 # <BODY> tag.  If not specified, the default is shown below.  If
24
@@ -393,6 +397,9 @@
25
 HideURL		*.png
26
 HideURL		*.PNG
27
 HideURL		*.ra
28
+HideURL		*.css
29
+HideURL		*.CSS
30
+HideURL		*.ico
31
 
32
 # Hiding agents is kind of futile
33
 #HideAgent	RealPlayer
34
@@ -412,6 +419,11 @@
35
 #GroupReferrer	excite.com/     Excite
36
 #GroupReferrer	infoseek.com/   InfoSeek
37
 #GroupReferrer	webcrawler.com/ WebCrawler
38
+#GroupReferrer	yahoo.co.jp/	Yahoo!Japan
39
+#GroupReferrer	google.co.jp/	GoogleJapan
40
+#GroupReferrer	infoseek.co.jp/	InfoSeekJapan
41
+#GroupReferrer	goo.ne.jp/	Goo
42
+#GroupReferrer	msn.co.jp/	MSNJapan
43
 
44
 #GroupUser      root            Admin users
45
 #GroupUser      admin           Admin users
46
@@ -530,6 +542,21 @@
47
 SearchEngine	mamma.com	query=
48
 SearchEngine	alltheweb.com	query=
49
 SearchEngine	northernlight.com  qr=
50
+
51
+SearchEngine	yahoo.co.jp	p=
52
+SearchEngine	google.co.jp	q=
53
+SearchEngine	infoseek.co.jp	qt=
54
+SearchEngine	msn.co.jp	q=
55
+# ocn
56
+SearchEngine	goo.ne.jp	MT=
57
+SearchEngine	biglobe.ne.jp	q=
58
+SearchEngine	nifty.com	Text=
59
+# so-net odn
60
+SearchEngine	excite.co.jp	search=
61
+SearchEngine	livedoor.com	q=
62
+SearchEngine	jp.aol.com	query=
63
+#SearchEngine	.google.	q=
64
+#SearchEngine	bulkfeeds.net	q=
65
 
66
 # The Dump* keywords allow the dumping of Sites, URL's, Referrers
67
 # User Agents, Usernames and Search strings to seperate tab delimited
(-)webalizer/files/extra-webalizer-a-urasim_2.patch (+241 lines)
Line 0 Link Here
1
--- webalizer.c.a-urasim	Wed Apr 17 07:11:31 2002
2
+++ webalizer.c	Tue Dec 23 23:26:23 2003
3
@@ -39,6 +39,7 @@
4
 #include <sys/utsname.h>
5
 #include <sys/times.h>
6
 #include <zlib.h>
7
+#include <iconv.h>
8
 
9
 /* ensure getopt */
10
 #ifdef HAVE_GETOPT_H
11
@@ -224,6 +225,8 @@
12
 char    *f_cp=f_buf+GZ_BUFSIZE;               /* pointer into the buffer  */
13
 int     f_end;                                /* count to end of buffer   */ 
14
 
15
+iconv_t cd_from_sjis, cd_from_utf8;
16
+
17
 /*********************************************/
18
 /* MAIN - start here                         */
19
 /*********************************************/
20
@@ -526,6 +529,9 @@
21
 
22
    start_time = times(&mytms);
23
 
24
+   cd_from_sjis = iconv_open("EUC-JP", "Shift_JIS");
25
+   cd_from_utf8 = iconv_open("EUC-JP", "UTF-8");
26
+
27
    /*********************************************/
28
    /* MAIN PROCESS LOOP - read through log file */
29
    /*********************************************/
30
@@ -1345,6 +1351,9 @@
31
       if (dns_db) close_cache();
32
 #endif
33
 
34
+      iconv_close(cd_from_sjis);
35
+      iconv_close(cd_from_utf8);
36
+
37
       /* Whew, all done! Exit with completion status (0) */
38
       exit(0);
39
    }
40
@@ -1773,6 +1782,23 @@
41
 
42
    if (!str) return NULL;                       /* make sure strings valid */
43
 
44
+   while(*cp1){  /* for apache log's escape code. */
45
+     if(*cp1 == '\\' && *(cp1+1) == 'x' &&
46
+	isxdigit(*(cp1+2)) && isxdigit(*(cp1+3))){
47
+       *cp2 = from_hex(*(cp1+2))*16 + from_hex(*(cp1+3));
48
+       if ((*cp2<32)||(*cp2==127)) *cp2='_';
49
+       cp1+=4; cp2++;
50
+
51
+     }
52
+     else if(*cp1 == '\\' && *(cp1+1) == '\\'){
53
+       *cp2++='\\';
54
+       cp1+=2;
55
+     }
56
+     else *cp2++ = *cp1++;
57
+   }
58
+   *cp2=*cp1;
59
+
60
+   cp1=cp2=str;
61
    while (*cp1)
62
    {
63
       if (*cp1=='%')                            /* Found an escape?        */
64
@@ -1783,7 +1809,7 @@
65
             if (*cp1) *cp2=from_hex(*cp1++)*16; /* convert hex to an ascii */
66
             if (*cp1) *cp2+=from_hex(*cp1);     /* (hopefully) character   */
67
             if ((*cp2<32)||(*cp2==127)) *cp2='_'; /* make '_' if its bad   */
68
-            if (*cp1) cp2++; cp1++;
69
+            if (*cp1){ cp2++; cp1++;} /* bug? */
70
          }
71
          else *cp2++='%';
72
       }
73
@@ -1793,6 +1819,116 @@
74
    return str;                                  /* return the string       */
75
 }
76
 
77
/*********************************************/
/* SCORE_EUCJ - rate a string as EUC-JP      */
/*********************************************/
/*
 * Scans the NUL-terminated byte string 'str' and reports how plausible
 * it is that the bytes are EUC-JP text.
 *
 * Returns -1 if 'str' is NULL or contains a byte sequence that is not
 * well-formed EUC-JP (including a multibyte character cut off by the
 * end of the string).  Otherwise returns a score built from:
 *   +1  for each printable ASCII byte (0x20-0x7e)
 *   +2  for each kanji (two bytes 0xa1-0xfe, optionally preceded by
 *       the 0x8f prefix for supplementary kanji)
 *   +0  for half-width kana (0x8e + 0xa1-0xdf) and control bytes
 */
int score_eucj(unsigned char *str)
{
  enum { ST_START, ST_KANJI2, ST_KANA2, ST_HOJO1 };
  int stat = ST_START;
  int score = 0;

  if (str == NULL) return -1;

  for (; *str != 0; str++) {
    switch (stat) {
    case ST_START:
      if (*str >= 0x20 && *str <= 0x7e) score++;               /* ASCII     */
      else if (*str >= 0xa1 && *str <= 0xfe) stat = ST_KANJI2; /* kanji (1) */
      else if (*str == 0x8f) stat = ST_HOJO1;  /* supplementary kanji prefix */
      else if (*str == 0x8e) stat = ST_KANA2;  /* half-width kana prefix     */
      else if (*str < 0x20) { /* control byte: neither good nor bad */ }
      else return -1;
      break;
    case ST_HOJO1:
      /* the prefix must be followed by a regular two-byte kanji */
      if (*str >= 0xa1 && *str <= 0xfe) stat = ST_KANJI2;
      else return -1;
      break;
    case ST_KANJI2:
      if (*str >= 0xa1 && *str <= 0xfe) score += 2;            /* kanji (2) */
      else return -1;
      stat = ST_START;
      break;
    case ST_KANA2:
      if (*str >= 0xa1 && *str <= 0xdf) { /* half-width kana scores 0 */ }
      else return -1;
      stat = ST_START;
      break;
    default:
      return -1;
    }
  }

  /* a string that ends in the middle of a character is not valid */
  return (stat == ST_START) ? score : -1;
}
109
+
110
/*********************************************/
/* SCORE_SJIS - rate a string as Shift_JIS   */
/*********************************************/
/*
 * Scans the NUL-terminated byte string 'str' and reports how plausible
 * it is that the bytes are Shift_JIS text.
 *
 * Returns -1 if 'str' is NULL or contains a byte sequence that is not
 * well-formed Shift_JIS (including a double-byte character cut off by
 * the end of the string).  Otherwise returns a score built from:
 *   +1  for each printable ASCII byte (0x20-0x7e)
 *   +2  for each double-byte character (lead 0x81-0x9f or 0xe0-0xfc,
 *       trail 0x40-0x7e or 0x80-0xfc)
 *   +0  for half-width kana (0xa1-0xdf) and control bytes
 */
int score_sjis(unsigned char *str)
{
  int want_trail = 0;   /* non-zero while a trail byte is expected */
  int score = 0;

  if (str == NULL) return -1;

  for (; *str != 0; str++) {
    if (want_trail) {
      if ((*str >= 0x40 && *str <= 0x7e) ||
          (*str >= 0x80 && *str <= 0xfc)) score += 2;          /* SJIS (2) */
      else return -1;
      want_trail = 0;
      continue;
    }

    if (*str >= 0x20 && *str <= 0x7e) score++;                 /* ASCII    */
    else if ((*str >= 0x81 && *str <= 0x9f) ||
             (*str >= 0xe0 && *str <= 0xfc)) want_trail = 1;   /* SJIS (1) */
    else if (*str >= 0xa1 && *str <= 0xdf) { /* half-width kana scores 0 */ }
    else if (*str < 0x20) { /* control byte: neither good nor bad */ }
    else return -1;
  }

  /* a lead byte with no trail byte means the string is not valid */
  return want_trail ? -1 : score;
}
138
+
139
/*********************************************/
/* SCORE_UTF8 - rate a string as UTF-8       */
/*********************************************/
/*
 * Scans the NUL-terminated byte string 'str' and reports how plausible
 * it is that the bytes are UTF-8 text.
 *
 * Returns -1 if 'str' is NULL or contains a byte sequence that is not
 * structurally valid UTF-8: a stray continuation byte, a lead byte not
 * followed by enough continuation bytes (including a sequence cut off
 * by the end of the string), or a lead byte that never occurs in UTF-8
 * (0xc0, 0xc1, 0xf5 and above).  Otherwise returns a score built from:
 *   +1  for each printable ASCII byte (0x20-0x7e)
 *   +1  for each two-byte sequence
 *   +3  for each three-byte sequence
 *   +4  for each four-byte sequence
 *   +0  for control bytes
 *
 * Overlong three/four-byte forms and encoded surrogates are not
 * examined; this is a plausibility score, not a full validator.
 */
int score_utf8(unsigned char *str)
{
  int pending = 0;   /* continuation bytes still expected            */
  int weight = 0;    /* score awarded when the sequence is complete  */
  int score = 0;

  if (str == NULL) return -1;

  for (; *str != 0; str++) {
    if (pending > 0) {
      if (*str < 0x80 || *str > 0xbf) return -1;
      if (--pending == 0) score += weight;
      continue;
    }

    if (*str >= 0x20 && *str <= 0x7e) score++;                 /* ASCII     */
    else if (*str >= 0xc2 && *str <= 0xdf) { pending = 1; weight = 1; }
    else if (*str >= 0xe0 && *str <= 0xef) { pending = 2; weight = 3; }
    else if (*str >= 0xf0 && *str <= 0xf4) { pending = 3; weight = 4; }
    else if (*str < 0x20) { /* control byte: neither good nor bad */ }
    else return -1;
  }

  /* a string that ends in the middle of a sequence is not valid */
  return (pending == 0) ? score : -1;
}
185
+
186
+
187
 /*********************************************/
188
 /* SRCH_STRING - get search strings from ref */
189
 /*********************************************/
190
@@ -1804,6 +1940,10 @@
191
    char srch[80]="";
192
    unsigned char *cp1, *cp2, *cps;
193
    int  sp_flg=0;
194
+   int sjis, eucj, utf8;
195
+   char tmpbuf2[BUFSIZE];
196
+   size_t inlen, outlen;
197
+   unsigned char *cp3;
198
 
199
    /* Check if search engine referrer or return  */
200
    if ( (cps=isinglist(search_list,log_rec.refer))==NULL) return; 
201
@@ -1839,9 +1978,39 @@
202
    cp1=cp2+strlen(cp2)-1;
203
    while (cp1!=cp2) if (isspace(*cp1)) *cp1--='\0'; else break;
204
 
205
+   utf8=score_utf8(cp2);
206
+   sjis=score_sjis(cp2);
207
+   eucj=score_eucj(cp2);
208
+   if(utf8 >= sjis && utf8 >= eucj){
209
+     iconv(cd_from_utf8, NULL, 0, NULL, 0);
210
+     cp3 = cp2;
211
+     inlen = strlen(cp2)+1;
212
+     cp1 = tmpbuf2;
213
+     outlen = sizeof(tmpbuf2);
214
+     if(iconv(cd_from_utf8, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
215
+	inlen == 0){
216
+       cp2 = tmpbuf2;
217
+     }
218
+   }
219
+   else if(sjis > utf8 && sjis > eucj){
220
+     iconv(cd_from_sjis, NULL, 0, NULL, 0);
221
+     cp3 = cp2;
222
+     inlen = strlen(cp2)+1;
223
+     cp1 = tmpbuf2;
224
+     outlen = sizeof(tmpbuf2);
225
+     if(iconv(cd_from_sjis, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
226
+	inlen == 0){
227
+       cp2 = tmpbuf2;
228
+     }
229
+   }
230
+
231
    /* strip invalid chars */
232
    cp1=cp2;
233
-   while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }
234
+   while (*cp1!=0) {
235
+     if ((*cp1<32)||(*cp1==127)) *cp1='_';
236
+     *cp1=tolower(*cp1);
237
+     cp1++;
238
+   }
239
 
240
    if (put_snode(cp2,(u_long)1,sr_htab))
241
    {

Return to bug 72776