View | Details | Raw Unified | Return to bug 282880 | Differences between
and this patch

Collapse All | Expand All

(-)b/textproc/amberfish/Makefile (-28 / +14 lines)
Lines 1-16 Link Here
1
PORTNAME=	amberfish
1
PORTNAME=	amberfish
2
PORTVERSION=	1.6.4
2
DISTVERSION=	1.7.1
3
PORTREVISION=	3
4
CATEGORIES=	textproc databases
3
CATEGORIES=	textproc databases
5
MASTER_SITES=	SF/${PORTNAME}/Amberfish%20source%20-%20stable/${PORTVERSION}	\
6
		http://etymon.com/software/amberfish/stable/
7
4
8
MAINTAINER=	ports@FreeBSD.org
5
USE_GITLAB=	yes
9
COMMENT=	General purpose text retrieval Software
6
GL_COMMIT=	d0b6e49d750e2c445a4c6526422d4ff43bc668d7
10
WWW=		https://web.archive.org/web/20100419215307/http://www.etymon.com/tr.html
11
7
12
LICENSE=	GPLv2
8
MAINTAINER=	nrn@etymon.com
13
LICENSE_FILE=	${WRKSRC}/COPYING
9
COMMENT=	Full-text search engine with command-line interface
10
WWW=		https://gitlab.com/amberfish/amberfish
11
12
LICENSE=	MIT
13
LICENSE_FILE=	${WRKSRC}/LICENSE
14
14
15
LIB_DEPENDS=	libxerces-c.so:textproc/xerces-c3
15
LIB_DEPENDS=	libxerces-c.so:textproc/xerces-c3
16
16
Lines 26-54 PORTDOCS= * Link Here
26
26
27
OPTIONS_DEFINE=	DOCS
27
OPTIONS_DEFINE=	DOCS
28
28
29
DOCS_USES=	makeinfo
29
DOCS_BUILD_DEPENDS=	asciidoctor:textproc/rubygem-asciidoctor
30
DOCS_PLIST_FILES=	share/man/man1/af.1.gz
30
DOCS_PLIST_FILES=	share/man/man1/af.1.gz
31
31
32
post-extract:
32
post-configure:
33
	${CP} ${FILESDIR}/porter.cc ${WRKSRC}/src
33
	echo "#define AF_VERSION \"v${PORTVERSION}\"" > ${WRKSRC}/src/backend/version.h
34
34
	echo v${PORTVERSION} > ${WRKSRC}/doc/version.adoc
35
post-patch:
36
	@${REINPLACE_CMD} -e \
37
		's|$${MAKEFLAGS} ||' ${WRKSRC}/Makefile
38
	@${REINPLACE_CMD} -e \
39
		's|cp |$${BSD_INSTALL_MAN} |' ${WRKSRC}/doc/Makefile.in
40
	@${REINPLACE_CMD} -e \
41
		's|-O3 |@CFLAGS@ | ; \
42
		s|make strip|| ; \
43
		s|cp |$${BSD_INSTALL_PROGRAM} |' ${WRKSRC}/src/Makefile.in
44
45
post-patch-DOCS-off:
46
	@${REINPLACE_CMD} -e \
47
		'/cd doc/d' ${WRKSRC}/Makefile
48
35
49
post-install-DOCS-on:
36
post-install-DOCS-on:
50
	@${MKDIR} ${STAGEDIR}${DOCSDIR}
37
	@${MKDIR} ${STAGEDIR}${DOCSDIR}
51
	${INSTALL_DATA} ${WRKSRC}/amberfish.png ${STAGEDIR}${DOCSDIR}
38
	${INSTALL_DATA} ${WRKSRC}/doc/amberfish.html ${STAGEDIR}${DOCSDIR}
52
	${INSTALL_DATA} ${WRKSRC}/doc/html/*.html ${STAGEDIR}${DOCSDIR}
53
39
54
.include <bsd.port.mk>
40
.include <bsd.port.mk>
(-)b/textproc/amberfish/distinfo (-2 / +2 lines)
Lines 1-2 Link Here
1
SHA256 (amberfish-1.6.4.tar.gz) = 155ac6e6b9b76fb7cbd94952548f718ab6add72c3b4fd2482d89abb39d96ce76
1
SHA256 (amberfish-amberfish-d0b6e49d750e2c445a4c6526422d4ff43bc668d7_GL0.tar.gz) = 76b878255f85e13e0716bfa7f54023cac09e0352ead631c8cc429d0d850438d9
2
SIZE (amberfish-1.6.4.tar.gz) = 127198
2
SIZE (amberfish-amberfish-d0b6e49d750e2c445a4c6526422d4ff43bc668d7_GL0.tar.gz) = 137011
(-)b/textproc/amberfish/files/patch-Makefile (+26 lines)
Added Link Here
1
--- Makefile.orig	2024-11-23 13:45:47 UTC
2
+++ Makefile
3
@@ -16,18 +16,18 @@ strip:
4
 	cd doc ; ${MAKE} html
5
 
6
 strip:
7
-	cd src/backend ; ${MAKE} ${MAKEFLAGS} strip
8
+	cd src/backend ; ${MAKE} strip
9
 #	cd src/interface ; ${MAKE} ${MAKEFLAGS} strip
10
 
11
 install:
12
-	cd src/backend ; ${MAKE} ${MAKEFLAGS} install
13
+	cd src/backend ; ${MAKE} install
14
 #	cd src/interface ; ${MAKE} ${MAKEFLAGS} install
15
-	cd doc ; ${MAKE} ${MAKEFLAGS} install
16
+	cd doc ; ${MAKE} install
17
 
18
 uninstall:
19
-	cd src/backend ; ${MAKE} ${MAKEFLAGS} uninstall
20
+	cd src/backend ; ${MAKE} uninstall
21
 #	cd src/interface ; ${MAKE} ${MAKEFLAGS} uninstall
22
-	cd doc ; ${MAKE} ${MAKEFLAGS} uninstall
23
+	cd doc ; ${MAKE} uninstall
24
 
25
 clean:
26
 	rm -fr autom4te.cache
(-)b/textproc/amberfish/files/patch-src_backend_Makefile.in (+10 lines)
Added Link Here
1
--- src/backend/Makefile.in.orig	2024-11-23 13:47:04 UTC
2
+++ src/backend/Makefile.in
3
@@ -62,7 +62,6 @@ install: all
4
 	strip ${AF}
5
 
6
 install: all
7
-	make strip
8
 	mkdir -p ${PREFIXBIN}
9
 	cp ${BIN} ${PREFIXBIN}/.
10
 
(-)a/textproc/amberfish/files/porter.cc (-438 lines)
Removed Link Here
1
2
/* This is the Porter stemming algorithm, coded up in ANSI C by the
3
   author. It may be regarded as canonical, in that it follows the
4
   algorithm presented in
5
6
   Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
7
   no. 3, pp 130-137,
8
9
   only differing from it at the points marked --DEPARTURE-- below.
10
11
   See also http://www.tartarus.org/~martin/PorterStemmer
12
13
The algorithm as described in the paper could be exactly replicated
14
by adjusting the points of DEPARTURE, but this is barely necessary,
15
because (a) the points of DEPARTURE are definitely improvements, and
16
(b) no encoding of the Porter stemmer I have seen is anything like
17
as exact as this version, even with the points of DEPARTURE!
18
19
You can compile it on Unix with 'gcc -O3 -o stem stem.c' after which
20
'stem' takes a list of inputs and sends the stemmed equivalent to
21
stdout.
22
23
The algorithm as encoded here is particularly fast.
24
25
Release 1
26
*/
27
28
#include <string.h>                               /* for memmove */
29
30
#define TRUE 1
31
#define FALSE 0
32
33
/* The main part of the stemming algorithm starts here. b is a buffer
34
   holding a word to be stemmed. The letters are in b[k0], b[k0+1] ...
35
   ending at b[k]. In fact k0 = 0 in this demo program. k is readjusted
36
   downwards as the stemming progresses. Zero termination is not in fact
37
   used in the algorithm.
38
39
   Note that only lower case sequences are stemmed. Forcing to lower case
40
   should be done before stem(...) is called.
41
*/
42
43
static char * b;                                  /* buffer for word to be stemmed */
44
static int k,k0,j;                                /* j is a general offset into the string */
45
46
/* cons(i) is TRUE <=> b[i] is a consonant. */
47
48
static int cons(int i)
49
{
50
    switch (b[i])
51
    {
52
        case 'a': case 'e': case 'i': case 'o': case 'u': return FALSE;
53
        case 'y': return (i==k0) ? TRUE : !cons(i-1);
54
        default: return TRUE;
55
    }
56
}
57
58
59
/* m() measures the number of consonant sequences between k0 and j. if c is
60
   a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
61
   presence,
62
63
      <c><v>       gives 0
64
      <c>vc<v>     gives 1
65
      <c>vcvc<v>   gives 2
66
      <c>vcvcvc<v> gives 3
67
      ....
68
*/
69
70
static int m()
71
{
72
    int n = 0;
73
    int i = k0;
74
    while(TRUE)
75
    {
76
        if (i > j) return n;
77
        if (! cons(i)) break; i++;
78
    }
79
    i++;
80
    while(TRUE)
81
    {
82
        while(TRUE)
83
        {
84
            if (i > j) return n;
85
            if (cons(i)) break;
86
            i++;
87
        }
88
        i++;
89
        n++;
90
        while(TRUE)
91
        {
92
            if (i > j) return n;
93
            if (! cons(i)) break;
94
            i++;
95
        }
96
        i++;
97
    }
98
}
99
100
101
/* vowelinstem() is TRUE <=> k0,...j contains a vowel */
102
103
static int vowelinstem()
104
{
105
    int i; for (i = k0; i <= j; i++) if (! cons(i)) return TRUE;
106
    return FALSE;
107
}
108
109
110
/* doublec(j) is TRUE <=> j,(j-1) contain a double consonant. */
111
112
static int doublec(int j)
113
{
114
    if (j < k0+1) return FALSE;
115
    if (b[j] != b[j-1]) return FALSE;
116
    return cons(j);
117
}
118
119
120
/* cvc(i) is TRUE <=> i-2,i-1,i has the form consonant - vowel - consonant
121
   and also if the second c is not w,x or y. this is used when trying to
122
   restore an e at the end of a short word. e.g.
123
124
      cav(e), lov(e), hop(e), crim(e), but
125
      snow, box, tray.
126
127
*/
128
129
static int cvc(int i)
130
{
131
    if (i < k0+2 || !cons(i) || cons(i-1) || !cons(i-2)) return FALSE;
132
    {
133
        int ch = b[i];
134
        if (ch == 'w' || ch == 'x' || ch == 'y') return FALSE;
135
    }
136
    return TRUE;
137
}
138
139
140
/* ends(s) is TRUE <=> k0,...k ends with the string s. */
141
142
static int ends(char * s)
143
{
144
    int length = s[0];
145
    if (s[length] != b[k]) return FALSE;          /* tiny speed-up */
146
    if (length > k-k0+1) return FALSE;
147
    if (memcmp(b+k-length+1,s+1,length) != 0) return FALSE;
148
    j = k-length;
149
    return TRUE;
150
}
151
152
153
/* setto(s) sets (j+1),...k to the characters in the string s, readjusting
154
   k. */
155
156
static void setto(char * s)
157
{
158
    int length = s[0];
159
    memmove(b+j+1,s+1,length);
160
    k = j+length;
161
}
162
163
164
/* r(s) is used further down. */
165
166
static void r(char * s)
{
    /* Replace the suffix only when the stem before it has measure > 0. */
    if (m() <= 0) return;
    setto(s);
}
167
168
/* step1ab() gets rid of plurals and -ed or -ing. e.g.
169
170
	caresses  ->  caress
171
	ponies    ->  poni
172
	ties      ->  ti
173
	caress    ->  caress
174
	cats      ->  cat
175
176
	feed      ->  feed
177
	agreed    ->  agree
178
	disabled  ->  disable
179
180
	matting   ->  mat
181
	mating    ->  mate
182
	meeting   ->  meet
183
	milling   ->  mill
184
	messing   ->  mess
185
186
	meetings  ->  meet
187
188
*/
189
190
static void step1ab()
191
{
192
    if (b[k] == 's')
193
    {
194
        if (ends("\04" "sses")) k -= 2; else
195
            if (ends("\03" "ies")) setto("\01" "i"); else
196
                if (b[k-1] != 's') k--;
197
    }
198
    if (ends("\03" "eed")) { if (m() > 0) k--; }
199
    else
200
    if ((ends("\02" "ed") || ends("\03" "ing")) && vowelinstem())
201
    {
202
        k = j;
203
        if (ends("\02" "at")) setto("\03" "ate"); else
204
            if (ends("\02" "bl")) setto("\03" "ble"); else
205
                if (ends("\02" "iz")) setto("\03" "ize"); else
206
                    if (doublec(k))
207
                    {
208
                        k--;
209
                        {
210
                            int ch = b[k];
211
                            if (ch == 'l' || ch == 's' || ch == 'z') k++;
212
                        }
213
                    }
214
        else if (m() == 1 && cvc(k)) setto("\01" "e");
215
    }
216
}
217
218
219
/* step1c() turns terminal y to i when there is another vowel in the stem. */
220
221
static void step1c()
{
    /* Only a final 'y' with a vowel somewhere before it is rewritten. */
    if (!ends("\01" "y")) return;
    if (vowelinstem()) b[k] = 'i';
}
222
223
/* step2() maps double suffices to single ones. so -ization ( = -ize plus
224
   -ation) maps to -ize etc. note that the string before the suffix must give
225
   m() > 0. */
226
227
static void step2()
228
{
229
    switch (b[k-1])
230
    {
231
        case 'a': if (ends("\07" "ational")) { r("\03" "ate"); break; }
232
        if (ends("\06" "tional")) { r("\04" "tion"); break; }
233
        break;
234
        case 'c': if (ends("\04" "enci")) { r("\04" "ence"); break; }
235
        if (ends("\04" "anci")) { r("\04" "ance"); break; }
236
        break;
237
        case 'e': if (ends("\04" "izer")) { r("\03" "ize"); break; }
238
        break;
239
        case 'l': if (ends("\03" "bli"))          /*-DEPARTURE-*/
240
        {
241
            r("\03" "ble"); break;
242
        }
243
244
/* To match the published algorithm, replace this line with
245
   case 'l': if (ends("\04" "abli")) { r("\04" "able"); break; } */
246
247
        if (ends("\04" "alli")) { r("\02" "al"); break; }
248
        if (ends("\05" "entli")) { r("\03" "ent"); break; }
249
        if (ends("\03" "eli")) { r("\01" "e"); break; }
250
        if (ends("\05" "ousli")) { r("\03" "ous"); break; }
251
        break;
252
        case 'o': if (ends("\07" "ization")) { r("\03" "ize"); break; }
253
        if (ends("\05" "ation")) { r("\03" "ate"); break; }
254
        if (ends("\04" "ator")) { r("\03" "ate"); break; }
255
        break;
256
        case 's': if (ends("\05" "alism")) { r("\02" "al"); break; }
257
        if (ends("\07" "iveness")) { r("\03" "ive"); break; }
258
        if (ends("\07" "fulness")) { r("\03" "ful"); break; }
259
        if (ends("\07" "ousness")) { r("\03" "ous"); break; }
260
        break;
261
        case 't': if (ends("\05" "aliti")) { r("\02" "al"); break; }
262
        if (ends("\05" "iviti")) { r("\03" "ive"); break; }
263
        if (ends("\06" "biliti")) { r("\03" "ble"); break; }
264
        break;
265
        case 'g': if (ends("\04" "logi"))         /*-DEPARTURE-*/
266
        {
267
            r("\03" "log"); break;
268
        }
269
270
/* To match the published algorithm, delete this line */
271
272
    }
273
}
274
275
276
/* step3() deals with -ic-, -full, -ness etc. similar strategy to step2. */
277
278
static void step3()
279
{
280
    switch (b[k])
281
    {
282
        case 'e': if (ends("\05" "icate")) { r("\02" "ic"); break; }
283
        if (ends("\05" "ative")) { r("\00" ""); break; }
284
        if (ends("\05" "alize")) { r("\02" "al"); break; }
285
        break;
286
        case 'i': if (ends("\05" "iciti")) { r("\02" "ic"); break; }
287
        break;
288
        case 'l': if (ends("\04" "ical")) { r("\02" "ic"); break; }
289
        if (ends("\03" "ful")) { r("\00" ""); break; }
290
        break;
291
        case 's': if (ends("\04" "ness")) { r("\00" ""); break; }
292
        break;
293
    }
294
}
295
296
297
/* step4() takes off -ant, -ence etc., in context <c>vcvc<v>. */
298
299
static void step4()
300
{
301
    switch (b[k-1])
302
    {
303
        case 'a': if (ends("\02" "al")) break; return;
304
        case 'c': if (ends("\04" "ance")) break;
305
        if (ends("\04" "ence")) break; return;
306
        case 'e': if (ends("\02" "er")) break; return;
307
        case 'i': if (ends("\02" "ic")) break; return;
308
        case 'l': if (ends("\04" "able")) break;
309
        if (ends("\04" "ible")) break; return;
310
        case 'n': if (ends("\03" "ant")) break;
311
        if (ends("\05" "ement")) break;
312
        if (ends("\04" "ment")) break;
313
        if (ends("\03" "ent")) break; return;
314
        case 'o': if (ends("\03" "ion") && (b[j] == 's' || b[j] == 't')) break;
315
        if (ends("\02" "ou")) break; return;
316
/* takes care of -ous */
317
        case 's': if (ends("\03" "ism")) break; return;
318
        case 't': if (ends("\03" "ate")) break;
319
        if (ends("\03" "iti")) break; return;
320
        case 'u': if (ends("\03" "ous")) break; return;
321
        case 'v': if (ends("\03" "ive")) break; return;
322
        case 'z': if (ends("\03" "ize")) break; return;
323
        default: return;
324
    }
325
    if (m() > 1) k = j;
326
}
327
328
329
/* step5() removes a final -e if m() > 1, and changes -ll to -l if
330
   m() > 1. */
331
332
static void step5()
333
{
334
    j = k;
335
    if (b[k] == 'e')
336
    {
337
        int a = m();
338
        if (a > 1 || a == 1 && !cvc(k-1)) k--;
339
    }
340
    if (b[k] == 'l' && doublec(k) && m() > 1) k--;
341
}
342
343
344
/* In stem(p,i,j), p is a char pointer, and the string to be stemmed is from
345
   p[i] to p[j] inclusive. Typically i is zero and j is the offset to the last
346
   character of a string, (p[j+1] == '\0'). The stemmer adjusts the
347
   characters p[i] ... p[j] and returns the new end-point of the string, k.
348
   Stemming never increases word length, so i <= k <= j. To turn the stemmer
349
   into a module, declare 'stem' as extern, and delete the remainder of this
350
   file.
351
*/
352
353
int stem(char * p, int i, int j)
354
{                                                 /* copy the parameters into statics */
355
    b = p; k = j; k0 = i;
356
    if (k <= k0+1) return k;                      /*-DEPARTURE-*/
357
358
/* With this line, strings of length 1 or 2 don't go through the
359
   stemming process, although no mention is made of this in the
360
   published algorithm. Remove the line to match the published
361
   algorithm. */
362
363
    step1ab(); step1c(); step2(); step3(); step4(); step5();
364
    return k;
365
}
366
367
368
/*--------------------stemmer definition ends here------------------------*/
369
370
#include <stdio.h>
371
#include <stdlib.h>                               /* for malloc, free */
372
#include <ctype.h>                                /* for isupper, islower, tolower */
373
374
static char * s;                                  /* a char * (=string) pointer; passed into b above */
375
376
#define INC 50                                    /* size units in which s is increased */
377
static int i_max = INC;                           /* maximum offset in s */
378
379
void increase_s()
380
{
381
    i_max += INC;
382
    {
383
        char * new_s = (char *) malloc(i_max+1);
384
        {                                         /* copy across */
385
            int i; for (i = 0; i < i_max; i++) new_s[i] = s[i];
386
        }
387
        free(s); s = new_s;
388
    }
389
}
390
391
392
#define LETTER(ch) (isupper(ch) || islower(ch))
393
394
static void stemfile(FILE * f)
395
{
396
    while(TRUE)
397
    {
398
        int ch = getc(f);
399
        if (ch == EOF) return;
400
        if (LETTER(ch))
401
        {
402
            int i = 0;
403
            while(TRUE)
404
            {
405
                if (i == i_max) increase_s();
406
407
                ch = tolower(ch);                 /* forces lower case */
408
409
                s[i] = ch; i++;
410
                ch = getc(f);
411
                if (!LETTER(ch)) { ungetc(ch,f); break; }
412
            }
413
            s[stem(s,0,i-1)+1] = 0;
414
/* the previous line calls the stemmer and uses its result to
415
   zero-terminate the string in s */
416
            printf("%s",s);
417
        }
418
        else putchar(ch);
419
    }
420
}
421
422
/*
423
 * Commented out as required by amberfish's INSTALL file
424
 *
425
	int main(int argc, char * argv[])
426
	{
427
	    int i;
428
	    s = (char *) malloc(i_max+1);
429
	    for (i = 1; i < argc; i++)
430
	    {
431
	        FILE * f = fopen(argv[i],"r");
432
	        if (f == 0) { fprintf(stderr,"File %s not found\n",argv[i]); exit(1); }
433
	        stemfile(f);
434
	    }
435
	    free(s);
436
	    return 0;
437
	}
438
*/
(-)b/textproc/amberfish/pkg-descr (-20 / +6 lines)
Lines 1-19 Link Here
1
Amberfish is general purpose text retrieval software, developed at Etymon
1
Amberfish is a full-text search engine with a command-line interface.
2
by Nassib Nassar and distributed as open source software under the terms
2
Its features include free-text and Boolean queries, relevance-ranked
3
of version 2 of the GNU General Public License (GPL). Its distinguishing
3
results, wildcard search, phrase search, field search and structured
4
features are indexing/search of semi-structured text (i.e. both free text
4
field path queries for XML, multiple documents per file and nested
5
and multiply nested fields), built-in support for XML documents using the
5
documents, searching across multiple indexes, incremental update of
6
Xerces library, structured queries allowing generalized field/tag paths,
6
indexes, and low memory requirements for building indexes.
7
hierarchical result sets (XML only), automatic searching across multiple
8
databases (allowing modular indexing), TREC format results, efficient
9
indexing, and relatively low memory requirements during indexing (and the
10
ability to index documents larger than available memory). Z39.50 support
11
is available. Other features include Boolean queries, right truncation,
12
phrase searching, relevance ranking, support for multiple documents per
13
file, incremental indexing, and easy integration with other UNIX tools.
14
The architecture is also designed to permit proximity queries; however,
15
they are not fully implemented at present.
16
17
This port also includes the Porter stemming algorithm for suffix
18
stripping, available at:
19
     http://www.tartarus.org/~martin/PorterStemmer
20
- 

Return to bug 282880