Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
lib_mysqludf_stem
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
lib_mysqludf_stem
Commits
c825e3d9
Commit
c825e3d9
authored
Sep 18, 2015
by
Kazuhiko Shiozaki
Committed by
Kazuhiko
Sep 24, 2015
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update libstemmer_c.
parent
1c6e8c67
Changes
53
Show whitespace changes
Inline
Side-by-side
Showing
53 changed files
with
720 additions
and
12906 deletions
+720
-12906
libstemmer_c/MANIFEST
libstemmer_c/MANIFEST
+2
-2
libstemmer_c/algorithms/danish/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/danish/stem_ISO_8859_1.sbl
+0
-91
libstemmer_c/algorithms/danish/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/danish/stem_MS_DOS_Latin_I.sbl
+0
-91
libstemmer_c/algorithms/dutch/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/dutch/stem_ISO_8859_1.sbl
+0
-164
libstemmer_c/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
+0
-164
libstemmer_c/algorithms/dutch_adj/gen
libstemmer_c/algorithms/dutch_adj/gen
+0
-1
libstemmer_c/algorithms/dutch_adj/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/dutch_adj/stem_ISO_8859_1.sbl
+0
-295
libstemmer_c/algorithms/english/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/english/stem_ISO_8859_1.sbl
+0
-229
libstemmer_c/algorithms/finnish/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/finnish/stem_ISO_8859_1.sbl
+0
-196
libstemmer_c/algorithms/french/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/french/stem_ISO_8859_1.sbl
+0
-248
libstemmer_c/algorithms/french/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/french/stem_MS_DOS_Latin_I.sbl
+0
-239
libstemmer_c/algorithms/german/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/german/stem_ISO_8859_1.sbl
+0
-130
libstemmer_c/algorithms/german/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/german/stem_MS_DOS_Latin_I.sbl
+0
-130
libstemmer_c/algorithms/german2/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/german2/stem_ISO_8859_1.sbl
+0
-136
libstemmer_c/algorithms/hungarian/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/hungarian/stem_ISO_8859_1.sbl
+0
-241
libstemmer_c/algorithms/italian/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/italian/stem_ISO_8859_1.sbl
+0
-195
libstemmer_c/algorithms/italian/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/italian/stem_MS_DOS_Latin_I.sbl
+0
-195
libstemmer_c/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
+0
-245
libstemmer_c/algorithms/lovins/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/lovins/stem_ISO_8859_1.sbl
+0
-208
libstemmer_c/algorithms/norwegian/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/norwegian/stem_ISO_8859_1.sbl
+0
-80
libstemmer_c/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
+0
-80
libstemmer_c/algorithms/porter/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/porter/stem_ISO_8859_1.sbl
+0
-139
libstemmer_c/algorithms/portuguese/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/portuguese/stem_ISO_8859_1.sbl
+0
-218
libstemmer_c/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
+0
-218
libstemmer_c/algorithms/romanian/stem_ISO_8859_2.sbl
libstemmer_c/algorithms/romanian/stem_ISO_8859_2.sbl
+0
-236
libstemmer_c/algorithms/romanian/stem_Unicode.sbl
libstemmer_c/algorithms/romanian/stem_Unicode.sbl
+0
-236
libstemmer_c/algorithms/russian/stem_KOI8_R.sbl
libstemmer_c/algorithms/russian/stem_KOI8_R.sbl
+0
-217
libstemmer_c/algorithms/russian/stem_Unicode.sbl
libstemmer_c/algorithms/russian/stem_Unicode.sbl
+0
-215
libstemmer_c/algorithms/spanish/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/spanish/stem_ISO_8859_1.sbl
+0
-230
libstemmer_c/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
+0
-230
libstemmer_c/algorithms/swedish/stem_ISO_8859_1.sbl
libstemmer_c/algorithms/swedish/stem_ISO_8859_1.sbl
+0
-72
libstemmer_c/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
libstemmer_c/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
+0
-72
libstemmer_c/algorithms/turkish/stem_Unicode.sbl
libstemmer_c/algorithms/turkish/stem_Unicode.sbl
+0
-477
libstemmer_c/compiler/analyser.c
libstemmer_c/compiler/analyser.c
+0
-961
libstemmer_c/compiler/driver.c
libstemmer_c/compiler/driver.c
+0
-256
libstemmer_c/compiler/generator.c
libstemmer_c/compiler/generator.c
+0
-1443
libstemmer_c/compiler/generator_java.c
libstemmer_c/compiler/generator_java.c
+0
-1452
libstemmer_c/compiler/header.h
libstemmer_c/compiler/header.h
+0
-315
libstemmer_c/compiler/space.c
libstemmer_c/compiler/space.c
+0
-257
libstemmer_c/compiler/syswords.h
libstemmer_c/compiler/syswords.h
+0
-84
libstemmer_c/compiler/syswords2.h
libstemmer_c/compiler/syswords2.h
+0
-14
libstemmer_c/compiler/tokeniser.c
libstemmer_c/compiler/tokeniser.c
+0
-469
libstemmer_c/libstemmer/libstemmer.c
libstemmer_c/libstemmer/libstemmer.c
+5
-3
libstemmer_c/libstemmer/libstemmer_utf8.c
libstemmer_c/libstemmer/libstemmer_utf8.c
+5
-3
libstemmer_c/libstemmer/modules.h
libstemmer_c/libstemmer/modules.h
+4
-4
libstemmer_c/libstemmer/modules.txt
libstemmer_c/libstemmer/modules.txt
+1
-1
libstemmer_c/mkinc.mak
libstemmer_c/mkinc.mak
+2
-2
libstemmer_c/src_c/stem_ISO_8859_1_german.c
libstemmer_c/src_c/stem_ISO_8859_1_german.c
+143
-125
libstemmer_c/src_c/stem_ISO_8859_2_hungarian.c
libstemmer_c/src_c/stem_ISO_8859_2_hungarian.c
+6
-6
libstemmer_c/src_c/stem_ISO_8859_2_hungarian.h
libstemmer_c/src_c/stem_ISO_8859_2_hungarian.h
+16
-0
libstemmer_c/src_c/stem_UTF_8_dutch.c
libstemmer_c/src_c/stem_UTF_8_dutch.c
+380
-1453
libstemmer_c/src_c/stem_UTF_8_german.c
libstemmer_c/src_c/stem_UTF_8_german.c
+143
-125
libstemmer_c/src_c/stem_UTF_8_hungarian.c
libstemmer_c/src_c/stem_UTF_8_hungarian.c
+13
-13
No files found.
libstemmer_c/MANIFEST
View file @
c825e3d9
...
@@ -11,8 +11,6 @@ src_c/stem_ISO_8859_1_french.c
...
@@ -11,8 +11,6 @@ src_c/stem_ISO_8859_1_french.c
src_c/stem_ISO_8859_1_french.h
src_c/stem_ISO_8859_1_french.h
src_c/stem_ISO_8859_1_german.c
src_c/stem_ISO_8859_1_german.c
src_c/stem_ISO_8859_1_german.h
src_c/stem_ISO_8859_1_german.h
src_c/stem_ISO_8859_1_hungarian.c
src_c/stem_ISO_8859_1_hungarian.h
src_c/stem_ISO_8859_1_italian.c
src_c/stem_ISO_8859_1_italian.c
src_c/stem_ISO_8859_1_italian.h
src_c/stem_ISO_8859_1_italian.h
src_c/stem_ISO_8859_1_norwegian.c
src_c/stem_ISO_8859_1_norwegian.c
...
@@ -25,6 +23,8 @@ src_c/stem_ISO_8859_1_spanish.c
...
@@ -25,6 +23,8 @@ src_c/stem_ISO_8859_1_spanish.c
src_c/stem_ISO_8859_1_spanish.h
src_c/stem_ISO_8859_1_spanish.h
src_c/stem_ISO_8859_1_swedish.c
src_c/stem_ISO_8859_1_swedish.c
src_c/stem_ISO_8859_1_swedish.h
src_c/stem_ISO_8859_1_swedish.h
src_c/stem_ISO_8859_2_hungarian.c
src_c/stem_ISO_8859_2_hungarian.h
src_c/stem_ISO_8859_2_romanian.c
src_c/stem_ISO_8859_2_romanian.c
src_c/stem_ISO_8859_2_romanian.h
src_c/stem_ISO_8859_2_romanian.h
src_c/stem_KOI8_R_russian.c
src_c/stem_KOI8_R_russian.c
...
...
libstemmer_c/algorithms/danish/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
undouble
)
externals ( stem )
strings ( ch )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef ae hex 'E6'
stringdef ao hex 'E5'
stringdef o/ hex 'F8'
define v 'aeiouy{ae}{ao}{o/}'
define s_ending 'abcdfghjklmnoprtvyz{ao}'
// mark_regions: sets p1, the mark that later bounds suffix removal.
// p1 starts at the limit; x is set three characters in (the routine fails
// if `hop 3` cannot be performed). p1 is then placed just past the first
// non-vowel that follows a vowel, and raised to x if it lies before x.
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
// main_suffix: finds the longest listed ending, searching no further back
// than p1. Endings in the first group are deleted outright; a bare 's' is
// deleted only when the character before it is in s_ending.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
'erets' 'et' 'eret'
(delete)
's'
(s_ending delete)
)
)
// consonant_pair: if the text ends (within the p1-limited region) in one of
// the listed pairs, removes the final character of that pair.
// The `test` restores the cursor, so `next]` brackets just one character.
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'gd' // significant in the call from other_suffix
'dt' 'gt' 'kt'
)
)
next] delete
)
// other_suffix: first drops a final 'st' when it is preceded by 'ig'
// (cursor restored by `do`). Then, within the p1-limited region, looks for
// the longest listed ending: the first group is deleted and consonant_pair
// is attempted afterwards; 'l{o/}st' is shortened to 'l{o/}s'.
define other_suffix as (
do ( ['st'] 'ig' delete )
setlimit tomark p1 for ([substring])
among(
'ig' 'lig' 'elig' 'els'
(delete do consonant_pair)
'l{o/}st'
(<-'l{o/}s')
)
)
// undouble: takes the final character if it is a non-vowel inside the
// p1-limited region, saves it in ch, requires the text before it to match
// ch again (i.e. a doubled character), and deletes the bracketed one.
define undouble as (
setlimit tomark p1 for ([non-v] ->ch)
ch
delete
)
)
// stem: external entry point. Computes the region mark, then runs the four
// suffix routines in backward mode. Each step is wrapped in `do`, so a
// failing step does not abort the ones after it.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
do undouble
)
)
libstemmer_c/algorithms/danish/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
undouble
)
externals ( stem )
strings ( ch )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef ae hex '91'
stringdef ao hex '86'
stringdef o/ hex '9B'
define v 'aeiouy{ae}{ao}{o/}'
define s_ending 'abcdfghjklmnoprtvyz{ao}'
// mark_regions: sets p1, the mark that later bounds suffix removal.
// p1 starts at the limit; x is set three characters in (the routine fails
// if `hop 3` cannot be performed). p1 is then placed just past the first
// non-vowel that follows a vowel, and raised to x if it lies before x.
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
// main_suffix: finds the longest listed ending, searching no further back
// than p1. Endings in the first group are deleted outright; a bare 's' is
// deleted only when the character before it is in s_ending.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
'erets' 'et' 'eret'
(delete)
's'
(s_ending delete)
)
)
// consonant_pair: if the text ends (within the p1-limited region) in one of
// the listed pairs, removes the final character of that pair.
// The `test` restores the cursor, so `next]` brackets just one character.
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'gd' // significant in the call from other_suffix
'dt' 'gt' 'kt'
)
)
next] delete
)
// other_suffix: first drops a final 'st' when it is preceded by 'ig'
// (cursor restored by `do`). Then, within the p1-limited region, looks for
// the longest listed ending: the first group is deleted and consonant_pair
// is attempted afterwards; 'l{o/}st' is shortened to 'l{o/}s'.
define other_suffix as (
do ( ['st'] 'ig' delete )
setlimit tomark p1 for ([substring])
among(
'ig' 'lig' 'elig' 'els'
(delete do consonant_pair)
'l{o/}st'
(<-'l{o/}s')
)
)
// undouble: takes the final character if it is a non-vowel inside the
// p1-limited region, saves it in ch, requires the text before it to match
// ch again (i.e. a doubled character), and deletes the bracketed one.
define undouble as (
setlimit tomark p1 for ([non-v] ->ch)
ch
delete
)
)
// stem: external entry point. Computes the region mark, then runs the four
// suffix routines in backward mode. Each step is wrapped in `do`, so a
// failing step does not abort the ones after it.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
do undouble
)
)
libstemmer_c/algorithms/dutch/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
routines (
prelude postlude
e_ending
en_ending
mark_regions
R1 R2
undouble
standard_suffix
)
externals ( stem )
booleans ( e_found )
integers ( p1 p2 )
groupings ( v v_I v_j )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a" hex 'E4'
stringdef e" hex 'EB'
stringdef i" hex 'EF'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef a' hex 'E1'
stringdef e' hex 'E9'
stringdef i' hex 'ED'
stringdef o' hex 'F3'
stringdef u' hex 'FA'
stringdef e` hex 'E8'
define v 'aeiouy{e`}'
define v_I v + 'I'
define v_j v + 'j'
// prelude: normalises the input before region marking.
//  1. Walks the whole word (cursor restored by `test`), replacing the
//     {x"} and {x'} forms of a, e, i, o, u by the plain letter; the empty
//     string entry just advances one character.
//  2. Rewrites a leading 'y' as 'Y'.
//  3. Repeatedly, after a vowel: 'i' followed by a vowel becomes 'I', and
//     'y' becomes 'Y'.
define prelude as (
test repeat (
[substring] among(
'{a"}' '{a'}'
(<- 'a')
'{e"}' '{e'}'
(<- 'e')
'{i"}' '{i'}'
(<- 'i')
'{o"}' '{o'}'
(<- 'o')
'{u"}' '{u'}'
(<- 'u')
'' (next)
) //or next
)
try(['y'] <- 'Y')
repeat goto (
v [('i'] v <- 'I') or
('y'] <- 'Y')
)
)
// mark_regions: sets p1 and p2 (both default to the limit).
// p1 is placed just past the first non-vowel following a vowel, but never
// below 3; p2 is found the same way, continuing from where p1 was found.
define mark_regions as (
$p1 = limit
$p2 = limit
gopast v gopast non-v setmark p1
try($p1 < 3 $p1 = 3) // at least 3
gopast v gopast non-v setmark p2
)
// postlude: walks the word turning the marker letters 'Y' and 'I' back into
// 'y' and 'i'; any other character is skipped with `next`.
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'I' (<- 'i')
'' (next)
) //or next
)
backwardmode (
// R1 / R2: succeed when the cursor is at or beyond p1 / p2 respectively.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// undouble: if the text ends in 'kk', 'dd' or 'tt' (checked under `test`,
// so the cursor is restored), deletes the final character.
define undouble as (
test among('kk' 'dd' 'tt') [next] delete
)
// e_ending: clears e_found, then deletes a final 'e' that lies in R1 and is
// preceded by a non-vowel. On success sets e_found and runs undouble.
define e_ending as (
unset e_found
['e'] R1 test non-v delete
set e_found
undouble
)
// en_ending: deletes the slice bracketed by the caller, provided the cursor
// is in R1, the preceding character is a non-vowel, and the preceding text
// is not 'gem'. Then runs undouble.
define en_ending as (
R1 non-v and not 'gem' delete
undouble
)
// standard_suffix: the main suffix-stripping sequence, run in backward mode.
// Every stage is wrapped in `do`, so stages are attempted independently.
define standard_suffix as (
// Stage 1: 'heden' -> 'heid' in R1; 'en'/'ene' via en_ending;
// 's'/'se' deleted in R1 when preceded by a character outside v_j.
do (
[substring] among(
'heden'
( R1 <- 'heid'
)
'en' 'ene'
( en_ending
)
's' 'se'
( R1 non-v_j delete
)
)
)
// Stage 2: final 'e' (also records e_found for the 'bar' case below).
do e_ending
// Stage 3: 'heid' deleted in R2 unless preceded by 'c'; a following 'en'
// is then handled by en_ending.
do ( ['heid'] R2 not 'c' delete
['en'] en_ending
)
// Stage 4: derivational endings, all conditional on R2.
do (
[substring] among(
'end' 'ing'
( R2 delete
(['ig'] R2 not 'e' delete) or undouble
)
'ig'
( R2 not 'e' delete
)
'lijk'
( R2 delete e_ending
)
'baar'
( R2 delete
)
'bar'
( R2 e_found delete
)
)
)
// Stage 5: when the text ends non-v_I, then 'aa'/'ee'/'oo'/'uu', then a
// non-vowel before that, one letter of the doubled vowel is deleted.
do (
non-v_I
test (
among ('aa' 'ee' 'oo' 'uu')
non-v
)
[next] delete
)
)
)
// stem: external entry point. Normalises the word, marks regions, runs
// standard_suffix in backward mode, then restores the marker letters.
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)
libstemmer_c/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
routines (
prelude postlude
e_ending
en_ending
mark_regions
R1 R2
undouble
standard_suffix
)
externals ( stem )
booleans ( e_found )
integers ( p1 p2 )
groupings ( v v_I v_j )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a" hex '84'
stringdef e" hex '89'
stringdef i" hex '8B'
stringdef o" hex '94'
stringdef u" hex '81'
stringdef a' hex 'A0'
stringdef e' hex '82'
stringdef i' hex 'A1'
stringdef o' hex 'A2'
stringdef u' hex 'A3'
stringdef e` hex '8A'
define v 'aeiouy{e`}'
define v_I v + 'I'
define v_j v + 'j'
// prelude: normalises the input before region marking.
//  1. Walks the whole word (cursor restored by `test`), replacing the
//     {x"} and {x'} forms of a, e, i, o, u by the plain letter; the empty
//     string entry just advances one character.
//  2. Rewrites a leading 'y' as 'Y'.
//  3. Repeatedly, after a vowel: 'i' followed by a vowel becomes 'I', and
//     'y' becomes 'Y'.
define prelude as (
test repeat (
[substring] among(
'{a"}' '{a'}'
(<- 'a')
'{e"}' '{e'}'
(<- 'e')
'{i"}' '{i'}'
(<- 'i')
'{o"}' '{o'}'
(<- 'o')
'{u"}' '{u'}'
(<- 'u')
'' (next)
) //or next
)
try(['y'] <- 'Y')
repeat goto (
v [('i'] v <- 'I') or
('y'] <- 'Y')
)
)
// mark_regions: sets p1 and p2 (both default to the limit).
// p1 is placed just past the first non-vowel following a vowel, but never
// below 3; p2 is found the same way, continuing from where p1 was found.
define mark_regions as (
$p1 = limit
$p2 = limit
gopast v gopast non-v setmark p1
try($p1 < 3 $p1 = 3) // at least 3
gopast v gopast non-v setmark p2
)
// postlude: walks the word turning the marker letters 'Y' and 'I' back into
// 'y' and 'i'; any other character is skipped with `next`.
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'I' (<- 'i')
'' (next)
) //or next
)
backwardmode (
// R1 / R2: succeed when the cursor is at or beyond p1 / p2 respectively.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// undouble: if the text ends in 'kk', 'dd' or 'tt' (checked under `test`,
// so the cursor is restored), deletes the final character.
define undouble as (
test among('kk' 'dd' 'tt') [next] delete
)
// e_ending: clears e_found, then deletes a final 'e' that lies in R1 and is
// preceded by a non-vowel. On success sets e_found and runs undouble.
define e_ending as (
unset e_found
['e'] R1 test non-v delete
set e_found
undouble
)
// en_ending: deletes the slice bracketed by the caller, provided the cursor
// is in R1, the preceding character is a non-vowel, and the preceding text
// is not 'gem'. Then runs undouble.
define en_ending as (
R1 non-v and not 'gem' delete
undouble
)
// standard_suffix: the main suffix-stripping sequence, run in backward mode.
// Every stage is wrapped in `do`, so stages are attempted independently.
define standard_suffix as (
// Stage 1: 'heden' -> 'heid' in R1; 'en'/'ene' via en_ending;
// 's'/'se' deleted in R1 when preceded by a character outside v_j.
do (
[substring] among(
'heden'
( R1 <- 'heid'
)
'en' 'ene'
( en_ending
)
's' 'se'
( R1 non-v_j delete
)
)
)
// Stage 2: final 'e' (also records e_found for the 'bar' case below).
do e_ending
// Stage 3: 'heid' deleted in R2 unless preceded by 'c'; a following 'en'
// is then handled by en_ending.
do ( ['heid'] R2 not 'c' delete
['en'] en_ending
)
// Stage 4: derivational endings, all conditional on R2.
do (
[substring] among(
'end' 'ing'
( R2 delete
(['ig'] R2 not 'e' delete) or undouble
)
'ig'
( R2 not 'e' delete
)
'lijk'
( R2 delete e_ending
)
'baar'
( R2 delete
)
'bar'
( R2 e_found delete
)
)
)
// Stage 5: when the text ends non-v_I, then 'aa'/'ee'/'oo'/'uu', then a
// non-vowel before that, one letter of the doubled vowel is deleted.
do (
non-v_I
test (
among ('aa' 'ee' 'oo' 'uu')
non-v
)
[next] delete
)
)
)
// stem: external entry point. Normalises the word, marks regions, runs
// standard_suffix in backward mode, then restores the marker letters.
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)
libstemmer_c/algorithms/dutch_adj/gen
deleted
100644 → 0
View file @
1c6e8c67
# Runs the local Snowball binary on stem_ISO_8859_1.sbl, writing output named
# stem_UTF_8_dutch. NOTE(review): -u / -eprefix / -r / -o are presumably the
# Snowball compiler's UTF-8, external-prefix, runtime-path and output-name
# options — confirm against the compiler version in use.
./Snowball stem_ISO_8859_1.sbl -u -eprefix dutch_UTF_8_ -r ../runtime -o stem_UTF_8_dutch
\ No newline at end of file
libstemmer_c/algorithms/dutch_adj/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
strings ( ch )
integers ( x p1 p2 )
booleans ( Y_found stemmed /*GE_removed*/ )
routines (
prelude
R1 R2
C V VX
lengthen_V
Step_1 Step_2 Step_3 Step_4 Step_7
Step_6
// Step_1c
// Lose_prefix
// Lose_infix
measure
)
externals ( stem )
groupings ( v v_WX AOU AIOU)
stringescapes {}
stringdef ' hex '27' // yuk
define v 'aeiouy'
define v_WX v + 'wx'
define AOU 'aou'
define AIOU 'aiou'
stringdef a" hex 'E4'
stringdef e" hex 'EB'
stringdef i" hex 'EF'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef a' hex 'E1'
stringdef e' hex 'E9'
stringdef i' hex 'ED'
stringdef o' hex 'F3'
stringdef u' hex 'FA'
stringdef e` hex 'E8'
//define v_I v + 'I'
//define v_j v + 'j'
// prelude: normalises the input.
//  1. Walks the whole word (cursor restored by `test`), replacing the
//     {x"} and {x'} forms of a, e, i, o, u by the plain letter; the empty
//     string entry just advances one character.
//  2. Rewrites a leading 'y' as 'Y'.
//  3. Repeatedly, after a vowel: 'i' followed by a vowel becomes 'I', and
//     'y' becomes 'Y'.
define prelude as (
test repeat (
[substring] among(
'{a"}' '{a'}'
(<- 'a')
'{e"}' '{e'}'
(<- 'e')
'{i"}' '{i'}'
(<- 'i')
'{o"}' '{o'}'
(<- 'o')
'{u"}' '{u'}'
(<- 'u')
'' (next)
) //or next
)
try(['y'] <- 'Y')
repeat goto (
v [('i'] v <- 'I') or
('y'] <- 'Y')
)
)
backwardmode (
// R1 / R2: record the cursor in x and succeed when x is at or beyond
// p1 / p2 respectively.
define R1 as (setmark x $x >= p1)
define R2 as (setmark x $x >= p2)
// Look-behind tests (cursor restored by `test`):
//   V  - the preceding text is a vowel or 'ij'
//   VX - after skipping one character, the preceding text is a vowel or 'ij'
//   C  - the preceding text is not 'ij' and the preceding character is a
//        non-vowel
define V as test (v or 'ij')
define VX as test (next v or 'ij')
define C as test (not 'ij' non-v)
// lengthen_V: doubles a vowel near the end of the word (cursor restored by
// `do`). Requires a final character outside v_WX, then brackets either a
// single a/o/u, or an 'e' subject to the extra `not` conditions below, that
// is preceded by a non-vowel or the limit. The bracketed vowel is copied to
// ch and inserted again.
define lengthen_V as do (
non-v_WX [ (AOU] test (non-v or atlimit)) or
('e'] test (non-v or atlimit
not AIOU
not (next AIOU non-v)))
->ch insert ch
)
// Step_1: dispatches on the longest listed ending. `[among ( (])` closes
// the slice on the matched ending before the per-ending action runs; some
// actions match further text and re-close the slice with `]` so a longer
// span is replaced or deleted.
define Step_1 as
(
[among ( (])
'{'}s' (delete)
's' (R1 not ('t' R1) C delete)
'ies' (R1 <-'ie')
'es'
(('ar' R1 C ] delete lengthen_V) or
('er' R1 C ] delete) or
(R1 C <-'e'))
'aus' (R1 V <-'au')
'alen' (R1 <- 'aal')
'ieen' (R1 <- 'ie')
'ven' (R1 <- 'f')
'en' (('hed' R1 ] <-'heid') or
('nd' delete) or
('d' R1 C ] delete) or
('i' or 'j' V delete) or
(R1 C delete lengthen_V))
'nde' (<-'nd')
)
)
// Step_2: dispatches on endings in final 'e' ('je', 'ge', 'lijke', ...).
// The 'je' entry leaves the slice open-ended: each alternative matches more
// preceding text and closes the slice itself with `]`.
define Step_2 as
(
[among ( (])
'je' (('{'}t' ] delete) or
('et' ] R1 C delete) or
('rnt' ] <-'rn') or
('t' ] R1 VX delete) or
('ink' ] <-'ing') or
('mp' ] <-'m') or
('{'}' ] R1 delete) or
(] R1 C delete))
'ge' (R1 <-'g')
'lijke'(R1 <-'lijk')
'ische'(R1 <-'isch')
'de' (R1 C delete)
'te' (R1 <-'t')
'se' (R1 <-'s')
're' (R1 <-'r')
'le' (R1 delete attach 'l' lengthen_V)
'ene' (R1 C delete attach 'en' lengthen_V)
'ieve' (R1 C <-'ief')
)
)
// Step_3: dispatches on the listed noun-forming endings. Strings listed
// without their own action share the next parenthesised action below them.
define Step_3 as
(
[among ( (])
'atie' (R1 <-'eer')
'iteit' (R1 delete lengthen_V)
'heid'
'sel'
'ster' (R1 delete)
'rder' (<-'r')
'ing'
'isme'
'erij' (R1 delete lengthen_V)
'arij' (R1 C <-'aar')
'fie' (R2 delete attach 'f' lengthen_V)
'gie' (R2 delete attach 'g' lengthen_V)
'tst' (R1 C <-'t')
'dst' (R1 C <-'d')
)
)
// Step_4: tries the first list of adjective-like endings; if that whole
// alternative fails, falls back (`or`) to the shorter 'iger'/'igst'/'ig'
// list.
define Step_4 as
(
( [among ( (])
'ioneel' (R1 <-'ie')
'atief' (R1 <-'eer')
'baar' (R1 delete)
'naar' (R1 V <-'n')
'laar' (R1 V <-'l')
'raar' (R1 V <-'r')
'tant' (R1 <-'teer')
'lijker'
'lijkst' (R1 <-'lijk')
'achtig'
'achtiger'
'achtigst'(R1 delete)
'eriger'
'erigst'
'erig'
'end' (R1 C delete lengthen_V)
)
)
or
( [among ( (])
'iger'
'igst'
'ig' (R1 C delete lengthen_V)
)
)
)
// Step_7: unconditional final clean-ups: 'iee' -> 'ie', 'eee' -> 'ee',
// and a trailing 't' dropped after k, f or p.
define Step_7 as
(
[among ( (])
'iee' (<-'ie')
'eee' (<-'ee')
'kt' (<-'k')
'ft' (<-'f')
'pt' (<-'p')
)
)
// Step_6: reduces a final doubled consonant to a single one, and rewrites
// a final 'v' as 'f' and a final 'z' as 's'.
define Step_6 as
(
[among ( (])
'bb' (<-'b')
'cc' (<-'c')
'dd' (<-'d')
'ff' (<-'f')
'gg' (<-'g')
'hh' (<-'h')
'jj' (<-'j')
'kk' (<-'k')
'll' (<-'l')
'mm' (<-'m')
'nn' (<-'n')
'pp' (<-'p')
'qq' (<-'q')
'rr' (<-'r')
'ss' (<-'s')
'tt' (<-'t')
'vv' (<-'v')
'ww' (<-'w')
'xx' (<-'x')
'zz' (<-'z')
'v' (<-'f')
'z' (<-'s')
)
)
/*
define Step_1c as
(
[among ( (] R1 C)
'd' (not ('n' R1) delete)
't' (not ('h' R1) delete)
)
)
*/
)
/*
define Lose_prefix as (
['ge'] test hop 3 (goto v goto non-v)
set GE_removed
delete
)
define Lose_infix as (
next
gopast (['ge']) test hop 3 (goto v goto non-v)
set GE_removed
delete
)
*/
// measure: sets p1 and p2. Both default to the end of the word (first
// `do`). The second `do` then skips leading non-vowels, requires at least
// one vowel or 'ij' followed by a non-vowel, and marks p1 there; p2 is
// found by applying the same pattern again from p1.
define measure as (
do (
tolimit
setmark p1
setmark p2
)
do(
repeat non-v atleast 1 ('ij' or v) non-v setmark p1
repeat non-v atleast 1 ('ij' or v) non-v setmark p2
)
)
// stem: external entry point. Runs prelude, marks 'y' as 'Y' (recording
// Y_found), measures the regions, applies Steps 1-4 in backward mode
// (recording in `stemmed` whether any step succeeded), then Step_7, and
// Step_6 only when nothing has been stemmed. Finally restores 'Y' to 'y'
// if Y_found is set.
// NOTE(review): prelude already rewrites 'y' to 'Y' (and 'i' to 'I')
// without setting Y_found, and Y_found is cleared after prelude runs, so
// the final 'Y' -> 'y' restoration may not fire, and nothing here maps 'I'
// back to 'i' — confirm whether that is intended.
define stem as (
prelude
unset Y_found
unset stemmed
do ( ['y'] <-'Y' set Y_found )
do repeat(goto (v ['y'])<-'Y' set Y_found )
measure
backwards (
do (Step_1 set stemmed )
do (Step_2 set stemmed )
do (Step_3 set stemmed )
do (Step_4 set stemmed )
)
/*
unset GE_removed
do (Lose_prefix and measure)
backwards (
do (GE_removed Step_1c)
)
unset GE_removed
do (Lose_infix and measure)
backwards (
do (GE_removed Step_1c)
)
*/
backwards (
do (Step_7 set stemmed )
do (stemmed or Step_6)
)
do(Y_found repeat(goto (['Y']) <-'y'))
)
libstemmer_c/algorithms/english/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
integers ( p1 p2 )
booleans ( Y_found )
routines (
prelude postlude
mark_regions
shortv
R1 R2
Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5
exception1
exception2
)
externals ( stem )
groupings ( v v_WXY valid_LI )
stringescapes {}
define v 'aeiouy'
define v_WXY v + 'wxY'
define valid_LI 'cdeghkmnrt'
// prelude: clears Y_found, deletes a leading {'} character, then rewrites
// 'y' as 'Y' when it is the first letter or directly follows a vowel,
// setting Y_found whenever such a rewrite happens.
define prelude as (
unset Y_found
do ( ['{'}'] delete)
do ( ['y'] <-'Y' set Y_found)
do repeat(goto (v ['y']) <-'Y' set Y_found)
)
// mark_regions: sets p1 and p2 (both default to the limit).
// p1 is placed right after one of the listed prefixes when the word starts
// with one, otherwise just past the first non-vowel following a vowel.
// p2 is then found by the vowel/non-vowel rule starting from p1.
define mark_regions as (
$p1 = limit
$p2 = limit
do(
among (
'gener'
'commun' // added May 2005
'arsen' // added Nov 2006 (arsenic/arsenal)
// ... extensions possible here ...
) or (gopast v gopast non-v)
setmark p1
gopast v gopast non-v setmark p2
)
)
backwardmode (
// shortv: (backward mode) succeeds when the preceding text is either
// non-vowel + vowel + a character outside v_WXY, or a non-vowel + vowel
// pair that begins the word (`atlimit`).
define shortv as (
( non-v_WXY v non-v )
or
( non-v v atlimit )
)
// R1 / R2: succeed when the cursor is at or beyond p1 / p2 respectively.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Step_1a: first optionally deletes a trailing {'}, {'}s or {'}s{'}.
// Then, on the longest of the listed endings:
//   'sses'        -> 'ss'
//   'ied' / 'ies' -> 'i' when at least two characters precede the ending
//                    (`hop 2` succeeds), otherwise 'ie'
//   's'           -> deleted when a vowel occurs somewhere before the
//                    character immediately preceding the 's'
//   'us' / 'ss'   -> left unchanged (no action attached)
define Step_1a as (
try (
[substring] among (
'{'}' '{'}s' '{'}s{'}'
(delete)
)
)
[substring] among (
'sses' (<-'ss')
'ied' 'ies'
((hop 2 <-'i') or <-'ie')
's' (next gopast v delete)
'us' 'ss'
)
)
// Step_1b: 'eed'/'eedly' become 'ee' when in R1. The other listed endings
// are deleted when the text before them contains a vowel; afterwards the
// new ending is inspected (under `test`): 'at'/'bl'/'iz' get an 'e'
// inserted, a listed double consonant loses its last letter, and otherwise
// an 'e' is inserted when the cursor is at p1 and shortv holds.
define Step_1b as (
[substring] among (
'eed' 'eedly'
(R1 <-'ee')
'ed' 'edly' 'ing' 'ingly'
(
test gopast v delete
test substring among(
'at' 'bl' 'iz'
(<+ 'e')
'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
// ignoring double c, h, j, k, q, v, w, and x
([next] delete)
'' (atmark p1 test shortv <+ 'e')
)
)
)
)
// Step_1c: replaces a final 'y' or 'Y' by 'i' when it is preceded by a
// non-vowel that is not the first character of the word.
define Step_1c as (
['y' or 'Y']
non-v not atlimit
<-'i'
)
// Step_2: finds the longest listed ending and, provided it lies in R1,
// applies the attached replacement. 'ogi' additionally requires a
// preceding 'l'; 'li' is deleted only after a character in valid_LI.
define Step_2 as (
[substring] R1 among (
'tional' (<-'tion')
'enci' (<-'ence')
'anci' (<-'ance')
'abli' (<-'able')
'entli' (<-'ent')
'izer' 'ization'
(<-'ize')
'ational' 'ation' 'ator'
(<-'ate')
'alism' 'aliti' 'alli'
(<-'al')
'fulness' (<-'ful')
'ousli' 'ousness'
(<-'ous')
'iveness' 'iviti'
(<-'ive')
'biliti' 'bli'
(<-'ble')
'ogi' ('l' <-'og')
'fulli' (<-'ful')
'lessli' (<-'less')
'li' (valid_LI delete)
)
)
// Step_3: finds the longest listed ending and, provided it lies in R1,
// replaces or deletes it as shown; 'ative' is deleted only when also in R2.
define Step_3 as (
[substring] R1 among (
'tional' (<- 'tion')
'ational' (<- 'ate')
'alize' (<-'al')
'icate' 'iciti' 'ical'
(<-'ic')
'ful' 'ness'
(delete)
'ative'
(R2 delete) // 'R2' added Dec 2001
)
)
// Step_4: deletes the longest listed ending when it lies in R2; 'ion' is
// deleted only when preceded by 's' or 't'.
define Step_4 as (
[substring] R2 among (
'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
'ment' 'ent' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
(delete)
'ion' ('s' or 't' delete)
)
)
// Step_5: deletes a final 'e' when in R2, or when in R1 and not preceded by
// a short syllable (shortv); deletes a final 'l' when in R2 and preceded by
// another 'l'.
define Step_5 as (
[substring] among (
'e' (R2 or (R1 not shortv) delete)
'l' (R2 'l' delete)
)
)
// exception2: (backward mode) succeeds when the whole remaining word is one
// of the listed forms; `atlimit` requires the match to reach the start.
// No action is attached, so the word is left as it is.
define exception2 as (
[substring] atlimit among(
'inning' 'outing' 'canning' 'herring' 'earring'
'proceed' 'exceed' 'succeed'
// ... extensions possible here ...
)
)
)
// exception1: (forward mode) succeeds when the whole word is one of the
// listed forms; `atlimit` requires the match to reach the end of the word.
// Entries with an action are replaced; entries without one are accepted
// unchanged.
define exception1 as (
[substring] atlimit among(
/* special changes: */
'skis' (<-'ski')
'skies' (<-'sky')
'dying' (<-'die')
'lying' (<-'lie')
'tying' (<-'tie')
/* special -LY cases */
'idly' (<-'idl')
'gently' (<-'gentl')
'ugly' (<-'ugli')
'early' (<-'earli')
'only' (<-'onli')
'singly' (<-'singl')
// ... extensions possible here ...
/* invariant forms: */
'sky'
'news'
'howe'
'atlas' 'cosmos' 'bias' 'andes' // not plural forms
// ... extensions possible here ...
)
)
// postlude: when Y_found is set, turns every 'Y' back into 'y'.
define postlude as (Y_found repeat(goto (['Y']) <-'y'))
// stem: external entry point. The word is finished immediately if
// exception1 handles it or if it is shorter than three characters
// (`not hop 3`). Otherwise: prelude, region marking, then in backward mode
// Step_1a followed by either exception2 or Steps 1b-5, and finally
// postlude.
define stem as (
exception1 or
not hop 3 or (
do prelude
do mark_regions
backwards (
do Step_1a
exception2 or (
do Step_1b
do Step_1c
do Step_2
do Step_3
do Step_4
do Step_5
)
)
do postlude
)
)
libstemmer_c/algorithms/finnish/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
/* Finnish stemmer.
Numbers in square brackets refer to the sections in
Fred Karlsson, Finnish: An Essential Grammar. Routledge, 1999
ISBN 0-415-20705-3
*/
routines (
mark_regions
R2
particle_etc possessive
LONG VI
case_ending
i_plural
t_plural
other_endings
tidy
)
externals ( stem )
integers ( p1 p2 )
strings ( x )
booleans ( ending_removed )
groupings ( AEI V1 V2 particle_end )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a" hex 'E4'
stringdef o" hex 'F6'
define AEI 'a{a"}ei'
define V1 'aeiouy{a"}{o"}'
define V2 'aeiou{a"}{o"}'
define particle_end V1 + 'nt'
// mark_regions: sets p1 and p2 (both default to the limit). p1 is placed
// just past the first character outside V1 that follows a V1 vowel; p2 is
// found the same way, continuing from p1.
define mark_regions as (
$p1 = limit
$p2 = limit
goto V1 gopast non-V1 setmark p1
goto V1 gopast non-V1 setmark p2
)
backwardmode (
// R2: succeeds when the cursor is at or beyond p2.
define R2 as $p2 <= cursor
// particle_etc: finds the longest listed ending no further back than p1.
// The particle endings require a preceding character in particle_end;
// 'sti' requires R2. The matched ending is then deleted.
define particle_etc as (
setlimit tomark p1 for ([substring])
among(
'kin'
'kaan' 'k{a"}{a"}n'
'ko' 'k{o"}'
'han' 'h{a"}n'
'pa' 'p{a"}' // Particles [91]
(particle_end)
'sti' // Adverb [87]
(R2)
)
delete
)
// possessive: finds the longest listed possessive ending no further back
// than p1 and deletes it subject to the per-ending condition. After
// deleting 'ni', a preceding 'kse' is rewritten to 'ksi'. The 'an',
// '{a"}n' and 'en' forms are deleted only after one of the listed case
// endings.
define possessive as ( // [36]
setlimit tomark p1 for ([substring])
among(
'si'
(not 'k' delete) // take 'ksi' as the Comitative case
'ni'
(delete ['kse'] <- 'ksi') // kseni = ksi + ni
'nsa' 'ns{a"}'
'mme'
'nne'
(delete)
/* Now for Vn possessives after case endings: [36] */
'an'
(among('ta' 'ssa' 'sta' 'lla' 'lta' 'na') delete)
'{a"}n'
(among('t{a"}' 'ss{a"}' 'st{a"}'
'll{a"}' 'lt{a"}' 'n{a"}') delete)
'en'
(among('lle' 'ine') delete)
)
)
// LONG: matches one of the listed doubled vowels.
// VI:   matches 'i' followed (in match direction) by a V2 vowel.
define LONG as
among('aa' 'ee' 'ii' 'oo' 'uu' '{a"}{a"}' '{o"}{o"}')
define VI as ('i' V2)
// case_ending: finds the longest listed case ending no further back than
// p1, checks the condition attached to it (several endings require a
// particular preceding vowel or pattern; the last group has none), then
// deletes the slice and sets ending_removed. For 'n', the optional `try`
// may extend the slice by one character when LONG or 'ie' precedes.
define case_ending as (
setlimit tomark p1 for ([substring])
among(
'han' ('a') //-.
'hen' ('e') // |
'hin' ('i') // |
'hon' ('o') // |
'h{a"}n' ('{a"}') // Illative [43]
'h{o"}n' ('{o"}') // |
'siin' VI // |
'seen' LONG //-'
'den' VI
'tten' VI // Genitive plurals [34]
()
'n' // Genitive or Illative
( try ( LONG // Illative
or 'ie' // Genitive
and next ]
)
/* otherwise Genitive */
)
'a' '{a"}' //-.
(V1 non-V1) // |
'tta' 'tt{a"}' // Partitive [32]
('e') // |
'ta' 't{a"}' //-'
'ssa' 'ss{a"}' // Inessive [41]
'sta' 'st{a"}' // Elative [42]
'lla' 'll{a"}' // Adessive [44]
'lta' 'lt{a"}' // Ablative [51]
'lle' // Allative [46]
'na' 'n{a"}' // Essive [49]
'ksi' // Translative[50]
'ine' // Comitative [51]
/* Abessive and Instructive are too rare for
inclusion [51] */
)
delete
set ending_removed
)
// other_endings: finds the longest listed ending no further back than p2
// and deletes it. The first group is skipped when preceded by 'po'.
define other_endings as (
setlimit tomark p2 for ([substring])
among(
'mpi' 'mpa' 'mp{a"}'
'mmi' 'mma' 'mm{a"}' // Comparative forms [85]
(not 'po') //-improves things
'impi' 'impa' 'imp{a"}'
'immi' 'imma' 'imm{a"}' // Superlative forms [86]
'eja' 'ej{a"}' // indicates agent [93.1B]
)
delete
)
// i_plural: deletes a final 'i' or 'j' found no further back than p1.
define i_plural as ( // [26]
setlimit tomark p1 for ([substring])
among(
'i' 'j'
)
delete
)
// t_plural: within the p1-limited region, deletes a final 't' that is
// preceded by a V1 vowel. Then, within the p2-limited region, deletes a
// following 'mma' (unless preceded by 'po') or 'imma' ending.
define t_plural as ( // [26]
setlimit tomark p1 for (
['t'] test V1
delete
)
setlimit tomark p2 for ([substring])
among(
'mma' (not 'po') //-mmat endings
'imma' //-immat endings
)
delete
)
// tidy: final clean-up. Inside the p1-limited region four independent `do`
// steps run (see the per-line comments). Afterwards the cursor moves to the
// nearest non-V1 character, which is copied to x, must be matched again by
// the text before it, and is then deleted.
define tidy as (
setlimit tomark p1 for (
do ( LONG and ([next] delete ) ) // undouble vowel
do ( [AEI] non-V1 delete ) // remove trailing a, a", e, i
do ( ['j'] 'o' or 'u' delete )
do ( ['o'] 'j' delete )
)
goto non-V1 [next] -> x x delete // undouble consonant
)
)
// stem: external entry point. Marks regions, clears ending_removed, then in
// backward mode runs the suffix routines in order. i_plural is attempted
// only when case_ending removed something; otherwise t_plural is attempted.
define stem as (
do mark_regions
unset ending_removed
backwards (
do particle_etc
do possessive
do case_ending
do other_endings
(ending_removed do i_plural) or do t_plural
do tidy
)
)
libstemmer_c/algorithms/french/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
i_verb_suffix
verb_suffix
residual_suffix
un_double
un_accent
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v keep_with_s )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a^ hex 'E2' // a-circumflex
stringdef a` hex 'E0' // a-grave
stringdef c, hex 'E7' // c-cedilla
stringdef e" hex 'EB' // e-diaeresis (rare)
stringdef e' hex 'E9' // e-acute
stringdef e^ hex 'EA' // e-circumflex
stringdef e` hex 'E8' // e-grave
stringdef i" hex 'EF' // i-diaeresis
stringdef i^ hex 'EE' // i-circumflex
stringdef o^ hex 'F4' // o-circumflex
stringdef u^ hex 'FB' // u-circumflex
stringdef u` hex 'F9' // u-grave
define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
// prelude: repeatedly scans forward marking letters by upper-casing them:
//   - after a vowel: 'u' or 'i' followed by a vowel -> 'U' / 'I';
//     'y' -> 'Y'
//   - 'y' followed by a vowel -> 'Y'
//   - 'u' after 'q' -> 'U'
define prelude as repeat goto (
( v [ ('u' ] v <- 'U') or
('i' ] v <- 'I') or
('y' ] <- 'Y')
)
or
( ['y'] v <- 'Y' )
or
( 'q' ['u'] <- 'U' )
)
// mark_regions: sets pV, p1 and p2 (all default to the limit).
// pV: after the third character when the word starts with two vowels;
//     else right after a listed exception prefix; else just past the first
//     vowel found after skipping the first character.
// p1: just past the first non-vowel following a vowel; p2: the same rule
//     applied again from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v v next )
or
among ( // this exception list begun Nov 2006
'par' // paris, parie, pari
'col' // colis
'tap' // tapis
// extensions possible here
)
or
( next gopast v )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// postlude: walks the word turning the marker letters 'I', 'U' and 'Y' back
// into lower case; any other character is skipped with `next`.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'Y' (<- 'y')
'' (next)
)
)
backwardmode (
// RV / R1 / R2: succeed when the cursor is at or beyond pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// standard_suffix: finds the longest listed ending and applies the action
// attached to its group. Most actions require the ending to lie in R2 (or
// R1 / RV where shown) and delete or replace it; several then try a second,
// nested removal on what remains. The 'amment', 'emment' and 'ment(s)'
// actions are wrapped in `fail(...)`, so the edit is made but the routine
// still signals failure to its caller.
define standard_suffix as (
[substring] among(
'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
'ances' 'iqUes' 'ismes' 'ables' 'istes'
( R2 delete )
'atrice' 'ateur' 'ation'
'atrices' 'ateurs' 'ations'
( R2 delete
try ( ['ic'] (R2 delete) or <-'iqU' )
)
'logie'
'logies'
( R2 <- 'log' )
'usion' 'ution'
'usions' 'utions'
( R2 <- 'u' )
'ence'
'ences'
( R2 <- 'ent' )
'ement'
'ements'
(
RV delete
try (
[substring] among(
'iv' (R2 delete ['at'] R2 delete)
'eus' ((R2 delete) or (R1<-'eux'))
'abl' 'iqU'
(R2 delete)
'i{e`}r' 'I{e`}r' //)
(RV <-'i') //)--new 2 Sept 02
)
)
)
'it{e'}'
'it{e'}s'
(
R2 delete
try (
[substring] among(
'abil' ((R2 delete) or <-'abl')
'ic' ((R2 delete) or <-'iqU')
'iv' (R2 delete)
)
)
)
'if' 'ive'
'ifs' 'ives'
(
R2 delete
try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
)
'eaux' (<- 'eau')
'aux' (R1 <- 'al')
'euse'
'euses'((R2 delete) or (R1<-'eux'))
'issement'
'issements'(R1 non-v delete) // verbal
// fail(...) below forces entry to verb_suffix. -ment typically
// follows the p.p., e.g 'confus{e'}ment'.
'amment' (RV fail(<- 'ant'))
'emment' (RV fail(<- 'ent'))
'ment'
'ments' (test(v RV) fail(delete))
// v is e,i,u,{e'},I or U
)
)
// i_verb_suffix: with the limit set to pV, finds the longest listed verb
// ending beginning with i / {i^} and deletes it when it is preceded by a
// non-vowel.
define i_verb_suffix as setlimit tomark pV for (
[substring] among (
'{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
'issez' 'issiez' 'issions' 'issons' 'it'
(non-v delete)
)
)
// verb_suffix: with the limit set to pV, finds the longest listed verb
// ending. 'ions' is deleted only in R2; the second group is deleted
// unconditionally; the third group is deleted and a preceding 'e' is then
// removed as well when present.
define verb_suffix as setlimit tomark pV for (
[substring] among (
'ions'
(R2 delete)
'{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
'erons' 'eront' 'ez' 'iez'
// 'ons' //-best omitted
(delete)
'{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
'assions'
(delete
try(['e'] delete)
)
)
)
// keep_with_s: characters after which a final 's' is kept.
define keep_with_s 'aiou{e`}s'
// residual_suffix: deletes a final 's' unless the character before it is in
// keep_with_s. Then, with the limit set to pV, handles the longest listed
// ending: 'ion' deleted in R2 after 's' or 't'; the 'ier' family becomes
// 'i'; 'e' deleted; '{e"}' deleted after 'gu'.
define residual_suffix as (
try(['s'] test non-keep_with_s delete)
setlimit tomark pV for (
[substring] among(
'ion' (R2 's' or 't' delete)
'ier' 'i{e`}re'
'Ier' 'I{e`}re' (<-'i')
'e' (delete)
'{e"}' ('gu' delete)
)
)
)
// un_double: if the text ends in one of the listed forms (checked under
// `test`, so the cursor is restored), deletes the final character.
define un_double as (
test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
)
// un_accent: skips back over at least one non-vowel, then replaces an
// {e'} or {e`} found there by plain 'e'.
define un_accent as (
atleast 1 non-v
[ '{e'}' or '{e`}' ] <-'e'
)
)
// stem: external entry point. After prelude and region marking, backward
// mode tries standard_suffix, then i_verb_suffix, then verb_suffix; when
// one succeeds, a final 'Y' becomes 'i' or a final '{c,}' becomes 'c'.
// If none succeeds, residual_suffix runs instead. un_double and un_accent
// follow, and postlude restores the marker letters.
define stem as (
do prelude
do mark_regions
backwards (
do (
(
( standard_suffix or
i_verb_suffix or
verb_suffix
)
and
try( [ ('Y' ] <- 'i' ) or
('{c,}'] <- 'c' )
)
) or
residual_suffix
)
// try(['ent'] RV delete) // is best omitted
do un_double
do un_accent
)
do postlude
)
libstemmer_c/algorithms/french/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
i_verb_suffix
verb_suffix
residual_suffix
un_double
un_accent
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v keep_with_s )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a^ hex '83' // a-circumflex
stringdef a` hex '85' // a-grave
stringdef c, hex '87' // c-cedilla
stringdef e" hex '89' // e-diaeresis (rare)
stringdef e' hex '82' // e-acute
stringdef e^ hex '88' // e-circumflex
stringdef e` hex '8A' // e-grave
stringdef i" hex '8B' // i-diaeresis
stringdef i^ hex '8C' // i-circumflex
stringdef o^ hex '93' // o-circumflex
stringdef u^ hex '96' // u-circumflex
stringdef u` hex '97' // u-grave
define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
// prelude: repeatedly scans forward marking letters by upper-casing them:
//   - after a vowel: 'u' or 'i' followed by a vowel -> 'U' / 'I';
//     'y' -> 'Y'
//   - 'y' followed by a vowel -> 'Y'
//   - 'u' after 'q' -> 'U'
define prelude as repeat goto (
( v [ ('u' ] v <- 'U') or
('i' ] v <- 'I') or
('y' ] <- 'Y')
)
or
( ['y'] v <- 'Y' )
or
( 'q' ['u'] <- 'U' )
)
// mark_regions: sets pV, p1 and p2 (all default to the limit).
// pV: after the third character when the word starts with two vowels;
//     else right after a listed exception prefix; else just past the first
//     vowel found after skipping the first character.
// p1: just past the first non-vowel following a vowel; p2: the same rule
//     applied again from p1.
// The exception list mirrors the one in the ISO Latin I variant of this
// algorithm so both encodings place pV identically; its entries are plain
// ASCII and therefore encoding-independent.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v v next )
or
among ( // this exception list begun Nov 2006
'par' // paris, parie, pari
'col' // colis
'tap' // tapis
// extensions possible here
)
or
( next gopast v )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// postlude: walks the word turning the marker letters 'I', 'U' and 'Y' back
// into lower case; any other character is skipped with `next`.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'Y' (<- 'y')
'' (next)
)
)
backwardmode (
// RV / R1 / R2: succeed when the cursor is at or beyond pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// standard_suffix: finds the longest listed ending and applies the action
// attached to its group. Most actions require the ending to lie in R2 (or
// R1 / RV where shown) and delete or replace it; several then try a second,
// nested removal on what remains. The 'amment', 'emment' and 'ment(s)'
// actions are wrapped in `fail(...)`, so the edit is made but the routine
// still signals failure to its caller.
define standard_suffix as (
[substring] among(
'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
'ances' 'iqUes' 'ismes' 'ables' 'istes'
( R2 delete )
'atrice' 'ateur' 'ation'
'atrices' 'ateurs' 'ations'
( R2 delete
try ( ['ic'] (R2 delete) or <-'iqU' )
)
'logie'
'logies'
( R2 <- 'log' )
'usion' 'ution'
'usions' 'utions'
( R2 <- 'u' )
'ence'
'ences'
( R2 <- 'ent' )
'ement'
'ements'
(
RV delete
try (
[substring] among(
'iv' (R2 delete ['at'] R2 delete)
'eus' ((R2 delete) or (R1<-'eux'))
'abl' 'iqU'
(R2 delete)
'i{e`}r' 'I{e`}r' //)
(RV <-'i') //)--new 2 Sept 02
)
)
)
'it{e'}'
'it{e'}s'
(
R2 delete
try (
[substring] among(
'abil' ((R2 delete) or <-'abl')
'ic' ((R2 delete) or <-'iqU')
'iv' (R2 delete)
)
)
)
'if' 'ive'
'ifs' 'ives'
(
R2 delete
try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
)
'eaux' (<- 'eau')
'aux' (R1 <- 'al')
'euse'
'euses'((R2 delete) or (R1<-'eux'))
'issement'
'issements'(R1 non-v delete) // verbal
// fail(...) below forces entry to verb_suffix. -ment typically
// follows the p.p., e.g 'confus{e'}ment'.
'amment' (RV fail(<- 'ant'))
'emment' (RV fail(<- 'ent'))
'ment'
'ments' (test(v RV) fail(delete))
// v is e,i,u,{e'},I or U
)
)
// i_verb_suffix: with the search limited at mark pV, finds the longest
// listed verb ending beginning with i (or i-circumflex) and deletes it,
// provided the character in front of it is not a vowel.
define i_verb_suffix as setlimit tomark pV for (
[substring] among (
'{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
'issez' 'issiez' 'issions' 'issons' 'it'
(non-v delete)
)
)
// verb_suffix: with the search limited at mark pV, removes the longest
// listed verb ending. Three groups:
//   - 'ions' is deleted only when it lies in R2;
//   - the e-acute / er... endings are deleted unconditionally;
//   - the a... endings are deleted, and an 'e' directly in front of them
//     is deleted as well when present.
define verb_suffix as setlimit tomark pV for (
[substring] among (
'ions'
(R2 delete)
'{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
'erons' 'eront' 'ez' 'iez'
// 'ons' //-best omitted
(delete)
'{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
'assions'
(delete
try(['e'] delete)
)
)
)
// keep_with_s: letters after which a final 's' is left in place.
define keep_with_s 'aiou{e`}s'
// residual_suffix: first drops a final 's' unless the letter before it is in
// keep_with_s. Then, with the search limited at mark pV:
//   - 'ion' is deleted when it lies in R2 and follows s or t;
//   - ier / i-e-grave-re (with i or I) is replaced by 'i';
//   - 'e' is deleted;
//   - e-diaeresis is deleted when it follows 'gu'.
define residual_suffix as (
try(['s'] test non-keep_with_s delete)
setlimit tomark pV for (
[substring] among(
'ion' (R2 's' or 't' delete)
'ier' 'i{e`}re'
'Ier' 'I{e`}re' (<-'i')
'e' (delete)
'{e"}' ('gu' delete)
)
)
)
// un_double: when the word ends in enn, onn, ett, ell or eill, deletes its
// last letter. 'test' restores the cursor, so [next] selects the final character.
define un_double as (
test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
)
// un_accent: working backwards from the end, skips one or more non-vowels
// and, if the letter reached is e-acute or e-grave, replaces it by plain 'e'.
define un_accent as (
atleast 1 non-v
[ '{e'}' or '{e`}' ] <-'e'
)
)
// stem: the external entry point.
//   1. prelude and mark_regions run forwards.
//   2. Backwards: the first of standard_suffix, i_verb_suffix, verb_suffix
//      that succeeds is followed by an optional final Y -> i or
//      c-cedilla -> c; if none succeeds, residual_suffix is tried instead.
//      un_double and un_accent are then attempted.
//   3. postlude runs forwards.
// Every step is wrapped in 'do', so a failing step does not abort the rest.
define stem as (
do prelude
do mark_regions
backwards (
do (
(
( standard_suffix or
i_verb_suffix or
verb_suffix
)
and
try( [ ('Y' ] <- 'i' ) or
('{c,}'] <- 'c' )
)
) or
residual_suffix
)
// try(['ent'] RV delete) // is best omitted
do un_double
do un_accent
)
do postlude
)
libstemmer_c/algorithms/german/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the German stemmer: the internal routines, the single
// external entry point (stem), the integer marks and the character groupings.
routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)
externals ( stem )
integers ( p1 p2 x )
groupings ( v s_ending st_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a" hex 'E4'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef ss hex 'DF'
// v: vowels, including the three umlauted vowels defined above.
define v 'aeiouy{a"}{o"}{u"}'
// s_ending: letters after which a final 's' may be removed.
define s_ending 'bdfghklmnrt'
// st_ending: the same letters without 'r'; used for the 'st' suffix.
define st_ending s_ending - 'r'
// prelude: two forward passes over the word.
//   1. Every occurrence of the {ss} character is replaced by 'ss'
//      ('test' puts the cursor back at the start afterwards).
//   2. A 'u' or 'y' standing between two vowels is upper-cased, which takes
//      it out of the grouping v.
define prelude as (
test repeat (
(
['{ss}'] <- 'ss'
) or next
)
repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)
)
// mark_regions: p1 and p2 default to the end of the word. x is set three
// characters in from the start. p1 is placed after the first non-vowel that
// follows a vowel and then raised to x if it is smaller; p2 is found by
// repeating the vowel / non-vowel search from there.
define mark_regions as (
$p1 = limit
$p2 = limit
test(hop 3 setmark x)
gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
gopast v gopast non-v setmark p2
)
// postlude: lower-cases the markers Y and U again and strips the umlaut
// from a, o and u; every other character is stepped over.
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: succeed when the cursor is at or beyond mark p1 / p2.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// standard_suffix: three independent suffix-removal steps, each wrapped in
// 'do' so that a failure in one does not prevent the next.
//   Step 1 (suffix in R1): e, em, en, ern, er, es are deleted; 's' is
//          deleted only after a letter of s_ending.
//   Step 2 (suffix in R1): en, er, est are deleted; 'st' is deleted only
//          after a letter of st_ending that has at least three characters
//          in front of it.
//   Step 3 (suffix in R2): derivational endings (end, ung, ig, ik, isch,
//          lich, heit, keit) are deleted, some only when not preceded by
//          'e', and some followed by removal of one further ending.
define standard_suffix as (
do (
[substring] R1 among(
'e' 'em' 'en' 'ern' 'er' 'es'
( delete
)
's'
( s_ending delete
)
)
)
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)
// stem: the external entry point. Runs prelude and mark_regions forwards,
// standard_suffix backwards, then postlude forwards; each step is
// wrapped in 'do' so its failure is ignored.
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)
libstemmer_c/algorithms/german/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the German stemmer: the internal routines, the single
// external entry point (stem), the integer marks and the character groupings.
routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)
externals ( stem )
integers ( p1 p2 x )
groupings ( v s_ending st_ending )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a" hex '84'
stringdef o" hex '94'
stringdef u" hex '81'
stringdef ss hex 'E1'
// v: vowels, including the three umlauted vowels defined above.
define v 'aeiouy{a"}{o"}{u"}'
// s_ending: letters after which a final 's' may be removed.
define s_ending 'bdfghklmnrt'
// st_ending: the same letters without 'r'; used for the 'st' suffix.
define st_ending s_ending - 'r'
// prelude: two forward passes over the word.
//   1. Every occurrence of the {ss} character is replaced by 'ss'
//      ('test' puts the cursor back at the start afterwards).
//   2. A 'u' or 'y' standing between two vowels is upper-cased, which takes
//      it out of the grouping v.
define prelude as (
test repeat (
(
['{ss}'] <- 'ss'
) or next
)
repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)
)
// mark_regions: p1 and p2 default to the end of the word. x is set three
// characters in from the start. p1 is placed after the first non-vowel that
// follows a vowel and then raised to x if it is smaller; p2 is found by
// repeating the vowel / non-vowel search from there.
define mark_regions as (
$p1 = limit
$p2 = limit
test(hop 3 setmark x)
gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
gopast v gopast non-v setmark p2
)
// postlude: lower-cases the markers Y and U again and strips the umlaut
// from a, o and u; every other character is stepped over.
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: succeed when the cursor is at or beyond mark p1 / p2.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// standard_suffix: three independent suffix-removal steps, each wrapped in
// 'do' so that a failure in one does not prevent the next.
//   Step 1 (suffix in R1): e, em, en, ern, er, es are deleted; 's' is
//          deleted only after a letter of s_ending.
//   Step 2 (suffix in R1): en, er, est are deleted; 'st' is deleted only
//          after a letter of st_ending that has at least three characters
//          in front of it.
//   Step 3 (suffix in R2): derivational endings (end, ung, ig, ik, isch,
//          lich, heit, keit) are deleted, some only when not preceded by
//          'e', and some followed by removal of one further ending.
define standard_suffix as (
do (
[substring] R1 among(
'e' 'em' 'en' 'ern' 'er' 'es'
( delete
)
's'
( s_ending delete
)
)
)
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)
// stem: the external entry point. Runs prelude and mark_regions forwards,
// standard_suffix backwards, then postlude forwards; each step is
// wrapped in 'do' so its failure is ignored.
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)
libstemmer_c/algorithms/german2/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the German2 stemmer: the internal routines, the single
// external entry point (stem), the integer marks and the character groupings.
routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)
externals ( stem )
integers ( p1 p2 x )
groupings ( v s_ending st_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a" hex 'E4'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef ss hex 'DF'
// v: vowels, including the three umlauted vowels defined above.
define v 'aeiouy{a"}{o"}{u"}'
// s_ending: letters after which a final 's' may be removed.
define s_ending 'bdfghklmnrt'
// st_ending: the same letters without 'r'; used for the 'st' suffix.
define st_ending s_ending - 'r'
// prelude: two forward passes over the word.
//   1. A 'u' or 'y' standing between two vowels is upper-cased, which takes
//      it out of the grouping v ('test' puts the cursor back at the start).
//   2. {ss} becomes 'ss', and the digraphs ae, oe, ue become the
//      corresponding umlauted vowel. 'qu' is matched as a unit so that the
//      'u' of 'que' is consumed and 'ue' is not rewritten after 'q'.
define prelude as (
test repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)
repeat (
[substring] among(
'{ss}' (<- 'ss')
'ae' (<- '{a"}')
'oe' (<- '{o"}')
'ue' (<- '{u"}')
// substring has already moved the cursor past 'qu', so no further
// skipping is needed. The former 'hop 2' jumped over two additional
// characters (leaving e.g. the 'ae' of 'quae' unconverted).
'qu' ()
'' (next)
)
)
)
// mark_regions: p1 and p2 default to the end of the word. x is set three
// characters in from the start. p1 is placed after the first non-vowel that
// follows a vowel and then raised to x if it is smaller; p2 is found by
// repeating the vowel / non-vowel search from there.
define mark_regions as (
$p1 = limit
$p2 = limit
test(hop 3 setmark x)
gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
gopast v gopast non-v setmark p2
)
// postlude: lower-cases the markers Y and U again and strips the umlaut
// from a, o and u; every other character is stepped over.
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: succeed when the cursor is at or beyond mark p1 / p2.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// standard_suffix: three independent suffix-removal steps, each wrapped in
// 'do' so that a failure in one does not prevent the next.
//   Step 1 (suffix in R1): e, em, en, ern, er, es are deleted; 's' is
//          deleted only after a letter of s_ending.
//   Step 2 (suffix in R1): en, er, est are deleted; 'st' is deleted only
//          after a letter of st_ending that has at least three characters
//          in front of it.
//   Step 3 (suffix in R2): derivational endings (end, ung, ig, ik, isch,
//          lich, heit, keit) are deleted, some only when not preceded by
//          'e', and some followed by removal of one further ending.
define standard_suffix as (
do (
[substring] R1 among(
'e' 'em' 'en' 'ern' 'er' 'es'
( delete
)
's'
( s_ending delete
)
)
)
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)
// stem: the external entry point. Runs prelude and mark_regions forwards,
// standard_suffix backwards, then postlude forwards; each step is
// wrapped in 'do' so its failure is ignored.
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)
libstemmer_c/algorithms/hungarian/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
/*
Hungarian Stemmer
Removes noun inflections
*/
// Declarations: internal routines, the external entry point (stem), the
// single region mark p1 and the vowel grouping.
routines (
mark_regions
R1
v_ending
case
case_special
case_other
plural
owned
sing_owner
plur_owner
instrum
factive
undouble
double
)
externals ( stem )
integers ( p1 )
groupings ( v )
stringescapes {}
/* special characters (in ISO Latin I) */
// NOTE(review): F5 and FB are the double-acute letters in ISO 8859-2; in
// ISO 8859-1 those code points are o-tilde and u-circumflex. Confirm which
// encoding the input is really expected to use.
stringdef a' hex 'E1' //a-acute
stringdef e' hex 'E9' //e-acute
stringdef i' hex 'ED' //i-acute
stringdef o' hex 'F3' //o-acute
stringdef o" hex 'F6' //o-umlaut
stringdef oq hex 'F5' //o-double acute
stringdef u' hex 'FA' //u-acute
stringdef u" hex 'FC' //u-umlaut
stringdef uq hex 'FB' //u-double acute
// v: plain and accented vowels.
define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'
// mark_regions: p1 defaults to the end of the word.
//   - Word starting with a vowel: move to the first non-vowel, step over
//     one of the listed consonant digraphs (or else a single character),
//     and set p1 there.
//   - Word starting with a non-vowel: set p1 just after the first vowel.
define mark_regions as (
$p1 = limit
(v goto non-v
among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next
setmark p1)
or
(non-v gopast v setmark p1)
)
backwardmode (
// R1: succeeds when the cursor is at or beyond mark p1.
define R1 as $p1 <= cursor
// v_ending: a final a-acute or e-acute lying in R1 loses its accent.
define v_ending as (
[substring] R1 among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
)
)
// double: tests (without moving the cursor) that the text before the cursor
// ends in one of the listed doubled consonants or doubled digraphs.
define double as (
test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
)
// undouble: steps back over one character and deletes the character in
// front of it.
define undouble as (
next [hop 1] delete
)
// instrum: a final 'al' or 'el' in R1 that follows a doubled consonant
// (see double) is deleted, and undouble is then applied.
define instrum as(
[substring] R1 among(
'al' (double)
'el' (double)
)
delete
undouble
)
// case: deletes the longest listed case ending lying in R1, then applies
// v_ending to what remains.
define case as (
[substring] R1 among(
'ban' 'ben'
'ba' 'be'
'ra' 're'
'nak' 'nek'
'val' 'vel'
't{o'}l' 't{oq}l'
'r{o'}l' 'r{oq}l'
'b{o'}l' 'b{oq}l'
'hoz' 'hez' 'h{o"}z'
'n{a'}l' 'n{e'}l'
'ig'
'at' 'et' 'ot' '{o"}t'
'{e'}rt'
'k{e'}pp' 'k{e'}ppen'
'kor'
'ul' '{u"}l'
'v{a'}' 'v{e'}'
'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
'k{e'}nt'
'en' 'on' 'an' '{o"}n'
'n'
't'
)
delete
v_ending
)
// case_special: endings in R1 that begin with an accented vowel are
// replaced by the unaccented vowel alone.
define case_special as(
[substring] R1 among(
'{e'}n' (<- 'e')
'{a'}n' (<- 'a')
'{a'}nk{e'}nt' (<- 'a')
)
)
// case_other: the -stul / -st-u-umlaut-l endings in R1 are deleted; the
// variants starting with an accented vowel are replaced by that vowel
// without its accent.
define case_other as(
[substring] R1 among(
'astul' 'est{u"}l' (delete)
'stul' 'st{u"}l' (delete)
'{a'}stul' (<- 'a')
'{e'}st{u"}l' (<- 'e')
)
)
// factive: a final a-acute or e-acute in R1 that follows a doubled
// consonant (see double) is deleted, and undouble is then applied.
define factive as(
[substring] R1 among(
'{a'}' (double)
'{e'}' (double)
)
delete
undouble
)
// plural: removes a plural ending in R1. Endings that start with an
// accented a/e are replaced by the unaccented vowel; the others are deleted.
define plural as (
[substring] R1 among(
'{a'}k' (<- 'a')
'{e'}k' (<- 'e')
'{o"}k' (delete)
'ak' (delete)
'ok' (delete)
'ek' (delete)
'k' (delete)
)
)
// owned: removes the listed endings built on e-acute (k + e-acute,
// e-acute + i, e-acute) in R1. Where the ending itself starts with an
// accented a/e, that vowel is kept without its accent; otherwise the
// ending is deleted.
define owned as (
[substring] R1 among (
'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
'{e'}k{e'}' (<- 'e')
'{a'}k{e'}' (<- 'a')
'k{e'}' (delete)
'{e'}{e'}i' (<- 'e')
'{a'}{e'}i' (<- 'a')
'{e'}i' (delete)
'{e'}{e'}' (<- 'e')
'{e'}' (delete)
)
)
// sing_owner: removes the longest listed ending in R1. Endings that start
// with an accented a/e are replaced by the unaccented vowel; all other
// endings are deleted.
define sing_owner as (
[substring] R1 among(
'{u"}nk' 'unk' (delete)
'{a'}nk' (<- 'a')
'{e'}nk' (<- 'e')
'nk' (delete)
'{a'}juk' (<- 'a')
'{e'}j{u"}k' (<- 'e')
'juk' 'j{u"}k' (delete)
'uk' '{u"}k' (delete)
'em' 'om' 'am' (delete)
'{a'}m' (<- 'a')
'{e'}m' (<- 'e')
'm' (delete)
'od' 'ed' 'ad' '{o"}d' (delete)
'{a'}d' (<- 'a')
'{e'}d' (<- 'e')
'd' (delete)
'ja' 'je' (delete)
'a' 'e' 'o' (delete)
'{a'}' (<- 'a')
'{e'}' (<- 'e')
)
)
// plur_owner: removes the longest listed ending in R1. Endings that start
// with an accented a/e are replaced by the unaccented vowel; all other
// endings are deleted. 'ink' has no action of its own and therefore shares
// the (delete) action that follows 'jaitok' 'jeitek'.
define plur_owner as (
[substring] R1 among(
'jaim' 'jeim' (delete)
'{a'}im' (<- 'a')
'{e'}im' (<- 'e')
'aim' 'eim' (delete)
'im' (delete)
'jaid' 'jeid' (delete)
'{a'}id' (<- 'a')
'{e'}id' (<- 'e')
'aid' 'eid' (delete)
'id' (delete)
'jai' 'jei' (delete)
'{a'}i' (<- 'a')
'{e'}i' (<- 'e')
'ai' 'ei' (delete)
'i' (delete)
'jaink' 'jeink' (delete)
'eink' 'aink' (delete)
'{a'}ink' (<- 'a')
'{e'}ink' (<- 'e')
'ink'
'jaitok' 'jeitek' (delete)
'aitok' 'eitek' (delete)
'{a'}itok' (<- 'a')
'{e'}itek' (<- 'e')
'itek' (delete)
'jeik' 'jaik' (delete)
'aik' 'eik' (delete)
'{a'}ik' (<- 'a')
'{e'}ik' (<- 'e')
'ik' (delete)
)
)
)
// stem: the external entry point. Sets p1, then works backwards from the end
// of the word trying each suffix routine once, in the order listed. Each
// call is wrapped in 'do', so a routine that fails does not stop the rest.
define stem as (
do mark_regions
backwards (
do instrum
do case
do case_special
do case_other
do factive
do owned
do sing_owner
do plur_owner
do plural
)
)
libstemmer_c/algorithms/italian/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Italian stemmer: internal routines, the external
// entry point (stem), the three region marks and the character groupings.
routines (
prelude postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
verb_suffix
vowel_suffix
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v AEIO CG )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a' hex 'E1'
stringdef a` hex 'E0'
stringdef e' hex 'E9'
stringdef e` hex 'E8'
stringdef i' hex 'ED'
stringdef i` hex 'EC'
stringdef o' hex 'F3'
stringdef o` hex 'F2'
stringdef u' hex 'FA'
stringdef u` hex 'F9'
// v: plain vowels plus the grave-accented vowels (acute forms are turned
// into grave forms by prelude).
define v 'aeiou{a`}{e`}{i`}{o`}{u`}'
// prelude: two forward passes over the word.
//   1. Acute-accented vowels are replaced by their grave-accented forms and
//      'qu' becomes 'qU' ('test' puts the cursor back at the start).
//   2. A 'u' or 'i' standing between two vowels is upper-cased, which takes
//      it out of the grouping v.
define prelude as (
test repeat (
[substring] among(
'{a'}' (<- '{a`}')
'{e'}' (<- '{e`}')
'{i'}' (<- '{i`}')
'{o'}' (<- '{o`}')
'{u'}' (<- '{u`}')
'qu' (<- 'qU')
'' (next)
)
)
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)
// mark_regions: pV, p1 and p2 default to the end of the word; each 'do'
// leaves the defaults in place when its search fails.
//   pV, word starting with a vowel: if the second letter is a non-vowel,
//       after the next vowel; if it is a vowel, after the next non-vowel.
//   pV, word starting with a non-vowel: if the second letter is a
//       non-vowel, after the next vowel; if it is a vowel, after the
//       third letter.
//   p1: after the first non-vowel that follows a vowel.
//   p2: the same search repeated starting from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// postlude: turns the marker letters I and U back into lower case; every
// other character is stepped over.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: each succeeds when the cursor is at or beyond the
// corresponding mark set by mark_regions.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// attached_pronoun: marks the longest listed pronoun at the end of the word
// as the slice, then inspects what stands in front of it (which must lie in
// RV): after 'ando' or 'endo' the pronoun is deleted; after 'ar', 'er' or
// 'ir' the pronoun is replaced by 'e'.
define attached_pronoun as (
[substring] among(
'ci' 'gli' 'la' 'le' 'li' 'lo'
'mi' 'ne' 'si' 'ti' 'vi'
// the compound forms are:
'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
'mela' 'mele' 'meli' 'melo' 'mene'
'tela' 'tele' 'teli' 'telo' 'tene'
'cela' 'cele' 'celi' 'celo' 'cene'
'vela' 'vele' 'veli' 'velo' 'vene'
)
among( (RV)
'ando' 'endo' (delete)
'ar' 'er' 'ir' (<- 'e')
)
)
// standard_suffix: takes the longest listed suffix at the end of the word and
// runs the action of the group it belongs to. The actions delete or replace
// the suffix provided it lies in R2, R1 or RV; the amente, it-a-grave and
// ivo/ivi/iva/ive groups then try to remove one further suffix in front.
define standard_suffix as (
[substring] among(
'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
'atrice' 'atrici'
'ante' 'anti' // Note 1
( R2 delete )
'azione' 'azioni' 'atore' 'atori'
( R2 delete
try ( ['ic'] R2 delete )
)
'logia' 'logie'
( R2 <- 'log' )
'uzione' 'uzioni' 'usione' 'usioni'
( R2 <- 'u' )
'enza' 'enze'
( R2 <- 'ente' )
'amento' 'amenti' 'imento' 'imenti'
( RV delete )
'amente' (
R1 delete
try (
[substring] R2 delete among(
'iv' ( ['at'] R2 delete )
'os' 'ic' 'abil'
)
)
)
'it{a`}' (
R2 delete
try (
[substring] among(
'abil' 'ic' 'iv' (R2 delete)
)
)
)
'ivo' 'ivi' 'iva' 'ive' (
R2 delete
try ( ['at'] R2 delete ['ic'] R2 delete )
)
)
)
// verb_suffix: with the search limited at mark pV, deletes the longest
// listed verb ending.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
'ono' 'uta' 'ute' 'uti' 'uto'
'ar' 'ir' // but 'er' is problematical
(delete)
)
)
// AEIO: final vowels removed by vowel_suffix; CG: letters after which a
// final 'h' is removed.
define AEIO 'aeio{a`}{e`}{i`}{o`}'
define CG 'cg'
// vowel_suffix: two optional clean-ups at the end of the word.
//   1. A final AEIO vowel in RV is deleted, followed by an 'i' in RV
//      directly before it.
//   2. A final 'h' that follows c or g (in RV) is deleted.
define vowel_suffix as (
try (
[AEIO] RV delete
['i'] RV delete
)
try (
['h'] CG RV delete
)
)
)
// stem: the external entry point. prelude and mark_regions run forwards;
// then, backwards, attached_pronoun, standard_suffix (or verb_suffix when
// that fails) and vowel_suffix; finally postlude runs forwards. Each
// step is wrapped in 'do' so its failure is ignored.
define stem as (
do prelude
do mark_regions
backwards (
do attached_pronoun
do (standard_suffix or verb_suffix)
do vowel_suffix
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
libstemmer_c/algorithms/italian/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Italian stemmer: internal routines, the external
// entry point (stem), the three region marks and the character groupings.
routines (
prelude postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
verb_suffix
vowel_suffix
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v AEIO CG )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a' hex 'A0'
stringdef a` hex '85'
stringdef e' hex '82'
stringdef e` hex '8A'
stringdef i' hex 'A1'
stringdef i` hex '8D'
stringdef o' hex 'A2'
stringdef o` hex '95'
stringdef u' hex 'A3'
stringdef u` hex '97'
// v: plain vowels plus the grave-accented vowels (acute forms are turned
// into grave forms by prelude).
define v 'aeiou{a`}{e`}{i`}{o`}{u`}'
// prelude: two forward passes over the word.
//   1. Acute-accented vowels are replaced by their grave-accented forms and
//      'qu' becomes 'qU' ('test' puts the cursor back at the start).
//   2. A 'u' or 'i' standing between two vowels is upper-cased, which takes
//      it out of the grouping v.
define prelude as (
test repeat (
[substring] among(
'{a'}' (<- '{a`}')
'{e'}' (<- '{e`}')
'{i'}' (<- '{i`}')
'{o'}' (<- '{o`}')
'{u'}' (<- '{u`}')
'qu' (<- 'qU')
'' (next)
)
)
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)
// mark_regions: pV, p1 and p2 default to the end of the word; each 'do'
// leaves the defaults in place when its search fails.
//   pV, word starting with a vowel: if the second letter is a non-vowel,
//       after the next vowel; if it is a vowel, after the next non-vowel.
//   pV, word starting with a non-vowel: if the second letter is a
//       non-vowel, after the next vowel; if it is a vowel, after the
//       third letter.
//   p1: after the first non-vowel that follows a vowel.
//   p2: the same search repeated starting from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// postlude: turns the marker letters I and U back into lower case; every
// other character is stepped over.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: each succeeds when the cursor is at or beyond the
// corresponding mark set by mark_regions.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// attached_pronoun: marks the longest listed pronoun at the end of the word
// as the slice, then inspects what stands in front of it (which must lie in
// RV): after 'ando' or 'endo' the pronoun is deleted; after 'ar', 'er' or
// 'ir' the pronoun is replaced by 'e'.
define attached_pronoun as (
[substring] among(
'ci' 'gli' 'la' 'le' 'li' 'lo'
'mi' 'ne' 'si' 'ti' 'vi'
// the compound forms are:
'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
'mela' 'mele' 'meli' 'melo' 'mene'
'tela' 'tele' 'teli' 'telo' 'tene'
'cela' 'cele' 'celi' 'celo' 'cene'
'vela' 'vele' 'veli' 'velo' 'vene'
)
among( (RV)
'ando' 'endo' (delete)
'ar' 'er' 'ir' (<- 'e')
)
)
// standard_suffix: takes the longest listed suffix at the end of the word and
// runs the action of the group it belongs to. The actions delete or replace
// the suffix provided it lies in R2, R1 or RV; the amente, it-a-grave and
// ivo/ivi/iva/ive groups then try to remove one further suffix in front.
define standard_suffix as (
[substring] among(
'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
'atrice' 'atrici'
'ante' 'anti' // Note 1
( R2 delete )
'azione' 'azioni' 'atore' 'atori'
( R2 delete
try ( ['ic'] R2 delete )
)
'logia' 'logie'
( R2 <- 'log' )
'uzione' 'uzioni' 'usione' 'usioni'
( R2 <- 'u' )
'enza' 'enze'
( R2 <- 'ente' )
'amento' 'amenti' 'imento' 'imenti'
( RV delete )
'amente' (
R1 delete
try (
[substring] R2 delete among(
'iv' ( ['at'] R2 delete )
'os' 'ic' 'abil'
)
)
)
'it{a`}' (
R2 delete
try (
[substring] among(
'abil' 'ic' 'iv' (R2 delete)
)
)
)
'ivo' 'ivi' 'iva' 'ive' (
R2 delete
try ( ['at'] R2 delete ['ic'] R2 delete )
)
)
)
// verb_suffix: with the search limited at mark pV, deletes the longest
// listed verb ending.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
'ono' 'uta' 'ute' 'uti' 'uto'
'ar' 'ir' // but 'er' is problematical
(delete)
)
)
// AEIO: final vowels removed by vowel_suffix; CG: letters after which a
// final 'h' is removed.
define AEIO 'aeio{a`}{e`}{i`}{o`}'
define CG 'cg'
// vowel_suffix: two optional clean-ups at the end of the word.
//   1. A final AEIO vowel in RV is deleted, followed by an 'i' in RV
//      directly before it.
//   2. A final 'h' that follows c or g (in RV) is deleted.
define vowel_suffix as (
try (
[AEIO] RV delete
['i'] RV delete
)
try (
['h'] CG RV delete
)
)
)
// stem: the external entry point. prelude and mark_regions run forwards;
// then, backwards, attached_pronoun, standard_suffix (or verb_suffix when
// that fails) and vowel_suffix; finally postlude runs forwards. Each
// step is wrapped in 'do' so its failure is ignored.
define stem as (
do prelude
do mark_regions
backwards (
do attached_pronoun
do (standard_suffix or verb_suffix)
do vowel_suffix
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
libstemmer_c/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Kraaij-Pohlmann stemmer: one string variable, the
// integer marks, three boolean flags, the internal routines, the external
// entry point (stem) and the character groupings.
strings ( ch )
integers ( x p1 p2 )
booleans ( Y_found stemmed GE_removed )
routines (
R1 R2
C V VX
lengthen_V
Step_1 Step_2 Step_3 Step_4 Step_7
Step_6 Step_1c
Lose_prefix
Lose_infix
measure
)
externals ( stem )
groupings ( v v_WX AOU AIOU )
stringescapes {}
stringdef ' hex '27' // yuk
// v: vowels; v_WX: vowels plus w and x; AOU / AIOU: vowel subsets used by
// lengthen_V.
define v 'aeiouy'
define v_WX v + 'wx'
define AOU 'aou'
define AIOU 'aiou'
backwardmode (
// R1 / R2: record the cursor in x and succeed when it is at or beyond p1 / p2.
define R1 as (setmark x $x >= p1)
define R2 as (setmark x $x >= p2)
// V: the text before the cursor ends in a vowel or in 'ij' (cursor unchanged).
define V as test (v or 'ij')
// VX: like V, but looking one character further back.
define VX as test (next v or 'ij')
// C: the text before the cursor ends in a non-vowel and not in 'ij'
// (cursor unchanged).
define C as test (not 'ij' non-v)
// lengthen_V: doubles a vowel near the end of the word. Reading backwards,
// the last letter must be outside v_WX; the letter before it must be a, o
// or u, or an 'e' that passes the extra context checks below, and must
// itself follow a non-vowel or start the word. That vowel is copied into ch
// and inserted again. Wrapped in 'do', so it never signals failure.
define lengthen_V as do (
non-v_WX [ (AOU] test (non-v or atlimit)) or
('e'] test (non-v or atlimit
not AIOU
not (next AIOU non-v)))
->ch insert ch
)
// Step_1: matches one of the listed endings ('s, s, ies, es, aus, en, nde)
// and runs its action. The initial (]) closes the slice on the matched
// ending; some actions look further back and close the slice again with a
// second ] before deleting or replacing. Most actions are conditional on
// R1 and on the C / V context tests, and several finish with lengthen_V.
define Step_1 as
(
[among ( (])
'{'}s' (delete)
's' (R1 not ('t' R1) C delete)
'ies' (R1 <-'ie')
'es'
(('ar' R1 C ] delete lengthen_V) or
('er' R1 C ] delete) or
(R1 C <-'e'))
'aus' (R1 V <-'au')
'en' (('hed' R1 ] <-'heid') or
('nd' delete) or
('d' R1 C ] delete) or
('i' or 'j' V delete) or
(R1 C delete lengthen_V))
'nde' (<-'nd')
)
)
// Step_2: handles endings that finish in 'e' (je, ge, lijke, ische, de, te,
// se, re, le, ene, ieve). For 'je' the action depends on what precedes it;
// the alternatives are tried in the order written and the first one that
// succeeds is used. The other endings are replaced or deleted when in R1.
define Step_2 as
(
[among ( (])
'je' (('{'}t' ] delete) or
('et' ] R1 C delete) or
('rnt' ] <-'rn') or
('t' ] R1 VX delete) or
('ink' ] <-'ing') or
('mp' ] <-'m') or
('{'}' ] R1 delete) or
(] R1 C delete))
'ge' (R1 <-'g')
'lijke'(R1 <-'lijk')
'ische'(R1 <-'isch')
'de' (R1 C delete)
'te' (R1 <-'t')
'se' (R1 <-'s')
're' (R1 <-'r')
'le' (R1 delete attach 'l' lengthen_V)
'ene' (R1 C delete attach 'en' lengthen_V)
'ieve' (R1 C <-'ief')
)
)
// Step_3: removes or rewrites the listed derivational endings. Strings
// listed without an action share the action of the next string that has one
// (heid, sel with ster; ing, isme with erij).
define Step_3 as
(
[among ( (])
'atie' (R1 <-'eer')
'iteit' (R1 delete lengthen_V)
'heid'
'sel'
'ster' (R1 delete)
'rder' (<-'r')
'ing'
'isme'
'erij' (R1 delete lengthen_V)
'arij' (R1 C <-'aar')
'fie' (R2 delete attach 'f' lengthen_V)
'gie' (R2 delete attach 'g' lengthen_V)
'tst' (R1 C <-'t')
'dst' (R1 C <-'d')
)
)
// Step_4: tries the first table of adjectival / comparative endings; only
// when that whole attempt fails is the second table (iger, igst, ig) tried.
// Strings listed without an action share the action of the next string
// that has one.
define Step_4 as
(
( [among ( (])
'ioneel' (R1 <-'ie')
'atief' (R1 <-'eer')
'baar' (R1 delete)
'naar' (R1 V <-'n')
'laar' (R1 V <-'l')
'raar' (R1 V <-'r')
'tant' (R1 <-'teer')
'lijker'
'lijkst' (R1 <-'lijk')
'achtig'
'achtiger'
'achtigst'(R1 delete)
'eriger'
'erigst'
'erig'
'end' (R1 C delete lengthen_V)
)
)
or
( [among ( (])
'iger'
'igst'
'ig' (R1 C delete lengthen_V)
)
)
)
// Step_7: drops a final 't' after k, f or p.
define Step_7 as
(
[among ( (])
'kt' (<-'k')
'ft' (<-'f')
'pt' (<-'p')
)
)
// Step_6: reduces a final doubled consonant to a single one, and turns a
// final single v into f and z into s.
define Step_6 as
(
[among ( (])
'bb' (<-'b')
'cc' (<-'c')
'dd' (<-'d')
'ff' (<-'f')
'gg' (<-'g')
'hh' (<-'h')
'jj' (<-'j')
'kk' (<-'k')
'll' (<-'l')
'mm' (<-'m')
'nn' (<-'n')
'pp' (<-'p')
'qq' (<-'q')
'rr' (<-'r')
'ss' (<-'s')
'tt' (<-'t')
'vv' (<-'v')
'ww' (<-'w')
'xx' (<-'x')
'zz' (<-'z')
'v' (<-'f')
'z' (<-'s')
)
)
// Step_1c: deletes a final d or t that lies in R1 and follows a consonant
// (test C), except d after an 'n' in R1 and t after an 'h' in R1.
define Step_1c as
(
[among ( (] R1 C)
'd' (not ('n' R1) delete)
't' (not ('h' R1) delete)
)
)
)
// Lose_prefix: if the word starts with 'ge', is followed by at least three
// more characters, and a vowel followed later by a non-vowel can be found,
// sets GE_removed and deletes the 'ge'.
define Lose_prefix as (
['ge'] test hop 3 (goto v goto non-v)
set GE_removed
delete
)
// Lose_infix: skips the first character, then finds the next 'ge'. If at
// least three characters follow it and a vowel followed later by a
// non-vowel can be found, sets GE_removed and deletes that 'ge'.
define Lose_infix as (
next
gopast (['ge']) test hop 3 (goto v goto non-v)
set GE_removed
delete
)
// measure: sets p1 and p2. Both default to the end of the word (first
// 'do'). The second 'do' then places p1 after the first non-vowel that
// follows the first run of vowels ('ij' counting as a vowel), and p2 after
// the same pattern repeated from p1; if either search fails the values set
// so far are kept.
define measure as (
do (
tolimit
setmark p1
setmark p2
)
do(
repeat non-v atleast 1 ('ij' or v) non-v setmark p1
repeat non-v atleast 1 ('ij' or v) non-v setmark p2
)
)
// stem: the external entry point.
//   1. An initial 'y' and any 'y' after a vowel are upper-cased
//      (Y_found records that this happened).
//   2. measure sets p1 / p2; Step_1 to Step_4 run backwards, each setting
//      'stemmed' when it succeeds.
//   3. Lose_prefix, then Lose_infix: when a 'ge' is removed the regions are
//      re-measured and Step_1c is run.
//   4. Step_7 runs; Step_6 runs if anything was stemmed or a 'ge' removed.
//   5. If Y_found is set, every 'Y' is restored to 'y'.
define stem as (
unset Y_found
unset stemmed
do ( ['y'] <-'Y' set Y_found )
do repeat(goto (v ['y'])<-'Y' set Y_found )
measure
backwards (
do (Step_1 set stemmed )
do (Step_2 set stemmed )
do (Step_3 set stemmed )
do (Step_4 set stemmed )
)
unset GE_removed
do (Lose_prefix and measure)
backwards (
do (GE_removed Step_1c)
)
unset GE_removed
do (Lose_infix and measure)
backwards (
do (GE_removed Step_1c)
)
backwards (
do (Step_7 set stemmed )
do (stemmed or GE_removed Step_6)
)
do(Y_found repeat(goto (['Y']) <-'y'))
)
libstemmer_c/algorithms/lovins/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Lovins stemmer: the condition routines A..CC, the
// three working routines and the external entry point (stem).
stringescapes {}
routines (
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA BB CC
endings
undouble respell
)
externals ( stem )
backwardmode (
// Condition routines used as actions in 'endings'. Each one is evaluated
// with the cursor just in front of the matched ending: 'hop n' demands at
// least n remaining characters, and the string tests inspect the letters
// immediately before the ending. 'or' binds more tightly than sequencing,
// so e.g. H reads: test hop 2, then ('t' or 'll').
/* Lovins' conditions A, B ... CC, as given in her Appendix B, where
a test for a two letter prefix ('test hop 2') is implicitly
assumed. Note that 'e' next 'u' corresponds to her u*e because
Snowball is scanning backwards. */
define A as ( hop 2 )
define B as ( hop 3 )
define C as ( hop 4 )
define D as ( hop 5 )
define E as ( test hop 2 not 'e' )
define F as ( test hop 3 not 'e' )
define G as ( test hop 3 'f' )
define H as ( test hop 2 't' or 'll' )
define I as ( test hop 2 not 'o' not 'e' )
define J as ( test hop 2 not 'a' not 'e' )
define K as ( test hop 3 'l' or 'i' or ('e' next 'u') )
define L as ( test hop 2 not 'u' not 'x' not ('s' not 'o') )
define M as ( test hop 2 not 'a' not 'c' not 'e' not 'm' )
define N as ( test hop 3 ( hop 2 not 's' or hop 2 ) )
define O as ( test hop 2 'l' or 'i' )
define P as ( test hop 2 not 'c' )
define Q as ( test hop 2 test hop 3 not 'l' not 'n' )
define R as ( test hop 2 'n' or 'r' )
define S as ( test hop 2 'dr' or ('t' not 't') )
define T as ( test hop 2 's' or ('t' not 'o') )
define U as ( test hop 2 'l' or 'm' or 'n' or 'r' )
define V as ( test hop 2 'c' )
define W as ( test hop 2 not 's' not 'u' )
define X as ( test hop 2 'l' or 'i' or ('e' next 'u') )
define Y as ( test hop 2 'in' )
define Z as ( test hop 2 not 'f' )
define AA as ( test hop 2 among ( 'd' 'f' 'ph' 'th' 'l' 'er' 'or'
'es' 't' ) )
define BB as ( test hop 3 not 'met' not 'ryst' )
define CC as ( test hop 2 'l' )
/* The system of endings, as given in Appendix A. */
// endings: each string in the table is followed by the name of the
// condition routine that must succeed for it. The single (delete) at the
// end of the table is the action shared by all entries, so the ending is
// removed once its condition has been met.
define endings as (
[substring] among(
'alistically' B 'arizability' A 'izationally' B
'antialness' A 'arisations' A 'arizations' A 'entialness' A
'allically' C 'antaneous' A 'antiality' A 'arisation' A
'arization' A 'ationally' B 'ativeness' A 'eableness' E
'entations' A 'entiality' A 'entialize' A 'entiation' A
'ionalness' A 'istically' A 'itousness' A 'izability' A
'izational' A
'ableness' A 'arizable' A 'entation' A 'entially' A
'eousness' A 'ibleness' A 'icalness' A 'ionalism' A
'ionality' A 'ionalize' A 'iousness' A 'izations' A
'lessness' A
'ability' A 'aically' A 'alistic' B 'alities' A
'ariness' E 'aristic' A 'arizing' A 'ateness' A
'atingly' A 'ational' B 'atively' A 'ativism' A
'elihood' E 'encible' A 'entally' A 'entials' A
'entiate' A 'entness' A 'fulness' A 'ibility' A
'icalism' A 'icalist' A 'icality' A 'icalize' A
'ication' G 'icianry' A 'ination' A 'ingness' A
'ionally' A 'isation' A 'ishness' A 'istical' A
'iteness' A 'iveness' A 'ivistic' A 'ivities' A
'ization' F 'izement' A 'oidally' A 'ousness' A
'aceous' A 'acious' B 'action' G 'alness' A
'ancial' A 'ancies' A 'ancing' B 'ariser' A
'arized' A 'arizer' A 'atable' A 'ations' B
'atives' A 'eature' Z 'efully' A 'encies' A
'encing' A 'ential' A 'enting' C 'entist' A
'eously' A 'ialist' A 'iality' A 'ialize' A
'ically' A 'icance' A 'icians' A 'icists' A
'ifully' A 'ionals' A 'ionate' D 'ioning' A
'ionist' A 'iously' A 'istics' A 'izable' E
'lessly' A 'nesses' A 'oidism' A
'acies' A 'acity' A 'aging' B 'aical' A
'alist' A 'alism' B 'ality' A 'alize' A
'allic'BB 'anced' B 'ances' B 'antic' C
'arial' A 'aries' A 'arily' A 'arity' B
'arize' A 'aroid' A 'ately' A 'ating' I
'ation' B 'ative' A 'ators' A 'atory' A
'ature' E 'early' Y 'ehood' A 'eless' A
'elity' A 'ement' A 'enced' A 'ences' A
'eness' E 'ening' E 'ental' A 'ented' C
'ently' A 'fully' A 'ially' A 'icant' A
'ician' A 'icide' A 'icism' A 'icist' A
'icity' A 'idine' I 'iedly' A 'ihood' A
'inate' A 'iness' A 'ingly' B 'inism' J
'inity'CC 'ional' A 'ioned' A 'ished' A
'istic' A 'ities' A 'itous' A 'ively' A
'ivity' A 'izers' F 'izing' F 'oidal' A
'oides' A 'otide' A 'ously' A
'able' A 'ably' A 'ages' B 'ally' B
'ance' B 'ancy' B 'ants' B 'aric' A
'arly' K 'ated' I 'ates' A 'atic' B
'ator' A 'ealy' Y 'edly' E 'eful' A
'eity' A 'ence' A 'ency' A 'ened' E
'enly' E 'eous' A 'hood' A 'ials' A
'ians' A 'ible' A 'ibly' A 'ical' A
'ides' L 'iers' A 'iful' A 'ines' M
'ings' N 'ions' B 'ious' A 'isms' B
'ists' A 'itic' H 'ized' F 'izer' F
'less' A 'lily' A 'ness' A 'ogen' A
'ward' A 'wise' A 'ying' B 'yish' A
'acy' A 'age' B 'aic' A 'als'BB
'ant' B 'ars' O 'ary' F 'ata' A
'ate' A 'eal' Y 'ear' Y 'ely' E
'ene' E 'ent' C 'ery' E 'ese' A
'ful' A 'ial' A 'ian' A 'ics' A
'ide' L 'ied' A 'ier' A 'ies' P
'ily' A 'ine' M 'ing' N 'ion' Q
'ish' C 'ism' B 'ist' A 'ite'AA
'ity' A 'ium' A 'ive' A 'ize' F
'oid' A 'one' R 'ous' A
'ae' A 'al'BB 'ar' X 'as' B
'ed' E 'en' F 'es' E 'ia' A
'ic' A 'is' A 'ly' B 'on' S
'or' T 'um' U 'us' V 'yl' R
'{'}s' A 's{'}' A
'a' A 'e' A 'i' A 'o' A
's' W 'y' B
(delete)
)
)
/* Undoubling is rule 1 of appendix C. */
// undouble: when the word ends in one of the listed doubled consonants,
// deletes its last letter ('test' leaves the cursor at the end, so [next]
// selects the final character).
define undouble as (
test substring among ('bb' 'dd' 'gg' 'll' 'mm' 'nn' 'pp' 'rr' 'ss'
'tt')
[next] delete
)
/* The other appendix C rules can be done together. */
// respell: replaces the longest listed word ending by its respelled form.
// Four entries (ul, end, her, ent, et) are applied only when the letter in
// front of the ending is not one of those excluded by the 'not' tests.
define respell as (
[substring] among (
'iev' (<-'ief')
'uct' (<-'uc')
'umpt' (<-'um')
'rpt' (<-'rb')
'urs' (<-'ur')
'istr' (<-'ister')
'metr' (<-'meter')
'olv' (<-'olut')
'ul' (not 'a' not 'i' not 'o' <-'l')
'bex' (<-'bic')
'dex' (<-'dic')
'pex' (<-'pic')
'tex' (<-'tic')
'ax' (<-'ac')
'ex' (<-'ec')
'ix' (<-'ic')
'lux' (<-'luc')
'uad' (<-'uas')
'vad' (<-'vas')
'cid' (<-'cis')
'lid' (<-'lis')
'erid' (<-'eris')
'pand' (<-'pans')
'end' (not 's' <-'ens')
'ond' (<-'ons')
'lud' (<-'lus')
'rud' (<-'rus')
'her' (not 'p' not 't' <-'hes')
'mit' (<-'mis')
'ent' (not 'm' <-'ens')
/* 'ent' was 'end' in the 1968 paper - a typo. */
'ert' (<-'ers')
'et' (not 'n' <-'es')
'yt' (<-'ys')
'yz' (<-'ys')
)
)
)
// stem: the external entry point. Working backwards from the end of the
// word, runs endings, undouble and respell in that order; each is wrapped
// in 'do' so its failure is ignored.
define stem as (
backwards (
do endings
do undouble
do respell
)
)
libstemmer_c/algorithms/norwegian/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Norwegian stemmer: internal routines, the external
// entry point (stem), the integer marks and the character groupings.
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)
externals ( stem )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef ae hex 'E6'
stringdef ao hex 'E5'
stringdef o/ hex 'F8'
// v: vowels, including the three special letters defined above.
define v 'aeiouy{ae}{ao}{o/}'
// s_ending: letters after which a final 's' may be removed.
define s_ending 'bcdfghjlmnoprtvyz'
// mark_regions: p1 defaults to the end of the word. x is set three
// characters in from the start; p1 is then placed after the first
// non-vowel that follows a vowel and raised to x if it is smaller.
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
// main_suffix: finds the longest listed ending, searching no further back
// than p1, and then:
//   - deletes the endings of the first group;
//   - deletes 's' only after a letter of s_ending, or after a 'k' that is
//     itself preceded by a non-vowel;
//   - replaces 'erte' / 'ert' by 'er'.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
'hetens' 'ers' 'ets' 'et' 'het' 'ast'
(delete)
's'
(s_ending or ('k' non-v) delete)
'erte' 'ert'
(<-'er')
)
)
// consonant_pair: if the word ends in 'dt' or 'vt' (searching no further
// back than p1), deletes the final letter. The match is made inside 'test',
// so the cursor is back at the end when 'next]' selects the last character.
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'dt' 'vt'
)
)
next] delete
)
// other_suffix: deletes the longest listed ending, searching no further
// back than p1.
define other_suffix as (
setlimit tomark p1 for ([substring])
among(
'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
'hetslov'
(delete)
)
)
)
// stem: the external entry point. Sets p1, then runs main_suffix,
// consonant_pair and other_suffix backwards; each is wrapped in 'do' so its
// failure is ignored.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)
libstemmer_c/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Norwegian stemmer: internal routines, the external
// entry point (stem), the integer marks and the character groupings.
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)
externals ( stem )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef ae hex '91'
stringdef ao hex '86'
stringdef o/ hex '9B'
// v: vowels, including the three special letters defined above.
define v 'aeiouy{ae}{ao}{o/}'
// s_ending: letters after which a final 's' may be removed.
define s_ending 'bcdfghjlmnoprtvyz'
// mark_regions: p1 defaults to the end of the word. x is set three
// characters in from the start; p1 is then placed after the first
// non-vowel that follows a vowel and raised to x if it is smaller.
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
// main_suffix: finds the longest listed ending, searching no further back
// than p1, and then:
//   - deletes the endings of the first group;
//   - deletes 's' only after a letter of s_ending, or after a 'k' that is
//     itself preceded by a non-vowel;
//   - replaces 'erte' / 'ert' by 'er'.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
'hetens' 'ers' 'ets' 'et' 'het' 'ast'
(delete)
's'
(s_ending or ('k' non-v) delete)
'erte' 'ert'
(<-'er')
)
)
// consonant_pair: if the word ends in 'dt' or 'vt' (searching no further
// back than p1), deletes the final letter. The match is made inside 'test',
// so the cursor is back at the end when 'next]' selects the last character.
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'dt' 'vt'
)
)
next] delete
)
// other_suffix: deletes the longest listed ending, searching no further
// back than p1.
define other_suffix as (
setlimit tomark p1 for ([substring])
among(
'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
'hetslov'
(delete)
)
)
)
// stem: the external entry point. Sets p1, then runs main_suffix,
// consonant_pair and other_suffix backwards; each is wrapped in 'do' so its
// failure is ignored.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)
libstemmer_c/algorithms/porter/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Porter stemmer: the region marks, the Y_found flag,
// the internal routines, the external entry point (stem) and the groupings.
integers ( p1 p2 )
booleans ( Y_found )
routines (
shortv
R1 R2
Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5a Step_5b
)
externals ( stem )
groupings ( v v_WXY )
// v: vowels; v_WXY: vowels plus w, x and the marker letter Y.
define v 'aeiouy'
define v_WXY v + 'wxY'
backwardmode (
// shortv: reading backwards, a letter outside v_WXY, then a vowel, then a
// non-vowel.
define shortv as ( non-v_WXY v non-v )
// Region tests: succeed when the cursor is at or beyond mark p1 / p2.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Step_1a: plural endings. sses -> ss, ies -> i, ss is left unchanged
// (empty action), and a remaining final s is deleted.
define Step_1a as (
[substring] among (
'sses' (<-'ss')
'ies' (<-'i')
'ss' ()
's' (delete)
)
)
// Step_1b: 'eed' in R1 becomes 'ee'. 'ed' and 'ing' are deleted when a
// vowel occurs earlier in the word, after which the new ending is tidied:
//   - at, bl, iz get an 'e' added;
//   - a listed doubled consonant loses its last letter;
//   - otherwise, if the cursor is at p1 and the word ends in the shortv
//     pattern, an 'e' is added.
define Step_1b as (
[substring] among (
'eed' (R1 <-'ee')
'ed'
'ing' (
test gopast v delete
test substring among(
'at' 'bl' 'iz'
(<+ 'e')
'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
// ignoring double c, h, j, k, q, v, w, and x
([next] delete)
'' (atmark p1 test shortv <+ 'e')
)
)
)
)
// Replace a final 'y' or 'Y' with 'i' when a vowel occurs earlier in the word.
define Step_1c as (
['y' or 'Y']
gopast v
<-'i'
)
// Map the longest listed suffix to its shorter form; the suffix must lie
// in R1.
define Step_2 as (
[substring] R1 among (
'tional' (<-'tion')
'enci' (<-'ence')
'anci' (<-'ance')
'abli' (<-'able')
'entli' (<-'ent')
'eli' (<-'e')
'izer' 'ization'
(<-'ize')
'ational' 'ation' 'ator'
(<-'ate')
'alli' (<-'al')
'alism' 'aliti'
(<-'al')
'fulness' (<-'ful')
'ousli' 'ousness'
(<-'ous')
'iveness' 'iviti'
(<-'ive')
'biliti' (<-'ble')
)
)
// Further suffix reductions; the suffix must lie in R1.
define Step_3 as (
[substring] R1 among (
'alize' (<-'al')
'icate' 'iciti' 'ical'
(<-'ic')
'ative' 'ful' 'ness'
(delete)
)
)
// Delete the longest listed suffix when it lies in R2. 'ion' is deleted only
// when preceded by 's' or 't'.
define Step_4 as (
[substring] R2 among (
'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
'ment' 'ent' 'ou' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
(delete)
'ion' ('s' or 't' delete)
)
)
// Delete a final 'e' when it is in R2, or when it is in R1 and is not
// preceded by a shortv sequence.
define Step_5a as (
['e']
R2 or (R1 not shortv)
delete
)
// Delete a final 'l' when it is in R2 and is preceded by another 'l'.
define Step_5b as (
['l']
R2 'l'
delete
)
)
// Entry point.
//  1. Rewrite an initial 'y', and every 'y' that follows a vowel, as 'Y' so
//     that it is not treated as a vowel; remember this in Y_found.
//  2. Set p1 and p2 (default: end of word). p1 is placed after the first
//     non-vowel following a vowel; p2 by the same rule starting from p1.
//  3. Run the steps in backward mode; each is wrapped in `do`, so a failing
//     step does not stop the later ones.
//  4. If any 'Y' was introduced, turn every 'Y' back into 'y'.
define stem as (
unset Y_found
do ( ['y'] <-'Y' set Y_found)
do repeat(goto (v ['y']) <-'Y' set Y_found)
$p1 = limit
$p2 = limit
do(
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
backwards (
do Step_1a
do Step_1b
do Step_1c
do Step_2
do Step_3
do Step_4
do Step_5a
do Step_5b
)
do(Y_found repeat(goto (['Y']) <-'y'))
)
libstemmer_c/algorithms/portuguese/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Portuguese stemmer (ISO Latin I encoding).
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
verb_suffix
residual_suffix
residual_form
)
externals ( stem )
// pV, p1, p2: start positions of the regions tested by RV, R1 and R2.
integers ( pV p1 p2 )
groupings ( v )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a' hex 'E1' // a-acute
stringdef a^ hex 'E2' // a-circumflex e.g. 'bota^nico
stringdef e' hex 'E9' // e-acute
stringdef e^ hex 'EA' // e-circumflex
stringdef i' hex 'ED' // i-acute
stringdef o^ hex 'F4' // o-circumflex
stringdef o' hex 'F3' // o-acute
stringdef u' hex 'FA' // u-acute
stringdef c, hex 'E7' // c-cedilla
stringdef a~ hex 'E3' // a-tilde
stringdef o~ hex 'F5' // o-tilde
// v: the vowels. The tilde vowels are not listed; prelude rewrites them as
// the two-character sequences 'a~' and 'o~' before regions are computed.
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
// Walk the word left to right, replacing a-tilde with the two characters
// 'a~' and o-tilde with 'o~'. Any other character is skipped with `next`;
// the repeat ends when `next` fails at the end of the word.
define prelude as repeat (
[substring] among(
'{a~}' (<- 'a~')
'{o~}' (<- 'o~')
'' (next)
) //or next
)
// Compute pV, p1 and p2; each defaults to the end of the word.
// pV: if the second letter is a non-vowel, pV is placed after the next vowel;
//     if the first two letters are vowels, after the next non-vowel;
//     if the word starts non-vowel + vowel, after the third letter.
// p1: after the first non-vowel that follows a vowel.
// p2: the same rule applied again, starting from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Inverse of prelude: turn the two-character sequences 'a~' and 'o~' back
// into the single tilde characters, skipping every other character.
define postlude as repeat (
[substring] among(
'a~' (<- '{a~}')
'o~' (<- '{o~}')
'' (next)
) //or next
)
backwardmode (
// Region tests: succeed when the cursor is at or after pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Remove a standard (non-verbal) suffix: find the longest listed suffix at
// the end of the word and apply the action of its group. A group's action
// fails, and so does the routine, when its region test (R1/R2/RV) fails.
//
// The '-logia' and '-ucao' groups previously listed 'log{i'}a' 'log{i'}as'
// 'uci{o'}n' 'uciones'. Those are Spanish spellings (byte-identical to the
// entries in the Spanish stemmer) and so could not match Portuguese words.
// They are replaced by the Portuguese forms. The tilde vowels are written
// 'a~' / 'o~' because prelude has already rewritten them that way.
// NOTE(review): this changes stemmer output for words with these endings;
// anything indexed with the old stems needs re-indexing.
define standard_suffix as (
[substring] among(
'eza' 'ezas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'{a'}vel'
'{i'}vel'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amento' 'amentos'
'imento' 'imentos'
'adora' 'ador' 'a{c,}a~o'
'adoras' 'adores' 'a{c,}o~es' // no -ic test
'ante' 'antes' '{a^}ncia' // Note 1
(
R2 delete
)
'logia'
'logias'
(
R2 <- 'log'
)
'u{c,}a~o' 'u{c,}o~es'
(
R2 <- 'u'
)
'{e^}ncia' '{e^}ncias'
(
R2 <- 'ente'
)
'amente'
(
// delete if in R1, then optionally strip a preceding
// 'iv' (and 'at' before it), 'os', 'ic' or 'ad' if in R2
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
// delete if in R2, then optionally strip a preceding
// 'ante', 'avel' or '{i'}vel' if in R2
R2 delete
try (
[substring] among(
'ante' // Note 1
'avel'
'{i'}vel' (R2 delete)
)
)
)
'idade'
'idades'
(
// delete if in R2, then optionally strip a preceding
// 'abil', 'ic' or 'iv' if in R2
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
'ira' 'iras'
(
RV 'e' // -eira -eiras usually non-verbal
<- 'ir'
)
)
)
// Delete the longest listed verb ending, searching only the part of the word
// at or after pV.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
'{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
'{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
'{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
'{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'
'ira' 'iras'
(delete)
)
)
// Delete a final 'os' or one of the listed single vowels when it lies in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
)
)
// Final tidy-up.
//  - 'e', e-acute, e-circumflex: delete when in RV; then, if the word now
//    ends in 'gu' (or 'ci'), also delete that 'u' (or 'i') when it is in RV.
//  - c-cedilla: replace with 'c'.
define residual_form as (
[substring] among(
'e' '{e'}' '{e^}'
( RV delete [('u'] test 'g') or
('i'] test 'c') RV delete )
'{c,}' (<-'c')
)
)
)
// Entry point: prelude, region marking, then in backward mode:
//  - try standard_suffix, else verb_suffix; when either succeeds, also delete
//    a final 'i' that is preceded by 'c' and lies in RV (`and` runs its
//    second operand from the same starting cursor);
//  - when neither succeeds, try residual_suffix;
//  - always run residual_form.
// postlude then restores the tilde characters.
define stem as (
do prelude
do mark_regions
backwards (
do (
( ( standard_suffix or verb_suffix )
and do ( ['i'] test 'c' RV delete )
)
or residual_suffix
)
do residual_form
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
libstemmer_c/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Portuguese stemmer (MS-DOS Latin I encoding).
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
verb_suffix
residual_suffix
residual_form
)
externals ( stem )
// pV, p1, p2: start positions of the regions tested by RV, R1 and R2.
integers ( pV p1 p2 )
groupings ( v )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a' hex 'A0' // a-acute
stringdef a^ hex '83' // a-circumflex e.g. 'bota^nico
stringdef e' hex '82' // e-acute
stringdef e^ hex '88' // e-circumflex
stringdef i' hex 'A1' // i-acute
stringdef o^ hex '93' // o-circumflex
stringdef o' hex 'A2' // o-acute
stringdef u' hex 'A3' // u-acute
stringdef c, hex '87' // c-cedilla
stringdef a~ hex 'C6' // a-tilde
stringdef o~ hex 'E4' // o-tilde
// v: the vowels. The tilde vowels are not listed; prelude rewrites them as
// the two-character sequences 'a~' and 'o~' before regions are computed.
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
// Walk the word left to right, replacing a-tilde with the two characters
// 'a~' and o-tilde with 'o~'. Any other character is skipped with `next`;
// the repeat ends when `next` fails at the end of the word.
define prelude as repeat (
[substring] among(
'{a~}' (<- 'a~')
'{o~}' (<- 'o~')
'' (next)
) //or next
)
// Compute pV, p1 and p2; each defaults to the end of the word.
// pV: if the second letter is a non-vowel, pV is placed after the next vowel;
//     if the first two letters are vowels, after the next non-vowel;
//     if the word starts non-vowel + vowel, after the third letter.
// p1: after the first non-vowel that follows a vowel.
// p2: the same rule applied again, starting from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Inverse of prelude: turn the two-character sequences 'a~' and 'o~' back
// into the single tilde characters, skipping every other character.
define postlude as repeat (
[substring] among(
'a~' (<- '{a~}')
'o~' (<- '{o~}')
'' (next)
) //or next
)
backwardmode (
// Region tests: succeed when the cursor is at or after pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Remove a standard (non-verbal) suffix: find the longest listed suffix at
// the end of the word and apply the action of its group. A group's action
// fails, and so does the routine, when its region test (R1/R2/RV) fails.
//
// The '-logia' and '-ucao' groups previously listed 'log{i'}a' 'log{i'}as'
// 'uci{o'}n' 'uciones'. Those are Spanish spellings (byte-identical to the
// entries in the Spanish stemmer) and so could not match Portuguese words.
// They are replaced by the Portuguese forms. The tilde vowels are written
// 'a~' / 'o~' because prelude has already rewritten them that way.
// NOTE(review): this changes stemmer output for words with these endings;
// anything indexed with the old stems needs re-indexing.
define standard_suffix as (
[substring] among(
'eza' 'ezas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'{a'}vel'
'{i'}vel'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amento' 'amentos'
'imento' 'imentos'
'adora' 'ador' 'a{c,}a~o'
'adoras' 'adores' 'a{c,}o~es' // no -ic test
'ante' 'antes' '{a^}ncia' // Note 1
(
R2 delete
)
'logia'
'logias'
(
R2 <- 'log'
)
'u{c,}a~o' 'u{c,}o~es'
(
R2 <- 'u'
)
'{e^}ncia' '{e^}ncias'
(
R2 <- 'ente'
)
'amente'
(
// delete if in R1, then optionally strip a preceding
// 'iv' (and 'at' before it), 'os', 'ic' or 'ad' if in R2
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
// delete if in R2, then optionally strip a preceding
// 'ante', 'avel' or '{i'}vel' if in R2
R2 delete
try (
[substring] among(
'ante' // Note 1
'avel'
'{i'}vel' (R2 delete)
)
)
)
'idade'
'idades'
(
// delete if in R2, then optionally strip a preceding
// 'abil', 'ic' or 'iv' if in R2
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
'ira' 'iras'
(
RV 'e' // -eira -eiras usually non-verbal
<- 'ir'
)
)
)
// Delete the longest listed verb ending, searching only the part of the word
// at or after pV.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
'{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
'{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
'{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
'{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'
'ira' 'iras'
(delete)
)
)
// Delete a final 'os' or one of the listed single vowels when it lies in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
)
)
// Final tidy-up.
//  - 'e', e-acute, e-circumflex: delete when in RV; then, if the word now
//    ends in 'gu' (or 'ci'), also delete that 'u' (or 'i') when it is in RV.
//  - c-cedilla: replace with 'c'.
define residual_form as (
[substring] among(
'e' '{e'}' '{e^}'
( RV delete [('u'] test 'g') or
('i'] test 'c') RV delete )
'{c,}' (<-'c')
)
)
)
// Entry point: prelude, region marking, then in backward mode:
//  - try standard_suffix, else verb_suffix; when either succeeds, also delete
//    a final 'i' that is preceded by 'c' and lies in RV (`and` runs its
//    second operand from the same starting cursor);
//  - when neither succeeds, try residual_suffix;
//  - always run residual_form.
// postlude then restores the tilde characters.
define stem as (
do prelude
do mark_regions
backwards (
do (
( ( standard_suffix or verb_suffix )
and do ( ['i'] test 'c' RV delete )
)
or residual_suffix
)
do residual_form
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
libstemmer_c/algorithms/romanian/stem_ISO_8859_2.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Romanian stemmer (ISO 8859-2 encoding).
routines (
prelude postlude mark_regions
RV R1 R2
step_0
standard_suffix combo_suffix
verb_suffix
vowel_suffix
)
externals ( stem )
// pV, p1, p2: start positions of the regions tested by RV, R1 and R2.
integers ( pV p1 p2 )
groupings ( v )
// Set by combo_suffix / standard_suffix when they change the word; stem
// skips verb_suffix when it is set.
booleans ( standard_suffix_removed )
stringescapes {}
/* special characters */
stringdef a^ hex 'E2' // a circumflex
stringdef i^ hex 'EE' // i circumflex
stringdef a+ hex 'E3' // a breve
stringdef s, hex 'BA' // s cedilla
stringdef t, hex 'FE' // t cedilla
// v: the vowels.
define v 'aeiou{a^}{i^}{a+}'
// Rewrite every 'u' or 'i' that stands between two vowels as upper-case
// 'U' / 'I', so that it is no longer a member of grouping v.
define prelude as (
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)
// Compute pV, p1 and p2; each defaults to the end of the word.
// pV: if the second letter is a non-vowel, pV is placed after the next vowel;
//     if the first two letters are vowels, after the next non-vowel;
//     if the word starts non-vowel + vowel, after the third letter.
// p1: after the first non-vowel that follows a vowel.
// p2: the same rule applied again, starting from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Undo prelude: turn every 'I' / 'U' back into 'i' / 'u', skipping all other
// characters.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: succeed when the cursor is at or after pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Step 0: reduce the longest listed ending when it lies in R1. Each group is
// deleted or replaced as shown; 'ile' is replaced by 'i' only when it is not
// preceded by 'ab'.
define step_0 as (
[substring] R1 among(
'ul' 'ului'
( delete )
'aua'
( <-'a' )
'ea' 'ele' 'elor'
( <-'e' )
'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
( <-'i')
'ile'
( not 'ab' <- 'i' )
'atei'
( <- 'at' )
'a{t,}ie' 'a{t,}ia'
( <- 'a{t,}i' )
)
)
// Replace a compound suffix lying in R1 by its shorter form and set
// standard_suffix_removed. The body runs under `test`, so the cursor is
// restored afterwards; standard_suffix calls this routine under `repeat`.
define combo_suffix as test (
[substring] R1 (
among(
/* 'IST'. alternative: include the following
'alism' 'alisme'
'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
<- 'al'
)
*/
'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
<- 'abil'
)
'ibilitate' (
<- 'ibil'
)
'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
<- 'iv'
)
'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
'icator' 'icatori'
'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
<- 'ic'
)
'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
'atoare' 'ator' 'atori'
'{a+}toare' '{a+}tor' '{a+}tori' (
<- 'at'
)
'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
'itoare' 'itor' 'itori' (
<- 'it'
)
)
set standard_suffix_removed
)
)
// Clear the flag, apply combo_suffix repeatedly until it fails, then handle
// the longest listed standard suffix lying in R2:
//  - first group: delete;
//  - 'iune' / 'iuni': only when preceded by t-cedilla; that letter and the
//    suffix together are replaced by 't';
//  - 'ism' ... 'i{s,}ti': replace with 'ist'.
// The flag is set again whenever this final replacement succeeds.
define standard_suffix as (
unset standard_suffix_removed
repeat combo_suffix
[substring] R2 (
among(
// past participle is treated here, rather than
// as a verb ending:
'at' 'ata' 'at{a+}' 'ati' 'ate'
'ut' 'uta' 'ut{a+}' 'uti' 'ute'
'it' 'ita' 'it{a+}' 'iti' 'ite'
'ic' 'ica' 'ice' 'ici' 'ic{a+}'
'abil' 'abila' 'abile' 'abili' 'abil{a+}'
'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
'ant' 'anta' 'ante' 'anti' 'ant{a+}'
'ator' 'atori'
'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
delete
)
'iune' 'iuni' (
'{t,}'] <- 't'
)
'ism' 'isme'
'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
<- 'ist'
/* 'IST'. alternative: remove with <- '' */
)
)
set standard_suffix_removed
)
)
// Remove the longest listed verb ending, searching only at or after pV.
// Endings in the first group are deleted only when preceded by a non-vowel
// or by 'u'; endings in the second group are deleted unconditionally.
define verb_suffix as setlimit tomark pV for (
[substring] among(
// 'long' infinitive:
'are' 'ere' 'ire' '{a^}re'
// gerund:
'ind' '{a^}nd'
'indu' '{a^}ndu'
'eze'
'easc{a+}'
// present:
'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
'e{s,}te'
'{a+}sc' '{a+}{s,}ti'
'{a+}{s,}te'
// imperfect:
'am' 'ai' 'au'
'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
// past: // (not 'ii')
'ui'
'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
'{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
// pluperfect:
'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
'{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
'{a^}ser{a+}'
'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
( non-v or 'u' delete )
// present:
'{a+}m' 'a{t,}i'
'em' 'e{t,}i'
'im' 'i{t,}i'
'{a^}m' '{a^}{t,}i'
// past:
'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
'sei' 'se'
// pluperfect:
'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
(delete)
)
)
// Delete a final 'a', 'e', 'i', 'ie' or a-breve when it lies in RV.
define vowel_suffix as (
[substring] RV among (
'a' 'e' 'i' 'ie' '{a+}' ( delete )
)
)
)
// Entry point: prelude, region marking, then in backward mode step_0,
// standard_suffix, verb_suffix (only when standard_suffix_removed is not
// set), and vowel_suffix. postlude then restores 'I' / 'U' to lower case.
define stem as (
do prelude
do mark_regions
backwards (
do step_0
do standard_suffix
do ( standard_suffix_removed or verb_suffix )
do vowel_suffix
)
do postlude
)
libstemmer_c/algorithms/romanian/stem_Unicode.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Romanian stemmer (Unicode code points).
routines (
prelude postlude mark_regions
RV R1 R2
step_0
standard_suffix combo_suffix
verb_suffix
vowel_suffix
)
externals ( stem )
// pV, p1, p2: start positions of the regions tested by RV, R1 and R2.
integers ( pV p1 p2 )
groupings ( v )
// Set by combo_suffix / standard_suffix when they change the word; stem
// skips verb_suffix when it is set.
booleans ( standard_suffix_removed )
stringescapes {}
/* special characters */
stringdef a^ hex '0E2' // a circumflex
stringdef i^ hex '0EE' // i circumflex
stringdef a+ hex '103' // a breve
stringdef s, hex '15F' // s cedilla
stringdef t, hex '163' // t cedilla
// v: the vowels.
define v 'aeiou{a^}{i^}{a+}'
// Rewrite every 'u' or 'i' that stands between two vowels as upper-case
// 'U' / 'I', so that it is no longer a member of grouping v.
define prelude as (
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)
// Compute pV, p1 and p2; each defaults to the end of the word.
// pV: if the second letter is a non-vowel, pV is placed after the next vowel;
//     if the first two letters are vowels, after the next non-vowel;
//     if the word starts non-vowel + vowel, after the third letter.
// p1: after the first non-vowel that follows a vowel.
// p2: the same rule applied again, starting from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Undo prelude: turn every 'I' / 'U' back into 'i' / 'u', skipping all other
// characters.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: succeed when the cursor is at or after pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Step 0: reduce the longest listed ending when it lies in R1. Each group is
// deleted or replaced as shown; 'ile' is replaced by 'i' only when it is not
// preceded by 'ab'.
define step_0 as (
[substring] R1 among(
'ul' 'ului'
( delete )
'aua'
( <-'a' )
'ea' 'ele' 'elor'
( <-'e' )
'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
( <-'i')
'ile'
( not 'ab' <- 'i' )
'atei'
( <- 'at' )
'a{t,}ie' 'a{t,}ia'
( <- 'a{t,}i' )
)
)
// Replace a compound suffix lying in R1 by its shorter form and set
// standard_suffix_removed. The body runs under `test`, so the cursor is
// restored afterwards; standard_suffix calls this routine under `repeat`.
define combo_suffix as test (
[substring] R1 (
among(
/* 'IST'. alternative: include the following
'alism' 'alisme'
'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
<- 'al'
)
*/
'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
<- 'abil'
)
'ibilitate' (
<- 'ibil'
)
'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
<- 'iv'
)
'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
'icator' 'icatori'
'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
<- 'ic'
)
'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
'atoare' 'ator' 'atori'
'{a+}toare' '{a+}tor' '{a+}tori' (
<- 'at'
)
'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
'itoare' 'itor' 'itori' (
<- 'it'
)
)
set standard_suffix_removed
)
)
// Clear the flag, apply combo_suffix repeatedly until it fails, then handle
// the longest listed standard suffix lying in R2:
//  - first group: delete;
//  - 'iune' / 'iuni': only when preceded by t-cedilla; that letter and the
//    suffix together are replaced by 't';
//  - 'ism' ... 'i{s,}ti': replace with 'ist'.
// The flag is set again whenever this final replacement succeeds.
define standard_suffix as (
unset standard_suffix_removed
repeat combo_suffix
[substring] R2 (
among(
// past participle is treated here, rather than
// as a verb ending:
'at' 'ata' 'at{a+}' 'ati' 'ate'
'ut' 'uta' 'ut{a+}' 'uti' 'ute'
'it' 'ita' 'it{a+}' 'iti' 'ite'
'ic' 'ica' 'ice' 'ici' 'ic{a+}'
'abil' 'abila' 'abile' 'abili' 'abil{a+}'
'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
'ant' 'anta' 'ante' 'anti' 'ant{a+}'
'ator' 'atori'
'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
delete
)
'iune' 'iuni' (
'{t,}'] <- 't'
)
'ism' 'isme'
'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
<- 'ist'
/* 'IST'. alternative: remove with <- '' */
)
)
set standard_suffix_removed
)
)
// Remove the longest listed verb ending, searching only at or after pV.
// Endings in the first group are deleted only when preceded by a non-vowel
// or by 'u'; endings in the second group are deleted unconditionally.
define verb_suffix as setlimit tomark pV for (
[substring] among(
// 'long' infinitive:
'are' 'ere' 'ire' '{a^}re'
// gerund:
'ind' '{a^}nd'
'indu' '{a^}ndu'
'eze'
'easc{a+}'
// present:
'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
'e{s,}te'
'{a+}sc' '{a+}{s,}ti'
'{a+}{s,}te'
// imperfect:
'am' 'ai' 'au'
'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
// past: // (not 'ii')
'ui'
'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
'{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
// pluperfect:
'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
'{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
'{a^}ser{a+}'
'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
( non-v or 'u' delete )
// present:
'{a+}m' 'a{t,}i'
'em' 'e{t,}i'
'im' 'i{t,}i'
'{a^}m' '{a^}{t,}i'
// past:
'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
'sei' 'se'
// pluperfect:
'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
(delete)
)
)
// Delete a final 'a', 'e', 'i', 'ie' or a-breve when it lies in RV.
define vowel_suffix as (
[substring] RV among (
'a' 'e' 'i' 'ie' '{a+}' ( delete )
)
)
)
// Entry point: prelude, region marking, then in backward mode step_0,
// standard_suffix, verb_suffix (only when standard_suffix_removed is not
// set), and vowel_suffix. postlude then restores 'I' / 'U' to lower case.
define stem as (
do prelude
do mark_regions
backwards (
do step_0
do standard_suffix
do ( standard_suffix_removed or verb_suffix )
do vowel_suffix
)
do postlude
)
libstemmer_c/algorithms/russian/stem_KOI8_R.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Russian stemmer (KOI8-R encoding). Each Cyrillic
// letter gets a Latin-transliteration name that is used inside {...} in the
// string literals below.
stringescapes {}
/* the 32 Cyrillic letters in the KOI8-R coding scheme, and represented
in Latin characters following the conventions of the standard Library
of Congress transliteration: */
stringdef a hex 'C1'
stringdef b hex 'C2'
stringdef v hex 'D7'
stringdef g hex 'C7'
stringdef d hex 'C4'
stringdef e hex 'C5'
stringdef zh hex 'D6'
stringdef z hex 'DA'
stringdef i hex 'C9'
stringdef i` hex 'CA'
stringdef k hex 'CB'
stringdef l hex 'CC'
stringdef m hex 'CD'
stringdef n hex 'CE'
stringdef o hex 'CF'
stringdef p hex 'D0'
stringdef r hex 'D2'
stringdef s hex 'D3'
stringdef t hex 'D4'
stringdef u hex 'D5'
stringdef f hex 'C6'
stringdef kh hex 'C8'
stringdef ts hex 'C3'
stringdef ch hex 'DE'
stringdef sh hex 'DB'
stringdef shch hex 'DD'
stringdef " hex 'DF'
stringdef y hex 'D9'
stringdef ' hex 'D8'
stringdef e` hex 'DC'
stringdef iu hex 'C0'
stringdef ia hex 'D1'
routines ( mark_regions R2
perfective_gerund
adjective
adjectival
reflexive
verb
noun
derivational
tidy_up
)
externals ( stem )
// pV, p2: start positions of the region used as the backward limit in stem
// and of the region tested by R2.
integers ( pV p2 )
groupings ( v )
// v: the vowels.
define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
// Compute pV and p2; both default to the end of the word.
// pV: just after the first vowel.
// p2: continuing from pV, skip past the next non-vowel, then past the next
//     vowel, then past the next non-vowel.
define mark_regions as (
$pV = limit
$p2 = limit
do (
gopast v setmark pV gopast non-v
gopast v gopast non-v setmark p2
)
)
backwardmode (
// R2 succeeds when the cursor is at or after p2.
define R2 as $p2 <= cursor
// Remove a perfective gerund ending. Endings in the first group are deleted
// only when preceded by {a} or {ia}; endings in the second group are deleted
// unconditionally.
define perfective_gerund as (
[substring] among (
'{v}'
'{v}{sh}{i}'
'{v}{sh}{i}{s}{'}'
('{a}' or '{ia}' delete)
'{i}{v}'
'{i}{v}{sh}{i}'
'{i}{v}{sh}{i}{s}{'}'
'{y}{v}'
'{y}{v}{sh}{i}'
'{y}{v}{sh}{i}{s}{'}'
(delete)
)
)
// Delete the longest listed adjective ending.
define adjective as (
[substring] among (
'{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
'{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
'{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
'{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
'{ia}{ia}'
// and -
'{o}{iu}' // - which is somewhat archaic
'{e}{iu}' // - soft form of {o}{iu}
(delete)
)
)
// Remove an adjective ending (this must succeed) and then, optionally, a
// participle ending in front of it. Participle endings in the first group
// are deleted only when preceded by {a} or {ia}; those in the second group
// are deleted unconditionally.
define adjectival as (
adjective
/* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
errors. Removing im, uem, enn creates too many errors.
*/
try (
[substring] among (
'{e}{m}' // present passive participle
'{n}{n}' // adjective from past passive participle
'{v}{sh}' // past active participle
'{iu}{shch}' '{shch}' // present active participle
('{a}' or '{ia}' delete)
//but not '{i}{m}' '{u}{e}{m}' // present passive participle
//or '{e}{n}{n}' // adjective from past passive participle
'{i}{v}{sh}' '{y}{v}{sh}'// past active participle
'{u}{iu}{shch}' // present active participle
(delete)
)
)
)
// Delete a reflexive ending: {s}{ia} or {s}{'}.
define reflexive as (
[substring] among (
'{s}{ia}'
'{s}{'}'
(delete)
)
)
// Remove a verb ending. Endings in the first group are deleted only when
// preceded by {a} or {ia}; endings in the second group are deleted
// unconditionally.
define verb as (
[substring] among (
'{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
'{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
'{n}{y}' '{t}{'}' '{e}{sh}{'}'
'{n}{n}{o}'
('{a}' or '{ia}' delete)
'{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
'{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
'{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
'{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
'{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
'{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
(delete)
/* note the short passive participle tests:
'{n}{a}' '{n}' '{n}{o}' '{n}{y}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
*/
)
)
// Delete the longest listed noun ending.
define noun as (
[substring] among (
'{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
'{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
'{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
'{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
'{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
'{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
(delete)
/* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
omitted - they only occur on 12 words.
*/
)
)
// Delete the derivational ending {o}{s}{t} or {o}{s}{t}{'} when it lies
// in R2.
define derivational as (
[substring] R2 among (
'{o}{s}{t}'
'{o}{s}{t}{'}'
(delete)
)
)
// Final tidy-up on the longest matching ending:
//  - superlative {e}{i`}{sh} / {e}{i`}{sh}{e}: delete; then, if the word now
//    ends in a double {n}, delete the last {n};
//  - {n}: delete when preceded by another {n};
//  - {'}: delete.
define tidy_up as (
[substring] among (
'{e}{i`}{sh}'
'{e}{i`}{sh}{e}' // superlative forms
(delete
['{n}'] '{n}' delete
)
'{n}'
('{n}' delete) // e.g. -nno endings
'{'}'
(delete) // with some slight false conflations
)
)
)
// Entry point: mark the regions, then work backwards with the search limited
// to the part of the word at or after pV:
//  1. remove a perfective gerund ending; failing that, optionally remove a
//     reflexive ending and then an adjectival, verb or noun ending
//     (first one that succeeds);
//  2. delete a final {i} if present;
//  3. run derivational and tidy_up (each under `do`).
define stem as (
do mark_regions
backwards setlimit tomark pV for (
do (
perfective_gerund or
( try reflexive
adjectival or verb or noun
)
)
try([ '{i}' ] delete)
// because noun ending -i{iu} is being treated as verb ending -{iu}
do derivational
do tidy_up
)
)
libstemmer_c/algorithms/russian/stem_Unicode.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Russian stemmer (Unicode code points). Each Cyrillic
// letter gets a Latin-transliteration name that is used inside {...} in the
// string literals below.
stringescapes {}
/* the 32 Cyrillic letters in Unicode */
stringdef a hex '430'
stringdef b hex '431'
stringdef v hex '432'
stringdef g hex '433'
stringdef d hex '434'
stringdef e hex '435'
stringdef zh hex '436'
stringdef z hex '437'
stringdef i hex '438'
stringdef i` hex '439'
stringdef k hex '43A'
stringdef l hex '43B'
stringdef m hex '43C'
stringdef n hex '43D'
stringdef o hex '43E'
stringdef p hex '43F'
stringdef r hex '440'
stringdef s hex '441'
stringdef t hex '442'
stringdef u hex '443'
stringdef f hex '444'
stringdef kh hex '445'
stringdef ts hex '446'
stringdef ch hex '447'
stringdef sh hex '448'
stringdef shch hex '449'
stringdef " hex '44A'
stringdef y hex '44B'
stringdef ' hex '44C'
stringdef e` hex '44D'
stringdef iu hex '44E'
stringdef ia hex '44F'
routines ( mark_regions R2
perfective_gerund
adjective
adjectival
reflexive
verb
noun
derivational
tidy_up
)
externals ( stem )
// pV, p2: start positions of the region used as the backward limit in stem
// and of the region tested by R2.
integers ( pV p2 )
groupings ( v )
// v: the vowels.
define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
// Compute pV and p2; both default to the end of the word.
// pV: just after the first vowel.
// p2: continuing from pV, skip past the next non-vowel, then past the next
//     vowel, then past the next non-vowel.
define mark_regions as (
$pV = limit
$p2 = limit
do (
gopast v setmark pV gopast non-v
gopast v gopast non-v setmark p2
)
)
backwardmode (
// R2 succeeds when the cursor is at or after p2.
define R2 as $p2 <= cursor
// Remove a perfective gerund ending. Endings in the first group are deleted
// only when preceded by {a} or {ia}; endings in the second group are deleted
// unconditionally.
define perfective_gerund as (
[substring] among (
'{v}'
'{v}{sh}{i}'
'{v}{sh}{i}{s}{'}'
('{a}' or '{ia}' delete)
'{i}{v}'
'{i}{v}{sh}{i}'
'{i}{v}{sh}{i}{s}{'}'
'{y}{v}'
'{y}{v}{sh}{i}'
'{y}{v}{sh}{i}{s}{'}'
(delete)
)
)
// Delete the longest listed adjective ending.
define adjective as (
[substring] among (
'{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
'{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
'{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
'{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
'{ia}{ia}'
// and -
'{o}{iu}' // - which is somewhat archaic
'{e}{iu}' // - soft form of {o}{iu}
(delete)
)
)
// Remove an adjective ending (this must succeed) and then, optionally, a
// participle ending in front of it. Participle endings in the first group
// are deleted only when preceded by {a} or {ia}; those in the second group
// are deleted unconditionally.
define adjectival as (
adjective
/* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
errors. Removing im, uem, enn creates too many errors.
*/
try (
[substring] among (
'{e}{m}' // present passive participle
'{n}{n}' // adjective from past passive participle
'{v}{sh}' // past active participle
'{iu}{shch}' '{shch}' // present active participle
('{a}' or '{ia}' delete)
//but not '{i}{m}' '{u}{e}{m}' // present passive participle
//or '{e}{n}{n}' // adjective from past passive participle
'{i}{v}{sh}' '{y}{v}{sh}'// past active participle
'{u}{iu}{shch}' // present active participle
(delete)
)
)
)
// Delete a reflexive ending: {s}{ia} or {s}{'}.
define reflexive as (
[substring] among (
'{s}{ia}'
'{s}{'}'
(delete)
)
)
// Remove a verb ending. Endings in the first group are deleted only when
// preceded by {a} or {ia}; endings in the second group are deleted
// unconditionally.
define verb as (
[substring] among (
'{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
'{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
'{n}{y}' '{t}{'}' '{e}{sh}{'}'
'{n}{n}{o}'
('{a}' or '{ia}' delete)
'{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
'{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
'{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
'{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
'{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
'{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
(delete)
/* note the short passive participle tests:
'{n}{a}' '{n}' '{n}{o}' '{n}{y}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
*/
)
)
// Delete the longest listed noun ending.
define noun as (
[substring] among (
'{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
'{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
'{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
'{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
'{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
'{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
(delete)
/* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
omitted - they only occur on 12 words.
*/
)
)
// Delete the derivational ending {o}{s}{t} or {o}{s}{t}{'} when it lies
// in R2.
define derivational as (
[substring] R2 among (
'{o}{s}{t}'
'{o}{s}{t}{'}'
(delete)
)
)
// Final tidy-up on the longest matching ending:
//  - superlative {e}{i`}{sh} / {e}{i`}{sh}{e}: delete; then, if the word now
//    ends in a double {n}, delete the last {n};
//  - {n}: delete when preceded by another {n};
//  - {'}: delete.
define tidy_up as (
[substring] among (
'{e}{i`}{sh}'
'{e}{i`}{sh}{e}' // superlative forms
(delete
['{n}'] '{n}' delete
)
'{n}'
('{n}' delete) // e.g. -nno endings
'{'}'
(delete) // with some slight false conflations
)
)
)
// Entry point: mark the regions, then work backwards with the search limited
// to the part of the word at or after pV:
//  1. remove a perfective gerund ending; failing that, optionally remove a
//     reflexive ending and then an adjectival, verb or noun ending
//     (first one that succeeds);
//  2. delete a final {i} if present;
//  3. run derivational and tidy_up (each under `do`).
define stem as (
do mark_regions
backwards setlimit tomark pV for (
do (
perfective_gerund or
( try reflexive
adjectival or verb or noun
)
)
try([ '{i}' ] delete)
// because noun ending -i{iu} is being treated as verb ending -{iu}
do derivational
do tidy_up
)
)
libstemmer_c/algorithms/spanish/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Spanish stemmer (ISO Latin I encoding).
routines (
postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
y_verb_suffix
verb_suffix
residual_suffix
)
externals ( stem )
// pV, p1, p2: start positions of the regions tested by RV, R1 and R2.
integers ( pV p1 p2 )
groupings ( v )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a' hex 'E1' // a-acute
stringdef e' hex 'E9' // e-acute
stringdef i' hex 'ED' // i-acute
stringdef o' hex 'F3' // o-acute
stringdef u' hex 'FA' // u-acute
stringdef u" hex 'FC' // u-diaeresis
stringdef n~ hex 'F1' // n-tilde
// v: the vowels, including the accented forms.
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'
// Compute pV, p1 and p2; each defaults to the end of the word.
// pV: if the second letter is a non-vowel, pV is placed after the next vowel;
//     if the first two letters are vowels, after the next non-vowel;
//     if the word starts non-vowel + vowel, after the third letter.
// p1: after the first non-vowel that follows a vowel.
// p2: the same rule applied again, starting from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Walk the word left to right, replacing each acute-accented vowel by the
// plain vowel and skipping every other character; the repeat ends when
// `next` fails at the end of the word.
define postlude as repeat (
[substring] among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
'{i'}' (<- 'i')
'{o'}' (<- 'o')
'{u'}' (<- 'u')
// and possibly {u"}->u here, or in prelude
'' (next)
) //or next
)
backwardmode (
// Region tests: succeed when the cursor is at or after pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Remove a pronoun attached to a gerund or infinitive.
// The first among marks the pronoun as the slice. The second among then
// requires one of the listed verb endings immediately before it, in RV:
//  - accented ending: the slice is extended over the ending, and ending plus
//    pronoun are replaced by the unaccented ending;
//  - unaccented ending: only the pronoun is deleted;
//  - 'yendo': the pronoun is deleted only when 'yendo' is preceded by 'u'.
define attached_pronoun as (
[substring] among(
'me' 'se' 'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
'las' 'les' 'los' 'nos'
)
substring RV among(
'i{e'}ndo' (] <- 'iendo')
'{a'}ndo' (] <- 'ando')
'{a'}r' (] <- 'ar')
'{e'}r' (] <- 'er')
'{i'}r' (] <- 'ir')
'ando'
'iendo'
'ar' 'er' 'ir'
(delete)
'yendo' ('u' delete)
)
)
// Remove a standard (non-verbal) suffix: find the longest listed suffix at
// the end of the word and apply the action of its group. A group's action
// fails, and so does the routine, when its region test (R1/R2) fails.
define standard_suffix as (
[substring] among(
'anza' 'anzas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'able' 'ables'
'ible' 'ibles'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amiento' 'amientos'
'imiento' 'imientos'
(
R2 delete
)
'adora' 'ador' 'aci{o'}n'
'adoras' 'adores' 'aciones'
'ante' 'antes' 'ancia' 'ancias'// Note 1
(
// delete if in R2, then optionally a preceding 'ic' if in R2
R2 delete
try ( ['ic'] R2 delete )
)
'log{i'}a'
'log{i'}as'
(
R2 <- 'log'
)
'uci{o'}n' 'uciones'
(
R2 <- 'u'
)
'encia' 'encias'
(
R2 <- 'ente'
)
'amente'
(
// delete if in R1, then optionally strip a preceding
// 'iv' (and 'at' before it), 'os', 'ic' or 'ad' if in R2
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
// delete if in R2, then optionally strip a preceding
// 'ante', 'able' or 'ible' if in R2
R2 delete
try (
[substring] among(
'ante' // Note 1
'able'
'ible' (R2 delete)
)
)
)
'idad'
'idades'
(
// delete if in R2, then optionally strip a preceding
// 'abil', 'ic' or 'iv' if in R2
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
)
)
// Delete the longest listed verb ending beginning with 'y', searching only
// at or after pV, and only when the ending is preceded by 'u'.
define y_verb_suffix as (
setlimit tomark pV for ([substring]) among(
'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
'yas' 'yes' 'yais' 'yamos'
('u' delete)
)
)
// Delete the longest listed verb ending, searching only at or after pV.
// For the first group ('en' 'es' '{e'}is' 'emos'), when the ending is
// preceded by 'gu' the slice is first extended so that the 'u' is deleted
// as well.
define verb_suffix as (
setlimit tomark pV for ([substring]) among(
'en' 'es' '{e'}is' 'emos'
(try ('u' test 'g') ] delete)
'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
'ar{e'}'
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
'er{e'}'
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
'ir{e'}'
'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
(delete)
)
)
// Remove a residual ending when it lies in RV:
//  - 'os' or one of the listed vowels: delete;
//  - 'e' / e-acute: delete; then, if the word now ends in 'gu', also delete
//    that 'u' when it lies in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
'e' '{e'}'
( RV delete try( ['u'] test 'g' RV delete ) )
)
)
)
// Entry point: mark the regions, then in backward mode remove an attached
// pronoun, then the first that succeeds of standard_suffix, y_verb_suffix
// and verb_suffix, then a residual suffix. postlude finally strips the
// acute accents.
define stem as (
do mark_regions
backwards (
do attached_pronoun
do ( standard_suffix or
y_verb_suffix or
verb_suffix
)
do residual_suffix
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
libstemmer_c/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
// Declarations for the Spanish stemmer (MS-DOS Latin I encoding).
routines (
postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
y_verb_suffix
verb_suffix
residual_suffix
)
externals ( stem )
// pV, p1, p2: start positions of the regions tested by RV, R1 and R2.
integers ( pV p1 p2 )
groupings ( v )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a' hex 'A0' // a-acute
stringdef e' hex '82' // e-acute
stringdef i' hex 'A1' // i-acute
stringdef o' hex 'A2' // o-acute
stringdef u' hex 'A3' // u-acute
stringdef u" hex '81' // u-diaeresis
stringdef n~ hex 'A4' // n-tilde
// v: the vowels, including the accented forms.
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'
// Compute pV, p1 and p2; each defaults to the end of the word.
// pV: if the second letter is a non-vowel, pV is placed after the next vowel;
//     if the first two letters are vowels, after the next non-vowel;
//     if the word starts non-vowel + vowel, after the third letter.
// p1: after the first non-vowel that follows a vowel.
// p2: the same rule applied again, starting from p1.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Walk the word left to right, replacing each acute-accented vowel by the
// plain vowel and skipping every other character; the repeat ends when
// `next` fails at the end of the word.
define postlude as repeat (
[substring] among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
'{i'}' (<- 'i')
'{o'}' (<- 'o')
'{u'}' (<- 'u')
// and possibly {u"}->u here, or in prelude
'' (next)
) //or next
)
backwardmode (
// Region tests: each succeeds when the cursor is at or beyond the
// corresponding mark set by mark_regions.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Remove a pronoun attached to a gerund or infinitive ending.
// The first among brackets the pronoun; the second requires, under the RV
// test, one of the listed verb endings before it:
//  - accented endings are rewritten to their unaccented form;
//  - unaccented endings simply have the bracketed pronoun deleted;
//  - 'yendo' additionally requires a preceding 'u' before deleting.
define attached_pronoun as (
[substring] among(
'me' 'se' 'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
'las' 'les' 'los' 'nos'
)
substring RV among(
'i{e'}ndo' (] <- 'iendo')
'{a'}ndo' (] <- 'ando')
'{a'}r' (] <- 'ar')
'{e'}r' (] <- 'er')
'{i'}r' (] <- 'ir')
'ando'
'iendo'
'ar' 'er' 'ir'
(delete)
'yendo' ('u' delete)
)
)
// Remove or rewrite a standard suffix. Each run of strings shares the
// bracketed command that follows it, and every command starts with an R1 or
// R2 region test, so nothing is changed unless the suffix lies in that region.
define standard_suffix as (
[substring] among(
'anza' 'anzas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'able' 'ables'
'ible' 'ibles'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amiento' 'amientos'
'imiento' 'imientos'
(
R2 delete
)
'adora' 'ador' 'aci{o'}n'
'adoras' 'adores' 'aciones'
'ante' 'antes' 'ancia' 'ancias'// Note 1
(
// delete, then also drop a preceding 'ic' if it is in R2
R2 delete
try ( ['ic'] R2 delete )
)
'log{i'}a'
'log{i'}as'
(
R2 <- 'log'
)
'uci{o'}n' 'uciones'
(
R2 <- 'u'
)
'encia' 'encias'
(
R2 <- 'ente'
)
'amente'
(
// delete in R1, then try one further suffix in R2
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
R2 delete
try (
[substring] among(
'ante' // Note 1
'able'
'ible' (R2 delete)
)
)
)
'idad'
'idades'
(
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
)
)
// With the search limited by the pV mark, find a verb ending that begins
// with 'y' and delete it, but only when it is preceded by 'u'.
define y_verb_suffix as (
setlimit tomark pV for ([substring]) among(
'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
'yas' 'yes' 'yais' 'yamos'
('u' delete)
)
)
// With the search limited by the pV mark, find a verb ending and delete it.
// For the first group ('en', 'es', accented 'eis', 'emos') a 'u' that follows
// 'g' is included in the deleted slice; all other endings are deleted as-is.
define verb_suffix as (
setlimit tomark pV for ([substring]) among(
'en' 'es' '{e'}is' 'emos'
(try ('u' test 'g') ] delete)
'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
'ar{e'}'
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
'er{e'}'
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
'ir{e'}'
'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
(delete)
)
)
// Strip a residual vowel ending. 'os', 'a', 'o' and accented a/i/o are
// deleted when the RV test succeeds. 'e' and accented e are deleted under the
// same test; afterwards a 'u' that follows 'g' is also deleted if RV holds.
define residual_suffix as (
[substring] among(
'os'
'a' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
'e' '{e'}'
( RV delete try( ['u'] test 'g' RV delete ) )
)
)
)
// Entry point. Marks the regions, then works backwards from the end of the
// word: attached pronoun, the first of standard_suffix / y_verb_suffix /
// verb_suffix that succeeds, then residual_suffix. Each step is wrapped in
// 'do'. Finally postlude strips the acute accents.
define stem as (
do mark_regions
backwards (
do attached_pronoun
do ( standard_suffix or
y_verb_suffix or
verb_suffix
)
do residual_suffix
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
libstemmer_c/algorithms/swedish/stem_ISO_8859_1.sbl
deleted
100644 → 0
View file @
1c6e8c67
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)
externals ( stem )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a" hex 'E4'
stringdef ao hex 'E5'
stringdef o" hex 'F6'
define v 'aeiouy{a"}{ao}{o"}'
define s_ending 'bcdfghjklmnoprtvy'
// Set p1 (default: limit) just past the first non-vowel that follows a vowel,
// then raise it to x if it would be smaller. x is the mark 3 characters into
// the word; when 'hop 3' fails the routine fails and p1 stays at limit.
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
// With the search limited by the p1 mark, find one of the listed endings.
// Every ending except 's' is deleted outright; 's' is deleted only when it is
// preceded by a character from the s_ending grouping.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
'hetens' 'erns' 'at' 'andet' 'het' 'ast'
(delete)
's'
(s_ending delete)
)
)
// With the search limited by the p1 mark: if the word ends in one of the
// listed consonant pairs, delete a single character (the one bracketed by
// [next] from the same starting position).
define consonant_pair as setlimit tomark p1 for (
among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
and ([next] delete)
)
// With the search limited by the p1 mark: delete 'lig', 'ig' or 'els';
// replace 'l{o"}st' by 'l{o"}s' and 'fullt' by 'full'.
define other_suffix as setlimit tomark p1 for (
[substring] among(
'lig' 'ig' 'els' (delete)
'l{o"}st' (<-'l{o"}s')
'fullt' (<-'full')
)
)
)
// Entry point: mark the region, then apply the three suffix steps backwards.
// Each step is wrapped in 'do', so one step failing does not stop the next.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)
libstemmer_c/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
deleted
100644 → 0
View file @
1c6e8c67
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)
externals ( stem )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a" hex '84'
stringdef ao hex '86'
stringdef o" hex '94'
define v 'aeiouy{a"}{ao}{o"}'
define s_ending 'bcdfghjklmnoprtvy'
// Set p1 (default: limit) just past the first non-vowel that follows a vowel,
// then raise it to x if it would be smaller. x is the mark 3 characters into
// the word; when 'hop 3' fails the routine fails and p1 stays at limit.
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
// With the search limited by the p1 mark, find one of the listed endings.
// Every ending except 's' is deleted outright; 's' is deleted only when it is
// preceded by a character from the s_ending grouping.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
'hetens' 'erns' 'at' 'andet' 'het' 'ast'
(delete)
's'
(s_ending delete)
)
)
// With the search limited by the p1 mark: if the word ends in one of the
// listed consonant pairs, delete a single character (the one bracketed by
// [next] from the same starting position).
define consonant_pair as setlimit tomark p1 for (
among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
and ([next] delete)
)
// With the search limited by the p1 mark: delete 'lig', 'ig' or 'els';
// replace 'l{o"}st' by 'l{o"}s' and 'fullt' by 'full'.
define other_suffix as setlimit tomark p1 for (
[substring] among(
'lig' 'ig' 'els' (delete)
'l{o"}st' (<-'l{o"}s')
'fullt' (<-'full')
)
)
)
// Entry point: mark the region, then apply the three suffix steps backwards.
// Each step is wrapped in 'do', so one step failing does not stop the next.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)
libstemmer_c/algorithms/turkish/stem_Unicode.sbl
deleted
100644 → 0
View file @
1c6e8c67
/* Stemmer for Turkish
* author: Evren (Kapusuz) Çilden
* email: evren.kapusuz at gmail.com
* version: 1.0 (15.01.2007)
* stems nominal verb suffixes
* stems nominal inflections
* more than one syllable word check
* (y,n,s,U) context check
* vowel harmony check
* last consonant check and conversion (b, c, d, ğ to p, ç, t, k)
* The stemming algorithm is based on the paper "An Affix Stripping
* Morphological Analyzer for Turkish" by Gülşen Eryiğit and
* Eşref Adalı (Proceedings of the IASTED International Conference
* ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18, 2004,
* Innsbruck, Austria).
* Turkish is an agglutinative language and has a very rich morphological
* structure. In Turkish, you can form many different words from a single stem
* by appending a sequence of suffixes. Eg. The word "doktoruymuşsunuz" means
* "You had been the doctor of him". The stem of the word is "doktor" and it
* takes three different suffixes -sU, -ymUs, and -sUnUz. The rules about
* the append order of suffixes can be clearly described as FSMs.
* The paper referenced above defines some FSMs for right to left
* morphological analysis. I generated a method for constructing snowball
* expressions from right to left FSMs for stemming suffixes.
*/
routines (
append_U_to_stems_ending_with_d_or_g // for preventing some overstemmings
check_vowel_harmony // tests vowel harmony for suffixes
is_reserved_word // tests whether current string is a reserved word ('ad','soyad')
mark_cAsInA // nominal verb suffix
mark_DA // noun suffix
mark_DAn // noun suffix
mark_DUr // nominal verb suffix
mark_ki // noun suffix
mark_lAr // noun suffix, nominal verb suffix
mark_lArI // noun suffix
mark_nA // noun suffix
mark_ncA // noun suffix
mark_ndA // noun suffix
mark_ndAn // noun suffix
mark_nU // noun suffix
mark_nUn // noun suffix
mark_nUz // nominal verb suffix
mark_sU // noun suffix
mark_sUn // nominal verb suffix
mark_sUnUz // nominal verb suffix
mark_possessives // -(U)m,-(U)n,-(U)mUz,-(U)nUz,
mark_yA // noun suffix
mark_ylA // noun suffix
mark_yU // noun suffix
mark_yUm // nominal verb suffix
mark_yUz // nominal verb suffix
mark_yDU // nominal verb suffix
mark_yken // nominal verb suffix
mark_ymUs_ // nominal verb suffix
mark_ysA // nominal verb suffix
mark_suffix_with_optional_y_consonant
mark_suffix_with_optional_U_vowel
mark_suffix_with_optional_n_consonant
mark_suffix_with_optional_s_consonant
more_than_one_syllable_word
post_process_last_consonants
postlude
stem_nominal_verb_suffixes
stem_noun_suffixes
stem_suffix_chain_before_ki
)
/* Special characters in Unicode Latin-1 and Latin Extended-A */
stringdef c. hex 'E7' // LATIN SMALL LETTER C WITH CEDILLA
stringdef g~ hex '011F' // LATIN SMALL LETTER G WITH BREVE
stringdef i' hex '0131' // LATIN SMALL LETTER I WITHOUT DOT
stringdef o" hex 'F6' // LATIN SMALL LETTER O WITH DIAERESIS
stringdef s. hex '015F' // LATIN SMALL LETTER S WITH CEDILLA
stringdef u" hex 'FC' // LATIN SMALL LETTER U WITH DIAERESIS
stringescapes { }
integers ( strlen ) // length of a string
booleans ( continue_stemming_noun_suffixes )
groupings ( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)
define vowel 'ae{i'}io{o"}u{u"}'
define U '{i'}iu{u"}'
// the vowel grouping definitions below are used for checking vowel harmony
define vowel1 'a{i'}ou' // vowels that can end with suffixes containing 'a'
define vowel2 'ei{o"}{u"}' // vowels that can end with suffixes containing 'e'
define vowel3 'a{i'}' // vowels that can end with suffixes containing 'i''
define vowel4 'ei' // vowels that can end with suffixes containing 'i'
define vowel5 'ou' // vowels that can end with suffixes containing 'o' or 'u'
define vowel6 '{o"}{u"}' // vowels that can end with suffixes containing 'o"' or 'u"'
externals ( stem )
backwardmode (
// checks vowel harmony for possible suffixes,
// helps to detect whether the candidate for suffix applies to vowel harmony
// this rule is added to prevent over stemming
// Succeeds without moving the cursor (the whole body is under 'test').
// Steps to the nearest vowel, matches it against one of the eight literal
// alternatives, and then requires that a member of the paired grouping
// (vowel1..vowel6) can be reached by a further 'goto'.
define check_vowel_harmony as (
test
(
(goto vowel) // if there is a vowel
(
('a' goto vowel1) or
('e' goto vowel2) or
('{i'}' goto vowel3) or
('i' goto vowel4) or
('o' goto vowel5) or
('{o"}' goto vowel6) or
('u' goto vowel5) or
('{u"}' goto vowel6)
)
)
)
// if the last consonant before suffix is vowel and n then advance and delete
// if the last consonant before suffix is non vowel and n do nothing
// if the last consonant before suffix is not n then only delete the suffix
// assumption: slice beginning is set correctly
// First alternative: the next character is 'n' and the one after it is a
// vowel; the cursor steps over the 'n'.
// Second alternative: the next character is not 'n' and the character after
// it is a vowel; the cursor is left where it was.
define mark_suffix_with_optional_n_consonant as (
((test 'n') next (test vowel))
or
((not(test 'n')) test(next (test vowel)))
)
// if the last consonant before suffix is vowel and s then advance and delete
// if the last consonant before suffix is non vowel and s do nothing
// if the last consonant before suffix is not s then only delete the suffix
// assumption: slice beginning is set correctly
// First alternative: the next character is 's' and the one after it is a
// vowel; the cursor steps over the 's'.
// Second alternative: the next character is not 's' and the character after
// it is a vowel; the cursor is left where it was.
define mark_suffix_with_optional_s_consonant as (
((test 's') next (test vowel))
or
((not(test 's')) test(next (test vowel)))
)
// if the last consonant before suffix is vowel and y then advance and delete
// if the last consonant before suffix is non vowel and y do nothing
// if the last consonant before suffix is not y then only delete the suffix
// assumption: slice beginning is set correctly
// First alternative: the next character is 'y' and the one after it is a
// vowel; the cursor steps over the 'y'.
// Second alternative: the next character is not 'y' and the character after
// it is a vowel; the cursor is left where it was.
define mark_suffix_with_optional_y_consonant as (
((test 'y') next (test vowel))
or
((not(test 'y')) test(next (test vowel)))
)
// Vowel counterpart of the optional-consonant checks.
// First alternative: the next character is in U and the one after it is a
// non-vowel; the cursor steps over the U character.
// Second alternative: the next character is not in U and the character after
// it is a non-vowel; the cursor is left where it was.
define mark_suffix_with_optional_U_vowel as (
((test U) next (test non-vowel))
or
((not(test U)) test(next (test non-vowel)))
)
// Possessive endings: matches one of the listed forms, then applies the
// optional-U-vowel context check. No vowel-harmony test is made here.
define mark_possessives as (
among ('m{i'}z' 'miz' 'muz' 'm{u"}z'
'n{i'}z' 'niz' 'nuz' 'n{u"}z' 'm' 'n')
(mark_suffix_with_optional_U_vowel)
)
// -sU: after the vowel-harmony test, matches one character from grouping U,
// then applies the optional-'s' context check.
define mark_sU as (
check_vowel_harmony
U
(mark_suffix_with_optional_s_consonant)
)
// -lArI: matches 'leri' or 'lar{i'}'; no vowel-harmony test is made.
define mark_lArI as (
among ('leri' 'lar{i'}')
)
// -yU: after the vowel-harmony test, matches one character from grouping U,
// then applies the optional-'y' context check.
define mark_yU as (
check_vowel_harmony
U
(mark_suffix_with_optional_y_consonant)
)
// -nU: after the vowel-harmony test, matches one of the four listed forms.
define mark_nU as (
check_vowel_harmony
among ('n{i'}' 'ni' 'nu' 'n{u"}')
)
// -nUn: after the vowel-harmony test, matches one of the listed forms, then
// applies the optional-'n' context check.
define mark_nUn as (
check_vowel_harmony
among ('{i'}n' 'in' 'un' '{u"}n')
(mark_suffix_with_optional_n_consonant)
)
// -yA: after the vowel-harmony test, matches 'a' or 'e', then applies the
// optional-'y' context check.
define mark_yA as (
check_vowel_harmony
among('a' 'e')
(mark_suffix_with_optional_y_consonant)
)
// -nA: after the vowel-harmony test, matches 'na' or 'ne'.
define mark_nA as (
check_vowel_harmony
among('na' 'ne')
)
// -DA: after the vowel-harmony test, matches 'da', 'de', 'ta' or 'te'.
define mark_DA as (
check_vowel_harmony
among('da' 'de' 'ta' 'te')
)
// -ndA: after the vowel-harmony test, matches 'nda' or 'nde'.
define mark_ndA as (
check_vowel_harmony
among('nda' 'nde')
)
// -DAn: after the vowel-harmony test, matches 'dan', 'den', 'tan' or 'ten'.
define mark_DAn as (
check_vowel_harmony
among('dan' 'den' 'tan' 'ten')
)
// -ndAn: after the vowel-harmony test, matches 'ndan' or 'nden'.
define mark_ndAn as (
check_vowel_harmony
among('ndan' 'nden')
)
// -ylA: after the vowel-harmony test, matches 'la' or 'le', then applies the
// optional-'y' context check.
define mark_ylA as (
check_vowel_harmony
among('la' 'le')
(mark_suffix_with_optional_y_consonant)
)
// -ki: matches the literal 'ki' with no further checks.
define mark_ki as (
'ki'
)
// -ncA: after the vowel-harmony test, matches 'ca' or 'ce', then applies the
// optional-'n' context check.
define mark_ncA as (
check_vowel_harmony
among('ca' 'ce')
(mark_suffix_with_optional_n_consonant)
)
// -yUm: after the vowel-harmony test, matches one of the listed forms, then
// applies the optional-'y' context check.
define mark_yUm as (
check_vowel_harmony
among ('{i'}m' 'im' 'um' '{u"}m')
(mark_suffix_with_optional_y_consonant)
)
// -sUn: after the vowel-harmony test, matches one of the four listed forms.
define mark_sUn as (
check_vowel_harmony
among ('s{i'}n' 'sin' 'sun' 's{u"}n' )
)
// -yUz: after the vowel-harmony test, matches one of the listed forms, then
// applies the optional-'y' context check.
define mark_yUz as (
check_vowel_harmony
among ('{i'}z' 'iz' 'uz' '{u"}z')
(mark_suffix_with_optional_y_consonant)
)
// -sUnUz: matches one of the four listed forms; no vowel-harmony test is made.
define mark_sUnUz as (
among ('s{i'}n{i'}z' 'siniz' 'sunuz' 's{u"}n{u"}z')
)
// -lAr: after the vowel-harmony test, matches 'ler' or 'lar'.
define mark_lAr as (
check_vowel_harmony
among ('ler' 'lar')
)
// -nUz: after the vowel-harmony test, matches one of the four listed forms.
define mark_nUz as (
check_vowel_harmony
among ('n{i'}z' 'niz' 'nuz' 'n{u"}z')
)
// -DUr: after the vowel-harmony test, matches one of the eight listed forms
// (initial 't' or 'd', each with the four U vowels).
define mark_DUr as (
check_vowel_harmony
among ('t{i'}r' 'tir' 'tur' 't{u"}r' 'd{i'}r' 'dir' 'dur' 'd{u"}r')
)
// -cAsInA: matches 'cas{i'}na' or 'cesine'; no vowel-harmony test is made.
define mark_cAsInA as (
among ('cas{i'}na' 'cesine')
)
// -yDU: after the vowel-harmony test, matches one of the listed forms
// (t/d + U vowel, optionally followed by m, n or k), then applies the
// optional-'y' context check.
define mark_yDU as (
check_vowel_harmony
among ('t{i'}m' 'tim' 'tum' 't{u"}m' 'd{i'}m' 'dim' 'dum' 'd{u"}m'
't{i'}n' 'tin' 'tun' 't{u"}n' 'd{i'}n' 'din' 'dun' 'd{u"}n'
't{i'}k' 'tik' 'tuk' 't{u"}k' 'd{i'}k' 'dik' 'duk' 'd{u"}k'
't{i'}' 'ti' 'tu' 't{u"}' 'd{i'}' 'di' 'du' 'd{u"}')
(mark_suffix_with_optional_y_consonant)
)
// does not fully obey vowel harmony
// -ysA: matches one of the listed forms without a vowel-harmony test, then
// applies the optional-'y' context check.
define mark_ysA as (
among ('sam' 'san' 'sak' 'sem' 'sen' 'sek' 'sa' 'se')
(mark_suffix_with_optional_y_consonant)
)
// -ymUs: after the vowel-harmony test, matches one of the four listed forms,
// then applies the optional-'y' context check.
define mark_ymUs_ as (
check_vowel_harmony
among ('m{i'}{s.}' 'mi{s.}' 'mu{s.}' 'm{u"}{s.}')
(mark_suffix_with_optional_y_consonant)
)
// -yken: matches the literal 'ken', then applies the optional-'y' context
// check. No vowel-harmony test is made.
define mark_yken as (
'ken' (mark_suffix_with_optional_y_consonant)
)
// Strip a chain of nominal verb suffixes from the end of the word.
// continue_stemming_noun_suffixes is set on entry and is cleared only inside
// the alternative that starts with mark_lAr. The alternatives are tried in
// order; the slice opened by the leading '[' is closed and deleted by the
// final ']delete' (some alternatives also delete intermediate slices).
define stem_nominal_verb_suffixes as (
[
set continue_stemming_noun_suffixes
(mark_ymUs_ or mark_yDU or mark_ysA or mark_yken)
or
(mark_cAsInA (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_)
or
(
mark_lAr ] delete try([(mark_DUr or mark_yDU or mark_ysA or mark_ymUs_))
unset continue_stemming_noun_suffixes
)
or
(mark_nUz (mark_yDU or mark_ysA))
or
((mark_sUnUz or mark_yUz or mark_sUn or mark_yUm) ] delete try([ mark_ymUs_))
or
(mark_DUr ] delete try([ (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_))
]delete
)
// stems noun suffix chains ending with -ki
// Requires mark_ki first, then one of three chains introduced by mark_DA,
// mark_nUn or mark_ndA. The routine is recursive: several branches call
// stem_suffix_chain_before_ki again after deleting a suffix.
define stem_suffix_chain_before_ki as (
[
mark_ki
(
(mark_DA] delete try([
(mark_lAr] delete try(stem_suffix_chain_before_ki))
or
(mark_possessives] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
))
or
(mark_nUn] delete try([
(mark_lArI] delete)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
))
or
(mark_ndA (
(mark_lArI] delete)
or
((mark_sU] delete try([mark_lAr]delete stem_suffix_chain_before_ki)))
or
(stem_suffix_chain_before_ki)
))
)
)
// Strip noun suffix chains. The top-level alternatives are joined by 'or',
// so they are tried in the order written and the first one that succeeds is
// used. Most alternatives delete one suffix and then optionally ('try')
// continue with further suffixes or with stem_suffix_chain_before_ki.
define stem_noun_suffixes as (
([mark_lAr] delete try(stem_suffix_chain_before_ki))
or
([mark_ncA] delete
try(
([mark_lArI] delete)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
([mark_lAr] delete stem_suffix_chain_before_ki)
)
)
or
([(mark_ndA or mark_nA)
(
(mark_lArI] delete)
or
(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
)
)
or
([(mark_ndAn or mark_nU) ((mark_sU ] delete try([mark_lAr] delete stem_suffix_chain_before_ki)) or (mark_lArI)))
or
( [mark_DAn] delete try ([
(
(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(mark_lAr] delete try(stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
))
)
or
([mark_nUn or mark_ylA] delete
try(
([mark_lAr] delete stem_suffix_chain_before_ki)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
stem_suffix_chain_before_ki
)
)
or
([mark_lArI] delete)
or
(stem_suffix_chain_before_ki)
or
([mark_DA or mark_yU or mark_yA] delete try([((mark_possessives] delete try([mark_lAr)) or mark_lAr) ] delete [ stem_suffix_chain_before_ki))
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
)
// Replace a final 'b', 'c', 'd' or '{g~}' by 'p', '{c.}', 't' or 'k'
// respectively.
define post_process_last_consonants as (
[substring] among (
'b' (<- 'p')
'c' (<- '{c.}')
'd' (<- 't')
'{g~}' (<- 'k')
)
)
// after stemming if the word ends with 'd' or 'g' most probably last U is overstemmed
// like in 'kedim' -> 'ked'
// Turkish words don't usually end with 'd' or 'g'
// some very well known words are ignored (like 'ad' 'soyad')
// appends U to stems ending with d or g, decides which vowel to add
// based on the last vowel in the stem
// Applies only when the word ends in 'd' or 'g' (first 'test').
// Each alternative looks, without moving the cursor, for the nearest vowel
// and checks which pair it belongs to; the first pair that matches decides
// which U vowel is inserted at the cursor with '<+'.
define append_U_to_stems_ending_with_d_or_g as (
test('d' or 'g')
(test((goto vowel) 'a' or '{i'}') <+ '{i'}')
or
(test((goto vowel) 'e' or 'i') <+ 'i')
or
(test((goto vowel) 'o' or 'u') <+ 'u')
or
(test((goto vowel) '{o"}' or '{u"}') <+ '{u"}')
)
)
// Tests if there are more than one syllables
// In Turkish each vowel indicates a distinct syllable
// Succeeds, without moving the cursor, when 'gopast vowel' can be applied at
// least twice, i.e. the word contains two or more vowels.
define more_than_one_syllable_word as (
test (atleast 2 (gopast vowel))
)
// Succeeds, without moving the cursor, when 'ad' is found and limit equals 2,
// or 'soyad' is found and limit equals 5. strlen is used only as a scratch
// integer holding the expected length for the comparison with limit.
define is_reserved_word as (
test(gopast 'ad' ($strlen = 2) ($strlen == limit))
or
test(gopast 'soyad' ($strlen = 5) ($strlen == limit))
)
// Final adjustments, skipped for reserved words: working backwards, first
// re-append a U vowel after a final 'd'/'g', then convert the final
// consonant. Both steps are wrapped in 'do'.
define postlude as (
not(is_reserved_word)
backwards (
do append_U_to_stems_ending_with_d_or_g
do post_process_last_consonants
)
)
// Entry point. Words that fail more_than_one_syllable_word are not processed.
// Otherwise, working backwards: strip nominal verb suffixes, then strip noun
// suffixes only if continue_stemming_noun_suffixes is still set; postlude
// follows the backwards block.
// NOTE(review): the bare boolean test sits between the two 'do' steps, so a
// cleared flag appears to fail the whole backwards block and skip postlude
// as well - confirm this is intended.
define stem as (
(more_than_one_syllable_word)
(
backwards (
do stem_nominal_verb_suffixes
continue_stemming_noun_suffixes
do stem_noun_suffixes
)
postlude
)
)
libstemmer_c/compiler/analyser.c
deleted
100644 → 0
View file @
1c6e8c67
#include <stdio.h>
/* main etc */
#include <stdlib.h>
/* exit */
#include <string.h>
/* memmove */
#include "header.h"
/* recursive usage: */
static
void
read_program_
(
struct
analyser
*
a
,
int
terminator
);
static
struct
node
*
read_C
(
struct
analyser
*
a
);
static
struct
node
*
C_style
(
struct
analyser
*
a
,
char
*
s
,
int
token
);
/*
 * Report an internal fault on stderr and terminate the process.
 *
 * n: fault number included in the message.
 * Does not return: the process exits with status 1.
 */
static void fault(int n)
{
    fprintf(stderr, "fault %d\n", n);
    exit(1);
}
/*
 * Print the subtree rooted at p to stdout, one node per line.
 *
 * p: node to print; must not be null.
 * n: indentation depth; n prefix strings are written before the node.
 * s: prefix written in the last indentation slot (the others get " ").
 *
 * Each line shows the token name, followed by the node's name (handed to
 * report_b) and its literal string in single quotes when these are present.
 * The AE, left and aux children are printed one level deeper, marked with
 * "# ", " " and "@ " respectively; the right link is printed at the same
 * depth.
 */
static void print_node_(struct node * p, int n, char * s) {
    int i;

    /* s is data, not a format string: write it with fputs so that a '%'
       in a prefix can never be taken as a conversion specification. */
    for (i = 0; i < n; i++) fputs(i == n - 1 ? s : " ", stdout);

    printf("%s ", name_of_token(p->type));
    unless (p->name == 0) report_b(stdout, p->name->b);
    unless (p->literalstring == 0) {
        printf("'");
        report_b(stdout, p->literalstring);
        printf("'");
    }
    printf("\n");

    unless (p->AE == 0) print_node_(p->AE, n + 1, "# ");
    unless (p->left == 0) print_node_(p->left, n + 1, " ");
    unless (p->right == 0) print_node_(p->right, n, " ");
    if (p->aux != 0) print_node_(p->aux, n + 1, "@ ");
}
/* Print the node tree stored in a->program to stdout, starting at depth 0. */
extern void print_program(struct analyser * a) {
    struct node * root = a->program;

    print_node_(root, 0, " ");
}
static
struct
node
*
new_node
(
struct
analyser
*
a
,
int
type
)
{
NEW
(
node
,
p
);
p
->
next
=
a
->
nodes
;
a
->
nodes
=
p
;
p
->
left
=
0
;
p
->
right
=
0
;
p
->
aux
=
0
;
p
->
AE
=
0
;
p
->
name
=
0
;
p
->
literalstring
=
0
;
p
->
mode
=
a
->
mode
;
p
->
line_number
=
a
->
tokeniser
->
line_number
;
p
->
type
=
type
;
return
p
;
}
/*
 * Return the display name of a mode value for use in error messages.
 *
 * n: m_forward or m_backward; any other value is reported through fault(0),
 *    which terminates the process.
 */
static char * name_of_mode(int n) {
    switch (n) {
        case m_forward:
            return "string forward";
        /* case m_integer: return "integer"; */
        default:
            fault(0);
            /* fault() exits; the label order keeps the original
               fall-through into the backward case */
        case m_backward:
            return "string backward";
    }
}
/*
 * Return the display name of a type code for use in error messages.
 *
 * n: one of the type codes passed to check_name_type: 'b', 's', 'i', 'r',
 *    'R' or 'g'.  Any other value is reported through fault(1), which
 *    terminates the process.
 *
 * 'b' is handled here because check_name_type accepts it and reports a
 * mismatch through error2(a, 33, type), which calls this function; without
 * the case a "not of type boolean" diagnostic would end in fault(1).
 */
static char * name_of_type(int n) {
    switch (n) {
        case 'b': return "boolean";
        case 's': return "string";
        case 'i': return "integer";
        case 'r': return "routine";
        case 'R': return "routine or grouping";
        case 'g': return "grouping";
        default:
            fault(1);
            return "string";   /* not reached: fault() exits */
    }
}
/*
 * Count one more reported error.  Once 20 errors have already been counted,
 * print "... etc" to stderr and exit with status 1 instead.
 */
static void count_error(struct analyser * a) {
    struct tokeniser * tok = a->tokeniser;

    if (tok->error_count < 20) {
        tok->error_count++;
        return;
    }
    fprintf(stderr, "... etc\n");
    exit(1);
}
/*
 * Report error number n on stderr, prefixed with the current line number.
 *
 * a: analyser whose tokeniser supplies the line number and token state.
 * n: error number selecting the message.
 * x: extra value used by some messages (a line number for 14 and 20, a mode
 *    for 32 and 37, a type code for 33); ignored otherwise.
 *
 * The error is counted first via count_error, which may exit.  Messages
 * numbered 30 or more are preceded by the tokeniser's current string (t->b);
 * messages numbered 13 or less are followed by " after <previous token>"
 * when a previous token is recorded.
 */
static void error2(struct analyser * a, int n, int x) {
    struct tokeniser * tok = a->tokeniser;

    count_error(a);

    /* location prefix */
    fprintf(stderr, "Line %d", tok->line_number);
    if (tok->get_depth > 0) fputs(" (of included file)", stderr);
    fputs(": ", stderr);

    if (n >= 30) report_b(stderr, tok->b);

    switch (n) {
        case 0:
            fprintf(stderr, "%s omitted", name_of_token(tok->omission));
            break;
        case 3:
            fputs("in among(...), ", stderr);
            /* fall through: the rest of the message is that of error 1 */
        case 1:
            fprintf(stderr, "unexpected %s", name_of_token(tok->token));
            if (tok->token == c_number) fprintf(stderr, " %d", tok->number);
            if (tok->token == c_name) {
                fputs(" ", stderr);
                report_b(stderr, tok->b);
            }
            break;
        case 2:
            fputs("string omitted", stderr);
            break;

        case 14:
            fprintf(stderr, "unresolved substring on line %d", x);
            break;
        case 15:
            fprintf(stderr, "%s not allowed inside reverse(...)", name_of_token(tok->token));
            break;
        case 16:
            fputs("empty grouping", stderr);
            break;
        case 17:
            fputs("backwards used when already in this mode", stderr);
            break;
        case 18:
            fputs("empty among(...)", stderr);
            break;
        case 19:
            fputs("two adjacent bracketed expressions in among(...)", stderr);
            break;
        case 20:
            fprintf(stderr, "substring preceded by another substring on line %d", x);
            break;

        case 30:
            fputs(" re-declared", stderr);
            break;
        case 31:
            fputs(" undeclared", stderr);
            break;
        case 32:
            fprintf(stderr, " declared as %s mode; used as %s mode",
                    name_of_mode(a->mode), name_of_mode(x));
            break;
        case 33:
            fprintf(stderr, " not of type %s", name_of_type(x));
            break;
        case 34:
            fputs(" not of type string or integer", stderr);
            break;
        case 35:
            fputs(" misplaced", stderr);
            break;
        case 36:
            fputs(" redefined", stderr);
            break;
        case 37:
            fprintf(stderr, " mis-used as %s mode", name_of_mode(x));
            break;
        default:
            fprintf(stderr, " error %d", n);
            break;
    }

    if (n <= 13 && tok->previous_token > 0)
        fprintf(stderr, " after %s", name_of_token(tok->previous_token));
    fputs("\n", stderr);
}
/* Report error number n; shorthand for error2 with an extra value of 0. */
static void error(struct analyser * a, int n) {
    const int no_extra = 0;

    error2(a, n, no_extra);
}
/*
 * Report that the among(...) recorded at node p contains the string b more
 * than once.  The error is counted first (count_error may exit); the message
 * quotes p's line number and the string itself.
 */
static void error3(struct analyser * a, struct node * p, symbol * b) {
    int among_line = p->line_number;

    count_error(a);
    fprintf(stderr, "among(...) on line %d has repeated string '", among_line);
    report_b(stderr, b);
    fputs("'\n", stderr);
}
/*
 * Report that the name q is undefined: the error is counted (count_error may
 * exit), then q's string is written to stderr followed by " undefined".
 */
static void error4(struct analyser * a, struct name * q) {
    count_error(a);
    report_b(stderr, q->b);
    fputs(" undefined\n", stderr);
}
static
void
omission_error
(
struct
analyser
*
a
,
int
n
)
{
a
->
tokeniser
->
omission
=
n
;
error
(
a
,
0
);
}
/*
 * Check that the tokeniser's current token is `code`.
 *
 * Returns true when it is; otherwise reports an omission error for `code`
 * and returns false.
 */
static int check_token(struct analyser * a, int code) {
    if (a->tokeniser->token == code) return true;

    omission_error(a, code);
    return false;
}
static
int
get_token
(
struct
analyser
*
a
,
int
code
)
{
struct
tokeniser
*
t
=
a
->
tokeniser
;
read_token
(
t
);
{
int
x
=
check_token
(
a
,
code
);
unless
(
x
)
t
->
token_held
=
true
;
return
x
;
}
}
static
struct
name
*
look_for_name
(
struct
analyser
*
a
)
{
struct
name
*
p
=
a
->
names
;
symbol
*
q
=
a
->
tokeniser
->
b
;
repeat
{
if
(
p
==
0
)
return
0
;
{
symbol
*
b
=
p
->
b
;
int
n
=
SIZE
(
b
);
if
(
n
==
SIZE
(
q
)
&&
memcmp
(
q
,
b
,
n
*
sizeof
(
symbol
))
==
0
)
{
p
->
referenced
=
true
;
return
p
;
}
}
p
=
p
->
next
;
}
}
/*
 * Like look_for_name, but a missing name is reported as error 31.
 * Returns the entry found, or 0 when there is none.
 */
static struct name * find_name(struct analyser * a) {
    struct name * found = look_for_name(a);

    if (found == 0) {
        error(a, 31);
    }
    return found;
}
/*
 * Record or verify the mode in which the name p is used.
 *
 * A negative p->mode means no mode has been recorded yet, so `mode` is
 * stored.  Otherwise a different mode is reported as error 37.
 */
static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
    if (p->mode < 0) {
        p->mode = mode;
        return;
    }
    if (p->mode != mode) error2(a, 37, mode);
}
/*
 * Check that the name p has the kind requested by the type code.
 *
 * type: 's' string, 'i' integer, 'b' boolean, 'g' grouping,
 *       'r' routine or external, 'R' grouping, routine or external.
 *
 * A mismatch, or a type code not listed above, is reported as error 33 with
 * the type code as the extra value.
 */
static void check_name_type(struct analyser * a, struct name * p, int type) {
    int is_callable = p->type == t_routine || p->type == t_external;
    int ok;

    switch (type) {
        case 's': ok = p->type == t_string; break;
        case 'i': ok = p->type == t_integer; break;
        case 'b': ok = p->type == t_boolean; break;
        case 'g': ok = p->type == t_grouping; break;
        case 'r': ok = is_callable; break;
        case 'R': ok = p->type == t_grouping || is_callable; break;
        default:  ok = 0; break;
    }
    if (!ok) error2(a, 33, type);
}
static
void
read_names
(
struct
analyser
*
a
,
int
type
)
{
struct
tokeniser
*
t
=
a
->
tokeniser
;
unless
(
get_token
(
a
,
c_bra
))
return
;
repeat
{
if
(
read_token
(
t
)
!=
c_name
)
break
;
if
(
look_for_name
(
a
)
!=
0
)
error
(
a
,
30
);
else
{
NEW
(
name
,
p
);
p
->
b
=
copy_b
(
t
->
b
);
p
->
type
=
type
;
p
->
mode
=
-
1
;
/* routines, externals */
p
->
count
=
a
->
name_count
[
type
];
p
->
referenced
=
false
;
p
->
used
=
false
;
p
->
grouping
=
0
;
p
->
definition
=
0
;
a
->
name_count
[
type
]
++
;
p
->
next
=
a
->
names
;
a
->
names
=
p
;
}
}
unless
(
check_token
(
a
,
c_ket
))
t
->
token_held
=
true
;
}
static
symbol
*
new_literalstring
(
struct
analyser
*
a
)
{
NEW
(
literalstring
,
p
);
p
->
b
=
copy_b
(
a
->
tokeniser
->
b
);
p
->
next
=
a
->
literalstrings
;
a
->
literalstrings
=
p
;
return
p
->
b
;
}
static
int
read_AE_test
(
struct
analyser
*
a
)
{
struct
tokeniser
*
t
=
a
->
tokeniser
;
switch
(
read_token
(
t
))
{
case
c_assign
:
return
c_mathassign
;
case
c_plusassign
:
case
c_minusassign
:
case
c_multiplyassign
:
case
c_divideassign
:
case
c_eq
:
case
c_ne
:
case
c_gr
:
case
c_ge
:
case
c_ls
:
case
c_le
:
return
t
->
token
;
default:
error
(
a
,
1
);
t
->
token_held
=
true
;
return
c_eq
;
}
}
/*
 * Binding strength of an arithmetic operator token: 1 for plus and minus,
 * 2 for multiply and divide, and -2 for any other token.
 */
static int binding(int t) {
    if (t == c_plus || t == c_minus) return 1;
    if (t == c_multiply || t == c_divide) return 2;
    return -2;
}
static
void
name_to_node
(
struct
analyser
*
a
,
struct
node
*
p
,
int
type
)
{
struct
name
*
q
=
find_name
(
a
);
unless
(
q
==
0
)
{
check_name_type
(
a
,
q
,
type
);
q
->
used
=
true
;
}
p
->
name
=
q
;
}
static
struct
node
*
read_AE
(
struct
analyser
*
a
,
int
B
)
{
struct
tokeniser
*
t
=
a
->
tokeniser
;
struct
node
*
p
;
struct
node
*
q
;
switch
(
read_token
(
t
))
{
case
c_minus
:
/* monadic */
p
=
new_node
(
a
,
c_neg
);
p
->
right
=
read_AE
(
a
,
100
);
break
;
case
c_bra
:
p
=
read_AE
(
a
,
0
);
get_token
(
a
,
c_ket
);
break
;
case
c_name
:
p
=
new_node
(
a
,
c_name
);
name_to_node
(
a
,
p
,
'i'
);
break
;
case
c_maxint
:
case
c_minint
:
case
c_cursor
:
case
c_limit
:
case
c_size
:
p
=
new_node
(
a
,
t
->
token
);
break
;
case
c_number
:
p
=
new_node
(
a
,
c_number
);
p
->
number
=
t
->
number
;
break
;
case
c_sizeof
:
p
=
C_style
(
a
,
"s"
,
c_sizeof
);
break
;
default:
error
(
a
,
1
);
t
->
token_held
=
true
;
return
0
;
}
repeat
{
int
token
=
read_token
(
t
);
int
b
=
binding
(
token
);
unless
(
binding
(
token
)
>
B
)
{
t
->
token_held
=
true
;
return
p
;
}
q
=
new_node
(
a
,
token
);
q
->
left
=
p
;
q
->
right
=
read_AE
(
a
,
b
);
p
=
q
;
}
}
static
struct
node
*
read_C_connection
(
struct
analyser
*
a
,
struct
node
*
q
,
int
op
)
{
struct
tokeniser
*
t
=
a
->
tokeniser
;
struct
node
*
p
=
new_node
(
a
,
op
);
struct
node
*
p_end
=
q
;
p
->
left
=
q
;
repeat
{
q
=
read_C
(
a
);
p_end
->
right
=
q
;
p_end
=
q
;
if
(
read_token
(
t
)
!=
op
)
{
t
->
token_held
=
true
;
break
;
}
}
return
p
;
}
static
struct
node
*
read_C_list
(
struct
analyser
*
a
)
{
struct
tokeniser
*
t
=
a
->
tokeniser
;
struct
node
*
p
=
new_node
(
a
,
c_bra
);
struct
node
*
p_end
=
0
;
repeat
{
int
token
=
read_token
(
t
);
if
(
token
==
c_ket
)
return
p
;
if
(
token
<
0
)
{
omission_error
(
a
,
c_ket
);
return
p
;
}
t
->
token_held
=
true
;
{
struct
node
*
q
=
read_C
(
a
);
repeat
{
token
=
read_token
(
t
);
if
(
token
!=
c_and
&&
token
!=
c_or
)
{
t
->
token_held
=
true
;
break
;
}
q
=
read_C_connection
(
a
,
q
,
token
);
}
if
(
p_end
==
0
)
p
->
left
=
q
;
else
p_end
->
right
=
q
;
p_end
=
q
;
}
}
}
static
struct
node
*
C_style
(
struct
analyser
*
a
,
char
*
s
,
int
token
)
{
int
i
;
struct
node
*
p
=
new_node
(
a
,
token
);
for
(
i
=
0
;
s
[
i
]
!=
0
;
i
++
)
switch
(
s
[
i
])
{
case
'C'
:
p
->
left
=
read_C
(
a
);
continue
;
case
'D'
:
p
->
aux
=
read_C
(
a
);
continue
;
case
'A'
:
p
->
AE
=
read_AE
(
a
,
0
);
continue
;
case
'f'
:
get_token
(
a
,
c_for
);
continue
;
case
'S'
:
{
int
str_token
=
read_token
(
a
->
tokeniser
);
if
(
str_token
==
c_name
)
name_to_node
(
a
,
p
,
's'
);
else
if
(
str_token
==
c_literalstring
)
p
->
literalstring
=
new_literalstring
(
a
);
else
error
(
a
,
2
);
}
continue
;
case
'b'
:
case
's'
:
case
'i'
:
if
(
get_token
(
a
,
c_name
))
name_to_node
(
a
,
p
,
s
[
i
]);
continue
;
}
return
p
;
}
/*
 * Create a c_literalstring node holding a copy of the tokeniser's current
 * string (made by new_literalstring).
 */
static struct node * read_literalstring(struct analyser * a) {
    struct node * leaf = new_node(a, c_literalstring);

    leaf->literalstring = new_literalstring(a);
    return leaf;
}
/* Reverse, in place, the SIZE(b) symbols stored in b. */
static void reverse_b(symbol * b) {
    int lo;
    int hi;

    for (lo = 0, hi = SIZE(b) - 1; lo < hi; lo++, hi--) {
        symbol held = b[lo];
        b[lo] = b[hi];
        b[hi] = held;
    }
}
/*
 * qsort comparison for struct amongvec entries.
 *
 * Orders entries by their strings, symbol by symbol; when one string is a
 * prefix of the other, the shorter one sorts first.  Returns a negative,
 * zero or positive value in the usual qsort convention.
 */
static int compare_amongvec(const void * pv, const void * qv) {
    const struct amongvec * lhs = pv;
    const struct amongvec * rhs = qv;
    int common = lhs->size < rhs->size ? lhs->size : rhs->size;
    int k = 0;

    while (k < common) {
        if (lhs->b[k] != rhs->b[k]) return lhs->b[k] - rhs->b[k];
        k++;
    }
    return lhs->size - rhs->size;
}
/*
 * Build the among record for the among node p.
 *
 * a:         analyser; the new record is appended to a->amongs.
 * p:         the c_among node.  p->number is the count of literal strings
 *            and p->left the chain of items read by read_among.  The caller
 *            only invokes this when no errors were reported, which implies
 *            p->number > 0.
 * substring: the pending substring node, or 0.  When present its mode, not
 *            p's, decides the matching direction.
 *
 * One amongvec entry is filled in per literal string.  A bracketed command
 * that follows a run of strings becomes the `p` / `result` of every entry
 * not yet bound to a command; an empty command "()" marks the preceding
 * strings as having none.  The entries are then sorted (on the reversed
 * strings when matching backwards), each entry's `i` is set to the index of
 * the longest other entry that is a proper prefix of it, and adjacent equal
 * strings are reported through error3.
 *
 * A mismatch between the strings found and p->number is an internal error:
 * it is printed and the process exits.
 */
static void make_among(struct analyser * a, struct node * p, struct node * substring) {

    NEW(among, x);
    NEWVEC(amongvec, v, p->number);
    struct node * q = p->left;
    struct amongvec * w0 = v;   /* first entry not yet bound to a command */
    struct amongvec * w1 = v;   /* next free entry */
    int result = 1;
    int count;
    int i;

    int direction = substring != 0 ? substring->mode : p->mode;
    int backward = direction == m_backward;

    /* append x to the analyser's list of amongs */
    if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
    a->amongs_end = x;
    x->next = 0;
    x->b = v;
    x->number = a->among_count++;
    x->starter = 0;

    /* a leading bracketed command is the starter, not a case command */
    if (q->type == c_bra) { x->starter = q; q = q->right; }

    until (q == 0) {
        if (q->type == c_literalstring) {
            symbol * b = q->literalstring;
            w1->b = b;           /* pointer to case string */
            w1->p = 0;           /* pointer to corresponding case expression */
            w1->size = SIZE(b);  /* number of characters in string */
            w1->i = -1;          /* index of longest substring */
            w1->result = -1;     /* number of corresponding case expression */
            w1->function = q->left == 0 ? 0 : q->left->name;
            unless (w1->function == 0)
                check_routine_mode(a, w1->function, direction);
            w1++;
        }
        else
        if (q->left == 0)  /* empty command: () */
            w0 = w1;
        else {
            until (w0 == w1) {
                w0->p = q;
                w0->result = result;
                w0++;
            }
            result++;
        }
        q = q->right;
    }

    count = (int)(w1 - v);
    unless (count == p->number) {
        fprintf(stderr, "oh! %d %d\n", count, p->number);
        exit(1);
    }

    if (backward) for (i = 0; i < count; i++) reverse_b(v[i].b);
    qsort(v, count, sizeof(struct amongvec), compare_amongvec);

    /* The following loop is O(n squared).  It is index based so that no
       pointer to the element before v[0] is ever formed. */
    for (i = count - 1; i >= 0; i--) {
        symbol * b = v[i].b;
        int size = v[i].size;
        int j;

        for (j = i - 1; j >= 0; j--) {
            if (v[j].size < size && memcmp(v[j].b, b, v[j].size * sizeof(symbol)) == 0) {
                v[i].i = j;  /* fill in index of longest substring */
                break;
            }
        }
    }

    if (backward) for (i = 0; i < count; i++) reverse_b(v[i].b);

    /* after sorting, duplicate strings are adjacent */
    for (i = 0; i + 1 < count; i++)
        if (v[i].size == v[i + 1].size &&
            memcmp(v[i].b, v[i + 1].b, v[i].size * sizeof(symbol)) == 0) error3(a, p, v[i].b);

    x->literalstring_count = p->number;
    x->command_count = result - 1;
    p->among = x;

    x->substring = substring;
    if (substring != 0) substring->among = x;
    unless (x->command_count == 0 && x->starter == 0) a->amongvar_needed = true;
}
/* Parse the body of an 'among' construct and return its c_among node.
 *
 * The node's 'left' chain (linked through 'right') receives, in source order,
 * one node per literal string and one per bracketed command list.  p->number
 * counts the literal strings.  Any substring node pending on the analyser is
 * taken over by this among and a->substring is cleared.  When the closing
 * c_ket is reached and no errors have been counted, make_among() is called to
 * build the among tables.
 */
static struct node * read_among(struct analyser * a) {
    struct tokeniser * t = a->tokeniser;
    struct node * p = new_node(a, c_among);
    struct node * p_end = 0;      /* last element appended to p's list */
    int previous_token = -1;      /* token that started the previous element */
    struct node * substring = a->substring;

    a->substring = 0;
    p->number = 0; /* counts the number of literals */
    unless (get_token(a, c_bra)) return p;
    repeat {
        struct node * q;
        int token = read_token(t);
        switch (token) {
            case c_literalstring:
                q = read_literalstring(a);
                /* a literal may be followed by a name, attached as q->left */
                if (read_token(t) == c_name) {
                    struct node * r = new_node(a, c_name);
                    name_to_node(a, r, 'r');
                    q->left = r;
                }
                else t->token_held = true;  /* not a name: hand the token back */
                p->number++; break;
            case c_bra:
                /* two command lists in a row are reported as error 19 */
                if (previous_token == c_bra) error(a, 19);
                q = read_C_list(a); break;
            default:
                error(a, 3);
                /* no break: falls through into the c_ket handling below */
            case c_ket:
                if (p->number == 0) error(a, 18);
                if (t->error_count == 0) make_among(a, p, substring);
                return p;
        }
        previous_token = token;
        /* append q to the list hanging off p */
        if (p_end == 0) p->left = q; else p_end->right = q;
        p_end = q;
    }
}
static
struct
node
*
read_substring
(
struct
analyser
*
a
)
{
struct
node
*
p
=
new_node
(
a
,
c_substring
);
if
(
a
->
substring
!=
0
)
error2
(
a
,
20
,
a
->
substring
->
line_number
);
a
->
substring
=
p
;
return
p
;
}
static
void
check_modifyable
(
struct
analyser
*
a
)
{
unless
(
a
->
modifyable
)
error
(
a
,
15
);
}
/* Read one command and return its syntax-tree node.
 *
 * The next token selects the kind of command.  Most kinds are delegated to
 * C_style() with a format string naming the operands to read; names, '$'
 * commands, 'non', literal strings, 'among' and 'substring' are handled
 * here or by their own readers.  An unexpected token reports error 1 and
 * returns 0.
 */
static struct node * read_C(struct analyser * a) {
    struct tokeniser * t = a->tokeniser;
    int token = read_token(t);
    switch (token) {
        case c_bra:
            return read_C_list(a);
        case c_backwards:
            {
                /* switch to backward mode for the operand, then restore;
                 * error 17 if the mode is already backward */
                int mode = a->mode;
                if (a->mode == m_backward) error(a, 17); else a->mode = m_backward;
                {   struct node * p = C_style(a, "C", token);
                    a->mode = mode;
                    return p;
                }
            }
        case c_reverse:
            {
                /* flip the direction and clear 'modifyable' while the operand
                 * is read; both are restored afterwards */
                int mode = a->mode;
                int modifyable = a->modifyable;
                a->modifyable = false;
                a->mode = mode == m_forward ? m_backward : m_forward;
                {
                    struct node * p = C_style(a, "C", token);
                    a->mode = mode;
                    a->modifyable = modifyable;
                    return p;
                }
            }
        case c_not:
        case c_try:
        case c_fail:
        case c_test:
        case c_do:
        case c_goto:
        case c_gopast:
        case c_repeat:
            return C_style(a, "C", token);
        case c_loop:
        case c_atleast:
            return C_style(a, "AC", token);
        case c_setmark:
            return C_style(a, "i", token);
        case c_tomark:
        case c_atmark:
        case c_hop:
            return C_style(a, "A", token);
        case c_delete:
            check_modifyable(a);
            /* no break: 'delete' takes no operands, like the cases below */
        case c_next:
        case c_tolimit:
        case c_atlimit:
        case c_leftslice:
        case c_rightslice:
        case c_true:
        case c_false:
        case c_debug:
            return C_style(a, "", token);
        case c_assignto:
        case c_sliceto:
            check_modifyable(a);
            return C_style(a, "s", token);
        case c_assign:
        case c_insert:
        case c_attach:
        case c_slicefrom:
            check_modifyable(a);
            return C_style(a, "S", token);
        case c_setlimit:
            return C_style(a, "CfD", token);
        case c_set:
        case c_unset:
            return C_style(a, "b", token);
        case c_dollar:
            get_token(a, c_name);
            {
                struct node * p;
                struct name * q = find_name(a);
                int mode = a->mode;
                int modifyable = a->modifyable;
                /* an unknown name (q == 0) is treated like a string name */
                switch (q ? q->type : t_string)
                    /* above line was: switch (q->type) - bug #1 fix 7/2/2003 */
                {
                    default: error(a, 34);
                        /* no break: continues as for a string name */
                    case t_string:
                        /* the command applied to the string is read in
                         * forward, modifyable mode */
                        a->mode = m_forward;
                        a->modifyable = true;
                        p = new_node(a, c_dollar);
                        p->left = read_C(a); break;
                    case t_integer:
                    /*  a->mode = m_integer;  */
                        p = new_node(a, read_AE_test(a));
                        p->AE = read_AE(a, 0); break;
                }
                p->name = q;
                a->mode = mode;
                a->modifyable = modifyable;
                return p;
            }
        case c_name:
            {
                struct name * q = find_name(a);
                struct node * p = new_node(a, c_name);
                unless (q == 0) {
                    q->used = true;
                    /* refine the node type according to what the name is */
                    switch (q->type) {
                        case t_boolean:
                            p->type = c_booltest; break;
                        case t_integer:
                            error(a, 35); /* integer name misplaced */
                            /* no break: node stays a c_name */
                        case t_string:
                            break;
                        case t_routine:
                        case t_external:
                            p->type = c_call;
                            check_routine_mode(a, q, a->mode);
                            break;
                        case t_grouping:
                            p->type = c_grouping; break;
                    }
                }
                p->name = q;
                return p;
            }
        case c_non:
            {
                struct node * p = new_node(a, token);
                read_token(t);
                /* an optional c_minus token may sit between 'non' and the name */
                if (t->token == c_minus) read_token(t);
                unless (check_token(a, c_name)) { omission_error(a, c_name); return p; }
                name_to_node(a, p, 'g');
                return p;
            }
        case c_literalstring:
            return read_literalstring(a);
        case c_among: return read_among(a);
        case c_substring: return read_substring(a);
        default: error(a, 1); return 0;
    }
}
/* Store the next character of p in W[0] and return how many symbols of p it
 * occupied.  With utf8 set the character is decoded by get_utf8(); otherwise
 * it is simply p[0] and the width is 1.
 */
static int next_symbol(symbol * p, symbol * W, int utf8) {
    int decoded;
    int width;

    if (!utf8) {
        W[0] = p[0];
        return 1;
    }
    width = get_utf8(p, & decoded);
    W[0] = decoded;
    return width;
}
/* Apply the characters of q to the grouping p and return the (possibly
 * reallocated) result.
 *
 * style == c_plus : every character of q is appended to p.
 * otherwise       : every occurrence in p of each character of q is removed.
 *
 * q is read one character at a time through next_symbol(), so with utf8 set
 * a multi-symbol sequence in q counts as a single character.
 */
static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
    int j = 0;
    symbol W[1];
    int width;
    if (style == c_plus) {
        while (j < SIZE(q)) {
            width = next_symbol(q + j, W, utf8);
            p = add_to_b(p, 1, W);
            j += width;
        }
    } else {
        while (j < SIZE(q)) {
            int i = 0;
            width = next_symbol(q + j, W, utf8);
            while (i < SIZE(p)) {
                if (p[i] == W[0]) {
                    memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
                    SIZE(p)--;
                    /* Do not advance: the element that has just moved into
                     * slot i must be tested too, otherwise the second of two
                     * adjacent duplicates would be left in the grouping. */
                } else {
                    i++;
                }
            }
            j += width;
        }
    }
    return p;
}
/* Read the definition of grouping q: a sequence of grouping names and
 * literal strings joined by c_plus / c_minus tokens.
 *
 * A new struct grouping is appended to a->groupings and attached to q.  Its
 * character set p->b is built up with alter_grouping(), and the smallest and
 * largest character values are recorded.  Error 16 is reported if the set
 * ends up empty.  The token that ended the definition is handed back to the
 * tokeniser.  On a malformed definition error 1 is reported and the function
 * returns early.
 */
static void read_define_grouping(struct analyser * a, struct name * q) {
    struct tokeniser * t = a->tokeniser;
    int style = c_plus;
    {
        NEW(grouping, p);
        if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
        a->groupings_end = p;
        q->grouping = p;
        p->next = 0;
        p->name = q;
        p->number = q->count;
        p->b = create_b(0);
        repeat {
            switch (read_token(t)) {
                case c_name:
                    {
                        struct name * r = find_name(a);
                        unless (r == 0) {
                            check_name_type(a, r, 'g');
                            if (r->grouping == 0) {
                                /* Nothing to combine with: either r is not a
                                 * grouping (already reported by
                                 * check_name_type) or it is a grouping with
                                 * no definition yet, reported here.
                                 * Dereferencing r->grouping would crash. */
                                if (r->type == t_grouping) error4(a, r);
                            } else {
                                p->b = alter_grouping(p->b, r->grouping->b, style, false);
                            }
                        }
                    }
                    break;
                case c_literalstring:
                    p->b = alter_grouping(p->b, t->b, style, a->utf8);
                    break;
                default: error(a, 1); return;
            }
            /* a c_plus or c_minus token continues the definition and sets
             * how the next operand is applied; anything else ends it */
            switch (read_token(t)) {
                case c_plus:
                case c_minus: style = t->token; break;
                default: goto label0;
            }
        }
    label0:
        {
            int i;
            int max = 0;
            int min = 1<<16;   /* sentinel: still 1<<16 if the set is empty */
            for (i = 0; i < SIZE(p->b); i++) {
                if (p->b[i] > max) max = p->b[i];
                if (p->b[i] < min) min = p->b[i];
            }
            p->largest_ch = max;
            p->smallest_ch = min;
            if (min == 1<<16) error(a, 16);
        }
        t->token_held = true; return;
    }
}
/* Read the definition of a routine named q (q may be 0 when the name was not
 * found).
 *
 * A c_define node is appended to the program list, the body is read with
 * read_C() after a c_as token, and the body is recorded as q->definition.
 * Errors reported: 36 if q already has a definition, 32 if q's recorded mode
 * differs from the current mode, 14 if a substring is still pending once the
 * body has been read.
 */
static void read_define_routine(struct analyser * a, struct name * q) {
    struct node * p = new_node(a, c_define);
    a->amongvar_needed = false;   /* set again while the body is analysed */
    unless (q == 0) {
        check_name_type(a, q, 'R');
        if (q->definition != 0) error(a, 36);
        /* a negative mode means none recorded yet: adopt the current one */
        if (q->mode < 0) q->mode = a->mode; else
        if (q->mode != a->mode) error2(a, 32, q->mode);
    }
    p->name = q;
    /* append p to the list of definitions making up the program */
    if (a->program == 0) a->program = p; else a->program_end->right = p;
    a->program_end = p;
    get_token(a, c_as);
    p->left = read_C(a);
    unless (q == 0) q->definition = p->left;

    if (a->substring != 0) {
        error2(a, 14, a->substring->line_number);
        a->substring = 0;
    }
    p->amongvar_needed = a->amongvar_needed;
}
static
void
read_define
(
struct
analyser
*
a
)
{
unless
(
get_token
(
a
,
c_name
))
return
;
{
struct
name
*
q
=
find_name
(
a
);
if
(
q
!=
0
&&
q
->
type
==
t_grouping
)
read_define_grouping
(
a
,
q
);
else
read_define_routine
(
a
,
q
);
}
}
/* Read a bracketed program section with the analyser switched to backward
 * mode; the previous mode is restored afterwards.
 */
static void read_backwardmode(struct analyser * a) {
    const int saved_mode = a->mode;

    a->mode = m_backward;
    if (get_token(a, c_bra)) {
        read_program_(a, c_ket);
        check_token(a, c_ket);
    }
    a->mode = saved_mode;
}
/* Read top-level declarations and definitions until the terminator.
 *
 * terminator is c_ket when reading a bracketed section (returns on the
 * closing c_ket) or negative when reading to the end of input.  A token
 * value of -1 from read_token() ends the loop in either case; if a c_ket was
 * expected, its omission is reported.
 */
static void read_program_(struct analyser * a, int terminator) {
    struct tokeniser * t = a->tokeniser;
    repeat {
        switch (read_token(t)) {
            case c_strings:     read_names(a, t_string); break;
            case c_booleans:    read_names(a, t_boolean); break;
            case c_integers:    read_names(a, t_integer); break;
            case c_routines:    read_names(a, t_routine); break;
            case c_externals:   read_names(a, t_external); break;
            case c_groupings:   read_names(a, t_grouping); break;
            case c_define:      read_define(a); break;
            case c_backwardmode:read_backwardmode(a); break;
            case c_ket:
                if (terminator == c_ket) return;
                /* no break: an unexpected c_ket is reported as error 1 */
            default:
                error(a, 1); break;
            case -1:
                unless (terminator < 0) omission_error(a, c_ket);
                return;
        }
    }
}
extern
void
read_program
(
struct
analyser
*
a
)
{
read_program_
(
a
,
-
1
);
{
struct
name
*
q
=
a
->
names
;
until
(
q
==
0
)
{
switch
(
q
->
type
)
{
case
t_external
:
case
t_routine
:
if
(
q
->
used
&&
q
->
definition
==
0
)
error4
(
a
,
q
);
break
;
case
t_grouping
:
if
(
q
->
used
&&
q
->
grouping
==
0
)
error4
(
a
,
q
);
break
;
}
q
=
q
->
next
;
}
}
if
(
a
->
tokeniser
->
error_count
==
0
)
{
struct
name
*
q
=
a
->
names
;
int
warned
=
false
;
until
(
q
==
0
)
{
unless
(
q
->
referenced
)
{
unless
(
warned
)
{
fprintf
(
stderr
,
"Declared but not used:"
);
warned
=
true
;
}
fprintf
(
stderr
,
" "
);
report_b
(
stderr
,
q
->
b
);
}
q
=
q
->
next
;
}
if
(
warned
)
fprintf
(
stderr
,
"
\n
"
);
q
=
a
->
names
;
warned
=
false
;
until
(
q
==
0
)
{
if
(
!
q
->
used
&&
(
q
->
type
==
t_routine
||
q
->
type
==
t_grouping
))
{
unless
(
warned
)
{
fprintf
(
stderr
,
"Declared and defined but not used:"
);
warned
=
true
;
}
fprintf
(
stderr
,
" "
);
report_b
(
stderr
,
q
->
b
);
}
q
=
q
->
next
;
}
if
(
warned
)
fprintf
(
stderr
,
"
\n
"
);
}
}
/* Allocate an analyser bound to tokeniser t.  The list heads, the among
 * count, every name_count entry and the pending substring are cleared, and
 * the analyser starts in forward, modifyable mode.
 */
extern struct analyser * create_analyser(struct tokeniser * t) {
    NEW(analyser, a);
    int kind = 0;

    a->tokeniser = t;

    /* empty lists */
    a->nodes = 0;
    a->names = 0;
    a->literalstrings = 0;
    a->program = 0;
    a->amongs = 0;
    a->among_count = 0;
    a->groupings = 0;

    /* initial state */
    a->mode = m_forward;
    a->modifyable = true;
    while (kind < t_size) {
        a->name_count[kind] = 0;
        kind++;
    }
    a->substring = 0;
    return a;
}
extern
void
close_analyser
(
struct
analyser
*
a
)
{
{
struct
node
*
q
=
a
->
nodes
;
until
(
q
==
0
)
{
struct
node
*
q_next
=
q
->
next
;
FREE
(
q
);
q
=
q_next
;
}
}
{
struct
name
*
q
=
a
->
names
;
until
(
q
==
0
)
{
struct
name
*
q_next
=
q
->
next
;
lose_b
(
q
->
b
);
FREE
(
q
);
q
=
q_next
;
}
}
{
struct
literalstring
*
q
=
a
->
literalstrings
;
until
(
q
==
0
)
{
struct
literalstring
*
q_next
=
q
->
next
;
lose_b
(
q
->
b
);
FREE
(
q
);
q
=
q_next
;
}
}
{
struct
among
*
q
=
a
->
amongs
;
until
(
q
==
0
)
{
struct
among
*
q_next
=
q
->
next
;
FREE
(
q
->
b
);
FREE
(
q
);
q
=
q_next
;
}
}
{
struct
grouping
*
q
=
a
->
groupings
;
until
(
q
==
0
)
{
struct
grouping
*
q_next
=
q
->
next
;
lose_b
(
q
->
b
);
FREE
(
q
);
q
=
q_next
;
}
}
FREE
(
a
);
}
libstemmer_c/compiler/driver.c
deleted
100644 → 0
View file @
1c6e8c67
#include <stdio.h>
/* for main etc */
#include <stdlib.h>
/* for free etc */
#include <string.h>
/* for strlen */
#include "header.h"
#define DEFAULT_PACKAGE "org.tartarus.snowball.ext"
#define DEFAULT_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
#define DEFAULT_AMONG_CLASS "org.tartarus.snowball.Among"
#define DEFAULT_STRING_CLASS "java.lang.StringBuilder"
/* Return non-zero when the two NUL-terminated strings are identical. */
static int eq(char * s1, char * s2) {
    int len = strlen(s1);
    int other_len = strlen(s2);

    if (len != other_len) return 0;
    return memcmp(s1, s2, len) == 0;
}
/* Write the command-line usage summary to stderr and terminate the program
 * with exit status 1.  The Java-specific options are listed only when
 * DISABLE_JAVA is not defined.  This function does not return.
 */
static void print_arglist(void) {
    fprintf(stderr, "Usage: snowball <file> [options]\n\n"
                    "options are: [-o[utput] file]\n"
                    " [-s[yntax]]\n"
#ifndef DISABLE_JAVA
                    " [-j[ava]]\n"
#endif
                    " [-c++]\n"
                    " [-w[idechars]]\n"
                    " [-u[tf8]]\n"
                    " [-n[ame] class name]\n"
                    " [-ep[refix] string]\n"
                    " [-vp[refix] string]\n"
                    " [-i[nclude] directory]\n"
                    " [-r[untime] path to runtime headers]\n"
#ifndef DISABLE_JAVA
                    " [-p[arentclassname] fully qualified parent class name]\n"
                    " [-P[ackage] package name for stemmers]\n"
                    " [-S[tringclass] StringBuffer-compatible class]\n"
                    " [-a[mongclass] fully qualified name of the Among class]\n"
#endif
           );
    exit(1);
}
/* Ensure argv[i] exists before an option consumes it as its value;
 * otherwise complain and print the usage summary.
 */
static void check_lim(int i, int argc) {
    if (i < argc) return;
    fprintf(stderr, "argument list is one short\n");
    print_arglist();
}
/* Open for writing the file whose name is held in b and return the stream.
 * If the file cannot be opened a message is written to stderr and the
 * program exits with status 1.
 */
static FILE * get_output(symbol * b) {
    char * path = b_to_s(b);
    FILE * stream = fopen(path, "w");

    if (!stream) {
        fprintf(stderr, "Can't open output %s\n", path);
        exit(1);
    }
    free(path);
    return stream;
}
static
void
read_options
(
struct
options
*
o
,
int
argc
,
char
*
argv
[])
{
char
*
s
;
int
i
=
2
;
/* set defauts: */
o
->
output_file
=
0
;
o
->
syntax_tree
=
false
;
o
->
externals_prefix
=
""
;
o
->
variables_prefix
=
0
;
o
->
runtime_path
=
0
;
o
->
parent_class_name
=
DEFAULT_BASE_CLASS
;
o
->
string_class
=
DEFAULT_STRING_CLASS
;
o
->
among_class
=
DEFAULT_AMONG_CLASS
;
o
->
package
=
DEFAULT_PACKAGE
;
o
->
name
=
""
;
o
->
make_lang
=
LANG_C
;
o
->
widechars
=
false
;
o
->
includes
=
0
;
o
->
includes_end
=
0
;
o
->
utf8
=
false
;
/* read options: */
repeat
{
if
(
i
>=
argc
)
break
;
s
=
argv
[
i
++
];
{
if
(
eq
(
s
,
"-o"
)
||
eq
(
s
,
"-output"
))
{
check_lim
(
i
,
argc
);
o
->
output_file
=
argv
[
i
++
];
continue
;
}
if
(
eq
(
s
,
"-n"
)
||
eq
(
s
,
"-name"
))
{
check_lim
(
i
,
argc
);
o
->
name
=
argv
[
i
++
];
continue
;
}
#ifndef DISABLE_JAVA
if
(
eq
(
s
,
"-j"
)
||
eq
(
s
,
"-java"
))
{
o
->
make_lang
=
LANG_JAVA
;
o
->
widechars
=
true
;
continue
;
}
#endif
if
(
eq
(
s
,
"-c++"
))
{
o
->
make_lang
=
LANG_CPLUSPLUS
;
continue
;
}
if
(
eq
(
s
,
"-w"
)
||
eq
(
s
,
"-widechars"
))
{
o
->
widechars
=
true
;
o
->
utf8
=
false
;
continue
;
}
if
(
eq
(
s
,
"-s"
)
||
eq
(
s
,
"-syntax"
))
{
o
->
syntax_tree
=
true
;
continue
;
}
if
(
eq
(
s
,
"-ep"
)
||
eq
(
s
,
"-eprefix"
))
{
check_lim
(
i
,
argc
);
o
->
externals_prefix
=
argv
[
i
++
];
continue
;
}
if
(
eq
(
s
,
"-vp"
)
||
eq
(
s
,
"-vprefix"
))
{
check_lim
(
i
,
argc
);
o
->
variables_prefix
=
argv
[
i
++
];
continue
;
}
if
(
eq
(
s
,
"-i"
)
||
eq
(
s
,
"-include"
))
{
check_lim
(
i
,
argc
);
{
NEW
(
include
,
p
);
symbol
*
b
=
add_s_to_b
(
0
,
argv
[
i
++
]);
b
=
add_s_to_b
(
b
,
"/"
);
p
->
next
=
0
;
p
->
b
=
b
;
if
(
o
->
includes
==
0
)
o
->
includes
=
p
;
else
o
->
includes_end
->
next
=
p
;
o
->
includes_end
=
p
;
}
continue
;
}
if
(
eq
(
s
,
"-r"
)
||
eq
(
s
,
"-runtime"
))
{
check_lim
(
i
,
argc
);
o
->
runtime_path
=
argv
[
i
++
];
continue
;
}
if
(
eq
(
s
,
"-u"
)
||
eq
(
s
,
"-utf8"
))
{
o
->
utf8
=
true
;
o
->
widechars
=
false
;
continue
;
}
#ifndef DISABLE_JAVA
if
(
eq
(
s
,
"-p"
)
||
eq
(
s
,
"-parentclassname"
))
{
check_lim
(
i
,
argc
);
o
->
parent_class_name
=
argv
[
i
++
];
continue
;
}
if
(
eq
(
s
,
"-P"
)
||
eq
(
s
,
"-Package"
))
{
check_lim
(
i
,
argc
);
o
->
package
=
argv
[
i
++
];
continue
;
}
if
(
eq
(
s
,
"-S"
)
||
eq
(
s
,
"-stringclass"
))
{
check_lim
(
i
,
argc
);
o
->
string_class
=
argv
[
i
++
];
continue
;
}
if
(
eq
(
s
,
"-a"
)
||
eq
(
s
,
"-amongclass"
))
{
check_lim
(
i
,
argc
);
o
->
among_class
=
argv
[
i
++
];
continue
;
}
#endif
fprintf
(
stderr
,
"'%s' misplaced
\n
"
,
s
);
print_arglist
();
}
}
}
/* Entry point of the compiler driver.
 *
 * Reads the options, loads the input file named by argv[1], analyses it, and
 * then either prints the syntax tree (when syntax_tree is set) or generates
 * output: <output>.h plus <output>.c (or <output>.cc for C++), or
 * <output>.java.  Exits with status 1 on usage errors, an unreadable input
 * file, or analysis errors.  Before returning 0 it reports any blocks still
 * counted in space_count.
 */
extern int main(int argc, char * argv[]) {

    NEW(options, o);
    if (argc == 1) print_arglist();
    read_options(o, argc, argv);
    {
        symbol * filename = add_s_to_b(0, argv[1]);
        symbol * u = get_input(filename);
        if (u == 0) {
            fprintf(stderr, "Can't open input %s\n", argv[1]);
            exit(1);
        }
        {
            struct tokeniser * t = create_tokeniser(u);
            struct analyser * a = create_analyser(t);
            t->widechars = o->widechars;
            t->includes = o->includes;
            a->utf8 = t->utf8 = o->utf8;
            read_program(a);
            if (t->error_count > 0) exit(1);
            if (o->syntax_tree) print_program(a);
            close_tokeniser(t);
            unless (o->syntax_tree) {
                struct generator * g;

                char * s = o->output_file;
                unless (s) {
                    fprintf(stderr, "Please include the -o option\n");
                    print_arglist();
                    exit(1);
                }
                if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".h");
                    o->output_h = get_output(b);
                    /* turn the trailing 'h' into 'c': "<s>.c" ... */
                    b[SIZE(b) - 1] = 'c';
                    if (o->make_lang == LANG_CPLUSPLUS) {
                        /* ... and into "<s>.cc" for C++ */
                        b = add_s_to_b(b, "c");
                    }
                    o->output_c = get_output(b);
                    lose_b(b);

                    g = create_generator_c(a, o);
                    generate_program_c(g);
                    close_generator_c(g);
                    fclose(o->output_c);
                    fclose(o->output_h);
                }
#ifndef DISABLE_JAVA
                if (o->make_lang == LANG_JAVA) {
                    symbol * b = add_s_to_b(0, s);
                    b = add_s_to_b(b, ".java");
                    o->output_java = get_output(b);
                    lose_b(b);
                    g = create_generator_java(a, o);
                    generate_program_java(g);
                    close_generator_java(g);
                    fclose(o->output_java);
                }
#endif
            }
            close_analyser(a);
        }
        lose_b(u);
        lose_b(filename);
    }
    {   /* free the list of include directories built by read_options() */
        struct include * p = o->includes;
        until (p == 0)
        {   struct include * q = p->next;
            lose_b(p->b); FREE(p); p = q;
        }
    }
    FREE(o);
    unless (space_count == 0) fprintf(stderr, "%d blocks unfreed\n", space_count);
    return 0;
}
libstemmer_c/compiler/generator.c
deleted
100644 → 0
View file @
1c6e8c67
#include <limits.h>
/* for INT_MAX */
#include <stdio.h>
/* for fprintf etc */
#include <stdlib.h>
/* for free etc */
#include <string.h>
/* for strlen */
#include "header.h"
/* Define this to get warning messages when optimisations can't be used. */
/* #define OPTIMISATION_WARNINGS */
/* recursive use: */
static
void
generate
(
struct
generator
*
g
,
struct
node
*
p
);
enum
special_labels
{
x_return
=
-
1
};
/* Hand out the next unused label number. */
static int new_label(struct generator * g) {
    const int label = g->next_label;

    g->next_label = label + 1;
    return label;
}
/* Output routines */
static
void
output_str
(
FILE
*
outfile
,
struct
str
*
str
)
{
char
*
s
=
b_to_s
(
str_data
(
str
));
fprintf
(
outfile
,
"%s"
,
s
);
free
(
s
);
}
/* Append the single character ch to the current output buffer. */
static void wch(struct generator * g, int ch) {
    str_append_ch(g->outbuf, ch); /* character */
}
/* Append a newline to the current output buffer and count the line. */
static void wnl(struct generator * g) {
    str_append_ch(g->outbuf, '\n'); /* newline */
    g->line_count++;
}
/* Append the NUL-terminated string s to the current output buffer. */
static void ws(struct generator * g, const char * s) {
    str_append_string(g->outbuf, s); /* string */
}
/* Append the integer i to the current output buffer. */
static void wi(struct generator * g, int i) {
    str_append_int(g->outbuf, i); /* integer */
}
/* Append the upper-case hex digit for the low four bits of i. */
static void wh_ch(struct generator * g, int i) {
    static const char hex_digits[] = "0123456789ABCDEF";
    const int nibble = i & 0xF;

    str_append_ch(g->outbuf, hex_digits[nibble]);
}
static
void
wh
(
struct
generator
*
g
,
int
i
)
{
if
(
i
>>
4
)
wh
(
g
,
i
>>
4
);
wh_ch
(
g
,
i
);
/* hex integer */
}
/* Append i preceded by one space if it is below 100 and by a further space
 * if it is below 10 (integer, width 3).
 */
static void wi3(struct generator * g, int i) {
    int padding = (i < 100) + (i < 10);

    while (padding > 0) {
        wch(g, ' ');
        padding--;
    }
    wi(g, i);
}
/* Write the generated-code name of variable p.
 *
 * Strings, booleans and integers are written as an indexed slot: a type
 * letter followed by "[count]".  Externals are written as the configured
 * externals prefix followed by the source name.  Any other type is written
 * as its type letter, '_' and the source name.
 */
static void wvn(struct generator * g, struct name * p) {  /* variable name */

    int ch = "SBIrxg"[p->type];   /* type letter, indexed by name type */
    switch (p->type) {
        case t_string:
        case t_boolean:
        case t_integer:
            wch(g, ch); wch(g, '['); wi(g, p->count); wch(g, ']'); return;
        case t_external:
            ws(g, g->options->externals_prefix); break;
        default:
            wch(g, ch); wch(g, '_');
    }
    str_append_b(g->outbuf, p->b);
}
/* Write a reference to variable p: names whose type is below t_routine are
 * prefixed with "z->", then the name is written by wvn().
 */
static void wv(struct generator * g, struct name * p) {
    const int needs_env_prefix = p->type < t_routine;

    if (needs_env_prefix) {
        ws(g, "z->");
    }
    wvn(g, p);
}
/* Write the symbols of p as a brace-enclosed C initialiser list.
 *
 * Characters in the range 32..126 are written as character constants, with
 * a backslash before ' and \; everything else is written as a hexadecimal
 * integer constant.
 */
static void wlitarray(struct generator * g, symbol * p) {  /* write literal array */

    ws(g, "{ ");
    {
        int i;
        for (i = 0; i < SIZE(p); i++) {
            int ch = p[i];
            if (32 <= ch && ch < 127) {
                wch(g, '\'');
                switch (ch) {
                    case '\'':
                    case '\\': wch(g, '\\');
                        /* no break: the character itself follows the backslash */
                    default: wch(g, ch);
                }
                wch(g, '\'');
            }  else {
                wch(g, '0'); wch(g, 'x'); wh(g, ch);
            }
            /* separator between elements, none after the last */
            if (i < SIZE(p) - 1) ws(g, ", ");
        }
    }
    ws(g, " }");
}
/* Write a reference to the literal string p.
 *
 * An empty literal is written as "0".  Otherwise a declaration
 * "static const symbol s_<n>[] = { ... };" is appended to g->declarations,
 * "s_<n>" is written to the current output buffer, and the literal-string
 * counter n is incremented.
 */
static void wlitref(struct generator * g, symbol * p) {  /* write ref to literal array */

    if (SIZE(p) == 0) ws(g, "0"); else {
        struct str * s = g->outbuf;
        /* temporarily redirect output to the declarations buffer */
        g->outbuf = g->declarations;
        ws(g, "static const symbol s_"); wi(g, g->literalstring_count); ws(g, "[] = ");
        wlitarray(g, p);
        ws(g, ";\n");
        g->outbuf = s;
        ws(g, "s_"); wi(g, g->literalstring_count);
        g->literalstring_count++;
    }
}
/* Write the left margin: one indent unit per level of g->margin. */
static void wm(struct generator * g) {
    int level = 0;

    while (level < g->margin) {
        ws(g, " ");
        level++;
    }
}
/* Write a trailing comment identifying node p: the name of its token type,
 * the name it refers to (if any) and its source line number, followed by a
 * newline.
 */
static void wc(struct generator * g, struct node * p) {      /* comment */

    ws(g, " /* ");
    ws(g, (char *) name_of_token(p->type));
    unless (p->name == 0) {
        ws(g, " ");
        str_append_b(g->outbuf, p->name->b);
    }
    ws(g, ", line "); wi(g, p->line_number); ws(g, " */");
    wnl(g);
}
static
void
wms
(
struct
generator
*
g
,
const
char
*
s
)
{
wm
(
g
);
ws
(
g
,
s
);
}
/* margin + string */
/* Start a block: write the margin and an opening brace, then increase the
 * margin by one level.
 */
static void wbs(struct generator * g) { /* block start */
    wms(g, "{ ");
    g->margin++;
}
/* End a block: decrease the margin and write the closing brace on its own
 * line.  If line_labelled equals the current line count (wsetl() records
 * the line count right after writing a label), a lone ";" is written first
 * so that the label is followed by a statement.
 */
static void wbe(struct generator * g) {    /* block end */

    if (g->line_labelled == g->line_count) { wms(g, ";"); wnl(g); }
    g->margin--;
    wms(g, "}"); wnl(g);
}
/* Write a declaration that saves the cursor ("keep c").  A fresh keep
 * number n is taken from g->keep_count; in forward mode the cursor itself is
 * saved in c<n>, otherwise its distance from the limit is saved in m<n>.
 */
static void wk(struct generator * g, struct node * p) {
    const int n = ++g->keep_count;

    if (p->mode != m_forward) {
        ws(g, "int m");
        wi(g, n);
        ws(g, " = z->l - z->c; (void)m");
        wi(g, n);
        ws(g, ";");
        return;
    }
    ws(g, "int c");
    wi(g, n);
    ws(g, " = z->c;");
}
/* Write the statement that restores the cursor from the variable numbered
 * keep_token, as saved by wk() ("restore c").
 */
static void wrestore(struct generator * g, struct node * p, int keep_token) {
    const char * lhs = (p->mode == m_forward) ? "z->c = c"
                                              : "z->c = z->l - m";
    ws(g, lhs);
    wi(g, keep_token);
    ws(g, ";");
}
/* Write the statement that moves the cursor one place in the direction of
 * p's mode ("increment c").
 */
static void winc(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        ws(g, "z->c++;");
    } else {
        ws(g, "z->c--;");
    }
}
/* Write the label "lab<n>:" on its own line, one margin level to the left of
 * the current one, and remember in g->line_labelled that the line just
 * completed carried a label (used by wbe()).
 */
static void wsetl(struct generator * g, int n) {

    g->margin--;
    wms(g, "lab"); wi(g, n); wch(g, ':'); wnl(g);
    g->line_labelled = g->line_count;
    g->margin++;
}
static
void
wgotol
(
struct
generator
*
g
,
int
n
)
{
wms
(
g
,
"goto lab"
);
wi
(
g
,
n
);
wch
(
g
,
';'
);
wnl
(
g
);
}
/* Write the code that signals failure ("fail").
 *
 * This is "return 0;" when the failure label is x_return, otherwise a goto
 * to the failure label (which is then marked as used).  When a failure
 * string is set, it is written first and the whole is wrapped in braces.
 */
static void wf(struct generator * g) {
    const int wrapped = g->failure_string != 0;

    if (wrapped) {
        ws(g, "{ ");
        ws(g, g->failure_string);
        wch(g, ' ');
    }

    if (g->failure_label == x_return) {
        ws(g, "return 0;");
    } else {
        ws(g, "goto lab");
        wi(g, g->failure_label);
        wch(g, ';');
        g->label_used = 1;
    }

    if (g->failure_string != 0) ws(g, " }");
}
/* Write "if at limit, fail": a test of the cursor against the limit for p's
 * direction, followed by the failure code from wf().
 */
static void wlim(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        ws(g, "if (z->c >= z->l) ");
    } else {
        ws(g, "if (z->c <= z->lb) ");
    }
    wf(g);
}
/* Formatted write.
 *
 * Characters of s are copied to the output, except that '~' introduces an
 * escape selected by the following character:
 *
 *   ~C  comment for node p            ~k  keep the cursor (wk)
 *   ~K  keep the cursor for a test    ~R  restore the cursor for a test
 *   ~i  increment the cursor          ~l  fail if at the limit
 *   ~f  failure code                  ~M  margin
 *   ~N  newline                       ~{ ~}  block start / block end
 *   ~+ ~-  increase / decrease the margin
 *   ~Sd ~Id ~Jd  string / integer / width-3 integer from g->S[d], g->I[d]
 *   ~Vd ~Wd  variable reference / variable name for g->V[d]
 *   ~Ld ~Ad  literal reference / literal array for g->L[d]
 *   ~$  's' if p has a literal string, else 'v'
 *   ~p  the externals prefix
 *
 * where d is a single digit.  Any other character after '~' is written
 * as is.  Escapes that consult p must not be used when p is 0.
 */
static void wp(struct generator * g, const char * s, struct node * p) { /* formatted write */
    int i = 0;
    int l = strlen(s);
    until (i >= l) {
        int ch = s[i++];
        if (ch != '~') wch(g, ch); else
        switch(s[i++]) {
            default: wch(g, s[i - 1]); continue;
            case 'C': wc(g, p); continue;
            case 'k': wk(g, p); continue;
            case 'K': /* keep for c_test */
                ws(g, p->mode == m_forward ? "int c_test = z->c;" :
                                             "int m_test = z->l - z->c;");
                continue;
            case 'R': /* restore for c_test */
                ws(g, p->mode == m_forward ? "z->c = c_test;" :
                                             "z->c = z->l - m_test;");
                continue;
            case 'i': winc(g, p); continue;
            case 'l': wlim(g, p); continue;
            case 'f': wf(g); continue;
            case 'M': wm(g); continue;
            case 'N': wnl(g); continue;
            case '{': wbs(g); continue;
            case '}': wbe(g); continue;
            /* the escapes below consume one more character: a digit index */
            case 'S': ws(g, g->S[s[i++] - '0']); continue;
            case 'I': wi(g, g->I[s[i++] - '0']); continue;
            case 'J': wi3(g, g->I[s[i++] - '0']); continue;
            case 'V': wv(g, g->V[s[i++] - '0']); continue;
            case 'W': wvn(g, g->V[s[i++] - '0']); continue;
            case 'L': wlitref(g, g->L[s[i++] - '0']); continue;
            case 'A': wlitarray(g, g->L[s[i++] - '0']); continue;
            case '+': g->margin++; continue;
            case '-': g->margin--; continue;
            case '$': /* insert_s, insert_v etc */
                wch(g, p->literalstring == 0 ? 'v' : 's');
                continue;
            case 'p': ws(g, g->options->externals_prefix); continue;
        }
    }
}
static
void
w
(
struct
generator
*
g
,
const
char
*
s
)
{
wp
(
g
,
s
,
0
);
}
/* Write the C expression for arithmetic-expression node p.  Binary operators
 * are written fully parenthesised as "(left op right)"; node types not
 * listed here produce no output.
 */
static void generate_AE(struct generator * g, struct node * p) {
    const char * op = 0;

    switch (p->type) {
        case c_name:
            wv(g, p->name);
            return;
        case c_number:
            wi(g, p->number);
            return;
        case c_maxint:
            ws(g, "MAXINT");
            return;
        case c_minint:
            ws(g, "MININT");
            return;
        case c_neg:
            wch(g, '-');
            generate_AE(g, p->right);
            return;
        case c_sizeof:
            g->V[0] = p->name;
            w(g, "SIZE(~V0)");
            return;
        case c_cursor:
            w(g, "z->c");
            return;
        case c_limit:
            w(g, p->mode == m_forward ? "z->l" : "z->lb");
            return;
        case c_size:
            w(g, "SIZE(z->p)");
            return;

        /* binary operators share the code after the switch */
        case c_multiply: op = " * "; break;
        case c_plus:     op = " + "; break;
        case c_minus:    op = " - "; break;
        case c_divide:   op = " / "; break;

        default:
            return;
    }

    wch(g, '(');
    generate_AE(g, p->left);
    ws(g, op);
    generate_AE(g, p->right);
    wch(g, ')');
}
/* K_needed() tests to see if we really need to keep c. Not true when the
   command does not touch the cursor. This and repeat_score() could be
   elaborated almost indefinitely.
*/

/* Routine calls nested deeper than this are assumed to need the cursor. */
enum { K_NEEDED_MAX_CALL_DEPTH = 100 };

/* Worker for K_needed().  call_depth counts the routine calls followed so
 * far, so that a directly or mutually recursive routine cannot make the
 * analysis recurse without bound.
 */
static int K_needed_(struct generator * g, struct node * p, int call_depth) {

    until (p == 0) {
        switch (p->type) {
            /* commands listed here are taken not to require keeping c */
            case c_dollar:
            case c_leftslice:
            case c_rightslice:
            case c_mathassign:
            case c_plusassign:
            case c_minusassign:
            case c_multiplyassign:
            case c_divideassign:
            case c_eq:
            case c_ne:
            case c_gr:
            case c_ge:
            case c_ls:
            case c_le:
            case c_sliceto:
            case c_true:
            case c_false:
            case c_debug:
                break;

            case c_call:
                /* Pessimistic answer once the depth limit is hit: keeping
                 * the cursor unnecessarily is harmless, recursing forever is
                 * not. */
                if (call_depth >= K_NEEDED_MAX_CALL_DEPTH) return true;
                if (K_needed_(g, p->name->definition, call_depth + 1)) return true;
                break;

            case c_bra:
                if (K_needed_(g, p->left, call_depth)) return true;
                break;

            default: return true;
        }
        p = p->right;
    }
    return false;
}

/* Return true if the command list starting at p may move the cursor, i.e. if
 * the caller has to save and restore c around it.
 */
static int K_needed(struct generator * g, struct node * p) {
    return K_needed_(g, p, 0);
}
/* Routine calls nested deeper than this are scored as needing a restore. */
enum { REPEAT_SCORE_MAX_CALL_DEPTH = 100 };

/* Worker for repeat_score().  call_depth counts the routine calls followed
 * so far, so that a directly or mutually recursive routine cannot make the
 * analysis recurse without bound.
 */
static int repeat_score_(struct generator * g, struct node * p, int call_depth) {
    int score = 0;
    until (p == 0) {
        switch (p->type) {
            /* commands listed here add nothing to the score */
            case c_dollar:
            case c_leftslice:
            case c_rightslice:
            case c_mathassign:
            case c_plusassign:
            case c_minusassign:
            case c_multiplyassign:
            case c_divideassign:
            case c_eq:
            case c_ne:
            case c_gr:
            case c_ge:
            case c_ls:
            case c_le:
            case c_sliceto:   /* case c_not: must not be included here! */
            case c_debug:
                break;

            case c_call:
                if (call_depth >= REPEAT_SCORE_MAX_CALL_DEPTH) {
                    /* Pessimistic: count the call as needing a restore
                     * rather than recursing without bound. */
                    score += 2;
                } else {
                    score += repeat_score_(g, p->name->definition, call_depth + 1);
                }
                break;

            case c_bra:
                score += repeat_score_(g, p->left, call_depth);
                break;

            case c_name:
            case c_literalstring:
            case c_next:
            case c_grouping:
            case c_non:
            case c_hop:
                score = score + 1;
                break;

            default:
                score = 2;
                break;
        }
        p = p->right;
    }
    return score;
}

/* Score the command list starting at p for use inside a repeat: a result of
 * 2 or more means the cursor has to be reinstated after a failed iteration
 * (see repeat_restore()).
 */
static int repeat_score(struct generator * g, struct node * p) {
    return repeat_score_(g, p, 0);
}
/* tests if an expression requires cursor reinstatement in a repeat */
static
int
repeat_restore
(
struct
generator
*
g
,
struct
node
*
p
)
{
return
repeat_score
(
g
,
p
)
>=
2
;
}
/* Generate code for a bracketed command list: each command on p's 'left'
 * chain, in order.
 */
static void generate_bra(struct generator * g, struct node * p) {
    struct node * command;

    for (command = p->left; command != 0; command = command->right) {
        generate(g, command);
    }
}
/* Generate code for 'and': every operand is run in turn.  When the operands
 * may move the cursor, it is saved once at the start and restored between
 * operands (but not after the last), inside a block of its own.
 */
static void generate_and(struct generator * g, struct node * p) {
    int keep_c = 0;   /* number of the saved-cursor variable, 0 if none */
    if (K_needed(g, p->left)) {
        wp(g, "~{~k~C", p);
        keep_c = g->keep_count;
    } else {
        wp(g, "~M~C", p);
    }
    p = p->left;
    until (p == 0) {
        generate(g, p);
        if (keep_c && p->right != 0) {
            w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
        }
        p = p->right;
    }
    if (keep_c) w(g, "~}");
}
/* Generate code for 'or': operands are tried in turn until one succeeds.
 *
 * Every operand except the last gets its own failure label; on success
 * control jumps to out_lab, on failure the cursor is restored (if it was
 * saved) and the next operand is tried.  The last operand is generated with
 * the caller's failure label and string reinstated, so that its failure is
 * the failure of the whole 'or'.
 */
static void generate_or(struct generator * g, struct node * p) {
    int keep_c = 0;   /* number of the saved-cursor variable, 0 if none */

    /* the caller's failure context, reinstated for the last operand */
    int used = g->label_used;
    int a0 = g->failure_label;
    const char * a1 = g->failure_string;

    int out_lab = new_label(g);

    if (K_needed(g, p->left)) {
        wp(g, "~{~k~C", p);
        keep_c = g->keep_count;
    } else {
        wp(g, "~M~C", p);
    }
    p = p->left;
    g->failure_string = 0;
    until (p->right == 0) {
        g->failure_label = new_label(g);
        g->label_used = 0;
        generate(g, p);
        wgotol(g, out_lab);
        /* the label is only written if the operand actually jumps to it */
        if (g->label_used)
            wsetl(g, g->failure_label);
        if (keep_c) {
            w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
        }
        p = p->right;
    }
    g->label_used = used;
    g->failure_label = a0;
    g->failure_string = a1;

    generate(g, p);
    if (keep_c) w(g, "~}");
    wsetl(g, out_lab);
}
/* Generate code for 'backwards': the backward limit is set to the current
 * cursor and the cursor is moved to the limit, the operand is generated, and
 * afterwards the cursor is set back to the backward limit.
 */
static void generate_backwards(struct generator * g, struct node * p) {

    wp(g,"~Mz->lb = z->c; z->c = z->l;~C~N", p);
    generate(g, p->left);
    w(g, "~Mz->c = z->lb;~N");
}
/* Generate code for 'not': the operand is generated with a private failure
 * label.  If the operand succeeds, control reaches the caller's failure
 * code; if it fails, control jumps past that to the private label.  The
 * cursor is restored afterwards when it was saved.
 */
static void generate_not(struct generator * g, struct node * p) {
    int keep_c = 0;   /* number of the saved-cursor variable, 0 if none */

    /* the caller's failure context */
    int used = g->label_used;
    int a0 = g->failure_label;
    const char * a1 = g->failure_string;

    if (K_needed(g, p->left)) {
        wp(g, "~{~k~C", p);
        keep_c = g->keep_count;
    } else {
        wp(g, "~M~C", p);
    }

    g->failure_label = new_label(g);
    g->label_used = 0;
    g->failure_string = 0;
    generate(g, p->left);

    {
        int l = g->failure_label;   /* the operand's private label ... */
        int u = g->label_used;      /* ... and whether it was jumped to */

        g->label_used = used;
        g->failure_label = a0;
        g->failure_string = a1;

        /* written with the caller's context: the operand succeeded */
        w(g, "~M~f~N");
        if (u)
            wsetl(g, l);
    }
    if (keep_c) {
        w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N~}");
    }
}
/* Generate code for 'try': the operand is generated with a private failure
 * label placed directly after it, so its failure is not propagated.  When
 * the operand may move the cursor, the cursor is saved first and the failure
 * string is set so that it is restored on the failure path.
 */
static void generate_try(struct generator * g, struct node * p) {
    int keep_c = K_needed(g, p->left);

    if (keep_c) {
        if (p->mode == m_forward) {
            wp(g, "~{int c_keep = z->c;~C", p);
            g->failure_string = "z->c = c_keep;";
        } else {
            wp(g, "~{int m_keep = z->l - z->c;/* (void) m_keep;*/~C", p);
            g->failure_string = "z->c = z->l - m_keep;";
        }
    } else {
        wp(g, "~M~C", p);
        g->failure_string = 0;
    }

    g->failure_label = new_label(g);
    g->label_used = 0;
    generate(g, p->left);

    if (g->label_used)
        wsetl(g, g->failure_label);

    if (keep_c) w(g, "~}");
}
/* Generate code for 'set': assign 1 to the variable named by p. */
static void generate_set(struct generator * g, struct node * p) {
    g->V[0] = p->name; wp(g, "~M~V0 = 1;~C", p);
}
/* Generate code for 'unset': assign 0 to the variable named by p. */
static void generate_unset(struct generator * g, struct node * p) {
    g->V[0] = p->name; wp(g, "~M~V0 = 0;~C", p);
}
/* Generate code for 'fail': the operand is generated, followed by the
 * failure code.
 */
static void generate_fail(struct generator * g, struct node * p) {
    generate(g, p->left);
    wp(g, "~M~f~C", p);
}
/* generate_test() also implements 'reverse' */

/* Generate the operand; when it may move the cursor, the cursor is saved
 * beforehand and restored afterwards, inside a block of its own.
 */
static void generate_test(struct generator * g, struct node * p) {
    const int keep_c = K_needed(g, p->left);

    wp(g, keep_c ? "~{~K~C" : "~M~C", p);
    generate(g, p->left);
    if (!keep_c) return;
    wp(g, "~M~R~N~}", p);
}
/* Generate code for 'do': the operand is generated with a private failure
 * label placed directly after it, so its failure is not propagated.  When
 * the operand may move the cursor, the cursor is saved first and restored
 * afterwards.
 */
static void generate_do(struct generator * g, struct node * p) {
    int keep_c = 0;   /* number of the saved-cursor variable, 0 if none */
    if (K_needed(g, p->left)) {
        wp(g, "~{~k~C", p);
        keep_c = g->keep_count;
    } else {
        wp(g, "~M~C", p);
    }

    g->failure_label = new_label(g);
    g->label_used = 0;
    g->failure_string = 0;
    generate(g, p->left);

    if (g->label_used)
        wsetl(g, g->failure_label);
    if (keep_c) {
        w(g, "~M"); wrestore(g, p, keep_c);
        w(g, "~N~}");
    }
}
/* Generate code for 'next': move the cursor one character in the direction
 * of p's mode, failing at the limit.  With the utf8 option the move is made
 * through skip_utf8(); otherwise by a limit test and a cursor increment.
 */
static void generate_next(struct generator * g, struct node * p) {
    if (!g->options->utf8) {
        wp(g, "~M~l~N"
              "~M~i~C", p);
        return;
    }

    w(g, p->mode == m_forward
             ? "~{int ret = skip_utf8(z->p, z->c, 0, z->l, 1"
             : "~{int ret = skip_utf8(z->p, z->c, z->lb, 0, -1");
    wp(g, ");~N"
          "~Mif (ret < 0) ~f~N"
          "~Mz->c = ret;~C"
          "~}", p);
}
/* Generate 'goto' / 'gopast' for a grouping or inverted grouping p.
 *
 * The generated code calls a runtime function whose name is assembled from
 * "in"/"out" (chosen by complement), "_grouping", "_b" in backward mode and
 * "_U" with the utf8 option.  It is passed the grouping and its smallest and
 * largest character values.  For 'goto' (is_goto set) only the result is
 * tested; for 'gopast' the result is additionally added to (forward) or
 * subtracted from (backward) the cursor.
 */
static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {

    struct grouping * q = p->name->grouping;
    g->S[0] = p->mode == m_forward ? "" : "_b";
    g->S[1] = complement ? "in" : "out";
    g->S[2] = g->options->utf8 ? "_U" : "";
    g->V[0] = p->name;
    g->I[0] = q->smallest_ch;
    g->I[1] = q->largest_ch;
    if (is_goto) {
        wp(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1) < 0) ~f /* goto */~C", p);
    } else {
        wp(g, "~{ /* gopast */~C"
              "~Mint ret = ~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1);~N"
              "~Mif (ret < 0) ~f~N", p);
        if (p->mode == m_forward)
            w(g, "~Mz->c += ret;~N");
        else
            w(g, "~Mz->c -= ret;~N");
        w(g, "~}");
    }
}
/* Generate code for 'goto' (style == 1) or 'gopast' (style == 0).
 *
 * Groupings and inverted groupings are handed to generate_GO_grouping().
 * Otherwise a "while(1)" loop is written: the operand is tried with a
 * private failure label; on success the loop is left (for 'goto' after
 * restoring the cursor to where the match started); on failure the cursor
 * is restored if it was saved, advanced by one character with
 * generate_next() using the caller's failure context, and the operand is
 * tried again.
 */
static void generate_GO(struct generator * g, struct node * p, int style) {
    int keep_c = 0;   /* number of the saved-cursor variable, 0 if none */

    /* the caller's failure context, reinstated for the advance step */
    int used = g->label_used;
    int a0 = g->failure_label;
    const char * a1 = g->failure_string;

    if (p->left->type == c_grouping || p->left->type == c_non) {
        /* Special case for "goto" or "gopast" when used on a grouping or an
         * inverted grouping - the movement of c by the matching action is
         * exactly what we want! */
#ifdef OPTIMISATION_WARNINGS
        printf("Optimising %s %s\n", style ? "goto" : "gopast", p->left->type == c_non ? "non" : "grouping");
#endif
        generate_GO_grouping(g, p->left, style, p->left->type == c_non);
        return;
    }

    w(g, "~Mwhile(1) {"); wp(g, "~C~+", p);

    if (style == 1 || repeat_restore(g, p->left)) {
        wp(g, "~M~k~N", p);
        keep_c = g->keep_count;
    }

    g->failure_label = new_label(g);
    g->label_used = 0;
    generate(g, p->left);

    if (style == 1) {
        /* include for goto; omit for gopast */
        w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
    }
    w(g, "~Mbreak;~N");
    if (g->label_used)
        wsetl(g, g->failure_label);
    if (keep_c) {
        w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
    }

    g->label_used = used;
    g->failure_label = a0;
    g->failure_string = a1;

/*  wp(g, "~M~l~N"
          "~M~i~N", p);  */
    generate_next(g, p);
    w(g, "~}");
}
/* Emit code for `loop AE C`: a counted for-loop, initialised from the
 * arithmetic expression p->AE, whose body is the code for p->left. */
static void generate_loop(struct generator * g, struct node * p) {
    w(g, "~{int i; for (i = ");
    generate_AE(g, p->AE);
    wp(g, "; i > 0; i--)~C~{", p);

    generate(g, p->left);

    w(g, "~}~}");
}
/* Emit the while(1) loop shared by `repeat` and `atleast`.
 *
 * The body is the code for p->left followed by `continue`; the failure
 * label of the body leads to an optional cursor restore and a `break`.
 * When atleast_case is non-zero the loop also decrements the counter `i`
 * that generate_atleast() declares. */
static void generate_repeat(struct generator * g, struct node * p, int atleast_case) {
    int saved_cursor = 0;

    wp(g, "~Mwhile(1) {~C~+", p);

    if (repeat_restore(g, p->left)) {
        wp(g, "~M~k~N", p);
        saved_cursor = g->keep_count;
    }

    g->failure_label = new_label(g);
    g->label_used = 0;
    g->failure_string = 0;
    generate(g, p->left);

    if (atleast_case) w(g, "~Mi--;~N");
    w(g, "~Mcontinue;~N");

    if (g->label_used) wsetl(g, g->failure_label);

    if (saved_cursor) {
        w(g, "~M");
        wrestore(g, p, saved_cursor);
        w(g, "~N");
    }

    w(g, "~Mbreak;~N~}");
}
/* Emit code for `atleast AE C`: declare a counter `i` initialised from
 * p->AE, run the repeat loop (which decrements it), then signal failure if
 * the counter is still positive. */
static void generate_atleast(struct generator * g, struct node * p) {
    int outer_used;
    int outer_label;
    const char * outer_string;

    w(g, "~{int i = ");
    generate_AE(g, p->AE);
    w(g, ";~N");

    /* generate_repeat() overwrites the failure state; put it back so the
     * final ~f refers to the enclosing construct. */
    outer_used = g->label_used;
    outer_label = g->failure_label;
    outer_string = g->failure_string;

    generate_repeat(g, p, true);

    g->label_used = outer_used;
    g->failure_label = outer_label;
    g->failure_string = outer_string;

    w(g, "~Mif (i > 0) ~f~N~}");
}
/* Emit code for `setmark`: store the cursor z->c in the variable p->name. */
static void generate_setmark(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g, "~M~V0 = z->c;~C", p);
}
/* Emit code for `tomark AE`: fail if the cursor is already beyond the mark
 * (">" when moving forward, "<" when moving backward), otherwise set the
 * cursor to the mark.  The expression p->AE is emitted twice. */
static void generate_tomark(struct generator * g, struct node * p) {
    g->S[0] = p->mode == m_forward ? ">" : "<";

    /* Guard. */
    w(g, "~Mif (z->c ~S0 ");
    generate_AE(g, p->AE);
    w(g, ") ~f~N");

    /* Assignment. */
    w(g, "~Mz->c = ");
    generate_AE(g, p->AE);
    wp(g, ";~C", p);
}
/* Emit code for `atmark AE`: fail unless the cursor equals p->AE. */
static void generate_atmark(struct generator * g, struct node * p) {
    w(g, "~Mif (z->c != ");
    generate_AE(g, p->AE);
    wp(g, ") ~f~C", p);
}
static
void
generate_hop
(
struct
generator
*
g
,
struct
node
*
p
)
{
g
->
S
[
0
]
=
p
->
mode
==
m_forward
?
"+"
:
"-"
;
g
->
S
[
1
]
=
p
->
mode
==
m_forward
?
"0"
:
"z->lb"
;
if
(
g
->
options
->
utf8
)
{
w
(
g
,
"~{int ret = skip_utf8(z->p, z->c, ~S1, z->l, ~S0 "
);
generate_AE
(
g
,
p
->
AE
);
w
(
g
,
");~N"
);
w
(
g
,
"~Mif (ret < 0) ~f~N"
);
}
else
{
w
(
g
,
"~{int ret = z->c ~S0 "
);
generate_AE
(
g
,
p
->
AE
);
w
(
g
,
";~N"
);
w
(
g
,
"~Mif (~S1 > ret || ret > z->l) ~f~N"
);
}
wp
(
g
,
"~Mz->c = ret;~C"
"~}"
,
p
);
}
/* Emit code for `delete`: call slice_del(z) and propagate a negative
 * return value out of the generated routine. */
static void generate_delete(struct generator * g, struct node * p) {
    wp(g, "~{int ret = slice_del(z);~C", p);
    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
}
static
void
generate_tolimit
(
struct
generator
*
g
,
struct
node
*
p
)
{
g
->
S
[
0
]
=
p
->
mode
==
m_forward
?
""
:
"b"
;
wp
(
g
,
"~Mz->c = z->l~S0;~C"
,
p
);
}
/* Emit code for `atlimit`: fail while the cursor is still short of z->l
 * (forward) or still past z->lb (backward). */
static void generate_atlimit(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        g->S[0] = "";
        g->S[1] = "<";
    } else {
        g->S[0] = "b";
        g->S[1] = ">";
    }
    wp(g, "~Mif (z->c ~S1 z->l~S0) ~f~C", p);
}
static
void
generate_leftslice
(
struct
generator
*
g
,
struct
node
*
p
)
{
g
->
S
[
0
]
=
p
->
mode
==
m_forward
?
"bra"
:
"ket"
;
wp
(
g
,
"~Mz->~S0 = z->c;~C"
,
p
);
}
static
void
generate_rightslice
(
struct
generator
*
g
,
struct
node
*
p
)
{
g
->
S
[
0
]
=
p
->
mode
==
m_forward
?
"ket"
:
"bra"
;
wp
(
g
,
"~Mz->~S0 = z->c;~C"
,
p
);
}
/* Emit code for `=> name`: the generated code calls assign_to(z, name),
 * stores the result back in the variable and returns -1 if it is 0. */
static void generate_assignto(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g, "~M~V0 = assign_to(z, ~V0);~C"
          "~Mif (~V0 == 0) return -1;~C", p);
}
/* Emit code for `-> name`: the generated code calls slice_to(z, name),
 * stores the result back in the variable and returns -1 if it is 0. */
static void generate_sliceto(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g, "~M~V0 = slice_to(z, ~V0);~C"
          "~Mif (~V0 == 0) return -1;~C", p);
}
/* Emit the argument(s) naming a string operand: "<size>, <literal ref>"
 * when the node carries a literal string, otherwise the variable p->name. */
static void generate_data_address(struct generator * g, struct node * p) {
    symbol * literal = p->literalstring;

    if (literal == 0) {
        wv(g, p->name);
        return;
    }

    wi(g, SIZE(literal));
    w(g, ", ");
    wlitref(g, literal);
}
/* Emit code for `insert` / `attach` (style is the node type, c_insert or
 * c_attach).
 *
 * The generated code inserts the operand at the cursor.  The cursor is
 * saved and put back afterwards for `attach` in forward mode and for
 * `insert` in backward mode. */
static void generate_insert(struct generator * g, struct node * p, int style) {
    /* attach XOR backward: backward mode flips which command keeps c. */
    int keep_c = (style == c_attach) != (p->mode == m_backward);

    wp(g, "~{", p);
    if (keep_c) w(g, "int c_keep = z->c;~N~M");

    wp(g, "int ret = insert_~$(z, z->c, z->c, ", p);
    generate_data_address(g, p);
    wp(g, ");~C", p);

    if (keep_c) w(g, "~Mz->c = c_keep;~N");

    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
}
/* Emit code for `<= S` (assign from): replace the text between the cursor
 * and the limit (forward), or between the backward limit and the cursor
 * (backward), with the operand.
 *
 * In forward mode the cursor is saved in c_keep and restored afterwards,
 * like `attach`.  A negative result from the insert helper is returned
 * from the generated routine.
 *
 * The emitted block declares `ret` itself ("int ret = ..."), exactly as
 * generate_insert() does; previously it assigned to an undeclared `ret`,
 * so the generated C did not compile. */
static void generate_assignfrom(struct generator * g, struct node * p) {
    int keep_c = p->mode == m_forward; /* like 'attach' */

    wp(g, "~{", p);
    if (keep_c) {
        wp(g, "int c_keep = z->c;~N"
              "~Mint ret = insert_~$(z, z->c, z->l, ", p);
    } else {
        wp(g, "int ret = insert_~$(z, z->lb, z->c, ", p);
    }
    generate_data_address(g, p);
    wp(g, ");~C", p);

    if (keep_c) w(g, "~Mz->c = c_keep;~N");

    wp(g, "~Mif (ret < 0) return ret;~N"
          "~}", p);
}
/* bugs marked <======= fixed 22/7/02. Similar fixes required for Java */
/* Emit code for `<- S`: call slice_from_~$ with the operand and propagate
 * a negative result out of the generated routine. */
static void generate_slicefrom(struct generator * g, struct node * p) {
    /* An earlier version emitted "~Mslice_from_s(z, " here, which was a
     * bug; the helper name has to come from the ~$ escape. */
    wp(g, "~{int ret = slice_from_~$(z, ", p);
    generate_data_address(g, p);
    wp(g, ");~C", p);
    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
}
static
void
generate_setlimit
(
struct
generator
*
g
,
struct
node
*
p
)
{
int
keep_c
;
wp
(
g
,
"~{int mlimit;~C"
"~M~k~N"
,
p
);
keep_c
=
g
->
keep_count
;
generate
(
g
,
p
->
left
);
if
(
p
->
mode
==
m_forward
)
w
(
g
,
"~Mmlimit = z->l - z->c; z->l = z->c;~N"
);
else
w
(
g
,
"~Mmlimit = z->lb; z->lb = z->c;~N"
);
w
(
g
,
"~M"
);
wrestore
(
g
,
p
,
keep_c
);
w
(
g
,
"~N"
);
g
->
failure_string
=
p
->
mode
==
m_forward
?
"z->l += mlimit;"
:
"z->lb = mlimit;"
;
generate
(
g
,
p
->
aux
);
wms
(
g
,
g
->
failure_string
);
w
(
g
,
"~N"
"~}"
);
}
/* Emit code for `$name C`: run command C (p->left) with the string
 * variable p->name as the current string.
 *
 * The generated code copies *z into a local `env`, points z at the
 * variable, runs C, writes z->p back to the variable, restores *z, and
 * fails if C did not reach the "mark success" statement. */
static void generate_dollar(struct generator * g, struct node * p) {
    /* Failure state of the enclosing construct. */
    int outer_used = g->label_used;
    int outer_label = g->failure_label;
    const char * outer_string = g->failure_string;

    g->failure_label = new_label(g);
    g->label_used = 0;
    g->failure_string = 0;

    g->V[0] = p->name;
    wp(g, "~{struct SN_env env = * z;~C"
          "~Mint failure = 1; /* assume failure */~N"
          "~Mz->p = ~V0;~N"
          "~Mz->lb = z->c = 0;~N"
          "~Mz->l = SIZE(z->p);~N", p);

    generate(g, p->left);
    w(g, "~Mfailure = 0; /* mark success */~N");

    if (g->label_used) wsetl(g, g->failure_label);

    /* Set V[0] again: generating p->left may have overwritten it. */
    g->V[0] = p->name;

    g->label_used = outer_used;
    g->failure_label = outer_label;
    g->failure_string = outer_string;

    w(g, "~M~V0 = z->p;~N"
         "~M* z = env;~N"
         "~Mif (failure) ~f~N~}");
}
/* Emit "<name> <s> <AE>;" where s is the C assignment operator chosen by
 * the caller ("=", "+=", "-=", "*=" or "/="). */
static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
    g->S[0] = s;
    g->V[0] = p->name;

    w(g, "~M~V0 ~S0 ");
    generate_AE(g, p->AE);
    w(g, ";~N");
}
/* Emit "if (!(<name> <s> <AE))) <fail>" where s is the C comparison
 * operator chosen by the caller. */
static void generate_integer_test(struct generator * g, struct node * p, char * s) {
    g->S[0] = s;
    g->V[0] = p->name;

    w(g, "~Mif (!(~V0 ~S0 ");
    generate_AE(g, p->AE);
    w(g, ")) ~f~N");
}
/* Emit a call to the routine p->name.  In the generated code a return of 0
 * triggers the current failure action and a negative return is passed
 * straight up to the caller. */
static void generate_call(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g, "~{int ret = ~V0(z);~N"
          "~Mif (ret == 0) ~f~C"
          "~Mif (ret < 0) return ret;~N~}", p);
}
/* Emit a test of the character at the cursor against the grouping p->name
 * by calling one of the runtime {in,out}_grouping[_b][_U] helpers.
 * complement selects the "out_" variant (used for `non`). */
static void generate_grouping(struct generator * g, struct node * p, int complement) {
    struct grouping * grp = p->name->grouping;

    g->S[0] = p->mode == m_forward ? "" : "_b";
    g->S[1] = complement ? "out" : "in";
    g->S[2] = g->options->utf8 ? "_U" : "";
    g->V[0] = p->name;
    g->I[0] = grp->smallest_ch;
    g->I[1] = grp->largest_ch;

    w(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 0)) ~f~N");
}
static
void
generate_namedstring
(
struct
generator
*
g
,
struct
node
*
p
)
{
g
->
S
[
0
]
=
p
->
mode
==
m_forward
?
""
:
"_b"
;
g
->
V
[
0
]
=
p
->
name
;
wp
(
g
,
"~Mif (!(eq_v~S0(z, ~V0))) ~f~C"
,
p
);
}
/* Emit a match against the literal string held by the node using eq_s
 * (forward) or eq_s_b (backward); a non-match triggers the failure action. */
static void generate_literalstring(struct generator * g, struct node * p) {
    symbol * literal = p->literalstring;

    g->S[0] = p->mode == m_forward ? "" : "_b";
    g->I[0] = SIZE(literal);
    g->L[0] = literal;

    w(g, "~Mif (!(eq_s~S0(z, ~I0, ~L0))) ~f~N");
}
static
void
generate_define
(
struct
generator
*
g
,
struct
node
*
p
)
{
struct
name
*
q
=
p
->
name
;
g
->
next_label
=
0
;
g
->
S
[
0
]
=
q
->
type
==
t_routine
?
"static"
:
"extern"
;
g
->
V
[
0
]
=
q
;
w
(
g
,
"~N~S0 int ~V0(struct SN_env * z) {~N~+"
);
if
(
p
->
amongvar_needed
)
w
(
g
,
"~Mint among_var;~N"
);
g
->
failure_string
=
0
;
g
->
failure_label
=
x_return
;
g
->
label_used
=
0
;
g
->
keep_count
=
0
;
generate
(
g
,
p
->
left
);
w
(
g
,
"~Mreturn 1;~N~}"
);
}
static
void
generate_substring
(
struct
generator
*
g
,
struct
node
*
p
)
{
struct
among
*
x
=
p
->
among
;
int
block
=
-
1
;
unsigned
int
bitmap
=
0
;
struct
amongvec
*
among_cases
=
x
->
b
;
int
c
;
int
empty_case
=
-
1
;
int
n_cases
=
0
;
symbol
cases
[
2
];
int
shortest_size
=
INT_MAX
;
g
->
S
[
0
]
=
p
->
mode
==
m_forward
?
""
:
"_b"
;
g
->
I
[
0
]
=
x
->
number
;
g
->
I
[
1
]
=
x
->
literalstring_count
;
/* In forward mode with non-ASCII UTF-8 characters, the first character
* of the string will often be the same, so instead look at the last
* common character position.
*
* In backward mode, we can't match if there are fewer characters before
* the current position than the minimum length.
*/
for
(
c
=
0
;
c
<
x
->
literalstring_count
;
++
c
)
{
int
size
=
among_cases
[
c
].
size
;
if
(
size
!=
0
&&
size
<
shortest_size
)
{
shortest_size
=
size
;
}
}
for
(
c
=
0
;
c
<
x
->
literalstring_count
;
++
c
)
{
symbol
ch
;
if
(
among_cases
[
c
].
size
==
0
)
{
empty_case
=
c
;
continue
;
}
if
(
p
->
mode
==
m_forward
)
{
ch
=
among_cases
[
c
].
b
[
shortest_size
-
1
];
}
else
{
ch
=
among_cases
[
c
].
b
[
among_cases
[
c
].
size
-
1
];
}
if
(
n_cases
==
0
)
{
block
=
ch
>>
5
;
}
else
if
(
ch
>>
5
!=
block
)
{
block
=
-
1
;
if
(
n_cases
>
2
)
break
;
}
if
(
block
==
-
1
)
{
if
(
ch
==
cases
[
0
])
continue
;
if
(
n_cases
<
2
)
{
cases
[
n_cases
++
]
=
ch
;
}
else
if
(
ch
!=
cases
[
1
])
{
++
n_cases
;
break
;
}
}
else
{
if
((
bitmap
&
(
1u
<<
(
ch
&
0x1f
)))
==
0
)
{
bitmap
|=
1u
<<
(
ch
&
0x1f
);
if
(
n_cases
<
2
)
cases
[
n_cases
]
=
ch
;
++
n_cases
;
}
}
}
if
(
block
!=
-
1
||
n_cases
<=
2
)
{
char
buf
[
64
];
g
->
I
[
2
]
=
block
;
g
->
I
[
3
]
=
bitmap
;
g
->
I
[
4
]
=
shortest_size
-
1
;
if
(
p
->
mode
==
m_forward
)
{
sprintf
(
buf
,
"z->p[z->c + %d]"
,
shortest_size
-
1
);
g
->
S
[
1
]
=
buf
;
if
(
shortest_size
==
1
)
{
wp
(
g
,
"~Mif (z->c >= z->l || "
,
p
);
}
else
{
wp
(
g
,
"~Mif (z->c + ~I4 >= z->l || "
,
p
);
}
}
else
{
g
->
S
[
1
]
=
"z->p[z->c - 1]"
;
if
(
shortest_size
==
1
)
{
wp
(
g
,
"~Mif (z->c <= z->lb || "
,
p
);
}
else
{
wp
(
g
,
"~Mif (z->c - ~I4 <= z->lb || "
,
p
);
}
}
if
(
n_cases
==
0
)
{
/* We get this for the degenerate case: among { '' }
* This doesn't seem to be a useful construct, but it is
* syntactically valid.
*/
wp
(
g
,
"0"
,
p
);
}
else
if
(
n_cases
==
1
)
{
g
->
I
[
4
]
=
cases
[
0
];
wp
(
g
,
"~S1 != ~I4"
,
p
);
}
else
if
(
n_cases
==
2
)
{
g
->
I
[
4
]
=
cases
[
0
];
g
->
I
[
5
]
=
cases
[
1
];
wp
(
g
,
"(~S1 != ~I4 && ~S1 != ~I5)"
,
p
);
}
else
{
wp
(
g
,
"~S1 >> 5 != ~I2 || !((~I3 >> (~S1 & 0x1f)) & 1)"
,
p
);
}
ws
(
g
,
") "
);
if
(
empty_case
!=
-
1
)
{
/* If the among includes the empty string, it can never fail
* so not matching the bitmap means we match the empty string.
*/
g
->
I
[
4
]
=
among_cases
[
empty_case
].
result
;
wp
(
g
,
"among_var = ~I4; else~N"
,
p
);
}
else
{
wp
(
g
,
"~f~N"
,
p
);
}
}
else
{
#ifdef OPTIMISATION_WARNINGS
printf
(
"Couldn't shortcut among %d
\n
"
,
x
->
number
);
#endif
}
if
(
x
->
command_count
==
0
&&
x
->
starter
==
0
)
wp
(
g
,
"~Mif (!(find_among~S0(z, a_~I0, ~I1))) ~f~C"
,
p
);
else
wp
(
g
,
"~Mamong_var = find_among~S0(z, a_~I0, ~I1);~C"
"~Mif (!(among_var)) ~f~N"
,
p
);
}
static
void
generate_among
(
struct
generator
*
g
,
struct
node
*
p
)
{
struct
among
*
x
=
p
->
among
;
int
case_number
=
1
;
if
(
x
->
substring
==
0
)
generate_substring
(
g
,
p
);
if
(
x
->
command_count
==
0
&&
x
->
starter
==
0
)
return
;
unless
(
x
->
starter
==
0
)
generate
(
g
,
x
->
starter
);
p
=
p
->
left
;
if
(
p
!=
0
&&
p
->
type
!=
c_literalstring
)
p
=
p
->
right
;
w
(
g
,
"~Mswitch(among_var) {~N~+"
"~Mcase 0: ~f~N"
);
until
(
p
==
0
)
{
if
(
p
->
type
==
c_bra
&&
p
->
left
!=
0
)
{
g
->
I
[
0
]
=
case_number
++
;
w
(
g
,
"~Mcase ~I0:~N~+"
);
generate
(
g
,
p
);
w
(
g
,
"~Mbreak;~N~-"
);
}
p
=
p
->
right
;
}
w
(
g
,
"~}"
);
}
/* Emit a test of the boolean variable p->name; a false value triggers the
 * failure action. */
static void generate_booltest(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g, "~Mif (!(~V0)) ~f~C", p);
}
/* Emit code for `false`: an unconditional failure action. */
static void generate_false(struct generator * g, struct node * p) {
    wp(g, "~M~f~C", p);
}
/* Emit a debug(z, <sequence number>, <source line>) call.  The sequence
 * number is g->debug_count, which is incremented on every use. */
static void generate_debug(struct generator * g, struct node * p) {
    g->I[0] = g->debug_count++;
    g->I[1] = p->line_number;
    wp(g, "~Mdebug(z, ~I0, ~I1);~C", p);
}
/* Generate code for a single syntax-tree node by dispatching on its type.
 *
 * The failure label and failure string in force on entry are reinstated on
 * exit.  label_used is reinstated only when the handler left a different
 * failure label behind.  An unknown node type is reported on stderr and
 * terminates the program. */
static void generate(struct generator * g, struct node * p) {
    int entry_used = g->label_used;
    int entry_label = g->failure_label;
    const char * entry_string = g->failure_string;

    switch (p->type) {
        /* Structure and control flow. */
        case c_define:         generate_define(g, p); break;
        case c_bra:            generate_bra(g, p); break;
        case c_and:            generate_and(g, p); break;
        case c_or:             generate_or(g, p); break;
        case c_backwards:      generate_backwards(g, p); break;
        case c_not:            generate_not(g, p); break;
        case c_set:            generate_set(g, p); break;
        case c_unset:          generate_unset(g, p); break;
        case c_try:            generate_try(g, p); break;
        case c_fail:           generate_fail(g, p); break;
        case c_reverse:
        case c_test:           generate_test(g, p); break;
        case c_do:             generate_do(g, p); break;
        case c_goto:           generate_GO(g, p, 1); break;
        case c_gopast:         generate_GO(g, p, 0); break;
        case c_repeat:         generate_repeat(g, p, false); break;
        case c_loop:           generate_loop(g, p); break;
        case c_atleast:        generate_atleast(g, p); break;

        /* Cursor, marks and limits. */
        case c_setmark:        generate_setmark(g, p); break;
        case c_tomark:         generate_tomark(g, p); break;
        case c_atmark:         generate_atmark(g, p); break;
        case c_hop:            generate_hop(g, p); break;
        case c_delete:         generate_delete(g, p); break;
        case c_next:           generate_next(g, p); break;
        case c_tolimit:        generate_tolimit(g, p); break;
        case c_atlimit:        generate_atlimit(g, p); break;

        /* Slices and string operations. */
        case c_leftslice:      generate_leftslice(g, p); break;
        case c_rightslice:     generate_rightslice(g, p); break;
        case c_assignto:       generate_assignto(g, p); break;
        case c_sliceto:        generate_sliceto(g, p); break;
        case c_assign:         generate_assignfrom(g, p); break;
        case c_insert:
        case c_attach:         generate_insert(g, p, p->type); break;
        case c_slicefrom:      generate_slicefrom(g, p); break;
        case c_setlimit:       generate_setlimit(g, p); break;
        case c_dollar:         generate_dollar(g, p); break;

        /* Integer assignments and comparisons. */
        case c_mathassign:     generate_integer_assign(g, p, "="); break;
        case c_plusassign:     generate_integer_assign(g, p, "+="); break;
        case c_minusassign:    generate_integer_assign(g, p, "-="); break;
        case c_multiplyassign: generate_integer_assign(g, p, "*="); break;
        case c_divideassign:   generate_integer_assign(g, p, "/="); break;
        case c_eq:             generate_integer_test(g, p, "=="); break;
        case c_ne:             generate_integer_test(g, p, "!="); break;
        case c_gr:             generate_integer_test(g, p, ">"); break;
        case c_ge:             generate_integer_test(g, p, ">="); break;
        case c_ls:             generate_integer_test(g, p, "<"); break;
        case c_le:             generate_integer_test(g, p, "<="); break;

        /* Calls and matching. */
        case c_call:           generate_call(g, p); break;
        case c_grouping:       generate_grouping(g, p, false); break;
        case c_non:            generate_grouping(g, p, true); break;
        case c_name:           generate_namedstring(g, p); break;
        case c_literalstring:  generate_literalstring(g, p); break;
        case c_among:          generate_among(g, p); break;
        case c_substring:      generate_substring(g, p); break;
        case c_booltest:       generate_booltest(g, p); break;
        case c_false:          generate_false(g, p); break;
        case c_true:           break;
        case c_debug:          generate_debug(g, p); break;

        default:
            fprintf(stderr, "%d encountered\n", p->type);
            exit(1);
    }

    if (g->failure_label != entry_label) g->label_used = entry_used;
    g->failure_label = entry_label;
    g->failure_string = entry_string;
}
/* Emit the "generated automatically" banner comment. */
static void generate_start_comment(struct generator * g) {
    w(g, "~N/* This file was generated automatically by the Snowball to ANSI C compiler */~N");
}
/* Emit the `#include "...header.h"` line of the generated C file.
 *
 * When options->runtime_path is set, it is written in front of "header.h",
 * followed by a '/' unless the path already ends in one.  A null or empty
 * path yields a plain `#include "header.h"`.
 *
 * The empty-string case is handled explicitly: indexing the path at
 * strlen(path) - 1 would otherwise read before the start of the buffer. */
static void generate_head(struct generator * g) {
    size_t path_len = 0;

    if (g->options->runtime_path != 0) {
        path_len = strlen(g->options->runtime_path);
    }

    w(g, "~N#include \"");
    if (path_len > 0) {
        ws(g, g->options->runtime_path);
        if (g->options->runtime_path[path_len - 1] != '/') wch(g, '/');
    }
    w(g, "header.h\"~N~N");
}
static
void
generate_routine_headers
(
struct
generator
*
g
)
{
struct
name
*
q
=
g
->
analyser
->
names
;
until
(
q
==
0
)
{
g
->
V
[
0
]
=
q
;
switch
(
q
->
type
)
{
case
t_routine
:
w
(
g
,
"static int ~W0(struct SN_env * z);~N"
);
break
;
case
t_external
:
w
(
g
,
"#ifdef __cplusplus~N"
"extern
\"
C
\"
{~N"
"#endif~N"
"extern int ~W0(struct SN_env * z);~N"
"#ifdef __cplusplus~N"
"}~N"
"#endif~N"
);
break
;
}
q
=
q
->
next
;
}
}
/* Emit the static data for one among:
 *   - one `s_<among>_<index>` symbol array per non-empty string, then
 *   - the `a_<among>` table of struct among entries (size, string pointer
 *     or 0, the i and result fields, and the routine name or 0). */
static void generate_among_table(struct generator * g, struct among * x) {
    int count = x->literalstring_count;
    int i;

    g->I[0] = x->number;

    /* Pass 1: the string constants. */
    for (i = 0; i < count; i++) {
        struct amongvec * entry = x->b + i;

        g->I[1] = i;
        g->I[2] = entry->size;
        g->L[0] = entry->b;
        if (entry->size != 0) {
            w(g, "static const symbol s_~I0_~I1[~I2] = ~A0;~N");
        }
    }

    g->I[1] = count;
    w(g, "~N~Mstatic const struct among a_~I0[~I1] =~N{~N");

    /* Pass 2: the table rows; every row but the last ends with a comma. */
    for (i = 0; i < count; i++) {
        struct amongvec * entry = x->b + i;

        g->I[1] = i;
        g->I[2] = entry->size;
        g->I[3] = entry->i;
        g->I[4] = entry->result;
        g->S[0] = i < count - 1 ? "," : "";

        w(g, "/*~J1 */ { ~I2, ");
        w(g, entry->size == 0 ? "0," : "s_~I0_~I1,");
        w(g, " ~I3, ~I4, ");
        if (entry->function == 0) {
            w(g, "0");
        } else {
            wvn(g, entry->function);
        }
        w(g, "}~S0~N");
    }

    w(g, "};~N~N");
}
static
void
generate_amongs
(
struct
generator
*
g
)
{
struct
among
*
x
=
g
->
analyser
->
amongs
;
until
(
x
==
0
)
{
generate_among_table
(
g
,
x
);
x
=
x
->
next
;
}
}
/* Set bit number i in the bitmap b, which packs 8 bits into each element. */
static void set_bit(symbol * b, int i) {
    b[i / 8] |= 1 << (i % 8);
}
/* Emit the bitmap table for one grouping: a `static const unsigned char`
 * array with one bit per character value in the range
 * smallest_ch..largest_ch, set for each character listed in q->b. */
static void generate_grouping_table(struct generator * g, struct grouping * q) {
    int range = q->largest_ch - q->smallest_ch + 1;
    int size = (range + 7) / 8;  /* assume 8 bits per symbol */
    symbol * members = q->b;
    symbol * map = create_b(size);
    int i;

    for (i = 0; i < size; i++) map[i] = 0;

    for (i = 0; i < SIZE(members); i++) {
        set_bit(map, members[i] - q->smallest_ch);
    }

    g->V[0] = q->name;
    w(g, "static const unsigned char ~V0[] = { ");
    for (i = 0; i < size; i++) {
        wi(g, map[i]);
        if (i < size - 1) w(g, ", ");
    }
    w(g, " };~N~N");

    lose_b(map);
}
static
void
generate_groupings
(
struct
generator
*
g
)
{
struct
grouping
*
q
=
g
->
analyser
->
groupings
;
until
(
q
==
0
)
{
generate_grouping_table
(
g
,
q
);
q
=
q
->
next
;
}
}
/* Emit <prefix>create_env(), which forwards to SN_create_env() with the
 * number of string, integer and boolean names counted by the analyser. */
static void generate_create(struct generator * g) {
    int * counts = g->analyser->name_count;

    g->I[0] = counts[t_string];
    g->I[1] = counts[t_integer];
    g->I[2] = counts[t_boolean];
    w(g, "~N"
         "extern struct SN_env * ~pcreate_env(void) { return SN_create_env(~I0, ~I1, ~I2); }"
         "~N");
}
/* Emit <prefix>close_env(), which forwards to SN_close_env() with the
 * number of string names counted by the analyser. */
static void generate_close(struct generator * g) {
    int * counts = g->analyser->name_count;

    g->I[0] = counts[t_string];
    w(g, "~Nextern void ~pclose_env(struct SN_env * z) { SN_close_env(z, ~I0); }~N~N");
}
/* Emit the prototypes of <prefix>create_env() and <prefix>close_env(). */
static void generate_create_and_close_templates(struct generator * g) {
    w(g, "~N"
         "extern struct SN_env * ~pcreate_env(void);~N"
         "extern void ~pclose_env(struct SN_env * z);~N"
         "~N");
}
static
void
generate_header_file
(
struct
generator
*
g
)
{
struct
name
*
q
=
g
->
analyser
->
names
;
char
*
vp
=
g
->
options
->
variables_prefix
;
g
->
S
[
0
]
=
vp
;
w
(
g
,
"~N"
"#ifdef __cplusplus~N"
"extern
\"
C
\"
{~N"
"#endif~N"
);
/* for C++ */
generate_create_and_close_templates
(
g
);
until
(
q
==
0
)
{
g
->
V
[
0
]
=
q
;
switch
(
q
->
type
)
{
case
t_external
:
w
(
g
,
"extern int ~W0(struct SN_env * z);~N"
);
break
;
case
t_string
:
g
->
S
[
1
]
=
"S"
;
goto
label0
;
case
t_integer
:
g
->
S
[
1
]
=
"I"
;
goto
label0
;
case
t_boolean
:
g
->
S
[
1
]
=
"B"
;
label0:
if
(
vp
)
{
g
->
I
[
0
]
=
q
->
count
;
w
(
g
,
"#define ~S0"
);
str_append_b
(
g
->
outbuf
,
q
->
b
);
w
(
g
,
" (~S1[~I0])~N"
);
}
break
;
}
q
=
q
->
next
;
}
w
(
g
,
"~N"
"#ifdef __cplusplus~N"
"}~N"
"#endif~N"
);
/* for C++ */
w
(
g
,
"~N"
);
}
extern
void
generate_program_c
(
struct
generator
*
g
)
{
g
->
outbuf
=
str_new
();
generate_start_comment
(
g
);
generate_head
(
g
);
generate_routine_headers
(
g
);
w
(
g
,
"#ifdef __cplusplus~N"
"extern
\"
C
\"
{~N"
"#endif~N"
"~N"
);
generate_create_and_close_templates
(
g
);
w
(
g
,
"~N"
"#ifdef __cplusplus~N"
"}~N"
"#endif~N"
);
generate_amongs
(
g
);
generate_groupings
(
g
);
g
->
declarations
=
g
->
outbuf
;
g
->
outbuf
=
str_new
();
g
->
literalstring_count
=
0
;
{
struct
node
*
p
=
g
->
analyser
->
program
;
until
(
p
==
0
)
{
generate
(
g
,
p
);
p
=
p
->
right
;
}
}
generate_create
(
g
);
generate_close
(
g
);
output_str
(
g
->
options
->
output_c
,
g
->
declarations
);
str_delete
(
g
->
declarations
);
output_str
(
g
->
options
->
output_c
,
g
->
outbuf
);
str_clear
(
g
->
outbuf
);
generate_start_comment
(
g
);
generate_header_file
(
g
);
output_str
(
g
->
options
->
output_h
,
g
->
outbuf
);
str_delete
(
g
->
outbuf
);
}
/* Allocate a generator (via the NEW macro) bound to analyser a and options
 * o, with margin, debug_count and line_count zeroed.  Release it with
 * close_generator_c(). */
extern struct generator * create_generator_c(struct analyser * a, struct options * o) {
    NEW(generator, g);

    g->analyser = a;
    g->options = o;

    g->margin = 0;
    g->debug_count = 0;
    g->line_count = 0;

    return g;
}
/* Release a generator obtained from create_generator_c(). */
extern void close_generator_c(struct generator * g) {
    FREE(g);
}
libstemmer_c/compiler/generator_java.c
deleted
100644 → 0
View file @
1c6e8c67
#include <stdlib.h>
/* for exit */
#include <string.h>
/* for strlen */
#include <stdio.h>
/* for fprintf etc */
#include "header.h"
/* prototypes */
static
void
generate
(
struct
generator
*
g
,
struct
node
*
p
);
static
void
w
(
struct
generator
*
g
,
const
char
*
s
);
static
void
writef
(
struct
generator
*
g
,
const
char
*
s
,
struct
node
*
p
);
/* Reserved failure-label values.  x_return marks "fail by returning from
 * the routine" (write_failure emits `return false;` for it); real labels
 * come from new_label(). */
enum
special_labels
{
x_return
=
-
1
};
/* Hand out the next unused label number and advance the counter. */
static int new_label(struct generator * g) {
    int label = g->next_label;

    g->next_label = label + 1;
    return label;
}
/* Create a fresh temporary-variable name of the form "v_<n>", where n is
 * the incremented g->var_number.  The returned str is newly allocated by
 * str_new(); callers in this file release it with str_delete(). */
static struct str * vars_newname(struct generator * g) {
    struct str * name = str_new();

    g->var_number++;
    str_append_string(name, "v_");
    str_append_int(name, g->var_number);
    return name;
}
/* Output routines */
/* Write the contents of str to outfile.  The temporary C string obtained
 * from b_to_s() is freed after use. */
static void output_str(FILE * outfile, struct str * str) {
    char * text = b_to_s(str_data(str));

    fprintf(outfile, "%s", text);
    free(text);
}
/* Write routines for simple entities */
/* Append the single character ch to the output buffer. */
static void write_char(struct generator * g, int ch) {
    str_append_ch(g->outbuf, ch);
}
/* Append a line break to the output buffer. */
static void write_newline(struct generator * g) {
    str_append_string(g->outbuf, "\n");
}
/* Append the C string s, verbatim, to the output buffer. */
static void write_string(struct generator * g, const char * s) {
    str_append_string(g->outbuf, s);
}
/* Append the symbol block b to the output buffer. */
static void write_b(struct generator * g, symbol * b) {
    str_append_b(g->outbuf, b);
}
/* Append the contents of another str to the output buffer. */
static void write_str(struct generator * g, struct str * str) {
    str_append(g->outbuf, str);
}
/* Append the integer i, via str_append_int(), to the output buffer. */
static void write_int(struct generator * g, int i) {
    str_append_int(g->outbuf, i);
}
/* Write routines for items from the syntax tree */
/* Write the generated identifier for name p: its own text, preceded by a
 * one-letter type tag taken from "SBIrxg" (indexed by p->type) and an
 * underscore.  External names are written without the tag. */
static void write_varname(struct generator * g, struct name * p) {
    int tag = "SBIrxg"[p->type];

    if (p->type != t_external) {
        write_char(g, tag);
        write_char(g, '_');
    }
    str_append_b(g->outbuf, p->b);
}
/* Write a reference to name p.  In java, references look just the same as
 * the name itself. */
static void write_varref(struct generator * g, struct name * p) {
    write_varname(g, p);
}
/* Write n as one hexadecimal digit: '0'-'9' below ten, 'A' upwards from
 * ten. */
static void write_hexdigit(struct generator * g, int n) {
    int digit;

    if (n < 10) digit = n + '0';
    else digit = n - 10 + 'A';

    write_char(g, digit);
}
/* Write ch as a backslash-u escape followed by four hex digits, most
 * significant nibble first. */
static void write_hex(struct generator * g, int ch) {
    int shift;

    write_string(g, "\\u");
    for (shift = 12; shift >= 0; shift -= 4) {
        write_hexdigit(g, (ch >> shift) & 0xf);
    }
}
/* Write the symbol string p as a double-quoted literal.  Characters in the
 * range 32..127 are written as-is (with a backslash in front of a double
 * quote or backslash); every other character goes through write_hex(). */
static void write_literal_string(struct generator * g, symbol * p) {
    int len = SIZE(p);
    int i;

    write_string(g, "\"");
    for (i = 0; i < len; i++) {
        int ch = p[i];

        if (ch < 32 || ch > 127) {
            write_hex(g, ch);
            continue;
        }
        if (ch == '\"' || ch == '\\') write_string(g, "\\");
        write_char(g, ch);
    }
    write_string(g, "\"");
}
/* Write the indentation for the current nesting depth: one copy of the
 * indent string per level of g->margin. */
static void write_margin(struct generator * g) {
    int level;

    for (level = 0; level < g->margin; level++) {
        write_string(g, " ");
    }
}
/* Write a variable declaration. */
/* Write "<indent><declaration>;" plus a newline into g->declarations
 * instead of the normal output buffer.  `declaration` is a writef() format
 * string.  The output buffer is swapped for the duration of the call and
 * then restored. */
static void write_declare(struct generator * g, char * declaration, struct node * p) {
    struct str * code_buf = g->outbuf;

    g->outbuf = g->declarations;

    write_string(g, " ");
    writef(g, declaration, p);
    write_string(g, ";");
    write_newline(g);

    g->outbuf = code_buf;
}
/* Write a "// <token>[ <name>], line <n>" comment line describing node p,
 * at the current indentation. */
static void write_comment(struct generator * g, struct node * p) {
    write_margin(g);

    write_string(g, "// ");
    write_string(g, (char *) name_of_token(p->type));

    if (p->name != 0) {
        write_string(g, " ");
        str_append_b(g->outbuf, p->name->b);
    }

    write_string(g, ", line ");
    write_int(g, p->line_number);
    write_newline(g);
}
/* Open a brace block on its own line and increase the indentation. */
static void write_block_start(struct generator * g) {
    w(g, "~M{~+~N");
}
/* Decrease the indentation and close a brace block on its own line. */
static void write_block_end(struct generator * g) {
    w(g, "~-~M}~N");
}
/* Declare the int variable named by savevar and emit an assignment that
 * stores the cursor in it: `cursor` itself in forward mode,
 * `limit - cursor` otherwise. */
static void write_savecursor(struct generator * g, struct node * p, struct str * savevar) {
    g->B[0] = str_data(savevar);
    g->S[1] = p->mode != m_forward ? "limit - " : "";

    write_declare(g, "int ~B0", p);
    writef(g, "~M~B0 = ~S1cursor;~N", p);
}
/* Replace the contents of `out` with the statement that restores the
 * cursor from savevar: "cursor = <savevar>;" in forward mode,
 * "cursor = limit - <savevar>;" otherwise. */
static void restore_string(struct node * p, struct str * out, struct str * savevar) {
    int backward = p->mode != m_forward;

    str_clear(out);
    str_append_string(out, "cursor = ");
    if (backward) str_append_string(out, "limit - ");
    str_append(out, savevar);
    str_append_string(out, ";");
}
/* Emit, as an indented line, the cursor-restoring statement built by
 * restore_string() for savevar. */
static void write_restorecursor(struct generator * g, struct node * p, struct str * savevar) {
    struct str * stmt = str_new();

    write_margin(g);
    restore_string(p, stmt, savevar);
    write_str(g, stmt);
    write_newline(g);

    str_delete(stmt);
}
/* Emit an indented `cursor++;` (forward mode) or `cursor--;` (otherwise). */
static void write_inc_cursor(struct generator * g, struct node * p) {
    write_margin(g);
    if (p->mode == m_forward) {
        write_string(g, "cursor++;");
    } else {
        write_string(g, "cursor--;");
    }
    write_newline(g);
}
/* Open the labelled block "lab<n>: do {" and increase the indentation.
 * wsetlab_end() writes the matching close. */
static void wsetlab_begin(struct generator * g, int n) {
    w(g, "~Mlab");
    write_int(g, n);
    w(g, ": do {~+~N");
}
/* Close a block opened by wsetlab_begin() with "} while (false);". */
static void wsetlab_end(struct generator * g) {
    w(g, "~-~M} while (false);~N");
}
/* Emit an indented "break lab<n>;" line. */
static void wgotol(struct generator * g, int n) {
    write_margin(g);

    write_string(g, "break lab");
    write_int(g, n);
    write_string(g, ";");

    write_newline(g);
}
/* Emit the current failure action: the pending g->failure_str line (when
 * non-empty), then either `return false;` (failure label x_return) or
 * `break lab<n>;`.  Sets g->unreachable, since the emitted statement never
 * falls through. */
static void write_failure(struct generator * g) {
    if (str_len(g->failure_str) != 0) {
        write_margin(g);
        write_str(g, g->failure_str);
        write_newline(g);
    }

    write_margin(g);
    if (g->failure_label == x_return) {
        write_string(g, "return false;");
    } else {
        write_string(g, "break lab");
        write_int(g, g->failure_label);
        write_string(g, ";");
    }
    write_newline(g);

    g->unreachable = true;
}
/* Emit "if (<s>)" followed by a brace block containing the failure action.
 * s is a writef() format string.  Because the failure is conditional,
 * g->unreachable is cleared again afterwards. */
static void write_failure_if(struct generator * g, char * s, struct node * p) {
    /* Condition line. */
    writef(g, "~Mif (", p);
    writef(g, s, p);
    writef(g, ")~N", p);

    /* Guarded failure. */
    write_block_start(g);
    write_failure(g);
    write_block_end(g);

    g->unreachable = false;
}
/* if at limit fail */
/* Emit "fail if the cursor is at the limit": `cursor >= limit` in forward
 * mode, `cursor <= limit_backward` otherwise. */
static void write_check_limit(struct generator * g, struct node * p) {
    if (p->mode != m_forward) {
        write_failure_if(g, "cursor <= limit_backward", p);
        return;
    }
    write_failure_if(g, "cursor >= limit", p);
}
/* Formatted write. */
/* Formatted write.  Copies `input` to the output buffer, expanding
 * two-character escapes introduced by '~':
 *
 *   ~C  comment describing node p        ~f  failure action in a block
 *   ~M  margin (indentation)             ~N  newline
 *   ~{  block start                      ~}  block end
 *   ~+  increase margin                  ~-  decrease margin
 *   ~n  options->name
 *   ~Sd string g->S[d]                   ~Bd symbol block g->B[d]
 *   ~Id integer g->I[d]                  ~Ld literal string g->L[d]
 *   ~Vd variable reference g->V[d]       ~Wd variable name g->V[d]
 *
 * (d is a single digit.)  Any other character after '~' is written
 * literally. */
static void writef(struct generator * g, const char * input, struct node * p) {
    int pos = 0;
    int len = strlen(input);

    while (pos < len) {
        int ch = input[pos++];

        if (ch != '~') {
            write_char(g, ch);
            continue;
        }

        switch (input[pos++]) {
            case 'C':
                write_comment(g, p);
                break;
            case 'f':
                write_block_start(g);
                write_failure(g);
                g->unreachable = false;
                write_block_end(g);
                break;
            case 'M':
                write_margin(g);
                break;
            case 'N':
                write_newline(g);
                break;
            case '{':
                write_block_start(g);
                break;
            case '}':
                write_block_end(g);
                break;
            case 'S':
                write_string(g, g->S[input[pos++] - '0']);
                break;
            case 'B':
                write_b(g, g->B[input[pos++] - '0']);
                break;
            case 'I':
                write_int(g, g->I[input[pos++] - '0']);
                break;
            case 'V':
                write_varref(g, g->V[input[pos++] - '0']);
                break;
            case 'W':
                write_varname(g, g->V[input[pos++] - '0']);
                break;
            case 'L':
                write_literal_string(g, g->L[input[pos++] - '0']);
                break;
            case '+':
                g->margin++;
                break;
            case '-':
                g->margin--;
                break;
            case 'n':
                write_string(g, g->options->name);
                break;
            default:
                write_char(g, input[pos - 1]);
                break;
        }
    }
}
/* Shorthand for writef() with no node; the format must therefore not use
 * the ~C escape, which dereferences the node. */
static void w(struct generator * g, const char * s) {
    writef(g, s, 0);
}
/* Write the arithmetic expression rooted at p.  Binary operators are
 * written fully parenthesised as "(<left> <op> <right>)".  Node types not
 * listed below produce no output. */
static void generate_AE(struct generator * g, struct node * p) {
    char * op = 0;

    switch (p->type) {
        case c_name:
            write_varref(g, p->name);
            return;
        case c_number:
            write_int(g, p->number);
            return;
        case c_maxint:
            write_string(g, "MAXINT");
            return;
        case c_minint:
            write_string(g, "MININT");
            return;
        case c_neg:
            write_string(g, "-");
            generate_AE(g, p->right);
            return;
        case c_sizeof:
            g->V[0] = p->name;
            w(g, "(~V0.length())");
            return;
        case c_cursor:
            w(g, "cursor");
            return;
        case c_limit:
            w(g, p->mode == m_forward ? "limit" : "limit_backward");
            return;
        case c_size:
            w(g, "(current.length())");
            return;

        /* Binary operators share the tail below. */
        case c_multiply: op = " * "; break;
        case c_plus:     op = " + "; break;
        case c_minus:    op = " - "; break;
        case c_divide:   op = " / "; break;

        default:
            return;
    }

    write_string(g, "(");
    generate_AE(g, p->left);
    write_string(g, op);
    generate_AE(g, p->right);
    write_string(g, ")");
}
/* K_needed() tests to see if we really need to keep c. Not true when the
the command does not touch the cursor. This and repeat_score() could be
elaborated almost indefinitely.
*/
/* Walk the sibling chain starting at p and report whether any command in
 * it may require the cursor to be kept.
 *
 * The node types listed in the first group are treated as not requiring
 * it.  Calls and bracketed groups are examined recursively.  Any other
 * node type answers true. */
static int K_needed(struct generator * g, struct node * p) {
    for (; p != 0; p = p->right) {
        switch (p->type) {
            case c_dollar:
            case c_leftslice:
            case c_rightslice:
            case c_mathassign:
            case c_plusassign:
            case c_minusassign:
            case c_multiplyassign:
            case c_divideassign:
            case c_eq:
            case c_ne:
            case c_gr:
            case c_ge:
            case c_ls:
            case c_le:
            case c_sliceto:
            case c_booltest:
            case c_true:
            case c_false:
            case c_debug:
                break;

            case c_call:
                if (K_needed(g, p->name->definition)) return true;
                break;

            case c_bra:
                if (K_needed(g, p->left)) return true;
                break;

            default:
                return true;
        }
    }
    return false;
}
/* Compute a score over the sibling chain starting at p; repeat_restore()
 * treats a score of 2 or more as "cursor must be reinstated".
 *
 * Node types in the first group add nothing; calls and bracketed groups
 * add the score of their contents; simple matching/movement commands add
 * 1; any other node type sets the score to 2. */
static int repeat_score(struct generator * g, struct node * p) {
    int score = 0;

    for (; p != 0; p = p->right) {
        switch (p->type) {
            case c_dollar:
            case c_leftslice:
            case c_rightslice:
            case c_mathassign:
            case c_plusassign:
            case c_minusassign:
            case c_multiplyassign:
            case c_divideassign:
            case c_eq:
            case c_ne:
            case c_gr:
            case c_ge:
            case c_ls:
            case c_le:
            case c_sliceto:   /* case c_not: must not be included here! */
            case c_debug:
                break;

            case c_call:
                score += repeat_score(g, p->name->definition);
                break;

            case c_bra:
                score += repeat_score(g, p->left);
                break;

            case c_name:
            case c_literalstring:
            case c_next:
            case c_grouping:
            case c_non:
            case c_hop:
                score++;
                break;

            default:
                score = 2;
                break;
        }
    }
    return score;
}
/* Tests whether an expression requires cursor reinstatement in a repeat:
 * true exactly when its repeat_score() is at least 2. */
static int repeat_restore(struct generator * g, struct node * p) {
    int score = repeat_score(g, p);
    return score >= 2;
}
/* Generate a bracketed command list: emit the comment for p, then generate
 * each child in order (first child is p->left, siblings follow ->right). */
static void generate_bra(struct generator * g, struct node * p) {
    struct node * child;

    write_comment(g, p);
    for (child = p->left; child != 0; child = child->right) {
        generate(g, child);
    }
}
/* Generate an 'and' node: each child is generated in turn, and (when
 * K_needed() says the children may need it) the saved cursor is restored
 * between consecutive children. Generation stops early once the output
 * becomes unreachable. */
static void generate_and(struct generator * g, struct node * p) {
    struct str * cursor_var = vars_newname(g);
    int restore_between = K_needed(g, p->left);
    struct node * child;

    write_comment(g, p);
    if (restore_between) {
        write_savecursor(g, p, cursor_var);
    }

    for (child = p->left; child != 0; child = child->right) {
        generate(g, child);
        if (g->unreachable) {
            break;
        }
        /* No restore after the last child. */
        if (restore_between && child->right != 0) {
            write_restorecursor(g, child, cursor_var);
        }
    }

    str_delete(cursor_var);
}
/* Generate an 'or' node. Every alternative except the last is generated
 * inside its own failure label with an empty failure string; on success it
 * jumps to the common exit label. The last alternative is generated with the
 * caller's failure label and failure string reinstated. */
static void generate_or(struct generator * g, struct node * p) {
    struct str * cursor_var = vars_newname(g);
    int restore_between = K_needed(g, p->left);
    int outer_label = g->failure_label;
    struct str * outer_str = str_copy(g->failure_str);
    int exit_label = new_label(g);
    struct node * alt;

    write_comment(g, p);
    wsetlab_begin(g, exit_label);
    if (restore_between) {
        write_savecursor(g, p, cursor_var);
    }

    alt = p->left;
    str_clear(g->failure_str);

    if (alt == 0) {
        /* An 'or' should always have at least two sub nodes, so there is
         * nothing sensible to generate here. */
        fprintf(stderr, "Error: \"or\" node without children nodes.");
        exit(1);
    }

    for (; alt->right != 0; alt = alt->right) {
        g->failure_label = new_label(g);
        wsetlab_begin(g, g->failure_label);
        generate(g, alt);
        if (!g->unreachable) {
            wgotol(g, exit_label);
        }
        wsetlab_end(g);
        g->unreachable = false;
        if (restore_between) {
            write_restorecursor(g, alt, cursor_var);
        }
    }

    /* Last alternative: its failure is the failure of the whole 'or'. */
    g->failure_label = outer_label;
    str_delete(g->failure_str);
    g->failure_str = outer_str;
    generate(g, alt);

    wsetlab_end(g);
    str_delete(cursor_var);
}
/* Generate a 'backwards' node: the emitted code sets limit_backward to the
 * cursor and moves the cursor to limit, runs the body (p->left), then sets
 * the cursor back to limit_backward. */
static void generate_backwards(struct generator * g, struct node * p) {
    struct node * body = p->left;

    write_comment(g, p);
    writef(g, "~Mlimit_backward = cursor; cursor = limit;~N", p);
    generate(g, body);
    w(g, "~Mcursor = limit_backward;");
}
/* Generate a 'not' node: the body is generated inside a fresh failure label
 * with an empty failure string; if the body can complete, the emitted code
 * signals failure to the enclosing context. When the body may need it, the
 * cursor is saved before and restored after, inside its own block. */
static void generate_not(struct generator * g, struct node * p) {
    struct str * cursor_var = vars_newname(g);
    int keep_cursor = K_needed(g, p->left);
    int outer_label = g->failure_label;
    struct str * outer_str = str_copy(g->failure_str);

    write_comment(g, p);
    if (keep_cursor) {
        write_block_start(g);
        write_savecursor(g, p, cursor_var);
    }

    g->failure_label = new_label(g);
    str_clear(g->failure_str);
    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);

    /* Reinstate the enclosing failure context before emitting the failure. */
    g->failure_label = outer_label;
    str_delete(g->failure_str);
    g->failure_str = outer_str;

    if (!g->unreachable) {
        write_failure(g);
    }
    wsetlab_end(g);
    g->unreachable = false;

    if (keep_cursor) {
        write_restorecursor(g, p, cursor_var);
        write_block_end(g);
    }
    str_delete(cursor_var);
}
/* Generate a 'try' node: the body is generated inside a fresh failure label;
 * when the body may need it, the cursor is saved first and restore_string()
 * is applied to the failure string with the saved-cursor variable. The code
 * after the 'try' is always treated as reachable. */
static void generate_try(struct generator * g, struct node * p) {
    struct str * cursor_var = vars_newname(g);
    int keep_cursor = K_needed(g, p->left);

    write_comment(g, p);
    if (keep_cursor) {
        write_savecursor(g, p, cursor_var);
    }

    g->failure_label = new_label(g);
    if (keep_cursor) {
        restore_string(p, g->failure_str, cursor_var);
    }

    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);
    wsetlab_end(g);
    g->unreachable = false;

    str_delete(cursor_var);
}
/* Generate a 'set' node: emits "<var> = true;" for the variable p->name. */
static void generate_set(struct generator * g, struct node * p) {
    struct name * flag = p->name;

    write_comment(g, p);
    g->V[0] = flag;
    writef(g, "~M~V0 = true;~N", p);
}
/* Generate an 'unset' node: emits "<var> = false;" for the variable p->name. */
static void generate_unset(struct generator * g, struct node * p) {
    struct name * flag = p->name;

    write_comment(g, p);
    g->V[0] = flag;
    writef(g, "~M~V0 = false;~N", p);
}
/* Generate a 'fail' node: generate the body, then (if the end of the body
 * is reachable) emit a failure. */
static void generate_fail(struct generator * g, struct node * p) {
    write_comment(g, p);
    generate(g, p->left);
    if (g->unreachable) {
        return;
    }
    write_failure(g);
}
/* generate_test() also implements 'reverse'.
 *
 * The body is generated with the cursor saved beforehand and restored
 * afterwards, when K_needed() reports the body may need that and the end of
 * the body is reachable. */
static void generate_test(struct generator * g, struct node * p) {
    struct str * cursor_var = vars_newname(g);
    int keep_cursor = K_needed(g, p->left);

    write_comment(g, p);
    if (keep_cursor) {
        write_savecursor(g, p, cursor_var);
    }

    generate(g, p->left);

    if (!g->unreachable && keep_cursor) {
        write_restorecursor(g, p, cursor_var);
    }
    str_delete(cursor_var);
}
/* Generate a 'do' node: the body is generated inside a fresh failure label
 * with an empty failure string, and the cursor is restored afterwards when
 * the body may need it. The code after the 'do' is always treated as
 * reachable. */
static void generate_do(struct generator * g, struct node * p) {
    struct str * cursor_var = vars_newname(g);
    int keep_cursor = K_needed(g, p->left);

    write_comment(g, p);
    if (keep_cursor) {
        write_savecursor(g, p, cursor_var);
    }

    g->failure_label = new_label(g);
    str_clear(g->failure_str);

    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);
    wsetlab_end(g);
    g->unreachable = false;

    if (keep_cursor) {
        write_restorecursor(g, p, cursor_var);
    }
    str_delete(cursor_var);
}
/* Generate 'goto' (style == 1) or 'gopast' (style == 0).
 *
 * Emits a labelled "while(true)" loop whose body tries p->left inside a
 * fresh failure label. On success the loop is left with "break golabN"
 * (for 'goto' the cursor is restored first); on failure the cursor is
 * restored if needed, the limit is checked and the cursor is advanced.
 * If the body can never complete, everything after the loop is marked
 * unreachable. */
static void generate_GO(struct generator * g, struct node * p, int style) {
    int body_never_completes;
    struct str * cursor_var = vars_newname(g);
    int keep_cursor = style == 1 || repeat_restore(g, p->left);
    int outer_label = g->failure_label;
    struct str * outer_str = str_copy(g->failure_str);
    int loop_label = new_label(g);

    g->I[0] = loop_label;
    write_comment(g, p);
    w(g, "~Mgolab~I0: while(true)~N");
    w(g, "~{");
    if (keep_cursor) {
        write_savecursor(g, p, cursor_var);
    }

    g->failure_label = new_label(g);
    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);

    /* If the body is unreachable at its end we cannot break out of the
     * loop, so the code after the loop is unreachable too. */
    body_never_completes = g->unreachable ? true : false;
    if (!body_never_completes) {
        /* include for goto; omit for gopast */
        if (style == 1) {
            write_restorecursor(g, p, cursor_var);
        }
        g->I[0] = loop_label;
        w(g, "~Mbreak golab~I0;~N");
    }
    g->unreachable = false;
    wsetlab_end(g);

    if (keep_cursor) {
        write_restorecursor(g, p, cursor_var);
    }

    g->failure_label = outer_label;
    str_delete(g->failure_str);
    g->failure_str = outer_str;

    write_check_limit(g, p);
    write_inc_cursor(g, p);
    write_block_end(g);

    str_delete(cursor_var);
    g->unreachable = body_never_completes;
}
/* Generate a 'loop' node: declares an int counter, then emits
 * "for (counter = <AE>; counter > 0; counter--)" around the body. */
static void generate_loop(struct generator * g, struct node * p) {
    struct str * counter = vars_newname(g);

    write_comment(g, p);

    g->B[0] = str_data(counter);
    write_declare(g, "int ~B0", p);

    w(g, "~Mfor (~B0 = ");
    generate_AE(g, p->AE);

    /* Set B[0] again for the rest of the loop header. */
    g->B[0] = str_data(counter);
    writef(g, "; ~B0 > 0; ~B0--)~N", p);

    writef(g, "~{", p);
    generate(g, p->left);
    w(g, "~}");

    str_delete(counter);
    g->unreachable = false;
}
/* Generate a 'repeat' loop (also used by generate_atleast()).
 *
 * Emits a labelled "while(true)" loop. The body is tried inside a fresh
 * failure label; on success the optional counter loopvar is decremented and
 * the loop continues, on failure the cursor is restored (if needed) and the
 * loop is left. loopvar may be 0, in which case no counter is maintained. */
static void generate_repeat(struct generator * g, struct node * p, struct str * loopvar) {
    struct str * cursor_var = vars_newname(g);
    int keep_cursor = repeat_restore(g, p->left);
    int loop_label = new_label(g);

    g->I[0] = loop_label;
    write_comment(g, p);
    writef(g, "~Mreplab~I0: while(true)~N~{", p);

    if (keep_cursor) {
        write_savecursor(g, p, cursor_var);
    }

    g->failure_label = new_label(g);
    str_clear(g->failure_str);
    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);

    if (!g->unreachable) {
        if (loopvar != 0) {
            g->B[0] = str_data(loopvar);
            w(g, "~M~B0--;~N");
        }
        g->I[0] = loop_label;
        w(g, "~Mcontinue replab~I0;~N");
    }

    wsetlab_end(g);
    g->unreachable = false;

    if (keep_cursor) {
        write_restorecursor(g, p, cursor_var);
    }

    g->I[0] = loop_label;
    w(g, "~Mbreak replab~I0;~N~}");
    str_delete(cursor_var);
}
/* Generate an 'atleast' node: declares an int counter initialised from the
 * AE, runs a repeat loop that decrements it on each success, and emits a
 * failure if the counter is still above zero afterwards. */
static void generate_atleast(struct generator * g, struct node * p) {
    struct str * counter = vars_newname(g);
    int outer_label;
    struct str * outer_str;

    write_comment(g, p);
    w(g, "~{");

    g->B[0] = str_data(counter);
    w(g, "~Mint ~B0 = ");
    generate_AE(g, p->AE);
    w(g, ";~N");

    /* generate_repeat() replaces the failure context; put it back after. */
    outer_label = g->failure_label;
    outer_str = str_copy(g->failure_str);
    generate_repeat(g, p, counter);
    g->failure_label = outer_label;
    str_delete(g->failure_str);
    g->failure_str = outer_str;

    g->B[0] = str_data(counter);
    write_failure_if(g, "~B0 > 0", p);
    w(g, "~}");

    str_delete(counter);
}
/* Generate a 'setmark' node: emits "<var> = cursor;" for p->name. */
static void generate_setmark(struct generator * g, struct node * p) {
    struct name * mark = p->name;

    write_comment(g, p);
    g->V[0] = mark;
    writef(g, "~M~V0 = cursor;~N", p);
}
/* Generate a 'tomark' node: the emitted code fails if the cursor is already
 * beyond the mark (cursor > AE going forward, cursor < AE going backward),
 * otherwise it assigns the AE to the cursor. */
static void generate_tomark(struct generator * g, struct node * p) {
    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = ">";
    } else {
        g->S[0] = "<";
    }

    w(g, "~Mif (cursor ~S0 ");
    generate_AE(g, p->AE);
    w(g, ")~N");

    write_block_start(g);
    write_failure(g);
    write_block_end(g);
    g->unreachable = false;

    w(g, "~Mcursor = ");
    generate_AE(g, p->AE);
    writef(g, ";~N", p);
}
/* Generate an 'atmark' node: the emitted code fails unless the cursor equals
 * the value of the AE. */
static void generate_atmark(struct generator * g, struct node * p) {
    struct node * mark = p->AE;

    write_comment(g, p);

    w(g, "~Mif (cursor != ");
    generate_AE(g, mark);
    writef(g, ")~N", p);

    write_block_start(g);
    write_failure(g);
    write_block_end(g);
    g->unreachable = false;
}
/* Generate a 'hop' node: the emitted code computes c = cursor +/- AE
 * (+ forward, - backward), fails if c lies outside the permitted range, and
 * otherwise moves the cursor to c. */
static void generate_hop(struct generator * g, struct node * p) {
    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = "+";
    } else {
        g->S[0] = "-";
    }
    w(g, "~{~Mint c = cursor ~S0 ");
    generate_AE(g, p->AE);
    w(g, ";~N");

    /* Lower bound of the range check depends on the direction. */
    if (p->mode == m_forward) {
        g->S[0] = "0";
    } else {
        g->S[0] = "limit_backward";
    }
    write_failure_if(g, "~S0 > c || c > limit", p);

    writef(g, "~Mcursor = c;~N", p);
    writef(g, "~}", p);
}
static
void
generate_delete
(
struct
generator
*
g
,
struct
node
*
p
)
{
write_comment
(
g
,
p
);
writef
(
g
,
"~Mslice_del();~N"
,
p
);
}
static
void
generate_next
(
struct
generator
*
g
,
struct
node
*
p
)
{
write_comment
(
g
,
p
);
write_check_limit
(
g
,
p
);
write_inc_cursor
(
g
,
p
);
}
/* Generate a 'tolimit' node: emits "cursor = limit;" going forward and
 * "cursor = limit_backward;" going backward. */
static void generate_tolimit(struct generator * g, struct node * p) {
    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = "limit";
    } else {
        g->S[0] = "limit_backward";
    }
    writef(g, "~Mcursor = ~S0;~N", p);
}
/* Generate an 'atlimit' node: the emitted code fails while the cursor has
 * not reached the limit ("cursor < limit" forward, "cursor > limit_backward"
 * backward). */
static void generate_atlimit(struct generator * g, struct node * p) {
    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = "limit";
        g->S[1] = "<";
    } else {
        g->S[0] = "limit_backward";
        g->S[1] = ">";
    }
    write_failure_if(g, "cursor ~S1 ~S0", p);
}
/* Generate a left-slice node: assigns the cursor to "bra" going forward and
 * to "ket" going backward. */
static void generate_leftslice(struct generator * g, struct node * p) {
    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = "bra";
    } else {
        g->S[0] = "ket";
    }
    writef(g, "~M~S0 = cursor;~N", p);
}
/* Generate a right-slice node: assigns the cursor to "ket" going forward and
 * to "bra" going backward. */
static void generate_rightslice(struct generator * g, struct node * p) {
    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = "ket";
    } else {
        g->S[0] = "bra";
    }
    writef(g, "~M~S0 = cursor;~N", p);
}
/* Generate an 'assignto' node: emits "<var> = assign_to(<var>);" for
 * the variable p->name. */
static void generate_assignto(struct generator * g, struct node * p) {
    struct name * target = p->name;

    write_comment(g, p);
    g->V[0] = target;
    writef(g, "~M~V0 = assign_to(~V0);~N", p);
}
/* Generate a 'sliceto' node: emits "<var> = slice_to(<var>);" for the
 * variable p->name. */
static void generate_sliceto(struct generator * g, struct node * p) {
    struct name * target = p->name;

    write_comment(g, p);
    g->V[0] = target;
    writef(g, "~M~V0 = slice_to(~V0);~N", p);
}
/* Emit the string operand of p: its literal string if it has one, otherwise
 * a reference to the variable p->name. */
static void generate_address(struct generator * g, struct node * p) {
    symbol * literal = p->literalstring;

    if (literal == 0) {
        write_varref(g, p->name);
        return;
    }
    write_literal_string(g, literal);
}
static
void
generate_insert
(
struct
generator
*
g
,
struct
node
*
p
,
int
style
)
{
int
keep_c
=
style
==
c_attach
;
write_comment
(
g
,
p
);
if
(
p
->
mode
==
m_backward
)
keep_c
=
!
keep_c
;
if
(
keep_c
)
w
(
g
,
"~{~Mint c = cursor;~N"
);
writef
(
g
,
"~Minsert(cursor, cursor, "
,
p
);
generate_address
(
g
,
p
);
writef
(
g
,
");~N"
,
p
);
if
(
keep_c
)
w
(
g
,
"~Mcursor = c;~N~}"
);
}
static
void
generate_assignfrom
(
struct
generator
*
g
,
struct
node
*
p
)
{
int
keep_c
=
p
->
mode
==
m_forward
;
/* like 'attach' */
write_comment
(
g
,
p
);
if
(
keep_c
)
writef
(
g
,
"~{~Mint c = cursor;~N"
,
p
);
if
(
p
->
mode
==
m_forward
)
{
writef
(
g
,
"~Minsert(cursor, limit, "
,
p
);
}
else
{
writef
(
g
,
"~Minsert(limit_backward, cursor, "
,
p
);
}
generate_address
(
g
,
p
);
writef
(
g
,
");~N"
,
p
);
if
(
keep_c
)
w
(
g
,
"~Mcursor = c;~N~}"
);
}
/* Generate a 'slicefrom' node: emits "slice_from(<operand>);". */
static void generate_slicefrom(struct generator * g, struct node * p) {
    write_comment(g, p);

    w(g, "~Mslice_from(");
    generate_address(g, p);
    writef(g, ");~N", p);
}
/* Generate a 'setlimit' node.
 *
 * The cursor is saved and p->left is generated to find the new limit. The
 * old limit is remembered in a declared int variable, the limit is set to
 * the cursor and the cursor is restored. p->aux is then generated with a
 * failure string that puts the limit back; the same statement is emitted
 * after p->aux when its end is reachable. */
static void generate_setlimit(struct generator * g, struct node * p) {
    struct str * cursor_var = vars_newname(g);
    struct str * limit_var = vars_newname(g);

    write_comment(g, p);
    write_savecursor(g, p, cursor_var);
    generate(g, p->left);

    if (!g->unreachable) {
        char * undo_prefix;

        g->B[0] = str_data(limit_var);
        write_declare(g, "int ~B0", p);

        if (p->mode == m_forward) {
            w(g, "~M~B0 = limit - cursor;~N");
            w(g, "~Mlimit = cursor;~N");
        } else {
            w(g, "~M~B0 = limit_backward;~N");
            w(g, "~Mlimit_backward = cursor;~N");
        }

        write_restorecursor(g, p, cursor_var);

        /* Build the statement that undoes the limit change. */
        if (p->mode == m_forward) {
            undo_prefix = "limit += ";
        } else {
            undo_prefix = "limit_backward = ";
        }
        str_assign(g->failure_str, undo_prefix);
        str_append(g->failure_str, limit_var);
        str_append_ch(g->failure_str, ';');

        generate(g, p->aux);

        if (!g->unreachable) {
            write_margin(g);
            write_str(g, g->failure_str);
            write_newline(g);
        }
    }

    str_delete(limit_var);
    str_delete(cursor_var);
}
/* dollar sets snowball up to operate on a string variable as if it were the
 * current string.
 *
 * The emitted code keeps the current object in a fresh variable, switches
 * `current` to the named string with cursor 0 and limit at its length, runs
 * the body, and then (on failure, or on reaching the end of the body) emits
 * "copy_from(<saved>);". */
static void generate_dollar(struct generator * g, struct node * p) {
    struct str * saved_self = vars_newname(g);

    write_comment(g, p);
    g->V[0] = p->name;

    str_assign(g->failure_str, "copy_from(");
    str_append(g->failure_str, saved_self);
    str_append_string(g->failure_str, ");");

    g->B[0] = str_data(saved_self);
    writef(g,
           "~{~M~n ~B0 = this;~N"
           "~Mcurrent = ~V0;~N"
           "~Mcursor = 0;~N"
           "~Mlimit = (current.length());~N",
           p);

    generate(g, p->left);

    if (!g->unreachable) {
        write_margin(g);
        write_str(g, g->failure_str);
        write_newline(g);
    }
    w(g, "~}");

    str_delete(saved_self);
}
/* Emit "<var> <s> <AE>;" where s is the assignment operator text
 * (e.g. "=", "+=") and <var> is p->name. */
static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
    struct node * value = p->AE;

    g->V[0] = p->name;
    g->S[0] = s;

    w(g, "~M~V0 ~S0 ");
    generate_AE(g, value);
    w(g, ";~N");
}
/* Emit a test that fails unless "<var> <s> <AE>" holds, where s is the
 * comparison operator text (e.g. "==", "<") and <var> is p->name. */
static void generate_integer_test(struct generator * g, struct node * p, char * s) {
    struct node * value = p->AE;

    g->V[0] = p->name;
    g->S[0] = s;

    w(g, "~Mif (!(~V0 ~S0 ");
    generate_AE(g, value);
    w(g, "))~N");

    write_block_start(g);
    write_failure(g);
    write_block_end(g);
    g->unreachable = false;
}
/* Generate a routine call: the emitted code fails if "<routine>()" returns
 * false. */
static void generate_call(struct generator * g, struct node * p) {
    struct name * routine = p->name;

    write_comment(g, p);
    g->V[0] = routine;
    write_failure_if(g, "!~V0()", p);
}
/* Generate a grouping test ('complement' selects the "out" variant instead
 * of "in"; backward mode adds the "_b" suffix).
 *
 * When the grouping is flagged no_gaps the emitted test uses the *_range
 * call with only the smallest and largest character; otherwise it uses the
 * *_grouping call with the grouping's table variable as well. */
static void generate_grouping(struct generator * g, struct node * p, int complement) {
    struct grouping * grp = p->name->grouping;
    char * test;

    if (p->mode == m_forward) {
        g->S[0] = "";
    } else {
        g->S[0] = "_b";
    }
    if (complement) {
        g->S[1] = "out";
    } else {
        g->S[1] = "in";
    }

    g->V[0] = p->name;
    g->I[0] = grp->smallest_ch;
    g->I[1] = grp->largest_ch;

    if (grp->no_gaps) {
        test = "!(~S1_range~S0(~I0, ~I1))";
    } else {
        test = "!(~S1_grouping~S0(~V0, ~I0, ~I1))";
    }
    write_failure_if(g, test, p);
}
/* Generate a match against a string variable: the emitted code fails unless
 * eq_v (eq_v_b going backward) succeeds for p->name. */
static void generate_namedstring(struct generator * g, struct node * p) {
    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = "";
    } else {
        g->S[0] = "_b";
    }
    g->V[0] = p->name;

    write_failure_if(g, "!(eq_v~S0(~V0))", p);
}
/* Generate a match against a literal string: the emitted code fails unless
 * eq_s (eq_s_b going backward) succeeds, passing the literal's SIZE and the
 * literal itself. */
static void generate_literalstring(struct generator * g, struct node * p) {
    symbol * literal = p->literalstring;

    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = "";
    } else {
        g->S[0] = "_b";
    }
    g->I[0] = SIZE(literal);
    g->L[0] = literal;

    write_failure_if(g, "!(eq_s~S0(~I0, ~L0))", p);
}
static
void
generate_define
(
struct
generator
*
g
,
struct
node
*
p
)
{
struct
name
*
q
=
p
->
name
;
struct
str
*
saved_output
=
g
->
outbuf
;
struct
str
*
saved_declarations
=
g
->
declarations
;
g
->
S
[
0
]
=
q
->
type
==
t_routine
?
"private"
:
"public"
;
g
->
V
[
0
]
=
q
;
w
(
g
,
"~+~+~N~M~S0 boolean ~V0() {~+~N"
);
g
->
outbuf
=
str_new
();
g
->
declarations
=
str_new
();
g
->
next_label
=
0
;
g
->
var_number
=
0
;
if
(
p
->
amongvar_needed
)
write_declare
(
g
,
"int among_var"
,
p
);
str_clear
(
g
->
failure_str
);
g
->
failure_label
=
x_return
;
g
->
unreachable
=
false
;
generate
(
g
,
p
->
left
);
if
(
!
g
->
unreachable
)
w
(
g
,
"~Mreturn true;~N"
);
w
(
g
,
"~}~-~-"
);
str_append
(
saved_output
,
g
->
declarations
);
str_append
(
saved_output
,
g
->
outbuf
);
str_delete
(
g
->
declarations
);
str_delete
(
g
->
outbuf
);
g
->
declarations
=
saved_declarations
;
g
->
outbuf
=
saved_output
;
}
/* Generate the find_among call for an among/substring node.
 *
 * If the among has neither commands nor a starter, the result is only
 * tested against 0; otherwise it is stored in among_var first so that
 * generate_among() can switch on it. */
static void generate_substring(struct generator * g, struct node * p) {
    struct among * x = p->among;
    int result_needed;

    write_comment(g, p);

    if (p->mode == m_forward) {
        g->S[0] = "";
    } else {
        g->S[0] = "_b";
    }
    g->I[0] = x->number;
    g->I[1] = x->literalstring_count;

    result_needed = !(x->command_count == 0 && x->starter == 0);
    if (result_needed) {
        writef(g, "~Mamong_var = find_among~S0(a_~I0, ~I1);~N", p);
        write_failure_if(g, "among_var == 0", p);
    } else {
        write_failure_if(g, "find_among~S0(a_~I0, ~I1) == 0", p);
    }
}
/* Generate an 'among' node.
 *
 * Emits the find_among call unless a separate 'substring' already did,
 * then (if the among has commands or a starter) the starter followed by a
 * "switch(among_var)": case 0 fails, and each non-empty bracketed command
 * among the children gets the next case number, starting at 1. */
static void generate_among(struct generator * g, struct node * p) {
    struct among * x = p->among;
    int next_case = 1;
    struct node * item;

    if (x->substring == 0) {
        generate_substring(g, p);
    }

    if (x->command_count == 0 && x->starter == 0) {
        return;
    }

    if (x->starter != 0) {
        generate(g, x->starter);
    }

    /* Step over a leading child that is not a literal string. */
    item = p->left;
    if (item != 0 && item->type != c_literalstring) {
        item = item->right;
    }

    w(g, "~Mswitch(among_var) {~N~+");
    w(g, "~Mcase 0:~N~+");
    write_failure(g);
    g->unreachable = false;
    w(g, "~-");

    for (; item != 0; item = item->right) {
        if (item->type != c_bra || item->left == 0) {
            continue;
        }
        g->I[0] = next_case;
        next_case++;
        w(g, "~Mcase ~I0:~N~+");
        generate(g, item);
        if (!g->unreachable) {
            w(g, "~Mbreak;~N");
        }
        w(g, "~-");
        g->unreachable = false;
    }

    write_block_end(g);
}
/* Generate a boolean test: the emitted code fails unless the variable
 * p->name is true. */
static void generate_booltest(struct generator * g, struct node * p) {
    struct name * flag = p->name;

    write_comment(g, p);
    g->V[0] = flag;
    write_failure_if(g, "!(~V0)", p);
}
static
void
generate_false
(
struct
generator
*
g
,
struct
node
*
p
)
{
write_comment
(
g
,
p
);
write_failure
(
g
);
}
/* Generate a debug call: emits "debug(<n>, <line>);" where n is a running
 * count of debug calls emitted so far and line is the node's line number. */
static void generate_debug(struct generator * g, struct node * p) {
    write_comment(g, p);

    g->I[0] = g->debug_count;
    g->debug_count++;
    g->I[1] = p->line_number;

    writef(g, "~Mdebug(~I0, ~I1);~N", p);
}
/* Generate code for a single node by dispatching on its type.
 *
 * Nothing is generated while g->unreachable is set. The failure label and
 * failure string in force on entry are reinstated on return, so a node's
 * generator may change them freely. An unknown node type is reported on
 * stderr and terminates the program. */
static void generate(struct generator * g, struct node * p) {
    int entry_label;
    struct str * entry_str;

    if (g->unreachable) {
        return;
    }

    entry_label = g->failure_label;
    entry_str = str_copy(g->failure_str);

    switch (p->type) {
        case c_define:         generate_define(g, p); break;
        case c_bra:            generate_bra(g, p); break;
        case c_and:            generate_and(g, p); break;
        case c_or:             generate_or(g, p); break;
        case c_backwards:      generate_backwards(g, p); break;
        case c_not:            generate_not(g, p); break;
        case c_set:            generate_set(g, p); break;
        case c_unset:          generate_unset(g, p); break;
        case c_try:            generate_try(g, p); break;
        case c_fail:           generate_fail(g, p); break;
        case c_reverse:
        case c_test:           generate_test(g, p); break;
        case c_do:             generate_do(g, p); break;
        case c_goto:           generate_GO(g, p, 1); break;
        case c_gopast:         generate_GO(g, p, 0); break;
        case c_repeat:         generate_repeat(g, p, 0); break;
        case c_loop:           generate_loop(g, p); break;
        case c_atleast:        generate_atleast(g, p); break;
        case c_setmark:        generate_setmark(g, p); break;
        case c_tomark:         generate_tomark(g, p); break;
        case c_atmark:         generate_atmark(g, p); break;
        case c_hop:            generate_hop(g, p); break;
        case c_delete:         generate_delete(g, p); break;
        case c_next:           generate_next(g, p); break;
        case c_tolimit:        generate_tolimit(g, p); break;
        case c_atlimit:        generate_atlimit(g, p); break;
        case c_leftslice:      generate_leftslice(g, p); break;
        case c_rightslice:     generate_rightslice(g, p); break;
        case c_assignto:       generate_assignto(g, p); break;
        case c_sliceto:        generate_sliceto(g, p); break;
        case c_assign:         generate_assignfrom(g, p); break;
        case c_insert:
        case c_attach:         generate_insert(g, p, p->type); break;
        case c_slicefrom:      generate_slicefrom(g, p); break;
        case c_setlimit:       generate_setlimit(g, p); break;
        case c_dollar:         generate_dollar(g, p); break;
        case c_mathassign:     generate_integer_assign(g, p, "="); break;
        case c_plusassign:     generate_integer_assign(g, p, "+="); break;
        case c_minusassign:    generate_integer_assign(g, p, "-="); break;
        case c_multiplyassign: generate_integer_assign(g, p, "*="); break;
        case c_divideassign:   generate_integer_assign(g, p, "/="); break;
        case c_eq:             generate_integer_test(g, p, "=="); break;
        case c_ne:             generate_integer_test(g, p, "!="); break;
        case c_gr:             generate_integer_test(g, p, ">"); break;
        case c_ge:             generate_integer_test(g, p, ">="); break;
        case c_ls:             generate_integer_test(g, p, "<"); break;
        case c_le:             generate_integer_test(g, p, "<="); break;
        case c_call:           generate_call(g, p); break;
        case c_grouping:       generate_grouping(g, p, false); break;
        case c_non:            generate_grouping(g, p, true); break;
        case c_name:           generate_namedstring(g, p); break;
        case c_literalstring:  generate_literalstring(g, p); break;
        case c_among:          generate_among(g, p); break;
        case c_substring:      generate_substring(g, p); break;
        case c_booltest:       generate_booltest(g, p); break;
        case c_false:          generate_false(g, p); break;
        case c_true:           break;   /* nothing to emit */
        case c_debug:          generate_debug(g, p); break;
        default:
            fprintf(stderr, "%d encountered\n", p->type);
            exit(1);
    }

    g->failure_label = entry_label;
    str_delete(g->failure_str);
    g->failure_str = entry_str;
}
static
void
generate_start_comment
(
struct
generator
*
g
)
{
w
(
g
,
"// This file was generated automatically by the Snowball to Java compiler~N"
);
w
(
g
,
"~N"
);
}
static
void
generate_class_begin
(
struct
generator
*
g
)
{
w
(
g
,
"package "
);
w
(
g
,
g
->
options
->
package
);
w
(
g
,
";~N~N"
);
w
(
g
,
"import "
);
w
(
g
,
g
->
options
->
among_class
);
w
(
g
,
";~N"
"~N"
" /**~N"
" * This class was automatically generated by a Snowball to Java compiler ~N"
" * It implements the stemming algorithm defined by a snowball script.~N"
" */~N"
"~N"
"public class ~n extends "
);
w
(
g
,
g
->
options
->
parent_class_name
);
w
(
g
,
" {~N"
"~N"
"private static final long serialVersionUID = 1L;~N"
"~N"
"~+~+~Mprivate final static ~n methodObject = new ~n ();~N"
"~N"
);
}
static
void
generate_class_end
(
struct
generator
*
g
)
{
w
(
g
,
"~N}"
);
w
(
g
,
"~N~N"
);
}
static
void
generate_equals
(
struct
generator
*
g
)
{
w
(
g
,
"~N"
"~Mpublic boolean equals( Object o ) {~N"
"~+~Mreturn o instanceof "
);
w
(
g
,
g
->
options
->
name
);
w
(
g
,
";~N~-~M}~N"
"~N"
"~Mpublic int hashCode() {~N"
"~+~Mreturn "
);
w
(
g
,
g
->
options
->
name
);
w
(
g
,
".class.getName().hashCode();~N"
"~-~M}~N"
);
w
(
g
,
"~N~N"
);
}
/* Emit the static "a_<number>" array of Among objects for one among.
 *
 * Each entry is written as
 *     new Among ( <string>, <i>, <result>, "<function>", methodObject )
 * where the function name is left empty when the entry has none; entries
 * are separated by commas. */
static void generate_among_table(struct generator * g, struct among * x) {
    struct amongvec * entry = x->b;
    int index;

    g->I[0] = x->number;
    g->I[1] = x->literalstring_count;
    w(g, "~+~+~Mprivate final static Among a_~I0[] = {~N~+");

    for (index = 0; index < x->literalstring_count; index++, entry++) {
        g->I[0] = index;
        g->I[1] = entry->i;
        g->I[2] = entry->result;
        g->L[0] = entry->b;
        /* No separator after the final entry. */
        if (index < x->literalstring_count - 1) {
            g->S[0] = ",";
        } else {
            g->S[0] = "";
        }

        w(g, "~Mnew Among ( ~L0, ~I1, ~I2, \"");
        if (entry->function != 0) {
            write_varname(g, entry->function);
        }
        w(g, "\", methodObject )~S0~N");
    }

    w(g, "~-~M};~-~-~N~N");
}
/* Emit the table for every among in the analyser's list, in list order. */
static void generate_amongs(struct generator * g) {
    struct among * a;

    for (a = g->analyser->amongs; a != 0; a = a->next) {
        generate_among_table(g, a);
    }
}
/* Set bit i of the bitmap b, which holds 8 bits per element. */
static void set_bit(symbol * b, int i) {
    int slot = i / 8;
    b[slot] |= 1 << (i % 8);
}
/* Return non-zero if bit i of the bitmap b (8 bits per element) is set,
 * zero otherwise. The non-zero value is the bit's mask, not necessarily 1. */
static int bit_is_set(symbol * b, int i) {
    int slot = i / 8;
    return b[slot] & (1 << (i % 8));
}
static
void
generate_grouping_table
(
struct
generator
*
g
,
struct
grouping
*
q
)
{
int
range
=
q
->
largest_ch
-
q
->
smallest_ch
+
1
;
int
size
=
(
range
+
7
)
/
8
;
/* assume 8 bits per symbol */
symbol
*
b
=
q
->
b
;
symbol
*
map
=
create_b
(
size
);
int
i
;
for
(
i
=
0
;
i
<
size
;
i
++
)
map
[
i
]
=
0
;
/* Using unicode would require revision here */
for
(
i
=
0
;
i
<
SIZE
(
b
);
i
++
)
set_bit
(
map
,
b
[
i
]
-
q
->
smallest_ch
);
q
->
no_gaps
=
true
;
for
(
i
=
0
;
i
<
range
;
i
++
)
unless
(
bit_is_set
(
map
,
i
))
q
->
no_gaps
=
false
;
unless
(
q
->
no_gaps
)
{
g
->
V
[
0
]
=
q
->
name
;
w
(
g
,
"~+~+~Mprivate static final char ~V0[] = {"
);
for
(
i
=
0
;
i
<
size
;
i
++
)
{
write_int
(
g
,
map
[
i
]);
if
(
i
<
size
-
1
)
w
(
g
,
", "
);
}
w
(
g
,
" };~N~-~-~N"
);
}
lose_b
(
map
);
}
static
void
generate_groupings
(
struct
generator
*
g
)
{
struct
grouping
*
q
=
g
->
analyser
->
groupings
;
until
(
q
==
0
)
{
generate_grouping_table
(
g
,
q
);
q
=
q
->
next
;
}
}
static
void
generate_members
(
struct
generator
*
g
)
{
struct
name
*
q
=
g
->
analyser
->
names
;
until
(
q
==
0
)
{
g
->
V
[
0
]
=
q
;
switch
(
q
->
type
)
{
case
t_string
:
w
(
g
,
" private "
);
w
(
g
,
g
->
options
->
string_class
);
w
(
g
,
" ~W0 = new "
);
w
(
g
,
g
->
options
->
string_class
);
w
(
g
,
"();~N"
);
break
;
case
t_integer
:
w
(
g
,
" private int ~W0;~N"
);
break
;
case
t_boolean
:
w
(
g
,
" private boolean ~W0;~N"
);
break
;
}
q
=
q
->
next
;
}
w
(
g
,
"~N"
);
}
/* Emit the Java copy_from(other) method: one assignment per string, integer
 * and boolean member, then a call to super.copy_from(other). */
static void generate_copyfrom(struct generator * g) {
    struct name * var;

    w(g, "~+~+~Mprivate void copy_from(~n other) {~+~N");

    for (var = g->analyser->names; var != 0; var = var->next) {
        int is_member = var->type == t_string
                     || var->type == t_integer
                     || var->type == t_boolean;
        g->V[0] = var;
        if (is_member) {
            w(g, "~M~W0 = other.~W0;~N");
        }
    }

    w(g, "~Msuper.copy_from(other);~N");
    w(g, "~-~M}~-~-~N");
}
/* Generate every top-level node of the program in order, clearing the
 * unreachable flag after each one. */
static void generate_methods(struct generator * g) {
    struct node * top;

    for (top = g->analyser->program; top != 0; top = top->right) {
        generate(g, top);
        g->unreachable = false;
    }
}
/* Generate the complete Java source for the analysed program.
 *
 * All text is collected in a fresh output buffer, written to
 * g->options->output_java at the end, and the buffers are then released. */
extern void generate_program_java(struct generator * g) {
    g->outbuf = str_new();
    g->failure_str = str_new();

    /* File header and class opening. */
    generate_start_comment(g);
    generate_class_begin(g);

    /* Static tables, fields and methods. */
    generate_amongs(g);
    generate_groupings(g);
    generate_members(g);
    generate_copyfrom(g);
    generate_methods(g);
    generate_equals(g);

    generate_class_end(g);

    output_str(g->options->output_java, g->outbuf);

    str_delete(g->failure_str);
    str_delete(g->outbuf);
}
extern
struct
generator
*
create_generator_java
(
struct
analyser
*
a
,
struct
options
*
o
)
{
NEW
(
generator
,
g
);
g
->
analyser
=
a
;
g
->
options
=
o
;
g
->
margin
=
0
;
g
->
debug_count
=
0
;
g
->
unreachable
=
false
;
return
g
;
}
extern
void
close_generator_java
(
struct
generator
*
g
)
{
FREE
(
g
);
}
libstemmer_c/compiler/header.h
deleted
100644 → 0
View file @
1c6e8c67
typedef
unsigned
char
byte
;
typedef
unsigned
short
symbol
;
#define true 1
#define false 0
#define repeat while(true)
#define unless(C) if(!(C))
#define until(C) while(!(C))
#define MALLOC check_malloc
#define FREE check_free
#define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
/* Declare p as a pointer to a newly allocated array of n 'struct type's.
 * n is parenthesised so that an expression argument such as a + b scales
 * the whole element count rather than only its first operand. */
#define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))
#define STARTSIZE 10
/* SIZE(p) and CAPACITY(p) access the two ints stored immediately before
 * p[0]: SIZE at index -1 and CAPACITY at index -2 (viewing p as int *). */
#define SIZE(p) ((int *)(p))[-1]
#define CAPACITY(p) ((int *)(p))[-2]
extern
symbol
*
create_b
(
int
n
);
extern
void
report_b
(
FILE
*
out
,
symbol
*
p
);
extern
void
lose_b
(
symbol
*
p
);
extern
symbol
*
increase_capacity
(
symbol
*
p
,
int
n
);
extern
symbol
*
move_to_b
(
symbol
*
p
,
int
n
,
symbol
*
q
);
extern
symbol
*
add_to_b
(
symbol
*
p
,
int
n
,
symbol
*
q
);
extern
symbol
*
copy_b
(
symbol
*
p
);
extern
char
*
b_to_s
(
symbol
*
p
);
extern
symbol
*
add_s_to_b
(
symbol
*
p
,
const
char
*
s
);
struct
str
;
/* defined in space.c */
extern
struct
str
*
str_new
(
void
);
extern
void
str_delete
(
struct
str
*
str
);
extern
void
str_append
(
struct
str
*
str
,
struct
str
*
add
);
extern
void
str_append_ch
(
struct
str
*
str
,
char
add
);
extern
void
str_append_b
(
struct
str
*
str
,
symbol
*
q
);
extern
void
str_append_string
(
struct
str
*
str
,
const
char
*
s
);
extern
void
str_append_int
(
struct
str
*
str
,
int
i
);
extern
void
str_clear
(
struct
str
*
str
);
extern
void
str_assign
(
struct
str
*
str
,
char
*
s
);
extern
struct
str
*
str_copy
(
struct
str
*
old
);
extern
symbol
*
str_data
(
struct
str
*
str
);
extern
int
str_len
(
struct
str
*
str
);
extern
int
get_utf8
(
const
symbol
*
p
,
int
*
slot
);
extern
int
put_utf8
(
int
ch
,
symbol
*
p
);
/* Singly linked list node pairing a name with a value (both symbol
 * blocks); the tokeniser keeps a list of these in its m_pairs field. */
struct m_pair {
    struct m_pair * next;
    symbol * name;
    symbol * value;
};
/* Singly linked list node holding a symbol block p together with a
 * position c and a line number. struct tokeniser starts with the same four
 * fields and links to a list of these through its 'next' member. */
struct input {
    struct input * next;
    symbol * p;
    int c;
    int line_number;
};
/* Singly linked list node holding one symbol block; the tokeniser keeps a
 * list of these in its 'includes' field. */
struct include {
    struct include * next;
    symbol * b;
};
/* Tokeniser state, created by create_tokeniser(), advanced by read_token()
 * and released by close_tokeniser(). The first four members have the same
 * names and types as those of struct input. */
struct tokeniser {
    struct input * next;
    symbol * p;
    int c;
    int line_number;
    symbol * b;
    symbol * b2;
    int number;
    int m_start;
    int m_end;
    struct m_pair * m_pairs;
    int get_depth;
    int error_count;
    int token;
    int previous_token;
    byte token_held;
    byte widechars;
    byte utf8;
    int omission;
    struct include * includes;
};
extern
symbol
*
get_input
(
symbol
*
p
);
extern
struct
tokeniser
*
create_tokeniser
(
symbol
*
b
);
extern
int
read_token
(
struct
tokeniser
*
t
);
extern
byte
*
name_of_token
(
int
code
);
extern
void
close_tokeniser
(
struct
tokeniser
*
t
);
/* Token / node type codes. The first enumerators are supplied by
 * syswords2.h; the ones listed after the include follow them in value. */
enum token_codes {
#include "syswords2.h"
    c_mathassign,
    c_name,
    c_number,
    c_literalstring,
    c_neg,
    c_call,
    c_grouping,
    c_booltest
};
extern
int
space_count
;
extern
void
*
check_malloc
(
int
n
);
extern
void
check_free
(
void
*
p
);
struct
node
;
/* A declared name (see enum name_types for the possible kinds), kept in a
 * singly linked list. */
struct name {
    struct name * next;
    symbol * b;
    int type;                   /* t_string etc */
    int mode;                   /* )_ for routines, externals */
    struct node * definition;   /* )                          */
    int count;                  /* 0, 1, 2 for each type */
    struct grouping * grouping; /* for grouping names */
    byte referenced;
    byte used;
};
/* Singly linked list node holding one literal string as a symbol block. */
struct literalstring {
    struct literalstring * next;
    symbol * b;
};
/* One entry of an among table; struct among points at an array of these. */
struct amongvec {
    symbol * b;             /* the string giving the case */
    int size;               /* - and its size */
    struct node * p;        /* the corresponding command */
    int i;                  /* the amongvec index of the longest substring of b */
    int result;             /* the numeric result for the case */
    struct name * function; /* may be 0, meaning no function for this entry */
};
struct
among
{
struct
among
*
next
;
struct
amongvec
*
b
;
/* pointer to the amongvec */
int
number
;
/* amongs are numbered 0, 1, 2 ... */
int
literalstring_count
;
/* in this among */
int
command_count
;
/* in this among */
struct
node
*
starter
;
/* i.e. among( (starter) 'string' ... ) */
struct
node
*
substring
;
/* i.e. substring ... among ( ... ) */
};
/* Description of one character grouping, kept in a singly linked list. */
struct grouping {
    struct grouping * next;
    int number;             /* groupings are numbered 0, 1, 2 ... */
    symbol * b;             /* the characters of this group */
    int largest_ch;         /* character with max code */
    int smallest_ch;        /* character with min code */
    byte no_gaps;           /* not used in generator.c after 11/5/05 */
    struct name * name;     /* so g->name->grouping == g */
};
/* A node of the parsed program. 'type' holds a token code and 'mode' an
 * analyser mode (m_forward / m_backward). The generators reach a node's
 * first child through 'left' and step to following siblings through
 * 'right'. */
struct node {
    struct node * next;
    struct node * left;
    struct node * aux;          /* used in setlimit */
    struct among * among;       /* used in among */
    struct node * right;
    int type;
    int mode;
    struct node * AE;
    struct name * name;
    symbol * literalstring;
    int number;
    int line_number;
    int amongvar_needed;        /* used in routine definitions */
};
enum
name_types
{
t_size
=
6
,
t_string
=
0
,
t_boolean
=
1
,
t_integer
=
2
,
t_routine
=
3
,
t_external
=
4
,
t_grouping
=
5
/* If this list is extended, adjust wvn in generator.c */
};
/* In name_count[i] below, remember that
type is
----+----
0 | string
1 | boolean
2 | integer
3 | routine
4 | external
5 | grouping
*/
/* State of the analyser: the tokeniser it reads from plus the lists of
   nodes, names, literal strings, amongs and groupings it builds. */
struct analyser {
    struct tokeniser * tokeniser;
    struct node * nodes;
    struct name * names;
    struct literalstring * literalstrings;
    int mode;
    byte modifyable;               /* false inside reverse(...) */
    struct node * program;
    struct node * program_end;
    int name_count[t_size];        /* name_count[i] counts the number of names of type i */
    struct among * amongs;
    struct among * amongs_end;
    int among_count;
    int amongvar_needed;           /* used in reading routine definitions */
    struct grouping * groupings;
    struct grouping * groupings_end;
    struct node * substring;       /* pending 'substring' in current routine definition */
    byte utf8;
};
/* Values for the 'mode' fields: m_forward is 0 and m_backward follows it. */
enum analyser_modes {

    m_forward = 0,
    m_backward     /*, m_integer */
};
extern
void
print_program
(
struct
analyser
*
a
);
extern
struct
analyser
*
create_analyser
(
struct
tokeniser
*
t
);
extern
void
close_analyser
(
struct
analyser
*
a
);
extern
void
read_program
(
struct
analyser
*
a
);
/* State carried by a code generator while it writes out a program. */
struct generator {
    struct analyser * analyser;
    struct options * options;
    int unreachable;              /* 0 if code can be reached, 1 if current code
                                   * is unreachable. */
    int var_number;               /* Number of next variable to use. */
    struct str * outbuf;          /* temporary str to store output */
    struct str * declarations;    /* str storing variable declarations */
    int next_label;
    int margin;

    const char * failure_string;  /* String to output in case of a failure. */
    struct str * failure_str;     /* This is used by the java generator instead of failure_string */

    int label_used;               /* Keep track of whether the failure label is used. */
    int failure_label;
    int debug_count;

    const char * S[10];           /* strings */
    symbol * B[10];               /* blocks */
    int I[10];                    /* integers */
    struct name * V[5];           /* variables */
    symbol * L[5];                /* literals, used in formatted write */

    int line_count;               /* counts number of lines output */
    int line_labelled;            /* in ANSI C, will need extra ';' if it is a block end */
    int literalstring_count;
    int keep_count;               /* used to number keep/restore pairs to avoid compiler warnings
                                     about shadowed variables */
};
/* Settings for one run of the compiler. */
struct options {

    /* for the command line: */

    char * output_file;
    char * name;
    FILE * output_c;
    FILE * output_h;
    FILE * output_java;
    byte syntax_tree;
    byte widechars;
    enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS } make_lang;
    char * externals_prefix;
    char * variables_prefix;
    char * runtime_path;
    char * parent_class_name;
    char * package;
    char * string_class;
    char * among_class;
    struct include * includes;
    struct include * includes_end;
    byte utf8;
};
/* Generator for C code. */
extern
struct
generator
*
create_generator_c
(
struct
analyser
*
a
,
struct
options
*
o
);
extern
void
close_generator_c
(
struct
generator
*
g
);
extern
void
generate_program_c
(
struct
generator
*
g
);
/* Generator for Java code. */
extern
struct
generator
*
create_generator_java
(
struct
analyser
*
a
,
struct
options
*
o
);
extern
void
close_generator_java
(
struct
generator
*
g
);
extern
void
generate_program_java
(
struct
generator
*
g
);
libstemmer_c/compiler/space.c
deleted
100644 → 0
View file @
1c6e8c67
#include <stdio.h>
/* for printf */
#include <stdlib.h>
/* malloc, free */
#include <string.h>
/* memmove */
#include "header.h"
#define HEAD 2*sizeof(int)
#define EXTENDER 40
/* This module provides a simple mechanism for arbitrary length writable
strings, called 'blocks'. They are 'symbol *' items rather than 'char *'
items however.
The calls are:
symbol * b = create_b(n);
- create an empty block b with room for n symbols
b = increase_capacity(b, n);
- increase the capacity of block b by n symbols (b may change)
b2 = copy_b(b)
- copy block b into b2
lose_b(b);
- lose block b
b = move_to_b(b, n, p);
- set the data in b to be the n symbols at address p
b = add_to_b(b, n, p);
- add the n symbols at address p to the end of the data in b
SIZE(b)
- is the number of symbols in b
For example:
symbol * b = create_b(0);
{ int i;
char p[10];
for (i = 0; i < 100; i++) {
sprintf(p, " %d", i);
b = add_s_to_b(b, p);
}
}
and b contains " 0 1 2 ... 99" spaced out as symbols.
*/
/* For a block b, SIZE(b) is the number of symbols so far written into it,
CAPACITY(b) the total number it can contain, so SIZE(b) <= CAPACITY(b).
In fact blocks have 1 extra character over the promised capacity so
they can be zero terminated by 'b[SIZE(b)] = 0;' without fear of
overwriting.
*/
extern
symbol
*
create_b
(
int
n
)
{
symbol
*
p
=
(
symbol
*
)
(
HEAD
+
(
char
*
)
MALLOC
(
HEAD
+
(
n
+
1
)
*
sizeof
(
symbol
)));
CAPACITY
(
p
)
=
n
;
SIZE
(
p
)
=
0
;
return
p
;
}
/* Write the SIZE(p) symbols of block p to 'out', one "%c" at a time. */
extern void report_b(FILE * out, symbol * p) {
    int k = 0;
    while (k < SIZE(p)) {
        fprintf(out, "%c", p[k]);
        k++;
    }
}
/* Release block p; a null p is ignored. The allocation starts HEAD bytes
   before the symbol data, so that is the address handed to FREE. */
extern void lose_b(symbol * p) {
    if (p != 0) {
        FREE((char *) p - HEAD);
    }
}
/* Replace block p by a new block whose capacity is larger by n + EXTENDER
   symbols. The old contents and SIZE are carried over and p is released,
   so callers must continue with the returned pointer. */
extern symbol * increase_capacity(symbol * p, int n) {
    int old_capacity = CAPACITY(p);
    symbol * bigger = create_b(old_capacity + n + EXTENDER);
    memmove(bigger, p, old_capacity * sizeof(symbol));
    SIZE(bigger) = SIZE(p);
    lose_b(p);
    return bigger;
}
/* Set the contents of block p to the n symbols at q, growing p first when
   its capacity is too small. Returns the (possibly moved) block. */
extern symbol * move_to_b(symbol * p, int n, symbol * q) {
    if (n > CAPACITY(p)) {
        p = increase_capacity(p, n - CAPACITY(p));
    }
    memmove(p, q, n * sizeof(symbol));
    SIZE(p) = n;
    return p;
}
/* Append the n symbols at q to the data already in block p, growing p when
   needed. Returns the (possibly moved) block. */
extern symbol * add_to_b(symbol * p, int n, symbol * q) {
    int needed = SIZE(p) + n;
    if (needed > CAPACITY(p)) {
        p = increase_capacity(p, needed - CAPACITY(p));
    }
    memmove(p + SIZE(p), q, n * sizeof(symbol));
    SIZE(p) += n;
    return p;
}
/* Return a newly created block holding a copy of the data in p. */
extern symbol * copy_b(symbol * p) {
    int length = SIZE(p);
    symbol * duplicate = create_b(length);
    /* The new block already has room for 'length' symbols, so move_to_b
       does not need to reallocate it. */
    move_to_b(duplicate, length, p);
    return duplicate;
}
/* Count of allocations made through check_malloc and not yet released
   through check_free (presumably inspected elsewhere to report unfreed
   blocks -- confirm against the driver). */
int space_count = 0;

/* Allocate n bytes and count the allocation.

   Callers in this file use the result immediately without testing it, so
   an allocation failure (or a negative n) is reported on stderr and the
   program exits instead of handing back a null pointer. A request for 0
   bytes is rounded up to 1 so that success is never reported as null. */
extern void * check_malloc(int n) {
    void * p = 0;
    if (n >= 0) p = malloc(n > 0 ? (size_t) n : 1);
    if (p == 0) {
        fprintf(stderr, "out of memory (%d bytes requested)\n", n);
        exit(1);
    }
    space_count++;   /* only successful allocations are counted */
    return p;
}
/* Release memory obtained from check_malloc and decrement the allocation
   counter. */
extern void check_free(void * p) {
    free(p);
    space_count--;
}
/* Convert a block to a zero terminated string. The string is obtained with
   plain malloc (not MALLOC), so it is released with free(). Each symbol is
   narrowed to a char. */
extern char * b_to_s(symbol * p) {
    int length = SIZE(p);
    char * text = (char *) malloc(length + 1);
    char * out = text;
    symbol * in = p;
    symbol * end = p + length;
    while (in < end) {
        *out++ = *in++;
    }
    *out = 0;
    return text;
}
/* Append a zero terminated string to block p, one char per symbol. A null
   p means "no block yet": one is created. Returns the (possibly moved or
   new) block. */
extern symbol * add_s_to_b(symbol * p, const char * s) {
    int added = strlen(s);
    int base;
    int j;
    if (p == 0) p = create_b(added);
    base = SIZE(p);
    if (base + added > CAPACITY(p)) {
        p = increase_capacity(p, base + added - CAPACITY(p));
    }
    for (j = 0; j < added; j++) {
        p[base + j] = s[j];
    }
    SIZE(p) += added;
    return p;
}
/* The next section defines string handling capabilities in terms
of the lower level block handling capabilities of space.c */
/* -------------------------------------------------------------*/
/* A growable string: a thin wrapper around one symbol block. */
struct str {
    symbol * data;   /* the block holding the characters */
};
/* Create a new, empty str. The wrapper comes from plain malloc and the
   data from create_b; str_delete releases both. */
extern struct str * str_new() {
    struct str * fresh = (struct str *) malloc(sizeof(struct str));
    fresh->data = create_b(0);
    return fresh;
}
/* Delete a string. */
extern
void
str_delete
(
struct
str
*
str
)
{
lose_b
(
str
->
data
);
free
(
str
);
}
/* Append the contents of str 'add' to 'str'. */
extern void str_append(struct str * str, struct str * add) {
    symbol * extra = add->data;
    int extra_len = SIZE(extra);
    str->data = add_to_b(str->data, extra_len, extra);
}
/* Append a single character to 'str'. */
extern void str_append_ch(struct str * str, char add) {
    symbol one = add;   /* widen the char to a symbol before appending */
    str->data = add_to_b(str->data, 1, &one);
}
/* Append the whole of low level block q to 'str'. */
extern void str_append_b(struct str * str, symbol * q) {
    int q_len = SIZE(q);
    str->data = add_to_b(str->data, q_len, q);
}
/* Append a (char *, null teminated) string to a str. */
extern
void
str_append_string
(
struct
str
*
str
,
const
char
*
s
)
{
str
->
data
=
add_s_to_b
(
str
->
data
,
s
);
}
/* Append the decimal ("%d") rendering of integer i to 'str'. */
extern void str_append_int(struct str * str, int i) {
    char digits[30];
    sprintf(digits, "%d", i);
    str_append_string(str, digits);
}
/* Clear a string */
extern
void
str_clear
(
struct
str
*
str
)
{
SIZE
(
str
->
data
)
=
0
;
}
/* Set a string */
extern
void
str_assign
(
struct
str
*
str
,
char
*
s
)
{
str_clear
(
str
);
str_append_string
(
str
,
s
);
}
/* Copy a string. */
extern
struct
str
*
str_copy
(
struct
str
*
old
)
{
struct
str
*
newstr
=
str_new
();
str_append
(
newstr
,
old
);
return
newstr
;
}
/* Return the block stored in 'str' (not a copy). */
extern symbol * str_data(struct str * str) {
    symbol * data = str->data;
    return data;
}
/* Get the length of the str. */
extern
int
str_len
(
struct
str
*
str
)
{
return
SIZE
(
str
->
data
);
}
/* Decode one character of at most three bytes starting at p, store its
   code in *slot and return the number of symbols consumed (1, 2 or 3).
   Any lead value below 0xC0 is returned unchanged as a one-symbol
   character. */
extern int get_utf8(const symbol * p, int * slot) {
    int lead = p[0];
    if (lead < 0xC0) {   /* 1100 0000 */
        *slot = lead;
        return 1;
    }
    if (lead < 0xE0) {   /* 1110 0000 */
        *slot = (lead & 0x1F) << 6 | (p[1] & 0x3F);
        return 2;
    }
    *slot = (lead & 0xF) << 12 | (p[1] & 0x3F) << 6 | (p[2] & 0x3F);
    return 3;
}
/* Encode character code ch at p using one, two or three symbols and return
   how many were written. Codes below 0x80 take one symbol, codes below
   0x800 take two, everything else takes three. */
extern int put_utf8(int ch, symbol * p) {
    if (ch >= 0x800) {
        p[0] = (ch >> 12) | 0xE0;
        p[1] = ((ch >> 6) & 0x3F) | 0x80;
        p[2] = (ch & 0x3F) | 0x80;
        return 3;
    }
    if (ch >= 0x80) {
        p[0] = (ch >> 6) | 0xC0;
        p[1] = (ch & 0x3F) | 0x80;
        return 2;
    }
    p[0] = ch;
    return 1;
}
libstemmer_c/compiler/syswords.h
deleted
100644 → 0
View file @
1c6e8c67
/* Table of system words, searched by binary search in find_word.
   Entry 0 is a sentinel (empty word) whose code field holds the table
   length. The remaining entries must stay ordered by word length first and
   by ASCII character order within one length. */
static struct system_word vocab[80+1] = {

    {  0, (byte *) "", 80+1 },

    {  1, (byte *) "$", c_dollar },
    {  1, (byte *) "(", c_bra },
    {  1, (byte *) ")", c_ket },
    {  1, (byte *) "*", c_multiply },
    {  1, (byte *) "+", c_plus },
    {  1, (byte *) "-", c_minus },
    {  1, (byte *) "/", c_divide },
    {  1, (byte *) "<", c_ls },
    {  1, (byte *) "=", c_assign },
    {  1, (byte *) ">", c_gr },
    {  1, (byte *) "?", c_debug },
    {  1, (byte *) "[", c_leftslice },
    {  1, (byte *) "]", c_rightslice },

    {  2, (byte *) "!=", c_ne },
    {  2, (byte *) "*=", c_multiplyassign },
    {  2, (byte *) "+=", c_plusassign },
    {  2, (byte *) "-=", c_minusassign },
    {  2, (byte *) "->", c_sliceto },
    {  2, (byte *) "/*", c_comment2 },
    {  2, (byte *) "//", c_comment1 },
    {  2, (byte *) "/=", c_divideassign },
    {  2, (byte *) "<+", c_insert },
    {  2, (byte *) "<-", c_slicefrom },
    {  2, (byte *) "<=", c_le },
    {  2, (byte *) "==", c_eq },
    {  2, (byte *) "=>", c_assignto },
    {  2, (byte *) ">=", c_ge },
    {  2, (byte *) "as", c_as },
    {  2, (byte *) "do", c_do },
    {  2, (byte *) "or", c_or },

    {  3, (byte *) "and", c_and },
    {  3, (byte *) "for", c_for },
    {  3, (byte *) "get", c_get },
    {  3, (byte *) "hex", c_hex },
    {  3, (byte *) "hop", c_hop },
    {  3, (byte *) "non", c_non },
    {  3, (byte *) "not", c_not },
    {  3, (byte *) "set", c_set },
    {  3, (byte *) "try", c_try },

    {  4, (byte *) "fail", c_fail },
    {  4, (byte *) "goto", c_goto },
    {  4, (byte *) "loop", c_loop },
    {  4, (byte *) "next", c_next },
    {  4, (byte *) "size", c_size },
    {  4, (byte *) "test", c_test },
    {  4, (byte *) "true", c_true },

    {  5, (byte *) "among", c_among },
    {  5, (byte *) "false", c_false },
    {  5, (byte *) "limit", c_limit },
    {  5, (byte *) "unset", c_unset },

    {  6, (byte *) "atmark", c_atmark },
    {  6, (byte *) "attach", c_attach },
    {  6, (byte *) "cursor", c_cursor },
    {  6, (byte *) "define", c_define },
    {  6, (byte *) "delete", c_delete },
    {  6, (byte *) "gopast", c_gopast },
    {  6, (byte *) "insert", c_insert },
    {  6, (byte *) "maxint", c_maxint },
    {  6, (byte *) "minint", c_minint },
    {  6, (byte *) "repeat", c_repeat },
    {  6, (byte *) "sizeof", c_sizeof },
    {  6, (byte *) "tomark", c_tomark },

    {  7, (byte *) "atleast", c_atleast },
    {  7, (byte *) "atlimit", c_atlimit },
    {  7, (byte *) "decimal", c_decimal },
    {  7, (byte *) "reverse", c_reverse },
    {  7, (byte *) "setmark", c_setmark },
    {  7, (byte *) "strings", c_strings },
    {  7, (byte *) "tolimit", c_tolimit },

    {  8, (byte *) "booleans", c_booleans },
    {  8, (byte *) "integers", c_integers },
    {  8, (byte *) "routines", c_routines },
    {  8, (byte *) "setlimit", c_setlimit },

    {  9, (byte *) "backwards", c_backwards },
    {  9, (byte *) "externals", c_externals },
    {  9, (byte *) "groupings", c_groupings },
    {  9, (byte *) "stringdef", c_stringdef },
    {  9, (byte *) "substring", c_substring },

    { 12, (byte *) "backwardmode", c_backwardmode },

    { 13, (byte *) "stringescapes", c_stringescapes }
};
libstemmer_c/compiler/syswords2.h
deleted
100644 → 0
View file @
1c6e8c67
c_among
=
4
,
c_and
,
c_as
,
c_assign
,
c_assignto
,
c_atleast
,
c_atlimit
,
c_atmark
,
c_attach
,
c_backwardmode
,
c_backwards
,
c_booleans
,
c_bra
,
c_comment1
,
c_comment2
,
c_cursor
,
c_debug
,
c_decimal
,
c_define
,
c_delete
,
c_divide
,
c_divideassign
,
c_do
,
c_dollar
,
c_eq
,
c_externals
,
c_fail
,
c_false
,
c_for
,
c_ge
,
c_get
,
c_gopast
,
c_goto
,
c_gr
,
c_groupings
,
c_hex
,
c_hop
,
c_insert
,
c_integers
,
c_ket
,
c_le
,
c_leftslice
,
c_limit
,
c_loop
,
c_ls
,
c_maxint
,
c_minint
,
c_minus
,
c_minusassign
,
c_multiply
,
c_multiplyassign
,
c_ne
,
c_next
,
c_non
,
c_not
,
c_or
,
c_plus
,
c_plusassign
,
c_repeat
,
c_reverse
,
c_rightslice
,
c_routines
,
c_set
,
c_setlimit
,
c_setmark
,
c_size
,
c_sizeof
,
c_slicefrom
,
c_sliceto
,
c_stringdef
,
c_stringescapes
,
c_strings
,
c_substring
,
c_test
,
c_tolimit
,
c_tomark
,
c_true
,
c_try
,
c_unset
,
libstemmer_c/compiler/tokeniser.c
deleted
100644 → 0
View file @
1c6e8c67
#include <stdio.h>
/* stderr etc */
#include <stdlib.h>
/* malloc free */
#include <string.h>
/* strlen */
#include <ctype.h>
/* isalpha etc */
#include "header.h"
/* One row of the system word table (vocab). */
struct system_word {
    int s_size;   /* size of system word */
    byte * s;     /* pointer to the system word */
    int code;     /* its internal code */
};
/* ASCII collating assumed in syswords.h */
#include "syswords.h"
/* Return the lesser of a and b. */
static int smaller(int a, int b) {
    if (b < a) return b;
    return a;
}
/* Read the whole of the file whose name is held in block p into a new
   block, one symbol per character read. Returns 0 if the file cannot be
   opened. */
extern symbol * get_input(symbol * p) {
    char * path = b_to_s(p);
    FILE * input = fopen(path, "r");
    symbol * u;
    int size = 0;
    int ch;

    free(path);
    if (input == 0) return 0;

    u = create_b(STARTSIZE);
    while ((ch = getc(input)) != EOF) {
        /* SIZE(u) is only set at the end, so fullness is judged against the
           running count; the block grows by about half each time. */
        if (size >= CAPACITY(u)) u = increase_capacity(u, size / 2);
        u[size++] = ch;
    }
    fclose(input);
    SIZE(u) = size;
    return u;
}
static
void
error
(
struct
tokeniser
*
t
,
char
*
s1
,
int
n
,
symbol
*
p
,
char
*
s2
)
{
if
(
t
->
error_count
==
20
)
{
fprintf
(
stderr
,
"... etc
\n
"
);
exit
(
1
);
}
fprintf
(
stderr
,
"Line %d"
,
t
->
line_number
);
if
(
t
->
get_depth
>
0
)
fprintf
(
stderr
,
" (of included file)"
);
fprintf
(
stderr
,
": "
);
unless
(
s1
==
0
)
fprintf
(
stderr
,
"%s"
,
s1
);
unless
(
p
==
0
)
{
int
i
;
for
(
i
=
0
;
i
<
n
;
i
++
)
fprintf
(
stderr
,
"%c"
,
p
[
i
]);
}
unless
(
s2
==
0
)
fprintf
(
stderr
,
"%s"
,
s2
);
fprintf
(
stderr
,
"
\n
"
);
t
->
error_count
++
;
}
/* Report an error that consists of the plain message s only. */
static void error1(struct tokeniser * t, char * s) {
    error(t, s, 0, 0, 0);
}
/* Report that the text ended unexpectedly after the construct named s. */
static void error2(struct tokeniser * t, char * s) {
    error(t, "unexpected end of text after ", 0, 0, s);
}
/* Order the m symbols at p against the n bytes at q: shorter words sort
   first; words of equal length are ordered by their first differing
   character. Returns <0, 0 or >0. */
static int compare_words(int m, symbol * p, int n, byte * q) {
    int k;
    if (m != n) return m - n;
    for (k = 0; k < n; k++) {
        if (p[k] != q[k]) return p[k] - q[k];
    }
    return 0;
}
/* Binary search of vocab for the n symbols at p. Returns the word's code,
   or -1 when it is not a system word. The search range starts as
   [0, vocab[0].code), where entry 0 is the sentinel holding the table
   length. */
static int find_word(int n, symbol * p) {
    int lo = 0;
    int hi = vocab[0].code;
    do {
        int mid = lo + (hi - lo) / 2;
        struct system_word * w = &vocab[mid];
        int diff = compare_words(n, p, w->s_size, w->s);
        if (diff == 0) return w->code;
        if (diff < 0) hi = mid;
        else lo = mid;
    } while (hi - lo != 1);
    return -1;
}
/* Return the value of the n decimal digit symbols at p. The caller has
   already established that they are digits. */
static int get_number(int n, symbol * p) {
    int value = 0;
    int k = 0;
    while (k < n) {
        value = 10 * value + p[k] - '0';
        k++;
    }
    return value;
}
static
int
eq_s
(
struct
tokeniser
*
t
,
char
*
s
)
{
int
l
=
strlen
(
s
);
if
(
SIZE
(
t
->
p
)
-
t
->
c
<
l
)
return
false
;
{
int
i
;
for
(
i
=
0
;
i
<
l
;
i
++
)
if
(
t
->
p
[
t
->
c
+
i
]
!=
s
[
i
])
return
false
;
}
t
->
c
+=
l
;
return
true
;
}
/* Return true if ch is newline, carriage return, tab or space. As a side
   effect a newline increments the tokeniser's line number. */
static int white_space(struct tokeniser * t, int ch) {
    if (ch == '\n') {
        t->line_number++;
        return true;
    }
    if (ch == '\r' || ch == '\t' || ch == ' ') return true;
    return false;
}
/* Look up the n symbols at p among the names in the tokeniser's m_pairs
   list. Returns the value stored with that name, or 0 when there is no
   such name. */
static symbol * find_in_m(struct tokeniser * t, int n, symbol * p) {
    struct m_pair * q;
    for (q = t->m_pairs; q != 0; q = q->next) {
        symbol * name = q->name;
        if (n == SIZE(name) && memcmp(name, p, n * sizeof(symbol)) == 0) {
            return q->value;
        }
    }
    return 0;
}
static
int
read_literal_string
(
struct
tokeniser
*
t
,
int
c
)
{
symbol
*
p
=
t
->
p
;
int
ch
;
SIZE
(
t
->
b
)
=
0
;
repeat
{
if
(
c
>=
SIZE
(
p
))
{
error2
(
t
,
"'"
);
return
c
;
}
ch
=
p
[
c
];
if
(
ch
==
'\n'
)
{
error1
(
t
,
"string not terminated"
);
return
c
;
}
c
++
;
if
(
ch
==
t
->
m_start
)
{
int
c0
=
c
;
int
newlines
=
false
;
/* no newlines as yet */
int
black_found
=
false
;
/* no printing chars as yet */
repeat
{
if
(
c
>=
SIZE
(
p
))
{
error2
(
t
,
"'"
);
return
c
;
}
ch
=
p
[
c
];
c
++
;
if
(
ch
==
t
->
m_end
)
break
;
unless
(
white_space
(
t
,
ch
))
black_found
=
true
;
if
(
ch
==
'\n'
)
newlines
=
true
;
if
(
newlines
&&
black_found
)
{
error1
(
t
,
"string not terminated"
);
return
c
;
}
}
unless
(
newlines
)
{
int
n
=
c
-
c0
-
1
;
/* macro size */
int
firstch
=
p
[
c0
];
symbol
*
q
=
find_in_m
(
t
,
n
,
p
+
c0
);
if
(
q
==
0
)
{
if
(
n
==
1
&&
(
firstch
==
'\''
||
firstch
==
t
->
m_start
))
t
->
b
=
add_to_b
(
t
->
b
,
1
,
p
+
c0
);
else
error
(
t
,
"string macro '"
,
n
,
p
+
c0
,
"' undeclared"
);
}
else
t
->
b
=
add_to_b
(
t
->
b
,
SIZE
(
q
),
q
);
}
}
else
{
if
(
ch
==
'\''
)
return
c
;
t
->
b
=
add_to_b
(
t
->
b
,
1
,
p
+
c
-
1
);
}
}
}
static
int
next_token
(
struct
tokeniser
*
t
)
{
symbol
*
p
=
t
->
p
;
int
c
=
t
->
c
;
int
ch
;
int
code
=
-
1
;
repeat
{
if
(
c
>=
SIZE
(
p
))
{
t
->
c
=
c
;
return
-
1
;
}
ch
=
p
[
c
];
if
(
white_space
(
t
,
ch
))
{
c
++
;
continue
;
}
if
(
isalpha
(
ch
))
{
int
c0
=
c
;
while
(
c
<
SIZE
(
p
)
&&
(
isalnum
(
p
[
c
])
||
p
[
c
]
==
'_'
))
c
++
;
code
=
find_word
(
c
-
c0
,
p
+
c0
);
if
(
code
<
0
)
{
t
->
b
=
move_to_b
(
t
->
b
,
c
-
c0
,
p
+
c0
);
code
=
c_name
;
}
}
else
if
(
isdigit
(
ch
))
{
int
c0
=
c
;
while
(
c
<
SIZE
(
p
)
&&
isdigit
(
p
[
c
]))
c
++
;
t
->
number
=
get_number
(
c
-
c0
,
p
+
c0
);
code
=
c_number
;
}
else
if
(
ch
==
'\''
)
{
c
=
read_literal_string
(
t
,
c
+
1
);
code
=
c_literalstring
;
}
else
{
int
lim
=
smaller
(
2
,
SIZE
(
p
)
-
c
);
int
i
;
for
(
i
=
lim
;
i
>
0
;
i
--
)
{
code
=
find_word
(
i
,
p
+
c
);
if
(
code
>=
0
)
{
c
+=
i
;
break
;
}
}
}
if
(
code
>=
0
)
{
t
->
c
=
c
;
return
code
;
}
error
(
t
,
"'"
,
1
,
p
+
c
,
"' unknown"
);
c
++
;
continue
;
}
}
/* Return the symbol at the cursor and advance past it, or return -1
   (leaving the cursor where it is) at end of input. */
static int next_char(struct tokeniser * t) {
    int ch;
    if (t->c >= SIZE(t->p)) return -1;
    ch = t->p[t->c];
    t->c++;
    return ch;
}
/* Like next_char, but skips white space first. Returns -1 at end of
   input. */
static int next_real_char(struct tokeniser * t) {
    int ch;
    do {
        ch = next_char(t);
    } while (white_space(t, ch));
    return ch;
}
/* Read the next white-space delimited run of characters from the input
   into t->b2 (used for the name that follows 'stringdef'). Reports an
   error and leaves t->b2 untouched when only end of text follows.

   The run ends at white space or at end of text. next_char consumes the
   terminating white space but does not advance at end of text, so the end
   of the run is computed separately for the two cases; using t->c - 1 in
   both would drop the last character of a run that ends the text. */
static void read_chars(struct tokeniser * t) {
    int ch = next_real_char(t);
    if (ch < 0) {
        error2(t, "stringdef");
        return;
    }
    {
        int c0 = t->c - 1;   /* index of the first character of the run */
        int end;             /* index one past the last character of the run */
        for (;;) {
            ch = next_char(t);
            if (ch < 0) {
                end = t->c;          /* end of text: cursor did not move */
                break;
            }
            if (white_space(t, ch)) {
                end = t->c - 1;      /* cursor is just past the white space */
                break;
            }
        }
        t->b2 = move_to_b(t->b2, end - c0, t->p + c0);
    }
}
/* Return the value of decimal digit character ch, or -1 if ch is not a
   decimal digit. */
static int decimal_to_num(int ch) {
    return (ch >= '0' && ch <= '9') ? ch - '0' : -1;
}
/* Return the value of hex digit character ch ('0'-'9' or lower case
   'a'-'f'), or -1 for anything else. Upper case letters are not accepted
   here. */
static int hex_to_num(int ch) {
    if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a');
    if (ch >= '0' && ch <= '9') return ch - '0';
    return -1;
}
/* Convert block p in place from a list of space separated numbers (decimal
   when base is 10, otherwise hex) to the characters with those codes.
   Codes are limited to 0xffff when t->widechars or t->utf8 is set and to
   0xff otherwise; with t->utf8 each code is written in its UTF-8 form.
   On any error a message is reported and p is left with its old SIZE.

   Two defects of the earlier form are avoided here: the symbol at the
   cursor is only read after the cursor has been checked against SIZE(p),
   and the range check is applied after every digit, so an over-long digit
   run is rejected before the accumulator can overflow. */
static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
    /* largest code permitted in the current mode */
    int limit = (t->widechars || t->utf8) ? 0xffff : 0xff;
    int c = 0;   /* read cursor */
    int d = 0;   /* write cursor */
    for (;;) {
        int number = 0;

        while (c < SIZE(p) && p[c] == ' ') c++;
        if (c == SIZE(p)) break;

        while (c < SIZE(p) && p[c] != ' ') {
            int digit;
            if (base == 10) {
                digit = decimal_to_num(p[c]);
                if (digit < 0) {
                    error1(t, "decimal string contains non-digits");
                    return;
                }
            } else {
                digit = hex_to_num(tolower(p[c]));
                if (digit < 0) {
                    error1(t, "hex string contains non-hex characters");
                    return;
                }
            }
            /* number <= limit <= 0xffff here, so this cannot overflow */
            number = base * number + digit;
            if (number > limit) {
                if (limit == 0xffff) error1(t, "character values exceed 64K");
                else error1(t, "character values exceed 256");
                return;
            }
            c++;
        }

        /* The output for one number is never longer than the digits just
           consumed, so writing at d stays behind the read cursor. */
        if (t->utf8) d += put_utf8(number, p + d);
        else p[d++] = number;
    }
    SIZE(p) = d;
}
extern
int
read_token
(
struct
tokeniser
*
t
)
{
symbol
*
p
=
t
->
p
;
int
held
=
t
->
token_held
;
t
->
token_held
=
false
;
if
(
held
)
return
t
->
token
;
repeat
{
int
code
=
next_token
(
t
);
switch
(
code
)
{
case
c_comment1
:
/* slash-slash comment */
while
(
t
->
c
<
SIZE
(
p
)
&&
p
[
t
->
c
]
!=
'\n'
)
t
->
c
++
;
continue
;
case
c_comment2
:
/* slash-star comment */
repeat
{
if
(
t
->
c
>=
SIZE
(
p
))
{
error1
(
t
,
"/* comment not terminated"
);
t
->
token
=
-
1
;
return
-
1
;
}
if
(
p
[
t
->
c
]
==
'\n'
)
t
->
line_number
++
;
if
(
eq_s
(
t
,
"*/"
))
break
;
t
->
c
++
;
}
continue
;
case
c_stringescapes
:
{
int
ch1
=
next_real_char
(
t
);
int
ch2
=
next_real_char
(
t
);
if
(
ch2
<
0
)
{
error2
(
t
,
"stringescapes"
);
continue
;
}
if
(
ch1
==
'\''
)
{
error1
(
t
,
"first stringescape cannot be '"
);
continue
;
}
t
->
m_start
=
ch1
;
t
->
m_end
=
ch2
;
}
continue
;
case
c_stringdef
:
{
int
base
=
0
;
read_chars
(
t
);
code
=
read_token
(
t
);
if
(
code
==
c_hex
)
{
base
=
16
;
code
=
read_token
(
t
);
}
else
if
(
code
==
c_decimal
)
{
base
=
10
;
code
=
read_token
(
t
);
}
unless
(
code
==
c_literalstring
)
{
error1
(
t
,
"string omitted after stringdef"
);
continue
;
}
if
(
base
>
0
)
convert_numeric_string
(
t
,
t
->
b
,
base
);
{
NEW
(
m_pair
,
q
);
q
->
next
=
t
->
m_pairs
;
q
->
name
=
copy_b
(
t
->
b2
);
q
->
value
=
copy_b
(
t
->
b
);
t
->
m_pairs
=
q
;
}
}
continue
;
case
c_get
:
code
=
read_token
(
t
);
unless
(
code
==
c_literalstring
)
{
error1
(
t
,
"string omitted after get"
);
continue
;
}
t
->
get_depth
++
;
if
(
t
->
get_depth
>
10
)
{
fprintf
(
stderr
,
"get directives go 10 deep. Looping?
\n
"
);
exit
(
1
);
}
{
NEW
(
input
,
q
);
symbol
*
u
=
get_input
(
t
->
b
);
if
(
u
==
0
)
{
struct
include
*
r
=
t
->
includes
;
until
(
r
==
0
)
{
symbol
*
b
=
copy_b
(
r
->
b
);
b
=
add_to_b
(
b
,
SIZE
(
t
->
b
),
t
->
b
);
u
=
get_input
(
b
);
lose_b
(
b
);
unless
(
u
==
0
)
break
;
r
=
r
->
next
;
}
}
if
(
u
==
0
)
{
error
(
t
,
"Can't get '"
,
SIZE
(
t
->
b
),
t
->
b
,
"'"
);
exit
(
1
);
}
memmove
(
q
,
t
,
sizeof
(
struct
input
));
t
->
next
=
q
;
t
->
p
=
u
;
t
->
c
=
0
;
t
->
line_number
=
1
;
}
p
=
t
->
p
;
continue
;
case
-
1
:
unless
(
t
->
next
==
0
)
{
lose_b
(
p
);
{
struct
input
*
q
=
t
->
next
;
memmove
(
t
,
q
,
sizeof
(
struct
input
));
p
=
t
->
p
;
FREE
(
q
);
}
t
->
get_depth
--
;
continue
;
}
/* drop through */
default:
t
->
previous_token
=
t
->
token
;
t
->
token
=
code
;
return
code
;
}
}
}
/* Return a printable name for token code 'code': the system word itself
   when the code appears in vocab, otherwise a fixed description, or "?"
   for an unrecognised code. */
extern byte * name_of_token(int code) {
    int limit = vocab[0].code;   /* entry 0 holds the table length */
    int k;
    for (k = 1; k < limit; k++) {
        if (vocab[k].code == code) return vocab[k].s;
    }
    switch (code) {
        case c_mathassign:    return (byte *) "=";
        case c_name:          return (byte *) "name";
        case c_number:        return (byte *) "number";
        case c_literalstring: return (byte *) "literal";
        case c_neg:           return (byte *) "neg";
        case c_grouping:      return (byte *) "grouping";
        case c_call:          return (byte *) "call";
        case c_booltest:      return (byte *) "Boolean test";
        case -2:              return (byte *) "start of text";
        case -1:              return (byte *) "end of text";
        default:              return (byte *) "?";
    }
}
/* Create a tokeniser that reads from block p, positioned at its start on
   line 1. Only the fields assigned below are initialised here. */
extern struct tokeniser * create_tokeniser(symbol * p) {
    NEW(tokeniser, t);

    /* input position */
    t->next = 0;
    t->p = p;
    t->c = 0;
    t->line_number = 1;
    t->get_depth = 0;

    /* working blocks */
    t->b = create_b(0);
    t->b2 = create_b(0);

    /* string inserts: -1 matches no input symbol, so none are active */
    t->m_start = -1;
    t->m_pairs = 0;

    /* token state: -2 is reported as "start of text" */
    t->error_count = 0;
    t->token_held = false;
    t->token = -2;
    t->previous_token = -2;

    return t;
}
extern
void
close_tokeniser
(
struct
tokeniser
*
t
)
{
lose_b
(
t
->
b
);
lose_b
(
t
->
b2
);
{
struct
m_pair
*
q
=
t
->
m_pairs
;
until
(
q
==
0
)
{
struct
m_pair
*
q_next
=
q
->
next
;
lose_b
(
q
->
name
);
lose_b
(
q
->
value
);
FREE
(
q
);
q
=
q_next
;
}
}
{
struct
input
*
q
=
t
->
next
;
until
(
q
==
0
)
{
struct
input
*
q_next
=
q
->
next
;
FREE
(
q
);
q
=
q_next
;
}
}
FREE
(
t
);
}
libstemmer_c/libstemmer/libstemmer.c
View file @
c825e3d9
...
@@ -36,9 +36,8 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
...
@@ -36,9 +36,8 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
{
{
stemmer_encoding_t
enc
;
stemmer_encoding_t
enc
;
struct
stemmer_modules
*
module
;
struct
stemmer_modules
*
module
;
struct
sb_stemmer
*
stemmer
=
struct
sb_stemmer
*
stemmer
;
(
struct
sb_stemmer
*
)
malloc
(
sizeof
(
struct
sb_stemmer
));
if
(
stemmer
==
NULL
)
return
NULL
;
enc
=
sb_getenc
(
charenc
);
enc
=
sb_getenc
(
charenc
);
if
(
enc
==
ENC_UNKNOWN
)
return
NULL
;
if
(
enc
==
ENC_UNKNOWN
)
return
NULL
;
...
@@ -47,6 +46,9 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
...
@@ -47,6 +46,9 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
}
}
if
(
module
->
name
==
NULL
)
return
NULL
;
if
(
module
->
name
==
NULL
)
return
NULL
;
stemmer
=
(
struct
sb_stemmer
*
)
malloc
(
sizeof
(
struct
sb_stemmer
));
if
(
stemmer
==
NULL
)
return
NULL
;
stemmer
->
create
=
module
->
create
;
stemmer
->
create
=
module
->
create
;
stemmer
->
close
=
module
->
close
;
stemmer
->
close
=
module
->
close
;
stemmer
->
stem
=
module
->
stem
;
stemmer
->
stem
=
module
->
stem
;
...
...
libstemmer_c/libstemmer/libstemmer_utf8.c
View file @
c825e3d9
...
@@ -36,9 +36,8 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
...
@@ -36,9 +36,8 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
{
{
stemmer_encoding_t
enc
;
stemmer_encoding_t
enc
;
struct
stemmer_modules
*
module
;
struct
stemmer_modules
*
module
;
struct
sb_stemmer
*
stemmer
=
struct
sb_stemmer
*
stemmer
;
(
struct
sb_stemmer
*
)
malloc
(
sizeof
(
struct
sb_stemmer
));
if
(
stemmer
==
NULL
)
return
NULL
;
enc
=
sb_getenc
(
charenc
);
enc
=
sb_getenc
(
charenc
);
if
(
enc
==
ENC_UNKNOWN
)
return
NULL
;
if
(
enc
==
ENC_UNKNOWN
)
return
NULL
;
...
@@ -47,6 +46,9 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
...
@@ -47,6 +46,9 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
}
}
if
(
module
->
name
==
NULL
)
return
NULL
;
if
(
module
->
name
==
NULL
)
return
NULL
;
stemmer
=
(
struct
sb_stemmer
*
)
malloc
(
sizeof
(
struct
sb_stemmer
));
if
(
stemmer
==
NULL
)
return
NULL
;
stemmer
->
create
=
module
->
create
;
stemmer
->
create
=
module
->
create
;
stemmer
->
close
=
module
->
close
;
stemmer
->
close
=
module
->
close
;
stemmer
->
stem
=
module
->
stem
;
stemmer
->
stem
=
module
->
stem
;
...
...
libstemmer_c/libstemmer/modules.h
View file @
c825e3d9
...
@@ -20,7 +20,7 @@
...
@@ -20,7 +20,7 @@
#include "../src_c/stem_UTF_8_french.h"
#include "../src_c/stem_UTF_8_french.h"
#include "../src_c/stem_ISO_8859_1_german.h"
#include "../src_c/stem_ISO_8859_1_german.h"
#include "../src_c/stem_UTF_8_german.h"
#include "../src_c/stem_UTF_8_german.h"
#include "../src_c/stem_ISO_8859_
1
_hungarian.h"
#include "../src_c/stem_ISO_8859_
2
_hungarian.h"
#include "../src_c/stem_UTF_8_hungarian.h"
#include "../src_c/stem_UTF_8_hungarian.h"
#include "../src_c/stem_ISO_8859_1_italian.h"
#include "../src_c/stem_ISO_8859_1_italian.h"
#include "../src_c/stem_UTF_8_italian.h"
#include "../src_c/stem_UTF_8_italian.h"
...
@@ -110,11 +110,11 @@ static struct stemmer_modules modules[] = {
...
@@ -110,11 +110,11 @@ static struct stemmer_modules modules[] = {
{
"ger"
,
ENC_UTF_8
,
german_UTF_8_create_env
,
german_UTF_8_close_env
,
german_UTF_8_stem
},
{
"ger"
,
ENC_UTF_8
,
german_UTF_8_create_env
,
german_UTF_8_close_env
,
german_UTF_8_stem
},
{
"german"
,
ENC_ISO_8859_1
,
german_ISO_8859_1_create_env
,
german_ISO_8859_1_close_env
,
german_ISO_8859_1_stem
},
{
"german"
,
ENC_ISO_8859_1
,
german_ISO_8859_1_create_env
,
german_ISO_8859_1_close_env
,
german_ISO_8859_1_stem
},
{
"german"
,
ENC_UTF_8
,
german_UTF_8_create_env
,
german_UTF_8_close_env
,
german_UTF_8_stem
},
{
"german"
,
ENC_UTF_8
,
german_UTF_8_create_env
,
german_UTF_8_close_env
,
german_UTF_8_stem
},
{
"hu"
,
ENC_ISO_8859_
1
,
hungarian_ISO_8859_1_create_env
,
hungarian_ISO_8859_1_close_env
,
hungarian_ISO_8859_1
_stem
},
{
"hu"
,
ENC_ISO_8859_
2
,
hungarian_ISO_8859_2_create_env
,
hungarian_ISO_8859_2_close_env
,
hungarian_ISO_8859_2
_stem
},
{
"hu"
,
ENC_UTF_8
,
hungarian_UTF_8_create_env
,
hungarian_UTF_8_close_env
,
hungarian_UTF_8_stem
},
{
"hu"
,
ENC_UTF_8
,
hungarian_UTF_8_create_env
,
hungarian_UTF_8_close_env
,
hungarian_UTF_8_stem
},
{
"hun"
,
ENC_ISO_8859_
1
,
hungarian_ISO_8859_1_create_env
,
hungarian_ISO_8859_1_close_env
,
hungarian_ISO_8859_1
_stem
},
{
"hun"
,
ENC_ISO_8859_
2
,
hungarian_ISO_8859_2_create_env
,
hungarian_ISO_8859_2_close_env
,
hungarian_ISO_8859_2
_stem
},
{
"hun"
,
ENC_UTF_8
,
hungarian_UTF_8_create_env
,
hungarian_UTF_8_close_env
,
hungarian_UTF_8_stem
},
{
"hun"
,
ENC_UTF_8
,
hungarian_UTF_8_create_env
,
hungarian_UTF_8_close_env
,
hungarian_UTF_8_stem
},
{
"hungarian"
,
ENC_ISO_8859_
1
,
hungarian_ISO_8859_1_create_env
,
hungarian_ISO_8859_1_close_env
,
hungarian_ISO_8859_1
_stem
},
{
"hungarian"
,
ENC_ISO_8859_
2
,
hungarian_ISO_8859_2_create_env
,
hungarian_ISO_8859_2_close_env
,
hungarian_ISO_8859_2
_stem
},
{
"hungarian"
,
ENC_UTF_8
,
hungarian_UTF_8_create_env
,
hungarian_UTF_8_close_env
,
hungarian_UTF_8_stem
},
{
"hungarian"
,
ENC_UTF_8
,
hungarian_UTF_8_create_env
,
hungarian_UTF_8_close_env
,
hungarian_UTF_8_stem
},
{
"it"
,
ENC_ISO_8859_1
,
italian_ISO_8859_1_create_env
,
italian_ISO_8859_1_close_env
,
italian_ISO_8859_1_stem
},
{
"it"
,
ENC_ISO_8859_1
,
italian_ISO_8859_1_create_env
,
italian_ISO_8859_1_close_env
,
italian_ISO_8859_1_stem
},
{
"it"
,
ENC_UTF_8
,
italian_UTF_8_create_env
,
italian_UTF_8_close_env
,
italian_UTF_8_stem
},
{
"it"
,
ENC_UTF_8
,
italian_UTF_8_create_env
,
italian_UTF_8_close_env
,
italian_UTF_8_stem
},
...
...
libstemmer_c/libstemmer/modules.txt
View file @
c825e3d9
...
@@ -15,7 +15,7 @@ english UTF_8,ISO_8859_1 english,en,eng
...
@@ -15,7 +15,7 @@ english UTF_8,ISO_8859_1 english,en,eng
finnish UTF_8,ISO_8859_1 finnish,fi,fin
finnish UTF_8,ISO_8859_1 finnish,fi,fin
french UTF_8,ISO_8859_1 french,fr,fre,fra
french UTF_8,ISO_8859_1 french,fr,fre,fra
german UTF_8,ISO_8859_1 german,de,ger,deu
german UTF_8,ISO_8859_1 german,de,ger,deu
hungarian UTF_8,ISO_8859_
1
hungarian,hu,hun
hungarian UTF_8,ISO_8859_
2
hungarian,hu,hun
italian UTF_8,ISO_8859_1 italian,it,ita
italian UTF_8,ISO_8859_1 italian,it,ita
norwegian UTF_8,ISO_8859_1 norwegian,no,nor
norwegian UTF_8,ISO_8859_1 norwegian,no,nor
portuguese UTF_8,ISO_8859_1 portuguese,pt,por
portuguese UTF_8,ISO_8859_1 portuguese,pt,por
...
...
libstemmer_c/mkinc.mak
View file @
c825e3d9
...
@@ -20,7 +20,7 @@ snowball_sources= \
...
@@ -20,7 +20,7 @@ snowball_sources= \
src_c/stem_UTF_8_french.c
\
src_c/stem_UTF_8_french.c
\
src_c/stem_ISO_8859_1_german.c
\
src_c/stem_ISO_8859_1_german.c
\
src_c/stem_UTF_8_german.c
\
src_c/stem_UTF_8_german.c
\
src_c/stem_ISO_8859_
1
_hungarian.c
\
src_c/stem_ISO_8859_
2
_hungarian.c
\
src_c/stem_UTF_8_hungarian.c
\
src_c/stem_UTF_8_hungarian.c
\
src_c/stem_ISO_8859_1_italian.c
\
src_c/stem_ISO_8859_1_italian.c
\
src_c/stem_UTF_8_italian.c
\
src_c/stem_UTF_8_italian.c
\
...
@@ -56,7 +56,7 @@ snowball_headers= \
...
@@ -56,7 +56,7 @@ snowball_headers= \
src_c/stem_UTF_8_french.h
\
src_c/stem_UTF_8_french.h
\
src_c/stem_ISO_8859_1_german.h
\
src_c/stem_ISO_8859_1_german.h
\
src_c/stem_UTF_8_german.h
\
src_c/stem_UTF_8_german.h
\
src_c/stem_ISO_8859_
1
_hungarian.h
\
src_c/stem_ISO_8859_
2
_hungarian.h
\
src_c/stem_UTF_8_hungarian.h
\
src_c/stem_UTF_8_hungarian.h
\
src_c/stem_ISO_8859_1_italian.h
\
src_c/stem_ISO_8859_1_italian.h
\
src_c/stem_UTF_8_italian.h
\
src_c/stem_UTF_8_italian.h
\
...
...
libstemmer_c/src_c/stem_ISO_8859_1_german.c
View file @
c825e3d9
...
@@ -54,13 +54,13 @@ static const symbol s_1_6[2] = { 'e', 's' };
...
@@ -54,13 +54,13 @@ static const symbol s_1_6[2] = { 'e', 's' };
static
const
struct
among
a_1
[
7
]
=
static
const
struct
among
a_1
[
7
]
=
{
{
/* 0 */
{
1
,
s_1_0
,
-
1
,
1
,
0
},
/* 0 */
{
1
,
s_1_0
,
-
1
,
2
,
0
},
/* 1 */
{
2
,
s_1_1
,
-
1
,
1
,
0
},
/* 1 */
{
2
,
s_1_1
,
-
1
,
1
,
0
},
/* 2 */
{
2
,
s_1_2
,
-
1
,
1
,
0
},
/* 2 */
{
2
,
s_1_2
,
-
1
,
2
,
0
},
/* 3 */
{
3
,
s_1_3
,
-
1
,
1
,
0
},
/* 3 */
{
3
,
s_1_3
,
-
1
,
1
,
0
},
/* 4 */
{
2
,
s_1_4
,
-
1
,
1
,
0
},
/* 4 */
{
2
,
s_1_4
,
-
1
,
1
,
0
},
/* 5 */
{
1
,
s_1_5
,
-
1
,
2
,
0
},
/* 5 */
{
1
,
s_1_5
,
-
1
,
3
,
0
},
/* 6 */
{
2
,
s_1_6
,
5
,
1
,
0
}
/* 6 */
{
2
,
s_1_6
,
5
,
2
,
0
}
};
};
static
const
symbol
s_2_0
[
2
]
=
{
'e'
,
'n'
};
static
const
symbol
s_2_0
[
2
]
=
{
'e'
,
'n'
};
...
@@ -123,28 +123,30 @@ static const symbol s_7[] = { 'u' };
...
@@ -123,28 +123,30 @@ static const symbol s_7[] = { 'u' };
static
const
symbol
s_8
[]
=
{
'a'
};
static
const
symbol
s_8
[]
=
{
'a'
};
static
const
symbol
s_9
[]
=
{
'o'
};
static
const
symbol
s_9
[]
=
{
'o'
};
static
const
symbol
s_10
[]
=
{
'u'
};
static
const
symbol
s_10
[]
=
{
'u'
};
static
const
symbol
s_11
[]
=
{
'i'
,
'g'
};
static
const
symbol
s_11
[]
=
{
's'
};
static
const
symbol
s_12
[]
=
{
'e'
};
static
const
symbol
s_12
[]
=
{
'n'
,
'i'
,
's'
};
static
const
symbol
s_13
[]
=
{
'e'
};
static
const
symbol
s_13
[]
=
{
'i'
,
'g'
};
static
const
symbol
s_14
[]
=
{
'e'
,
'r'
};
static
const
symbol
s_14
[]
=
{
'e'
};
static
const
symbol
s_15
[]
=
{
'e'
,
'n'
};
static
const
symbol
s_15
[]
=
{
'e'
};
static
const
symbol
s_16
[]
=
{
'e'
,
'r'
};
static
const
symbol
s_17
[]
=
{
'e'
,
'n'
};
static
int
r_prelude
(
struct
SN_env
*
z
)
{
static
int
r_prelude
(
struct
SN_env
*
z
)
{
{
int
c_test
=
z
->
c
;
/* test, line 3
0
*/
{
int
c_test
=
z
->
c
;
/* test, line 3
5
*/
while
(
1
)
{
/* repeat, line 3
0
*/
while
(
1
)
{
/* repeat, line 3
5
*/
int
c1
=
z
->
c
;
int
c1
=
z
->
c
;
{
int
c2
=
z
->
c
;
/* or, line 3
3
*/
{
int
c2
=
z
->
c
;
/* or, line 3
8
*/
z
->
bra
=
z
->
c
;
/* [, line 3
2
*/
z
->
bra
=
z
->
c
;
/* [, line 3
7
*/
if
(
!
(
eq_s
(
z
,
1
,
s_0
)))
goto
lab2
;
if
(
!
(
eq_s
(
z
,
1
,
s_0
)))
goto
lab2
;
z
->
ket
=
z
->
c
;
/* ], line 3
2
*/
z
->
ket
=
z
->
c
;
/* ], line 3
7
*/
{
int
ret
=
slice_from_s
(
z
,
2
,
s_1
);
/* <-, line 3
2
*/
{
int
ret
=
slice_from_s
(
z
,
2
,
s_1
);
/* <-, line 3
7
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab1
;
goto
lab1
;
lab2:
lab2:
z
->
c
=
c2
;
z
->
c
=
c2
;
if
(
z
->
c
>=
z
->
l
)
goto
lab0
;
if
(
z
->
c
>=
z
->
l
)
goto
lab0
;
z
->
c
++
;
/* next, line 3
3
*/
z
->
c
++
;
/* next, line 3
8
*/
}
}
lab1:
lab1:
continue
;
continue
;
...
@@ -154,26 +156,26 @@ static int r_prelude(struct SN_env * z) {
...
@@ -154,26 +156,26 @@ static int r_prelude(struct SN_env * z) {
}
}
z
->
c
=
c_test
;
z
->
c
=
c_test
;
}
}
while
(
1
)
{
/* repeat, line
36
*/
while
(
1
)
{
/* repeat, line
41
*/
int
c3
=
z
->
c
;
int
c3
=
z
->
c
;
while
(
1
)
{
/* goto, line
36
*/
while
(
1
)
{
/* goto, line
41
*/
int
c4
=
z
->
c
;
int
c4
=
z
->
c
;
if
(
in_grouping
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab4
;
if
(
in_grouping
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab4
;
z
->
bra
=
z
->
c
;
/* [, line
37
*/
z
->
bra
=
z
->
c
;
/* [, line
42
*/
{
int
c5
=
z
->
c
;
/* or, line
37
*/
{
int
c5
=
z
->
c
;
/* or, line
42
*/
if
(
!
(
eq_s
(
z
,
1
,
s_2
)))
goto
lab6
;
if
(
!
(
eq_s
(
z
,
1
,
s_2
)))
goto
lab6
;
z
->
ket
=
z
->
c
;
/* ], line
37
*/
z
->
ket
=
z
->
c
;
/* ], line
42
*/
if
(
in_grouping
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab6
;
if
(
in_grouping
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab6
;
{
int
ret
=
slice_from_s
(
z
,
1
,
s_3
);
/* <-, line
37
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_3
);
/* <-, line
42
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab5
;
goto
lab5
;
lab6:
lab6:
z
->
c
=
c5
;
z
->
c
=
c5
;
if
(
!
(
eq_s
(
z
,
1
,
s_4
)))
goto
lab4
;
if
(
!
(
eq_s
(
z
,
1
,
s_4
)))
goto
lab4
;
z
->
ket
=
z
->
c
;
/* ], line
38
*/
z
->
ket
=
z
->
c
;
/* ], line
43
*/
if
(
in_grouping
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab4
;
if
(
in_grouping
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab4
;
{
int
ret
=
slice_from_s
(
z
,
1
,
s_5
);
/* <-, line
38
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_5
);
/* <-, line
43
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
}
}
...
@@ -183,7 +185,7 @@ static int r_prelude(struct SN_env * z) {
...
@@ -183,7 +185,7 @@ static int r_prelude(struct SN_env * z) {
lab4:
lab4:
z
->
c
=
c4
;
z
->
c
=
c4
;
if
(
z
->
c
>=
z
->
l
)
goto
lab3
;
if
(
z
->
c
>=
z
->
l
)
goto
lab3
;
z
->
c
++
;
/* goto, line
36
*/
z
->
c
++
;
/* goto, line
41
*/
}
}
continue
;
continue
;
lab3:
lab3:
...
@@ -196,81 +198,81 @@ static int r_prelude(struct SN_env * z) {
...
@@ -196,81 +198,81 @@ static int r_prelude(struct SN_env * z) {
static
int
r_mark_regions
(
struct
SN_env
*
z
)
{
static
int
r_mark_regions
(
struct
SN_env
*
z
)
{
z
->
I
[
0
]
=
z
->
l
;
z
->
I
[
0
]
=
z
->
l
;
z
->
I
[
1
]
=
z
->
l
;
z
->
I
[
1
]
=
z
->
l
;
{
int
c_test
=
z
->
c
;
/* test, line
47
*/
{
int
c_test
=
z
->
c
;
/* test, line
52
*/
{
int
ret
=
z
->
c
+
3
;
{
int
ret
=
z
->
c
+
3
;
if
(
0
>
ret
||
ret
>
z
->
l
)
return
0
;
if
(
0
>
ret
||
ret
>
z
->
l
)
return
0
;
z
->
c
=
ret
;
/* hop, line
47
*/
z
->
c
=
ret
;
/* hop, line
52
*/
}
}
z
->
I
[
2
]
=
z
->
c
;
/* setmark x, line
47
*/
z
->
I
[
2
]
=
z
->
c
;
/* setmark x, line
52
*/
z
->
c
=
c_test
;
z
->
c
=
c_test
;
}
}
{
/* gopast */
/* grouping v, line
49
*/
{
/* gopast */
/* grouping v, line
54
*/
int
ret
=
out_grouping
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
out_grouping
(
z
,
g_v
,
97
,
252
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
{
/* gopast */
/* non v, line
49
*/
{
/* gopast */
/* non v, line
54
*/
int
ret
=
in_grouping
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
in_grouping
(
z
,
g_v
,
97
,
252
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
z
->
I
[
0
]
=
z
->
c
;
/* setmark p1, line
49
*/
z
->
I
[
0
]
=
z
->
c
;
/* setmark p1, line
54
*/
/* try, line 5
0
*/
/* try, line 5
5
*/
if
(
!
(
z
->
I
[
0
]
<
z
->
I
[
2
]))
goto
lab0
;
if
(
!
(
z
->
I
[
0
]
<
z
->
I
[
2
]))
goto
lab0
;
z
->
I
[
0
]
=
z
->
I
[
2
];
z
->
I
[
0
]
=
z
->
I
[
2
];
lab0:
lab0:
{
/* gopast */
/* grouping v, line 5
1
*/
{
/* gopast */
/* grouping v, line 5
6
*/
int
ret
=
out_grouping
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
out_grouping
(
z
,
g_v
,
97
,
252
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
{
/* gopast */
/* non v, line 5
1
*/
{
/* gopast */
/* non v, line 5
6
*/
int
ret
=
in_grouping
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
in_grouping
(
z
,
g_v
,
97
,
252
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
z
->
I
[
1
]
=
z
->
c
;
/* setmark p2, line 5
1
*/
z
->
I
[
1
]
=
z
->
c
;
/* setmark p2, line 5
6
*/
return
1
;
return
1
;
}
}
static
int
r_postlude
(
struct
SN_env
*
z
)
{
static
int
r_postlude
(
struct
SN_env
*
z
)
{
int
among_var
;
int
among_var
;
while
(
1
)
{
/* repeat, line
55
*/
while
(
1
)
{
/* repeat, line
60
*/
int
c1
=
z
->
c
;
int
c1
=
z
->
c
;
z
->
bra
=
z
->
c
;
/* [, line
57
*/
z
->
bra
=
z
->
c
;
/* [, line
62
*/
among_var
=
find_among
(
z
,
a_0
,
6
);
/* substring, line
57
*/
among_var
=
find_among
(
z
,
a_0
,
6
);
/* substring, line
62
*/
if
(
!
(
among_var
))
goto
lab0
;
if
(
!
(
among_var
))
goto
lab0
;
z
->
ket
=
z
->
c
;
/* ], line
57
*/
z
->
ket
=
z
->
c
;
/* ], line
62
*/
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab0
;
case
0
:
goto
lab0
;
case
1
:
case
1
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_6
);
/* <-, line
58
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_6
);
/* <-, line
63
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
2
:
case
2
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_7
);
/* <-, line
59
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_7
);
/* <-, line
64
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
3
:
case
3
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_8
);
/* <-, line 6
0
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_8
);
/* <-, line 6
5
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
4
:
case
4
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_9
);
/* <-, line 6
1
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_9
);
/* <-, line 6
6
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
5
:
case
5
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_10
);
/* <-, line 6
2
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_10
);
/* <-, line 6
7
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
6
:
case
6
:
if
(
z
->
c
>=
z
->
l
)
goto
lab0
;
if
(
z
->
c
>=
z
->
l
)
goto
lab0
;
z
->
c
++
;
/* next, line 6
3
*/
z
->
c
++
;
/* next, line 6
8
*/
break
;
break
;
}
}
continue
;
continue
;
...
@@ -293,26 +295,42 @@ static int r_R2(struct SN_env * z) {
...
@@ -293,26 +295,42 @@ static int r_R2(struct SN_env * z) {
static
int
r_standard_suffix
(
struct
SN_env
*
z
)
{
static
int
r_standard_suffix
(
struct
SN_env
*
z
)
{
int
among_var
;
int
among_var
;
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* do, line 7
4
*/
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* do, line 7
9
*/
z
->
ket
=
z
->
c
;
/* [, line
75
*/
z
->
ket
=
z
->
c
;
/* [, line
80
*/
if
(
z
->
c
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
811040
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab0
;
if
(
z
->
c
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
811040
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab0
;
among_var
=
find_among_b
(
z
,
a_1
,
7
);
/* substring, line
75
*/
among_var
=
find_among_b
(
z
,
a_1
,
7
);
/* substring, line
80
*/
if
(
!
(
among_var
))
goto
lab0
;
if
(
!
(
among_var
))
goto
lab0
;
z
->
bra
=
z
->
c
;
/* ], line
75
*/
z
->
bra
=
z
->
c
;
/* ], line
80
*/
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab0
;
/* call R1, line
75
*/
if
(
ret
==
0
)
goto
lab0
;
/* call R1, line
80
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab0
;
case
0
:
goto
lab0
;
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line
77
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line
82
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
2
:
case
2
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 85 */
if
(
ret
<
0
)
return
ret
;
}
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 86 */
z
->
ket
=
z
->
c
;
/* [, line 86 */
if
(
!
(
eq_s_b
(
z
,
1
,
s_11
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab1
;
}
z
->
bra
=
z
->
c
;
/* ], line 86 */
if
(
!
(
eq_s_b
(
z
,
3
,
s_12
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab1
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 86 */
if
(
ret
<
0
)
return
ret
;
}
lab1:
;
}
break
;
case
3
:
if
(
in_grouping_b
(
z
,
g_s_ending
,
98
,
116
,
0
))
goto
lab0
;
if
(
in_grouping_b
(
z
,
g_s_ending
,
98
,
116
,
0
))
goto
lab0
;
{
int
ret
=
slice_del
(
z
);
/* delete, line 8
0
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 8
9
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
...
@@ -320,175 +338,175 @@ static int r_standard_suffix(struct SN_env * z) {
...
@@ -320,175 +338,175 @@ static int r_standard_suffix(struct SN_env * z) {
lab0:
lab0:
z
->
c
=
z
->
l
-
m1
;
z
->
c
=
z
->
l
-
m1
;
}
}
{
int
m2
=
z
->
l
-
z
->
c
;
(
void
)
m2
;
/* do, line
84
*/
{
int
m2
=
z
->
l
-
z
->
c
;
(
void
)
m2
;
/* do, line
93
*/
z
->
ket
=
z
->
c
;
/* [, line
85
*/
z
->
ket
=
z
->
c
;
/* [, line
94
*/
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1327104
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab
1
;
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1327104
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab
2
;
among_var
=
find_among_b
(
z
,
a_2
,
4
);
/* substring, line
85
*/
among_var
=
find_among_b
(
z
,
a_2
,
4
);
/* substring, line
94
*/
if
(
!
(
among_var
))
goto
lab
1
;
if
(
!
(
among_var
))
goto
lab
2
;
z
->
bra
=
z
->
c
;
/* ], line
85
*/
z
->
bra
=
z
->
c
;
/* ], line
94
*/
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab
1
;
/* call R1, line 85
*/
if
(
ret
==
0
)
goto
lab
2
;
/* call R1, line 94
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab
1
;
case
0
:
goto
lab
2
;
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line
87
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line
96
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
2
:
case
2
:
if
(
in_grouping_b
(
z
,
g_st_ending
,
98
,
116
,
0
))
goto
lab
1
;
if
(
in_grouping_b
(
z
,
g_st_ending
,
98
,
116
,
0
))
goto
lab
2
;
{
int
ret
=
z
->
c
-
3
;
{
int
ret
=
z
->
c
-
3
;
if
(
z
->
lb
>
ret
||
ret
>
z
->
l
)
goto
lab
1
;
if
(
z
->
lb
>
ret
||
ret
>
z
->
l
)
goto
lab
2
;
z
->
c
=
ret
;
/* hop, line 9
0
*/
z
->
c
=
ret
;
/* hop, line 9
9
*/
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 9
0
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 9
9
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
}
}
lab
1
:
lab
2
:
z
->
c
=
z
->
l
-
m2
;
z
->
c
=
z
->
l
-
m2
;
}
}
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line
94
*/
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line
103
*/
z
->
ket
=
z
->
c
;
/* [, line
95
*/
z
->
ket
=
z
->
c
;
/* [, line
104
*/
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1051024
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab
2
;
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1051024
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab
3
;
among_var
=
find_among_b
(
z
,
a_4
,
8
);
/* substring, line
95
*/
among_var
=
find_among_b
(
z
,
a_4
,
8
);
/* substring, line
104
*/
if
(
!
(
among_var
))
goto
lab
2
;
if
(
!
(
among_var
))
goto
lab
3
;
z
->
bra
=
z
->
c
;
/* ], line
95
*/
z
->
bra
=
z
->
c
;
/* ], line
104
*/
{
int
ret
=
r_R2
(
z
);
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
goto
lab
2
;
/* call R2, line 95
*/
if
(
ret
==
0
)
goto
lab
3
;
/* call R2, line 104
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab
2
;
case
0
:
goto
lab
3
;
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line
97
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line
106
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line
98
*/
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line
107
*/
z
->
ket
=
z
->
c
;
/* [, line
98
*/
z
->
ket
=
z
->
c
;
/* [, line
107
*/
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
1
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab3
;
}
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
3
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab4
;
}
z
->
bra
=
z
->
c
;
/* ], line
98
*/
z
->
bra
=
z
->
c
;
/* ], line
107
*/
{
int
m4
=
z
->
l
-
z
->
c
;
(
void
)
m4
;
/* not, line
98
*/
{
int
m4
=
z
->
l
-
z
->
c
;
(
void
)
m4
;
/* not, line
107
*/
if
(
!
(
eq_s_b
(
z
,
1
,
s_1
2
)))
goto
lab4
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_1
4
)))
goto
lab5
;
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
3
;
}
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
4
;
}
lab
4
:
lab
5
:
z
->
c
=
z
->
l
-
m4
;
z
->
c
=
z
->
l
-
m4
;
}
}
{
int
ret
=
r_R2
(
z
);
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
3
;
}
/* call R2, line 98
*/
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
4
;
}
/* call R2, line 107
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line
98
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line
107
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab
3
:
lab
4
:
;
;
}
}
break
;
break
;
case
2
:
case
2
:
{
int
m5
=
z
->
l
-
z
->
c
;
(
void
)
m5
;
/* not, line 1
01
*/
{
int
m5
=
z
->
l
-
z
->
c
;
(
void
)
m5
;
/* not, line 1
10
*/
if
(
!
(
eq_s_b
(
z
,
1
,
s_1
3
)))
goto
lab5
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_1
5
)))
goto
lab6
;
goto
lab
2
;
goto
lab
3
;
lab
5
:
lab
6
:
z
->
c
=
z
->
l
-
m5
;
z
->
c
=
z
->
l
-
m5
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
01
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
10
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
3
:
case
3
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
04
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
13
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 1
05
*/
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 1
14
*/
z
->
ket
=
z
->
c
;
/* [, line 1
06
*/
z
->
ket
=
z
->
c
;
/* [, line 1
15
*/
{
int
m6
=
z
->
l
-
z
->
c
;
(
void
)
m6
;
/* or, line 1
06
*/
{
int
m6
=
z
->
l
-
z
->
c
;
(
void
)
m6
;
/* or, line 1
15
*/
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
4
)))
goto
lab8
;
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
6
)))
goto
lab9
;
goto
lab
7
;
goto
lab
8
;
lab
8
:
lab
9
:
z
->
c
=
z
->
l
-
m6
;
z
->
c
=
z
->
l
-
m6
;
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
5
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab6
;
}
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
7
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab7
;
}
}
}
lab
7
:
lab
8
:
z
->
bra
=
z
->
c
;
/* ], line 1
06
*/
z
->
bra
=
z
->
c
;
/* ], line 1
15
*/
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
6
;
}
/* call R1, line 106
*/
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
7
;
}
/* call R1, line 115
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
06
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
15
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab
6
:
lab
7
:
;
;
}
}
break
;
break
;
case
4
:
case
4
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 11
0
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 11
9
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 1
11
*/
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 1
20
*/
z
->
ket
=
z
->
c
;
/* [, line 1
12
*/
z
->
ket
=
z
->
c
;
/* [, line 1
21
*/
if
(
z
->
c
-
1
<=
z
->
lb
||
(
z
->
p
[
z
->
c
-
1
]
!=
103
&&
z
->
p
[
z
->
c
-
1
]
!=
104
))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
9
;
}
if
(
z
->
c
-
1
<=
z
->
lb
||
(
z
->
p
[
z
->
c
-
1
]
!=
103
&&
z
->
p
[
z
->
c
-
1
]
!=
104
))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
10
;
}
among_var
=
find_among_b
(
z
,
a_3
,
2
);
/* substring, line 1
12
*/
among_var
=
find_among_b
(
z
,
a_3
,
2
);
/* substring, line 1
21
*/
if
(
!
(
among_var
))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
9
;
}
if
(
!
(
among_var
))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
10
;
}
z
->
bra
=
z
->
c
;
/* ], line 1
12
*/
z
->
bra
=
z
->
c
;
/* ], line 1
21
*/
{
int
ret
=
r_R2
(
z
);
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
9
;
}
/* call R2, line 112
*/
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
10
;
}
/* call R2, line 121
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
9
;
}
case
0
:
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
10
;
}
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
14
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
23
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
}
}
lab
9
:
lab
10
:
;
;
}
}
break
;
break
;
}
}
lab
2
:
lab
3
:
z
->
c
=
z
->
l
-
m3
;
z
->
c
=
z
->
l
-
m3
;
}
}
return
1
;
return
1
;
}
}
extern
int
german_ISO_8859_1_stem
(
struct
SN_env
*
z
)
{
extern
int
german_ISO_8859_1_stem
(
struct
SN_env
*
z
)
{
{
int
c1
=
z
->
c
;
/* do, line 1
25
*/
{
int
c1
=
z
->
c
;
/* do, line 1
34
*/
{
int
ret
=
r_prelude
(
z
);
{
int
ret
=
r_prelude
(
z
);
if
(
ret
==
0
)
goto
lab0
;
/* call prelude, line 1
25
*/
if
(
ret
==
0
)
goto
lab0
;
/* call prelude, line 1
34
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab0:
lab0:
z
->
c
=
c1
;
z
->
c
=
c1
;
}
}
{
int
c2
=
z
->
c
;
/* do, line 1
26
*/
{
int
c2
=
z
->
c
;
/* do, line 1
35
*/
{
int
ret
=
r_mark_regions
(
z
);
{
int
ret
=
r_mark_regions
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call mark_regions, line 1
26
*/
if
(
ret
==
0
)
goto
lab1
;
/* call mark_regions, line 1
35
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab1:
lab1:
z
->
c
=
c2
;
z
->
c
=
c2
;
}
}
z
->
lb
=
z
->
c
;
z
->
c
=
z
->
l
;
/* backwards, line 1
27
*/
z
->
lb
=
z
->
c
;
z
->
c
=
z
->
l
;
/* backwards, line 1
36
*/
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line 1
28
*/
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line 1
37
*/
{
int
ret
=
r_standard_suffix
(
z
);
{
int
ret
=
r_standard_suffix
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call standard_suffix, line 1
28
*/
if
(
ret
==
0
)
goto
lab2
;
/* call standard_suffix, line 1
37
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab2:
lab2:
z
->
c
=
z
->
l
-
m3
;
z
->
c
=
z
->
l
-
m3
;
}
}
z
->
c
=
z
->
lb
;
z
->
c
=
z
->
lb
;
{
int
c4
=
z
->
c
;
/* do, line 1
29
*/
{
int
c4
=
z
->
c
;
/* do, line 1
38
*/
{
int
ret
=
r_postlude
(
z
);
{
int
ret
=
r_postlude
(
z
);
if
(
ret
==
0
)
goto
lab3
;
/* call postlude, line 1
29
*/
if
(
ret
==
0
)
goto
lab3
;
/* call postlude, line 1
38
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab3:
lab3:
...
...
libstemmer_c/src_c/stem_ISO_8859_
1
_hungarian.c
→
libstemmer_c/src_c/stem_ISO_8859_
2
_hungarian.c
View file @
c825e3d9
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
#ifdef __cplusplus
#ifdef __cplusplus
extern
"C"
{
extern
"C"
{
#endif
#endif
extern
int
hungarian_ISO_8859_
1
_stem
(
struct
SN_env
*
z
);
extern
int
hungarian_ISO_8859_
2
_stem
(
struct
SN_env
*
z
);
#ifdef __cplusplus
#ifdef __cplusplus
}
}
#endif
#endif
...
@@ -29,8 +29,8 @@ extern "C" {
...
@@ -29,8 +29,8 @@ extern "C" {
#endif
#endif
extern
struct
SN_env
*
hungarian_ISO_8859_
1
_create_env
(
void
);
extern
struct
SN_env
*
hungarian_ISO_8859_
2
_create_env
(
void
);
extern
void
hungarian_ISO_8859_
1
_close_env
(
struct
SN_env
*
z
);
extern
void
hungarian_ISO_8859_
2
_close_env
(
struct
SN_env
*
z
);
#ifdef __cplusplus
#ifdef __cplusplus
...
@@ -1137,7 +1137,7 @@ static int r_plur_owner(struct SN_env * z) {
...
@@ -1137,7 +1137,7 @@ static int r_plur_owner(struct SN_env * z) {
return
1
;
return
1
;
}
}
extern
int
hungarian_ISO_8859_
1
_stem
(
struct
SN_env
*
z
)
{
extern
int
hungarian_ISO_8859_
2
_stem
(
struct
SN_env
*
z
)
{
{
int
c1
=
z
->
c
;
/* do, line 229 */
{
int
c1
=
z
->
c
;
/* do, line 229 */
{
int
ret
=
r_mark_regions
(
z
);
{
int
ret
=
r_mark_regions
(
z
);
if
(
ret
==
0
)
goto
lab0
;
/* call mark_regions, line 229 */
if
(
ret
==
0
)
goto
lab0
;
/* call mark_regions, line 229 */
...
@@ -1224,7 +1224,7 @@ extern int hungarian_ISO_8859_1_stem(struct SN_env * z) {
...
@@ -1224,7 +1224,7 @@ extern int hungarian_ISO_8859_1_stem(struct SN_env * z) {
return
1
;
return
1
;
}
}
extern
struct
SN_env
*
hungarian_ISO_8859_
1
_create_env
(
void
)
{
return
SN_create_env
(
0
,
1
,
0
);
}
extern
struct
SN_env
*
hungarian_ISO_8859_
2
_create_env
(
void
)
{
return
SN_create_env
(
0
,
1
,
0
);
}
extern
void
hungarian_ISO_8859_
1
_close_env
(
struct
SN_env
*
z
)
{
SN_close_env
(
z
,
0
);
}
extern
void
hungarian_ISO_8859_
2
_close_env
(
struct
SN_env
*
z
)
{
SN_close_env
(
z
,
0
);
}
libstemmer_c/src_c/stem_ISO_8859_
1
_hungarian.h
→
libstemmer_c/src_c/stem_ISO_8859_
2
_hungarian.h
View file @
c825e3d9
...
@@ -5,10 +5,10 @@
...
@@ -5,10 +5,10 @@
extern
"C"
{
extern
"C"
{
#endif
#endif
extern
struct
SN_env
*
hungarian_ISO_8859_
1
_create_env
(
void
);
extern
struct
SN_env
*
hungarian_ISO_8859_
2
_create_env
(
void
);
extern
void
hungarian_ISO_8859_
1
_close_env
(
struct
SN_env
*
z
);
extern
void
hungarian_ISO_8859_
2
_close_env
(
struct
SN_env
*
z
);
extern
int
hungarian_ISO_8859_
1
_stem
(
struct
SN_env
*
z
);
extern
int
hungarian_ISO_8859_
2
_stem
(
struct
SN_env
*
z
);
#ifdef __cplusplus
#ifdef __cplusplus
}
}
...
...
libstemmer_c/src_c/stem_UTF_8_dutch.c
View file @
c825e3d9
...
@@ -10,19 +10,14 @@ extern int dutch_UTF_8_stem(struct SN_env * z);
...
@@ -10,19 +10,14 @@ extern int dutch_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
#ifdef __cplusplus
}
}
#endif
#endif
static
int
r_measure
(
struct
SN_env
*
z
);
static
int
r_standard_suffix
(
struct
SN_env
*
z
);
static
int
r_Step_6
(
struct
SN_env
*
z
);
static
int
r_undouble
(
struct
SN_env
*
z
);
static
int
r_Step_7
(
struct
SN_env
*
z
);
static
int
r_Step_4
(
struct
SN_env
*
z
);
static
int
r_Step_3
(
struct
SN_env
*
z
);
static
int
r_Step_2
(
struct
SN_env
*
z
);
static
int
r_Step_1
(
struct
SN_env
*
z
);
static
int
r_lengthen_V
(
struct
SN_env
*
z
);
static
int
r_VX
(
struct
SN_env
*
z
);
static
int
r_V
(
struct
SN_env
*
z
);
static
int
r_C
(
struct
SN_env
*
z
);
static
int
r_R2
(
struct
SN_env
*
z
);
static
int
r_R2
(
struct
SN_env
*
z
);
static
int
r_R1
(
struct
SN_env
*
z
);
static
int
r_R1
(
struct
SN_env
*
z
);
static
int
r_mark_regions
(
struct
SN_env
*
z
);
static
int
r_en_ending
(
struct
SN_env
*
z
);
static
int
r_e_ending
(
struct
SN_env
*
z
);
static
int
r_postlude
(
struct
SN_env
*
z
);
static
int
r_prelude
(
struct
SN_env
*
z
);
static
int
r_prelude
(
struct
SN_env
*
z
);
#ifdef __cplusplus
#ifdef __cplusplus
extern
"C"
{
extern
"C"
{
...
@@ -62,210 +57,77 @@ static const struct among a_0[11] =
...
@@ -62,210 +57,77 @@ static const struct among a_0[11] =
/* 10 */
{
2
,
s_0_10
,
0
,
5
,
0
}
/* 10 */
{
2
,
s_0_10
,
0
,
5
,
0
}
};
};
static
const
symbol
s_1_0
[
3
]
=
{
'n'
,
'd'
,
'e'
};
static
const
symbol
s_1_1
[
1
]
=
{
'I'
};
static
const
symbol
s_1_1
[
2
]
=
{
'e'
,
'n'
};
static
const
symbol
s_1_2
[
1
]
=
{
'Y'
};
static
const
symbol
s_1_2
[
4
]
=
{
'i'
,
'e'
,
'e'
,
'n'
};
static
const
symbol
s_1_3
[
4
]
=
{
'a'
,
'l'
,
'e'
,
'n'
};
static
const
symbol
s_1_4
[
3
]
=
{
'v'
,
'e'
,
'n'
};
static
const
symbol
s_1_5
[
1
]
=
{
's'
};
static
const
symbol
s_1_6
[
2
]
=
{
'\''
,
's'
};
static
const
symbol
s_1_7
[
2
]
=
{
'e'
,
's'
};
static
const
symbol
s_1_8
[
3
]
=
{
'i'
,
'e'
,
's'
};
static
const
symbol
s_1_9
[
3
]
=
{
'a'
,
'u'
,
's'
};
static
const
struct
among
a_1
[
10
]
=
static
const
struct
among
a_1
[
3
]
=
{
{
/* 0 */
{
3
,
s_1_0
,
-
1
,
10
,
0
},
/* 0 */
{
0
,
0
,
-
1
,
3
,
0
},
/* 1 */
{
2
,
s_1_1
,
-
1
,
9
,
0
},
/* 1 */
{
1
,
s_1_1
,
0
,
2
,
0
},
/* 2 */
{
4
,
s_1_2
,
1
,
7
,
0
},
/* 2 */
{
1
,
s_1_2
,
0
,
1
,
0
}
/* 3 */
{
4
,
s_1_3
,
1
,
6
,
0
},
/* 4 */
{
3
,
s_1_4
,
1
,
8
,
0
},
/* 5 */
{
1
,
s_1_5
,
-
1
,
2
,
0
},
/* 6 */
{
2
,
s_1_6
,
5
,
1
,
0
},
/* 7 */
{
2
,
s_1_7
,
5
,
4
,
0
},
/* 8 */
{
3
,
s_1_8
,
7
,
3
,
0
},
/* 9 */
{
3
,
s_1_9
,
5
,
5
,
0
}
};
};
static
const
symbol
s_2_0
[
2
]
=
{
'd'
,
'e'
};
static
const
symbol
s_2_0
[
2
]
=
{
'd'
,
'd'
};
static
const
symbol
s_2_1
[
2
]
=
{
'g'
,
'e'
};
static
const
symbol
s_2_1
[
2
]
=
{
'k'
,
'k'
};
static
const
symbol
s_2_2
[
5
]
=
{
'i'
,
's'
,
'c'
,
'h'
,
'e'
};
static
const
symbol
s_2_2
[
2
]
=
{
't'
,
't'
};
static
const
symbol
s_2_3
[
2
]
=
{
'j'
,
'e'
};
static
const
symbol
s_2_4
[
5
]
=
{
'l'
,
'i'
,
'j'
,
'k'
,
'e'
};
static
const
symbol
s_2_5
[
2
]
=
{
'l'
,
'e'
};
static
const
symbol
s_2_6
[
3
]
=
{
'e'
,
'n'
,
'e'
};
static
const
symbol
s_2_7
[
2
]
=
{
'r'
,
'e'
};
static
const
symbol
s_2_8
[
2
]
=
{
's'
,
'e'
};
static
const
symbol
s_2_9
[
2
]
=
{
't'
,
'e'
};
static
const
symbol
s_2_10
[
4
]
=
{
'i'
,
'e'
,
'v'
,
'e'
};
static
const
struct
among
a_2
[
11
]
=
static
const
struct
among
a_2
[
3
]
=
{
{
/* 0 */
{
2
,
s_2_0
,
-
1
,
5
,
0
},
/* 0 */
{
2
,
s_2_0
,
-
1
,
-
1
,
0
},
/* 1 */
{
2
,
s_2_1
,
-
1
,
2
,
0
},
/* 1 */
{
2
,
s_2_1
,
-
1
,
-
1
,
0
},
/* 2 */
{
5
,
s_2_2
,
-
1
,
4
,
0
},
/* 2 */
{
2
,
s_2_2
,
-
1
,
-
1
,
0
}
/* 3 */
{
2
,
s_2_3
,
-
1
,
1
,
0
},
/* 4 */
{
5
,
s_2_4
,
-
1
,
3
,
0
},
/* 5 */
{
2
,
s_2_5
,
-
1
,
9
,
0
},
/* 6 */
{
3
,
s_2_6
,
-
1
,
10
,
0
},
/* 7 */
{
2
,
s_2_7
,
-
1
,
8
,
0
},
/* 8 */
{
2
,
s_2_8
,
-
1
,
7
,
0
},
/* 9 */
{
2
,
s_2_9
,
-
1
,
6
,
0
},
/* 10 */
{
4
,
s_2_10
,
-
1
,
11
,
0
}
};
};
static
const
symbol
s_3_0
[
4
]
=
{
'h'
,
'e'
,
'i'
,
'd'
};
static
const
symbol
s_3_0
[
3
]
=
{
'e'
,
'n'
,
'e'
};
static
const
symbol
s_3_1
[
3
]
=
{
'f'
,
'i'
,
'e'
};
static
const
symbol
s_3_1
[
2
]
=
{
's'
,
'e'
};
static
const
symbol
s_3_2
[
3
]
=
{
'g'
,
'i'
,
'e'
};
static
const
symbol
s_3_2
[
2
]
=
{
'e'
,
'n'
};
static
const
symbol
s_3_3
[
4
]
=
{
'a'
,
't'
,
'i'
,
'e'
};
static
const
symbol
s_3_3
[
5
]
=
{
'h'
,
'e'
,
'd'
,
'e'
,
'n'
};
static
const
symbol
s_3_4
[
4
]
=
{
'i'
,
's'
,
'm'
,
'e'
};
static
const
symbol
s_3_4
[
1
]
=
{
's'
};
static
const
symbol
s_3_5
[
3
]
=
{
'i'
,
'n'
,
'g'
};
static
const
symbol
s_3_6
[
4
]
=
{
'a'
,
'r'
,
'i'
,
'j'
};
static
const
symbol
s_3_7
[
4
]
=
{
'e'
,
'r'
,
'i'
,
'j'
};
static
const
symbol
s_3_8
[
3
]
=
{
's'
,
'e'
,
'l'
};
static
const
symbol
s_3_9
[
4
]
=
{
'r'
,
'd'
,
'e'
,
'r'
};
static
const
symbol
s_3_10
[
4
]
=
{
's'
,
't'
,
'e'
,
'r'
};
static
const
symbol
s_3_11
[
5
]
=
{
'i'
,
't'
,
'e'
,
'i'
,
't'
};
static
const
symbol
s_3_12
[
3
]
=
{
'd'
,
's'
,
't'
};
static
const
symbol
s_3_13
[
3
]
=
{
't'
,
's'
,
't'
};
static
const
struct
among
a_3
[
14
]
=
static
const
struct
among
a_3
[
5
]
=
{
{
/* 0 */
{
4
,
s_3_0
,
-
1
,
3
,
0
},
/* 0 */
{
3
,
s_3_0
,
-
1
,
2
,
0
},
/* 1 */
{
3
,
s_3_1
,
-
1
,
7
,
0
},
/* 1 */
{
2
,
s_3_1
,
-
1
,
3
,
0
},
/* 2 */
{
3
,
s_3_2
,
-
1
,
8
,
0
},
/* 2 */
{
2
,
s_3_2
,
-
1
,
2
,
0
},
/* 3 */
{
4
,
s_3_3
,
-
1
,
1
,
0
},
/* 3 */
{
5
,
s_3_3
,
2
,
1
,
0
},
/* 4 */
{
4
,
s_3_4
,
-
1
,
5
,
0
},
/* 4 */
{
1
,
s_3_4
,
-
1
,
3
,
0
}
/* 5 */
{
3
,
s_3_5
,
-
1
,
5
,
0
},
/* 6 */
{
4
,
s_3_6
,
-
1
,
6
,
0
},
/* 7 */
{
4
,
s_3_7
,
-
1
,
5
,
0
},
/* 8 */
{
3
,
s_3_8
,
-
1
,
3
,
0
},
/* 9 */
{
4
,
s_3_9
,
-
1
,
4
,
0
},
/* 10 */
{
4
,
s_3_10
,
-
1
,
3
,
0
},
/* 11 */
{
5
,
s_3_11
,
-
1
,
2
,
0
},
/* 12 */
{
3
,
s_3_12
,
-
1
,
10
,
0
},
/* 13 */
{
3
,
s_3_13
,
-
1
,
9
,
0
}
};
};
static
const
symbol
s_4_0
[
3
]
=
{
'e'
,
'n'
,
'd'
};
static
const
symbol
s_4_0
[
3
]
=
{
'e'
,
'n'
,
'd'
};
static
const
symbol
s_4_1
[
5
]
=
{
'a'
,
't'
,
'i'
,
'e'
,
'f'
};
static
const
symbol
s_4_1
[
2
]
=
{
'i'
,
'g'
};
static
const
symbol
s_4_2
[
4
]
=
{
'e'
,
'r'
,
'i'
,
'g'
};
static
const
symbol
s_4_2
[
3
]
=
{
'i'
,
'n'
,
'g'
};
static
const
symbol
s_4_3
[
6
]
=
{
'a'
,
'c'
,
'h'
,
't'
,
'i'
,
'g'
};
static
const
symbol
s_4_3
[
4
]
=
{
'l'
,
'i'
,
'j'
,
'k'
};
static
const
symbol
s_4_4
[
6
]
=
{
'i'
,
'o'
,
'n'
,
'e'
,
'e'
,
'l'
};
static
const
symbol
s_4_4
[
4
]
=
{
'b'
,
'a'
,
'a'
,
'r'
};
static
const
symbol
s_4_5
[
4
]
=
{
'b'
,
'a'
,
'a'
,
'r'
};
static
const
symbol
s_4_5
[
3
]
=
{
'b'
,
'a'
,
'r'
};
static
const
symbol
s_4_6
[
4
]
=
{
'l'
,
'a'
,
'a'
,
'r'
};
static
const
symbol
s_4_7
[
4
]
=
{
'n'
,
'a'
,
'a'
,
'r'
};
static
const
symbol
s_4_8
[
4
]
=
{
'r'
,
'a'
,
'a'
,
'r'
};
static
const
symbol
s_4_9
[
6
]
=
{
'e'
,
'r'
,
'i'
,
'g'
,
'e'
,
'r'
};
static
const
symbol
s_4_10
[
8
]
=
{
'a'
,
'c'
,
'h'
,
't'
,
'i'
,
'g'
,
'e'
,
'r'
};
static
const
symbol
s_4_11
[
6
]
=
{
'l'
,
'i'
,
'j'
,
'k'
,
'e'
,
'r'
};
static
const
symbol
s_4_12
[
4
]
=
{
't'
,
'a'
,
'n'
,
't'
};
static
const
symbol
s_4_13
[
6
]
=
{
'e'
,
'r'
,
'i'
,
'g'
,
's'
,
't'
};
static
const
symbol
s_4_14
[
8
]
=
{
'a'
,
'c'
,
'h'
,
't'
,
'i'
,
'g'
,
's'
,
't'
};
static
const
symbol
s_4_15
[
6
]
=
{
'l'
,
'i'
,
'j'
,
'k'
,
's'
,
't'
};
static
const
struct
among
a_4
[
1
6
]
=
static
const
struct
among
a_4
[
6
]
=
{
{
/* 0 */
{
3
,
s_4_0
,
-
1
,
10
,
0
},
/* 0 */
{
3
,
s_4_0
,
-
1
,
1
,
0
},
/* 1 */
{
5
,
s_4_1
,
-
1
,
2
,
0
},
/* 1 */
{
2
,
s_4_1
,
-
1
,
2
,
0
},
/* 2 */
{
4
,
s_4_2
,
-
1
,
10
,
0
},
/* 2 */
{
3
,
s_4_2
,
-
1
,
1
,
0
},
/* 3 */
{
6
,
s_4_3
,
-
1
,
9
,
0
},
/* 3 */
{
4
,
s_4_3
,
-
1
,
3
,
0
},
/* 4 */
{
6
,
s_4_4
,
-
1
,
1
,
0
},
/* 4 */
{
4
,
s_4_4
,
-
1
,
4
,
0
},
/* 5 */
{
4
,
s_4_5
,
-
1
,
3
,
0
},
/* 5 */
{
3
,
s_4_5
,
-
1
,
5
,
0
}
/* 6 */
{
4
,
s_4_6
,
-
1
,
5
,
0
},
/* 7 */
{
4
,
s_4_7
,
-
1
,
4
,
0
},
/* 8 */
{
4
,
s_4_8
,
-
1
,
6
,
0
},
/* 9 */
{
6
,
s_4_9
,
-
1
,
10
,
0
},
/* 10 */
{
8
,
s_4_10
,
-
1
,
9
,
0
},
/* 11 */
{
6
,
s_4_11
,
-
1
,
8
,
0
},
/* 12 */
{
4
,
s_4_12
,
-
1
,
7
,
0
},
/* 13 */
{
6
,
s_4_13
,
-
1
,
10
,
0
},
/* 14 */
{
8
,
s_4_14
,
-
1
,
9
,
0
},
/* 15 */
{
6
,
s_4_15
,
-
1
,
8
,
0
}
};
};
static
const
symbol
s_5_0
[
2
]
=
{
'i'
,
'g'
};
static
const
symbol
s_5_0
[
2
]
=
{
'a'
,
'a'
};
static
const
symbol
s_5_1
[
4
]
=
{
'i'
,
'g'
,
'e'
,
'r'
};
static
const
symbol
s_5_1
[
2
]
=
{
'e'
,
'e'
};
static
const
symbol
s_5_2
[
4
]
=
{
'i'
,
'g'
,
's'
,
't'
};
static
const
symbol
s_5_2
[
2
]
=
{
'o'
,
'o'
};
static
const
symbol
s_5_3
[
2
]
=
{
'u'
,
'u'
};
static
const
struct
among
a_5
[
3
]
=
static
const
struct
among
a_5
[
4
]
=
{
{
/* 0 */
{
2
,
s_5_0
,
-
1
,
1
,
0
},
/* 0 */
{
2
,
s_5_0
,
-
1
,
-
1
,
0
},
/* 1 */
{
4
,
s_5_1
,
-
1
,
1
,
0
},
/* 1 */
{
2
,
s_5_1
,
-
1
,
-
1
,
0
},
/* 2 */
{
4
,
s_5_2
,
-
1
,
1
,
0
}
/* 2 */
{
2
,
s_5_2
,
-
1
,
-
1
,
0
},
/* 3 */
{
2
,
s_5_3
,
-
1
,
-
1
,
0
}
};
};
static
const
symbol
s_6_0
[
3
]
=
{
'e'
,
'e'
,
'e'
};
static
const
unsigned
char
g_v
[]
=
{
17
,
65
,
16
,
1
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
128
};
static
const
symbol
s_6_1
[
3
]
=
{
'i'
,
'e'
,
'e'
};
static
const
symbol
s_6_2
[
2
]
=
{
'f'
,
't'
};
static
const
symbol
s_6_3
[
2
]
=
{
'k'
,
't'
};
static
const
symbol
s_6_4
[
2
]
=
{
'p'
,
't'
};
static
const
struct
among
a_6
[
5
]
=
static
const
unsigned
char
g_v_I
[]
=
{
1
,
0
,
0
,
17
,
65
,
16
,
1
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
128
};
{
/* 0 */
{
3
,
s_6_0
,
-
1
,
2
,
0
},
/* 1 */
{
3
,
s_6_1
,
-
1
,
1
,
0
},
/* 2 */
{
2
,
s_6_2
,
-
1
,
4
,
0
},
/* 3 */
{
2
,
s_6_3
,
-
1
,
3
,
0
},
/* 4 */
{
2
,
s_6_4
,
-
1
,
5
,
0
}
};
static
const
symbol
s_7_0
[
2
]
=
{
'b'
,
'b'
};
static
const
symbol
s_7_1
[
2
]
=
{
'c'
,
'c'
};
static
const
symbol
s_7_2
[
2
]
=
{
'd'
,
'd'
};
static
const
symbol
s_7_3
[
2
]
=
{
'f'
,
'f'
};
static
const
symbol
s_7_4
[
2
]
=
{
'g'
,
'g'
};
static
const
symbol
s_7_5
[
2
]
=
{
'h'
,
'h'
};
static
const
symbol
s_7_6
[
2
]
=
{
'j'
,
'j'
};
static
const
symbol
s_7_7
[
2
]
=
{
'k'
,
'k'
};
static
const
symbol
s_7_8
[
2
]
=
{
'l'
,
'l'
};
static
const
symbol
s_7_9
[
2
]
=
{
'm'
,
'm'
};
static
const
symbol
s_7_10
[
2
]
=
{
'n'
,
'n'
};
static
const
symbol
s_7_11
[
2
]
=
{
'p'
,
'p'
};
static
const
symbol
s_7_12
[
2
]
=
{
'q'
,
'q'
};
static
const
symbol
s_7_13
[
2
]
=
{
'r'
,
'r'
};
static
const
symbol
s_7_14
[
2
]
=
{
's'
,
's'
};
static
const
symbol
s_7_15
[
2
]
=
{
't'
,
't'
};
static
const
symbol
s_7_16
[
1
]
=
{
'v'
};
static
const
symbol
s_7_17
[
2
]
=
{
'v'
,
'v'
};
static
const
symbol
s_7_18
[
2
]
=
{
'w'
,
'w'
};
static
const
symbol
s_7_19
[
2
]
=
{
'x'
,
'x'
};
static
const
symbol
s_7_20
[
1
]
=
{
'z'
};
static
const
symbol
s_7_21
[
2
]
=
{
'z'
,
'z'
};
static
const
struct
among
a_7
[
22
]
=
{
/* 0 */
{
2
,
s_7_0
,
-
1
,
1
,
0
},
/* 1 */
{
2
,
s_7_1
,
-
1
,
2
,
0
},
/* 2 */
{
2
,
s_7_2
,
-
1
,
3
,
0
},
/* 3 */
{
2
,
s_7_3
,
-
1
,
4
,
0
},
/* 4 */
{
2
,
s_7_4
,
-
1
,
5
,
0
},
/* 5 */
{
2
,
s_7_5
,
-
1
,
6
,
0
},
/* 6 */
{
2
,
s_7_6
,
-
1
,
7
,
0
},
/* 7 */
{
2
,
s_7_7
,
-
1
,
8
,
0
},
/* 8 */
{
2
,
s_7_8
,
-
1
,
9
,
0
},
/* 9 */
{
2
,
s_7_9
,
-
1
,
10
,
0
},
/* 10 */
{
2
,
s_7_10
,
-
1
,
11
,
0
},
/* 11 */
{
2
,
s_7_11
,
-
1
,
12
,
0
},
/* 12 */
{
2
,
s_7_12
,
-
1
,
13
,
0
},
/* 13 */
{
2
,
s_7_13
,
-
1
,
14
,
0
},
/* 14 */
{
2
,
s_7_14
,
-
1
,
15
,
0
},
/* 15 */
{
2
,
s_7_15
,
-
1
,
16
,
0
},
/* 16 */
{
1
,
s_7_16
,
-
1
,
21
,
0
},
/* 17 */
{
2
,
s_7_17
,
16
,
17
,
0
},
/* 18 */
{
2
,
s_7_18
,
-
1
,
18
,
0
},
/* 19 */
{
2
,
s_7_19
,
-
1
,
19
,
0
},
/* 20 */
{
1
,
s_7_20
,
-
1
,
22
,
0
},
/* 21 */
{
2
,
s_7_21
,
20
,
20
,
0
}
};
static
const
unsigned
char
g_v
[]
=
{
17
,
65
,
16
,
1
};
static
const
unsigned
char
g_v_WX
[]
=
{
17
,
65
,
208
,
1
};
static
const
unsigned
char
g_AOU
[]
=
{
1
,
64
,
16
};
static
const
unsigned
char
g_v_j
[]
=
{
17
,
67
,
16
,
1
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
128
};
static
const
unsigned
char
g_AIOU
[]
=
{
1
,
65
,
16
};
static
const
symbol
s_0
[]
=
{
'a'
};
static
const
symbol
s_0
[]
=
{
'a'
};
static
const
symbol
s_1
[]
=
{
'e'
};
static
const
symbol
s_1
[]
=
{
'e'
};
...
@@ -278,136 +140,59 @@ static const symbol s_7[] = { 'i' };
...
@@ -278,136 +140,59 @@ static const symbol s_7[] = { 'i' };
static
const
symbol
s_8
[]
=
{
'I'
};
static
const
symbol
s_8
[]
=
{
'I'
};
static
const
symbol
s_9
[]
=
{
'y'
};
static
const
symbol
s_9
[]
=
{
'y'
};
static
const
symbol
s_10
[]
=
{
'Y'
};
static
const
symbol
s_10
[]
=
{
'Y'
};
static
const
symbol
s_11
[]
=
{
'i'
,
'j'
};
static
const
symbol
s_11
[]
=
{
'y'
};
static
const
symbol
s_12
[]
=
{
'i'
,
'j'
};
static
const
symbol
s_12
[]
=
{
'i'
};
static
const
symbol
s_13
[]
=
{
'i'
,
'j'
};
static
const
symbol
s_13
[]
=
{
'e'
};
static
const
symbol
s_14
[]
=
{
'e'
};
static
const
symbol
s_14
[]
=
{
'g'
,
'e'
,
'm'
};
static
const
symbol
s_15
[]
=
{
't'
};
static
const
symbol
s_15
[]
=
{
'h'
,
'e'
,
'i'
,
'd'
};
static
const
symbol
s_16
[]
=
{
'i'
,
'e'
};
static
const
symbol
s_16
[]
=
{
'h'
,
'e'
,
'i'
,
'd'
};
static
const
symbol
s_17
[]
=
{
'a'
,
'r'
};
static
const
symbol
s_17
[]
=
{
'c'
};
static
const
symbol
s_18
[]
=
{
'e'
,
'r'
};
static
const
symbol
s_18
[]
=
{
'e'
,
'n'
};
static
const
symbol
s_19
[]
=
{
'e'
};
static
const
symbol
s_19
[]
=
{
'i'
,
'g'
};
static
const
symbol
s_20
[]
=
{
'a'
,
'u'
};
static
const
symbol
s_20
[]
=
{
'e'
};
static
const
symbol
s_21
[]
=
{
'a'
,
'a'
,
'l'
};
static
const
symbol
s_21
[]
=
{
'e'
};
static
const
symbol
s_22
[]
=
{
'i'
,
'e'
};
static
const
symbol
s_23
[]
=
{
'f'
};
static
const
symbol
s_24
[]
=
{
'h'
,
'e'
,
'd'
};
static
const
symbol
s_25
[]
=
{
'h'
,
'e'
,
'i'
,
'd'
};
static
const
symbol
s_26
[]
=
{
'n'
,
'd'
};
static
const
symbol
s_27
[]
=
{
'd'
};
static
const
symbol
s_28
[]
=
{
'i'
};
static
const
symbol
s_29
[]
=
{
'j'
};
static
const
symbol
s_30
[]
=
{
'n'
,
'd'
};
static
const
symbol
s_31
[]
=
{
'\''
,
't'
};
static
const
symbol
s_32
[]
=
{
'e'
,
't'
};
static
const
symbol
s_33
[]
=
{
'r'
,
'n'
,
't'
};
static
const
symbol
s_34
[]
=
{
'r'
,
'n'
};
static
const
symbol
s_35
[]
=
{
't'
};
static
const
symbol
s_36
[]
=
{
'i'
,
'n'
,
'k'
};
static
const
symbol
s_37
[]
=
{
'i'
,
'n'
,
'g'
};
static
const
symbol
s_38
[]
=
{
'm'
,
'p'
};
static
const
symbol
s_39
[]
=
{
'm'
};
static
const
symbol
s_40
[]
=
{
'\''
};
static
const
symbol
s_41
[]
=
{
'g'
};
static
const
symbol
s_42
[]
=
{
'l'
,
'i'
,
'j'
,
'k'
};
static
const
symbol
s_43
[]
=
{
'i'
,
's'
,
'c'
,
'h'
};
static
const
symbol
s_44
[]
=
{
't'
};
static
const
symbol
s_45
[]
=
{
's'
};
static
const
symbol
s_46
[]
=
{
'r'
};
static
const
symbol
s_47
[]
=
{
'l'
};
static
const
symbol
s_48
[]
=
{
'e'
,
'n'
};
static
const
symbol
s_49
[]
=
{
'i'
,
'e'
,
'f'
};
static
const
symbol
s_50
[]
=
{
'e'
,
'e'
,
'r'
};
static
const
symbol
s_51
[]
=
{
'r'
};
static
const
symbol
s_52
[]
=
{
'a'
,
'a'
,
'r'
};
static
const
symbol
s_53
[]
=
{
'f'
};
static
const
symbol
s_54
[]
=
{
'g'
};
static
const
symbol
s_55
[]
=
{
't'
};
static
const
symbol
s_56
[]
=
{
'd'
};
static
const
symbol
s_57
[]
=
{
'i'
,
'e'
};
static
const
symbol
s_58
[]
=
{
'e'
,
'e'
,
'r'
};
static
const
symbol
s_59
[]
=
{
'n'
};
static
const
symbol
s_60
[]
=
{
'l'
};
static
const
symbol
s_61
[]
=
{
'r'
};
static
const
symbol
s_62
[]
=
{
't'
,
'e'
,
'e'
,
'r'
};
static
const
symbol
s_63
[]
=
{
'l'
,
'i'
,
'j'
,
'k'
};
static
const
symbol
s_64
[]
=
{
'i'
,
'e'
};
static
const
symbol
s_65
[]
=
{
'e'
,
'e'
};
static
const
symbol
s_66
[]
=
{
'k'
};
static
const
symbol
s_67
[]
=
{
'f'
};
static
const
symbol
s_68
[]
=
{
'p'
};
static
const
symbol
s_69
[]
=
{
'b'
};
static
const
symbol
s_70
[]
=
{
'c'
};
static
const
symbol
s_71
[]
=
{
'd'
};
static
const
symbol
s_72
[]
=
{
'f'
};
static
const
symbol
s_73
[]
=
{
'g'
};
static
const
symbol
s_74
[]
=
{
'h'
};
static
const
symbol
s_75
[]
=
{
'j'
};
static
const
symbol
s_76
[]
=
{
'k'
};
static
const
symbol
s_77
[]
=
{
'l'
};
static
const
symbol
s_78
[]
=
{
'm'
};
static
const
symbol
s_79
[]
=
{
'n'
};
static
const
symbol
s_80
[]
=
{
'p'
};
static
const
symbol
s_81
[]
=
{
'q'
};
static
const
symbol
s_82
[]
=
{
'r'
};
static
const
symbol
s_83
[]
=
{
's'
};
static
const
symbol
s_84
[]
=
{
't'
};
static
const
symbol
s_85
[]
=
{
'v'
};
static
const
symbol
s_86
[]
=
{
'w'
};
static
const
symbol
s_87
[]
=
{
'x'
};
static
const
symbol
s_88
[]
=
{
'z'
};
static
const
symbol
s_89
[]
=
{
'f'
};
static
const
symbol
s_90
[]
=
{
's'
};
static
const
symbol
s_91
[]
=
{
'i'
,
'j'
};
static
const
symbol
s_92
[]
=
{
'i'
,
'j'
};
static
const
symbol
s_93
[]
=
{
'y'
};
static
const
symbol
s_94
[]
=
{
'Y'
};
static
const
symbol
s_95
[]
=
{
'y'
};
static
const
symbol
s_96
[]
=
{
'Y'
};
static
const
symbol
s_97
[]
=
{
'Y'
};
static
const
symbol
s_98
[]
=
{
'y'
};
static
int
r_prelude
(
struct
SN_env
*
z
)
{
static
int
r_prelude
(
struct
SN_env
*
z
)
{
int
among_var
;
int
among_var
;
{
int
c_test
=
z
->
c
;
/* test, line 4
9
*/
{
int
c_test
=
z
->
c
;
/* test, line 4
2
*/
while
(
1
)
{
/* repeat, line 4
9
*/
while
(
1
)
{
/* repeat, line 4
2
*/
int
c1
=
z
->
c
;
int
c1
=
z
->
c
;
z
->
bra
=
z
->
c
;
/* [, line
50
*/
z
->
bra
=
z
->
c
;
/* [, line
43
*/
if
(
z
->
c
+
1
>=
z
->
l
||
z
->
p
[
z
->
c
+
1
]
>>
5
!=
5
||
!
((
340306450
>>
(
z
->
p
[
z
->
c
+
1
]
&
0x1f
))
&
1
))
among_var
=
6
;
else
if
(
z
->
c
+
1
>=
z
->
l
||
z
->
p
[
z
->
c
+
1
]
>>
5
!=
5
||
!
((
340306450
>>
(
z
->
p
[
z
->
c
+
1
]
&
0x1f
))
&
1
))
among_var
=
6
;
else
among_var
=
find_among
(
z
,
a_0
,
11
);
/* substring, line
50
*/
among_var
=
find_among
(
z
,
a_0
,
11
);
/* substring, line
43
*/
if
(
!
(
among_var
))
goto
lab0
;
if
(
!
(
among_var
))
goto
lab0
;
z
->
ket
=
z
->
c
;
/* ], line
50
*/
z
->
ket
=
z
->
c
;
/* ], line
43
*/
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab0
;
case
0
:
goto
lab0
;
case
1
:
case
1
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_0
);
/* <-, line
52
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_0
);
/* <-, line
45
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
2
:
case
2
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_1
);
/* <-, line
54
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_1
);
/* <-, line
47
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
3
:
case
3
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_2
);
/* <-, line
56
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_2
);
/* <-, line
49
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
4
:
case
4
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_3
);
/* <-, line 5
8
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_3
);
/* <-, line 5
1
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
5
:
case
5
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_4
);
/* <-, line
60
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_4
);
/* <-, line
53
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
6
:
case
6
:
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
if
(
ret
<
0
)
goto
lab0
;
if
(
ret
<
0
)
goto
lab0
;
z
->
c
=
ret
;
/* next, line
61
*/
z
->
c
=
ret
;
/* next, line
54
*/
}
}
break
;
break
;
}
}
...
@@ -418,35 +203,35 @@ static int r_prelude(struct SN_env * z) {
...
@@ -418,35 +203,35 @@ static int r_prelude(struct SN_env * z) {
}
}
z
->
c
=
c_test
;
z
->
c
=
c_test
;
}
}
{
int
c_keep
=
z
->
c
;
/* try, line
64
*/
{
int
c_keep
=
z
->
c
;
/* try, line
57
*/
z
->
bra
=
z
->
c
;
/* [, line
64
*/
z
->
bra
=
z
->
c
;
/* [, line
57
*/
if
(
!
(
eq_s
(
z
,
1
,
s_5
)))
{
z
->
c
=
c_keep
;
goto
lab1
;
}
if
(
!
(
eq_s
(
z
,
1
,
s_5
)))
{
z
->
c
=
c_keep
;
goto
lab1
;
}
z
->
ket
=
z
->
c
;
/* ], line
64
*/
z
->
ket
=
z
->
c
;
/* ], line
57
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_6
);
/* <-, line
64
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_6
);
/* <-, line
57
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab1:
lab1:
;
;
}
}
while
(
1
)
{
/* repeat, line
65
*/
while
(
1
)
{
/* repeat, line
58
*/
int
c2
=
z
->
c
;
int
c2
=
z
->
c
;
while
(
1
)
{
/* goto, line
65
*/
while
(
1
)
{
/* goto, line
58
*/
int
c3
=
z
->
c
;
int
c3
=
z
->
c
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab3
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
232
,
0
))
goto
lab3
;
z
->
bra
=
z
->
c
;
/* [, line
66
*/
z
->
bra
=
z
->
c
;
/* [, line
59
*/
{
int
c4
=
z
->
c
;
/* or, line
66
*/
{
int
c4
=
z
->
c
;
/* or, line
59
*/
if
(
!
(
eq_s
(
z
,
1
,
s_7
)))
goto
lab5
;
if
(
!
(
eq_s
(
z
,
1
,
s_7
)))
goto
lab5
;
z
->
ket
=
z
->
c
;
/* ], line
66
*/
z
->
ket
=
z
->
c
;
/* ], line
59
*/
if
(
in_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab5
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
232
,
0
))
goto
lab5
;
{
int
ret
=
slice_from_s
(
z
,
1
,
s_8
);
/* <-, line
66
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_8
);
/* <-, line
59
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab4
;
goto
lab4
;
lab5:
lab5:
z
->
c
=
c4
;
z
->
c
=
c4
;
if
(
!
(
eq_s
(
z
,
1
,
s_9
)))
goto
lab3
;
if
(
!
(
eq_s
(
z
,
1
,
s_9
)))
goto
lab3
;
z
->
ket
=
z
->
c
;
/* ], line 6
7
*/
z
->
ket
=
z
->
c
;
/* ], line 6
0
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_10
);
/* <-, line 6
7
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_10
);
/* <-, line 6
0
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
}
}
...
@@ -457,7 +242,7 @@ static int r_prelude(struct SN_env * z) {
...
@@ -457,7 +242,7 @@ static int r_prelude(struct SN_env * z) {
z
->
c
=
c3
;
z
->
c
=
c3
;
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
if
(
ret
<
0
)
goto
lab2
;
if
(
ret
<
0
)
goto
lab2
;
z
->
c
=
ret
;
/* goto, line
65
*/
z
->
c
=
ret
;
/* goto, line
58
*/
}
}
}
}
continue
;
continue
;
...
@@ -468,1240 +253,382 @@ static int r_prelude(struct SN_env * z) {
...
@@ -468,1240 +253,382 @@ static int r_prelude(struct SN_env * z) {
return
1
;
return
1
;
}
}
static
int
r_mark_regions
(
struct
SN_env
*
z
)
{
z
->
I
[
0
]
=
z
->
l
;
z
->
I
[
1
]
=
z
->
l
;
{
/* gopast */
/* grouping v, line 69 */
int
ret
=
out_grouping_U
(
z
,
g_v
,
97
,
232
,
1
);
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
}
{
/* gopast */
/* non v, line 69 */
int
ret
=
in_grouping_U
(
z
,
g_v
,
97
,
232
,
1
);
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
}
z
->
I
[
0
]
=
z
->
c
;
/* setmark p1, line 69 */
/* try, line 70 */
if
(
!
(
z
->
I
[
0
]
<
3
))
goto
lab0
;
z
->
I
[
0
]
=
3
;
lab0:
{
/* gopast */
/* grouping v, line 71 */
int
ret
=
out_grouping_U
(
z
,
g_v
,
97
,
232
,
1
);
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
}
{
/* gopast */
/* non v, line 71 */
int
ret
=
in_grouping_U
(
z
,
g_v
,
97
,
232
,
1
);
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
}
z
->
I
[
1
]
=
z
->
c
;
/* setmark p2, line 71 */
return
1
;
}
static
int
r_postlude
(
struct
SN_env
*
z
)
{
int
among_var
;
while
(
1
)
{
/* repeat, line 75 */
int
c1
=
z
->
c
;
z
->
bra
=
z
->
c
;
/* [, line 77 */
if
(
z
->
c
>=
z
->
l
||
(
z
->
p
[
z
->
c
+
0
]
!=
73
&&
z
->
p
[
z
->
c
+
0
]
!=
89
))
among_var
=
3
;
else
among_var
=
find_among
(
z
,
a_1
,
3
);
/* substring, line 77 */
if
(
!
(
among_var
))
goto
lab0
;
z
->
ket
=
z
->
c
;
/* ], line 77 */
switch
(
among_var
)
{
case
0
:
goto
lab0
;
case
1
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_11
);
/* <-, line 78 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
2
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_12
);
/* <-, line 79 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
3
:
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
if
(
ret
<
0
)
goto
lab0
;
z
->
c
=
ret
;
/* next, line 80 */
}
break
;
}
continue
;
lab0:
z
->
c
=
c1
;
break
;
}
return
1
;
}
static
int
r_R1
(
struct
SN_env
*
z
)
{
static
int
r_R1
(
struct
SN_env
*
z
)
{
z
->
I
[
0
]
=
z
->
c
;
/* setmark x, line 74 */
if
(
!
(
z
->
I
[
0
]
<=
z
->
c
))
return
0
;
if
(
!
(
z
->
I
[
0
]
>=
z
->
I
[
1
]))
return
0
;
return
1
;
return
1
;
}
}
static
int
r_R2
(
struct
SN_env
*
z
)
{
static
int
r_R2
(
struct
SN_env
*
z
)
{
z
->
I
[
0
]
=
z
->
c
;
/* setmark x, line 75 */
if
(
!
(
z
->
I
[
1
]
<=
z
->
c
))
return
0
;
if
(
!
(
z
->
I
[
0
]
>=
z
->
I
[
2
]))
return
0
;
return
1
;
return
1
;
}
}
static
int
r_V
(
struct
SN_env
*
z
)
{
static
int
r_undouble
(
struct
SN_env
*
z
)
{
{
int
m_test
=
z
->
l
-
z
->
c
;
/* test, line 77 */
{
int
m_test
=
z
->
l
-
z
->
c
;
/* test, line 91 */
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* or, line 77 */
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1050640
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
return
0
;
if
(
in_grouping_b_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab1
;
if
(
!
(
find_among_b
(
z
,
a_2
,
3
)))
return
0
;
/* among, line 91 */
goto
lab0
;
lab1:
z
->
c
=
z
->
l
-
m1
;
if
(
!
(
eq_s_b
(
z
,
2
,
s_11
)))
return
0
;
}
lab0:
z
->
c
=
z
->
l
-
m_test
;
z
->
c
=
z
->
l
-
m_test
;
}
}
return
1
;
z
->
ket
=
z
->
c
;
/* [, line 91 */
}
static
int
r_VX
(
struct
SN_env
*
z
)
{
{
int
m_test
=
z
->
l
-
z
->
c
;
/* test, line 78 */
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
z
->
lb
,
0
,
-
1
);
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
z
->
lb
,
0
,
-
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
=
ret
;
/* next, line 78
*/
z
->
c
=
ret
;
/* next, line 91
*/
}
}
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* or, line 78 */
z
->
bra
=
z
->
c
;
/* ], line 91 */
if
(
in_grouping_b_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab1
;
{
int
ret
=
slice_del
(
z
);
/* delete, line 91 */
goto
lab0
;
if
(
ret
<
0
)
return
ret
;
lab1:
z
->
c
=
z
->
l
-
m1
;
if
(
!
(
eq_s_b
(
z
,
2
,
s_12
)))
return
0
;
}
lab0:
z
->
c
=
z
->
l
-
m_test
;
}
}
return
1
;
return
1
;
}
}
static
int
r_C
(
struct
SN_env
*
z
)
{
static
int
r_e_ending
(
struct
SN_env
*
z
)
{
{
int
m_test
=
z
->
l
-
z
->
c
;
/* test, line 79 */
z
->
B
[
0
]
=
0
;
/* unset e_found, line 95 */
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* not, line 79 */
z
->
ket
=
z
->
c
;
/* [, line 96 */
if
(
!
(
eq_s_b
(
z
,
2
,
s_13
)))
goto
lab0
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_13
)))
return
0
;
return
0
;
z
->
bra
=
z
->
c
;
/* ], line 96 */
lab0:
{
int
ret
=
r_R1
(
z
);
z
->
c
=
z
->
l
-
m1
;
if
(
ret
==
0
)
return
0
;
/* call R1, line 96 */
if
(
ret
<
0
)
return
ret
;
}
}
if
(
out_grouping_b_U
(
z
,
g_v
,
97
,
121
,
0
))
return
0
;
{
int
m_test
=
z
->
l
-
z
->
c
;
/* test, line 96 */
if
(
out_grouping_b_U
(
z
,
g_v
,
97
,
232
,
0
))
return
0
;
z
->
c
=
z
->
l
-
m_test
;
z
->
c
=
z
->
l
-
m_test
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 96 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
0
]
=
1
;
/* set e_found, line 97 */
{
int
ret
=
r_undouble
(
z
);
if
(
ret
==
0
)
return
0
;
/* call undouble, line 98 */
if
(
ret
<
0
)
return
ret
;
}
return
1
;
return
1
;
}
}
static
int
r_lengthen_V
(
struct
SN_env
*
z
)
{
static
int
r_en_ending
(
struct
SN_env
*
z
)
{
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* do, line 81 */
{
int
ret
=
r_R1
(
z
);
if
(
out_grouping_b_U
(
z
,
g_v_WX
,
97
,
121
,
0
))
goto
lab0
;
if
(
ret
==
0
)
return
0
;
/* call R1, line 102 */
z
->
ket
=
z
->
c
;
/* [, line 82 */
if
(
ret
<
0
)
return
ret
;
{
int
m2
=
z
->
l
-
z
->
c
;
(
void
)
m2
;
/* or, line 82 */
if
(
in_grouping_b_U
(
z
,
g_AOU
,
97
,
117
,
0
))
goto
lab2
;
z
->
bra
=
z
->
c
;
/* ], line 82 */
{
int
m_test
=
z
->
l
-
z
->
c
;
/* test, line 82 */
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* or, line 82 */
if
(
out_grouping_b_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab4
;
goto
lab3
;
lab4:
z
->
c
=
z
->
l
-
m3
;
if
(
z
->
c
>
z
->
lb
)
goto
lab2
;
/* atlimit, line 82 */
}
lab3:
z
->
c
=
z
->
l
-
m_test
;
}
}
goto
lab1
;
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* and, line 102 */
lab2:
if
(
out_grouping_b_U
(
z
,
g_v
,
97
,
232
,
0
))
return
0
;
z
->
c
=
z
->
l
-
m1
;
{
int
m2
=
z
->
l
-
z
->
c
;
(
void
)
m2
;
/* not, line 102 */
if
(
!
(
eq_s_b
(
z
,
3
,
s_14
)))
goto
lab0
;
return
0
;
lab0:
z
->
c
=
z
->
l
-
m2
;
z
->
c
=
z
->
l
-
m2
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_14
)))
goto
lab0
;
z
->
bra
=
z
->
c
;
/* ], line 83 */
{
int
m_test
=
z
->
l
-
z
->
c
;
/* test, line 83 */
{
int
m4
=
z
->
l
-
z
->
c
;
(
void
)
m4
;
/* or, line 83 */
if
(
out_grouping_b_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab6
;
goto
lab5
;
lab6:
z
->
c
=
z
->
l
-
m4
;
if
(
z
->
c
>
z
->
lb
)
goto
lab0
;
/* atlimit, line 83 */
}
lab5:
{
int
m5
=
z
->
l
-
z
->
c
;
(
void
)
m5
;
/* not, line 84 */
if
(
in_grouping_b_U
(
z
,
g_AIOU
,
97
,
117
,
0
))
goto
lab7
;
goto
lab0
;
lab7:
z
->
c
=
z
->
l
-
m5
;
}
{
int
m6
=
z
->
l
-
z
->
c
;
(
void
)
m6
;
/* not, line 85 */
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
z
->
lb
,
0
,
-
1
);
if
(
ret
<
0
)
goto
lab8
;
z
->
c
=
ret
;
/* next, line 85 */
}
if
(
in_grouping_b_U
(
z
,
g_AIOU
,
97
,
117
,
0
))
goto
lab8
;
if
(
out_grouping_b_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab8
;
goto
lab0
;
lab8:
z
->
c
=
z
->
l
-
m6
;
}
z
->
c
=
z
->
l
-
m_test
;
}
}
}
}
lab1:
{
int
ret
=
slice_del
(
z
);
/* delete, line 102 */
z
->
S
[
0
]
=
slice_to
(
z
,
z
->
S
[
0
]);
/* -> ch, line 86 */
if
(
z
->
S
[
0
]
==
0
)
return
-
1
;
/* -> ch, line 86 */
{
int
c_keep
=
z
->
c
;
int
ret
=
insert_v
(
z
,
z
->
c
,
z
->
c
,
z
->
S
[
0
]);
/* <+ ch, line 86 */
z
->
c
=
c_keep
;
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab0:
{
int
ret
=
r_undouble
(
z
);
z
->
c
=
z
->
l
-
m1
;
if
(
ret
==
0
)
return
0
;
/* call undouble, line 103 */
if
(
ret
<
0
)
return
ret
;
}
}
return
1
;
return
1
;
}
}
static
int
r_
Step_1
(
struct
SN_env
*
z
)
{
static
int
r_
standard_suffix
(
struct
SN_env
*
z
)
{
int
among_var
;
int
among_var
;
z
->
ket
=
z
->
c
;
/* [, line 91 */
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* do, line 107 */
if
(
z
->
c
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
540704
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
return
0
;
z
->
ket
=
z
->
c
;
/* [, line 108 */
among_var
=
find_among_b
(
z
,
a_1
,
10
);
/* among, line 91 */
if
(
z
->
c
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
540704
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab0
;
if
(
!
(
among_var
))
return
0
;
among_var
=
find_among_b
(
z
,
a_3
,
5
);
/* substring, line 108 */
z
->
bra
=
z
->
c
;
/* ], line 91 */
if
(
!
(
among_var
))
goto
lab0
;
z
->
bra
=
z
->
c
;
/* ], line 108 */
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
return
0
;
case
0
:
goto
lab
0
;
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 93 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
2
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 94 */
if
(
ret
<
0
)
return
ret
;
}
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* not, line 94 */
if
(
!
(
eq_s_b
(
z
,
1
,
s_15
)))
goto
lab0
;
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab0
;
/* call R1, line
94
*/
if
(
ret
==
0
)
goto
lab0
;
/* call R1, line
110
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
return
0
;
{
int
ret
=
slice_from_s
(
z
,
4
,
s_15
);
/* <-, line 110 */
lab0:
z
->
c
=
z
->
l
-
m1
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 94 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 94 */
break
;
case
2
:
{
int
ret
=
r_en_ending
(
z
);
if
(
ret
==
0
)
goto
lab0
;
/* call en_ending, line 113 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
3
:
case
3
:
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 95
*/
if
(
ret
==
0
)
goto
lab0
;
/* call R1, line 116
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_from_s
(
z
,
2
,
s_16
);
/* <-, line 95 */
if
(
out_grouping_b_U
(
z
,
g_v_j
,
97
,
232
,
0
))
goto
lab0
;
{
int
ret
=
slice_del
(
z
);
/* delete, line 116 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
4
:
{
int
m2
=
z
->
l
-
z
->
c
;
(
void
)
m2
;
/* or, line 97 */
if
(
!
(
eq_s_b
(
z
,
2
,
s_17
)))
goto
lab2
;
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call R1, line 97 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call C, line 97 */
if
(
ret
<
0
)
return
ret
;
}
}
z
->
bra
=
z
->
c
;
/* ], line 97 */
lab0:
{
int
ret
=
slice_del
(
z
);
/* delete, line 97 */
z
->
c
=
z
->
l
-
m1
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
r_lengthen_V
(
z
);
{
int
m2
=
z
->
l
-
z
->
c
;
(
void
)
m2
;
/* do, line 120 */
if
(
ret
==
0
)
goto
lab2
;
/* call lengthen_V, line 97 */
{
int
ret
=
r_e_ending
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call e_ending, line 120 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab1
;
lab1:
lab2:
z
->
c
=
z
->
l
-
m2
;
z
->
c
=
z
->
l
-
m2
;
if
(
!
(
eq_s_b
(
z
,
2
,
s_18
)))
goto
lab3
;
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab3
;
/* call R1, line 98 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
goto
lab3
;
/* call C, line 98 */
if
(
ret
<
0
)
return
ret
;
}
}
z
->
bra
=
z
->
c
;
/* ], line 98 */
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line 122 */
{
int
ret
=
slice_del
(
z
);
/* delete, line 98 */
z
->
ket
=
z
->
c
;
/* [, line 122 */
if
(
!
(
eq_s_b
(
z
,
4
,
s_16
)))
goto
lab2
;
z
->
bra
=
z
->
c
;
/* ], line 122 */
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call R2, line 122 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab1
;
{
int
m4
=
z
->
l
-
z
->
c
;
(
void
)
m4
;
/* not, line 122 */
if
(
!
(
eq_s_b
(
z
,
1
,
s_17
)))
goto
lab3
;
goto
lab2
;
lab3:
lab3:
z
->
c
=
z
->
l
-
m2
;
z
->
c
=
z
->
l
-
m4
;
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 99 */
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
r_C
(
z
);
{
int
ret
=
slice_del
(
z
);
/* delete, line 122 */
if
(
ret
==
0
)
return
0
;
/* call C, line 99 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_19
);
/* <-, line 99 */
z
->
ket
=
z
->
c
;
/* [, line 123 */
if
(
!
(
eq_s_b
(
z
,
2
,
s_18
)))
goto
lab2
;
z
->
bra
=
z
->
c
;
/* ], line 123 */
{
int
ret
=
r_en_ending
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call en_ending, line 123 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab2:
z
->
c
=
z
->
l
-
m3
;
}
}
lab1:
{
int
m5
=
z
->
l
-
z
->
c
;
(
void
)
m5
;
/* do, line 126 */
break
;
z
->
ket
=
z
->
c
;
/* [, line 127 */
case
5
:
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
264336
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab4
;
{
int
ret
=
r_R1
(
z
);
among_var
=
find_among_b
(
z
,
a_4
,
6
);
/* substring, line 127 */
if
(
ret
==
0
)
return
0
;
/* call R1, line 101 */
if
(
!
(
among_var
))
goto
lab4
;
z
->
bra
=
z
->
c
;
/* ], line 127 */
switch
(
among_var
)
{
case
0
:
goto
lab4
;
case
1
:
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
goto
lab4
;
/* call R2, line 129 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
r_V
(
z
);
{
int
ret
=
slice_del
(
z
);
/* delete, line 129 */
if
(
ret
==
0
)
return
0
;
/* call V, line 101 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_from_s
(
z
,
2
,
s_20
);
/* <-, line 101 */
{
int
m6
=
z
->
l
-
z
->
c
;
(
void
)
m6
;
/* or, line 130 */
z
->
ket
=
z
->
c
;
/* [, line 130 */
if
(
!
(
eq_s_b
(
z
,
2
,
s_19
)))
goto
lab6
;
z
->
bra
=
z
->
c
;
/* ], line 130 */
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
goto
lab6
;
/* call R2, line 130 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
{
int
m7
=
z
->
l
-
z
->
c
;
(
void
)
m7
;
/* not, line 130 */
case
6
:
if
(
!
(
eq_s_b
(
z
,
1
,
s_20
)))
goto
lab7
;
{
int
ret
=
r_R1
(
z
)
;
goto
lab6
;
if
(
ret
==
0
)
return
0
;
/* call R1, line 102 */
lab7:
if
(
ret
<
0
)
return
ret
;
z
->
c
=
z
->
l
-
m7
;
}
}
{
int
ret
=
slice_from_s
(
z
,
3
,
s_21
);
/* <-, line 102
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 130
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
goto
lab5
;
case
7
:
lab6:
{
int
ret
=
r_R1
(
z
);
z
->
c
=
z
->
l
-
m6
;
if
(
ret
==
0
)
return
0
;
/* call R1, line 103 */
{
int
ret
=
r_undouble
(
z
);
if
(
ret
==
0
)
goto
lab4
;
/* call undouble, line 130 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_from_s
(
z
,
2
,
s_22
);
/* <-, line 103 */
if
(
ret
<
0
)
return
ret
;
}
}
lab5:
break
;
break
;
case
8
:
case
2
:
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 104 */
if
(
ret
==
0
)
goto
lab4
;
/* call R2, line 133 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_23
);
/* <-, line 104 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
{
int
m8
=
z
->
l
-
z
->
c
;
(
void
)
m8
;
/* not, line 133 */
case
9
:
if
(
!
(
eq_s_b
(
z
,
1
,
s_21
)))
goto
lab8
;
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* or, line 105 */
goto
lab4
;
if
(
!
(
eq_s_b
(
z
,
3
,
s_24
)))
goto
lab5
;
lab8:
{
int
ret
=
r_R1
(
z
);
z
->
c
=
z
->
l
-
m8
;
if
(
ret
==
0
)
goto
lab5
;
/* call R1, line 105 */
if
(
ret
<
0
)
return
ret
;
}
}
z
->
bra
=
z
->
c
;
/* ], line 105 */
{
int
ret
=
slice_del
(
z
);
/* delete, line 133 */
{
int
ret
=
slice_from_s
(
z
,
4
,
s_25
);
/* <-, line 105 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab4
;
break
;
lab5:
case
3
:
z
->
c
=
z
->
l
-
m3
;
{
int
ret
=
r_R2
(
z
);
if
(
!
(
eq_s_b
(
z
,
2
,
s_26
)))
goto
lab6
;
if
(
ret
==
0
)
goto
lab4
;
/* call R2, line 136 */
{
int
ret
=
slice_del
(
z
);
/* delete, line 106 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab4
;
{
int
ret
=
slice_del
(
z
);
/* delete, line 136 */
lab6:
z
->
c
=
z
->
l
-
m3
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_27
)))
goto
lab7
;
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab7
;
/* call R1, line 107 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
r_
C
(
z
);
{
int
ret
=
r_
e_ending
(
z
);
if
(
ret
==
0
)
goto
lab
7
;
/* call C, line 107
*/
if
(
ret
==
0
)
goto
lab
4
;
/* call e_ending, line 136
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
z
->
bra
=
z
->
c
;
/* ], line 107 */
break
;
{
int
ret
=
slice_del
(
z
);
/* delete, line 107 */
case
4
:
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
goto
lab4
;
/* call R2, line 139 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab4
;
{
int
ret
=
slice_del
(
z
);
/* delete, line 139 */
lab7:
z
->
c
=
z
->
l
-
m3
;
{
int
m4
=
z
->
l
-
z
->
c
;
(
void
)
m4
;
/* or, line 108 */
if
(
!
(
eq_s_b
(
z
,
1
,
s_28
)))
goto
lab10
;
goto
lab9
;
lab10:
z
->
c
=
z
->
l
-
m4
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_29
)))
goto
lab8
;
}
lab9:
{
int
ret
=
r_V
(
z
);
if
(
ret
==
0
)
goto
lab8
;
/* call V, line 108 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 108 */
break
;
case
5
:
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
goto
lab4
;
/* call R2, line 142 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab4
;
if
(
!
(
z
->
B
[
0
]))
goto
lab4
;
/* Boolean test e_found, line 142 */
lab8:
{
int
ret
=
slice_del
(
z
);
/* delete, line 142 */
z
->
c
=
z
->
l
-
m3
;
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 109 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
r_C
(
z
);
break
;
if
(
ret
==
0
)
return
0
;
/* call C, line 109 */
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 109 */
lab4:
if
(
ret
<
0
)
return
ret
;
z
->
c
=
z
->
l
-
m5
;
}
}
{
int
ret
=
r_lengthen_V
(
z
);
{
int
m9
=
z
->
l
-
z
->
c
;
(
void
)
m9
;
/* do, line 146 */
if
(
ret
==
0
)
return
0
;
/* call lengthen_V, line 109 */
if
(
out_grouping_b_U
(
z
,
g_v_I
,
73
,
232
,
0
))
goto
lab9
;
if
(
ret
<
0
)
return
ret
;
{
int
m_test
=
z
->
l
-
z
->
c
;
/* test, line 148 */
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
2129954
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab9
;
if
(
!
(
find_among_b
(
z
,
a_5
,
4
)))
goto
lab9
;
/* among, line 149 */
if
(
out_grouping_b_U
(
z
,
g_v
,
97
,
232
,
0
))
goto
lab9
;
z
->
c
=
z
->
l
-
m_test
;
}
}
z
->
ket
=
z
->
c
;
/* [, line 152 */
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
z
->
lb
,
0
,
-
1
);
if
(
ret
<
0
)
goto
lab9
;
z
->
c
=
ret
;
/* next, line 152 */
}
}
lab4:
z
->
bra
=
z
->
c
;
/* ], line 152 */
break
;
{
int
ret
=
slice_del
(
z
);
/* delete, line 152 */
case
10
:
{
int
ret
=
slice_from_s
(
z
,
2
,
s_30
);
/* <-, line 110 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
lab9:
z
->
c
=
z
->
l
-
m9
;
}
}
return
1
;
return
1
;
}
}
static
int
r_Step_2
(
struct
SN_env
*
z
)
{
extern
int
dutch_UTF_8_stem
(
struct
SN_env
*
z
)
{
int
among_var
;
{
int
c1
=
z
->
c
;
/* do, line 159 */
z
->
ket
=
z
->
c
;
/* [, line 116 */
{
int
ret
=
r_prelude
(
z
);
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
!=
101
)
return
0
;
if
(
ret
==
0
)
goto
lab0
;
/* call prelude, line 159 */
among_var
=
find_among_b
(
z
,
a_2
,
11
);
/* among, line 116 */
if
(
!
(
among_var
))
return
0
;
z
->
bra
=
z
->
c
;
/* ], line 116 */
switch
(
among_var
)
{
case
0
:
return
0
;
case
1
:
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* or, line 117 */
if
(
!
(
eq_s_b
(
z
,
2
,
s_31
)))
goto
lab1
;
z
->
bra
=
z
->
c
;
/* ], line 117 */
{
int
ret
=
slice_del
(
z
);
/* delete, line 117 */
if
(
ret
<
0
)
return
ret
;
}
goto
lab0
;
lab1:
z
->
c
=
z
->
l
-
m1
;
if
(
!
(
eq_s_b
(
z
,
2
,
s_32
)))
goto
lab2
;
z
->
bra
=
z
->
c
;
/* ], line 118 */
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call R1, line 118 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call C, line 118 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 118 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab0
;
lab0:
lab2:
z
->
c
=
c1
;
z
->
c
=
z
->
l
-
m1
;
if
(
!
(
eq_s_b
(
z
,
3
,
s_33
)))
goto
lab3
;
z
->
bra
=
z
->
c
;
/* ], line 119 */
{
int
ret
=
slice_from_s
(
z
,
2
,
s_34
);
/* <-, line 119 */
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab0
;
{
int
c2
=
z
->
c
;
/* do, line 160 */
lab3:
{
int
ret
=
r_mark_regions
(
z
);
z
->
c
=
z
->
l
-
m1
;
if
(
ret
==
0
)
goto
lab1
;
/* call mark_regions, line 160 */
if
(
!
(
eq_s_b
(
z
,
1
,
s_35
)))
goto
lab4
;
z
->
bra
=
z
->
c
;
/* ], line 120 */
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab4
;
/* call R1, line 120 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
r_VX
(
z
);
lab1:
if
(
ret
==
0
)
goto
lab4
;
/* call VX, line 120 */
z
->
c
=
c2
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 120 */
z
->
lb
=
z
->
c
;
z
->
c
=
z
->
l
;
/* backwards, line 161 */
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line 162 */
{
int
ret
=
r_standard_suffix
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call standard_suffix, line 162 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab0
;
lab2:
lab4:
z
->
c
=
z
->
l
-
m3
;
z
->
c
=
z
->
l
-
m1
;
if
(
!
(
eq_s_b
(
z
,
3
,
s_36
)))
goto
lab5
;
z
->
bra
=
z
->
c
;
/* ], line 121 */
{
int
ret
=
slice_from_s
(
z
,
3
,
s_37
);
/* <-, line 121 */
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab0
;
z
->
c
=
z
->
lb
;
lab5:
{
int
c4
=
z
->
c
;
/* do, line 163 */
z
->
c
=
z
->
l
-
m1
;
{
int
ret
=
r_postlude
(
z
);
if
(
!
(
eq_s_b
(
z
,
2
,
s_38
)))
goto
lab6
;
if
(
ret
==
0
)
goto
lab3
;
/* call postlude, line 163 */
z
->
bra
=
z
->
c
;
/* ], line 122 */
{
int
ret
=
slice_from_s
(
z
,
1
,
s_39
);
/* <-, line 122 */
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab0
;
lab6:
z
->
c
=
z
->
l
-
m1
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_40
)))
goto
lab7
;
z
->
bra
=
z
->
c
;
/* ], line 123 */
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab7
;
/* call R1, line 123 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 123 */
if
(
ret
<
0
)
return
ret
;
}
goto
lab0
;
lab7:
z
->
c
=
z
->
l
-
m1
;
z
->
bra
=
z
->
c
;
/* ], line 124 */
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 124 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 124 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 124 */
if
(
ret
<
0
)
return
ret
;
}
}
lab0:
break
;
case
2
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 125 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_41
);
/* <-, line 125 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
3
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 126 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
4
,
s_42
);
/* <-, line 126 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
4
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 127 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
4
,
s_43
);
/* <-, line 127 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
5
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 128 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 128 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 128 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
6
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 129 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_44
);
/* <-, line 129 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
7
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 130 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_45
);
/* <-, line 130 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
8
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 131 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_46
);
/* <-, line 131 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
9
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 132 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 132 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
insert_s
(
z
,
z
->
c
,
z
->
c
,
1
,
s_47
);
/* attach, line 132 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_lengthen_V
(
z
);
if
(
ret
==
0
)
return
0
;
/* call lengthen_V, line 132 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
10
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 133 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 133 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 133 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
insert_s
(
z
,
z
->
c
,
z
->
c
,
2
,
s_48
);
/* attach, line 133 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_lengthen_V
(
z
);
if
(
ret
==
0
)
return
0
;
/* call lengthen_V, line 133 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
11
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 134 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 134 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
3
,
s_49
);
/* <-, line 134 */
if
(
ret
<
0
)
return
ret
;
}
break
;
}
return
1
;
}
static
int
r_Step_3
(
struct
SN_env
*
z
)
{
int
among_var
;
z
->
ket
=
z
->
c
;
/* [, line 140 */
if
(
z
->
c
-
2
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1316016
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
return
0
;
among_var
=
find_among_b
(
z
,
a_3
,
14
);
/* among, line 140 */
if
(
!
(
among_var
))
return
0
;
z
->
bra
=
z
->
c
;
/* ], line 140 */
switch
(
among_var
)
{
case
0
:
return
0
;
case
1
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 141 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
3
,
s_50
);
/* <-, line 141 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
2
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 142 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 142 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_lengthen_V
(
z
);
if
(
ret
==
0
)
return
0
;
/* call lengthen_V, line 142 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
3
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 145 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 145 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
4
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_51
);
/* <-, line 146 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
5
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 149 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 149 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_lengthen_V
(
z
);
if
(
ret
==
0
)
return
0
;
/* call lengthen_V, line 149 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
6
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 150 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 150 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
3
,
s_52
);
/* <-, line 150 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
7
:
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R2, line 151 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 151 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
insert_s
(
z
,
z
->
c
,
z
->
c
,
1
,
s_53
);
/* attach, line 151 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_lengthen_V
(
z
);
if
(
ret
==
0
)
return
0
;
/* call lengthen_V, line 151 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
8
:
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R2, line 152 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 152 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
insert_s
(
z
,
z
->
c
,
z
->
c
,
1
,
s_54
);
/* attach, line 152 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_lengthen_V
(
z
);
if
(
ret
==
0
)
return
0
;
/* call lengthen_V, line 152 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
9
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 153 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 153 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_55
);
/* <-, line 153 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
10
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 154 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 154 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_56
);
/* <-, line 154 */
if
(
ret
<
0
)
return
ret
;
}
break
;
}
return
1
;
}
static
int
r_Step_4
(
struct
SN_env
*
z
)
{
int
among_var
;
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* or, line 179 */
z
->
ket
=
z
->
c
;
/* [, line 160 */
if
(
z
->
c
-
2
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1315024
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab1
;
among_var
=
find_among_b
(
z
,
a_4
,
16
);
/* among, line 160 */
if
(
!
(
among_var
))
goto
lab1
;
z
->
bra
=
z
->
c
;
/* ], line 160 */
switch
(
among_var
)
{
case
0
:
goto
lab1
;
case
1
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 161 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
2
,
s_57
);
/* <-, line 161 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
2
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 162 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
3
,
s_58
);
/* <-, line 162 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
3
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 163 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 163 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
4
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 164 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_V
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call V, line 164 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_59
);
/* <-, line 164 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
5
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 165 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_V
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call V, line 165 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_60
);
/* <-, line 165 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
6
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 166 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_V
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call V, line 166 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_61
);
/* <-, line 166 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
7
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 167 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
4
,
s_62
);
/* <-, line 167 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
8
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 169 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_from_s
(
z
,
4
,
s_63
);
/* <-, line 169 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
9
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 172 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 172 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
10
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call R1, line 176 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call C, line 176 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 176 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_lengthen_V
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call lengthen_V, line 176 */
if
(
ret
<
0
)
return
ret
;
}
break
;
}
goto
lab0
;
lab1:
z
->
c
=
z
->
l
-
m1
;
z
->
ket
=
z
->
c
;
/* [, line 180 */
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1310848
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
return
0
;
among_var
=
find_among_b
(
z
,
a_5
,
3
);
/* among, line 180 */
if
(
!
(
among_var
))
return
0
;
z
->
bra
=
z
->
c
;
/* ], line 180 */
switch
(
among_var
)
{
case
0
:
return
0
;
case
1
:
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
return
0
;
/* call R1, line 183 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_C
(
z
);
if
(
ret
==
0
)
return
0
;
/* call C, line 183 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 183 */
if
(
ret
<
0
)
return
ret
;
}
{
int
ret
=
r_lengthen_V
(
z
);
if
(
ret
==
0
)
return
0
;
/* call lengthen_V, line 183 */
if
(
ret
<
0
)
return
ret
;
}
break
;
}
}
lab0:
return
1
;
}
static
int
r_Step_7
(
struct
SN_env
*
z
)
{
int
among_var
;
z
->
ket
=
z
->
c
;
/* [, line 190 */
if
(
z
->
c
-
1
<=
z
->
lb
||
(
z
->
p
[
z
->
c
-
1
]
!=
101
&&
z
->
p
[
z
->
c
-
1
]
!=
116
))
return
0
;
among_var
=
find_among_b
(
z
,
a_6
,
5
);
/* among, line 190 */
if
(
!
(
among_var
))
return
0
;
z
->
bra
=
z
->
c
;
/* ], line 190 */
switch
(
among_var
)
{
case
0
:
return
0
;
case
1
:
{
int
ret
=
slice_from_s
(
z
,
2
,
s_64
);
/* <-, line 191 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
2
:
{
int
ret
=
slice_from_s
(
z
,
2
,
s_65
);
/* <-, line 192 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
3
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_66
);
/* <-, line 193 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
4
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_67
);
/* <-, line 194 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
5
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_68
);
/* <-, line 195 */
if
(
ret
<
0
)
return
ret
;
}
break
;
}
return
1
;
}
static
int
r_Step_6
(
struct
SN_env
*
z
)
{
int
among_var
;
z
->
ket
=
z
->
c
;
/* [, line 201 */
if
(
z
->
c
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
98532828
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
return
0
;
among_var
=
find_among_b
(
z
,
a_7
,
22
);
/* among, line 201 */
if
(
!
(
among_var
))
return
0
;
z
->
bra
=
z
->
c
;
/* ], line 201 */
switch
(
among_var
)
{
case
0
:
return
0
;
case
1
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_69
);
/* <-, line 202 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
2
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_70
);
/* <-, line 203 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
3
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_71
);
/* <-, line 204 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
4
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_72
);
/* <-, line 205 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
5
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_73
);
/* <-, line 206 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
6
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_74
);
/* <-, line 207 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
7
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_75
);
/* <-, line 208 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
8
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_76
);
/* <-, line 209 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
9
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_77
);
/* <-, line 210 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
10
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_78
);
/* <-, line 211 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
11
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_79
);
/* <-, line 212 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
12
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_80
);
/* <-, line 213 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
13
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_81
);
/* <-, line 214 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
14
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_82
);
/* <-, line 215 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
15
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_83
);
/* <-, line 216 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
16
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_84
);
/* <-, line 217 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
17
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_85
);
/* <-, line 218 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
18
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_86
);
/* <-, line 219 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
19
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_87
);
/* <-, line 220 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
20
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_88
);
/* <-, line 221 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
21
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_89
);
/* <-, line 222 */
if
(
ret
<
0
)
return
ret
;
}
break
;
case
22
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_90
);
/* <-, line 223 */
if
(
ret
<
0
)
return
ret
;
}
break
;
}
return
1
;
}
static
int
r_measure
(
struct
SN_env
*
z
)
{
{
int
c1
=
z
->
c
;
/* do, line 251 */
z
->
c
=
z
->
l
;
/* tolimit, line 252 */
z
->
I
[
1
]
=
z
->
c
;
/* setmark p1, line 253 */
z
->
I
[
2
]
=
z
->
c
;
/* setmark p2, line 254 */
z
->
c
=
c1
;
}
{
int
c2
=
z
->
c
;
/* do, line 256 */
while
(
1
)
{
/* repeat, line 257 */
if
(
out_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab2
;
continue
;
lab2:
break
;
}
{
int
i
=
1
;
while
(
1
)
{
/* atleast, line 257 */
int
c3
=
z
->
c
;
{
int
c4
=
z
->
c
;
/* or, line 257 */
if
(
!
(
eq_s
(
z
,
2
,
s_91
)))
goto
lab5
;
goto
lab4
;
lab5:
z
->
c
=
c4
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab3
;
}
lab4:
i
--
;
continue
;
lab3:
z
->
c
=
c3
;
break
;
}
if
(
i
>
0
)
goto
lab1
;
}
if
(
out_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab1
;
z
->
I
[
1
]
=
z
->
c
;
/* setmark p1, line 257 */
while
(
1
)
{
/* repeat, line 258 */
if
(
out_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab6
;
continue
;
lab6:
break
;
}
{
int
i
=
1
;
while
(
1
)
{
/* atleast, line 258 */
int
c5
=
z
->
c
;
{
int
c6
=
z
->
c
;
/* or, line 258 */
if
(
!
(
eq_s
(
z
,
2
,
s_92
)))
goto
lab9
;
goto
lab8
;
lab9:
z
->
c
=
c6
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab7
;
}
lab8:
i
--
;
continue
;
lab7:
z
->
c
=
c5
;
break
;
}
if
(
i
>
0
)
goto
lab1
;
}
if
(
out_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab1
;
z
->
I
[
2
]
=
z
->
c
;
/* setmark p2, line 258 */
lab1:
z
->
c
=
c2
;
}
return
1
;
}
extern
int
dutch_UTF_8_stem
(
struct
SN_env
*
z
)
{
{
int
ret
=
r_prelude
(
z
);
if
(
ret
==
0
)
return
0
;
/* call prelude, line 263 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
0
]
=
0
;
/* unset Y_found, line 264 */
z
->
B
[
1
]
=
0
;
/* unset stemmed, line 265 */
{
int
c1
=
z
->
c
;
/* do, line 266 */
z
->
bra
=
z
->
c
;
/* [, line 266 */
if
(
!
(
eq_s
(
z
,
1
,
s_93
)))
goto
lab0
;
z
->
ket
=
z
->
c
;
/* ], line 266 */
{
int
ret
=
slice_from_s
(
z
,
1
,
s_94
);
/* <-, line 266 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
0
]
=
1
;
/* set Y_found, line 266 */
lab0:
z
->
c
=
c1
;
}
{
int
c2
=
z
->
c
;
/* do, line 267 */
while
(
1
)
{
/* repeat, line 267 */
int
c3
=
z
->
c
;
while
(
1
)
{
/* goto, line 267 */
int
c4
=
z
->
c
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
121
,
0
))
goto
lab3
;
z
->
bra
=
z
->
c
;
/* [, line 267 */
if
(
!
(
eq_s
(
z
,
1
,
s_95
)))
goto
lab3
;
z
->
ket
=
z
->
c
;
/* ], line 267 */
z
->
c
=
c4
;
break
;
lab3:
lab3:
z
->
c
=
c4
;
z
->
c
=
c4
;
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
if
(
ret
<
0
)
goto
lab2
;
z
->
c
=
ret
;
/* goto, line 267 */
}
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_96
);
/* <-, line 267 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
0
]
=
1
;
/* set Y_found, line 267 */
continue
;
lab2:
z
->
c
=
c3
;
break
;
}
z
->
c
=
c2
;
}
{
int
ret
=
r_measure
(
z
);
if
(
ret
==
0
)
return
0
;
/* call measure, line 269 */
if
(
ret
<
0
)
return
ret
;
}
z
->
lb
=
z
->
c
;
z
->
c
=
z
->
l
;
/* backwards, line 271 */
{
int
m5
=
z
->
l
-
z
->
c
;
(
void
)
m5
;
/* do, line 272 */
{
int
ret
=
r_Step_1
(
z
);
if
(
ret
==
0
)
goto
lab4
;
/* call Step_1, line 272 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
1
]
=
1
;
/* set stemmed, line 272 */
lab4:
z
->
c
=
z
->
l
-
m5
;
}
{
int
m6
=
z
->
l
-
z
->
c
;
(
void
)
m6
;
/* do, line 273 */
{
int
ret
=
r_Step_2
(
z
);
if
(
ret
==
0
)
goto
lab5
;
/* call Step_2, line 273 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
1
]
=
1
;
/* set stemmed, line 273 */
lab5:
z
->
c
=
z
->
l
-
m6
;
}
{
int
m7
=
z
->
l
-
z
->
c
;
(
void
)
m7
;
/* do, line 274 */
{
int
ret
=
r_Step_3
(
z
);
if
(
ret
==
0
)
goto
lab6
;
/* call Step_3, line 274 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
1
]
=
1
;
/* set stemmed, line 274 */
lab6:
z
->
c
=
z
->
l
-
m7
;
}
{
int
m8
=
z
->
l
-
z
->
c
;
(
void
)
m8
;
/* do, line 275 */
{
int
ret
=
r_Step_4
(
z
);
if
(
ret
==
0
)
goto
lab7
;
/* call Step_4, line 275 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
1
]
=
1
;
/* set stemmed, line 275 */
lab7:
z
->
c
=
z
->
l
-
m8
;
}
z
->
c
=
z
->
lb
;
z
->
lb
=
z
->
c
;
z
->
c
=
z
->
l
;
/* backwards, line 289 */
{
int
m9
=
z
->
l
-
z
->
c
;
(
void
)
m9
;
/* do, line 290 */
{
int
ret
=
r_Step_7
(
z
);
if
(
ret
==
0
)
goto
lab8
;
/* call Step_7, line 290 */
if
(
ret
<
0
)
return
ret
;
}
z
->
B
[
1
]
=
1
;
/* set stemmed, line 290 */
lab8:
z
->
c
=
z
->
l
-
m9
;
}
{
int
m10
=
z
->
l
-
z
->
c
;
(
void
)
m10
;
/* do, line 291 */
{
int
m11
=
z
->
l
-
z
->
c
;
(
void
)
m11
;
/* or, line 291 */
if
(
!
(
z
->
B
[
1
]))
goto
lab11
;
/* Boolean test stemmed, line 291 */
goto
lab10
;
lab11:
z
->
c
=
z
->
l
-
m11
;
{
int
ret
=
r_Step_6
(
z
);
if
(
ret
==
0
)
goto
lab9
;
/* call Step_6, line 291 */
if
(
ret
<
0
)
return
ret
;
}
}
lab10:
lab9:
z
->
c
=
z
->
l
-
m10
;
}
z
->
c
=
z
->
lb
;
{
int
c12
=
z
->
c
;
/* do, line 293 */
if
(
!
(
z
->
B
[
0
]))
goto
lab12
;
/* Boolean test Y_found, line 293 */
while
(
1
)
{
/* repeat, line 293 */
int
c13
=
z
->
c
;
while
(
1
)
{
/* goto, line 293 */
int
c14
=
z
->
c
;
z
->
bra
=
z
->
c
;
/* [, line 293 */
if
(
!
(
eq_s
(
z
,
1
,
s_97
)))
goto
lab14
;
z
->
ket
=
z
->
c
;
/* ], line 293 */
z
->
c
=
c14
;
break
;
lab14:
z
->
c
=
c14
;
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
if
(
ret
<
0
)
goto
lab13
;
z
->
c
=
ret
;
/* goto, line 293 */
}
}
{
int
ret
=
slice_from_s
(
z
,
1
,
s_98
);
/* <-, line 293 */
if
(
ret
<
0
)
return
ret
;
}
continue
;
lab13:
z
->
c
=
c13
;
break
;
}
lab12:
z
->
c
=
c12
;
}
}
return
1
;
return
1
;
}
}
extern
struct
SN_env
*
dutch_UTF_8_create_env
(
void
)
{
return
SN_create_env
(
1
,
3
,
2
);
}
extern
struct
SN_env
*
dutch_UTF_8_create_env
(
void
)
{
return
SN_create_env
(
0
,
2
,
1
);
}
extern
void
dutch_UTF_8_close_env
(
struct
SN_env
*
z
)
{
SN_close_env
(
z
,
1
);
}
extern
void
dutch_UTF_8_close_env
(
struct
SN_env
*
z
)
{
SN_close_env
(
z
,
0
);
}
libstemmer_c/src_c/stem_UTF_8_german.c
View file @
c825e3d9
...
@@ -54,13 +54,13 @@ static const symbol s_1_6[2] = { 'e', 's' };
...
@@ -54,13 +54,13 @@ static const symbol s_1_6[2] = { 'e', 's' };
static
const
struct
among
a_1
[
7
]
=
static
const
struct
among
a_1
[
7
]
=
{
{
/* 0 */
{
1
,
s_1_0
,
-
1
,
1
,
0
},
/* 0 */
{
1
,
s_1_0
,
-
1
,
2
,
0
},
/* 1 */
{
2
,
s_1_1
,
-
1
,
1
,
0
},
/* 1 */
{
2
,
s_1_1
,
-
1
,
1
,
0
},
/* 2 */
{
2
,
s_1_2
,
-
1
,
1
,
0
},
/* 2 */
{
2
,
s_1_2
,
-
1
,
2
,
0
},
/* 3 */
{
3
,
s_1_3
,
-
1
,
1
,
0
},
/* 3 */
{
3
,
s_1_3
,
-
1
,
1
,
0
},
/* 4 */
{
2
,
s_1_4
,
-
1
,
1
,
0
},
/* 4 */
{
2
,
s_1_4
,
-
1
,
1
,
0
},
/* 5 */
{
1
,
s_1_5
,
-
1
,
2
,
0
},
/* 5 */
{
1
,
s_1_5
,
-
1
,
3
,
0
},
/* 6 */
{
2
,
s_1_6
,
5
,
1
,
0
}
/* 6 */
{
2
,
s_1_6
,
5
,
2
,
0
}
};
};
static
const
symbol
s_2_0
[
2
]
=
{
'e'
,
'n'
};
static
const
symbol
s_2_0
[
2
]
=
{
'e'
,
'n'
};
...
@@ -123,21 +123,23 @@ static const symbol s_7[] = { 'u' };
...
@@ -123,21 +123,23 @@ static const symbol s_7[] = { 'u' };
static
const
symbol
s_8
[]
=
{
'a'
};
static
const
symbol
s_8
[]
=
{
'a'
};
static
const
symbol
s_9
[]
=
{
'o'
};
static
const
symbol
s_9
[]
=
{
'o'
};
static
const
symbol
s_10
[]
=
{
'u'
};
static
const
symbol
s_10
[]
=
{
'u'
};
static
const
symbol
s_11
[]
=
{
'i'
,
'g'
};
static
const
symbol
s_11
[]
=
{
's'
};
static
const
symbol
s_12
[]
=
{
'e'
};
static
const
symbol
s_12
[]
=
{
'n'
,
'i'
,
's'
};
static
const
symbol
s_13
[]
=
{
'e'
};
static
const
symbol
s_13
[]
=
{
'i'
,
'g'
};
static
const
symbol
s_14
[]
=
{
'e'
,
'r'
};
static
const
symbol
s_14
[]
=
{
'e'
};
static
const
symbol
s_15
[]
=
{
'e'
,
'n'
};
static
const
symbol
s_15
[]
=
{
'e'
};
static
const
symbol
s_16
[]
=
{
'e'
,
'r'
};
static
const
symbol
s_17
[]
=
{
'e'
,
'n'
};
static
int
r_prelude
(
struct
SN_env
*
z
)
{
static
int
r_prelude
(
struct
SN_env
*
z
)
{
{
int
c_test
=
z
->
c
;
/* test, line 3
0
*/
{
int
c_test
=
z
->
c
;
/* test, line 3
5
*/
while
(
1
)
{
/* repeat, line 3
0
*/
while
(
1
)
{
/* repeat, line 3
5
*/
int
c1
=
z
->
c
;
int
c1
=
z
->
c
;
{
int
c2
=
z
->
c
;
/* or, line 3
3
*/
{
int
c2
=
z
->
c
;
/* or, line 3
8
*/
z
->
bra
=
z
->
c
;
/* [, line 3
2
*/
z
->
bra
=
z
->
c
;
/* [, line 3
7
*/
if
(
!
(
eq_s
(
z
,
2
,
s_0
)))
goto
lab2
;
if
(
!
(
eq_s
(
z
,
2
,
s_0
)))
goto
lab2
;
z
->
ket
=
z
->
c
;
/* ], line 3
2
*/
z
->
ket
=
z
->
c
;
/* ], line 3
7
*/
{
int
ret
=
slice_from_s
(
z
,
2
,
s_1
);
/* <-, line 3
2
*/
{
int
ret
=
slice_from_s
(
z
,
2
,
s_1
);
/* <-, line 3
7
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab1
;
goto
lab1
;
...
@@ -145,7 +147,7 @@ static int r_prelude(struct SN_env * z) {
...
@@ -145,7 +147,7 @@ static int r_prelude(struct SN_env * z) {
z
->
c
=
c2
;
z
->
c
=
c2
;
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
if
(
ret
<
0
)
goto
lab0
;
if
(
ret
<
0
)
goto
lab0
;
z
->
c
=
ret
;
/* next, line 3
3
*/
z
->
c
=
ret
;
/* next, line 3
8
*/
}
}
}
}
lab1:
lab1:
...
@@ -156,26 +158,26 @@ static int r_prelude(struct SN_env * z) {
...
@@ -156,26 +158,26 @@ static int r_prelude(struct SN_env * z) {
}
}
z
->
c
=
c_test
;
z
->
c
=
c_test
;
}
}
while
(
1
)
{
/* repeat, line
36
*/
while
(
1
)
{
/* repeat, line
41
*/
int
c3
=
z
->
c
;
int
c3
=
z
->
c
;
while
(
1
)
{
/* goto, line
36
*/
while
(
1
)
{
/* goto, line
41
*/
int
c4
=
z
->
c
;
int
c4
=
z
->
c
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab4
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab4
;
z
->
bra
=
z
->
c
;
/* [, line
37
*/
z
->
bra
=
z
->
c
;
/* [, line
42
*/
{
int
c5
=
z
->
c
;
/* or, line
37
*/
{
int
c5
=
z
->
c
;
/* or, line
42
*/
if
(
!
(
eq_s
(
z
,
1
,
s_2
)))
goto
lab6
;
if
(
!
(
eq_s
(
z
,
1
,
s_2
)))
goto
lab6
;
z
->
ket
=
z
->
c
;
/* ], line
37
*/
z
->
ket
=
z
->
c
;
/* ], line
42
*/
if
(
in_grouping_U
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab6
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab6
;
{
int
ret
=
slice_from_s
(
z
,
1
,
s_3
);
/* <-, line
37
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_3
);
/* <-, line
42
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
goto
lab5
;
goto
lab5
;
lab6:
lab6:
z
->
c
=
c5
;
z
->
c
=
c5
;
if
(
!
(
eq_s
(
z
,
1
,
s_4
)))
goto
lab4
;
if
(
!
(
eq_s
(
z
,
1
,
s_4
)))
goto
lab4
;
z
->
ket
=
z
->
c
;
/* ], line
38
*/
z
->
ket
=
z
->
c
;
/* ], line
43
*/
if
(
in_grouping_U
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab4
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab4
;
{
int
ret
=
slice_from_s
(
z
,
1
,
s_5
);
/* <-, line
38
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_5
);
/* <-, line
43
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
}
}
...
@@ -186,7 +188,7 @@ static int r_prelude(struct SN_env * z) {
...
@@ -186,7 +188,7 @@ static int r_prelude(struct SN_env * z) {
z
->
c
=
c4
;
z
->
c
=
c4
;
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
if
(
ret
<
0
)
goto
lab3
;
if
(
ret
<
0
)
goto
lab3
;
z
->
c
=
ret
;
/* goto, line
36
*/
z
->
c
=
ret
;
/* goto, line
41
*/
}
}
}
}
continue
;
continue
;
...
@@ -200,82 +202,82 @@ static int r_prelude(struct SN_env * z) {
...
@@ -200,82 +202,82 @@ static int r_prelude(struct SN_env * z) {
static
int
r_mark_regions
(
struct
SN_env
*
z
)
{
static
int
r_mark_regions
(
struct
SN_env
*
z
)
{
z
->
I
[
0
]
=
z
->
l
;
z
->
I
[
0
]
=
z
->
l
;
z
->
I
[
1
]
=
z
->
l
;
z
->
I
[
1
]
=
z
->
l
;
{
int
c_test
=
z
->
c
;
/* test, line
47
*/
{
int
c_test
=
z
->
c
;
/* test, line
52
*/
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
+
3
);
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
+
3
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
=
ret
;
/* hop, line
47
*/
z
->
c
=
ret
;
/* hop, line
52
*/
}
}
z
->
I
[
2
]
=
z
->
c
;
/* setmark x, line
47
*/
z
->
I
[
2
]
=
z
->
c
;
/* setmark x, line
52
*/
z
->
c
=
c_test
;
z
->
c
=
c_test
;
}
}
{
/* gopast */
/* grouping v, line
49
*/
{
/* gopast */
/* grouping v, line
54
*/
int
ret
=
out_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
out_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
{
/* gopast */
/* non v, line
49
*/
{
/* gopast */
/* non v, line
54
*/
int
ret
=
in_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
in_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
z
->
I
[
0
]
=
z
->
c
;
/* setmark p1, line
49
*/
z
->
I
[
0
]
=
z
->
c
;
/* setmark p1, line
54
*/
/* try, line 5
0
*/
/* try, line 5
5
*/
if
(
!
(
z
->
I
[
0
]
<
z
->
I
[
2
]))
goto
lab0
;
if
(
!
(
z
->
I
[
0
]
<
z
->
I
[
2
]))
goto
lab0
;
z
->
I
[
0
]
=
z
->
I
[
2
];
z
->
I
[
0
]
=
z
->
I
[
2
];
lab0:
lab0:
{
/* gopast */
/* grouping v, line 5
1
*/
{
/* gopast */
/* grouping v, line 5
6
*/
int
ret
=
out_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
out_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
{
/* gopast */
/* non v, line 5
1
*/
{
/* gopast */
/* non v, line 5
6
*/
int
ret
=
in_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
in_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
z
->
I
[
1
]
=
z
->
c
;
/* setmark p2, line 5
1
*/
z
->
I
[
1
]
=
z
->
c
;
/* setmark p2, line 5
6
*/
return
1
;
return
1
;
}
}
static
int
r_postlude
(
struct
SN_env
*
z
)
{
static
int
r_postlude
(
struct
SN_env
*
z
)
{
int
among_var
;
int
among_var
;
while
(
1
)
{
/* repeat, line
55
*/
while
(
1
)
{
/* repeat, line
60
*/
int
c1
=
z
->
c
;
int
c1
=
z
->
c
;
z
->
bra
=
z
->
c
;
/* [, line
57
*/
z
->
bra
=
z
->
c
;
/* [, line
62
*/
among_var
=
find_among
(
z
,
a_0
,
6
);
/* substring, line
57
*/
among_var
=
find_among
(
z
,
a_0
,
6
);
/* substring, line
62
*/
if
(
!
(
among_var
))
goto
lab0
;
if
(
!
(
among_var
))
goto
lab0
;
z
->
ket
=
z
->
c
;
/* ], line
57
*/
z
->
ket
=
z
->
c
;
/* ], line
62
*/
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab0
;
case
0
:
goto
lab0
;
case
1
:
case
1
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_6
);
/* <-, line
58
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_6
);
/* <-, line
63
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
2
:
case
2
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_7
);
/* <-, line
59
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_7
);
/* <-, line
64
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
3
:
case
3
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_8
);
/* <-, line 6
0
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_8
);
/* <-, line 6
5
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
4
:
case
4
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_9
);
/* <-, line 6
1
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_9
);
/* <-, line 6
6
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
5
:
case
5
:
{
int
ret
=
slice_from_s
(
z
,
1
,
s_10
);
/* <-, line 6
2
*/
{
int
ret
=
slice_from_s
(
z
,
1
,
s_10
);
/* <-, line 6
7
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
6
:
case
6
:
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
0
,
z
->
l
,
1
);
if
(
ret
<
0
)
goto
lab0
;
if
(
ret
<
0
)
goto
lab0
;
z
->
c
=
ret
;
/* next, line 6
3
*/
z
->
c
=
ret
;
/* next, line 6
8
*/
}
}
break
;
break
;
}
}
...
@@ -299,26 +301,42 @@ static int r_R2(struct SN_env * z) {
...
@@ -299,26 +301,42 @@ static int r_R2(struct SN_env * z) {
static
int
r_standard_suffix
(
struct
SN_env
*
z
)
{
static
int
r_standard_suffix
(
struct
SN_env
*
z
)
{
int
among_var
;
int
among_var
;
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* do, line 7
4
*/
{
int
m1
=
z
->
l
-
z
->
c
;
(
void
)
m1
;
/* do, line 7
9
*/
z
->
ket
=
z
->
c
;
/* [, line
75
*/
z
->
ket
=
z
->
c
;
/* [, line
80
*/
if
(
z
->
c
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
811040
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab0
;
if
(
z
->
c
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
811040
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab0
;
among_var
=
find_among_b
(
z
,
a_1
,
7
);
/* substring, line
75
*/
among_var
=
find_among_b
(
z
,
a_1
,
7
);
/* substring, line
80
*/
if
(
!
(
among_var
))
goto
lab0
;
if
(
!
(
among_var
))
goto
lab0
;
z
->
bra
=
z
->
c
;
/* ], line
75
*/
z
->
bra
=
z
->
c
;
/* ], line
80
*/
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab0
;
/* call R1, line
75
*/
if
(
ret
==
0
)
goto
lab0
;
/* call R1, line
80
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab0
;
case
0
:
goto
lab0
;
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line
77
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line
82
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
2
:
case
2
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 85 */
if
(
ret
<
0
)
return
ret
;
}
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 86 */
z
->
ket
=
z
->
c
;
/* [, line 86 */
if
(
!
(
eq_s_b
(
z
,
1
,
s_11
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab1
;
}
z
->
bra
=
z
->
c
;
/* ], line 86 */
if
(
!
(
eq_s_b
(
z
,
3
,
s_12
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab1
;
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 86 */
if
(
ret
<
0
)
return
ret
;
}
lab1:
;
}
break
;
case
3
:
if
(
in_grouping_b_U
(
z
,
g_s_ending
,
98
,
116
,
0
))
goto
lab0
;
if
(
in_grouping_b_U
(
z
,
g_s_ending
,
98
,
116
,
0
))
goto
lab0
;
{
int
ret
=
slice_del
(
z
);
/* delete, line 8
0
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 8
9
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
...
@@ -326,175 +344,175 @@ static int r_standard_suffix(struct SN_env * z) {
...
@@ -326,175 +344,175 @@ static int r_standard_suffix(struct SN_env * z) {
lab0:
lab0:
z
->
c
=
z
->
l
-
m1
;
z
->
c
=
z
->
l
-
m1
;
}
}
{
int
m2
=
z
->
l
-
z
->
c
;
(
void
)
m2
;
/* do, line
84
*/
{
int
m2
=
z
->
l
-
z
->
c
;
(
void
)
m2
;
/* do, line
93
*/
z
->
ket
=
z
->
c
;
/* [, line
85
*/
z
->
ket
=
z
->
c
;
/* [, line
94
*/
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1327104
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab
1
;
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1327104
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab
2
;
among_var
=
find_among_b
(
z
,
a_2
,
4
);
/* substring, line
85
*/
among_var
=
find_among_b
(
z
,
a_2
,
4
);
/* substring, line
94
*/
if
(
!
(
among_var
))
goto
lab
1
;
if
(
!
(
among_var
))
goto
lab
2
;
z
->
bra
=
z
->
c
;
/* ], line
85
*/
z
->
bra
=
z
->
c
;
/* ], line
94
*/
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
goto
lab
1
;
/* call R1, line 85
*/
if
(
ret
==
0
)
goto
lab
2
;
/* call R1, line 94
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab
1
;
case
0
:
goto
lab
2
;
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line
87
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line
96
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
2
:
case
2
:
if
(
in_grouping_b_U
(
z
,
g_st_ending
,
98
,
116
,
0
))
goto
lab
1
;
if
(
in_grouping_b_U
(
z
,
g_st_ending
,
98
,
116
,
0
))
goto
lab
2
;
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
z
->
lb
,
z
->
l
,
-
3
);
{
int
ret
=
skip_utf8
(
z
->
p
,
z
->
c
,
z
->
lb
,
z
->
l
,
-
3
);
if
(
ret
<
0
)
goto
lab
1
;
if
(
ret
<
0
)
goto
lab
2
;
z
->
c
=
ret
;
/* hop, line 9
0
*/
z
->
c
=
ret
;
/* hop, line 9
9
*/
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 9
0
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 9
9
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
}
}
lab
1
:
lab
2
:
z
->
c
=
z
->
l
-
m2
;
z
->
c
=
z
->
l
-
m2
;
}
}
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line
94
*/
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line
103
*/
z
->
ket
=
z
->
c
;
/* [, line
95
*/
z
->
ket
=
z
->
c
;
/* [, line
104
*/
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1051024
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab
2
;
if
(
z
->
c
-
1
<=
z
->
lb
||
z
->
p
[
z
->
c
-
1
]
>>
5
!=
3
||
!
((
1051024
>>
(
z
->
p
[
z
->
c
-
1
]
&
0x1f
))
&
1
))
goto
lab
3
;
among_var
=
find_among_b
(
z
,
a_4
,
8
);
/* substring, line
95
*/
among_var
=
find_among_b
(
z
,
a_4
,
8
);
/* substring, line
104
*/
if
(
!
(
among_var
))
goto
lab
2
;
if
(
!
(
among_var
))
goto
lab
3
;
z
->
bra
=
z
->
c
;
/* ], line
95
*/
z
->
bra
=
z
->
c
;
/* ], line
104
*/
{
int
ret
=
r_R2
(
z
);
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
goto
lab
2
;
/* call R2, line 95
*/
if
(
ret
==
0
)
goto
lab
3
;
/* call R2, line 104
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
goto
lab
2
;
case
0
:
goto
lab
3
;
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line
97
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line
106
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line
98
*/
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line
107
*/
z
->
ket
=
z
->
c
;
/* [, line
98
*/
z
->
ket
=
z
->
c
;
/* [, line
107
*/
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
1
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab3
;
}
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
3
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab4
;
}
z
->
bra
=
z
->
c
;
/* ], line
98
*/
z
->
bra
=
z
->
c
;
/* ], line
107
*/
{
int
m4
=
z
->
l
-
z
->
c
;
(
void
)
m4
;
/* not, line
98
*/
{
int
m4
=
z
->
l
-
z
->
c
;
(
void
)
m4
;
/* not, line
107
*/
if
(
!
(
eq_s_b
(
z
,
1
,
s_1
2
)))
goto
lab4
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_1
4
)))
goto
lab5
;
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
3
;
}
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
4
;
}
lab
4
:
lab
5
:
z
->
c
=
z
->
l
-
m4
;
z
->
c
=
z
->
l
-
m4
;
}
}
{
int
ret
=
r_R2
(
z
);
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
3
;
}
/* call R2, line 98
*/
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
4
;
}
/* call R2, line 107
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line
98
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line
107
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab
3
:
lab
4
:
;
;
}
}
break
;
break
;
case
2
:
case
2
:
{
int
m5
=
z
->
l
-
z
->
c
;
(
void
)
m5
;
/* not, line 1
01
*/
{
int
m5
=
z
->
l
-
z
->
c
;
(
void
)
m5
;
/* not, line 1
10
*/
if
(
!
(
eq_s_b
(
z
,
1
,
s_1
3
)))
goto
lab5
;
if
(
!
(
eq_s_b
(
z
,
1
,
s_1
5
)))
goto
lab6
;
goto
lab
2
;
goto
lab
3
;
lab
5
:
lab
6
:
z
->
c
=
z
->
l
-
m5
;
z
->
c
=
z
->
l
-
m5
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
01
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
10
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
case
3
:
case
3
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
04
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
13
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 1
05
*/
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 1
14
*/
z
->
ket
=
z
->
c
;
/* [, line 1
06
*/
z
->
ket
=
z
->
c
;
/* [, line 1
15
*/
{
int
m6
=
z
->
l
-
z
->
c
;
(
void
)
m6
;
/* or, line 1
06
*/
{
int
m6
=
z
->
l
-
z
->
c
;
(
void
)
m6
;
/* or, line 1
15
*/
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
4
)))
goto
lab8
;
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
6
)))
goto
lab9
;
goto
lab
7
;
goto
lab
8
;
lab
8
:
lab
9
:
z
->
c
=
z
->
l
-
m6
;
z
->
c
=
z
->
l
-
m6
;
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
5
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab6
;
}
if
(
!
(
eq_s_b
(
z
,
2
,
s_1
7
)))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab7
;
}
}
}
lab
7
:
lab
8
:
z
->
bra
=
z
->
c
;
/* ], line 1
06
*/
z
->
bra
=
z
->
c
;
/* ], line 1
15
*/
{
int
ret
=
r_R1
(
z
);
{
int
ret
=
r_R1
(
z
);
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
6
;
}
/* call R1, line 106
*/
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
7
;
}
/* call R1, line 115
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
06
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
15
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab
6
:
lab
7
:
;
;
}
}
break
;
break
;
case
4
:
case
4
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 11
0
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 11
9
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 1
11
*/
{
int
m_keep
=
z
->
l
-
z
->
c
;
/* (void) m_keep;*/
/* try, line 1
20
*/
z
->
ket
=
z
->
c
;
/* [, line 1
12
*/
z
->
ket
=
z
->
c
;
/* [, line 1
21
*/
if
(
z
->
c
-
1
<=
z
->
lb
||
(
z
->
p
[
z
->
c
-
1
]
!=
103
&&
z
->
p
[
z
->
c
-
1
]
!=
104
))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
9
;
}
if
(
z
->
c
-
1
<=
z
->
lb
||
(
z
->
p
[
z
->
c
-
1
]
!=
103
&&
z
->
p
[
z
->
c
-
1
]
!=
104
))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
10
;
}
among_var
=
find_among_b
(
z
,
a_3
,
2
);
/* substring, line 1
12
*/
among_var
=
find_among_b
(
z
,
a_3
,
2
);
/* substring, line 1
21
*/
if
(
!
(
among_var
))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
9
;
}
if
(
!
(
among_var
))
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
10
;
}
z
->
bra
=
z
->
c
;
/* ], line 1
12
*/
z
->
bra
=
z
->
c
;
/* ], line 1
21
*/
{
int
ret
=
r_R2
(
z
);
{
int
ret
=
r_R2
(
z
);
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
9
;
}
/* call R2, line 112
*/
if
(
ret
==
0
)
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
10
;
}
/* call R2, line 121
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
switch
(
among_var
)
{
switch
(
among_var
)
{
case
0
:
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
9
;
}
case
0
:
{
z
->
c
=
z
->
l
-
m_keep
;
goto
lab
10
;
}
case
1
:
case
1
:
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
14
*/
{
int
ret
=
slice_del
(
z
);
/* delete, line 1
23
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
break
;
break
;
}
}
lab
9
:
lab
10
:
;
;
}
}
break
;
break
;
}
}
lab
2
:
lab
3
:
z
->
c
=
z
->
l
-
m3
;
z
->
c
=
z
->
l
-
m3
;
}
}
return
1
;
return
1
;
}
}
extern
int
german_UTF_8_stem
(
struct
SN_env
*
z
)
{
extern
int
german_UTF_8_stem
(
struct
SN_env
*
z
)
{
{
int
c1
=
z
->
c
;
/* do, line 1
25
*/
{
int
c1
=
z
->
c
;
/* do, line 1
34
*/
{
int
ret
=
r_prelude
(
z
);
{
int
ret
=
r_prelude
(
z
);
if
(
ret
==
0
)
goto
lab0
;
/* call prelude, line 1
25
*/
if
(
ret
==
0
)
goto
lab0
;
/* call prelude, line 1
34
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab0:
lab0:
z
->
c
=
c1
;
z
->
c
=
c1
;
}
}
{
int
c2
=
z
->
c
;
/* do, line 1
26
*/
{
int
c2
=
z
->
c
;
/* do, line 1
35
*/
{
int
ret
=
r_mark_regions
(
z
);
{
int
ret
=
r_mark_regions
(
z
);
if
(
ret
==
0
)
goto
lab1
;
/* call mark_regions, line 1
26
*/
if
(
ret
==
0
)
goto
lab1
;
/* call mark_regions, line 1
35
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab1:
lab1:
z
->
c
=
c2
;
z
->
c
=
c2
;
}
}
z
->
lb
=
z
->
c
;
z
->
c
=
z
->
l
;
/* backwards, line 1
27
*/
z
->
lb
=
z
->
c
;
z
->
c
=
z
->
l
;
/* backwards, line 1
36
*/
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line 1
28
*/
{
int
m3
=
z
->
l
-
z
->
c
;
(
void
)
m3
;
/* do, line 1
37
*/
{
int
ret
=
r_standard_suffix
(
z
);
{
int
ret
=
r_standard_suffix
(
z
);
if
(
ret
==
0
)
goto
lab2
;
/* call standard_suffix, line 1
28
*/
if
(
ret
==
0
)
goto
lab2
;
/* call standard_suffix, line 1
37
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab2:
lab2:
z
->
c
=
z
->
l
-
m3
;
z
->
c
=
z
->
l
-
m3
;
}
}
z
->
c
=
z
->
lb
;
z
->
c
=
z
->
lb
;
{
int
c4
=
z
->
c
;
/* do, line 1
29
*/
{
int
c4
=
z
->
c
;
/* do, line 1
38
*/
{
int
ret
=
r_postlude
(
z
);
{
int
ret
=
r_postlude
(
z
);
if
(
ret
==
0
)
goto
lab3
;
/* call postlude, line 1
29
*/
if
(
ret
==
0
)
goto
lab3
;
/* call postlude, line 1
38
*/
if
(
ret
<
0
)
return
ret
;
if
(
ret
<
0
)
return
ret
;
}
}
lab3:
lab3:
...
...
libstemmer_c/src_c/stem_UTF_8_hungarian.c
View file @
c825e3d9
...
@@ -136,14 +136,14 @@ static const symbol s_4_6[3] = { 'n', 'e', 'k' };
...
@@ -136,14 +136,14 @@ static const symbol s_4_6[3] = { 'n', 'e', 'k' };
static
const
symbol
s_4_7
[
3
]
=
{
'v'
,
'a'
,
'l'
};
static
const
symbol
s_4_7
[
3
]
=
{
'v'
,
'a'
,
'l'
};
static
const
symbol
s_4_8
[
3
]
=
{
'v'
,
'e'
,
'l'
};
static
const
symbol
s_4_8
[
3
]
=
{
'v'
,
'e'
,
'l'
};
static
const
symbol
s_4_9
[
2
]
=
{
'u'
,
'l'
};
static
const
symbol
s_4_9
[
2
]
=
{
'u'
,
'l'
};
static
const
symbol
s_4_10
[
4
]
=
{
'
n'
,
0xC3
,
0xA
1
,
'l'
};
static
const
symbol
s_4_10
[
4
]
=
{
'
b'
,
0xC5
,
0x9
1
,
'l'
};
static
const
symbol
s_4_11
[
4
]
=
{
'
n'
,
0xC3
,
0xA9
,
'l'
};
static
const
symbol
s_4_11
[
4
]
=
{
'
r'
,
0xC5
,
0x91
,
'l'
};
static
const
symbol
s_4_12
[
4
]
=
{
'
b'
,
0xC3
,
0xB3
,
'l'
};
static
const
symbol
s_4_12
[
4
]
=
{
'
t'
,
0xC5
,
0x91
,
'l'
};
static
const
symbol
s_4_13
[
4
]
=
{
'
r'
,
0xC3
,
0xB3
,
'l'
};
static
const
symbol
s_4_13
[
4
]
=
{
'
n'
,
0xC3
,
0xA1
,
'l'
};
static
const
symbol
s_4_14
[
4
]
=
{
'
t'
,
0xC3
,
0xB3
,
'l'
};
static
const
symbol
s_4_14
[
4
]
=
{
'
n'
,
0xC3
,
0xA9
,
'l'
};
static
const
symbol
s_4_15
[
4
]
=
{
'b'
,
0xC3
,
0xB
5
,
'l'
};
static
const
symbol
s_4_15
[
4
]
=
{
'b'
,
0xC3
,
0xB
3
,
'l'
};
static
const
symbol
s_4_16
[
4
]
=
{
'r'
,
0xC3
,
0xB
5
,
'l'
};
static
const
symbol
s_4_16
[
4
]
=
{
'r'
,
0xC3
,
0xB
3
,
'l'
};
static
const
symbol
s_4_17
[
4
]
=
{
't'
,
0xC3
,
0xB
5
,
'l'
};
static
const
symbol
s_4_17
[
4
]
=
{
't'
,
0xC3
,
0xB
3
,
'l'
};
static
const
symbol
s_4_18
[
3
]
=
{
0xC3
,
0xBC
,
'l'
};
static
const
symbol
s_4_18
[
3
]
=
{
0xC3
,
0xBC
,
'l'
};
static
const
symbol
s_4_19
[
1
]
=
{
'n'
};
static
const
symbol
s_4_19
[
1
]
=
{
'n'
};
static
const
symbol
s_4_20
[
2
]
=
{
'a'
,
'n'
};
static
const
symbol
s_4_20
[
2
]
=
{
'a'
,
'n'
};
...
@@ -460,7 +460,7 @@ static const struct among a_11[42] =
...
@@ -460,7 +460,7 @@ static const struct among a_11[42] =
/* 41 */
{
4
,
s_11_41
,
35
,
3
,
0
}
/* 41 */
{
4
,
s_11_41
,
35
,
3
,
0
}
};
};
static
const
unsigned
char
g_v
[]
=
{
17
,
65
,
16
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
17
,
52
,
14
};
static
const
unsigned
char
g_v
[]
=
{
17
,
65
,
16
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
17
,
36
,
10
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
0
,
0
,
0
,
1
};
static
const
symbol
s_0
[]
=
{
'a'
};
static
const
symbol
s_0
[]
=
{
'a'
};
static
const
symbol
s_1
[]
=
{
'e'
};
static
const
symbol
s_1
[]
=
{
'e'
};
...
@@ -502,8 +502,8 @@ static const symbol s_35[] = { 'e' };
...
@@ -502,8 +502,8 @@ static const symbol s_35[] = { 'e' };
static
int
r_mark_regions
(
struct
SN_env
*
z
)
{
static
int
r_mark_regions
(
struct
SN_env
*
z
)
{
z
->
I
[
0
]
=
z
->
l
;
z
->
I
[
0
]
=
z
->
l
;
{
int
c1
=
z
->
c
;
/* or, line 51 */
{
int
c1
=
z
->
c
;
/* or, line 51 */
if
(
in_grouping_U
(
z
,
g_v
,
97
,
252
,
0
))
goto
lab1
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
369
,
0
))
goto
lab1
;
if
(
in_grouping_U
(
z
,
g_v
,
97
,
252
,
1
)
<
0
)
goto
lab1
;
/* goto */
/* non v, line 48 */
if
(
in_grouping_U
(
z
,
g_v
,
97
,
369
,
1
)
<
0
)
goto
lab1
;
/* goto */
/* non v, line 48 */
{
int
c2
=
z
->
c
;
/* or, line 49 */
{
int
c2
=
z
->
c
;
/* or, line 49 */
if
(
z
->
c
+
1
>=
z
->
l
||
z
->
p
[
z
->
c
+
1
]
>>
5
!=
3
||
!
((
101187584
>>
(
z
->
p
[
z
->
c
+
1
]
&
0x1f
))
&
1
))
goto
lab3
;
if
(
z
->
c
+
1
>=
z
->
l
||
z
->
p
[
z
->
c
+
1
]
>>
5
!=
3
||
!
((
101187584
>>
(
z
->
p
[
z
->
c
+
1
]
&
0x1f
))
&
1
))
goto
lab3
;
if
(
!
(
find_among
(
z
,
a_0
,
8
)))
goto
lab3
;
/* among, line 49 */
if
(
!
(
find_among
(
z
,
a_0
,
8
)))
goto
lab3
;
/* among, line 49 */
...
@@ -520,9 +520,9 @@ static int r_mark_regions(struct SN_env * z) {
...
@@ -520,9 +520,9 @@ static int r_mark_regions(struct SN_env * z) {
goto
lab0
;
goto
lab0
;
lab1:
lab1:
z
->
c
=
c1
;
z
->
c
=
c1
;
if
(
out_grouping_U
(
z
,
g_v
,
97
,
252
,
0
))
return
0
;
if
(
out_grouping_U
(
z
,
g_v
,
97
,
369
,
0
))
return
0
;
{
/* gopast */
/* grouping v, line 53 */
{
/* gopast */
/* grouping v, line 53 */
int
ret
=
out_grouping_U
(
z
,
g_v
,
97
,
252
,
1
);
int
ret
=
out_grouping_U
(
z
,
g_v
,
97
,
369
,
1
);
if
(
ret
<
0
)
return
0
;
if
(
ret
<
0
)
return
0
;
z
->
c
+=
ret
;
z
->
c
+=
ret
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment