Commit c825e3d9 authored by Kazuhiko Shiozaki, committed by Kazuhiko

update libstemmer_c.

parent 1c6e8c67
......@@ -11,8 +11,6 @@ src_c/stem_ISO_8859_1_french.c
src_c/stem_ISO_8859_1_french.h
src_c/stem_ISO_8859_1_german.c
src_c/stem_ISO_8859_1_german.h
src_c/stem_ISO_8859_1_hungarian.c
src_c/stem_ISO_8859_1_hungarian.h
src_c/stem_ISO_8859_1_italian.c
src_c/stem_ISO_8859_1_italian.h
src_c/stem_ISO_8859_1_norwegian.c
......@@ -25,6 +23,8 @@ src_c/stem_ISO_8859_1_spanish.c
src_c/stem_ISO_8859_1_spanish.h
src_c/stem_ISO_8859_1_swedish.c
src_c/stem_ISO_8859_1_swedish.h
src_c/stem_ISO_8859_2_hungarian.c
src_c/stem_ISO_8859_2_hungarian.h
src_c/stem_ISO_8859_2_romanian.c
src_c/stem_ISO_8859_2_romanian.h
src_c/stem_KOI8_R_russian.c
......
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
undouble
)
externals ( stem )
strings ( ch )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
// Single-byte codes for the three non-ASCII Danish letters (ae, a-ring, o-slash).
stringdef ae hex 'E6'
stringdef ao hex 'E5'
stringdef o/ hex 'F8'
// v: the letters this stemmer treats as vowels.
define v 'aeiouy{ae}{ao}{o/}'
// s_ending: letters that must precede a final 's' for main_suffix to delete it.
define s_ending 'abcdfghjklmnoprtvyz{ao}'
// Sets p1, the start of the region inside which suffixes may be removed.
define mark_regions as (
// default: p1 at the end of the word
$p1 = limit
// x = position three characters in; 'test' restores the cursor. Fails if the word is shorter.
test ( hop 3 setmark x )
// p1 = position just after the first non-vowel that follows a vowel
goto v gopast non-v setmark p1
// p1 is never allowed to lie before x
try ( $p1 < x $p1 = x )
)
backwardmode (
// Removes the longest listed inflectional suffix found between p1 and the end of the word.
define main_suffix as (
// the search for the suffix is limited to the p1 region
setlimit tomark p1 for ([substring])
among(
'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
'erets' 'et' 'eret'
(delete)
's'
// a bare 's' is deleted only when the preceding letter is in s_ending
(s_ending delete)
)
)
// Deletes the final letter when the word ends (inside the p1 region) in gd, dt, gt or kt.
define consonant_pair as (
// 'test' checks for the pair and then puts the cursor back at the end of the word
test (
setlimit tomark p1 for ([substring])
among(
'gd' // significant in the call from other_suffix
'dt' 'gt' 'kt'
)
)
// step back over one character, close the slice there and delete that single letter
next] delete
)
// Removes a small set of further endings (ig, lig, elig, els, and the 't' of l-o/-st).
define other_suffix as (
// a final 'st' is deleted when preceded by 'ig'; 'do' ignores failure
do ( ['st'] 'ig' delete )
setlimit tomark p1 for ([substring])
among(
'ig' 'lig' 'elig' 'els'
// after deleting the ending, try consonant_pair on what is left
(delete do consonant_pair)
'l{o/}st'
// only the final 't' is dropped
(<-'l{o/}s')
)
)
// Removes the last letter when the word ends in the same non-vowel twice.
define undouble as (
// copy the final non-vowel (which must lie in the p1 region) into ch
setlimit tomark p1 for ([non-v] ->ch)
// succeeds only if the text before the slice ends with that same letter
ch
delete
)
)
// Entry point: computes p1, then applies the four suffix steps working from the end of the word.
define stem as (
do mark_regions
backwards (
// every step runs under 'do', so a step that fails does not stop the following ones
do main_suffix
do consonant_pair
do other_suffix
do undouble
)
)
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
undouble
)
externals ( stem )
strings ( ch )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
// Same three Danish letters as byte codes for the MS-DOS code page named in the comment above.
stringdef ae hex '91'
stringdef ao hex '86'
stringdef o/ hex '9B'
// v: the letters this stemmer treats as vowels.
define v 'aeiouy{ae}{ao}{o/}'
// s_ending: letters that must precede a final 's' for main_suffix to delete it.
define s_ending 'abcdfghjklmnoprtvyz{ao}'
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
'erets' 'et' 'eret'
(delete)
's'
(s_ending delete)
)
)
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'gd' // significant in the call from other_suffix
'dt' 'gt' 'kt'
)
)
next] delete
)
define other_suffix as (
do ( ['st'] 'ig' delete )
setlimit tomark p1 for ([substring])
among(
'ig' 'lig' 'elig' 'els'
(delete do consonant_pair)
'l{o/}st'
(<-'l{o/}s')
)
)
define undouble as (
setlimit tomark p1 for ([non-v] ->ch)
ch
delete
)
)
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
do undouble
)
)
routines (
prelude postlude
e_ending
en_ending
mark_regions
R1 R2
undouble
standard_suffix
)
externals ( stem )
booleans ( e_found )
integers ( p1 p2 )
groupings ( v v_I v_j )
stringescapes {}
/* special characters (in ISO Latin I) */
// Accented vowels as single-byte codes: diaeresis ("), acute (') and e-grave (`).
stringdef a" hex 'E4'
stringdef e" hex 'EB'
stringdef i" hex 'EF'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef a' hex 'E1'
stringdef e' hex 'E9'
stringdef i' hex 'ED'
stringdef o' hex 'F3'
stringdef u' hex 'FA'
stringdef e` hex 'E8'
// v: vowels; v_I and v_j extend v with the marker 'I' and with 'j' respectively.
define v 'aeiouy{e`}'
define v_I v + 'I'
define v_j v + 'j'
// Normalises the word before stemming: strips accents and marks consonant-like i/y in upper case.
define prelude as (
// pass 1 (cursor restored by 'test'): diaeresis/acute vowels become the plain vowel;
// the empty string matches anywhere and just advances one character
test repeat (
[substring] among(
'{a"}' '{a'}'
(<- 'a')
'{e"}' '{e'}'
(<- 'e')
'{i"}' '{i'}'
(<- 'i')
'{o"}' '{o'}'
(<- 'o')
'{u"}' '{u'}'
(<- 'u')
'' (next)
) //or next
)
// a word-initial 'y' becomes 'Y'
try(['y'] <- 'Y')
// after a vowel: 'i' followed by a vowel becomes 'I'; 'y' becomes 'Y'
repeat goto (
v [('i'] v <- 'I') or
('y'] <- 'Y')
)
)
// Sets p1 and p2, the starts of regions R1 and R2.
define mark_regions as (
// defaults: both at the end of the word
$p1 = limit
$p2 = limit
// p1 = just after the first non-vowel that follows a vowel
gopast v gopast non-v setmark p1
try($p1 < 3 $p1 = 3) // at least 3
// p2 = the same rule applied again starting from the cursor position reached above
gopast v gopast non-v setmark p2
)
// Turns the markers 'Y' and 'I' back into lower case across the whole word.
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'I' (<- 'i')
// the empty string always matches, so any other character is simply skipped
'' (next)
) //or next
)
backwardmode (
// Region tests: succeed when the cursor is at or beyond p1 / p2.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// If the text before the cursor ends in kk, dd or tt, deletes the last letter of the pair.
define undouble as (
test among('kk' 'dd' 'tt') [next] delete
)
// Deletes a final 'e' that is in R1 and preceded by a non-vowel, records that in e_found,
// then requires undouble to succeed on the result.
define e_ending as (
unset e_found
['e'] R1 test non-v delete
set e_found
undouble
)
// Deletes the slice already marked by the caller when it is in R1, preceded by a non-vowel
// and not preceded by 'gem'; then requires undouble to succeed.
define en_ending as (
R1 non-v and not 'gem' delete
undouble
)
// Main suffix removal, run backwards from the end of the word. It is a sequence of
// independent 'do' steps, so a failing step leaves the word unchanged and the next one runs.
define standard_suffix as (
// step 1: heden -> heid, en/ene via en_ending, s/se after a non-vowel other than j
do (
[substring] among(
'heden'
( R1 <- 'heid'
)
'en' 'ene'
( en_ending
)
's' 'se'
( R1 non-v_j delete
)
)
)
// step 2: final e (sets e_found when an e was removed)
do e_ending
// step 3: delete 'heid' in R2 unless preceded by 'c', then handle a preceding 'en'
do ( ['heid'] R2 not 'c' delete
['en'] en_ending
)
// step 4: derivational endings, all conditioned on R2
do (
[substring] among(
'end' 'ing'
( R2 delete
(['ig'] R2 not 'e' delete) or undouble
)
'ig'
( R2 not 'e' delete
)
'lijk'
( R2 delete e_ending
)
'baar'
( R2 delete
)
'bar'
// removed only when step 2 deleted an 'e'
( R2 e_found delete
)
)
)
// step 5: non-vowel + doubled vowel (aa/ee/oo/uu) + final non-vowel other than I:
// one letter of the doubled vowel is deleted
do (
non-v_I
test (
among ('aa' 'ee' 'oo' 'uu')
non-v
)
[next] delete
)
)
)
// Entry point: normalise, mark regions, strip suffixes from the end, then undo the I/Y markers.
define stem as (
do prelude
do mark_regions
// 'backwards' applies to the single command that follows it
backwards
do standard_suffix
do postlude
)
routines (
prelude postlude
e_ending
en_ending
mark_regions
R1 R2
undouble
standard_suffix
)
externals ( stem )
booleans ( e_found )
integers ( p1 p2 )
groupings ( v v_I v_j )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
// Accented vowels as byte codes for the MS-DOS code page named in the comment above.
stringdef a" hex '84'
stringdef e" hex '89'
stringdef i" hex '8B'
stringdef o" hex '94'
stringdef u" hex '81'
stringdef a' hex 'A0'
stringdef e' hex '82'
stringdef i' hex 'A1'
stringdef o' hex 'A2'
stringdef u' hex 'A3'
stringdef e` hex '8A'
// v: vowels; v_I and v_j extend v with the marker 'I' and with 'j' respectively.
define v 'aeiouy{e`}'
define v_I v + 'I'
define v_j v + 'j'
define prelude as (
test repeat (
[substring] among(
'{a"}' '{a'}'
(<- 'a')
'{e"}' '{e'}'
(<- 'e')
'{i"}' '{i'}'
(<- 'i')
'{o"}' '{o'}'
(<- 'o')
'{u"}' '{u'}'
(<- 'u')
'' (next)
) //or next
)
try(['y'] <- 'Y')
repeat goto (
v [('i'] v <- 'I') or
('y'] <- 'Y')
)
)
define mark_regions as (
$p1 = limit
$p2 = limit
gopast v gopast non-v setmark p1
try($p1 < 3 $p1 = 3) // at least 3
gopast v gopast non-v setmark p2
)
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'I' (<- 'i')
'' (next)
) //or next
)
backwardmode (
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
define undouble as (
test among('kk' 'dd' 'tt') [next] delete
)
define e_ending as (
unset e_found
['e'] R1 test non-v delete
set e_found
undouble
)
define en_ending as (
R1 non-v and not 'gem' delete
undouble
)
define standard_suffix as (
do (
[substring] among(
'heden'
( R1 <- 'heid'
)
'en' 'ene'
( en_ending
)
's' 'se'
( R1 non-v_j delete
)
)
)
do e_ending
do ( ['heid'] R2 not 'c' delete
['en'] en_ending
)
do (
[substring] among(
'end' 'ing'
( R2 delete
(['ig'] R2 not 'e' delete) or undouble
)
'ig'
( R2 not 'e' delete
)
'lijk'
( R2 delete e_ending
)
'baar'
( R2 delete
)
'bar'
( R2 e_found delete
)
)
)
do (
non-v_I
test (
among ('aa' 'ee' 'oo' 'uu')
non-v
)
[next] delete
)
)
)
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)
./Snowball stem_ISO_8859_1.sbl -u -eprefix dutch_UTF_8_ -r ../runtime -o stem_UTF_8_dutch
\ No newline at end of file
strings ( ch )
integers ( x p1 p2 )
booleans ( Y_found stemmed /*GE_removed*/ )
routines (
prelude
R1 R2
C V VX
lengthen_V
Step_1 Step_2 Step_3 Step_4 Step_7
Step_6
// Step_1c
// Lose_prefix
// Lose_infix
measure
)
externals ( stem )
groupings ( v v_WX AOU AIOU)
stringescapes {}
// {'} stands for the apostrophe character itself, so it can be written inside string literals.
stringdef ' hex '27' // yuk
// Character classes used by the step routines.
define v 'aeiouy'
define v_WX v + 'wx'
define AOU 'aou'
define AIOU 'aiou'
// Accented vowels rewritten by prelude (e` is defined but not referenced by the routines here).
stringdef a" hex 'E4'
stringdef e" hex 'EB'
stringdef i" hex 'EF'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef a' hex 'E1'
stringdef e' hex 'E9'
stringdef i' hex 'ED'
stringdef o' hex 'F3'
stringdef u' hex 'FA'
stringdef e` hex 'E8'
//define v_I v + 'I'
//define v_j v + 'j'
// Normalises the word: strips diaeresis/acute accents and upper-cases consonant-like i/y.
// NOTE(review): no routine visible here turns 'I' back into 'i' - confirm that is intended.
define prelude as (
// pass 1 (cursor restored by 'test'): accented vowels become the plain vowel
test repeat (
[substring] among(
'{a"}' '{a'}'
(<- 'a')
'{e"}' '{e'}'
(<- 'e')
'{i"}' '{i'}'
(<- 'i')
'{o"}' '{o'}'
(<- 'o')
'{u"}' '{u'}'
(<- 'u')
'' (next)
) //or next
)
// a word-initial 'y' becomes 'Y'
try(['y'] <- 'Y')
// after a vowel: 'i' followed by a vowel becomes 'I'; 'y' becomes 'Y'
repeat goto (
v [('i'] v <- 'I') or
('y'] <- 'Y')
)
)
backwardmode (
// R1 / R2: record the cursor in x and succeed when it is at or beyond p1 / p2.
define R1 as (setmark x $x >= p1)
define R2 as (setmark x $x >= p2)
// V: the text before the cursor ends in a vowel or 'ij' (cursor not moved).
define V as test (v or 'ij')
// VX: the same check made one character further back.
define VX as test (next v or 'ij')
// C: the text before the cursor ends in a non-vowel and not in 'ij' (cursor not moved).
define C as test (not 'ij' non-v)
// Doubles the vowel before a final consonant (not w or x) when the conditions below hold;
// wrapped in 'do', so it never fails and leaves the cursor where it was.
define lengthen_V as do (
// a, o or u must be preceded by a non-vowel or the start of the word;
// 'e' has additional restrictions on the letters before it
non-v_WX [ (AOU] test (non-v or atlimit)) or
('e'] test (non-v or atlimit
not AIOU
not (next AIOU non-v)))
// copy the marked vowel into ch and insert a second copy
->ch insert ch
)
// Step 1: plural and similar endings ('s, s, ies, es, aus, alen, ieen, ven, en, nde).
// '[among ( (])' opens the slice, and ']' runs as the among's leading command after a match.
define Step_1 as
(
[among ( (])
'{'}s' (delete)
's' (R1 not ('t' R1) C delete)
'ies' (R1 <-'ie')
'es'
(('ar' R1 C ] delete lengthen_V) or
('er' R1 C ] delete) or
(R1 C <-'e'))
'aus' (R1 V <-'au')
'alen' (R1 <- 'aal')
'ieen' (R1 <- 'ie')
'ven' (R1 <- 'f')
'en' (('hed' R1 ] <-'heid') or
('nd' delete) or
('d' R1 C ] delete) or
('i' or 'j' V delete) or
(R1 C delete lengthen_V))
'nde' (<-'nd')
)
)
// Step 2: endings in -e (je, ge, lijke, ische, de, te, se, re, le, ene, ieve).
// For 'je' the alternatives are tried in order; several move ']' further back first,
// so that more than the matched suffix is deleted or replaced.
define Step_2 as
(
[among ( (])
'je' (('{'}t' ] delete) or
('et' ] R1 C delete) or
('rnt' ] <-'rn') or
('t' ] R1 VX delete) or
('ink' ] <-'ing') or
('mp' ] <-'m') or
('{'}' ] R1 delete) or
(] R1 C delete))
'ge' (R1 <-'g')
'lijke'(R1 <-'lijk')
'ische'(R1 <-'isch')
'de' (R1 C delete)
'te' (R1 <-'t')
'se' (R1 <-'s')
're' (R1 <-'r')
'le' (R1 delete attach 'l' lengthen_V)
'ene' (R1 C delete attach 'en' lengthen_V)
'ieve' (R1 C <-'ief')
)
)
// Step 3: derivational noun endings. Strings listed without an action share the action
// of the next string that has one.
define Step_3 as
(
[among ( (])
'atie' (R1 <-'eer')
'iteit' (R1 delete lengthen_V)
'heid'
'sel'
'ster' (R1 delete)
'rder' (<-'r')
'ing'
'isme'
'erij' (R1 delete lengthen_V)
'arij' (R1 C <-'aar')
'fie' (R2 delete attach 'f' lengthen_V)
'gie' (R2 delete attach 'g' lengthen_V)
'tst' (R1 C <-'t')
'dst' (R1 C <-'d')
)
)
// Step 4: adjectival endings. The first table is tried; only if it fails is the
// shorter iger/igst/ig table tried.
define Step_4 as
(
( [among ( (])
'ioneel' (R1 <-'ie')
'atief' (R1 <-'eer')
'baar' (R1 delete)
'naar' (R1 V <-'n')
'laar' (R1 V <-'l')
'raar' (R1 V <-'r')
'tant' (R1 <-'teer')
'lijker'
'lijkst' (R1 <-'lijk')
'achtig'
'achtiger'
'achtigst'(R1 delete)
'eriger'
'erigst'
'erig'
'end' (R1 C delete lengthen_V)
)
)
or
( [among ( (])
'iger'
'igst'
'ig' (R1 C delete lengthen_V)
)
)
)
// Step 7: unconditional clean-up of a few word endings (no R1/R2 condition).
define Step_7 as
(
[among ( (])
'iee' (<-'ie')
'eee' (<-'ee')
'kt' (<-'k')
'ft' (<-'f')
'pt' (<-'p')
)
)
// Step 6: reduces a final double consonant to a single one, and maps final v -> f, z -> s.
// stem calls it only when no earlier step changed the word.
define Step_6 as
(
[among ( (])
'bb' (<-'b')
'cc' (<-'c')
'dd' (<-'d')
'ff' (<-'f')
'gg' (<-'g')
'hh' (<-'h')
'jj' (<-'j')
'kk' (<-'k')
'll' (<-'l')
'mm' (<-'m')
'nn' (<-'n')
'pp' (<-'p')
'qq' (<-'q')
'rr' (<-'r')
'ss' (<-'s')
'tt' (<-'t')
'vv' (<-'v')
'ww' (<-'w')
'xx' (<-'x')
'zz' (<-'z')
'v' (<-'f')
'z' (<-'s')
)
)
/*
define Step_1c as
(
[among ( (] R1 C)
'd' (not ('n' R1) delete)
't' (not ('h' R1) delete)
)
)
*/
)
/*
define Lose_prefix as (
['ge'] test hop 3 (goto v goto non-v)
set GE_removed
delete
)
define Lose_infix as (
next
gopast (['ge']) test hop 3 (goto v goto non-v)
set GE_removed
delete
)
*/
// Computes p1 and p2, the positions tested by R1 and R2.
define measure as (
// defaults: both marks at the end of the word ('do' then restores the cursor)
do (
tolimit
setmark p1
setmark p2
)
// p1: skip leading non-vowels, then at least one vowel (or 'ij'), then one non-vowel;
// p2: the same pattern again, continuing from p1
do(
repeat non-v atleast 1 ('ij' or v) non-v setmark p1
repeat non-v atleast 1 ('ij' or v) non-v setmark p2
)
)
// Entry point. Runs prelude, marks y as Y, measures the word, applies steps 1-4 then 7,
// applies step 6 only if nothing was stemmed, and finally restores Y -> y.
define stem as (
prelude
unset Y_found
unset stemmed
// NOTE(review): prelude has already rewritten word-initial 'y' and 'y' after a vowel
// to 'Y', so these two commands look like they will rarely find a 'y' and set Y_found;
// in that case the final Y -> y restore at the bottom does not run. Please verify.
do ( ['y'] <-'Y' set Y_found )
do repeat(goto (v ['y'])<-'Y' set Y_found )
measure
backwards (
// 'stemmed' records whether any step succeeded
do (Step_1 set stemmed )
do (Step_2 set stemmed )
do (Step_3 set stemmed )
do (Step_4 set stemmed )
)
/*
unset GE_removed
do (Lose_prefix and measure)
backwards (
do (GE_removed Step_1c)
)
unset GE_removed
do (Lose_infix and measure)
backwards (
do (GE_removed Step_1c)
)
*/
backwards (
do (Step_7 set stemmed )
// Step_6 is skipped when 'stemmed' is already set
do (stemmed or Step_6)
)
do(Y_found repeat(goto (['Y']) <-'y'))
)
integers ( p1 p2 )
booleans ( Y_found )
routines (
prelude postlude
mark_regions
shortv
R1 R2
Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5
exception1
exception2
)
externals ( stem )
groupings ( v v_WXY valid_LI )
stringescapes {}
// v: vowels. v_WXY: vowels plus w, x and the marker Y (used by shortv).
// valid_LI: letters that may precede 'li' for Step_2 to delete it.
define v 'aeiouy'
define v_WXY v + 'wxY'
define valid_LI 'cdeghkmnrt'
// Removes a leading apostrophe and upper-cases 'y' where it is word-initial or follows
// a vowel; Y_found records whether any such change was made.
define prelude as (
unset Y_found
do ( ['{'}'] delete)
do ( ['y'] <-'Y' set Y_found)
do repeat(goto (v ['y']) <-'Y' set Y_found)
)
// Sets p1 and p2, the starts of regions R1 and R2 (both default to the end of the word).
define mark_regions as (
$p1 = limit
$p2 = limit
do(
// p1 is placed directly after one of these prefixes when the word starts with it ...
among (
'gener'
'commun' // added May 2005
'arsen' // added Nov 2006 (arsenic/arsenal)
// ... extensions possible here ...
// ... otherwise after the first non-vowel that follows a vowel
) or (gopast v gopast non-v)
setmark p1
// p2: the vowel / non-vowel rule applied once more from p1
gopast v gopast non-v setmark p2
)
)
backwardmode (
// Short-syllable test, reading backwards from the cursor: either
//   a non-vowel other than w, x, Y, preceded by a vowel, preceded by a non-vowel; or
//   a non-vowel preceded by a vowel that is the first letter of the word.
define shortv as (
( non-v_WXY v non-v )
or
( non-v v atlimit )
)
// Region tests: succeed when the cursor is at or beyond p1 / p2.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Step 1a: possessive apostrophe forms, then plural -s endings.
define Step_1a as (
// optional: delete a trailing ', 's or 's'
try (
[substring] among (
'{'}' '{'}s' '{'}s{'}'
(delete)
)
)
[substring] among (
'sses' (<-'ss')
'ied' 'ies'
// 'i' when at least two letters come before the suffix, otherwise 'ie'
((hop 2 <-'i') or <-'ie')
// delete 's' only if a vowel occurs somewhere before the letter preceding it
's' (next gopast v delete)
// matched so that the plain 's' rule does not apply; no change is made
'us' 'ss'
)
)
// Step 1b: -eed/-eedly and -ed/-edly/-ing/-ingly.
define Step_1b as (
[substring] among (
'eed' 'eedly'
(R1 <-'ee')
'ed' 'edly' 'ing' 'ingly'
(
// delete only if the part before the suffix contains a vowel
test gopast v delete
// then repair the new ending
test substring among(
'at' 'bl' 'iz'
(<+ 'e')
'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
// ignoring double c, h, j, k, q, v, w, and x
([next] delete)
// otherwise: add 'e' when the cursor is exactly at p1 and shortv holds
'' (atmark p1 test shortv <+ 'e')
)
)
)
)
// Step 1c: a final y or Y becomes 'i' when preceded by a non-vowel that is not
// the first letter of the word.
define Step_1c as (
['y' or 'Y']
non-v not atlimit
<-'i'
)
// Step 2: maps the longest matching suffix, which must lie in R1, to a shorter form.
define Step_2 as (
[substring] R1 among (
'tional' (<-'tion')
'enci' (<-'ence')
'anci' (<-'ance')
'abli' (<-'able')
'entli' (<-'ent')
'izer' 'ization'
(<-'ize')
'ational' 'ation' 'ator'
(<-'ate')
'alism' 'aliti' 'alli'
(<-'al')
'fulness' (<-'ful')
'ousli' 'ousness'
(<-'ous')
'iveness' 'iviti'
(<-'ive')
'biliti' 'bli'
(<-'ble')
// 'ogi' is replaced only when preceded by 'l'
'ogi' ('l' <-'og')
'fulli' (<-'ful')
'lessli' (<-'less')
// 'li' is deleted only after a letter in valid_LI
'li' (valid_LI delete)
)
)
// Step 3: further suffix mappings; the matched suffix must lie in R1.
define Step_3 as (
[substring] R1 among (
'tional' (<- 'tion')
'ational' (<- 'ate')
'alize' (<-'al')
'icate' 'iciti' 'ical'
(<-'ic')
'ful' 'ness'
(delete)
'ative'
(R2 delete) // 'R2' added Dec 2001
)
)
// Step 4: deletes the longest matching suffix when it lies in R2.
define Step_4 as (
[substring] R2 among (
'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
'ment' 'ent' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
(delete)
// 'ion' is deleted only when preceded by 's' or 't'
'ion' ('s' or 't' delete)
)
)
// Step 5: final 'e' is deleted if in R2, or in R1 and not preceded by a short syllable;
// final 'l' is deleted if in R2 and preceded by another 'l'.
define Step_5 as (
[substring] among (
'e' (R2 or (R1 not shortv) delete)
'l' (R2 'l' delete)
)
)
// Succeeds when the whole word (checked after Step_1a in stem) is one of the listed forms;
// stem then skips the remaining steps. 'atlimit' requires the match to reach the word start.
define exception2 as (
[substring] atlimit among(
'inning' 'outing' 'canning' 'herring' 'earring'
'proceed' 'exceed' 'succeed'
// ... extensions possible here ...
)
)
)
// Whole-word exceptions checked before any other processing: some words are mapped
// directly to a fixed stem, the rest of the list is left unchanged.
// 'atlimit' requires the matched string to extend to the end of the word.
define exception1 as (
[substring] atlimit among(
/* special changes: */
'skis' (<-'ski')
'skies' (<-'sky')
'dying' (<-'die')
'lying' (<-'lie')
'tying' (<-'tie')
/* special -LY cases */
'idly' (<-'idl')
'gently' (<-'gentl')
'ugly' (<-'ugli')
'early' (<-'earli')
'only' (<-'onli')
'singly' (<-'singl')
// ... extensions possible here ...
/* invariant forms: */
'sky'
'news'
'howe'
'atlas' 'cosmos' 'bias' 'andes' // not plural forms
// ... extensions possible here ...
)
)
// If prelude introduced any 'Y', turns every 'Y' back into 'y'.
define postlude as (Y_found repeat(goto (['Y']) <-'y'))
// Entry point. Alternatives are tried in order:
//   1. exception1 handles the word completely;
//   2. 'not hop 3' succeeds for words of fewer than three characters, which stay unchanged;
//   3. otherwise the full sequence of steps runs.
define stem as (
exception1 or
not hop 3 or (
do prelude
do mark_regions
backwards (
do Step_1a
// words listed in exception2 stop here; all others continue through steps 1b-5
exception2 or (
do Step_1b
do Step_1c
do Step_2
do Step_3
do Step_4
do Step_5
)
)
do postlude
)
)
/* Finnish stemmer.
Numbers in square brackets refer to the sections in
Fred Karlsson, Finnish: An Essential Grammar. Routledge, 1999
ISBN 0-415-20705-3
*/
routines (
mark_regions
R2
particle_etc possessive
LONG VI
case_ending
i_plural
t_plural
other_endings
tidy
)
externals ( stem )
integers ( p1 p2 )
strings ( x )
booleans ( ending_removed )
groupings ( AEI V1 V2 particle_end )
stringescapes {}
/* special characters (in ISO Latin I) */
// a-diaeresis and o-diaeresis as single-byte codes.
stringdef a" hex 'E4'
stringdef o" hex 'F6'
// AEI: trailing vowels removed by tidy. V1: all vowels. V2: V1 without y.
// particle_end: letters that must precede a particle for particle_etc to remove it.
define AEI 'a{a"}ei'
define V1 'aeiouy{a"}{o"}'
define V2 'aeiou{a"}{o"}'
define particle_end V1 + 'nt'
// Sets p1 and p2 (both default to the end of the word): each is placed just after
// the next non-vowel that follows a vowel.
define mark_regions as (
$p1 = limit
$p2 = limit
goto V1 gopast non-V1 setmark p1
goto V1 gopast non-V1 setmark p2
)
backwardmode (
// Region test: succeeds when the cursor is at or beyond p2.
define R2 as $p2 <= cursor
// Deletes a final particle (or the adverb ending 'sti') found within the p1 region.
define particle_etc as (
setlimit tomark p1 for ([substring])
among(
'kin'
'kaan' 'k{a"}{a"}n'
'ko' 'k{o"}'
'han' 'h{a"}n'
'pa' 'p{a"}' // Particles [91]
// a particle must be preceded by a letter in particle_end
(particle_end)
'sti' // Adverb [87]
(R2)
)
delete
)
// Removes a possessive suffix found within the p1 region; each group has its own condition.
define possessive as ( // [36]
setlimit tomark p1 for ([substring])
among(
'si'
(not 'k' delete) // take 'ksi' as the Comitative case
'ni'
(delete ['kse'] <- 'ksi') // kseni = ksi + ni
'nsa' 'ns{a"}'
'mme'
'nne'
(delete)
/* Now for Vn possessives after case endings: [36] */
// the two-letter endings below are deleted only when one of the listed case endings precedes them
'an'
(among('ta' 'ssa' 'sta' 'lla' 'lta' 'na') delete)
'{a"}n'
(among('t{a"}' 'ss{a"}' 'st{a"}'
'll{a"}' 'lt{a"}' 'n{a"}') delete)
'en'
(among('lle' 'ine') delete)
)
)
// LONG: the text before the cursor ends in a doubled vowel.
define LONG as
among('aa' 'ee' 'ii' 'oo' 'uu' '{a"}{a"}' '{o"}{o"}')
// VI: reading backwards, an 'i' and before it a vowel from V2.
define VI as ('i' V2)
// Removes a case ending found within the p1 region and sets ending_removed.
// Each ending carries a condition on the letters before it; endings listed without an
// action share the action of the next entry that has one.
define case_ending as (
setlimit tomark p1 for ([substring])
among(
'han' ('a') //-.
'hen' ('e') // |
'hin' ('i') // |
'hon' ('o') // |
'h{a"}n' ('{a"}') // Illative [43]
'h{o"}n' ('{o"}') // |
'siin' VI // |
'seen' LONG //-'
'den' VI
'tten' VI // Genitive plurals [34]
()
'n' // Genitive or Illative
( try ( LONG // Illative
or 'ie' // Genitive
and next ]
)
/* otherwise Genitive */
)
'a' '{a"}' //-.
(V1 non-V1) // |
'tta' 'tt{a"}' // Partitive [32]
('e') // |
'ta' 't{a"}' //-'
'ssa' 'ss{a"}' // Inessive [41]
'sta' 'st{a"}' // Elative [42]
'lla' 'll{a"}' // Adessive [44]
'lta' 'lt{a"}' // Ablative [51]
'lle' // Allative [46]
'na' 'n{a"}' // Essive [49]
'ksi' // Translative[50]
'ine' // Comitative [51]
/* Abessive and Instructive are too rare for
inclusion [51] */
)
// the slice (possibly widened by the 'n' rule above) is removed
delete
set ending_removed
)
// Deletes comparative, superlative and agent endings found within the p2 region.
define other_endings as (
setlimit tomark p2 for ([substring])
among(
'mpi' 'mpa' 'mp{a"}'
'mmi' 'mma' 'mm{a"}' // Comparative forms [85]
(not 'po') //-improves things
'impi' 'impa' 'imp{a"}'
'immi' 'imma' 'imm{a"}' // Superlative forms [86]
'eja' 'ej{a"}' // indicates agent [93.1B]
)
delete
)
// Deletes a final 'i' or 'j' within the p1 region; stem calls this only after a case
// ending has been removed.
define i_plural as ( // [26]
setlimit tomark p1 for ([substring])
among(
'i' 'j'
)
delete
)
// Deletes a final 't' (in the p1 region) that follows a vowel, then deletes a preceding
// 'mma' (unless it follows 'po') or 'imma' found within the p2 region.
define t_plural as ( // [26]
setlimit tomark p1 for (
['t'] test V1
delete
)
setlimit tomark p2 for ([substring])
among(
'mma' (not 'po') //-mmat endings
'imma' //-immat endings
)
delete
)
// Final clean-up of the stem. The first four repairs are limited to the p1 region and
// each runs under 'do'; the last command is not limited and makes the routine fail
// when it does not apply (stem calls tidy under 'do').
define tidy as (
setlimit tomark p1 for (
do ( LONG and ([next] delete ) ) // undouble vowel
do ( [AEI] non-V1 delete ) // remove trailing a, a", e, i
do ( ['j'] 'o' or 'u' delete )
do ( ['o'] 'j' delete )
)
// find the last non-vowel; if the letter before it is the same letter, delete that one
goto non-V1 [next] -> x x delete // undouble consonant
)
)
// Entry point: marks regions, then removes endings from the end of the word in a fixed order.
define stem as (
do mark_regions
unset ending_removed
backwards (
do particle_etc
do possessive
do case_ending
do other_endings
// i_plural is tried only if case_ending removed something; otherwise t_plural is tried
(ending_removed do i_plural) or do t_plural
do tidy
)
)
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
i_verb_suffix
verb_suffix
residual_suffix
un_double
un_accent
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v keep_with_s )
stringescapes {}
/* special characters (in ISO Latin I) */
// Accented French letters as single-byte codes.
stringdef a^ hex 'E2' // a-circumflex
stringdef a` hex 'E0' // a-grave
stringdef c, hex 'E7' // c-cedilla
stringdef e" hex 'EB' // e-diaeresis (rare)
stringdef e' hex 'E9' // e-acute
stringdef e^ hex 'EA' // e-circumflex
stringdef e` hex 'E8' // e-grave
stringdef i" hex 'EF' // i-diaeresis
stringdef i^ hex 'EE' // i-circumflex
stringdef o^ hex 'F4' // o-circumflex
stringdef u^ hex 'FB' // u-circumflex
stringdef u` hex 'F9' // u-grave
// v: vowels, including the accented forms above (c-cedilla is not a vowel).
define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
// Upper-cases u, i and y where they are to be treated as consonants:
//   u or i between two vowels -> U / I;  y after a vowel -> Y;
//   y before a vowel -> Y;  u after q -> U.
define prelude as repeat goto (
( v [ ('u' ] v <- 'U') or
('i' ] v <- 'I') or
('y' ] <- 'Y')
)
or
( ['y'] v <- 'Y' )
or
( 'q' ['u'] <- 'U' )
)
// Sets pV, p1 and p2, the starts of regions RV, R1 and R2.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
// pV: after the third letter if the word starts with two vowels; else right after
// one of the listed prefixes; else after the first vowel that is not the first letter
do (
( v v next )
or
among ( // this exception list begun Nov 2006
'par' // paris, parie, pari
'col' // colis
'tap' // tapis
// extensions possible here
)
or
( next gopast v )
setmark pV
)
// p1: after the first non-vowel following a vowel; p2: the same rule applied from p1
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Turns the markers I, U and Y back into lower case across the whole word.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'Y' (<- 'y')
// the empty string always matches, so any other character is skipped
'' (next)
)
)
backwardmode (
// Region tests: succeed when the cursor is at or beyond pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Removes or rewrites the longest matching noun/adjective suffix. Each group of endings
// shares the bracketed action that follows it, and most actions require the suffix to lie
// in R2 (some in R1 or RV). The routine fails when no rule applies; the last four rules
// fail on purpose after editing the word, so that stem goes on to the verb-suffix routines.
define standard_suffix as (
[substring] among(
'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
'ances' 'iqUes' 'ismes' 'ables' 'istes'
( R2 delete )
'atrice' 'ateur' 'ation'
'atrices' 'ateurs' 'ations'
( R2 delete
try ( ['ic'] (R2 delete) or <-'iqU' )
)
'logie'
'logies'
( R2 <- 'log' )
'usion' 'ution'
'usions' 'utions'
( R2 <- 'u' )
'ence'
'ences'
( R2 <- 'ent' )
'ement'
'ements'
(
RV delete
// optionally handle what now ends the word
try (
[substring] among(
'iv' (R2 delete ['at'] R2 delete)
'eus' ((R2 delete) or (R1<-'eux'))
'abl' 'iqU'
(R2 delete)
'i{e`}r' 'I{e`}r' //)
(RV <-'i') //)--new 2 Sept 02
)
)
)
'it{e'}'
'it{e'}s'
(
R2 delete
try (
[substring] among(
'abil' ((R2 delete) or <-'abl')
'ic' ((R2 delete) or <-'iqU')
'iv' (R2 delete)
)
)
)
'if' 'ive'
'ifs' 'ives'
(
R2 delete
try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
)
'eaux' (<- 'eau')
'aux' (R1 <- 'al')
'euse'
'euses'((R2 delete) or (R1<-'eux'))
'issement'
'issements'(R1 non-v delete) // verbal
// fail(...) below forces entry to verb_suffix. -ment typically
// follows the p.p., e.g 'confus{e'}ment'.
'amment' (RV fail(<- 'ant'))
'emment' (RV fail(<- 'ent'))
'ment'
'ments' (test(v RV) fail(delete))
// v is e,i,u,{e'},I or U
)
)
// Deletes the longest listed verb ending beginning with i, searched for only within
// the RV region, and only when the letter before it is a non-vowel.
define i_verb_suffix as setlimit tomark pV for (
[substring] among (
'{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
'issez' 'issiez' 'issions' 'issons' 'it'
(non-v delete)
)
)
// Deletes the longest of the remaining verb endings, searched for only within the RV region.
define verb_suffix as setlimit tomark pV for (
[substring] among (
// 'ions' additionally has to lie in R2
'ions'
(R2 delete)
'{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
'erons' 'eront' 'ez' 'iez'
// 'ons' //-best omitted
(delete)
'{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
'assions'
// after an a-ending is removed, a preceding 'e' is removed as well
(delete
try(['e'] delete)
)
)
)
// keep_with_s: a final 's' is kept when the letter before it is one of these.
define keep_with_s 'aiou{e`}s'
// Fallback used by stem when none of the suffix routines succeeded.
define residual_suffix as (
// drop a final 's' unless preceded by a letter in keep_with_s
try(['s'] test non-keep_with_s delete)
// then, within the RV region, handle a few remaining endings
setlimit tomark pV for (
[substring] among(
'ion' (R2 's' or 't' delete)
'ier' 'i{e`}re'
'Ier' 'I{e`}re' (<-'i')
'e' (delete)
'{e"}' ('gu' delete)
)
)
)
// If the word ends in enn, onn, ett, ell or eill, deletes its last letter.
define un_double as (
test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
)
// If the word ends in one or more non-vowels preceded by e-acute or e-grave,
// that accented letter is replaced by a plain 'e'.
define un_accent as (
atleast 1 non-v
[ '{e'}' or '{e`}' ] <-'e'
)
)
// Entry point: normalise, mark regions, strip suffixes from the end, then undo the markers.
define stem as (
do prelude
do mark_regions
backwards (
do (
(
// the first of the three suffix routines to succeed wins ...
( standard_suffix or
i_verb_suffix or
verb_suffix
)
and
// ... and then a final Y becomes i, or a final c-cedilla becomes c
try( [ ('Y' ] <- 'i' ) or
('{c,}'] <- 'c' )
)
) or
// if none of them succeeded, fall back to residual_suffix
residual_suffix
)
// try(['ent'] RV delete) // is best omitted
do un_double
do un_accent
)
do postlude
)
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
i_verb_suffix
verb_suffix
residual_suffix
un_double
un_accent
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v keep_with_s )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
// Accented French letters as byte codes for the MS-DOS code page named in the comment above.
stringdef a^ hex '83' // a-circumflex
stringdef a` hex '85' // a-grave
stringdef c, hex '87' // c-cedilla
stringdef e" hex '89' // e-diaeresis (rare)
stringdef e' hex '82' // e-acute
stringdef e^ hex '88' // e-circumflex
stringdef e` hex '8A' // e-grave
stringdef i" hex '8B' // i-diaeresis
stringdef i^ hex '8C' // i-circumflex
stringdef o^ hex '93' // o-circumflex
stringdef u^ hex '96' // u-circumflex
stringdef u` hex '97' // u-grave
// v: vowels, including the accented forms above (c-cedilla is not a vowel).
define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
define prelude as repeat goto (
( v [ ('u' ] v <- 'U') or
('i' ] v <- 'I') or
('y' ] <- 'Y')
)
or
( ['y'] v <- 'Y' )
or
( 'q' ['u'] <- 'U' )
)
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v v next ) or ( next gopast v )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'Y' (<- 'y')
'' (next)
)
)
backwardmode (
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
define standard_suffix as (
[substring] among(
'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
'ances' 'iqUes' 'ismes' 'ables' 'istes'
( R2 delete )
'atrice' 'ateur' 'ation'
'atrices' 'ateurs' 'ations'
( R2 delete
try ( ['ic'] (R2 delete) or <-'iqU' )
)
'logie'
'logies'
( R2 <- 'log' )
'usion' 'ution'
'usions' 'utions'
( R2 <- 'u' )
'ence'
'ences'
( R2 <- 'ent' )
'ement'
'ements'
(
RV delete
try (
[substring] among(
'iv' (R2 delete ['at'] R2 delete)
'eus' ((R2 delete) or (R1<-'eux'))
'abl' 'iqU'
(R2 delete)
'i{e`}r' 'I{e`}r' //)
(RV <-'i') //)--new 2 Sept 02
)
)
)
'it{e'}'
'it{e'}s'
(
R2 delete
try (
[substring] among(
'abil' ((R2 delete) or <-'abl')
'ic' ((R2 delete) or <-'iqU')
'iv' (R2 delete)
)
)
)
'if' 'ive'
'ifs' 'ives'
(
R2 delete
try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
)
'eaux' (<- 'eau')
'aux' (R1 <- 'al')
'euse'
'euses'((R2 delete) or (R1<-'eux'))
'issement'
'issements'(R1 non-v delete) // verbal
// fail(...) below forces entry to verb_suffix. -ment typically
// follows the p.p., e.g 'confus{e'}ment'.
'amment' (RV fail(<- 'ant'))
'emment' (RV fail(<- 'ent'))
'ment'
'ments' (test(v RV) fail(delete))
// v is e,i,u,{e'},I or U
)
)
define i_verb_suffix as setlimit tomark pV for (
[substring] among (
'{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
'issez' 'issiez' 'issions' 'issons' 'it'
(non-v delete)
)
)
define verb_suffix as setlimit tomark pV for (
[substring] among (
'ions'
(R2 delete)
'{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
'erons' 'eront' 'ez' 'iez'
// 'ons' //-best omitted
(delete)
'{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
'assions'
(delete
try(['e'] delete)
)
)
)
define keep_with_s 'aiou{e`}s'
define residual_suffix as (
try(['s'] test non-keep_with_s delete)
setlimit tomark pV for (
[substring] among(
'ion' (R2 's' or 't' delete)
'ier' 'i{e`}re'
'Ier' 'I{e`}re' (<-'i')
'e' (delete)
'{e"}' ('gu' delete)
)
)
)
define un_double as (
test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
)
define un_accent as (
atleast 1 non-v
[ '{e'}' or '{e`}' ] <-'e'
)
)
define stem as (
do prelude
do mark_regions
backwards (
do (
(
( standard_suffix or
i_verb_suffix or
verb_suffix
)
and
try( [ ('Y' ] <- 'i' ) or
('{c,}'] <- 'c' )
)
) or
residual_suffix
)
// try(['ent'] RV delete) // is best omitted
do un_double
do un_accent
)
do postlude
)
routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)
externals ( stem )
integers ( p1 p2 x )
groupings ( v s_ending st_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
// Umlaut vowels and sharp s as single-byte codes.
stringdef a" hex 'E4'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef ss hex 'DF'
// v: vowels. s_ending: letters that must precede a final 's' for it to be deleted.
// st_ending: the same set without 'r', used for a final 'st'.
define v 'aeiouy{a"}{o"}{u"}'
define s_ending 'bdfghklmnrt'
define st_ending s_ending - 'r'
// Replaces sharp s by 'ss', then upper-cases u and y that stand between two vowels.
define prelude as (
// pass 1 over the whole word; 'test' puts the cursor back at the start afterwards
test repeat (
(
['{ss}'] <- 'ss'
) or next
)
repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)
)
// Sets p1 and p2, the starts of regions R1 and R2 (both default to the end of the word).
define mark_regions as (
$p1 = limit
$p2 = limit
// x = position three characters in; fails here if the word is shorter than that
test(hop 3 setmark x)
// p1 = just after the first non-vowel that follows a vowel
gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
// p2 = the same rule applied again from the cursor position reached above
gopast v gopast non-v setmark p2
)
// Lower-cases the markers Y and U and removes the umlaut from a, o and u, across the word.
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
// the empty string always matches, so any other character is skipped
'' (next)
)
)
backwardmode (
// Region tests: succeed when the cursor is at or beyond p1 / p2.
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Suffix removal in three independent 'do' steps, run backwards from the end of the word.
define standard_suffix as (
// step 1: inflectional endings in R1; a bare 's' only after a letter in s_ending
do (
[substring] R1 among(
'e' 'em' 'en' 'ern' 'er' 'es'
( delete
)
's'
( s_ending delete
)
)
)
// step 2: en/er/est in R1; 'st' only after a letter in st_ending that itself has at
// least three characters before it
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
// step 3: derivational endings in R2
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)
// Entry point: normalise, mark regions, strip suffixes from the end, then undo markers/umlauts.
define stem as (
do prelude
do mark_regions
// 'backwards' applies to the single command that follows it
backwards
do standard_suffix
do postlude
)
routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)
externals ( stem )
integers ( p1 p2 x )
groupings ( v s_ending st_ending )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
// Umlaut vowels and sharp s as byte codes for the MS-DOS code page named in the comment above.
stringdef a" hex '84'
stringdef o" hex '94'
stringdef u" hex '81'
stringdef ss hex 'E1'
// v: vowels. s_ending: letters that must precede a final 's' for it to be deleted.
// st_ending: the same set without 'r', used for a final 'st'.
define v 'aeiouy{a"}{o"}{u"}'
define s_ending 'bdfghklmnrt'
define st_ending s_ending - 'r'
define prelude as (
test repeat (
(
['{ss}'] <- 'ss'
) or next
)
repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)
)
define mark_regions as (
$p1 = limit
$p2 = limit
test(hop 3 setmark x)
gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
gopast v gopast non-v setmark p2
)
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
'' (next)
)
)
backwardmode (
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
define standard_suffix as (
do (
[substring] R1 among(
'e' 'em' 'en' 'ern' 'er' 'es'
( delete
)
's'
( s_ending delete
)
)
)
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)
routines (
prelude postlude
mark_regions
R1 R2
standard_suffix
)
externals ( stem )
integers ( p1 p2 x )
groupings ( v s_ending st_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a" hex 'E4'
stringdef o" hex 'F6'
stringdef u" hex 'FC'
stringdef ss hex 'DF'
define v 'aeiouy{a"}{o"}{u"}'
define s_ending 'bdfghklmnrt'
define st_ending s_ending - 'r'
// Prelude of this German variant: besides sharp s it also rewrites the digraphs
// ae, oe and ue to the corresponding umlaut letters.
define prelude as (
// pass 1 (cursor restored by 'test'): u and y between two vowels become U and Y
test repeat goto (
v [('u'] v <- 'U') or
('y'] v <- 'Y')
)
// pass 2: longest match at each position; 'qu' is matched separately so that its
// 'u' cannot combine with a following 'e' into 'ue'
repeat (
[substring] among(
'{ss}' (<- 'ss')
'ae' (<- '{a"}')
'oe' (<- '{o"}')
'ue' (<- '{u"}')
'qu' (hop 2)
'' (next)
)
)
)
define mark_regions as (
$p1 = limit
$p2 = limit
test(hop 3 setmark x)
gopast v gopast non-v setmark p1
try($p1 < x $p1 = x) // at least 3
gopast v gopast non-v setmark p2
)
define postlude as repeat (
[substring] among(
'Y' (<- 'y')
'U' (<- 'u')
'{a"}' (<- 'a')
'{o"}' (<- 'o')
'{u"}' (<- 'u')
'' (next)
)
)
backwardmode (
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
define standard_suffix as (
do (
[substring] R1 among(
'e' 'em' 'en' 'ern' 'er' 'es'
( delete
)
's'
( s_ending delete
)
)
)
do (
[substring] R1 among(
'en' 'er' 'est'
( delete
)
'st'
( st_ending hop 3 delete
)
)
)
do (
[substring] R2 among(
'end' 'ung'
( delete
try (['ig'] not 'e' R2 delete)
)
'ig' 'ik' 'isch'
( not 'e' delete
)
'lich' 'heit'
( delete
try (
['er' or 'en'] R1 delete
)
)
'keit'
( delete
try (
[substring] R2 among(
'lich' 'ig'
( delete
)
)
)
)
)
)
)
)
define stem as (
do prelude
do mark_regions
backwards
do standard_suffix
do postlude
)
/*
Hungarian Stemmer
Removes noun inflections
*/
routines (
mark_regions
R1
v_ending
case
case_special
case_other
plural
owned
sing_owner
plur_owner
instrum
factive
undouble
double
)
externals ( stem )
integers ( p1 )
groupings ( v )
stringescapes {}
/* special characters (in ISO Latin 2) */
// Accented Hungarian vowels as single-byte codes.
// NOTE(review): F5 and FB as o/u double acute match the ISO 8859-2 code points - confirm
// the encoding this script is meant to be compiled for.
stringdef a' hex 'E1' //a-acute
stringdef e' hex 'E9' //e-acute
stringdef i' hex 'ED' //i-acute
stringdef o' hex 'F3' //o-acute
stringdef o" hex 'F6' //o-umlaut
stringdef oq hex 'F5' //o-double acute
stringdef u' hex 'FA' //u-acute
stringdef u" hex 'FC' //u-umlaut
stringdef uq hex 'FB' //u-double acute
// v: all vowels, plain and accented.
define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'
// Sets p1, the start of region R1 (default: end of the word).
define mark_regions as (
$p1 = limit
// word starts with a vowel: go to the first non-vowel and set p1 after it, counting
// one of the listed digraphs/trigraph as a single consonant
(v goto non-v
among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next
setmark p1)
or
// word starts with a non-vowel: p1 is set just after the first vowel
(non-v gopast v setmark p1)
)
backwardmode (
// Region test: succeeds when the cursor is at or beyond p1.
define R1 as $p1 <= cursor
// Replaces a final a-acute / e-acute (in R1) by the unaccented vowel.
define v_ending as (
[substring] R1 among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
)
)
// Succeeds, without moving the cursor, when the text before the cursor ends in one of
// the listed doubled consonants (including doubled digraphs such as 'ccs' or 'ssz').
define double as (
test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
)
// Steps back over the last character and deletes the one before it, i.e. removes the
// second-to-last character of the word.
define undouble as (
next [hop 1] delete
)
// Removes a final 'al' / 'el' (in R1) that follows a doubled consonant, then undoubles
// that consonant.
define instrum as(
[substring] R1 among(
'al' (double)
'el' (double)
)
delete
undouble
)
// Deletes the longest listed case ending that lies in R1; the routine then also requires
// v_ending to succeed on the result.
define case as (
[substring] R1 among(
'ban' 'ben'
'ba' 'be'
'ra' 're'
'nak' 'nek'
'val' 'vel'
't{o'}l' 't{oq}l'
'r{o'}l' 'r{oq}l'
'b{o'}l' 'b{oq}l'
'hoz' 'hez' 'h{o"}z'
'n{a'}l' 'n{e'}l'
'ig'
'at' 'et' 'ot' '{o"}t'
'{e'}rt'
'k{e'}pp' 'k{e'}ppen'
'kor'
'ul' '{u"}l'
'v{a'}' 'v{e'}'
'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
'k{e'}nt'
'en' 'on' 'an' '{o"}n'
'n'
't'
)
delete
v_ending
)
// Case endings that start with an accented vowel: the ending (in R1) is replaced by the
// unaccented vowel.
define case_special as(
[substring] R1 among(
'{e'}n' (<- 'e')
'{a'}n' (<- 'a')
'{a'}nk{e'}nt' (<- 'a')
)
)
// Handles the -stul / -stül endings (in R1): deleted outright, or replaced by the
// unaccented vowel when the ending starts with an accented one.
define case_other as(
[substring] R1 among(
'astul' 'est{u"}l' (delete)
'stul' 'st{u"}l' (delete)
'{a'}stul' (<- 'a')
'{e'}st{u"}l' (<- 'e')
)
)
// Removes a final a-acute / e-acute (in R1) that follows a doubled consonant, then
// undoubles that consonant.
define factive as(
[substring] R1 among(
'{a'}' (double)
'{e'}' (double)
)
delete
undouble
)
// Removes the plural ending -k (in R1); an accented linking vowel is kept unaccented,
// other linking vowels are deleted together with the k.
define plural as (
[substring] R1 among(
'{a'}k' (<- 'a')
'{e'}k' (<- 'e')
'{o"}k' (delete)
'ak' (delete)
'ok' (delete)
'ek' (delete)
'k' (delete)
)
)
// Removes the -é / -éi / -ké family of endings (in R1). Where the ending starts with an
// accented a/e, that vowel is kept in unaccented form.
define owned as (
[substring] R1 among (
'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
'{e'}k{e'}' (<- 'e')
'{a'}k{e'}' (<- 'a')
'k{e'}' (delete)
'{e'}{e'}i' (<- 'e')
'{a'}{e'}i' (<- 'a')
'{e'}i' (delete)
'{e'}{e'}' (<- 'e')
'{e'}' (delete)
)
)
// Removes the listed possessive endings (in R1); the longest match wins. Endings that
// start with a-acute / e-acute are replaced by the unaccented vowel, the rest are deleted.
define sing_owner as (
[substring] R1 among(
'{u"}nk' 'unk' (delete)
'{a'}nk' (<- 'a')
'{e'}nk' (<- 'e')
'nk' (delete)
'{a'}juk' (<- 'a')
'{e'}j{u"}k' (<- 'e')
'juk' 'j{u"}k' (delete)
'uk' '{u"}k' (delete)
'em' 'om' 'am' (delete)
'{a'}m' (<- 'a')
'{e'}m' (<- 'e')
'm' (delete)
'od' 'ed' 'ad' '{o"}d' (delete)
'{a'}d' (<- 'a')
'{e'}d' (<- 'e')
'd' (delete)
'ja' 'je' (delete)
'a' 'e' 'o' (delete)
'{a'}' (<- 'a')
'{e'}' (<- 'e')
)
)
// Removes the listed possessive endings that contain -i- (in R1); the longest match wins.
// Endings that start with a-acute / e-acute are replaced by the unaccented vowel, the rest
// are deleted.
define plur_owner as (
[substring] R1 among(
'jaim' 'jeim' (delete)
'{a'}im' (<- 'a')
'{e'}im' (<- 'e')
'aim' 'eim' (delete)
'im' (delete)
'jaid' 'jeid' (delete)
'{a'}id' (<- 'a')
'{e'}id' (<- 'e')
'aid' 'eid' (delete)
'id' (delete)
'jai' 'jei' (delete)
'{a'}i' (<- 'a')
'{e'}i' (<- 'e')
'ai' 'ei' (delete)
'i' (delete)
'jaink' 'jeink' (delete)
'eink' 'aink' (delete)
'{a'}ink' (<- 'a')
'{e'}ink' (<- 'e')
// 'ink' has no action of its own and so shares the (delete) on the next line
'ink'
'jaitok' 'jeitek' (delete)
'aitok' 'eitek' (delete)
'{a'}itok' (<- 'a')
'{e'}itek' (<- 'e')
'itek' (delete)
'jeik' 'jaik' (delete)
'aik' 'eik' (delete)
'{a'}ik' (<- 'a')
'{e'}ik' (<- 'e')
'ik' (delete)
)
)
)
// Entry point: marks R1, then tries every suffix routine once, in this fixed order,
// working from the end of the word. Each runs under 'do', so a failure is ignored.
define stem as (
do mark_regions
backwards (
do instrum
do case
do case_special
do case_other
do factive
do owned
do sing_owner
do plur_owner
do plural
)
)
routines (
prelude postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
verb_suffix
vowel_suffix
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v AEIO CG )
stringescapes {}
/* special characters (in ISO Latin I) */
// Acute (') and grave (`) accented vowels as single-byte codes.
stringdef a' hex 'E1'
stringdef a` hex 'E0'
stringdef e' hex 'E9'
stringdef e` hex 'E8'
stringdef i' hex 'ED'
stringdef i` hex 'EC'
stringdef o' hex 'F3'
stringdef o` hex 'F2'
stringdef u' hex 'FA'
stringdef u` hex 'F9'
// v: vowels; only the grave forms are listed because prelude rewrites acute to grave.
define v 'aeiou{a`}{e`}{i`}{o`}{u`}'
// Normalises the word: acute accents become grave, 'qu' becomes 'qU', and u / i standing
// between two vowels become U / I.
define prelude as (
// pass 1 over the whole word; 'test' puts the cursor back at the start afterwards
test repeat (
[substring] among(
'{a'}' (<- '{a`}')
'{e'}' (<- '{e`}')
'{i'}' (<- '{i`}')
'{o'}' (<- '{o`}')
'{u'}' (<- '{u`}')
'qu' (<- 'qU')
'' (next)
)
)
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)
// Sets pV, p1 and p2, the starts of regions RV, R1 and R2.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
// pV depends on the first two letters:
//   vowel + non-vowel     -> after the next vowel
//   vowel + vowel         -> after the next non-vowel
//   non-vowel + non-vowel -> after the next vowel
//   non-vowel + vowel     -> after the third letter
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
// p1: after the first non-vowel following a vowel; p2: the same rule applied from p1
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Turns the markers I and U back into lower case across the whole word.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
// the empty string always matches, so any other character is skipped
'' (next)
)
)
backwardmode (
// Region tests: succeed when the cursor is at or beyond pV / p1 / p2.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Handles a pronoun attached to a verb form: the pronoun (the marked slice) is deleted
// after -ando/-endo, or replaced by 'e' after -ar/-er/-ir.
define attached_pronoun as (
[substring] among(
'ci' 'gli' 'la' 'le' 'li' 'lo'
'mi' 'ne' 'si' 'ti' 'vi'
// the compound forms are:
'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
'mela' 'mele' 'meli' 'melo' 'mene'
'tela' 'tele' 'teli' 'telo' 'tene'
'cela' 'cele' 'celi' 'celo' 'cene'
'vela' 'vele' 'veli' 'velo' 'vene'
)
// the verb ending before the pronoun must lie in RV (the leading (RV) command)
among( (RV)
'ando' 'endo' (delete)
'ar' 'er' 'ir' (<- 'e')
)
)
// Removes or rewrites the longest matching ending from the table. Each run
// of strings shares the parenthesised action that follows it. R1, R2 and RV
// are tested at the start of the matched ending, so the ending must lie
// inside that region. Several actions go on to try removing one more ending
// (ic, at, iv, os, abil) that the first deletion exposed.
define standard_suffix as (
[substring] among(
'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
'atrice' 'atrici'
'ante' 'anti' // Note 1
( R2 delete )
'azione' 'azioni' 'atore' 'atori'
( R2 delete
try ( ['ic'] R2 delete )
)
'logia' 'logie'
( R2 <- 'log' )
'uzione' 'uzioni' 'usione' 'usioni'
( R2 <- 'u' )
'enza' 'enze'
( R2 <- 'ente' )
'amento' 'amenti' 'imento' 'imenti'
( RV delete )
'amente' (
R1 delete
try (
[substring] R2 delete among(
'iv' ( ['at'] R2 delete )
'os' 'ic' 'abil'
)
)
)
'it{a`}' (
R2 delete
try (
[substring] among(
'abil' 'ic' 'iv' (R2 delete)
)
)
)
'ivo' 'ivi' 'iva' 'ive' (
R2 delete
try ( ['at'] R2 delete ['ic'] R2 delete )
)
)
)
// Deletes the longest listed verb ending. setlimit tomark pV confines the
// search to the part of the word from pV to the end.
// NOTE(review): the entry Yamo contains an upper-case Y, while prelude only
// produces the markers U and I - confirm whether this entry is intentional.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
'ono' 'uta' 'ute' 'uti' 'uto'
'ar' 'ir' // but 'er' is problematical
(delete)
)
)
define AEIO 'aeio{a`}{e`}{i`}{o`}'
define CG 'cg'
// Final clean-up, both parts optional (try).
// Part 1: deletes a final AEIO vowel lying in RV, then an i in front of it
// that also lies in RV. Deletions are not undone, so the vowel stays removed
// even when no such i follows.
// Part 2: deletes a final h when the character before it is in CG and the
// RV test holds there.
define vowel_suffix as (
try (
[AEIO] RV delete
['i'] RV delete
)
try (
['h'] CG RV delete
)
)
)
// Entry point. prelude and mark_regions run forwards; the suffix routines
// run backwards from the end of the word; postlude runs forwards again.
// verb_suffix is attempted only when standard_suffix fails. Each stage is
// under do, so a failing stage does not prevent the later ones.
define stem as (
do prelude
do mark_regions
backwards (
do attached_pronoun
do (standard_suffix or verb_suffix)
do vowel_suffix
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
routines (
prelude postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
verb_suffix
vowel_suffix
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v AEIO CG )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a' hex 'A0'
stringdef a` hex '85'
stringdef e' hex '82'
stringdef e` hex '8A'
stringdef i' hex 'A1'
stringdef i` hex '8D'
stringdef o' hex 'A2'
stringdef o` hex '95'
stringdef u' hex 'A3'
stringdef u` hex '97'
define v 'aeiou{a`}{e`}{i`}{o`}{u`}'
// Forward pass run before the regions are marked.
// First loop (inside test, so the cursor is put back afterwards): rewrites
// each acute-accented vowel as its grave counterpart and qu as qU, and
// otherwise steps over one character.
// Second loop: a u or an i standing between two vowels is rewritten as U / I.
define prelude as (
test repeat (
[substring] among(
'{a'}' (<- '{a`}')
'{e'}' (<- '{e`}')
'{i'}' (<- '{i`}')
'{o'}' (<- '{o`}')
'{u'}' (<- '{u`}')
'qu' (<- 'qU')
'' (next)
)
)
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)
// Sets the marks pV, p1 and p2; all three default to limit.
// pV depends on the first two characters:
//   vowel, non-vowel     -> just past the next vowel
//   vowel, vowel         -> just past the next non-vowel
//   non-vowel, non-vowel -> just past the next vowel
//   non-vowel, vowel     -> one character further on
// p1 is just past the first non-vowel that follows a vowel; p2 applies the
// same rule again starting from p1. Both parts are under do, so a word too
// short for a mark simply keeps the default.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Walks the whole word, turning every upper-case I and U back into i and u
// and stepping over any other character.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: each succeeds when the cursor is at or to the right of the
// corresponding mark (pV, p1, p2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Handles a pronoun attached to the end of the word. The first among only
// brackets the longest listed pronoun as the slice. The second among then
// looks at what precedes it and, provided RV holds there, acts on the slice:
// after ando / endo the pronoun is deleted; after ar / er / ir it is
// replaced by e.
define attached_pronoun as (
[substring] among(
'ci' 'gli' 'la' 'le' 'li' 'lo'
'mi' 'ne' 'si' 'ti' 'vi'
// the compound forms are:
'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
'mela' 'mele' 'meli' 'melo' 'mene'
'tela' 'tele' 'teli' 'telo' 'tene'
'cela' 'cele' 'celi' 'celo' 'cene'
'vela' 'vele' 'veli' 'velo' 'vene'
)
among( (RV)
'ando' 'endo' (delete)
'ar' 'er' 'ir' (<- 'e')
)
)
// Removes or rewrites the longest matching ending from the table. Each run
// of strings shares the parenthesised action that follows it. R1, R2 and RV
// are tested at the start of the matched ending, so the ending must lie
// inside that region. Several actions go on to try removing one more ending
// (ic, at, iv, os, abil) that the first deletion exposed.
define standard_suffix as (
[substring] among(
'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
'atrice' 'atrici'
'ante' 'anti' // Note 1
( R2 delete )
'azione' 'azioni' 'atore' 'atori'
( R2 delete
try ( ['ic'] R2 delete )
)
'logia' 'logie'
( R2 <- 'log' )
'uzione' 'uzioni' 'usione' 'usioni'
( R2 <- 'u' )
'enza' 'enze'
( R2 <- 'ente' )
'amento' 'amenti' 'imento' 'imenti'
( RV delete )
'amente' (
R1 delete
try (
[substring] R2 delete among(
'iv' ( ['at'] R2 delete )
'os' 'ic' 'abil'
)
)
)
'it{a`}' (
R2 delete
try (
[substring] among(
'abil' 'ic' 'iv' (R2 delete)
)
)
)
'ivo' 'ivi' 'iva' 'ive' (
R2 delete
try ( ['at'] R2 delete ['ic'] R2 delete )
)
)
)
// Deletes the longest listed verb ending. setlimit tomark pV confines the
// search to the part of the word from pV to the end.
// NOTE(review): the entry Yamo contains an upper-case Y, while prelude only
// produces the markers U and I - confirm whether this entry is intentional.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
'ono' 'uta' 'ute' 'uti' 'uto'
'ar' 'ir' // but 'er' is problematical
(delete)
)
)
define AEIO 'aeio{a`}{e`}{i`}{o`}'
define CG 'cg'
// Final clean-up, both parts optional (try).
// Part 1: deletes a final AEIO vowel lying in RV, then an i in front of it
// that also lies in RV. Deletions are not undone, so the vowel stays removed
// even when no such i follows.
// Part 2: deletes a final h when the character before it is in CG and the
// RV test holds there.
define vowel_suffix as (
try (
[AEIO] RV delete
['i'] RV delete
)
try (
['h'] CG RV delete
)
)
)
// Entry point. prelude and mark_regions run forwards; the suffix routines
// run backwards from the end of the word; postlude runs forwards again.
// verb_suffix is attempted only when standard_suffix fails. Each stage is
// under do, so a failing stage does not prevent the later ones.
define stem as (
do prelude
do mark_regions
backwards (
do attached_pronoun
do (standard_suffix or verb_suffix)
do vowel_suffix
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
strings ( ch )
integers ( x p1 p2 )
booleans ( Y_found stemmed GE_removed )
routines (
R1 R2
C V VX
lengthen_V
Step_1 Step_2 Step_3 Step_4 Step_7
Step_6 Step_1c
Lose_prefix
Lose_infix
measure
)
externals ( stem )
groupings ( v v_WX AOU AIOU )
stringescapes {}
stringdef ' hex '27' // yuk
define v 'aeiouy'
define v_WX v + 'wx'
define AOU 'aou'
define AIOU 'aiou'
backwardmode (
// Region tests: store the current cursor in x and succeed when x is at or
// beyond p1 (R1) or p2 (R2).
define R1 as (setmark x $x >= p1)
define R2 as (setmark x $x >= p2)
// Look-behind tests; test leaves the cursor where it was.
//   V  - the preceding text is a vowel or ij
//   VX - the same, after first skipping one character
//   C  - the preceding text is not ij and is a non-vowel
define V as test (v or 'ij')
define VX as test (next v or 'ij')
define C as test (not 'ij' non-v)
// Doubles a vowel near the end of the current text. Reading backwards it
// requires one character outside v_WX, then either a vowel from AOU or an e
// that passes the extra tests, with a non-vowel or the word start before it.
// The bracketed vowel is copied into ch and inserted once more.
// The whole routine is under do, so callers never see it fail.
define lengthen_V as do (
non-v_WX [ (AOU] test (non-v or atlimit)) or
('e'] test (non-v or atlimit
not AIOU
not (next AIOU non-v)))
->ch insert ch
)
// Step 1: acts on one of the endings s (also with apostrophe), ies, es,
// aus, en, nde. The leading (]) runs before every action and closes the
// slice at the matched ending; actions that match further text and use ]
// again extend the slice to the left before deleting or replacing it.
define Step_1 as
(
[among ( (])
'{'}s' (delete)
's' (R1 not ('t' R1) C delete)
'ies' (R1 <-'ie')
'es'
(('ar' R1 C ] delete lengthen_V) or
('er' R1 C ] delete) or
(R1 C <-'e'))
'aus' (R1 V <-'au')
'en' (('hed' R1 ] <-'heid') or
('nd' delete) or
('d' R1 C ] delete) or
('i' or 'j' V delete) or
(R1 C delete lengthen_V))
'nde' (<-'nd')
)
)
// Step 2: acts on endings that finish in e (je, ge, lijke, ische, de, te,
// se, re, le, ene, ieve). The leading (]) closes the slice at the matched
// ending before each action. The je action tries its alternatives in the
// order written; each one that matches more text moves the slice start with
// its own ].
define Step_2 as
(
[among ( (])
'je' (('{'}t' ] delete) or
('et' ] R1 C delete) or
('rnt' ] <-'rn') or
('t' ] R1 VX delete) or
('ink' ] <-'ing') or
('mp' ] <-'m') or
('{'}' ] R1 delete) or
(] R1 C delete))
'ge' (R1 <-'g')
'lijke'(R1 <-'lijk')
'ische'(R1 <-'isch')
'de' (R1 C delete)
'te' (R1 <-'t')
'se' (R1 <-'s')
're' (R1 <-'r')
'le' (R1 delete attach 'l' lengthen_V)
'ene' (R1 C delete attach 'en' lengthen_V)
'ieve' (R1 C <-'ief')
)
)
// Step 3: removes or rewrites the listed endings. Strings without an action
// of their own share the next action in the list. All actions except the
// one for rder require R1 or R2.
define Step_3 as
(
[among ( (])
'atie' (R1 <-'eer')
'iteit' (R1 delete lengthen_V)
'heid'
'sel'
'ster' (R1 delete)
'rder' (<-'r')
'ing'
'isme'
'erij' (R1 delete lengthen_V)
'arij' (R1 C <-'aar')
'fie' (R2 delete attach 'f' lengthen_V)
'gie' (R2 delete attach 'g' lengthen_V)
'tst' (R1 C <-'t')
'dst' (R1 C <-'d')
)
)
// Step 4: two tables joined by or. The first handles the longer endings
// (ioneel, atief, baar, ... end); only if it fails is the second table
// (iger, igst, ig) tried. Strings without an action of their own share the
// next action in the list.
define Step_4 as
(
( [among ( (])
'ioneel' (R1 <-'ie')
'atief' (R1 <-'eer')
'baar' (R1 delete)
'naar' (R1 V <-'n')
'laar' (R1 V <-'l')
'raar' (R1 V <-'r')
'tant' (R1 <-'teer')
'lijker'
'lijkst' (R1 <-'lijk')
'achtig'
'achtiger'
'achtigst'(R1 delete)
'eriger'
'erigst'
'erig'
'end' (R1 C delete lengthen_V)
)
)
or
( [among ( (])
'iger'
'igst'
'ig' (R1 C delete lengthen_V)
)
)
)
// Step 7: drops the final t of a word ending in kt, ft or pt.
define Step_7 as
(
[among ( (])
'kt' (<-'k')
'ft' (<-'f')
'pt' (<-'p')
)
)
// Step 6: reduces a final doubled consonant to a single one, and rewrites a
// final single v as f and a final single z as s.
define Step_6 as
(
[among ( (])
'bb' (<-'b')
'cc' (<-'c')
'dd' (<-'d')
'ff' (<-'f')
'gg' (<-'g')
'hh' (<-'h')
'jj' (<-'j')
'kk' (<-'k')
'll' (<-'l')
'mm' (<-'m')
'nn' (<-'n')
'pp' (<-'p')
'qq' (<-'q')
'rr' (<-'r')
'ss' (<-'s')
'tt' (<-'t')
'vv' (<-'v')
'ww' (<-'w')
'xx' (<-'x')
'zz' (<-'z')
'v' (<-'f')
'z' (<-'s')
)
)
// Step 1c: deletes a final d or t. The shared prefix closes the slice and
// requires R1 and C. The d is kept when an n in R1 precedes it, and the t is
// kept when an h in R1 precedes it.
define Step_1c as
(
[among ( (] R1 C)
'd' (not ('n' R1) delete)
't' (not ('h' R1) delete)
)
)
)
// Forward mode. Deletes ge at the cursor and sets GE_removed, provided at
// least three characters follow it and, after those three, a vowel and then
// a later non-vowel can be found.
define Lose_prefix as (
['ge'] test hop 3 (goto v goto non-v)
set GE_removed
delete
)
// Forward mode. Skips one character, then finds the next ge. It is deleted
// and GE_removed is set, provided at least three characters follow it and,
// after those three, a vowel and then a later non-vowel can be found.
define Lose_infix as (
next
gopast (['ge']) test hop 3 (goto v goto non-v)
set GE_removed
delete
)
// Computes p1 and p2. Both are first set to the end of the word. Then, from
// the start: skip any non-vowels, require one or more vowels (ij counts as
// a vowel unit), require a non-vowel, and mark p1 after it. The same pattern
// is applied again from there for p2. Under do, so a mark that cannot be
// placed keeps the end-of-word default.
define measure as (
do (
tolimit
setmark p1
setmark p2
)
do(
repeat non-v atleast 1 ('ij' or v) non-v setmark p1
repeat non-v atleast 1 ('ij' or v) non-v setmark p2
)
)
// Entry point.
// 1. A y at the start of the word or after a vowel becomes Y (Y_found set).
// 2. measure sets p1 / p2; Steps 1-4 run backwards, each setting stemmed
//    when it succeeds.
// 3. Lose_prefix, then Lose_infix: after each successful removal the regions
//    are measured again and Step_1c is run if GE_removed is set.
// 4. Step_7 runs; Step_6 runs only if stemmed or GE_removed is set.
// 5. If any Y was introduced, every Y is turned back into y.
define stem as (
unset Y_found
unset stemmed
do ( ['y'] <-'Y' set Y_found )
do repeat(goto (v ['y'])<-'Y' set Y_found )
measure
backwards (
do (Step_1 set stemmed )
do (Step_2 set stemmed )
do (Step_3 set stemmed )
do (Step_4 set stemmed )
)
unset GE_removed
do (Lose_prefix and measure)
backwards (
do (GE_removed Step_1c)
)
unset GE_removed
do (Lose_infix and measure)
backwards (
do (GE_removed Step_1c)
)
backwards (
do (Step_7 set stemmed )
do (stemmed or GE_removed Step_6)
)
do(Y_found repeat(goto (['Y']) <-'y'))
)
stringescapes {}
routines (
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA BB CC
endings
undouble respell
)
externals ( stem )
backwardmode (
/* Lovins' conditions A, B ... CC, as given in her Appendix B, where
a test for a two letter prefix ('test hop 2') is implicitly
assumed. Note that 'e' next 'u' corresponds to her u*e because
Snowball is scanning backwards. */
// Each condition is evaluated with the cursor just in front of the matched
// ending. hop N requires at least N characters of stem to remain; the
// literal tests that follow a test hop N inspect the letters immediately
// before the ending.
define A as ( hop 2 )
define B as ( hop 3 )
define C as ( hop 4 )
define D as ( hop 5 )
define E as ( test hop 2 not 'e' )
define F as ( test hop 3 not 'e' )
define G as ( test hop 3 'f' )
define H as ( test hop 2 't' or 'll' )
define I as ( test hop 2 not 'o' not 'e' )
define J as ( test hop 2 not 'a' not 'e' )
define K as ( test hop 3 'l' or 'i' or ('e' next 'u') )
define L as ( test hop 2 not 'u' not 'x' not ('s' not 'o') )
define M as ( test hop 2 not 'a' not 'c' not 'e' not 'm' )
define N as ( test hop 3 ( hop 2 not 's' or hop 2 ) )
define O as ( test hop 2 'l' or 'i' )
define P as ( test hop 2 not 'c' )
define Q as ( test hop 2 test hop 3 not 'l' not 'n' )
define R as ( test hop 2 'n' or 'r' )
define S as ( test hop 2 'dr' or ('t' not 't') )
define T as ( test hop 2 's' or ('t' not 'o') )
define U as ( test hop 2 'l' or 'm' or 'n' or 'r' )
define V as ( test hop 2 'c' )
define W as ( test hop 2 not 's' not 'u' )
define X as ( test hop 2 'l' or 'i' or ('e' next 'u') )
define Y as ( test hop 2 'in' )
define Z as ( test hop 2 not 'f' )
define AA as ( test hop 2 among ( 'd' 'f' 'ph' 'th' 'l' 'er' 'or'
'es' 't' ) )
define BB as ( test hop 3 not 'met' not 'ryst' )
define CC as ( test hop 2 'l' )
/* The system of endings, as given in Appendix A. */
// Every ending is followed by the name of the condition routine that must
// succeed for it to be accepted; all endings share the single delete action
// at the bottom of the table. The rows are laid out by decreasing ending
// length, from 11 letters down to 1.
define endings as (
[substring] among(
'alistically' B 'arizability' A 'izationally' B
'antialness' A 'arisations' A 'arizations' A 'entialness' A
'allically' C 'antaneous' A 'antiality' A 'arisation' A
'arization' A 'ationally' B 'ativeness' A 'eableness' E
'entations' A 'entiality' A 'entialize' A 'entiation' A
'ionalness' A 'istically' A 'itousness' A 'izability' A
'izational' A
'ableness' A 'arizable' A 'entation' A 'entially' A
'eousness' A 'ibleness' A 'icalness' A 'ionalism' A
'ionality' A 'ionalize' A 'iousness' A 'izations' A
'lessness' A
'ability' A 'aically' A 'alistic' B 'alities' A
'ariness' E 'aristic' A 'arizing' A 'ateness' A
'atingly' A 'ational' B 'atively' A 'ativism' A
'elihood' E 'encible' A 'entally' A 'entials' A
'entiate' A 'entness' A 'fulness' A 'ibility' A
'icalism' A 'icalist' A 'icality' A 'icalize' A
'ication' G 'icianry' A 'ination' A 'ingness' A
'ionally' A 'isation' A 'ishness' A 'istical' A
'iteness' A 'iveness' A 'ivistic' A 'ivities' A
'ization' F 'izement' A 'oidally' A 'ousness' A
'aceous' A 'acious' B 'action' G 'alness' A
'ancial' A 'ancies' A 'ancing' B 'ariser' A
'arized' A 'arizer' A 'atable' A 'ations' B
'atives' A 'eature' Z 'efully' A 'encies' A
'encing' A 'ential' A 'enting' C 'entist' A
'eously' A 'ialist' A 'iality' A 'ialize' A
'ically' A 'icance' A 'icians' A 'icists' A
'ifully' A 'ionals' A 'ionate' D 'ioning' A
'ionist' A 'iously' A 'istics' A 'izable' E
'lessly' A 'nesses' A 'oidism' A
'acies' A 'acity' A 'aging' B 'aical' A
'alist' A 'alism' B 'ality' A 'alize' A
'allic'BB 'anced' B 'ances' B 'antic' C
'arial' A 'aries' A 'arily' A 'arity' B
'arize' A 'aroid' A 'ately' A 'ating' I
'ation' B 'ative' A 'ators' A 'atory' A
'ature' E 'early' Y 'ehood' A 'eless' A
'elity' A 'ement' A 'enced' A 'ences' A
'eness' E 'ening' E 'ental' A 'ented' C
'ently' A 'fully' A 'ially' A 'icant' A
'ician' A 'icide' A 'icism' A 'icist' A
'icity' A 'idine' I 'iedly' A 'ihood' A
'inate' A 'iness' A 'ingly' B 'inism' J
'inity'CC 'ional' A 'ioned' A 'ished' A
'istic' A 'ities' A 'itous' A 'ively' A
'ivity' A 'izers' F 'izing' F 'oidal' A
'oides' A 'otide' A 'ously' A
'able' A 'ably' A 'ages' B 'ally' B
'ance' B 'ancy' B 'ants' B 'aric' A
'arly' K 'ated' I 'ates' A 'atic' B
'ator' A 'ealy' Y 'edly' E 'eful' A
'eity' A 'ence' A 'ency' A 'ened' E
'enly' E 'eous' A 'hood' A 'ials' A
'ians' A 'ible' A 'ibly' A 'ical' A
'ides' L 'iers' A 'iful' A 'ines' M
'ings' N 'ions' B 'ious' A 'isms' B
'ists' A 'itic' H 'ized' F 'izer' F
'less' A 'lily' A 'ness' A 'ogen' A
'ward' A 'wise' A 'ying' B 'yish' A
'acy' A 'age' B 'aic' A 'als'BB
'ant' B 'ars' O 'ary' F 'ata' A
'ate' A 'eal' Y 'ear' Y 'ely' E
'ene' E 'ent' C 'ery' E 'ese' A
'ful' A 'ial' A 'ian' A 'ics' A
'ide' L 'ied' A 'ier' A 'ies' P
'ily' A 'ine' M 'ing' N 'ion' Q
'ish' C 'ism' B 'ist' A 'ite'AA
'ity' A 'ium' A 'ive' A 'ize' F
'oid' A 'one' R 'ous' A
'ae' A 'al'BB 'ar' X 'as' B
'ed' E 'en' F 'es' E 'ia' A
'ic' A 'is' A 'ly' B 'on' S
'or' T 'um' U 'us' V 'yl' R
'{'}s' A 's{'}' A
'a' A 'e' A 'i' A 'o' A
's' W 'y' B
(delete)
)
)
/* Undoubling is rule 1 of appendix C. */
// The test checks, without moving the cursor, that the word ends in one of
// the listed double letters; [next] then brackets the final character and
// delete removes it.
define undouble as (
test substring among ('bb' 'dd' 'gg' 'll' 'mm' 'nn' 'pp' 'rr' 'ss'
'tt')
[next] delete
)
/* The other appendix C rules can be done together. */
// Replaces the longest matching stem ending with the spelling given in its
// action. A not test inside an action looks at the letter just before the
// matched ending and blocks the replacement when that letter is present.
define respell as (
[substring] among (
'iev' (<-'ief')
'uct' (<-'uc')
'umpt' (<-'um')
'rpt' (<-'rb')
'urs' (<-'ur')
'istr' (<-'ister')
'metr' (<-'meter')
'olv' (<-'olut')
'ul' (not 'a' not 'i' not 'o' <-'l')
'bex' (<-'bic')
'dex' (<-'dic')
'pex' (<-'pic')
'tex' (<-'tic')
'ax' (<-'ac')
'ex' (<-'ec')
'ix' (<-'ic')
'lux' (<-'luc')
'uad' (<-'uas')
'vad' (<-'vas')
'cid' (<-'cis')
'lid' (<-'lis')
'erid' (<-'eris')
'pand' (<-'pans')
'end' (not 's' <-'ens')
'ond' (<-'ons')
'lud' (<-'lus')
'rud' (<-'rus')
'her' (not 'p' not 't' <-'hes')
'mit' (<-'mis')
'ent' (not 'm' <-'ens')
/* 'ent' was 'end' in the 1968 paper - a typo. */
'ert' (<-'ers')
'et' (not 'n' <-'es')
'yt' (<-'ys')
'yz' (<-'ys')
)
)
)
// Entry point: working backwards from the end of the word, removes an
// ending, undoubles a final double letter, then respells the stem. Each
// stage is under do, so a failing stage does not stop the next one.
define stem as (
backwards (
do endings
do undouble
do respell
)
)
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)
externals ( stem )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef ae hex 'E6'
stringdef ao hex 'E5'
stringdef o/ hex 'F8'
define v 'aeiouy{ae}{ao}{o/}'
define s_ending 'bcdfghjlmnoprtvyz'
// Sets p1 (default: limit). x is marked three characters into the word;
// the routine fails here if the word is shorter than that. p1 is placed
// just past the first non-vowel that follows a vowel, and is then moved
// forward to x if it fell before x.
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
// Finds the longest listed ending that lies between p1 and the end of the
// word (setlimit tomark p1), then acts on it:
//   first group   - deleted
//   s             - deleted when preceded by a letter from s_ending, or by
//                   k with a non-vowel before it
//   erte / ert    - replaced by er
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
'hetens' 'ers' 'ets' 'et' 'het' 'ast'
(delete)
's'
(s_ending or ('k' non-v) delete)
'erte' 'ert'
(<-'er')
)
)
// If the word ends in dt or vt, with the pair lying between p1 and the end
// of the word, deletes the final letter. The check is under test so the
// cursor stays at the end of the word; next] then brackets that last
// character for deletion.
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'dt' 'vt'
)
)
next] delete
)
// Deletes the longest listed ending that lies between p1 and the end of
// the word.
define other_suffix as (
setlimit tomark p1 for ([substring])
among(
'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
'hetslov'
(delete)
)
)
)
// Entry point: marks p1 forwards, then runs the three suffix routines
// backwards from the end of the word. Each stage is under do, so a failing
// stage does not stop the later ones.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)
externals ( stem )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef ae hex '91'
stringdef ao hex '86'
stringdef o/ hex '9B'
define v 'aeiouy{ae}{ao}{o/}'
define s_ending 'bcdfghjlmnoprtvyz'
// Sets p1 (default: limit). x is marked three characters into the word;
// the routine fails here if the word is shorter than that. p1 is placed
// just past the first non-vowel that follows a vowel, and is then moved
// forward to x if it fell before x.
define mark_regions as (
$p1 = limit
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
try ( $p1 < x $p1 = x )
)
backwardmode (
// Finds the longest listed ending that lies between p1 and the end of the
// word (setlimit tomark p1), then acts on it:
//   first group   - deleted
//   s             - deleted when preceded by a letter from s_ending, or by
//                   k with a non-vowel before it
//   erte / ert    - replaced by er
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
'hetens' 'ers' 'ets' 'et' 'het' 'ast'
(delete)
's'
(s_ending or ('k' non-v) delete)
'erte' 'ert'
(<-'er')
)
)
// If the word ends in dt or vt, with the pair lying between p1 and the end
// of the word, deletes the final letter. The check is under test so the
// cursor stays at the end of the word; next] then brackets that last
// character for deletion.
define consonant_pair as (
test (
setlimit tomark p1 for ([substring])
among(
'dt' 'vt'
)
)
next] delete
)
// Deletes the longest listed ending that lies between p1 and the end of
// the word.
define other_suffix as (
setlimit tomark p1 for ([substring])
among(
'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
'hetslov'
(delete)
)
)
)
// Entry point: marks p1 forwards, then runs the three suffix routines
// backwards from the end of the word. Each stage is under do, so a failing
// stage does not stop the later ones.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)
integers ( p1 p2 )
booleans ( Y_found )
routines (
shortv
R1 R2
Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5a Step_5b
)
externals ( stem )
groupings ( v v_WXY )
define v 'aeiouy'
define v_WXY v + 'wxY'
backwardmode (
// shortv: reading backwards, a character outside v_WXY, then a vowel, then
// a non-vowel.
// R1 / R2: succeed when the cursor is at or to the right of p1 / p2.
define shortv as ( non-v_WXY v non-v )
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Step 1a: sses becomes ss, ies becomes i, ss is left alone (empty action),
// and any other final s is deleted.
define Step_1a as (
[substring] among (
'sses' (<-'ss')
'ies' (<-'i')
'ss' ()
's' (delete)
)
)
// Step 1b: eed becomes ee when in R1. ed and ing are deleted when a vowel
// occurs somewhere before them, and the new ending is then repaired:
//   at / bl / iz          - e is appended
//   listed double letters - the last letter is deleted
//   otherwise             - e is appended when the cursor is exactly at p1
//                           and shortv matches
define Step_1b as (
[substring] among (
'eed' (R1 <-'ee')
'ed'
'ing' (
test gopast v delete
test substring among(
'at' 'bl' 'iz'
(<+ 'e')
'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
// ignoring double c, h, j, k, q, v, w, and x
([next] delete)
'' (atmark p1 test shortv <+ 'e')
)
)
)
)
// Step 1c: a final y or Y becomes i when a vowel occurs somewhere before it.
define Step_1c as (
['y' or 'Y']
gopast v
<-'i'
)
// Step 2: when the longest matching ending lies in R1, it is replaced by
// the shorter form given in its action. Strings without an action of their
// own share the next action in the list.
define Step_2 as (
[substring] R1 among (
'tional' (<-'tion')
'enci' (<-'ence')
'anci' (<-'ance')
'abli' (<-'able')
'entli' (<-'ent')
'eli' (<-'e')
'izer' 'ization'
(<-'ize')
'ational' 'ation' 'ator'
(<-'ate')
'alli' (<-'al')
'alism' 'aliti'
(<-'al')
'fulness' (<-'ful')
'ousli' 'ousness'
(<-'ous')
'iveness' 'iviti'
(<-'ive')
'biliti' (<-'ble')
)
)
// Step 3: when the longest matching ending lies in R1: alize becomes al;
// icate, iciti and ical become ic; ative, ful and ness are deleted.
define Step_3 as (
[substring] R1 among (
'alize' (<-'al')
'icate' 'iciti' 'ical'
(<-'ic')
'ative' 'ful' 'ness'
(delete)
)
)
// Step 4: when the longest matching ending lies in R2 it is deleted; ion is
// deleted only when an s or a t precedes it.
define Step_4 as (
[substring] R2 among (
'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
'ment' 'ent' 'ou' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
(delete)
'ion' ('s' or 't' delete)
)
)
// Step 5a: deletes a final e if it lies in R2, or lies in R1 and is not
// preceded by the shortv pattern.
define Step_5a as (
['e']
R2 or (R1 not shortv)
delete
)
// Step 5b: deletes a final l when it lies in R2 and another l precedes it.
define Step_5b as (
['l']
R2 'l'
delete
)
)
// Entry point.
// 1. A y at the start of the word or directly after a vowel becomes Y
//    (Y_found set).
// 2. p1 and p2 default to limit; p1 is set just past the first non-vowel
//    that follows a vowel, and p2 by the same rule starting from p1.
// 3. Steps 1a to 5b run backwards in order, each under do.
// 4. If any Y was introduced, every Y is turned back into y.
define stem as (
unset Y_found
do ( ['y'] <-'Y' set Y_found)
do repeat(goto (v ['y']) <-'Y' set Y_found)
$p1 = limit
$p2 = limit
do(
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
backwards (
do Step_1a
do Step_1b
do Step_1c
do Step_2
do Step_3
do Step_4
do Step_5a
do Step_5b
)
do(Y_found repeat(goto (['Y']) <-'y'))
)
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
verb_suffix
residual_suffix
residual_form
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a' hex 'E1' // a-acute
stringdef a^ hex 'E2' // a-circumflex e.g. 'bota^nico
stringdef e' hex 'E9' // e-acute
stringdef e^ hex 'EA' // e-circumflex
stringdef i' hex 'ED' // i-acute
stringdef o^ hex 'F4' // o-circumflex
stringdef o' hex 'F3' // o-acute
stringdef u' hex 'FA' // u-acute
stringdef c, hex 'E7' // c-cedilla
stringdef a~ hex 'E3' // a-tilde
stringdef o~ hex 'F5' // o-tilde
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
// Walks the word, rewriting a-tilde and o-tilde as the two-character
// sequences a~ and o~ and stepping over any other character.
define prelude as repeat (
[substring] among(
'{a~}' (<- 'a~')
'{o~}' (<- 'o~')
'' (next)
) //or next
)
// Sets the marks pV, p1 and p2; all three default to limit.
// pV depends on the first two characters:
//   vowel, non-vowel     -> just past the next vowel
//   vowel, vowel         -> just past the next non-vowel
//   non-vowel, non-vowel -> just past the next vowel
//   non-vowel, vowel     -> one character further on
// p1 is just past the first non-vowel that follows a vowel; p2 applies the
// same rule again starting from p1. Both parts are under do, so a word too
// short for a mark simply keeps the default.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Walks the word, turning the two-character sequences a~ and o~ back into
// a-tilde and o-tilde and stepping over any other character.
define postlude as repeat (
[substring] among(
'a~' (<- '{a~}')
'o~' (<- '{o~}')
'' (next)
) //or next
)
backwardmode (
// Region tests: each succeeds when the cursor is at or to the right of the
// corresponding mark (pV, p1, p2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Removes or rewrites the longest matching ending from the table. Each run
// of strings shares the parenthesised action that follows it. R1, R2 and RV
// are tested at the start of the matched ending. Several actions go on to
// try removing one more ending exposed by the first deletion.
// NOTE(review): the entries ending in ucion / uciones, and the accented
// logia forms, look like Spanish spellings rather than Portuguese ones -
// confirm against the reference description of the algorithm before
// changing them, since any change alters stemmer output.
define standard_suffix as (
[substring] among(
'eza' 'ezas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'{a'}vel'
'{i'}vel'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amento' 'amentos'
'imento' 'imentos'
'adora' 'ador' 'a{c,}a~o'
'adoras' 'adores' 'a{c,}o~es' // no -ic test
'ante' 'antes' '{a^}ncia' // Note 1
(
R2 delete
)
'log{i'}a'
'log{i'}as'
(
R2 <- 'log'
)
'uci{o'}n' 'uciones'
(
R2 <- 'u'
)
'{e^}ncia' '{e^}ncias'
(
R2 <- 'ente'
)
'amente'
(
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
R2 delete
try (
[substring] among(
'ante' // Note 1
'avel'
'{i'}vel' (R2 delete)
)
)
)
'idade'
'idades'
(
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
'ira' 'iras'
(
RV 'e' // -eira -eiras usually non-verbal
<- 'ir'
)
)
)
// Deletes the longest listed verb ending. setlimit tomark pV confines the
// search to the part of the word from pV to the end.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
'{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
'{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
'{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
'{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'
'ira' 'iras'
(delete)
)
)
// Deletes a final os, or a final a, i or o (plain or acute-accented), when
// the ending lies in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
)
)
// A final e (plain, acute or circumflex) lying in RV is deleted; then a u
// preceded by g, or an i preceded by c, is deleted as well if it lies in RV.
// A final c-cedilla is replaced by c.
define residual_form as (
[substring] among(
'e' '{e'}' '{e^}'
( RV delete [('u'] test 'g') or
('i'] test 'c') RV delete )
'{c,}' (<-'c')
)
)
)
// Entry point. prelude and mark_regions run forwards. Working backwards:
// standard_suffix is tried, then verb_suffix if that fails. When either
// succeeds, a final i preceded by c and lying in RV is also deleted. Only
// when both fail is residual_suffix tried. residual_form always runs next,
// and postlude restores the tilde vowels.
define stem as (
do prelude
do mark_regions
backwards (
do (
( ( standard_suffix or verb_suffix )
and do ( ['i'] test 'c' RV delete )
)
or residual_suffix
)
do residual_form
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
routines (
prelude postlude mark_regions
RV R1 R2
standard_suffix
verb_suffix
residual_suffix
residual_form
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a' hex 'A0' // a-acute
stringdef a^ hex '83' // a-circumflex e.g. 'bota^nico
stringdef e' hex '82' // e-acute
stringdef e^ hex '88' // e-circumflex
stringdef i' hex 'A1' // i-acute
stringdef o^ hex '93' // o-circumflex
stringdef o' hex 'A2' // o-acute
stringdef u' hex 'A3' // u-acute
stringdef c, hex '87' // c-cedilla
stringdef a~ hex 'C6' // a-tilde
stringdef o~ hex 'E4' // o-tilde
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
// Walks the word, rewriting a-tilde and o-tilde as the two-character
// sequences a~ and o~ and stepping over any other character.
define prelude as repeat (
[substring] among(
'{a~}' (<- 'a~')
'{o~}' (<- 'o~')
'' (next)
) //or next
)
// Sets the marks pV, p1 and p2; all three default to limit.
// pV depends on the first two characters:
//   vowel, non-vowel     -> just past the next vowel
//   vowel, vowel         -> just past the next non-vowel
//   non-vowel, non-vowel -> just past the next vowel
//   non-vowel, vowel     -> one character further on
// p1 is just past the first non-vowel that follows a vowel; p2 applies the
// same rule again starting from p1. Both parts are under do, so a word too
// short for a mark simply keeps the default.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Walks the word, turning the two-character sequences a~ and o~ back into
// a-tilde and o-tilde and stepping over any other character.
define postlude as repeat (
[substring] among(
'a~' (<- '{a~}')
'o~' (<- '{o~}')
'' (next)
) //or next
)
backwardmode (
// Region tests: each succeeds when the cursor is at or to the right of the
// corresponding mark (pV, p1, p2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Removes or rewrites the longest matching ending from the table. Each run
// of strings shares the parenthesised action that follows it. R1, R2 and RV
// are tested at the start of the matched ending. Several actions go on to
// try removing one more ending exposed by the first deletion.
// NOTE(review): the entries ending in ucion / uciones, and the accented
// logia forms, look like Spanish spellings rather than Portuguese ones -
// confirm against the reference description of the algorithm before
// changing them, since any change alters stemmer output.
define standard_suffix as (
[substring] among(
'eza' 'ezas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'{a'}vel'
'{i'}vel'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amento' 'amentos'
'imento' 'imentos'
'adora' 'ador' 'a{c,}a~o'
'adoras' 'adores' 'a{c,}o~es' // no -ic test
'ante' 'antes' '{a^}ncia' // Note 1
(
R2 delete
)
'log{i'}a'
'log{i'}as'
(
R2 <- 'log'
)
'uci{o'}n' 'uciones'
(
R2 <- 'u'
)
'{e^}ncia' '{e^}ncias'
(
R2 <- 'ente'
)
'amente'
(
R1 delete
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
R2 delete
try (
[substring] among(
'ante' // Note 1
'avel'
'{i'}vel' (R2 delete)
)
)
)
'idade'
'idades'
(
R2 delete
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
'ira' 'iras'
(
RV 'e' // -eira -eiras usually non-verbal
<- 'ir'
)
)
)
// Deletes the longest listed verb ending. setlimit tomark pV confines the
// search to the part of the word from pV to the end.
define verb_suffix as setlimit tomark pV for (
[substring] among(
'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
'{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
'{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
'{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
'{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'
'ira' 'iras'
(delete)
)
)
// Deletes a final os, or a final a, i or o (plain or acute-accented), when
// the ending lies in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
)
)
// A final e (plain, acute or circumflex) lying in RV is deleted; then a u
// preceded by g, or an i preceded by c, is deleted as well if it lies in RV.
// A final c-cedilla is replaced by c.
define residual_form as (
[substring] among(
'e' '{e'}' '{e^}'
( RV delete [('u'] test 'g') or
('i'] test 'c') RV delete )
'{c,}' (<-'c')
)
)
)
// Entry point. prelude and mark_regions run forwards. Working backwards:
// standard_suffix is tried, then verb_suffix if that fails. When either
// succeeds, a final i preceded by c and lying in RV is also deleted. Only
// when both fail is residual_suffix tried. residual_form always runs next,
// and postlude restores the tilde vowels.
define stem as (
do prelude
do mark_regions
backwards (
do (
( ( standard_suffix or verb_suffix )
and do ( ['i'] test 'c' RV delete )
)
or residual_suffix
)
do residual_form
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
routines (
prelude postlude mark_regions
RV R1 R2
step_0
standard_suffix combo_suffix
verb_suffix
vowel_suffix
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v )
booleans ( standard_suffix_removed )
stringescapes {}
/* special characters */
stringdef a^ hex 'E2' // a circumflex
stringdef i^ hex 'EE' // i circumflex
stringdef a+ hex 'E3' // a breve
stringdef s, hex 'BA' // s cedilla
stringdef t, hex 'FE' // t cedilla
define v 'aeiou{a^}{i^}{a+}'
// Walks the word and rewrites a u or an i standing between two vowels as
// upper-case U / I.
define prelude as (
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)
// Sets the marks pV, p1 and p2; all three default to limit.
// pV depends on the first two characters:
//   vowel, non-vowel     -> just past the next vowel
//   vowel, vowel         -> just past the next non-vowel
//   non-vowel, non-vowel -> just past the next vowel
//   non-vowel, vowel     -> one character further on
// p1 is just past the first non-vowel that follows a vowel; p2 applies the
// same rule again starting from p1. Both parts are under do, so a word too
// short for a mark simply keeps the default.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Walks the whole word, turning every upper-case I and U back into i and u
// and stepping over any other character.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: each succeeds when the cursor is at or to the right of the
// corresponding mark (pV, p1, p2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Step 0: when the longest matching ending lies in R1, it is deleted or
// shortened as given by its action. The ile rule is skipped when ab
// precedes the ending.
define step_0 as (
[substring] R1 among(
'ul' 'ului'
( delete )
'aua'
( <-'a' )
'ea' 'ele' 'elor'
( <-'e' )
'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
( <-'i')
'ile'
( not 'ab' <- 'i' )
'atei'
( <- 'at' )
'a{t,}ie' 'a{t,}ia'
( <- 'a{t,}i' )
)
)
// Reduces a compound ending lying in R1 to its shorter base form (abil,
// ibil, iv, ic, at, it) and sets standard_suffix_removed. The body is under
// test, so the cursor is put back afterwards and the routine can be applied
// again to the shortened word.
define combo_suffix as test (
[substring] R1 (
among(
/* 'IST'. alternative: include the following
'alism' 'alisme'
'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
<- 'al'
)
*/
'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
<- 'abil'
)
'ibilitate' (
<- 'ibil'
)
'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
<- 'iv'
)
'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
'icator' 'icatori'
'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
<- 'ic'
)
'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
'atoare' 'ator' 'atori'
'{a+}toare' '{a+}tor' '{a+}tori' (
<- 'at'
)
'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
'itoare' 'itor' 'itori' (
<- 'it'
)
)
set standard_suffix_removed
)
)
// Clears standard_suffix_removed, applies combo_suffix until it fails, then
// handles the longest matching ending that lies in R2:
//   first group   - deleted
//   iune / iuni   - the t-cedilla before the ending is replaced by t
//   ism / ist ... - replaced by ist
// standard_suffix_removed is set when one of these actions succeeds.
define standard_suffix as (
unset standard_suffix_removed
repeat combo_suffix
[substring] R2 (
among(
// past participle is treated here, rather than
// as a verb ending:
'at' 'ata' 'at{a+}' 'ati' 'ate'
'ut' 'uta' 'ut{a+}' 'uti' 'ute'
'it' 'ita' 'it{a+}' 'iti' 'ite'
'ic' 'ica' 'ice' 'ici' 'ic{a+}'
'abil' 'abila' 'abile' 'abili' 'abil{a+}'
'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
'ant' 'anta' 'ante' 'anti' 'ant{a+}'
'ator' 'atori'
'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
delete
)
'iune' 'iuni' (
'{t,}'] <- 't'
)
'ism' 'isme'
'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
<- 'ist'
/* 'IST'. alternative: remove with <- '' */
)
)
set standard_suffix_removed
)
)
// Deletes the longest listed verb ending found between pV and the end of
// the word (setlimit tomark pV). Endings in the first group are deleted
// only when a non-vowel or u precedes them; endings in the second group are
// deleted unconditionally.
define verb_suffix as setlimit tomark pV for (
[substring] among(
// 'long' infinitive:
'are' 'ere' 'ire' '{a^}re'
// gerund:
'ind' '{a^}nd'
'indu' '{a^}ndu'
'eze'
'easc{a+}'
// present:
'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
'e{s,}te'
'{a+}sc' '{a+}{s,}ti'
'{a+}{s,}te'
// imperfect:
'am' 'ai' 'au'
'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
// past: // (not 'ii')
'ui'
'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
'{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
// pluperfect:
'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
'{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
'{a^}ser{a+}'
'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
( non-v or 'u' delete )
// present:
'{a+}m' 'a{t,}i'
'em' 'e{t,}i'
'im' 'i{t,}i'
'{a^}m' '{a^}{t,}i'
// past:
'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
'sei' 'se'
// pluperfect:
'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
(delete)
)
)
// Deletes a final a, e, i, ie or a-breve when the ending lies in RV.
define vowel_suffix as (
[substring] RV among (
'a' 'e' 'i' 'ie' '{a+}' ( delete )
)
)
)
// Entry point. prelude and mark_regions run forwards; then, working
// backwards, step_0 and standard_suffix run. verb_suffix is tried only when
// standard_suffix_removed is not set. vowel_suffix follows, and postlude
// restores the I / U markers.
define stem as (
do prelude
do mark_regions
backwards (
do step_0
do standard_suffix
do ( standard_suffix_removed or verb_suffix )
do vowel_suffix
)
do postlude
)
routines (
prelude postlude mark_regions
RV R1 R2
step_0
standard_suffix combo_suffix
verb_suffix
vowel_suffix
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v )
booleans ( standard_suffix_removed )
stringescapes {}
/* special characters */
stringdef a^ hex '0E2' // a circumflex
stringdef i^ hex '0EE' // i circumflex
stringdef a+ hex '103' // a breve
stringdef s, hex '15F' // s cedilla
stringdef t, hex '163' // t cedilla
define v 'aeiou{a^}{i^}{a+}'
// Walks the word and rewrites a u or an i standing between two vowels as
// upper-case U / I.
define prelude as (
repeat goto (
v [ ('u' ] v <- 'U') or
('i' ] v <- 'I')
)
)
// Sets the marks pV, p1 and p2; all three default to limit.
// pV depends on the first two characters:
//   vowel, non-vowel     -> just past the next vowel
//   vowel, vowel         -> just past the next non-vowel
//   non-vowel, non-vowel -> just past the next vowel
//   non-vowel, vowel     -> one character further on
// p1 is just past the first non-vowel that follows a vowel; p2 applies the
// same rule again starting from p1. Both parts are under do, so a word too
// short for a mark simply keeps the default.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
do (
( v (non-v gopast v) or (v gopast non-v) )
or
( non-v (non-v gopast v) or (v next) )
setmark pV
)
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// Walks the whole word, turning every upper-case I and U back into i and u
// and stepping over any other character.
define postlude as repeat (
[substring] among(
'I' (<- 'i')
'U' (<- 'u')
'' (next)
)
)
backwardmode (
// Region tests: each succeeds when the cursor is at or to the right of the
// corresponding mark (pV, p1, p2).
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// Step 0: when the longest matching ending lies in R1, it is deleted or
// shortened as given by its action. The ile rule is skipped when ab
// precedes the ending.
define step_0 as (
[substring] R1 among(
'ul' 'ului'
( delete )
'aua'
( <-'a' )
'ea' 'ele' 'elor'
( <-'e' )
'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
( <-'i')
'ile'
( not 'ab' <- 'i' )
'atei'
( <- 'at' )
'a{t,}ie' 'a{t,}ia'
( <- 'a{t,}i' )
)
)
// Reduces a compound ending lying in R1 to its shorter base form (abil,
// ibil, iv, ic, at, it) and sets standard_suffix_removed. The body is under
// test, so the cursor is put back afterwards and the routine can be applied
// again to the shortened word.
define combo_suffix as test (
[substring] R1 (
among(
/* 'IST'. alternative: include the following
'alism' 'alisme'
'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
<- 'al'
)
*/
'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
<- 'abil'
)
'ibilitate' (
<- 'ibil'
)
'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
<- 'iv'
)
'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
'icator' 'icatori'
'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
<- 'ic'
)
'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
'atoare' 'ator' 'atori'
'{a+}toare' '{a+}tor' '{a+}tori' (
<- 'at'
)
'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
'itoare' 'itor' 'itori' (
<- 'it'
)
)
set standard_suffix_removed
)
)
// Clears standard_suffix_removed, applies combo_suffix until it fails, then
// handles the longest matching ending that lies in R2:
//   first group   - deleted
//   iune / iuni   - the t-cedilla before the ending is replaced by t
//   ism / ist ... - replaced by ist
// standard_suffix_removed is set when one of these actions succeeds.
define standard_suffix as (
unset standard_suffix_removed
repeat combo_suffix
[substring] R2 (
among(
// past participle is treated here, rather than
// as a verb ending:
'at' 'ata' 'at{a+}' 'ati' 'ate'
'ut' 'uta' 'ut{a+}' 'uti' 'ute'
'it' 'ita' 'it{a+}' 'iti' 'ite'
'ic' 'ica' 'ice' 'ici' 'ic{a+}'
'abil' 'abila' 'abile' 'abili' 'abil{a+}'
'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
'ant' 'anta' 'ante' 'anti' 'ant{a+}'
'ator' 'atori'
'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
delete
)
'iune' 'iuni' (
'{t,}'] <- 't'
)
'ism' 'isme'
'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
<- 'ist'
/* 'IST'. alternative: remove with <- '' */
)
)
set standard_suffix_removed
)
)
// verb_suffix: with the limit temporarily moved to pV, find the longest
// listed verb ending. Endings of the first group are deleted only when they
// are preceded by a non-vowel or by 'u'; endings of the second group are
// deleted unconditionally.
define verb_suffix as setlimit tomark pV for (
[substring] among(
// 'long' infinitive:
'are' 'ere' 'ire' '{a^}re'
// gerund:
'ind' '{a^}nd'
'indu' '{a^}ndu'
'eze'
'easc{a+}'
// present:
'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
'e{s,}te'
'{a+}sc' '{a+}{s,}ti'
'{a+}{s,}te'
// imperfect:
'am' 'ai' 'au'
'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
// past: // (not 'ii')
'ui'
'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
'{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
// pluperfect:
'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
'{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
'{a^}ser{a+}'
'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
( non-v or 'u' delete )
// present:
'{a+}m' 'a{t,}i'
'em' 'e{t,}i'
'im' 'i{t,}i'
'{a^}m' '{a^}{t,}i'
// past:
'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
'sei' 'se'
// pluperfect:
'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
(delete)
)
)
// vowel_suffix: delete a final 'a', 'e', 'i', 'ie' or {a+} when it lies in RV.
define vowel_suffix as (
[substring] RV among (
'a' 'e' 'i' 'ie' '{a+}' ( delete )
)
)
)
// stem: entry point. prelude and mark_regions run forwards, the suffix steps
// run backwards, and postlude runs forwards again. Every step is wrapped in
// do, so a step that fails does not make stem fail.
define stem as (
do prelude
do mark_regions
backwards (
do step_0
do standard_suffix
// verb_suffix is attempted only when the flag is not set
do ( standard_suffix_removed or verb_suffix )
do vowel_suffix
)
do postlude
)
stringescapes {}
/* the 32 Cyrillic letters in the KOI8-R coding scheme, and represented
in Latin characters following the conventions of the standard Library
of Congress transliteration: */
stringdef a hex 'C1'
stringdef b hex 'C2'
stringdef v hex 'D7'
stringdef g hex 'C7'
stringdef d hex 'C4'
stringdef e hex 'C5'
stringdef zh hex 'D6'
stringdef z hex 'DA'
stringdef i hex 'C9'
stringdef i` hex 'CA'
stringdef k hex 'CB'
stringdef l hex 'CC'
stringdef m hex 'CD'
stringdef n hex 'CE'
stringdef o hex 'CF'
stringdef p hex 'D0'
stringdef r hex 'D2'
stringdef s hex 'D3'
stringdef t hex 'D4'
stringdef u hex 'D5'
stringdef f hex 'C6'
stringdef kh hex 'C8'
stringdef ts hex 'C3'
stringdef ch hex 'DE'
stringdef sh hex 'DB'
stringdef shch hex 'DD'
stringdef " hex 'DF'
stringdef y hex 'D9'
stringdef ' hex 'D8'
stringdef e` hex 'DC'
stringdef iu hex 'C0'
stringdef ia hex 'D1'
routines ( mark_regions R2
perfective_gerund
adjective
adjectival
reflexive
verb
noun
derivational
tidy_up
)
externals ( stem )
integers ( pV p2 )
groupings ( v )
define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
// mark_regions: set pV just past the first vowel of the word, and p2 just
// past the second occurrence of "vowel followed later by a non-vowel".
// Both marks stay at limit when the pattern is not found.
define mark_regions as (
$pV = limit
$p2 = limit
do (
gopast v setmark pV gopast non-v
gopast v gopast non-v setmark p2
)
)
backwardmode (
// R2: succeeds (in backward mode) when the cursor is at or beyond mark p2.
define R2 as $p2 <= cursor
// perfective_gerund: find the longest listed ending. Endings of the first
// group are deleted only when preceded by {a} or {ia}; endings of the second
// group are deleted unconditionally.
define perfective_gerund as (
[substring] among (
'{v}'
'{v}{sh}{i}'
'{v}{sh}{i}{s}{'}'
('{a}' or '{ia}' delete)
'{i}{v}'
'{i}{v}{sh}{i}'
'{i}{v}{sh}{i}{s}{'}'
'{y}{v}'
'{y}{v}{sh}{i}'
'{y}{v}{sh}{i}{s}{'}'
(delete)
)
)
// adjective: delete the longest listed ending; fails when none matches.
define adjective as (
[substring] among (
'{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
'{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
'{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
'{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
'{ia}{ia}'
// and -
'{o}{iu}' // - which is somewhat archaic
'{e}{iu}' // - soft form of {o}{iu}
(delete)
)
)
// adjectival: adjective must succeed first; a participle ending in front of
// it is then removed as well if one is found (the try makes this optional).
define adjectival as (
adjective
/* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
errors. Removing im, uem, enn creates too many errors.
*/
try (
[substring] among (
'{e}{m}' // present passive participle
'{n}{n}' // adjective from past passive participle
'{v}{sh}' // past active participle
'{iu}{shch}' '{shch}' // present active participle
// this group is deleted only when preceded by {a} or {ia}
('{a}' or '{ia}' delete)
//but not '{i}{m}' '{u}{e}{m}' // present passive participle
//or '{e}{n}{n}' // adjective from past passive participle
'{i}{v}{sh}' '{y}{v}{sh}'// past active participle
'{u}{iu}{shch}' // present active participle
(delete)
)
)
)
// reflexive: delete a final {s}{ia} or {s}{'}.
define reflexive as (
[substring] among (
'{s}{ia}'
'{s}{'}'
(delete)
)
)
// verb: find the longest listed ending. Endings of the first group are
// deleted only when preceded by {a} or {ia}; endings of the second group are
// deleted unconditionally.
define verb as (
[substring] among (
'{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
'{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
'{n}{y}' '{t}{'}' '{e}{sh}{'}'
'{n}{n}{o}'
('{a}' or '{ia}' delete)
'{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
'{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
'{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
'{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
'{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
'{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
(delete)
/* note the short passive participle tests:
'{n}{a}' '{n}' '{n}{o}' '{n}{y}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
*/
)
)
// noun: delete the longest listed ending; fails when none matches.
define noun as (
[substring] among (
'{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
'{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
'{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
'{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
'{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
'{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
(delete)
/* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{x}'
omitted - they only occur on 12 words.
*/
)
)
// derivational: delete a final {o}{s}{t} or {o}{s}{t}{'} when it lies in R2.
define derivational as (
[substring] R2 among (
'{o}{s}{t}'
'{o}{s}{t}{'}'
(delete)
)
)
// tidy_up: final clean-up of the stem; exactly one of the three cases below
// is applied, chosen by the longest match.
define tidy_up as (
[substring] among (
'{e}{i`}{sh}'
'{e}{i`}{sh}{e}' // superlative forms
// delete the ending, then require a double {n} and delete one {n} of it
(delete
['{n}'] '{n}' delete
)
'{n}'
('{n}' delete) // e.g. -nno endings
'{'}'
(delete) // with some slight false conflations
)
)
)
// stem: entry point. After mark_regions, all suffix work runs backwards with
// the limit set to pV. Either a perfective gerund ending is removed, or an
// optional reflexive ending followed by the first of adjectival, verb and
// noun that succeeds.
define stem as (
do mark_regions
backwards setlimit tomark pV for (
do (
perfective_gerund or
( try reflexive
adjectival or verb or noun
)
)
try([ '{i}' ] delete)
// because noun ending -i{iu} is being treated as verb ending -{iu}
do derivational
do tidy_up
)
)
stringescapes {}
/* the 32 Cyrillic letters in Unicode */
stringdef a hex '430'
stringdef b hex '431'
stringdef v hex '432'
stringdef g hex '433'
stringdef d hex '434'
stringdef e hex '435'
stringdef zh hex '436'
stringdef z hex '437'
stringdef i hex '438'
stringdef i` hex '439'
stringdef k hex '43A'
stringdef l hex '43B'
stringdef m hex '43C'
stringdef n hex '43D'
stringdef o hex '43E'
stringdef p hex '43F'
stringdef r hex '440'
stringdef s hex '441'
stringdef t hex '442'
stringdef u hex '443'
stringdef f hex '444'
stringdef kh hex '445'
stringdef ts hex '446'
stringdef ch hex '447'
stringdef sh hex '448'
stringdef shch hex '449'
stringdef " hex '44A'
stringdef y hex '44B'
stringdef ' hex '44C'
stringdef e` hex '44D'
stringdef iu hex '44E'
stringdef ia hex '44F'
routines ( mark_regions R2
perfective_gerund
adjective
adjectival
reflexive
verb
noun
derivational
tidy_up
)
externals ( stem )
integers ( pV p2 )
groupings ( v )
define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
// mark_regions: set pV just past the first vowel of the word, and p2 just
// past the second occurrence of "vowel followed later by a non-vowel".
// Both marks stay at limit when the pattern is not found.
define mark_regions as (
$pV = limit
$p2 = limit
do (
gopast v setmark pV gopast non-v
gopast v gopast non-v setmark p2
)
)
backwardmode (
// R2: succeeds (in backward mode) when the cursor is at or beyond mark p2.
define R2 as $p2 <= cursor
// perfective_gerund: find the longest listed ending. Endings of the first
// group are deleted only when preceded by {a} or {ia}; endings of the second
// group are deleted unconditionally.
define perfective_gerund as (
[substring] among (
'{v}'
'{v}{sh}{i}'
'{v}{sh}{i}{s}{'}'
('{a}' or '{ia}' delete)
'{i}{v}'
'{i}{v}{sh}{i}'
'{i}{v}{sh}{i}{s}{'}'
'{y}{v}'
'{y}{v}{sh}{i}'
'{y}{v}{sh}{i}{s}{'}'
(delete)
)
)
// adjective: delete the longest listed ending; fails when none matches.
define adjective as (
[substring] among (
'{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
'{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
'{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
'{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
'{ia}{ia}'
// and -
'{o}{iu}' // - which is somewhat archaic
'{e}{iu}' // - soft form of {o}{iu}
(delete)
)
)
// adjectival: adjective must succeed first; a participle ending in front of
// it is then removed as well if one is found (the try makes this optional).
define adjectival as (
adjective
/* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
errors. Removing im, uem, enn creates too many errors.
*/
try (
[substring] among (
'{e}{m}' // present passive participle
'{n}{n}' // adjective from past passive participle
'{v}{sh}' // past active participle
'{iu}{shch}' '{shch}' // present active participle
// this group is deleted only when preceded by {a} or {ia}
('{a}' or '{ia}' delete)
//but not '{i}{m}' '{u}{e}{m}' // present passive participle
//or '{e}{n}{n}' // adjective from past passive participle
'{i}{v}{sh}' '{y}{v}{sh}'// past active participle
'{u}{iu}{shch}' // present active participle
(delete)
)
)
)
// reflexive: delete a final {s}{ia} or {s}{'}.
define reflexive as (
[substring] among (
'{s}{ia}'
'{s}{'}'
(delete)
)
)
// verb: find the longest listed ending. Endings of the first group are
// deleted only when preceded by {a} or {ia}; endings of the second group are
// deleted unconditionally.
define verb as (
[substring] among (
'{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
'{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
'{n}{y}' '{t}{'}' '{e}{sh}{'}'
'{n}{n}{o}'
('{a}' or '{ia}' delete)
'{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
'{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
'{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
'{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
'{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
'{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
(delete)
/* note the short passive participle tests:
'{n}{a}' '{n}' '{n}{o}' '{n}{y}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
*/
)
)
// noun: delete the longest listed ending; fails when none matches.
define noun as (
[substring] among (
'{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
'{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
'{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
'{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
'{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
'{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
(delete)
/* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
'{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{x}'
omitted - they only occur on 12 words.
*/
)
)
// derivational: delete a final {o}{s}{t} or {o}{s}{t}{'} when it lies in R2.
define derivational as (
[substring] R2 among (
'{o}{s}{t}'
'{o}{s}{t}{'}'
(delete)
)
)
// tidy_up: final clean-up of the stem; exactly one of the three cases below
// is applied, chosen by the longest match.
define tidy_up as (
[substring] among (
'{e}{i`}{sh}'
'{e}{i`}{sh}{e}' // superlative forms
// delete the ending, then require a double {n} and delete one {n} of it
(delete
['{n}'] '{n}' delete
)
'{n}'
('{n}' delete) // e.g. -nno endings
'{'}'
(delete) // with some slight false conflations
)
)
)
// stem: entry point. After mark_regions, all suffix work runs backwards with
// the limit set to pV. Either a perfective gerund ending is removed, or an
// optional reflexive ending followed by the first of adjectival, verb and
// noun that succeeds.
define stem as (
do mark_regions
backwards setlimit tomark pV for (
do (
perfective_gerund or
( try reflexive
adjectival or verb or noun
)
)
try([ '{i}' ] delete)
// because noun ending -i{iu} is being treated as verb ending -{iu}
do derivational
do tidy_up
)
)
routines (
postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
y_verb_suffix
verb_suffix
residual_suffix
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a' hex 'E1' // a-acute
stringdef e' hex 'E9' // e-acute
stringdef i' hex 'ED' // i-acute
stringdef o' hex 'F3' // o-acute
stringdef u' hex 'FA' // u-acute
stringdef u" hex 'FC' // u-diaeresis
stringdef n~ hex 'F1' // n-tilde
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'
// mark_regions: compute the marks pV, p1 and p2 used by the region tests
// RV, R1 and R2. Each mark stays at limit when its pattern is not found.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
// pV is decided by the first two letters of the word
do (
// vowel first: vowel + non-vowel -> just past the next vowel;
// vowel + vowel -> just past the next non-vowel
( v (non-v gopast v) or (v gopast non-v) )
or
// non-vowel first: two non-vowels -> just past the next vowel;
// non-vowel + vowel -> one character further on
( non-v (non-v gopast v) or (v next) )
setmark pV
)
// p1: just past the first non-vowel that follows a vowel;
// p2: the same search repeated, continuing from p1
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// postlude: walk through the word replacing each acute-accented vowel by the
// plain vowel. The empty string matches at every other position, where the
// action simply steps over one character.
define postlude as repeat (
[substring] among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
'{i'}' (<- 'i')
'{o'}' (<- 'o')
'{u'}' (<- 'u')
// and possibly {u"}->u here, or in prelude
'' (next)
) //or next
)
backwardmode (
// Region tests, used in backward mode: each one succeeds when the cursor is
// at or beyond the corresponding mark set by mark_regions.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// attached_pronoun: the first among puts the slice on a final pronoun; the
// second checks that one of the listed verb endings, lying in RV, stands in
// front of it, and decides what is removed.
define attached_pronoun as (
[substring] among(
'me' 'se' 'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
'las' 'les' 'los' 'nos'
)
substring RV among(
// accented endings: the slice is widened to take in the ending, and
// ending plus pronoun are replaced by the unaccented ending
'i{e'}ndo' (] <- 'iendo')
'{a'}ndo' (] <- 'ando')
'{a'}r' (] <- 'ar')
'{e'}r' (] <- 'er')
'{i'}r' (] <- 'ir')
// unaccented endings: only the pronoun is deleted
'ando'
'iendo'
'ar' 'er' 'ir'
(delete)
// 'yendo' must in addition be preceded by 'u'
'yendo' ('u' delete)
)
)
// standard_suffix: find the longest listed ending and apply the action of
// its group. Each action starts with a region test (R2, or R1 for 'amente');
// when that test fails the routine fails and nothing is changed.
define standard_suffix as (
[substring] among(
'anza' 'anzas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'able' 'ables'
'ible' 'ibles'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amiento' 'amientos'
'imiento' 'imientos'
(
R2 delete
)
'adora' 'ador' 'aci{o'}n'
'adoras' 'adores' 'aciones'
'ante' 'antes' 'ancia' 'ancias'// Note 1
(
R2 delete
// a preceding 'ic' lying in R2 is removed as well
try ( ['ic'] R2 delete )
)
'log{i'}a'
'log{i'}as'
(
R2 <- 'log'
)
'uci{o'}n' 'uciones'
(
R2 <- 'u'
)
'encia' 'encias'
(
R2 <- 'ente'
)
'amente'
(
R1 delete
// then optionally remove a preceding 'iv', 'os', 'ic' or 'ad' lying in
// R2; after 'iv', a further preceding 'at' in R2 must also be removed
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
R2 delete
// then optionally remove a preceding 'ante', 'able' or 'ible' in R2
try (
[substring] among(
'ante' // Note 1
'able'
'ible' (R2 delete)
)
)
)
'idad'
'idades'
(
R2 delete
// then optionally remove a preceding 'abil', 'ic' or 'iv' in R2
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
)
)
// y_verb_suffix: search for the longest listed ending with the limit
// temporarily moved to pV; the ending is deleted only when preceded by 'u'.
define y_verb_suffix as (
setlimit tomark pV for ([substring]) among(
'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
'yas' 'yes' 'yais' 'yamos'
('u' delete)
)
)
// verb_suffix: search for the longest listed ending with the limit
// temporarily moved to pV, then delete it.
define verb_suffix as (
setlimit tomark pV for ([substring]) among(
'en' 'es' '{e'}is' 'emos'
// when the ending is preceded by 'gu', the slice is widened to take in
// the 'u', so the 'u' is deleted together with the ending
(try ('u' test 'g') ] delete)
'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
'ar{e'}'
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
'er{e'}'
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
'ir{e'}'
'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
(delete)
)
)
// residual_suffix: delete a remaining final vowel ending (or 'os') when it
// lies in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
'e' '{e'}'
// after deleting the ending, also delete a now-final 'u' when it is
// preceded by 'g' and lies in RV
( RV delete try( ['u'] test 'g' RV delete ) )
)
)
)
// stem: entry point. mark_regions runs forwards, the suffix steps run
// backwards, and postlude runs forwards again. Of standard_suffix,
// y_verb_suffix and verb_suffix only the first one that succeeds is applied.
define stem as (
do mark_regions
backwards (
do attached_pronoun
do ( standard_suffix or
y_verb_suffix or
verb_suffix
)
do residual_suffix
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
routines (
postlude mark_regions
RV R1 R2
attached_pronoun
standard_suffix
y_verb_suffix
verb_suffix
residual_suffix
)
externals ( stem )
integers ( pV p1 p2 )
groupings ( v )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a' hex 'A0' // a-acute
stringdef e' hex '82' // e-acute
stringdef i' hex 'A1' // i-acute
stringdef o' hex 'A2' // o-acute
stringdef u' hex 'A3' // u-acute
stringdef u" hex '81' // u-diaeresis
stringdef n~ hex 'A4' // n-tilde
define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'
// mark_regions: compute the marks pV, p1 and p2 used by the region tests
// RV, R1 and R2. Each mark stays at limit when its pattern is not found.
define mark_regions as (
$pV = limit
$p1 = limit
$p2 = limit // defaults
// pV is decided by the first two letters of the word
do (
// vowel first: vowel + non-vowel -> just past the next vowel;
// vowel + vowel -> just past the next non-vowel
( v (non-v gopast v) or (v gopast non-v) )
or
// non-vowel first: two non-vowels -> just past the next vowel;
// non-vowel + vowel -> one character further on
( non-v (non-v gopast v) or (v next) )
setmark pV
)
// p1: just past the first non-vowel that follows a vowel;
// p2: the same search repeated, continuing from p1
do (
gopast v gopast non-v setmark p1
gopast v gopast non-v setmark p2
)
)
// postlude: walk through the word replacing each acute-accented vowel by the
// plain vowel. The empty string matches at every other position, where the
// action simply steps over one character.
define postlude as repeat (
[substring] among(
'{a'}' (<- 'a')
'{e'}' (<- 'e')
'{i'}' (<- 'i')
'{o'}' (<- 'o')
'{u'}' (<- 'u')
// and possibly {u"}->u here, or in prelude
'' (next)
) //or next
)
backwardmode (
// Region tests, used in backward mode: each one succeeds when the cursor is
// at or beyond the corresponding mark set by mark_regions.
define RV as $pV <= cursor
define R1 as $p1 <= cursor
define R2 as $p2 <= cursor
// attached_pronoun: the first among puts the slice on a final pronoun; the
// second checks that one of the listed verb endings, lying in RV, stands in
// front of it, and decides what is removed.
define attached_pronoun as (
[substring] among(
'me' 'se' 'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
'las' 'les' 'los' 'nos'
)
substring RV among(
// accented endings: the slice is widened to take in the ending, and
// ending plus pronoun are replaced by the unaccented ending
'i{e'}ndo' (] <- 'iendo')
'{a'}ndo' (] <- 'ando')
'{a'}r' (] <- 'ar')
'{e'}r' (] <- 'er')
'{i'}r' (] <- 'ir')
// unaccented endings: only the pronoun is deleted
'ando'
'iendo'
'ar' 'er' 'ir'
(delete)
// 'yendo' must in addition be preceded by 'u'
'yendo' ('u' delete)
)
)
// standard_suffix: find the longest listed ending and apply the action of
// its group. Each action starts with a region test (R2, or R1 for 'amente');
// when that test fails the routine fails and nothing is changed.
define standard_suffix as (
[substring] among(
'anza' 'anzas'
'ico' 'ica' 'icos' 'icas'
'ismo' 'ismos'
'able' 'ables'
'ible' 'ibles'
'ista' 'istas'
'oso' 'osa' 'osos' 'osas'
'amiento' 'amientos'
'imiento' 'imientos'
(
R2 delete
)
'adora' 'ador' 'aci{o'}n'
'adoras' 'adores' 'aciones'
'ante' 'antes' 'ancia' 'ancias'// Note 1
(
R2 delete
// a preceding 'ic' lying in R2 is removed as well
try ( ['ic'] R2 delete )
)
'log{i'}a'
'log{i'}as'
(
R2 <- 'log'
)
'uci{o'}n' 'uciones'
(
R2 <- 'u'
)
'encia' 'encias'
(
R2 <- 'ente'
)
'amente'
(
R1 delete
// then optionally remove a preceding 'iv', 'os', 'ic' or 'ad' lying in
// R2; after 'iv', a further preceding 'at' in R2 must also be removed
try (
[substring] R2 delete among(
'iv' (['at'] R2 delete)
'os'
'ic'
'ad'
)
)
)
'mente'
(
R2 delete
// then optionally remove a preceding 'ante', 'able' or 'ible' in R2
try (
[substring] among(
'ante' // Note 1
'able'
'ible' (R2 delete)
)
)
)
'idad'
'idades'
(
R2 delete
// then optionally remove a preceding 'abil', 'ic' or 'iv' in R2
try (
[substring] among(
'abil'
'ic'
'iv' (R2 delete)
)
)
)
'iva' 'ivo'
'ivas' 'ivos'
(
R2 delete
try (
['at'] R2 delete // but not a further ['ic'] R2 delete
)
)
)
)
// y_verb_suffix: search for the longest listed ending with the limit
// temporarily moved to pV; the ending is deleted only when preceded by 'u'.
define y_verb_suffix as (
setlimit tomark pV for ([substring]) among(
'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
'yas' 'yes' 'yais' 'yamos'
('u' delete)
)
)
// verb_suffix: search for the longest listed ending with the limit
// temporarily moved to pV, then delete it.
define verb_suffix as (
setlimit tomark pV for ([substring]) among(
'en' 'es' '{e'}is' 'emos'
// when the ending is preceded by 'gu', the slice is widened to take in
// the 'u', so the 'u' is deleted together with the ending
(try ('u' test 'g') ] delete)
'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
'ar{e'}'
'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
'er{e'}'
'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
'ir{e'}'
'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
'ierais' 'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
'{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
(delete)
)
)
// residual_suffix: delete a remaining final vowel ending (or 'os') when it
// lies in RV.
define residual_suffix as (
[substring] among(
'os'
'a' 'o' '{a'}' '{i'}' '{o'}'
( RV delete )
'e' '{e'}'
// after deleting the ending, also delete a now-final 'u' when it is
// preceded by 'g' and lies in RV
( RV delete try( ['u'] test 'g' RV delete ) )
)
)
)
// stem: entry point. mark_regions runs forwards, the suffix steps run
// backwards, and postlude runs forwards again. Of standard_suffix,
// y_verb_suffix and verb_suffix only the first one that succeeds is applied.
define stem as (
do mark_regions
backwards (
do attached_pronoun
do ( standard_suffix or
y_verb_suffix or
verb_suffix
)
do residual_suffix
)
do postlude
)
/*
Note 1: additions of 15 Jun 2005
*/
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)
externals ( stem )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in ISO Latin I) */
stringdef a" hex 'E4'
stringdef ao hex 'E5'
stringdef o" hex 'F6'
define v 'aeiouy{a"}{ao}{o"}'
define s_ending 'bcdfghjklmnoprtvy'
// mark_regions: set p1 just past the first non-vowel that follows a vowel,
// but never earlier than x, the position three characters into the word.
define mark_regions as (
$p1 = limit
// x = position after hopping 3 characters; test restores the cursor, and
// the routine fails here if the hop is not possible
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
// raise p1 to x when it fell short of it
try ( $p1 < x $p1 = x )
)
backwardmode (
// main_suffix: search for the longest listed ending with the limit
// temporarily moved to p1. Endings of the first group are deleted; a final
// 's' is deleted only when preceded by a member of grouping s_ending.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
'hetens' 'erns' 'at' 'andet' 'het' 'ast'
(delete)
's'
(s_ending delete)
)
)
// consonant_pair: with the limit at p1, check that the word ends in one of
// the listed pairs; 'and' then restarts from the same cursor position, and
// the single last character is sliced off and deleted.
define consonant_pair as setlimit tomark p1 for (
among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
and ([next] delete)
)
// other_suffix: with the limit at p1, delete 'lig', 'ig' or 'els', or
// shorten 'l{o"}st' and 'fullt' by dropping their final 't'.
define other_suffix as setlimit tomark p1 for (
[substring] among(
'lig' 'ig' 'els' (delete)
'l{o"}st' (<-'l{o"}s')
'fullt' (<-'full')
)
)
)
// stem: entry point. mark_regions runs forwards; the three suffix steps then
// run backwards, each wrapped in do so that a failing step is skipped.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)
routines (
mark_regions
main_suffix
consonant_pair
other_suffix
)
externals ( stem )
integers ( p1 x )
groupings ( v s_ending )
stringescapes {}
/* special characters (in MS-DOS Latin I) */
stringdef a" hex '84'
stringdef ao hex '86'
stringdef o" hex '94'
define v 'aeiouy{a"}{ao}{o"}'
define s_ending 'bcdfghjklmnoprtvy'
// mark_regions: set p1 just past the first non-vowel that follows a vowel,
// but never earlier than x, the position three characters into the word.
define mark_regions as (
$p1 = limit
// x = position after hopping 3 characters; test restores the cursor, and
// the routine fails here if the hop is not possible
test ( hop 3 setmark x )
goto v gopast non-v setmark p1
// raise p1 to x when it fell short of it
try ( $p1 < x $p1 = x )
)
backwardmode (
// main_suffix: search for the longest listed ending with the limit
// temporarily moved to p1. Endings of the first group are deleted; a final
// 's' is deleted only when preceded by a member of grouping s_ending.
define main_suffix as (
setlimit tomark p1 for ([substring])
among(
'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
'hetens' 'erns' 'at' 'andet' 'het' 'ast'
(delete)
's'
(s_ending delete)
)
)
// consonant_pair: with the limit at p1, check that the word ends in one of
// the listed pairs; 'and' then restarts from the same cursor position, and
// the single last character is sliced off and deleted.
define consonant_pair as setlimit tomark p1 for (
among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
and ([next] delete)
)
// other_suffix: with the limit at p1, delete 'lig', 'ig' or 'els', or
// shorten 'l{o"}st' and 'fullt' by dropping their final 't'.
define other_suffix as setlimit tomark p1 for (
[substring] among(
'lig' 'ig' 'els' (delete)
'l{o"}st' (<-'l{o"}s')
'fullt' (<-'full')
)
)
)
// stem: entry point. mark_regions runs forwards; the three suffix steps then
// run backwards, each wrapped in do so that a failing step is skipped.
define stem as (
do mark_regions
backwards (
do main_suffix
do consonant_pair
do other_suffix
)
)
/* Stemmer for Turkish
* author: Evren (Kapusuz) Çilden
* email: evren.kapusuz at gmail.com
* version: 1.0 (15.01.2007)
* stems nominal verb suffixes
* stems nominal inflections
* more than one syllable word check
* (y,n,s,U) context check
* vowel harmony check
* last consonant check and conversion (b, c, d, ğ to p, ç, t, k)
* The stemming algorithm is based on the paper "An Affix Stripping
* Morphological Analyzer for Turkish" by Gülşen Eryiğit and
* Eşref Adalı (Proceedings of the IASTED International Conference
* ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18, 2004,
* Innsbruck, Austria).
* Turkish is an agglutinative language and has a very rich morphological
* structure. In Turkish, you can form many different words from a single stem
* by appending a sequence of suffixes. Eg. The word "doktoruymuşsunuz" means
* "You had been the doctor of him". The stem of the word is "doktor" and it
* takes three different suffixes -sU, -ymUs, and -sUnUz. The rules about
* the append order of suffixes can be clearly described as FSMs.
* The paper referenced above defines some FSMs for right to left
* morphological analysis. I generated a method for constructing snowball
* expressions from right to left FSMs for stemming suffixes.
*/
routines (
append_U_to_stems_ending_with_d_or_g // for preventing some overstemmings
check_vowel_harmony // tests vowel harmony for suffixes
is_reserved_word // tests whether current string is a reserved word ('ad','soyad')
mark_cAsInA // nominal verb suffix
mark_DA // noun suffix
mark_DAn // noun suffix
mark_DUr // nominal verb suffix
mark_ki // noun suffix
mark_lAr // noun suffix, nominal verb suffix
mark_lArI // noun suffix
mark_nA // noun suffix
mark_ncA // noun suffix
mark_ndA // noun suffix
mark_ndAn // noun suffix
mark_nU // noun suffix
mark_nUn // noun suffix
mark_nUz // nominal verb suffix
mark_sU // noun suffix
mark_sUn // nominal verb suffix
mark_sUnUz // nominal verb suffix
mark_possessives // -(U)m,-(U)n,-(U)mUz,-(U)nUz,
mark_yA // noun suffix
mark_ylA // noun suffix
mark_yU // noun suffix
mark_yUm // nominal verb suffix
mark_yUz // nominal verb suffix
mark_yDU // nominal verb suffix
mark_yken // nominal verb suffix
mark_ymUs_ // nominal verb suffix
mark_ysA // nominal verb suffix
mark_suffix_with_optional_y_consonant
mark_suffix_with_optional_U_vowel
mark_suffix_with_optional_n_consonant
mark_suffix_with_optional_s_consonant
more_than_one_syllable_word
post_process_last_consonants
postlude
stem_nominal_verb_suffixes
stem_noun_suffixes
stem_suffix_chain_before_ki
)
/* Special characters in Unicode Latin-1 and Latin Extended-A */
stringdef c. hex 'E7' // LATIN SMALL LETTER C WITH CEDILLA
stringdef g~ hex '011F' // LATIN SMALL LETTER G WITH BREVE
stringdef i' hex '0131' // LATIN SMALL LETTER I WITHOUT DOT
stringdef o" hex 'F6' // LATIN SMALL LETTER O WITH DIAERESIS
stringdef s. hex '015F' // LATIN SMALL LETTER S WITH CEDILLA
stringdef u" hex 'FC' // LATIN SMALL LETTER U WITH DIAERESIS
stringescapes { }
integers ( strlen ) // length of a string
booleans ( continue_stemming_noun_suffixes )
groupings ( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)
define vowel 'ae{i'}io{o"}u{u"}'
define U '{i'}iu{u"}'
// the vowel grouping definitions below are used for checking vowel harmony
define vowel1 'a{i'}ou' // vowels that can end with suffixes containing 'a'
define vowel2 'ei{o"}{u"}' // vowels that can end with suffixes containing 'e'
define vowel3 'a{i'}' // vowels that can end with suffixes containing 'i''
define vowel4 'ei' // vowels that can end with suffixes containing 'i'
define vowel5 'ou' // vowels that can end with suffixes containing 'o' or 'u'
define vowel6 '{o"}{u"}' // vowels that can end with suffixes containing 'o"' or 'u"'
externals ( stem )
backwardmode (
// checks vowel harmony for possible suffixes,
// helps to detect whether the candidate for suffix applies to vowel harmony
// this rule is added to prevent over stemming
//
// Working backwards from the cursor: find the nearest vowel, match it, then
// scan further back for a vowel belonging to the grouping paired with it.
// The whole check is wrapped in test, so the cursor is not moved.
define check_vowel_harmony as (
test
(
(goto vowel) // if there is a vowel
(
('a' goto vowel1) or
('e' goto vowel2) or
('{i'}' goto vowel3) or
('i' goto vowel4) or
('o' goto vowel5) or
('{o"}' goto vowel6) or
('u' goto vowel5) or
('{u"}' goto vowel6)
)
)
)
// Context check for a suffix that may be joined to the stem by an extra 'n'.
// Succeeds in two cases:
// - the character before the cursor is 'n' and the one before that is a
//   vowel: the cursor moves back over the 'n';
// - the character before the cursor is not 'n' and the one before that is a
//   vowel: the cursor is left where it was.
// Fails otherwise.
// assumption: slice beginning is set correctly
define mark_suffix_with_optional_n_consonant as (
((test 'n') next (test vowel))
or
((not(test 'n')) test(next (test vowel)))
)
// Same check as above for an extra 's':
// - 's' preceded by a vowel: the cursor moves back over the 's';
// - a character other than 's', preceded by a vowel: the cursor stays put.
// Fails otherwise.
// assumption: slice beginning is set correctly
define mark_suffix_with_optional_s_consonant as (
((test 's') next (test vowel))
or
((not(test 's')) test(next (test vowel)))
)
// Same check as above for an extra 'y':
// - 'y' preceded by a vowel: the cursor moves back over the 'y';
// - a character other than 'y', preceded by a vowel: the cursor stays put.
// Fails otherwise.
// assumption: slice beginning is set correctly
define mark_suffix_with_optional_y_consonant as (
((test 'y') next (test vowel))
or
((not(test 'y')) test(next (test vowel)))
)
// Mirror image of the checks above for an extra vowel from grouping U:
// - a U vowel preceded by a non-vowel: the cursor moves back over the vowel;
// - a character outside U, preceded by a non-vowel: the cursor stays put.
// Fails otherwise.
define mark_suffix_with_optional_U_vowel as (
((test U) next (test non-vowel))
or
((not(test U)) test(next (test non-vowel)))
)
// mark_possessives: match one of the listed possessive endings, then apply
// the optional-U-vowel context check. No vowel harmony check is made here.
define mark_possessives as (
among ('m{i'}z' 'miz' 'muz' 'm{u"}z'
'n{i'}z' 'niz' 'nuz' 'n{u"}z' 'm' 'n')
(mark_suffix_with_optional_U_vowel)
)
// mark_sU: after the vowel harmony check, match one vowel from grouping U,
// then apply the optional-'s' context check.
define mark_sU as (
check_vowel_harmony
U
(mark_suffix_with_optional_s_consonant)
)
// mark_lArI: match 'leri' or 'lar{i'}'; no vowel harmony check is made here.
define mark_lArI as (
among ('leri' 'lar{i'}')
)
// mark_yU: after the vowel harmony check, match one vowel from grouping U,
// then apply the optional-'y' context check.
define mark_yU as (
check_vowel_harmony
U
(mark_suffix_with_optional_y_consonant)
)
// mark_nU: after the vowel harmony check, match 'n' followed by a U vowel.
define mark_nU as (
check_vowel_harmony
among ('n{i'}' 'ni' 'nu' 'n{u"}')
)
// mark_nUn: after the vowel harmony check, match a U vowel followed by 'n',
// then apply the optional-'n' context check.
define mark_nUn as (
check_vowel_harmony
among ('{i'}n' 'in' 'un' '{u"}n')
(mark_suffix_with_optional_n_consonant)
)
// The recognisers below each match one family of noun endings. Unless noted
// otherwise they first run check_vowel_harmony and then match one of the
// listed strings.
// mark_yA: 'a' or 'e', followed by the optional-'y' context check.
define mark_yA as (
check_vowel_harmony
among('a' 'e')
(mark_suffix_with_optional_y_consonant)
)
define mark_nA as (
check_vowel_harmony
among('na' 'ne')
)
define mark_DA as (
check_vowel_harmony
among('da' 'de' 'ta' 'te')
)
define mark_ndA as (
check_vowel_harmony
among('nda' 'nde')
)
define mark_DAn as (
check_vowel_harmony
among('dan' 'den' 'tan' 'ten')
)
define mark_ndAn as (
check_vowel_harmony
among('ndan' 'nden')
)
// mark_ylA: 'la' or 'le', followed by the optional-'y' context check.
define mark_ylA as (
check_vowel_harmony
among('la' 'le')
(mark_suffix_with_optional_y_consonant)
)
// mark_ki: matches the literal 'ki' only; no vowel harmony check.
define mark_ki as (
'ki'
)
// mark_ncA: 'ca' or 'ce', followed by the optional-'n' context check.
define mark_ncA as (
check_vowel_harmony
among('ca' 'ce')
(mark_suffix_with_optional_n_consonant)
)
// The recognisers below each match one family of nominal verb endings.
// Those that call check_vowel_harmony do so before matching; those followed
// by mark_suffix_with_optional_y_consonant apply that context check after
// the match.
define mark_yUm as (
check_vowel_harmony
among ('{i'}m' 'im' 'um' '{u"}m')
(mark_suffix_with_optional_y_consonant)
)
define mark_sUn as (
check_vowel_harmony
among ('s{i'}n' 'sin' 'sun' 's{u"}n' )
)
define mark_yUz as (
check_vowel_harmony
among ('{i'}z' 'iz' 'uz' '{u"}z')
(mark_suffix_with_optional_y_consonant)
)
// no vowel harmony check here
define mark_sUnUz as (
among ('s{i'}n{i'}z' 'siniz' 'sunuz' 's{u"}n{u"}z')
)
define mark_lAr as (
check_vowel_harmony
among ('ler' 'lar')
)
define mark_nUz as (
check_vowel_harmony
among ('n{i'}z' 'niz' 'nuz' 'n{u"}z')
)
define mark_DUr as (
check_vowel_harmony
among ('t{i'}r' 'tir' 'tur' 't{u"}r' 'd{i'}r' 'dir' 'dur' 'd{u"}r')
)
// no vowel harmony check here
define mark_cAsInA as (
among ('cas{i'}na' 'cesine')
)
define mark_yDU as (
check_vowel_harmony
among ('t{i'}m' 'tim' 'tum' 't{u"}m' 'd{i'}m' 'dim' 'dum' 'd{u"}m'
't{i'}n' 'tin' 'tun' 't{u"}n' 'd{i'}n' 'din' 'dun' 'd{u"}n'
't{i'}k' 'tik' 'tuk' 't{u"}k' 'd{i'}k' 'dik' 'duk' 'd{u"}k'
't{i'}' 'ti' 'tu' 't{u"}' 'd{i'}' 'di' 'du' 'd{u"}')
(mark_suffix_with_optional_y_consonant)
)
// does not fully obey vowel harmony
define mark_ysA as (
among ('sam' 'san' 'sak' 'sem' 'sen' 'sek' 'sa' 'se')
(mark_suffix_with_optional_y_consonant)
)
define mark_ymUs_ as (
check_vowel_harmony
among ('m{i'}{s.}' 'mi{s.}' 'mu{s.}' 'm{u"}{s.}')
(mark_suffix_with_optional_y_consonant)
)
// no vowel harmony check here
define mark_yken as (
'ken' (mark_suffix_with_optional_y_consonant)
)
// stem_nominal_verb_suffixes: open a slice at the end of the word, set
// continue_stemming_noun_suffixes, and try the alternative suffix chains in
// the order written; the first that succeeds is used. The closing ']delete'
// removes whatever slice is current at that point.
define stem_nominal_verb_suffixes as (
[
set continue_stemming_noun_suffixes
(mark_ymUs_ or mark_yDU or mark_ysA or mark_yken)
or
(mark_cAsInA (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_)
or
(
// only this alternative clears the flag that stem tests afterwards
mark_lAr ] delete try([(mark_DUr or mark_yDU or mark_ysA or mark_ymUs_))
unset continue_stemming_noun_suffixes
)
or
(mark_nUz (mark_yDU or mark_ysA))
or
((mark_sUnUz or mark_yUz or mark_sUn or mark_yUm) ] delete try([ mark_ymUs_))
or
(mark_DUr ] delete try([ (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_))
]delete
)
// stems noun suffix chains ending with -ki
//
// Requires 'ki' at the cursor and, in front of it, a chain that starts with
// mark_DA, mark_nUn or mark_ndA. The slice is opened before 'ki' is matched,
// so the first delete removes 'ki' together with the suffix in front of it.
// Further suffixes are then stripped one at a time, and the routine calls
// itself to handle another -ki chain further in.
define stem_suffix_chain_before_ki as (
[
mark_ki
(
(mark_DA] delete try([
(mark_lAr] delete try(stem_suffix_chain_before_ki))
or
(mark_possessives] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
))
or
(mark_nUn] delete try([
(mark_lArI] delete)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
))
or
(mark_ndA (
(mark_lArI] delete)
or
((mark_sU] delete try([mark_lAr]delete stem_suffix_chain_before_ki)))
or
(stem_suffix_chain_before_ki)
))
)
)
// stem_noun_suffixes: try the alternative noun suffix chains in the order
// written; the first alternative that succeeds is used. Each alternative
// names the outermost suffix first and then the suffixes that may stand in
// front of it, deleting each slice as it is closed.
define stem_noun_suffixes as (
([mark_lAr] delete try(stem_suffix_chain_before_ki))
or
([mark_ncA] delete
try(
([mark_lArI] delete)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
([mark_lAr] delete stem_suffix_chain_before_ki)
)
)
or
([(mark_ndA or mark_nA)
(
(mark_lArI] delete)
or
(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
)
)
or
// NOTE(review): the mark_lArI branch of this alternative closes no slice
// and deletes nothing - confirm that this is intended
([(mark_ndAn or mark_nU) ((mark_sU ] delete try([mark_lAr] delete stem_suffix_chain_before_ki)) or (mark_lArI)))
or
( [mark_DAn] delete try ([
(
(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
(mark_lAr] delete try(stem_suffix_chain_before_ki))
or
(stem_suffix_chain_before_ki)
))
)
or
([mark_nUn or mark_ylA] delete
try(
([mark_lAr] delete stem_suffix_chain_before_ki)
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
or
stem_suffix_chain_before_ki
)
)
or
([mark_lArI] delete)
or
(stem_suffix_chain_before_ki)
or
([mark_DA or mark_yU or mark_yA] delete try([((mark_possessives] delete try([mark_lAr)) or mark_lAr) ] delete [ stem_suffix_chain_before_ki))
or
([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
)
// post_process_last_consonants: replace a final 'b', 'c', 'd' or {g~} by
// 'p', {c.}, 't' or 'k' respectively; fails when the word ends otherwise.
define post_process_last_consonants as (
[substring] among (
'b' (<- 'p')
'c' (<- '{c.}')
'd' (<- 't')
'{g~}' (<- 'k')
)
)
// after stemming, if the word ends with 'd' or 'g', most probably the last U
// was overstemmed, as in 'kedim' -> 'ked'
// Turkish words don't usually end with 'd' or 'g'
// some very well known words are ignored (like 'ad' and 'soyad')
// appends U to stems ending with d or g, and decides which vowel to add
// based on the last vowel in the stem
define append_U_to_stems_ending_with_d_or_g as (
// test leaves the cursor at the end of the word
test('d' or 'g')
// each alternative looks back to the nearest vowel, checks which pair it
// belongs to, and inserts the matching U vowel at the cursor
(test((goto vowel) 'a' or '{i'}') <+ '{i'}')
or
(test((goto vowel) 'e' or 'i') <+ 'i')
or
(test((goto vowel) 'o' or 'u') <+ 'u')
or
(test((goto vowel) '{o"}' or '{u"}') <+ '{u"}')
)
)
// Tests whether the word has more than one syllable.
// In Turkish each vowel indicates a distinct syllable, so the test is that
// 'gopast vowel' can be repeated at least twice. The cursor is not moved.
define more_than_one_syllable_word as (
test (atleast 2 (gopast vowel))
)
// Succeeds when the word is one of the reserved words 'ad' or 'soyad':
// each branch searches for the literal, sets strlen to the literal's length
// and compares strlen with limit. Both branches are wrapped in `test`, so the
// cursor is not moved.
define is_reserved_word as (
test(gopast 'ad' ($strlen = 2) ($strlen == limit))
or
test(gopast 'soyad' ($strlen = 5) ($strlen == limit))
)
// Final clean-up, skipped for reserved words: working backwards from the end
// of the word, optionally re-append an overstemmed vowel and then optionally
// rewrite the last consonant. Both steps use `do`, so neither can make
// postlude fail.
define postlude as (
not(is_reserved_word)
backwards (
do append_U_to_stems_ending_with_d_or_g
do post_process_last_consonants
)
)
// Entry point. Only words with more than one syllable are stemmed.
// In backward mode: try the nominal-verb suffixes, require the
// continue_stemming_noun_suffixes flag, then try the noun suffixes.
// postlude runs afterwards in forward mode.
define stem as (
(more_than_one_syllable_word)
(
backwards (
do stem_nominal_verb_suffixes
continue_stemming_noun_suffixes
do stem_noun_suffixes
)
postlude
)
)
#include <stdio.h> /* main etc */
#include <stdlib.h> /* exit */
#include <string.h> /* memmove */
#include "header.h"
/* recursive usage: */
static void read_program_(struct analyser * a, int terminator);
static struct node * read_C(struct analyser * a);
static struct node * C_style(struct analyser * a, char * s, int token);
/* Report an internal fault number on stderr and terminate with status 1. */
static void fault(int n) {
    fprintf(stderr, "fault %d\n", n);
    exit(1);
}
/* Recursively print the syntax tree rooted at p on stdout.
 *
 * n is the nesting depth; one column is written per level, and the last
 * column holds the prefix s that identifies how p hangs off its parent
 * ("# " for an AE child, "@ " for an aux child, " " otherwise).
 * Siblings (p->right) are printed at the same depth.
 */
static void print_node_(struct node * p, int n, char * s) {
    int i;
    /* fputs rather than printf: s is data and must never be interpreted
       as a format string */
    for (i = 0; i < n; i++) fputs(i == n - 1 ? s : " ", stdout);
    printf("%s ", name_of_token(p->type));
    if (p->name != 0) report_b(stdout, p->name->b);
    if (p->literalstring != 0) {
        printf("'");
        report_b(stdout, p->literalstring);
        printf("'");
    }
    printf("\n");
    if (p->AE != 0) print_node_(p->AE, n + 1, "# ");
    if (p->left != 0) print_node_(p->left, n + 1, " ");
    if (p->right != 0) print_node_(p->right, n, " ");
    if (p->aux != 0) print_node_(p->aux, n + 1, "@ ");
}
/* Dump the analyser's whole program tree on stdout, starting at depth 0. */
extern void print_program(struct analyser * a) {
    struct node * root = a->program;
    print_node_(root, 0, " ");
}
static struct node * new_node(struct analyser * a, int type) {
NEW(node, p);
p->next = a->nodes; a->nodes = p;
p->left = 0;
p->right = 0;
p->aux = 0;
p->AE = 0;
p->name = 0;
p->literalstring = 0;
p->mode = a->mode;
p->line_number = a->tokeniser->line_number;
p->type = type;
return p;
}
/* Human-readable name of a string mode, for error messages.
 * Any value other than m_forward / m_backward is an internal fault. */
static char * name_of_mode(int n) {
    if (n == m_forward) return "string forward";
    /* case m_integer: return "integer"; */
    if (n != m_backward) fault(0);
    return "string backward";
}
/* Human-readable name for a type code as passed to check_name_type, used in
 * the "not of type ..." diagnostic (error 33).
 *
 * 'b' is handled here because check_name_type accepts it: without this case
 * a boolean type mismatch aborted with "fault 1" instead of being reported.
 * Any other unknown code is still an internal fault. */
static char * name_of_type(int n) {
    switch (n) {
        default: fault(1);
        /* fall through (fault does not return) */
        case 's': return "string";
        case 'i': return "integer";
        case 'b': return "boolean";
        case 'r': return "routine";
        case 'R': return "routine or grouping";
        case 'g': return "grouping";
    }
}
/* Count one more error; once 20 have already been counted, print "... etc"
 * and give up with exit status 1. */
static void count_error(struct analyser * a) {
    struct tokeniser * tok = a->tokeniser;
    if (tok->error_count >= 20) {
        fprintf(stderr, "... etc\n");
        exit(1);
    }
    tok->error_count += 1;
}
/* Report error number n at the tokeniser's current line on stderr.
 *
 * x is an extra argument whose meaning depends on n: a line number for
 * errors 14 and 20, a mode for 32 and 37, a type code for 33; it is unused
 * otherwise. For n >= 30 the current token text is printed before the
 * message; for n <= 13 the previous token (if any) is appended.
 * Every call is counted through count_error, which may exit. */
static void error2(struct analyser * a, int n, int x) {
struct tokeniser * t = a->tokeniser;
count_error(a);
fprintf(stderr, "Line %d", t->line_number);
if (t->get_depth > 0) fprintf(stderr, " (of included file)");
fprintf(stderr, ": ");
/* errors 30 and up are about a name: print it first */
if (n >= 30) report_b(stderr, t->b);
switch (n) {
case 0:
fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
case 3:
fprintf(stderr, "in among(...), ");
/* deliberate fall through: error 3 is error 1 with a prefix */
case 1:
fprintf(stderr, "unexpected %s", name_of_token(t->token));
if (t->token == c_number) fprintf(stderr, " %d", t->number);
if (t->token == c_name) {
fprintf(stderr, " ");
report_b(stderr, t->b);
} break;
case 2:
fprintf(stderr, "string omitted"); break;
case 14:
fprintf(stderr, "unresolved substring on line %d", x); break;
case 15:
fprintf(stderr, "%s not allowed inside reverse(...)", name_of_token(t->token)); break;
case 16:
fprintf(stderr, "empty grouping"); break;
case 17:
fprintf(stderr, "backwards used when already in this mode"); break;
case 18:
fprintf(stderr, "empty among(...)"); break;
case 19:
fprintf(stderr, "two adjacent bracketed expressions in among(...)"); break;
case 20:
fprintf(stderr, "substring preceded by another substring on line %d", x); break;
case 30:
fprintf(stderr, " re-declared"); break;
case 31:
fprintf(stderr, " undeclared"); break;
case 32:
fprintf(stderr, " declared as %s mode; used as %s mode",
name_of_mode(a->mode), name_of_mode(x)); break;
case 33:
fprintf(stderr, " not of type %s", name_of_type(x)); break;
case 34:
fprintf(stderr, " not of type string or integer"); break;
case 35:
fprintf(stderr, " misplaced"); break;
case 36:
fprintf(stderr, " redefined"); break;
case 37:
fprintf(stderr, " mis-used as %s mode",
name_of_mode(x)); break;
default:
fprintf(stderr, " error %d", n); break;
}
/* syntax-level errors also say what came just before */
if (n <= 13 && t->previous_token > 0)
fprintf(stderr, " after %s", name_of_token(t->previous_token));
fprintf(stderr, "\n");
}
/* Report error n with no extra argument (x = 0). */
static void error(struct analyser * a, int n) {
    error2(a, n, 0);
}
/* Report that the among(...) at node p contains the string b more than once. */
static void error3(struct analyser * a, struct node * p, symbol * b) {
    int line = p->line_number;
    count_error(a);
    fprintf(stderr, "among(...) on line %d has repeated string '", line);
    report_b(stderr, b);
    fprintf(stderr, "'\n");
}
static void error4(struct analyser * a, struct name * q) {
count_error(a);
report_b(stderr, q->b);
fprintf(stderr, " undefined\n");
}
static void omission_error(struct analyser * a, int n) {
a->tokeniser->omission = n;
error(a, 0);
}
/* True if the current token is `code`; otherwise report it as omitted and
 * return false. */
static int check_token(struct analyser * a, int code) {
    if (a->tokeniser->token == code) return true;
    omission_error(a, code);
    return false;
}
static int get_token(struct analyser * a, int code) {
struct tokeniser * t = a->tokeniser;
read_token(t);
{
int x = check_token(a, code);
unless (x) t->token_held = true;
return x;
}
}
static struct name * look_for_name(struct analyser * a) {
struct name * p = a->names;
symbol * q = a->tokeniser->b;
repeat {
if (p == 0) return 0;
{ symbol * b = p->b;
int n = SIZE(b);
if (n == SIZE(q) && memcmp(q, b, n * sizeof(symbol)) == 0) {
p->referenced = true;
return p;
}
}
p = p->next;
}
}
/* Like look_for_name, but reports error 31 (undeclared) when the name is
 * missing. Still returns 0 in that case. */
static struct name * find_name(struct analyser * a) {
    struct name * found = look_for_name(a);
    if (found != 0) return found;
    error(a, 31);
    return 0;
}
/* Bind routine p to `mode` on first use (mode still negative); afterwards
 * any use in a different mode is error 37. */
static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
    if (p->mode < 0) {
        p->mode = mode;
        return;
    }
    if (p->mode != mode) error2(a, 37, mode);
}
static void check_name_type(struct analyser * a, struct name * p, int type) {
switch (type) {
case 's': if (p->type == t_string) return; break;
case 'i': if (p->type == t_integer) return; break;
case 'b': if (p->type == t_boolean) return; break;
case 'R': if (p->type == t_grouping) return;
case 'r': if (p->type == t_routine ||
p->type == t_external) return; break;
case 'g': if (p->type == t_grouping) return; break;
}
error2(a, 33, type);
}
static void read_names(struct analyser * a, int type) {
struct tokeniser * t = a->tokeniser;
unless (get_token(a, c_bra)) return;
repeat {
if (read_token(t) != c_name) break;
if (look_for_name(a) != 0) error(a, 30); else {
NEW(name, p);
p->b = copy_b(t->b);
p->type = type;
p->mode = -1; /* routines, externals */
p->count = a->name_count[type];
p->referenced = false;
p->used = false;
p->grouping = 0;
p->definition = 0;
a->name_count[type] ++;
p->next = a->names;
a->names = p;
}
}
unless (check_token(a, c_ket)) t->token_held = true;
}
static symbol * new_literalstring(struct analyser * a) {
NEW(literalstring, p);
p->b = copy_b(a->tokeniser->b);
p->next = a->literalstrings;
a->literalstrings = p;
return p->b;
}
static int read_AE_test(struct analyser * a) {
struct tokeniser * t = a->tokeniser;
switch (read_token(t)) {
case c_assign: return c_mathassign;
case c_plusassign:
case c_minusassign:
case c_multiplyassign:
case c_divideassign:
case c_eq:
case c_ne:
case c_gr:
case c_ge:
case c_ls:
case c_le: return t->token;
default: error(a, 1); t->token_held = true; return c_eq;
}
}
/* Binding strength of a dyadic arithmetic operator token:
 * 1 for + and -, 2 for * and /, and -2 for anything that is not one. */
static int binding(int t) {
    if (t == c_plus || t == c_minus) return 1;
    if (t == c_multiply || t == c_divide) return 2;
    return -2;
}
static void name_to_node(struct analyser * a, struct node * p, int type) {
struct name * q = find_name(a);
unless (q == 0) {
check_name_type(a, q, type);
q->used = true;
}
p->name = q;
}
/* Parse an arithmetic expression by operator precedence.
 * B is the binding strength of the operator to the left: only operators
 * that bind more tightly than B are consumed here. Returns the expression
 * tree, or 0 if the first token cannot start an expression. */
static struct node * read_AE(struct analyser * a, int B) {
    struct tokeniser * t = a->tokeniser;
    struct node * operand;

    /* primary */
    switch (read_token(t)) {
        case c_minus: /* monadic */
            operand = new_node(a, c_neg);
            operand->right = read_AE(a, 100);
            break;
        case c_bra:
            operand = read_AE(a, 0);
            get_token(a, c_ket);
            break;
        case c_name:
            operand = new_node(a, c_name);
            name_to_node(a, operand, 'i');
            break;
        case c_maxint:
        case c_minint:
        case c_cursor:
        case c_limit:
        case c_size:
            operand = new_node(a, t->token);
            break;
        case c_number:
            operand = new_node(a, c_number);
            operand->number = t->number;
            break;
        case c_sizeof:
            operand = C_style(a, "s", c_sizeof);
            break;
        default:
            error(a, 1);
            t->token_held = true;
            return 0;
    }

    /* fold in following dyadic operators while they bind tighter than B */
    for (;;) {
        int op = read_token(t);
        int strength = binding(op);
        struct node * parent;
        if (strength <= B) {
            t->token_held = true;
            return operand;
        }
        parent = new_node(a, op);
        parent->left = operand;
        parent->right = read_AE(a, strength);
        operand = parent;
    }
}
static struct node * read_C_connection(struct analyser * a, struct node * q, int op) {
struct tokeniser * t = a->tokeniser;
struct node * p = new_node(a, op);
struct node * p_end = q;
p->left = q;
repeat {
q = read_C(a);
p_end->right = q; p_end = q;
if (read_token(t) != op) {
t->token_held = true;
break;
}
}
return p;
}
static struct node * read_C_list(struct analyser * a) {
struct tokeniser * t = a->tokeniser;
struct node * p = new_node(a, c_bra);
struct node * p_end = 0;
repeat {
int token = read_token(t);
if (token == c_ket) return p;
if (token < 0) { omission_error(a, c_ket); return p; }
t->token_held = true;
{
struct node * q = read_C(a);
repeat {
token = read_token(t);
if (token != c_and && token != c_or) {
t->token_held = true;
break;
}
q = read_C_connection(a, q, token);
}
if (p_end == 0) p->left = q; else p_end->right = q;
p_end = q;
}
}
}
static struct node * C_style(struct analyser * a, char * s, int token) {
int i;
struct node * p = new_node(a, token);
for (i = 0; s[i] != 0; i++) switch(s[i]) {
case 'C':
p->left = read_C(a); continue;
case 'D':
p->aux = read_C(a); continue;
case 'A':
p->AE = read_AE(a, 0); continue;
case 'f':
get_token(a, c_for); continue;
case 'S':
{
int str_token = read_token(a->tokeniser);
if (str_token == c_name) name_to_node(a, p, 's'); else
if (str_token == c_literalstring) p->literalstring = new_literalstring(a);
else error(a, 2);
}
continue;
case 'b':
case 's':
case 'i':
if (get_token(a, c_name)) name_to_node(a, p, s[i]);
continue;
}
return p;
}
/* Make a c_literalstring node holding a copy of the current token text. */
static struct node * read_literalstring(struct analyser * a) {
    struct node * lit = new_node(a, c_literalstring);
    lit->literalstring = new_literalstring(a);
    return lit;
}
/* Reverse the symbols of buffer b in place. */
static void reverse_b(symbol * b) {
    int lo;
    int hi;
    for (lo = 0, hi = SIZE(b) - 1; lo < hi; lo++, hi--) {
        int front = b[lo];
        int back = b[hi];
        b[lo] = back;
        b[hi] = front;
    }
}
/* qsort comparator for among entries: order by symbol value at the first
 * differing position; if one string is a prefix of the other, the shorter
 * one sorts first. */
static int compare_amongvec(const void *pv, const void *qv) {
    const struct amongvec * lhs = (const struct amongvec*)pv;
    const struct amongvec * rhs = (const struct amongvec*)qv;
    int common = lhs->size;
    int i = 0;
    if (rhs->size < common) common = rhs->size;
    while (i < common) {
        if (lhs->b[i] != rhs->b[i]) return lhs->b[i] - rhs->b[i];
        i++;
    }
    return lhs->size - rhs->size;
}
/* Build the among table for the among node p and append it to the
 * analyser's list of amongs.
 *
 * p->number is the count of literal strings in the among; p->left is the
 * chain of its items (literal strings and bracketed commands). substring is
 * the pending substring node, or 0; when present its mode decides the match
 * direction and it is linked to the new table.
 * Repeated strings are reported through error3. */
static void make_among(struct analyser * a, struct node * p, struct node * substring) {
NEW(among, x);
NEWVEC(amongvec, v, p->number);
struct node * q = p->left;
/* w1 is the next free entry; w0 is the first entry that has not yet been
   given a command */
struct amongvec * w0 = v;
struct amongvec * w1 = v;
int result = 1;
int direction = substring != 0 ? substring->mode : p->mode;
int backward = direction == m_backward;
if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
a->amongs_end = x;
x->next = 0;
x->b = v;
x->number = a->among_count++;
x->starter = 0;
/* a bracketed command before any string is kept as the starter */
if (q->type == c_bra) { x->starter = q; q = q->right; }
until (q == 0) {
if (q->type == c_literalstring) {
symbol * b = q->literalstring;
w1->b = b; /* pointer to case string */
w1->p = 0; /* pointer to corresponding case expression */
w1->size = SIZE(b); /* number of characters in string */
w1->i = -1; /* index of longest substring */
w1->result = -1; /* number of corresponding case expression */
w1->function = q->left == 0 ? 0 : q->left->name;
unless (w1->function == 0)
check_routine_mode(a, w1->function, direction);
w1++;
}
else
if (q->left == 0) /* empty command: () */
w0 = w1;
else {
/* a command applies to every string since the previous command */
until (w0 == w1) {
w0->p = q;
w0->result = result;
w0++;
}
result++;
}
q = q->right;
}
/* internal consistency check: one entry per counted literal */
unless (w1-v == p->number) { fprintf(stderr, "oh! %d %d\n", (int)(w1-v), p->number); exit(1); }
/* in backward mode the strings are reversed for sorting and prefix
   detection, then restored below */
if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
qsort(v, w1 - v, sizeof(struct amongvec), compare_amongvec);
/* the following loop is O(n squared) */
for (w0 = w1 - 1; w0 >= v; w0--) {
symbol * b = w0->b;
int size = w0->size;
struct amongvec * w;
/* nearest earlier entry that is a proper prefix of this one */
for (w = w0 - 1; w >= v; w--) {
if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
w0->i = w - v; /* fill in index of longest substring */
break;
}
}
}
if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
/* after sorting, equal strings are adjacent */
for (w0 = v; w0 < w1 - 1; w0++)
if (w0->size == (w0 + 1)->size &&
memcmp(w0->b, (w0 + 1)->b, w0->size * sizeof(symbol)) == 0) error3(a, p, w0->b);
x->literalstring_count = p->number;
x->command_count = result - 1;
p->among = x;
x->substring = substring;
if (substring != 0) substring->among = x;
unless (x->command_count == 0 && x->starter == 0) a->amongvar_needed = true;
}
static struct node * read_among(struct analyser * a) {
struct tokeniser * t = a->tokeniser;
struct node * p = new_node(a, c_among);
struct node * p_end = 0;
int previous_token = -1;
struct node * substring = a->substring;
a->substring = 0;
p->number = 0; /* counts the number of literals */
unless (get_token(a, c_bra)) return p;
repeat {
struct node * q;
int token = read_token(t);
switch (token) {
case c_literalstring:
q = read_literalstring(a);
if (read_token(t) == c_name) {
struct node * r = new_node(a, c_name);
name_to_node(a, r, 'r');
q->left = r;
}
else t->token_held = true;
p->number++; break;
case c_bra:
if (previous_token == c_bra) error(a, 19);
q = read_C_list(a); break;
default:
error(a, 3);
case c_ket:
if (p->number == 0) error(a, 18);
if (t->error_count == 0) make_among(a, p, substring);
return p;
}
previous_token = token;
if (p_end == 0) p->left = q; else p_end->right = q;
p_end = q;
}
}
static struct node * read_substring(struct analyser * a) {
struct node * p = new_node(a, c_substring);
if (a->substring != 0) error2(a, 20, a->substring->line_number);
a->substring = p;
return p;
}
static void check_modifyable(struct analyser * a) {
unless (a->modifyable) error(a, 15);
}
/* Parse one command and return its node.
 *
 * Dispatches on the next token; most commands are read by C_style with a
 * pattern describing their operands. Returns 0 (after reporting error 1)
 * when the token cannot start a command. Mode and modifiability are saved
 * and restored around constructs that change them.
 *
 * In the "non" branch the omitted-name error is reported exactly once:
 * check_token already reports it, so it is not reported again here. */
static struct node * read_C(struct analyser * a) {
    struct tokeniser * t = a->tokeniser;
    int token = read_token(t);
    switch (token) {
        case c_bra:
            return read_C_list(a);
        case c_backwards:
            {
                int mode = a->mode;
                if (a->mode == m_backward) error(a, 17); else a->mode = m_backward;
                {   struct node * p = C_style(a, "C", token);
                    a->mode = mode;
                    return p;
                }
            }
        case c_reverse:
            {
                int mode = a->mode;
                int modifyable = a->modifyable;
                a->modifyable = false;
                a->mode = mode == m_forward ? m_backward : m_forward;
                {
                    struct node * p = C_style(a, "C", token);
                    a->mode = mode;
                    a->modifyable = modifyable;
                    return p;
                }
            }
        case c_not:
        case c_try:
        case c_fail:
        case c_test:
        case c_do:
        case c_goto:
        case c_gopast:
        case c_repeat:
            return C_style(a, "C", token);
        case c_loop:
        case c_atleast:
            return C_style(a, "AC", token);
        case c_setmark:
            return C_style(a, "i", token);
        case c_tomark:
        case c_atmark:
        case c_hop:
            return C_style(a, "A", token);
        case c_delete:
            check_modifyable(a);
            /* fall through */
        case c_next:
        case c_tolimit:
        case c_atlimit:
        case c_leftslice:
        case c_rightslice:
        case c_true:
        case c_false:
        case c_debug:
            return C_style(a, "", token);
        case c_assignto:
        case c_sliceto:
            check_modifyable(a);
            return C_style(a, "s", token);
        case c_assign:
        case c_insert:
        case c_attach:
        case c_slicefrom:
            check_modifyable(a);
            return C_style(a, "S", token);
        case c_setlimit:
            return C_style(a, "CfD", token);
        case c_set:
        case c_unset:
            return C_style(a, "b", token);
        case c_dollar:
            get_token(a, c_name);
            {
                struct node * p;
                struct name * q = find_name(a);
                int mode = a->mode;
                int modifyable = a->modifyable;
                switch (q ? q->type : t_string)
                    /* above line was: switch (q->type) - bug #1 fix 7/2/2003 */
                {
                    default: error(a, 34);
                    /* fall through: carry on as if it were a string */
                    case t_string:
                        a->mode = m_forward;
                        a->modifyable = true;
                        p = new_node(a, c_dollar);
                        p->left = read_C(a); break;
                    case t_integer:
                        /* a->mode = m_integer; */
                        p = new_node(a, read_AE_test(a));
                        p->AE = read_AE(a, 0); break;
                }
                p->name = q;
                a->mode = mode;
                a->modifyable = modifyable;
                return p;
            }
        case c_name:
            {
                struct name * q = find_name(a);
                struct node * p = new_node(a, c_name);
                if (q != 0) {
                    q->used = true;
                    switch (q->type) {
                        case t_boolean:
                            p->type = c_booltest; break;
                        case t_integer:
                            error(a, 35); /* integer name misplaced */
                            /* fall through */
                        case t_string:
                            break;
                        case t_routine:
                        case t_external:
                            p->type = c_call;
                            check_routine_mode(a, q, a->mode);
                            break;
                        case t_grouping:
                            p->type = c_grouping; break;
                    }
                }
                p->name = q;
                return p;
            }
        case c_non:
            {
                struct node * p = new_node(a, token);
                read_token(t);
                /* an optional '-' may separate "non" from the grouping name */
                if (t->token == c_minus) read_token(t);
                /* check_token reports the omission itself */
                if (!check_token(a, c_name)) return p;
                name_to_node(a, p, 'g');
                return p;
            }
        case c_literalstring:
            return read_literalstring(a);
        case c_among: return read_among(a);
        case c_substring: return read_substring(a);
        default: error(a, 1); return 0;
    }
}
/* Fetch the next character from p into W[0] and return how many symbols of
 * p it occupied: whatever get_utf8 reports in UTF-8 mode, otherwise 1. */
static int next_symbol(symbol * p, symbol * W, int utf8) {
    int ch;
    int used;
    if (!utf8) {
        W[0] = p[0];
        return 1;
    }
    used = get_utf8(p, & ch);
    W[0] = ch;
    return used;
}
/* Add the characters of q to grouping p (style == c_plus) or remove them
 * from it (any other style). Returns the possibly reallocated p.
 * utf8 says whether q must be decoded with get_utf8.
 *
 * On removal every occurrence of a character is deleted. After deleting
 * p[i] the following element moves into slot i, so the index is not
 * advanced in that case; advancing it used to skip that element and left
 * adjacent duplicates in the grouping. */
static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
    int j = 0;
    symbol W[1];
    int width;
    if (style == c_plus) {
        while (j < SIZE(q)) {
            width = next_symbol(q + j, W, utf8);
            p = add_to_b(p, 1, W);
            j += width;
        }
    } else {
        while (j < SIZE(q)) {
            int i = 0;
            width = next_symbol(q + j, W, utf8);
            while (i < SIZE(p)) {
                if (p[i] == W[0]) {
                    memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
                    SIZE(p)--;
                } else {
                    i++;
                }
            }
            j += width;
        }
    }
    return p;
}
static void read_define_grouping(struct analyser * a, struct name * q) {
struct tokeniser * t = a->tokeniser;
int style = c_plus;
{
NEW(grouping, p);
if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
a->groupings_end = p;
q->grouping = p;
p->next = 0;
p->name = q;
p->number = q->count;
p->b = create_b(0);
repeat {
switch (read_token(t)) {
case c_name:
{
struct name * r = find_name(a);
unless (r == 0) {
check_name_type(a, r, 'g');
p->b = alter_grouping(p->b, r->grouping->b, style, false);
}
}
break;
case c_literalstring:
p->b = alter_grouping(p->b, t->b, style, a->utf8);
break;
default: error(a, 1); return;
}
switch (read_token(t)) {
case c_plus:
case c_minus: style = t->token; break;
default: goto label0;
}
}
label0:
{
int i;
int max = 0;
int min = 1<<16;
for (i = 0; i < SIZE(p->b); i++) {
if (p->b[i] > max) max = p->b[i];
if (p->b[i] < min) min = p->b[i];
}
p->largest_ch = max;
p->smallest_ch = min;
if (min == 1<<16) error(a, 16);
}
t->token_held = true; return;
}
}
static void read_define_routine(struct analyser * a, struct name * q) {
struct node * p = new_node(a, c_define);
a->amongvar_needed = false;
unless (q == 0) {
check_name_type(a, q, 'R');
if (q->definition != 0) error(a, 36);
if (q->mode < 0) q->mode = a->mode; else
if (q->mode != a->mode) error2(a, 32, q->mode);
}
p->name = q;
if (a->program == 0) a->program = p; else a->program_end->right = p;
a->program_end = p;
get_token(a, c_as);
p->left = read_C(a);
unless (q == 0) q->definition = p->left;
if (a->substring != 0) {
error2(a, 14, a->substring->line_number);
a->substring = 0;
}
p->amongvar_needed = a->amongvar_needed;
}
static void read_define(struct analyser * a) {
unless (get_token(a, c_name)) return;
{
struct name * q = find_name(a);
if (q != 0 && q->type == t_grouping) read_define_grouping(a, q);
else read_define_routine(a, q);
}
}
/* Parse "backwardmode ( ... )": read the bracketed declarations with the
 * analyser in backward mode, then restore the previous mode. */
static void read_backwardmode(struct analyser * a) {
    int saved_mode = a->mode;
    a->mode = m_backward;
    if (get_token(a, c_bra)) {
        read_program_(a, c_ket);
        check_token(a, c_ket);
    }
    a->mode = saved_mode;
}
static void read_program_(struct analyser * a, int terminator) {
struct tokeniser * t = a->tokeniser;
repeat {
switch (read_token(t)) {
case c_strings: read_names(a, t_string); break;
case c_booleans: read_names(a, t_boolean); break;
case c_integers: read_names(a, t_integer); break;
case c_routines: read_names(a, t_routine); break;
case c_externals: read_names(a, t_external); break;
case c_groupings: read_names(a, t_grouping); break;
case c_define: read_define(a); break;
case c_backwardmode:read_backwardmode(a); break;
case c_ket:
if (terminator == c_ket) return;
default:
error(a, 1); break;
case -1:
unless (terminator < 0) omission_error(a, c_ket);
return;
}
}
}
extern void read_program(struct analyser * a) {
read_program_(a, -1);
{
struct name * q = a->names;
until (q == 0) {
switch(q->type) {
case t_external: case t_routine:
if (q->used && q->definition == 0) error4(a, q); break;
case t_grouping:
if (q->used && q->grouping == 0) error4(a, q); break;
}
q = q->next;
}
}
if (a->tokeniser->error_count == 0) {
struct name * q = a->names;
int warned = false;
until (q == 0) {
unless (q->referenced) {
unless (warned) {
fprintf(stderr, "Declared but not used:");
warned = true;
}
fprintf(stderr, " "); report_b(stderr, q->b);
}
q = q->next;
}
if (warned) fprintf(stderr, "\n");
q = a->names;
warned = false;
until (q == 0) {
if (! q->used && (q->type == t_routine ||
q->type == t_grouping)) {
unless (warned) {
fprintf(stderr, "Declared and defined but not used:");
warned = true;
}
fprintf(stderr, " "); report_b(stderr, q->b);
}
q = q->next;
}
if (warned) fprintf(stderr, "\n");
}
}
extern struct analyser * create_analyser(struct tokeniser * t) {
NEW(analyser, a);
a->tokeniser = t;
a->nodes = 0;
a->names = 0;
a->literalstrings = 0;
a->program = 0;
a->amongs = 0;
a->among_count = 0;
a->groupings = 0;
a->mode = m_forward;
a->modifyable = true;
{ int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
a->substring = 0;
return a;
}
extern void close_analyser(struct analyser * a) {
{
struct node * q = a->nodes;
until (q == 0) {
struct node * q_next = q->next;
FREE(q);
q = q_next;
}
}
{
struct name * q = a->names;
until (q == 0) {
struct name * q_next = q->next;
lose_b(q->b); FREE(q);
q = q_next;
}
}
{
struct literalstring * q = a->literalstrings;
until (q == 0) {
struct literalstring * q_next = q->next;
lose_b(q->b); FREE(q);
q = q_next;
}
}
{
struct among * q = a->amongs;
until (q == 0) {
struct among * q_next = q->next;
FREE(q->b); FREE(q);
q = q_next;
}
}
{
struct grouping * q = a->groupings;
until (q == 0) {
struct grouping * q_next = q->next;
lose_b(q->b); FREE(q);
q = q_next;
}
}
FREE(a);
}
#include <stdio.h> /* for main etc */
#include <stdlib.h> /* for free etc */
#include <string.h> /* for strlen */
#include "header.h"
#define DEFAULT_PACKAGE "org.tartarus.snowball.ext"
#define DEFAULT_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
#define DEFAULT_AMONG_CLASS "org.tartarus.snowball.Among"
#define DEFAULT_STRING_CLASS "java.lang.StringBuilder"
/* Return 1 if the two NUL-terminated strings are identical, else 0. */
static int eq(char * s1, char * s2) {
    return strcmp(s1, s2) == 0;
}
/* Print the command-line usage summary on stderr and exit with status 1.
 * The Java-only options are listed unless DISABLE_JAVA is defined. */
static void print_arglist(void) {
    fputs("Usage: snowball <file> [options]\n\n"
          "options are: [-o[utput] file]\n"
          " [-s[yntax]]\n", stderr);
#ifndef DISABLE_JAVA
    fputs(" [-j[ava]]\n", stderr);
#endif
    fputs(" [-c++]\n"
          " [-w[idechars]]\n"
          " [-u[tf8]]\n"
          " [-n[ame] class name]\n"
          " [-ep[refix] string]\n"
          " [-vp[refix] string]\n"
          " [-i[nclude] directory]\n"
          " [-r[untime] path to runtime headers]\n", stderr);
#ifndef DISABLE_JAVA
    fputs(" [-p[arentclassname] fully qualified parent class name]\n"
          " [-P[ackage] package name for stemmers]\n"
          " [-S[tringclass] StringBuffer-compatible class]\n"
          " [-a[mongclass] fully qualified name of the Among class]\n", stderr);
#endif
    exit(1);
}
/* Ensure argv[i] exists; if the option's argument is missing, complain and
 * print the usage text (which exits). */
static void check_lim(int i, int argc) {
    if (i < argc) return;
    fprintf(stderr, "argument list is one short\n");
    print_arglist();
}
/* Open the file named by buffer b for writing and return the stream.
 * On failure a message is printed and the program exits with status 1. */
static FILE * get_output(symbol * b) {
    char * path = b_to_s(b);
    FILE * stream = fopen(path, "w");
    if (stream == 0) {
        fprintf(stderr, "Can't open output %s\n", path);
        exit(1);
    }
    free(path);
    return stream;
}
static void read_options(struct options * o, int argc, char * argv[]) {
char * s;
int i = 2;
/* set defauts: */
o->output_file = 0;
o->syntax_tree = false;
o->externals_prefix = "";
o->variables_prefix = 0;
o->runtime_path = 0;
o->parent_class_name = DEFAULT_BASE_CLASS;
o->string_class = DEFAULT_STRING_CLASS;
o->among_class = DEFAULT_AMONG_CLASS;
o->package = DEFAULT_PACKAGE;
o->name = "";
o->make_lang = LANG_C;
o->widechars = false;
o->includes = 0;
o->includes_end = 0;
o->utf8 = false;
/* read options: */
repeat {
if (i >= argc) break;
s = argv[i++];
{ if (eq(s, "-o") || eq(s, "-output")) {
check_lim(i, argc);
o->output_file = argv[i++];
continue;
}
if (eq(s, "-n") || eq(s, "-name")) {
check_lim(i, argc);
o->name = argv[i++];
continue;
}
#ifndef DISABLE_JAVA
if (eq(s, "-j") || eq(s, "-java")) {
o->make_lang = LANG_JAVA;
o->widechars = true;
continue;
}
#endif
if (eq(s, "-c++")) {
o->make_lang = LANG_CPLUSPLUS;
continue;
}
if (eq(s, "-w") || eq(s, "-widechars")) {
o->widechars = true;
o->utf8 = false;
continue;
}
if (eq(s, "-s") || eq(s, "-syntax")) {
o->syntax_tree = true;
continue;
}
if (eq(s, "-ep") || eq(s, "-eprefix")) {
check_lim(i, argc);
o->externals_prefix = argv[i++];
continue;
}
if (eq(s, "-vp") || eq(s, "-vprefix")) {
check_lim(i, argc);
o->variables_prefix = argv[i++];
continue;
}
if (eq(s, "-i") || eq(s, "-include")) {
check_lim(i, argc);
{
NEW(include, p);
symbol * b = add_s_to_b(0, argv[i++]);
b = add_s_to_b(b, "/");
p->next = 0; p->b = b;
if (o->includes == 0) o->includes = p; else
o->includes_end->next = p;
o->includes_end = p;
}
continue;
}
if (eq(s, "-r") || eq(s, "-runtime")) {
check_lim(i, argc);
o->runtime_path = argv[i++];
continue;
}
if (eq(s, "-u") || eq(s, "-utf8")) {
o->utf8 = true;
o->widechars = false;
continue;
}
#ifndef DISABLE_JAVA
if (eq(s, "-p") || eq(s, "-parentclassname")) {
check_lim(i, argc);
o->parent_class_name = argv[i++];
continue;
}
if (eq(s, "-P") || eq(s, "-Package")) {
check_lim(i, argc);
o->package = argv[i++];
continue;
}
if (eq(s, "-S") || eq(s, "-stringclass")) {
check_lim(i, argc);
o->string_class = argv[i++];
continue;
}
if (eq(s, "-a") || eq(s, "-amongclass")) {
check_lim(i, argc);
o->among_class = argv[i++];
continue;
}
#endif
fprintf(stderr, "'%s' misplaced\n", s);
print_arglist();
}
}
}
/* Snowball compiler driver.
 *
 * argv[1] is the Snowball source file; the remaining arguments are options
 * (see read_options). The source is tokenised and analysed; any error ends
 * the run with exit status 1. With the syntax option the tree is printed
 * instead of generating code; otherwise the -o option is required and C/C++
 * (.h plus .c or .cc) or Java output is generated. Finally everything is
 * freed and a count of unfreed blocks is reported if it is not zero. */
extern int main(int argc, char * argv[]) {
NEW(options, o);
if (argc == 1) print_arglist();
read_options(o, argc, argv);
{
symbol * filename = add_s_to_b(0, argv[1]);
symbol * u = get_input(filename);
if (u == 0) {
fprintf(stderr, "Can't open input %s\n", argv[1]);
exit(1);
}
{
struct tokeniser * t = create_tokeniser(u);
struct analyser * a = create_analyser(t);
t->widechars = o->widechars;
t->includes = o->includes;
a->utf8 = t->utf8 = o->utf8;
read_program(a);
if (t->error_count > 0) exit(1);
if (o->syntax_tree) print_program(a);
/* the tokeniser is closed before any code is generated */
close_tokeniser(t);
unless (o->syntax_tree) {
struct generator * g;
char * s = o->output_file;
unless (s) {
fprintf(stderr, "Please include the -o option\n");
print_arglist();
exit(1);
}
if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
symbol * b = add_s_to_b(0, s);
b = add_s_to_b(b, ".h");
o->output_h = get_output(b);
/* reuse the buffer: ".h" becomes ".c", or ".cc" for C++ */
b[SIZE(b) - 1] = 'c';
if (o->make_lang == LANG_CPLUSPLUS) {
b = add_s_to_b(b, "c");
}
o->output_c = get_output(b);
lose_b(b);
g = create_generator_c(a, o);
generate_program_c(g);
close_generator_c(g);
fclose(o->output_c);
fclose(o->output_h);
}
#ifndef DISABLE_JAVA
if (o->make_lang == LANG_JAVA) {
symbol * b = add_s_to_b(0, s);
b = add_s_to_b(b, ".java");
o->output_java = get_output(b);
lose_b(b);
g = create_generator_java(a, o);
generate_program_java(g);
close_generator_java(g);
fclose(o->output_java);
}
#endif
}
close_analyser(a);
}
lose_b(u);
lose_b(filename);
}
/* free the list of include directories built by read_options */
{ struct include * p = o->includes;
until (p == 0)
{ struct include * q = p->next;
lose_b(p->b); FREE(p); p = q;
}
}
FREE(o);
/* leak check */
unless (space_count == 0) fprintf(stderr, "%d blocks unfreed\n", space_count);
return 0;
}
#include <limits.h> /* for INT_MAX */
#include <stdio.h> /* for fprintf etc */
#include <stdlib.h> /* for free etc */
#include <string.h> /* for strlen */
#include "header.h"
/* Define this to get warning messages when optimisations can't be used. */
/* #define OPTIMISATION_WARNINGS */
/* recursive use: */
static void generate(struct generator * g, struct node * p);
enum special_labels {
x_return = -1
};
/* Hand out the next unused label number. */
static int new_label(struct generator * g) {
    int label = g->next_label;
    g->next_label = label + 1;
    return label;
}
/* Output routines */
/* Write the contents of str to outfile. */
static void output_str(FILE * outfile, struct str * str) {
    char * text = b_to_s(str_data(str));
    fprintf(outfile, "%s", text);
    free(text);
}
/* Write one character to the output buffer. */
static void wch(struct generator * g, int ch) {
    struct str * out = g->outbuf;
    str_append_ch(out, ch);
}
/* Write a newline to the output buffer and count the line. */
static void wnl(struct generator * g) {
    struct str * out = g->outbuf;
    str_append_ch(out, '\n');
    g->line_count += 1;
}
/* Write a NUL-terminated string to the output buffer. */
static void ws(struct generator * g, const char * s) {
    struct str * out = g->outbuf;
    str_append_string(out, s);
}
/* Write an integer to the output buffer. */
static void wi(struct generator * g, int i) {
    struct str * out = g->outbuf;
    str_append_int(out, i);
}
/* Write the low four bits of i as one upper-case hexadecimal digit. */
static void wh_ch(struct generator * g, int i) {
    const char * digits = "0123456789ABCDEF";
    str_append_ch(g->outbuf, digits[i & 0xF]);
}
/* Write i in hexadecimal without leading zeros: the higher digits are
 * written first by recursion, then the lowest digit. */
static void wh(struct generator * g, int i) {
    int higher = i >> 4;
    if (higher != 0) wh(g, higher);
    wh_ch(g, i);
}
/* Write an integer padded on the left with spaces to a width of three. */
static void wi3(struct generator * g, int i) {
    if (i < 100) {
        wch(g, ' ');
    }
    if (i < 10) {
        wch(g, ' ');
    }
    wi(g, i);
}
/* Write the generated-code name of p.
 * Strings, booleans and integers become an indexed slot such as "S[3]";
 * externals are the externals prefix followed by the name; everything else
 * is a type letter, an underscore and the name. */
static void wvn(struct generator * g, struct name * p) {
    int letter = "SBIrxg"[p->type];
    if (p->type == t_string || p->type == t_boolean || p->type == t_integer) {
        wch(g, letter);
        wch(g, '[');
        wi(g, p->count);
        wch(g, ']');
        return;
    }
    if (p->type == t_external) {
        ws(g, g->options->externals_prefix);
    } else {
        wch(g, letter);
        wch(g, '_');
    }
    str_append_b(g->outbuf, p->b);
}
/* Write a reference to p: names whose type sorts before t_routine are
 * prefixed with "z->". */
static void wv(struct generator * g, struct name * p) {
    int needs_env = p->type < t_routine;
    if (needs_env) ws(g, "z->");
    wvn(g, p);
}
/* Write the symbols of p as a C array initialiser "{ a, b, ... }".
 * Codes 32..126 are written as character constants (quote and backslash
 * escaped); all other codes are written in hexadecimal. */
static void wlitarray(struct generator * g, symbol * p) {
    int i;
    ws(g, "{ ");
    for (i = 0; i < SIZE(p); i++) {
        int ch = p[i];
        if (ch < 32 || ch >= 127) {
            wch(g, '0');
            wch(g, 'x');
            wh(g, ch);
        } else {
            wch(g, '\'');
            if (ch == '\'' || ch == '\\') wch(g, '\\');
            wch(g, ch);
            wch(g, '\'');
        }
        if (i < SIZE(p) - 1) ws(g, ", ");
    }
    ws(g, " }");
}
/* Write a reference to the literal p. An empty literal is written as "0".
 * Otherwise a "static const symbol s_<n>[]" array is added to the
 * declarations buffer and "s_<n>" is written to the current output. */
static void wlitref(struct generator * g, symbol * p) {
    struct str * saved_out;
    int id;
    if (SIZE(p) == 0) {
        ws(g, "0");
        return;
    }
    id = g->literalstring_count;

    /* the array itself goes into the declarations */
    saved_out = g->outbuf;
    g->outbuf = g->declarations;
    ws(g, "static const symbol s_"); wi(g, id); ws(g, "[] = ");
    wlitarray(g, p);
    ws(g, ";\n");
    g->outbuf = saved_out;

    /* the reference goes where the caller was writing */
    ws(g, "s_"); wi(g, id);
    g->literalstring_count = id + 1;
}
/* Write the current margin: one indent unit per margin level. */
static void wm(struct generator * g) {
    int level = 0;
    while (level < g->margin) {
        ws(g, " ");
        level++;
    }
}
static void wc(struct generator * g, struct node * p) { /* comment */
ws(g, " /* ");
ws(g, (char *) name_of_token(p->type));
unless (p->name == 0) {
ws(g, " ");
str_append_b(g->outbuf, p->name->b);
}
ws(g, ", line "); wi(g, p->line_number); ws(g, " */");
wnl(g);
}
/* Write the margin followed by the string s. */
static void wms(struct generator * g, const char * s) {
    wm(g);
    ws(g, s);
}
/* Block start: write "{ " at the current margin, then indent one level. */
static void wbs(struct generator * g) {
    wms(g, "{ ");
    g->margin += 1;
}
/* Block end: unindent one level and write "}". If a label was written on
 * the current line, an empty statement ";" is written first. */
static void wbe(struct generator * g) {
    int label_pending = g->line_labelled == g->line_count;
    if (label_pending) {
        wms(g, ";");
        wnl(g);
    }
    g->margin -= 1;
    wms(g, "}");
    wnl(g);
}
/* Keep the cursor: declare a fresh numbered variable holding the cursor
 * (forward mode: "c<n>") or its distance from the limit (backward mode:
 * "m<n>", which is also cast to void). */
static void wk(struct generator * g, struct node * p) {
    int id = ++g->keep_count;
    if (p->mode != m_forward) {
        ws(g, "int m"); wi(g, id); ws(g, " = z->l - z->c; (void)m");
        wi(g, id); ws(g, ";");
        return;
    }
    ws(g, "int c"); wi(g, id); ws(g, " = z->c;");
}
/* Restore the cursor from the keep variable numbered keep_token, using the
 * form that matches the node's direction. */
static void wrestore(struct generator * g, struct node * p, int keep_token) {
    const char * lhs = p->mode == m_forward ? "z->c = c" : "z->c = z->l - m";
    ws(g, lhs);
    wi(g, keep_token);
    ws(g, ";");
}
/* Step the cursor one place in the node's direction. */
static void winc(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        ws(g, "z->c++;");
    } else {
        ws(g, "z->c--;");
    }
}
/* Write the label "lab<n>:" one margin level out from the current one, and
 * remember the line it was written on. */
static void wsetl(struct generator * g, int n) {
    g->margin -= 1;
    wms(g, "lab");
    wi(g, n);
    wch(g, ':');
    wnl(g);
    g->line_labelled = g->line_count;
    g->margin += 1;
}
/* Write "goto lab<n>;" on its own line at the current margin. */
static void wgotol(struct generator * g, int n) {
    wms(g, "goto lab");
    wi(g, n);
    wch(g, ';');
    wnl(g);
}
/* Fail: write the code executed when the current command fails.
 *
 * That is either "return 0;" (failure label x_return) or a goto to the
 * current failure label, in which case the label is marked as used.  When a
 * failure string is set it is written first and the whole lot is wrapped in
 * braces.
 */
static void wf(struct generator * g) {
    const int wrapped = g->failure_string != 0;

    if (wrapped) {
        ws(g, "{ ");
        ws(g, g->failure_string);
        wch(g, ' ');
    }
    if (g->failure_label == x_return) {
        ws(g, "return 0;");
    } else {
        ws(g, "goto lab");
        wi(g, g->failure_label);
        wch(g, ';');
        g->label_used = 1;
    }
    if (wrapped) ws(g, " }");
}
/* If at limit, fail: write a test of the cursor against the limit for the
 * node's direction, followed by the failure code. */
static void wlim(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        ws(g, "if (z->c >= z->l) ");
    } else {
        ws(g, "if (z->c <= z->lb) ");
    }
    wf(g);
}
/* Formatted write: copy s to the output, expanding '~' escapes.
 *
 * Escapes handled below:
 *   ~C comment for node p        ~k keep cursor (wk)
 *   ~K / ~R keep / restore the cursor in c_test or m_test
 *   ~i increment cursor          ~l fail if at limit
 *   ~f failure code              ~M margin
 *   ~N newline                   ~{ / ~} block start / end
 *   ~+ / ~- increase / decrease the margin
 *   ~Sd ~Id ~Jd ~Vd ~Wd ~Ld ~Ad  write entry d (a digit) of g->S, g->I
 *                                (wi or wi3), g->V (wv or wvn) or g->L
 *                                (wlitref or wlitarray)
 *   ~$ 'v' or 's' depending on whether p has a literal string
 *   ~p the externals prefix from the options
 * Any other character after '~' is written as itself.
 *
 * p may be 0 (see w()) provided s uses no escape that dereferences it.
 */
static void wp(struct generator * g, const char * s, struct node * p) { /* formatted write */
int i = 0;
int l = strlen(s);
until (i >= l) {
int ch = s[i++];
if (ch != '~') wch(g, ch); else
switch(s[i++]) {
default: wch(g, s[i - 1]); continue;
case 'C': wc(g, p); continue;
case 'k': wk(g, p); continue;
case 'K': /* keep for c_test */
ws(g, p->mode == m_forward ? "int c_test = z->c;" :
"int m_test = z->l - z->c;");
continue;
case 'R': /* restore for c_test */
ws(g, p->mode == m_forward ? "z->c = c_test;" :
"z->c = z->l - m_test;");
continue;
case 'i': winc(g, p); continue;
case 'l': wlim(g, p); continue;
case 'f': wf(g); continue;
case 'M': wm(g); continue;
case 'N': wnl(g); continue;
case '{': wbs(g); continue;
case '}': wbe(g); continue;
/* the following escapes consume one more character: a decimal digit index */
case 'S': ws(g, g->S[s[i++] - '0']); continue;
case 'I': wi(g, g->I[s[i++] - '0']); continue;
case 'J': wi3(g, g->I[s[i++] - '0']); continue;
case 'V': wv(g, g->V[s[i++] - '0']); continue;
case 'W': wvn(g, g->V[s[i++] - '0']); continue;
case 'L': wlitref(g, g->L[s[i++] - '0']); continue;
case 'A': wlitarray(g, g->L[s[i++] - '0']); continue;
case '+': g->margin++; continue;
case '-': g->margin--; continue;
case '$': /* insert_s, insert_v etc */
wch(g, p->literalstring == 0 ? 'v' : 's');
continue;
case 'p': ws(g, g->options->externals_prefix); continue;
}
}
}
/* Formatted write without a syntax-tree node: wp() with a null node, so s
 * must not use escapes that need one. */
static void w(struct generator * g, const char * s) {
    wp(g, s, 0);
}
/* Write the C expression for the arithmetic expression rooted at p.
 *
 * Binary operators are written fully parenthesised as "(left op right)".
 * Node types not listed in the switch produce no output.
 */
static void generate_AE(struct generator * g, struct node * p) {
char * s;
switch (p->type) {
case c_name:
wv(g, p->name); break;
case c_number:
wi(g, p->number); break;
case c_maxint:
ws(g, "MAXINT"); break;
case c_minint:
ws(g, "MININT"); break;
case c_neg:
wch(g, '-'); generate_AE(g, p->right); break;
/* the four binary operators pick their operator text, then share label0 */
case c_multiply:
s = " * "; goto label0;
case c_plus:
s = " + "; goto label0;
case c_minus:
s = " - "; goto label0;
case c_divide:
s = " / ";
label0:
wch(g, '('); generate_AE(g, p->left);
ws(g, s); generate_AE(g, p->right); wch(g, ')'); break;
case c_sizeof:
/* note: overwrites g->V[0] */
g->V[0] = p->name;
w(g, "SIZE(~V0)"); break;
case c_cursor:
w(g, "z->c"); break;
case c_limit:
w(g, p->mode == m_forward ? "z->l" : "z->lb"); break;
case c_size:
w(g, "SIZE(z->p)"); break;
}
}
/* K_needed() tests to see if we really need to keep c. Not true when the
the command does not touch the cursor. This and repeat_score() could be
elaborated almost indefinitely.
*/
static int K_needed(struct generator * g, struct node * p) {
until (p == 0) {
switch (p->type) {
case c_dollar:
case c_leftslice:
case c_rightslice:
case c_mathassign:
case c_plusassign:
case c_minusassign:
case c_multiplyassign:
case c_divideassign:
case c_eq:
case c_ne:
case c_gr:
case c_ge:
case c_ls:
case c_le:
case c_sliceto:
case c_true:
case c_false:
case c_debug:
break;
case c_call:
if (K_needed(g, p->name->definition)) return true;
break;
case c_bra:
if (K_needed(g, p->left)) return true;
break;
default: return true;
}
p = p->right;
}
return false;
}
static int repeat_score(struct generator * g, struct node * p) {
int score = 0;
until (p == 0)
{
switch (p->type) {
case c_dollar:
case c_leftslice:
case c_rightslice:
case c_mathassign:
case c_plusassign:
case c_minusassign:
case c_multiplyassign:
case c_divideassign:
case c_eq:
case c_ne:
case c_gr:
case c_ge:
case c_ls:
case c_le:
case c_sliceto: /* case c_not: must not be included here! */
case c_debug:
break;
case c_call:
score += repeat_score(g, p->name->definition);
break;
case c_bra:
score += repeat_score(g, p->left);
break;
case c_name:
case c_literalstring:
case c_next:
case c_grouping:
case c_non:
case c_hop:
score = score + 1; break;
default: score = 2; break;
}
p = p->right;
}
return score;
}
/* Tests if an expression requires cursor reinstatement in a repeat: true
 * when its repeat_score() is 2 or more. */
static int repeat_restore(struct generator * g, struct node * p) {
    int score = repeat_score(g, p);
    return score >= 2;
}
/* Generate code for a bracketed group: each child command in turn. */
static void generate_bra(struct generator * g, struct node * p) {
    struct node * child;
    for (child = p->left; child != 0; child = child->right) {
        generate(g, child);
    }
}
/* Generate code for 'and': each operand is generated in turn and, when the
 * operands can move the cursor, the cursor is restored to its saved value
 * between consecutive operands (but not after the last one). */
static void generate_and(struct generator * g, struct node * p) {
    struct node * operand;
    int saved = 0;   /* keep-variable number, or 0 if the cursor is not kept */

    if (!K_needed(g, p->left)) {
        wp(g, "~M~C", p);
    } else {
        wp(g, "~{~k~C", p);
        saved = g->keep_count;
    }

    for (operand = p->left; operand != 0; operand = operand->right) {
        generate(g, operand);
        if (saved != 0 && operand->right != 0) {
            w(g, "~M");
            wrestore(g, operand, saved);
            w(g, "~N");
        }
    }

    if (saved != 0) w(g, "~}");
}
/* Generate code for 'or'.
 *
 * Every alternative except the last gets its own failure label: on success
 * the generated code jumps to out_lab, on failure it lands on the label,
 * restores the cursor (if kept) and tries the next alternative.  The last
 * alternative is generated with the enclosing failure state reinstated, so
 * its failure is the failure of the whole 'or'.
 */
static void generate_or(struct generator * g, struct node * p) {
int keep_c = 0;
/* enclosing failure state, reinstated for the final alternative */
int used = g->label_used;
int a0 = g->failure_label;
const char * a1 = g->failure_string;
int out_lab = new_label(g);
if (K_needed(g, p->left)) {
wp(g, "~{~k~C", p);
keep_c = g->keep_count;
} else {
wp(g, "~M~C", p);
}
p = p->left;
g->failure_string = 0;
until (p->right == 0) {
g->failure_label = new_label(g);
g->label_used = 0;
generate(g, p);
wgotol(g, out_lab);
/* only write the label if the alternative actually jumps to it */
if (g->label_used)
wsetl(g, g->failure_label);
if (keep_c) {
w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
}
p = p->right;
}
g->label_used = used;
g->failure_label = a0;
g->failure_string = a1;
generate(g, p);
if (keep_c) w(g, "~}");
wsetl(g, out_lab);
}
/* Generate code for 'backwards': set the backward limit to the cursor and
 * move the cursor to the limit, generate the operand, then put the cursor
 * back at the backward limit. */
static void generate_backwards(struct generator * g, struct node * p) {
    wp(g,
       "~Mz->lb = z->c; z->c = z->l;~C~N",
       p);
    generate(g, p->left);
    w(g, "~Mz->c = z->lb;~N");
}
/* Generate code for 'not'.
 *
 * The operand is generated with a fresh failure label.  If the operand
 * succeeds, control falls through to failure code written with the
 * enclosing failure state; if it fails, control jumps to the fresh label
 * and continues, with the cursor restored when it was kept.
 */
static void generate_not(struct generator * g, struct node * p) {
int keep_c = 0;
/* enclosing failure state */
int used = g->label_used;
int a0 = g->failure_label;
const char * a1 = g->failure_string;
if (K_needed(g, p->left)) {
wp(g, "~{~k~C", p);
keep_c = g->keep_count;
} else {
wp(g, "~M~C", p);
}
g->failure_label = new_label(g);
g->label_used = 0;
g->failure_string = 0;
generate(g, p->left);
{
int l = g->failure_label;
int u = g->label_used;
/* reinstate the enclosing state before writing the ~f below */
g->label_used = used;
g->failure_label = a0;
g->failure_string = a1;
w(g, "~M~f~N");
if (u)
wsetl(g, l);
}
if (keep_c) {
w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N~}");
}
}
/* Generate code for 'try': the operand is generated with a fresh failure
 * label written directly after it, so failure simply continues.  When the
 * operand can move the cursor, the cursor is saved first and the failure
 * string is set to code that restores it. */
static void generate_try(struct generator * g, struct node * p) {
    const int keep_c = K_needed(g, p->left);

    if (!keep_c) {
        wp(g, "~M~C", p);
        g->failure_string = 0;
    } else if (p->mode == m_forward) {
        wp(g, "~{int c_keep = z->c;~C", p);
        g->failure_string = "z->c = c_keep;";
    } else {
        wp(g, "~{int m_keep = z->l - z->c;/* (void) m_keep;*/~C", p);
        g->failure_string = "z->c = z->l - m_keep;";
    }

    g->label_used = 0;
    g->failure_label = new_label(g);
    generate(g, p->left);

    if (g->label_used) {
        wsetl(g, g->failure_label);
    }
    if (keep_c) {
        w(g, "~}");
    }
}
/* Generate code for 'set': assign 1 to the named variable. */
static void generate_set(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g, "~M~V0 = 1;~C", p);
}
/* Generate code for 'unset': assign 0 to the named variable. */
static void generate_unset(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g, "~M~V0 = 0;~C", p);
}
/* Generate code for 'fail': the operand, followed by unconditional
 * failure code. */
static void generate_fail(struct generator * g, struct node * p) {
    struct node * operand = p->left;
    generate(g, operand);
    wp(g, "~M~f~C", p);
}
/* generate_test() also implements 'reverse'.
 *
 * The operand is generated and, when it can move the cursor, the cursor is
 * saved beforehand and restored once the operand has succeeded. */
static void generate_test(struct generator * g, struct node * p) {
    const int keep_c = K_needed(g, p->left);

    wp(g, keep_c ? "~{~K~C" : "~M~C", p);
    generate(g, p->left);
    if (!keep_c) return;
    wp(g, "~M~R~N~}", p);
}
/* Generate code for 'do': the operand is generated with a fresh failure
 * label written directly after it, and the cursor (when the operand can
 * move it) is restored afterwards whether the operand succeeded or not. */
static void generate_do(struct generator * g, struct node * p) {
    int saved = 0;   /* keep-variable number, or 0 if the cursor is not kept */

    if (!K_needed(g, p->left)) {
        wp(g, "~M~C", p);
    } else {
        wp(g, "~{~k~C", p);
        saved = g->keep_count;
    }

    g->failure_string = 0;
    g->label_used = 0;
    g->failure_label = new_label(g);
    generate(g, p->left);

    if (g->label_used) {
        wsetl(g, g->failure_label);
    }
    if (saved != 0) {
        w(g, "~M");
        wrestore(g, p, saved);
        w(g, "~N~}");
    }
}
/* Generate code for 'next': move the cursor on by one character, failing
 * at the limit.  With the utf8 option the move goes through skip_utf8();
 * otherwise it is a limit test followed by a cursor increment. */
static void generate_next(struct generator * g, struct node * p) {
    if (!g->options->utf8) {
        wp(g, "~M~l~N"
              "~M~i~C", p);
        return;
    }

    if (p->mode != m_forward) {
        w(g, "~{int ret = skip_utf8(z->p, z->c, z->lb, 0, -1");
    } else {
        w(g, "~{int ret = skip_utf8(z->p, z->c, 0, z->l, 1");
    }
    wp(g, ");~N"
          "~Mif (ret < 0) ~f~N"
          "~Mz->c = ret;~C"
          "~}", p);
}
static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
struct grouping * q = p->name->grouping;
g->S[0] = p->mode == m_forward ? "" : "_b";
g->S[1] = complement ? "in" : "out";
g->S[2] = g->options->utf8 ? "_U" : "";
g->V[0] = p->name;
g->I[0] = q->smallest_ch;
g->I[1] = q->largest_ch;
if (is_goto) {
wp(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1) < 0) ~f /* goto */~C", p);
} else {
wp(g, "~{ /* gopast */~C"
"~Mint ret = ~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1);~N"
"~Mif (ret < 0) ~f~N", p);
if (p->mode == m_forward)
w(g, "~Mz->c += ret;~N");
else
w(g, "~Mz->c -= ret;~N");
w(g, "~}");
}
}
/* Generate code for 'goto' (style == 1) and 'gopast' (style == 0).
 *
 * A grouping or inverted grouping operand is handed to
 * generate_GO_grouping().  Otherwise a "while(1)" loop is written: the
 * operand is tried with a fresh failure label; on success the loop is left
 * (for goto, with the cursor first restored to where the match began); on
 * failure the cursor is restored if it was kept, advanced by one via
 * generate_next() - generated with the enclosing failure state - and the
 * loop goes round again.
 *
 * NOTE(review): unlike generate_repeat() and generate_do(), the failure
 * string is not cleared before the operand is generated - confirm that
 * this is intended.
 */
static void generate_GO(struct generator * g, struct node * p, int style) {
int keep_c = 0;
/* enclosing failure state, reinstated before generate_next() below */
int used = g->label_used;
int a0 = g->failure_label;
const char * a1 = g->failure_string;
if (p->left->type == c_grouping || p->left->type == c_non) {
/* Special case for "goto" or "gopast" when used on a grouping or an
* inverted grouping - the movement of c by the matching action is
* exactly what we want! */
#ifdef OPTIMISATION_WARNINGS
printf("Optimising %s %s\n", style ? "goto" : "gopast", p->left->type == c_non ? "non" : "grouping");
#endif
generate_GO_grouping(g, p->left, style, p->left->type == c_non);
return;
}
w(g, "~Mwhile(1) {"); wp(g, "~C~+", p);
if (style == 1 || repeat_restore(g, p->left)) {
wp(g, "~M~k~N", p);
keep_c = g->keep_count;
}
g->failure_label = new_label(g);
g->label_used = 0;
generate(g, p->left);
if (style == 1) {
/* include for goto; omit for gopast */
w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
}
w(g, "~Mbreak;~N");
if (g->label_used)
wsetl(g, g->failure_label);
if (keep_c) {
w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
}
g->label_used = used;
g->failure_label = a0;
g->failure_string = a1;
/* wp(g, "~M~l~N"
"~M~i~N", p); */
generate_next(g, p);
w(g, "~}");
}
/* Generate code for 'loop': a counting for-loop, running down from the
 * value of the arithmetic expression, around the operand. */
static void generate_loop(struct generator * g, struct node * p) {
    w(g, "~{int i; for (i = ");
    generate_AE(g, p->AE);
    wp(g, "; i > 0; i--)~C~{", p);

    generate(g, p->left);

    w(g, "~}~}");
}
/* Generate code for 'repeat', and the loop part of 'atleast'.
 *
 * A "while(1)" loop is written in which the operand is tried with a fresh
 * failure label: success continues the loop (decrementing i first when
 * atleast_case is set); failure restores the cursor, if it was kept, and
 * breaks out of the loop.
 */
static void generate_repeat(struct generator * g, struct node * p, int atleast_case) {
    int saved = 0;   /* keep-variable number, or 0 if the cursor is not kept */

    wp(g, "~Mwhile(1) {~C~+", p);
    if (repeat_restore(g, p->left)) {
        wp(g, "~M~k~N", p);
        saved = g->keep_count;
    }

    g->failure_string = 0;
    g->label_used = 0;
    g->failure_label = new_label(g);
    generate(g, p->left);

    if (atleast_case) {
        w(g, "~Mi--;~N");
    }
    w(g, "~Mcontinue;~N");

    if (g->label_used) {
        wsetl(g, g->failure_label);
    }
    if (saved != 0) {
        w(g, "~M");
        wrestore(g, p, saved);
        w(g, "~N");
    }
    w(g, "~Mbreak;~N~}");
}
/* Generate code for 'atleast': a counter i is initialised from the
 * arithmetic expression, the repeat loop decrements it on each success,
 * and the command fails if i is still positive when the loop ends. */
static void generate_atleast(struct generator * g, struct node * p) {
    /* failure state to reinstate once the repeat has been generated */
    int outer_used;
    int outer_label;
    const char * outer_string;

    w(g, "~{int i = ");
    generate_AE(g, p->AE);
    w(g, ";~N");

    outer_used = g->label_used;
    outer_label = g->failure_label;
    outer_string = g->failure_string;
    generate_repeat(g, p, true);
    g->label_used = outer_used;
    g->failure_label = outer_label;
    g->failure_string = outer_string;

    w(g, "~Mif (i > 0) ~f~N~}");
}
/* Generate code for 'setmark': store the cursor in the named variable. */
static void generate_setmark(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g,
       "~M~V0 = z->c;~C",
       p);
}
/* Generate code for 'tomark': fail if the cursor is already beyond the
 * mark (in the direction of travel), otherwise move the cursor to it. */
static void generate_tomark(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        g->S[0] = ">";
    } else {
        g->S[0] = "<";
    }

    w(g, "~Mif (z->c ~S0 ");
    generate_AE(g, p->AE);
    w(g, ") ~f~N");

    w(g, "~Mz->c = ");
    generate_AE(g, p->AE);
    wp(g, ";~C", p);
}
/* Generate code for 'atmark': fail unless the cursor equals the mark. */
static void generate_atmark(struct generator * g, struct node * p) {
    w(g, "~Mif (z->c != ");
    generate_AE(g, p->AE);
    wp(g, ") ~f~C", p);
}
static void generate_hop(struct generator * g, struct node * p) {
g->S[0] = p->mode == m_forward ? "+" : "-";
g->S[1] = p->mode == m_forward ? "0" : "z->lb";
if (g->options->utf8) {
w(g, "~{int ret = skip_utf8(z->p, z->c, ~S1, z->l, ~S0 ");
generate_AE(g, p->AE); w(g, ");~N");
w(g, "~Mif (ret < 0) ~f~N");
} else {
w(g, "~{int ret = z->c ~S0 ");
generate_AE(g, p->AE); w(g, ";~N");
w(g, "~Mif (~S1 > ret || ret > z->l) ~f~N");
}
wp(g, "~Mz->c = ret;~C"
"~}", p);
}
/* Generate code for 'delete': call slice_del() and propagate a negative
 * result to the caller of the generated routine. */
static void generate_delete(struct generator * g, struct node * p) {
    wp(g, "~{int ret = slice_del(z);~C", p);
    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
}
/* Generate code for 'tolimit': move the cursor to the limit (z->l going
 * forward, z->lb going backward). */
static void generate_tolimit(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        g->S[0] = "";
    } else {
        g->S[0] = "b";
    }
    wp(g, "~Mz->c = z->l~S0;~C", p);
}
static void generate_atlimit(struct generator * g, struct node * p) {
g->S[0] = p->mode == m_forward ? "" : "b";
g->S[1] = p->mode == m_forward ? "<" : ">";
wp(g, "~Mif (z->c ~S1 z->l~S0) ~f~C", p);
}
/* Generate code for the left slice marker: record the cursor in z->bra
 * when going forward, z->ket when going backward. */
static void generate_leftslice(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        g->S[0] = "bra";
    } else {
        g->S[0] = "ket";
    }
    wp(g, "~Mz->~S0 = z->c;~C", p);
}
/* Generate code for the right slice marker: record the cursor in z->ket
 * when going forward, z->bra when going backward. */
static void generate_rightslice(struct generator * g, struct node * p) {
    if (p->mode == m_forward) {
        g->S[0] = "ket";
    } else {
        g->S[0] = "bra";
    }
    wp(g, "~Mz->~S0 = z->c;~C", p);
}
/* Generate code for 'assign to': call assign_to() on the named variable
 * and return -1 from the generated routine if the result is null. */
static void generate_assignto(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g,
       "~M~V0 = assign_to(z, ~V0);~C"
       "~Mif (~V0 == 0) return -1;~C",
       p);
}
/* Generate code for 'slice to': call slice_to() on the named variable and
 * return -1 from the generated routine if the result is null. */
static void generate_sliceto(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g,
       "~M~V0 = slice_to(z, ~V0);~C"
       "~Mif (~V0 == 0) return -1;~C",
       p);
}
/* Write the data argument(s) of an insert or slice call: "<size>, <ref>"
 * for a literal string, or the variable for a named string. */
static void generate_data_address(struct generator * g, struct node * p) {
    symbol * literal = p->literalstring;

    if (literal == 0) {
        wv(g, p->name);
        return;
    }
    wi(g, SIZE(literal));
    w(g, ", ");
    wlitref(g, literal);
}
/* Generate code for 'insert' and 'attach' (style is the node type).
 *
 * The string is inserted at the cursor.  The cursor is saved and put back
 * afterwards for attach in forward mode and for insert in backward mode.
 */
static void generate_insert(struct generator * g, struct node * p, int style) {
    /* attach keeps the cursor going forward; backward mode flips that */
    const int keep_c = (style == c_attach) != (p->mode == m_backward);

    wp(g, "~{", p);
    if (keep_c) {
        w(g, "int c_keep = z->c;~N~M");
    }
    wp(g, "int ret = insert_~$(z, z->c, z->c, ", p);
    generate_data_address(g, p);
    wp(g, ");~C", p);
    if (keep_c) {
        w(g, "~Mz->c = c_keep;~N");
    }
    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
}
/* Generate code for 'assign from': replace the text between the cursor and
 * the limit (forward mode) or between the backward limit and the cursor
 * (backward mode) with the given string.
 *
 * In forward mode the cursor is saved and put back afterwards, as for
 * 'attach'.  A negative result from the insert call is returned from the
 * generated routine.
 *
 * The emitted block declares "int ret" itself.  Previously the generated
 * code assigned to "ret" without any declaration, unlike the code emitted
 * by generate_insert() and generate_slicefrom(), and so did not compile.
 */
static void generate_assignfrom(struct generator * g, struct node * p) {
    int keep_c = p->mode == m_forward; /* like 'attach' */

    wp(g, "~{", p);
    if (keep_c) {
        wp(g, "int c_keep = z->c;~N"
              "~Mint ret = insert_~$(z, z->c, z->l, ", p);
    } else {
        wp(g, "int ret = insert_~$(z, z->lb, z->c, ", p);
    }
    generate_data_address(g, p);
    wp(g, ");~C", p);
    if (keep_c) w(g, "~Mz->c = c_keep;~N");
    wp(g, "~Mif (ret < 0) return ret;~N"
          "~}", p);
}
/* bugs marked <======= fixed 22/7/02. Similar fixes required for Java */

/* Generate code for 'slice from': call slice_from_s() or slice_from_v()
 * (chosen by ~$) and propagate a negative result to the caller of the
 * generated routine. */
static void generate_slicefrom(struct generator * g, struct node * p) {
    /* w(g, "~Mslice_from_s(z, "); <============= bug! should be: */
    wp(g, "~{int ret = slice_from_~$(z, ", p);
    generate_data_address(g, p);
    wp(g, ");~C", p);
    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
}
/* Generate code for 'setlimit C1 for C2' (C1 is p->left, C2 is p->aux).
 *
 * The cursor is saved, C1 is generated to move the cursor to the new
 * limit, the old limit is remembered in mlimit, the limit is set to the
 * cursor and the cursor is restored.  C2 is then generated with a failure
 * string that reinstates the old limit; the same reinstating code is
 * written again after C2 for the success path.
 */
static void generate_setlimit(struct generator * g, struct node * p) {
int keep_c;
wp(g, "~{int mlimit;~C"
"~M~k~N"
, p);
keep_c = g->keep_count;
generate(g, p->left);
/* forward mode stores the distance from the old limit, backward mode the
   old backward limit itself */
if (p->mode == m_forward) w(g, "~Mmlimit = z->l - z->c; z->l = z->c;~N");
else w(g, "~Mmlimit = z->lb; z->lb = z->c;~N");
w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
g->failure_string = p->mode == m_forward ? "z->l += mlimit;" :
"z->lb = mlimit;";
generate(g, p->aux);
wms(g, g->failure_string);
w(g, "~N"
"~}");
}
/* Generate code for '$name C': run command C with the named string as the
 * current string.
 *
 * The generated code copies *z into a local env, points z->p at the
 * string with the cursor and limits covering all of it, runs C, stores
 * z->p back into the variable and restores *z from env.  A local flag
 * records whether C failed, so that the failure can be signalled after
 * the environment has been restored.
 */
static void generate_dollar(struct generator * g, struct node * p) {
/* enclosing failure state, reinstated before the final ~f */
int used = g->label_used;
int a0 = g->failure_label;
const char * a1 = g->failure_string;
g->failure_label = new_label(g);
g->label_used = 0;
g->failure_string = 0;
g->V[0] = p->name;
wp(g, "~{struct SN_env env = * z;~C"
"~Mint failure = 1; /* assume failure */~N"
"~Mz->p = ~V0;~N"
"~Mz->lb = z->c = 0;~N"
"~Mz->l = SIZE(z->p);~N", p);
generate(g, p->left);
w(g, "~Mfailure = 0; /* mark success */~N");
if (g->label_used)
wsetl(g, g->failure_label);
g->V[0] = p->name; /* necessary */
g->label_used = used;
g->failure_label = a0;
g->failure_string = a1;
w(g, "~M~V0 = z->p;~N"
"~M* z = env;~N"
"~Mif (failure) ~f~N~}");
}
/* Generate an integer assignment "<name> <s> <expression>;", where s is
 * the C assignment operator to use. */
static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
    g->S[0] = s;
    g->V[0] = p->name;

    w(g, "~M~V0 ~S0 ");
    generate_AE(g, p->AE);
    w(g, ";~N");
}
/* Generate an integer test that fails unless "<name> <s> <expression>"
 * holds, where s is the C comparison operator to use. */
static void generate_integer_test(struct generator * g, struct node * p, char * s) {
    g->S[0] = s;
    g->V[0] = p->name;

    w(g, "~Mif (!(~V0 ~S0 ");
    generate_AE(g, p->AE);
    w(g, ")) ~f~N");
}
/* Generate a call of the named routine: a result of 0 is failure, and a
 * negative result is returned from the generated routine. */
static void generate_call(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g,
       "~{int ret = ~V0(z);~N"
       "~Mif (ret == 0) ~f~C"
       "~Mif (ret < 0) return ret;~N~}",
       p);
}
/* Generate a test of the next character against the grouping named by p,
 * failing when the runtime routine returns nonzero.  complement selects
 * the "out" rather than the "in" routine. */
static void generate_grouping(struct generator * g, struct node * p, int complement) {
    struct grouping * grp = p->name->grouping;

    if (p->mode == m_forward) {
        g->S[0] = "";
    } else {
        g->S[0] = "_b";
    }
    if (complement) {
        g->S[1] = "out";
    } else {
        g->S[1] = "in";
    }
    if (g->options->utf8) {
        g->S[2] = "_U";
    } else {
        g->S[2] = "";
    }
    g->V[0] = p->name;
    g->I[0] = grp->smallest_ch;
    g->I[1] = grp->largest_ch;

    w(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 0)) ~f~N");
}
/* Generate a match against a named string variable, using eq_v() going
 * forward or eq_v_b() going backward. */
static void generate_namedstring(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    if (p->mode == m_forward) {
        g->S[0] = "";
    } else {
        g->S[0] = "_b";
    }
    wp(g, "~Mif (!(eq_v~S0(z, ~V0))) ~f~C", p);
}
/* Generate a match against a literal string, using eq_s() going forward or
 * eq_s_b() going backward. */
static void generate_literalstring(struct generator * g, struct node * p) {
    symbol * literal = p->literalstring;

    g->L[0] = literal;
    g->I[0] = SIZE(literal);
    if (p->mode == m_forward) {
        g->S[0] = "";
    } else {
        g->S[0] = "_b";
    }
    w(g, "~Mif (!(eq_s~S0(z, ~I0, ~L0))) ~f~N");
}
static void generate_define(struct generator * g, struct node * p) {
struct name * q = p->name;
g->next_label = 0;
g->S[0] = q->type == t_routine ? "static" : "extern";
g->V[0] = q;
w(g, "~N~S0 int ~V0(struct SN_env * z) {~N~+");
if (p->amongvar_needed) w(g, "~Mint among_var;~N");
g->failure_string = 0;
g->failure_label = x_return;
g->label_used = 0;
g->keep_count = 0;
generate(g, p->left);
w(g, "~Mreturn 1;~N~}");
}
/* Generate the find_among() call for a 'substring' or 'among' node.
 *
 * Where possible the call is preceded by a cheap inline test on a single
 * character which rules out a match without calling find_among().  The
 * candidate characters are summarised either as up to two explicit values
 * (cases[]) or, when they all share the same value of ch >> 5, as a 32-bit
 * bitmap indexed by the low 5 bits.
 */
static void generate_substring(struct generator * g, struct node * p) {
struct among * x = p->among;
int block = -1;
unsigned int bitmap = 0;
struct amongvec * among_cases = x->b;
int c;
int empty_case = -1;
int n_cases = 0;
symbol cases[2];
int shortest_size = INT_MAX;
g->S[0] = p->mode == m_forward ? "" : "_b";
g->I[0] = x->number;
g->I[1] = x->literalstring_count;
/* In forward mode with non-ASCII UTF-8 characters, the first character
* of the string will often be the same, so instead look at the last
* common character position.
*
* In backward mode, we can't match if there are fewer characters before
* the current position than the minimum length.
*/
/* find the length of the shortest non-empty string */
for (c = 0; c < x->literalstring_count; ++c) {
int size = among_cases[c].size;
if (size != 0 && size < shortest_size) {
shortest_size = size;
}
}
/* collect the character to be tested from each non-empty string */
for (c = 0; c < x->literalstring_count; ++c) {
symbol ch;
if (among_cases[c].size == 0) {
empty_case = c;
continue;
}
if (p->mode == m_forward) {
ch = among_cases[c].b[shortest_size - 1];
} else {
ch = among_cases[c].b[among_cases[c].size - 1];
}
if (n_cases == 0) {
block = ch >> 5;
} else if (ch >> 5 != block) {
/* characters span more than one block of 32: the bitmap cannot be used */
block = -1;
if (n_cases > 2) break;
}
if (block == -1) {
if (ch == cases[0]) continue;
if (n_cases < 2) {
cases[n_cases++] = ch;
} else if (ch != cases[1]) {
++n_cases;
break;
}
} else {
if ((bitmap & (1u << (ch & 0x1f))) == 0) {
bitmap |= 1u << (ch & 0x1f);
if (n_cases < 2)
cases[n_cases] = ch;
++n_cases;
}
}
}
/* the shortcut applies if the bitmap is usable or there are at most two
   distinct characters */
if (block != -1 || n_cases <= 2) {
char buf[64];
g->I[2] = block;
g->I[3] = bitmap;
g->I[4] = shortest_size - 1;
if (p->mode == m_forward) {
sprintf(buf, "z->p[z->c + %d]", shortest_size - 1);
g->S[1] = buf;
if (shortest_size == 1) {
wp(g, "~Mif (z->c >= z->l || ", p);
} else {
wp(g, "~Mif (z->c + ~I4 >= z->l || ", p);
}
} else {
g->S[1] = "z->p[z->c - 1]";
if (shortest_size == 1) {
wp(g, "~Mif (z->c <= z->lb || ", p);
} else {
wp(g, "~Mif (z->c - ~I4 <= z->lb || ", p);
}
}
if (n_cases == 0) {
/* We get this for the degenerate case: among { '' }
* This doesn't seem to be a useful construct, but it is
* syntactically valid.
*/
wp(g, "0", p);
} else if (n_cases == 1) {
g->I[4] = cases[0];
wp(g, "~S1 != ~I4", p);
} else if (n_cases == 2) {
g->I[4] = cases[0];
g->I[5] = cases[1];
wp(g, "(~S1 != ~I4 && ~S1 != ~I5)", p);
} else {
wp(g, "~S1 >> 5 != ~I2 || !((~I3 >> (~S1 & 0x1f)) & 1)", p);
}
ws(g, ") ");
if (empty_case != -1) {
/* If the among includes the empty string, it can never fail
* so not matching the bitmap means we match the empty string.
*/
g->I[4] = among_cases[empty_case].result;
wp(g, "among_var = ~I4; else~N", p);
} else {
wp(g, "~f~N", p);
}
} else {
#ifdef OPTIMISATION_WARNINGS
printf("Couldn't shortcut among %d\n", x->number);
#endif
}
/* among_var is only assigned when the among has commands or a starter */
if (x->command_count == 0 && x->starter == 0)
wp(g, "~Mif (!(find_among~S0(z, a_~I0, ~I1))) ~f~C", p);
else
wp(g, "~Mamong_var = find_among~S0(z, a_~I0, ~I1);~C"
"~Mif (!(among_var)) ~f~N", p);
}
/* Generate code for 'among'.
 *
 * The find_among() call is generated here unless the among has an
 * associated substring node (x->substring is set).  If the among has
 * neither commands nor a starter, nothing more is needed.  Otherwise the
 * starter, if any, is generated, followed by a switch on among_var with
 * one case per non-empty bracketed command, numbered from 1; case 0 is
 * failure.
 */
static void generate_among(struct generator * g, struct node * p) {
struct among * x = p->among;
int case_number = 1;
if (x->substring == 0) generate_substring(g, p);
if (x->command_count == 0 && x->starter == 0) return;
unless (x->starter == 0) generate(g, x->starter);
p = p->left;
/* skip a leading node that is not a literal string (presumably the starter) */
if (p != 0 && p->type != c_literalstring) p = p->right;
w(g, "~Mswitch(among_var) {~N~+"
"~Mcase 0: ~f~N");
until (p == 0) {
if (p->type == c_bra && p->left != 0) {
g->I[0] = case_number++;
w(g, "~Mcase ~I0:~N~+"); generate(g, p); w(g, "~Mbreak;~N~-");
}
p = p->right;
}
w(g, "~}");
}
/* Generate a test of a boolean variable: fail unless it is nonzero. */
static void generate_booltest(struct generator * g, struct node * p) {
    g->V[0] = p->name;
    wp(g,
       "~Mif (!(~V0)) ~f~C",
       p);
}
/* Generate code for 'false': unconditional failure. */
static void generate_false(struct generator * g, struct node * p) {
    wp(g,
       "~M~f~C",
       p);
}
/* Generate a debug() call carrying a running debug number and the source
 * line of the command. */
static void generate_debug(struct generator * g, struct node * p) {
    g->I[0] = g->debug_count;
    g->debug_count += 1;
    g->I[1] = p->line_number;
    wp(g, "~Mdebug(z, ~I0, ~I1);~C", p);
}
/* Generate code for the single node p by dispatching on its type.
 *
 * The failure state (label, string and label-used flag) is saved on entry
 * and reinstated on exit, so a command's generator may change it freely.
 * An unrecognised node type is reported on stderr and the program exits.
 */
static void generate(struct generator * g, struct node * p) {
int used = g->label_used;
int a0 = g->failure_label;
const char * a1 = g->failure_string;
switch (p->type)
{
case c_define: generate_define(g, p); break;
case c_bra: generate_bra(g, p); break;
case c_and: generate_and(g, p); break;
case c_or: generate_or(g, p); break;
case c_backwards: generate_backwards(g, p); break;
case c_not: generate_not(g, p); break;
case c_set: generate_set(g, p); break;
case c_unset: generate_unset(g, p); break;
case c_try: generate_try(g, p); break;
case c_fail: generate_fail(g, p); break;
case c_reverse:
case c_test: generate_test(g, p); break;
case c_do: generate_do(g, p); break;
case c_goto: generate_GO(g, p, 1); break;
case c_gopast: generate_GO(g, p, 0); break;
case c_repeat: generate_repeat(g, p, false); break;
case c_loop: generate_loop(g, p); break;
case c_atleast: generate_atleast(g, p); break;
case c_setmark: generate_setmark(g, p); break;
case c_tomark: generate_tomark(g, p); break;
case c_atmark: generate_atmark(g, p); break;
case c_hop: generate_hop(g, p); break;
case c_delete: generate_delete(g, p); break;
case c_next: generate_next(g, p); break;
case c_tolimit: generate_tolimit(g, p); break;
case c_atlimit: generate_atlimit(g, p); break;
case c_leftslice: generate_leftslice(g, p); break;
case c_rightslice: generate_rightslice(g, p); break;
case c_assignto: generate_assignto(g, p); break;
case c_sliceto: generate_sliceto(g, p); break;
case c_assign: generate_assignfrom(g, p); break;
case c_insert:
case c_attach: generate_insert(g, p, p->type); break;
case c_slicefrom: generate_slicefrom(g, p); break;
case c_setlimit: generate_setlimit(g, p); break;
case c_dollar: generate_dollar(g, p); break;
case c_mathassign: generate_integer_assign(g, p, "="); break;
case c_plusassign: generate_integer_assign(g, p, "+="); break;
case c_minusassign: generate_integer_assign(g, p, "-="); break;
case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
case c_divideassign: generate_integer_assign(g, p, "/="); break;
case c_eq: generate_integer_test(g, p, "=="); break;
case c_ne: generate_integer_test(g, p, "!="); break;
case c_gr: generate_integer_test(g, p, ">"); break;
case c_ge: generate_integer_test(g, p, ">="); break;
case c_ls: generate_integer_test(g, p, "<"); break;
case c_le: generate_integer_test(g, p, "<="); break;
case c_call: generate_call(g, p); break;
case c_grouping: generate_grouping(g, p, false); break;
case c_non: generate_grouping(g, p, true); break;
case c_name: generate_namedstring(g, p); break;
case c_literalstring: generate_literalstring(g, p); break;
case c_among: generate_among(g, p); break;
case c_substring: generate_substring(g, p); break;
case c_booltest: generate_booltest(g, p); break;
case c_false: generate_false(g, p); break;
case c_true: break;
case c_debug: generate_debug(g, p); break;
default: fprintf(stderr, "%d encountered\n", p->type);
exit(1);
}
/* label_used is only reinstated if the command installed its own failure
   label; otherwise any use it recorded belongs to the enclosing label */
if (g->failure_label != a0)
g->label_used = used;
g->failure_label = a0;
g->failure_string = a1;
}
/* Write the banner comment that opens each generated file. */
static void generate_start_comment(struct generator * g) {
    static const char banner[] =
        "~N/* This file was generated automatically by the Snowball to ANSI C compiler */~N";
    w(g, banner);
}
/* Write the #include of the runtime header.
 *
 * With no runtime path the header is included as plain "header.h".
 * Otherwise the path is prefixed, with a '/' added between path and file
 * name when the path does not already end in one.
 *
 * An empty runtime path is treated like a path needing no separator: the
 * original code indexed runtime_path[strlen(runtime_path) - 1], which reads
 * before the start of the string when its length is 0.
 */
static void generate_head(struct generator * g) {
    const char * path = g->options->runtime_path;
    size_t len;

    if (path == 0) {
        w(g, "~N#include \"header.h\"~N~N");
        return;
    }

    len = strlen(path);
    w(g, "~N#include \"");
    ws(g, path);
    if (len > 0 && path[len - 1] != '/') {
        wch(g, '/');
    }
    w(g, "header.h\"~N~N");
}
static void generate_routine_headers(struct generator * g) {
struct name * q = g->analyser->names;
until (q == 0) {
g->V[0] = q;
switch (q->type) {
case t_routine:
w(g, "static int ~W0(struct SN_env * z);~N");
break;
case t_external:
w(g,
"#ifdef __cplusplus~N"
"extern \"C\" {~N"
"#endif~N"
"extern int ~W0(struct SN_env * z);~N"
"#ifdef __cplusplus~N"
"}~N"
"#endif~N"
);
break;
}
q = q->next;
}
}
/* Write the static data for one among: first a symbol array
 * s_<among>_<i> for each non-empty string, then the table a_<among> with
 * one struct among initialiser per string. */
static void generate_among_table(struct generator * g, struct among * x) {
struct amongvec * v = x->b;
g->I[0] = x->number;
{
/* pass 1: the string literals */
int i;
for (i = 0; i < x->literalstring_count; i++)
{
g->I[1] = i;
g->I[2] = v->size;
g->L[0] = v->b;
unless (v->size == 0)
w(g, "static const symbol s_~I0_~I1[~I2] = ~A0;~N");
v++;
}
}
g->I[1] = x->literalstring_count;
w(g, "~N~Mstatic const struct among a_~I0[~I1] =~N{~N");
v = x->b;
{
/* pass 2: the table entries; S[0] is the separator, empty after the last */
int i;
for (i = 0; i < x->literalstring_count; i++) {
g->I[1] = i;
g->I[2] = v->size;
g->I[3] = v->i;
g->I[4] = v->result;
g->S[0] = i < x->literalstring_count - 1 ? "," : "";
w(g, "/*~J1 */ { ~I2, ");
if (v->size == 0) w(g, "0,");
else w(g, "s_~I0_~I1,");
w(g, " ~I3, ~I4, ");
if (v->function == 0) w(g, "0"); else
wvn(g, v->function);
w(g, "}~S0~N");
v++;
}
}
w(g, "};~N~N");
}
static void generate_amongs(struct generator * g) {
struct among * x = g->analyser->amongs;
until (x == 0) {
generate_among_table(g, x);
x = x->next;
}
}
/* Set bit i in the bit array b, which holds 8 bits per element. */
static void set_bit(symbol * b, int i) {
    b[i / 8] |= 1 << (i % 8);
}
/* Write the bitmap for one grouping as a static unsigned char array.
 *
 * The map covers the characters from q->smallest_ch to q->largest_ch,
 * with one bit per character, set for each character of the grouping.
 */
static void generate_grouping_table(struct generator * g, struct grouping * q) {
    int range = q->largest_ch - q->smallest_ch + 1;
    int size = (range + 7) / 8;   /* assume 8 bits per symbol */
    symbol * members = q->b;
    symbol * map = create_b(size);
    int k;

    /* build the bitmap */
    k = 0;
    while (k < size) {
        map[k] = 0;
        k++;
    }
    k = 0;
    while (k < SIZE(members)) {
        set_bit(map, members[k] - q->smallest_ch);
        k++;
    }

    /* write it out as a comma-separated initialiser */
    g->V[0] = q->name;
    w(g, "static const unsigned char ~V0[] = { ");
    k = 0;
    while (k < size) {
        wi(g, map[k]);
        if (k < size - 1) w(g, ", ");
        k++;
    }
    w(g, " };~N~N");

    lose_b(map);
}
static void generate_groupings(struct generator * g) {
struct grouping * q = g->analyser->groupings;
until (q == 0) {
generate_grouping_table(g, q);
q = q->next;
}
}
/* Write the definition of <prefix>create_env(), which calls
 * SN_create_env() with the numbers of string, integer and boolean names
 * counted by the analyser. */
static void generate_create(struct generator * g) {
    int * counts = g->analyser->name_count;

    g->I[0] = counts[t_string];
    g->I[1] = counts[t_integer];
    g->I[2] = counts[t_boolean];
    w(g,
      "~N"
      "extern struct SN_env * ~pcreate_env(void) { return SN_create_env(~I0, ~I1, ~I2); }"
      "~N");
}
/* Write the definition of <prefix>close_env(), which calls SN_close_env()
 * with the number of string names counted by the analyser. */
static void generate_close(struct generator * g) {
    int * counts = g->analyser->name_count;

    g->I[0] = counts[t_string];
    w(g,
      "~Nextern void ~pclose_env(struct SN_env * z) { SN_close_env(z, ~I0); }~N~N");
}
/* Write the declarations of <prefix>create_env() and <prefix>close_env(). */
static void generate_create_and_close_templates(struct generator * g) {
    w(g,
      "~N"
      "extern struct SN_env * ~pcreate_env(void);~N"
      "extern void ~pclose_env(struct SN_env * z);~N"
      "~N");
}
/* Write the body of the generated header file: the create/close
 * declarations, a declaration for each external routine and, when a
 * variables prefix is set in the options, a #define for each string,
 * integer and boolean name mapping <prefix><name> to its slot in S, I or
 * B.  Everything is wrapped in an extern "C" guard for C++. */
static void generate_header_file(struct generator * g) {
struct name * q = g->analyser->names;
char * vp = g->options->variables_prefix;
g->S[0] = vp;
w(g, "~N"
"#ifdef __cplusplus~N"
"extern \"C\" {~N"
"#endif~N"); /* for C++ */
generate_create_and_close_templates(g);
until (q == 0) {
g->V[0] = q;
switch (q->type)
{
case t_external:
w(g, "extern int ~W0(struct SN_env * z);~N");
break;
/* the three variable kinds pick their array name, then share label0 */
case t_string: g->S[1] = "S"; goto label0;
case t_integer: g->S[1] = "I"; goto label0;
case t_boolean: g->S[1] = "B";
label0:
if (vp) {
g->I[0] = q->count;
w(g, "#define ~S0");
str_append_b(g->outbuf, q->b);
w(g, " (~S1[~I0])~N");
}
break;
}
q = q->next;
}
w(g, "~N"
"#ifdef __cplusplus~N"
"}~N"
"#endif~N"); /* for C++ */
w(g, "~N");
}
/* Generate the C source file and its header for the analysed program.
 *
 * Two buffers are used for the source file.  The preamble, among tables
 * and grouping tables are written to the first, which then becomes
 * g->declarations so that literal string definitions (see wlitref) can be
 * appended to it while the routines are generated into a second buffer.
 * The declarations are output first, then the routines.  The second
 * buffer is then cleared and reused for the header file.
 */
extern void generate_program_c(struct generator * g) {
g->outbuf = str_new();
generate_start_comment(g);
generate_head(g);
generate_routine_headers(g);
w(g, "#ifdef __cplusplus~N"
"extern \"C\" {~N"
"#endif~N"
"~N");
generate_create_and_close_templates(g);
w(g, "~N"
"#ifdef __cplusplus~N"
"}~N"
"#endif~N");
generate_amongs(g);
generate_groupings(g);
/* from here on, output goes to a new buffer */
g->declarations = g->outbuf;
g->outbuf = str_new();
g->literalstring_count = 0;
{
struct node * p = g->analyser->program;
until (p == 0) { generate(g, p); p = p->right; }
}
generate_create(g);
generate_close(g);
output_str(g->options->output_c, g->declarations);
str_delete(g->declarations);
output_str(g->options->output_c, g->outbuf);
/* reuse the routines buffer for the header file */
str_clear(g->outbuf);
generate_start_comment(g);
generate_header_file(g);
output_str(g->options->output_h, g->outbuf);
str_delete(g->outbuf);
}
/* Create a generator for the C back end, bound to analyser a and options o.
 *
 * The caller releases it with close_generator_c().
 *
 * line_labelled is initialised alongside line_count: wbe() compares the
 * two, and may be reached before wsetl() has ever assigned line_labelled.
 * NEW is defined elsewhere; presumably it does not zero the structure, in
 * which case that comparison previously read an indeterminate value.
 */
extern struct generator * create_generator_c(struct analyser * a, struct options * o) {
    NEW(generator, g);
    g->analyser = a;
    g->options = o;
    g->margin = 0;
    g->debug_count = 0;
    g->line_count = 0;
    g->line_labelled = 0;
    return g;
}
/* Release a generator obtained from create_generator_c(). */
extern void close_generator_c(struct generator * g) {
    FREE(g);
}
#include <stdlib.h> /* for exit */
#include <string.h> /* for strlen */
#include <stdio.h> /* for fprintf etc */
#include "header.h"
/* prototypes */
/* forward declarations of routines that are used before they are defined */
static void generate(struct generator * g, struct node * p);
static void w(struct generator * g, const char * s);
static void writef(struct generator * g, const char * s, struct node * p);
/* Failure label values with a special meaning. */
enum special_labels {
/* failure is written as a return statement rather than a jump to a label */
x_return = -1
};
/* Return the next unused label number and advance the counter. */
static int new_label(struct generator * g) {
    int label = g->next_label;
    g->next_label = label + 1;
    return label;
}
/* Return a new string holding a fresh variable name "v_<n>", where <n> is
 * the generator's variable counter after it has been incremented. */
static struct str * vars_newname(struct generator * g) {
    struct str * name = str_new();

    g->var_number += 1;
    str_append_string(name, "v_");
    str_append_int(name, g->var_number);
    return name;
}
/* Output routines */
/* Write the contents of str to outfile.  The text is first converted to a
 * C string with b_to_s(), which is freed afterwards. */
static void output_str(FILE * outfile, struct str * str) {
    char * text = b_to_s(str_data(str));

    fprintf(outfile, "%s", text);
    free(text);
}
/* Write routines for simple entities */
/* Append the single character ch to the output buffer. */
static void write_char(struct generator * g, int ch) {
    struct str * out = g->outbuf;
    str_append_ch(out, ch);
}
/* Append a newline to the output buffer. */
static void write_newline(struct generator * g) {
    struct str * out = g->outbuf;
    str_append_string(out, "\n");
}
/* Append the C string s to the output buffer. */
static void write_string(struct generator * g, const char * s) {
    struct str * out = g->outbuf;
    str_append_string(out, s);
}
/* Append the symbol block b to the output buffer. */
static void write_b(struct generator * g, symbol * b) {
    struct str * out = g->outbuf;
    str_append_b(out, b);
}
/* Append the contents of str to the output buffer. */
static void write_str(struct generator * g, struct str * str) {
    struct str * out = g->outbuf;
    str_append(out, str);
}
/* Append the integer i, as formatted by str_append_int(), to the output
 * buffer. */
static void write_int(struct generator * g, int i) {
    struct str * out = g->outbuf;
    str_append_int(out, i);
}
/* Write routines for items from the syntax tree */
/* Write the generated name of p: its own name, preceded for every type
 * other than t_external by a one-letter tag for the type and an
 * underscore. */
static void write_varname(struct generator * g, struct name * p) {
    static const char type_tags[] = "SBIrxg";
    int tag = type_tags[p->type];

    if (p->type != t_external) {
        write_char(g, tag);
        write_char(g, '_');
    }
    str_append_b(g->outbuf, p->b);
}
/* Write a reference to the variable p. */
static void write_varref(struct generator * g, struct name * p) {
    /* In java, references look just the same */
    write_varname(g, p);
}
/* Write n as one hexadecimal digit: '0'-'9' for values below 10, and
 * letters counting up from 'A' for values from 10. */
static void write_hexdigit(struct generator * g, int n) {
    int digit;

    if (n < 10) {
        digit = n + '0';
    } else {
        digit = n - 10 + 'A';
    }
    write_char(g, digit);
}
/* Write ch as a backslash-u escape followed by four hexadecimal digits,
 * most significant first. */
static void write_hex(struct generator * g, int ch) {
    int shift = 12;

    write_string(g, "\\u");
    while (shift >= 0) {
        write_hexdigit(g, (ch >> shift) & 0xf);
        shift -= 4;
    }
}
/* Write the symbols of p as a double-quoted string literal.  Characters
 * in the range 32 to 127 are written as they are, with a backslash before
 * a double quote or a backslash; all others go through write_hex(). */
static void write_literal_string(struct generator * g, symbol * p) {
    int k = 0;

    write_string(g, "\"");
    while (k < SIZE(p)) {
        int ch = p[k];
        k++;
        if (ch < 32 || ch > 127) {
            write_hex(g, ch);
            continue;
        }
        if (ch == '\"' || ch == '\\') {
            write_string(g, "\\");
        }
        write_char(g, ch);
    }
    write_string(g, "\"");
}
/* Write the margin: one indent string per level of g->margin. */
static void write_margin(struct generator * g) {
    int level = 0;
    while (level < g->margin) {
        write_string(g, " ");
        level++;
    }
}
/* Write a variable declaration.
 *
 * The declaration text is a writef() format expanded against node p.  It
 * goes to g->declarations rather than to the current output buffer, and
 * is followed by ";" and a newline. */
static void write_declare(struct generator * g,
                          char * declaration,
                          struct node * p) {
    struct str * saved_outbuf = g->outbuf;

    g->outbuf = g->declarations;
    write_string(g, " ");
    writef(g, declaration, p);
    write_string(g, ";");
    write_newline(g);
    g->outbuf = saved_outbuf;
}
static void write_comment(struct generator * g, struct node * p) {
write_margin(g);
write_string(g, "// ");
write_string(g, (char *) name_of_token(p->type));
if (p->name != 0) {
write_string(g, " ");
str_append_b(g->outbuf, p->name->b);
}
write_string(g, ", line ");
write_int(g, p->line_number);
write_newline(g);
}
static void write_block_start(struct generator * g) {
w(g, "~M{~+~N");
}
static void write_block_end(struct generator * g) /* block end */ {
w(g, "~-~M}~N");
}
/* Declare the int variable named by savevar and emit code that records the
 * cursor in it. Outside forward mode the recorded value is "limit - cursor".
 * Uses template slots g->B[0] and g->S[1]. */
static void write_savecursor(struct generator * g, struct node * p,
                             struct str * savevar) {
    g->B[0] = str_data(savevar);
    g->S[1] = p->mode == m_forward ? "" : "limit - ";
    write_declare(g, "int ~B0", p);
    writef(g, "~M~B0 = ~S1cursor;~N", p);
}
/* Replace the contents of `out` with the Java statement that reinstates the
 * cursor from savevar - the inverse of what write_savecursor() stores for the
 * mode of node p. */
static void restore_string(struct node * p, struct str * out, struct str * savevar) {
    const char * lhs = "cursor = ";

    if (p->mode != m_forward) lhs = "cursor = limit - ";
    str_clear(out);
    str_append_string(out, lhs);
    str_append(out, savevar);
    str_append_string(out, ";");
}
/* Emit, as one indented line, the statement that restores the cursor from
 * savevar (see restore_string()). */
static void write_restorecursor(struct generator * g, struct node * p,
                                struct str * savevar) {
    struct str * statement = str_new();

    restore_string(p, statement, savevar);
    write_margin(g);
    write_str(g, statement);
    write_newline(g);
    str_delete(statement);
}
/* Emit the statement that steps the cursor by one in the direction of p's
 * mode: increment when forward, decrement otherwise. */
static void write_inc_cursor(struct generator * g, struct node * p) {
    write_margin(g);
    if (p->mode == m_forward) {
        write_string(g, "cursor++;");
    } else {
        write_string(g, "cursor--;");
    }
    write_newline(g);
}
static void wsetlab_begin(struct generator * g, int n) {
w(g, "~Mlab");
write_int(g, n);
w(g, ": do {~+~N");
}
static void wsetlab_end(struct generator * g) {
w(g, "~-~M} while (false);~N");
}
/* Emit "break lab<n>;" on its own indented line, i.e. a jump out of the
 * region opened by wsetlab_begin(g, n). */
static void wgotol(struct generator * g, int n) {
    w(g, "~Mbreak lab");
    write_int(g, n);
    w(g, ";~N");
}
/* Emit the code taken when the current command fails: first the pending
 * failure string (if it is non-empty), then either "return false;" when the
 * failure label is x_return or a break to the failure label. Code emitted
 * after this point is flagged as unreachable. */
static void write_failure(struct generator * g) {
    struct str * cleanup = g->failure_str;

    if (str_len(cleanup) != 0) {
        write_margin(g);
        write_str(g, cleanup);
        write_newline(g);
    }

    write_margin(g);
    if (g->failure_label == x_return) {
        write_string(g, "return false;");
    } else {
        write_string(g, "break lab");
        write_int(g, g->failure_label);
        write_string(g, ";");
    }
    write_newline(g);
    g->unreachable = true;
}
/* Emit "if (<s>)" followed by a braced failure block. `s` is a writef()
 * template expanded against p. Because the failure is conditional, the code
 * that follows remains reachable. */
static void write_failure_if(struct generator * g, char * s, struct node * p) {
    write_margin(g);
    write_string(g, "if (");
    writef(g, s, p);
    write_char(g, ')');
    write_newline(g);
    /* ~f writes the braced failure block and clears g->unreachable again. */
    writef(g, "~f", p);
}
/* Fail if the cursor is already at the limit for p's direction of travel. */
static void write_check_limit(struct generator * g, struct node * p) {
    char * at_limit = "cursor <= limit_backward";

    if (p->mode == m_forward) at_limit = "cursor >= limit";
    write_failure_if(g, at_limit, p);
}
/* Formatted write: copy `input` to the output, expanding '~' escapes.
 *
 *   ~C        comment describing node p
 *   ~f        braced failure block (code after it stays reachable)
 *   ~M  ~N    margin / newline
 *   ~{  ~}    open / close a brace block
 *   ~+  ~-    increase / decrease the margin
 *   ~Sd ~Bd ~Id ~Ld   slot d of g->S, g->B, g->I, g->L
 *   ~Vd ~Wd   variable reference / variable name for g->V[d]
 *   ~n        g->options->name
 *
 * Any other character following '~' is written literally.
 */
static void writef(struct generator * g, const char * input, struct node * p) {
    const char * at = input;
    const char * const end = input + strlen(input);

    while (at < end) {
        int ch = *at++;
        int code;

        if (ch != '~') {
            write_char(g, ch);
            continue;
        }

        code = *at++;
        switch (code) {
            case 'C':
                write_comment(g, p);
                break;
            case 'f':
                write_block_start(g);
                write_failure(g);
                g->unreachable = false;
                write_block_end(g);
                break;
            case 'M':
                write_margin(g);
                break;
            case 'N':
                write_newline(g);
                break;
            case '{':
                write_block_start(g);
                break;
            case '}':
                write_block_end(g);
                break;
            case 'S':
                write_string(g, g->S[*at++ - '0']);
                break;
            case 'B':
                write_b(g, g->B[*at++ - '0']);
                break;
            case 'I':
                write_int(g, g->I[*at++ - '0']);
                break;
            case 'V':
                write_varref(g, g->V[*at++ - '0']);
                break;
            case 'W':
                write_varname(g, g->V[*at++ - '0']);
                break;
            case 'L':
                write_literal_string(g, g->L[*at++ - '0']);
                break;
            case '+':
                g->margin++;
                break;
            case '-':
                g->margin--;
                break;
            case 'n':
                write_string(g, g->options->name);
                break;
            default:
                write_char(g, code);
                break;
        }
    }
}
/* Shorthand for writef() with no node, for templates that do not use ~C. */
static void w(struct generator * g, const char * s) {
    struct node * const no_node = 0;

    writef(g, s, no_node);
}
/* Emit the Java expression for the arithmetic-expression tree rooted at p.
 * Leaves and unary forms are written directly; the four binary operators are
 * written fully parenthesised as "(left op right)". Node types not listed
 * produce no output. */
static void generate_AE(struct generator * g, struct node * p) {
    char * op;

    switch (p->type) {
        case c_name:
            write_varref(g, p->name);
            return;
        case c_number:
            write_int(g, p->number);
            return;
        case c_maxint:
            write_string(g, "MAXINT");
            return;
        case c_minint:
            write_string(g, "MININT");
            return;
        case c_neg:
            write_string(g, "-");
            generate_AE(g, p->right);
            return;
        case c_sizeof:
            g->V[0] = p->name;
            w(g, "(~V0.length())");
            return;
        case c_cursor:
            w(g, "cursor");
            return;
        case c_limit:
            w(g, p->mode == m_forward ? "limit" : "limit_backward");
            return;
        case c_size:
            w(g, "(current.length())");
            return;

        /* Binary operators: pick the operator text, then share the tail. */
        case c_multiply: op = " * "; break;
        case c_plus:     op = " + "; break;
        case c_minus:    op = " - "; break;
        case c_divide:   op = " / "; break;

        default:
            return;
    }

    write_string(g, "(");
    generate_AE(g, p->left);
    write_string(g, op);
    generate_AE(g, p->right);
    write_string(g, ")");
}
/* K_needed() tests to see if we really need to keep c. Not true when the
 * command does not touch the cursor. This and repeat_score() could be
 * elaborated almost indefinitely.
 *
 * Walks p and its right-hand siblings: the listed command types leave the
 * answer open, calls and brackets are inspected recursively, and any other
 * command type means the cursor has to be kept.
 */
static int K_needed(struct generator * g, struct node * p) {
    struct node * q;

    for (q = p; q != 0; q = q->right) {
        switch (q->type) {
            case c_dollar:
            case c_leftslice:
            case c_rightslice:
            case c_mathassign:
            case c_plusassign:
            case c_minusassign:
            case c_multiplyassign:
            case c_divideassign:
            case c_eq:
            case c_ne:
            case c_gr:
            case c_ge:
            case c_ls:
            case c_le:
            case c_sliceto:
            case c_booltest:
            case c_true:
            case c_false:
            case c_debug:
                continue;

            case c_call:
                if (K_needed(g, q->name->definition)) return true;
                continue;

            case c_bra:
                if (K_needed(g, q->left)) return true;
                continue;

            default:
                return true;
        }
    }
    return false;
}
/* Score the command list starting at p for repeat_restore().
 * The listed no-op types add nothing, calls and brackets add the score of
 * what they contain, each simple matching command adds one, and any other
 * command resets the running score to 2. */
static int repeat_score(struct generator * g, struct node * p) {
    int score = 0;
    struct node * q;

    for (q = p; q != 0; q = q->right) {
        switch (q->type) {
            case c_dollar:
            case c_leftslice:
            case c_rightslice:
            case c_mathassign:
            case c_plusassign:
            case c_minusassign:
            case c_multiplyassign:
            case c_divideassign:
            case c_eq:
            case c_ne:
            case c_gr:
            case c_ge:
            case c_ls:
            case c_le:
            case c_sliceto:   /* case c_not: must not be included here! */
            case c_debug:
                break;

            case c_call:
                score += repeat_score(g, q->name->definition);
                break;

            case c_bra:
                score += repeat_score(g, q->left);
                break;

            case c_name:
            case c_literalstring:
            case c_next:
            case c_grouping:
            case c_non:
            case c_hop:
                score++;
                break;

            default:
                score = 2;
                break;
        }
    }
    return score;
}
/* Tests whether an expression needs its cursor reinstated inside a repeat:
 * true once repeat_score() reaches 2. */
static int repeat_restore(struct generator * g, struct node * p) {
    int score = repeat_score(g, p);

    return score >= 2;
}
/* Generate a bracketed command sequence: a comment for the bracket itself,
 * then each child command in order. */
static void generate_bra(struct generator * g, struct node * p) {
    struct node * child;

    write_comment(g, p);
    for (child = p->left; child != 0; child = child->right) {
        generate(g, child);
    }
}
/* Generate 'and': every operand is generated in turn. When the operands can
 * move the cursor it is saved up front and restored between operands (not
 * after the last one). Generation stops early once code becomes unreachable. */
static void generate_and(struct generator * g, struct node * p) {
    struct str * savevar = vars_newname(g);
    int keep_c = K_needed(g, p->left);
    struct node * operand;

    write_comment(g, p);
    if (keep_c) write_savecursor(g, p, savevar);

    for (operand = p->left; operand != 0; operand = operand->right) {
        generate(g, operand);
        if (g->unreachable) break;
        if (keep_c && operand->right != 0) {
            write_restorecursor(g, operand, savevar);
        }
    }
    str_delete(savevar);
}
/* Generate 'or'. Each alternative except the last gets its own failure label
 * and an empty failure string; when it succeeds control breaks to the shared
 * exit label, otherwise the cursor is restored (if it was saved) and the next
 * alternative is tried. The last alternative is generated with the enclosing
 * failure label and failure string reinstated. */
static void generate_or(struct generator * g, struct node * p) {
    struct str * savevar = vars_newname(g);
    int keep_c = K_needed(g, p->left);
    int outer_label = g->failure_label;
    struct str * outer_failure = str_copy(g->failure_str);
    int out_lab = new_label(g);
    struct node * alt = p->left;

    write_comment(g, p);
    wsetlab_begin(g, out_lab);
    if (keep_c) write_savecursor(g, p, savevar);
    str_clear(g->failure_str);

    if (alt == 0) {
        /* An 'or' should always have at least two sub nodes. */
        fprintf(stderr, "Error: \"or\" node without children nodes.");
        exit (1);
    }

    for (; alt->right != 0; alt = alt->right) {
        g->failure_label = new_label(g);
        wsetlab_begin(g, g->failure_label);
        generate(g, alt);
        if (!g->unreachable) wgotol(g, out_lab);
        wsetlab_end(g);
        g->unreachable = false;
        if (keep_c) write_restorecursor(g, alt, savevar);
    }

    g->failure_label = outer_label;
    str_delete(g->failure_str);
    g->failure_str = outer_failure;

    generate(g, alt);
    wsetlab_end(g);
    str_delete(savevar);
}
/* Generate 'backwards': the emitted code sets limit_backward to the current
 * cursor and moves the cursor to limit, runs the sub-command, then puts the
 * cursor back at limit_backward.
 *
 * The closing statement ends with a newline like every other emitted
 * statement, so whatever is generated next starts on a line of its own.
 */
static void generate_backwards(struct generator * g, struct node * p) {
    write_comment(g, p);
    writef(g,"~Mlimit_backward = cursor; cursor = limit;~N", p);
    generate(g, p->left);
    w(g, "~Mcursor = limit_backward;~N");
}
/* Generate 'not': the operand runs inside its own labelled region with an
 * empty failure string. If the operand runs to completion the enclosing
 * failure is emitted; if it fails, control leaves the region and carries on.
 * When the operand can move the cursor, the whole construct is wrapped in a
 * block that saves the cursor first and restores it afterwards. */
static void generate_not(struct generator * g, struct node * p) {
    struct str * savevar = vars_newname(g);
    int keep_c = K_needed(g, p->left);
    int outer_label = g->failure_label;
    struct str * outer_failure = str_copy(g->failure_str);

    write_comment(g, p);
    if (keep_c) {
        write_block_start(g);
        write_savecursor(g, p, savevar);
    }

    g->failure_label = new_label(g);
    str_clear(g->failure_str);
    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);

    /* Back to the enclosing failure context before emitting the failure. */
    g->failure_label = outer_label;
    str_delete(g->failure_str);
    g->failure_str = outer_failure;
    if (!g->unreachable) write_failure(g);

    wsetlab_end(g);
    g->unreachable = false;
    if (keep_c) {
        write_restorecursor(g, p, savevar);
        write_block_end(g);
    }
    str_delete(savevar);
}
/* Generate 'try': the operand runs inside its own labelled region, and a
 * failure inside it merely leaves that region, so the try itself always
 * succeeds.
 *
 * The failure string used inside the region is the cursor restore when the
 * operand can move the cursor, and otherwise empty. It must never be the
 * string inherited from an enclosing construct (for example the limit
 * restore installed by setlimit): that cleanup belongs to the enclosing
 * failure path and must not run on the try's own, non-fatal, failure.
 *
 * The caller (generate()) reinstates the previous failure label and string.
 */
static void generate_try(struct generator * g, struct node * p) {
    struct str * savevar = vars_newname(g);
    int keep_c = K_needed(g, p->left);

    write_comment(g, p);
    if (keep_c) write_savecursor(g, p, savevar);

    g->failure_label = new_label(g);
    /* Drop any inherited cleanup before installing our own (possibly none). */
    str_clear(g->failure_str);
    if (keep_c) restore_string(p, g->failure_str, savevar);

    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);
    wsetlab_end(g);
    g->unreachable = false;

    str_delete(savevar);
}
/* Generate 'set': assign true to the boolean variable named by p. */
static void generate_set(struct generator * g, struct node * p) {
    struct name * flag = p->name;

    write_comment(g, p);
    g->V[0] = flag;
    w(g, "~M~V0 = true;~N");
}
/* Generate 'unset': assign false to the boolean variable named by p. */
static void generate_unset(struct generator * g, struct node * p) {
    struct name * flag = p->name;

    write_comment(g, p);
    g->V[0] = flag;
    w(g, "~M~V0 = false;~N");
}
/* Generate 'fail': run the operand, then fail unconditionally (unless the
 * operand already left the following code unreachable). */
static void generate_fail(struct generator * g, struct node * p) {
    write_comment(g, p);
    generate(g, p->left);
    if (g->unreachable) return;
    write_failure(g);
}
/* generate_test() also implements 'reverse'.
 * The operand is generated between a cursor save and a cursor restore (both
 * only when the operand can move the cursor); the restore is omitted when the
 * code after the operand is unreachable. Failure of the operand propagates to
 * the enclosing failure context. */
static void generate_test(struct generator * g, struct node * p) {
    struct str * savevar = vars_newname(g);
    int keep_c = K_needed(g, p->left);

    write_comment(g, p);
    if (keep_c) write_savecursor(g, p, savevar);

    generate(g, p->left);

    if (keep_c && !g->unreachable) write_restorecursor(g, p, savevar);
    str_delete(savevar);
}
/* Generate 'do': the operand runs in its own labelled region with an empty
 * failure string, so its failure is swallowed; the cursor is then restored
 * whether or not it succeeded (when the operand can move it at all). */
static void generate_do(struct generator * g, struct node * p) {
    struct str * savevar = vars_newname(g);
    int keep_c = K_needed(g, p->left);

    write_comment(g, p);
    if (keep_c) write_savecursor(g, p, savevar);

    g->failure_label = new_label(g);
    str_clear(g->failure_str);

    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);
    wsetlab_end(g);
    g->unreachable = false;

    if (keep_c) write_restorecursor(g, p, savevar);
    str_delete(savevar);
}
/* Generate 'goto' (style == 1) and 'gopast' (style == 0).
 *
 * The emitted code is a labelled endless loop. Each pass tries the operand
 * inside its own failure region: on success the loop is left (for 'goto' the
 * cursor is first put back where the match started); on failure the cursor is
 * restored if necessary, the limit is checked using the enclosing failure
 * context, and the cursor advances one place before the next pass.
 *
 * The operand's failure region uses an empty failure string. A failure there
 * only means "try again one place further on", so cleanup inherited from an
 * enclosing construct (e.g. the "limit += v;" installed by setlimit) must not
 * run on every pass - it is emitted only by the limit check, once the
 * enclosing failure string has been reinstated.
 */
static void generate_GO(struct generator * g, struct node * p, int style) {
    int end_unreachable = false;
    struct str * savevar = vars_newname(g);
    int keep_c = style == 1 || repeat_restore(g, p->left);
    int a0 = g->failure_label;
    struct str * a1 = str_copy(g->failure_str);
    int golab = new_label(g);

    g->I[0] = golab;
    write_comment(g, p);
    w(g, "~Mgolab~I0: while(true)~N");
    w(g, "~{");
    if (keep_c) write_savecursor(g, p, savevar);

    g->failure_label = new_label(g);
    /* The retry path must not execute the enclosing construct's cleanup. */
    str_clear(g->failure_str);
    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);
    if (g->unreachable) {
        /* Cannot break out of this loop: therefore the code after the
         * end of the loop is unreachable. */
        end_unreachable = true;
    } else {
        /* include for goto; omit for gopast */
        if (style == 1) write_restorecursor(g, p, savevar);
        g->I[0] = golab;
        w(g, "~Mbreak golab~I0;~N");
    }
    g->unreachable = false;
    wsetlab_end(g);
    if (keep_c) write_restorecursor(g, p, savevar);

    /* Running into the limit is a failure of the goto/gopast itself, so it
     * is reported through the enclosing failure context. */
    g->failure_label = a0;
    str_delete(g->failure_str);
    g->failure_str = a1;
    write_check_limit(g, p);
    write_inc_cursor(g, p);
    write_block_end(g);

    str_delete(savevar);
    g->unreachable = end_unreachable;
}
/* Generate 'loop': a counted for-loop that starts a freshly declared int
 * variable at the value of p->AE and runs the operand while it is above zero.
 * Code after the loop is treated as reachable. */
static void generate_loop(struct generator * g, struct node * p) {
    struct str * counter = vars_newname(g);

    write_comment(g, p);
    g->B[0] = str_data(counter);
    write_declare(g, "int ~B0", p);

    w(g, "~Mfor (~B0 = ");
    generate_AE(g, p->AE);
    g->B[0] = str_data(counter);
    w(g, "; ~B0 > 0; ~B0--)~N");

    write_block_start(g);
    generate(g, p->left);
    write_block_end(g);

    str_delete(counter);
    g->unreachable = false;
}
/* Generate 'repeat' (and the loop part of 'atleast').
 * Emits a labelled endless loop: each pass runs the operand in its own
 * failure region with an empty failure string; on success the loop continues
 * (after decrementing loopvar when one is supplied), on failure the cursor is
 * restored if required and the loop is left. */
static void generate_repeat(struct generator * g, struct node * p, struct str * loopvar) {
    struct str * savevar = vars_newname(g);
    int keep_c = repeat_restore(g, p->left);
    int loop_label = new_label(g);

    write_comment(g, p);
    g->I[0] = loop_label;
    w(g, "~Mreplab~I0: while(true)~N~{");
    if (keep_c) write_savecursor(g, p, savevar);

    g->failure_label = new_label(g);
    str_clear(g->failure_str);
    wsetlab_begin(g, g->failure_label);
    generate(g, p->left);

    if (!g->unreachable) {
        if (loopvar != 0) {
            g->B[0] = str_data(loopvar);
            w(g, "~M~B0--;~N");
        }
        /* The operand may have reused slot I[0]; set it again. */
        g->I[0] = loop_label;
        w(g, "~Mcontinue replab~I0;~N");
    }

    wsetlab_end(g);
    g->unreachable = false;
    if (keep_c) write_restorecursor(g, p, savevar);

    g->I[0] = loop_label;
    w(g, "~Mbreak replab~I0;~N~}");
    str_delete(savevar);
}
/* Generate 'atleast': a counter is initialised from p->AE inside a fresh
 * block, the operand is repeated with the counter decremented on every
 * success, and the command fails if the counter is still above zero when the
 * repetition stops. */
static void generate_atleast(struct generator * g, struct node * p) {
    struct str * counter = vars_newname(g);
    int outer_label;
    struct str * outer_failure;

    write_comment(g, p);
    write_block_start(g);

    g->B[0] = str_data(counter);
    w(g, "~Mint ~B0 = ");
    generate_AE(g, p->AE);
    w(g, ";~N");

    /* generate_repeat() installs its own failure context; put ours back
     * afterwards so the final test fails into the enclosing context. */
    outer_label = g->failure_label;
    outer_failure = str_copy(g->failure_str);
    generate_repeat(g, p, counter);
    g->failure_label = outer_label;
    str_delete(g->failure_str);
    g->failure_str = outer_failure;

    g->B[0] = str_data(counter);
    write_failure_if(g, "~B0 > 0", p);
    write_block_end(g);
    str_delete(counter);
}
/* Generate 'setmark': store the current cursor in the named integer. */
static void generate_setmark(struct generator * g, struct node * p) {
    struct name * mark = p->name;

    write_comment(g, p);
    g->V[0] = mark;
    w(g, "~M~V0 = cursor;~N");
}
/* Generate 'tomark': fail if the cursor has already passed the mark given by
 * p->AE (beyond it when forward, before it when backward); otherwise move the
 * cursor to the mark. */
static void generate_tomark(struct generator * g, struct node * p) {
    write_comment(g, p);

    g->S[0] = ">";
    if (p->mode != m_forward) g->S[0] = "<";

    w(g, "~Mif (cursor ~S0 ");
    generate_AE(g, p->AE);
    w(g, ")~N");
    /* ~f: braced failure block; following code remains reachable. */
    w(g, "~f");

    w(g, "~Mcursor = ");
    generate_AE(g, p->AE);
    w(g, ";~N");
}
/* Generate 'atmark': fail unless the cursor equals the value of p->AE. */
static void generate_atmark(struct generator * g, struct node * p) {
    write_comment(g, p);
    w(g, "~Mif (cursor != ");
    generate_AE(g, p->AE);
    w(g, ")~N");
    /* ~f: braced failure block; following code remains reachable. */
    w(g, "~f");
}
static void generate_hop(struct generator * g, struct node * p) {
write_comment(g, p);
g->S[0] = p->mode == m_forward ? "+" : "-";
w(g, "~{~Mint c = cursor ~S0 ");
generate_AE(g, p->AE);
w(g, ";~N");
g->S[0] = p->mode == m_forward ? "0" : "limit_backward";
write_failure_if(g, "~S0 > c || c > limit", p);
writef(g, "~Mcursor = c;~N", p);
writef(g, "~}", p);
}
/* Generate 'delete': emit a call to the runtime's slice_del(). */
static void generate_delete(struct generator * g, struct node * p) {
    write_comment(g, p);
    w(g, "~Mslice_del();~N");
}
/* Generate 'next': fail if the cursor is at the limit, otherwise step it by
 * one in the current direction. */
static void generate_next(struct generator * g, struct node * p) {
    write_comment(g, p);

    /* Guard first, then move. */
    write_check_limit(g, p);
    write_inc_cursor(g, p);
}
/* Generate 'tolimit': move the cursor to limit (forward) or limit_backward
 * (backward). */
static void generate_tolimit(struct generator * g, struct node * p) {
    write_comment(g, p);
    if (p->mode == m_forward) {
        g->S[0] = "limit";
    } else {
        g->S[0] = "limit_backward";
    }
    w(g, "~Mcursor = ~S0;~N");
}
/* Generate 'atlimit': fail while the cursor has not yet reached the limit for
 * the current direction. */
static void generate_atlimit(struct generator * g, struct node * p) {
    write_comment(g, p);
    if (p->mode == m_forward) {
        g->S[0] = "limit";
        g->S[1] = "<";
    } else {
        g->S[0] = "limit_backward";
        g->S[1] = ">";
    }
    write_failure_if(g, "cursor ~S1 ~S0", p);
}
/* Generate '[': record the cursor as one end of the slice - bra when moving
 * forward, ket when moving backward. */
static void generate_leftslice(struct generator * g, struct node * p) {
    write_comment(g, p);
    g->S[0] = "ket";
    if (p->mode == m_forward) g->S[0] = "bra";
    w(g, "~M~S0 = cursor;~N");
}
/* Generate ']': record the cursor as the other end of the slice - ket when
 * moving forward, bra when moving backward. */
static void generate_rightslice(struct generator * g, struct node * p) {
    write_comment(g, p);
    g->S[0] = "bra";
    if (p->mode == m_forward) g->S[0] = "ket";
    w(g, "~M~S0 = cursor;~N");
}
/* Generate '=>': emit an assignment of the runtime's assign_to(var) result
 * back to the named string variable. */
static void generate_assignto(struct generator * g, struct node * p) {
    struct name * target = p->name;

    write_comment(g, p);
    g->V[0] = target;
    w(g, "~M~V0 = assign_to(~V0);~N");
}
/* Generate '->': emit an assignment of the runtime's slice_to(var) result
 * back to the named string variable. */
static void generate_sliceto(struct generator * g, struct node * p) {
    struct name * target = p->name;

    write_comment(g, p);
    g->V[0] = target;
    w(g, "~M~V0 = slice_to(~V0);~N");
}
/* Emit the string operand of node p: its literal string when it has one,
 * otherwise a reference to the variable it names. */
static void generate_address(struct generator * g, struct node * p) {
    symbol * literal = p->literalstring;

    if (literal == 0) {
        write_varref(g, p->name);
        return;
    }
    write_literal_string(g, literal);
}
/* Generate 'insert' and 'attach' (selected by style). Both emit an insert()
 * at the cursor; the two differ in whether the emitted code saves the cursor
 * beforehand and puts it back afterwards, and that choice is flipped in
 * backward mode. */
static void generate_insert(struct generator * g, struct node * p, int style) {
    /* Keep the cursor for 'attach' going forward, or for 'insert' going
     * backward. */
    int keep_c = (style == c_attach) != (p->mode == m_backward);

    write_comment(g, p);
    if (keep_c) w(g, "~{~Mint c = cursor;~N");

    w(g, "~Minsert(cursor, cursor, ");
    generate_address(g, p);
    w(g, ");~N");

    if (keep_c) w(g, "~Mcursor = c;~N~}");
}
static void generate_assignfrom(struct generator * g, struct node * p) {
int keep_c = p->mode == m_forward; /* like 'attach' */
write_comment(g, p);
if (keep_c) writef(g, "~{~Mint c = cursor;~N", p);
if (p->mode == m_forward) {
writef(g, "~Minsert(cursor, limit, ", p);
} else {
writef(g, "~Minsert(limit_backward, cursor, ", p);
}
generate_address(g, p);
writef(g, ");~N", p);
if (keep_c) w(g, "~Mcursor = c;~N~}");
}
/* Generate '<-': emit a slice_from() call whose argument is the node's string
 * operand. */
static void generate_slicefrom(struct generator * g, struct node * p) {
    write_comment(g, p);
    write_margin(g);
    write_string(g, "slice_from(");
    generate_address(g, p);
    w(g, ");~N");
}
static void generate_setlimit(struct generator * g, struct node * p) {
struct str * savevar = vars_newname(g);
struct str * varname = vars_newname(g);
write_comment(g, p);
write_savecursor(g, p, savevar);
generate(g, p->left);
if (!g->unreachable) {
g->B[0] = str_data(varname);
write_declare(g, "int ~B0", p);
if (p->mode == m_forward) {
w(g, "~M~B0 = limit - cursor;~N");
w(g, "~Mlimit = cursor;~N");
} else {
w(g, "~M~B0 = limit_backward;~N");
w(g, "~Mlimit_backward = cursor;~N");
}
write_restorecursor(g, p, savevar);
if (p->mode == m_forward) {
str_assign(g->failure_str, "limit += ");
str_append(g->failure_str, varname);
str_append_ch(g->failure_str, ';');
} else {
str_assign(g->failure_str, "limit_backward = ");
str_append(g->failure_str, varname);
str_append_ch(g->failure_str, ';');
}
generate(g, p->aux);
if (!g->unreachable) {
write_margin(g);
write_str(g, g->failure_str);
write_newline(g);
}
}
str_delete(varname);
str_delete(savevar);
}
/* dollar sets snowball up to operate on a string variable as if it were the
 * current string.
 * The emitted block keeps a reference to `this` in a fresh variable, switches
 * current/cursor/limit over to the named string, runs the operand, and then
 * emits copy_from(<saved>) - which is also the failure string while the
 * operand is being generated. */
static void generate_dollar(struct generator * g, struct node * p) {
    struct str * savevar = vars_newname(g);

    write_comment(g, p);
    g->V[0] = p->name;

    str_assign(g->failure_str, "copy_from(");
    str_append(g->failure_str, savevar);
    str_append_string(g->failure_str, ");");

    g->B[0] = str_data(savevar);
    w(g, "~{~M~n ~B0 = this;~N");
    w(g, "~Mcurrent = ~V0;~N");
    w(g, "~Mcursor = 0;~N");
    w(g, "~Mlimit = (current.length());~N");

    generate(g, p->left);

    if (!g->unreachable) {
        write_margin(g);
        write_str(g, g->failure_str);
        write_newline(g);
    }
    write_block_end(g);
    str_delete(savevar);
}
/* Emit "<var> <s> <AE>;" where s is the assignment operator to use
 * ("=", "+=", ...). */
static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
    g->S[0] = s;
    g->V[0] = p->name;

    w(g, "~M~V0 ~S0 ");
    generate_AE(g, p->AE);
    w(g, ";~N");
}
/* Emit a test that fails unless "<var> <s> <AE>" holds, where s is the
 * comparison operator to use ("==", "<", ...). */
static void generate_integer_test(struct generator * g, struct node * p, char * s) {
    g->S[0] = s;
    g->V[0] = p->name;

    w(g, "~Mif (!(~V0 ~S0 ");
    generate_AE(g, p->AE);
    w(g, "))~N");
    /* ~f: braced failure block; following code remains reachable. */
    w(g, "~f");
}
/* Generate a routine call: invoke the named routine and fail if it returns
 * false. */
static void generate_call(struct generator * g, struct node * p) {
    struct name * routine = p->name;

    write_comment(g, p);
    g->V[0] = routine;
    write_failure_if(g, "!~V0()", p);
}
/* Generate a grouping test ('non' when complement is set). A grouping with no
 * gaps between its smallest and largest character is tested with a range
 * check; any other grouping is tested against its generated table. The "_b"
 * variants are used in backward mode. */
static void generate_grouping(struct generator * g, struct node * p, int complement) {
    struct grouping * q = p->name->grouping;
    char * test;

    g->S[0] = p->mode == m_forward ? "" : "_b";
    g->S[1] = complement ? "out" : "in";
    g->V[0] = p->name;
    g->I[0] = q->smallest_ch;
    g->I[1] = q->largest_ch;

    test = q->no_gaps ? "!(~S1_range~S0(~I0, ~I1))"
                      : "!(~S1_grouping~S0(~V0, ~I0, ~I1))";
    write_failure_if(g, test, p);
}
/* Generate a match against a string variable: fail unless eq_v (eq_v_b in
 * backward mode) accepts it. */
static void generate_namedstring(struct generator * g, struct node * p) {
    write_comment(g, p);
    g->V[0] = p->name;
    g->S[0] = "_b";
    if (p->mode == m_forward) g->S[0] = "";
    write_failure_if(g, "!(eq_v~S0(~V0))", p);
}
/* Generate a match against a literal string: fail unless eq_s (eq_s_b in
 * backward mode) accepts the literal, whose length is passed alongside. */
static void generate_literalstring(struct generator * g, struct node * p) {
    symbol * literal = p->literalstring;

    write_comment(g, p);
    g->L[0] = literal;
    g->I[0] = SIZE(literal);
    g->S[0] = "_b";
    if (p->mode == m_forward) g->S[0] = "";
    write_failure_if(g, "!(eq_s~S0(~I0, ~L0))", p);
}
static void generate_define(struct generator * g, struct node * p) {
struct name * q = p->name;
struct str * saved_output = g->outbuf;
struct str * saved_declarations = g->declarations;
g->S[0] = q->type == t_routine ? "private" : "public";
g->V[0] = q;
w(g, "~+~+~N~M~S0 boolean ~V0() {~+~N");
g->outbuf = str_new();
g->declarations = str_new();
g->next_label = 0;
g->var_number = 0;
if (p->amongvar_needed) write_declare(g, "int among_var", p);
str_clear(g->failure_str);
g->failure_label = x_return;
g->unreachable = false;
generate(g, p->left);
if (!g->unreachable) w(g, "~Mreturn true;~N");
w(g, "~}~-~-");
str_append(saved_output, g->declarations);
str_append(saved_output, g->outbuf);
str_delete(g->declarations);
str_delete(g->outbuf);
g->declarations = saved_declarations;
g->outbuf = saved_output;
}
/* Generate the table lookup for an among: call find_among (find_among_b in
 * backward mode) on table a_<number> and fail when nothing matches. The
 * result is stored in among_var only when the among has commands or a starter
 * that will need it. */
static void generate_substring(struct generator * g, struct node * p) {
    struct among * x = p->among;
    const int result_needed = !(x->command_count == 0 && x->starter == 0);

    write_comment(g, p);
    g->S[0] = p->mode == m_forward ? "" : "_b";
    g->I[0] = x->number;
    g->I[1] = x->literalstring_count;

    if (!result_needed) {
        write_failure_if(g, "find_among~S0(a_~I0, ~I1) == 0", p);
        return;
    }
    w(g, "~Mamong_var = find_among~S0(a_~I0, ~I1);~N");
    write_failure_if(g, "among_var == 0", p);
}
/* Generate an among. The table lookup is emitted here unless a separate
 * 'substring' command already did so. If the among carries commands (or a
 * starter), a switch on among_var follows: case 0 fails, and each non-empty
 * bracketed command becomes the next numbered case. */
static void generate_among(struct generator * g, struct node * p) {
    struct among * x = p->among;
    int case_number = 1;
    struct node * q;

    if (x->substring == 0) generate_substring(g, p);
    if (x->command_count == 0 && x->starter == 0) return;
    if (x->starter != 0) generate(g, x->starter);

    /* Step over a leading node that is not a literal string. */
    q = p->left;
    if (q != 0 && q->type != c_literalstring) q = q->right;

    w(g, "~Mswitch(among_var) {~N~+");
    w(g, "~Mcase 0:~N~+");
    write_failure(g);
    g->unreachable = false;
    w(g, "~-");

    for (; q != 0; q = q->right) {
        if (q->type != c_bra || q->left == 0) continue;

        g->I[0] = case_number++;
        w(g, "~Mcase ~I0:~N~+");
        generate(g, q);
        if (!g->unreachable) w(g, "~Mbreak;~N");
        w(g, "~-");
        g->unreachable = false;
    }
    write_block_end(g);
}
/* Generate a boolean test: fail unless the named boolean variable is true. */
static void generate_booltest(struct generator * g, struct node * p) {
    struct name * flag = p->name;

    write_comment(g, p);
    g->V[0] = flag;
    write_failure_if(g, "!(~V0)", p);
}
/* Generate 'false': a comment followed by an unconditional failure. */
static void generate_false(struct generator * g, struct node * p) {
    write_comment(g, p);

    /* Leaves g->unreachable set for whatever would follow. */
    write_failure(g);
}
/* Generate '?': emit a debug() call carrying a running sequence number and
 * the source line of the command. */
static void generate_debug(struct generator * g, struct node * p) {
    write_comment(g, p);
    g->I[1] = p->line_number;
    g->I[0] = g->debug_count;
    g->debug_count++;
    w(g, "~Mdebug(~I0, ~I1);~N");
}
/* Emit Java code for the single command node p by dispatching on p->type.
 *
 * Nothing is emitted while g->unreachable is set. The failure label and
 * failure string in force on entry are saved before dispatch and reinstated
 * afterwards, so whatever a generate_* routine does to them stays local to
 * that command. A node type with no handler is reported on stderr and ends
 * the process with exit status 1.
 */
static void generate(struct generator * g, struct node * p) {
int a0;
struct str * a1;
if (g->unreachable) return;
/* Save the caller's failure context; a1 is a private copy because handlers
 * may modify g->failure_str in place. */
a0 = g->failure_label;
a1 = str_copy(g->failure_str);
switch (p->type)
{
case c_define: generate_define(g, p); break;
case c_bra: generate_bra(g, p); break;
case c_and: generate_and(g, p); break;
case c_or: generate_or(g, p); break;
case c_backwards: generate_backwards(g, p); break;
case c_not: generate_not(g, p); break;
case c_set: generate_set(g, p); break;
case c_unset: generate_unset(g, p); break;
case c_try: generate_try(g, p); break;
case c_fail: generate_fail(g, p); break;
case c_reverse:
case c_test: generate_test(g, p); break;
case c_do: generate_do(g, p); break;
/* goto and gopast share one generator; the last argument selects the style. */
case c_goto: generate_GO(g, p, 1); break;
case c_gopast: generate_GO(g, p, 0); break;
case c_repeat: generate_repeat(g, p, 0); break;
case c_loop: generate_loop(g, p); break;
case c_atleast: generate_atleast(g, p); break;
case c_setmark: generate_setmark(g, p); break;
case c_tomark: generate_tomark(g, p); break;
case c_atmark: generate_atmark(g, p); break;
case c_hop: generate_hop(g, p); break;
case c_delete: generate_delete(g, p); break;
case c_next: generate_next(g, p); break;
case c_tolimit: generate_tolimit(g, p); break;
case c_atlimit: generate_atlimit(g, p); break;
case c_leftslice: generate_leftslice(g, p); break;
case c_rightslice: generate_rightslice(g, p); break;
case c_assignto: generate_assignto(g, p); break;
case c_sliceto: generate_sliceto(g, p); break;
case c_assign: generate_assignfrom(g, p); break;
case c_insert:
case c_attach: generate_insert(g, p, p->type); break;
case c_slicefrom: generate_slicefrom(g, p); break;
case c_setlimit: generate_setlimit(g, p); break;
case c_dollar: generate_dollar(g, p); break;
/* Integer assignments: the string is the Java operator to emit. */
case c_mathassign: generate_integer_assign(g, p, "="); break;
case c_plusassign: generate_integer_assign(g, p, "+="); break;
case c_minusassign: generate_integer_assign(g, p, "-="); break;
case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
case c_divideassign: generate_integer_assign(g, p, "/="); break;
/* Integer comparisons: the string is the Java operator that must hold. */
case c_eq: generate_integer_test(g, p, "=="); break;
case c_ne: generate_integer_test(g, p, "!="); break;
case c_gr: generate_integer_test(g, p, ">"); break;
case c_ge: generate_integer_test(g, p, ">="); break;
case c_ls: generate_integer_test(g, p, "<"); break;
case c_le: generate_integer_test(g, p, "<="); break;
case c_call: generate_call(g, p); break;
/* The last argument is the complement flag ('non'). */
case c_grouping: generate_grouping(g, p, false); break;
case c_non: generate_grouping(g, p, true); break;
case c_name: generate_namedstring(g, p); break;
case c_literalstring: generate_literalstring(g, p); break;
case c_among: generate_among(g, p); break;
case c_substring: generate_substring(g, p); break;
case c_booltest: generate_booltest(g, p); break;
case c_false: generate_false(g, p); break;
/* 'true' needs no code at all. */
case c_true: break;
case c_debug: generate_debug(g, p); break;
default: fprintf(stderr, "%d encountered\n", p->type);
exit(1);
}
/* Reinstate the caller's failure context, discarding whatever string the
 * handler left behind. */
g->failure_label = a0;
str_delete(g->failure_str);
g->failure_str = a1;
}
/* Emit the banner comment at the top of the generated file, followed by a
 * blank line. */
static void generate_start_comment(struct generator * g) {
    static const char * const banner[] = {
        "// This file was generated automatically by the Snowball to Java compiler~N",
        "~N"
    };
    int i;

    for (i = 0; i < (int) (sizeof banner / sizeof banner[0]); i++) {
        w(g, banner[i]);
    }
}
static void generate_class_begin(struct generator * g) {
w(g, "package " );
w(g, g->options->package);
w(g, ";~N~N" );
w(g, "import ");
w(g, g->options->among_class );
w(g, ";~N"
"~N"
" /**~N"
" * This class was automatically generated by a Snowball to Java compiler ~N"
" * It implements the stemming algorithm defined by a snowball script.~N"
" */~N"
"~N"
"public class ~n extends ");
w(g, g->options->parent_class_name);
w(g, " {~N"
"~N"
"private static final long serialVersionUID = 1L;~N"
"~N"
"~+~+~Mprivate final static ~n methodObject = new ~n ();~N"
"~N");
}
/* Emit the closing brace of the generated class and the trailing blank
 * lines. */
static void generate_class_end(struct generator * g) {
    w(g, "~N}"
         "~N~N");
}
/* Emit equals() and hashCode() for the generated class: equality is "is an
 * instance of this class", and the hash is derived from the class name. */
static void generate_equals(struct generator * g) {
    char * class_name = g->options->name;

    /* equals() */
    w(g, "~N"
         "~Mpublic boolean equals( Object o ) {~N"
         "~+~Mreturn o instanceof ");
    w(g, class_name);
    w(g, ";~N~-~M}~N"
         "~N"
         /* hashCode() */
         "~Mpublic int hashCode() {~N"
         "~+~Mreturn ");
    w(g, class_name);
    w(g, ".class.getName().hashCode();~N"
         "~-~M}~N");
    w(g, "~N~N");
}
/* Emit the static Among array a_<number> for one among: one "new Among"
 * entry per literal string, holding the string, the index of its longest
 * substring entry, its result code and the name of its routine (an empty
 * string when it has none). Entries are comma-separated. */
static void generate_among_table(struct generator * g, struct among * x) {
    struct amongvec * entries = x->b;
    const int count = x->literalstring_count;
    int i;

    g->I[0] = x->number;
    g->I[1] = count;
    w(g, "~+~+~Mprivate final static Among a_~I0[] = {~N~+");

    for (i = 0; i < count; i++) {
        struct amongvec * v = &entries[i];

        g->I[0] = i;
        g->I[1] = v->i;
        g->I[2] = v->result;
        g->L[0] = v->b;
        /* No separator after the final entry. */
        g->S[0] = i < count - 1 ? "," : "";

        w(g, "~Mnew Among ( ~L0, ~I1, ~I2, \"");
        if (v->function != 0) {
            write_varname(g, v->function);
        }
        w(g, "\", methodObject )~S0~N");
    }

    w(g, "~-~M};~-~-~N~N");
}
/* Emit the lookup table for every among recorded by the analyser. */
static void generate_amongs(struct generator * g) {
    struct among * x;

    for (x = g->analyser->amongs; x != 0; x = x->next) {
        generate_among_table(g, x);
    }
}
/* Set bit i of the bitmap b, which stores 8 bits per element. */
static void set_bit(symbol * b, int i) {
    b[i / 8] |= 1 << (i % 8);
}
/* Return non-zero when bit i of the bitmap b (8 bits per element) is set. */
static int bit_is_set(symbol * b, int i) {
    return b[i / 8] & (1 << (i % 8));
}
static void generate_grouping_table(struct generator * g, struct grouping * q) {
int range = q->largest_ch - q->smallest_ch + 1;
int size = (range + 7)/ 8; /* assume 8 bits per symbol */
symbol * b = q->b;
symbol * map = create_b(size);
int i;
for (i = 0; i < size; i++) map[i] = 0;
/* Using unicode would require revision here */
for (i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
q->no_gaps = true;
for (i = 0; i < range; i++) unless (bit_is_set(map, i)) q->no_gaps = false;
unless (q->no_gaps) {
g->V[0] = q->name;
w(g, "~+~+~Mprivate static final char ~V0[] = {");
for (i = 0; i < size; i++) {
write_int(g, map[i]);
if (i < size - 1) w(g, ", ");
}
w(g, " };~N~-~-~N");
}
lose_b(map);
}
static void generate_groupings(struct generator * g) {
struct grouping * q = g->analyser->groupings;
until (q == 0) {
generate_grouping_table(g, q);
q = q->next;
}
}
static void generate_members(struct generator * g) {
struct name * q = g->analyser->names;
until (q == 0) {
g->V[0] = q;
switch (q->type) {
case t_string:
w(g, " private ");
w(g, g->options->string_class );
w(g, " ~W0 = new ");
w(g, g->options->string_class);
w(g, "();~N");
break;
case t_integer:
w(g, " private int ~W0;~N");
break;
case t_boolean:
w(g, " private boolean ~W0;~N");
break;
}
q = q->next;
}
w(g, "~N");
}
/* Emit the private copy_from(other) method: it copies every string, integer
 * and boolean member from `other` and then defers to super.copy_from(). */
static void generate_copyfrom(struct generator * g) {
    struct name * q = g->analyser->names;

    w(g, "~+~+~Mprivate void copy_from(~n other) {~+~N");
    while (q != 0) {
        int is_member = q->type == t_string
                     || q->type == t_integer
                     || q->type == t_boolean;

        g->V[0] = q;
        if (is_member) w(g, "~M~W0 = other.~W0;~N");
        q = q->next;
    }
    w(g, "~Msuper.copy_from(other);~N");
    w(g, "~-~M}~-~-~N");
}
/* Generate code for every top-level node of the program, starting each one
 * with the unreachable flag cleared. */
static void generate_methods(struct generator * g) {
    struct node * p;

    for (p = g->analyser->program; p != 0; p = p->right) {
        generate(g, p);
        g->unreachable = false;
    }
}
extern void generate_program_java(struct generator * g) {
g->outbuf = str_new();
g->failure_str = str_new();
generate_start_comment(g);
generate_class_begin(g);
generate_amongs(g);
generate_groupings(g);
generate_members(g);
generate_copyfrom(g);
generate_methods(g);
generate_equals(g);
generate_class_end(g);
output_str(g->options->output_java, g->outbuf);
str_delete(g->failure_str);
str_delete(g->outbuf);
}
/* Allocate a generator bound to analyser a and options o, with the margin,
 * debug counter and unreachable flag reset. Release it with
 * close_generator_java(). */
extern struct generator * create_generator_java(struct analyser * a, struct options * o) {
    NEW(generator, g);

    g->unreachable = false;
    g->debug_count = 0;
    g->margin = 0;
    g->options = o;
    g->analyser = a;
    return g;
}
/* Release a generator obtained from create_generator_java(). */
extern void close_generator_java(struct generator * g) {
    struct generator * doomed = g;

    FREE(doomed);
}
/* Basic types and convenience macros shared by the compiler sources. */

typedef unsigned char byte;
typedef unsigned short symbol;

#define true 1
#define false 0

/* Control-flow sugar: repeat { ... } loops forever; unless / until are the
 * negated forms of if / while. */
#define repeat while(true)
#define unless(C) if(!(C))
#define until(C) while(!(C))

/* All allocation goes through the checked allocator pair. */
#define MALLOC check_malloc
#define FREE check_free

/* NEW(type, p) declares `struct type * p` pointing at one freshly allocated
 * struct; NEWVEC(type, p, n) does the same for an array of n of them. The
 * count is parenthesised so that an expression such as `a + b` is multiplied
 * as a whole. */
#define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
#define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))

#define STARTSIZE 10

/* A symbol block is preceded in memory by two ints: SIZE(p) is the int
 * immediately before the data, CAPACITY(p) the int before that. */
#define SIZE(p) ((int *)(p))[-1]
#define CAPACITY(p) ((int *)(p))[-2]
/* Operations on symbol blocks ("b" blocks): arrays of symbol whose size and
 * capacity are read through the SIZE() and CAPACITY() macros.
 * NOTE(review): the routines are defined outside this header; the notes below
 * are inferred from names and signatures only - confirm against the
 * definitions. Several return a symbol *, presumably because the block may be
 * reallocated, so callers should use the returned pointer. */
extern symbol * create_b(int n);
extern void report_b(FILE * out, symbol * p);
extern void lose_b(symbol * p);
extern symbol * increase_capacity(symbol * p, int n);
extern symbol * move_to_b(symbol * p, int n, symbol * q);
extern symbol * add_to_b(symbol * p, int n, symbol * q);
extern symbol * copy_b(symbol * p);
/* The generator free()s the string returned by b_to_s(). */
extern char * b_to_s(symbol * p);
extern symbol * add_s_to_b(symbol * p, const char * s);
/* Growable string buffer. The type is opaque here; all access goes through
 * the str_* routines below. The generator pairs every str_new() / str_copy()
 * with a str_delete(). */
struct str; /* defined in space.c */
extern struct str * str_new(void);
extern void str_delete(struct str * str);
/* Appending variants, one per kind of source data. */
extern void str_append(struct str * str, struct str * add);
extern void str_append_ch(struct str * str, char add);
extern void str_append_b(struct str * str, symbol * q);
extern void str_append_string(struct str * str, const char * s);
extern void str_append_int(struct str * str, int i);
extern void str_clear(struct str * str);
extern void str_assign(struct str * str, char * s);
extern struct str * str_copy(struct str * old);
/* Access to the underlying symbol data and its length. */
extern symbol * str_data(struct str * str);
extern int str_len(struct str * str);
/* UTF-8 helpers. NOTE(review): presumably get_utf8() decodes one character
 * from p into *slot and put_utf8() encodes ch at p, each returning the number
 * of symbols used - confirm against the definitions. */
extern int get_utf8(const symbol * p, int * slot);
extern int put_utf8(int ch, symbol * p);
/* Singly linked list node pairing a name with a value, both held as symbol
 * blocks. The tokeniser keeps such a list in its m_pairs field.
 * NOTE(review): presumably the table of string macro definitions - confirm. */
struct m_pair {
struct m_pair * next;
symbol * name;
symbol * value;
};
/* One saved input position in a linked list: a symbol block p, an index c and
 * a line number - the same three fields the tokeniser carries for the input
 * it is currently reading.
 * NOTE(review): presumably a stack of suspended inputs - confirm. */
struct input {
struct input * next;
symbol * p;
int c;
int line_number;
};
/* Linked list node holding one symbol block; the tokeniser keeps a list of
 * these in its includes field.
 * NOTE(review): presumably include directories or file names - confirm. */
struct include {
struct include * next;
symbol * b;
};
/* State of the tokeniser. Field meanings below are taken from the names and
 * types only.
 * NOTE(review): confirm details against tokeniser.c. */
struct tokeniser {
/* List of struct input records (see struct input). */
struct input * next;
/* Current input: symbol block, index into it, and line number. */
symbol * p;
int c;
int line_number;
symbol * b;
symbol * b2;
int number;
int m_start;
int m_end;
struct m_pair * m_pairs;
int get_depth;
int error_count;
/* Most recent token and the one before it. */
int token;
int previous_token;
byte token_held;
/* Input-encoding flags. */
byte widechars;
byte utf8;
int omission;
struct include * includes;
};
/* Tokeniser interface. create_tokeniser() / close_tokeniser() bracket the
 * lifetime of a tokeniser; read_token() returns a token code (see enum
 * token_codes). The generator prints name_of_token()'s result, cast to
 * char *, as the text of a token. */
extern symbol * get_input(symbol * p);
extern struct tokeniser * create_tokeniser(symbol * b);
extern int read_token(struct tokeniser * t);
extern byte * name_of_token(int code);
extern void close_tokeniser(struct tokeniser * t);
/* Token / node type codes. The first enumerators come from the included
 * file syswords2.h; the enumerators listed explicitly here follow them. */
enum token_codes {
#include "syswords2.h"
c_mathassign,
c_name,
c_number,
c_literalstring,
c_neg,
c_call,
c_grouping,
c_booltest
};
/* Count of check_malloc() calls not yet matched by check_free(). */
extern int space_count;
/* malloc() wrapper that increments space_count. */
extern void * check_malloc(int n);
/* free() wrapper that decrements space_count. */
extern void check_free(void * p);
struct node;
/* An entry in a singly linked list of names (see struct analyser.names).
   NOTE(review): presumably one entry per name declared in the Snowball
   program - confirm against the analyser. */
struct name {
struct name * next;
/* the text of the name */
symbol * b;
int type; /* t_string etc */
int mode; /* )_ for routines, externals */
struct node * definition; /* ) */
int count; /* 0, 1, 2 for each type */
struct grouping * grouping; /* for grouping names */
byte referenced;
byte used;
};
/* Node of a singly linked list of blocks (see struct
   analyser.literalstrings). */
struct literalstring {
struct literalstring * next;
/* the string's symbols */
symbol * b;
};
/* One case of an among; struct among.b points to these. */
struct amongvec {
symbol * b; /* the string giving the case */
int size; /* - and its size */
struct node * p; /* the corresponding command */
int i; /* the amongvec index of the longest substring of b */
int result; /* the numeric result for the case */
/* NOTE(review): presumably a routine attached to this case - confirm */
struct name * function;
};
/* One among construct; amongs are chained through next (see struct
   analyser.amongs). */
struct among {
struct among * next;
struct amongvec * b; /* pointer to the amongvec */
int number; /* amongs are numbered 0, 1, 2 ... */
int literalstring_count; /* in this among */
int command_count; /* in this among */
struct node * starter; /* i.e. among( (starter) 'string' ... ) */
struct node * substring; /* i.e. substring ... among ( ... ) */
};
/* One grouping (character set); groupings are chained through next (see
   struct analyser.groupings). */
struct grouping {
struct grouping * next;
int number; /* groupings are numbered 0, 1, 2 ... */
symbol * b; /* the characters of this group */
int largest_ch; /* character with max code */
int smallest_ch; /* character with min code */
byte no_gaps; /* not used in generator.c after 11/5/05 */
struct name * name; /* so g->name->grouping == g */
};
/* Node type used for the program held in struct analyser (nodes, program,
   program_end). NOTE(review): the meaning of fields without an inline
   comment is not visible in this part of the file; confirm against the
   analyser before relying on them. */
struct node {
struct node * next;
struct node * left;
struct node * aux; /* used in setlimit */
struct among * among; /* used in among */
struct node * right;
int type;
int mode;
struct node * AE;
struct name * name;
symbol * literalstring;
int number;
int line_number;
int amongvar_needed; /* used in routine definitions */
};
/* Kinds of name. t_size is the number of kinds and is the length of
   analyser.name_count[]. */
enum name_types {
t_size = 6,
t_string = 0, t_boolean = 1, t_integer = 2, t_routine = 3, t_external = 4,
t_grouping = 5
/* If this list is extended, adjust wvn in generator.c */
};
/* In name_count[i] below, remember that
type is
----+----
0 | string
1 | boolean
2 | integer
3 | routine
4 | external
5 | grouping
*/
/* State of the analyser, which reads tokens from its tokeniser.
   The *_end fields accompany the list heads of the same name (presumably
   pointing at the last element so lists can be appended to - confirm). */
struct analyser {
struct tokeniser * tokeniser;
struct node * nodes;
struct name * names;
struct literalstring * literalstrings;
/* one of enum analyser_modes */
int mode;
byte modifyable; /* false inside reverse(...) */
struct node * program;
struct node * program_end;
int name_count[t_size]; /* name_count[i] counts the number of names of type i */
struct among * amongs;
struct among * amongs_end;
int among_count;
int amongvar_needed; /* used in reading routine definitions */
struct grouping * groupings;
struct grouping * groupings_end;
struct node * substring; /* pending 'substring' in current routine definition */
byte utf8;
};
/* Values for the mode fields: forward (0) or backward (1) processing. */
enum analyser_modes {
m_forward = 0, m_backward /*, m_integer */
};
/* Analyser entry points; their definitions are not in this part of the
   file, so only the signatures are documented here. */
extern void print_program(struct analyser * a);
/* Create an analyser that takes its tokens from t. */
extern struct analyser * create_analyser(struct tokeniser * t);
extern void close_analyser(struct analyser * a);
extern void read_program(struct analyser * a);
/* State of a code generator working from an analyser's results under a
   set of command line options. */
struct generator {
struct analyser * analyser;
struct options * options;
int unreachable; /* 0 if code can be reached, 1 if current code
* is unreachable. */
int var_number; /* Number of next variable to use. */
struct str * outbuf; /* temporary str to store output */
struct str * declarations; /* str storing variable declarations */
int next_label;
int margin;
const char * failure_string; /* String to output in case of a failure. */
struct str * failure_str; /* This is used by the java generator instead of failure_string */
int label_used; /* Keep track of whether the failure label is used. */
int failure_label;
int debug_count;
const char * S[10]; /* strings */
symbol * B[10]; /* blocks */
int I[10]; /* integers */
struct name * V[5]; /* variables */
symbol * L[5]; /* literals, used in formatted write */
int line_count; /* counts number of lines output */
int line_labelled; /* in ANSI C, will need extra ';' if it is a block end */
int literalstring_count;
int keep_count; /* used to number keep/restore pairs to avoid compiler warnings
about shadowed variables */
};
/* Settings taken from the command line and handed to the generators. */
struct options {
/* for the command line: */
char * output_file;
char * name;
/* output streams, one per kind of generated file */
FILE * output_c;
FILE * output_h;
FILE * output_java;
byte syntax_tree;
byte widechars;
/* target language of the generated code */
enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS } make_lang;
char * externals_prefix;
char * variables_prefix;
char * runtime_path;
char * parent_class_name;
char * package;
char * string_class;
char * among_class;
/* list of include prefixes; includes_end accompanies the list head */
struct include * includes;
struct include * includes_end;
byte utf8;
};
/* Generator for C code: create from an analyser and options, generate the
   program, then close. Definitions are not in this part of the file. */
extern struct generator * create_generator_c(struct analyser * a, struct options * o);
extern void close_generator_c(struct generator * g);
extern void generate_program_c(struct generator * g);
/* Generator for Java code: same three entry points as the C generator. */
extern struct generator * create_generator_java(struct analyser * a, struct options * o);
extern void close_generator_java(struct generator * g);
extern void generate_program_java(struct generator * g);
#include <stdio.h> /* for printf */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memmove */
#include "header.h"
/* Bytes reserved in front of a block's data for its CAPACITY and SIZE
   ints. Parenthesised so the macro stays a single operand in any
   expression (e.g. `x / HEAD`). */
#define HEAD (2*sizeof(int))
/* Extra symbols added on top of the requested amount whenever a block
   grows, so repeated small appends do not reallocate every time. */
#define EXTENDER 40
/* This module provides a simple mechanism for arbitrary length writable
strings, called 'blocks'. They are 'symbol *' items rather than 'char *'
items however.
The calls are:
symbol * b = create_b(n);
- create an empty block b with room for n symbols
b = increase_capacity(b, n);
- increase the capacity of block b by n symbols (b may change)
b2 = copy_b(b)
- copy block b into b2
lose_b(b);
- lose block b
b = move_to_b(b, n, p);
- set the data in b to be the n symbols at address p
b = add_to_b(b, n, p);
- add the n symbols at address p to the end of the data in b
SIZE(b)
- is the number of symbols in b
For example:
symbol * b = create_b(0);
{ int i;
char p[10];
for (i = 0; i < 100; i++) {
sprintf(p, " %d", i);
b = add_s_to_b(b, p);
}
}
and b contains " 0 1 2 ... 99" spaced out as symbols.
*/
/* For a block b, SIZE(b) is the number of symbols so far written into it,
CAPACITY(b) the total number it can contain, so SIZE(b) <= CAPACITY(b).
In fact blocks have 1 extra character over the promised capacity so
they can be zero terminated by 'b[SIZE(b)] = 0;' without fear of
overwriting.
*/
extern symbol * create_b(int n) {
symbol * p = (symbol *) (HEAD + (char *) MALLOC(HEAD + (n + 1) * sizeof(symbol)));
CAPACITY(p) = n;
SIZE(p) = 0;
return p;
}
/* Print the symbols of block p to out, each as a single character. */
extern void report_b(FILE * out, symbol * p) {
    int len = SIZE(p);
    int k = 0;
    while (k < len) {
        fprintf(out, "%c", p[k]);
        k++;
    }
}
/* Free block p, including the header in front of its data. A null p is
   accepted and ignored. */
extern void lose_b(symbol * p) {
    if (p != 0) {
        FREE((char *) p - HEAD);
    }
}
/* Return a replacement for block p whose capacity is larger by n plus
   EXTENDER symbols. The old contents and size are carried over and p is
   released, so the caller must continue with the returned block. */
extern symbol * increase_capacity(symbol * p, int n) {
    int old_capacity = CAPACITY(p);
    symbol * bigger = create_b(old_capacity + n + EXTENDER);
    SIZE(bigger) = SIZE(p);
    memmove(bigger, p, old_capacity * sizeof(symbol));
    lose_b(p);
    return bigger;
}
/* Make the data of block p be the n symbols at q, growing p first when it
   is too small. Returns the (possibly moved) block. */
extern symbol * move_to_b(symbol * p, int n, symbol * q) {
    int shortfall = n - CAPACITY(p);
    if (shortfall > 0) {
        p = increase_capacity(p, shortfall);
    }
    memmove(p, q, n * sizeof(symbol));
    SIZE(p) = n;
    return p;
}
/* Append the n symbols at q to the data already in block p, growing p
   first when there is not enough room. Returns the (possibly moved) block. */
extern symbol * add_to_b(symbol * p, int n, symbol * q) {
    int shortfall = SIZE(p) + n - CAPACITY(p);
    if (shortfall > 0) {
        p = increase_capacity(p, shortfall);
    }
    memmove(p + SIZE(p), q, n * sizeof(symbol));
    SIZE(p) = SIZE(p) + n;
    return p;
}
/* Return a newly created block holding a copy of the data in p. The copy
   is created with exactly the capacity it needs, so it never has to grow. */
extern symbol * copy_b(symbol * p) {
    int len = SIZE(p);
    symbol * dup = create_b(len);
    memmove(dup, p, len * sizeof(symbol));
    SIZE(dup) = len;
    return dup;
}
/* Number of check_malloc() calls not yet balanced by check_free(). */
int space_count = 0;
/* Allocate n bytes with malloc() and record the allocation in space_count.
   The count is incremented whether or not malloc() succeeds. */
extern void * check_malloc(int n) {
    void * mem = malloc(n);
    space_count++;
    return mem;
}
/* Release p with free() and take one allocation off space_count. */
extern void check_free(void * p) {
    free(p);
    space_count--;
}
/* Convert block p to a zero terminated char string. The result is
   allocated with malloc() and belongs to the caller. */
extern char * b_to_s(symbol * p) {
    int len = SIZE(p);
    char * out = (char *)malloc(len + 1);
    int k = 0;
    while (k < len) {
        out[k] = p[k];
        k++;
    }
    out[len] = 0;
    return out;
}
/* Append the zero terminated string s to block p, one symbol per char.
   When p is 0 a new block is created. Returns the (possibly moved) block. */
extern symbol * add_s_to_b(symbol * p, const char * s) {
    int len = strlen(s);
    int base;
    int shortfall;
    int k;
    if (p == 0) p = create_b(len);
    base = SIZE(p);
    shortfall = base + len - CAPACITY(p);
    if (shortfall > 0) p = increase_capacity(p, shortfall);
    for (k = 0; k < len; k++) {
        p[base + k] = s[k];
    }
    SIZE(p) = base + len;
    return p;
}
/* The next section defines string handling capabilities in terms
of the lower level block handling capabilities of space.c */
/* -------------------------------------------------------------*/
/* A str is a handle on a single block. Keeping the block behind a handle
   lets the str_* functions replace it when it grows without the caller's
   pointer changing. */
struct str {
/* the block holding the string's symbols */
symbol * data;
};
/* Create a new, empty string. The result must be released with
   str_delete(). The parameter list is spelled (void) so the definition is
   a prototype and matches the declaration in the header. */
extern struct str * str_new(void) {
    struct str * output = (struct str *) malloc(sizeof(struct str));
    output->data = create_b(0);
    return output;
}
/* Delete a string. */
extern void str_delete(struct str * str) {
lose_b(str->data);
free(str);
}
/* Append the contents of add to the end of str. */
extern void str_append(struct str * str, struct str * add) {
    str->data = add_to_b(str->data, SIZE(add->data), add->data);
}
/* Append the single character add to the end of str. */
extern void str_append_ch(struct str * str, char add) {
    symbol one = add;
    str->data = add_to_b(str->data, 1, &one);
}
/* Append the contents of the low level block q to the end of str. */
extern void str_append_b(struct str * str, symbol * q) {
    int count = SIZE(q);
    str->data = add_to_b(str->data, count, q);
}
/* Append a (char *, null teminated) string to a str. */
extern void str_append_string(struct str * str, const char * s) {
str->data = add_s_to_b(str->data, s);
}
/* Append an integer to a str. */
extern void str_append_int(struct str * str, int i) {
char s[30];
sprintf(s, "%d", i);
str_append_string(str, s);
}
/* Clear a string */
extern void str_clear(struct str * str) {
SIZE(str->data) = 0;
}
/* Set a string */
extern void str_assign(struct str * str, char * s) {
str_clear(str);
str_append_string(str, s);
}
/* Copy a string. */
extern struct str * str_copy(struct str * old) {
struct str * newstr = str_new();
str_append(newstr, old);
return newstr;
}
/* Return the block currently holding the contents of str. The pointer may
   become stale once the string is appended to, because the block can move. */
extern symbol * str_data(struct str * str) {
    symbol * payload = str->data;
    return payload;
}
/* Get the length of the str. */
extern int str_len(struct str * str) {
return SIZE(str->data);
}
/* Decode one UTF-8 sequence of 1, 2 or 3 symbols starting at p and store
   the character value in *slot. Returns the number of symbols consumed.
   The lead symbol alone decides the length; continuation symbols are not
   validated. */
extern int get_utf8(const symbol * p, int * slot) {
    int lead = p[0];
    int second;
    if (lead < 0xC0) { /* 1100 0000 */
        *slot = lead;
        return 1;
    }
    second = p[1] & 0x3F;
    if (lead < 0xE0) { /* 1110 0000 */
        *slot = ((lead & 0x1F) << 6) | second;
        return 2;
    }
    *slot = ((lead & 0xF) << 12) | (second << 6) | (p[2] & 0x3F);
    return 3;
}
/* Encode ch as UTF-8 at p, using 1 symbol below 0x80, 2 below 0x800 and 3
   otherwise. Returns the number of symbols written. */
extern int put_utf8(int ch, symbol * p) {
    int low6 = (ch & 0x3F) | 0x80;
    if (ch < 0x80) {
        p[0] = ch;
        return 1;
    }
    if (ch < 0x800) {
        p[0] = (ch >> 6) | 0xC0;
        p[1] = low6;
        return 2;
    }
    p[0] = (ch >> 12) | 0xE0;
    p[1] = ((ch >> 6) & 0x3F) | 0x80;
    p[2] = low6;
    return 3;
}
/* Table of system words, ordered by length and then by ASCII value. That
   is the order compare_words() defines, and find_word()'s binary search
   depends on it. Entry 0 is not a word: its code field holds the number of
   entries in the table (80 words plus this header) and is read back as
   vocab->code. Note that "<+" and "insert" share the code c_insert. */
static struct system_word vocab[80+1] = {
{ 0, (byte *)"", 80+1},
{ 1, (byte *)"$", c_dollar },
{ 1, (byte *)"(", c_bra },
{ 1, (byte *)")", c_ket },
{ 1, (byte *)"*", c_multiply },
{ 1, (byte *)"+", c_plus },
{ 1, (byte *)"-", c_minus },
{ 1, (byte *)"/", c_divide },
{ 1, (byte *)"<", c_ls },
{ 1, (byte *)"=", c_assign },
{ 1, (byte *)">", c_gr },
{ 1, (byte *)"?", c_debug },
{ 1, (byte *)"[", c_leftslice },
{ 1, (byte *)"]", c_rightslice },
{ 2, (byte *)"!=", c_ne },
{ 2, (byte *)"*=", c_multiplyassign },
{ 2, (byte *)"+=", c_plusassign },
{ 2, (byte *)"-=", c_minusassign },
{ 2, (byte *)"->", c_sliceto },
{ 2, (byte *)"/*", c_comment2 },
{ 2, (byte *)"//", c_comment1 },
{ 2, (byte *)"/=", c_divideassign },
{ 2, (byte *)"<+", c_insert },
{ 2, (byte *)"<-", c_slicefrom },
{ 2, (byte *)"<=", c_le },
{ 2, (byte *)"==", c_eq },
{ 2, (byte *)"=>", c_assignto },
{ 2, (byte *)">=", c_ge },
{ 2, (byte *)"as", c_as },
{ 2, (byte *)"do", c_do },
{ 2, (byte *)"or", c_or },
{ 3, (byte *)"and", c_and },
{ 3, (byte *)"for", c_for },
{ 3, (byte *)"get", c_get },
{ 3, (byte *)"hex", c_hex },
{ 3, (byte *)"hop", c_hop },
{ 3, (byte *)"non", c_non },
{ 3, (byte *)"not", c_not },
{ 3, (byte *)"set", c_set },
{ 3, (byte *)"try", c_try },
{ 4, (byte *)"fail", c_fail },
{ 4, (byte *)"goto", c_goto },
{ 4, (byte *)"loop", c_loop },
{ 4, (byte *)"next", c_next },
{ 4, (byte *)"size", c_size },
{ 4, (byte *)"test", c_test },
{ 4, (byte *)"true", c_true },
{ 5, (byte *)"among", c_among },
{ 5, (byte *)"false", c_false },
{ 5, (byte *)"limit", c_limit },
{ 5, (byte *)"unset", c_unset },
{ 6, (byte *)"atmark", c_atmark },
{ 6, (byte *)"attach", c_attach },
{ 6, (byte *)"cursor", c_cursor },
{ 6, (byte *)"define", c_define },
{ 6, (byte *)"delete", c_delete },
{ 6, (byte *)"gopast", c_gopast },
{ 6, (byte *)"insert", c_insert },
{ 6, (byte *)"maxint", c_maxint },
{ 6, (byte *)"minint", c_minint },
{ 6, (byte *)"repeat", c_repeat },
{ 6, (byte *)"sizeof", c_sizeof },
{ 6, (byte *)"tomark", c_tomark },
{ 7, (byte *)"atleast", c_atleast },
{ 7, (byte *)"atlimit", c_atlimit },
{ 7, (byte *)"decimal", c_decimal },
{ 7, (byte *)"reverse", c_reverse },
{ 7, (byte *)"setmark", c_setmark },
{ 7, (byte *)"strings", c_strings },
{ 7, (byte *)"tolimit", c_tolimit },
{ 8, (byte *)"booleans", c_booleans },
{ 8, (byte *)"integers", c_integers },
{ 8, (byte *)"routines", c_routines },
{ 8, (byte *)"setlimit", c_setlimit },
{ 9, (byte *)"backwards", c_backwards },
{ 9, (byte *)"externals", c_externals },
{ 9, (byte *)"groupings", c_groupings },
{ 9, (byte *)"stringdef", c_stringdef },
{ 9, (byte *)"substring", c_substring },
{ 12, (byte *)"backwardmode", c_backwardmode },
{ 13, (byte *)"stringescapes", c_stringescapes }
};
/* Enumerators for the system word token codes, in alphabetical order of
   enumerator name and numbered from 4. This is a bare enumerator list with
   a trailing comma, meant to be included inside an enum body (presumably
   the syswords2.h that enum token_codes includes - confirm). */
c_among = 4, c_and, c_as, c_assign, c_assignto, c_atleast,
c_atlimit, c_atmark, c_attach, c_backwardmode, c_backwards,
c_booleans, c_bra, c_comment1, c_comment2, c_cursor, c_debug,
c_decimal, c_define, c_delete, c_divide, c_divideassign, c_do,
c_dollar, c_eq, c_externals, c_fail, c_false, c_for, c_ge, c_get,
c_gopast, c_goto, c_gr, c_groupings, c_hex, c_hop, c_insert,
c_integers, c_ket, c_le, c_leftslice, c_limit, c_loop, c_ls,
c_maxint, c_minint, c_minus, c_minusassign, c_multiply,
c_multiplyassign, c_ne, c_next, c_non, c_not, c_or, c_plus,
c_plusassign, c_repeat, c_reverse, c_rightslice, c_routines,
c_set, c_setlimit, c_setmark, c_size, c_sizeof, c_slicefrom,
c_sliceto, c_stringdef, c_stringescapes, c_strings, c_substring,
c_test, c_tolimit, c_tomark, c_true, c_try, c_unset,
#include <stdio.h> /* stderr etc */
#include <stdlib.h> /* malloc free */
#include <string.h> /* strlen */
#include <ctype.h> /* isalpha etc */
#include "header.h"
/* One entry of the vocab table of system words. */
struct system_word {
int s_size; /* size of system word */
byte * s; /* pointer to the system word */
int code; /* its internal code */
};
/* ASCII collating assumed in syswords.c */
#include "syswords.h"
/* Return the lesser of a and b. */
static int smaller(int a, int b) {
    if (b <= a) return b;
    return a;
}
/* Read the whole of the file whose name is held in block p. Returns a new
   block with one symbol per character read, or 0 if the file cannot be
   opened. */
extern symbol * get_input(symbol * p) {
    char * path = b_to_s(p);
    FILE * in = fopen(path, "r");
    symbol * text;
    int used = 0;
    int ch;
    free(path);
    if (in == 0) return 0;
    text = create_b(STARTSIZE);
    while ((ch = getc(in)) != EOF) {
        /* SIZE(text) is only set at the end; growth is driven by 'used' */
        if (used >= CAPACITY(text)) text = increase_capacity(text, used/2);
        text[used++] = ch;
    }
    fclose(in);
    SIZE(text) = used;
    return text;
}
/* Report an error on stderr as "Line N: " followed by s1, the n symbols at
   p, and s2; any of the three parts may be 0 to leave it out. Once 20
   errors have already been reported the program prints "... etc" and
   exits instead. */
static void error(struct tokeniser * t, char * s1, int n, symbol * p, char * s2) {
    int k;
    if (t->error_count == 20) {
        fprintf(stderr, "... etc\n");
        exit(1);
    }
    fprintf(stderr, "Line %d", t->line_number);
    if (t->get_depth > 0) fprintf(stderr, " (of included file)");
    fprintf(stderr, ": ");
    if (s1 != 0) fprintf(stderr, "%s", s1);
    if (p != 0) {
        for (k = 0; k < n; k++) fprintf(stderr, "%c", p[k]);
    }
    if (s2 != 0) fprintf(stderr, "%s", s2);
    fprintf(stderr, "\n");
    t->error_count++;
}
static void error1(struct tokeniser * t, char * s) {
error(t, s, 0,0, 0);
}
static void error2(struct tokeniser * t, char * s) {
error(t, "unexpected end of text after ", 0,0, s);
}
/* Order the m symbols at p against the n bytes at q: shorter words sort
   first, and words of equal length are ordered by their first differing
   element. Returns negative, zero or positive accordingly. */
static int compare_words(int m, symbol * p, int n, byte * q) {
    int k = 0;
    if (m != n) return m - n;
    while (k < n) {
        int diff = p[k] - q[k];
        if (diff != 0) return diff;
        k++;
    }
    return 0;
}
/* Look up the n symbols at p in vocab by binary search. Returns the token
   code of the matching system word, or -1 if there is none. */
static int find_word(int n, symbol * p) {
/* i and j bracket the search range; entry 0 is the table header, whose
   code field holds the number of entries, so j starts one past the end */
int i = 0; int j = vocab->code;
repeat {
int k = i + (j - i)/2;
struct system_word * w = vocab + k;
int diff = compare_words(n, p, w->s_size, w->s);
if (diff == 0) return w->code;
if (diff < 0) j = k; else i = k;
/* the range has shrunk to a single entry that was already rejected */
if (j - i == 1) break;
}
return -1;
}
/* Return the value of the n decimal digits at p. The caller guarantees
   that all n symbols are digits; overflow is not checked. */
static int get_number(int n, symbol * p) {
    int value = 0;
    int k = 0;
    while (k < n) {
        value = 10*value + p[k] - '0';
        k++;
    }
    return value;
}
/* Test whether the unread text starts with the string s. On a match the
   cursor is moved past it and true is returned; otherwise the cursor is
   left alone and false is returned. */
static int eq_s(struct tokeniser * t, char * s) {
    int len = strlen(s);
    int k = 0;
    if (SIZE(t->p) - t->c < len) return false;
    while (k < len) {
        if (t->p[t->c + k] != s[k]) return false;
        k++;
    }
    t->c += len;
    return true;
}
/* Return true if ch is a newline, carriage return, tab or space. As a
   side effect a newline also advances the tokeniser's line number. */
static int white_space(struct tokeniser * t, int ch) {
    if (ch == '\n') {
        t->line_number++;
        return true;
    }
    if (ch == '\r' || ch == '\t' || ch == ' ') return true;
    return false;
}
/* Search the stringdef macros for one whose name is the n symbols at p.
   Returns the macro's value block, or 0 if no macro has that name. */
static symbol * find_in_m(struct tokeniser * t, int n, symbol * p) {
    struct m_pair * pair;
    for (pair = t->m_pairs; pair != 0; pair = pair->next) {
        symbol * name = pair->name;
        if (n == SIZE(name) && memcmp(name, p, n * sizeof(symbol)) == 0) {
            return pair->value;
        }
    }
    return 0;
}
/* Read the body of a literal string. c is the index of the first symbol
   after the opening quote. The string's symbols are collected in t->b,
   with macro insertions expanded. Returns the index just past the closing
   quote, or the index at which reading stopped once an error has been
   reported. */
static int read_literal_string(struct tokeniser * t, int c) {
symbol * p = t->p;
int ch;
SIZE(t->b) = 0;
repeat {
if (c >= SIZE(p)) { error2(t, "'"); return c; }
ch = p[c];
if (ch == '\n') { error1(t, "string not terminated"); return c; }
c++;
/* An insertion: read up to the closing escape character */
if (ch == t->m_start) {
int c0 = c;
int newlines = false; /* no newlines as yet */
int black_found = false; /* no printing chars as yet */
repeat {
if (c >= SIZE(p)) { error2(t, "'"); return c; }
ch = p[c]; c++;
if (ch == t->m_end) break;
unless (white_space(t, ch)) black_found = true;
if (ch == '\n') newlines = true;
/* an insertion may span lines only if it holds nothing but white space */
if (newlines && black_found) {
error1(t, "string not terminated");
return c;
}
}
/* an all-white insertion containing a newline contributes nothing;
   anything else is looked up as a macro name */
unless (newlines) {
int n = c - c0 - 1; /* macro size */
int firstch = p[c0];
symbol * q = find_in_m(t, n, p + c0);
if (q == 0) {
/* an undefined one-symbol name that is a quote or the opening escape
   character stands for that character itself */
if (n == 1 && (firstch == '\'' || firstch == t->m_start))
t->b = add_to_b(t->b, 1, p + c0);
else
error(t, "string macro '", n, p + c0, "' undeclared");
} else
t->b = add_to_b(t->b, SIZE(q), q);
}
} else {
/* a plain symbol: the closing quote ends the string */
if (ch == '\'') return c;
t->b = add_to_b(t->b, 1, p + c - 1);
}
}
}
/* Scan the next raw token from the current text. Returns its code, or -1
   at the end of the text. On return t->c is just past the token; the text
   of a name or literal string is left in t->b and the value of a number
   in t->number. Comments and directives come back as ordinary codes and
   are dealt with by read_token(). */
static int next_token(struct tokeniser * t) {
symbol * p = t->p;
int c = t->c;
int ch;
int code = -1;
repeat {
if (c >= SIZE(p)) { t->c = c; return -1; }
ch = p[c];
if (white_space(t, ch)) { c++; continue; }
/* a word: either a system word or a name */
if (isalpha(ch)) {
int c0 = c;
while (c < SIZE(p) && (isalnum(p[c]) || p[c] == '_')) c++;
code = find_word(c - c0, p + c0);
if (code < 0) {
t->b = move_to_b(t->b, c - c0, p + c0);
code = c_name;
}
} else
if (isdigit(ch)) {
int c0 = c;
while (c < SIZE(p) && isdigit(p[c])) c++;
t->number = get_number(c - c0, p + c0);
code = c_number;
} else
if (ch == '\'') {
c = read_literal_string(t, c + 1);
code = c_literalstring;
} else
{
/* punctuation: try the two-symbol system words before the one-symbol ones */
int lim = smaller(2, SIZE(p) - c);
int i;
for (i = lim; i > 0; i--) {
code = find_word(i, p + c);
if (code >= 0) { c += i; break; }
}
}
if (code >= 0) {
t->c = c;
return code;
}
/* unrecognised symbol: report it, skip it and carry on */
error(t, "'", 1, p + c, "' unknown");
c++;
continue;
}
}
/* Return the next unread symbol and step past it, or return -1 without
   moving when the end of the text has been reached. */
static int next_char(struct tokeniser * t) {
    int at = t->c;
    if (at >= SIZE(t->p)) return -1;
    t->c = at + 1;
    return t->p[at];
}
/* Skip white space and return the first symbol that is not white space,
   or -1 at the end of the text. */
static int next_real_char(struct tokeniser * t) {
    int ch;
    do {
        ch = next_char(t);
    } while (white_space(t, ch));
    return ch;
}
/* Read the run of non-white symbols that follows, skipping any leading
   white space, and store it in t->b2. Used for the macro name after
   stringdef. Reports an error if the text ends before a name starts. */
static void read_chars(struct tokeniser * t) {
    int first = next_real_char(t);
    int start;
    if (first < 0) {
        error2(t, "stringdef");
        return;
    }
    start = t->c - 1;
    for (;;) {
        int ch = next_char(t);
        if (white_space(t, ch) || ch < 0) break;
    }
    /* the length excludes the terminating white space symbol just read */
    t->b2 = move_to_b(t->b2, t->c - start - 1, t->p + start);
}
/* Return the value of the decimal digit ch, or -1 if ch is not a digit. */
static int decimal_to_num(int ch) {
    return (ch >= '0' && ch <= '9') ? ch - '0' : -1;
}
/* Return the value of the hex digit ch, or -1 if ch is not one. Only the
   lower case letters a to f are accepted; callers fold case first. */
static int hex_to_num(int ch) {
    if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a');
    if (ch >= '0' && ch <= '9') return ch - '0';
    return -1;
}
/* Convert block p in place from a list of space separated numbers, written
   in the given base (10 or 16), to the characters those numbers denote.
   Each value is stored as one symbol, or as a UTF-8 sequence when t->utf8
   is set. On a malformed or out of range number an error is reported and
   the function returns without setting SIZE(p). */
static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
/* c is the read index and d the write index */
int c = 0; int d = 0;
repeat {
while (c < SIZE(p) && p[c] == ' ') c++;
if (c == SIZE(p)) break;
{
int number = 0;
repeat {
/* NOTE(review): p[c] is read before c is tested against SIZE(p); the
   value is unused in that case, and a block always has one spare symbol */
int ch = p[c];
if (c == SIZE(p) || ch == ' ') break;
if (base == 10) {
ch = decimal_to_num(ch);
if (ch < 0) {
error1(t, "decimal string contains non-digits");
return;
}
} else {
ch = hex_to_num(tolower(ch));
if (ch < 0) {
error1(t, "hex string contains non-hex characters");
return;
}
}
number = base * number + ch;
c++;
}
/* the largest allowed value depends on how wide a character can be */
if (t->widechars || t->utf8) {
unless (0 <= number && number <= 0xffff) {
error1(t, "character values exceed 64K");
return;
}
} else {
unless (0 <= number && number <= 0xff) {
error1(t, "character values exceed 256");
return;
}
}
if (t->utf8)
d += put_utf8(number, p + d);
else
p[d++] = number;
}
}
SIZE(p) = d;
}
/* Return the next token for the caller, or -1 at the end of the outermost
   text. Comments and the directives stringescapes, stringdef and get are
   handled here and never returned. The result is also recorded in
   t->token, with the previous one kept in t->previous_token. */
extern int read_token(struct tokeniser * t) {
symbol * p = t->p;
int held = t->token_held;
t->token_held = false;
/* a held token is handed out again without reading anything */
if (held) return t->token;
repeat {
int code = next_token(t);
switch (code) {
case c_comment1: /* slash-slash comment */
/* skip to the end of the line; the newline itself is left unread */
while (t->c < SIZE(p) && p[t->c] != '\n') t->c++;
continue;
case c_comment2: /* slash-star comment */
repeat {
if (t->c >= SIZE(p)) {
error1(t, "/* comment not terminated");
t->token = -1;
return -1;
}
if (p[t->c] == '\n') t->line_number++;
if (eq_s(t, "*/")) break;
t->c++;
}
continue;
case c_stringescapes:
{
/* the next two non-white symbols become the opening and closing
   escape characters */
int ch1 = next_real_char(t);
int ch2 = next_real_char(t);
if (ch2 < 0)
{ error2(t, "stringescapes"); continue; }
if (ch1 == '\'')
{ error1(t, "first stringescape cannot be '"); continue; }
t->m_start = ch1;
t->m_end = ch2;
}
continue;
case c_stringdef:
{
/* stringdef NAME [hex|decimal] 'string': the name goes to t->b2 and the
   string to t->b */
int base = 0;
read_chars(t);
code = read_token(t);
if (code == c_hex) { base = 16; code = read_token(t); } else
if (code == c_decimal) { base = 10; code = read_token(t); }
unless (code == c_literalstring)
{ error1(t, "string omitted after stringdef"); continue; }
if (base > 0) convert_numeric_string(t, t->b, base);
/* add the new macro at the front of the list */
{ NEW(m_pair, q);
q->next = t->m_pairs;
q->name = copy_b(t->b2);
q->value = copy_b(t->b);
t->m_pairs = q;
}
}
continue;
case c_get:
code = read_token(t);
unless (code == c_literalstring) {
error1(t, "string omitted after get"); continue;
}
t->get_depth++;
if (t->get_depth > 10) {
fprintf(stderr, "get directives go 10 deep. Looping?\n");
exit(1);
}
{
NEW(input, q);
symbol * u = get_input(t->b);
/* not found as given: retry with each include prefix in turn */
if (u == 0) {
struct include * r = t->includes;
until (r == 0) {
symbol * b = copy_b(r->b);
b = add_to_b(b, SIZE(t->b), t->b);
u = get_input(b);
lose_b(b);
unless (u == 0) break;
r = r->next;
}
}
if (u == 0) {
error(t, "Can't get '", SIZE(t->b), t->b, "'");
exit(1);
}
/* save the current reading position in q, which relies on struct
   tokeniser starting with the fields of struct input, then switch to
   the included text */
memmove(q, t, sizeof(struct input));
t->next = q;
t->p = u;
t->c = 0;
t->line_number = 1;
}
p = t->p;
continue;
case -1:
/* end of an included text: free it and resume the suspended one */
unless (t->next == 0) {
lose_b(p);
{
struct input * q = t->next;
memmove(t, q, sizeof(struct input)); p = t->p;
FREE(q);
}
t->get_depth--;
continue;
}
/* drop through */
default:
t->previous_token = t->token;
t->token = code;
return code;
}
}
}
extern byte * name_of_token(int code) {
int i;
for (i = 1; i < vocab->code; i++)
if ((vocab + i)->code == code) return (vocab + i)->s;
switch (code) {
case c_mathassign: return (byte *) "=";
case c_name: return (byte *) "name";
case c_number: return (byte *) "number";
case c_literalstring:return (byte *) "literal";
case c_neg: return (byte *) "neg";
case c_grouping: return (byte *) "grouping";
case c_call: return (byte *) "call";
case c_booltest: return (byte *) "Boolean test";
case -2: return (byte *) "start of text";
case -1: return (byte *) "end of text";
default: return (byte *) "?";
}
}
/* Create a tokeniser positioned at the start of text p, on line 1, with no
   macros, no suspended inputs and no errors. The token fields start at -2
   ("start of text"). Fields not assigned here (m_end, number, widechars,
   utf8, omission, includes) are left for the caller to set. */
extern struct tokeniser * create_tokeniser(symbol * p) {
    NEW(tokeniser, t);

    /* reading position */
    t->p = p;
    t->c = 0;
    t->line_number = 1;
    t->next = 0;
    t->get_depth = 0;

    /* work buffers */
    t->b = create_b(0);
    t->b2 = create_b(0);

    /* string macros: -1 can never match a symbol, so escapes start off */
    t->m_start = -1;
    t->m_pairs = 0;

    /* token history and error state */
    t->token = -2;
    t->previous_token = -2;
    t->token_held = false;
    t->error_count = 0;
    return t;
}
extern void close_tokeniser(struct tokeniser * t) {
lose_b(t->b);
lose_b(t->b2);
{
struct m_pair * q = t->m_pairs;
until (q == 0) {
struct m_pair * q_next = q->next;
lose_b(q->name);
lose_b(q->value);
FREE(q);
q = q_next;
}
}
{
struct input * q = t->next;
until (q == 0) {
struct input * q_next = q->next;
FREE(q);
q = q_next;
}
}
FREE(t);
}
......@@ -36,9 +36,8 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
{
stemmer_encoding_t enc;
struct stemmer_modules * module;
struct sb_stemmer * stemmer =
(struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
if (stemmer == NULL) return NULL;
struct sb_stemmer * stemmer;
enc = sb_getenc(charenc);
if (enc == ENC_UNKNOWN) return NULL;
......@@ -47,6 +46,9 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
}
if (module->name == NULL) return NULL;
stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
if (stemmer == NULL) return NULL;
stemmer->create = module->create;
stemmer->close = module->close;
stemmer->stem = module->stem;
......
......@@ -36,9 +36,8 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
{
stemmer_encoding_t enc;
struct stemmer_modules * module;
struct sb_stemmer * stemmer =
(struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
if (stemmer == NULL) return NULL;
struct sb_stemmer * stemmer;
enc = sb_getenc(charenc);
if (enc == ENC_UNKNOWN) return NULL;
......@@ -47,6 +46,9 @@ sb_stemmer_new(const char * algorithm, const char * charenc)
}
if (module->name == NULL) return NULL;
stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
if (stemmer == NULL) return NULL;
stemmer->create = module->create;
stemmer->close = module->close;
stemmer->stem = module->stem;
......
......@@ -20,7 +20,7 @@
#include "../src_c/stem_UTF_8_french.h"
#include "../src_c/stem_ISO_8859_1_german.h"
#include "../src_c/stem_UTF_8_german.h"
#include "../src_c/stem_ISO_8859_1_hungarian.h"
#include "../src_c/stem_ISO_8859_2_hungarian.h"
#include "../src_c/stem_UTF_8_hungarian.h"
#include "../src_c/stem_ISO_8859_1_italian.h"
#include "../src_c/stem_UTF_8_italian.h"
......@@ -110,11 +110,11 @@ static struct stemmer_modules modules[] = {
{"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
{"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
{"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
{"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
{"hu", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
{"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
{"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
{"hun", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
{"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
{"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
{"hungarian", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
{"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
{"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
{"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
......
......@@ -15,7 +15,7 @@ english UTF_8,ISO_8859_1 english,en,eng
finnish UTF_8,ISO_8859_1 finnish,fi,fin
french UTF_8,ISO_8859_1 french,fr,fre,fra
german UTF_8,ISO_8859_1 german,de,ger,deu
hungarian UTF_8,ISO_8859_1 hungarian,hu,hun
hungarian UTF_8,ISO_8859_2 hungarian,hu,hun
italian UTF_8,ISO_8859_1 italian,it,ita
norwegian UTF_8,ISO_8859_1 norwegian,no,nor
portuguese UTF_8,ISO_8859_1 portuguese,pt,por
......
......@@ -20,7 +20,7 @@ snowball_sources= \
src_c/stem_UTF_8_french.c \
src_c/stem_ISO_8859_1_german.c \
src_c/stem_UTF_8_german.c \
src_c/stem_ISO_8859_1_hungarian.c \
src_c/stem_ISO_8859_2_hungarian.c \
src_c/stem_UTF_8_hungarian.c \
src_c/stem_ISO_8859_1_italian.c \
src_c/stem_UTF_8_italian.c \
......@@ -56,7 +56,7 @@ snowball_headers= \
src_c/stem_UTF_8_french.h \
src_c/stem_ISO_8859_1_german.h \
src_c/stem_UTF_8_german.h \
src_c/stem_ISO_8859_1_hungarian.h \
src_c/stem_ISO_8859_2_hungarian.h \
src_c/stem_UTF_8_hungarian.h \
src_c/stem_ISO_8859_1_italian.h \
src_c/stem_UTF_8_italian.h \
......
......@@ -54,13 +54,13 @@ static const symbol s_1_6[2] = { 'e', 's' };
static const struct among a_1[7] =
{
/* 0 */ { 1, s_1_0, -1, 1, 0},
/* 0 */ { 1, s_1_0, -1, 2, 0},
/* 1 */ { 2, s_1_1, -1, 1, 0},
/* 2 */ { 2, s_1_2, -1, 1, 0},
/* 2 */ { 2, s_1_2, -1, 2, 0},
/* 3 */ { 3, s_1_3, -1, 1, 0},
/* 4 */ { 2, s_1_4, -1, 1, 0},
/* 5 */ { 1, s_1_5, -1, 2, 0},
/* 6 */ { 2, s_1_6, 5, 1, 0}
/* 5 */ { 1, s_1_5, -1, 3, 0},
/* 6 */ { 2, s_1_6, 5, 2, 0}
};
static const symbol s_2_0[2] = { 'e', 'n' };
......@@ -123,28 +123,30 @@ static const symbol s_7[] = { 'u' };
static const symbol s_8[] = { 'a' };
static const symbol s_9[] = { 'o' };
static const symbol s_10[] = { 'u' };
static const symbol s_11[] = { 'i', 'g' };
static const symbol s_12[] = { 'e' };
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'e', 'r' };
static const symbol s_15[] = { 'e', 'n' };
static const symbol s_11[] = { 's' };
static const symbol s_12[] = { 'n', 'i', 's' };
static const symbol s_13[] = { 'i', 'g' };
static const symbol s_14[] = { 'e' };
static const symbol s_15[] = { 'e' };
static const symbol s_16[] = { 'e', 'r' };
static const symbol s_17[] = { 'e', 'n' };
static int r_prelude(struct SN_env * z) {
{ int c_test = z->c; /* test, line 30 */
while(1) { /* repeat, line 30 */
{ int c_test = z->c; /* test, line 35 */
while(1) { /* repeat, line 35 */
int c1 = z->c;
{ int c2 = z->c; /* or, line 33 */
z->bra = z->c; /* [, line 32 */
{ int c2 = z->c; /* or, line 38 */
z->bra = z->c; /* [, line 37 */
if (!(eq_s(z, 1, s_0))) goto lab2;
z->ket = z->c; /* ], line 32 */
{ int ret = slice_from_s(z, 2, s_1); /* <-, line 32 */
z->ket = z->c; /* ], line 37 */
{ int ret = slice_from_s(z, 2, s_1); /* <-, line 37 */
if (ret < 0) return ret;
}
goto lab1;
lab2:
z->c = c2;
if (z->c >= z->l) goto lab0;
z->c++; /* next, line 33 */
z->c++; /* next, line 38 */
}
lab1:
continue;
......@@ -154,26 +156,26 @@ static int r_prelude(struct SN_env * z) {
}
z->c = c_test;
}
while(1) { /* repeat, line 36 */
while(1) { /* repeat, line 41 */
int c3 = z->c;
while(1) { /* goto, line 36 */
while(1) { /* goto, line 41 */
int c4 = z->c;
if (in_grouping(z, g_v, 97, 252, 0)) goto lab4;
z->bra = z->c; /* [, line 37 */
{ int c5 = z->c; /* or, line 37 */
z->bra = z->c; /* [, line 42 */
{ int c5 = z->c; /* or, line 42 */
if (!(eq_s(z, 1, s_2))) goto lab6;
z->ket = z->c; /* ], line 37 */
z->ket = z->c; /* ], line 42 */
if (in_grouping(z, g_v, 97, 252, 0)) goto lab6;
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 37 */
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 42 */
if (ret < 0) return ret;
}
goto lab5;
lab6:
z->c = c5;
if (!(eq_s(z, 1, s_4))) goto lab4;
z->ket = z->c; /* ], line 38 */
z->ket = z->c; /* ], line 43 */
if (in_grouping(z, g_v, 97, 252, 0)) goto lab4;
{ int ret = slice_from_s(z, 1, s_5); /* <-, line 38 */
{ int ret = slice_from_s(z, 1, s_5); /* <-, line 43 */
if (ret < 0) return ret;
}
}
......@@ -183,7 +185,7 @@ static int r_prelude(struct SN_env * z) {
lab4:
z->c = c4;
if (z->c >= z->l) goto lab3;
z->c++; /* goto, line 36 */
z->c++; /* goto, line 41 */
}
continue;
lab3:
......@@ -196,81 +198,81 @@ static int r_prelude(struct SN_env * z) {
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
z->I[1] = z->l;
{ int c_test = z->c; /* test, line 47 */
{ int c_test = z->c; /* test, line 52 */
{ int ret = z->c + 3;
if (0 > ret || ret > z->l) return 0;
z->c = ret; /* hop, line 47 */
z->c = ret; /* hop, line 52 */
}
z->I[2] = z->c; /* setmark x, line 47 */
z->I[2] = z->c; /* setmark x, line 52 */
z->c = c_test;
}
{ /* gopast */ /* grouping v, line 49 */
{ /* gopast */ /* grouping v, line 54 */
int ret = out_grouping(z, g_v, 97, 252, 1);
if (ret < 0) return 0;
z->c += ret;
}
{ /* gopast */ /* non v, line 49 */
{ /* gopast */ /* non v, line 54 */
int ret = in_grouping(z, g_v, 97, 252, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 49 */
/* try, line 50 */
z->I[0] = z->c; /* setmark p1, line 54 */
/* try, line 55 */
if (!(z->I[0] < z->I[2])) goto lab0;
z->I[0] = z->I[2];
lab0:
{ /* gopast */ /* grouping v, line 51 */
{ /* gopast */ /* grouping v, line 56 */
int ret = out_grouping(z, g_v, 97, 252, 1);
if (ret < 0) return 0;
z->c += ret;
}
{ /* gopast */ /* non v, line 51 */
{ /* gopast */ /* non v, line 56 */
int ret = in_grouping(z, g_v, 97, 252, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[1] = z->c; /* setmark p2, line 51 */
z->I[1] = z->c; /* setmark p2, line 56 */
return 1;
}
static int r_postlude(struct SN_env * z) {
int among_var;
while(1) { /* repeat, line 55 */
while(1) { /* repeat, line 60 */
int c1 = z->c;
z->bra = z->c; /* [, line 57 */
among_var = find_among(z, a_0, 6); /* substring, line 57 */
z->bra = z->c; /* [, line 62 */
among_var = find_among(z, a_0, 6); /* substring, line 62 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 57 */
z->ket = z->c; /* ], line 62 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 58 */
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 63 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_7); /* <-, line 59 */
{ int ret = slice_from_s(z, 1, s_7); /* <-, line 64 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 60 */
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 65 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 1, s_9); /* <-, line 61 */
{ int ret = slice_from_s(z, 1, s_9); /* <-, line 66 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 62 */
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */
if (ret < 0) return ret;
}
break;
case 6:
if (z->c >= z->l) goto lab0;
z->c++; /* next, line 63 */
z->c++; /* next, line 68 */
break;
}
continue;
......@@ -293,26 +295,42 @@ static int r_R2(struct SN_env * z) {
static int r_standard_suffix(struct SN_env * z) {
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* do, line 74 */
z->ket = z->c; /* [, line 75 */
{ int m1 = z->l - z->c; (void)m1; /* do, line 79 */
z->ket = z->c; /* [, line 80 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0;
among_var = find_among_b(z, a_1, 7); /* substring, line 75 */
among_var = find_among_b(z, a_1, 7); /* substring, line 80 */
if (!(among_var)) goto lab0;
z->bra = z->c; /* ], line 75 */
z->bra = z->c; /* ], line 80 */
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 75 */
if (ret == 0) goto lab0; /* call R1, line 80 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_del(z); /* delete, line 77 */
{ int ret = slice_del(z); /* delete, line 82 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_del(z); /* delete, line 85 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 86 */
z->ket = z->c; /* [, line 86 */
if (!(eq_s_b(z, 1, s_11))) { z->c = z->l - m_keep; goto lab1; }
z->bra = z->c; /* ], line 86 */
if (!(eq_s_b(z, 3, s_12))) { z->c = z->l - m_keep; goto lab1; }
{ int ret = slice_del(z); /* delete, line 86 */
if (ret < 0) return ret;
}
lab1:
;
}
break;
case 3:
if (in_grouping_b(z, g_s_ending, 98, 116, 0)) goto lab0;
{ int ret = slice_del(z); /* delete, line 80 */
{ int ret = slice_del(z); /* delete, line 89 */
if (ret < 0) return ret;
}
break;
......@@ -320,175 +338,175 @@ static int r_standard_suffix(struct SN_env * z) {
lab0:
z->c = z->l - m1;
}
{ int m2 = z->l - z->c; (void)m2; /* do, line 84 */
z->ket = z->c; /* [, line 85 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1;
among_var = find_among_b(z, a_2, 4); /* substring, line 85 */
if (!(among_var)) goto lab1;
z->bra = z->c; /* ], line 85 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 93 */
z->ket = z->c; /* [, line 94 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2;
among_var = find_among_b(z, a_2, 4); /* substring, line 94 */
if (!(among_var)) goto lab2;
z->bra = z->c; /* ], line 94 */
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 85 */
if (ret == 0) goto lab2; /* call R1, line 94 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab1;
case 0: goto lab2;
case 1:
{ int ret = slice_del(z); /* delete, line 87 */
{ int ret = slice_del(z); /* delete, line 96 */
if (ret < 0) return ret;
}
break;
case 2:
if (in_grouping_b(z, g_st_ending, 98, 116, 0)) goto lab1;
if (in_grouping_b(z, g_st_ending, 98, 116, 0)) goto lab2;
{ int ret = z->c - 3;
if (z->lb > ret || ret > z->l) goto lab1;
z->c = ret; /* hop, line 90 */
if (z->lb > ret || ret > z->l) goto lab2;
z->c = ret; /* hop, line 99 */
}
{ int ret = slice_del(z); /* delete, line 90 */
{ int ret = slice_del(z); /* delete, line 99 */
if (ret < 0) return ret;
}
break;
}
lab1:
lab2:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 94 */
z->ket = z->c; /* [, line 95 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2;
among_var = find_among_b(z, a_4, 8); /* substring, line 95 */
if (!(among_var)) goto lab2;
z->bra = z->c; /* ], line 95 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 103 */
z->ket = z->c; /* [, line 104 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3;
among_var = find_among_b(z, a_4, 8); /* substring, line 104 */
if (!(among_var)) goto lab3;
z->bra = z->c; /* ], line 104 */
{ int ret = r_R2(z);
if (ret == 0) goto lab2; /* call R2, line 95 */
if (ret == 0) goto lab3; /* call R2, line 104 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab2;
case 0: goto lab3;
case 1:
{ int ret = slice_del(z); /* delete, line 97 */
{ int ret = slice_del(z); /* delete, line 106 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 98 */
z->ket = z->c; /* [, line 98 */
if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab3; }
z->bra = z->c; /* ], line 98 */
{ int m4 = z->l - z->c; (void)m4; /* not, line 98 */
if (!(eq_s_b(z, 1, s_12))) goto lab4;
{ z->c = z->l - m_keep; goto lab3; }
lab4:
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 107 */
z->ket = z->c; /* [, line 107 */
if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab4; }
z->bra = z->c; /* ], line 107 */
{ int m4 = z->l - z->c; (void)m4; /* not, line 107 */
if (!(eq_s_b(z, 1, s_14))) goto lab5;
{ z->c = z->l - m_keep; goto lab4; }
lab5:
z->c = z->l - m4;
}
{ int ret = r_R2(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 98 */
if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 107 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 98 */
{ int ret = slice_del(z); /* delete, line 107 */
if (ret < 0) return ret;
}
lab3:
lab4:
;
}
break;
case 2:
{ int m5 = z->l - z->c; (void)m5; /* not, line 101 */
if (!(eq_s_b(z, 1, s_13))) goto lab5;
goto lab2;
lab5:
{ int m5 = z->l - z->c; (void)m5; /* not, line 110 */
if (!(eq_s_b(z, 1, s_15))) goto lab6;
goto lab3;
lab6:
z->c = z->l - m5;
}
{ int ret = slice_del(z); /* delete, line 101 */
{ int ret = slice_del(z); /* delete, line 110 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_del(z); /* delete, line 104 */
{ int ret = slice_del(z); /* delete, line 113 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 105 */
z->ket = z->c; /* [, line 106 */
{ int m6 = z->l - z->c; (void)m6; /* or, line 106 */
if (!(eq_s_b(z, 2, s_14))) goto lab8;
goto lab7;
lab8:
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */
z->ket = z->c; /* [, line 115 */
{ int m6 = z->l - z->c; (void)m6; /* or, line 115 */
if (!(eq_s_b(z, 2, s_16))) goto lab9;
goto lab8;
lab9:
z->c = z->l - m6;
if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab6; }
if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab7; }
}
lab7:
z->bra = z->c; /* ], line 106 */
lab8:
z->bra = z->c; /* ], line 115 */
{ int ret = r_R1(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R1, line 106 */
if (ret == 0) { z->c = z->l - m_keep; goto lab7; } /* call R1, line 115 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 106 */
{ int ret = slice_del(z); /* delete, line 115 */
if (ret < 0) return ret;
}
lab6:
lab7:
;
}
break;
case 4:
{ int ret = slice_del(z); /* delete, line 110 */
{ int ret = slice_del(z); /* delete, line 119 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */
z->ket = z->c; /* [, line 112 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab9; }
among_var = find_among_b(z, a_3, 2); /* substring, line 112 */
if (!(among_var)) { z->c = z->l - m_keep; goto lab9; }
z->bra = z->c; /* ], line 112 */
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 120 */
z->ket = z->c; /* [, line 121 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab10; }
among_var = find_among_b(z, a_3, 2); /* substring, line 121 */
if (!(among_var)) { z->c = z->l - m_keep; goto lab10; }
z->bra = z->c; /* ], line 121 */
{ int ret = r_R2(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab9; } /* call R2, line 112 */
if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call R2, line 121 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: { z->c = z->l - m_keep; goto lab9; }
case 0: { z->c = z->l - m_keep; goto lab10; }
case 1:
{ int ret = slice_del(z); /* delete, line 114 */
{ int ret = slice_del(z); /* delete, line 123 */
if (ret < 0) return ret;
}
break;
}
lab9:
lab10:
;
}
break;
}
lab2:
lab3:
z->c = z->l - m3;
}
return 1;
}
extern int german_ISO_8859_1_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 125 */
{ int c1 = z->c; /* do, line 134 */
{ int ret = r_prelude(z);
if (ret == 0) goto lab0; /* call prelude, line 125 */
if (ret == 0) goto lab0; /* call prelude, line 134 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
{ int c2 = z->c; /* do, line 126 */
{ int c2 = z->c; /* do, line 135 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab1; /* call mark_regions, line 126 */
if (ret == 0) goto lab1; /* call mark_regions, line 135 */
if (ret < 0) return ret;
}
lab1:
z->c = c2;
}
z->lb = z->c; z->c = z->l; /* backwards, line 127 */
z->lb = z->c; z->c = z->l; /* backwards, line 136 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 128 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 137 */
{ int ret = r_standard_suffix(z);
if (ret == 0) goto lab2; /* call standard_suffix, line 128 */
if (ret == 0) goto lab2; /* call standard_suffix, line 137 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
z->c = z->lb;
{ int c4 = z->c; /* do, line 129 */
{ int c4 = z->c; /* do, line 138 */
{ int ret = r_postlude(z);
if (ret == 0) goto lab3; /* call postlude, line 129 */
if (ret == 0) goto lab3; /* call postlude, line 138 */
if (ret < 0) return ret;
}
lab3:
......
......@@ -6,7 +6,7 @@
#ifdef __cplusplus
extern "C" {
#endif
extern int hungarian_ISO_8859_1_stem(struct SN_env * z);
extern int hungarian_ISO_8859_2_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
......@@ -29,8 +29,8 @@ extern "C" {
#endif
extern struct SN_env * hungarian_ISO_8859_1_create_env(void);
extern void hungarian_ISO_8859_1_close_env(struct SN_env * z);
extern struct SN_env * hungarian_ISO_8859_2_create_env(void);
extern void hungarian_ISO_8859_2_close_env(struct SN_env * z);
#ifdef __cplusplus
......@@ -1137,7 +1137,7 @@ static int r_plur_owner(struct SN_env * z) {
return 1;
}
extern int hungarian_ISO_8859_1_stem(struct SN_env * z) {
extern int hungarian_ISO_8859_2_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 229 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab0; /* call mark_regions, line 229 */
......@@ -1224,7 +1224,7 @@ extern int hungarian_ISO_8859_1_stem(struct SN_env * z) {
return 1;
}
extern struct SN_env * hungarian_ISO_8859_1_create_env(void) { return SN_create_env(0, 1, 0); }
extern struct SN_env * hungarian_ISO_8859_2_create_env(void) { return SN_create_env(0, 1, 0); }
extern void hungarian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); }
extern void hungarian_ISO_8859_2_close_env(struct SN_env * z) { SN_close_env(z, 0); }
......@@ -5,10 +5,10 @@
extern "C" {
#endif
extern struct SN_env * hungarian_ISO_8859_1_create_env(void);
extern void hungarian_ISO_8859_1_close_env(struct SN_env * z);
extern struct SN_env * hungarian_ISO_8859_2_create_env(void);
extern void hungarian_ISO_8859_2_close_env(struct SN_env * z);
extern int hungarian_ISO_8859_1_stem(struct SN_env * z);
extern int hungarian_ISO_8859_2_stem(struct SN_env * z);
#ifdef __cplusplus
}
......
......@@ -10,19 +10,14 @@ extern int dutch_UTF_8_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
static int r_measure(struct SN_env * z);
static int r_Step_6(struct SN_env * z);
static int r_Step_7(struct SN_env * z);
static int r_Step_4(struct SN_env * z);
static int r_Step_3(struct SN_env * z);
static int r_Step_2(struct SN_env * z);
static int r_Step_1(struct SN_env * z);
static int r_lengthen_V(struct SN_env * z);
static int r_VX(struct SN_env * z);
static int r_V(struct SN_env * z);
static int r_C(struct SN_env * z);
static int r_standard_suffix(struct SN_env * z);
static int r_undouble(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_en_ending(struct SN_env * z);
static int r_e_ending(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);
#ifdef __cplusplus
extern "C" {
......@@ -62,210 +57,77 @@ static const struct among a_0[11] =
/* 10 */ { 2, s_0_10, 0, 5, 0}
};
static const symbol s_1_0[3] = { 'n', 'd', 'e' };
static const symbol s_1_1[2] = { 'e', 'n' };
static const symbol s_1_2[4] = { 'i', 'e', 'e', 'n' };
static const symbol s_1_3[4] = { 'a', 'l', 'e', 'n' };
static const symbol s_1_4[3] = { 'v', 'e', 'n' };
static const symbol s_1_5[1] = { 's' };
static const symbol s_1_6[2] = { '\'', 's' };
static const symbol s_1_7[2] = { 'e', 's' };
static const symbol s_1_8[3] = { 'i', 'e', 's' };
static const symbol s_1_9[3] = { 'a', 'u', 's' };
static const symbol s_1_1[1] = { 'I' };
static const symbol s_1_2[1] = { 'Y' };
static const struct among a_1[10] =
static const struct among a_1[3] =
{
/* 0 */ { 3, s_1_0, -1, 10, 0},
/* 1 */ { 2, s_1_1, -1, 9, 0},
/* 2 */ { 4, s_1_2, 1, 7, 0},
/* 3 */ { 4, s_1_3, 1, 6, 0},
/* 4 */ { 3, s_1_4, 1, 8, 0},
/* 5 */ { 1, s_1_5, -1, 2, 0},
/* 6 */ { 2, s_1_6, 5, 1, 0},
/* 7 */ { 2, s_1_7, 5, 4, 0},
/* 8 */ { 3, s_1_8, 7, 3, 0},
/* 9 */ { 3, s_1_9, 5, 5, 0}
/* 0 */ { 0, 0, -1, 3, 0},
/* 1 */ { 1, s_1_1, 0, 2, 0},
/* 2 */ { 1, s_1_2, 0, 1, 0}
};
static const symbol s_2_0[2] = { 'd', 'e' };
static const symbol s_2_1[2] = { 'g', 'e' };
static const symbol s_2_2[5] = { 'i', 's', 'c', 'h', 'e' };
static const symbol s_2_3[2] = { 'j', 'e' };
static const symbol s_2_4[5] = { 'l', 'i', 'j', 'k', 'e' };
static const symbol s_2_5[2] = { 'l', 'e' };
static const symbol s_2_6[3] = { 'e', 'n', 'e' };
static const symbol s_2_7[2] = { 'r', 'e' };
static const symbol s_2_8[2] = { 's', 'e' };
static const symbol s_2_9[2] = { 't', 'e' };
static const symbol s_2_10[4] = { 'i', 'e', 'v', 'e' };
static const symbol s_2_0[2] = { 'd', 'd' };
static const symbol s_2_1[2] = { 'k', 'k' };
static const symbol s_2_2[2] = { 't', 't' };
static const struct among a_2[11] =
static const struct among a_2[3] =
{
/* 0 */ { 2, s_2_0, -1, 5, 0},
/* 1 */ { 2, s_2_1, -1, 2, 0},
/* 2 */ { 5, s_2_2, -1, 4, 0},
/* 3 */ { 2, s_2_3, -1, 1, 0},
/* 4 */ { 5, s_2_4, -1, 3, 0},
/* 5 */ { 2, s_2_5, -1, 9, 0},
/* 6 */ { 3, s_2_6, -1, 10, 0},
/* 7 */ { 2, s_2_7, -1, 8, 0},
/* 8 */ { 2, s_2_8, -1, 7, 0},
/* 9 */ { 2, s_2_9, -1, 6, 0},
/* 10 */ { 4, s_2_10, -1, 11, 0}
/* 0 */ { 2, s_2_0, -1, -1, 0},
/* 1 */ { 2, s_2_1, -1, -1, 0},
/* 2 */ { 2, s_2_2, -1, -1, 0}
};
static const symbol s_3_0[4] = { 'h', 'e', 'i', 'd' };
static const symbol s_3_1[3] = { 'f', 'i', 'e' };
static const symbol s_3_2[3] = { 'g', 'i', 'e' };
static const symbol s_3_3[4] = { 'a', 't', 'i', 'e' };
static const symbol s_3_4[4] = { 'i', 's', 'm', 'e' };
static const symbol s_3_5[3] = { 'i', 'n', 'g' };
static const symbol s_3_6[4] = { 'a', 'r', 'i', 'j' };
static const symbol s_3_7[4] = { 'e', 'r', 'i', 'j' };
static const symbol s_3_8[3] = { 's', 'e', 'l' };
static const symbol s_3_9[4] = { 'r', 'd', 'e', 'r' };
static const symbol s_3_10[4] = { 's', 't', 'e', 'r' };
static const symbol s_3_11[5] = { 'i', 't', 'e', 'i', 't' };
static const symbol s_3_12[3] = { 'd', 's', 't' };
static const symbol s_3_13[3] = { 't', 's', 't' };
static const symbol s_3_0[3] = { 'e', 'n', 'e' };
static const symbol s_3_1[2] = { 's', 'e' };
static const symbol s_3_2[2] = { 'e', 'n' };
static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' };
static const symbol s_3_4[1] = { 's' };
static const struct among a_3[14] =
static const struct among a_3[5] =
{
/* 0 */ { 4, s_3_0, -1, 3, 0},
/* 1 */ { 3, s_3_1, -1, 7, 0},
/* 2 */ { 3, s_3_2, -1, 8, 0},
/* 3 */ { 4, s_3_3, -1, 1, 0},
/* 4 */ { 4, s_3_4, -1, 5, 0},
/* 5 */ { 3, s_3_5, -1, 5, 0},
/* 6 */ { 4, s_3_6, -1, 6, 0},
/* 7 */ { 4, s_3_7, -1, 5, 0},
/* 8 */ { 3, s_3_8, -1, 3, 0},
/* 9 */ { 4, s_3_9, -1, 4, 0},
/* 10 */ { 4, s_3_10, -1, 3, 0},
/* 11 */ { 5, s_3_11, -1, 2, 0},
/* 12 */ { 3, s_3_12, -1, 10, 0},
/* 13 */ { 3, s_3_13, -1, 9, 0}
/* 0 */ { 3, s_3_0, -1, 2, 0},
/* 1 */ { 2, s_3_1, -1, 3, 0},
/* 2 */ { 2, s_3_2, -1, 2, 0},
/* 3 */ { 5, s_3_3, 2, 1, 0},
/* 4 */ { 1, s_3_4, -1, 3, 0}
};
static const symbol s_4_0[3] = { 'e', 'n', 'd' };
static const symbol s_4_1[5] = { 'a', 't', 'i', 'e', 'f' };
static const symbol s_4_2[4] = { 'e', 'r', 'i', 'g' };
static const symbol s_4_3[6] = { 'a', 'c', 'h', 't', 'i', 'g' };
static const symbol s_4_4[6] = { 'i', 'o', 'n', 'e', 'e', 'l' };
static const symbol s_4_5[4] = { 'b', 'a', 'a', 'r' };
static const symbol s_4_6[4] = { 'l', 'a', 'a', 'r' };
static const symbol s_4_7[4] = { 'n', 'a', 'a', 'r' };
static const symbol s_4_8[4] = { 'r', 'a', 'a', 'r' };
static const symbol s_4_9[6] = { 'e', 'r', 'i', 'g', 'e', 'r' };
static const symbol s_4_10[8] = { 'a', 'c', 'h', 't', 'i', 'g', 'e', 'r' };
static const symbol s_4_11[6] = { 'l', 'i', 'j', 'k', 'e', 'r' };
static const symbol s_4_12[4] = { 't', 'a', 'n', 't' };
static const symbol s_4_13[6] = { 'e', 'r', 'i', 'g', 's', 't' };
static const symbol s_4_14[8] = { 'a', 'c', 'h', 't', 'i', 'g', 's', 't' };
static const symbol s_4_15[6] = { 'l', 'i', 'j', 'k', 's', 't' };
static const symbol s_4_1[2] = { 'i', 'g' };
static const symbol s_4_2[3] = { 'i', 'n', 'g' };
static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' };
static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' };
static const symbol s_4_5[3] = { 'b', 'a', 'r' };
static const struct among a_4[16] =
static const struct among a_4[6] =
{
/* 0 */ { 3, s_4_0, -1, 10, 0},
/* 1 */ { 5, s_4_1, -1, 2, 0},
/* 2 */ { 4, s_4_2, -1, 10, 0},
/* 3 */ { 6, s_4_3, -1, 9, 0},
/* 4 */ { 6, s_4_4, -1, 1, 0},
/* 5 */ { 4, s_4_5, -1, 3, 0},
/* 6 */ { 4, s_4_6, -1, 5, 0},
/* 7 */ { 4, s_4_7, -1, 4, 0},
/* 8 */ { 4, s_4_8, -1, 6, 0},
/* 9 */ { 6, s_4_9, -1, 10, 0},
/* 10 */ { 8, s_4_10, -1, 9, 0},
/* 11 */ { 6, s_4_11, -1, 8, 0},
/* 12 */ { 4, s_4_12, -1, 7, 0},
/* 13 */ { 6, s_4_13, -1, 10, 0},
/* 14 */ { 8, s_4_14, -1, 9, 0},
/* 15 */ { 6, s_4_15, -1, 8, 0}
/* 0 */ { 3, s_4_0, -1, 1, 0},
/* 1 */ { 2, s_4_1, -1, 2, 0},
/* 2 */ { 3, s_4_2, -1, 1, 0},
/* 3 */ { 4, s_4_3, -1, 3, 0},
/* 4 */ { 4, s_4_4, -1, 4, 0},
/* 5 */ { 3, s_4_5, -1, 5, 0}
};
static const symbol s_5_0[2] = { 'i', 'g' };
static const symbol s_5_1[4] = { 'i', 'g', 'e', 'r' };
static const symbol s_5_2[4] = { 'i', 'g', 's', 't' };
static const symbol s_5_0[2] = { 'a', 'a' };
static const symbol s_5_1[2] = { 'e', 'e' };
static const symbol s_5_2[2] = { 'o', 'o' };
static const symbol s_5_3[2] = { 'u', 'u' };
static const struct among a_5[3] =
static const struct among a_5[4] =
{
/* 0 */ { 2, s_5_0, -1, 1, 0},
/* 1 */ { 4, s_5_1, -1, 1, 0},
/* 2 */ { 4, s_5_2, -1, 1, 0}
/* 0 */ { 2, s_5_0, -1, -1, 0},
/* 1 */ { 2, s_5_1, -1, -1, 0},
/* 2 */ { 2, s_5_2, -1, -1, 0},
/* 3 */ { 2, s_5_3, -1, -1, 0}
};
static const symbol s_6_0[3] = { 'e', 'e', 'e' };
static const symbol s_6_1[3] = { 'i', 'e', 'e' };
static const symbol s_6_2[2] = { 'f', 't' };
static const symbol s_6_3[2] = { 'k', 't' };
static const symbol s_6_4[2] = { 'p', 't' };
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
static const struct among a_6[5] =
{
/* 0 */ { 3, s_6_0, -1, 2, 0},
/* 1 */ { 3, s_6_1, -1, 1, 0},
/* 2 */ { 2, s_6_2, -1, 4, 0},
/* 3 */ { 2, s_6_3, -1, 3, 0},
/* 4 */ { 2, s_6_4, -1, 5, 0}
};
static const symbol s_7_0[2] = { 'b', 'b' };
static const symbol s_7_1[2] = { 'c', 'c' };
static const symbol s_7_2[2] = { 'd', 'd' };
static const symbol s_7_3[2] = { 'f', 'f' };
static const symbol s_7_4[2] = { 'g', 'g' };
static const symbol s_7_5[2] = { 'h', 'h' };
static const symbol s_7_6[2] = { 'j', 'j' };
static const symbol s_7_7[2] = { 'k', 'k' };
static const symbol s_7_8[2] = { 'l', 'l' };
static const symbol s_7_9[2] = { 'm', 'm' };
static const symbol s_7_10[2] = { 'n', 'n' };
static const symbol s_7_11[2] = { 'p', 'p' };
static const symbol s_7_12[2] = { 'q', 'q' };
static const symbol s_7_13[2] = { 'r', 'r' };
static const symbol s_7_14[2] = { 's', 's' };
static const symbol s_7_15[2] = { 't', 't' };
static const symbol s_7_16[1] = { 'v' };
static const symbol s_7_17[2] = { 'v', 'v' };
static const symbol s_7_18[2] = { 'w', 'w' };
static const symbol s_7_19[2] = { 'x', 'x' };
static const symbol s_7_20[1] = { 'z' };
static const symbol s_7_21[2] = { 'z', 'z' };
static const struct among a_7[22] =
{
/* 0 */ { 2, s_7_0, -1, 1, 0},
/* 1 */ { 2, s_7_1, -1, 2, 0},
/* 2 */ { 2, s_7_2, -1, 3, 0},
/* 3 */ { 2, s_7_3, -1, 4, 0},
/* 4 */ { 2, s_7_4, -1, 5, 0},
/* 5 */ { 2, s_7_5, -1, 6, 0},
/* 6 */ { 2, s_7_6, -1, 7, 0},
/* 7 */ { 2, s_7_7, -1, 8, 0},
/* 8 */ { 2, s_7_8, -1, 9, 0},
/* 9 */ { 2, s_7_9, -1, 10, 0},
/* 10 */ { 2, s_7_10, -1, 11, 0},
/* 11 */ { 2, s_7_11, -1, 12, 0},
/* 12 */ { 2, s_7_12, -1, 13, 0},
/* 13 */ { 2, s_7_13, -1, 14, 0},
/* 14 */ { 2, s_7_14, -1, 15, 0},
/* 15 */ { 2, s_7_15, -1, 16, 0},
/* 16 */ { 1, s_7_16, -1, 21, 0},
/* 17 */ { 2, s_7_17, 16, 17, 0},
/* 18 */ { 2, s_7_18, -1, 18, 0},
/* 19 */ { 2, s_7_19, -1, 19, 0},
/* 20 */ { 1, s_7_20, -1, 22, 0},
/* 21 */ { 2, s_7_21, 20, 20, 0}
};
static const unsigned char g_v[] = { 17, 65, 16, 1 };
static const unsigned char g_v_WX[] = { 17, 65, 208, 1 };
static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
static const unsigned char g_AOU[] = { 1, 64, 16 };
static const unsigned char g_AIOU[] = { 1, 65, 16 };
static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
static const symbol s_0[] = { 'a' };
static const symbol s_1[] = { 'e' };
......@@ -278,136 +140,59 @@ static const symbol s_7[] = { 'i' };
static const symbol s_8[] = { 'I' };
static const symbol s_9[] = { 'y' };
static const symbol s_10[] = { 'Y' };
static const symbol s_11[] = { 'i', 'j' };
static const symbol s_12[] = { 'i', 'j' };
static const symbol s_13[] = { 'i', 'j' };
static const symbol s_14[] = { 'e' };
static const symbol s_15[] = { 't' };
static const symbol s_16[] = { 'i', 'e' };
static const symbol s_17[] = { 'a', 'r' };
static const symbol s_18[] = { 'e', 'r' };
static const symbol s_19[] = { 'e' };
static const symbol s_20[] = { 'a', 'u' };
static const symbol s_21[] = { 'a', 'a', 'l' };
static const symbol s_22[] = { 'i', 'e' };
static const symbol s_23[] = { 'f' };
static const symbol s_24[] = { 'h', 'e', 'd' };
static const symbol s_25[] = { 'h', 'e', 'i', 'd' };
static const symbol s_26[] = { 'n', 'd' };
static const symbol s_27[] = { 'd' };
static const symbol s_28[] = { 'i' };
static const symbol s_29[] = { 'j' };
static const symbol s_30[] = { 'n', 'd' };
static const symbol s_31[] = { '\'', 't' };
static const symbol s_32[] = { 'e', 't' };
static const symbol s_33[] = { 'r', 'n', 't' };
static const symbol s_34[] = { 'r', 'n' };
static const symbol s_35[] = { 't' };
static const symbol s_36[] = { 'i', 'n', 'k' };
static const symbol s_37[] = { 'i', 'n', 'g' };
static const symbol s_38[] = { 'm', 'p' };
static const symbol s_39[] = { 'm' };
static const symbol s_40[] = { '\'' };
static const symbol s_41[] = { 'g' };
static const symbol s_42[] = { 'l', 'i', 'j', 'k' };
static const symbol s_43[] = { 'i', 's', 'c', 'h' };
static const symbol s_44[] = { 't' };
static const symbol s_45[] = { 's' };
static const symbol s_46[] = { 'r' };
static const symbol s_47[] = { 'l' };
static const symbol s_48[] = { 'e', 'n' };
static const symbol s_49[] = { 'i', 'e', 'f' };
static const symbol s_50[] = { 'e', 'e', 'r' };
static const symbol s_51[] = { 'r' };
static const symbol s_52[] = { 'a', 'a', 'r' };
static const symbol s_53[] = { 'f' };
static const symbol s_54[] = { 'g' };
static const symbol s_55[] = { 't' };
static const symbol s_56[] = { 'd' };
static const symbol s_57[] = { 'i', 'e' };
static const symbol s_58[] = { 'e', 'e', 'r' };
static const symbol s_59[] = { 'n' };
static const symbol s_60[] = { 'l' };
static const symbol s_61[] = { 'r' };
static const symbol s_62[] = { 't', 'e', 'e', 'r' };
static const symbol s_63[] = { 'l', 'i', 'j', 'k' };
static const symbol s_64[] = { 'i', 'e' };
static const symbol s_65[] = { 'e', 'e' };
static const symbol s_66[] = { 'k' };
static const symbol s_67[] = { 'f' };
static const symbol s_68[] = { 'p' };
static const symbol s_69[] = { 'b' };
static const symbol s_70[] = { 'c' };
static const symbol s_71[] = { 'd' };
static const symbol s_72[] = { 'f' };
static const symbol s_73[] = { 'g' };
static const symbol s_74[] = { 'h' };
static const symbol s_75[] = { 'j' };
static const symbol s_76[] = { 'k' };
static const symbol s_77[] = { 'l' };
static const symbol s_78[] = { 'm' };
static const symbol s_79[] = { 'n' };
static const symbol s_80[] = { 'p' };
static const symbol s_81[] = { 'q' };
static const symbol s_82[] = { 'r' };
static const symbol s_83[] = { 's' };
static const symbol s_84[] = { 't' };
static const symbol s_85[] = { 'v' };
static const symbol s_86[] = { 'w' };
static const symbol s_87[] = { 'x' };
static const symbol s_88[] = { 'z' };
static const symbol s_89[] = { 'f' };
static const symbol s_90[] = { 's' };
static const symbol s_91[] = { 'i', 'j' };
static const symbol s_92[] = { 'i', 'j' };
static const symbol s_93[] = { 'y' };
static const symbol s_94[] = { 'Y' };
static const symbol s_95[] = { 'y' };
static const symbol s_96[] = { 'Y' };
static const symbol s_97[] = { 'Y' };
static const symbol s_98[] = { 'y' };
static const symbol s_11[] = { 'y' };
static const symbol s_12[] = { 'i' };
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'g', 'e', 'm' };
static const symbol s_15[] = { 'h', 'e', 'i', 'd' };
static const symbol s_16[] = { 'h', 'e', 'i', 'd' };
static const symbol s_17[] = { 'c' };
static const symbol s_18[] = { 'e', 'n' };
static const symbol s_19[] = { 'i', 'g' };
static const symbol s_20[] = { 'e' };
static const symbol s_21[] = { 'e' };
static int r_prelude(struct SN_env * z) {
int among_var;
{ int c_test = z->c; /* test, line 49 */
while(1) { /* repeat, line 49 */
{ int c_test = z->c; /* test, line 42 */
while(1) { /* repeat, line 42 */
int c1 = z->c;
z->bra = z->c; /* [, line 50 */
z->bra = z->c; /* [, line 43 */
if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((340306450 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else
among_var = find_among(z, a_0, 11); /* substring, line 50 */
among_var = find_among(z, a_0, 11); /* substring, line 43 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 50 */
z->ket = z->c; /* ], line 43 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_0); /* <-, line 52 */
{ int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_1); /* <-, line 54 */
{ int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 1, s_2); /* <-, line 56 */
{ int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 58 */
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 1, s_4); /* <-, line 60 */
{ int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */
if (ret < 0) return ret;
}
break;
case 6:
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab0;
z->c = ret; /* next, line 61 */
z->c = ret; /* next, line 54 */
}
break;
}
......@@ -418,35 +203,35 @@ static int r_prelude(struct SN_env * z) {
}
z->c = c_test;
}
{ int c_keep = z->c; /* try, line 64 */
z->bra = z->c; /* [, line 64 */
{ int c_keep = z->c; /* try, line 57 */
z->bra = z->c; /* [, line 57 */
if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; }
z->ket = z->c; /* ], line 64 */
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 64 */
z->ket = z->c; /* ], line 57 */
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */
if (ret < 0) return ret;
}
lab1:
;
}
while(1) { /* repeat, line 65 */
while(1) { /* repeat, line 58 */
int c2 = z->c;
while(1) { /* goto, line 65 */
while(1) { /* goto, line 58 */
int c3 = z->c;
if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab3;
z->bra = z->c; /* [, line 66 */
{ int c4 = z->c; /* or, line 66 */
if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab3;
z->bra = z->c; /* [, line 59 */
{ int c4 = z->c; /* or, line 59 */
if (!(eq_s(z, 1, s_7))) goto lab5;
z->ket = z->c; /* ], line 66 */
if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab5;
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 66 */
z->ket = z->c; /* ], line 59 */
if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab5;
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */
if (ret < 0) return ret;
}
goto lab4;
lab5:
z->c = c4;
if (!(eq_s(z, 1, s_9))) goto lab3;
z->ket = z->c; /* ], line 67 */
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */
z->ket = z->c; /* ], line 60 */
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */
if (ret < 0) return ret;
}
}
......@@ -457,7 +242,7 @@ static int r_prelude(struct SN_env * z) {
z->c = c3;
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab2;
z->c = ret; /* goto, line 65 */
z->c = ret; /* goto, line 58 */
}
}
continue;
......@@ -468,1240 +253,382 @@ static int r_prelude(struct SN_env * z) {
return 1;
}
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
z->I[1] = z->l;
{ /* gopast */ /* grouping v, line 69 */
int ret = out_grouping_U(z, g_v, 97, 232, 1);
if (ret < 0) return 0;
z->c += ret;
}
{ /* gopast */ /* non v, line 69 */
int ret = in_grouping_U(z, g_v, 97, 232, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 69 */
/* try, line 70 */
if (!(z->I[0] < 3)) goto lab0;
z->I[0] = 3;
lab0:
{ /* gopast */ /* grouping v, line 71 */
int ret = out_grouping_U(z, g_v, 97, 232, 1);
if (ret < 0) return 0;
z->c += ret;
}
{ /* gopast */ /* non v, line 71 */
int ret = in_grouping_U(z, g_v, 97, 232, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[1] = z->c; /* setmark p2, line 71 */
return 1;
}
static int r_postlude(struct SN_env * z) {
int among_var;
while(1) { /* repeat, line 75 */
int c1 = z->c;
z->bra = z->c; /* [, line 77 */
if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else
among_var = find_among(z, a_1, 3); /* substring, line 77 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 77 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab0;
z->c = ret; /* next, line 80 */
}
break;
}
continue;
lab0:
z->c = c1;
break;
}
return 1;
}
/*
 * Region test.  Records the cursor in z->I[0], then returns 0 unless
 * z->I[0] >= z->I[1]; returns 1 otherwise.
 *
 * NOTE(review): the second comparison below can never fail, because
 * z->I[0] was assigned z->c at the top of the function.  Together with the
 * "setmark x" annotation this looks like two revisions of the routine
 * merged line by line -- confirm against the generator output.
 */
static int r_R1(struct SN_env * z) {
z->I[0] = z->c; /* setmark x, line 74 */
if (!(z->I[0] >= z->I[1])) return 0;
/* Always satisfied here: I[0] == z->c after the assignment above. */
if (!(z->I[0] <= z->c)) return 0;
return 1;
}
/*
 * Region test.  Records the cursor in z->I[0], then returns 0 unless both
 * z->I[0] >= z->I[2] and z->I[1] <= z->c hold; returns 1 otherwise.
 *
 * NOTE(review): the two comparisons consult different mark slots (I[2] and
 * I[1]) and the function overwrites I[0]; this looks like two revisions of
 * the routine merged line by line -- confirm against the generator output.
 */
static int r_R2(struct SN_env * z) {
z->I[0] = z->c; /* setmark x, line 75 */
if (!(z->I[0] >= z->I[2])) return 0;
if (!(z->I[1] <= z->c)) return 0;
return 1;
}
static int r_V(struct SN_env * z) {
{ int m_test = z->l - z->c; /* test, line 77 */
{ int m1 = z->l - z->c; (void)m1; /* or, line 77 */
if (in_grouping_b_U(z, g_v, 97, 121, 0)) goto lab1;
goto lab0;
lab1:
z->c = z->l - m1;
if (!(eq_s_b(z, 2, s_11))) return 0;
}
lab0:
static int r_undouble(struct SN_env * z) {
{ int m_test = z->l - z->c; /* test, line 91 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
if (!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */
z->c = z->l - m_test;
}
return 1;
}
static int r_VX(struct SN_env * z) {
{ int m_test = z->l - z->c; /* test, line 78 */
z->ket = z->c; /* [, line 91 */
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) return 0;
z->c = ret; /* next, line 78 */
z->c = ret; /* next, line 91 */
}
{ int m1 = z->l - z->c; (void)m1; /* or, line 78 */
if (in_grouping_b_U(z, g_v, 97, 121, 0)) goto lab1;
goto lab0;
lab1:
z->c = z->l - m1;
if (!(eq_s_b(z, 2, s_12))) return 0;
}
lab0:
z->c = z->l - m_test;
z->bra = z->c; /* ], line 91 */
{ int ret = slice_del(z); /* delete, line 91 */
if (ret < 0) return ret;
}
return 1;
}
static int r_C(struct SN_env * z) {
{ int m_test = z->l - z->c; /* test, line 79 */
{ int m1 = z->l - z->c; (void)m1; /* not, line 79 */
if (!(eq_s_b(z, 2, s_13))) goto lab0;
return 0;
lab0:
z->c = z->l - m1;
static int r_e_ending(struct SN_env * z) {
z->B[0] = 0; /* unset e_found, line 95 */
z->ket = z->c; /* [, line 96 */
if (!(eq_s_b(z, 1, s_13))) return 0;
z->bra = z->c; /* ], line 96 */
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 96 */
if (ret < 0) return ret;
}
if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0;
{ int m_test = z->l - z->c; /* test, line 96 */
if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0;
z->c = z->l - m_test;
}
{ int ret = slice_del(z); /* delete, line 96 */
if (ret < 0) return ret;
}
z->B[0] = 1; /* set e_found, line 97 */
{ int ret = r_undouble(z);
if (ret == 0) return 0; /* call undouble, line 98 */
if (ret < 0) return ret;
}
return 1;
}
static int r_lengthen_V(struct SN_env * z) {
{ int m1 = z->l - z->c; (void)m1; /* do, line 81 */
if (out_grouping_b_U(z, g_v_WX, 97, 121, 0)) goto lab0;
z->ket = z->c; /* [, line 82 */
{ int m2 = z->l - z->c; (void)m2; /* or, line 82 */
if (in_grouping_b_U(z, g_AOU, 97, 117, 0)) goto lab2;
z->bra = z->c; /* ], line 82 */
{ int m_test = z->l - z->c; /* test, line 82 */
{ int m3 = z->l - z->c; (void)m3; /* or, line 82 */
if (out_grouping_b_U(z, g_v, 97, 121, 0)) goto lab4;
goto lab3;
lab4:
z->c = z->l - m3;
if (z->c > z->lb) goto lab2; /* atlimit, line 82 */
}
lab3:
z->c = z->l - m_test;
static int r_en_ending(struct SN_env * z) {
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 102 */
if (ret < 0) return ret;
}
goto lab1;
lab2:
{ int m1 = z->l - z->c; (void)m1; /* and, line 102 */
if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0;
z->c = z->l - m1;
{ int m2 = z->l - z->c; (void)m2; /* not, line 102 */
if (!(eq_s_b(z, 3, s_14))) goto lab0;
return 0;
lab0:
z->c = z->l - m2;
if (!(eq_s_b(z, 1, s_14))) goto lab0;
z->bra = z->c; /* ], line 83 */
{ int m_test = z->l - z->c; /* test, line 83 */
{ int m4 = z->l - z->c; (void)m4; /* or, line 83 */
if (out_grouping_b_U(z, g_v, 97, 121, 0)) goto lab6;
goto lab5;
lab6:
z->c = z->l - m4;
if (z->c > z->lb) goto lab0; /* atlimit, line 83 */
}
lab5:
{ int m5 = z->l - z->c; (void)m5; /* not, line 84 */
if (in_grouping_b_U(z, g_AIOU, 97, 117, 0)) goto lab7;
goto lab0;
lab7:
z->c = z->l - m5;
}
{ int m6 = z->l - z->c; (void)m6; /* not, line 85 */
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) goto lab8;
z->c = ret; /* next, line 85 */
}
if (in_grouping_b_U(z, g_AIOU, 97, 117, 0)) goto lab8;
if (out_grouping_b_U(z, g_v, 97, 121, 0)) goto lab8;
goto lab0;
lab8:
z->c = z->l - m6;
}
z->c = z->l - m_test;
}
}
lab1:
z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 86 */
if (z->S[0] == 0) return -1; /* -> ch, line 86 */
{ int c_keep = z->c;
int ret = insert_v(z, z->c, z->c, z->S[0]); /* <+ ch, line 86 */
z->c = c_keep;
{ int ret = slice_del(z); /* delete, line 102 */
if (ret < 0) return ret;
}
lab0:
z->c = z->l - m1;
{ int ret = r_undouble(z);
if (ret == 0) return 0; /* call undouble, line 103 */
if (ret < 0) return ret;
}
return 1;
}
static int r_Step_1(struct SN_env * z) {
static int r_standard_suffix(struct SN_env * z) {
int among_var;
z->ket = z->c; /* [, line 91 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
among_var = find_among_b(z, a_1, 10); /* among, line 91 */
if (!(among_var)) return 0;
z->bra = z->c; /* ], line 91 */
{ int m1 = z->l - z->c; (void)m1; /* do, line 107 */
z->ket = z->c; /* [, line 108 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0;
among_var = find_among_b(z, a_3, 5); /* substring, line 108 */
if (!(among_var)) goto lab0;
z->bra = z->c; /* ], line 108 */
switch(among_var) {
case 0: return 0;
case 0: goto lab0;
case 1:
{ int ret = slice_del(z); /* delete, line 93 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 94 */
if (ret < 0) return ret;
}
{ int m1 = z->l - z->c; (void)m1; /* not, line 94 */
if (!(eq_s_b(z, 1, s_15))) goto lab0;
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 94 */
if (ret == 0) goto lab0; /* call R1, line 110 */
if (ret < 0) return ret;
}
return 0;
lab0:
z->c = z->l - m1;
}
{ int ret = r_C(z);
if (ret == 0) return 0; /* call C, line 94 */
{ int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 94 */
break;
case 2:
{ int ret = r_en_ending(z);
if (ret == 0) goto lab0; /* call en_ending, line 113 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 95 */
if (ret == 0) goto lab0; /* call R1, line 116 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 2, s_16); /* <-, line 95 */
if (out_grouping_b_U(z, g_v_j, 97, 232, 0)) goto lab0;
{ int ret = slice_del(z); /* delete, line 116 */
if (ret < 0) return ret;
}
break;
case 4:
{ int m2 = z->l - z->c; (void)m2; /* or, line 97 */
if (!(eq_s_b(z, 2, s_17))) goto lab2;
{ int ret = r_R1(z);
if (ret == 0) goto lab2; /* call R1, line 97 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) goto lab2; /* call C, line 97 */
if (ret < 0) return ret;
}
z->bra = z->c; /* ], line 97 */
{ int ret = slice_del(z); /* delete, line 97 */
if (ret < 0) return ret;
lab0:
z->c = z->l - m1;
}
{ int ret = r_lengthen_V(z);
if (ret == 0) goto lab2; /* call lengthen_V, line 97 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 120 */
{ int ret = r_e_ending(z);
if (ret == 0) goto lab1; /* call e_ending, line 120 */
if (ret < 0) return ret;
}
goto lab1;
lab2:
lab1:
z->c = z->l - m2;
if (!(eq_s_b(z, 2, s_18))) goto lab3;
{ int ret = r_R1(z);
if (ret == 0) goto lab3; /* call R1, line 98 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) goto lab3; /* call C, line 98 */
if (ret < 0) return ret;
}
z->bra = z->c; /* ], line 98 */
{ int ret = slice_del(z); /* delete, line 98 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 122 */
z->ket = z->c; /* [, line 122 */
if (!(eq_s_b(z, 4, s_16))) goto lab2;
z->bra = z->c; /* ], line 122 */
{ int ret = r_R2(z);
if (ret == 0) goto lab2; /* call R2, line 122 */
if (ret < 0) return ret;
}
goto lab1;
{ int m4 = z->l - z->c; (void)m4; /* not, line 122 */
if (!(eq_s_b(z, 1, s_17))) goto lab3;
goto lab2;
lab3:
z->c = z->l - m2;
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 99 */
if (ret < 0) return ret;
z->c = z->l - m4;
}
{ int ret = r_C(z);
if (ret == 0) return 0; /* call C, line 99 */
{ int ret = slice_del(z); /* delete, line 122 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_19); /* <-, line 99 */
z->ket = z->c; /* [, line 123 */
if (!(eq_s_b(z, 2, s_18))) goto lab2;
z->bra = z->c; /* ], line 123 */
{ int ret = r_en_ending(z);
if (ret == 0) goto lab2; /* call en_ending, line 123 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
lab1:
break;
case 5:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 101 */
{ int m5 = z->l - z->c; (void)m5; /* do, line 126 */
z->ket = z->c; /* [, line 127 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4;
among_var = find_among_b(z, a_4, 6); /* substring, line 127 */
if (!(among_var)) goto lab4;
z->bra = z->c; /* ], line 127 */
switch(among_var) {
case 0: goto lab4;
case 1:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 129 */
if (ret < 0) return ret;
}
{ int ret = r_V(z);
if (ret == 0) return 0; /* call V, line 101 */
{ int ret = slice_del(z); /* delete, line 129 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 2, s_20); /* <-, line 101 */
{ int m6 = z->l - z->c; (void)m6; /* or, line 130 */
z->ket = z->c; /* [, line 130 */
if (!(eq_s_b(z, 2, s_19))) goto lab6;
z->bra = z->c; /* ], line 130 */
{ int ret = r_R2(z);
if (ret == 0) goto lab6; /* call R2, line 130 */
if (ret < 0) return ret;
}
break;
case 6:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 102 */
if (ret < 0) return ret;
{ int m7 = z->l - z->c; (void)m7; /* not, line 130 */
if (!(eq_s_b(z, 1, s_20))) goto lab7;
goto lab6;
lab7:
z->c = z->l - m7;
}
{ int ret = slice_from_s(z, 3, s_21); /* <-, line 102 */
{ int ret = slice_del(z); /* delete, line 130 */
if (ret < 0) return ret;
}
break;
case 7:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 103 */
goto lab5;
lab6:
z->c = z->l - m6;
{ int ret = r_undouble(z);
if (ret == 0) goto lab4; /* call undouble, line 130 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 2, s_22); /* <-, line 103 */
if (ret < 0) return ret;
}
lab5:
break;
case 8:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 104 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_23); /* <-, line 104 */
case 2:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 133 */
if (ret < 0) return ret;
}
break;
case 9:
{ int m3 = z->l - z->c; (void)m3; /* or, line 105 */
if (!(eq_s_b(z, 3, s_24))) goto lab5;
{ int ret = r_R1(z);
if (ret == 0) goto lab5; /* call R1, line 105 */
if (ret < 0) return ret;
{ int m8 = z->l - z->c; (void)m8; /* not, line 133 */
if (!(eq_s_b(z, 1, s_21))) goto lab8;
goto lab4;
lab8:
z->c = z->l - m8;
}
z->bra = z->c; /* ], line 105 */
{ int ret = slice_from_s(z, 4, s_25); /* <-, line 105 */
{ int ret = slice_del(z); /* delete, line 133 */
if (ret < 0) return ret;
}
goto lab4;
lab5:
z->c = z->l - m3;
if (!(eq_s_b(z, 2, s_26))) goto lab6;
{ int ret = slice_del(z); /* delete, line 106 */
break;
case 3:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 136 */
if (ret < 0) return ret;
}
goto lab4;
lab6:
z->c = z->l - m3;
if (!(eq_s_b(z, 1, s_27))) goto lab7;
{ int ret = r_R1(z);
if (ret == 0) goto lab7; /* call R1, line 107 */
{ int ret = slice_del(z); /* delete, line 136 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) goto lab7; /* call C, line 107 */
{ int ret = r_e_ending(z);
if (ret == 0) goto lab4; /* call e_ending, line 136 */
if (ret < 0) return ret;
}
z->bra = z->c; /* ], line 107 */
{ int ret = slice_del(z); /* delete, line 107 */
break;
case 4:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 139 */
if (ret < 0) return ret;
}
goto lab4;
lab7:
z->c = z->l - m3;
{ int m4 = z->l - z->c; (void)m4; /* or, line 108 */
if (!(eq_s_b(z, 1, s_28))) goto lab10;
goto lab9;
lab10:
z->c = z->l - m4;
if (!(eq_s_b(z, 1, s_29))) goto lab8;
}
lab9:
{ int ret = r_V(z);
if (ret == 0) goto lab8; /* call V, line 108 */
{ int ret = slice_del(z); /* delete, line 139 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 108 */
break;
case 5:
{ int ret = r_R2(z);
if (ret == 0) goto lab4; /* call R2, line 142 */
if (ret < 0) return ret;
}
goto lab4;
lab8:
z->c = z->l - m3;
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 109 */
if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */
{ int ret = slice_del(z); /* delete, line 142 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) return 0; /* call C, line 109 */
if (ret < 0) return ret;
break;
}
{ int ret = slice_del(z); /* delete, line 109 */
if (ret < 0) return ret;
lab4:
z->c = z->l - m5;
}
{ int ret = r_lengthen_V(z);
if (ret == 0) return 0; /* call lengthen_V, line 109 */
if (ret < 0) return ret;
{ int m9 = z->l - z->c; (void)m9; /* do, line 146 */
if (out_grouping_b_U(z, g_v_I, 73, 232, 0)) goto lab9;
{ int m_test = z->l - z->c; /* test, line 148 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9;
if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */
if (out_grouping_b_U(z, g_v, 97, 232, 0)) goto lab9;
z->c = z->l - m_test;
}
z->ket = z->c; /* [, line 152 */
{ int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
if (ret < 0) goto lab9;
z->c = ret; /* next, line 152 */
}
lab4:
break;
case 10:
{ int ret = slice_from_s(z, 2, s_30); /* <-, line 110 */
z->bra = z->c; /* ], line 152 */
{ int ret = slice_del(z); /* delete, line 152 */
if (ret < 0) return ret;
}
break;
lab9:
z->c = z->l - m9;
}
return 1;
}
static int r_Step_2(struct SN_env * z) {
int among_var;
z->ket = z->c; /* [, line 116 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] != 101) return 0;
among_var = find_among_b(z, a_2, 11); /* among, line 116 */
if (!(among_var)) return 0;
z->bra = z->c; /* ], line 116 */
switch(among_var) {
case 0: return 0;
case 1:
{ int m1 = z->l - z->c; (void)m1; /* or, line 117 */
if (!(eq_s_b(z, 2, s_31))) goto lab1;
z->bra = z->c; /* ], line 117 */
{ int ret = slice_del(z); /* delete, line 117 */
if (ret < 0) return ret;
}
goto lab0;
lab1:
z->c = z->l - m1;
if (!(eq_s_b(z, 2, s_32))) goto lab2;
z->bra = z->c; /* ], line 118 */
{ int ret = r_R1(z);
if (ret == 0) goto lab2; /* call R1, line 118 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) goto lab2; /* call C, line 118 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 118 */
extern int dutch_UTF_8_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 159 */
{ int ret = r_prelude(z);
if (ret == 0) goto lab0; /* call prelude, line 159 */
if (ret < 0) return ret;
}
goto lab0;
lab2:
z->c = z->l - m1;
if (!(eq_s_b(z, 3, s_33))) goto lab3;
z->bra = z->c; /* ], line 119 */
{ int ret = slice_from_s(z, 2, s_34); /* <-, line 119 */
if (ret < 0) return ret;
lab0:
z->c = c1;
}
goto lab0;
lab3:
z->c = z->l - m1;
if (!(eq_s_b(z, 1, s_35))) goto lab4;
z->bra = z->c; /* ], line 120 */
{ int ret = r_R1(z);
if (ret == 0) goto lab4; /* call R1, line 120 */
{ int c2 = z->c; /* do, line 160 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab1; /* call mark_regions, line 160 */
if (ret < 0) return ret;
}
{ int ret = r_VX(z);
if (ret == 0) goto lab4; /* call VX, line 120 */
if (ret < 0) return ret;
lab1:
z->c = c2;
}
{ int ret = slice_del(z); /* delete, line 120 */
z->lb = z->c; z->c = z->l; /* backwards, line 161 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 162 */
{ int ret = r_standard_suffix(z);
if (ret == 0) goto lab2; /* call standard_suffix, line 162 */
if (ret < 0) return ret;
}
goto lab0;
lab4:
z->c = z->l - m1;
if (!(eq_s_b(z, 3, s_36))) goto lab5;
z->bra = z->c; /* ], line 121 */
{ int ret = slice_from_s(z, 3, s_37); /* <-, line 121 */
if (ret < 0) return ret;
lab2:
z->c = z->l - m3;
}
goto lab0;
lab5:
z->c = z->l - m1;
if (!(eq_s_b(z, 2, s_38))) goto lab6;
z->bra = z->c; /* ], line 122 */
{ int ret = slice_from_s(z, 1, s_39); /* <-, line 122 */
z->c = z->lb;
{ int c4 = z->c; /* do, line 163 */
{ int ret = r_postlude(z);
if (ret == 0) goto lab3; /* call postlude, line 163 */
if (ret < 0) return ret;
}
goto lab0;
lab6:
z->c = z->l - m1;
if (!(eq_s_b(z, 1, s_40))) goto lab7;
z->bra = z->c; /* ], line 123 */
{ int ret = r_R1(z);
if (ret == 0) goto lab7; /* call R1, line 123 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 123 */
if (ret < 0) return ret;
}
goto lab0;
lab7:
z->c = z->l - m1;
z->bra = z->c; /* ], line 124 */
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 124 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) return 0; /* call C, line 124 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 124 */
if (ret < 0) return ret;
}
}
lab0:
break;
case 2:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 125 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_41); /* <-, line 125 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 126 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 4, s_42); /* <-, line 126 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 127 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 4, s_43); /* <-, line 127 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 128 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) return 0; /* call C, line 128 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 128 */
if (ret < 0) return ret;
}
break;
case 6:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 129 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_44); /* <-, line 129 */
if (ret < 0) return ret;
}
break;
case 7:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 130 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_45); /* <-, line 130 */
if (ret < 0) return ret;
}
break;
case 8:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 131 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_46); /* <-, line 131 */
if (ret < 0) return ret;
}
break;
case 9:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 132 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 132 */
if (ret < 0) return ret;
}
{ int ret = insert_s(z, z->c, z->c, 1, s_47); /* attach, line 132 */
if (ret < 0) return ret;
}
{ int ret = r_lengthen_V(z);
if (ret == 0) return 0; /* call lengthen_V, line 132 */
if (ret < 0) return ret;
}
break;
case 10:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 133 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) return 0; /* call C, line 133 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 133 */
if (ret < 0) return ret;
}
{ int ret = insert_s(z, z->c, z->c, 2, s_48); /* attach, line 133 */
if (ret < 0) return ret;
}
{ int ret = r_lengthen_V(z);
if (ret == 0) return 0; /* call lengthen_V, line 133 */
if (ret < 0) return ret;
}
break;
case 11:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 134 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) return 0; /* call C, line 134 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 3, s_49); /* <-, line 134 */
if (ret < 0) return ret;
}
break;
}
return 1;
}
/*
 * Step_3: backward suffix step driven by table a_3 (14 entries).
 *
 * The slice end (z->ket) is set to the cursor, a_3 is searched backwards
 * with find_among_b, and the slice start (z->bra) is set to the cursor
 * position after the match.  The action number returned by the search
 * selects which conditions are checked and which edit is applied.
 *
 * Returns 1 when the selected action completes, 0 when nothing matches or
 * a condition routine (R1, R2, C, lengthen_V) returns 0, and the negative
 * value reported by any routine or slice operation otherwise.  Edits made
 * before a later condition fails are not undone here.
 */
static int r_Step_3(struct SN_env * z) {
    int among_var;
    int prev;
    int ret;

    z->ket = z->c; /* [, line 140 */

    /*
     * Cheap rejection before the table search: there must be more than two
     * positions between z->lb and the cursor, and the preceding byte must
     * lie in 0x60..0x7f with its bit set in the mask.
     */
    if (z->c - 2 <= z->lb) return 0;
    prev = z->p[z->c - 1];
    if (prev >> 5 != 3 || !((1316016 >> (prev & 0x1f)) & 1)) return 0;

    among_var = find_among_b(z, a_3, 14); /* among, line 140 */
    if (among_var == 0) return 0;
    z->bra = z->c; /* ], line 140 */

    /*
     * Condition routines: a result <= 0 is returned as is (0 = condition
     * failed, < 0 = error).  Slice operations: only < 0 is an error.
     */
    switch (among_var) {
        case 1: /* line 141 */
            if ((ret = r_R1(z)) <= 0) return ret;
            if ((ret = slice_from_s(z, 3, s_50)) < 0) return ret;
            break;
        case 2: /* line 142 */
        case 5: /* line 149 */
            if ((ret = r_R1(z)) <= 0) return ret;
            if ((ret = slice_del(z)) < 0) return ret;
            if ((ret = r_lengthen_V(z)) <= 0) return ret;
            break;
        case 3: /* line 145 */
            if ((ret = r_R1(z)) <= 0) return ret;
            if ((ret = slice_del(z)) < 0) return ret;
            break;
        case 4: /* line 146 */
            if ((ret = slice_from_s(z, 1, s_51)) < 0) return ret;
            break;
        case 6: /* line 150 */
            if ((ret = r_R1(z)) <= 0) return ret;
            if ((ret = r_C(z)) <= 0) return ret;
            if ((ret = slice_from_s(z, 3, s_52)) < 0) return ret;
            break;
        case 7: /* line 151 */
            if ((ret = r_R2(z)) <= 0) return ret;
            if ((ret = slice_del(z)) < 0) return ret;
            if ((ret = insert_s(z, z->c, z->c, 1, s_53)) < 0) return ret;
            if ((ret = r_lengthen_V(z)) <= 0) return ret;
            break;
        case 8: /* line 152 */
            if ((ret = r_R2(z)) <= 0) return ret;
            if ((ret = slice_del(z)) < 0) return ret;
            if ((ret = insert_s(z, z->c, z->c, 1, s_54)) < 0) return ret;
            if ((ret = r_lengthen_V(z)) <= 0) return ret;
            break;
        case 9: /* line 153 */
            if ((ret = r_R1(z)) <= 0) return ret;
            if ((ret = r_C(z)) <= 0) return ret;
            if ((ret = slice_from_s(z, 1, s_55)) < 0) return ret;
            break;
        case 10: /* line 154 */
            if ((ret = r_R1(z)) <= 0) return ret;
            if ((ret = r_C(z)) <= 0) return ret;
            if ((ret = slice_from_s(z, 1, s_56)) < 0) return ret;
            break;
        default:
            break;
    }
    return 1;
}
/*
 * Step_4 (generator annotation: "or, line 179"): tries two suffix tables
 * in turn.
 *
 * First alternative: search a_4 (16 entries) backwards from the cursor and
 * run the action selected by among_var.  Any failed lookup or condition
 * jumps to lab1.
 * Second alternative (lab1): put the cursor back at the same distance from
 * the limit z->l as on entry, search a_5 (3 entries), and for action 1
 * require R1 and C, delete the slice and call lengthen_V.
 *
 * Returns 1 when either alternative runs to completion, 0 when the second
 * alternative fails, and the negative value reported by a routine or slice
 * operation otherwise.
 *
 * Only the cursor is restored at lab1: an edit already made by the first
 * alternative (for example the delete in case 10 before lengthen_V returns
 * 0) is not undone before the second alternative runs.
 */
static int r_Step_4(struct SN_env * z) {
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* or, line 179 */
z->ket = z->c; /* [, line 160 */
/* Cheap rejection before the table search: more than two positions must lie
 * between z->lb and the cursor, and the preceding byte must be in 0x60..0x7f
 * with its bit set in the mask. */
if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1315024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1;
among_var = find_among_b(z, a_4, 16); /* among, line 160 */
if (!(among_var)) goto lab1;
z->bra = z->c; /* ], line 160 */
/* In every case below: ret == 0 from a condition routine abandons the first
 * alternative (goto lab1); ret < 0 is an error and is returned at once. */
switch(among_var) {
case 0: goto lab1;
case 1:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 161 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 2, s_57); /* <-, line 161 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 162 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 3, s_58); /* <-, line 162 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 163 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 163 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 164 */
if (ret < 0) return ret;
}
{ int ret = r_V(z);
if (ret == 0) goto lab1; /* call V, line 164 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_59); /* <-, line 164 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 165 */
if (ret < 0) return ret;
}
{ int ret = r_V(z);
if (ret == 0) goto lab1; /* call V, line 165 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_60); /* <-, line 165 */
if (ret < 0) return ret;
}
break;
case 6:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 166 */
if (ret < 0) return ret;
}
{ int ret = r_V(z);
if (ret == 0) goto lab1; /* call V, line 166 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 1, s_61); /* <-, line 166 */
if (ret < 0) return ret;
}
break;
case 7:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 167 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 4, s_62); /* <-, line 167 */
if (ret < 0) return ret;
}
break;
case 8:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 169 */
if (ret < 0) return ret;
}
{ int ret = slice_from_s(z, 4, s_63); /* <-, line 169 */
if (ret < 0) return ret;
}
break;
case 9:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 172 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 172 */
if (ret < 0) return ret;
}
break;
case 10:
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 176 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) goto lab1; /* call C, line 176 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 176 */
if (ret < 0) return ret;
}
/* The delete above has already happened if lengthen_V returns 0 here. */
{ int ret = r_lengthen_V(z);
if (ret == 0) goto lab1; /* call lengthen_V, line 176 */
if (ret < 0) return ret;
}
break;
}
/* First alternative completed: skip the second one. */
goto lab0;
lab1:
/* Second alternative: restore the cursor relative to the limit, then try a_5. */
z->c = z->l - m1;
z->ket = z->c; /* [, line 180 */
/* Cheap rejection: more than one position must lie between z->lb and the
 * cursor, and the preceding byte must be in 0x60..0x7f with its mask bit set. */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1310848 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
among_var = find_among_b(z, a_5, 3); /* among, line 180 */
if (!(among_var)) return 0;
z->bra = z->c; /* ], line 180 */
switch(among_var) {
case 0: return 0;
case 1:
{ int ret = r_R1(z);
if (ret == 0) return 0; /* call R1, line 183 */
if (ret < 0) return ret;
}
{ int ret = r_C(z);
if (ret == 0) return 0; /* call C, line 183 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 183 */
if (ret < 0) return ret;
}
{ int ret = r_lengthen_V(z);
if (ret == 0) return 0; /* call lengthen_V, line 183 */
if (ret < 0) return ret;
}
break;
}
}
lab0:
return 1;
}
/*
 * Step_7: backward suffix step driven by table a_6 (5 entries).
 *
 * The slice end (z->ket) is set to the cursor, a_6 is searched backwards,
 * and the matched slice is replaced by the string selected by the action
 * number (s_64..s_68).  No region condition is checked.
 *
 * Returns 1 after a replacement, 0 when nothing matches, and the negative
 * value reported by slice_from_s on error.
 */
static int r_Step_7(struct SN_env * z) {
    int among_var;
    int prev;
    int ret = 0;

    z->ket = z->c; /* [, line 190 */

    /*
     * Cheap rejection before the table search: more than one position must
     * lie between z->lb and the cursor, and the preceding byte must be
     * 101 ('e') or 116 ('t').
     */
    if (z->c - 1 <= z->lb) return 0;
    prev = z->p[z->c - 1];
    if (prev != 101 && prev != 116) return 0;

    among_var = find_among_b(z, a_6, 5); /* among, line 190 */
    if (among_var == 0) return 0;
    z->bra = z->c; /* ], line 190 */

    if (among_var == 1) {
        ret = slice_from_s(z, 2, s_64); /* <-, line 191 */
    } else if (among_var == 2) {
        ret = slice_from_s(z, 2, s_65); /* <-, line 192 */
    } else if (among_var == 3) {
        ret = slice_from_s(z, 1, s_66); /* <-, line 193 */
    } else if (among_var == 4) {
        ret = slice_from_s(z, 1, s_67); /* <-, line 194 */
    } else if (among_var == 5) {
        ret = slice_from_s(z, 1, s_68); /* <-, line 195 */
    }
    if (ret < 0) return ret;

    return 1;
}
/*
 * Step_6: backward suffix step driven by table a_7 (22 entries).
 *
 * The slice end (z->ket) is set to the cursor, a_7 is searched backwards,
 * and the matched slice is replaced by a one-symbol string: action k
 * selects s_(68 + k), i.e. s_69 for action 1 up to s_90 for action 22.
 * No region condition is checked.
 *
 * Returns 1 after a replacement, 0 when nothing matches, and the negative
 * value reported by slice_from_s on error.
 */
static int r_Step_6(struct SN_env * z) {
    int among_var;
    int prev;
    int ret = 0;

    z->ket = z->c; /* [, line 201 */

    /*
     * Cheap rejection before the table search: the cursor must be past
     * z->lb, and the preceding byte must lie in 0x60..0x7f with its bit
     * set in the mask.
     */
    if (z->c <= z->lb) return 0;
    prev = z->p[z->c - 1];
    if (prev >> 5 != 3 || !((98532828 >> (prev & 0x1f)) & 1)) return 0;

    among_var = find_among_b(z, a_7, 22); /* among, line 201 */
    if (among_var == 0) return 0;
    z->bra = z->c; /* ], line 201 */

    switch (among_var) {
        case 1:  ret = slice_from_s(z, 1, s_69); break; /* <-, line 202 */
        case 2:  ret = slice_from_s(z, 1, s_70); break; /* <-, line 203 */
        case 3:  ret = slice_from_s(z, 1, s_71); break; /* <-, line 204 */
        case 4:  ret = slice_from_s(z, 1, s_72); break; /* <-, line 205 */
        case 5:  ret = slice_from_s(z, 1, s_73); break; /* <-, line 206 */
        case 6:  ret = slice_from_s(z, 1, s_74); break; /* <-, line 207 */
        case 7:  ret = slice_from_s(z, 1, s_75); break; /* <-, line 208 */
        case 8:  ret = slice_from_s(z, 1, s_76); break; /* <-, line 209 */
        case 9:  ret = slice_from_s(z, 1, s_77); break; /* <-, line 210 */
        case 10: ret = slice_from_s(z, 1, s_78); break; /* <-, line 211 */
        case 11: ret = slice_from_s(z, 1, s_79); break; /* <-, line 212 */
        case 12: ret = slice_from_s(z, 1, s_80); break; /* <-, line 213 */
        case 13: ret = slice_from_s(z, 1, s_81); break; /* <-, line 214 */
        case 14: ret = slice_from_s(z, 1, s_82); break; /* <-, line 215 */
        case 15: ret = slice_from_s(z, 1, s_83); break; /* <-, line 216 */
        case 16: ret = slice_from_s(z, 1, s_84); break; /* <-, line 217 */
        case 17: ret = slice_from_s(z, 1, s_85); break; /* <-, line 218 */
        case 18: ret = slice_from_s(z, 1, s_86); break; /* <-, line 219 */
        case 19: ret = slice_from_s(z, 1, s_87); break; /* <-, line 220 */
        case 20: ret = slice_from_s(z, 1, s_88); break; /* <-, line 221 */
        case 21: ret = slice_from_s(z, 1, s_89); break; /* <-, line 222 */
        case 22: ret = slice_from_s(z, 1, s_90); break; /* <-, line 223 */
        default: break;
    }
    if (ret < 0) return ret;

    return 1;
}
/*
 * Computes the region marks z->I[1] (annotated "p1") and z->I[2]
 * (annotated "p2").
 *
 * Both marks are first set to the limit z->l.  A forward scan from the
 * cursor then tries to place p1 and, if that succeeds, p2.  Each mark is
 * placed after: a run of out_grouping_U successes over g_v, then at least
 * one success of "eq_s(z, 2, s_91 / s_92) or in_grouping_U over g_v", then
 * one more out_grouping_U success.  If any of these requirements is not
 * met the scan is abandoned and the marks placed so far are kept.
 *
 * The cursor is restored to its entry position in all cases and the
 * function always returns 1.
 */
static int r_measure(struct SN_env * z) {
/* Default both marks to the limit, then put the cursor back. */
{ int c1 = z->c; /* do, line 251 */
z->c = z->l; /* tolimit, line 252 */
z->I[1] = z->c; /* setmark p1, line 253 */
z->I[2] = z->c; /* setmark p2, line 254 */
z->c = c1;
}
{ int c2 = z->c; /* do, line 256 */
/* Keep calling out_grouping_U for as long as it returns 0. */
while(1) { /* repeat, line 257 */
if (out_grouping_U(z, g_v, 97, 121, 0)) goto lab2;
continue;
lab2:
break;
}
/* i counts down from 1 on every successful iteration; i > 0 afterwards
 * means the loop body never succeeded, which abandons the scan. */
{ int i = 1;
while(1) { /* atleast, line 257 */
int c3 = z->c;
{ int c4 = z->c; /* or, line 257 */
if (!(eq_s(z, 2, s_91))) goto lab5;
goto lab4;
lab5:
/* eq_s failed: rewind and try the grouping test instead. */
z->c = c4;
if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab3;
}
lab4:
i--;
continue;
lab3:
/* Neither alternative matched: undo this iteration and stop looping. */
z->c = c3;
break;
}
if (i > 0) goto lab1;
}
if (out_grouping_U(z, g_v, 97, 121, 0)) goto lab1;
z->I[1] = z->c; /* setmark p1, line 257 */
/* Same sequence again, this time to place p2. */
while(1) { /* repeat, line 258 */
if (out_grouping_U(z, g_v, 97, 121, 0)) goto lab6;
continue;
lab6:
break;
}
{ int i = 1;
while(1) { /* atleast, line 258 */
int c5 = z->c;
{ int c6 = z->c; /* or, line 258 */
if (!(eq_s(z, 2, s_92))) goto lab9;
goto lab8;
lab9:
z->c = c6;
if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab7;
}
lab8:
i--;
continue;
lab7:
z->c = c5;
break;
}
if (i > 0) goto lab1;
}
if (out_grouping_U(z, g_v, 97, 121, 0)) goto lab1;
z->I[2] = z->c; /* setmark p2, line 258 */
lab1:
/* Reached on success and on abandonment alike: restore the entry cursor. */
z->c = c2;
}
return 1;
}
/*
 * Entry point of the stemmer (Snowball source lines 263-293).
 *
 * State used in z:
 *   z->B[0]  Y_found  - set whenever a literal is rewritten in the forward
 *                       pre-pass (lines 266-267)
 *   z->B[1]  stemmed  - set when any of Step_1..Step_4 or Step_7 succeeds
 *
 * Sequence:
 *   1. r_prelude
 *   2. forward pre-pass: rewrite s_93 at the cursor, then every s_95 that
 *      directly follows a g_v character, setting Y_found each time
 *   3. r_measure
 *   4. backward pass: Step_1, Step_2, Step_3, Step_4
 *   5. backward pass: Step_7, then Step_6 only if nothing has stemmed so far
 *   6. if Y_found: rewrite every s_97 back to s_98
 *
 * NOTE(review): the s_NN literals are defined outside this block; presumably
 * steps 2 and 6 are the usual y <-> Y marking -- confirm against the tables.
 *
 * Returns 1 on completion, 0 if r_prelude or r_measure returns 0, and any
 * negative value produced by a callee unchanged.
 */
extern int dutch_UTF_8_stem(struct SN_env * z) {
    int status;     /* result of the most recent callee */
    int saved;      /* forward cursor remembered around a `do` */
    int from_end;   /* backward cursor, stored as distance from z->l */

    /* call prelude, line 263 */
    status = r_prelude(z);
    if (status <= 0) return status;

    z->B[0] = 0; /* unset Y_found, line 264 */
    z->B[1] = 0; /* unset stemmed, line 265 */

    /* do, line 266: [ literal ] <- replacement, set Y_found */
    saved = z->c;
    z->bra = z->c;
    if (eq_s(z, 1, s_93)) {
        z->ket = z->c;
        status = slice_from_s(z, 1, s_94);
        if (status < 0) return status;
        z->B[0] = 1;
    }
    z->c = saved;

    /* do, line 267: repeat ( goto ( v [ literal ] ) <- replacement, set Y_found ) */
    saved = z->c;
    for (;;) {
        const int round_start = z->c;
        int found = 0;

        /* goto: advance one character at a time until the pattern matches */
        for (;;) {
            const int probe = z->c;
            if (!in_grouping_U(z, g_v, 97, 121, 0)) {
                z->bra = z->c;
                if (eq_s(z, 1, s_95)) {
                    z->ket = z->c;
                    z->c = probe;
                    found = 1;
                    break;
                }
            }
            z->c = probe;
            status = skip_utf8(z->p, z->c, 0, z->l, 1);
            if (status < 0) break;      /* ran out of input */
            z->c = status;
        }
        if (!found) {
            z->c = round_start;
            break;
        }

        status = slice_from_s(z, 1, s_96);
        if (status < 0) return status;
        z->B[0] = 1;
    }
    z->c = saved;

    /* call measure, line 269 */
    status = r_measure(z);
    if (status <= 0) return status;

    z->lb = z->c; z->c = z->l; /* backwards, line 271 */

    /* Each `do` below keeps the cursor at the same distance from the end,
     * re-reading z->l afterwards because a step may change it. */

    /* do, line 272 */
    from_end = z->l - z->c;
    status = r_Step_1(z);
    if (status < 0) return status;
    if (status != 0) z->B[1] = 1;
    z->c = z->l - from_end;

    /* do, line 273 */
    from_end = z->l - z->c;
    status = r_Step_2(z);
    if (status < 0) return status;
    if (status != 0) z->B[1] = 1;
    z->c = z->l - from_end;

    /* do, line 274 */
    from_end = z->l - z->c;
    status = r_Step_3(z);
    if (status < 0) return status;
    if (status != 0) z->B[1] = 1;
    z->c = z->l - from_end;

    /* do, line 275 */
    from_end = z->l - z->c;
    status = r_Step_4(z);
    if (status < 0) return status;
    if (status != 0) z->B[1] = 1;
    z->c = z->l - from_end;

    z->c = z->lb;
    z->lb = z->c; z->c = z->l; /* backwards, line 289 */

    /* do, line 290 */
    from_end = z->l - z->c;
    status = r_Step_7(z);
    if (status < 0) return status;
    if (status != 0) z->B[1] = 1;
    z->c = z->l - from_end;

    /* do, line 291: ( stemmed or Step_6 ) */
    from_end = z->l - z->c;
    if (!z->B[1]) {
        status = r_Step_6(z);
        if (status < 0) return status;
    }
    z->c = z->l - from_end;

    z->c = z->lb;

    /* do, line 293: Y_found repeat ( goto ( [ literal ] ) <- replacement ) */
    saved = z->c;
    if (z->B[0]) {
        for (;;) {
            const int round_start = z->c;
            int found = 0;

            for (;;) {
                const int probe = z->c;
                z->bra = z->c;
                if (eq_s(z, 1, s_97)) {
                    z->ket = z->c;
                    z->c = probe;
                    found = 1;
                    break;
                }
                z->c = probe;
                status = skip_utf8(z->p, z->c, 0, z->l, 1);
                if (status < 0) break;  /* ran out of input */
                z->c = status;
            }
            if (!found) {
                z->c = round_start;
                break;
            }

            status = slice_from_s(z, 1, s_98);
            if (status < 0) return status;
        }
    }
    z->c = saved;

    return 1;
}
/* Allocate the stemmer environment for the Dutch UTF-8 stemmer.
 * NOTE(review): this function is defined twice in a row with different
 * SN_create_env arguments; this looks like the old and new side of a diff
 * shown together, and only one definition can remain in a compilable file.
 * The stemmer code directly above indexes z->I[2] and z->B[1] -- check that
 * the surviving variant allocates enough slots for the code it ships with. */
extern struct SN_env * dutch_UTF_8_create_env(void) { return SN_create_env(1, 3, 2); }
extern struct SN_env * dutch_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); }
/* Release an environment obtained from dutch_UTF_8_create_env.
 * NOTE(review): also defined twice; the second argument presumably has to
 * match the first argument of the SN_create_env call that is kept -- confirm. */
extern void dutch_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); }
extern void dutch_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); }
......@@ -54,13 +54,13 @@ static const symbol s_1_6[2] = { 'e', 's' };
static const struct among a_1[7] =
{
/* 0 */ { 1, s_1_0, -1, 1, 0},
/* 0 */ { 1, s_1_0, -1, 2, 0},
/* 1 */ { 2, s_1_1, -1, 1, 0},
/* 2 */ { 2, s_1_2, -1, 1, 0},
/* 2 */ { 2, s_1_2, -1, 2, 0},
/* 3 */ { 3, s_1_3, -1, 1, 0},
/* 4 */ { 2, s_1_4, -1, 1, 0},
/* 5 */ { 1, s_1_5, -1, 2, 0},
/* 6 */ { 2, s_1_6, 5, 1, 0}
/* 5 */ { 1, s_1_5, -1, 3, 0},
/* 6 */ { 2, s_1_6, 5, 2, 0}
};
static const symbol s_2_0[2] = { 'e', 'n' };
......@@ -123,21 +123,23 @@ static const symbol s_7[] = { 'u' };
static const symbol s_8[] = { 'a' };
static const symbol s_9[] = { 'o' };
static const symbol s_10[] = { 'u' };
static const symbol s_11[] = { 'i', 'g' };
static const symbol s_12[] = { 'e' };
static const symbol s_13[] = { 'e' };
static const symbol s_14[] = { 'e', 'r' };
static const symbol s_15[] = { 'e', 'n' };
static const symbol s_11[] = { 's' };
static const symbol s_12[] = { 'n', 'i', 's' };
static const symbol s_13[] = { 'i', 'g' };
static const symbol s_14[] = { 'e' };
static const symbol s_15[] = { 'e' };
static const symbol s_16[] = { 'e', 'r' };
static const symbol s_17[] = { 'e', 'n' };
static int r_prelude(struct SN_env * z) {
{ int c_test = z->c; /* test, line 30 */
while(1) { /* repeat, line 30 */
{ int c_test = z->c; /* test, line 35 */
while(1) { /* repeat, line 35 */
int c1 = z->c;
{ int c2 = z->c; /* or, line 33 */
z->bra = z->c; /* [, line 32 */
{ int c2 = z->c; /* or, line 38 */
z->bra = z->c; /* [, line 37 */
if (!(eq_s(z, 2, s_0))) goto lab2;
z->ket = z->c; /* ], line 32 */
{ int ret = slice_from_s(z, 2, s_1); /* <-, line 32 */
z->ket = z->c; /* ], line 37 */
{ int ret = slice_from_s(z, 2, s_1); /* <-, line 37 */
if (ret < 0) return ret;
}
goto lab1;
......@@ -145,7 +147,7 @@ static int r_prelude(struct SN_env * z) {
z->c = c2;
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab0;
z->c = ret; /* next, line 33 */
z->c = ret; /* next, line 38 */
}
}
lab1:
......@@ -156,26 +158,26 @@ static int r_prelude(struct SN_env * z) {
}
z->c = c_test;
}
while(1) { /* repeat, line 36 */
while(1) { /* repeat, line 41 */
int c3 = z->c;
while(1) { /* goto, line 36 */
while(1) { /* goto, line 41 */
int c4 = z->c;
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4;
z->bra = z->c; /* [, line 37 */
{ int c5 = z->c; /* or, line 37 */
z->bra = z->c; /* [, line 42 */
{ int c5 = z->c; /* or, line 42 */
if (!(eq_s(z, 1, s_2))) goto lab6;
z->ket = z->c; /* ], line 37 */
z->ket = z->c; /* ], line 42 */
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab6;
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 37 */
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 42 */
if (ret < 0) return ret;
}
goto lab5;
lab6:
z->c = c5;
if (!(eq_s(z, 1, s_4))) goto lab4;
z->ket = z->c; /* ], line 38 */
z->ket = z->c; /* ], line 43 */
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4;
{ int ret = slice_from_s(z, 1, s_5); /* <-, line 38 */
{ int ret = slice_from_s(z, 1, s_5); /* <-, line 43 */
if (ret < 0) return ret;
}
}
......@@ -186,7 +188,7 @@ static int r_prelude(struct SN_env * z) {
z->c = c4;
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab3;
z->c = ret; /* goto, line 36 */
z->c = ret; /* goto, line 41 */
}
}
continue;
......@@ -200,82 +202,82 @@ static int r_prelude(struct SN_env * z) {
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
z->I[1] = z->l;
{ int c_test = z->c; /* test, line 47 */
{ int c_test = z->c; /* test, line 52 */
{ int ret = skip_utf8(z->p, z->c, 0, z->l, + 3);
if (ret < 0) return 0;
z->c = ret; /* hop, line 47 */
z->c = ret; /* hop, line 52 */
}
z->I[2] = z->c; /* setmark x, line 47 */
z->I[2] = z->c; /* setmark x, line 52 */
z->c = c_test;
}
{ /* gopast */ /* grouping v, line 49 */
{ /* gopast */ /* grouping v, line 54 */
int ret = out_grouping_U(z, g_v, 97, 252, 1);
if (ret < 0) return 0;
z->c += ret;
}
{ /* gopast */ /* non v, line 49 */
{ /* gopast */ /* non v, line 54 */
int ret = in_grouping_U(z, g_v, 97, 252, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[0] = z->c; /* setmark p1, line 49 */
/* try, line 50 */
z->I[0] = z->c; /* setmark p1, line 54 */
/* try, line 55 */
if (!(z->I[0] < z->I[2])) goto lab0;
z->I[0] = z->I[2];
lab0:
{ /* gopast */ /* grouping v, line 51 */
{ /* gopast */ /* grouping v, line 56 */
int ret = out_grouping_U(z, g_v, 97, 252, 1);
if (ret < 0) return 0;
z->c += ret;
}
{ /* gopast */ /* non v, line 51 */
{ /* gopast */ /* non v, line 56 */
int ret = in_grouping_U(z, g_v, 97, 252, 1);
if (ret < 0) return 0;
z->c += ret;
}
z->I[1] = z->c; /* setmark p2, line 51 */
z->I[1] = z->c; /* setmark p2, line 56 */
return 1;
}
static int r_postlude(struct SN_env * z) {
int among_var;
while(1) { /* repeat, line 55 */
while(1) { /* repeat, line 60 */
int c1 = z->c;
z->bra = z->c; /* [, line 57 */
among_var = find_among(z, a_0, 6); /* substring, line 57 */
z->bra = z->c; /* [, line 62 */
among_var = find_among(z, a_0, 6); /* substring, line 62 */
if (!(among_var)) goto lab0;
z->ket = z->c; /* ], line 57 */
z->ket = z->c; /* ], line 62 */
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 58 */
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 63 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_from_s(z, 1, s_7); /* <-, line 59 */
{ int ret = slice_from_s(z, 1, s_7); /* <-, line 64 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 60 */
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 65 */
if (ret < 0) return ret;
}
break;
case 4:
{ int ret = slice_from_s(z, 1, s_9); /* <-, line 61 */
{ int ret = slice_from_s(z, 1, s_9); /* <-, line 66 */
if (ret < 0) return ret;
}
break;
case 5:
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 62 */
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */
if (ret < 0) return ret;
}
break;
case 6:
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
if (ret < 0) goto lab0;
z->c = ret; /* next, line 63 */
z->c = ret; /* next, line 68 */
}
break;
}
......@@ -299,26 +301,42 @@ static int r_R2(struct SN_env * z) {
static int r_standard_suffix(struct SN_env * z) {
int among_var;
{ int m1 = z->l - z->c; (void)m1; /* do, line 74 */
z->ket = z->c; /* [, line 75 */
{ int m1 = z->l - z->c; (void)m1; /* do, line 79 */
z->ket = z->c; /* [, line 80 */
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0;
among_var = find_among_b(z, a_1, 7); /* substring, line 75 */
among_var = find_among_b(z, a_1, 7); /* substring, line 80 */
if (!(among_var)) goto lab0;
z->bra = z->c; /* ], line 75 */
z->bra = z->c; /* ], line 80 */
{ int ret = r_R1(z);
if (ret == 0) goto lab0; /* call R1, line 75 */
if (ret == 0) goto lab0; /* call R1, line 80 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab0;
case 1:
{ int ret = slice_del(z); /* delete, line 77 */
{ int ret = slice_del(z); /* delete, line 82 */
if (ret < 0) return ret;
}
break;
case 2:
{ int ret = slice_del(z); /* delete, line 85 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 86 */
z->ket = z->c; /* [, line 86 */
if (!(eq_s_b(z, 1, s_11))) { z->c = z->l - m_keep; goto lab1; }
z->bra = z->c; /* ], line 86 */
if (!(eq_s_b(z, 3, s_12))) { z->c = z->l - m_keep; goto lab1; }
{ int ret = slice_del(z); /* delete, line 86 */
if (ret < 0) return ret;
}
lab1:
;
}
break;
case 3:
if (in_grouping_b_U(z, g_s_ending, 98, 116, 0)) goto lab0;
{ int ret = slice_del(z); /* delete, line 80 */
{ int ret = slice_del(z); /* delete, line 89 */
if (ret < 0) return ret;
}
break;
......@@ -326,175 +344,175 @@ static int r_standard_suffix(struct SN_env * z) {
lab0:
z->c = z->l - m1;
}
{ int m2 = z->l - z->c; (void)m2; /* do, line 84 */
z->ket = z->c; /* [, line 85 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1;
among_var = find_among_b(z, a_2, 4); /* substring, line 85 */
if (!(among_var)) goto lab1;
z->bra = z->c; /* ], line 85 */
{ int m2 = z->l - z->c; (void)m2; /* do, line 93 */
z->ket = z->c; /* [, line 94 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2;
among_var = find_among_b(z, a_2, 4); /* substring, line 94 */
if (!(among_var)) goto lab2;
z->bra = z->c; /* ], line 94 */
{ int ret = r_R1(z);
if (ret == 0) goto lab1; /* call R1, line 85 */
if (ret == 0) goto lab2; /* call R1, line 94 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab1;
case 0: goto lab2;
case 1:
{ int ret = slice_del(z); /* delete, line 87 */
{ int ret = slice_del(z); /* delete, line 96 */
if (ret < 0) return ret;
}
break;
case 2:
if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab1;
if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab2;
{ int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 3);
if (ret < 0) goto lab1;
z->c = ret; /* hop, line 90 */
if (ret < 0) goto lab2;
z->c = ret; /* hop, line 99 */
}
{ int ret = slice_del(z); /* delete, line 90 */
{ int ret = slice_del(z); /* delete, line 99 */
if (ret < 0) return ret;
}
break;
}
lab1:
lab2:
z->c = z->l - m2;
}
{ int m3 = z->l - z->c; (void)m3; /* do, line 94 */
z->ket = z->c; /* [, line 95 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2;
among_var = find_among_b(z, a_4, 8); /* substring, line 95 */
if (!(among_var)) goto lab2;
z->bra = z->c; /* ], line 95 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 103 */
z->ket = z->c; /* [, line 104 */
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3;
among_var = find_among_b(z, a_4, 8); /* substring, line 104 */
if (!(among_var)) goto lab3;
z->bra = z->c; /* ], line 104 */
{ int ret = r_R2(z);
if (ret == 0) goto lab2; /* call R2, line 95 */
if (ret == 0) goto lab3; /* call R2, line 104 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: goto lab2;
case 0: goto lab3;
case 1:
{ int ret = slice_del(z); /* delete, line 97 */
{ int ret = slice_del(z); /* delete, line 106 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 98 */
z->ket = z->c; /* [, line 98 */
if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab3; }
z->bra = z->c; /* ], line 98 */
{ int m4 = z->l - z->c; (void)m4; /* not, line 98 */
if (!(eq_s_b(z, 1, s_12))) goto lab4;
{ z->c = z->l - m_keep; goto lab3; }
lab4:
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 107 */
z->ket = z->c; /* [, line 107 */
if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab4; }
z->bra = z->c; /* ], line 107 */
{ int m4 = z->l - z->c; (void)m4; /* not, line 107 */
if (!(eq_s_b(z, 1, s_14))) goto lab5;
{ z->c = z->l - m_keep; goto lab4; }
lab5:
z->c = z->l - m4;
}
{ int ret = r_R2(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 98 */
if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 107 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 98 */
{ int ret = slice_del(z); /* delete, line 107 */
if (ret < 0) return ret;
}
lab3:
lab4:
;
}
break;
case 2:
{ int m5 = z->l - z->c; (void)m5; /* not, line 101 */
if (!(eq_s_b(z, 1, s_13))) goto lab5;
goto lab2;
lab5:
{ int m5 = z->l - z->c; (void)m5; /* not, line 110 */
if (!(eq_s_b(z, 1, s_15))) goto lab6;
goto lab3;
lab6:
z->c = z->l - m5;
}
{ int ret = slice_del(z); /* delete, line 101 */
{ int ret = slice_del(z); /* delete, line 110 */
if (ret < 0) return ret;
}
break;
case 3:
{ int ret = slice_del(z); /* delete, line 104 */
{ int ret = slice_del(z); /* delete, line 113 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 105 */
z->ket = z->c; /* [, line 106 */
{ int m6 = z->l - z->c; (void)m6; /* or, line 106 */
if (!(eq_s_b(z, 2, s_14))) goto lab8;
goto lab7;
lab8:
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */
z->ket = z->c; /* [, line 115 */
{ int m6 = z->l - z->c; (void)m6; /* or, line 115 */
if (!(eq_s_b(z, 2, s_16))) goto lab9;
goto lab8;
lab9:
z->c = z->l - m6;
if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab6; }
if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab7; }
}
lab7:
z->bra = z->c; /* ], line 106 */
lab8:
z->bra = z->c; /* ], line 115 */
{ int ret = r_R1(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R1, line 106 */
if (ret == 0) { z->c = z->l - m_keep; goto lab7; } /* call R1, line 115 */
if (ret < 0) return ret;
}
{ int ret = slice_del(z); /* delete, line 106 */
{ int ret = slice_del(z); /* delete, line 115 */
if (ret < 0) return ret;
}
lab6:
lab7:
;
}
break;
case 4:
{ int ret = slice_del(z); /* delete, line 110 */
{ int ret = slice_del(z); /* delete, line 119 */
if (ret < 0) return ret;
}
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */
z->ket = z->c; /* [, line 112 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab9; }
among_var = find_among_b(z, a_3, 2); /* substring, line 112 */
if (!(among_var)) { z->c = z->l - m_keep; goto lab9; }
z->bra = z->c; /* ], line 112 */
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 120 */
z->ket = z->c; /* [, line 121 */
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab10; }
among_var = find_among_b(z, a_3, 2); /* substring, line 121 */
if (!(among_var)) { z->c = z->l - m_keep; goto lab10; }
z->bra = z->c; /* ], line 121 */
{ int ret = r_R2(z);
if (ret == 0) { z->c = z->l - m_keep; goto lab9; } /* call R2, line 112 */
if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call R2, line 121 */
if (ret < 0) return ret;
}
switch(among_var) {
case 0: { z->c = z->l - m_keep; goto lab9; }
case 0: { z->c = z->l - m_keep; goto lab10; }
case 1:
{ int ret = slice_del(z); /* delete, line 114 */
{ int ret = slice_del(z); /* delete, line 123 */
if (ret < 0) return ret;
}
break;
}
lab9:
lab10:
;
}
break;
}
lab2:
lab3:
z->c = z->l - m3;
}
return 1;
}
extern int german_UTF_8_stem(struct SN_env * z) {
{ int c1 = z->c; /* do, line 125 */
{ int c1 = z->c; /* do, line 134 */
{ int ret = r_prelude(z);
if (ret == 0) goto lab0; /* call prelude, line 125 */
if (ret == 0) goto lab0; /* call prelude, line 134 */
if (ret < 0) return ret;
}
lab0:
z->c = c1;
}
{ int c2 = z->c; /* do, line 126 */
{ int c2 = z->c; /* do, line 135 */
{ int ret = r_mark_regions(z);
if (ret == 0) goto lab1; /* call mark_regions, line 126 */
if (ret == 0) goto lab1; /* call mark_regions, line 135 */
if (ret < 0) return ret;
}
lab1:
z->c = c2;
}
z->lb = z->c; z->c = z->l; /* backwards, line 127 */
z->lb = z->c; z->c = z->l; /* backwards, line 136 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 128 */
{ int m3 = z->l - z->c; (void)m3; /* do, line 137 */
{ int ret = r_standard_suffix(z);
if (ret == 0) goto lab2; /* call standard_suffix, line 128 */
if (ret == 0) goto lab2; /* call standard_suffix, line 137 */
if (ret < 0) return ret;
}
lab2:
z->c = z->l - m3;
}
z->c = z->lb;
{ int c4 = z->c; /* do, line 129 */
{ int c4 = z->c; /* do, line 138 */
{ int ret = r_postlude(z);
if (ret == 0) goto lab3; /* call postlude, line 129 */
if (ret == 0) goto lab3; /* call postlude, line 138 */
if (ret < 0) return ret;
}
lab3:
......
......@@ -136,14 +136,14 @@ static const symbol s_4_6[3] = { 'n', 'e', 'k' };
static const symbol s_4_7[3] = { 'v', 'a', 'l' };
static const symbol s_4_8[3] = { 'v', 'e', 'l' };
static const symbol s_4_9[2] = { 'u', 'l' };
static const symbol s_4_10[4] = { 'n', 0xC3, 0xA1, 'l' };
static const symbol s_4_11[4] = { 'n', 0xC3, 0xA9, 'l' };
static const symbol s_4_12[4] = { 'b', 0xC3, 0xB3, 'l' };
static const symbol s_4_13[4] = { 'r', 0xC3, 0xB3, 'l' };
static const symbol s_4_14[4] = { 't', 0xC3, 0xB3, 'l' };
static const symbol s_4_15[4] = { 'b', 0xC3, 0xB5, 'l' };
static const symbol s_4_16[4] = { 'r', 0xC3, 0xB5, 'l' };
static const symbol s_4_17[4] = { 't', 0xC3, 0xB5, 'l' };
static const symbol s_4_10[4] = { 'b', 0xC5, 0x91, 'l' };
static const symbol s_4_11[4] = { 'r', 0xC5, 0x91, 'l' };
static const symbol s_4_12[4] = { 't', 0xC5, 0x91, 'l' };
static const symbol s_4_13[4] = { 'n', 0xC3, 0xA1, 'l' };
static const symbol s_4_14[4] = { 'n', 0xC3, 0xA9, 'l' };
static const symbol s_4_15[4] = { 'b', 0xC3, 0xB3, 'l' };
static const symbol s_4_16[4] = { 'r', 0xC3, 0xB3, 'l' };
static const symbol s_4_17[4] = { 't', 0xC3, 0xB3, 'l' };
static const symbol s_4_18[3] = { 0xC3, 0xBC, 'l' };
static const symbol s_4_19[1] = { 'n' };
static const symbol s_4_20[2] = { 'a', 'n' };
......@@ -460,7 +460,7 @@ static const struct among a_11[42] =
/* 41 */ { 4, s_11_41, 35, 3, 0}
};
static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 };
static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 36, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1 };
static const symbol s_0[] = { 'a' };
static const symbol s_1[] = { 'e' };
......@@ -502,8 +502,8 @@ static const symbol s_35[] = { 'e' };
static int r_mark_regions(struct SN_env * z) {
z->I[0] = z->l;
{ int c1 = z->c; /* or, line 51 */
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab1;
if (in_grouping_U(z, g_v, 97, 252, 1) < 0) goto lab1; /* goto */ /* non v, line 48 */
if (in_grouping_U(z, g_v, 97, 369, 0)) goto lab1;
if (in_grouping_U(z, g_v, 97, 369, 1) < 0) goto lab1; /* goto */ /* non v, line 48 */
{ int c2 = z->c; /* or, line 49 */
if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 3 || !((101187584 >> (z->p[z->c + 1] & 0x1f)) & 1)) goto lab3;
if (!(find_among(z, a_0, 8))) goto lab3; /* among, line 49 */
......@@ -520,9 +520,9 @@ static int r_mark_regions(struct SN_env * z) {
goto lab0;
lab1:
z->c = c1;
if (out_grouping_U(z, g_v, 97, 252, 0)) return 0;
if (out_grouping_U(z, g_v, 97, 369, 0)) return 0;
{ /* gopast */ /* grouping v, line 53 */
int ret = out_grouping_U(z, g_v, 97, 252, 1);
int ret = out_grouping_U(z, g_v, 97, 369, 1);
if (ret < 0) return 0;
z->c += ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment