#!/usr/bin/env bash
#
# Fetch and build two variants of cstlemma, compile a dictionary with each,
# and extract a small sanity-check sample from the dictionary source.
#
# Run this in an empty subdirectory (originally: `bash -e` this file),
# with ../models/suc.cstlemma.lemmas from sparv, 100k, FBT format
# (sans \r, apparently not an issue).
#
# Files and directories created in the current directory:
#   models/                        copy of the lemma list, plus dict0 and dict1
#   hashmap/ letterfunc/ parsesgml/   git clones
#   cstlemma0/ cstlemma1/          git clones, each built in its src/ directory
#   empty                          empty rule set
#   tabbed, slashed                "kommer" entries extracted from the lemma list

# Stop at the first failing command even when the caller forgets `bash -e`.
# Only -e is enabled (no -u / pipefail) so behaviour matches that invocation.
set -e

lemmas_src=../models/suc.cstlemma.lemmas

# Check the required input before creating anything, so a missing file does
# not leave a half-populated (no longer empty) working directory behind.
if [[ ! -f "$lemmas_src" ]]; then
  printf 'error: required input %s not found\n' "$lemmas_src" >&2
  exit 1
fi

#######################################
# Clone cstlemma into a directory and build it there.
# Arguments:
#   $1 - target directory for the clone (must not exist yet)
#   $2 - value for the STREAM macro in src/defines.h; 0 leaves the
#        upstream default untouched
# Returns:
#   non-zero (aborting the script under set -e) if any step fails
#######################################
build_cstlemma() {
  local dir=$1
  local stream=$2

  git clone https://github.com/kuhumcst/cstlemma.git "$dir"

  # Add -Wno-reorder next to -Wall in the Makefile.
  # NOTE: `sed -i` without a suffix argument is GNU sed syntax.
  sed -i 's/-Wall/-Wall -Wno-reorder/' "$dir/src/Makefile"

  if [[ "$stream" != 0 ]]; then
    sed -i "s/define STREAM 0/define STREAM ${stream}/" "$dir/src/defines.h"
  fi

  # (failed to link with make all; bare make worked)
  make -C "$dir/src"
  make -C "$dir/src" clean
}

mkdir models
cp "$lemmas_src" models

git clone https://github.com/kuhumcst/hashmap.git
git clone https://github.com/kuhumcst/letterfunc.git
git clone https://github.com/kuhumcst/parsesgml.git

# cstlemma0/cstlemma is the default (STREAM is 0)
# cstlemma1/cstlemma can read stdin (STREAM is 1)
build_cstlemma cstlemma0 0
build_cstlemma cstlemma1 1

# empty rule set
touch empty

# dictionary compiled with both versions of cstlemma (identical results)
cstlemma0/cstlemma -D -c FBT -i models/suc.cstlemma.lemmas -o models/dict0
cstlemma1/cstlemma -D -c FBT -i models/suc.cstlemma.lemmas -o models/dict1

# These should definitely be in the dictionaries with these tags
# because they are extracted from the dictionary itself:
#
#   kommer  komma   VB.PRS.AKT
#   kommer  kommer  UO
#
# in tabbed:  word TAB tag NL
# in slashed: word/tag NL
grep -w kommer models/suc.cstlemma.lemmas | cut -f 1,3 > tabbed
tr '\t' / < tabbed > slashed