# Reproduction script for cstlemma tag-lookup and output issues.
#
# Usage: run with `bash -e` from the directory that contains
#   cstlemma{0,1}/cstlemma  - the two cstlemma builds under comparison
#   models/                 - sparv suc dictionary (sans \r) in text form,
#                             compiled with cstlemma{0,1}/cstlemma
#                             (both builds give an identical result)
# as well as the input files `slashed` and `tabbed` and the file `empty`.
#
# Output file naming (all written under out/):
#   first letter   s = input file `slashed`, t = input file `tabbed`
#   digit          0 = cstlemma0, 1 = cstlemma1
#   option letters t / I = the -t or -I option was used,
#                  i = input given with -i (otherwise read from stdin),
#                  o = output given with -o (otherwise taken from stdout)
#   extension      .res = lemmatizer result (the -o file, or stdout when
#                         -o is not used)
#                  .out = stdout of a run that also used -o
#                  .err = stderr

# Start from a clean output directory.
test -e out && rm -r out
mkdir out

# Issue (both versions of cstlemma):
#   -t (slashed tokens) works - tag found in dictionary
#   -I (tabbed tokens) does not - tag not found in dictionary
#   (there are slashes in tags in the model, so tab is important)

# Input from file: word SLASH tag NL
# With -t: output is ok in all four cases; one stderr is quiet while the
# others are noisy.
cstlemma0/cstlemma -d models/dict0 -f empty -t -i slashed -o out/s0-tio.res > out/s0-tio.out 2> out/s0-tio.err
cstlemma0/cstlemma -d models/dict0 -f empty -t -i slashed > out/s0-ti.res 2> out/s0-ti.err
cstlemma1/cstlemma -d models/dict0 -f empty -t -i slashed -o out/s1-tio.res > out/s1-tio.out 2> out/s1-tio.err
cstlemma1/cstlemma -d models/dict0 -f empty -t -i slashed > out/s1-ti.res 2> out/s1-ti.err

# Same input from stdin (only cstlemma1 is exercised this way).
cstlemma1/cstlemma -d models/dict0 -f empty -t -o out/s1-to.res < slashed > out/s1-to.out 2> out/s1-to.err
cstlemma1/cstlemma -d models/dict0 -f empty -t < slashed > out/s1-t.res 2> out/s1-t.err

# Issue (stdin version of cstlemma):
#   spurious final output with -I (because input?)
#   (both from stdin and from file)

# Input from file: word SLASH tag NL
# With -I '$w/$t\n': output is different (also wrong).
# The format is single-quoted so the shell passes $w, $t and \n through
# to cstlemma literally.
cstlemma0/cstlemma -d models/dict0 -f empty -I '$w/$t\n' -i slashed -o out/s0-Iio.res > out/s0-Iio.out 2> out/s0-Iio.err
cstlemma0/cstlemma -d models/dict0 -f empty -I '$w/$t\n' -i slashed > out/s0-Ii.res 2> out/s0-Ii.err
cstlemma1/cstlemma -d models/dict0 -f empty -I '$w/$t\n' -i slashed -o out/s1-Iio.res > out/s1-Iio.out 2> out/s1-Iio.err
cstlemma1/cstlemma -d models/dict0 -f empty -I '$w/$t\n' -i slashed > out/s1-Ii.res 2> out/s1-Ii.err

# Same input from stdin.
cstlemma1/cstlemma -d models/dict0 -f empty -I '$w/$t\n' -o out/s1-Io.res < slashed > out/s1-Io.out 2> out/s1-Io.err
cstlemma1/cstlemma -d models/dict0 -f empty -I '$w/$t\n' < slashed > out/s1-I.res 2> out/s1-I.err

# Input from file: word TAB tag NL
# With -I '$w\t$t\n': output is different (also wrong).
cstlemma0/cstlemma -d models/dict0 -f empty -I '$w\t$t\n' -i tabbed -o out/t0-Iio.res > out/t0-Iio.out 2> out/t0-Iio.err
cstlemma0/cstlemma -d models/dict0 -f empty -I '$w\t$t\n' -i tabbed > out/t0-Ii.res 2> out/t0-Ii.err
cstlemma1/cstlemma -d models/dict0 -f empty -I '$w\t$t\n' -i tabbed -o out/t1-Iio.res > out/t1-Iio.out 2> out/t1-Iio.err
cstlemma1/cstlemma -d models/dict0 -f empty -I '$w\t$t\n' -i tabbed > out/t1-Ii.res 2> out/t1-Ii.err

# Same input from stdin.
cstlemma1/cstlemma -d models/dict0 -f empty -I '$w\t$t\n' -o out/t1-Io.res < tabbed > out/t1-Io.out 2> out/t1-Io.err
cstlemma1/cstlemma -d models/dict0 -f empty -I '$w\t$t\n' < tabbed > out/t1-I.res 2> out/t1-I.err

# Issue (which version):
#   $t not recognized in -W format, contrary to documentation
#   (is it?)

# What else?

# Minor issue (both versions of cstlemma?)
#   cstlemma0 -t (slashed tokens) -o foo - foo ok, noisy stderr
#   cstlemma0 -t (slashed tokens)        - stdout ok, quiet stderr
#   cstlemma1 -t (slashed tokens) -o foo - foo ok, noisy stderr
#   cstlemma1 -t (slashed tokens)        - stdout ok, noisy stderr