Commit 8bb00e36b6a350b91a1d6e499537ec6293eef3ea
1 parent
b06dc862
generowanie wyjścia dla subsyntax
Showing
8 changed files
with
239 additions
and
13 deletions
LCGparser/test.ml
... | ... | @@ -45,8 +45,8 @@ let examples = [ |
45 | 45 | 0, 1, "Jakiego","jaki","adj",Raised(WithVar("case",With[Atom "gen"; Atom "acc"],"A",ImpSet(ImpSet(Tensor[Atom "cp"; Atom "int"; Atom "jaki"], |
46 | 46 | [Forward,Imp(Tensor[Atom "ip"],Forward,Tensor[Atom "np"; AVar "case"])]), |
47 | 47 | [Forward,Imp(Tensor[Atom "np"; AVar "case"],Backward,Tensor[Atom "adjp"; AVar "case"])]))); |
48 | - (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])]))); *) | |
49 | - 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); | |
48 | + 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])]))); | |
49 | + (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *) | |
50 | 50 | 2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); |
51 | 51 | 3, 4, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]])); |
52 | 52 | (* 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *) |
... | ... |
semsources/src stycz/dzieło stycz1.pdf deleted
No preview for this file type
subsyntax/ENIAMsubsyntaxGraphOf.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMsubsyntax: MWE, abbreviation and sentence detection for Polish | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAMsubsyntaxTypes | |
21 | +open Printf | |
22 | + | |
23 | +let print_tokens path name tokens = | |
24 | + File.file_out (path ^ name ^ ".gv") (fun file -> | |
25 | + fprintf file "digraph G {\n"; | |
26 | + Xlist.iter tokens (fun t -> | |
27 | + let lemma = ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token in | |
28 | + if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.next t.ENIAMtokenizerTypes.orth lemma); | |
29 | + fprintf file "}\n"); | |
30 | + Sys.chdir path; | |
31 | + ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); | |
32 | + String.iter (function '/' -> Sys.chdir ".." | _ -> ()) path | |
... | ... |
subsyntax/ENIAMsubsyntaxHTMLof.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMsubsyntax: MWE, abbreviation and sentence detection for Polish | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAMsubsyntaxTypes | |
21 | +open Printf | |
22 | + | |
23 | +let html_header = | |
24 | +"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"> | |
25 | +<html> | |
26 | + <head> | |
27 | + <META HTTP-EQUIV=\"CONTENT-TYPE\" CONTENT=\"text/html; charset=utf8\"> | |
28 | + <TITLE>ENIAM: Kategorialny Parser Składniowo-Semantyczny</TITLE> | |
29 | + <META HTTP-EQUIV=\"Content-Language\" CONTENT=\"pl\"> | |
30 | + </head> | |
31 | + | |
32 | + <body> | |
33 | + <center>" | |
34 | + | |
35 | +let html_trailer = | |
36 | +"</center> | |
37 | + </body> | |
38 | +</html>" | |
39 | + | |
40 | +let escape_html s = | |
41 | + Int.fold 0 (String.length s - 1) "" (fun t i -> | |
42 | + match String.sub s i 1 with | |
43 | + "<" -> t ^ "&lt;" | |
44 | + | ">" -> t ^ "&gt;" | |
45 | + | "&" -> t ^ "&amp;" | |
46 | + | c -> t ^ c) | |
47 | + | |
48 | +let html_of_struct_sentence tokens paths last = | |
49 | + "<table><tr><td><b>orth</b></td><td><b>token</b></td><td><b>id</b></td><td><b>lnode</b></td><td><b>rnode</b></td></tr>" ^ | |
50 | + String.concat "\n" (Xlist.map (List.sort compare paths) (fun (id,lnode,rnode) -> | |
51 | + let t = ExtArray.get tokens id in | |
52 | + sprintf "<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>" | |
53 | + t.ENIAMtokenizerTypes.orth (escape_html (ENIAMtokens.string_of_token t.ENIAMtokenizerTypes.token)) id lnode rnode)) ^ | |
54 | + sprintf "<tr><td></td><td></td><td></td><td>%d</td><td></td></tr>" last ^ | |
55 | + "</table>" | |
56 | + | |
57 | +let html_of_dep_sentence tokens paths = | |
58 | + "<table><tr><td><b>orth</b></td><td><b>token</b></td><td><b>id</b></td><td><b>conll_id</b></td><td><b>super</b></td><td><b>label</b></td></tr>" ^ | |
59 | + String.concat "\n" (List.rev (Int.fold 0 (Array.length paths - 1) [] (fun l conll_id -> | |
60 | + let id,super,label = paths.(conll_id) in | |
61 | + let t = ExtArray.get tokens id in | |
62 | + (sprintf "<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%s</td></tr>" | |
63 | + t.ENIAMtokenizerTypes.orth (escape_html (ENIAMtokens.string_of_token t.ENIAMtokenizerTypes.token)) id conll_id super label) :: l))) ^ | |
64 | + "</table>" | |
65 | + | |
66 | +let html_of_tokens tokens = | |
67 | + "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>beg</b></td><td><b>len</b></td><td><b>next</b></td><td><b>token</b></td></td><td><b>attrs</b></td></tr>" ^ | |
68 | + String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id -> | |
69 | + let t = ExtArray.get tokens id in | |
70 | + (sprintf "<tr><td>%d</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td></tr>" | |
71 | + id t.ENIAMtokenizerTypes.orth t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.len t.ENIAMtokenizerTypes.next (escape_html (ENIAMtokens.string_of_token t.ENIAMtokenizerTypes.token)) | |
72 | + (String.concat "; " t.ENIAMtokenizerTypes.attrs)) :: l))) ^ | |
73 | + "</table>" | |
74 | + | |
75 | +let rec html_of_sentence path tokens = function | |
76 | + RawSentence s -> s | |
77 | + | StructSentence(paths,last) -> html_of_struct_sentence tokens paths last | |
78 | + | DepSentence paths -> html_of_dep_sentence tokens paths | |
79 | + | QuotedSentences sentences -> | |
80 | + String.concat "<BR>\n" (Xlist.map sentences (fun p -> | |
81 | + sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence))) | |
82 | + | AltSentence l -> (*print_endline "AltSentence";*) | |
83 | + "<table border=1>" ^ | |
84 | + String.concat "\n" (Xlist.map l (fun (mode,sentence) -> | |
85 | + sprintf "<tr><td>%s</td><td>%s</td></tr>" (ENIAMsubsyntaxStringOf.mode mode) (html_of_sentence path tokens sentence))) ^ | |
86 | + "</table>" | |
87 | + (* | _ -> failwith "html_of_sentence: ni" *) | |
88 | + | |
89 | +let rec html_of_paragraph path tokens = function | |
90 | + RawParagraph s -> (*print_endline "RawParagraph";*) s | |
91 | + | StructParagraph sentences -> (*print_endline "StructParagraph";*) | |
92 | + String.concat "<BR>\n" (Xlist.map sentences (fun p -> | |
93 | + sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence))) | |
94 | + | AltParagraph l -> (*print_endline "AltParagraph";*) | |
95 | + "<table border=2>" ^ | |
96 | + String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> | |
97 | + sprintf "<tr><td>%s</td><td>%s</td></tr>" (ENIAMsubsyntaxStringOf.mode mode) (html_of_paragraph path tokens paragraph))) ^ | |
98 | + "</table>" | |
99 | + | |
100 | +let rec html_of_text path tokens = function | |
101 | + RawText s -> s | |
102 | + | StructText paragraphs -> | |
103 | + String.concat "<BR>\n" (Xlist.map paragraphs (html_of_paragraph path tokens)) | |
104 | + | AltText l -> | |
105 | + "<table border=3>" ^ | |
106 | + String.concat "\n" (Xlist.map l (fun (mode,text) -> | |
107 | + sprintf "<tr><td>%s</td><td>%s</td></tr>" (ENIAMsubsyntaxStringOf.mode mode) (html_of_text path tokens text))) ^ | |
108 | + "</table>" | |
109 | + | |
110 | +let print_html_text path name tokens text = | |
111 | + File.file_out (path ^ name ^ ".html") (fun file -> | |
112 | + fprintf file "%s\n" html_header; | |
113 | + fprintf file "%s<BR>\n" (html_of_text path tokens text); | |
114 | + fprintf file "%s<BR>\n" (html_of_tokens tokens); | |
115 | +(* fprintf file "%s<BR>\n" (html_of_tokens_simple_valence tokens); | |
116 | + fprintf file "%s<BR>\n" (html_of_tokens_valence tokens);*) | |
117 | + fprintf file "%s\n" html_trailer) | |
... | ... |
subsyntax/ENIAMsubsyntaxXMLof.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMsubsyntax: MWE, abbreviation and sentence detection for Polish | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAMsubsyntaxTypes | |
21 | +open Printf | |
22 | + | |
23 | +let mode = function | |
24 | + Raw -> "Raw" | |
25 | + | Struct -> "Struct" | |
26 | + | CONLL -> "CONLL" | |
27 | + | ENIAM -> "ENIAM" | |
28 | + | Mate -> "Mate" | |
29 | + | Swigra -> "Swigra" | |
30 | + | POLFIE -> "POLFIE" | |
31 | + | |
32 | +let tokens t = | |
33 | + Xml.Element("tokens",[], "\n" (List.rev (Int.fold 0 (ExtArray.size t - 1) [] (fun l id -> | |
34 | + ENIAMtokens.xml_of_token_record id (ExtArray.get t id))))) | |
35 | + | |
36 | +let xml_of_dep_sentence paths = | |
37 | + Xlist.rev (Int.fold 0 (Array.length paths - 1) [] (fun l conll_id -> | |
38 | + let id,super,label = paths.(conll_id) in | |
39 | + Xml.Element("edge",["conll_id",string_of_int conll_id;"id",string_of_int id] @ | |
40 | + (if super = (-1) then [] else ["super",string_of_int super]) @ | |
41 | + (if label = "" then [] else ["label",label]),[]))) | |
42 | + | |
43 | +let xml_of_edge (id,lnode,rnode) = | |
44 | + Xml.Element("edge",["id",string_of_int id;"lnode",string_of_int lnode;"rnode",string_of_int rnode],[]) | |
45 | + | |
46 | +let set_mode m = | |
47 | + if m = "" then [] else ["mode",m] | |
48 | + | |
49 | +let rec sentence m = function | |
50 | + RawSentence s -> Xml.Element("RawSentence",set_mode m,[Xml.PCData s]) | |
51 | + | StructSentence(paths,last) -> Xml.Element("StructSentence",(set_mode m) @ ["last",last],Xlist.map paths xml_of_edge) | |
52 | + | DepSentence paths -> Xml.Element("DepSentence", | |
53 | + (set_mode m) @ ["size",string_of_int (Array.length paths)],xml_of_dep_sentence paths) | |
54 | + | QuotedSentences sentences -> | |
55 | + Xml.Element("QuotedSentences",set_mode m,Xlist.map sentences (fun p -> | |
56 | + Xml.Element("Sentence",["id",p.pid;"beg",p.pbeg;"len",p.plen;"next",p.pnext],[sentence "" p.psentence]))) | |
57 | + | AltSentence l -> XmlElement("AltSentence",set_mode m,Xlist.map l (fun (m,t) -> sentence (mode m) t)) | |
58 | + | |
59 | +let rec paragraph m = function | |
60 | + RawParagraph s -> Xml.Element("RawParagraph",set_mode m,[Xml.PCData s]) | |
61 | + | StructParagraph sentences -> | |
62 | + XmlElement("StructParagraph",set_mode m,Xlist.map sentences (fun p -> | |
63 | + Xml.Element("Sentence",["id",p.pid;"beg",p.pbeg;"len",p.plen;"next",p.pnext],[sentence "" p.psentence]))) | |
64 | + | AltParagraph l -> XmlElement("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (mode m) t)) | |
65 | + | |
66 | +let rec text m = function | |
67 | + RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) | |
68 | + | StructText paragraphs -> XmlElement("StructText",set_mode m,Xlist.map paragraphs (paragraph "")) | |
69 | + | AltText l -> XmlElement("AltText",set_mode m,Xlist.map l (fun (m,t) -> text (mode m))) | |
... | ... |
subsyntax/makefile
... | ... | @@ -6,15 +6,15 @@ OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | -SOURCES= ENIAMsubsyntaxTypes.ml ENIAMsubsyntaxStringOf.ml ENIAMpaths.ml ENIAM_MWE.ml ENIAMsentences.ml ENIAMsubsyntax.ml | |
9 | +SOURCES= ENIAMsubsyntaxTypes.ml ENIAMsubsyntaxStringOf.ml ENIAMsubsyntaxHTMLof.ml ENIAMsubsyntaxGraphOf.ml ENIAMpaths.ml ENIAM_MWE.ml ENIAMsentences.ml ENIAMsubsyntax.ml | |
10 | 10 | |
11 | 11 | all: eniam-subsyntax.cma eniam-subsyntax.cmxa |
12 | 12 | |
13 | 13 | install: all |
14 | 14 | mkdir -p $(INSTALLDIR) |
15 | 15 | cp eniam-subsyntax.cmxa eniam-subsyntax.a eniam-subsyntax.cma $(INSTALLDIR) |
16 | - cp ENIAMsubsyntaxTypes.cmi ENIAMsubsyntaxStringOf.cmi ENIAMpaths.cmi ENIAM_MWE.cmi ENIAMsentences.cmi ENIAMsubsyntax.cmi $(INSTALLDIR) | |
17 | - cp ENIAMsubsyntaxTypes.cmx ENIAMsubsyntaxStringOf.cmx ENIAMpaths.cmx ENIAM_MWE.cmx ENIAMsentences.cmx ENIAMsubsyntax.cmx $(INSTALLDIR) | |
16 | + cp ENIAMsubsyntaxTypes.cmi ENIAMsubsyntaxStringOf.cmi ENIAMsubsyntaxHTMLof.cmi ENIAMsubsyntaxGraphOf.cmi ENIAMpaths.cmi ENIAM_MWE.cmi ENIAMsentences.cmi ENIAMsubsyntax.cmi $(INSTALLDIR) | |
17 | + cp ENIAMsubsyntaxTypes.cmx ENIAMsubsyntaxStringOf.cmx ENIAMsubsyntaxHTMLof.cmx ENIAMsubsyntaxGraphOf.cmx ENIAMpaths.cmx ENIAM_MWE.cmx ENIAMsentences.cmx ENIAMsubsyntax.cmx $(INSTALLDIR) | |
18 | 18 | mkdir -p /usr/share/eniam/subsyntax |
19 | 19 | cp resources/* /usr/share/eniam/subsyntax |
20 | 20 | |
... | ... |
subsyntax/test.ml
... | ... | @@ -19,10 +19,10 @@ |
19 | 19 | |
20 | 20 | |
21 | 21 | let test_strings = [ |
22 | - "Szpak frunie."; | |
22 | + (* "Szpak frunie."; *) | |
23 | 23 | "Kot np. miauczy."; |
24 | - "Ala ma kota."; | |
25 | - "Ale mają kota:" | |
24 | + (* "Ala ma kota."; | |
25 | + "Ale mają kota:" *) | |
26 | 26 | (* "W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie."; *) |
27 | 27 | (* "W 2003 obronił doktorat nauk technicznych w zakresie architektury i urbanistyki na Politechnice Krakowskiej i został adiunktem w Zakładzie Teorii Architektury, Historii i Konserwacji Zabytków IAiPP."; *) |
28 | 28 | (* "Trzy lata później założył pracownię architektoniczną Atelier Bizio + Ligierko, zajmującą się adaptacjami budynków historycznych."; *) |
... | ... | @@ -31,17 +31,19 @@ let test_strings = [ |
31 | 31 | ] |
32 | 32 | |
33 | 33 | let test_strings2 = [ |
34 | - "Szpak frunie. Kot miauczy."; | |
35 | - "Szpak powiedział: „Frunę. Kiszę.”"; | |
34 | +(* "Szpak frunie. Kot miauczy."; | |
35 | + "Szpak powiedział: „Frunę. Śpiewam.”";*) | |
36 | + (* "Istniejący od XI w. Czersk uzyskał prawa miejskie w 1350 r. Mazowsze było wtedy samodzielnym księstwem."; *) | |
36 | 37 | ] |
37 | 38 | |
38 | 39 | let _ = |
39 | 40 | print_endline "Testy wbudowane"; |
40 | 41 | Xlist.iter test_strings (fun s -> |
41 | 42 | print_endline ("\nTEST: " ^ s); |
42 | - let paths = ENIAMsubsyntax.parse s in | |
43 | + let tokens = ENIAMsubsyntax.parse s in | |
43 | 44 | (* print_endline (ENIAMtokenizer.xml_of tokens); *) |
44 | - print_endline (ENIAMpaths.to_string (paths,0))); | |
45 | + print_endline (ENIAMpaths.to_string (tokens,0)); | |
46 | + ENIAMsubsyntaxGraphOf.print_tokens "results/" "test" tokens); | |
45 | 47 | print_endline "Testy wbudowane 2"; |
46 | 48 | Xlist.iter test_strings2 (fun s -> |
47 | 49 | print_endline ("\nTEST: " ^ s); |
... | ... | @@ -49,7 +51,8 @@ let _ = |
49 | 51 | (* print_endline (ENIAMtokenizer.xml_of tokens); *) |
50 | 52 | print_endline (ENIAMsubsyntaxStringOf.tokens tokens); |
51 | 53 | print_endline ""; |
52 | - print_endline (ENIAMsubsyntaxStringOf.text "" tokens text)); | |
54 | + print_endline (ENIAMsubsyntaxStringOf.text "" tokens text); | |
55 | + ENIAMsubsyntaxHTMLof.print_html_text "results/" "test" tokens text); | |
53 | 56 | (* print_endline "Testy użytkownika."; |
54 | 57 | print_endline "Wpisz tekst i naciśnij ENTER, pusty tekst kończy."; |
55 | 58 | let s = ref (read_line ()) in |
... | ... |
tokenizer/ENIAMtokens.ml
... | ... | @@ -91,6 +91,11 @@ let get_orth = function |
91 | 91 | | Interp orth -> orth |
92 | 92 | | _ -> ""(*failwith "get_orth"*) |
93 | 93 | |
94 | +let rec get_lemma = function | |
95 | + ENIAMtokenizerTypes.Interp orth -> orth | |
96 | + | ENIAMtokenizerTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat | |
97 | + | ENIAMtokenizerTypes.Proper(lemma,cat,_,_) -> lemma ^ "\n" ^ cat | |
98 | + | _ -> "" | |
94 | 99 | |
95 | 100 | let months = StringSet.of_list ["1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; "10"; "11"; "12"] |
96 | 101 | let hours = StringSet.of_list ["0"; "1"; "2"; "3"; "4"; "5"; "6"; "7"; "8"; "9"; "00"; "01"; "02"; "03"; "04"; "05"; "06"; "07"; "08"; "09"; |
... | ... |