Commit 94377238f7e85dc691625ff9e2d93f1e780d80b0
1 parent 82a1d016
Testy i poprawki (Tests and fixes)
Showing 30 changed files with 140 additions and 49 deletions
LCGlexicon/ENIAM_LCGlexicon.ml
... | ... | @@ -165,7 +165,7 @@ let make_rules x_flag filename = |
165 | 165 | dict_of_grammar lexicon |
166 | 166 | |
167 | 167 | let find_rules rules cats = |
168 | - let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category " ^ cats.pos) in | |
168 | + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in | |
169 | 169 | (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) |
170 | 170 | let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in |
171 | 171 | Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> |
... | ... |
LCGlexicon/ENIAM_LCGlexiconParser.ml
... | ... | @@ -95,6 +95,7 @@ let match_relation = function |
95 | 95 | |
96 | 96 | let rec split_mid i0 rev = function |
97 | 97 | [i,s] -> List.rev ((i,s) :: rev) |
98 | + | (i1,s) :: (i2,"|") :: (i3,"|") :: l -> raise (ParseError("split_mid", "duplicated delimeter found", i2)) | |
98 | 99 | | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l |
99 | 100 | | [] -> raise (ParseError("split_mid", "empty", i0)) |
100 | 101 | | (i,s) :: l -> raise (ParseError("split_mid", "delimiter not found: " ^ String.concat " " (s :: Xlist.map l snd), i)) |
... | ... |
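Note: the new clause catches an empty alternative written as two consecutive "|" tokens before the ordinary single-delimiter clause can silently skip it. A minimal, self-contained sketch of the behaviour; a plain (position, string) token list and a local ParseError exception stand in for the parser's real types, and the message spelling is normalised to "delimiter" here:

    exception ParseError of string * string * int

    (* Only the clauses relevant to the change; the remaining clauses follow the hunk above. *)
    let rec split_mid i0 rev = function
        [i,s] -> List.rev ((i,s) :: rev)
      | (i1,s) :: (i2,"|") :: (i3,"|") :: l -> raise (ParseError("split_mid", "duplicated delimiter found", i2))
      | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l
      | _ -> raise (ParseError("split_mid", "malformed alternative", i0))

    (* "np|adjp" still splits, while "np||adjp" now fails early: *)
    let _ = split_mid 0 [] [1,"np"; 2,"|"; 3,"adjp"]                    (* [(1,"np"); (3,"adjp")] *)
    let _ = try split_mid 0 [] [1,"np"; 2,"|"; 3,"|"; 4,"adjp"]
            with ParseError(_,msg,_) -> print_endline msg; []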
LCGlexicon/ENIAMcategoriesPL.ml
... | ... | @@ -400,8 +400,8 @@ let clarify_categories proper cat proj = function |
400 | 400 | else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"}] |
401 | 401 | | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}] |
402 | 402 | | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}] |
403 | - | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"}] | |
404 | - | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"}] | |
403 | + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; proj=proj}] | |
404 | + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; proj=proj*)}] | |
405 | 405 | | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}] |
406 | 406 | | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}] |
407 | 407 | | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}] |
... | ... | @@ -617,7 +617,7 @@ let pos_categories = Xlist.fold [ |
617 | 617 | "part",[Lemma;]; |
618 | 618 | "comp",[Lemma;];(* ctype *) |
619 | 619 | "conj",[Lemma;];(* ctype *) |
620 | - "interj",[Lemma;]; | |
620 | + "interj",[Lemma;Cat;Proj;]; | |
621 | 621 | "sinterj",[Lemma;]; |
622 | 622 | "burk",[Lemma;]; |
623 | 623 | "interp",[Lemma;]; |
... | ... |
LCGlexicon/resources/subst_container.dat
LCGparser/ENIAM_LCG_XMLof.ml
... | ... | @@ -40,7 +40,7 @@ let rec linear_term = function |
40 | 40 | | App(s,t) -> Xml.Element("App",[],[linear_term s;linear_term t]) |
41 | 41 | | Dot -> Xml.Element("Dot",[],[]) |
42 | 42 | | Val s -> Xml.Element("Val",[],[Xml.PCData s]) |
43 | - | SetAttr(e,s,t) -> Xml.Element("SetAttr",["label",v],[linear_term s;linear_term t]) | |
43 | + | SetAttr(e,s,t) -> Xml.Element("SetAttr",["label",e],[linear_term s;linear_term t]) | |
44 | 44 | | Fix(s,t) -> Xml.Element("Fix",[],[linear_term s;linear_term t]) |
45 | 45 | | Empty t -> Xml.Element("Empty",[],[linear_term t]) |
46 | 46 | | Apply t -> Xml.Element("Apply",[],[linear_term t]) |
... | ... | @@ -59,4 +59,3 @@ let linear_term_array a = |
59 | 59 | let l = Int.fold 0 (Array.length a - 1) [] (fun l i -> |
60 | 60 | Xml.Element("element",["index",string_of_int i],[linear_term a.(i)]) :: l) in |
61 | 61 | Xml.Element("array",[],List.rev l) |
62 | - | |
... | ... |
LCGparser/makefile
... | ... | @@ -6,21 +6,21 @@ OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | -SOURCES= ENIAM_LCGtypes.ml ENIAM_LCGstringOf.ml ENIAM_LCGrules.ml ENIAM_LCGrenderer.ml ENIAM_LCGchart.ml ENIAM_LCGlatexOf.ml ENIAM_LCGreductions.ml ENIAM_LCGgraphOf.ml | |
9 | +SOURCES= ENIAM_LCGtypes.ml ENIAM_LCGstringOf.ml ENIAM_LCGrules.ml ENIAM_LCGrenderer.ml ENIAM_LCGchart.ml ENIAM_LCGlatexOf.ml ENIAM_LCGreductions.ml ENIAM_LCGgraphOf.ml ENIAM_LCG_XMLof.ml | |
10 | 10 | |
11 | 11 | all: eniam-lcg-parser.cma eniam-lcg-parser.cmxa |
12 | 12 | |
13 | 13 | install: all |
14 | 14 | mkdir -p $(INSTALLDIR) |
15 | 15 | cp eniam-lcg-parser.cmxa eniam-lcg-parser.a eniam-lcg-parser.cma $(INSTALLDIR) |
16 | - cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi $(INSTALLDIR) | |
17 | - cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx $(INSTALLDIR) | |
16 | + cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi ENIAM_LCG_XMLof.cmi $(INSTALLDIR) | |
17 | + cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx ENIAM_LCG_XMLof.cmx $(INSTALLDIR) | |
18 | 18 | |
19 | 19 | install-local: all |
20 | 20 | mkdir -p $(INSTALLDIR) |
21 | 21 | cp eniam-lcg-parser.cmxa eniam-lcg-parser.a eniam-lcg-parser.cma $(INSTALLDIR) |
22 | - cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi $(INSTALLDIR) | |
23 | - cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx $(INSTALLDIR) | |
22 | + cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi ENIAM_LCG_XMLof.cmi $(INSTALLDIR) | |
23 | + cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx ENIAM_LCG_XMLof.cmx $(INSTALLDIR) | |
24 | 24 | |
25 | 25 | eniam-lcg-parser.cma: $(SOURCES) |
26 | 26 | ocamlc -linkall -a -o eniam-lcg-parser.cma $(OCAMLFLAGS) $^ |
... | ... |
exec/ENIAMexec.ml
... | ... | @@ -33,6 +33,7 @@ let translate_mode = function |
33 | 33 | | ENIAMsubsyntaxTypes.Mate -> Mate |
34 | 34 | | ENIAMsubsyntaxTypes.Swigra -> Swigra |
35 | 35 | | ENIAMsubsyntaxTypes.POLFIE -> POLFIE |
36 | + | ENIAMsubsyntaxTypes.Error -> Error | |
36 | 37 | |
37 | 38 | let rec translate_sentence = function |
38 | 39 | ENIAMsubsyntaxTypes.RawSentence s -> RawSentence s |
... | ... | @@ -53,6 +54,7 @@ let rec translate_paragraph = function |
53 | 54 | sentence=translate_sentence p.ENIAMsubsyntaxTypes.sentence})) |
54 | 55 | | ENIAMsubsyntaxTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) -> |
55 | 56 | translate_mode mode, translate_paragraph paragraph)) |
57 | + | ENIAMsubsyntaxTypes.ErrorParagraph s -> ErrorParagraph s | |
56 | 58 | |
57 | 59 | let rec translate_text = function |
58 | 60 | ENIAMsubsyntaxTypes.RawText s -> RawText s |
... | ... | @@ -64,7 +66,9 @@ let rec translate_text = function |
64 | 66 | let clarify_categories cats token = |
65 | 67 | match token.ENIAMtokenizerTypes.token with |
66 | 68 | ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> |
67 | - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories false cat proj (lemma,pos,interp))))) | |
69 | + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> | |
70 | + (* Printf.printf "lemma=%s pos=%s cat=%s proj=%s\n%!" lemma pos cat (String.concat "," proj); *) | |
71 | + ENIAMcategoriesPL.clarify_categories false cat proj (lemma,pos,interp))))) | |
68 | 72 | | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) -> |
69 | 73 | List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories true cat proj (lemma,pos,interp))))) |
70 | 74 | | ENIAMtokenizerTypes.Interp lemma -> |
... | ... |
exec/ENIAMexecTypes.ml
... | ... | @@ -78,7 +78,7 @@ type semantic_processing_result = { |
78 | 78 | } |
79 | 79 | *) |
80 | 80 | type mode = |
81 | - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | |
81 | + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | Error | |
82 | 82 | |
83 | 83 | type sentence = |
84 | 84 | RawSentence of string |
... | ... | @@ -98,6 +98,7 @@ and paragraph = |
98 | 98 | RawParagraph of string |
99 | 99 | | StructParagraph of paragraph_record list (* zdania *) |
100 | 100 | | AltParagraph of (mode * paragraph) list |
101 | + | ErrorParagraph of string | |
101 | 102 | |
102 | 103 | type text = |
103 | 104 | RawText of string |
... | ... | @@ -267,6 +268,7 @@ let rec map_paragraph mode f = function |
267 | 268 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
268 | 269 | mode, map_paragraph mode f paragraph) in |
269 | 270 | AltParagraph(List.rev l) |
271 | + | ErrorParagraph s -> ErrorParagraph s | |
270 | 272 | |
271 | 273 | let rec map_text mode f = function |
272 | 274 | RawText s -> RawText s |
... | ... | @@ -295,6 +297,7 @@ let rec fold_paragraph mode s f = function |
295 | 297 | | AltParagraph l -> |
296 | 298 | Xlist.fold l s (fun s (mode,paragraph) -> |
297 | 299 | fold_paragraph mode s f paragraph) |
300 | + | ErrorParagraph _ -> s | |
298 | 301 | |
299 | 302 | let rec fold_text mode s f = function |
300 | 303 | RawText _ -> s |
... | ... |
exec/ENIAMexecXMLof.ml
... | ... | @@ -64,6 +64,7 @@ let rec paragraph m = function |
64 | 64 | Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> |
65 | 65 | Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) |
66 | 66 | | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMvisualization.string_of_mode m) t)) |
67 | + | ErrorParagraph s -> Xml.Element("ErrorParagraph",set_mode m,[Xml.PCData s]) | |
67 | 68 | |
68 | 69 | let rec text m = function |
69 | 70 | RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) |
... | ... |
exec/ENIAMselectSent.ml
... | ... | @@ -69,6 +69,7 @@ let rec select_sentence_modes_paragraph = function |
69 | 69 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
70 | 70 | mode, select_sentence_modes_paragraph paragraph) in |
71 | 71 | AltParagraph(List.rev l) |
72 | + | ErrorParagraph s -> ErrorParagraph s | |
72 | 73 | |
73 | 74 | let rec select_sentence_modes_text = function |
74 | 75 | RawText s -> RawText s |
... | ... | @@ -148,6 +149,7 @@ let rec select_sentences_paragraph mode = function |
148 | 149 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
149 | 150 | mode, select_sentences_paragraph mode paragraph) in |
150 | 151 | AltParagraph(List.rev l) |
152 | + | ErrorParagraph s -> ErrorParagraph s | |
151 | 153 | |
152 | 154 | let rec select_sentences_text mode = function |
153 | 155 | RawText s -> RawText s |
... | ... |
exec/ENIAMvisualization.ml
... | ... | @@ -667,6 +667,7 @@ let string_of_mode = function |
667 | 667 | | Mate -> "Mate" |
668 | 668 | | Swigra -> "Swigra" |
669 | 669 | | POLFIE -> "POLFIE" |
670 | + | Error -> "Error" | |
670 | 671 | (* |
671 | 672 | (*let rec string_of_sentence = function |
672 | 673 | RawSentence s -> sprintf "RawSentence(%s)" s |
... | ... | @@ -1065,9 +1066,27 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam |
1065 | 1066 | sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ |
1066 | 1067 | "" |
1067 | 1068 | | SemNotValidated -> |
1069 | + if verbosity < 2 then () else ( | |
1070 | + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b; | |
1071 | + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9; | |
1072 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") "a3" result.dependency_tree6b; | |
1073 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a2" result.dependency_tree7; | |
1074 | + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; | |
1075 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9; | |
1076 | + ENIAMsemLatexOf.print_semantic_graph path (file_prefix ^ "_10_semantic_graph") "a3" result.semantic_graph10; | |
1077 | + ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11); | |
1068 | 1078 | if verbosity = 0 then () else ( |
1069 | 1079 | ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_12_semantic_graph") "" result.semantic_graph12); |
1070 | 1080 | sprintf "<font color=\"red\">sem_not_validated</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ |
1081 | + (if verbosity < 2 then "" else | |
1082 | + sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^ | |
1083 | + sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix ^ | |
1084 | + sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix ^ | |
1085 | + sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix ^ | |
1086 | + sprintf "<BR><IMG SRC=\"%s_6b_dependency_tree.png\">\n" file_prefix ^ | |
1087 | + sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix ^ | |
1088 | + sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix ^ | |
1089 | + sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ | |
1071 | 1090 | (if verbosity = 0 then "" else |
1072 | 1091 | sprintf "<BR><IMG SRC=\"%s_12_semantic_graph.png\">\n" file_prefix) ^ |
1073 | 1092 | "" |
... | ... | @@ -1174,6 +1193,7 @@ let file_prefix_of_mode = function |
1174 | 1193 | | Mate -> "M" |
1175 | 1194 | | Swigra -> "S" |
1176 | 1195 | | POLFIE -> "P" |
1196 | + | Error -> "Er" | |
1177 | 1197 | |
1178 | 1198 | let rec html_of_sentence path file_prefix mode img verbosity tokens = function |
1179 | 1199 | RawSentence s -> escape_html s |
... | ... | @@ -1204,6 +1224,7 @@ let rec html_of_paragraph path mode img verbosity tokens = function |
1204 | 1224 | String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> |
1205 | 1225 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path mode img verbosity tokens paragraph))) ^ |
1206 | 1226 | "</table>" |
1227 | + | ErrorParagraph s -> sprintf "<font color=\"red\">subsyntax_error</font>: %s\n" (escape_html s) | |
1207 | 1228 | |
1208 | 1229 | let rec html_of_text path mode img verbosity tokens = function |
1209 | 1230 | RawText s -> escape_html s |
... | ... | @@ -1236,6 +1257,7 @@ let rec find_prev_next_paragraph rev = function |
1236 | 1257 | | StructParagraph sentences -> |
1237 | 1258 | Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.id p.file_prefix rev p.sentence) |
1238 | 1259 | | AltParagraph l -> Xlist.fold l rev (fun rev (mode,paragraph) -> find_prev_next_paragraph rev paragraph) |
1260 | + | ErrorParagraph s -> rev | |
1239 | 1261 | |
1240 | 1262 | let rec make_prev_next_map map prev = function |
1241 | 1263 | [x] -> StringMap.add map x (prev,"") |
... | ... | @@ -1295,6 +1317,7 @@ let rec print_main_result_paragraph cg_bin_path path id tokens prev_next_map = f |
1295 | 1317 | | StructParagraph sentences -> |
1296 | 1318 | Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id p.file_prefix tokens p.id prev_next_map p.sentence) |
1297 | 1319 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph cg_bin_path path id tokens prev_next_map paragraph) |
1320 | + | ErrorParagraph s -> print_not_parsed_main_result cg_bin_path path id "Er" s 0 prev_next_map | |
1298 | 1321 | |
1299 | 1322 | let rec print_main_result_text cg_bin_path path id tokens = function |
1300 | 1323 | RawText s -> () |
... | ... | @@ -1354,6 +1377,7 @@ let rec print_main_result_first_page_paragraph cg_bin_path path id tokens prev_n |
1354 | 1377 | let p = List.hd sentences in |
1355 | 1378 | print_main_result_first_page_sentence cg_bin_path path id p.file_prefix tokens p.id prev_next_map p.sentence |
1356 | 1379 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map paragraph) |
1380 | + | ErrorParagraph s -> print_not_parsed_main_result cg_bin_path path id "Er" s 0 prev_next_map | |
1357 | 1381 | |
1358 | 1382 | let rec print_main_result_first_page_text cg_bin_path path id tokens = function |
1359 | 1383 | RawText s -> () |
... | ... |
integration/ENIAMpreIntegration.ml
... | ... | @@ -331,6 +331,7 @@ let rec parse_paragraph mode tokens = function |
331 | 331 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
332 | 332 | mode, parse_paragraph mode tokens paragraph) in |
333 | 333 | AltParagraph(List.rev l) |
334 | + | ErrorParagraph s -> ErrorParagraph s | |
334 | 335 | |
335 | 336 | let rec parse_text mode tokens = function |
336 | 337 | RawText s -> RawText s |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -124,6 +124,7 @@ let rec split_tokens_into_groups_paragraph a = function |
124 | 124 | Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence) |
125 | 125 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> |
126 | 126 | split_tokens_into_groups_paragraph a paragraph) |
127 | + | ErrorParagraph s -> () | |
127 | 128 | |
128 | 129 | let rec split_tokens_into_groups_text a = function |
129 | 130 | RawText s -> () |
... | ... | @@ -461,7 +462,7 @@ let rec create_tokens_for_artificial_nodes_rec tokens lex_sems = function |
461 | 462 | let id = ExtArray.add tokens empty_token_env in |
462 | 463 | let lex_sem = {empty_lex_sem with frames=[{empty_frame with meanings=[t.lemma, [t.lemma,0], unknown_meaning_weight]}]} in |
463 | 464 | let id2 = ExtArray.add lex_sems lex_sem in |
464 | - if id <>id2 then failwith "create_tokens_for_artificial_nodes_rec" else | |
465 | + if id <> id2 then failwith "create_tokens_for_artificial_nodes_rec: tokens inconsistent with lex_sems" else | |
465 | 466 | let t = if t.symbol = Dot then |
466 | 467 | {t with symbol = match t.pos with |
467 | 468 | "<root>" -> Tuple[Val "<root>"] |
... | ... |
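Note: the sharpened failwith message spells out the invariant behind it: tokens and lex_sems are parallel ExtArrays addressed by the same ids. The same invariant explains the fix in ENIAMsemGraph.ml further down, where ExtArray.add lex_sems had been left without its argument, so it only built a closure and never extended the array. A sketch of the pairing, assuming only that ExtArray.add returns the index of the freshly added element (as the surrounding code already relies on); add_artificial_node is a hypothetical helper used for illustration:

    let add_artificial_node tokens lex_sems lex_sem =
      let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in
      let id2 = ExtArray.add lex_sems lex_sem in
      (* both arrays must grow in lock step, otherwise a token id no longer addresses its semantics *)
      if id <> id2 then failwith "add_artificial_node: tokens inconsistent with lex_sems"
      else id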
lexSemantics/ENIAMvalence.ml
... | ... | @@ -235,6 +235,12 @@ let transform_qub_pos lemma = function |
235 | 235 | | QUB as morf -> [morf] |
236 | 236 | | pos -> failwith ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) |
237 | 237 | |
238 | +let transform_interj_phrase lemma = function | |
239 | + | phrase -> failwith ("transform_interj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) | |
240 | + | |
241 | +let transform_interj_pos lemma = function | |
242 | + | pos -> failwith ("transform_interj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) | |
243 | + | |
238 | 244 | let transform_siebie_phrase lemma = function |
239 | 245 | | phrase -> failwith ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) |
240 | 246 | |
... | ... | @@ -363,31 +369,31 @@ let transform_preps morf = |
363 | 369 | | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c) |
364 | 370 | | PrepNP(psem,prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(psem,prep,c) |
365 | 371 | | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c) |
366 | - | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps" else morf | |
372 | + | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps 1" else morf | |
367 | 373 | | morf -> morf in |
368 | 374 | match morf with |
369 | 375 | | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case)) |
370 | - | ComparP _ -> failwith "transform_preps" | |
376 | + | ComparP _ -> failwith "transform_preps 2" | |
371 | 377 | | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case))) |
372 | 378 | | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case))) |
373 | 379 | | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf] |
374 | 380 | | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf] |
375 | - | LexArg(id,lex,COMPAR _) -> failwith "transform_preps" | |
376 | - | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps" | |
381 | + | LexArg(id,lex,COMPAR _) -> failwith "transform_preps 3" | |
382 | + | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps 4" | |
377 | 383 | | PrepNP(sem,"per",Str) -> [PrepNP(sem,"per",Case "nom");PrepNP(sem,"per",Case "voc")] (* FIXME: voc do poprawienie w leksykonie *) |
378 | 384 | | PrepNP(_,_,Case _) as morf -> [morf] |
379 | 385 | | PrepAdjP(_,Case _) as morf -> [morf] |
380 | 386 | | PrepNCP(_,Case _,_,_) as morf -> [morf] |
381 | - | PrepNP(_,"_",CaseUndef) as morf -> [morf] | |
382 | - | PrepNP _ -> failwith "transform_preps" | |
383 | - | PrepAdjP _ -> failwith "transform_preps" | |
384 | - | PrepNCP _ -> failwith "transform_preps" | |
387 | + | PrepNP(_,_,CaseUndef) as morf -> [morf] | |
388 | + | PrepNP _ as morf -> failwith ("transform_preps 5: " ^ ENIAMwalStringOf.phrase morf) | |
389 | + | PrepAdjP _ -> failwith "transform_preps 6" | |
390 | + | PrepNCP _ -> failwith "transform_preps 7" | |
385 | 391 | | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));] |
386 | 392 | | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))] |
387 | 393 | | LexArg(id,lex,PREP (Case _)) as morf -> [morf] |
388 | 394 | | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf] |
389 | - | LexArg(id,lex,PREP _) -> failwith "transform_preps" | |
390 | - | SimpleLexArg(lex,PREP _) -> failwith "transform_preps" | |
395 | + | LexArg(id,lex,PREP _) -> failwith "transform_preps 8" | |
396 | + | SimpleLexArg(lex,PREP _) -> failwith "transform_preps 9" | |
391 | 397 | | morf -> [morf] |
392 | 398 | |
393 | 399 | let transform_pers_schema lemma negation mood schema = |
... | ... | @@ -488,6 +494,7 @@ let transform_schema pos lemma schema = |
488 | 494 | | "comp" -> transform_comp_phrase,transform_comp_pos |
489 | 495 | | "qub" -> transform_qub_phrase,transform_qub_pos |
490 | 496 | | "siebie" -> transform_siebie_phrase,transform_siebie_pos |
497 | + | "interj" -> transform_interj_phrase,transform_interj_pos | |
491 | 498 | | _ -> failwith "transform_schema" |
492 | 499 | in |
493 | 500 | Xlist.map schema (fun s -> |
... | ... | @@ -539,6 +546,9 @@ let transform_entry pos lemma negation pred aspect schema = |
539 | 546 | if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5"); |
540 | 547 | Xlist.map ["congr";"rec"] (fun acm -> |
541 | 548 | [Acm,Eq,[acm]],transform_num_schema acm schema)) else |
549 | + if pos = "interj" then ( | |
550 | + if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 6"); | |
551 | + [[],transform_schema "interj" lemma schema]) else | |
542 | 552 | List.flatten (Xlist.map (expand_negation negation) (fun negation -> |
543 | 553 | let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in |
544 | 554 | if pos = "fin" || pos = "bedzie" then |
... | ... |
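Note: the two new helpers transform_interj_phrase and transform_interj_pos reject every phrase and pos they are given, so the new interj branch of transform_entry only succeeds when the schema carries no argument morphs. An illustrative call, assuming ENIAMwalTypes is opened (as it is in ENIAMvalence.ml) and that transform_entry is visible outside the module:

    (* An interjection with default negation/pred/aspect and an empty schema yields a
       single entry with an empty selector list; anything else raises. *)
    let _ = ENIAMvalence.transform_entry "interj" "yay" NegationUndef PredFalse AspectUndef []
    (* = [ [], [] ] *)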
lexSemantics/ENIAMwalReduce.ml
... | ... | @@ -90,6 +90,7 @@ let select_comprep_adjuncts lexemes = |
90 | 90 | not (StringSet.is_empty (StringSet.intersection reqs lexemes)) then s :: l else l) |
91 | 91 | with Not_found -> l) |
92 | 92 | |
93 | +(* FIXME: trzeba zanalizować interację tej procedury z Pro w schemacie w wersji z walentym i z semantyką dziedzinową *) | |
93 | 94 | let set_necessary pos schema = |
94 | 95 | Xlist.map schema (fun p -> |
95 | 96 | let nec = |
... | ... | @@ -101,6 +102,8 @@ let set_necessary pos schema = |
101 | 102 | | _ -> b) then Req else |
102 | 103 | if p.gf <> SUBJ && p.cr = [] (*&& p.ce = []*) then Opt else |
103 | 104 | if p.gf = SUBJ && pos = "impt" then ProNG else |
105 | + if p.gf = SUBJ && pos = "pact" then Opt else | |
106 | + if p.gf = OBJ && pos = "ppas" then Opt else | |
104 | 107 | if Xlist.fold p.morfs false (fun b -> function |
105 | 108 | NP NomAgr -> true |
106 | 109 | | NCP(NomAgr,_,_) -> true |
... | ... |
lexSemantics/ENIAMwalRenderer.ml
... | ... | @@ -183,9 +183,11 @@ let render_phrase_cat cat = function |
183 | 183 | | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] |
184 | 184 | | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] |
185 | 185 | | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] |
186 | + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top] | |
186 | 187 | | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] |
187 | 188 | | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] |
188 | 189 | | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] |
190 | + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top] | |
189 | 191 | | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] |
190 | 192 | | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] |
191 | 193 | (* | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]*) |
... | ... |
morphology/resources/alt_supplement.tab
... | ... | @@ -4,4 +4,12 @@ sobie siebie siebie:dat.loc |
4 | 4 | sobą siebie siebie:inst |
5 | 5 | to to pred |
6 | 6 | yay yay interj |
7 | +świetnie świetnie interj | |
8 | +doskonale doskonale interj | |
9 | +idealnie idealnie interj | |
10 | +zdecydowanie zdecydowanie interj | |
11 | +ok ok interj | |
12 | +super super interj | |
13 | +dobrze dobrze interj | |
14 | +dzięki dzięki interj | |
7 | 15 | |
... | ... |
semantics/ENIAMsemGraph.ml
... | ... | @@ -25,7 +25,7 @@ let empty_concept = |
25 | 25 | {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list; |
26 | 26 | c_left_input_pos: int; c_right_input_pos: int;*) c_relations=Dot; c_variable="",""; c_pos=(-1); c_cat=Dot; c_label=""; c_def_label=""} |
27 | 27 | |
28 | -let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot} | |
28 | +let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot; cx_label=""; cx_def_label=""} | |
29 | 29 | |
30 | 30 | let rec make_args_list = function |
31 | 31 | Tuple l -> List.flatten (Xlist.map l make_args_list) |
... | ... | @@ -154,7 +154,7 @@ let create_normal_concept tokens lex_sems t cat proj = |
154 | 154 | | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
155 | 155 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in |
156 | 156 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
157 | - let _ = ExtArray.add lex_sems in | |
157 | + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | |
158 | 158 | let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in |
159 | 159 | make_relation t (Context cx) else |
160 | 160 | if t.pos = "adj" || t.pos = "adjc" || t.pos = "adjp" || t.pos = "adja" || t.pos = "pact" || t.pos = "ppas" || t.pos = "apron" || t.pos = "ordnum" || t.pos = "roman-adj" then |
... | ... | @@ -249,12 +249,15 @@ let create_normal_concept tokens lex_sems t cat proj = |
249 | 249 | if t.pos = "comp" then |
250 | 250 | make_relation t (SetContextName(c.c_sense,RemoveRelation("CORE","",c.c_relations))) else |
251 | 251 | if t.pos = "conj" then |
252 | - let c = {empty_context with cx_sense=t.meaning; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos} in | |
252 | + let c = {empty_context with cx_sense=t.meaning; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat; cx_def_label=c.c_def_label; cx_label=c.c_label} in | |
253 | 253 | let c = Xlist.fold t.attrs c (fun c -> function |
254 | 254 | | "NUM",_ -> c |
255 | 255 | | "CASE",_ -> c |
256 | 256 | | "GEND",_ -> c |
257 | 257 | | "PERS",_ -> c |
258 | + | "ASPECT",_ -> c | |
259 | + | "controller",_ -> c | |
260 | + | "controllee",_ -> c | |
258 | 261 | | e,t -> failwith ("create_normal_concept conj: " ^ e)) in |
259 | 262 | ManageCoordination({t with attrs=[]; args=Dot},Context c) else |
260 | 263 | (* if t.pos = "interj" then |
... | ... | @@ -264,7 +267,10 @@ let create_normal_concept tokens lex_sems t cat proj = |
264 | 267 | if t.pos = "sinterj" || t.pos = "interj" then |
265 | 268 | let c = Xlist.fold t.attrs c (fun c -> function |
266 | 269 | | e,t -> failwith ("create_normal_concept sinterj: " ^ e)) in |
267 | - make_relation t (Concept c) else | |
270 | + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in | |
271 | + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | |
272 | + let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in | |
273 | + make_relation t (Context cx) else | |
268 | 274 | if t.lemma = "<root>" then t.args else |
269 | 275 | if t.lemma = "<merge>" then RemoveRelation("null","",t.args) else |
270 | 276 | if t.pos = "interp" && t.lemma = "?" && t.args = Dot then SingleRelation(Val "int") else |
... | ... | @@ -511,7 +517,7 @@ let rec reduce_tree = function |
511 | 517 | (match reduce_tree t with |
512 | 518 | Relation(r,a,t) -> |
513 | 519 | if (r = r0 && a = a0) || r0 = "" then t else |
514 | - Context{empty_context with cx_contents= | |
520 | + Context{empty_context with cx_cat=Val "Situation"; cx_contents= | |
515 | 521 | Concept{empty_concept with c_relations=Relation(r,a,t)}; (*cx_variable=string_of_int id,""; cx_pos=c.c_pos*)} |
516 | 522 | (* | TripleRelation(r,a,s,t) -> |
517 | 523 | Context{empty_context with cx_contents= |
... | ... | @@ -625,6 +631,7 @@ let rec simplify_tree = function |
625 | 631 | let l = Xlist.fold l [] (fun l t -> |
626 | 632 | match simplify_tree t with |
627 | 633 | Dot -> l |
634 | + | Tuple l2 -> l2 @ l | |
628 | 635 | | t -> t :: l) in |
629 | 636 | (match l with |
630 | 637 | [] -> Dot |
... | ... | @@ -655,7 +662,8 @@ let rec simplify_tree = function |
655 | 662 | c_cat = simplify_tree (Variant(e,lt3))} |
656 | 663 | | Context c -> |
657 | 664 | let lt1,lt2,lt3 = Xlist.fold l ([],[],[]) (fun (lt1,lt2,lt3) -> function |
658 | - i,Context c2 -> if c.cx_sense = c2.cx_sense then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found | |
665 | + i,Context c2 -> if c.cx_sense = c2.cx_sense && c.cx_label = c2.cx_label && | |
666 | + c.cx_def_label = c2.cx_def_label then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found | |
659 | 667 | | _ -> raise Not_found) in |
660 | 668 | let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in |
661 | 669 | Context{c with |
... | ... |
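Note: the new Tuple clause in simplify_tree splices the contents of a nested tuple into the enclosing one instead of keeping Tuple-inside-Tuple nodes produced by intermediate stages. A toy reconstruction of the idea on a minimal term type (illustration only; the real simplify_tree works over ENIAMsemTypes.linear_term and also merges concept and context variants):

    type term = Dot | Val of string | Tuple of term list

    let rec flatten = function
        Tuple l ->
          let l = List.fold_left (fun acc t ->
            match flatten t with
              Dot -> acc                        (* drop empty members *)
            | Tuple l2 -> List.rev l2 @ acc     (* splice nested tuples instead of nesting them *)
            | t -> t :: acc) [] l in
          (match l with [] -> Dot | [t] -> t | l -> Tuple (List.rev l))
      | t -> t

    let _ = flatten (Tuple[Val "a"; Tuple[Val "b"; Dot; Val "c"]])
    (* = Tuple [Val "a"; Val "b"; Val "c"] *)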
semantics/ENIAMsemGraphOf.ml
... | ... | @@ -225,7 +225,9 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f |
225 | 225 | | Context t -> |
226 | 226 | let id = !id_counter in |
227 | 227 | incr id_counter; |
228 | - fprintf file " subgraph cluster%d {\nlabel=\"%s%s\"\n" id | |
228 | + fprintf file " subgraph cluster%d {\nlabel=\"%s%s%s%s\"\n" id | |
229 | + (if t.cx_label="" then "" else "?" ^ t.cx_label ^ " ") | |
230 | + (if t.cx_def_label="" then "" else "*" ^ t.cx_def_label ^ " ") | |
229 | 231 | (if t.cx_cat=Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_cat ^ " ")) |
230 | 232 | (if t.cx_sense = Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_sense)); |
231 | 233 | let iid = print_graph2_rec file false "" "" "" 0 t.cx_contents in |
... | ... |
semantics/ENIAMsemLatexOf.ml
... | ... | @@ -66,7 +66,7 @@ let rec linear_term c = function |
66 | 66 | | Context c -> |
67 | 67 | "{\\left[\\begin{array}{ll}" ^ |
68 | 68 | (String.concat "\\\\ " (Xlist.map ([ |
69 | - "SENSE",c.cx_sense;"CAT",c.cx_cat; | |
69 | + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label; | |
70 | 70 | "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); |
71 | 71 | "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> |
72 | 72 | "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}" |
... | ... |
semantics/ENIAMsemStringOf.ml
... | ... | @@ -43,7 +43,7 @@ let rec linear_term c = function |
43 | 43 | | Context c -> |
44 | 44 | "[" ^ |
45 | 45 | (String.concat "; " (Xlist.map ([ |
46 | - "SENSE",c.cx_sense;"CAT",c.cx_cat; | |
46 | + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label; | |
47 | 47 | "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); |
48 | 48 | "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> |
49 | 49 | e ^ ": " ^ (linear_term 0 t)))) ^ "]" |
... | ... |
semantics/ENIAMsemTypes.ml
... | ... | @@ -57,7 +57,7 @@ and concept = |
57 | 57 | c_pos: int; c_cat: linear_term} |
58 | 58 | |
59 | 59 | and context = |
60 | - {cx_sense: linear_term; cx_contents: linear_term; | |
60 | + {cx_sense: linear_term; cx_contents: linear_term; cx_label: string; cx_def_label: string; | |
61 | 61 | cx_relations: linear_term; cx_variable: (string * string); cx_pos: int; cx_cat: linear_term} |
62 | 62 | |
63 | 63 | and linear_term = |
... | ... |
semantics/ENIAMsemValence.ml
... | ... | @@ -220,6 +220,19 @@ let string_of_arg arg = |
220 | 220 | let string_of_position p = |
221 | 221 | (string_of_argdir p.dir) ^ String.concat "+" (StringSet.to_list p.morfs) |
222 | 222 | |
223 | +let manage_arg p t = | |
224 | + let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in | |
225 | + let t = | |
226 | + if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then | |
227 | + SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) | |
228 | + else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t) | |
229 | + else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t | |
230 | + else failwith "manage_arg: ni 2" in | |
231 | + let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in | |
232 | + let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in | |
233 | + let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in | |
234 | + t | |
235 | + | |
223 | 236 | let rec match_arg_positions lemma arg rev = function |
224 | 237 | p :: positions -> |
225 | 238 | (* Printf.printf "match_arg_positions 1: arg=%s rev=[%s] positions=%s :: [%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position)) (string_of_position p) (String.concat "; " (Xlist.map positions string_of_position)); *) |
... | ... | @@ -228,16 +241,13 @@ let rec match_arg_positions lemma arg rev = function |
228 | 241 | (match l with |
229 | 242 | [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions lemma arg (p :: rev) positions |
230 | 243 | | [t] -> |
231 | - let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in | |
232 | - let t = | |
233 | - if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then | |
234 | - SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) | |
235 | - else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t) | |
236 | - else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t | |
237 | - else failwith "match_arg_positions: ni 2" in | |
238 | - let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in | |
239 | - let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in | |
240 | - let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in | |
244 | + let t = manage_arg p t in | |
245 | + if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) | |
246 | + else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) | |
247 | + | [t1;t2] -> (* FIXME: przydałoby się to uogólnić na listę dowolnej długości *) | |
248 | + let t1 = manage_arg p t1 in | |
249 | + let t2 = manage_arg p t2 in | |
250 | + let t = Variant("",["1",t1;"2",t2]) in | |
241 | 251 | if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) |
242 | 252 | else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) |
243 | 253 | | _ -> failwith ("match_arg_positions: lemma=" ^ lemma ^ " arg=" ^ string_of_arg arg ^ " position=" ^ string_of_position p)) |
... | ... |
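Note: factoring the attribute decoration out into manage_arg lets the new [t1;t2] clause reuse it, but, as the FIXME in the hunk says, that clause still covers exactly two matches. A possible generalisation to any non-empty list of matches (a sketch only; combine_matches is not part of the commit):

    (* Numbers the alternatives "1", "2", ... exactly as the two-element clause does,
       and degenerates to a single decorated argument when there is just one match. *)
    let combine_matches p = function
        [] -> failwith "combine_matches: empty"
      | [t] -> manage_arg p t
      | l -> Variant("", List.mapi (fun n t -> string_of_int (n + 1), manage_arg p t) l)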
semantics/ENIAMsemXMLof.ml
... | ... | @@ -49,8 +49,8 @@ let rec linear_term = function |
49 | 49 | Xml.Element("relations",[],[linear_term c.c_relations]); |
50 | 50 | Xml.Element("cat",[],[linear_term c.c_cat])]) |
51 | 51 | | Context c -> |
52 | - Xml.Element("Context", | |
53 | - ["variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos], | |
52 | + Xml.Element("Context",["label",c.cx_label;"def_label",c.cx_def_label; | |
53 | + "variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos], | |
54 | 54 | [Xml.Element("sense",[],[linear_term c.cx_sense]); |
55 | 55 | Xml.Element("contents",[],[linear_term c.cx_contents]); |
56 | 56 | Xml.Element("relations",[],[linear_term c.cx_relations]); |
... | ... |
subsyntax/ENIAMsubsyntax.ml
... | ... | @@ -357,11 +357,14 @@ let parse_text_tokens tokens query = |
357 | 357 | let paragraphs = List.rev (Xlist.fold paragraphs [] (fun l -> function "" -> l | s -> s :: l)) in |
358 | 358 | let n = if Xlist.size paragraphs = 1 then 0 else 1 in |
359 | 359 | let paragraphs,_ = Xlist.fold paragraphs ([],n) (fun (paragraphs,n) paragraph -> |
360 | + try | |
360 | 361 | let paths = parse paragraph in |
361 | 362 | (* print_endline "parse_text 1"; *) |
362 | 363 | let pid = if n = 0 then "" else string_of_int n ^ "_" in |
363 | 364 | let sentences = ENIAMsentences.split_into_sentences pid paragraph tokens paths in |
364 | - (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1) in | |
365 | + (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1 | |
366 | + with e -> | |
367 | + (AltParagraph[Raw,RawParagraph paragraph; Error,ErrorParagraph (Printexc.to_string e)]) :: paragraphs, n+1) in | |
365 | 368 | AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs)], tokens |
366 | 369 | |
367 | 370 | let parse_text query = |
... | ... |
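Note: this is where the Error mode and the ErrorParagraph constructor threaded through the rest of the commit originate: a paragraph whose parse raises no longer aborts the whole text, it degrades into an error marker stored next to the raw input. The pattern in isolation (a sketch assuming the ENIAMsubsyntaxTypes constructors; parse_one stands in for the real parse plus split_into_sentences pipeline):

    let parse_paragraph_safe parse_one paragraph =
      try AltParagraph [Raw, RawParagraph paragraph; Struct, StructParagraph (parse_one paragraph)]
      with e -> AltParagraph [Raw, RawParagraph paragraph; Error, ErrorParagraph (Printexc.to_string e)]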
subsyntax/ENIAMsubsyntaxHTMLof.ml
... | ... | @@ -104,6 +104,7 @@ let rec html_of_paragraph tokens = function |
104 | 104 | String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> |
105 | 105 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (ENIAMsubsyntaxStringOf.mode mode) (html_of_paragraph tokens paragraph))) ^ |
106 | 106 | "</table>" |
107 | + | ErrorParagraph s -> (*print_endline "ErrorParagraph";*) s | |
107 | 108 | |
108 | 109 | let rec html_of_text tokens = function |
109 | 110 | RawText s -> s |
... | ... |
subsyntax/ENIAMsubsyntaxStringOf.ml
... | ... | @@ -28,6 +28,7 @@ let mode = function |
28 | 28 | | Mate -> "Mate" |
29 | 29 | | Swigra -> "Swigra" |
30 | 30 | | POLFIE -> "POLFIE" |
31 | + | Error -> "Error" | |
31 | 32 | |
32 | 33 | let token_extarray t = |
33 | 34 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size t - 1) [] (fun l id -> |
... | ... | @@ -73,6 +74,7 @@ let rec paragraph spaces t = function |
73 | 74 | | AltParagraph l -> |
74 | 75 | String.concat "\n" (Xlist.map l (fun (m,p) -> |
75 | 76 | sprintf "%sAltParagraph mode=%s %s" spaces (mode m) (paragraph "" t p))) |
77 | + | ErrorParagraph s -> spaces ^ "ErrorParagraph: " ^ s | |
76 | 78 | |
77 | 79 | let rec text spaces t = function |
78 | 80 | RawText s -> spaces ^ "RawText: " ^ s |
... | ... |
subsyntax/ENIAMsubsyntaxTypes.ml
... | ... | @@ -20,7 +20,7 @@ |
20 | 20 | open ENIAMtokenizerTypes |
21 | 21 | |
22 | 22 | type mode = |
23 | - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | |
23 | + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | Error | |
24 | 24 | |
25 | 25 | type sentence = |
26 | 26 | RawSentence of string |
... | ... | @@ -38,6 +38,7 @@ and paragraph = |
38 | 38 | RawParagraph of string |
39 | 39 | | StructParagraph of sentence_env list (* zdania *) |
40 | 40 | | AltParagraph of (mode * paragraph) list |
41 | + | ErrorParagraph of string | |
41 | 42 | |
42 | 43 | type text = |
43 | 44 | RawText of string |
... | ... | @@ -81,6 +82,7 @@ let int_of_mode = function |
81 | 82 | | Mate -> 4 |
82 | 83 | | Swigra -> 5 |
83 | 84 | | POLFIE -> 6 |
85 | + | Error -> 7 | |
84 | 86 | |
85 | 87 | let compare_mode x y = |
86 | 88 | compare (int_of_mode x) (int_of_mode y) |
... | ... | @@ -109,6 +111,7 @@ let rec map_paragraph mode f = function |
109 | 111 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
110 | 112 | mode, map_paragraph mode f paragraph) in |
111 | 113 | AltParagraph(List.rev l) |
114 | + | ErrorParagraph s -> ErrorParagraph s | |
112 | 115 | |
113 | 116 | let rec map_text mode f = function |
114 | 117 | RawText s -> RawText s |
... | ... | @@ -137,6 +140,7 @@ let rec fold_paragraph mode s f = function |
137 | 140 | | AltParagraph l -> |
138 | 141 | Xlist.fold l s (fun s (mode,paragraph) -> |
139 | 142 | fold_paragraph mode s f paragraph) |
143 | + | ErrorParagraph _ -> s | |
140 | 144 | |
141 | 145 | let rec fold_text mode s f = function |
142 | 146 | RawText _ -> s |
... | ... |
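Note: adding ErrorParagraph to the paragraph type is what forces the one-line extra cases in map_paragraph and fold_paragraph here and in every traversal, printer and serialiser touched elsewhere in this commit: any exhaustive match over paragraph now needs the new arm. A sketch of a client traversal against the extended type (count_errors is hypothetical, for illustration only):

    let rec count_errors = function
        RawParagraph _ -> 0
      | StructParagraph _ -> 0
      | AltParagraph l -> Xlist.fold l 0 (fun n (_,p) -> n + count_errors p)
      | ErrorParagraph _ -> 1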
subsyntax/ENIAMsubsyntaxXMLof.ml
... | ... | @@ -57,6 +57,7 @@ let rec paragraph m = function |
57 | 57 | Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> |
58 | 58 | Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) |
59 | 59 | | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMsubsyntaxStringOf.mode m) t)) |
60 | + | ErrorParagraph s -> Xml.Element("ErrorParagraph",set_mode m,[Xml.PCData s]) | |
60 | 61 | |
61 | 62 | let rec text m = function |
62 | 63 | RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) |
... | ... |
subsyntax/resources/brev.tab
... | ... | @@ -708,7 +708,7 @@ wz. wzorowy subst:_:_:m3 |
708 | 708 | wz . wzorowy adj:_:_:_:pos |
709 | 709 | wzgl . względnie adv:pos |
710 | 710 | x raz subst:_:_:m3 |
711 | -X październik subst:_:_:m3 | |
711 | +#X październik subst:_:_:m3 | |
712 | 712 | x . książę subst:_:_:m1 |
713 | 713 | x . ksiądz subst:_:_:m1 |
714 | 714 | z . zeszyt subst:_:_:m3 |
... | ... |