Commit 94377238f7e85dc691625ff9e2d93f1e780d80b0
1 parent
82a1d016
Testy i poprawki
Showing
30 changed files
with
140 additions
and
49 deletions
LCGlexicon/ENIAM_LCGlexicon.ml
@@ -165,7 +165,7 @@ let make_rules x_flag filename = | @@ -165,7 +165,7 @@ let make_rules x_flag filename = | ||
165 | dict_of_grammar lexicon | 165 | dict_of_grammar lexicon |
166 | 166 | ||
167 | let find_rules rules cats = | 167 | let find_rules rules cats = |
168 | - let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category " ^ cats.pos) in | 168 | + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in |
169 | (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) | 169 | (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) |
170 | let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in | 170 | let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in |
171 | Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> | 171 | Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> |
LCGlexicon/ENIAM_LCGlexiconParser.ml
@@ -95,6 +95,7 @@ let match_relation = function | @@ -95,6 +95,7 @@ let match_relation = function | ||
95 | 95 | ||
96 | let rec split_mid i0 rev = function | 96 | let rec split_mid i0 rev = function |
97 | [i,s] -> List.rev ((i,s) :: rev) | 97 | [i,s] -> List.rev ((i,s) :: rev) |
98 | + | (i1,s) :: (i2,"|") :: (i3,"|") :: l -> raise (ParseError("split_mid", "duplicated delimeter found", i2)) | ||
98 | | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l | 99 | | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l |
99 | | [] -> raise (ParseError("split_mid", "empty", i0)) | 100 | | [] -> raise (ParseError("split_mid", "empty", i0)) |
100 | | (i,s) :: l -> raise (ParseError("split_mid", "delimiter not found: " ^ String.concat " " (s :: Xlist.map l snd), i)) | 101 | | (i,s) :: l -> raise (ParseError("split_mid", "delimiter not found: " ^ String.concat " " (s :: Xlist.map l snd), i)) |
LCGlexicon/ENIAMcategoriesPL.ml
@@ -400,8 +400,8 @@ let clarify_categories proper cat proj = function | @@ -400,8 +400,8 @@ let clarify_categories proper cat proj = function | ||
400 | else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"}] | 400 | else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"}] |
401 | | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}] | 401 | | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}] |
402 | | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}] | 402 | | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}] |
403 | - | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"}] | ||
404 | - | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"}] | 403 | + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; proj=proj}] |
404 | + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; proj=proj*)}] | ||
405 | | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}] | 405 | | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}] |
406 | | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}] | 406 | | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}] |
407 | | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}] | 407 | | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}] |
@@ -617,7 +617,7 @@ let pos_categories = Xlist.fold [ | @@ -617,7 +617,7 @@ let pos_categories = Xlist.fold [ | ||
617 | "part",[Lemma;]; | 617 | "part",[Lemma;]; |
618 | "comp",[Lemma;];(* ctype *) | 618 | "comp",[Lemma;];(* ctype *) |
619 | "conj",[Lemma;];(* ctype *) | 619 | "conj",[Lemma;];(* ctype *) |
620 | - "interj",[Lemma;]; | 620 | + "interj",[Lemma;Cat;Proj;]; |
621 | "sinterj",[Lemma;]; | 621 | "sinterj",[Lemma;]; |
622 | "burk",[Lemma;]; | 622 | "burk",[Lemma;]; |
623 | "interp",[Lemma;]; | 623 | "interp",[Lemma;]; |
LCGlexicon/resources/subst_container.dat
LCGparser/ENIAM_LCG_XMLof.ml
@@ -40,7 +40,7 @@ let rec linear_term = function | @@ -40,7 +40,7 @@ let rec linear_term = function | ||
40 | | App(s,t) -> Xml.Element("App",[],[linear_term s;linear_term t]) | 40 | | App(s,t) -> Xml.Element("App",[],[linear_term s;linear_term t]) |
41 | | Dot -> Xml.Element("Dot",[],[]) | 41 | | Dot -> Xml.Element("Dot",[],[]) |
42 | | Val s -> Xml.Element("Val",[],[Xml.PCData s]) | 42 | | Val s -> Xml.Element("Val",[],[Xml.PCData s]) |
43 | - | SetAttr(e,s,t) -> Xml.Element("SetAttr",["label",v],[linear_term s;linear_term t]) | 43 | + | SetAttr(e,s,t) -> Xml.Element("SetAttr",["label",e],[linear_term s;linear_term t]) |
44 | | Fix(s,t) -> Xml.Element("Fix",[],[linear_term s;linear_term t]) | 44 | | Fix(s,t) -> Xml.Element("Fix",[],[linear_term s;linear_term t]) |
45 | | Empty t -> Xml.Element("Empty",[],[linear_term t]) | 45 | | Empty t -> Xml.Element("Empty",[],[linear_term t]) |
46 | | Apply t -> Xml.Element("Apply",[],[linear_term t]) | 46 | | Apply t -> Xml.Element("Apply",[],[linear_term t]) |
@@ -59,4 +59,3 @@ let linear_term_array a = | @@ -59,4 +59,3 @@ let linear_term_array a = | ||
59 | let l = Int.fold 0 (Array.length a - 1) [] (fun l i -> | 59 | let l = Int.fold 0 (Array.length a - 1) [] (fun l i -> |
60 | Xml.Element("element",["index",string_of_int i],[linear_term a.(i)]) :: l) in | 60 | Xml.Element("element",["index",string_of_int i],[linear_term a.(i)]) :: l) in |
61 | Xml.Element("array",[],List.rev l) | 61 | Xml.Element("array",[],List.rev l) |
62 | - |
LCGparser/makefile
@@ -6,21 +6,21 @@ OCAMLFLAGS=$(INCLUDES) -g | @@ -6,21 +6,21 @@ OCAMLFLAGS=$(INCLUDES) -g | ||
6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa |
7 | INSTALLDIR=`ocamlc -where`/eniam | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | ||
9 | -SOURCES= ENIAM_LCGtypes.ml ENIAM_LCGstringOf.ml ENIAM_LCGrules.ml ENIAM_LCGrenderer.ml ENIAM_LCGchart.ml ENIAM_LCGlatexOf.ml ENIAM_LCGreductions.ml ENIAM_LCGgraphOf.ml | 9 | +SOURCES= ENIAM_LCGtypes.ml ENIAM_LCGstringOf.ml ENIAM_LCGrules.ml ENIAM_LCGrenderer.ml ENIAM_LCGchart.ml ENIAM_LCGlatexOf.ml ENIAM_LCGreductions.ml ENIAM_LCGgraphOf.ml ENIAM_LCG_XMLof.ml |
10 | 10 | ||
11 | all: eniam-lcg-parser.cma eniam-lcg-parser.cmxa | 11 | all: eniam-lcg-parser.cma eniam-lcg-parser.cmxa |
12 | 12 | ||
13 | install: all | 13 | install: all |
14 | mkdir -p $(INSTALLDIR) | 14 | mkdir -p $(INSTALLDIR) |
15 | cp eniam-lcg-parser.cmxa eniam-lcg-parser.a eniam-lcg-parser.cma $(INSTALLDIR) | 15 | cp eniam-lcg-parser.cmxa eniam-lcg-parser.a eniam-lcg-parser.cma $(INSTALLDIR) |
16 | - cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi $(INSTALLDIR) | ||
17 | - cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx $(INSTALLDIR) | 16 | + cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi ENIAM_LCG_XMLof.cmi $(INSTALLDIR) |
17 | + cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx ENIAM_LCG_XMLof.cmx $(INSTALLDIR) | ||
18 | 18 | ||
19 | install-local: all | 19 | install-local: all |
20 | mkdir -p $(INSTALLDIR) | 20 | mkdir -p $(INSTALLDIR) |
21 | cp eniam-lcg-parser.cmxa eniam-lcg-parser.a eniam-lcg-parser.cma $(INSTALLDIR) | 21 | cp eniam-lcg-parser.cmxa eniam-lcg-parser.a eniam-lcg-parser.cma $(INSTALLDIR) |
22 | - cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi $(INSTALLDIR) | ||
23 | - cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx $(INSTALLDIR) | 22 | + cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi ENIAM_LCG_XMLof.cmi $(INSTALLDIR) |
23 | + cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx ENIAM_LCG_XMLof.cmx $(INSTALLDIR) | ||
24 | 24 | ||
25 | eniam-lcg-parser.cma: $(SOURCES) | 25 | eniam-lcg-parser.cma: $(SOURCES) |
26 | ocamlc -linkall -a -o eniam-lcg-parser.cma $(OCAMLFLAGS) $^ | 26 | ocamlc -linkall -a -o eniam-lcg-parser.cma $(OCAMLFLAGS) $^ |
exec/ENIAMexec.ml
@@ -33,6 +33,7 @@ let translate_mode = function | @@ -33,6 +33,7 @@ let translate_mode = function | ||
33 | | ENIAMsubsyntaxTypes.Mate -> Mate | 33 | | ENIAMsubsyntaxTypes.Mate -> Mate |
34 | | ENIAMsubsyntaxTypes.Swigra -> Swigra | 34 | | ENIAMsubsyntaxTypes.Swigra -> Swigra |
35 | | ENIAMsubsyntaxTypes.POLFIE -> POLFIE | 35 | | ENIAMsubsyntaxTypes.POLFIE -> POLFIE |
36 | + | ENIAMsubsyntaxTypes.Error -> Error | ||
36 | 37 | ||
37 | let rec translate_sentence = function | 38 | let rec translate_sentence = function |
38 | ENIAMsubsyntaxTypes.RawSentence s -> RawSentence s | 39 | ENIAMsubsyntaxTypes.RawSentence s -> RawSentence s |
@@ -53,6 +54,7 @@ let rec translate_paragraph = function | @@ -53,6 +54,7 @@ let rec translate_paragraph = function | ||
53 | sentence=translate_sentence p.ENIAMsubsyntaxTypes.sentence})) | 54 | sentence=translate_sentence p.ENIAMsubsyntaxTypes.sentence})) |
54 | | ENIAMsubsyntaxTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) -> | 55 | | ENIAMsubsyntaxTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) -> |
55 | translate_mode mode, translate_paragraph paragraph)) | 56 | translate_mode mode, translate_paragraph paragraph)) |
57 | + | ENIAMsubsyntaxTypes.ErrorParagraph s -> ErrorParagraph s | ||
56 | 58 | ||
57 | let rec translate_text = function | 59 | let rec translate_text = function |
58 | ENIAMsubsyntaxTypes.RawText s -> RawText s | 60 | ENIAMsubsyntaxTypes.RawText s -> RawText s |
@@ -64,7 +66,9 @@ let rec translate_text = function | @@ -64,7 +66,9 @@ let rec translate_text = function | ||
64 | let clarify_categories cats token = | 66 | let clarify_categories cats token = |
65 | match token.ENIAMtokenizerTypes.token with | 67 | match token.ENIAMtokenizerTypes.token with |
66 | ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> | 68 | ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> |
67 | - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories false cat proj (lemma,pos,interp))))) | 69 | + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> |
70 | + (* Printf.printf "lemma=%s pos=%s cat=%s proj=%s\n%!" lemma pos cat (String.concat "," proj); *) | ||
71 | + ENIAMcategoriesPL.clarify_categories false cat proj (lemma,pos,interp))))) | ||
68 | | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) -> | 72 | | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) -> |
69 | List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories true cat proj (lemma,pos,interp))))) | 73 | List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories true cat proj (lemma,pos,interp))))) |
70 | | ENIAMtokenizerTypes.Interp lemma -> | 74 | | ENIAMtokenizerTypes.Interp lemma -> |
exec/ENIAMexecTypes.ml
@@ -78,7 +78,7 @@ type semantic_processing_result = { | @@ -78,7 +78,7 @@ type semantic_processing_result = { | ||
78 | } | 78 | } |
79 | *) | 79 | *) |
80 | type mode = | 80 | type mode = |
81 | - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | 81 | + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | Error |
82 | 82 | ||
83 | type sentence = | 83 | type sentence = |
84 | RawSentence of string | 84 | RawSentence of string |
@@ -98,6 +98,7 @@ and paragraph = | @@ -98,6 +98,7 @@ and paragraph = | ||
98 | RawParagraph of string | 98 | RawParagraph of string |
99 | | StructParagraph of paragraph_record list (* zdania *) | 99 | | StructParagraph of paragraph_record list (* zdania *) |
100 | | AltParagraph of (mode * paragraph) list | 100 | | AltParagraph of (mode * paragraph) list |
101 | + | ErrorParagraph of string | ||
101 | 102 | ||
102 | type text = | 103 | type text = |
103 | RawText of string | 104 | RawText of string |
@@ -267,6 +268,7 @@ let rec map_paragraph mode f = function | @@ -267,6 +268,7 @@ let rec map_paragraph mode f = function | ||
267 | let l = Xlist.rev_map l (fun (mode,paragraph) -> | 268 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
268 | mode, map_paragraph mode f paragraph) in | 269 | mode, map_paragraph mode f paragraph) in |
269 | AltParagraph(List.rev l) | 270 | AltParagraph(List.rev l) |
271 | + | ErrorParagraph s -> ErrorParagraph s | ||
270 | 272 | ||
271 | let rec map_text mode f = function | 273 | let rec map_text mode f = function |
272 | RawText s -> RawText s | 274 | RawText s -> RawText s |
@@ -295,6 +297,7 @@ let rec fold_paragraph mode s f = function | @@ -295,6 +297,7 @@ let rec fold_paragraph mode s f = function | ||
295 | | AltParagraph l -> | 297 | | AltParagraph l -> |
296 | Xlist.fold l s (fun s (mode,paragraph) -> | 298 | Xlist.fold l s (fun s (mode,paragraph) -> |
297 | fold_paragraph mode s f paragraph) | 299 | fold_paragraph mode s f paragraph) |
300 | + | ErrorParagraph _ -> s | ||
298 | 301 | ||
299 | let rec fold_text mode s f = function | 302 | let rec fold_text mode s f = function |
300 | RawText _ -> s | 303 | RawText _ -> s |
exec/ENIAMexecXMLof.ml
@@ -64,6 +64,7 @@ let rec paragraph m = function | @@ -64,6 +64,7 @@ let rec paragraph m = function | ||
64 | Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> | 64 | Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> |
65 | Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) | 65 | Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) |
66 | | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMvisualization.string_of_mode m) t)) | 66 | | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMvisualization.string_of_mode m) t)) |
67 | + | ErrorParagraph s -> Xml.Element("ErrorParagraph",set_mode m,[Xml.PCData s]) | ||
67 | 68 | ||
68 | let rec text m = function | 69 | let rec text m = function |
69 | RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) | 70 | RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) |
exec/ENIAMselectSent.ml
@@ -69,6 +69,7 @@ let rec select_sentence_modes_paragraph = function | @@ -69,6 +69,7 @@ let rec select_sentence_modes_paragraph = function | ||
69 | let l = Xlist.rev_map l (fun (mode,paragraph) -> | 69 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
70 | mode, select_sentence_modes_paragraph paragraph) in | 70 | mode, select_sentence_modes_paragraph paragraph) in |
71 | AltParagraph(List.rev l) | 71 | AltParagraph(List.rev l) |
72 | + | ErrorParagraph s -> ErrorParagraph s | ||
72 | 73 | ||
73 | let rec select_sentence_modes_text = function | 74 | let rec select_sentence_modes_text = function |
74 | RawText s -> RawText s | 75 | RawText s -> RawText s |
@@ -148,6 +149,7 @@ let rec select_sentences_paragraph mode = function | @@ -148,6 +149,7 @@ let rec select_sentences_paragraph mode = function | ||
148 | let l = Xlist.rev_map l (fun (mode,paragraph) -> | 149 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
149 | mode, select_sentences_paragraph mode paragraph) in | 150 | mode, select_sentences_paragraph mode paragraph) in |
150 | AltParagraph(List.rev l) | 151 | AltParagraph(List.rev l) |
152 | + | ErrorParagraph s -> ErrorParagraph s | ||
151 | 153 | ||
152 | let rec select_sentences_text mode = function | 154 | let rec select_sentences_text mode = function |
153 | RawText s -> RawText s | 155 | RawText s -> RawText s |
exec/ENIAMvisualization.ml
@@ -667,6 +667,7 @@ let string_of_mode = function | @@ -667,6 +667,7 @@ let string_of_mode = function | ||
667 | | Mate -> "Mate" | 667 | | Mate -> "Mate" |
668 | | Swigra -> "Swigra" | 668 | | Swigra -> "Swigra" |
669 | | POLFIE -> "POLFIE" | 669 | | POLFIE -> "POLFIE" |
670 | + | Error -> "Error" | ||
670 | (* | 671 | (* |
671 | (*let rec string_of_sentence = function | 672 | (*let rec string_of_sentence = function |
672 | RawSentence s -> sprintf "RawSentence(%s)" s | 673 | RawSentence s -> sprintf "RawSentence(%s)" s |
@@ -1065,9 +1066,27 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam | @@ -1065,9 +1066,27 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam | ||
1065 | sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ | 1066 | sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ |
1066 | "" | 1067 | "" |
1067 | | SemNotValidated -> | 1068 | | SemNotValidated -> |
1069 | + if verbosity < 2 then () else ( | ||
1070 | + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b; | ||
1071 | + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9; | ||
1072 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") "a3" result.dependency_tree6b; | ||
1073 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a2" result.dependency_tree7; | ||
1074 | + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; | ||
1075 | + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9; | ||
1076 | + ENIAMsemLatexOf.print_semantic_graph path (file_prefix ^ "_10_semantic_graph") "a3" result.semantic_graph10; | ||
1077 | + ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11); | ||
1068 | if verbosity = 0 then () else ( | 1078 | if verbosity = 0 then () else ( |
1069 | ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_12_semantic_graph") "" result.semantic_graph12); | 1079 | ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_12_semantic_graph") "" result.semantic_graph12); |
1070 | sprintf "<font color=\"red\">sem_not_validated</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ | 1080 | sprintf "<font color=\"red\">sem_not_validated</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ |
1081 | + (if verbosity < 2 then "" else | ||
1082 | + sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^ | ||
1083 | + sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix ^ | ||
1084 | + sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix ^ | ||
1085 | + sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix ^ | ||
1086 | + sprintf "<BR><IMG SRC=\"%s_6b_dependency_tree.png\">\n" file_prefix ^ | ||
1087 | + sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix ^ | ||
1088 | + sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix ^ | ||
1089 | + sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ | ||
1071 | (if verbosity = 0 then "" else | 1090 | (if verbosity = 0 then "" else |
1072 | sprintf "<BR><IMG SRC=\"%s_12_semantic_graph.png\">\n" file_prefix) ^ | 1091 | sprintf "<BR><IMG SRC=\"%s_12_semantic_graph.png\">\n" file_prefix) ^ |
1073 | "" | 1092 | "" |
@@ -1174,6 +1193,7 @@ let file_prefix_of_mode = function | @@ -1174,6 +1193,7 @@ let file_prefix_of_mode = function | ||
1174 | | Mate -> "M" | 1193 | | Mate -> "M" |
1175 | | Swigra -> "S" | 1194 | | Swigra -> "S" |
1176 | | POLFIE -> "P" | 1195 | | POLFIE -> "P" |
1196 | + | Error -> "Er" | ||
1177 | 1197 | ||
1178 | let rec html_of_sentence path file_prefix mode img verbosity tokens = function | 1198 | let rec html_of_sentence path file_prefix mode img verbosity tokens = function |
1179 | RawSentence s -> escape_html s | 1199 | RawSentence s -> escape_html s |
@@ -1204,6 +1224,7 @@ let rec html_of_paragraph path mode img verbosity tokens = function | @@ -1204,6 +1224,7 @@ let rec html_of_paragraph path mode img verbosity tokens = function | ||
1204 | String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> | 1224 | String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> |
1205 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path mode img verbosity tokens paragraph))) ^ | 1225 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path mode img verbosity tokens paragraph))) ^ |
1206 | "</table>" | 1226 | "</table>" |
1227 | + | ErrorParagraph s -> sprintf "<font color=\"red\">subsyntax_error</font>: %s\n" (escape_html s) | ||
1207 | 1228 | ||
1208 | let rec html_of_text path mode img verbosity tokens = function | 1229 | let rec html_of_text path mode img verbosity tokens = function |
1209 | RawText s -> escape_html s | 1230 | RawText s -> escape_html s |
@@ -1236,6 +1257,7 @@ let rec find_prev_next_paragraph rev = function | @@ -1236,6 +1257,7 @@ let rec find_prev_next_paragraph rev = function | ||
1236 | | StructParagraph sentences -> | 1257 | | StructParagraph sentences -> |
1237 | Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.id p.file_prefix rev p.sentence) | 1258 | Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.id p.file_prefix rev p.sentence) |
1238 | | AltParagraph l -> Xlist.fold l rev (fun rev (mode,paragraph) -> find_prev_next_paragraph rev paragraph) | 1259 | | AltParagraph l -> Xlist.fold l rev (fun rev (mode,paragraph) -> find_prev_next_paragraph rev paragraph) |
1260 | + | ErrorParagraph s -> rev | ||
1239 | 1261 | ||
1240 | let rec make_prev_next_map map prev = function | 1262 | let rec make_prev_next_map map prev = function |
1241 | [x] -> StringMap.add map x (prev,"") | 1263 | [x] -> StringMap.add map x (prev,"") |
@@ -1295,6 +1317,7 @@ let rec print_main_result_paragraph cg_bin_path path id tokens prev_next_map = f | @@ -1295,6 +1317,7 @@ let rec print_main_result_paragraph cg_bin_path path id tokens prev_next_map = f | ||
1295 | | StructParagraph sentences -> | 1317 | | StructParagraph sentences -> |
1296 | Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id p.file_prefix tokens p.id prev_next_map p.sentence) | 1318 | Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id p.file_prefix tokens p.id prev_next_map p.sentence) |
1297 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph cg_bin_path path id tokens prev_next_map paragraph) | 1319 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph cg_bin_path path id tokens prev_next_map paragraph) |
1320 | + | ErrorParagraph s -> print_not_parsed_main_result cg_bin_path path id "Er" s 0 prev_next_map | ||
1298 | 1321 | ||
1299 | let rec print_main_result_text cg_bin_path path id tokens = function | 1322 | let rec print_main_result_text cg_bin_path path id tokens = function |
1300 | RawText s -> () | 1323 | RawText s -> () |
@@ -1354,6 +1377,7 @@ let rec print_main_result_first_page_paragraph cg_bin_path path id tokens prev_n | @@ -1354,6 +1377,7 @@ let rec print_main_result_first_page_paragraph cg_bin_path path id tokens prev_n | ||
1354 | let p = List.hd sentences in | 1377 | let p = List.hd sentences in |
1355 | print_main_result_first_page_sentence cg_bin_path path id p.file_prefix tokens p.id prev_next_map p.sentence | 1378 | print_main_result_first_page_sentence cg_bin_path path id p.file_prefix tokens p.id prev_next_map p.sentence |
1356 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map paragraph) | 1379 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map paragraph) |
1380 | + | ErrorParagraph s -> print_not_parsed_main_result cg_bin_path path id "Er" s 0 prev_next_map | ||
1357 | 1381 | ||
1358 | let rec print_main_result_first_page_text cg_bin_path path id tokens = function | 1382 | let rec print_main_result_first_page_text cg_bin_path path id tokens = function |
1359 | RawText s -> () | 1383 | RawText s -> () |
integration/ENIAMpreIntegration.ml
@@ -331,6 +331,7 @@ let rec parse_paragraph mode tokens = function | @@ -331,6 +331,7 @@ let rec parse_paragraph mode tokens = function | ||
331 | let l = Xlist.rev_map l (fun (mode,paragraph) -> | 331 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
332 | mode, parse_paragraph mode tokens paragraph) in | 332 | mode, parse_paragraph mode tokens paragraph) in |
333 | AltParagraph(List.rev l) | 333 | AltParagraph(List.rev l) |
334 | + | ErrorParagraph s -> ErrorParagraph s | ||
334 | 335 | ||
335 | let rec parse_text mode tokens = function | 336 | let rec parse_text mode tokens = function |
336 | RawText s -> RawText s | 337 | RawText s -> RawText s |
lexSemantics/ENIAMlexSemantics.ml
@@ -124,6 +124,7 @@ let rec split_tokens_into_groups_paragraph a = function | @@ -124,6 +124,7 @@ let rec split_tokens_into_groups_paragraph a = function | ||
124 | Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence) | 124 | Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence) |
125 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> | 125 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> |
126 | split_tokens_into_groups_paragraph a paragraph) | 126 | split_tokens_into_groups_paragraph a paragraph) |
127 | + | ErrorParagraph s -> () | ||
127 | 128 | ||
128 | let rec split_tokens_into_groups_text a = function | 129 | let rec split_tokens_into_groups_text a = function |
129 | RawText s -> () | 130 | RawText s -> () |
@@ -461,7 +462,7 @@ let rec create_tokens_for_artificial_nodes_rec tokens lex_sems = function | @@ -461,7 +462,7 @@ let rec create_tokens_for_artificial_nodes_rec tokens lex_sems = function | ||
461 | let id = ExtArray.add tokens empty_token_env in | 462 | let id = ExtArray.add tokens empty_token_env in |
462 | let lex_sem = {empty_lex_sem with frames=[{empty_frame with meanings=[t.lemma, [t.lemma,0], unknown_meaning_weight]}]} in | 463 | let lex_sem = {empty_lex_sem with frames=[{empty_frame with meanings=[t.lemma, [t.lemma,0], unknown_meaning_weight]}]} in |
463 | let id2 = ExtArray.add lex_sems lex_sem in | 464 | let id2 = ExtArray.add lex_sems lex_sem in |
464 | - if id <>id2 then failwith "create_tokens_for_artificial_nodes_rec" else | 465 | + if id <> id2 then failwith "create_tokens_for_artificial_nodes_rec: tokens inconsistent with lex_sems" else |
465 | let t = if t.symbol = Dot then | 466 | let t = if t.symbol = Dot then |
466 | {t with symbol = match t.pos with | 467 | {t with symbol = match t.pos with |
467 | "<root>" -> Tuple[Val "<root>"] | 468 | "<root>" -> Tuple[Val "<root>"] |
lexSemantics/ENIAMvalence.ml
@@ -235,6 +235,12 @@ let transform_qub_pos lemma = function | @@ -235,6 +235,12 @@ let transform_qub_pos lemma = function | ||
235 | | QUB as morf -> [morf] | 235 | | QUB as morf -> [morf] |
236 | | pos -> failwith ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) | 236 | | pos -> failwith ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) |
237 | 237 | ||
238 | +let transform_interj_phrase lemma = function | ||
239 | + | phrase -> failwith ("transform_interj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) | ||
240 | + | ||
241 | +let transform_interj_pos lemma = function | ||
242 | + | pos -> failwith ("transform_interj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) | ||
243 | + | ||
238 | let transform_siebie_phrase lemma = function | 244 | let transform_siebie_phrase lemma = function |
239 | | phrase -> failwith ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) | 245 | | phrase -> failwith ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) |
240 | 246 | ||
@@ -363,31 +369,31 @@ let transform_preps morf = | @@ -363,31 +369,31 @@ let transform_preps morf = | ||
363 | | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c) | 369 | | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c) |
364 | | PrepNP(psem,prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(psem,prep,c) | 370 | | PrepNP(psem,prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(psem,prep,c) |
365 | | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c) | 371 | | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c) |
366 | - | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps" else morf | 372 | + | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps 1" else morf |
367 | | morf -> morf in | 373 | | morf -> morf in |
368 | match morf with | 374 | match morf with |
369 | | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case)) | 375 | | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case)) |
370 | - | ComparP _ -> failwith "transform_preps" | 376 | + | ComparP _ -> failwith "transform_preps 2" |
371 | | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case))) | 377 | | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case))) |
372 | | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case))) | 378 | | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case))) |
373 | | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf] | 379 | | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf] |
374 | | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf] | 380 | | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf] |
375 | - | LexArg(id,lex,COMPAR _) -> failwith "transform_preps" | ||
376 | - | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps" | 381 | + | LexArg(id,lex,COMPAR _) -> failwith "transform_preps 3" |
382 | + | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps 4" | ||
377 | | PrepNP(sem,"per",Str) -> [PrepNP(sem,"per",Case "nom");PrepNP(sem,"per",Case "voc")] (* FIXME: voc do poprawienie w leksykonie *) | 383 | | PrepNP(sem,"per",Str) -> [PrepNP(sem,"per",Case "nom");PrepNP(sem,"per",Case "voc")] (* FIXME: voc do poprawienie w leksykonie *) |
378 | | PrepNP(_,_,Case _) as morf -> [morf] | 384 | | PrepNP(_,_,Case _) as morf -> [morf] |
379 | | PrepAdjP(_,Case _) as morf -> [morf] | 385 | | PrepAdjP(_,Case _) as morf -> [morf] |
380 | | PrepNCP(_,Case _,_,_) as morf -> [morf] | 386 | | PrepNCP(_,Case _,_,_) as morf -> [morf] |
381 | - | PrepNP(_,"_",CaseUndef) as morf -> [morf] | ||
382 | - | PrepNP _ -> failwith "transform_preps" | ||
383 | - | PrepAdjP _ -> failwith "transform_preps" | ||
384 | - | PrepNCP _ -> failwith "transform_preps" | 387 | + | PrepNP(_,_,CaseUndef) as morf -> [morf] |
388 | + | PrepNP _ as morf -> failwith ("transform_preps 5: " ^ ENIAMwalStringOf.phrase morf) | ||
389 | + | PrepAdjP _ -> failwith "transform_preps 6" | ||
390 | + | PrepNCP _ -> failwith "transform_preps 7" | ||
385 | | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));] | 391 | | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));] |
386 | | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))] | 392 | | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))] |
387 | | LexArg(id,lex,PREP (Case _)) as morf -> [morf] | 393 | | LexArg(id,lex,PREP (Case _)) as morf -> [morf] |
388 | | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf] | 394 | | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf] |
389 | - | LexArg(id,lex,PREP _) -> failwith "transform_preps" | ||
390 | - | SimpleLexArg(lex,PREP _) -> failwith "transform_preps" | 395 | + | LexArg(id,lex,PREP _) -> failwith "transform_preps 8" |
396 | + | SimpleLexArg(lex,PREP _) -> failwith "transform_preps 9" | ||
391 | | morf -> [morf] | 397 | | morf -> [morf] |
392 | 398 | ||
393 | let transform_pers_schema lemma negation mood schema = | 399 | let transform_pers_schema lemma negation mood schema = |
@@ -488,6 +494,7 @@ let transform_schema pos lemma schema = | @@ -488,6 +494,7 @@ let transform_schema pos lemma schema = | ||
488 | | "comp" -> transform_comp_phrase,transform_comp_pos | 494 | | "comp" -> transform_comp_phrase,transform_comp_pos |
489 | | "qub" -> transform_qub_phrase,transform_qub_pos | 495 | | "qub" -> transform_qub_phrase,transform_qub_pos |
490 | | "siebie" -> transform_siebie_phrase,transform_siebie_pos | 496 | | "siebie" -> transform_siebie_phrase,transform_siebie_pos |
497 | + | "interj" -> transform_interj_phrase,transform_interj_pos | ||
491 | | _ -> failwith "transform_schema" | 498 | | _ -> failwith "transform_schema" |
492 | in | 499 | in |
493 | Xlist.map schema (fun s -> | 500 | Xlist.map schema (fun s -> |
@@ -539,6 +546,9 @@ let transform_entry pos lemma negation pred aspect schema = | @@ -539,6 +546,9 @@ let transform_entry pos lemma negation pred aspect schema = | ||
539 | if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5"); | 546 | if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5"); |
540 | Xlist.map ["congr";"rec"] (fun acm -> | 547 | Xlist.map ["congr";"rec"] (fun acm -> |
541 | [Acm,Eq,[acm]],transform_num_schema acm schema)) else | 548 | [Acm,Eq,[acm]],transform_num_schema acm schema)) else |
549 | + if pos = "interj" then ( | ||
550 | + if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 6"); | ||
551 | + [[],transform_schema "interj" lemma schema]) else | ||
542 | List.flatten (Xlist.map (expand_negation negation) (fun negation -> | 552 | List.flatten (Xlist.map (expand_negation negation) (fun negation -> |
543 | let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in | 553 | let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in |
544 | if pos = "fin" || pos = "bedzie" then | 554 | if pos = "fin" || pos = "bedzie" then |
lexSemantics/ENIAMwalReduce.ml
@@ -90,6 +90,7 @@ let select_comprep_adjuncts lexemes = | @@ -90,6 +90,7 @@ let select_comprep_adjuncts lexemes = | ||
90 | not (StringSet.is_empty (StringSet.intersection reqs lexemes)) then s :: l else l) | 90 | not (StringSet.is_empty (StringSet.intersection reqs lexemes)) then s :: l else l) |
91 | with Not_found -> l) | 91 | with Not_found -> l) |
92 | 92 | ||
93 | +(* FIXME: trzeba zanalizować interację tej procedury z Pro w schemacie w wersji z walentym i z semantyką dziedzinową *) | ||
93 | let set_necessary pos schema = | 94 | let set_necessary pos schema = |
94 | Xlist.map schema (fun p -> | 95 | Xlist.map schema (fun p -> |
95 | let nec = | 96 | let nec = |
@@ -101,6 +102,8 @@ let set_necessary pos schema = | @@ -101,6 +102,8 @@ let set_necessary pos schema = | ||
101 | | _ -> b) then Req else | 102 | | _ -> b) then Req else |
102 | if p.gf <> SUBJ && p.cr = [] (*&& p.ce = []*) then Opt else | 103 | if p.gf <> SUBJ && p.cr = [] (*&& p.ce = []*) then Opt else |
103 | if p.gf = SUBJ && pos = "impt" then ProNG else | 104 | if p.gf = SUBJ && pos = "impt" then ProNG else |
105 | + if p.gf = SUBJ && pos = "pact" then Opt else | ||
106 | + if p.gf = OBJ && pos = "ppas" then Opt else | ||
104 | if Xlist.fold p.morfs false (fun b -> function | 107 | if Xlist.fold p.morfs false (fun b -> function |
105 | NP NomAgr -> true | 108 | NP NomAgr -> true |
106 | | NCP(NomAgr,_,_) -> true | 109 | | NCP(NomAgr,_,_) -> true |
lexSemantics/ENIAMwalRenderer.ml
@@ -183,9 +183,11 @@ let render_phrase_cat cat = function | @@ -183,9 +183,11 @@ let render_phrase_cat cat = function | ||
183 | | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] | 183 | | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] |
184 | | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | 184 | | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] |
185 | | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | 185 | | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] |
186 | + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top] | ||
186 | | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] | 187 | | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] |
187 | | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | 188 | | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] |
188 | | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | 189 | | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] |
190 | + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top] | ||
189 | | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] | 191 | | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] |
190 | | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] | 192 | | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] |
191 | (* | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]*) | 193 | (* | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]*) |
morphology/resources/alt_supplement.tab
@@ -4,4 +4,12 @@ sobie siebie siebie:dat.loc | @@ -4,4 +4,12 @@ sobie siebie siebie:dat.loc | ||
4 | sobą siebie siebie:inst | 4 | sobą siebie siebie:inst |
5 | to to pred | 5 | to to pred |
6 | yay yay interj | 6 | yay yay interj |
7 | +świetnie świetnie interj | ||
8 | +doskonale doskonale interj | ||
9 | +idealnie idealnie interj | ||
10 | +zdecydowanie zdecydowanie interj | ||
11 | +ok ok interj | ||
12 | +super super interj | ||
13 | +dobrze dobrze interj | ||
14 | +dzięki dzięki interj | ||
7 | 15 |
semantics/ENIAMsemGraph.ml
@@ -25,7 +25,7 @@ let empty_concept = | @@ -25,7 +25,7 @@ let empty_concept = | ||
25 | {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list; | 25 | {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list; |
26 | c_left_input_pos: int; c_right_input_pos: int;*) c_relations=Dot; c_variable="",""; c_pos=(-1); c_cat=Dot; c_label=""; c_def_label=""} | 26 | c_left_input_pos: int; c_right_input_pos: int;*) c_relations=Dot; c_variable="",""; c_pos=(-1); c_cat=Dot; c_label=""; c_def_label=""} |
27 | 27 | ||
28 | -let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot} | 28 | +let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot; cx_label=""; cx_def_label=""} |
29 | 29 | ||
30 | let rec make_args_list = function | 30 | let rec make_args_list = function |
31 | Tuple l -> List.flatten (Xlist.map l make_args_list) | 31 | Tuple l -> List.flatten (Xlist.map l make_args_list) |
@@ -154,7 +154,7 @@ let create_normal_concept tokens lex_sems t cat proj = | @@ -154,7 +154,7 @@ let create_normal_concept tokens lex_sems t cat proj = | ||
154 | | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | 154 | | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
155 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in | 155 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in |
156 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in | 156 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
157 | - let _ = ExtArray.add lex_sems in | 157 | + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in |
158 | let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in | 158 | let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in |
159 | make_relation t (Context cx) else | 159 | make_relation t (Context cx) else |
160 | if t.pos = "adj" || t.pos = "adjc" || t.pos = "adjp" || t.pos = "adja" || t.pos = "pact" || t.pos = "ppas" || t.pos = "apron" || t.pos = "ordnum" || t.pos = "roman-adj" then | 160 | if t.pos = "adj" || t.pos = "adjc" || t.pos = "adjp" || t.pos = "adja" || t.pos = "pact" || t.pos = "ppas" || t.pos = "apron" || t.pos = "ordnum" || t.pos = "roman-adj" then |
@@ -249,12 +249,15 @@ let create_normal_concept tokens lex_sems t cat proj = | @@ -249,12 +249,15 @@ let create_normal_concept tokens lex_sems t cat proj = | ||
249 | if t.pos = "comp" then | 249 | if t.pos = "comp" then |
250 | make_relation t (SetContextName(c.c_sense,RemoveRelation("CORE","",c.c_relations))) else | 250 | make_relation t (SetContextName(c.c_sense,RemoveRelation("CORE","",c.c_relations))) else |
251 | if t.pos = "conj" then | 251 | if t.pos = "conj" then |
252 | - let c = {empty_context with cx_sense=t.meaning; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos} in | 252 | + let c = {empty_context with cx_sense=t.meaning; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat; cx_def_label=c.c_def_label; cx_label=c.c_label} in |
253 | let c = Xlist.fold t.attrs c (fun c -> function | 253 | let c = Xlist.fold t.attrs c (fun c -> function |
254 | | "NUM",_ -> c | 254 | | "NUM",_ -> c |
255 | | "CASE",_ -> c | 255 | | "CASE",_ -> c |
256 | | "GEND",_ -> c | 256 | | "GEND",_ -> c |
257 | | "PERS",_ -> c | 257 | | "PERS",_ -> c |
258 | + | "ASPECT",_ -> c | ||
259 | + | "controller",_ -> c | ||
260 | + | "controllee",_ -> c | ||
258 | | e,t -> failwith ("create_normal_concept conj: " ^ e)) in | 261 | | e,t -> failwith ("create_normal_concept conj: " ^ e)) in |
259 | ManageCoordination({t with attrs=[]; args=Dot},Context c) else | 262 | ManageCoordination({t with attrs=[]; args=Dot},Context c) else |
260 | (* if t.pos = "interj" then | 263 | (* if t.pos = "interj" then |
@@ -264,7 +267,10 @@ let create_normal_concept tokens lex_sems t cat proj = | @@ -264,7 +267,10 @@ let create_normal_concept tokens lex_sems t cat proj = | ||
264 | if t.pos = "sinterj" || t.pos = "interj" then | 267 | if t.pos = "sinterj" || t.pos = "interj" then |
265 | let c = Xlist.fold t.attrs c (fun c -> function | 268 | let c = Xlist.fold t.attrs c (fun c -> function |
266 | | e,t -> failwith ("create_normal_concept sinterj: " ^ e)) in | 269 | | e,t -> failwith ("create_normal_concept sinterj: " ^ e)) in |
267 | - make_relation t (Concept c) else | 270 | + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
271 | + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | ||
272 | + let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in | ||
273 | + make_relation t (Context cx) else | ||
268 | if t.lemma = "<root>" then t.args else | 274 | if t.lemma = "<root>" then t.args else |
269 | if t.lemma = "<merge>" then RemoveRelation("null","",t.args) else | 275 | if t.lemma = "<merge>" then RemoveRelation("null","",t.args) else |
270 | if t.pos = "interp" && t.lemma = "?" && t.args = Dot then SingleRelation(Val "int") else | 276 | if t.pos = "interp" && t.lemma = "?" && t.args = Dot then SingleRelation(Val "int") else |
@@ -511,7 +517,7 @@ let rec reduce_tree = function | @@ -511,7 +517,7 @@ let rec reduce_tree = function | ||
511 | (match reduce_tree t with | 517 | (match reduce_tree t with |
512 | Relation(r,a,t) -> | 518 | Relation(r,a,t) -> |
513 | if (r = r0 && a = a0) || r0 = "" then t else | 519 | if (r = r0 && a = a0) || r0 = "" then t else |
514 | - Context{empty_context with cx_contents= | 520 | + Context{empty_context with cx_cat=Val "Situation"; cx_contents= |
515 | Concept{empty_concept with c_relations=Relation(r,a,t)}; (*cx_variable=string_of_int id,""; cx_pos=c.c_pos*)} | 521 | Concept{empty_concept with c_relations=Relation(r,a,t)}; (*cx_variable=string_of_int id,""; cx_pos=c.c_pos*)} |
516 | (* | TripleRelation(r,a,s,t) -> | 522 | (* | TripleRelation(r,a,s,t) -> |
517 | Context{empty_context with cx_contents= | 523 | Context{empty_context with cx_contents= |
@@ -625,6 +631,7 @@ let rec simplify_tree = function | @@ -625,6 +631,7 @@ let rec simplify_tree = function | ||
625 | let l = Xlist.fold l [] (fun l t -> | 631 | let l = Xlist.fold l [] (fun l t -> |
626 | match simplify_tree t with | 632 | match simplify_tree t with |
627 | Dot -> l | 633 | Dot -> l |
634 | + | Tuple l2 -> l2 @ l | ||
628 | | t -> t :: l) in | 635 | | t -> t :: l) in |
629 | (match l with | 636 | (match l with |
630 | [] -> Dot | 637 | [] -> Dot |
@@ -655,7 +662,8 @@ let rec simplify_tree = function | @@ -655,7 +662,8 @@ let rec simplify_tree = function | ||
655 | c_cat = simplify_tree (Variant(e,lt3))} | 662 | c_cat = simplify_tree (Variant(e,lt3))} |
656 | | Context c -> | 663 | | Context c -> |
657 | let lt1,lt2,lt3 = Xlist.fold l ([],[],[]) (fun (lt1,lt2,lt3) -> function | 664 | let lt1,lt2,lt3 = Xlist.fold l ([],[],[]) (fun (lt1,lt2,lt3) -> function |
658 | - i,Context c2 -> if c.cx_sense = c2.cx_sense then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found | 665 | + i,Context c2 -> if c.cx_sense = c2.cx_sense && c.cx_label = c2.cx_label && |
666 | + c.cx_def_label = c2.cx_def_label then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found | ||
659 | | _ -> raise Not_found) in | 667 | | _ -> raise Not_found) in |
660 | let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in | 668 | let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in |
661 | Context{c with | 669 | Context{c with |
semantics/ENIAMsemGraphOf.ml
@@ -225,7 +225,9 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f | @@ -225,7 +225,9 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f | ||
225 | | Context t -> | 225 | | Context t -> |
226 | let id = !id_counter in | 226 | let id = !id_counter in |
227 | incr id_counter; | 227 | incr id_counter; |
228 | - fprintf file " subgraph cluster%d {\nlabel=\"%s%s\"\n" id | 228 | + fprintf file " subgraph cluster%d {\nlabel=\"%s%s%s%s\"\n" id |
229 | + (if t.cx_label="" then "" else "?" ^ t.cx_label ^ " ") | ||
230 | + (if t.cx_def_label="" then "" else "*" ^ t.cx_def_label ^ " ") | ||
229 | (if t.cx_cat=Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_cat ^ " ")) | 231 | (if t.cx_cat=Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_cat ^ " ")) |
230 | (if t.cx_sense = Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_sense)); | 232 | (if t.cx_sense = Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_sense)); |
231 | let iid = print_graph2_rec file false "" "" "" 0 t.cx_contents in | 233 | let iid = print_graph2_rec file false "" "" "" 0 t.cx_contents in |
semantics/ENIAMsemLatexOf.ml
@@ -66,7 +66,7 @@ let rec linear_term c = function | @@ -66,7 +66,7 @@ let rec linear_term c = function | ||
66 | | Context c -> | 66 | | Context c -> |
67 | "{\\left[\\begin{array}{ll}" ^ | 67 | "{\\left[\\begin{array}{ll}" ^ |
68 | (String.concat "\\\\ " (Xlist.map ([ | 68 | (String.concat "\\\\ " (Xlist.map ([ |
69 | - "SENSE",c.cx_sense;"CAT",c.cx_cat; | 69 | + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label; |
70 | "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); | 70 | "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); |
71 | "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> | 71 | "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> |
72 | "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}" | 72 | "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}" |
semantics/ENIAMsemStringOf.ml
@@ -43,7 +43,7 @@ let rec linear_term c = function | @@ -43,7 +43,7 @@ let rec linear_term c = function | ||
43 | | Context c -> | 43 | | Context c -> |
44 | "[" ^ | 44 | "[" ^ |
45 | (String.concat "; " (Xlist.map ([ | 45 | (String.concat "; " (Xlist.map ([ |
46 | - "SENSE",c.cx_sense;"CAT",c.cx_cat; | 46 | + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label; |
47 | "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); | 47 | "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); |
48 | "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> | 48 | "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> |
49 | e ^ ": " ^ (linear_term 0 t)))) ^ "]" | 49 | e ^ ": " ^ (linear_term 0 t)))) ^ "]" |
semantics/ENIAMsemTypes.ml
@@ -57,7 +57,7 @@ and concept = | @@ -57,7 +57,7 @@ and concept = | ||
57 | c_pos: int; c_cat: linear_term} | 57 | c_pos: int; c_cat: linear_term} |
58 | 58 | ||
59 | and context = | 59 | and context = |
60 | - {cx_sense: linear_term; cx_contents: linear_term; | 60 | + {cx_sense: linear_term; cx_contents: linear_term; cx_label: string; cx_def_label: string; |
61 | cx_relations: linear_term; cx_variable: (string * string); cx_pos: int; cx_cat: linear_term} | 61 | cx_relations: linear_term; cx_variable: (string * string); cx_pos: int; cx_cat: linear_term} |
62 | 62 | ||
63 | and linear_term = | 63 | and linear_term = |
semantics/ENIAMsemValence.ml
@@ -220,6 +220,19 @@ let string_of_arg arg = | @@ -220,6 +220,19 @@ let string_of_arg arg = | ||
220 | let string_of_position p = | 220 | let string_of_position p = |
221 | (string_of_argdir p.dir) ^ String.concat "+" (StringSet.to_list p.morfs) | 221 | (string_of_argdir p.dir) ^ String.concat "+" (StringSet.to_list p.morfs) |
222 | 222 | ||
223 | +let manage_arg p t = | ||
224 | + let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in | ||
225 | + let t = | ||
226 | + if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then | ||
227 | + SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) | ||
228 | + else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t) | ||
229 | + else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t | ||
230 | + else failwith "manage_arg: ni 2" in | ||
231 | + let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in | ||
232 | + let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in | ||
233 | + let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in | ||
234 | + t | ||
235 | + | ||
223 | let rec match_arg_positions lemma arg rev = function | 236 | let rec match_arg_positions lemma arg rev = function |
224 | p :: positions -> | 237 | p :: positions -> |
225 | (* Printf.printf "match_arg_positions 1: arg=%s rev=[%s] positions=%s :: [%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position)) (string_of_position p) (String.concat "; " (Xlist.map positions string_of_position)); *) | 238 | (* Printf.printf "match_arg_positions 1: arg=%s rev=[%s] positions=%s :: [%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position)) (string_of_position p) (String.concat "; " (Xlist.map positions string_of_position)); *) |
@@ -228,16 +241,13 @@ let rec match_arg_positions lemma arg rev = function | @@ -228,16 +241,13 @@ let rec match_arg_positions lemma arg rev = function | ||
228 | (match l with | 241 | (match l with |
229 | [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions lemma arg (p :: rev) positions | 242 | [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions lemma arg (p :: rev) positions |
230 | | [t] -> | 243 | | [t] -> |
231 | - let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in | ||
232 | - let t = | ||
233 | - if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then | ||
234 | - SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) | ||
235 | - else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t) | ||
236 | - else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t | ||
237 | - else failwith "match_arg_positions: ni 2" in | ||
238 | - let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in | ||
239 | - let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in | ||
240 | - let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in | 244 | + let t = manage_arg p t in |
245 | + if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) | ||
246 | + else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) | ||
247 | + | [t1;t2] -> (* FIXME: przydałoby się to uogólnić na listę dowolnej długości *) | ||
248 | + let t1 = manage_arg p t1 in | ||
249 | + let t2 = manage_arg p t2 in | ||
250 | + let t = Variant("",["1",t1;"2",t2]) in | ||
241 | if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) | 251 | if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) |
242 | else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) | 252 | else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) |
243 | | _ -> failwith ("match_arg_positions: lemma=" ^ lemma ^ " arg=" ^ string_of_arg arg ^ " position=" ^ string_of_position p)) | 253 | | _ -> failwith ("match_arg_positions: lemma=" ^ lemma ^ " arg=" ^ string_of_arg arg ^ " position=" ^ string_of_position p)) |
semantics/ENIAMsemXMLof.ml
@@ -49,8 +49,8 @@ let rec linear_term = function | @@ -49,8 +49,8 @@ let rec linear_term = function | ||
49 | Xml.Element("relations",[],[linear_term c.c_relations]); | 49 | Xml.Element("relations",[],[linear_term c.c_relations]); |
50 | Xml.Element("cat",[],[linear_term c.c_cat])]) | 50 | Xml.Element("cat",[],[linear_term c.c_cat])]) |
51 | | Context c -> | 51 | | Context c -> |
52 | - Xml.Element("Context", | ||
53 | - ["variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos], | 52 | + Xml.Element("Context",["label",c.cx_label;"def_label",c.cx_def_label; |
53 | + "variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos], | ||
54 | [Xml.Element("sense",[],[linear_term c.cx_sense]); | 54 | [Xml.Element("sense",[],[linear_term c.cx_sense]); |
55 | Xml.Element("contents",[],[linear_term c.cx_contents]); | 55 | Xml.Element("contents",[],[linear_term c.cx_contents]); |
56 | Xml.Element("relations",[],[linear_term c.cx_relations]); | 56 | Xml.Element("relations",[],[linear_term c.cx_relations]); |
subsyntax/ENIAMsubsyntax.ml
@@ -357,11 +357,14 @@ let parse_text_tokens tokens query = | @@ -357,11 +357,14 @@ let parse_text_tokens tokens query = | ||
357 | let paragraphs = List.rev (Xlist.fold paragraphs [] (fun l -> function "" -> l | s -> s :: l)) in | 357 | let paragraphs = List.rev (Xlist.fold paragraphs [] (fun l -> function "" -> l | s -> s :: l)) in |
358 | let n = if Xlist.size paragraphs = 1 then 0 else 1 in | 358 | let n = if Xlist.size paragraphs = 1 then 0 else 1 in |
359 | let paragraphs,_ = Xlist.fold paragraphs ([],n) (fun (paragraphs,n) paragraph -> | 359 | let paragraphs,_ = Xlist.fold paragraphs ([],n) (fun (paragraphs,n) paragraph -> |
360 | + try | ||
360 | let paths = parse paragraph in | 361 | let paths = parse paragraph in |
361 | (* print_endline "parse_text 1"; *) | 362 | (* print_endline "parse_text 1"; *) |
362 | let pid = if n = 0 then "" else string_of_int n ^ "_" in | 363 | let pid = if n = 0 then "" else string_of_int n ^ "_" in |
363 | let sentences = ENIAMsentences.split_into_sentences pid paragraph tokens paths in | 364 | let sentences = ENIAMsentences.split_into_sentences pid paragraph tokens paths in |
364 | - (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1) in | 365 | + (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1 |
366 | + with e -> | ||
367 | + (AltParagraph[Raw,RawParagraph paragraph; Error,ErrorParagraph (Printexc.to_string e)]) :: paragraphs, n+1) in | ||
365 | AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs)], tokens | 368 | AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs)], tokens |
366 | 369 | ||
367 | let parse_text query = | 370 | let parse_text query = |
subsyntax/ENIAMsubsyntaxHTMLof.ml
@@ -104,6 +104,7 @@ let rec html_of_paragraph tokens = function | @@ -104,6 +104,7 @@ let rec html_of_paragraph tokens = function | ||
104 | String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> | 104 | String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> |
105 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (ENIAMsubsyntaxStringOf.mode mode) (html_of_paragraph tokens paragraph))) ^ | 105 | sprintf "<tr><td>%s</td><td>%s</td></tr>" (ENIAMsubsyntaxStringOf.mode mode) (html_of_paragraph tokens paragraph))) ^ |
106 | "</table>" | 106 | "</table>" |
107 | + | ErrorParagraph s -> (*print_endline "ErrorParagraph";*) s | ||
107 | 108 | ||
108 | let rec html_of_text tokens = function | 109 | let rec html_of_text tokens = function |
109 | RawText s -> s | 110 | RawText s -> s |
subsyntax/ENIAMsubsyntaxStringOf.ml
@@ -28,6 +28,7 @@ let mode = function | @@ -28,6 +28,7 @@ let mode = function | ||
28 | | Mate -> "Mate" | 28 | | Mate -> "Mate" |
29 | | Swigra -> "Swigra" | 29 | | Swigra -> "Swigra" |
30 | | POLFIE -> "POLFIE" | 30 | | POLFIE -> "POLFIE" |
31 | + | Error -> "Error" | ||
31 | 32 | ||
32 | let token_extarray t = | 33 | let token_extarray t = |
33 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size t - 1) [] (fun l id -> | 34 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size t - 1) [] (fun l id -> |
@@ -73,6 +74,7 @@ let rec paragraph spaces t = function | @@ -73,6 +74,7 @@ let rec paragraph spaces t = function | ||
73 | | AltParagraph l -> | 74 | | AltParagraph l -> |
74 | String.concat "\n" (Xlist.map l (fun (m,p) -> | 75 | String.concat "\n" (Xlist.map l (fun (m,p) -> |
75 | sprintf "%sAltParagraph mode=%s %s" spaces (mode m) (paragraph "" t p))) | 76 | sprintf "%sAltParagraph mode=%s %s" spaces (mode m) (paragraph "" t p))) |
77 | + | ErrorParagraph s -> spaces ^ "ErrorParagraph: " ^ s | ||
76 | 78 | ||
77 | let rec text spaces t = function | 79 | let rec text spaces t = function |
78 | RawText s -> spaces ^ "RawText: " ^ s | 80 | RawText s -> spaces ^ "RawText: " ^ s |
subsyntax/ENIAMsubsyntaxTypes.ml
@@ -20,7 +20,7 @@ | @@ -20,7 +20,7 @@ | ||
20 | open ENIAMtokenizerTypes | 20 | open ENIAMtokenizerTypes |
21 | 21 | ||
22 | type mode = | 22 | type mode = |
23 | - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | 23 | + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | Error |
24 | 24 | ||
25 | type sentence = | 25 | type sentence = |
26 | RawSentence of string | 26 | RawSentence of string |
@@ -38,6 +38,7 @@ and paragraph = | @@ -38,6 +38,7 @@ and paragraph = | ||
38 | RawParagraph of string | 38 | RawParagraph of string |
39 | | StructParagraph of sentence_env list (* zdania *) | 39 | | StructParagraph of sentence_env list (* zdania *) |
40 | | AltParagraph of (mode * paragraph) list | 40 | | AltParagraph of (mode * paragraph) list |
41 | + | ErrorParagraph of string | ||
41 | 42 | ||
42 | type text = | 43 | type text = |
43 | RawText of string | 44 | RawText of string |
@@ -81,6 +82,7 @@ let int_of_mode = function | @@ -81,6 +82,7 @@ let int_of_mode = function | ||
81 | | Mate -> 4 | 82 | | Mate -> 4 |
82 | | Swigra -> 5 | 83 | | Swigra -> 5 |
83 | | POLFIE -> 6 | 84 | | POLFIE -> 6 |
85 | + | Error -> 7 | ||
84 | 86 | ||
85 | let compare_mode x y = | 87 | let compare_mode x y = |
86 | compare (int_of_mode x) (int_of_mode y) | 88 | compare (int_of_mode x) (int_of_mode y) |
@@ -109,6 +111,7 @@ let rec map_paragraph mode f = function | @@ -109,6 +111,7 @@ let rec map_paragraph mode f = function | ||
109 | let l = Xlist.rev_map l (fun (mode,paragraph) -> | 111 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
110 | mode, map_paragraph mode f paragraph) in | 112 | mode, map_paragraph mode f paragraph) in |
111 | AltParagraph(List.rev l) | 113 | AltParagraph(List.rev l) |
114 | + | ErrorParagraph s -> ErrorParagraph s | ||
112 | 115 | ||
113 | let rec map_text mode f = function | 116 | let rec map_text mode f = function |
114 | RawText s -> RawText s | 117 | RawText s -> RawText s |
@@ -137,6 +140,7 @@ let rec fold_paragraph mode s f = function | @@ -137,6 +140,7 @@ let rec fold_paragraph mode s f = function | ||
137 | | AltParagraph l -> | 140 | | AltParagraph l -> |
138 | Xlist.fold l s (fun s (mode,paragraph) -> | 141 | Xlist.fold l s (fun s (mode,paragraph) -> |
139 | fold_paragraph mode s f paragraph) | 142 | fold_paragraph mode s f paragraph) |
143 | + | ErrorParagraph _ -> s | ||
140 | 144 | ||
141 | let rec fold_text mode s f = function | 145 | let rec fold_text mode s f = function |
142 | RawText _ -> s | 146 | RawText _ -> s |
subsyntax/ENIAMsubsyntaxXMLof.ml
@@ -57,6 +57,7 @@ let rec paragraph m = function | @@ -57,6 +57,7 @@ let rec paragraph m = function | ||
57 | Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> | 57 | Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> |
58 | Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) | 58 | Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) |
59 | | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMsubsyntaxStringOf.mode m) t)) | 59 | | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMsubsyntaxStringOf.mode m) t)) |
60 | + | ErrorParagraph s -> Xml.Element("ErrorParagraph",set_mode m,[Xml.PCData s]) | ||
60 | 61 | ||
61 | let rec text m = function | 62 | let rec text m = function |
62 | RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) | 63 | RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) |
subsyntax/resources/brev.tab
@@ -708,7 +708,7 @@ wz. wzorowy subst:_:_:m3 | @@ -708,7 +708,7 @@ wz. wzorowy subst:_:_:m3 | ||
708 | wz . wzorowy adj:_:_:_:pos | 708 | wz . wzorowy adj:_:_:_:pos |
709 | wzgl . względnie adv:pos | 709 | wzgl . względnie adv:pos |
710 | x raz subst:_:_:m3 | 710 | x raz subst:_:_:m3 |
711 | -X październik subst:_:_:m3 | 711 | +#X październik subst:_:_:m3 |
712 | x . książę subst:_:_:m1 | 712 | x . książę subst:_:_:m1 |
713 | x . ksiądz subst:_:_:m1 | 713 | x . ksiądz subst:_:_:m1 |
714 | z . zeszyt subst:_:_:m3 | 714 | z . zeszyt subst:_:_:m3 |