diff --git a/LCGlexicon/ENIAM_LCGlexicon.ml b/LCGlexicon/ENIAM_LCGlexicon.ml index 2c26ff2..b6db494 100644 --- a/LCGlexicon/ENIAM_LCGlexicon.ml +++ b/LCGlexicon/ENIAM_LCGlexicon.ml @@ -165,7 +165,7 @@ let make_rules x_flag filename = dict_of_grammar lexicon let find_rules rules cats = - let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category " ^ cats.pos) in + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith ("find_rules: unable to find rules for category '" ^ cats.pos ^ "' lemma='" ^ cats.lemma ^ "'") in (* Printf.printf "find_rules: %s %s |rules|=%d\n" cats.lemma cats.pos (Xlist.size rules); *) let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> diff --git a/LCGlexicon/ENIAM_LCGlexiconParser.ml b/LCGlexicon/ENIAM_LCGlexiconParser.ml index 3133f95..74cd0a1 100644 --- a/LCGlexicon/ENIAM_LCGlexiconParser.ml +++ b/LCGlexicon/ENIAM_LCGlexiconParser.ml @@ -95,6 +95,7 @@ let match_relation = function let rec split_mid i0 rev = function [i,s] -> List.rev ((i,s) :: rev) + | (i1,s) :: (i2,"|") :: (i3,"|") :: l -> raise (ParseError("split_mid", "duplicated delimeter found", i2)) | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l | [] -> raise (ParseError("split_mid", "empty", i0)) | (i,s) :: l -> raise (ParseError("split_mid", "delimiter not found: " ^ String.concat " " (s :: Xlist.map l snd), i)) diff --git a/LCGlexicon/ENIAMcategoriesPL.ml b/LCGlexicon/ENIAMcategoriesPL.ml index 5c85ae2..0a3c8c7 100644 --- a/LCGlexicon/ENIAMcategoriesPL.ml +++ b/LCGlexicon/ENIAMcategoriesPL.ml @@ -400,8 +400,8 @@ let clarify_categories proper cat proj = function else [{empty_cats with lemma=lemma; pos="qub"; pos2="qub"}] | lemma,"comp",[] -> [{empty_cats with lemma=lemma; pos="comp"; pos2="comp"}] | lemma,"conj",[] -> [{empty_cats with lemma=lemma; pos="conj"; pos2="conj"}] - | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"}] - | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"}] + | lemma,"interj",[] -> [{empty_cats with lemma=lemma; pos="interj"; pos2="interj"; cat=cat; proj=proj}] + | lemma,"sinterj",[] -> [{empty_cats with lemma=lemma; pos="sinterj"; pos2="sinterj"; (*cat=cat; proj=proj*)}] | lemma,"burk",[] -> [{empty_cats with lemma=lemma; pos="burk"; pos2="burk"}] | ",","interp",[] -> [{empty_cats with lemma=","; pos="conj"; pos2="conj"}] | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}] @@ -617,7 +617,7 @@ let pos_categories = Xlist.fold [ "part",[Lemma;]; "comp",[Lemma;];(* ctype *) "conj",[Lemma;];(* ctype *) - "interj",[Lemma;]; + "interj",[Lemma;Cat;Proj;]; "sinterj",[Lemma;]; "burk",[Lemma;]; "interp",[Lemma;]; diff --git a/LCGlexicon/resources/subst_container.dat b/LCGlexicon/resources/subst_container.dat index 0b84d89..48fff6d 100644 --- a/LCGlexicon/resources/subst_container.dat +++ b/LCGlexicon/resources/subst_container.dat @@ -488,3 +488,4 @@ mrowie rodzaj rozdział gmach +zakres diff --git a/LCGparser/ENIAM_LCG_XMLof.ml b/LCGparser/ENIAM_LCG_XMLof.ml index 1619e20..117ab5f 100644 --- a/LCGparser/ENIAM_LCG_XMLof.ml +++ b/LCGparser/ENIAM_LCG_XMLof.ml @@ -40,7 +40,7 @@ let rec linear_term = function | App(s,t) -> Xml.Element("App",[],[linear_term s;linear_term t]) | Dot -> Xml.Element("Dot",[],[]) | Val s -> Xml.Element("Val",[],[Xml.PCData s]) - | SetAttr(e,s,t) -> Xml.Element("SetAttr",["label",v],[linear_term s;linear_term t]) + | SetAttr(e,s,t) -> Xml.Element("SetAttr",["label",e],[linear_term s;linear_term t]) | Fix(s,t) -> Xml.Element("Fix",[],[linear_term s;linear_term t]) | Empty t -> Xml.Element("Empty",[],[linear_term t]) | Apply t -> Xml.Element("Apply",[],[linear_term t]) @@ -59,4 +59,3 @@ let linear_term_array a = let l = Int.fold 0 (Array.length a - 1) [] (fun l i -> Xml.Element("element",["index",string_of_int i],[linear_term a.(i)]) :: l) in Xml.Element("array",[],List.rev l) - diff --git a/LCGparser/makefile b/LCGparser/makefile index ed94254..2c2173e 100755 --- a/LCGparser/makefile +++ b/LCGparser/makefile @@ -6,21 +6,21 @@ OCAMLFLAGS=$(INCLUDES) -g OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa INSTALLDIR=`ocamlc -where`/eniam -SOURCES= ENIAM_LCGtypes.ml ENIAM_LCGstringOf.ml ENIAM_LCGrules.ml ENIAM_LCGrenderer.ml ENIAM_LCGchart.ml ENIAM_LCGlatexOf.ml ENIAM_LCGreductions.ml ENIAM_LCGgraphOf.ml +SOURCES= ENIAM_LCGtypes.ml ENIAM_LCGstringOf.ml ENIAM_LCGrules.ml ENIAM_LCGrenderer.ml ENIAM_LCGchart.ml ENIAM_LCGlatexOf.ml ENIAM_LCGreductions.ml ENIAM_LCGgraphOf.ml ENIAM_LCG_XMLof.ml all: eniam-lcg-parser.cma eniam-lcg-parser.cmxa install: all mkdir -p $(INSTALLDIR) cp eniam-lcg-parser.cmxa eniam-lcg-parser.a eniam-lcg-parser.cma $(INSTALLDIR) - cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi $(INSTALLDIR) - cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx $(INSTALLDIR) + cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi ENIAM_LCG_XMLof.cmi $(INSTALLDIR) + cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx ENIAM_LCG_XMLof.cmx $(INSTALLDIR) install-local: all mkdir -p $(INSTALLDIR) cp eniam-lcg-parser.cmxa eniam-lcg-parser.a eniam-lcg-parser.cma $(INSTALLDIR) - cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi $(INSTALLDIR) - cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx $(INSTALLDIR) + cp ENIAM_LCGtypes.cmi ENIAM_LCGstringOf.cmi ENIAM_LCGrules.cmi ENIAM_LCGrenderer.cmi ENIAM_LCGchart.cmi ENIAM_LCGlatexOf.cmi ENIAM_LCGreductions.cmi ENIAM_LCGgraphOf.cmi ENIAM_LCG_XMLof.cmi $(INSTALLDIR) + cp ENIAM_LCGtypes.cmx ENIAM_LCGstringOf.cmx ENIAM_LCGrules.cmx ENIAM_LCGrenderer.cmx ENIAM_LCGchart.cmx ENIAM_LCGlatexOf.cmx ENIAM_LCGreductions.cmx ENIAM_LCGgraphOf.cmx ENIAM_LCG_XMLof.cmx $(INSTALLDIR) eniam-lcg-parser.cma: $(SOURCES) ocamlc -linkall -a -o eniam-lcg-parser.cma $(OCAMLFLAGS) $^ diff --git a/exec/ENIAMexec.ml b/exec/ENIAMexec.ml index 1107782..1b3cbd2 100644 --- a/exec/ENIAMexec.ml +++ b/exec/ENIAMexec.ml @@ -33,6 +33,7 @@ let translate_mode = function | ENIAMsubsyntaxTypes.Mate -> Mate | ENIAMsubsyntaxTypes.Swigra -> Swigra | ENIAMsubsyntaxTypes.POLFIE -> POLFIE + | ENIAMsubsyntaxTypes.Error -> Error let rec translate_sentence = function ENIAMsubsyntaxTypes.RawSentence s -> RawSentence s @@ -53,6 +54,7 @@ let rec translate_paragraph = function sentence=translate_sentence p.ENIAMsubsyntaxTypes.sentence})) | ENIAMsubsyntaxTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) -> translate_mode mode, translate_paragraph paragraph)) + | ENIAMsubsyntaxTypes.ErrorParagraph s -> ErrorParagraph s let rec translate_text = function ENIAMsubsyntaxTypes.RawText s -> RawText s @@ -64,7 +66,9 @@ let rec translate_text = function let clarify_categories cats token = match token.ENIAMtokenizerTypes.token with ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories false cat proj (lemma,pos,interp))))) + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> + (* Printf.printf "lemma=%s pos=%s cat=%s proj=%s\n%!" lemma pos cat (String.concat "," proj); *) + ENIAMcategoriesPL.clarify_categories false cat proj (lemma,pos,interp))))) | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) -> List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,proj) -> ENIAMcategoriesPL.clarify_categories true cat proj (lemma,pos,interp))))) | ENIAMtokenizerTypes.Interp lemma -> diff --git a/exec/ENIAMexecTypes.ml b/exec/ENIAMexecTypes.ml index c902446..72a0648 100644 --- a/exec/ENIAMexecTypes.ml +++ b/exec/ENIAMexecTypes.ml @@ -78,7 +78,7 @@ type semantic_processing_result = { } *) type mode = - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | Error type sentence = RawSentence of string @@ -98,6 +98,7 @@ and paragraph = RawParagraph of string | StructParagraph of paragraph_record list (* zdania *) | AltParagraph of (mode * paragraph) list + | ErrorParagraph of string type text = RawText of string @@ -267,6 +268,7 @@ let rec map_paragraph mode f = function let l = Xlist.rev_map l (fun (mode,paragraph) -> mode, map_paragraph mode f paragraph) in AltParagraph(List.rev l) + | ErrorParagraph s -> ErrorParagraph s let rec map_text mode f = function RawText s -> RawText s @@ -295,6 +297,7 @@ let rec fold_paragraph mode s f = function | AltParagraph l -> Xlist.fold l s (fun s (mode,paragraph) -> fold_paragraph mode s f paragraph) + | ErrorParagraph _ -> s let rec fold_text mode s f = function RawText _ -> s diff --git a/exec/ENIAMexecXMLof.ml b/exec/ENIAMexecXMLof.ml index 904a02c..b9c2388 100644 --- a/exec/ENIAMexecXMLof.ml +++ b/exec/ENIAMexecXMLof.ml @@ -64,6 +64,7 @@ let rec paragraph m = function Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMvisualization.string_of_mode m) t)) + | ErrorParagraph s -> Xml.Element("ErrorParagraph",set_mode m,[Xml.PCData s]) let rec text m = function RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) diff --git a/exec/ENIAMselectSent.ml b/exec/ENIAMselectSent.ml index 2b08a39..b586af4 100644 --- a/exec/ENIAMselectSent.ml +++ b/exec/ENIAMselectSent.ml @@ -69,6 +69,7 @@ let rec select_sentence_modes_paragraph = function let l = Xlist.rev_map l (fun (mode,paragraph) -> mode, select_sentence_modes_paragraph paragraph) in AltParagraph(List.rev l) + | ErrorParagraph s -> ErrorParagraph s let rec select_sentence_modes_text = function RawText s -> RawText s @@ -148,6 +149,7 @@ let rec select_sentences_paragraph mode = function let l = Xlist.rev_map l (fun (mode,paragraph) -> mode, select_sentences_paragraph mode paragraph) in AltParagraph(List.rev l) + | ErrorParagraph s -> ErrorParagraph s let rec select_sentences_text mode = function RawText s -> RawText s diff --git a/exec/ENIAMvisualization.ml b/exec/ENIAMvisualization.ml index b4a25c5..8d27814 100644 --- a/exec/ENIAMvisualization.ml +++ b/exec/ENIAMvisualization.ml @@ -667,6 +667,7 @@ let string_of_mode = function | Mate -> "Mate" | Swigra -> "Swigra" | POLFIE -> "POLFIE" + | Error -> "Error" (* (*let rec string_of_sentence = function RawSentence s -> sprintf "RawSentence(%s)" s @@ -1065,9 +1066,27 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ "" | SemNotValidated -> + if verbosity < 2 then () else ( + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") result.dependency_tree6b; + ENIAM_LCGgraphOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") result.dependency_tree9; + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_6b_dependency_tree") "a3" result.dependency_tree6b; + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_7_dependency_tree") "a2" result.dependency_tree7; + ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_8_dependency_tree") "a3" result.dependency_tree8; + ENIAM_LCGlatexOf.print_dependency_tree path (file_prefix ^ "_9_dependency_tree") "a3" result.dependency_tree9; + ENIAMsemLatexOf.print_semantic_graph path (file_prefix ^ "_10_semantic_graph") "a3" result.semantic_graph10; + ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_11_semantic_graph") "" result.semantic_graph11); if verbosity = 0 then () else ( ENIAMsemGraphOf.print_semantic_graph2 path (file_prefix ^ "_12_semantic_graph") "" result.semantic_graph12); sprintf "<font color=\"red\">sem_not_validated</font>: %s paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.msg result.paths_size result.chart_size result.dependency_tree_size ^ + (if verbosity < 2 then "" else + sprintf "<BR><A HREF=\"%s_6b_dependency_tree.pdf\">Dependency Tree References 6b</A>\n" file_prefix ^ + sprintf "<BR><A HREF=\"%s_7_dependency_tree.pdf\">Dependency Tree References 7</A>\n" file_prefix ^ + sprintf "<BR><A HREF=\"%s_8_dependency_tree.pdf\">Dependency Tree References 8</A>\n" file_prefix ^ + sprintf "<BR><A HREF=\"%s_9_dependency_tree.pdf\">Dependency Tree References 9</A>\n" file_prefix ^ + sprintf "<BR><IMG SRC=\"%s_6b_dependency_tree.png\">\n" file_prefix ^ + sprintf "<BR><IMG SRC=\"%s_9_dependency_tree.png\">\n" file_prefix ^ + sprintf "<BR><A HREF=\"%s_10_semantic_graph.pdf\">Semantic Graph References 10</A>\n" file_prefix ^ + sprintf "<BR><IMG SRC=\"%s_11_semantic_graph.png\">\n" file_prefix) ^ (if verbosity = 0 then "" else sprintf "<BR><IMG SRC=\"%s_12_semantic_graph.png\">\n" file_prefix) ^ "" @@ -1174,6 +1193,7 @@ let file_prefix_of_mode = function | Mate -> "M" | Swigra -> "S" | POLFIE -> "P" + | Error -> "Er" let rec html_of_sentence path file_prefix mode img verbosity tokens = function RawSentence s -> escape_html s @@ -1204,6 +1224,7 @@ let rec html_of_paragraph path mode img verbosity tokens = function String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> sprintf "<tr><td>%s</td><td>%s</td></tr>" (string_of_mode mode) (html_of_paragraph path mode img verbosity tokens paragraph))) ^ "</table>" + | ErrorParagraph s -> sprintf "<font color=\"red\">subsyntax_error</font>: %s\n" (escape_html s) let rec html_of_text path mode img verbosity tokens = function RawText s -> escape_html s @@ -1236,6 +1257,7 @@ let rec find_prev_next_paragraph rev = function | StructParagraph sentences -> Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.id p.file_prefix rev p.sentence) | AltParagraph l -> Xlist.fold l rev (fun rev (mode,paragraph) -> find_prev_next_paragraph rev paragraph) + | ErrorParagraph s -> rev let rec make_prev_next_map map prev = function [x] -> StringMap.add map x (prev,"") @@ -1295,6 +1317,7 @@ let rec print_main_result_paragraph cg_bin_path path id tokens prev_next_map = f | StructParagraph sentences -> Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id p.file_prefix tokens p.id prev_next_map p.sentence) | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph cg_bin_path path id tokens prev_next_map paragraph) + | ErrorParagraph s -> print_not_parsed_main_result cg_bin_path path id "Er" s 0 prev_next_map let rec print_main_result_text cg_bin_path path id tokens = function RawText s -> () @@ -1354,6 +1377,7 @@ let rec print_main_result_first_page_paragraph cg_bin_path path id tokens prev_n let p = List.hd sentences in print_main_result_first_page_sentence cg_bin_path path id p.file_prefix tokens p.id prev_next_map p.sentence | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map paragraph) + | ErrorParagraph s -> print_not_parsed_main_result cg_bin_path path id "Er" s 0 prev_next_map let rec print_main_result_first_page_text cg_bin_path path id tokens = function RawText s -> () diff --git a/integration/ENIAMpreIntegration.ml b/integration/ENIAMpreIntegration.ml index 31c0243..424e5d8 100644 --- a/integration/ENIAMpreIntegration.ml +++ b/integration/ENIAMpreIntegration.ml @@ -331,6 +331,7 @@ let rec parse_paragraph mode tokens = function let l = Xlist.rev_map l (fun (mode,paragraph) -> mode, parse_paragraph mode tokens paragraph) in AltParagraph(List.rev l) + | ErrorParagraph s -> ErrorParagraph s let rec parse_text mode tokens = function RawText s -> RawText s diff --git a/lexSemantics/ENIAMlexSemantics.ml b/lexSemantics/ENIAMlexSemantics.ml index 495ce08..ac89b8f 100644 --- a/lexSemantics/ENIAMlexSemantics.ml +++ b/lexSemantics/ENIAMlexSemantics.ml @@ -124,6 +124,7 @@ let rec split_tokens_into_groups_paragraph a = function Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence) | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> split_tokens_into_groups_paragraph a paragraph) + | ErrorParagraph s -> () let rec split_tokens_into_groups_text a = function RawText s -> () @@ -461,7 +462,7 @@ let rec create_tokens_for_artificial_nodes_rec tokens lex_sems = function let id = ExtArray.add tokens empty_token_env in let lex_sem = {empty_lex_sem with frames=[{empty_frame with meanings=[t.lemma, [t.lemma,0], unknown_meaning_weight]}]} in let id2 = ExtArray.add lex_sems lex_sem in - if id <>id2 then failwith "create_tokens_for_artificial_nodes_rec" else + if id <> id2 then failwith "create_tokens_for_artificial_nodes_rec: tokens inconsistent with lex_sems" else let t = if t.symbol = Dot then {t with symbol = match t.pos with "<root>" -> Tuple[Val "<root>"] diff --git a/lexSemantics/ENIAMvalence.ml b/lexSemantics/ENIAMvalence.ml index 8726f10..7e2888b 100644 --- a/lexSemantics/ENIAMvalence.ml +++ b/lexSemantics/ENIAMvalence.ml @@ -235,6 +235,12 @@ let transform_qub_pos lemma = function | QUB as morf -> [morf] | pos -> failwith ("transform_qub_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) +let transform_interj_phrase lemma = function + | phrase -> failwith ("transform_interj_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) + +let transform_interj_pos lemma = function + | pos -> failwith ("transform_interj_pos: " ^ lemma ^ " " ^ ENIAMwalStringOf.pos pos) + let transform_siebie_phrase lemma = function | phrase -> failwith ("transform_siebie_phrase: " ^ lemma ^ " " ^ ENIAMwalStringOf.phrase phrase) @@ -363,31 +369,31 @@ let transform_preps morf = | SimpleLexArg(lex,PREP c) -> if is_compar lex then SimpleLexArg(lex,COMPAR c) else SimpleLexArg(lex,PREP c) | PrepNP(psem,prep,c) -> if is_compar prep then ComparP(prep,c) else PrepNP(psem,prep,c) | PrepAdjP(prep,c) -> if is_compar prep then ComparP(prep,c) else PrepAdjP(prep,c) - | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps" else morf + | PrepNCP(prep,case,ctype,comp) as morf -> if is_compar prep then failwith "transform_preps 1" else morf | morf -> morf in match morf with | ComparP(prep,Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> ComparP(prep,Case case)) - | ComparP _ -> failwith "transform_preps" + | ComparP _ -> failwith "transform_preps 2" | LexArg(id,lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> LexArg(id,lex,COMPAR (Case case))) | SimpleLexArg(lex,COMPAR Str) -> Xlist.map ["nom";"gen";"dat";"acc";"inst";"postp"] (fun case -> SimpleLexArg(lex,COMPAR (Case case))) | LexArg(id,lex,COMPAR (Case _)) as morf -> [morf] | SimpleLexArg(lex,COMPAR (Case _)) as morf -> [morf] - | LexArg(id,lex,COMPAR _) -> failwith "transform_preps" - | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps" + | LexArg(id,lex,COMPAR _) -> failwith "transform_preps 3" + | SimpleLexArg(lex,COMPAR _) -> failwith "transform_preps 4" | PrepNP(sem,"per",Str) -> [PrepNP(sem,"per",Case "nom");PrepNP(sem,"per",Case "voc")] (* FIXME: voc do poprawienie w leksykonie *) | PrepNP(_,_,Case _) as morf -> [morf] | PrepAdjP(_,Case _) as morf -> [morf] | PrepNCP(_,Case _,_,_) as morf -> [morf] - | PrepNP(_,"_",CaseUndef) as morf -> [morf] - | PrepNP _ -> failwith "transform_preps" - | PrepAdjP _ -> failwith "transform_preps" - | PrepNCP _ -> failwith "transform_preps" + | PrepNP(_,_,CaseUndef) as morf -> [morf] + | PrepNP _ as morf -> failwith ("transform_preps 5: " ^ ENIAMwalStringOf.phrase morf) + | PrepAdjP _ -> failwith "transform_preps 6" + | PrepNCP _ -> failwith "transform_preps 7" | LexArg(id,"w",PREP Str) -> [LexArg(id,"w",PREP (Case "acc"));LexArg(id,"w",PREP (Case "loc"));] | SimpleLexArg("w",PREP Str) -> [SimpleLexArg("w",PREP (Case "acc"));SimpleLexArg("w",PREP (Case "loc"))] | LexArg(id,lex,PREP (Case _)) as morf -> [morf] | SimpleLexArg(lex,PREP (Case _)) as morf -> [morf] - | LexArg(id,lex,PREP _) -> failwith "transform_preps" - | SimpleLexArg(lex,PREP _) -> failwith "transform_preps" + | LexArg(id,lex,PREP _) -> failwith "transform_preps 8" + | SimpleLexArg(lex,PREP _) -> failwith "transform_preps 9" | morf -> [morf] let transform_pers_schema lemma negation mood schema = @@ -488,6 +494,7 @@ let transform_schema pos lemma schema = | "comp" -> transform_comp_phrase,transform_comp_pos | "qub" -> transform_qub_phrase,transform_qub_pos | "siebie" -> transform_siebie_phrase,transform_siebie_pos + | "interj" -> transform_interj_phrase,transform_interj_pos | _ -> failwith "transform_schema" in Xlist.map schema (fun s -> @@ -539,6 +546,9 @@ let transform_entry pos lemma negation pred aspect schema = if negation <> NegationUndef || aspect <> AspectUndef then failwith ("transform_entry 5"); Xlist.map ["congr";"rec"] (fun acm -> [Acm,Eq,[acm]],transform_num_schema acm schema)) else + if pos = "interj" then ( + if negation <> NegationUndef || pred <> PredFalse || aspect <> AspectUndef then failwith ("transform_entry 6"); + [[],transform_schema "interj" lemma schema]) else List.flatten (Xlist.map (expand_negation negation) (fun negation -> let sel = [Negation,Eq,[ENIAMwalStringOf.negation negation]] @ aspect_sel aspect in if pos = "fin" || pos = "bedzie" then diff --git a/lexSemantics/ENIAMwalReduce.ml b/lexSemantics/ENIAMwalReduce.ml index c1ea21c..d8e5f10 100644 --- a/lexSemantics/ENIAMwalReduce.ml +++ b/lexSemantics/ENIAMwalReduce.ml @@ -90,6 +90,7 @@ let select_comprep_adjuncts lexemes = not (StringSet.is_empty (StringSet.intersection reqs lexemes)) then s :: l else l) with Not_found -> l) +(* FIXME: trzeba zanalizować interację tej procedury z Pro w schemacie w wersji z walentym i z semantyką dziedzinową *) let set_necessary pos schema = Xlist.map schema (fun p -> let nec = @@ -101,6 +102,8 @@ let set_necessary pos schema = | _ -> b) then Req else if p.gf <> SUBJ && p.cr = [] (*&& p.ce = []*) then Opt else if p.gf = SUBJ && pos = "impt" then ProNG else + if p.gf = SUBJ && pos = "pact" then Opt else + if p.gf = OBJ && pos = "ppas" then Opt else if Xlist.fold p.morfs false (fun b -> function NP NomAgr -> true | NCP(NomAgr,_,_) -> true diff --git a/lexSemantics/ENIAMwalRenderer.ml b/lexSemantics/ENIAMwalRenderer.ml index 2f62d83..4cda25e 100644 --- a/lexSemantics/ENIAMwalRenderer.ml +++ b/lexSemantics/ENIAMwalRenderer.ml @@ -183,9 +183,11 @@ let render_phrase_cat cat = function | NP CaseUndef -> Tensor[Atom "np"; Atom cat; Top; Top; Top; Top] | PrepNP(Psem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] | PrepNP(Psem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Top; Top] + | PrepNP(Psem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Top] | PrepNP(Psem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "sem"; Atom prep; Atom case] | PrepNP(Pnosem,"",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] | PrepNP(Pnosem,"_",CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Top; Top] + | PrepNP(Pnosem,prep,CaseUndef) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Top] | PrepNP(Pnosem,prep,Case case) -> Tensor[Atom "prepnp"; Atom cat; Atom "nosem"; Atom prep; Atom case] | AdjP(Case case) -> Tensor[Atom "adjp"; Atom cat; Top; Atom case; Top] (* | AdjP NomAgr -> Tensor[Atom "adjp"; AVar "number"; Atom "nom"; AVar "gender"]*) diff --git a/morphology/resources/alt_supplement.tab b/morphology/resources/alt_supplement.tab index 6a8c9c4..85c5579 100644 --- a/morphology/resources/alt_supplement.tab +++ b/morphology/resources/alt_supplement.tab @@ -4,4 +4,12 @@ sobie siebie siebie:dat.loc sobą siebie siebie:inst to to pred yay yay interj +świetnie świetnie interj +doskonale doskonale interj +idealnie idealnie interj +zdecydowanie zdecydowanie interj +ok ok interj +super super interj +dobrze dobrze interj +dzięki dzięki interj diff --git a/semantics/ENIAMsemGraph.ml b/semantics/ENIAMsemGraph.ml index 690c2e5..a2bf414 100644 --- a/semantics/ENIAMsemGraph.ml +++ b/semantics/ENIAMsemGraph.ml @@ -25,7 +25,7 @@ let empty_concept = {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list; c_left_input_pos: int; c_right_input_pos: int;*) c_relations=Dot; c_variable="",""; c_pos=(-1); c_cat=Dot; c_label=""; c_def_label=""} -let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot} +let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot; cx_label=""; cx_def_label=""} let rec make_args_list = function Tuple l -> List.flatten (Xlist.map l make_args_list) @@ -154,7 +154,7 @@ let create_normal_concept tokens lex_sems t cat proj = | "NEGATION",Val "neg" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | e,t -> failwith ("create_normal_concept verb: " ^ e)) in let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in - let _ = ExtArray.add lex_sems in + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in make_relation t (Context cx) else if t.pos = "adj" || t.pos = "adjc" || t.pos = "adjp" || t.pos = "adja" || t.pos = "pact" || t.pos = "ppas" || t.pos = "apron" || t.pos = "ordnum" || t.pos = "roman-adj" then @@ -249,12 +249,15 @@ let create_normal_concept tokens lex_sems t cat proj = if t.pos = "comp" then make_relation t (SetContextName(c.c_sense,RemoveRelation("CORE","",c.c_relations))) else if t.pos = "conj" then - let c = {empty_context with cx_sense=t.meaning; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos} in + let c = {empty_context with cx_sense=t.meaning; cx_contents=t.args; cx_variable=c.c_variable; cx_pos=c.c_pos; cx_cat=c.c_cat; cx_def_label=c.c_def_label; cx_label=c.c_label} in let c = Xlist.fold t.attrs c (fun c -> function | "NUM",_ -> c | "CASE",_ -> c | "GEND",_ -> c | "PERS",_ -> c + | "ASPECT",_ -> c + | "controller",_ -> c + | "controllee",_ -> c | e,t -> failwith ("create_normal_concept conj: " ^ e)) in ManageCoordination({t with attrs=[]; args=Dot},Context c) else (* if t.pos = "interj" then @@ -264,7 +267,10 @@ let create_normal_concept tokens lex_sems t cat proj = if t.pos = "sinterj" || t.pos = "interj" then let c = Xlist.fold t.attrs c (fun c -> function | e,t -> failwith ("create_normal_concept sinterj: " ^ e)) in - make_relation t (Concept c) else + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in + let cx = {empty_context with cx_contents=add_proj proj c; cx_variable=string_of_int id,""; cx_pos=c.c_pos; cx_cat=Val "Situation"} in + make_relation t (Context cx) else if t.lemma = "<root>" then t.args else if t.lemma = "<merge>" then RemoveRelation("null","",t.args) else if t.pos = "interp" && t.lemma = "?" && t.args = Dot then SingleRelation(Val "int") else @@ -511,7 +517,7 @@ let rec reduce_tree = function (match reduce_tree t with Relation(r,a,t) -> if (r = r0 && a = a0) || r0 = "" then t else - Context{empty_context with cx_contents= + Context{empty_context with cx_cat=Val "Situation"; cx_contents= Concept{empty_concept with c_relations=Relation(r,a,t)}; (*cx_variable=string_of_int id,""; cx_pos=c.c_pos*)} (* | TripleRelation(r,a,s,t) -> Context{empty_context with cx_contents= @@ -625,6 +631,7 @@ let rec simplify_tree = function let l = Xlist.fold l [] (fun l t -> match simplify_tree t with Dot -> l + | Tuple l2 -> l2 @ l | t -> t :: l) in (match l with [] -> Dot @@ -655,7 +662,8 @@ let rec simplify_tree = function c_cat = simplify_tree (Variant(e,lt3))} | Context c -> let lt1,lt2,lt3 = Xlist.fold l ([],[],[]) (fun (lt1,lt2,lt3) -> function - i,Context c2 -> if c.cx_sense = c2.cx_sense then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found + i,Context c2 -> if c.cx_sense = c2.cx_sense && c.cx_label = c2.cx_label && + c.cx_def_label = c2.cx_def_label then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found | _ -> raise Not_found) in let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in Context{c with diff --git a/semantics/ENIAMsemGraphOf.ml b/semantics/ENIAMsemGraphOf.ml index f3b2e53..f6f9183 100644 --- a/semantics/ENIAMsemGraphOf.ml +++ b/semantics/ENIAMsemGraphOf.ml @@ -225,7 +225,9 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f | Context t -> let id = !id_counter in incr id_counter; - fprintf file " subgraph cluster%d {\nlabel=\"%s%s\"\n" id + fprintf file " subgraph cluster%d {\nlabel=\"%s%s%s%s\"\n" id + (if t.cx_label="" then "" else "?" ^ t.cx_label ^ " ") + (if t.cx_def_label="" then "" else "*" ^ t.cx_def_label ^ " ") (if t.cx_cat=Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_cat ^ " ")) (if t.cx_sense = Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.cx_sense)); let iid = print_graph2_rec file false "" "" "" 0 t.cx_contents in diff --git a/semantics/ENIAMsemLatexOf.ml b/semantics/ENIAMsemLatexOf.ml index 71375fa..a9cc9a8 100644 --- a/semantics/ENIAMsemLatexOf.ml +++ b/semantics/ENIAMsemLatexOf.ml @@ -66,7 +66,7 @@ let rec linear_term c = function | Context c -> "{\\left[\\begin{array}{ll}" ^ (String.concat "\\\\ " (Xlist.map ([ - "SENSE",c.cx_sense;"CAT",c.cx_cat; + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label; "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}" diff --git a/semantics/ENIAMsemStringOf.ml b/semantics/ENIAMsemStringOf.ml index 3cd69e0..29cfcc3 100644 --- a/semantics/ENIAMsemStringOf.ml +++ b/semantics/ENIAMsemStringOf.ml @@ -43,7 +43,7 @@ let rec linear_term c = function | Context c -> "[" ^ (String.concat "; " (Xlist.map ([ - "SENSE",c.cx_sense;"CAT",c.cx_cat; + "SENSE",c.cx_sense;"CAT",c.cx_cat;"LABEL",Val c.cx_label;"DEF-LABEL",Val c.cx_def_label; "VARIABLE",Val (fst c.cx_variable ^ "_" ^ snd c.cx_variable);"POS",Val (string_of_int c.cx_pos); "RELATIONS",c.cx_relations;"CONTENTS",c.cx_contents]) (fun (e,t) -> e ^ ": " ^ (linear_term 0 t)))) ^ "]" diff --git a/semantics/ENIAMsemTypes.ml b/semantics/ENIAMsemTypes.ml index a038a47..72293b5 100644 --- a/semantics/ENIAMsemTypes.ml +++ b/semantics/ENIAMsemTypes.ml @@ -57,7 +57,7 @@ and concept = c_pos: int; c_cat: linear_term} and context = - {cx_sense: linear_term; cx_contents: linear_term; + {cx_sense: linear_term; cx_contents: linear_term; cx_label: string; cx_def_label: string; cx_relations: linear_term; cx_variable: (string * string); cx_pos: int; cx_cat: linear_term} and linear_term = diff --git a/semantics/ENIAMsemValence.ml b/semantics/ENIAMsemValence.ml index 818560b..973bfb6 100644 --- a/semantics/ENIAMsemValence.ml +++ b/semantics/ENIAMsemValence.ml @@ -220,6 +220,19 @@ let string_of_arg arg = let string_of_position p = (string_of_argdir p.dir) ^ String.concat "+" (StringSet.to_list p.morfs) +let manage_arg p t = + let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in + let t = + if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then + SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) + else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t) + else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t + else failwith "manage_arg: ni 2" in + let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in + let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in + let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in + t + let rec match_arg_positions lemma arg rev = function p :: positions -> (* Printf.printf "match_arg_positions 1: arg=%s rev=[%s] positions=%s :: [%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position)) (string_of_position p) (String.concat "; " (Xlist.map positions string_of_position)); *) @@ -228,16 +241,13 @@ let rec match_arg_positions lemma arg rev = function (match l with [] -> (*print_endline "match_arg_positions: not matched";*) match_arg_positions lemma arg (p :: rev) positions | [t] -> - let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in - let t = - if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then - SetAttr("role",p.role,SetAttr("role-attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) - else if p.gf = ENIAMwalTypes.CORE then SetAttr("selprefs",p.selprefs,t) - else if p.gf = ENIAMwalTypes.ADJUNCT || p.gf = ENIAMwalTypes.NOSEM || p.gf = ENIAMwalTypes.CORE then t - else failwith "match_arg_positions: ni 2" in - let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in - let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in - let t = if p.gf = ENIAMwalTypes.NOSEM then Dot else t in + let t = manage_arg p t in + if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) + else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) + | [t1;t2] -> (* FIXME: przydałoby się to uogólnić na listę dowolnej długości *) + let t1 = manage_arg p t1 in + let t2 = manage_arg p t2 in + let t = Variant("",["1",t1;"2",t2]) in if p.is_multi then (t, rev @ (p :: positions)) :: (match_arg_positions lemma arg (p :: rev) positions) else (t, rev @ positions) :: (match_arg_positions lemma arg (p :: rev) positions) | _ -> failwith ("match_arg_positions: lemma=" ^ lemma ^ " arg=" ^ string_of_arg arg ^ " position=" ^ string_of_position p)) diff --git a/semantics/ENIAMsemXMLof.ml b/semantics/ENIAMsemXMLof.ml index c93cab0..84b84d9 100644 --- a/semantics/ENIAMsemXMLof.ml +++ b/semantics/ENIAMsemXMLof.ml @@ -49,8 +49,8 @@ let rec linear_term = function Xml.Element("relations",[],[linear_term c.c_relations]); Xml.Element("cat",[],[linear_term c.c_cat])]) | Context c -> - Xml.Element("Context", - ["variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos], + Xml.Element("Context",["label",c.cx_label;"def_label",c.cx_def_label; + "variable",fst c.cx_variable ^ "_" ^ snd c.cx_variable;"pos",string_of_int c.cx_pos], [Xml.Element("sense",[],[linear_term c.cx_sense]); Xml.Element("contents",[],[linear_term c.cx_contents]); Xml.Element("relations",[],[linear_term c.cx_relations]); diff --git a/subsyntax/ENIAMsubsyntax.ml b/subsyntax/ENIAMsubsyntax.ml index c381f91..6922d86 100644 --- a/subsyntax/ENIAMsubsyntax.ml +++ b/subsyntax/ENIAMsubsyntax.ml @@ -357,11 +357,14 @@ let parse_text_tokens tokens query = let paragraphs = List.rev (Xlist.fold paragraphs [] (fun l -> function "" -> l | s -> s :: l)) in let n = if Xlist.size paragraphs = 1 then 0 else 1 in let paragraphs,_ = Xlist.fold paragraphs ([],n) (fun (paragraphs,n) paragraph -> + try let paths = parse paragraph in (* print_endline "parse_text 1"; *) let pid = if n = 0 then "" else string_of_int n ^ "_" in let sentences = ENIAMsentences.split_into_sentences pid paragraph tokens paths in - (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1) in + (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1 + with e -> + (AltParagraph[Raw,RawParagraph paragraph; Error,ErrorParagraph (Printexc.to_string e)]) :: paragraphs, n+1) in AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs)], tokens let parse_text query = diff --git a/subsyntax/ENIAMsubsyntaxHTMLof.ml b/subsyntax/ENIAMsubsyntaxHTMLof.ml index 4c78eed..211d8c7 100644 --- a/subsyntax/ENIAMsubsyntaxHTMLof.ml +++ b/subsyntax/ENIAMsubsyntaxHTMLof.ml @@ -104,6 +104,7 @@ let rec html_of_paragraph tokens = function String.concat "\n" (Xlist.map l (fun (mode,paragraph) -> sprintf "<tr><td>%s</td><td>%s</td></tr>" (ENIAMsubsyntaxStringOf.mode mode) (html_of_paragraph tokens paragraph))) ^ "</table>" + | ErrorParagraph s -> (*print_endline "ErrorParagraph";*) s let rec html_of_text tokens = function RawText s -> s diff --git a/subsyntax/ENIAMsubsyntaxStringOf.ml b/subsyntax/ENIAMsubsyntaxStringOf.ml index 8f1e6ea..e1a9568 100644 --- a/subsyntax/ENIAMsubsyntaxStringOf.ml +++ b/subsyntax/ENIAMsubsyntaxStringOf.ml @@ -28,6 +28,7 @@ let mode = function | Mate -> "Mate" | Swigra -> "Swigra" | POLFIE -> "POLFIE" + | Error -> "Error" let token_extarray t = String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size t - 1) [] (fun l id -> @@ -73,6 +74,7 @@ let rec paragraph spaces t = function | AltParagraph l -> String.concat "\n" (Xlist.map l (fun (m,p) -> sprintf "%sAltParagraph mode=%s %s" spaces (mode m) (paragraph "" t p))) + | ErrorParagraph s -> spaces ^ "ErrorParagraph: " ^ s let rec text spaces t = function RawText s -> spaces ^ "RawText: " ^ s diff --git a/subsyntax/ENIAMsubsyntaxTypes.ml b/subsyntax/ENIAMsubsyntaxTypes.ml index 445e4ad..ee6fb4f 100644 --- a/subsyntax/ENIAMsubsyntaxTypes.ml +++ b/subsyntax/ENIAMsubsyntaxTypes.ml @@ -20,7 +20,7 @@ open ENIAMtokenizerTypes type mode = - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | Error type sentence = RawSentence of string @@ -38,6 +38,7 @@ and paragraph = RawParagraph of string | StructParagraph of sentence_env list (* zdania *) | AltParagraph of (mode * paragraph) list + | ErrorParagraph of string type text = RawText of string @@ -81,6 +82,7 @@ let int_of_mode = function | Mate -> 4 | Swigra -> 5 | POLFIE -> 6 + | Error -> 7 let compare_mode x y = compare (int_of_mode x) (int_of_mode y) @@ -109,6 +111,7 @@ let rec map_paragraph mode f = function let l = Xlist.rev_map l (fun (mode,paragraph) -> mode, map_paragraph mode f paragraph) in AltParagraph(List.rev l) + | ErrorParagraph s -> ErrorParagraph s let rec map_text mode f = function RawText s -> RawText s @@ -137,6 +140,7 @@ let rec fold_paragraph mode s f = function | AltParagraph l -> Xlist.fold l s (fun s (mode,paragraph) -> fold_paragraph mode s f paragraph) + | ErrorParagraph _ -> s let rec fold_text mode s f = function RawText _ -> s diff --git a/subsyntax/ENIAMsubsyntaxXMLof.ml b/subsyntax/ENIAMsubsyntaxXMLof.ml index 310b0af..6f65c9a 100644 --- a/subsyntax/ENIAMsubsyntaxXMLof.ml +++ b/subsyntax/ENIAMsubsyntaxXMLof.ml @@ -57,6 +57,7 @@ let rec paragraph m = function Xml.Element("StructParagraph",set_mode m,Xlist.map sentences (fun p -> Xml.Element("Sentence",["id",p.id;"beg",string_of_int p.beg;"len",string_of_int p.len;"next",string_of_int p.next],[sentence "" p.sentence]))) | AltParagraph l -> Xml.Element("AltParagraph",set_mode m,Xlist.map l (fun (m,t) -> paragraph (ENIAMsubsyntaxStringOf.mode m) t)) + | ErrorParagraph s -> Xml.Element("ErrorParagraph",set_mode m,[Xml.PCData s]) let rec text m = function RawText s -> Xml.Element("RawText",set_mode m,[Xml.PCData s]) diff --git a/subsyntax/resources/brev.tab b/subsyntax/resources/brev.tab index bbcb419..b7e2c9a 100644 --- a/subsyntax/resources/brev.tab +++ b/subsyntax/resources/brev.tab @@ -708,7 +708,7 @@ wz. wzorowy subst:_:_:m3 wz . wzorowy adj:_:_:_:pos wzgl . względnie adv:pos x raz subst:_:_:m3 -X październik subst:_:_:m3 +#X październik subst:_:_:m3 x . książę subst:_:_:m1 x . ksiądz subst:_:_:m1 z . zeszyt subst:_:_:m3