Commit 6ac2b82599ef4ddb5943d239033d6236284614e5

Authored by Wojciech Jaworski
1 parent 2dac6350

Implementacja koreferencji przy kontroli

LCGlexicon/resources/lexicon-pl.dic
... ... @@ -9,6 +9,16 @@
9 9 url email day-month day year date hour hour-minute
10 10 się nie by s <root> <conll_root> or or2 <colon> <speaker> <speaker-end> <squery>
11 11  
  12 + <subst> <depr> <ppron12> <ppron3> <siebie> <prep> <num> <intnum>
  13 + <realnum> <intnum-interval> <realnum-interval> <symbol> <ordnum>
  14 + <date> <date-interval> <hour-minute> <hour> <hour-minute-interval>
  15 + <hour-interval> <year> <year-interval> <day> <day-interval> <day-month>
  16 + <day-month-interval> <month-interval> <roman> <roman-interval> <roman-ordnum>
  17 + <match-result> <url> <email> <obj-id> <adj> <apron> <adjc> <adjp> <adja>
  18 + <adv> <ger> <pact> <ppas> <fin> <bedzie> <praet> <winien> <impt>
  19 + <imps> <pred> <aglt> <inf> <pcon> <pant> <qub> <comp> <compar> <conj> <interj>
  20 + <sinterj> <burk> <interp> <part> <unk> <building-number>
  21 +
12 22 @WEIGHTS
13 23 symbol_weight=1
14 24 measure_weight=0.5
... ... @@ -78,7 +88,7 @@ pos=subst,case=gen,nsyn!=pronoun,nsem!=measure:
78 88 QUANT[unumber=all_numbers,ucase=dat,ugender=all_genders, uperson=all_persons,case=dat]
79 89 np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)};
80 90 pos=subst,case=gen,nsyn!=pronoun,nsem!=measure:
81   - QUANT[unumber=all_numbers,ucase=acc,ugender=all_genders, uperson=all_persons,case=acc]
  91 + QUANT[unumber=all_numbers,ucase=acc,ugender=all_genders, uperson=all_persons,case=acc]
82 92 np*unumber*ucase*ugender*uperson{\measure*unumber*ucase*ugender*uperson}{schema}{\(1+qub),/(1+inclusion)};
83 93 pos=subst,case=gen,nsyn!=pronoun,nsem!=measure:
84 94 QUANT[unumber=all_numbers,ucase=inst,ugender=all_genders, uperson=all_persons,case=inst]
... ... @@ -329,3 +339,68 @@ lemma=&lt;or-sentence&gt;,pos=interp: BRACKET ((&lt;root&gt;/&lt;speaker-end&gt;)/(ip*T*T*T/or))
329 339 lemma=</or-sentence>,pos=interp: BRACKET or2\?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj);
330 340 lemma=<sentence>,pos=interp: BRACKET ((<root>/<speaker-end>)/or)/np*T*nom*T*T;
331 341 lemma=</sentence>,pos=interp: BRACKET <speaker-end>;
  342 +
  343 +pos=subst: <subst>;
  344 +pos=year: <year>;
  345 +pos=year-interval: <year-interval>;
  346 +pos=prep: <prep>;
  347 +pos=depr: <depr>;
  348 +pos=ppron12: <ppron12>;
  349 +pos=ppron3: <ppron3>;
  350 +pos=siebie: <siebie>;
  351 +pos=num: <num>;
  352 +pos=intnum: <intnum>;
  353 +pos=realnum: <realnum>;
  354 +pos=intnum-interval: <intnum-interval>;
  355 +pos=realnum-interval: <realnum-interval>;
  356 +pos=symbol: <symbol>;
  357 +pos=ordnum: <ordnum>;
  358 +pos=date: <date>;
  359 +pos=date-interval: <date-interval>;
  360 +pos=hour-minute: <hour-minute>;
  361 +pos=hour: <hour>;
  362 +pos=hour-minute-interval: <hour-minute-interval>;
  363 +pos=hour-interval: <hour-interval>;
  364 +pos=day: <day>;
  365 +pos=day-interval: <day-interval>;
  366 +pos=day-month: <day-month>;
  367 +pos=day-month-interval: <day-month-interval>;
  368 +pos=month-interval: <month-interval>;
  369 +pos=roman: <roman>;
  370 +pos=roman-interval: <roman-interval>;
  371 +pos=roman-ordnum: <roman-ordnum>;
  372 +pos=match-result: <match-result>;
  373 +pos=building-number: <building-number>;
  374 +pos=url: <url>;
  375 +pos=email: <email>;
  376 +pos=obj-id: <obj-id>;
  377 +pos=apron: <apron>;
  378 +pos=adj: <adj>;
  379 +pos=adjc: <adjc>;
  380 +pos=adjp: <adjp>;
  381 +pos=adja: <adja>;
  382 +pos=adv: <adv>;
  383 +pos=ger: <ger>;
  384 +pos=pact: <pact>;
  385 +pos=ppas: <ppas>;
  386 +pos=fin: <fin>;
  387 +pos=bedzie: <bedzie>;
  388 +pos=praet: <praet>;
  389 +pos=winien: <winien>;
  390 +pos=impt: <impt>;
  391 +pos=imps: <imps>;
  392 +pos=pred: <pred>;
  393 +pos=aglt: <aglt>;
  394 +pos=inf: <inf>;
  395 +pos=pcon: <pcon>;
  396 +pos=pant: <pant>;
  397 +pos=qub: <qub>;
  398 +pos=comp: <comp>;
  399 +pos=conj: <conj>;
  400 +pos=interj: <interj>;
  401 +pos=sinterj: <sinterj>;
  402 +pos=burk: <burk>;
  403 +pos=interp: <interp>;
  404 +pos=part: <part>;
  405 +pos=compar: <compar>;
  406 +pos=unk: <unk>;
... ...
exec/ENIAMexec.ml
... ... @@ -79,6 +79,7 @@ let create_chart rules tokens lex_sems paths last =
79 79 let s = ExtArray.get lex_sems id in
80 80 ENIAM_LCGrenderer.reset_variable_names ();
81 81 ENIAM_LCGrenderer.add_variable_numbers ();
  82 + if s.ENIAMlexSemanticsTypes.schemata = [] then failwith ("create_chart: no schema for token=" ^ t.ENIAMtokenizerTypes.orth ^ " lemma=" ^ ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token) else
82 83 Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,schema) ->
83 84 let cats = clarify_categories cats t in
84 85 (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *)
... ... @@ -144,7 +145,7 @@ let eniam_parse_sentence timeout verbosity rules tokens lex_sems paths last =
144 145 let result = if verbosity = 0 then result else {result with chart1=chart} in
145 146 (* print_endline "eniam_parse_sentence 2"; *)
146 147 let chart,references = ENIAM_LCGchart.lazify chart in
147   - let result = if verbosity = 0 then result else {result with chart2=chart; references2=ExtArray.copy references} in
  148 + let result = if verbosity = 0 then result else {result with chart2=Array.copy chart; references2=ExtArray.copy references} in
148 149 (* print_endline "eniam_parse_sentence 3"; *)
149 150 let time2 = time_fun () in
150 151 let result = {result with lex_time=time2 -. time1} in
... ... @@ -399,7 +400,6 @@ let eniam_semantic_processing verbosity tokens lex_sems (result : eniam_parse_re
399 400 let tree,result =
400 401 try
401 402 let tree = ENIAMsemValence.assign_frames tokens lex_sems result.dependency_tree6b in
402   - (* ENIAMlexSemantics.create_tokens_for_artificial_nodes tokens lex_sems tree; *) (* FIXME: to powinno gdzieś być żeby dodać tokeny dla pro *)
403 403 let result = if verbosity < 2 then result else {result with dependency_tree7=tree} in
404 404 tree,result
405 405 with e -> [| |],{result with status=SemValenceError; msg=string_of_exn e} in
... ... @@ -423,6 +423,8 @@ let eniam_semantic_processing verbosity tokens lex_sems (result : eniam_parse_re
423 423 let tree = ENIAMdisambiguation.merge tree in
424 424 (* let tree = ENIAMdisambiguation.random_tree tokens lex_sems tree in *) (* FIXME: tokens lex_sems nie są potrzebne *)
425 425 let tree = ENIAM_LCGreductions.reshape_dependency_tree(*ExtArray.to_array*) tree in
  426 + ENIAMlexSemantics.create_tokens_for_artificial_nodes tokens lex_sems tree;
  427 + ENIAMcoreference.resolve tree;
426 428 let result = if verbosity = 0 then result else {result with dependency_tree9=tree} in
427 429 tree,result
428 430 with e -> [| |],{result with status=SemValenceError; msg=string_of_exn e} in
... ... @@ -451,7 +453,7 @@ let eniam_semantic_processing verbosity tokens lex_sems (result : eniam_parse_re
451 453 if !r <> [] then {result with status = SemGraphError; msg=String.concat "<BR>" !r} else
452 454 let graph,result =
453 455 try
454   - let graph = ENIAMsemGraph.simplify_tree graph in
  456 + let graph = ENIAMsemGraph.greater_simplify graph in
455 457 (* let graph = ENIAMsemGraph.manage_quantification graph in *)
456 458 let graph = ENIAMsemGraph.simplify_gender graph in
457 459 let result = (*if verbosity = 0 then result else*) {result with semantic_graph11=graph; semantic_graph12=graph} in
... ...
exec/parser.ml
... ... @@ -37,6 +37,7 @@ let timeout = ref 30.
37 37 let select_sentence_modes_flag = ref false
38 38 let select_sentences_flag = ref true
39 39 let semantic_processing_flag = ref true
  40 +let discontinuous_parsing_flag = ref false
40 41 let output_dir = ref "results/"
41 42 let spec_list = [
42 43 "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";
... ... @@ -62,7 +63,11 @@ let spec_list = [
62 63 "--sel-sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)";
63 64 "--no-sel-sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences";
64 65 "--sem", Arg.Unit (fun () -> semantic_processing_flag:=true), "Perform semantic processing (default)";
65   - "--no-sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perforf semantic processing";
  66 + "--no-sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perform semantic processing";
  67 + "--discontinuous", Arg.Unit (fun () -> discontinuous_parsing_flag:=true), "Parse discontinuous constituents";
  68 + "--no-discontinuous", Arg.Unit (fun () -> discontinuous_parsing_flag:=false), "Do not parse discontinuous constituents (default)";
  69 + "--partial", Arg.Unit (fun () -> ENIAMexecTypes.partial_parsing_flag:=true), "Build derivation trees for partially parsed sentences";
  70 + "--no-partial", Arg.Unit (fun () -> ENIAMexecTypes.partial_parsing_flag:=false), "Build derivation trees for partially parsed sentences (default)";
66 71 ]
67 72  
68 73 let usage_msg =
... ... @@ -117,9 +122,12 @@ let get_sock_addr host_name port =
117 122  
118 123 let _ =
119 124 prerr_endline message;
  125 + ENIAMsemTypes.user_ontology_flag := false;
120 126 ENIAMcategoriesPL.initialize ();
121 127 ENIAMsemLexicon.initialize ();
122 128 Arg.parse spec_list anon_fun usage_msg;
  129 + if !discontinuous_parsing_flag then ENIAMexecTypes.lcg_rules := ENIAM_LCGrules.application_rules @ ENIAM_LCGrules.cross_composition_rules
  130 + else ENIAMexecTypes.lcg_rules := ENIAM_LCGrules.application_rules;
123 131 if !lexSemantics_built_in then ENIAMlexSemantics.initialize ();
124 132 Gc.compact ();
125 133 let sub_in,sub_out =
... ...
exec/semparser.ml
... ... @@ -69,7 +69,7 @@ let spec_list = [
69 69 "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)";
70 70 "--no-sel-sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences";
71 71 "--sem", Arg.Unit (fun () -> semantic_processing_flag:=true), "Perform semantic processing (default)";
72   - "--no-sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perforf semantic processing";
  72 + "--no-sem", Arg.Unit (fun () -> semantic_processing_flag:=false), "Do not perform semantic processing";
73 73 ]
74 74  
75 75 let usage_msg =
... ...
lexSemantics/ENIAMlexSemantics.ml
... ... @@ -304,6 +304,7 @@ let assign_valence tokens lex_sems group =
304 304 let schemata = Xlist.rev_map schemata (fun (selectors,schema) ->
305 305 selectors,["X",["X"]],ENIAMwalRenderer.render_simple_schema schema) in
306 306 let schemata = List.flatten (Xlist.rev_map schemata (ENIAMadjuncts.add_adjuncts preps compreps compars pos2)) in
  307 + let schemata = if schemata = [] then [[],["X",["X"]],[]] else schemata in
307 308 (* Printf.printf "D %s |schemata|=%d\n" lemma (Xlist.size schemata); *)
308 309 let entries = List.flatten (Xlist.rev_map entries (ENIAMvalence.transform_lex_entry pos lemma)) in
309 310 let entries = Xlist.map entries (fun (selectors,entry) ->
... ... @@ -341,6 +342,9 @@ let assign_valence tokens lex_sems group =
341 342 let connected = Xlist.rev_map connected (add_sem_args lemma pos) in
342 343 let connected = Xlist.rev_map connected (mark_reversed_hipero lemma pos) in
343 344 let connected = Xlist.rev_map connected mark_nosem in
  345 + let connected = if connected = [] then semantize lemma pos ([],[]) else connected in
  346 + let connected = Xlist.rev_map connected (fun f ->
  347 + if f.meanings = [] then {f with meanings=[lemma, ["X",1], unknown_meaning_weight]} else f) in
344 348 (* let connected = List.flatten (Xlist.rev_map connected (set_context lemma pos)) in *)
345 349 (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *)
346 350 ExtArray.set lex_sems id {(*(ExtArray.get lex_sems id) with*)
... ... @@ -451,22 +455,31 @@ let initialize () =
451 455  
452 456 open ENIAM_LCGtypes
453 457  
454   -let create_tokens_for_artificial_nodes tokens lex_sems dependency_tree =
455   - (* print_endline "create_tokens_for_artificial_nodes"; *)
456   - Int.iter 0 (Array.length dependency_tree - 1) (fun i ->
457   - match dependency_tree.(i) with
458   - Node t ->
459   - if t.id = 0 then (
460   - (* print_endline "create_tokens_for_artificial_nodes 2"; *)
  458 +let rec create_tokens_for_artificial_nodes_rec tokens lex_sems = function (* Rebuilds a dependency-tree term; every Node whose id is 0 gets a fresh entry appended to both [tokens] and [lex_sems] and takes the new index as its id. Raises Failure if the two indices differ, if a Dot-symbol node has a pos other than "<root>", "<merge>" or "pro", or on an unsupported term constructor. *)
  459 + Node t ->
  460 + let t = if t.id = 0 then ( (* nodes with id 0 are the ones treated as artificial here *)
461 461 let id = ExtArray.add tokens empty_token_env in
462 462 let lex_sem = {empty_lex_sem with frames=[{empty_frame with meanings=[t.lemma, [t.lemma,0], unknown_meaning_weight]}]} in
463 463 let id2 = ExtArray.add lex_sems lex_sem in
464   - if id <>id2 then failwith "create_tokens_for_artificial_nodes 2" else
  464 + if id <>id2 then failwith "create_tokens_for_artificial_nodes_rec" else (* both arrays are expected to yield the same index for the new entry *)
465 465 let t = if t.symbol = Dot then
466 466 {t with symbol = match t.pos with
467 467 "<root>" -> Tuple[Val "<root>"]
468 468 | "<merge>" -> Tuple[Val "<merge>"]
469 469 | "pro" -> Tuple[Val "pro"]
470   - | s -> failwith ("create_tokens_for_artificial_nodes: " ^ s)} else t in
471   - dependency_tree.(i) <- ENIAM_LCGtypes.Node{t with id=id})
472   - | _ -> failwith "create_tokens_for_artificial_nodes 1");
  470 + | s -> failwith ("create_tokens_for_artificial_nodes_rec: " ^ s)} else t in
  471 + {t with id=id}) else t in
  472 + Node{t with args = create_tokens_for_artificial_nodes_rec tokens lex_sems t.args} (* descend into the arguments whether or not this node was rewritten *)
  473 + | Tuple l ->
  474 + Tuple(List.rev (Xlist.rev_map l (create_tokens_for_artificial_nodes_rec tokens lex_sems)))
  475 + | Variant(e,l) ->
  476 + Variant(e,List.rev (Xlist.rev_map l (fun (i,t) ->
  477 + i, create_tokens_for_artificial_nodes_rec tokens lex_sems t)))
  478 + | Dot -> Dot
  479 + | Ref i -> Ref i
  480 + | t -> failwith ("create_tokens_for_artificial_nodes_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)
  481 +(** [create_tokens_for_artificial_nodes tokens lex_sems dependency_tree] overwrites every cell of the [dependency_tree] array with the result of [create_tokens_for_artificial_nodes_rec] applied to it; returns unit. *)
  482 +let create_tokens_for_artificial_nodes tokens lex_sems dependency_tree =
  483 + (* print_endline "create_tokens_for_artificial_nodes"; *)
  484 + Int.iter 0 (Array.length dependency_tree - 1) (fun i ->
  485 + dependency_tree.(i) <- create_tokens_for_artificial_nodes_rec tokens lex_sems dependency_tree.(i))
... ...
lexSemantics/ENIAMwalReduce.ml
... ... @@ -106,6 +106,8 @@ let set_necessary pos schema =
106 106 | NCP(NomAgr,_,_) -> true
107 107 | E (NCP(NomAgr,CompTypeUndef,CompUndef)) -> true
108 108 | E (NP(NomAgr)) -> true
  109 + | Pro -> true
  110 + | ProNG -> true (* czy to jest potrzebne? *)
109 111 | _ -> b) then ProNG else Pro in
110 112 {p with is_necessary=nec})
111 113  
... ...
lexSemantics/interface.ml
... ... @@ -21,13 +21,10 @@ type output = Text | Xml | Html | Marsh | Graphviz
21 21  
22 22 let output = ref Text
23 23 let comm_stdio = ref true
24   -(* let sentence_split = ref true *)
25 24 let port = ref 5439
26 25 let perform_integration = ref false
27 26  
28 27 let spec_list = [
29   - (* "-s", Arg.Unit (fun () -> sentence_split:=true), "Split input into sentences (default)";
30   - "-n", Arg.Unit (fun () -> sentence_split:=false), "Do not split input into sentences"; *)
31 28 "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";
32 29 "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number";
33 30 "-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)";
... ... @@ -43,11 +40,6 @@ let spec_list = [
43 40 "--no_dep_parser", Arg.Unit (fun () ->
44 41 ENIAMpreIntegration.concraft_enabled := false;
45 42 ENIAMpreIntegration.mate_parser_enabled := false), "Disable dependency parser (default)";
46   - (* "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off"; *)
47   - (* "-r", Arg.String (fun p ->
48   - ENIAMtokenizerTypes.set_resource_path p;
49   - ENIAMmorphologyTypes.set_resource_path p;
50   - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)
51 43 ]
52 44  
53 45 let usage_msg =
... ...
lexSemantics/resources/proper_classes.tab
... ... @@ -50,3 +50,30 @@ wiadukt wiadukt-1 0.
50 50 szosa szosa-1 0.
51 51 trakt trakt-2 0.
52 52 trasa trasa-1 0.
  53 +#część miasta część miasta-1 0.
  54 +część miasta osiedle-1 0.
  55 +część miejscowości osiedle-1 0.
  56 +delegatura dzielnica-1 0.
  57 +dzielnica dzielnica-1 0.
  58 +kolonia kolonia-1 0.
  59 +miasto miasto-1 0.
  60 +osada osada-1 0.
  61 +osada leśna osada-1 0.
  62 +osiedle osiedle-1 0.
  63 +przysiółek przysiółek-1 0.
  64 +#schronisko turystyczne schronisko turystyczne-1 0.
  65 +schronisko turystyczne schronisko-1 0.
  66 +wieś wieś-1 0.
  67 +gmina miejska gmina miejska-1 0.
  68 +gmina miejska, miasto stołeczne gmina miejska-1 0.
  69 +gmina miejsko-wiejska gmina miejsko-wiejska-1 0.
  70 +gmina wiejska gmina wiejska-1 0.
  71 +miasto na prawach powiatu miasto-1 0.
  72 +miasto stołeczne, na prawach powiatu miasto stołeczne-1 0.
  73 +obszar wiejski obszar wiejski-1 0.
  74 +powiat powiat-1 0.
  75 +województwo województwo-1 0.
  76 +
  77 +
  78 +
  79 +
... ...
semantics/ENIAMcoreference.ml 0 → 100644
  1 +(*
  2 + * ENIAMsemantics implements semantic processing for ENIAM
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open Xstd
  21 +open ENIAM_LCGtypes
  22 +(** [transitive_closure l] takes a list of integer sets and returns a list of sets in which input sets that share an element, directly or through a chain of other sets, have been merged. The order of the returned list is whatever the final map fold produces. *)
  23 +let transitive_closure l =
  24 + let map = Xlist.fold l IntMap.empty (fun map set ->
  25 + IntSet.fold set map (fun map x ->
  26 + IntMap.add_inc map x set (fun set2 -> IntSet.union set set2))) in (* element -> set; presumably add_inc binds [set] when [x] is new and otherwise applies the function to the existing binding -- confirm against Xstd *)
  27 + let f = ref true in (* [f] records whether any set grew during the last pass *)
  28 + let r = ref map in
  29 + while !f do
  30 + f := false;
  31 + r := IntMap.fold (!r) (!r) (fun map k set ->
  32 + let set2 = IntSet.fold set set (fun set2 v ->
  33 + IntSet.union set2 (IntMap.find map v)) in (* extend [set] with the sets currently bound to each of its members *)
  34 + if IntSet.size set2 > IntSet.size set then f := true;
  35 + IntMap.add map k set2)
  36 + done;
  37 + let map = IntMap.fold (!r) IntMap.empty (fun map _ set -> (* re-key every set by its smallest element so that sets with the same minimum share one key *)
  38 + IntMap.add map (IntSet.min_elt set) set) in
  39 + IntMap.fold map [] (fun l _ set -> set :: l)
  40 +(** [resolve_rec (cr,ce,co) term] walks [term] and returns the three string-keyed maps extended with the ids of the nodes it meets: [cr] for "controller" attributes, [ce] for "controllee", [co] for "coref", each keyed by the attribute's [Val] string. Raises Failure "resolve_rec" when one of these attributes holds something other than [Val], and Failure with the printed term on an unsupported constructor. *)
  41 +let rec resolve_rec (cr,ce,co) = function
  42 + Node t ->
  43 + let cr,ce,co = Xlist.fold t.attrs (cr,ce,co) (fun (cr,ce,co) -> function
  44 + "controller",Val c -> StringMap.add_inc cr c (IntSet.singleton t.id) (fun set -> IntSet.add set t.id),ce,co
  45 + | "controllee",Val c -> cr,StringMap.add_inc ce c (IntSet.singleton t.id) (fun set -> IntSet.add set t.id),co
  46 + | "coref",Val c -> cr,ce,StringMap.add_inc co c (IntSet.singleton t.id) (fun set -> IntSet.add set t.id)
  47 + | "controller",_ -> failwith "resolve_rec"
  48 + | "controllee",_ -> failwith "resolve_rec"
  49 + | "coref",_ -> failwith "resolve_rec"
  50 + | _ -> cr,ce,co) in (* all other attributes are ignored *)
  51 + resolve_rec (cr,ce,co) t.args (* FIXME: is this call needed? *)
  52 + | Tuple l -> Xlist.fold l (cr,ce,co) resolve_rec
  53 + | Variant(e,l) -> Xlist.fold l (cr,ce,co) (fun (cr,ce,co) (i,t) -> resolve_rec (cr,ce,co) t)
  54 + | Dot -> cr,ce,co
  55 + | Ref i -> cr,ce,co (* references are not followed here *)
  56 + | t -> failwith ("resolve_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)
  57 +(** [set_coref use_label co term] rebuilds [term]; every node whose id is a key of the int map [co] gets one extra attribute prepended, holding the string bound to that id: it is named "label" when the id belongs to [use_label] and "def-label" otherwise. Raises Failure with the printed term on an unsupported constructor. *)
  58 +let rec set_coref use_label co = function
  59 + Node t ->
  60 + let t = if IntMap.mem co t.id then {t with attrs=((if IntSet.mem use_label t.id then "label" else "def-label"),Val(IntMap.find co t.id)) :: t.attrs} else t in
  61 + Node{t with args = set_coref use_label co t.args} (* arguments are processed for every node, labelled or not *)
  62 + | Tuple l -> Tuple(List.rev (Xlist.rev_map l (set_coref use_label co)))
  63 + | Variant(e,l) ->
  64 + Variant(e,List.rev (Xlist.rev_map l (fun (i,t) ->
  65 + i, set_coref use_label co t)))
  66 + | Dot -> Dot
  67 + | Ref i -> Ref i
  68 + | t -> failwith ("set_coref: " ^ ENIAM_LCGstringOf.linear_term 0 t)
  69 +(** [resolve dependency_tree] updates the [dependency_tree] array in place, attaching "label" / "def-label" attributes (via [set_coref]) to nodes linked by "coref" and "controller" attributes that carry the same key. Raises Failure "resolve" when a "coref" key has no "controller" with that key. *)
  70 +let resolve dependency_tree =
  71 + let cr,ce,co = Int.fold 0 (Array.length dependency_tree - 1) (StringMap.empty,StringMap.empty,StringMap.empty) (fun (cr,ce,co) i ->
  72 + resolve_rec (cr,ce,co) dependency_tree.(i)) in (* NOTE(review): the controllee map [ce] is collected but not used below *)
  73 + let use_label = StringMap.fold co IntSet.empty (fun use_label _ set -> IntSet.union use_label set) in (* ids of all nodes that carry a "coref" attribute *)
  74 + let co = StringMap.fold co [] (fun co c set -> (IntSet.union set (try StringMap.find cr c with Not_found -> failwith "resolve")) :: co) in (* one id set per key: the coref nodes together with the controller nodes of the same key *)
  75 + let co,_ = Xlist.fold (transitive_closure co) (IntMap.empty,1) (fun (co,i) set -> (* number the merged sets 1, 2, ... and map each member id to its number as a string *)
  76 + let label = string_of_int i in
  77 + IntSet.fold set co (fun co id ->
  78 + IntMap.add co id label), i+1) in
  79 + Int.iter 0 (Array.length dependency_tree - 1) (fun i ->
  80 + dependency_tree.(i) <- set_coref use_label co dependency_tree.(i))
... ...
semantics/ENIAMsemGraph.ml
... ... @@ -21,24 +21,12 @@ open ENIAMsemTypes
21 21 open Xstd
22 22 open Printf
23 23  
24   -(*let pro_id_counter = ref 100000 (* FIXME: to trzeba usunąć !!! *)
25   -
26   -let get_pro_id () =
27   - incr pro_id_counter;
28   - !pro_id_counter*)
29   -
30 24 let empty_concept =
31 25 {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list;
32   - c_left_input_pos: int; c_right_input_pos: int;*) c_relations=Dot; c_variable="",""; c_pos=(-1); c_cat=Dot(*; c_proj=Dot*)}
  26 + c_left_input_pos: int; c_right_input_pos: int;*) c_relations=Dot; c_variable="",""; c_pos=(-1); c_cat=Dot; c_label=""; c_def_label=""}
33 27  
34 28 let empty_context = {cx_sense=Dot; cx_contents=Dot; cx_relations=Dot; cx_variable="",""; cx_pos=(-1); cx_cat=Dot}
35 29  
36   -(*let make_sem_args = function
37   - [] -> Dot
38   - | [s] -> Val s
39   - | l -> Variant(LCGreductions.get_variant_label (), fst (Xlist.fold l ([],1) (fun (l,i) t ->
40   - (string_of_int i, Val t) :: l,i+1)))*)
41   -
42 30 let rec make_args_list = function
43 31 Tuple l -> List.flatten (Xlist.map l make_args_list)
44 32 | t -> [t]
... ... @@ -55,11 +43,6 @@ let rec get_person = function
55 43 | _ :: l -> get_person l
56 44 | [] -> ""
57 45  
58   -(* let rec get_attr pat = function
59   - (s,Val t) :: l -> if s = pat then t else get_attr pat l
60   - | (s,t) :: l -> if s = pat then failwith ("get_attr 1: " ^ s) else get_attr pat l
61   - | [] -> failwith ("get_attr 2: " ^ pat) *)
62   -
63 46 let make_relation t c =
64 47 match t.gf with
65 48 "subj" | "obj" | "arg" ->
... ... @@ -92,18 +75,20 @@ let add_proj2 proj c =
92 75 Concept{empty_concept with c_cat=proj; c_relations=Relation("Has","",c)}
93 76  
94 77  
95   -let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj =
  78 +let create_normal_concept tokens lex_sems t cat proj =
96 79 (*if t.agf = ENIAMwalTypes.NOSEM then t.args else*)
  80 + let cat,proj = if !user_ontology_flag then cat,proj else Dot,Dot in
97 81 let proj = if proj = cat then Dot else proj in
98 82 let c = {empty_concept with
99   - c_sense = (*if t.lemma = "<root>" then Dot else*) (*t.meaning*)Val t.lemma; (* FIXME: zaślepka na potrzeby gramatyk semantycznych *)
  83 + c_sense = if !user_ontology_flag then Val t.lemma else (*if t.lemma = "<root>" then Dot else*) t.meaning;
100 84 c_relations=t.args;
101   - c_quant=(*make_sem_args*) t.sem_args;
  85 + c_quant=if t.label = "" then t.sem_args else Dot; (* FIXME: zakładam że t.label <> "" występuje tylko dla pro *)
102 86 c_variable=string_of_int t.id,"";
103   - c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg; (* FIXME: pro nie mają przydzielonego id *)
  87 + c_pos=(ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg;
104 88 c_local_quant=true;
105   - c_cat=cat(*;
106   - c_proj=proj*)} in
  89 + c_cat=cat;
  90 + c_label=t.label;
  91 + c_def_label=t.def_label} in
107 92 if t.pos = "subst" || t.pos = "depr" || t.pos = "ger" || t.pos = "unk" || StringSet.mem symbols t.pos then (* FIXME: wykrywanie plurale tantum *)
108 93 let c = {c with c_local_quant=false} in
109 94 let c,measure,cx_flag = Xlist.fold t.attrs (c,false,false) (fun (c,measure,cx_flag) -> function
... ... @@ -224,15 +209,18 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t cat proj =
224 209 if t.pos = "pro" || t.pos = "ppron12" || t.pos = "ppron3" || t.pos = "siebie" then (* FIXME: indexicalność *)
225 210 let c = {c with c_local_quant=false} in
226 211 let c = Xlist.fold t.attrs c (fun c -> function
227   - "NUM",t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]}
  212 + (* "NUM",t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]}
228 213 | "GEND",t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]}
229   - | "PERS",t2 -> if t.pos = "siebie" then c else {c with c_relations=Tuple[c.c_relations;SingleRelation t2]}
  214 + | "PERS",t2 -> if t.pos = "siebie" then c else {c with c_relations=Tuple[c.c_relations;SingleRelation t2]} *)
  215 + "NUM",t -> {c with c_quant=Tuple[c.c_quant;t]}
  216 + | "GEND",t -> {c with c_quant=Tuple[c.c_quant;t]}
  217 + | "PERS",t2 -> if t.pos = "siebie" then c else {c with c_quant=Tuple[c.c_quant;t2]}
230 218 | "CASE",_ -> c
231 219 | "SYN",_ -> c
232 220 | "NSEM",_ -> c
233 221 | "controller",_ -> c
234 222 | "controllee",_ -> c
235   - | "coref",_ -> c
  223 + (* | "coref",t -> {c with c_relations=Tuple[c.c_relations;SingleRelation (Val "coref")]} (* FIXME: zaślepka do poprawienia przy implementacji kontroli *) *)
236 224 | e,t -> failwith ("create_normal_concept pron: " ^ e)) in
237 225 make_relation t (Concept c) else
238 226 if t.pos = "num" || t.pos = "intnum" || t.pos = "realnum" || t.pos = "intnum-interval" || t.pos = "realnum-interval" then
... ... @@ -322,7 +310,7 @@ let rec translate_node tokens lex_sems t =
322 310 orth=t.ENIAM_LCGtypes.orth; lemma=t.ENIAM_LCGtypes.lemma; pos=t.ENIAM_LCGtypes.pos; weight=t.ENIAM_LCGtypes.weight;
323 311 id=t.ENIAM_LCGtypes.id; symbol=create_concepts tokens lex_sems t.ENIAM_LCGtypes.symbol; arg_symbol=create_concepts tokens lex_sems t.ENIAM_LCGtypes.arg_symbol;
324 312 arg_dir=t.ENIAM_LCGtypes.arg_dir;
325   - attrs=[];
  313 + attrs=[]; label=""; def_label="";
326 314 args=create_concepts tokens lex_sems t.ENIAM_LCGtypes.args;
327 315 gf=""; role=""; role_attr=""; selprefs=Dot; meaning=Dot; arole=""; arole_attr=""; arev=false; sem_args=Dot} in
328 316 let t,attrs,cat,proj = Xlist.fold attrs (t,[],Dot,Dot) (fun (t,attrs,cat,proj) -> function
... ... @@ -341,13 +329,6 @@ let rec translate_node tokens lex_sems t =
341 329 | "rev-hipero",_ -> t,attrs,cat,proj
342 330 | "fopinion",_ -> t,attrs,cat,proj
343 331 | "sopinion",_ -> t,attrs,cat,proj
344   -(* | "",s -> t,("",s) :: attrs,cat,proj
345   - | "",s -> t,("",s) :: attrs,cat,proj
346   - | "",s -> t,("",s) :: attrs,cat,proj
347   - | "",s -> t,("",s) :: attrs,cat,proj
348   - | "",s -> t,("",s) :: attrs,cat,proj
349   - | "",s -> t,("",s) :: attrs,cat,proj
350   - | "",s -> t,("",s) :: attrs,cat,proj*)
351 332 | "ACM",s -> t,("ACM",s) :: attrs,cat,proj
352 333 | "ASPECT",s -> t,("ASPECT",s) :: attrs,cat,proj
353 334 | "NEGATION",s -> t,("NEGATION",s) :: attrs,cat,proj
... ... @@ -355,7 +336,9 @@ let rec translate_node tokens lex_sems t =
355 336 | "TENSE",s -> t,("TENSE",s) :: attrs,cat,proj
356 337 | "controller",s -> t,("controller",s) :: attrs,cat,proj
357 338 | "controllee",s -> t,("controllee",s) :: attrs,cat,proj
358   - | "coref",s -> t,("coref",s) :: attrs,cat,proj
  339 + | "coref",s -> t,attrs,cat,proj
  340 + | "label",Val s -> {t with label=s},attrs,cat,proj
  341 + | "def-label",Val s -> {t with def_label=s},attrs,cat,proj
359 342 | "CAT",s -> t,attrs,s,proj
360 343 | "PROJ",s -> t,attrs,cat,s
361 344 | "NUM",s -> t,("NUM",s) :: attrs,cat,proj
... ... @@ -367,13 +350,6 @@ let rec translate_node tokens lex_sems t =
367 350 | "MODE",s -> t,("MODE",s) :: attrs,cat,proj
368 351 | "GRAD",s -> t,("GRAD",s) :: attrs,cat,proj
369 352 | "PSEM",s -> t,("PSEM",s) :: attrs,cat,proj
370   -(* | "",Val s -> {t with =s},attrs,cat,proj
371   - | "",Val s -> {t with =s},attrs,cat,proj
372   - | "",Val s -> {t with =s},attrs,cat,proj
373   - | "",Val s -> {t with =s},attrs,cat,proj
374   - | "",Val s -> {t with =s},attrs,cat,proj
375   - | "",Val s -> {t with =s},attrs,cat,proj
376   - | "",Val s -> {t with =s},attrs,cat,proj*)
377 353 (* | k,v -> printf "translate_node: %s %s\n%!" k (ENIAMsemStringOf.linear_term 0 v); t, (k,v) :: attrs,cat,proj) in *)
378 354 | k,v -> failwith (sprintf "translate_node: %s %s\n%!" k (ENIAMsemStringOf.linear_term 0 v))) in
379 355 {t with attrs=attrs},cat,proj
... ... @@ -591,42 +567,60 @@ let rec validate_reduction r = function
591 567  
592 568 (***************************************************************************************)
593 569  
  570 +let rec count_variant_labels map = function (* Folds over a semantic term and calls [StringQMap.add map e] for the label [e] of every Variant it reaches (presumably counting occurrences per label -- confirm against Xstd); returns the resulting map. Raises Failure with the printed term on an unsupported constructor. *)
  571 + Concept c -> Xlist.fold [c.c_sense; c.c_name; c.c_quant; c.c_cat; c.c_relations] map count_variant_labels
  572 + | Context c -> Xlist.fold [c.cx_sense; c.cx_contents; c.cx_cat; c.cx_relations] map count_variant_labels
  573 + | Relation(_,_,t) -> count_variant_labels map t
  574 + | RevRelation(_,_,t) -> count_variant_labels map t
  575 + | SingleRelation t -> count_variant_labels map t
  576 + | Tuple l -> Xlist.fold l map count_variant_labels
  577 + | Variant(e,l) ->
  578 + let map = StringQMap.add map e in (* record this label before visiting the alternatives *)
  579 + Xlist.fold l map (fun map (i,t) -> count_variant_labels map t)
  580 + | Dot -> map
  581 + | Val s -> map
  582 + | t -> failwith ("count_variant_labels: " ^ ENIAMsemStringOf.linear_term 0 t)
  583 +(** [remove_variant_labels map term] rebuilds [term]; a Variant whose label is bound to 1 in [map] gets the empty string as its label, and the alternatives of every Variant are sorted by their first component. [map] is looked up with [StringQMap.find] for every Variant label met. Raises Failure with the printed term on an unsupported constructor. *)
  584 +let rec remove_variant_labels map = function
  585 + Concept c -> Concept{c with
  586 + c_sense=remove_variant_labels map c.c_sense;
  587 + c_name=remove_variant_labels map c.c_name;
  588 + c_quant=remove_variant_labels map c.c_quant;
  589 + c_cat=remove_variant_labels map c.c_cat;
  590 + c_relations=remove_variant_labels map c.c_relations}
  591 + | Context c -> Context{c with
  592 + cx_sense=remove_variant_labels map c.cx_sense;
  593 + cx_contents=remove_variant_labels map c.cx_contents;
  594 + cx_cat=remove_variant_labels map c.cx_cat;
  595 + cx_relations=remove_variant_labels map c.cx_relations}
  596 + | Relation(r,a,t) -> Relation(r,a,remove_variant_labels map t)
  597 + | RevRelation(r,a,t) -> RevRelation(r,a,remove_variant_labels map t)
  598 + | SingleRelation r -> SingleRelation r (* NOTE(review): returned unchanged, whereas count_variant_labels descends into SingleRelation -- confirm this asymmetry is intended *)
  599 + | Tuple l -> Tuple(List.rev (Xlist.rev_map l (remove_variant_labels map)))
  600 + | Variant(e,l) ->
  601 + let e = if StringQMap.find map e = 1 then "" else e in
  602 + let l = Xlist.rev_map l (fun (i,t) -> i, remove_variant_labels map t) in
  603 + Variant(e,Xlist.sort l (fun x y -> compare (fst x) (fst y))) (* polymorphic compare on the first components of the alternatives *)
  604 + | Dot -> Dot
  605 + | Val s -> Val s
  606 + | t -> failwith ("remove_variant_labels: " ^ ENIAMsemStringOf.linear_term 0 t)
  607 +
594 608 let rec simplify_tree = function
595   - Node t -> Node{t with args=simplify_tree t.args}
596   - | Concept c -> Concept{c with c_relations=simplify_tree c.c_relations}
597   - | Context c -> Context{c with cx_contents=simplify_tree c.cx_contents; cx_relations=simplify_tree c.cx_relations}
  609 + Concept c -> Concept{c with
  610 + c_sense=simplify_tree c.c_sense;
  611 + c_name=simplify_tree c.c_name;
  612 + c_quant=simplify_tree c.c_quant;
  613 + c_cat=simplify_tree c.c_cat;
  614 + c_relations=simplify_tree c.c_relations}
  615 + | Context c -> Context{c with
  616 + cx_sense=simplify_tree c.cx_sense;
  617 + cx_contents=simplify_tree c.cx_contents;
  618 + cx_cat=simplify_tree c.cx_cat;
  619 + cx_relations=simplify_tree c.cx_relations}
598 620 | Relation(r,a,t) -> Relation(r,a,simplify_tree t)
599 621 | RevRelation(r,a,t) -> RevRelation(r,a,simplify_tree t)
600 622 | SingleRelation r -> SingleRelation r
601 623 (* | TripleRelation(r,a,s,t) -> TripleRelation(r,a,simplify_tree s,simplify_tree t) *)
602   -(* | AddRelation(Concept c,r,a,s) -> simplify_tree (Concept{c with c_relations=Tuple[Relation(Val r,Val a,s);c.c_relations]})
603   - | AddRelation(Context c,r,a,s) -> simplify_tree (Context{c with cx_relations=Tuple[Relation(Val r,Val a,s);c.cx_relations]})*)
604   - | AddRelation(t,r,a,s) -> simplify_tree_add_relation r a (simplify_tree s) (simplify_tree t)
605   -(* let t = simplify_tree t in
606   - let s = simplify_tree s in
607   - (match t with
608   - Concept c -> Concept{c with c_relations=Tuple[Relation(Val r,Val a,s);c.c_relations]}
609   - | Context c -> Context{c with cx_relations=Tuple[Relation(Val r,Val a,s);c.cx_relations]}
610   - | _ -> AddRelation(t,r,a,s))*)
611   -(* | RemoveRelation t ->
612   - (match simplify_tree t with
613   - Relation (_,_,t) -> t
614   - | Dot -> Dot
615   - | Variant(e,l) -> simplify_tree (Variant(e,Xlist.map l (fun (i,t) -> i,RemoveRelation t)))
616   - | Tuple l -> simplify_tree (Tuple(Xlist.map l (fun t -> RemoveRelation t)))
617   - | Context t -> Context t
618   - | Concept t -> Concept t
619   - | t -> RemoveRelation t)*)
620   - (* | SetContextName(s,t) ->
621   - (match simplify_tree t with
622   - Context t -> Context{t with cx_sense=s}
623   - | t -> SetContextName(s,t)) *)
624   -(* | MakeTripleRelation(r,a,t) ->
625   - (match simplify_tree t with
626   - Concept t ->
627   - let core,t = get_core t in
628   - TripleRelation(r,a,Concept t,simplify_tree (RemoveRelation core))
629   - | t -> MakeTripleRelation(r,a,t))*)
630 624 | Tuple l ->
631 625 let l = Xlist.fold l [] (fun l t ->
632 626 match simplify_tree t with
... ... @@ -639,27 +633,52 @@ let rec simplify_tree = function
639 633 | Variant(_,[_,t]) -> simplify_tree t
640 634 | Variant(e,l) ->
641 635 let l = Xlist.map l (fun (i,t) -> i, simplify_tree t) in
  636 + let set = Xlist.fold l TermSet.empty (fun set (_,t) -> TermSet.add set t) in
  637 + if TermSet.size set = 1 then TermSet.max_elt set else
  638 + let l = List.rev (fst (TermSet.fold set ([],1) (fun (l,i) t -> (string_of_int i,t) :: l, i+1))) in
642 639 let _,t = List.hd l in
643 640 let b = Xlist.fold (List.tl l) true (fun b (_,s) -> if s = t then b else false) in
644 641 if b then t else
645 642 (try
646 643 (match t with
647 644 Concept c ->
648   - let lt = Xlist.fold l [] (fun lt -> function
649   - i,Concept c2 -> if c.c_sense = c2.c_sense && c.c_quant = c2.c_quant then (i,c2.c_relations) :: lt else raise Not_found
  645 + let lt1,lt2,lt3 = Xlist.fold l ([],[],[]) (fun (lt1,lt2,lt3) -> function
  646 + i,Concept c2 ->
  647 + if c.c_sense = c2.c_sense && c.c_name = c2.c_name &&
  648 + c.c_local_quant = c2.c_local_quant && c.c_label = c2.c_label &&
  649 + c.c_def_label = c2.c_def_label then (i,c2.c_quant) :: lt1, (i,c2.c_relations) :: lt2, (i,c2.c_cat) :: lt3 else raise Not_found
650 650 | _ -> raise Not_found) in
651   - Concept{c with c_relations = simplify_tree (Variant(e,lt))}
  651 + let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in
  652 + Concept{c with
  653 + c_quant = simplify_tree (Variant(e,lt1));
  654 + c_relations = simplify_tree (Variant(e,lt2));
  655 + c_cat = simplify_tree (Variant(e,lt3))}
652 656 | Context c ->
653   - let lt1,lt2 = Xlist.fold l ([],[]) (fun (lt1,lt2) -> function
654   - i,Context c2 -> (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2
  657 + let lt1,lt2,lt3 = Xlist.fold l ([],[],[]) (fun (lt1,lt2,lt3) -> function
  658 + i,Context c2 -> if c.cx_sense = c2.cx_sense then (i,c2.cx_contents) :: lt1, (i,c2.cx_relations) :: lt2, (i,c2.cx_cat) :: lt3 else raise Not_found
  659 + | _ -> raise Not_found) in
  660 + let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in
  661 + Context{c with
  662 + cx_contents= simplify_tree (Variant(e,lt1));
  663 + cx_relations = simplify_tree (Variant(e,lt2));
  664 + cx_cat = simplify_tree (Variant(e,lt3))}
  665 + | Relation(r,a,t) ->
  666 + let lt = Xlist.fold l [] (fun lt -> function
  667 + i,Relation(r2,a2,t2) -> if r = r2 && a = a2 then (i,t2) :: lt else raise Not_found
  668 + | _ -> raise Not_found) in
  669 + simplify_tree (Relation(r,a,Variant(e,lt)))
  670 + (* | TripleRelation(r,a,s,t) ->
  671 + let ls,lt = Xlist.fold l ([],[]) (fun (ls,lt) -> function
  672 + i,TripleRelation(r2,a2,s2,t2) -> if r = r2 && a = a2 then (i,s2) :: ls, (i,t2) :: lt else raise Not_found
655 673 | _ -> raise Not_found) in
656   - Context{c with cx_contents= simplify_tree (Variant(e,lt1)); cx_relations = simplify_tree (Variant(e,lt2))}
  674 + simplify_tree (TripleRelation(r,a,Variant(e,ls),Variant(e,lt))) *)
657 675 | Tuple tl ->
658 676 (* print_endline ("V3: " ^ LCGstringOf.linear_term 0 (Variant l)); *)
659 677 let n = Xlist.size tl in
660 678 let lt = Xlist.fold l [] (fun lt -> function
661 679 i,Tuple tl -> if n = Xlist.size tl then (i,tl) :: lt else raise Not_found
662 680 | _ -> raise Not_found) in
  681 + let e = if e = "" then ENIAM_LCGreductions.get_variant_label () else e in
663 682 let t = Tuple(transpose_tuple_variant e lt) in
664 683 (* print_endline ("V4: " ^ LCGstringOf.linear_term 0 t); *)
665 684 simplify_tree t
... ... @@ -673,6 +692,14 @@ let rec simplify_tree = function
673 692 | Val s -> Val s
674 693 | t -> failwith ("simplify_tree: " ^ ENIAMsemStringOf.linear_term 0 t)
675 694  
  695 +let greater_simplify tree =
  696 + let map = count_variant_labels StringQMap.empty tree in
  697 + let tree = remove_variant_labels map tree in
  698 + let tree = simplify_tree tree in
  699 + let map = count_variant_labels StringQMap.empty tree in
  700 + let tree = remove_variant_labels map tree in
  701 + tree
  702 +
676 703 let rec manage_quantification2 (quants,quant) = function
677 704 Tuple l -> Xlist.fold l (quants,quant) manage_quantification2
678 705 | Dot -> quants,quant
... ... @@ -697,38 +724,26 @@ let rec manage_quantification = function
697 724 | t -> failwith ("manage_quantification: " ^ ENIAMsemStringOf.linear_term 0 t)
698 725  
699 726 let simplify_gender2 = function
700   - Variant(e,l) ->
701   - (try
702   - let l2 = List.sort compare (Xlist.rev_map l (function (_,Val s) -> s | _ -> raise Not_found)) in
703   - match l2 with
704   - ["f"; "m1"; "m2"; "m3"; "n"(*"n1"; "n2"; "p1"; "p2"; "p3"*)] -> Dot
  727 + Variant("",l) ->
  728 + let l2 = List.sort compare (Xlist.rev_map l (function (_,Val s) -> s | _ -> raise Not_found)) in
  729 + (match l2 with
  730 + ["f"; "m1"; "m2"; "m3"; "n"] -> Dot
705 731 | ["m1"; "m2"; "m3"] -> Val "m"
706   - (* | ["n1"; "n2"] -> Val "n" *)
707   - | ["f"; "m2"; "m3"; "n"(*"n1"; "n2"; "p2"; "p3"*)] -> Val "nmo"
708   - (* | ["m1"; "p1"] -> Val "mo" *)
709   - (* | ["f"; "m1"; "m2"; "m3"; "n1"; "n2"] -> Dot *)
  732 + | ["f"; "m2"; "m3"; "n"] -> Val "nmo"
710 733 | ["pl"; "sg"] -> Dot
711   -(* | -> Val ""
712   - | -> Val ""*)
713   - | _ -> (*print_endline ("[\"" ^ String.concat "\"; \"" l2 ^ "\"]");*) Variant(e,l)
714   - with Not_found -> Variant(e,l))
715   - | t -> t
  734 + | _ -> raise Not_found)
  735 + | _ -> raise Not_found
716 736  
717 737 let rec simplify_gender = function
718   - Node t -> Node{t with args=simplify_gender t.args}
719   - | Concept c -> Concept{c with c_relations=simplify_gender c.c_relations}
  738 + Concept c -> Concept{c with c_relations=simplify_gender c.c_relations; c_quant=simplify_gender c.c_quant}
720 739 | Context c -> Context{c with cx_contents=simplify_gender c.cx_contents; cx_relations=simplify_gender c.cx_relations}
721 740 | Relation(r,a,t) -> Relation(r,a,simplify_gender t)
722 741 | RevRelation(r,a,t) -> RevRelation(r,a,simplify_gender t)
723   - | SingleRelation r ->
724   - let t = simplify_gender2 r in
725   - if t = Dot then Dot else SingleRelation t
726   - (* | TripleRelation(r,a,s,t) -> TripleRelation(r,a,simplify_gender s,simplify_gender t) *)
727   - | AddRelation(t,r,a,s) -> AddRelation(simplify_gender t,r,a,simplify_gender s)
728   - (* | RemoveRelation t -> RemoveRelation(simplify_gender t) *)
729   - (* | MakeTripleRelation(r,a,t) -> MakeTripleRelation(r,a,simplify_gender t) *)
  742 + | SingleRelation r -> SingleRelation r
730 743 | Tuple l -> Tuple(Xlist.map l simplify_gender)
731   - | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, simplify_gender t))
  744 + | Variant(e,l) ->
  745 + (try simplify_gender2 (Variant(e,l)) with Not_found ->
  746 + Variant(e,Xlist.map l (fun (i,t) -> i, simplify_gender t)))
732 747 | Dot -> Dot
733 748 | Val s -> Val s
734 749 | t -> failwith ("simplify_gender: " ^ ENIAMsemStringOf.linear_term 0 t)
... ...
semantics/ENIAMsemGraphOf.ml
... ... @@ -30,7 +30,7 @@ let escape_string s =
30 30  
31 31 let string_of_node t =
32 32 let l = [
33   - "ORTH",Val t.orth;"LEMMA",Val t.lemma;"POS",Val t.pos;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);
  33 + "ORTH",Val t.orth;"LEMMA",Val t.lemma;"POS",Val t.pos;"ID",Val (string_of_int t.id);"LABEL",Val t.label;"DEF-LABEL",Val t.def_label;"WEIGHT",Val (string_of_float t.weight);
34 34 "SYMBOL",t.symbol;"ARG_SYMBOL",t.arg_symbol;"ARG_DIR",Val t.arg_dir;
35 35 "GF",Val t.gf;"ROLE",Val t.role;"ROLE_ATTR",Val t.role_attr;"SELPREFS",t.selprefs;"MEANING",t.meaning;
36 36 "AROLE",Val t.arole;"AROLE_ATTR",Val t.role_attr;"AREV",Val (string_of_bool t.arev);"SEM_ARGS",t.sem_args] @ t.attrs in
... ... @@ -212,9 +212,10 @@ let rec print_graph2_rec file edge_rev edge_label edge_style edge_head upper = f
212 212 | Concept t ->
213 213 let id = !id_counter in
214 214 incr id_counter;
215   - fprintf file " %d [shape=box,label=<%s%s%s %s>]\n" id
  215 + fprintf file " %d [shape=box,label=<%s%s%s%s%s %s>]\n" id
216 216 (string_of_quant t.c_quant)
217   - (* (if t.c_proj=Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.c_proj ^ " ")) *)
  217 + (if t.c_label="" then "" else "?" ^ t.c_label ^ " ")
  218 + (if t.c_def_label="" then "" else "*" ^ t.c_def_label ^ " ")
218 219 (if t.c_cat=Dot then "" else escape_string (ENIAMsemStringOf.linear_term 0 t.c_cat ^ " "))
219 220 (escape_string (ENIAMsemStringOf.linear_term 0 t.c_sense))
220 221 (if t.c_name=Dot then "" else "„" ^ ENIAMsemStringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *)
... ...
semantics/ENIAMsemLatexOf.ml
... ... @@ -46,7 +46,7 @@ let rec linear_term c = function
46 46 | Val s -> "\\text{" ^ Xlatex.escape_string s ^ "}"
47 47 | Node t ->
48 48 "{\\left[\\begin{array}{ll}" ^
49   - (String.concat "\\\\ " (Xlist.map (["ORTH",Val t.orth;"LEMMA",Val t.lemma;"POS",Val t.pos;"ID",Val (string_of_int t.id);
  49 + (String.concat "\\\\ " (Xlist.map (["ORTH",Val t.orth;"LEMMA",Val t.lemma;"POS",Val t.pos;"ID",Val (string_of_int t.id);"LABEL",Val t.label;"DEF-LABEL",Val t.def_label;
50 50 "WEIGHT",Val (string_of_float t.weight);"SYMBOL",t.symbol;
51 51 "ARG_SYMBOL",t.arg_symbol;"ARG_DIR",Val t.arg_dir;
52 52 "GF",Val t.gf; "ROLE", Val t.role; "ROLE-ATTR", Val t.role_attr;
... ... @@ -59,7 +59,7 @@ let rec linear_term c = function
59 59 | Concept c ->
60 60 "{\\left[\\begin{array}{ll}" ^
61 61 (String.concat "\\\\ " (Xlist.map ([
62   - "SENSE",c.c_sense;"NAME",c.c_name;"CAT",c.c_cat;(*"PROJ",c.c_proj;*)
  62 + "SENSE",c.c_sense;"NAME",c.c_name;"CAT",c.c_cat;"LABEL",Val c.c_label;"DEF-LABEL",Val c.c_def_label;
63 63 "VARIABLE",Val (fst c.c_variable ^ "_" ^ snd c.c_variable);"POS",Val (string_of_int c.c_pos);
64 64 "QUANT",c.c_quant;"LOCAL-QUANT",if c.c_local_quant then Val "+" else Val "-";"RELATIONS",c.c_relations]) (fun (e,t) ->
65 65 "\\text{" ^ (Xlatex.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}"
... ...
semantics/ENIAMsemStringOf.ml
... ... @@ -28,7 +28,7 @@ let rec linear_term c = function
28 28 | Val s -> s
29 29 | Node t ->
30 30 "[" ^
31   - (String.concat "; " (Xlist.map (["ORTH",Val t.orth;"LEMMA",Val t.lemma;"POS",Val t.pos;"ID",Val (string_of_int t.id);
  31 + (String.concat "; " (Xlist.map (["ORTH",Val t.orth;"LEMMA",Val t.lemma;"POS",Val t.pos;"ID",Val (string_of_int t.id);"LABEL",Val t.label;"DEF-LABEL",Val t.def_label;
32 32 "WEIGHT",Val (string_of_float t.weight);"SYMBOL",t.symbol;
33 33 "ARG_SYMBOL",t.arg_symbol;"ARG_DIR",Val t.arg_dir;"ARGS",t.args] @ t.attrs) (fun (e,t) ->
34 34 e ^ ": " ^ (linear_term 0 t)))) ^ "]"
... ... @@ -36,7 +36,7 @@ let rec linear_term c = function
36 36 | Concept c ->
37 37 "[" ^
38 38 (String.concat "; " (Xlist.map ([
39   - "SENSE",c.c_sense;"NAME",c.c_name;"CAT",c.c_cat;(*"PROJ",c.c_proj;*)
  39 + "SENSE",c.c_sense;"NAME",c.c_name;"CAT",c.c_cat;"LABEL",Val c.c_label;"DEF-LABEL",Val c.c_def_label;
40 40 "VARIABLE",Val (fst c.c_variable ^ "_" ^ snd c.c_variable);"POS",Val (string_of_int c.c_pos);
41 41 "QUANT",c.c_quant;"LOCAL-QUANT",if c.c_local_quant then Val "+" else Val "-";"RELATIONS",c.c_relations]) (fun (e,t) ->
42 42 e ^ ": " ^ (linear_term 0 t)))) ^ "]"
... ...
semantics/ENIAMsemTypes.ml
... ... @@ -45,14 +45,16 @@ type node = {
45 45 arole_attr: string;
46 46 arev: bool;
47 47 sem_args: linear_term;
  48 + label: string;
  49 + def_label: string;
48 50 }
49 51  
50 52 and concept =
51 53 {c_sense: linear_term; c_name: linear_term; (*c_visible_var: bool;*)
52   - c_quant: linear_term; c_local_quant: bool;
  54 + c_quant: linear_term; c_local_quant: bool; c_label: string; c_def_label: string;
53 55 (*c_modalities: (string * type_term) list;*)
54 56 c_relations: linear_term; c_variable: (string * string);
55   - c_pos: int; c_cat: linear_term(*; c_proj: linear_term*)}
  57 + c_pos: int; c_cat: linear_term}
56 58  
57 59 and context =
58 60 {cx_sense: linear_term; cx_contents: linear_term;
... ... @@ -79,3 +81,46 @@ and linear_term =
79 81 | Ref of int
80 82  
81 83 let sem_lexicon_filename = ENIAMwalTypes.resource_path ^ "/semantics/lexicon-pl.dic"
  84 +
  85 +let user_ontology_flag = ref false
  86 +
  87 +let rec compare_linear_term = function
  88 + Concept c,Concept d ->
  89 + if c.c_sense = d.c_sense && c.c_name = d.c_name && c.c_label = d.c_label &&
  90 + c.c_def_label = d.c_def_label && c.c_cat = d.c_cat && c.c_local_quant = d.c_local_quant &&
  91 + compare_linear_term (c.c_quant,d.c_quant) = 0 &&
  92 + compare_linear_term (c.c_relations,d.c_relations) = 0 then 0 else compare c d
  93 + | Context c,Context d ->
  94 + if c.cx_sense = d.cx_sense && c.cx_cat = d.cx_cat &&
  95 + compare_linear_term (c.cx_contents,d.cx_contents) = 0 &&
  96 + compare_linear_term (c.cx_relations,d.cx_relations) = 0 then 0 else compare c d
  97 + | (Relation(r1,a1,t1) as s),(Relation(r2,a2,t2) as t) ->
  98 + if r1 = r2 && a1 = a2 then compare_linear_term (t1,t2) else compare s t
  99 + | (RevRelation(r1,a1,t1) as s),(RevRelation(r2,a2,t2) as t) ->
  100 + if r1 = r2 && a1 = a2 then compare_linear_term (t1,t2) else compare s t
  101 + | SingleRelation s,SingleRelation t -> compare_linear_term (s,t)
  102 + | (Tuple l1 as s),(Tuple l2 as t) ->
  103 + if Xlist.size l1 = Xlist.size l2 then
  104 + if Xlist.fold2 l1 l2 true (fun b t1 t2 ->
  105 + b && compare_linear_term (t1,t2) = 0) then 0
  106 + else compare s t
  107 + else compare s t
  108 + | (Variant(e1,l1) as s),(Variant(e2,l2) as t) ->
  109 + if e1 = e2 && Xlist.size l1 = Xlist.size l2 then
  110 + if Xlist.fold2 l1 l2 true (fun b (i1,t1) (i2,t2) ->
  111 + b && i1 = i2 && compare_linear_term (t1,t2) = 0) then 0
  112 + else compare s t
  113 + else compare s t
  114 + | s,t -> compare s t
  115 +
  116 +
  117 +module OrderedTerm = struct
  118 +
  119 + type t = linear_term
  120 +
  121 + let compare s t = compare_linear_term (s,t)
  122 +
  123 +end
  124 +
  125 +module TermMap = Xmap.Make(OrderedTerm)
  126 +module TermSet = Xset.Make(OrderedTerm)
... ...
semantics/ENIAMsemValence.ml
... ... @@ -256,15 +256,17 @@ let rec match_args_positions_rec lemma prong_attrs positions = function
256 256 if b then [] else
257 257 [Xlist.fold positions [] (fun found p ->
258 258 if not p.is_pro then found else
259   - let attrs = if p.is_prong then prong_attrs else [] in (* FIXME: dodać number, gender *)
  259 + let attrs = if p.is_prong then prong_attrs else [] in
  260 + let cats = p.selprefs(*ENIAM_LCGrules.make_variant (ENIAMwalRenderer.extract_sel_prefs p.sel_prefs)*) in
260 261 let lemma = get_pro_lemma attrs in
  262 + let attrs = ["CAT",cats;"PROJ",cats] @ attrs in
261 263 let sem_args = try StringMap.find ENIAMlexSemanticsData.pron_sem_args lemma with Not_found -> failwith "match_args_positions_rec" in
262 264 let attrs = ["meaning",Val lemma;"hipero",Tuple[Val "ALL"; Val "0"];"role",p.role;
263 265 "role-attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf);
264 266 "agf",Val ""; "sem-args",make_sem_args sem_args; "rev-hipero",Val "+"] @ attrs in
265 267 let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in
266 268 let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in
267   - Node{ENIAM_LCGrenderer.empty_node with lemma="pro"; pos="pro"; attrs=attrs} :: found)]
  269 + Node{ENIAM_LCGrenderer.empty_node with lemma=lemma; pos="pro"; attrs=attrs} :: found)]
268 270  
269 271 (* FIXME: opcjonalność podrzędników argumentów zleksykalizowanych *)
270 272  
... ...
semantics/makefile
... ... @@ -9,23 +9,23 @@ OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa b
9 9 eniam-lexSemantics.cmxa #eniam-semantics.cmxa
10 10 INSTALLDIR=`ocamlc -where`/eniam
11 11  
12   -SOURCES= ENIAMsemTypes.ml ENIAMsemLexicon.ml ENIAMsemValence.ml ENIAMdisambiguation.ml ENIAMsemStringOf.ml ENIAMsemGraph.ml ENIAMsemLatexOf.ml ENIAMsemGraphOf.ml
  12 +SOURCES= ENIAMsemTypes.ml ENIAMsemLexicon.ml ENIAMsemValence.ml ENIAMdisambiguation.ml ENIAMcoreference.ml ENIAMsemStringOf.ml ENIAMsemGraph.ml ENIAMsemLatexOf.ml ENIAMsemGraphOf.ml
13 13  
14 14 all: eniam-semantics.cma eniam-semantics.cmxa
15 15  
16 16 install: all
17 17 mkdir -p $(INSTALLDIR)
18 18 cp eniam-semantics.cmxa eniam-semantics.a eniam-semantics.cma $(INSTALLDIR)
19   - cp ENIAMsemTypes.cmi ENIAMsemLexicon.cmi ENIAMsemValence.cmi ENIAMdisambiguation.cmi ENIAMsemStringOf.cmi ENIAMsemGraph.cmi ENIAMsemLatexOf.cmi ENIAMsemGraphOf.cmi $(INSTALLDIR)
20   - cp ENIAMsemTypes.cmx ENIAMsemLexicon.cmx ENIAMsemValence.cmx ENIAMdisambiguation.cmx ENIAMsemStringOf.cmx ENIAMsemGraph.cmx ENIAMsemLatexOf.cmx ENIAMsemGraphOf.cmx $(INSTALLDIR)
  19 + cp ENIAMsemTypes.cmi ENIAMsemLexicon.cmi ENIAMsemValence.cmi ENIAMdisambiguation.cmi ENIAMcoreference.cmi ENIAMsemStringOf.cmi ENIAMsemGraph.cmi ENIAMsemLatexOf.cmi ENIAMsemGraphOf.cmi $(INSTALLDIR)
  20 + cp ENIAMsemTypes.cmx ENIAMsemLexicon.cmx ENIAMsemValence.cmx ENIAMdisambiguation.cmx ENIAMcoreference.cmx ENIAMsemStringOf.cmx ENIAMsemGraph.cmx ENIAMsemLatexOf.cmx ENIAMsemGraphOf.cmx $(INSTALLDIR)
21 21 mkdir -p /usr/share/eniam/semantics
22 22 cp resources/* /usr/share/eniam/semantics
23 23  
24 24 install-local: all
25 25 mkdir -p $(INSTALLDIR)
26 26 cp eniam-semantics.cmxa eniam-semantics.a eniam-semantics.cma $(INSTALLDIR)
27   - cp ENIAMsemTypes.cmi ENIAMsemLexicon.cmi ENIAMsemValence.cmi ENIAMdisambiguation.cmi ENIAMsemStringOf.cmi ENIAMsemGraph.cmi ENIAMsemLatexOf.cmi ENIAMsemGraphOf.cmi $(INSTALLDIR)
28   - cp ENIAMsemTypes.cmx ENIAMsemLexicon.cmx ENIAMsemValence.cmx ENIAMdisambiguation.cmx ENIAMsemStringOf.cmx ENIAMsemGraph.cmx ENIAMsemLatexOf.cmx ENIAMsemGraphOf.cmx $(INSTALLDIR)
  27 + cp ENIAMsemTypes.cmi ENIAMsemLexicon.cmi ENIAMsemValence.cmi ENIAMdisambiguation.cmi ENIAMcoreference.cmi ENIAMsemStringOf.cmi ENIAMsemGraph.cmi ENIAMsemLatexOf.cmi ENIAMsemGraphOf.cmi $(INSTALLDIR)
  28 + cp ENIAMsemTypes.cmx ENIAMsemLexicon.cmx ENIAMsemValence.cmx ENIAMdisambiguation.cmx ENIAMcoreference.cmx ENIAMsemStringOf.cmx ENIAMsemGraph.cmx ENIAMsemLatexOf.cmx ENIAMsemGraphOf.cmx $(INSTALLDIR)
29 29 mkdir -p /usr/local/share/eniam/semantics
30 30 cp resources/* /usr/local/share/eniam/semantics
31 31  
... ...
semantics/resources/lexicon-pl.dic
... ... @@ -98,7 +98,7 @@ rparen: null;
98 98 rparen2: null;
99 99  
100 100 <conll_root>: /(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk;
101   -s: \?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk;
  101 +s: \?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj):null;
102 102 <root>: /(1+s):unk /(1+<speaker-end>):unk /(1+or):unk /(1+np*T*nom*T*T):unk /(1+ip*T*T*T):unk;
103 103  
104 104 or: null;
... ...
subsyntax/interface.ml
... ... @@ -36,10 +36,6 @@ let spec_list = [
36 36 "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";
37 37 "--strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=true), "Perform strong disambiguation";
38 38 "--no-strong-disamb", Arg.Unit (fun () -> ENIAMsubsyntaxTypes.strong_disambiguate_flag:=false), "Do not perform strong disambiguation (default)";
39   - (* "-r", Arg.String (fun p ->
40   - ENIAMtokenizerTypes.set_resource_path p;
41   - ENIAMmorphologyTypes.set_resource_path p;
42   - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)
43 39 ]
44 40  
45 41 let usage_msg =
... ...