Commit b8f39c6574ce14459ad32c358b36a2f6c1ba9248

Authored by Wojciech Jaworski
1 parent d102eb5a

Analiza kontroli w Walentym

LCGlexicon/ENIAMcategoriesPL.ml
... ... @@ -86,6 +86,7 @@ let subst_time_lexemes = ref StringSet.empty
86 86  
87 87 let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"]
88 88 let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"]
  89 +let compar_lexemes = StringSet.of_list ["jak"; "jako"; "niż"; "niczym"; "niby"; "co"; "zamiast"]
89 90  
90 91 (* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *)
91 92  
... ... @@ -200,12 +201,12 @@ let clarify_categories proper cat = function
200 201 let cases = expand_cases cases in
201 202 [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}]
202 203 | lemma,"prep",[cases;woks] ->
203   - if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" || lemma = "zamiast" then
  204 + if StringSet.mem compar_lexemes lemma then
204 205 [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else
205 206 let cases = expand_cases cases in
206 207 [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases}]
207 208 | lemma,"prep",[cases] ->
208   - if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" || lemma = "zamiast" then
  209 + if StringSet.mem compar_lexemes lemma then
209 210 [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else
210 211 let cases = expand_cases cases in
211 212 [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases}]
... ...
LCGlexicon/TODO
  1 +- day-lex nie jest argumentem w żadnej regule
  2 +
1 3 - dodać uzgodnienie policzalności liczebnika i rzeczownika
2 4  
3 5 - dodać prepncp
... ...
LCGlexicon/resources/lexicon-pl.dic
... ... @@ -105,7 +105,10 @@ lemma=jakiś|ten|taki,pos=apron:
105 105  
106 106 # liczebniki
107 107 # FIXME: liczba po rzeczowniku # FIXME: zbadać jak liczebniki współdziałają z jako COMPAR
108   -pos=num|intnum|realnum|intnum-interval|realnum-interval:
  108 +# dwie reguły są potrzebne po to, żeby w ENIAMsemValence.match_value nie pojawiał się variant
  109 +pos=num|intnum|realnum|intnum-interval|realnum-interval,nsem=count:
  110 + num*number*case*gender*person*acm*nsem{\(1+qub),/(1+inclusion)}; # FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie?
  111 +pos=num|intnum|realnum|intnum-interval|realnum-interval,nsem=mass:
109 112 num*number*case*gender*person*acm*nsem{\(1+qub),/(1+inclusion)}; # FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie?
110 113  
111 114 # pojemniki
... ... @@ -138,7 +141,7 @@ lemma=w,pos=prep,case=loc: prepnp*lemma*case{\(1+advp*T),/(day-month+day+ye
138 141  
139 142 # komparatywy
140 143 # FIXME: trzeba poprawić comparnp i comparpp w walencji
141   -pos=compar: QUANT[case=nom&gen&dat&acc&inst] compar*lemma*case{\(1+advp*T),/np*T*case*T*T}{\(1+qub),/(1+inclusion)};
  144 +pos=compar: QUANT[case=nom&gen&dat&acc&inst] compar*lemma*case{\(1+advp*T),/(np*T*case*T*T+adjp*T*case*T)}{\(1+qub),/(1+inclusion)};
142 145 pos=compar: QUANT[case=postp] compar*lemma*case{\(1+advp*T),/(prepnp*T*T+prepadjp*T*T)}{\(1+qub),/(1+inclusion)};
143 146  
144 147 # frazy przymiotnikowe
... ...
lexSemantics/ENIAMlexSemantics.ml
... ... @@ -205,6 +205,20 @@ let mark_nosem frame =
205 205 else p))}
206 206  
207 207 let assign_prep_semantics lemma =
  208 + if StringSet.mem ENIAMcategoriesPL.compar_lexemes lemma then
  209 + [{empty_frame with
  210 + meanings = [find_prep_meaning lemma [Predef "ALL"]];
  211 + positions= [{empty_position with
  212 + dir=Forward_; gf=CORE;
  213 + morfs=ENIAMwalRenderer.compar_morfs; is_necessary=Req}];
  214 + agf="arg"};
  215 + {empty_frame with
  216 + meanings = [find_prep_meaning lemma [Predef "ALL"]];
  217 + positions= [{empty_position with
  218 + sel_prefs=[SynsetName "ALL"]; dir=Forward_; gf=CORE;
  219 + morfs=ENIAMwalRenderer.compar_morfs; is_necessary=Req}];
  220 + arole="Arg"; arole_attr=""; arev=false; agf="adjunct"}]
  221 + else
208 222 let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in
209 223 (* Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); *)
210 224 {empty_frame with
... ... @@ -213,6 +227,14 @@ let assign_prep_semantics lemma =
213 227 dir=if lemma="temu" then Backward_ else Forward_; gf=CORE;
214 228 morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}];
215 229 agf="arg"} ::
  230 + (if roles = [] then (* FIXME: zaślepka do usunięcia po stworzeniu listy przyimków *)
  231 + [{empty_frame with
  232 + meanings = [find_prep_meaning lemma [Predef "ALL"]];
  233 + positions= [{empty_position with
  234 + sel_prefs=[SynsetName "ALL"]; dir=if lemma="temu" then Backward_ else Forward_; gf=CORE;
  235 + morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}];
  236 + arole="Arg"; arole_attr=""; arev=false; agf="adjunct"}]
  237 + else
216 238 Xlist.map roles (function (case,arole,arole_attr,hipero,sel_prefs) ->
217 239 (* Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr; *)
218 240 let meaning = find_prep_meaning lemma hipero in (* FIXME: zaślepka dla meaning i weight *)
... ... @@ -222,7 +244,7 @@ let assign_prep_semantics lemma =
222 244 morfs=ENIAMwalRenderer.prep_morfs(*ENIAMwalRenderer.assing_prep_morfs (lemma,case)*); is_necessary=Req}] in
223 245 (* print_endline "assign_prep_semantics 2"; *)
224 246 {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions;
225   - arole=arole; arole_attr=arole_attr; arev=false; agf="adjunct"})
  247 + arole=arole; arole_attr=arole_attr; arev=false; agf="adjunct"}))
226 248  
227 249 let assign_num_semantics lemma =
228 250 let sems = try StringMap.find !num_sem lemma with Not_found -> [] in
... ... @@ -369,7 +391,7 @@ let disambiguate_senses lex_sems group =
369 391 Xlist.fold frame.positions prefs (fun prefs t ->
370 392 Xlist.fold t.sel_prefs prefs (fun prefs -> function
371 393 SynsetName s -> StringSet.add prefs s
372   - | _ -> failwith "disambiguate_senses")))) in
  394 + | t -> failwith ("disambiguate_senses: " ^ ENIAMwalStringOf.sel_prefs t))))) in
373 395 (*let hipero = Xlist.fold group (StringSet.singleton "ALL") (fun hipero id ->
374 396 Xlist.fold (ExtArray.get lex_sems id).senses hipero (fun hipero (_,l,_) ->
375 397 Xlist.fold l hipero StringSet.add)) in
... ...
lexSemantics/ENIAMlexSemanticsData.ml
... ... @@ -267,19 +267,19 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
267 267 "przed","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
268 268 "w","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
269 269 "za","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
270   - "dzięki","dat", "Condition","",["CZEMU"],[];
271   - "na","acc", "Condition","",["CZEMU"],[];
272   - "na","postp", "Condition","",["CZEMU"],[];
273   - "od","gen", "Condition","",["CZEMU"],[];
274   - "przez","acc", "Condition","",["CZEMU"],[];
275   - "wskutek","gen", "Condition","",["CZEMU"],[];
276   - "z","gen", "Condition","",["CZEMU"],[];
277   - "dla","gen", "Purpose","",["CZEMU"],[];
278   - "do","gen", "Purpose","",["CZEMU"],[];
279   - "ku","dat", "Purpose","",["CZEMU"],[];
280   - "na","acc", "Purpose","",["CZEMU"],[];
281   - "na","postp", "Purpose","",["CZEMU"],[];
282   - "po","acc", "Purpose","",["CZEMU"],[];
  270 + "dzięki","dat", "Condition","",["CZEMU"],["ALL"];
  271 + "na","acc", "Condition","",["CZEMU"],["ALL"];
  272 + "na","postp", "Condition","",["CZEMU"],["ALL"];
  273 + "od","gen", "Condition","",["CZEMU"],["ALL"];
  274 + "przez","acc", "Condition","",["CZEMU"],["ALL"];
  275 + "wskutek","gen", "Condition","",["CZEMU"],["ALL"];
  276 + "z","gen", "Condition","",["CZEMU"],["ALL"];
  277 + "dla","gen", "Purpose","",["CZEMU"],["ALL"];
  278 + "do","gen", "Purpose","",["CZEMU"],["ALL"];
  279 + "ku","dat", "Purpose","",["CZEMU"],["ALL"];
  280 + "na","acc", "Purpose","",["CZEMU"],["ALL"];
  281 + "na","postp", "Purpose","",["CZEMU"],["ALL"];
  282 + "po","acc", "Purpose","",["CZEMU"],["ALL"];
283 283 "do","gen", "Duration","",["CZAS"],["CZAS"];
284 284 "od","gen", "Duration","",["CZAS"],["CZAS"];
285 285 "przez","acc", "Duration","",["CZAS"],["CZAS"];
... ... @@ -326,13 +326,13 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
326 326 "wedle","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
327 327 "wpośród","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
328 328 "wśrzód","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
329   - "po","postp", "Manner","",["ALL"],[];
330   - "bez","gen", "Manner","",["ALL"],[];
  329 + "po","postp", "Manner","",["ALL"],["ALL"];
  330 + "bez","gen", "Manner","",["ALL"],["ALL"];
331 331 (* "jako","str", "Manner","",[],[];
332 332 "jak","str", "Manner","",[],[];*)
333   - "pod","acc", "Manner","",["ALL"],[];
334   - "z","inst", "Manner","",["ALL"],[];
335   - "z","postp", "Manner","",["ALL"],[];
  333 + "pod","acc", "Manner","",["ALL"],["ALL"];
  334 + "z","inst", "Manner","",["ALL"],["ALL"];
  335 + "z","postp", "Manner","",["ALL"],["ALL"];
336 336 "dokoła","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"];
337 337 "dookoła","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"];
338 338 "koło","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"];
... ... @@ -368,9 +368,10 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
368 368 "w","loc", "Time","",["CZAS"],["CZAS"];
369 369 "temu","acc", "Time","",["CZAS"],["CZAS"]; (* dodane *)
370 370 "za","gen", "Time","",["CZAS"],["CZAS"]; (* dodane *)
  371 + "o","acc", "Arg","",["ALL"],["ALL"]; (* FIXME: zaślepka *)
371 372 ] StringMap.empty (fun map (lemma,case,role,role_attr,hipero,sel_prefs) ->
372 373 let hipero = Xlist.map hipero (fun hipero -> ENIAMwalTypes.Predef hipero) in
373   - let sel_prefs = Xlist.map sel_prefs (fun sel_prefs -> ENIAMwalTypes.Predef sel_prefs) in
  374 + let sel_prefs = Xlist.map sel_prefs (fun sel_prefs -> ENIAMwalTypes.SynsetName sel_prefs) in
374 375 StringMap.add_inc map lemma [case,role,role_attr,hipero,sel_prefs]
375 376 (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l))
376 377 (* "przeciwko","dat","Dat";
... ...
lexSemantics/ENIAMvalence.ml
... ... @@ -641,6 +641,7 @@ let get_aroles schema lemma = function
641 641 | "perl" -> ([Mode,Eq,["perl"]],"Path","",false) :: l
642 642 | "dur" -> ([Mode,Eq,["dur"]],"Duration","",false) :: l
643 643 | "temp" -> ([Mode,Eq,["temp"]],"Time","",false) :: l
  644 + | "pron" -> ([Mode,Eq,["mod"]],"Manner","",false) :: l
644 645 | _ -> failwith "get_aroles")
645 646 | "qub" ->
646 647 let l = try StringMap.find ENIAMlexSemanticsData.qub_roles lemma with Not_found -> ["Arg",""] in
... ...
lexSemantics/ENIAMwalRenderer.ml
... ... @@ -363,3 +363,10 @@ let prep_morfs = [
363 363 LCG(Tensor[Atom "day"]);
364 364 LCG(Tensor[Atom "date"]);
365 365 ]
  366 +
  367 +let compar_morfs = [
  368 + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]);
  369 + LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]);
  370 + LCG(Tensor[Atom "prepnp"; Top; Top]);
  371 + LCG(Tensor[Atom "prepadjp"; Top; Top]);
  372 + ]
... ...
semantics/ENIAMsemGraph.ml
... ... @@ -94,6 +94,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t =
94 94 | "NSYN",Val "pronoun" -> c(*{c with c_quant=Tuple[c.c_quant;Val "indexical"]}*),measure,cx_flag
95 95 | "NSEM",Val "count" -> c(*{c with c_quant=Tuple[c.c_quant;Val "count"]}*),measure,cx_flag
96 96 | "NSEM",Val "mass" -> {c with c_quant=Tuple[c.c_quant;Val "mass"]},measure,cx_flag
  97 + | "NSEM",Variant(e,[a,Val "mass";b,Val "count"]) -> {c with c_quant=Tuple[c.c_quant;Variant(e,[a,Val "mass";b,Val "count"])]},measure,cx_flag (* FIXME: tu by należało podzielić to na dwa pudełka *)
  98 + | "NSEM",Variant(e,[a,Val "count";b,Val "mass"]) -> {c with c_quant=Tuple[c.c_quant;Variant(e,[a,Val "count";b,Val "mass"])]},measure,cx_flag
97 99 | "NSEM",Val "measure" -> c,true,cx_flag
98 100 | "NSEM",Val "time" -> c,measure,cx_flag(*failwith "create_normal_concept: time"*)
99 101 | "NUM",t -> {c with c_quant=Tuple[c.c_quant;t]},measure,cx_flag
... ... @@ -110,7 +112,7 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t =
110 112 | "LEX",_ -> c,measure,cx_flag (* FIXME *) *)
111 113 (* | "TYPE",Val "int" -> {c with c_quant=Tuple[c.c_quant;Val "interrogative"]},measure *)
112 114 (* | "TYPE",_ -> c,measure,cx_flag (* FIXME *) *)
113   - | e,t -> failwith ("create_normal_concept noun: " ^ e)) in
  115 + | e,t -> failwith ("create_normal_concept noun: " ^ e ^ ": " ^ ENIAMsemStringOf.linear_term 0 t)) in
114 116 (* let c = if t.pos = "depr" then {c with c_relations=Tuple[c.c_relations;SingleRelation(Val "depr")]} else c in *)
115 117 if cx_flag then
116 118 let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in
... ...
semantics/ENIAMsemLexicon.ml
... ... @@ -65,6 +65,8 @@ let parse_role p = function
65 65 "adjunct" -> {p with gf=ADJUNCT}
66 66 | "unk" -> {p with role="unk"}
67 67 | "nosem" -> {p with gf=NOSEM}
  68 + | "Poss" -> {p with role="Poss"; sel_prefs=[SynsetName "ALL"]}
  69 + | "Coref" -> {p with role="Coref"; sel_prefs=[SynsetName "ALL"]}
68 70 | "Count" -> {p with role="Count"; sel_prefs=[SynsetName "ALL"]}
69 71 | "Measure" -> {p with role="Measure"; sel_prefs=[SynsetName "ALL"]}
70 72 | s -> failwith ("parse_role: " ^ s)
... ...
semantics/ENIAMsemValence.ml
... ... @@ -278,8 +278,8 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function
278 278 let s = ExtArray.get lex_sems t.id in
279 279 let symbol = get_phrase_symbol t.symbol in
280 280 let frames = Xlist.fold s.ENIAMlexSemanticsTypes.frames [] (fun frames frame ->
281   - print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors frame.selectors);
282   - Printf.printf "assign_frames_rec: lemma=%s positions=[%s]\n%!" t.lemma (ENIAMwalStringOf.schema frame.positions);
  281 + (* print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors frame.selectors); *)
  282 + (* Printf.printf "assign_frames_rec: lemma=%s positions=[%s]\n%!" t.lemma (ENIAMwalStringOf.schema frame.positions); *)
283 283 try
284 284 let attrs = apply_selectors t.attrs frame.selectors in
285 285 let frame = ENIAMsemLexicon.extend_frame symbol frame in
... ... @@ -380,14 +380,21 @@ exception AGF
380 380  
381 381 let rec manage_agf = function
382 382 | Node t ->
  383 + (* print_endline ("manage_agf 1 " ^ ENIAM_LCGstringOf.linear_term 0 (Node t)); *)
383 384 let attrs,agf = try extract_attr "agf" [] t.attrs with Not_found -> failwith "manage_agf" in
  385 + (* print_endline "manage_agf 2"; *)
384 386 let gf = try get_attr "gf" t.attrs with Not_found -> Dot in (* FIXME: to by się chyba przydało poprawić, żeby gf było zawsze ustalone *)
  387 + (* print_endline "manage_agf 3"; *)
385 388 if agf = Val "" || agf=gf then Node{t with attrs=attrs} else raise AGF
386 389 | Variant(e,l) ->
  390 + (* print_endline ("manage_agf 4: " ^ ENIAM_LCGstringOf.linear_term 0 (Variant(e,l))); *)
387 391 let l = Xlist.fold l [] (fun l (i,t) -> try (i, manage_agf t) :: l with AGF -> l) in
  392 + (* print_endline ("manage_agf 5: " ^ ENIAM_LCGstringOf.linear_term 0 (Variant(e,l))); *)
388 393 if l = [] then raise AGF else Variant(e,List.rev l)
389 394 | Tuple l ->
  395 + (* print_endline "manage_agf 6"; *)
390 396 let l = Xlist.rev_map l manage_agf in
  397 + (* print_endline "manage_agf 7"; *)
391 398 Tuple(List.rev l)
392 399 | Dot -> Dot
393 400 | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t)
... ... @@ -408,7 +415,7 @@ let rec reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree = functio
408 415 | Node t ->
409 416 let args = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t.args in
410 417 (* print_endline ("reduce_tree_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args); *)
411   - let args = try manage_agf args with AGF -> failwith "reduce_tree_rec: AGF" in
  418 + let args = try manage_agf args with AGF -> failwith "reduce_tree_rec: AGF" in (* FIXME: to nie musi być błąd, należałoby przechwytywać wyjątek na poziomie wariantu powyżej *)
412 419 let args = cut_nodes result_tree args in
413 420 (* print_endline ("reduce_tree_rec 2: " ^ ENIAM_LCGstringOf.linear_term 0 args); *)
414 421 (*let id =
... ...
semantics/resources/lexicon-pl.dic
1 1  
2 2 day-lex: /(date+day+day-month):unk;
3 3 date: /(1+year-lex):unk;
4   -day: /month-lex:unk;
5   -day-interval: /month-lex:unk;
6   -day-month: /(1+year-lex):unk;
  4 +day: /month-lex:Poss;
  5 +day-interval: /month-lex:Poss;
  6 +day-month: /(1+year-lex):Poss;
7 7 year-lex: |(1+adjp*number*case*gender):unk;
8   -month-lex: /(1+year+np*T*gen*T*T):unk;
  8 +month-lex: /(1+year+np*T*gen*T*T):Poss;
9 9  
10 10 date-interval: null;
11 11 day-month-interval: null;
... ... @@ -26,8 +26,8 @@ email: null;
26 26 np:
27 27 \(1+num*number*case*gender*person*congr*nsem+num*number*case*gender*person*rec*nsem):adjunct \(1+qub):adjunct /(1+inclusion):adjunct
28 28 \(1+measure*unumber*ucase*ugender*uperson):Measure
29   - /(1+date+day+day-month+date-interval+day-interval+day-month-interval+year+year-interval+hour+hour-minute+hour-interval+hour-minute-interval):unk
30   - |(1+year):unk /(1+obj-id):unk;
  29 + /(1+date+day+day-month+date-interval+day-interval+day-month-interval+year+year-interval+hour+hour-minute+hour-interval+hour-minute-interval+roman+roman-interval):Coref
  30 + |(1+year):unk /(1+obj-id):Coref;
31 31  
32 32 num: \(1+qub):adjunct /(1+inclusion):adjunct;
33 33  
... ...
testy/testy_podstawowe.txt
... ... @@ -200,3 +200,4 @@ Ala zjadła kota.
200 200 Kot wyszedł spod komody.
201 201 Aranżuję na kilka fortepianów.
202 202 Aranżuję na wiele fortepianów.
  203 +Wiele wody płynie.
... ...
walenty/.gitignore
1 1 converter
2 2 results/*.tab
  3 +analyze
... ...
walenty/ENIAMwalGenerate.ml
... ... @@ -167,223 +167,3 @@ let _ =
167 167 print_meanings "resources/meanings.tab" meanings;
168 168 print_adv_types "resources/adv_modes.tab" adv_types;
169 169 ())
170   -
171   -(* Test wczytywania Walentego TEI *)
172   -(* let _ =
173   - let walenty,phrases = ENIAMwalTEI.load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml" in
174   - let n = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = connect e in n + Xlist.size l) in
175   - let m = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = schemata e in n + Xlist.size l) in
176   - Printf.printf "%d connected\n%d schemata\n|phrases|=%d\n" n m (IntMap.size phrases);
177   - () *)
178   -
179   -(* Test unikalności indeksów sensów *)
180   -(* let _ =
181   - let walenty,phrases = ENIAMwalTEI.load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml" in
182   - Xlist.fold walenty IntMap.empty (fun map e ->
183   - Xlist.fold e.meanings map (fun map m ->
184   - IntMap.add_inc map m.mng_id m (fun m1 -> if m1 = m then m else failwith "meaning"))) *)
185   -
186   -(*
187   -(* let insert_phrases phrases = function
188   - Frame(atrs,s) -> Frame(atrs,Xlist.map s (fun p ->
189   - {p with morfs=Xlist.map p.morfs (function
190   - MorfId id -> (try IntMap.find phrases id with Not_found -> failwith "insert_phrases")
191   - | _ -> failwith "insert_phrases")}))
192   - | _ -> failwith "insert_phrases: ni"
193   -
194   -let print_entry pos_map pos orth =
195   - let orth_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in
196   - let frames = try StringMap.find orth_map orth with Not_found -> [] in
197   - Xlist.iter frames (fun frame ->
198   - let frame = insert_phrases ENIAMwalTEI.phrases frame in
199   - print_endline (ENIAMwalStringOf.frame orth frame)) *)
200   -
201   -(* Wypisanie hasła *)
202   -(* let _ =
203   - print_entry connected_walenty "verb" "brudzić";
204   - () *)
205   -
206   -(* let has_nontrivial_lex = function
207   - Frame(atrs,s) -> Xlist.fold s false (fun b p ->
208   - if p.role = "Lemma" && p.role_attr = "" then b else
209   - Xlist.fold p.morfs b (fun b -> function
210   - MorfId id -> failwith "has_nontrivial_lex"
211   - | LexPhrase _ -> true
212   - (* | LexRPhrase _ -> true
213   - | LexPhraseMode _ -> true *)
214   - | _ -> b))
215   - | _ -> failwith "has_nontrivial_lex: ni" *)
216   -
217   -(* Leksykalizacje nie wchodzące do lematu *)
218   -(* let _ =
219   - StringMap.iter connected_walenty (fun _ orth_map ->
220   - StringMap.iter orth_map (fun orth frames ->
221   - Xlist.iter frames (fun frame ->
222   - let frame = insert_phrases ENIAMwalTEI.phrases frame in
223   - if has_nontrivial_lex frame then
224   - print_endline (ENIAMwalStringOf.frame orth frame)))) *)
225   -
226   -let simplify_frame_verb = function
227   - Phrase(NP(Case "dat")) -> []
228   - | Phrase(NP(Case "inst")) -> []
229   - | Phrase(PrepNP _) -> []
230   - | Phrase(ComprepNP _) -> []
231   - | Phrase(AdvP) -> []
232   - | t -> [t]
233   -
234   -let simplify_frame_noun = function
235   - Phrase(NP(Case "gen")) -> []
236   - | Phrase(NP(Case "nom")) -> []
237   - | Phrase(NP(CaseAgr)) -> []
238   - | Phrase(PrepNP _) -> []
239   - | Phrase(ComprepNP _) -> []
240   - | Phrase(AdjP CaseAgr) -> []
241   - | PhraseComp(Ncp(Case "gen"),_)
242   - | PhraseComp(Prepncp(_,_),_) -> []
243   - | PhraseAbbr(Possp,[]) -> []
244   - | t -> [t]
245   -
246   -let simplify_frame_adj = function
247   - | t -> [t]
248   -
249   -let simplify_frame_adv = function
250   - | t -> [t]
251   -
252   -
253   -(* let simplify_frame pos = function
254   - Frame(atrs,s) ->
255   - let schema = Xlist.fold s [] (fun schema p ->
256   - let morfs = Xlist.fold p.morfs [] (fun morfs morf ->
257   - match pos with
258   - "verb" -> simplify_frame_verb morf @ morfs
259   - | "noun" -> simplify_frame_noun morf @ morfs
260   - | "adj" -> simplify_frame_adj morf @ morfs
261   - | "adv" -> simplify_frame_adv morf @ morfs
262   - | _ -> failwith "simplify_frame") in
263   - if morfs = [] then schema else
264   - {p with ce=[]; cr=[]; morfs=morfs} :: schema) in
265   - if schema = [] then [] else [Frame(atrs,schema)]
266   - | _ -> failwith "simplify_frame: ni" *)
267   -
268   -
269   -(* Uproszczone schematy *)
270   -(* let _ =
271   - StringMap.iter schemata_walenty (fun pos orth_map ->
272   - if pos = "noun" then
273   - StringMap.iter orth_map (fun orth frames ->
274   - Xlist.iter frames (fun frame ->
275   - let frame = insert_phrases ENIAMwalTEI.phrases frame in
276   - let frames = simplify_frame pos frame in
277   - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame orth frame))))) *)
278   -
279   -(* let has_mode_coordination = function
280   - Frame(atrs,s) -> Xlist.fold s false (fun b p ->
281   - let n = Xlist.fold p.morfs 0 (fun n -> function
282   - MorfId id -> failwith "has_nontrivial_lex"
283   - | PhraseAbbr(Advp _,_) -> n+1
284   - | PhraseAbbr(Xp _,_) -> n+1
285   - (* | LexPhraseMode _ -> n+1 FIXME*)
286   - | _ -> n) in
287   - if n>1 then true else b)
288   - | _ -> failwith "has_nontrivial_lex: ni" *)
289   -
290   -(* Koordynacja z mode *)
291   -(* let _ =
292   - StringMap.iter schemata_walenty(*connected_walenty*) (fun _ orth_map ->
293   - StringMap.iter orth_map (fun orth frames ->
294   - Xlist.iter frames (fun frame ->
295   - let frame = insert_phrases ENIAMwalTEI.phrases frame in
296   - if has_mode_coordination frame then
297   - print_endline (ENIAMwalStringOf.frame orth frame)))) *)
298   -
299   -
300   -(* let get_entry orth pos *)
301   - (*
302   -let load_walenty2 () =
303   - let walenty = load_walenty walenty_filename in
304   - Xlist.fold walenty StringMap.empty (fun walenty entry ->
305   - if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) ->
306   - let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in
307   - let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in
308   - StringMap.add walenty pos map)
309   - else Xlist.fold (connect entry) walenty (fun walenty (lemma,pos,frame) ->
310   - let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in
311   - let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in
312   - StringMap.add walenty pos map))
313   -
314   -
315   -let print_stringqmap filename qmap =
316   - let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in
317   - File.file_out filename (fun file ->
318   - Xlist.iter (Xlist.sort l compare) (fun (v,k) ->
319   - Printf.fprintf file "%5d %s\n" v k))
320   -
321   -let sel_prefs_quantities walenty =
322   - Xlist.fold walenty StringQMap.empty (fun quant e ->
323   - Xlist.fold e.frames quant (fun quant f ->
324   - Xlist.fold f.arguments quant (fun quant a ->
325   - Xlist.fold a.sel_prefs quant (fun quant l ->
326   - Xlist.fold l quant (fun quant -> function
327   - Numeric s ->
328   - let name = try ENIAMplWordnet.synset_name s with Not_found -> "unknown" in
329   - StringQMap.add quant ("N " ^ s ^ " " ^ name)
330   - | Symbol s -> StringQMap.add quant ("S " ^ s)
331   - | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t))))))
332   -*)
333   -(*let _ =
334   - let walenty = load_walenty walenty_filename in
335   - let quant = sel_prefs_quantities walenty in
336   - print_stringqmap "results/quant_sel_prefs.txt" quant*)
337   -
338   -(*let _ =
339   - let walenty = load_walenty2 () in
340   - let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in
341   - Xlist.iter frames_sem (fun frame ->
342   - print_endline (WalStringOf.frame "bębnić" frame))*)
343   -
344   -
345   -(* Wypisanie realizacji *)
346   -(* let _ =
347   - Xlist.iter ENIAMwalTEI.expands (fun (id,morf,l) ->
348   - Printf.printf "%d %s:\n" id (ENIAMwalStringOf.morf morf);
349   - Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *)
350   -
351   -(* Wypisanie realizacji po przetworzeniu *)
352   -(* let _ =
353   - AbbrMap.iter expands (fun morf l ->
354   - Printf.printf "%s:\n" (ENIAMwalStringOf.phrase_abbr morf);
355   - Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *)
356   -
357   -let has_realization = function
358   - PhraseAbbr _ -> true
359   - | PhraseComp _ -> true
360   - | _ -> false
361   -
362   -(* Wypisanie fraz, które podlegają rozwijaniu *)
363   -(*let _ =
364   - IntMap.iter ENIAMwalTEI.phrases (fun i morf ->
365   - if has_realization morf then
366   - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *)
367   -
368   -(* Wypisanie fraz, które podlegają rozwijaniu *)
369   -(* let _ =
370   - IntMap.iter phrases (fun i morf ->
371   - if has_realization morf then
372   - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *)
373   -
374   -(* let test_phrases = [17088; 17133; 1642]
375   - let _ =
376   - Xlist.iter test_phrases (fun i ->
377   - let m1 = IntMap.find ENIAMwalTEI.phrases i in
378   - let m2 = IntMap.find phrases i in
379   - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m1);
380   - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m2)) *)
381   -
382   -(* let print_entries entries =
383   - StringMap.iter entries (fun pos entries2 ->
384   - StringMap.iter entries2 (fun lemma entries3 ->
385   - EntrySet.iter entries3 (fun entry ->
386   - Printf.printf "%s: %s: %s\n" pos lemma (ENIAMwalStringOf.entry entry)))) *)
387   -
388   -(* let _ = print_entries entries *)
389   -*)
... ...
walenty/ENIAMwalTEI.ml
... ... @@ -86,9 +86,9 @@ let parse_gf = function
86 86  
87 87 let parse_control arg = function
88 88 "controller" -> {arg with cr="1" :: arg.cr}
89   - | "controllee" -> {arg with ce="1" :: arg.cr}
  89 + | "controllee" -> {arg with ce="1" :: arg.ce}
90 90 | "controller2" -> {arg with cr="2" :: arg.cr}
91   - | "controllee2" -> {arg with ce="2" :: arg.cr}
  91 + | "controllee2" -> {arg with ce="2" :: arg.ce}
92 92 | s -> failwith ("parse_control: " ^ s)
93 93  
94 94 let parse_case = function
... ...
walenty/makefile
... ... @@ -6,11 +6,15 @@ OCAMLFLAGS=$(INCLUDES) -g
6 6 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa
7 7 INSTALLDIR=`ocamlc -where`/eniam
8 8  
9   -SOURCES=entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml ENIAMwalGenerate.ml
  9 +SOURCES=entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml
10 10  
11   -all: $(SOURCES)
  11 +all: $(SOURCES) ENIAMwalGenerate.ml
12 12 $(OCAMLOPT) -o converter $(OCAMLOPTFLAGS) $^
13 13  
  14 +analyze: $(SOURCES) ENIAMwalAnalyze.ml
  15 + mkdir -p results
  16 + $(OCAMLOPT) -o analyze $(OCAMLOPTFLAGS) $^
  17 +
14 18 install:
15 19 mkdir -p /usr/share/eniam/Walenty
16 20 cp resources/* /usr/share/eniam/Walenty
... ... @@ -40,4 +44,4 @@ install-local:
40 44 $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
41 45  
42 46 clean:
43   - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a converter resources/*.tab
  47 + rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a converter analyze resources/*.tab results/*
... ...