Commit 63d47e4b17ab6c6ece55922bc166eb08e03068f1

Authored by Wojciech Jaworski
1 parent 3d8c3471

Walencja semantyczna przyimków

Too many changes to show.

To preserve performance only 17 of 28 files are displayed.

LCGlexicon/TODO
  1 +- dodać prepncp
  2 +
1 3 - dodać podniesione comprepy
2 4 Pod jakim tytułem brykasz?
3 5 Niezależnie od kogo brykasz?
... ...
LCGlexicon/resources/lexicon-pl.dic
... ... @@ -104,11 +104,12 @@ pos=subst,case=gen,nsem=measure:
104 104 measure*sg*case*n2*person{\num*number*case*gender*person*rec}{schema}{\(1+qub),/(1+inclusion)}: measure_weight; # UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika
105 105  
106 106 # frazy przyimkowe
107   -pos=prep: prepnp*lemma*case{\(1+advp*T),/np*T*case*T*T}{\(1+qub),/(1+inclusion)};
108   -pos=prep: prepadjp*lemma*case{\(1+advp*T),/adjp*T*case*T}{\(1+qub),/(1+inclusion)};
  107 +lemma!=temu,pos=prep: prepnp*lemma*case{\(1+advp*T),/np*T*case*T*T}{\(1+qub),/(1+inclusion)};
  108 +lemma!=temu,pos=prep: prepadjp*lemma*case{\(1+advp*T),/adjp*T*case*T}{\(1+qub),/(1+inclusion)};
109 109 lemma=po,pos=prep: QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/(adjp*sg*dat*m1+adjp*T*postp*T)}{\(1+qub),/(1+inclusion)}; # po polsku, po kreciemu
110 110 lemma=z,pos=prep: QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/adjp*sg*nom*f}{\(1+qub),/(1+inclusion)}; # z bliska
111 111 lemma=na,pos=prep: QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/advp*T}{\(1+qub),/(1+inclusion)}; # na lewo
  112 +lemma=temu,pos=prep: prepnp*lemma*case\np*T*case*T*T; # chwilę temu
112 113  
113 114 # przyimkowe określenia czasu
114 115 lemma=z,pos=prep,case=gen: prepnp*lemma*case{\(1+advp*T),/(day-month+day+year+date+hour+hour-minute)}{\(1+qub),/(1+inclusion)};
... ...
lexSemantics/ENIAMlexSemantics.ml
... ... @@ -29,27 +29,42 @@ let find_meaning m =
29 29 with Not_found ->
30 30 m.name ^ "-" ^ m.variant, [], unknown_meaning_weight
31 31  
  32 +let find_prep_meaning lemma hipero = (* Builds the (name, hypernym list, weight) meaning triple for the preposition [lemma]. [hipero] must be a one-element list holding a [Predef] name; any other shape raises Failure "find_prep_meaning". *)
  33 + let hipero = match hipero with
  34 + [Predef hipero] -> hipero
  35 + | _ -> failwith "find_prep_meaning" in
  36 + if hipero = "ALL" then lemma, [hipero,0], unknown_meaning_weight else (* "ALL" short-circuits: it becomes its own only hypernym at cost 0 and no wordnet lookup is made *)
  37 + let syn_id = StringMap.find !ENIAMplWordnet.predef hipero in (* the lookup is not wrapped in a handler, so a name missing from the predef table presumably escapes as Not_found; verify against callers *)
  38 + let hipero = IntMap.fold (ENIAMplWordnet.get_hipero syn_id) [] (fun hipero syn_id cost -> (ENIAMplWordnet.synset_name syn_id, cost) :: hipero) in (* flattens the map returned by get_hipero into a list of (synset name, cost) pairs *)
  39 + lemma, hipero, unknown_meaning_weight
  40 +
32 41 let lex_sie = LCG (ENIAMwalRenderer.render_morf (SimpleLexArg("się",QUB)))
33 42  
34   -let find_senses t s = (* FIXME: sensy zawierające 'się' *)
35   - let set = Xlist.fold s.frames StringSet.empty (fun set frame ->
  43 +(* FIXME: naiwnie wierzymy, że jeśli leksem jest opisany semantycznie w walentym to zawiera ramy dla wszystkich sensów *)
  44 +let find_senses t s =
  45 + (*let set = Xlist.fold s.frames StringSet.empty (fun set frame ->
36 46 Xlist.fold frame.meanings set (fun set (name,hipero,weight) ->
37   - StringSet.add set name)) in
  47 + StringSet.add set name)) in*)
38 48 let senses = match t.token with
39 49 Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos
40 50 | Proper(_,_,_,senses) -> ENIAMplWordnet.find_proper_senses senses
41 51 | _ -> [] in
42   - let senses = Xlist.fold senses [] (fun senses (name,hipero,weight) ->
43   - if StringSet.mem set name then senses else (name,hipero,weight) :: senses) in
  52 + (* let senses =
  53 + Xlist.fold senses [] (fun senses (name,hipero,weight) ->
  54 + if StringSet.mem set name then senses else (name,hipero,weight) :: senses) in *)
44 55 let senses_sie = match t.token with
45 56 Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses (lemma ^ " się") pos
46 57 | Proper(_,_,_,senses) -> []
47 58 | _ -> [] in
48   - let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) ->
  59 +(* let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) ->
49 60 if StringSet.mem set name then senses_sie else (name,hipero,weight) :: senses_sie) in
50 61 let frames = if senses = [] then s.frames else {empty_frame with meanings=senses} :: s.frames in
51 62 let frames = if senses_sie = [] then frames else {empty_frame with meanings=senses_sie;
52   - positions=[{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]}]} :: frames in (* FIXME: czy to nie usuwa elementów z ramy? *)
  63 + positions=[{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]; is_necessary=Req}]} :: frames in*) (* FIXME: czy to nie usuwa elementów z ramy? *)
  64 + let frames = Xlist.fold s.frames [] (fun frames f ->
  65 + if f.meanings <> [] then f :: frames else
  66 + (if senses_sie = [] then [] else [{f with meanings=senses_sie; positions={empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]; is_necessary=Req} :: f.positions}]) @
  67 + [{f with meanings=senses}] @ frames) in
53 68 {s with frames=frames}
54 69  
55 70 let find_selprefs schema = (* FIXME: RelationRole *)
... ... @@ -135,6 +150,33 @@ let get_preps tokens group = (* FIXME: To nie zadziała przy kilku wystąpieniac
135 150 | _ -> preps,compars) in
136 151 StringMap.fold preps [] (fun l prep v -> (prep, StringSet.to_list v) :: l), StringSet.to_list compars
137 152  
  153 +let make_unique schemata = (* Removes duplicate (selectors, schema) pairs; two pairs count as equal when their printed forms are equal. *)
  154 + let map = Xlist.fold schemata StringMap.empty (fun map (selectors,schema) ->
  155 + let s = "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "}" in (* textual key built from both components of the pair *)
  156 + StringMap.add map s (selectors,schema)) in (* presumably a later pair replaces an earlier one stored under the same key; confirm StringMap.add semantics *)
  157 + StringMap.fold map [] (fun l _ (selectors,schema) -> (selectors,schema) :: l) (* the result order comes from the map traversal, not from the input list *)
  158 +
  159 +let semantize lemma pos (selectors,schema) = (* Turns a purely syntactic (selectors, schema) pair into frames: one frame per tuple returned by ENIAMvalence.get_aroles. *)
  160 + let schema = Xlist.rev_map schema (fun p -> (* every position gets the generic role "Arg" and the selectional preference Predef "X" *)
  161 + {p with role="Arg"; sel_prefs=[Predef "X"]}) in (* FIXME: stub so that known arguments are preferred. NOTE(review): get_aroles below receives this rewritten schema, so any position role it reads is already "Arg"; confirm this is intended *)
  162 + Xlist.rev_map (ENIAMvalence.get_aroles schema lemma pos) (fun (sel,arole,arole_attr,arev) ->
  163 + {empty_frame with selectors=sel @ selectors; positions=schema; (* selectors from get_aroles are put in front of the selectors of the schema *)
  164 + arole=arole; arole_attr=arole_attr; arev=arev})
  165 +
  166 +let assign_prep_semantics lemma =
  167 + let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in
  168 + Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles);
  169 + Xlist.map roles (function (case,arole,arole_attr,hipero,sel_prefs) ->
  170 + Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr;
  171 + let meaning = find_prep_meaning lemma hipero in (* FIXME: zaślepka dla meaning i weight *)
  172 + print_endline "assign_prep_semantics 1";
  173 + let positions = [{empty_position with
  174 + sel_prefs=sel_prefs; dir=if lemma="temu" then Backward_ else Forward_;
  175 + morfs=ENIAMwalRenderer.assing_pref_morfs (lemma,case); is_necessary=Req}] in
  176 + print_endline "assign_prep_semantics 2";
  177 + {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions;
  178 + arole=arole; arole_attr=arole_attr; arev=false})
  179 +
138 180 let assign_valence tokens lex_sems group =
139 181 let lexemes = Xlist.fold group StringSet.empty (fun lexemes id ->
140 182 let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in
... ... @@ -151,10 +193,10 @@ let assign_valence tokens lex_sems group =
151 193 (* Printf.printf "A %s %s %s |schemata|=%d\n" lemma pos pos2 (Xlist.size schemata); *)
152 194 let entries = Entries.find entries pos lemma in
153 195 let connected = Entries.find connected pos2 lemma in
154   - let schemata = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) ->
155   - ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* FIXME: gubię opinię *)
  196 + let schemata1 = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) ->
  197 + ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* gubię opinię *)
156 198 (* Printf.printf "B %s |schemata|=%d\n" lemma (Xlist.size schemata); *)
157   - let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata in
  199 + let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata1 in
158 200 (* Printf.printf "C %s |schemata|=%d\n" lemma (Xlist.size schemata); *)
159 201 let schemata = Xlist.rev_map schemata (fun (selectors,schema) ->
160 202 selectors,ENIAMwalRenderer.render_simple_schema schema) in
... ... @@ -163,15 +205,31 @@ let assign_valence tokens lex_sems group =
163 205 let entries = List.flatten (Xlist.rev_map entries (ENIAMvalence.transform_lex_entry pos lemma)) in
164 206 let entries = Xlist.map entries (fun (selectors,entry) ->
165 207 selectors,ENIAMwalRenderer.render_lex_entry entry) in
166   - let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema) ->
167   - Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema) (fun (selectors,schema) ->
168   - {empty_frame with selectors=selectors; meanings= Xlist.map meanings find_meaning; positions=schema}))) in (* FIXME: gubię opinię *)
  208 + let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema1) ->
  209 + List.flatten (Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema1) (fun (selectors,schema) ->
  210 + Xlist.rev_map (ENIAMvalence.get_aroles schema1 lemma pos) (fun (sel,arole,arole_attr,arev) ->
  211 + {selectors=sel @ selectors; meanings=Xlist.map meanings find_meaning; positions=schema;
  212 + arole=arole; arole_attr=arole_attr; arev=arev; sopinion=sopinion; fopinion=fopinion}))))) in
  213 + (* Printf.printf "E %s |connected|=%d\n" lemma (Xlist.size connected); *)
  214 + let connected = if connected = [] then List.flatten (Xlist.rev_map (make_unique schemata1) (semantize lemma pos)) else connected in
  215 + (* Printf.printf "F %s |connected|=%d\n" lemma (Xlist.size connected); *)
169 216 let connected = Xlist.fold connected [] (fun connected frame ->
170 217 if ENIAMadjuncts.check_selector_lex_constraints lexemes pos frame.selectors then frame :: connected else connected) in
  218 + (* Printf.printf "G %s |connected|=%d\n" lemma (Xlist.size connected); *)
171 219 let connected = Xlist.rev_map connected (fun frame ->
172 220 {frame with
173 221 positions = find_selprefs (ENIAMwalRenderer.render_connected_schema (ENIAMwalReduce.set_necessary frame.positions))}) in
  222 + (* Printf.printf "H %s |connected|=%d\n" lemma (Xlist.size connected); *)
174 223 let connected = List.flatten (Xlist.rev_map connected (ENIAMadjuncts.add_connected_adjuncts preps compreps compars pos2)) in
  224 + (* Printf.printf "I %s |connected|=%d\n" lemma (Xlist.size connected); *)
  225 + let connected = if pos = "prep" then
  226 + if connected <> [] then failwith "assign_valence" else
  227 + assign_prep_semantics lemma else connected in
  228 + (* Printf.printf "J %s |connected|=%d\n" lemma (Xlist.size connected); *)
  229 + let connected = if connected = [] then
  230 + Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) ->
  231 + {empty_frame with selectors=sel; arole=arole; arole_attr=arole_attr; arev=arev}) else connected in
  232 + (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *)
175 233 ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with
176 234 schemata=schemata; lex_entries=entries; frames=connected})
177 235  
... ... @@ -253,7 +311,6 @@ let assign tokens text =
253 311 (* Xlist.iter groups (fun group -> print_endline (String.concat " " (Xlist.map group string_of_int))); *)
254 312 remove_unused_tokens tokens groups;
255 313 Xlist.iter groups (fun group -> assign_valence tokens lex_sems group);
256   - (* Xlist.iter groups (fun group -> assign_valence tokens lex_sems group);*)
257 314 Int.iter 1 (ExtArray.size tokens - 1) (fun i ->
258 315 let token = ExtArray.get tokens i in
259 316 let lex_sem = ExtArray.get lex_sems i in
... ... @@ -275,4 +332,5 @@ let initialize () =
275 332 ENIAMwalParser.initialize ();
276 333 ENIAMwalReduce.initialize ();
277 334 ENIAMplWordnet.initialize ();
  335 + ENIAMcategoriesPL.initialize ();
278 336 ()
... ...
lexSemantics/ENIAMlexSemanticsData.ml
... ... @@ -21,13 +21,13 @@ open ENIAMtokenizerTypes
21 21 open ENIAMlexSemanticsTypes
22 22 open Xstd
23 23  
24   -let subst_inst_roles = Xlist.fold [
25   - "wiosna", "Time","";
26   - "lato", "Time","";
27   - "jesień", "Time","";
28   - "zima", "Time","";
29   - "wieczór", "Time","";
30   - ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a))
  24 +let subst_inst_time = StringSet.of_list [
  25 + "wiosna";
  26 + "lato";
  27 + "jesień";
  28 + "zima";
  29 + "wieczór";
  30 + ]
31 31  
32 32 let adj_roles = Xlist.fold [
33 33 "ten", "Apoz","";
... ... @@ -59,7 +59,7 @@ let adj_roles = Xlist.fold [
59 59 "taki", "Attribute","";
60 60 "czyj", "Possesive","";
61 61 "który", "Attribute","";
62   - ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a))
  62 + ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l))
63 63  
64 64 let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *)
65 65 (* operators: nielokalnie zmieniaja formułe logiczna *)
... ... @@ -80,7 +80,7 @@ let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *)
80 80 "dlatego", "Condition",""; (* odniesieniem argumentu jest sytuacji/kontekst *)
81 81 "tak", "Manner",""; (* odniesieniem argumentu jest sytuacji/kontekst, byc może deiktyczny *)
82 82  
83   - "skąd", "Location","Source";
  83 +(* "skąd", "Location","Source";
84 84 "skądkolwiek", "Location","Source";
85 85 "skądś", "Location","Source";
86 86 "skądże", "Location","Source";
... ... @@ -209,8 +209,8 @@ let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *)
209 209 "ongi", "Time","";
210 210 "ongiś", "Time","";
211 211 "wczas", "Time","";
212   - "wonczas", "Time","";
213   - ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a))
  212 + "wonczas", "Time","";*)
  213 + ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l))
214 214  
215 215 let qub_roles = Xlist.fold [
216 216 "tylko", "Quantifier","";
... ... @@ -236,10 +236,10 @@ let qub_roles = Xlist.fold [
236 236 "ponad", "Mod","";
237 237 "prawie", "Mod","";
238 238 "przynajmniej", "Mod","";
239   - ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a))
  239 + ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l))
240 240  
241 241  
242   -let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_prefs *)(* FIXME: problem z podwójnymi przypisaniami *)
  242 +let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_prefs *)
243 243 "od","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
244 244 "spod","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
245 245 "spomiędzy","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
... ... @@ -249,12 +249,14 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
249 249 "spoza","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
250 250 "sprzed","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
251 251 "z","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
  252 + "z","postp", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
252 253 "znad","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
253 254 "zza","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
254 255 "do","gen", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
255 256 "ku","dat", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
256 257 "między","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
257 258 "na","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
  259 + "na","postp", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
258 260 "nad","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
259 261 "nieopodal","gen", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
260 262 "opodal","gen", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
... ... @@ -267,6 +269,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
267 269 "za","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
268 270 "dzięki","dat", "Condition","",["CZEMU"],[];
269 271 "na","acc", "Condition","",["CZEMU"],[];
  272 + "na","postp", "Condition","",["CZEMU"],[];
270 273 "od","gen", "Condition","",["CZEMU"],[];
271 274 "przez","acc", "Condition","",["CZEMU"],[];
272 275 "wskutek","gen", "Condition","",["CZEMU"],[];
... ... @@ -275,6 +278,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
275 278 "do","gen", "Purpose","",["CZEMU"],[];
276 279 "ku","dat", "Purpose","",["CZEMU"],[];
277 280 "na","acc", "Purpose","",["CZEMU"],[];
  281 + "na","postp", "Purpose","",["CZEMU"],[];
278 282 "po","acc", "Purpose","",["CZEMU"],[];
279 283 "do","gen", "Duration","",["CZAS"],["CZAS"];
280 284 "od","gen", "Duration","",["CZAS"],["CZAS"];
... ... @@ -285,6 +289,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
285 289 "między","inst", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
286 290 "nad","inst", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
287 291 "na","loc", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
  292 + "na","postp", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
288 293 "naokoło","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
289 294 "naprzeciw","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
290 295 "naprzeciwko","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"];
... ... @@ -327,6 +332,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
327 332 "jak","str", "Manner","",[],[];*)
328 333 "pod","acc", "Manner","",["ALL"],[];
329 334 "z","inst", "Manner","",["ALL"],[];
  335 + "z","postp", "Manner","",["ALL"],[];
330 336 "dokoła","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"];
331 337 "dookoła","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"];
332 338 "koło","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"];
... ... @@ -363,10 +369,10 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
363 369 "temu","acc", "Time","",["CZAS"],["CZAS"]; (* dodane *)
364 370 "za","gen", "Time","",["CZAS"],["CZAS"]; (* dodane *)
365 371 ] StringMap.empty (fun map (lemma,case,role,role_attr,hipero,sel_prefs) ->
366   - let hipero = Xlist.fold hipero StringSet.empty ENIAMplWordnet.get_hipero_rec in
367   - let map2 = try StringMap.find map lemma with Not_found -> StringMap.empty in
368   - let map2 = StringMap.add_inc map2 case [case,role,role_attr,hipero,sel_prefs] (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l) in
369   - StringMap.add map lemma map2)
  372 + let hipero = Xlist.map hipero (fun hipero -> ENIAMwalTypes.Predef hipero) in
  373 + let sel_prefs = Xlist.map sel_prefs (fun sel_prefs -> ENIAMwalTypes.Predef sel_prefs) in
  374 + StringMap.add_inc map lemma [case,role,role_attr,hipero,sel_prefs]
  375 + (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l))
370 376 (* "przeciwko","dat","Dat";
371 377 "przeciw","dat","Dat";
372 378 "o","acc","Theme";
... ... @@ -374,14 +380,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
374 380 "według","gen","Manr";
375 381 "wobec","gen","Dat";*)
376 382  
377   -let assign_prep_semantics lemma cases t =
378   - try
379   - let map = StringMap.find prep_roles lemma in
380   - let l = List.flatten (Xlist.map cases (fun case ->
381   - try StringMap.find map case with Not_found -> [])) in
382   - if l = [] then Normal else PrepSemantics l
383   - with Not_found -> Normal
384   -
  383 +(*
385 384 let subst_special_lexemes = Xlist.fold [
386 385 "jutro", ["indexical"];(*"dzień"*)
387 386 "pojutrze", ["indexical"];(*"dzień"*)
... ... @@ -553,3 +552,4 @@ let assign_semantics tokens lex_sems group =
553 552 {t with semantics=assign_prep_semantics lemma (StringSet.to_list cases) t}
554 553 | _ -> t in
555 554 ExtArray.set lex_sems id t)
  555 +*)
... ...
lexSemantics/ENIAMlexSemanticsTypes.ml
... ... @@ -43,9 +43,12 @@ type frame = {
43 43 arole: string;
44 44 arole_attr: string;
45 45 arev: bool;
  46 + sopinion: ENIAMwalTypes.opinion;
  47 + fopinion: ENIAMwalTypes.opinion;
46 48 }
47 49  
48   -let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false}
  50 +let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; (* Default frame value: empty lists, empty role strings, arev false. *)
  51 + sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} (* both opinion fields default to Nieokreslony *)
49 52  
50 53 type lex_sem = {
51 54 schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
... ...
lexSemantics/ENIAMplWordnet.ml
... ... @@ -26,6 +26,7 @@ let synmap = ref IntMap.empty
26 26 let ex_hipo = ref IntMap.empty
27 27 let predef_names = ref IntMap.empty
28 28 let proper_classes = ref StringMap.empty
  29 +let predef = ref StringMap.empty
29 30  
30 31 let load_lu filename =
31 32 File.fold_tab filename (IntMap.empty,StringMap.empty) (fun (lu_names,lumap) -> function
... ... @@ -65,7 +66,7 @@ let syn_id_of_sense sense =
65 66 | _ -> failwith ("syn_id_of_sense 2: " ^ lemma)
66 67  
67 68 let load_predef ex_hipo filename =
68   - let ex_hipo,predef_names,_,_ =
  69 + let ex_hipo,predef_names,predef,_ =
69 70 File.fold_tab filename (ex_hipo,IntMap.empty,StringMap.empty,-1) (fun (ex_hipo,predef_names,predef,id) -> function
70 71 name :: senses ->
71 72 let ex_hipo = Xlist.fold senses ex_hipo (fun ex_hipo sense ->
... ... @@ -77,7 +78,7 @@ let load_predef ex_hipo filename =
77 78 let predef = StringMap.add_inc predef name id (fun _ -> failwith "load_predef 2") in
78 79 ex_hipo, predef_names, predef, id-1
79 80 | l -> failwith ("load_predef: " ^ String.concat "\t" l)) in
80   - ex_hipo,predef_names
  81 + ex_hipo,predef_names,predef
81 82  
82 83 let rec get_hipero_rec found ex_hipo id cost =
83 84 let cost2 = try IntMap.find found id with Not_found -> max_int in
... ... @@ -164,8 +165,9 @@ let initialize () =
164 165 lumap := b;
165 166 synmap := load_syn syn_filename;
166 167 ex_hipo := load_ex_hipo ex_hipo_filename;
167   - let a,b = load_predef !ex_hipo predef_filename in
  168 + let a,b,c = load_predef !ex_hipo predef_filename in
168 169 ex_hipo := a;
169 170 predef_names := b;
  171 + predef := c;
170 172 proper_classes := load_proper_classes proper_classes_filename;
171 173 ()
... ...
lexSemantics/ENIAMvalence.ml
... ... @@ -613,3 +613,34 @@ let get_default_valence = function
613 613 | "adj" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]]
614 614 | "adv" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]]
615 615 | _ -> []
  616 +
  617 +open ENIAMcategoriesPL
  618 +
  619 +let get_aroles schema lemma = function
  620 + "pact" -> [Xlist.fold schema ([],"Arg","",true) (fun (sel,arole,arole_attr,arev) p ->
  621 + if p.gf = SUBJ then sel,p.role,p.role_attr,arev else sel,arole,arole_attr,arev)]
  622 + | "ppas" -> [Xlist.fold schema ([],"Arg","",true) (fun (sel,arole,arole_attr,arev) p ->
  623 + if p.gf = OBJ then sel,p.role,p.role_attr,arev else sel,arole,arole_attr,arev)]
  624 + | "subst" -> [
  625 + [Case,Eq,["dat"]],"Recipent","",false;
  626 + [Case,Eq,["inst"]],(if StringSet.mem ENIAMlexSemanticsData.subst_inst_time lemma then "Time" else "Instrument"),"",false;
  627 + [Case,Neq,["dat";"inst"]],"","",false]
  628 + | "adj" | "adjc" | "adjp" -> (* FIXME czy adjc i adjp mogą być adjunctami? *)
  629 + let l = try StringMap.find ENIAMlexSemanticsData.adj_roles lemma with Not_found -> ["Attribute",""] in
  630 + Xlist.map l (fun (role,role_attr) -> [],role,role_attr,false)
  631 + | "adv" ->
  632 + let modes = ENIAMcategoriesPL.adv_mode lemma in
  633 + let roles = try StringMap.find ENIAMlexSemanticsData.adv_roles lemma with Not_found -> ["Manner",""] in
  634 + Xlist.fold modes [] (fun l -> function
  635 + "mod" -> Xlist.fold roles l (fun l (role,role_attr) -> ([Mode,Eq,["mod"]],role,role_attr,false) :: l)
  636 + | "abl" -> ([Mode,Eq,["abl"]],"Location","Souce",false) :: l
  637 + | "adl" -> ([Mode,Eq,["adl"]],"Location","Goal",false) :: l
  638 + | "locat" -> ([Mode,Eq,["locat"]],"Location","",false) :: l
  639 + | "perl" -> ([Mode,Eq,["perl"]],"Path","",false) :: l
  640 + | "dur" -> ([Mode,Eq,["dur"]],"Duration","",false) :: l
  641 + | "temp" -> ([Mode,Eq,["temp"]],"Time","",false) :: l
  642 + | _ -> failwith "get_aroles")
  643 + | "qub" ->
  644 + let l = try StringMap.find ENIAMlexSemanticsData.qub_roles lemma with Not_found -> ["Arg",""] in
  645 + Xlist.map l (fun (role,role_attr) -> [],role,role_attr,false)
  646 + | _ -> [[],"","",false]
... ...
lexSemantics/ENIAMwalRenderer.ml
... ... @@ -338,3 +338,13 @@ let adv_adjuncts_simp = [
338 338 let adv_connected_adjuncts_simp = [
339 339 adjunct [Tensor[Atom "advp"; Top]];
340 340 ]
  341 +
  342 +let assing_pref_morfs = function (* Maps a (preposition lemma, case) pair to the list of LCG morfs accepted as the complement of the preposition. *)
  343 + "po","postp" -> [ (* the three postp cases list the same complement categories as the po / z / na entries of LCGlexicon/resources/lexicon-pl.dic *)
  344 + LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"]);
  345 + LCG(Tensor[Atom "adjp"; Top; Atom "postp"; Top])]
  346 + | "z","postp" -> [LCG(Tensor[Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"])]
  347 + | "na","postp" -> [LCG(Tensor[Atom "advp"; Top])]
  348 + | _,case -> [ (* every other pair: an np or an adjp in the governed case, all remaining slots left as Top *)
  349 + LCG(Tensor[Atom "np"; Top; Atom case; Top; Top]);
  350 + LCG(Tensor[Atom "adjp"; Top; Atom case; Top])]
... ...
lexSemantics/makefile
... ... @@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g
6 6 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa
7 7 INSTALLDIR=`ocamlc -where`/eniam
8 8  
9   -SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMlexSemanticsTypes.ml ENIAMadjuncts.ml \
  9 +SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMlexSemanticsData.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMlexSemanticsTypes.ml ENIAMadjuncts.ml \
10 10 ENIAMlexSemanticsStringOf.ml ENIAMlexSemanticsHTMLof.ml ENIAMlexSemanticsXMLof.ml ENIAMplWordnet.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml
11 11  
12 12 all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa
... ... @@ -14,8 +14,8 @@ all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa
14 14 install: all
15 15 mkdir -p $(INSTALLDIR)
16 16 cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR)
17   - cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
18   - cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
  17 + cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMlexSemanticsData.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
  18 + cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMlexSemanticsData.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
19 19 mkdir -p /usr/share/eniam/lexSemantics
20 20 cp resources/* /usr/share/eniam/lexSemantics
21 21 # ln -s /usr/share/eniam/lexSemantics/proper_names_20160104.tab /usr/share/eniam/lexSemantics/proper_names.tab
... ... @@ -24,8 +24,8 @@ install: all
24 24 install-local: all
25 25 mkdir -p $(INSTALLDIR)
26 26 cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR)
27   - cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
28   - cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
  27 + cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMlexSemanticsData.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
  28 + cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMlexSemanticsData.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
29 29 mkdir -p /usr/local/share/eniam/lexSemantics
30 30 cp resources/* /usr/local/share/eniam/lexSemantics
31 31 # ln -s /usr/local/share/eniam/lexSemantics/proper_names_20160104.tab /usr/local/share/eniam/lexSemantics/proper_names.tab
... ...
plWordnet/ENIAMplWordnet.ml
1 1 (*
2   - * ENIAMplWordnet, an interface for "Słowosieć", a Polish Wordnet.
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  2 + * ENIAMplWordnet, a converter for Polish Wordnet "Słowosieć".
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5 5 *
6 6 * This library is free software: you can redistribute it and/or modify
7 7 * it under the terms of the GNU Lesser General Public License as published by
... ... @@ -17,177 +17,329 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
20   -let resource_path =
21   - try Sys.getenv "ENIAM_RESOURCE_PATH"
22   - with Not_found ->
23   - if Sys.file_exists "/usr/share/eniam" then "/usr/share/eniam" else
24   - if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam" else
25   - failwith "resource directory does not exists"
  20 +open Xstd
  21 +open ENIAMplWordnetTypes
26 22  
27   -let rzeczownik_filename = resource_path ^ "/plWordnet/rzeczownik.tab"
28   -let czasownik_filename = resource_path ^ "/plWordnet/czasownik.tab"
29   -let przymiotnik_filename = resource_path ^ "/plWordnet/przymiotnik.tab"
30   -let synsets_filename = resource_path ^ "/plWordnet/synsets.tab"
31   -let hipero_filename = resource_path ^ "/plWordnet/hipero.tab"
32   -let predef_filename = resource_path ^ "/plWordnet/predef_prefs.tab"
33   -let proper_classes_filename = resource_path ^ "/plWordnet/proper_classes.tab"
  23 +let process_unit = function (* Reads one unit-id XML element and returns its numeric id paired with empty_lu. *)
  24 + Xml.Element("unit-id",[],[Xml.PCData s]) -> int_of_string s, empty_lu (* matches only an element without attributes whose single child is text; int_of_string fails on non-numeric text *)
  25 + | node -> failwith ("process_unit " ^ (Xml.to_string node)) (* any other node raises Failure carrying the serialized node *)
34 26  
  27 +let process_tests = function (* Reads one test XML element and returns its (text, pos) attribute values. *)
  28 + Xml.Element("test",["text",text;"pos",pos],[]) -> text,pos (* matches only a childless element with exactly these two attributes, in this order *)
  29 + | node -> failwith ("process_tests " ^ (Xml.to_string node)) (* any other node raises Failure carrying the serialized node *)
35 30  
36   -open Xstd
37   -(* open PreTypes *)
38   -
39   -let load_lu names filename =
40   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
41   - Xlist.fold l (StringMap.empty,names) (fun (lu,names) line ->
42   - if String.length line = 0 then lu,names else
43   - if String.get line 0 = '#' then lu,names else
44   - match Str.split_delim (Str.regexp "\t") line with
45   - [id; lemma; variant] ->
46   - StringMap.add_inc lu lemma [id,variant] (fun l -> (id,variant) :: l),
47   - StringMap.add_inc names id (lemma ^ " " ^ variant) (fun _ -> failwith "load_lu")
48   - | _ -> failwith ("load_lu: " ^ line))
49   -
50   -let noun_lu,lu_names = load_lu StringMap.empty rzeczownik_filename
51   -let verb_lu,lu_names = load_lu lu_names czasownik_filename
52   -let adj_lu,lu_names = load_lu lu_names przymiotnik_filename
53   -
54   -let load_synsets filename =
55   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
56   - Xlist.fold l (StringMap.empty,StringMap.empty) (fun (syn,names) line ->
57   - if String.length line = 0 then (syn,names) else
58   - if String.get line 0 = '#' then (syn,names) else
59   - match Str.split_delim (Str.regexp "\t") line with
60   - [syn_id; lu_ids] ->
61   - let lu_ids = Str.split_delim (Str.regexp " ") lu_ids in
62   - let syn = Xlist.fold lu_ids syn (fun syn lu_id ->
63   - StringMap.add_inc syn lu_id syn_id (fun _ -> failwith ("load_synsets 1: " ^ lu_id))) in
64   - let lu_id = try List.hd lu_ids with _ -> failwith ("load_synsets 2: " ^ syn_id) in
65   - let name = try StringMap.find lu_names lu_id with Not_found -> "syn_id: " ^ syn_id in (* nieznane synsety są z en wordnetu *)
66   - let names = StringMap.add_inc names syn_id name (fun _ -> failwith ("load_synsets 4: " ^ syn_id)) in
67   - syn,names
68   - | _ -> failwith ("load_synsets 5: " ^ line))
69   -
70   -let synsets, syn_names = load_synsets synsets_filename
71   -
72   -let load_hipero filename =
73   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
74   - Xlist.fold l StringMap.empty (fun hip line ->
75   - if String.length line = 0 then hip else
76   - if String.get line 0 = '#' then hip else
77   - match Str.split_delim (Str.regexp "\t") line with
78   - [id; ids] ->
79   - let ids = Str.split_delim (Str.regexp " ") ids in
80   - StringMap.add_inc hip id ids (fun _ -> failwith "load_hipero")
81   - | _ -> failwith ("load_hipero: " ^ line))
82   -
83   -let hipero = load_hipero hipero_filename
84   -
85   -let rec get_lu_id variant = function
86   - (id,v) :: l -> if variant = v then id else get_lu_id variant l
87   - | [] -> failwith "get_lu_id"
88   -
89   -let lu_id_of_sense sense =
90   - let lemma,variant =
91   - match List.rev (Str.split (Str.regexp " ") sense) with
92   -(* [lemma] -> lemma,"" *)
93   - | variant :: l -> String.concat " " (List.rev l), variant
94   - | _ -> failwith "lu_id_of_sense 1" in
95   - if variant = "" then lemma else
96   - let l = try StringMap.find noun_lu lemma with Not_found -> failwith ("lu_id_of_sense 2: " ^ lemma) in
97   - get_lu_id variant l
98   -
99   -let load_predef hipero filename =
100   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
101   - Xlist.fold l (hipero,StringSet.empty) (fun (hipero,predef) line ->
102   - if String.length line = 0 then hipero,predef else
103   - if String.get line 0 = '#' then hipero,predef else
104   - match Str.split_delim (Str.regexp "\t") line with
105   - id :: senses ->
106   - let hipero = Xlist.fold senses hipero (fun hipero sense ->
107   - if StringSet.mem predef sense then StringMap.add_inc hipero sense [id] (fun l -> id :: l) else
108   - let lu_id = lu_id_of_sense sense in
109   - let syn_id = try StringMap.find synsets lu_id with Not_found -> lu_id in
110   - StringMap.add_inc hipero syn_id [id] (fun l -> id :: l)) in
111   - hipero, StringSet.add predef id
112   - | _ -> failwith ("load_predef: " ^ line))
113   -
114   -let hipero,predef = load_predef hipero predef_filename
115   -
116   -let rec get_hipero_rec found id =
117   - if StringSet.mem found id then found else
118   - let found = StringSet.add found id in
119   - let l = try StringMap.find hipero id with Not_found -> [] in
120   - Xlist.fold l found get_hipero_rec
121   -
122   -let get_hipero lu_id =
123   - let syn_id = StringMap.find synsets lu_id in
124   - StringSet.to_list (get_hipero_rec StringSet.empty syn_id)
125   -
126   -let synset_name id =
127   - if StringSet.mem predef id then id else
128   - try StringMap.find syn_names id with Not_found -> failwith "synset_name"
129   -
130   -let rename_sense sense =
131   - let lu_id = lu_id_of_sense sense in
132   - StringMap.find synsets lu_id
133   -
134   -let load_proper_classes filename =
135   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
136   - Xlist.fold l StringMap.empty (fun map line ->
137   - if String.length line = 0 then map else
138   - if String.get line 0 = '#' then map else
139   - match Str.split_delim (Str.regexp "\t") line with
140   - id :: senses ->
141   - let senses = Xlist.map senses (fun sense ->
142   - match List.rev (Str.split (Str.regexp " ") sense) with
143   - weight :: l -> String.concat " " (List.rev l), (try float_of_string weight with _ -> failwith "load_proper_classes 2")
144   - | _ -> failwith "load_proper_classes 4") in
145   - let senses = Xlist.map senses (fun (sense,weight) ->
146   - let sense = if sense = "antroponim 1" then "nazwa własna 1" else sense in
147   - let sense = if sense = "godzina 4" then "godzina 3" else sense in
148   -(* print_endline sense; *)
149   - let lu_id = lu_id_of_sense sense in
150   - sense,Xlist.map (get_hipero lu_id) synset_name,weight) in
151   - StringMap.add_inc map id senses (fun _ -> failwith ("load_proper_classes 3: " ^ id))
152   - | _ -> failwith ("load_proper_classes: " ^ line))
153   -
154   -let proper_classes = load_proper_classes proper_classes_filename
155   -
156   -let simplify_pos = function
157   - "subst" -> "noun"
158   - | "depr" -> "noun"
159   - | "adj" -> "adj"
160   - | "adja" -> "adj"
161   - | "adjc" -> "adj"
162   - | "adjp" -> "adj"
163   - | "ger" -> "verb"
164   - | "pact" -> "verb"
165   - | "ppas" -> "verb"
166   - | "fin" -> "verb"
167   - | "bedzie" -> "verb"
168   - | "praet" -> "verb"
169   - | "winien" -> "verb"
170   - | "impt" -> "verb"
171   - | "imps" -> "verb"
172   - | "inf" -> "verb"
173   - | "pcon" -> "verb"
174   - | "pant" -> "verb"
175   - | "pred" -> "verb"
176   - | s -> s
177   -
178   -let find_senses lemma pos =
179   - (*if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then {t with senses=[lemma,["0"],0.]} else*) (* FIXME: ustalić co z zaimkami *)
180   - let lu = match simplify_pos pos with
181   - "noun" -> noun_lu
182   - | "adj" -> adj_lu
183   - | "verb" -> verb_lu
184   - | _ -> StringMap.empty in
185   - if StringMap.mem lu lemma then
186   - let l = StringMap.find lu lemma in
187   - Xlist.rev_map l (fun (id,variant) ->
188   - lemma ^ " " ^ variant, Xlist.map (get_hipero id) synset_name, log10 (1. /. (try float_of_string variant with _ -> 3.)))
189   - else []
190   -
191   -let find_proper_senses senses =
192   - List.flatten (Xlist.rev_map senses (fun sense ->
193   - try StringMap.find proper_classes sense with Not_found -> failwith ("find_proper_senses: " ^ sense)))
  31 +let process_abstract = function
  32 + "true" -> true
  33 + | "false" -> false
  34 + | _ -> failwith "process_abstract"
  35 +
  36 +(* the function returns:
  37 +lexical-unit map - links lexemes with their identifiers
  38 +synset map
  39 +lexicalrelations
  40 +synsetrelations
  41 +relationtypes map
  42 +*)
  43 +let process_entry (lumap,synmap,lr,sr,rtmap) = function
  44 + Xml.Element("lexical-unit",["id",id;"name",name;"pos",pos;"tagcount",tagcount;"domain",domain;"workstate",workstate;
  45 + "source",source;"variant",variant],[]) ->
  46 + let lumap = IntMap.add_inc lumap (int_of_string id) {lu_id=int_of_string id; lu_name=name; lu_pos=pos; lu_tagcount=tagcount; lu_domain=domain; lu_desc="";
  47 + lu_workstate=workstate; lu_source=source; lu_variant=variant; lu_syn=(-1)} (fun _ -> failwith "process_entry 2") in
  48 + lumap,synmap,lr,sr,rtmap
  49 + | Xml.Element("lexical-unit",["id",id;"name",name;"pos",pos;"tagcount",tagcount;"domain",domain;"desc",desc;"workstate",workstate;
  50 + "source",source;"variant",variant],[]) ->
  51 + let lumap = IntMap.add_inc lumap (int_of_string id) {lu_id=int_of_string id; lu_name=name; lu_pos=pos; lu_tagcount=tagcount; lu_domain=domain; lu_desc=desc;
  52 + lu_workstate=workstate; lu_source=source; lu_variant=variant; lu_syn=(-1)} (fun _ -> failwith "process_entry 3") in
  53 + lumap,synmap,lr,sr,rtmap
  54 + | Xml.Element("synset",["id",id;"workstate",workstate;"split",split;"owner",owner;"definition",definition;"desc",desc;
  55 + "abstract",abstract],units) ->
  56 + let units = Xlist.map units process_unit in
  57 + let synmap = IntMap.add_inc synmap (int_of_string id) {syn_workstate=workstate; syn_split=split; syn_owner=owner; syn_definition=definition;
  58 + syn_desc=desc; syn_abstract=process_abstract abstract; syn_units=units; syn_pos=""; syn_no_hipo=0; syn_domain=""} (fun _ -> failwith "process_entry 4") in
  59 + lumap,synmap,lr,sr,rtmap
  60 + | Xml.Element("synset",["id",id;"workstate",workstate;"split",split;"owner",owner;"desc",desc;
  61 + "abstract",abstract],units) ->
  62 + let units = Xlist.map units process_unit in
  63 + let synmap = IntMap.add_inc synmap (int_of_string id) {syn_workstate=workstate; syn_split=split; syn_owner=owner; syn_definition="";
  64 + syn_desc=desc; syn_abstract=process_abstract abstract; syn_units=units; syn_pos=""; syn_no_hipo=0; syn_domain=""} (fun _ -> failwith "process_entry 4") in
  65 + lumap,synmap,lr,sr,rtmap
  66 + | Xml.Element("lexicalrelations",["parent",parent;"child",child;"relation",relation;"valid",valid;"owner",owner],[]) ->
  67 + let lr = {r_parent=int_of_string parent; r_child=int_of_string child; r_relation=int_of_string relation; r_valid=valid; r_owner=owner} :: lr in
  68 + lumap,synmap,lr,sr,rtmap
  69 + | Xml.Element("synsetrelations",["parent",parent;"child",child;"relation",relation;"valid",valid;"owner",owner],[]) ->
  70 + let sr = {r_parent=int_of_string parent; r_child=int_of_string child; r_relation=int_of_string relation; r_valid=valid; r_owner=owner} :: sr in
  71 + lumap,synmap,lr,sr,rtmap
  72 + | Xml.Element("relationtypes",["id",id;"type",typ;"reverse",reverse;"name",name;"description",description;
  73 + "posstr",posstr;"display",display;"shortcut",shortcut;"autoreverse",autoreverse;
  74 + "pwn",pwn],tests) ->
  75 + let tests = Xlist.map tests process_tests in
  76 + let rtmap = IntMap.add_inc rtmap (int_of_string id) {rt_type=typ; rt_reverse=int_of_string reverse; rt_name=name; rt_description=description;
  77 + rt_posstr=posstr; rt_display=display; rt_shortcut=shortcut; rt_autoreverse=autoreverse; rt_pwn=pwn; rt_tests=tests}
  78 + (fun _ -> failwith "process_entry 5") in
  79 + lumap,synmap,lr,sr,rtmap
  80 + | Xml.Element("relationtypes",["id",id;"type",typ;"name",name;"description",description;
  81 + "posstr",posstr;"display",display;"shortcut",shortcut;"autoreverse",autoreverse;
  82 + "pwn",pwn],tests) ->
  83 + let tests = Xlist.map tests process_tests in
  84 + let rtmap = IntMap.add_inc rtmap (int_of_string id) {rt_type=typ; rt_reverse=(-1); rt_name=name; rt_description=description;
  85 + rt_posstr=posstr; rt_display=display; rt_shortcut=shortcut; rt_autoreverse=autoreverse; rt_pwn=pwn; rt_tests=tests}
  86 + (fun _ -> failwith "process_entry 5") in
  87 + lumap,synmap,lr,sr,rtmap
  88 + | Xml.Element("relationtypes",["id",id;"type",typ;"parent",parent;"reverse",reverse;"name",name;"description",description;
  89 + "posstr",posstr;"display",display;"shortcut",shortcut;"autoreverse",autoreverse;
  90 + "pwn",pwn],tests) ->
  91 + let tests = Xlist.map tests process_tests in
  92 + let rtmap = IntMap.add_inc rtmap (int_of_string id) {rt_type=typ; rt_reverse=int_of_string reverse; rt_name=name; rt_description=description;
  93 + rt_posstr=posstr; rt_display=display; rt_shortcut=shortcut; rt_autoreverse=autoreverse; rt_pwn=pwn; rt_tests=tests}
  94 + (fun _ -> failwith "process_entry 5") in
  95 + lumap,synmap,lr,sr,rtmap
  96 + | Xml.Element("relationtypes",["id",id;"type",typ;"parent",parent;"name",name;"description",description;
  97 + "posstr",posstr;"display",display;"shortcut",shortcut;"autoreverse",autoreverse;
  98 + "pwn",pwn],tests) ->
  99 + let tests = Xlist.map tests process_tests in
  100 + let rtmap = IntMap.add_inc rtmap (int_of_string id) {rt_type=typ; rt_reverse=(-1); rt_name=name; rt_description=description;
  101 + rt_posstr=posstr; rt_display=display; rt_shortcut=shortcut; rt_autoreverse=autoreverse; rt_pwn=pwn; rt_tests=tests}
  102 + (fun _ -> failwith "process_entry 5") in
  103 + lumap,synmap,lr,sr,rtmap
  104 + | node -> print_endline (Xml.to_string node); failwith "process_entry 1"
  105 +
  106 +let load_data filename =
  107 + match try Xml.parse_file filename with Xml.Error e -> failwith ("load_data Xml.Error " ^ Xml.error e) with
  108 + Xml.Element("array-list",_,entries) ->
  109 + Xlist.fold entries (IntMap.empty,IntMap.empty,[],[],IntMap.empty) process_entry
  110 + | node -> failwith ("load_data " ^ (Xml.to_string node))
  111 +
  112 +let check_lu_syn_consistency lumap synmap =
  113 + let set = IntMap.fold lumap IntSet.empty (fun set id _ ->
  114 + if IntSet.mem set id then failwith "check_lu_syn_consistency 1" else
  115 + IntSet.add set id) in
  116 + let set = IntMap.fold synmap set (fun set _ syn ->
  117 + Xlist.fold syn.syn_units set (fun set (id,_) ->
  118 + if not (IntSet.mem set id) then failwith "check_lu_syn_consistency 2" else
  119 + IntSet.remove set id)) in
  120 + if not (IntSet.is_empty set) then failwith "check_lu_syn_consistency 3" else
  121 + ()
  122 +
  123 +let merge_lu_syn lumap synmap =
  124 + IntMap.map synmap (fun syn ->
  125 + let units = Xlist.map syn.syn_units (fun (id,_) -> id, IntMap.find lumap id) in
  126 + let pos = match StringSet.to_list (Xlist.fold units StringSet.empty (fun set (_,lu) ->
  127 + StringSet.add set lu.lu_pos)) with
  128 + [] -> failwith "merge_lu_syn: empty synset"
  129 + | [pos] -> pos
  130 + | _ -> failwith "merge_lu_syn: inconsistent pos" in
  131 + {syn with syn_units=units; syn_pos=pos})
  132 +
  133 +let set_lu_syn lumap synmap =
  134 + IntMap.fold synmap lumap (fun lumap syn_id syn ->
  135 + Xlist.fold syn.syn_units lumap (fun lumap (id,_) ->
  136 + let lu = try IntMap.find lumap id with Not_found -> failwith "set_lu_syn" in
  137 + if lu.lu_syn <> -1 then failwith "set_lu_syn" else
  138 + IntMap.add lumap id {lu with lu_syn=syn_id}))
  139 +
  140 +let count_relations qmap rtmap rels =
  141 + Xlist.fold rels qmap (fun qmap rel ->
  142 + if not (IntMap.mem rtmap rel.r_relation) then print_endline ("unknown relation: " ^ string_of_int rel.r_relation);
  143 + IntQMap.add qmap rel.r_relation)
  144 +
  145 +let lu_name lu =
  146 + lu.lu_name ^ "-" ^ lu.lu_variant
  147 +
  148 +let syn_name syn =
  149 + String.concat ", " (Xlist.map syn.syn_units (fun (_,lu) -> lu_name lu))
  150 +
  151 +let syn_name_single syn =
  152 + if syn.syn_units = [] then "empty" else
  153 + lu_name (snd (List.hd syn.syn_units))
  154 +
  155 +let pwn_pos = ["czasownik pwn"; "przymiotnik pwn"; "przysłówek pwn"; "rzeczownik pwn"]
  156 +
  157 +let is_pwn_lu lu =
  158 + Xlist.mem pwn_pos lu.lu_pos
  159 +
  160 +let is_pwn_syn syn =
  161 + Xlist.mem pwn_pos syn.syn_pos
  162 +
  163 +let get_pos_lu lu = lu.lu_pos
  164 +let get_pos_syn syn = syn.syn_pos
  165 +
  166 +let add_pwn_qmap map rel parent child =
  167 + let s = Printf.sprintf "%s-%s" parent child in
  168 + IntMap.add_inc map rel.r_relation (StringQMap.add StringQMap.empty s) (fun qmap -> StringQMap.add qmap s)
  169 +
  170 +let test_pwn_elem is_pwn_fun map elem =
  171 + try
  172 + if is_pwn_fun (IntMap.find map elem) then "en" else "pl"
  173 + with Not_found -> "NF"
  174 +
  175 +let test_pos_elem get_pos_fun map elem =
  176 + try
  177 + get_pos_fun (IntMap.find map elem)
  178 + with Not_found -> "NF"
  179 +
  180 +let count_pwn_relation qmap lumap synmap rtmap rels t =
  181 + Xlist.fold rels qmap (fun qmap rel ->
  182 + match (*(IntMap.find rtmap rel.r_relation).rt_type,*)t with
  183 + (*"relacja pomiędzy synsetami",*)"sr" -> add_pwn_qmap qmap rel (test_pwn_elem is_pwn_syn synmap rel.r_parent) (test_pwn_elem is_pwn_syn synmap rel.r_child)
  184 + | (*"relacja leksykalna",*)"lr" -> add_pwn_qmap qmap rel (test_pwn_elem is_pwn_lu lumap rel.r_parent) (test_pwn_elem is_pwn_lu lumap rel.r_child)
  185 + (* | "relacja synonimii" -> qmap *)
  186 + | _ -> failwith "count_pwn_relation")
  187 +
  188 +let count_pos_relation qmap lumap synmap rtmap rels t =
  189 + Xlist.fold rels qmap (fun qmap rel ->
  190 + match (*(IntMap.find rtmap rel.r_relation).rt_type,*)t with
  191 + (*"relacja pomiędzy synsetami",*)"sr" -> add_pwn_qmap qmap rel (test_pos_elem get_pos_syn synmap rel.r_parent) (test_pos_elem get_pos_syn synmap rel.r_child)
  192 + | (*"relacja leksykalna",*)"lr" -> add_pwn_qmap qmap rel (test_pos_elem get_pos_lu lumap rel.r_parent) (test_pos_elem get_pos_lu lumap rel.r_child)
  193 + (* | "relacja synonimii" -> qmap *)
  194 + | _ -> failwith "count_pwn_relation")
  195 +
  196 +let select_plWordnet lumap synmap lr sr rtmap =
  197 + let lr = Xlist.fold lr [] (fun lr rel ->
  198 + if test_pwn_elem is_pwn_lu lumap rel.r_parent = "pl" &&
  199 + test_pwn_elem is_pwn_lu lumap rel.r_child = "pl" &&
  200 + IntSet.mem pl_pl_relations rel.r_relation then rel :: lr else lr) in
  201 + let sr = Xlist.fold sr [] (fun sr rel ->
  202 + if test_pwn_elem is_pwn_syn synmap rel.r_parent = "pl" &&
  203 + test_pwn_elem is_pwn_syn synmap rel.r_child = "pl" &&
  204 + IntSet.mem pl_pl_relations rel.r_relation then rel :: sr else sr) in
  205 + let lumap = IntMap.fold lumap IntMap.empty (fun lumap id lu ->
  206 + if is_pwn_lu lu then lumap else IntMap.add lumap id lu) in
  207 + let synmap = IntMap.fold synmap IntMap.empty (fun synmap id syn ->
  208 + if is_pwn_syn syn then synmap else IntMap.add synmap id syn) in
  209 + let rtmap = IntMap.fold rtmap IntMap.empty (fun rtmap id rt ->
  210 + if IntSet.mem pl_pl_relations id then IntMap.add rtmap id rt else rtmap) in
  211 + lumap,synmap,lr,sr,rtmap
  212 +
  213 +let create_relation_map rel_id rels =
  214 + Xlist.fold rels Relation.empty (fun graph r ->
  215 + if r.r_relation = rel_id then
  216 + Relation.add_new graph r.r_parent r.r_child 0
  217 + else graph)
  218 +
  219 +let create_relation_maps rel_maps rels =
  220 + Xlist.fold rels rel_maps (fun graphs r ->
  221 + let graph = try IntMap.find graphs r.r_relation with Not_found -> Relation.empty in
  222 + let graph = Relation.add_new graph r.r_parent r.r_child 0 in
  223 + IntMap.add graphs r.r_relation graph)
  224 +
  225 +let create_relation_map_lex lumap rel_id rels =
  226 + Xlist.fold rels Relation.empty (fun graph r ->
  227 + if r.r_relation = rel_id then
  228 + let parent = (IntMap.find lumap r.r_parent).lu_syn in
  229 + let child = (IntMap.find lumap r.r_child).lu_syn in
  230 + Relation.add graph parent child 0
  231 + else graph)
  232 +
  233 +let create_relation_maps_lex rel_maps lumap rels =
  234 + Xlist.fold rels rel_maps (fun graphs r ->
  235 + let graph = try IntMap.find graphs r.r_relation with Not_found -> Relation.empty in
  236 + let parent = (IntMap.find lumap r.r_parent).lu_syn in
  237 + let child = (IntMap.find lumap r.r_child).lu_syn in
  238 + let graph = Relation.add graph parent child 0 in
  239 + IntMap.add graphs r.r_relation graph)
  240 +
  241 +let assign_no_hipo synmap hipo =
  242 + IntMap.mapi synmap (fun id syn ->
  243 + {syn with syn_no_hipo=IntSet.size (Relation.find_descendants hipo id)})
  244 +
  245 +let check_rel_class_coverage rel_maps rel_sets =
  246 + let set = Xlist.fold (List.tl rel_sets) (List.hd rel_sets) IntSet.union in
  247 + IntMap.iter rel_maps (fun rel_id _ ->
  248 + if not (IntSet.mem set rel_id) then Printf.printf "only in rel_maps: %d\n" rel_id);
  249 + IntSet.iter set (fun rel_id ->
  250 + if not (IntMap.mem rel_maps rel_id) then Printf.printf "only in rel_sets: %d\n" rel_id)
  251 +
  252 +let get_syn_id synmap lu_name lu_variant =
  253 + let found = IntMap.fold synmap [] (fun found id syn ->
  254 + Xlist.fold syn.syn_units found (fun found (_,lu) ->
  255 + if lu.lu_name = lu_name && lu.lu_variant = lu_variant then
  256 + id :: found else found)) in
  257 + match found with
  258 + [] -> failwith "get_syn_id: not found"
  259 + | [id] -> id
  260 + | _ -> failwith "get_syn_id: multiple id found"
  261 +
  262 +let add_relations rel_maps rev_rel_maps ex_hipo relations =
  263 + Xlist.fold relations ex_hipo (fun ex_hipo (cost,dir,rel_ids) ->
  264 + Xlist.fold rel_ids ex_hipo (fun ex_hipo rel_id ->
  265 + let graph = IntMap.find (if dir = Straight then rel_maps else rev_rel_maps) rel_id in
  266 + IntMap.fold graph ex_hipo (fun ex_hipo parent children ->
  267 + IntMap.fold children ex_hipo (fun ex_hipo child _ ->
  268 + Relation.add_inc ex_hipo parent child cost min))))
  269 +
  270 +let add_hipo_extensions synmap rel_maps rev_rel_maps ex_hipo hipo_extensions =
  271 + Xlist.fold hipo_extensions ex_hipo (fun ex_hipo (cost,lu_name,lu_variant,dir,rel_ids) ->
  272 + let hiper_id = get_syn_id synmap lu_name lu_variant in
  273 + Xlist.fold rel_ids ex_hipo (fun ex_hipo rel_id ->
  274 + let graph = IntMap.find (if dir = Parent then rel_maps else rev_rel_maps) rel_id in
  275 + IntMap.fold graph ex_hipo (fun ex_hipo hipo_id _ ->
  276 + Relation.add_inc ex_hipo hipo_id hiper_id cost min)))
  277 +
  278 +let add_hipo_extensions2 synmap ex_hipo hipo_extensions =
  279 + Xlist.fold hipo_extensions ex_hipo (fun ex_hipo (cost,lu_name,lu_variant,poss) ->
  280 + let hiper_id = get_syn_id synmap lu_name lu_variant in
  281 + IntMap.fold synmap ex_hipo (fun ex_hipo hipo_id syn ->
  282 + if Xlist.mem poss syn.syn_pos then Relation.add_inc ex_hipo hipo_id hiper_id cost min else ex_hipo))
  283 +
  284 +let create_ex_hipo synmap rel_maps rev_rel_maps =
  285 + let ex_hipo = add_relations rel_maps rev_rel_maps IntMap.empty hipo_relations in
  286 + let ex_hipo = add_hipo_extensions synmap rel_maps rev_rel_maps ex_hipo hipo_extensions in
  287 + let ex_hipo = add_hipo_extensions2 synmap ex_hipo hipo_extensions2 in
  288 + ex_hipo
  289 +
  290 +let rec get_hipero_rec found ex_hipo id cost =
  291 + let cost2 = try IntMap.find found id with Not_found -> max_int in
  292 + if cost2 <= cost || cost > 7 then found else
  293 + let found = IntMap.add found id cost in
  294 + let map = try IntMap.find ex_hipo id with Not_found -> IntMap.empty in
  295 + IntMap.fold map found (fun found id2 cost2 ->
  296 + get_hipero_rec found ex_hipo id2 (cost + cost2))
  297 +
  298 +let get_hipero ex_hipo syn_id =
  299 + get_hipero_rec IntMap.empty ex_hipo syn_id 0
  300 +
  301 +let select_big_synsets synmap threshold =
  302 + IntMap.fold synmap IntSet.empty (fun selected id syn ->
  303 + if syn.syn_no_hipo >= threshold then IntSet.add selected id else selected)
  304 +
  305 +let print_subtree synmap ex_hipo path lu_name lu_variant =
  306 + let syn_id = get_syn_id synmap lu_name lu_variant in
  307 + let tree = Relation.descendants_tree ex_hipo syn_id 0 in
  308 + File.file_out (path ^ lu_name ^ "-" ^ lu_variant ^ ".txt") (fun file ->
  309 + Relation.print_tree file tree (fun syn_id cost ->
  310 + let syn = IntMap.find synmap syn_id in
  311 + let abstract = if syn.syn_abstract then "*" else "" in
  312 + Printf.sprintf "%d %s%s" syn.syn_no_hipo abstract (syn_name syn)));
  313 + File.file_out (path ^ lu_name ^ "-" ^ lu_variant ^ ".xml") (fun file ->
  314 + Relation.print_tree_xml file tree (fun syn_id cost ->
  315 + let syn = IntMap.find synmap syn_id in
  316 + ["name",syn_name syn;
  317 + "size",string_of_int syn.syn_no_hipo] @
  318 + (if syn.syn_abstract then ["abstract","true"] else [])))
  319 +
  320 +(* semimport/plWordnet.ml also contained a procedure that printed Słowosieć subtrees merged with Walenty *)
  321 +
  322 +let print_subtree_graph synmap hipo path lu_name lu_variant threshold =
  323 + let syn_id = get_syn_id synmap lu_name lu_variant in
  324 + let big = select_big_synsets synmap threshold in
  325 + let hipo = Relation.select hipo (fun parent child cost -> IntSet.mem big parent && IntSet.mem big child) in
  326 + let descendants = Relation.find_descendants hipo syn_id in
  327 + let hipo2 = Relation.select hipo (fun parent child cost -> IntSet.mem descendants parent || IntSet.mem descendants child) in
  328 + Relation.print_graph path (lu_name ^ "-" ^ lu_variant) true hipo2 (fun id ->
  329 + let syn = IntMap.find synmap id in
  330 + Printf.sprintf "%s\\n%d" (syn_name_single syn) syn.syn_no_hipo) (fun _ -> "")
  331 +
  332 +let rt_names = ["type"; "reverse"; "name"; "description"; "posstr"; "display"; "shortcut"; "autoreverse"; "pwn"; "tests"]
  333 +
  334 +let string_of_tests tests =
  335 + String.concat " " (Xlist.map tests (fun (t,p) -> "(" ^ t ^ "," ^ p ^ ")"))
  336 +
  337 +let string_of_rt rt =
  338 + Printf.sprintf "\"%s\";\"%d\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" rt.rt_type rt.rt_reverse rt.rt_name rt.rt_description rt.rt_posstr
  339 + rt.rt_display rt.rt_shortcut rt.rt_autoreverse rt.rt_pwn (string_of_tests rt.rt_tests)
  340 +
  341 +let print_rt_map filename rel_count rtmap =
  342 + File.file_out filename (fun file ->
  343 + Printf.fprintf file "id;quantity;%s\n" (String.concat ";" rt_names);
  344 + IntMap.iter rtmap (fun id rt ->
  345 + Printf.fprintf file "%d;%d;%s\n" id (try IntQMap.find rel_count id with Not_found -> 0) (string_of_rt rt)))
... ...
slowosiec/ENIAMplWordnetAnalyze.ml renamed to plWordnet/ENIAMplWordnetAnalyze.ml
slowosiec/ENIAMplWordnetGenerate.ml renamed to plWordnet/ENIAMplWordnetGenerate.ml
slowosiec/ENIAMplWordnetTypes.ml renamed to plWordnet/ENIAMplWordnetTypes.ml
plWordnet/README
1 1 ENIAMplWordnet Version 1.0 :
2 2 -----------------------
3 3  
4   -ENIAMplWordnet is a library that provides an interface for
5   -"Słowosieć", a Polish Wordnet.
  4 +ENIAMplWordnet is a library that converts
  5 +Polish Wordnet "Słowosieć" into the format used by ENIAM.
6 6  
7 7 Install
8 8 -------
9 9  
10 10 ENIAMplWordnet requires OCaml version 4.02.3 compiler
11   -together with Xlib library version 3.1 or later.
  11 +together with Xlib library version 3.2.
12 12  
13   -In order to install type:
  13 +In order to use ENIAMplWordnet you must first download plWordnet.
14 14  
15   -make install
  15 +Then, compile ENIAMplWordnet:
16 16  
17   -by default, ENIAMplWordnet is installed in the 'ocamlc -where'/eniam directory.
18   -you can change it by editing the Makefile.
  17 +make
19 18  
20   -In order to test library type:
21   -make test
22   -./test
  19 +convert plWordnet:
23 20  
24   -By default ENIAMplWordnet looks for resources in /usr/share/eniam directory.
25   -However this behaviour may be changed by setting end exporting ENIAM_RESOURCE_PATH
26   -environment variable.
  21 +./converter <plWordnet_file>
27 22  
28   -Credits
29   --------
30   -Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
31   -Copyright © 2016 Institute of Computer Science Polish Academy of Sciences
  23 +install converted plWordnet:
32 24  
33   -The library uses the following licensed resources:
  25 +make install
34 26  
35   -plWordNet 2.1 © 2013 by Wrocław University of Technology. All rights reserved.
  27 +remove temporary files:
36 28  
  29 +make clean
  30 +
  31 +Credits
  32 +-------
  33 +Copyright © 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  34 +Copyright © 2016-2017 Institute of Computer Science Polish Academy of Sciences
37 35  
38 36 Licence
39 37 -------
... ...
plWordnet/makefile
1 1 OCAMLC=ocamlc
2 2 OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4   -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
5   -OCAMLFLAGS=$(INCLUDES) -g
6   -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-plWordnet.cmxa
7   -INSTALLDIR=`ocamlc -where`/eniam
  4 +INCLUDES=-I +extlib -I +xml-light -I +gsl -I +xlib -I +zip -I +bz2
  5 +OCAMLFLAGS=$(INCLUDES)
  6 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa
8 7  
9   -SOURCES= ENIAMplWordnet.ml
  8 +SOURCES=relation.ml ENIAMplWordnetTypes.ml ENIAMplWordnet.ml
10 9  
11   -all: eniam-plWordnet.cma eniam-plWordnet.cmxa
  10 +all: $(SOURCES) ENIAMplWordnetGenerate.ml
  11 + mkdir -p resources
  12 + mkdir -p results
  13 + $(OCAMLOPT) -o converter $(OCAMLOPTFLAGS) $^
12 14  
13   -install: all
14   - mkdir -p $(INSTALLDIR)
15   - cp eniam-plWordnet.cmxa eniam-plWordnet.a eniam-plWordnet.cma $(INSTALLDIR)
16   - cp ENIAMplWordnet.cmi $(INSTALLDIR)
17   - cp ENIAMplWordnet.cmx $(INSTALLDIR)
  15 +analyze: $(SOURCES) ENIAMplWordnetAnalyze.ml
  16 + mkdir -p results/rels
  17 + $(OCAMLOPT) -o analyze $(OCAMLOPTFLAGS) $^
  18 +
  19 +install:
18 20 mkdir -p /usr/share/eniam/plWordnet
19 21 cp resources/* /usr/share/eniam/plWordnet
20 22  
21   -install-local: all
22   - mkdir -p $(INSTALLDIR)
23   - cp eniam-plWordnet.cmxa eniam-plWordnet.a eniam-plWordnet.cma $(INSTALLDIR)
24   - cp ENIAMplWordnet.cmi $(INSTALLDIR)
25   - cp ENIAMplWordnet.cmx $(INSTALLDIR)
  23 +install-local:
26 24 mkdir -p /usr/local/share/eniam/plWordnet
27 25 cp resources/* /usr/local/share/eniam/plWordnet
28 26  
29   -eniam-plWordnet.cma: $(SOURCES)
30   - ocamlc -linkall -a -o eniam-plWordnet.cma $(OCAMLFLAGS) $^
31   -
32   -eniam-plWordnet.cmxa: $(SOURCES)
33   - ocamlopt -linkall -a -o eniam-plWordnet.cmxa $(INCLUDES) $^
34   -
35   -test: test.ml
36   - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml
37   -
38 27 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
39 28  
40 29 .mll.ml:
... ... @@ -55,5 +44,11 @@ test: test.ml
55 44 .ml.cmx:
56 45 $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
57 46  
  47 +xlib.cmxa:
  48 + cd xlib; make $@
  49 +
  50 +xlib.cma:
  51 + cd xlib; make $@
  52 +
58 53 clean:
59   - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test
  54 + rm -f *~ *.cm[oix] *.o analyze converter resources/* results/*
... ...
slowosiec/relation.ml renamed to plWordnet/relation.ml
plWordnet/resources/README deleted
1   -Files in this folder were created on the basis of
2   -plWordNet 2.1 © 2013 by Wrocław University of Technology. All rights reserved.
3   -