Commit 63d47e4b17ab6c6ece55922bc166eb08e03068f1
1 parent
3d8c3471
Walencja semantyczna przyimków
Showing
28 changed files
with
521 additions
and
493546 deletions
Too many changes to show.
To preserve performance only 17 of 28 files are displayed.
LCGlexicon/TODO
LCGlexicon/resources/lexicon-pl.dic
... | ... | @@ -104,11 +104,12 @@ pos=subst,case=gen,nsem=measure: |
104 | 104 | measure*sg*case*n2*person{\num*number*case*gender*person*rec}{schema}{\(1+qub),/(1+inclusion)}: measure_weight; # UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika |
105 | 105 | |
106 | 106 | # frazy przyimkowe |
107 | -pos=prep: prepnp*lemma*case{\(1+advp*T),/np*T*case*T*T}{\(1+qub),/(1+inclusion)}; | |
108 | -pos=prep: prepadjp*lemma*case{\(1+advp*T),/adjp*T*case*T}{\(1+qub),/(1+inclusion)}; | |
107 | +lemma!=temu,pos=prep: prepnp*lemma*case{\(1+advp*T),/np*T*case*T*T}{\(1+qub),/(1+inclusion)}; | |
108 | +lemma!=temu,pos=prep: prepadjp*lemma*case{\(1+advp*T),/adjp*T*case*T}{\(1+qub),/(1+inclusion)}; | |
109 | 109 | lemma=po,pos=prep: QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/(adjp*sg*dat*m1+adjp*T*postp*T)}{\(1+qub),/(1+inclusion)}; # po polsku, po kreciemu |
110 | 110 | lemma=z,pos=prep: QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/adjp*sg*nom*f}{\(1+qub),/(1+inclusion)}; # z bliska |
111 | 111 | lemma=na,pos=prep: QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/advp*T}{\(1+qub),/(1+inclusion)}; # na lewo |
112 | +lemma=temu,pos=prep: prepnp*lemma*case\np*T*case*T*T; # chwilę temu | |
112 | 113 | |
113 | 114 | # przyimkowe określenia czasu |
114 | 115 | lemma=z,pos=prep,case=gen: prepnp*lemma*case{\(1+advp*T),/(day-month+day+year+date+hour+hour-minute)}{\(1+qub),/(1+inclusion)}; |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -29,27 +29,42 @@ let find_meaning m = |
29 | 29 | with Not_found -> |
30 | 30 | m.name ^ "-" ^ m.variant, [], unknown_meaning_weight |
31 | 31 | |
(* [find_prep_meaning lemma hipero] builds the (name, hypernyms, weight)
   meaning triple for a preposition.

   [hipero] must be a one-element list [[Predef name]]; any other shape
   raises [Failure "find_prep_meaning"].  The special name "ALL" is kept
   as the only hypernym, with cost 0.  Any other name is looked up in
   [ENIAMplWordnet.predef] (raising [Not_found] when absent) and the
   entries returned by [ENIAMplWordnet.get_hipero] become
   (synset name, cost) pairs.  The weight is always
   [unknown_meaning_weight]. *)
let find_prep_meaning lemma hipero =
  match hipero with
    [Predef "ALL"] -> lemma, ["ALL",0], unknown_meaning_weight
  | [Predef name] ->
      let syn_id = StringMap.find !ENIAMplWordnet.predef name in
      let hiperonyms =
        IntMap.fold (ENIAMplWordnet.get_hipero syn_id) [] (fun acc id cost ->
          (ENIAMplWordnet.synset_name id, cost) :: acc) in
      lemma, hiperonyms, unknown_meaning_weight
  | _ -> failwith "find_prep_meaning"
40 | + | |
32 | 41 | let lex_sie = LCG (ENIAMwalRenderer.render_morf (SimpleLexArg("się",QUB))) |
33 | 42 | |
34 | -let find_senses t s = (* FIXME: sensy zawierające 'się' *) | |
35 | - let set = Xlist.fold s.frames StringSet.empty (fun set frame -> | |
43 | +(* FIXME: naiwnie wierzymy, że jeśli leksem jest opisany semantycznie w walentym to zawiera ramy dla wszystkich sensów *) | |
44 | +let find_senses t s = | |
45 | + (*let set = Xlist.fold s.frames StringSet.empty (fun set frame -> | |
36 | 46 | Xlist.fold frame.meanings set (fun set (name,hipero,weight) -> |
37 | - StringSet.add set name)) in | |
47 | + StringSet.add set name)) in*) | |
38 | 48 | let senses = match t.token with |
39 | 49 | Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos |
40 | 50 | | Proper(_,_,_,senses) -> ENIAMplWordnet.find_proper_senses senses |
41 | 51 | | _ -> [] in |
42 | - let senses = Xlist.fold senses [] (fun senses (name,hipero,weight) -> | |
43 | - if StringSet.mem set name then senses else (name,hipero,weight) :: senses) in | |
52 | + (* let senses = | |
53 | + Xlist.fold senses [] (fun senses (name,hipero,weight) -> | |
54 | + if StringSet.mem set name then senses else (name,hipero,weight) :: senses) in *) | |
44 | 55 | let senses_sie = match t.token with |
45 | 56 | Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses (lemma ^ " się") pos |
46 | 57 | | Proper(_,_,_,senses) -> [] |
47 | 58 | | _ -> [] in |
48 | - let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) -> | |
59 | +(* let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) -> | |
49 | 60 | if StringSet.mem set name then senses_sie else (name,hipero,weight) :: senses_sie) in |
50 | 61 | let frames = if senses = [] then s.frames else {empty_frame with meanings=senses} :: s.frames in |
51 | 62 | let frames = if senses_sie = [] then frames else {empty_frame with meanings=senses_sie; |
52 | - positions=[{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]}]} :: frames in (* FIXME: czy to nie usuwa elementów z ramy? *) | |
63 | + positions=[{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]; is_necessary=Req}]} :: frames in*) (* FIXME: czy to nie usuwa elementów z ramy? *) | |
64 | + let frames = Xlist.fold s.frames [] (fun frames f -> | |
65 | + if f.meanings <> [] then f :: frames else | |
66 | + (if senses_sie = [] then [] else [{f with meanings=senses_sie; positions={empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]; is_necessary=Req} :: f.positions}]) @ | |
67 | + [{f with meanings=senses}] @ frames) in | |
53 | 68 | {s with frames=frames} |
54 | 69 | |
55 | 70 | let find_selprefs schema = (* FIXME: RelationRole *) |
... | ... | @@ -135,6 +150,33 @@ let get_preps tokens group = (* FIXME: To nie zadziała przy kilku wystąpieniac |
135 | 150 | | _ -> preps,compars) in |
136 | 151 | StringMap.fold preps [] (fun l prep v -> (prep, StringSet.to_list v) :: l), StringSet.to_list compars |
137 | 152 | |
(* [make_unique schemata] removes duplicate (selectors, schema) pairs.
   Two pairs are duplicates when their printed forms coincide; the pair
   added last for a given printed form is the one that is kept.  The
   result is produced by folding over the map, so its order follows the
   map's key traversal, not the input order. *)
let make_unique schemata =
  let key_of (selectors,schema) =
    "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "}" in
  let by_key = Xlist.fold schemata StringMap.empty (fun acc entry ->
    StringMap.add acc (key_of entry) entry) in
  StringMap.fold by_key [] (fun acc _ entry -> entry :: acc)
158 | + | |
(* [semantize lemma pos (selectors, schema)] turns a purely syntactic
   schema into frames.  Every position gets the role "Arg" and the
   selectional preference [Predef "X"], and one frame is produced per
   entry returned by [ENIAMvalence.get_aroles], with that entry's
   selectors placed in front of [selectors]. *)
let semantize lemma pos (selectors,schema) =
  (* FIXME: stub, so that known arguments are preferred *)
  let blind p = {p with role="Arg"; sel_prefs=[Predef "X"]} in
  let positions = Xlist.rev_map schema blind in
  let frame_of (sel,arole,arole_attr,arev) =
    {empty_frame with selectors=sel @ selectors; positions=positions;
     arole=arole; arole_attr=arole_attr; arev=arev} in
  Xlist.rev_map (ENIAMvalence.get_aroles positions lemma pos) frame_of
165 | + | |
(* [assign_prep_semantics lemma] returns one frame per entry recorded for
   the preposition [lemma] in [ENIAMlexSemanticsData.prep_roles], or []
   when the lemma has no entry.

   Each frame is restricted to the entry's case by a [Case,Eq,[case]]
   selector, carries the meaning built by [find_prep_meaning], and has a
   single required position whose morphs come from
   [ENIAMwalRenderer.assing_pref_morfs].  The position is [Backward_]
   for the lemma "temu" and [Forward_] otherwise.

   Exceptions raised by [find_prep_meaning] are not caught here.

   The debug tracing that used to print to stdout on every call has been
   removed, in line with the other (commented-out) traces in this file. *)
let assign_prep_semantics lemma =
  let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in
  (* The direction depends only on the lemma, so compute it once. *)
  let direction = if lemma="temu" then Backward_ else Forward_ in
  Xlist.map roles (function (case,arole,arole_attr,hipero,sel_prefs) ->
    let meaning = find_prep_meaning lemma hipero in (* FIXME: stub for meaning and weight *)
    let positions = [{empty_position with
      sel_prefs=sel_prefs; dir=direction;
      morfs=ENIAMwalRenderer.assing_pref_morfs (lemma,case); is_necessary=Req}] in
    {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions;
     arole=arole; arole_attr=arole_attr; arev=false})
179 | + | |
138 | 180 | let assign_valence tokens lex_sems group = |
139 | 181 | let lexemes = Xlist.fold group StringSet.empty (fun lexemes id -> |
140 | 182 | let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in |
... | ... | @@ -151,10 +193,10 @@ let assign_valence tokens lex_sems group = |
151 | 193 | (* Printf.printf "A %s %s %s |schemata|=%d\n" lemma pos pos2 (Xlist.size schemata); *) |
152 | 194 | let entries = Entries.find entries pos lemma in |
153 | 195 | let connected = Entries.find connected pos2 lemma in |
154 | - let schemata = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) -> | |
155 | - ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* FIXME: gubię opinię *) | |
196 | + let schemata1 = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) -> | |
197 | + ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* gubię opinię *) | |
156 | 198 | (* Printf.printf "B %s |schemata|=%d\n" lemma (Xlist.size schemata); *) |
157 | - let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata in | |
199 | + let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata1 in | |
158 | 200 | (* Printf.printf "C %s |schemata|=%d\n" lemma (Xlist.size schemata); *) |
159 | 201 | let schemata = Xlist.rev_map schemata (fun (selectors,schema) -> |
160 | 202 | selectors,ENIAMwalRenderer.render_simple_schema schema) in |
... | ... | @@ -163,15 +205,31 @@ let assign_valence tokens lex_sems group = |
163 | 205 | let entries = List.flatten (Xlist.rev_map entries (ENIAMvalence.transform_lex_entry pos lemma)) in |
164 | 206 | let entries = Xlist.map entries (fun (selectors,entry) -> |
165 | 207 | selectors,ENIAMwalRenderer.render_lex_entry entry) in |
166 | - let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema) -> | |
167 | - Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema) (fun (selectors,schema) -> | |
168 | - {empty_frame with selectors=selectors; meanings= Xlist.map meanings find_meaning; positions=schema}))) in (* FIXME: gubię opinię *) | |
208 | + let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema1) -> | |
209 | + List.flatten (Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema1) (fun (selectors,schema) -> | |
210 | + Xlist.rev_map (ENIAMvalence.get_aroles schema1 lemma pos) (fun (sel,arole,arole_attr,arev) -> | |
211 | + {selectors=sel @ selectors; meanings=Xlist.map meanings find_meaning; positions=schema; | |
212 | + arole=arole; arole_attr=arole_attr; arev=arev; sopinion=sopinion; fopinion=fopinion}))))) in | |
213 | + (* Printf.printf "E %s |connected|=%d\n" lemma (Xlist.size connected); *) | |
214 | + let connected = if connected = [] then List.flatten (Xlist.rev_map (make_unique schemata1) (semantize lemma pos)) else connected in | |
215 | + (* Printf.printf "F %s |connected|=%d\n" lemma (Xlist.size connected); *) | |
169 | 216 | let connected = Xlist.fold connected [] (fun connected frame -> |
170 | 217 | if ENIAMadjuncts.check_selector_lex_constraints lexemes pos frame.selectors then frame :: connected else connected) in |
218 | + (* Printf.printf "G %s |connected|=%d\n" lemma (Xlist.size connected); *) | |
171 | 219 | let connected = Xlist.rev_map connected (fun frame -> |
172 | 220 | {frame with |
173 | 221 | positions = find_selprefs (ENIAMwalRenderer.render_connected_schema (ENIAMwalReduce.set_necessary frame.positions))}) in |
222 | + (* Printf.printf "H %s |connected|=%d\n" lemma (Xlist.size connected); *) | |
174 | 223 | let connected = List.flatten (Xlist.rev_map connected (ENIAMadjuncts.add_connected_adjuncts preps compreps compars pos2)) in |
224 | + (* Printf.printf "I %s |connected|=%d\n" lemma (Xlist.size connected); *) | |
225 | + let connected = if pos = "prep" then | |
226 | + if connected <> [] then failwith "assign_valence" else | |
227 | + assign_prep_semantics lemma else connected in | |
228 | + (* Printf.printf "J %s |connected|=%d\n" lemma (Xlist.size connected); *) | |
229 | + let connected = if connected = [] then | |
230 | + Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) -> | |
231 | + {empty_frame with selectors=sel; arole=arole; arole_attr=arole_attr; arev=arev}) else connected in | |
232 | + (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *) | |
175 | 233 | ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with |
176 | 234 | schemata=schemata; lex_entries=entries; frames=connected}) |
177 | 235 | |
... | ... | @@ -253,7 +311,6 @@ let assign tokens text = |
253 | 311 | (* Xlist.iter groups (fun group -> print_endline (String.concat " " (Xlist.map group string_of_int))); *) |
254 | 312 | remove_unused_tokens tokens groups; |
255 | 313 | Xlist.iter groups (fun group -> assign_valence tokens lex_sems group); |
256 | - (* Xlist.iter groups (fun group -> assign_valence tokens lex_sems group);*) | |
257 | 314 | Int.iter 1 (ExtArray.size tokens - 1) (fun i -> |
258 | 315 | let token = ExtArray.get tokens i in |
259 | 316 | let lex_sem = ExtArray.get lex_sems i in |
... | ... | @@ -275,4 +332,5 @@ let initialize () = |
275 | 332 | ENIAMwalParser.initialize (); |
276 | 333 | ENIAMwalReduce.initialize (); |
277 | 334 | ENIAMplWordnet.initialize (); |
335 | + ENIAMcategoriesPL.initialize (); | |
278 | 336 | () |
... | ... |
lexSemantics/ENIAMlexSemanticsData.ml
... | ... | @@ -21,13 +21,13 @@ open ENIAMtokenizerTypes |
21 | 21 | open ENIAMlexSemanticsTypes |
22 | 22 | open Xstd |
23 | 23 | |
24 | -let subst_inst_roles = Xlist.fold [ | |
25 | - "wiosna", "Time",""; | |
26 | - "lato", "Time",""; | |
27 | - "jesień", "Time",""; | |
28 | - "zima", "Time",""; | |
29 | - "wieczór", "Time",""; | |
30 | - ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a)) | |
24 | +let subst_inst_time = StringSet.of_list [ | |
25 | + "wiosna"; | |
26 | + "lato"; | |
27 | + "jesień"; | |
28 | + "zima"; | |
29 | + "wieczór"; | |
30 | + ] | |
31 | 31 | |
32 | 32 | let adj_roles = Xlist.fold [ |
33 | 33 | "ten", "Apoz",""; |
... | ... | @@ -59,7 +59,7 @@ let adj_roles = Xlist.fold [ |
59 | 59 | "taki", "Attribute",""; |
60 | 60 | "czyj", "Possesive",""; |
61 | 61 | "który", "Attribute",""; |
62 | - ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a)) | |
62 | + ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l)) | |
63 | 63 | |
64 | 64 | let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *) |
65 | 65 | (* operators: nielokalnie zmieniaja formułe logiczna *) |
... | ... | @@ -80,7 +80,7 @@ let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *) |
80 | 80 | "dlatego", "Condition",""; (* odniesieniem argumentu jest sytuacji/kontekst *) |
81 | 81 | "tak", "Manner",""; (* odniesieniem argumentu jest sytuacji/kontekst, byc może deiktyczny *) |
82 | 82 | |
83 | - "skąd", "Location","Source"; | |
83 | +(* "skąd", "Location","Source"; | |
84 | 84 | "skądkolwiek", "Location","Source"; |
85 | 85 | "skądś", "Location","Source"; |
86 | 86 | "skądże", "Location","Source"; |
... | ... | @@ -209,8 +209,8 @@ let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *) |
209 | 209 | "ongi", "Time",""; |
210 | 210 | "ongiś", "Time",""; |
211 | 211 | "wczas", "Time",""; |
212 | - "wonczas", "Time",""; | |
213 | - ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a)) | |
212 | + "wonczas", "Time","";*) | |
213 | + ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l)) | |
214 | 214 | |
215 | 215 | let qub_roles = Xlist.fold [ |
216 | 216 | "tylko", "Quantifier",""; |
... | ... | @@ -236,10 +236,10 @@ let qub_roles = Xlist.fold [ |
236 | 236 | "ponad", "Mod",""; |
237 | 237 | "prawie", "Mod",""; |
238 | 238 | "przynajmniej", "Mod",""; |
239 | - ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a)) | |
239 | + ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l)) | |
240 | 240 | |
241 | 241 | |
242 | -let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_prefs *)(* FIXME: problem z podwójnymi przypisaniami *) | |
242 | +let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_prefs *) | |
243 | 243 | "od","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
244 | 244 | "spod","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
245 | 245 | "spomiędzy","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
... | ... | @@ -249,12 +249,14 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
249 | 249 | "spoza","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
250 | 250 | "sprzed","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
251 | 251 | "z","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
252 | + "z","postp", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; | |
252 | 253 | "znad","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
253 | 254 | "zza","gen", "Location","Source",["POŁOŻENIE"],["POŁOŻENIE"]; |
254 | 255 | "do","gen", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
255 | 256 | "ku","dat", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
256 | 257 | "między","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
257 | 258 | "na","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
259 | + "na","postp", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; | |
258 | 260 | "nad","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
259 | 261 | "nieopodal","gen", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
260 | 262 | "opodal","gen", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
... | ... | @@ -267,6 +269,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
267 | 269 | "za","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
268 | 270 | "dzięki","dat", "Condition","",["CZEMU"],[]; |
269 | 271 | "na","acc", "Condition","",["CZEMU"],[]; |
272 | + "na","postp", "Condition","",["CZEMU"],[]; | |
270 | 273 | "od","gen", "Condition","",["CZEMU"],[]; |
271 | 274 | "przez","acc", "Condition","",["CZEMU"],[]; |
272 | 275 | "wskutek","gen", "Condition","",["CZEMU"],[]; |
... | ... | @@ -275,6 +278,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
275 | 278 | "do","gen", "Purpose","",["CZEMU"],[]; |
276 | 279 | "ku","dat", "Purpose","",["CZEMU"],[]; |
277 | 280 | "na","acc", "Purpose","",["CZEMU"],[]; |
281 | + "na","postp", "Purpose","",["CZEMU"],[]; | |
278 | 282 | "po","acc", "Purpose","",["CZEMU"],[]; |
279 | 283 | "do","gen", "Duration","",["CZAS"],["CZAS"]; |
280 | 284 | "od","gen", "Duration","",["CZAS"],["CZAS"]; |
... | ... | @@ -285,6 +289,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
285 | 289 | "między","inst", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
286 | 290 | "nad","inst", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
287 | 291 | "na","loc", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
292 | + "na","postp", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; | |
288 | 293 | "naokoło","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
289 | 294 | "naprzeciw","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
290 | 295 | "naprzeciwko","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
... | ... | @@ -327,6 +332,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
327 | 332 | "jak","str", "Manner","",[],[];*) |
328 | 333 | "pod","acc", "Manner","",["ALL"],[]; |
329 | 334 | "z","inst", "Manner","",["ALL"],[]; |
335 | + "z","postp", "Manner","",["ALL"],[]; | |
330 | 336 | "dokoła","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"]; |
331 | 337 | "dookoła","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"]; |
332 | 338 | "koło","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"]; |
... | ... | @@ -363,10 +369,10 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
363 | 369 | "temu","acc", "Time","",["CZAS"],["CZAS"]; (* dodane *) |
364 | 370 | "za","gen", "Time","",["CZAS"],["CZAS"]; (* dodane *) |
365 | 371 | ] StringMap.empty (fun map (lemma,case,role,role_attr,hipero,sel_prefs) -> |
366 | - let hipero = Xlist.fold hipero StringSet.empty ENIAMplWordnet.get_hipero_rec in | |
367 | - let map2 = try StringMap.find map lemma with Not_found -> StringMap.empty in | |
368 | - let map2 = StringMap.add_inc map2 case [case,role,role_attr,hipero,sel_prefs] (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l) in | |
369 | - StringMap.add map lemma map2) | |
372 | + let hipero = Xlist.map hipero (fun hipero -> ENIAMwalTypes.Predef hipero) in | |
373 | + let sel_prefs = Xlist.map sel_prefs (fun sel_prefs -> ENIAMwalTypes.Predef sel_prefs) in | |
374 | + StringMap.add_inc map lemma [case,role,role_attr,hipero,sel_prefs] | |
375 | + (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l)) | |
370 | 376 | (* "przeciwko","dat","Dat"; |
371 | 377 | "przeciw","dat","Dat"; |
372 | 378 | "o","acc","Theme"; |
... | ... | @@ -374,14 +380,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
374 | 380 | "według","gen","Manr"; |
375 | 381 | "wobec","gen","Dat";*) |
376 | 382 | |
377 | -let assign_prep_semantics lemma cases t = | |
378 | - try | |
379 | - let map = StringMap.find prep_roles lemma in | |
380 | - let l = List.flatten (Xlist.map cases (fun case -> | |
381 | - try StringMap.find map case with Not_found -> [])) in | |
382 | - if l = [] then Normal else PrepSemantics l | |
383 | - with Not_found -> Normal | |
384 | - | |
383 | +(* | |
385 | 384 | let subst_special_lexemes = Xlist.fold [ |
386 | 385 | "jutro", ["indexical"];(*"dzień"*) |
387 | 386 | "pojutrze", ["indexical"];(*"dzień"*) |
... | ... | @@ -553,3 +552,4 @@ let assign_semantics tokens lex_sems group = |
553 | 552 | {t with semantics=assign_prep_semantics lemma (StringSet.to_list cases) t} |
554 | 553 | | _ -> t in |
555 | 554 | ExtArray.set lex_sems id t) |
555 | +*) | |
... | ... |
lexSemantics/ENIAMlexSemanticsTypes.ml
... | ... | @@ -43,9 +43,12 @@ type frame = { |
43 | 43 | arole: string; |
44 | 44 | arole_attr: string; |
45 | 45 | arev: bool; |
46 | + sopinion: ENIAMwalTypes.opinion; | |
47 | + fopinion: ENIAMwalTypes.opinion; | |
46 | 48 | } |
47 | 49 | |
48 | -let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false} | |
50 | +let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; | |
51 | + sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} | |
49 | 52 | |
50 | 53 | type lex_sem = { |
51 | 54 | schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * |
... | ... |
lexSemantics/ENIAMplWordnet.ml
... | ... | @@ -26,6 +26,7 @@ let synmap = ref IntMap.empty |
26 | 26 | let ex_hipo = ref IntMap.empty |
27 | 27 | let predef_names = ref IntMap.empty |
28 | 28 | let proper_classes = ref StringMap.empty |
29 | +let predef = ref StringMap.empty | |
29 | 30 | |
30 | 31 | let load_lu filename = |
31 | 32 | File.fold_tab filename (IntMap.empty,StringMap.empty) (fun (lu_names,lumap) -> function |
... | ... | @@ -65,7 +66,7 @@ let syn_id_of_sense sense = |
65 | 66 | | _ -> failwith ("syn_id_of_sense 2: " ^ lemma) |
66 | 67 | |
67 | 68 | let load_predef ex_hipo filename = |
68 | - let ex_hipo,predef_names,_,_ = | |
69 | + let ex_hipo,predef_names,predef,_ = | |
69 | 70 | File.fold_tab filename (ex_hipo,IntMap.empty,StringMap.empty,-1) (fun (ex_hipo,predef_names,predef,id) -> function |
70 | 71 | name :: senses -> |
71 | 72 | let ex_hipo = Xlist.fold senses ex_hipo (fun ex_hipo sense -> |
... | ... | @@ -77,7 +78,7 @@ let load_predef ex_hipo filename = |
77 | 78 | let predef = StringMap.add_inc predef name id (fun _ -> failwith "load_predef 2") in |
78 | 79 | ex_hipo, predef_names, predef, id-1 |
79 | 80 | | l -> failwith ("load_predef: " ^ String.concat "\t" l)) in |
80 | - ex_hipo,predef_names | |
81 | + ex_hipo,predef_names,predef | |
81 | 82 | |
82 | 83 | let rec get_hipero_rec found ex_hipo id cost = |
83 | 84 | let cost2 = try IntMap.find found id with Not_found -> max_int in |
... | ... | @@ -164,8 +165,9 @@ let initialize () = |
164 | 165 | lumap := b; |
165 | 166 | synmap := load_syn syn_filename; |
166 | 167 | ex_hipo := load_ex_hipo ex_hipo_filename; |
167 | - let a,b = load_predef !ex_hipo predef_filename in | |
168 | + let a,b,c = load_predef !ex_hipo predef_filename in | |
168 | 169 | ex_hipo := a; |
169 | 170 | predef_names := b; |
171 | + predef := c; | |
170 | 172 | proper_classes := load_proper_classes proper_classes_filename; |
171 | 173 | () |
... | ... |
lexSemantics/ENIAMvalence.ml
... | ... | @@ -613,3 +613,34 @@ let get_default_valence = function |
613 | 613 | | "adj" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]] |
614 | 614 | | "adv" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]] |
615 | 615 | | _ -> [] |
616 | + | |
617 | +open ENIAMcategoriesPL | |
618 | + | |
(* [get_aroles schema lemma pos] lists the adjunct-role candidates for a
   lexeme as (selectors, role, role attribute, reversed) tuples.

   - "pact" / "ppas": a single candidate whose role is copied from the
     schema position with grammatical function SUBJ / OBJ respectively
     (the last such position wins; "Arg" when there is none), with the
     reversed flag set.
   - "subst": one candidate per case group (dat, inst, everything else);
     instrumental lemmas listed in
     [ENIAMlexSemanticsData.subst_inst_time] get "Time" instead of
     "Instrument".
   - "adj" / "adjc" / "adjp", "qub": roles from the corresponding table
     in [ENIAMlexSemanticsData], defaulting to "Attribute" and "Arg".
   - "adv": one candidate per mode returned by
     [ENIAMcategoriesPL.adv_mode]; raises [Failure "get_aroles"] on a
     mode not listed below.
   - any other part of speech: a single empty candidate.

   Fix: the "abl" mode used the misspelled attribute "Souce"; it is now
   "Source", the spelling used by the role tables. *)
let get_aroles schema lemma pos =
  (* Role of the schema position bearing [gf], for participles. *)
  let participle_arole gf =
    [Xlist.fold schema ([],"Arg","",true) (fun (sel,arole,arole_attr,arev) p ->
      if p.gf = gf then sel,p.role,p.role_attr,arev else sel,arole,arole_attr,arev)] in
  (* Candidate restricted to a single adverb mode. *)
  let adv_arole mode role role_attr = [Mode,Eq,[mode]],role,role_attr,false in
  match pos with
    "pact" -> participle_arole SUBJ
  | "ppas" -> participle_arole OBJ
  | "subst" -> [
      [Case,Eq,["dat"]],"Recipent","",false;
      [Case,Eq,["inst"]],(if StringSet.mem ENIAMlexSemanticsData.subst_inst_time lemma then "Time" else "Instrument"),"",false;
      [Case,Neq,["dat";"inst"]],"","",false]
  | "adj" | "adjc" | "adjp" -> (* FIXME: can adjc and adjp be adjuncts? *)
      let l = try StringMap.find ENIAMlexSemanticsData.adj_roles lemma with Not_found -> ["Attribute",""] in
      Xlist.map l (fun (role,role_attr) -> [],role,role_attr,false)
  | "adv" ->
      let modes = ENIAMcategoriesPL.adv_mode lemma in
      let roles = try StringMap.find ENIAMlexSemanticsData.adv_roles lemma with Not_found -> ["Manner",""] in
      Xlist.fold modes [] (fun l -> function
          "mod" -> Xlist.fold roles l (fun l (role,role_attr) -> adv_arole "mod" role role_attr :: l)
        | "abl" -> adv_arole "abl" "Location" "Source" :: l
        | "adl" -> adv_arole "adl" "Location" "Goal" :: l
        | "locat" -> adv_arole "locat" "Location" "" :: l
        | "perl" -> adv_arole "perl" "Path" "" :: l
        | "dur" -> adv_arole "dur" "Duration" "" :: l
        | "temp" -> adv_arole "temp" "Time" "" :: l
        | _ -> failwith "get_aroles")
  | "qub" ->
      let l = try StringMap.find ENIAMlexSemanticsData.qub_roles lemma with Not_found -> ["Arg",""] in
      Xlist.map l (fun (role,role_attr) -> [],role,role_attr,false)
  | _ -> [[],"","",false]
... | ... |
lexSemantics/ENIAMwalRenderer.ml
... | ... | @@ -338,3 +338,13 @@ let adv_adjuncts_simp = [ |
338 | 338 | let adv_connected_adjuncts_simp = [ |
339 | 339 | adjunct [Tensor[Atom "advp"; Top]]; |
340 | 340 | ] |
341 | + | |
(* [assing_pref_morfs (lemma, case)] lists the LCG categories a
   preposition accepts as its complement.

   Three post-prepositional ("postp") combinations have dedicated
   complements: "po" takes an adjectival phrase (sg/dat/m1 or any
   postp form), "z" takes a sg/nom/f adjectival phrase, and "na" takes
   an adverbial phrase.  Every other pair takes a noun phrase or an
   adjectival phrase in the given case. *)
let assing_pref_morfs (lemma,case) =
  let adjp number case_atom gender =
    LCG(Tensor[Atom "adjp"; number; case_atom; gender]) in
  match lemma,case with
    "po","postp" -> [
      adjp (Atom "sg") (Atom "dat") (Atom "m1");
      adjp Top (Atom "postp") Top]
  | "z","postp" -> [adjp (Atom "sg") (Atom "nom") (Atom "f")]
  | "na","postp" -> [LCG(Tensor[Atom "advp"; Top])]
  | _ -> [
      LCG(Tensor[Atom "np"; Top; Atom case; Top; Top]);
      adjp Top (Atom case) Top]
... | ... |
lexSemantics/makefile
... | ... | @@ -6,7 +6,7 @@ OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | -SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMlexSemanticsTypes.ml ENIAMadjuncts.ml \ | |
9 | +SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMlexSemanticsData.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMlexSemanticsTypes.ml ENIAMadjuncts.ml \ | |
10 | 10 | ENIAMlexSemanticsStringOf.ml ENIAMlexSemanticsHTMLof.ml ENIAMlexSemanticsXMLof.ml ENIAMplWordnet.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml |
11 | 11 | |
12 | 12 | all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa |
... | ... | @@ -14,8 +14,8 @@ all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa |
14 | 14 | install: all |
15 | 15 | mkdir -p $(INSTALLDIR) |
16 | 16 | cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR) |
17 | - cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR) | |
18 | - cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR) | |
17 | + cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMlexSemanticsData.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR) | |
18 | + cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMlexSemanticsData.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR) | |
19 | 19 | mkdir -p /usr/share/eniam/lexSemantics |
20 | 20 | cp resources/* /usr/share/eniam/lexSemantics |
21 | 21 | # ln -s /usr/share/eniam/lexSemantics/proper_names_20160104.tab /usr/share/eniam/lexSemantics/proper_names.tab |
... | ... | @@ -24,8 +24,8 @@ install: all |
24 | 24 | install-local: all |
25 | 25 | mkdir -p $(INSTALLDIR) |
26 | 26 | cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR) |
27 | - cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR) | |
28 | - cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR) | |
27 | + cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMlexSemanticsData.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR) | |
28 | + cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMlexSemanticsData.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR) | |
29 | 29 | mkdir -p /usr/local/share/eniam/lexSemantics |
30 | 30 | cp resources/* /usr/local/share/eniam/lexSemantics |
31 | 31 | # ln -s /usr/local/share/eniam/lexSemantics/proper_names_20160104.tab /usr/local/share/eniam/lexSemantics/proper_names.tab |
... | ... |
plWordnet/ENIAMplWordnet.ml
1 | 1 | (* |
2 | - * ENIAMplWordnet, an interface for "Słowosieć", a Polish Wordnet. | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
2 | + * ENIAMplWordnet, a converter for Polish Wordnet "Słowosieć". | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | 5 | * |
6 | 6 | * This library is free software: you can redistribute it and/or modify |
7 | 7 | * it under the terms of the GNU Lesser General Public License as published by |
... | ... | @@ -17,177 +17,329 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -let resource_path = | |
21 | - try Sys.getenv "ENIAM_RESOURCE_PATH" | |
22 | - with Not_found -> | |
23 | - if Sys.file_exists "/usr/share/eniam" then "/usr/share/eniam" else | |
24 | - if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam" else | |
25 | - failwith "resource directory does not exists" | |
20 | +open Xstd | |
21 | +open ENIAMplWordnetTypes | |
26 | 22 | |
27 | -let rzeczownik_filename = resource_path ^ "/plWordnet/rzeczownik.tab" | |
28 | -let czasownik_filename = resource_path ^ "/plWordnet/czasownik.tab" | |
29 | -let przymiotnik_filename = resource_path ^ "/plWordnet/przymiotnik.tab" | |
30 | -let synsets_filename = resource_path ^ "/plWordnet/synsets.tab" | |
31 | -let hipero_filename = resource_path ^ "/plWordnet/hipero.tab" | |
32 | -let predef_filename = resource_path ^ "/plWordnet/predef_prefs.tab" | |
33 | -let proper_classes_filename = resource_path ^ "/plWordnet/proper_classes.tab" | |
23 | +let process_unit = function | |
24 | + Xml.Element("unit-id",[],[Xml.PCData s]) -> int_of_string s, empty_lu | |
25 | + | node -> failwith ("process_unit " ^ (Xml.to_string node)) | |
34 | 26 | |
27 | +let process_tests = function | |
28 | + Xml.Element("test",["text",text;"pos",pos],[]) -> text,pos | |
29 | + | node -> failwith ("process_tests " ^ (Xml.to_string node)) | |
35 | 30 | |
36 | -open Xstd | |
37 | -(* open PreTypes *) | |
38 | - | |
39 | -let load_lu names filename = | |
40 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
41 | - Xlist.fold l (StringMap.empty,names) (fun (lu,names) line -> | |
42 | - if String.length line = 0 then lu,names else | |
43 | - if String.get line 0 = '#' then lu,names else | |
44 | - match Str.split_delim (Str.regexp "\t") line with | |
45 | - [id; lemma; variant] -> | |
46 | - StringMap.add_inc lu lemma [id,variant] (fun l -> (id,variant) :: l), | |
47 | - StringMap.add_inc names id (lemma ^ " " ^ variant) (fun _ -> failwith "load_lu") | |
48 | - | _ -> failwith ("load_lu: " ^ line)) | |
49 | - | |
50 | -let noun_lu,lu_names = load_lu StringMap.empty rzeczownik_filename | |
51 | -let verb_lu,lu_names = load_lu lu_names czasownik_filename | |
52 | -let adj_lu,lu_names = load_lu lu_names przymiotnik_filename | |
53 | - | |
54 | -let load_synsets filename = | |
55 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
56 | - Xlist.fold l (StringMap.empty,StringMap.empty) (fun (syn,names) line -> | |
57 | - if String.length line = 0 then (syn,names) else | |
58 | - if String.get line 0 = '#' then (syn,names) else | |
59 | - match Str.split_delim (Str.regexp "\t") line with | |
60 | - [syn_id; lu_ids] -> | |
61 | - let lu_ids = Str.split_delim (Str.regexp " ") lu_ids in | |
62 | - let syn = Xlist.fold lu_ids syn (fun syn lu_id -> | |
63 | - StringMap.add_inc syn lu_id syn_id (fun _ -> failwith ("load_synsets 1: " ^ lu_id))) in | |
64 | - let lu_id = try List.hd lu_ids with _ -> failwith ("load_synsets 2: " ^ syn_id) in | |
65 | - let name = try StringMap.find lu_names lu_id with Not_found -> "syn_id: " ^ syn_id in (* nieznane synsety są z en wordnetu *) | |
66 | - let names = StringMap.add_inc names syn_id name (fun _ -> failwith ("load_synsets 4: " ^ syn_id)) in | |
67 | - syn,names | |
68 | - | _ -> failwith ("load_synsets 5: " ^ line)) | |
69 | - | |
70 | -let synsets, syn_names = load_synsets synsets_filename | |
71 | - | |
72 | -let load_hipero filename = | |
73 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
74 | - Xlist.fold l StringMap.empty (fun hip line -> | |
75 | - if String.length line = 0 then hip else | |
76 | - if String.get line 0 = '#' then hip else | |
77 | - match Str.split_delim (Str.regexp "\t") line with | |
78 | - [id; ids] -> | |
79 | - let ids = Str.split_delim (Str.regexp " ") ids in | |
80 | - StringMap.add_inc hip id ids (fun _ -> failwith "load_hipero") | |
81 | - | _ -> failwith ("load_hipero: " ^ line)) | |
82 | - | |
83 | -let hipero = load_hipero hipero_filename | |
84 | - | |
85 | -let rec get_lu_id variant = function | |
86 | - (id,v) :: l -> if variant = v then id else get_lu_id variant l | |
87 | - | [] -> failwith "get_lu_id" | |
88 | - | |
89 | -let lu_id_of_sense sense = | |
90 | - let lemma,variant = | |
91 | - match List.rev (Str.split (Str.regexp " ") sense) with | |
92 | -(* [lemma] -> lemma,"" *) | |
93 | - | variant :: l -> String.concat " " (List.rev l), variant | |
94 | - | _ -> failwith "lu_id_of_sense 1" in | |
95 | - if variant = "" then lemma else | |
96 | - let l = try StringMap.find noun_lu lemma with Not_found -> failwith ("lu_id_of_sense 2: " ^ lemma) in | |
97 | - get_lu_id variant l | |
98 | - | |
99 | -let load_predef hipero filename = | |
100 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
101 | - Xlist.fold l (hipero,StringSet.empty) (fun (hipero,predef) line -> | |
102 | - if String.length line = 0 then hipero,predef else | |
103 | - if String.get line 0 = '#' then hipero,predef else | |
104 | - match Str.split_delim (Str.regexp "\t") line with | |
105 | - id :: senses -> | |
106 | - let hipero = Xlist.fold senses hipero (fun hipero sense -> | |
107 | - if StringSet.mem predef sense then StringMap.add_inc hipero sense [id] (fun l -> id :: l) else | |
108 | - let lu_id = lu_id_of_sense sense in | |
109 | - let syn_id = try StringMap.find synsets lu_id with Not_found -> lu_id in | |
110 | - StringMap.add_inc hipero syn_id [id] (fun l -> id :: l)) in | |
111 | - hipero, StringSet.add predef id | |
112 | - | _ -> failwith ("load_predef: " ^ line)) | |
113 | - | |
114 | -let hipero,predef = load_predef hipero predef_filename | |
115 | - | |
116 | -let rec get_hipero_rec found id = | |
117 | - if StringSet.mem found id then found else | |
118 | - let found = StringSet.add found id in | |
119 | - let l = try StringMap.find hipero id with Not_found -> [] in | |
120 | - Xlist.fold l found get_hipero_rec | |
121 | - | |
122 | -let get_hipero lu_id = | |
123 | - let syn_id = StringMap.find synsets lu_id in | |
124 | - StringSet.to_list (get_hipero_rec StringSet.empty syn_id) | |
125 | - | |
126 | -let synset_name id = | |
127 | - if StringSet.mem predef id then id else | |
128 | - try StringMap.find syn_names id with Not_found -> failwith "synset_name" | |
129 | - | |
130 | -let rename_sense sense = | |
131 | - let lu_id = lu_id_of_sense sense in | |
132 | - StringMap.find synsets lu_id | |
133 | - | |
134 | -let load_proper_classes filename = | |
135 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
136 | - Xlist.fold l StringMap.empty (fun map line -> | |
137 | - if String.length line = 0 then map else | |
138 | - if String.get line 0 = '#' then map else | |
139 | - match Str.split_delim (Str.regexp "\t") line with | |
140 | - id :: senses -> | |
141 | - let senses = Xlist.map senses (fun sense -> | |
142 | - match List.rev (Str.split (Str.regexp " ") sense) with | |
143 | - weight :: l -> String.concat " " (List.rev l), (try float_of_string weight with _ -> failwith "load_proper_classes 2") | |
144 | - | _ -> failwith "load_proper_classes 4") in | |
145 | - let senses = Xlist.map senses (fun (sense,weight) -> | |
146 | - let sense = if sense = "antroponim 1" then "nazwa własna 1" else sense in | |
147 | - let sense = if sense = "godzina 4" then "godzina 3" else sense in | |
148 | -(* print_endline sense; *) | |
149 | - let lu_id = lu_id_of_sense sense in | |
150 | - sense,Xlist.map (get_hipero lu_id) synset_name,weight) in | |
151 | - StringMap.add_inc map id senses (fun _ -> failwith ("load_proper_classes 3: " ^ id)) | |
152 | - | _ -> failwith ("load_proper_classes: " ^ line)) | |
153 | - | |
154 | -let proper_classes = load_proper_classes proper_classes_filename | |
155 | - | |
156 | -let simplify_pos = function | |
157 | - "subst" -> "noun" | |
158 | - | "depr" -> "noun" | |
159 | - | "adj" -> "adj" | |
160 | - | "adja" -> "adj" | |
161 | - | "adjc" -> "adj" | |
162 | - | "adjp" -> "adj" | |
163 | - | "ger" -> "verb" | |
164 | - | "pact" -> "verb" | |
165 | - | "ppas" -> "verb" | |
166 | - | "fin" -> "verb" | |
167 | - | "bedzie" -> "verb" | |
168 | - | "praet" -> "verb" | |
169 | - | "winien" -> "verb" | |
170 | - | "impt" -> "verb" | |
171 | - | "imps" -> "verb" | |
172 | - | "inf" -> "verb" | |
173 | - | "pcon" -> "verb" | |
174 | - | "pant" -> "verb" | |
175 | - | "pred" -> "verb" | |
176 | - | s -> s | |
177 | - | |
178 | -let find_senses lemma pos = | |
179 | - (*if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then {t with senses=[lemma,["0"],0.]} else*) (* FIXME: ustalić co z zaimkami *) | |
180 | - let lu = match simplify_pos pos with | |
181 | - "noun" -> noun_lu | |
182 | - | "adj" -> adj_lu | |
183 | - | "verb" -> verb_lu | |
184 | - | _ -> StringMap.empty in | |
185 | - if StringMap.mem lu lemma then | |
186 | - let l = StringMap.find lu lemma in | |
187 | - Xlist.rev_map l (fun (id,variant) -> | |
188 | - lemma ^ " " ^ variant, Xlist.map (get_hipero id) synset_name, log10 (1. /. (try float_of_string variant with _ -> 3.))) | |
189 | - else [] | |
190 | - | |
191 | -let find_proper_senses senses = | |
192 | - List.flatten (Xlist.rev_map senses (fun sense -> | |
193 | - try StringMap.find proper_classes sense with Not_found -> failwith ("find_proper_senses: " ^ sense))) | |
31 | +let process_abstract = function | |
32 | + "true" -> true | |
33 | + | "false" -> false | |
34 | + | _ -> failwith "process_abstract" | |
35 | + | |
36 | +(* funkcja zwraca: | |
37 | +lexical-unit map - wiąże leksemy z identyfikatorami | |
38 | +synset map | |
39 | +lexicalrelations | |
40 | +synsetrelations | |
41 | +relationtypes map | |
42 | +*) | |
43 | +let process_entry (lumap,synmap,lr,sr,rtmap) = function | |
44 | + Xml.Element("lexical-unit",["id",id;"name",name;"pos",pos;"tagcount",tagcount;"domain",domain;"workstate",workstate; | |
45 | + "source",source;"variant",variant],[]) -> | |
46 | + let lumap = IntMap.add_inc lumap (int_of_string id) {lu_id=int_of_string id; lu_name=name; lu_pos=pos; lu_tagcount=tagcount; lu_domain=domain; lu_desc=""; | |
47 | + lu_workstate=workstate; lu_source=source; lu_variant=variant; lu_syn=(-1)} (fun _ -> failwith "process_entry 2") in | |
48 | + lumap,synmap,lr,sr,rtmap | |
49 | + | Xml.Element("lexical-unit",["id",id;"name",name;"pos",pos;"tagcount",tagcount;"domain",domain;"desc",desc;"workstate",workstate; | |
50 | + "source",source;"variant",variant],[]) -> | |
51 | + let lumap = IntMap.add_inc lumap (int_of_string id) {lu_id=int_of_string id; lu_name=name; lu_pos=pos; lu_tagcount=tagcount; lu_domain=domain; lu_desc=desc; | |
52 | + lu_workstate=workstate; lu_source=source; lu_variant=variant; lu_syn=(-1)} (fun _ -> failwith "process_entry 3") in | |
53 | + lumap,synmap,lr,sr,rtmap | |
54 | + | Xml.Element("synset",["id",id;"workstate",workstate;"split",split;"owner",owner;"definition",definition;"desc",desc; | |
55 | + "abstract",abstract],units) -> | |
56 | + let units = Xlist.map units process_unit in | |
57 | + let synmap = IntMap.add_inc synmap (int_of_string id) {syn_workstate=workstate; syn_split=split; syn_owner=owner; syn_definition=definition; | |
58 | + syn_desc=desc; syn_abstract=process_abstract abstract; syn_units=units; syn_pos=""; syn_no_hipo=0; syn_domain=""} (fun _ -> failwith "process_entry 4") in | |
59 | + lumap,synmap,lr,sr,rtmap | |
60 | + | Xml.Element("synset",["id",id;"workstate",workstate;"split",split;"owner",owner;"desc",desc; | |
61 | + "abstract",abstract],units) -> | |
62 | + let units = Xlist.map units process_unit in | |
63 | + let synmap = IntMap.add_inc synmap (int_of_string id) {syn_workstate=workstate; syn_split=split; syn_owner=owner; syn_definition=""; | |
64 | + syn_desc=desc; syn_abstract=process_abstract abstract; syn_units=units; syn_pos=""; syn_no_hipo=0; syn_domain=""} (fun _ -> failwith "process_entry 4") in | |
65 | + lumap,synmap,lr,sr,rtmap | |
66 | + | Xml.Element("lexicalrelations",["parent",parent;"child",child;"relation",relation;"valid",valid;"owner",owner],[]) -> | |
67 | + let lr = {r_parent=int_of_string parent; r_child=int_of_string child; r_relation=int_of_string relation; r_valid=valid; r_owner=owner} :: lr in | |
68 | + lumap,synmap,lr,sr,rtmap | |
69 | + | Xml.Element("synsetrelations",["parent",parent;"child",child;"relation",relation;"valid",valid;"owner",owner],[]) -> | |
70 | + let sr = {r_parent=int_of_string parent; r_child=int_of_string child; r_relation=int_of_string relation; r_valid=valid; r_owner=owner} :: sr in | |
71 | + lumap,synmap,lr,sr,rtmap | |
72 | + | Xml.Element("relationtypes",["id",id;"type",typ;"reverse",reverse;"name",name;"description",description; | |
73 | + "posstr",posstr;"display",display;"shortcut",shortcut;"autoreverse",autoreverse; | |
74 | + "pwn",pwn],tests) -> | |
75 | + let tests = Xlist.map tests process_tests in | |
76 | + let rtmap = IntMap.add_inc rtmap (int_of_string id) {rt_type=typ; rt_reverse=int_of_string reverse; rt_name=name; rt_description=description; | |
77 | + rt_posstr=posstr; rt_display=display; rt_shortcut=shortcut; rt_autoreverse=autoreverse; rt_pwn=pwn; rt_tests=tests} | |
78 | + (fun _ -> failwith "process_entry 5") in | |
79 | + lumap,synmap,lr,sr,rtmap | |
80 | + | Xml.Element("relationtypes",["id",id;"type",typ;"name",name;"description",description; | |
81 | + "posstr",posstr;"display",display;"shortcut",shortcut;"autoreverse",autoreverse; | |
82 | + "pwn",pwn],tests) -> | |
83 | + let tests = Xlist.map tests process_tests in | |
84 | + let rtmap = IntMap.add_inc rtmap (int_of_string id) {rt_type=typ; rt_reverse=(-1); rt_name=name; rt_description=description; | |
85 | + rt_posstr=posstr; rt_display=display; rt_shortcut=shortcut; rt_autoreverse=autoreverse; rt_pwn=pwn; rt_tests=tests} | |
86 | + (fun _ -> failwith "process_entry 5") in | |
87 | + lumap,synmap,lr,sr,rtmap | |
88 | + | Xml.Element("relationtypes",["id",id;"type",typ;"parent",parent;"reverse",reverse;"name",name;"description",description; | |
89 | + "posstr",posstr;"display",display;"shortcut",shortcut;"autoreverse",autoreverse; | |
90 | + "pwn",pwn],tests) -> | |
91 | + let tests = Xlist.map tests process_tests in | |
92 | + let rtmap = IntMap.add_inc rtmap (int_of_string id) {rt_type=typ; rt_reverse=int_of_string reverse; rt_name=name; rt_description=description; | |
93 | + rt_posstr=posstr; rt_display=display; rt_shortcut=shortcut; rt_autoreverse=autoreverse; rt_pwn=pwn; rt_tests=tests} | |
94 | + (fun _ -> failwith "process_entry 5") in | |
95 | + lumap,synmap,lr,sr,rtmap | |
96 | + | Xml.Element("relationtypes",["id",id;"type",typ;"parent",parent;"name",name;"description",description; | |
97 | + "posstr",posstr;"display",display;"shortcut",shortcut;"autoreverse",autoreverse; | |
98 | + "pwn",pwn],tests) -> | |
99 | + let tests = Xlist.map tests process_tests in | |
100 | + let rtmap = IntMap.add_inc rtmap (int_of_string id) {rt_type=typ; rt_reverse=(-1); rt_name=name; rt_description=description; | |
101 | + rt_posstr=posstr; rt_display=display; rt_shortcut=shortcut; rt_autoreverse=autoreverse; rt_pwn=pwn; rt_tests=tests} | |
102 | + (fun _ -> failwith "process_entry 5") in | |
103 | + lumap,synmap,lr,sr,rtmap | |
104 | + | node -> print_endline (Xml.to_string node); failwith "process_entry 1" | |
105 | + | |
106 | +let load_data filename = | |
107 | + match try Xml.parse_file filename with Xml.Error e -> failwith ("load_data Xml.Error " ^ Xml.error e) with | |
108 | + Xml.Element("array-list",_,entries) -> | |
109 | + Xlist.fold entries (IntMap.empty,IntMap.empty,[],[],IntMap.empty) process_entry | |
110 | + | node -> failwith ("load_data " ^ (Xml.to_string node)) | |
111 | + | |
112 | +let check_lu_syn_consistency lumap synmap = | |
113 | + let set = IntMap.fold lumap IntSet.empty (fun set id _ -> | |
114 | + if IntSet.mem set id then failwith "check_lu_syn_consistency 1" else | |
115 | + IntSet.add set id) in | |
116 | + let set = IntMap.fold synmap set (fun set _ syn -> | |
117 | + Xlist.fold syn.syn_units set (fun set (id,_) -> | |
118 | + if not (IntSet.mem set id) then failwith "check_lu_syn_consistency 2" else | |
119 | + IntSet.remove set id)) in | |
120 | + if not (IntSet.is_empty set) then failwith "check_lu_syn_consistency 3" else | |
121 | + () | |
122 | + | |
123 | +let merge_lu_syn lumap synmap = | |
124 | + IntMap.map synmap (fun syn -> | |
125 | + let units = Xlist.map syn.syn_units (fun (id,_) -> id, IntMap.find lumap id) in | |
126 | + let pos = match StringSet.to_list (Xlist.fold units StringSet.empty (fun set (_,lu) -> | |
127 | + StringSet.add set lu.lu_pos)) with | |
128 | + [] -> failwith "merge_lu_syn: empty synset" | |
129 | + | [pos] -> pos | |
130 | + | _ -> failwith "merge_lu_syn: inconsistent pos" in | |
131 | + {syn with syn_units=units; syn_pos=pos}) | |
132 | + | |
133 | +let set_lu_syn lumap synmap = | |
134 | + IntMap.fold synmap lumap (fun lumap syn_id syn -> | |
135 | + Xlist.fold syn.syn_units lumap (fun lumap (id,_) -> | |
136 | + let lu = try IntMap.find lumap id with Not_found -> failwith "set_lu_syn" in | |
137 | + if lu.lu_syn <> -1 then failwith "set_lu_syn" else | |
138 | + IntMap.add lumap id {lu with lu_syn=syn_id})) | |
139 | + | |
140 | +let count_relations qmap rtmap rels = | |
141 | + Xlist.fold rels qmap (fun qmap rel -> | |
142 | + if not (IntMap.mem rtmap rel.r_relation) then print_endline ("unknown relation: " ^ string_of_int rel.r_relation); | |
143 | + IntQMap.add qmap rel.r_relation) | |
144 | + | |
145 | +let lu_name lu = | |
146 | + lu.lu_name ^ "-" ^ lu.lu_variant | |
147 | + | |
148 | +let syn_name syn = | |
149 | + String.concat ", " (Xlist.map syn.syn_units (fun (_,lu) -> lu_name lu)) | |
150 | + | |
151 | +let syn_name_single syn = | |
152 | + if syn.syn_units = [] then "empty" else | |
153 | + lu_name (snd (List.hd syn.syn_units)) | |
154 | + | |
155 | +let pwn_pos = ["czasownik pwn"; "przymiotnik pwn"; "przysłówek pwn"; "rzeczownik pwn"] | |
156 | + | |
157 | +let is_pwn_lu lu = | |
158 | + Xlist.mem pwn_pos lu.lu_pos | |
159 | + | |
160 | +let is_pwn_syn syn = | |
161 | + Xlist.mem pwn_pos syn.syn_pos | |
162 | + | |
163 | +let get_pos_lu lu = lu.lu_pos | |
164 | +let get_pos_syn syn = syn.syn_pos | |
165 | + | |
166 | +let add_pwn_qmap map rel parent child = | |
167 | + let s = Printf.sprintf "%s-%s" parent child in | |
168 | + IntMap.add_inc map rel.r_relation (StringQMap.add StringQMap.empty s) (fun qmap -> StringQMap.add qmap s) | |
169 | + | |
170 | +let test_pwn_elem is_pwn_fun map elem = | |
171 | + try | |
172 | + if is_pwn_fun (IntMap.find map elem) then "en" else "pl" | |
173 | + with Not_found -> "NF" | |
174 | + | |
175 | +let test_pos_elem get_pos_fun map elem = | |
176 | + try | |
177 | + get_pos_fun (IntMap.find map elem) | |
178 | + with Not_found -> "NF" | |
179 | + | |
180 | +let count_pwn_relation qmap lumap synmap rtmap rels t = | |
181 | + Xlist.fold rels qmap (fun qmap rel -> | |
182 | + match (*(IntMap.find rtmap rel.r_relation).rt_type,*)t with | |
183 | + (*"relacja pomiędzy synsetami",*)"sr" -> add_pwn_qmap qmap rel (test_pwn_elem is_pwn_syn synmap rel.r_parent) (test_pwn_elem is_pwn_syn synmap rel.r_child) | |
184 | + | (*"relacja leksykalna",*)"lr" -> add_pwn_qmap qmap rel (test_pwn_elem is_pwn_lu lumap rel.r_parent) (test_pwn_elem is_pwn_lu lumap rel.r_child) | |
185 | + (* | "relacja synonimii" -> qmap *) | |
186 | + | _ -> failwith "count_pwn_relation") | |
187 | + | |
188 | +let count_pos_relation qmap lumap synmap rtmap rels t = | |
189 | + Xlist.fold rels qmap (fun qmap rel -> | |
190 | + match (*(IntMap.find rtmap rel.r_relation).rt_type,*)t with | |
191 | + (*"relacja pomiędzy synsetami",*)"sr" -> add_pwn_qmap qmap rel (test_pos_elem get_pos_syn synmap rel.r_parent) (test_pos_elem get_pos_syn synmap rel.r_child) | |
192 | + | (*"relacja leksykalna",*)"lr" -> add_pwn_qmap qmap rel (test_pos_elem get_pos_lu lumap rel.r_parent) (test_pos_elem get_pos_lu lumap rel.r_child) | |
193 | + (* | "relacja synonimii" -> qmap *) | |
194 | + | _ -> failwith "count_pwn_relation") | |
195 | + | |
196 | +let select_plWordnet lumap synmap lr sr rtmap = | |
197 | + let lr = Xlist.fold lr [] (fun lr rel -> | |
198 | + if test_pwn_elem is_pwn_lu lumap rel.r_parent = "pl" && | |
199 | + test_pwn_elem is_pwn_lu lumap rel.r_child = "pl" && | |
200 | + IntSet.mem pl_pl_relations rel.r_relation then rel :: lr else lr) in | |
201 | + let sr = Xlist.fold sr [] (fun sr rel -> | |
202 | + if test_pwn_elem is_pwn_syn synmap rel.r_parent = "pl" && | |
203 | + test_pwn_elem is_pwn_syn synmap rel.r_child = "pl" && | |
204 | + IntSet.mem pl_pl_relations rel.r_relation then rel :: sr else sr) in | |
205 | + let lumap = IntMap.fold lumap IntMap.empty (fun lumap id lu -> | |
206 | + if is_pwn_lu lu then lumap else IntMap.add lumap id lu) in | |
207 | + let synmap = IntMap.fold synmap IntMap.empty (fun synmap id syn -> | |
208 | + if is_pwn_syn syn then synmap else IntMap.add synmap id syn) in | |
209 | + let rtmap = IntMap.fold rtmap IntMap.empty (fun rtmap id rt -> | |
210 | + if IntSet.mem pl_pl_relations id then IntMap.add rtmap id rt else rtmap) in | |
211 | + lumap,synmap,lr,sr,rtmap | |
212 | + | |
213 | +let create_relation_map rel_id rels = | |
214 | + Xlist.fold rels Relation.empty (fun graph r -> | |
215 | + if r.r_relation = rel_id then | |
216 | + Relation.add_new graph r.r_parent r.r_child 0 | |
217 | + else graph) | |
218 | + | |
219 | +let create_relation_maps rel_maps rels = | |
220 | + Xlist.fold rels rel_maps (fun graphs r -> | |
221 | + let graph = try IntMap.find graphs r.r_relation with Not_found -> Relation.empty in | |
222 | + let graph = Relation.add_new graph r.r_parent r.r_child 0 in | |
223 | + IntMap.add graphs r.r_relation graph) | |
224 | + | |
225 | +let create_relation_map_lex lumap rel_id rels = | |
226 | + Xlist.fold rels Relation.empty (fun graph r -> | |
227 | + if r.r_relation = rel_id then | |
228 | + let parent = (IntMap.find lumap r.r_parent).lu_syn in | |
229 | + let child = (IntMap.find lumap r.r_child).lu_syn in | |
230 | + Relation.add graph parent child 0 | |
231 | + else graph) | |
232 | + | |
233 | +let create_relation_maps_lex rel_maps lumap rels = | |
234 | + Xlist.fold rels rel_maps (fun graphs r -> | |
235 | + let graph = try IntMap.find graphs r.r_relation with Not_found -> Relation.empty in | |
236 | + let parent = (IntMap.find lumap r.r_parent).lu_syn in | |
237 | + let child = (IntMap.find lumap r.r_child).lu_syn in | |
238 | + let graph = Relation.add graph parent child 0 in | |
239 | + IntMap.add graphs r.r_relation graph) | |
240 | + | |
241 | +let assign_no_hipo synmap hipo = | |
242 | + IntMap.mapi synmap (fun id syn -> | |
243 | + {syn with syn_no_hipo=IntSet.size (Relation.find_descendants hipo id)}) | |
244 | + | |
245 | +let check_rel_class_coverage rel_maps rel_sets = | |
246 | + let set = Xlist.fold (List.tl rel_sets) (List.hd rel_sets) IntSet.union in | |
247 | + IntMap.iter rel_maps (fun rel_id _ -> | |
248 | + if not (IntSet.mem set rel_id) then Printf.printf "only in rel_maps: %d\n" rel_id); | |
249 | + IntSet.iter set (fun rel_id -> | |
250 | + if not (IntMap.mem rel_maps rel_id) then Printf.printf "only in rel_sets: %d\n" rel_id) | |
251 | + | |
252 | +let get_syn_id synmap lu_name lu_variant = | |
253 | + let found = IntMap.fold synmap [] (fun found id syn -> | |
254 | + Xlist.fold syn.syn_units found (fun found (_,lu) -> | |
255 | + if lu.lu_name = lu_name && lu.lu_variant = lu_variant then | |
256 | + id :: found else found)) in | |
257 | + match found with | |
258 | + [] -> failwith "get_syn_id: not found" | |
259 | + | [id] -> id | |
260 | + | _ -> failwith "get_syn_id: multiple id found" | |
261 | + | |
262 | +let add_relations rel_maps rev_rel_maps ex_hipo relations = | |
263 | + Xlist.fold relations ex_hipo (fun ex_hipo (cost,dir,rel_ids) -> | |
264 | + Xlist.fold rel_ids ex_hipo (fun ex_hipo rel_id -> | |
265 | + let graph = IntMap.find (if dir = Straight then rel_maps else rev_rel_maps) rel_id in | |
266 | + IntMap.fold graph ex_hipo (fun ex_hipo parent children -> | |
267 | + IntMap.fold children ex_hipo (fun ex_hipo child _ -> | |
268 | + Relation.add_inc ex_hipo parent child cost min)))) | |
269 | + | |
270 | +let add_hipo_extensions synmap rel_maps rev_rel_maps ex_hipo hipo_extensions = | |
271 | + Xlist.fold hipo_extensions ex_hipo (fun ex_hipo (cost,lu_name,lu_variant,dir,rel_ids) -> | |
272 | + let hiper_id = get_syn_id synmap lu_name lu_variant in | |
273 | + Xlist.fold rel_ids ex_hipo (fun ex_hipo rel_id -> | |
274 | + let graph = IntMap.find (if dir = Parent then rel_maps else rev_rel_maps) rel_id in | |
275 | + IntMap.fold graph ex_hipo (fun ex_hipo hipo_id _ -> | |
276 | + Relation.add_inc ex_hipo hipo_id hiper_id cost min))) | |
277 | + | |
278 | +let add_hipo_extensions2 synmap ex_hipo hipo_extensions = | |
279 | + Xlist.fold hipo_extensions ex_hipo (fun ex_hipo (cost,lu_name,lu_variant,poss) -> | |
280 | + let hiper_id = get_syn_id synmap lu_name lu_variant in | |
281 | + IntMap.fold synmap ex_hipo (fun ex_hipo hipo_id syn -> | |
282 | + if Xlist.mem poss syn.syn_pos then Relation.add_inc ex_hipo hipo_id hiper_id cost min else ex_hipo)) | |
283 | + | |
284 | +let create_ex_hipo synmap rel_maps rev_rel_maps = | |
285 | + let ex_hipo = add_relations rel_maps rev_rel_maps IntMap.empty hipo_relations in | |
286 | + let ex_hipo = add_hipo_extensions synmap rel_maps rev_rel_maps ex_hipo hipo_extensions in | |
287 | + let ex_hipo = add_hipo_extensions2 synmap ex_hipo hipo_extensions2 in | |
288 | + ex_hipo | |
289 | + | |
290 | +let rec get_hipero_rec found ex_hipo id cost = | |
291 | + let cost2 = try IntMap.find found id with Not_found -> max_int in | |
292 | + if cost2 <= cost || cost > 7 then found else | |
293 | + let found = IntMap.add found id cost in | |
294 | + let map = try IntMap.find ex_hipo id with Not_found -> IntMap.empty in | |
295 | + IntMap.fold map found (fun found id2 cost2 -> | |
296 | + get_hipero_rec found ex_hipo id2 (cost + cost2)) | |
297 | + | |
298 | +let get_hipero ex_hipo syn_id = | |
299 | + get_hipero_rec IntMap.empty ex_hipo syn_id 0 | |
300 | + | |
301 | +let select_big_synsets synmap threshold = | |
302 | + IntMap.fold synmap IntSet.empty (fun selected id syn -> | |
303 | + if syn.syn_no_hipo >= threshold then IntSet.add selected id else selected) | |
304 | + | |
305 | +let print_subtree synmap ex_hipo path lu_name lu_variant = | |
306 | + let syn_id = get_syn_id synmap lu_name lu_variant in | |
307 | + let tree = Relation.descendants_tree ex_hipo syn_id 0 in | |
308 | + File.file_out (path ^ lu_name ^ "-" ^ lu_variant ^ ".txt") (fun file -> | |
309 | + Relation.print_tree file tree (fun syn_id cost -> | |
310 | + let syn = IntMap.find synmap syn_id in | |
311 | + let abstract = if syn.syn_abstract then "*" else "" in | |
312 | + Printf.sprintf "%d %s%s" syn.syn_no_hipo abstract (syn_name syn))); | |
313 | + File.file_out (path ^ lu_name ^ "-" ^ lu_variant ^ ".xml") (fun file -> | |
314 | + Relation.print_tree_xml file tree (fun syn_id cost -> | |
315 | + let syn = IntMap.find synmap syn_id in | |
316 | + ["name",syn_name syn; | |
317 | + "size",string_of_int syn.syn_no_hipo] @ | |
318 | + (if syn.syn_abstract then ["abstract","true"] else []))) | |
319 | + | |
320 | +(* w semimport/plWordnet.ml była jeszcze procedura wypisująca poddrzewa słowosieci scalone z Walentym *) | |
321 | + | |
322 | +let print_subtree_graph synmap hipo path lu_name lu_variant threshold = | |
323 | + let syn_id = get_syn_id synmap lu_name lu_variant in | |
324 | + let big = select_big_synsets synmap threshold in | |
325 | + let hipo = Relation.select hipo (fun parent child cost -> IntSet.mem big parent && IntSet.mem big child) in | |
326 | + let descendants = Relation.find_descendants hipo syn_id in | |
327 | + let hipo2 = Relation.select hipo (fun parent child cost -> IntSet.mem descendants parent || IntSet.mem descendants child) in | |
328 | + Relation.print_graph path (lu_name ^ "-" ^ lu_variant) true hipo2 (fun id -> | |
329 | + let syn = IntMap.find synmap id in | |
330 | + Printf.sprintf "%s\\n%d" (syn_name_single syn) syn.syn_no_hipo) (fun _ -> "") | |
331 | + | |
332 | +let rt_names = ["type"; "reverse"; "name"; "description"; "posstr"; "display"; "shortcut"; "autoreverse"; "pwn"; "tests"] | |
333 | + | |
334 | +let string_of_tests tests = | |
335 | + String.concat " " (Xlist.map tests (fun (t,p) -> "(" ^ t ^ "," ^ p ^ ")")) | |
336 | + | |
337 | +let string_of_rt rt = | |
338 | + Printf.sprintf "\"%s\";\"%d\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\";\"%s\"" rt.rt_type rt.rt_reverse rt.rt_name rt.rt_description rt.rt_posstr | |
339 | + rt.rt_display rt.rt_shortcut rt.rt_autoreverse rt.rt_pwn (string_of_tests rt.rt_tests) | |
340 | + | |
341 | +let print_rt_map filename rel_count rtmap = | |
342 | + File.file_out filename (fun file -> | |
343 | + Printf.fprintf file "id;quantity;%s\n" (String.concat ";" rt_names); | |
344 | + IntMap.iter rtmap (fun id rt -> | |
345 | + Printf.fprintf file "%d;%d;%s\n" id (try IntQMap.find rel_count id with Not_found -> 0) (string_of_rt rt))) | |
... | ... |
slowosiec/ENIAMplWordnetAnalyze.ml renamed to plWordnet/ENIAMplWordnetAnalyze.ml
slowosiec/ENIAMplWordnetGenerate.ml renamed to plWordnet/ENIAMplWordnetGenerate.ml
slowosiec/ENIAMplWordnetTypes.ml renamed to plWordnet/ENIAMplWordnetTypes.ml
plWordnet/README
1 | 1 | ENIAMplWordnet Version 1.0 : |
2 | 2 | ----------------------- |
3 | 3 | |
4 | -ENIAMplWordnet is a library that provides an interface for | |
5 | -"Słowosieć", a Polish Wordnet. | |
4 | +ENIAMplWordnet is a library that converts | |
5 | +Polish Wordnet "Słowosieć" into format used by ENIAM. | |
6 | 6 | |
7 | 7 | Install |
8 | 8 | ------- |
9 | 9 | |
10 | 10 | ENIAMplWordnet requires OCaml version 4.02.3 compiler |
11 | -together with Xlib library version 3.1 or later. | |
11 | +together with Xlib library version 3.2. | |
12 | 12 | |
13 | -In order to install type: | |
13 | +In order to use ENIAMplWordnet you must first download plWordnet. | |
14 | 14 | |
15 | -make install | |
15 | +Then, compile ENIAMplWordnet: | |
16 | 16 | |
17 | -by default, ENIAMplWordnet is installed in the 'ocamlc -where'/eniam directory. | |
18 | -you can change it by editing the Makefile. | |
17 | +make | |
19 | 18 | |
20 | -In order to test library type: | |
21 | -make test | |
22 | -./test | |
19 | +convert plWordnet: | |
23 | 20 | |
24 | -By default ENIAMplWordnet looks for resources in /usr/share/eniam directory. | |
25 | -However this behaviour may be changed by setting end exporting ENIAM_RESOURCE_PATH | |
26 | -environment variable. | |
21 | +./converter <plWordnet_file> | |
27 | 22 | |
28 | -Credits | |
29 | -------- | |
30 | -Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
31 | -Copyright © 2016 Institute of Computer Science Polish Academy of Sciences | |
23 | +install converted plWordnet: | |
32 | 24 | |
33 | -The library uses the following licensed resources: | |
25 | +make install | |
34 | 26 | |
35 | -plWordNet 2.1 © 2013 by Wrocław University of Technology. All rights reserved. | |
27 | +remove temporary files: | |
36 | 28 | |
29 | +make clean | |
30 | + | |
31 | +Credits | |
32 | +------- | |
33 | +Copyright © 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
34 | +Copyright © 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
37 | 35 | |
38 | 36 | Licence |
39 | 37 | ------- |
... | ... |
plWordnet/makefile
1 | 1 | OCAMLC=ocamlc |
2 | 2 | OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam | |
5 | -OCAMLFLAGS=$(INCLUDES) -g | |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-plWordnet.cmxa | |
7 | -INSTALLDIR=`ocamlc -where`/eniam | |
4 | +INCLUDES=-I +extlib -I +xml-light -I +gsl -I +xlib -I +zip -I +bz2 | |
5 | +OCAMLFLAGS=$(INCLUDES) | |
6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa | |
8 | 7 | |
9 | -SOURCES= ENIAMplWordnet.ml | |
8 | +SOURCES=relation.ml ENIAMplWordnetTypes.ml ENIAMplWordnet.ml | |
10 | 9 | |
11 | -all: eniam-plWordnet.cma eniam-plWordnet.cmxa | |
10 | +all: $(SOURCES) ENIAMplWordnetGenerate.ml | |
11 | + mkdir -p resources | |
12 | + mkdir -p results | |
13 | + $(OCAMLOPT) -o converter $(OCAMLOPTFLAGS) $^ | |
12 | 14 | |
13 | -install: all | |
14 | - mkdir -p $(INSTALLDIR) | |
15 | - cp eniam-plWordnet.cmxa eniam-plWordnet.a eniam-plWordnet.cma $(INSTALLDIR) | |
16 | - cp ENIAMplWordnet.cmi $(INSTALLDIR) | |
17 | - cp ENIAMplWordnet.cmx $(INSTALLDIR) | |
15 | +analyze: $(SOURCES) ENIAMplWordnetAnalyze.ml | |
16 | + mkdir -p results/rels | |
17 | + $(OCAMLOPT) -o analyze $(OCAMLOPTFLAGS) $^ | |
18 | + | |
19 | +install: | |
18 | 20 | mkdir -p /usr/share/eniam/plWordnet |
19 | 21 | cp resources/* /usr/share/eniam/plWordnet |
20 | 22 | |
21 | -install-local: all | |
22 | - mkdir -p $(INSTALLDIR) | |
23 | - cp eniam-plWordnet.cmxa eniam-plWordnet.a eniam-plWordnet.cma $(INSTALLDIR) | |
24 | - cp ENIAMplWordnet.cmi $(INSTALLDIR) | |
25 | - cp ENIAMplWordnet.cmx $(INSTALLDIR) | |
23 | +install-local: | |
26 | 24 | mkdir -p /usr/local/share/eniam/plWordnet |
27 | 25 | cp resources/* /usr/local/share/eniam/plWordnet |
28 | 26 | |
29 | -eniam-plWordnet.cma: $(SOURCES) | |
30 | - ocamlc -linkall -a -o eniam-plWordnet.cma $(OCAMLFLAGS) $^ | |
31 | - | |
32 | -eniam-plWordnet.cmxa: $(SOURCES) | |
33 | - ocamlopt -linkall -a -o eniam-plWordnet.cmxa $(INCLUDES) $^ | |
34 | - | |
35 | -test: test.ml | |
36 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml | |
37 | - | |
38 | 27 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
39 | 28 | |
40 | 29 | .mll.ml: |
... | ... | @@ -55,5 +44,11 @@ test: test.ml |
55 | 44 | .ml.cmx: |
56 | 45 | $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< |
57 | 46 | |
47 | +xlib.cmxa: | |
48 | + cd xlib; make $@ | |
49 | + | |
50 | +xlib.cma: | |
51 | + cd xlib; make $@ | |
52 | + | |
58 | 53 | clean: |
59 | - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test | |
54 | + rm -f *~ *.cm[oix] *.o analyze converter resources/* results/* | |
... | ... |
slowosiec/relation.ml renamed to plWordnet/relation.ml
plWordnet/resources/README deleted