Commit fb5972b2d30b5e00f78ccfb8febd526c895dd0c3
1 parent
3db2202f
argumenty semantyczne w lexSemantics
Showing
8 changed files
with
146 additions
and
180 deletions
LCGlexicon/ENIAM_LCGlexiconTypes.ml
@@ -101,3 +101,4 @@ let subst_numeral_lexemes_filename = resource_path ^ "/LCGlexicon/subst_numeral. | @@ -101,3 +101,4 @@ let subst_numeral_lexemes_filename = resource_path ^ "/LCGlexicon/subst_numeral. | ||
101 | let subst_time_lexemes_filename = resource_path ^ "/LCGlexicon/subst_time.dat" | 101 | let subst_time_lexemes_filename = resource_path ^ "/LCGlexicon/subst_time.dat" |
102 | 102 | ||
103 | let adv_modes_filename = resource_path ^ "/Walenty/adv_modes.tab" | 103 | let adv_modes_filename = resource_path ^ "/Walenty/adv_modes.tab" |
104 | +let num_nsems_filename = resource_path ^ "/LCGlexicon/num_nsems.tab" |
LCGlexicon/ENIAMcategoriesPL.ml
@@ -91,9 +91,17 @@ let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; | @@ -91,9 +91,17 @@ let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; | ||
91 | let load_adv_modes filename adv_modes = | 91 | let load_adv_modes filename adv_modes = |
92 | File.fold_tab filename adv_modes (fun adv_modes -> function | 92 | File.fold_tab filename adv_modes (fun adv_modes -> function |
93 | [adv;mode] -> StringMap.add_inc adv_modes adv [mode] (fun l -> mode :: l) | 93 | [adv;mode] -> StringMap.add_inc adv_modes adv [mode] (fun l -> mode :: l) |
94 | - | _ -> failwith "adv_modes") | 94 | + | _ -> failwith "load_adv_modes") |
95 | + | ||
96 | +let load_num_nsems filename num_nsems = | ||
97 | + File.fold_tab filename num_nsems (fun num_nsems -> function | ||
98 | + lemma :: _ :: nsems :: _ -> | ||
99 | + Xlist.fold (Xstring.split "," nsems) num_nsems (fun num_nsems nsem -> | ||
100 | + StringMap.add_inc num_nsems lemma [nsem] (fun l -> nsem :: l)) | ||
101 | + | _ -> failwith "load_num_nsems") | ||
95 | 102 | ||
96 | let adv_modes = ref (StringMap.empty : string list StringMap.t) | 103 | let adv_modes = ref (StringMap.empty : string list StringMap.t) |
104 | +let num_nsems = ref (StringMap.empty : string list StringMap.t) | ||
97 | 105 | ||
98 | let initialize () = | 106 | let initialize () = |
99 | subst_uncountable_lexemes := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename) StringSet.empty; | 107 | subst_uncountable_lexemes := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename) StringSet.empty; |
@@ -102,6 +110,7 @@ let initialize () = | @@ -102,6 +110,7 @@ let initialize () = | ||
102 | subst_numeral_lexemes := File.catch_no_file (load_subst_data subst_numeral_lexemes_filename) StringSet.empty; | 110 | subst_numeral_lexemes := File.catch_no_file (load_subst_data subst_numeral_lexemes_filename) StringSet.empty; |
103 | subst_time_lexemes := File.catch_no_file (load_subst_data subst_time_lexemes_filename) StringSet.empty; | 111 | subst_time_lexemes := File.catch_no_file (load_subst_data subst_time_lexemes_filename) StringSet.empty; |
104 | adv_modes := File.catch_no_file (load_adv_modes adv_modes_filename) StringMap.empty; | 112 | adv_modes := File.catch_no_file (load_adv_modes adv_modes_filename) StringMap.empty; |
113 | + num_nsems := File.catch_no_file (load_num_nsems num_nsems_filename) StringMap.empty; | ||
105 | () | 114 | () |
106 | 115 | ||
107 | let noun_type proper lemma pos = | 116 | let noun_type proper lemma pos = |
@@ -126,6 +135,12 @@ let adv_mode lemma = | @@ -126,6 +135,12 @@ let adv_mode lemma = | ||
126 | StringMap.find !adv_modes lemma | 135 | StringMap.find !adv_modes lemma |
127 | with Not_found -> ["mod"] | 136 | with Not_found -> ["mod"] |
128 | 137 | ||
138 | +let num_nsem lemma = | ||
139 | + try | ||
140 | + StringMap.find !num_nsems lemma | ||
141 | + with Not_found -> failwith ("num_nsem: " ^ lemma) | ||
142 | + | ||
143 | + | ||
129 | let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] | 144 | let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] |
130 | 145 | ||
131 | let clarify_categories proper cat = function | 146 | let clarify_categories proper cat = function |
@@ -195,20 +210,21 @@ let clarify_categories proper cat = function | @@ -195,20 +210,21 @@ let clarify_categories proper cat = function | ||
195 | let numbers = expand_numbers numbers in | 210 | let numbers = expand_numbers numbers in |
196 | let cases = expand_cases cases in | 211 | let cases = expand_cases cases in |
197 | let genders = expand_genders genders in | 212 | let genders = expand_genders genders in |
198 | - [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms}] | 213 | + let nsem = num_nsem lemma in |
214 | + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}] | ||
199 | | lemma,"numc",[] -> [] | 215 | | lemma,"numc",[] -> [] |
200 | | lemma,"intnum",[] -> | 216 | | lemma,"intnum",[] -> |
201 | let numbers,acms = | 217 | let numbers,acms = |
202 | if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else | 218 | if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else |
203 | let s = String.get lemma (String.length lemma - 1) in | 219 | let s = String.get lemma (String.length lemma - 1) in |
204 | ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in | 220 | ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in |
205 | - [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms}] | 221 | + [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}] |
206 | | lemma,"realnum",[] -> | 222 | | lemma,"realnum",[] -> |
207 | - [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]}] | 223 | + [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] |
208 | | lemma,"intnum-interval",[] -> | 224 | | lemma,"intnum-interval",[] -> |
209 | - [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]}] | 225 | + [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}] |
210 | | lemma,"realnum-interval",[] -> | 226 | | lemma,"realnum-interval",[] -> |
211 | - [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]}] | 227 | + [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}] |
212 | | lemma,"symbol",[] -> | 228 | | lemma,"symbol",[] -> |
213 | [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}] | 229 | [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}] |
214 | | lemma,"ordnum",[] -> | 230 | | lemma,"ordnum",[] -> |
@@ -515,11 +531,11 @@ let pos_categories = Xlist.fold [ | @@ -515,11 +531,11 @@ let pos_categories = Xlist.fold [ | ||
515 | "siebie",[Lemma;Number;Case;Gender;Person;]; | 531 | "siebie",[Lemma;Number;Case;Gender;Person;]; |
516 | "prep",[Lemma;Cat;Case;]; | 532 | "prep",[Lemma;Cat;Case;]; |
517 | "compar",[Lemma;Cat;Case;]; | 533 | "compar",[Lemma;Cat;Case;]; |
518 | - "num",[Lemma;Number;Case;Gender;Person;Acm;]; | ||
519 | - "intnum",[Lemma;Number;Case;Gender;Person;Acm;]; | ||
520 | - "realnum",[Lemma;Number;Case;Gender;Person;Acm;]; | ||
521 | - "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;]; | ||
522 | - "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;]; | 534 | + "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; |
535 | + "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | ||
536 | + "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | ||
537 | + "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | ||
538 | + "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;]; | ||
523 | "symbol",[Lemma;Number;Case;Gender;Person;]; | 539 | "symbol",[Lemma;Number;Case;Gender;Person;]; |
524 | "ordnum",[Lemma;Number;Case;Gender;Grad;]; | 540 | "ordnum",[Lemma;Number;Case;Gender;Grad;]; |
525 | "date",[Lemma;Nsyn;Nsem;]; | 541 | "date",[Lemma;Nsyn;Nsem;]; |
LCGlexicon/TODO
LCGlexicon/resources/num.tab
0 → 100644
1 | +zero num count 0 | ||
2 | +jeden num count 1 stress | ||
3 | +dwa num count 2 | ||
4 | +trzy num count 3 | ||
5 | +cztery num count 4 | ||
6 | +pięć num count 5 | ||
7 | +sześć num count 6 | ||
8 | +siedem num count 7 | ||
9 | +osiem num count 8 | ||
10 | +dziewięć num count 9 | ||
11 | +dziesięć num count 10 | ||
12 | +jedenaście num count 11 | ||
13 | +dwanaście num count 12 | ||
14 | +trzynaście num count 13 | ||
15 | +czternaście num count 14 | ||
16 | +piętnaście num count 15 | ||
17 | +szesnaście num count 16 | ||
18 | +siedemnaście num count 17 | ||
19 | +osiemnaście num count 18 | ||
20 | +dziewiętnaście num count 19 | ||
21 | +dwadzieścia num count 20 | ||
22 | +trzydzieści num count 30 | ||
23 | +czterdzieści num count 40 | ||
24 | +pięćdziesiąt num count 50 | ||
25 | +sześćdziesiąt num count 60 | ||
26 | +siedemdziesiąt num count 70 | ||
27 | +osiemdziesiąt num count 80 | ||
28 | +dziewięćdziesiąt num count 90 | ||
29 | +sto num count 100 | ||
30 | +dwieście num count 200 | ||
31 | +trzysta num count 300 | ||
32 | +czterysta num count 400 | ||
33 | +pięćset num count 500 | ||
34 | +sześćset num count 600 | ||
35 | +siedemset num count 700 | ||
36 | +osiemset num count 800 | ||
37 | +dziewięćset num count 900 | ||
38 | +tysiąc num count 1000 | ||
39 | +milion num count 1000000 | ||
40 | +miliard num count 1000000000 | ||
41 | +bilion num count 1000000000000 | ||
42 | +biliard num count 1000000000000000 | ||
43 | +trylion num count 1E+018 | ||
44 | +tryliard num count 1E+021 | ||
45 | +kwadrylion num count 1E+024 | ||
46 | +mniej num count,mass comparative | ||
47 | +najmniej num count,mass comparative | ||
48 | +najwięcej num count,mass comparative | ||
49 | +więcej num count,mass comparative | ||
50 | +oba num count ep??? | ||
51 | +obydwa num count ep??? | ||
52 | +tyle num count,mass indexical,correferential,deictic ??? | ||
53 | +ile num count,mass interrogative,rel | ||
54 | +dużo num count,mass relational | ||
55 | +mało num count,mass relational | ||
56 | +mnóstwo num-NKJP1M count,mass relational | ||
57 | +moc num count,mass relational | ||
58 | +nieco num count,mass relational | ||
59 | +niedużo num count,mass relational | ||
60 | +niemało num count,mass relational | ||
61 | +niewiele num count,mass relational | ||
62 | +sporo num count,mass relational | ||
63 | +trochę num count,mass relational | ||
64 | +troszeczkę num-NKJP1M count,mass relational | ||
65 | +troszkę num-NKJP1M count,mass relational | ||
66 | +wiele num count,mass relational | ||
67 | +ileż num count,mass stress | ||
68 | +tyleż num count,mass stress,indexical,correferential,deictic ??? | ||
69 | +wieleż num count,mass stress,relational | ||
70 | +ćwierć num count | ||
71 | +gros num count | ||
72 | +ilekolwiek num count,mass | ||
73 | +ileś num count,mass | ||
74 | +kilkadziesiąt num count | ||
75 | +kilkanaście num count | ||
76 | +kilka num count | ||
77 | +kilkaset num count | ||
78 | +kupa num-NKJP1M count | ||
79 | +parędziesiąt num count | ||
80 | +paręnaście num count | ||
81 | +parę num count | ||
82 | +paręset num count | ||
83 | +pół num count | ||
84 | +półtora num count | ||
85 | +tysiące num count | ||
86 | +wieledziesiąt num count | ||
87 | +wieleset num count |
lexSemantics/ENIAMlexSemantics.ml
@@ -164,6 +164,17 @@ let semantize lemma pos (selectors,schema) = | @@ -164,6 +164,17 @@ let semantize lemma pos (selectors,schema) = | ||
164 | {empty_frame with selectors=sel @ selectors; positions=schema; | 164 | {empty_frame with selectors=sel @ selectors; positions=schema; |
165 | arole=arole; arole_attr=arole_attr; arev=arev}) | 165 | arole=arole; arole_attr=arole_attr; arev=arev}) |
166 | 166 | ||
167 | +let add_sem_args lemma pos frame = | ||
168 | + {frame with sem_args = | ||
169 | + match pos with | ||
170 | + "subst" | "depr" -> (try StringMap.find ENIAMlexSemanticsData.noun_sem_args lemma with Not_found -> []) | ||
171 | + | "adj" | "adjc" | "adjp" -> (try StringMap.find ENIAMlexSemanticsData.adj_sem_args lemma with Not_found -> []) | ||
172 | + | "adv" -> (try StringMap.find ENIAMlexSemanticsData.adv_sem_args lemma with Not_found -> []) | ||
173 | + | "qub" -> (try StringMap.find ENIAMlexSemanticsData.qub_sem_args lemma with Not_found -> []) | ||
174 | + | "ppron12" | "ppron3" | "siebie" -> (try StringMap.find ENIAMlexSemanticsData.pron_sem_args lemma with Not_found -> []) | ||
175 | + | "num" -> (try StringMap.find !num_sem_args lemma with Not_found -> []) | ||
176 | + | _ -> []} | ||
177 | + | ||
167 | let assign_prep_semantics lemma = | 178 | let assign_prep_semantics lemma = |
168 | let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in | 179 | let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in |
169 | Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); | 180 | Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); |
@@ -226,10 +237,14 @@ let assign_valence tokens lex_sems group = | @@ -226,10 +237,14 @@ let assign_valence tokens lex_sems group = | ||
226 | let connected = if pos = "prep" then | 237 | let connected = if pos = "prep" then |
227 | if connected <> [] then failwith "assign_valence" else | 238 | if connected <> [] then failwith "assign_valence" else |
228 | assign_prep_semantics lemma else connected in | 239 | assign_prep_semantics lemma else connected in |
240 | + let connected = if pos = "num" then | ||
241 | + if connected <> [] then failwith "assign_valence" else | ||
242 | + assign_num_semantics lemma else connected in | ||
229 | (* Printf.printf "J %s |connected|=%d\n" lemma (Xlist.size connected); *) | 243 | (* Printf.printf "J %s |connected|=%d\n" lemma (Xlist.size connected); *) |
230 | let connected = if connected = [] then | 244 | let connected = if connected = [] then |
231 | Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) -> | 245 | Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) -> |
232 | {empty_frame with selectors=sel; arole=arole; arole_attr=arole_attr; arev=arev}) else connected in | 246 | {empty_frame with selectors=sel; arole=arole; arole_attr=arole_attr; arev=arev}) else connected in |
247 | + let connected = Xlist.rev_map connected (add_sem_args lemma pos) in | ||
233 | (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *) | 248 | (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *) |
234 | ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with | 249 | ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with |
235 | schemata=schemata; lex_entries=entries; frames=connected}) | 250 | schemata=schemata; lex_entries=entries; frames=connected}) |
lexSemantics/ENIAMlexSemanticsData.ml
@@ -380,8 +380,8 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr | @@ -380,8 +380,8 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr | ||
380 | "według","gen","Manr"; | 380 | "według","gen","Manr"; |
381 | "wobec","gen","Dat";*) | 381 | "wobec","gen","Dat";*) |
382 | 382 | ||
383 | -(* | ||
384 | -let subst_special_lexemes = Xlist.fold [ | 383 | + |
384 | +let noun_sem_args = Xlist.fold [ | ||
385 | "jutro", ["indexical"];(*"dzień"*) | 385 | "jutro", ["indexical"];(*"dzień"*) |
386 | "pojutrze", ["indexical"];(*"dzień"*) | 386 | "pojutrze", ["indexical"];(*"dzień"*) |
387 | "cóż", ["interrogative"]; | 387 | "cóż", ["interrogative"]; |
@@ -397,9 +397,9 @@ let subst_special_lexemes = Xlist.fold [ | @@ -397,9 +397,9 @@ let subst_special_lexemes = Xlist.fold [ | ||
397 | "cokolwiek", []; | 397 | "cokolwiek", []; |
398 | "ktokolwiek", []; | 398 | "ktokolwiek", []; |
399 | "ktoś", []; | 399 | "ktoś", []; |
400 | - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) | 400 | + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) |
401 | 401 | ||
402 | -let adj_special_lexemes = Xlist.fold [ | 402 | +let adj_sem_args = Xlist.fold [ |
403 | (* "1935", "=",[],"name",[]); (*"rok"*) | 403 | (* "1935", "=",[],"name",[]); (*"rok"*) |
404 | "1998", "=",[],"name",[]); (*"rok"*) | 404 | "1998", "=",[],"name",[]); (*"rok"*) |
405 | "25", "=",[],"name",[]); (*"dzień"*) | 405 | "25", "=",[],"name",[]); (*"dzień"*) |
@@ -424,9 +424,9 @@ let adj_special_lexemes = Xlist.fold [ | @@ -424,9 +424,9 @@ let adj_special_lexemes = Xlist.fold [ | ||
424 | "taki", ["indexical"]; | 424 | "taki", ["indexical"]; |
425 | "czyj", ["interrogative"]; | 425 | "czyj", ["interrogative"]; |
426 | "który", ["interrogative"];(* FIXME: dodać relative *) | 426 | "który", ["interrogative"];(* FIXME: dodać relative *) |
427 | - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) | 427 | + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) |
428 | 428 | ||
429 | -let adv_special_lexemes = Xlist.fold [ | 429 | +let adv_sem_args = Xlist.fold [ |
430 | "tymczasem", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) | 430 | "tymczasem", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) |
431 | "wtedy", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) | 431 | "wtedy", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) |
432 | "wówczas", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) | 432 | "wówczas", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) |
@@ -452,9 +452,9 @@ let adv_special_lexemes = Xlist.fold [ | @@ -452,9 +452,9 @@ let adv_special_lexemes = Xlist.fold [ | ||
452 | "kiedy", ["interrogative"]; | 452 | "kiedy", ["interrogative"]; |
453 | "dlatego", ["coreferential"]; (* odniesieniem argumentu jest sytuacji/kontekst *) | 453 | "dlatego", ["coreferential"]; (* odniesieniem argumentu jest sytuacji/kontekst *) |
454 | "tak", ["coreferential"]; (* odniesieniem argumentu jest sytuacji/kontekst, byc może deiktyczny *) | 454 | "tak", ["coreferential"]; (* odniesieniem argumentu jest sytuacji/kontekst, byc może deiktyczny *) |
455 | - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) | 455 | + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) |
456 | 456 | ||
457 | -let qub_special_lexemes = Xlist.fold [ | 457 | +let qub_sem_args = Xlist.fold [ |
458 | "tylko", []; (* przyrematyczny (wskazuje remat) *) | 458 | "tylko", []; (* przyrematyczny (wskazuje remat) *) |
459 | "jeszcze", ["order"(*relacja porządkująca*)]; (* reprezentacja: określamy obiekt ktory jest w skali i stwierdzamy że istnieje inny obiekt wcześniej w skali, który spełnia ten sam event w tej samej roli *) (* operator: restrykcja zadana przez remat, zakres przez temat; semantycznie: isnieje porządek, sąd jest prawdziwy dla pewnego obiektu mniejszego w porządku i dla aktualnego obiektu ; pragmatycznie: mówca spodziewa się, że sąd nie jest prawdziwy dla elementu większego w porządku (np. przestanie być w późniejszym momencie) *) | 459 | "jeszcze", ["order"(*relacja porządkująca*)]; (* reprezentacja: określamy obiekt ktory jest w skali i stwierdzamy że istnieje inny obiekt wcześniej w skali, który spełnia ten sam event w tej samej roli *) (* operator: restrykcja zadana przez remat, zakres przez temat; semantycznie: isnieje porządek, sąd jest prawdziwy dla pewnego obiektu mniejszego w porządku i dla aktualnego obiektu ; pragmatycznie: mówca spodziewa się, że sąd nie jest prawdziwy dla elementu większego w porządku (np. przestanie być w późniejszym momencie) *) |
460 | "już", ["order"(*relacja porządkująca*)]; (* reprezentacja: określamy obiekt ktory jest w skali i stwierdzamy że istnieje inny obiekt wcześniej w skali, który nie spełnia tego eventu w tej roli *) (* dualny do jeszcze *) | 460 | "już", ["order"(*relacja porządkująca*)]; (* reprezentacja: określamy obiekt ktory jest w skali i stwierdzamy że istnieje inny obiekt wcześniej w skali, który nie spełnia tego eventu w tej roli *) (* dualny do jeszcze *) |
@@ -478,9 +478,9 @@ let qub_special_lexemes = Xlist.fold [ | @@ -478,9 +478,9 @@ let qub_special_lexemes = Xlist.fold [ | ||
478 | "ponad", []; | 478 | "ponad", []; |
479 | "prawie", []; | 479 | "prawie", []; |
480 | "przynajmniej", []; | 480 | "przynajmniej", []; |
481 | - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) | 481 | + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) |
482 | 482 | ||
483 | -let pron_lexemes = Xlist.fold [ | 483 | +let pron_sem_args = Xlist.fold [ |
484 | "ja", ["indexical"]; (* elementy zdarzenia komunikacyjnego; wyrażenie okazjonalne; kontekst komunikacji oznaczamy przez "indexical" *) | 484 | "ja", ["indexical"]; (* elementy zdarzenia komunikacyjnego; wyrażenie okazjonalne; kontekst komunikacji oznaczamy przez "indexical" *) |
485 | "my", ["indexical"(*; "zbiór indywiduów"*)]; | 485 | "my", ["indexical"(*; "zbiór indywiduów"*)]; |
486 | "pro1", ["indexical"(*; "zbiór indywiduów"*)]; | 486 | "pro1", ["indexical"(*; "zbiór indywiduów"*)]; |
@@ -500,7 +500,7 @@ let pron_lexemes = Xlist.fold [ | @@ -500,7 +500,7 @@ let pron_lexemes = Xlist.fold [ | ||
500 | "pro3pl",["coreferential";"deictic"]; | 500 | "pro3pl",["coreferential";"deictic"]; |
501 | "pro", ["indexical";"coreferential";"deictic"]; | 501 | "pro", ["indexical";"coreferential";"deictic"]; |
502 | "siebie",["coreferential"]; | 502 | "siebie",["coreferential"]; |
503 | - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) | 503 | + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l) |
504 | 504 | ||
505 | (*let num_lexemes = Xlist.fold [ | 505 | (*let num_lexemes = Xlist.fold [ |
506 | "10", "10",[],"exact",[]); | 506 | "10", "10",[],"exact",[]); |
@@ -529,27 +529,3 @@ let pron_lexemes = Xlist.fold [ | @@ -529,27 +529,3 @@ let pron_lexemes = Xlist.fold [ | ||
529 | "trochę", "trochę",[],"approx",[]); | 529 | "trochę", "trochę",[],"approx",[]); |
530 | "wiele", "wiele",[],"approx",[]); | 530 | "wiele", "wiele",[],"approx",[]); |
531 | ] StringMap.empty (fun map (k,v,w) -> StringMap.add map k (SpecialMod(v,w)))*) | 531 | ] StringMap.empty (fun map (k,v,w) -> StringMap.add map k (SpecialMod(v,w)))*) |
532 | - | ||
533 | -(* UWAGA: przy przetwarzaniu danych zdezambiguowanych ta procedura nie zmienia liczby tokenów *) | ||
534 | -let assign_semantics tokens lex_sems group = | ||
535 | - Xlist.iter group (fun id -> | ||
536 | - let token = (ExtArray.get tokens id).token in | ||
537 | - let t = ExtArray.get lex_sems id in | ||
538 | - let t = match token with | ||
539 | - Lemma(lemma,"subst",_) -> {t with lroles=(try StringMap.find subst_inst_roles lemma with Not_found -> "",""); semantics=try StringMap.find subst_special_lexemes lemma with Not_found -> Normal} | ||
540 | - | Lemma(lemma,"depr",_) -> {t with semantics=try StringMap.find subst_special_lexemes lemma with Not_found -> Normal} | ||
541 | - | Lemma(lemma,"adj",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal} | ||
542 | - | Lemma(lemma,"adjc",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal} | ||
543 | - | Lemma(lemma,"adjp",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal} | ||
544 | - | Lemma(lemma,"adv",_) -> {t with lroles=(try StringMap.find adv_roles lemma with Not_found -> "",""); semantics=try StringMap.find adv_special_lexemes lemma with Not_found -> Normal} | ||
545 | - | Lemma(lemma,"qub",_) -> {t with lroles=(try StringMap.find qub_roles lemma with Not_found -> "",""); semantics=try StringMap.find qub_special_lexemes lemma with Not_found -> Normal} | ||
546 | -(* | Lemma(lemma,"num",_) -> [{t with semantics=try StringMap.find num_lexemes lemma with Not_found -> Normal}] *) | ||
547 | - | Lemma(lemma,"ppron12",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal} | ||
548 | - | Lemma(lemma,"ppron3",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal} | ||
549 | - | Lemma(lemma,"siebie",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal} | ||
550 | - | Lemma(lemma,"prep",l) -> | ||
551 | - let cases = Xlist.fold l StringSet.empty (fun set -> function cases :: _ -> Xlist.fold cases set StringSet.add | _ -> set) in | ||
552 | - {t with semantics=assign_prep_semantics lemma (StringSet.to_list cases) t} | ||
553 | - | _ -> t in | ||
554 | - ExtArray.set lex_sems id t) | ||
555 | -*) |
lexSemantics/ENIAMlexSemanticsTypes.ml
@@ -20,22 +20,6 @@ | @@ -20,22 +20,6 @@ | ||
20 | open ENIAMtokenizerTypes | 20 | open ENIAMtokenizerTypes |
21 | open Xstd | 21 | open Xstd |
22 | 22 | ||
23 | -(* FIXME: usunąć *) | ||
24 | -(*type labels = { | ||
25 | - number: string; | ||
26 | - case: string; | ||
27 | - gender: string; | ||
28 | - person: string; | ||
29 | - aspect: string; | ||
30 | - }*) | ||
31 | - | ||
32 | -type semantics = | ||
33 | - Normal | ||
34 | - | Special of string list | ||
35 | -(* | SpecialNoun of type_arg list * type_term | ||
36 | - | SpecialMod of string * (type_arg list * type_term)*) | ||
37 | - | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *) | ||
38 | - | ||
39 | type frame = { | 23 | type frame = { |
40 | selectors: (ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list; | 24 | selectors: (ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list; |
41 | meanings: ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list; | 25 | meanings: ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list; |
@@ -43,11 +27,12 @@ type frame = { | @@ -43,11 +27,12 @@ type frame = { | ||
43 | arole: string; | 27 | arole: string; |
44 | arole_attr: string; | 28 | arole_attr: string; |
45 | arev: bool; | 29 | arev: bool; |
30 | + sem_args: string list; | ||
46 | sopinion: ENIAMwalTypes.opinion; | 31 | sopinion: ENIAMwalTypes.opinion; |
47 | fopinion: ENIAMwalTypes.opinion; | 32 | fopinion: ENIAMwalTypes.opinion; |
48 | } | 33 | } |
49 | 34 | ||
50 | -let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; | 35 | +let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; sem_args=[]; |
51 | sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} | 36 | sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} |
52 | 37 | ||
53 | type lex_sem = { | 38 | type lex_sem = { |
@@ -57,39 +42,10 @@ type lex_sem = { | @@ -57,39 +42,10 @@ type lex_sem = { | ||
57 | ENIAM_LCGtypes.grammar_symbol) list; | 42 | ENIAM_LCGtypes.grammar_symbol) list; |
58 | frames: frame list; | 43 | frames: frame list; |
59 | cats: string list; | 44 | cats: string list; |
60 | - (* e: labels; *) | ||
61 | - (* valence: (int * ENIAMwalTypes.frame) list; | ||
62 | - simple_valence: (int * ENIAMwalTypes.frame) list; | ||
63 | - very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *) | ||
64 | - (* senses: (string * (string * int) list * float) list; *) | ||
65 | - (* lroles: string * string; *) | ||
66 | - semantics: semantics; | ||
67 | } | 45 | } |
68 | 46 | ||
69 | -(*let empty_labels = { | ||
70 | - number=""; | ||
71 | - case=""; | ||
72 | - gender=""; | ||
73 | - person=""; | ||
74 | - aspect=""; | ||
75 | - }*) | ||
76 | - | ||
77 | let empty_lex_sem = { | 47 | let empty_lex_sem = { |
78 | - schemata=[]; lex_entries=[]; frames=[]; cats=["X"]; | ||
79 | - (*e=empty_labels;*) (*valence=[]; simple_valence=[]; very_simple_valence=[];*) (*senses=[];*) | ||
80 | - (*lroles="","";*) semantics=Normal} | ||
81 | - | ||
82 | -(* FIXME: poprawić katalog *) | ||
83 | -(*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat" | ||
84 | -let subst_uncountable_lexemes_filename2 = resource_path ^ "/lexSemantics/subst_uncountable_stare.dat" | ||
85 | -let subst_container_lexemes_filename = resource_path ^ "/lexSemantics/subst_container.dat" | ||
86 | -let subst_numeral_lexemes_filename = resource_path ^ "/lexSemantics/subst_numeral.dat" | ||
87 | - let subst_time_lexemes_filename = resource_path ^ "/lexSemantics/subst_time.dat"*) | ||
88 | -(* let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat" | ||
89 | -let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncountable_stare.dat" | ||
90 | -let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat" | ||
91 | -let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat" | ||
92 | -let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" *) | 48 | + schemata=[]; lex_entries=[]; frames=[]; cats=["X"]} |
93 | 49 | ||
94 | let hipero_threshold = 3 | 50 | let hipero_threshold = 3 |
95 | let unknown_meaning_weight = -1. | 51 | let unknown_meaning_weight = -1. |
semsources/num.tab deleted
1 | -zero num Count 0 | ||
2 | -jeden num Count 1 stress | ||
3 | -dwa num Count 2 | ||
4 | -trzy num Count 3 | ||
5 | -cztery num Count 4 | ||
6 | -pięć num Count 5 | ||
7 | -sześć num Count 6 | ||
8 | -siedem num Count 7 | ||
9 | -osiem num Count 8 | ||
10 | -dziewięć num Count 9 | ||
11 | -dziesięć num Count 10 | ||
12 | -jedenaście num Count 11 | ||
13 | -dwanaście num Count 12 | ||
14 | -trzynaście num Count 13 | ||
15 | -czternaście num Count 14 | ||
16 | -piętnaście num Count 15 | ||
17 | -szesnaście num Count 16 | ||
18 | -siedemnaście num Count 17 | ||
19 | -osiemnaście num Count 18 | ||
20 | -dziewiętnaście num Count 19 | ||
21 | -dwadzieścia num Count 20 | ||
22 | -trzydzieści num Count 30 | ||
23 | -czterdzieści num Count 40 | ||
24 | -pięćdziesiąt num Count 50 | ||
25 | -sześćdziesiąt num Count 60 | ||
26 | -siedemdziesiąt num Count 70 | ||
27 | -osiemdziesiąt num Count 80 | ||
28 | -dziewięćdziesiąt num Count 90 | ||
29 | -sto num Count 100 | ||
30 | -dwieście num Count 200 | ||
31 | -trzysta num Count 300 | ||
32 | -czterysta num Count 400 | ||
33 | -pięćset num Count 500 | ||
34 | -sześćset num Count 600 | ||
35 | -siedemset num Count 700 | ||
36 | -osiemset num Count 800 | ||
37 | -dziewięćset num Count 900 | ||
38 | -tysiąc num Count 1000 | ||
39 | -milion num Count 1000000 | ||
40 | -miliard num Count 1000000000 | ||
41 | -bilion num Count 1000000000000 | ||
42 | -biliard num Count 1000000000000000 | ||
43 | -trylion num Count 1E+018 | ||
44 | -tryliard num Count 1E+021 | ||
45 | -kwadrylion num Count 1E+024 | ||
46 | -mniej num Count,Measure comparative | ||
47 | -najmniej num Count,Measure comparative | ||
48 | -najwięcej num Count,Measure comparative | ||
49 | -więcej num Count,Measure comparative | ||
50 | -oba num Count ep??? | ||
51 | -obydwa num Count ep??? | ||
52 | -tyle num Count,Measure indexical, correferential, deictic ??? | ||
53 | -ile num Count,Measure interrogative, rel | ||
54 | -dużo num Count,Measure relational | ||
55 | -mało num Count,Measure relational | ||
56 | -mnóstwo num-NKJP1M Count,Measure relational | ||
57 | -moc num Count,Measure relational | ||
58 | -nieco num Count,Measure relational | ||
59 | -niedużo num Count,Measure relational | ||
60 | -niemało num Count,Measure relational | ||
61 | -niewiele num Count,Measure relational | ||
62 | -sporo num Count,Measure relational | ||
63 | -trochę num Count,Measure relational | ||
64 | -troszeczkę num-NKJP1M Count,Measure relational | ||
65 | -troszkę num-NKJP1M Count,Measure relational | ||
66 | -wiele num Count,Measure relational | ||
67 | -ileż num Count,Measure stress | ||
68 | -tyleż num Count,Measure stress, indexical, correferential, deictic ??? | ||
69 | -wieleż num Count,Measure stress, relational | ||
70 | -ćwierć num Count | ||
71 | -gros num Count | ||
72 | -ilekolwiek num Count,Measure | ||
73 | -ileś num Count,Measure | ||
74 | -kilkadziesiąt num Count | ||
75 | -kilkanaście num Count | ||
76 | -kilka num Count | ||
77 | -kilkaset num Count | ||
78 | -kupa num-NKJP1M Count | ||
79 | -parędziesiąt num Count | ||
80 | -paręnaście num Count | ||
81 | -parę num Count | ||
82 | -paręset num Count | ||
83 | -pół num Count | ||
84 | -półtora num Count | ||
85 | -tysiące num Count | ||
86 | -wieledziesiąt num Count | ||
87 | -wieleset num Count |