Commit fb5972b2d30b5e00f78ccfb8febd526c895dd0c3

Authored by Wojciech Jaworski
1 parent 3db2202f

argumenty semantyczne w lexSemantics

LCGlexicon/ENIAM_LCGlexiconTypes.ml
@@ -101,3 +101,4 @@ let subst_numeral_lexemes_filename = resource_path ^ "/LCGlexicon/subst_numeral. @@ -101,3 +101,4 @@ let subst_numeral_lexemes_filename = resource_path ^ "/LCGlexicon/subst_numeral.
101 let subst_time_lexemes_filename = resource_path ^ "/LCGlexicon/subst_time.dat" 101 let subst_time_lexemes_filename = resource_path ^ "/LCGlexicon/subst_time.dat"
102 102
103 let adv_modes_filename = resource_path ^ "/Walenty/adv_modes.tab" 103 let adv_modes_filename = resource_path ^ "/Walenty/adv_modes.tab"
  104 +let num_nsems_filename = resource_path ^ "/LCGlexicon/num_nsems.tab"
LCGlexicon/ENIAMcategoriesPL.ml
@@ -91,9 +91,17 @@ let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; @@ -91,9 +91,17 @@ let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś";
91 let load_adv_modes filename adv_modes = 91 let load_adv_modes filename adv_modes =
92 File.fold_tab filename adv_modes (fun adv_modes -> function 92 File.fold_tab filename adv_modes (fun adv_modes -> function
93 [adv;mode] -> StringMap.add_inc adv_modes adv [mode] (fun l -> mode :: l) 93 [adv;mode] -> StringMap.add_inc adv_modes adv [mode] (fun l -> mode :: l)
94 - | _ -> failwith "adv_modes") 94 + | _ -> failwith "load_adv_modes")
  95 +
  96 +let load_num_nsems filename num_nsems =
  97 + File.fold_tab filename num_nsems (fun num_nsems -> function
  98 + lemma :: _ :: nsems :: _ ->
  99 + Xlist.fold (Xstring.split "," nsems) num_nsems (fun num_nsems nsem ->
  100 + StringMap.add_inc num_nsems lemma [nsem] (fun l -> nsem :: l))
  101 + | _ -> failwith "load_num_nsems")
95 102
96 let adv_modes = ref (StringMap.empty : string list StringMap.t) 103 let adv_modes = ref (StringMap.empty : string list StringMap.t)
  104 +let num_nsems = ref (StringMap.empty : string list StringMap.t)
97 105
98 let initialize () = 106 let initialize () =
99 subst_uncountable_lexemes := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename) StringSet.empty; 107 subst_uncountable_lexemes := File.catch_no_file (load_subst_data subst_uncountable_lexemes_filename) StringSet.empty;
@@ -102,6 +110,7 @@ let initialize () = @@ -102,6 +110,7 @@ let initialize () =
102 subst_numeral_lexemes := File.catch_no_file (load_subst_data subst_numeral_lexemes_filename) StringSet.empty; 110 subst_numeral_lexemes := File.catch_no_file (load_subst_data subst_numeral_lexemes_filename) StringSet.empty;
103 subst_time_lexemes := File.catch_no_file (load_subst_data subst_time_lexemes_filename) StringSet.empty; 111 subst_time_lexemes := File.catch_no_file (load_subst_data subst_time_lexemes_filename) StringSet.empty;
104 adv_modes := File.catch_no_file (load_adv_modes adv_modes_filename) StringMap.empty; 112 adv_modes := File.catch_no_file (load_adv_modes adv_modes_filename) StringMap.empty;
  113 + num_nsems := File.catch_no_file (load_num_nsems num_nsems_filename) StringMap.empty;
105 () 114 ()
106 115
107 let noun_type proper lemma pos = 116 let noun_type proper lemma pos =
@@ -126,6 +135,12 @@ let adv_mode lemma = @@ -126,6 +135,12 @@ let adv_mode lemma =
126 StringMap.find !adv_modes lemma 135 StringMap.find !adv_modes lemma
127 with Not_found -> ["mod"] 136 with Not_found -> ["mod"]
128 137
  138 +let num_nsem lemma =
  139 + try
  140 + StringMap.find !num_nsems lemma
  141 + with Not_found -> failwith ("num_nsem: " ^ lemma)
  142 +
  143 +
129 let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] 144 let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"]
130 145
131 let clarify_categories proper cat = function 146 let clarify_categories proper cat = function
@@ -195,20 +210,21 @@ let clarify_categories proper cat = function @@ -195,20 +210,21 @@ let clarify_categories proper cat = function
195 let numbers = expand_numbers numbers in 210 let numbers = expand_numbers numbers in
196 let cases = expand_cases cases in 211 let cases = expand_cases cases in
197 let genders = expand_genders genders in 212 let genders = expand_genders genders in
198 - [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms}] 213 + let nsem = num_nsem lemma in
  214 + [{empty_cats with lemma=lemma; pos="num"; pos2="num"; numbers=numbers; cases=cases; genders=genders; persons=["ter"]; acms=acms; nsem=nsem}]
199 | lemma,"numc",[] -> [] 215 | lemma,"numc",[] -> []
200 | lemma,"intnum",[] -> 216 | lemma,"intnum",[] ->
201 let numbers,acms = 217 let numbers,acms =
202 if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else 218 if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else
203 let s = String.get lemma (String.length lemma - 1) in 219 let s = String.get lemma (String.length lemma - 1) in
204 ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in 220 ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in
205 - [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms}] 221 + [{empty_cats with lemma=lemma; pos="intnum"; pos2="num"; numbers=numbers; cases=all_cases; genders=all_genders; persons=["ter"]; acms=acms; nsem=["count"]}]
206 | lemma,"realnum",[] -> 222 | lemma,"realnum",[] ->
207 - [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]}] 223 + [{empty_cats with lemma=lemma; pos="realnum"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}]
208 | lemma,"intnum-interval",[] -> 224 | lemma,"intnum-interval",[] ->
209 - [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]}] 225 + [{empty_cats with lemma=lemma; pos="intnum-interval"; pos2="num"; numbers=["pl"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec";"congr"]; nsem=["count"]}]
210 | lemma,"realnum-interval",[] -> 226 | lemma,"realnum-interval",[] ->
211 - [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]}] 227 + [{empty_cats with lemma=lemma; pos="realnum-interval"; pos2="num"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]; acms=["rec"]; nsem=["count"]}]
212 | lemma,"symbol",[] -> 228 | lemma,"symbol",[] ->
213 [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}] 229 [{empty_cats with lemma=lemma; pos="symbol"; pos2="noun"; numbers=["sg"]; cases=all_cases; genders=all_genders; persons=["ter"]}]
214 | lemma,"ordnum",[] -> 230 | lemma,"ordnum",[] ->
@@ -515,11 +531,11 @@ let pos_categories = Xlist.fold [ @@ -515,11 +531,11 @@ let pos_categories = Xlist.fold [
515 "siebie",[Lemma;Number;Case;Gender;Person;]; 531 "siebie",[Lemma;Number;Case;Gender;Person;];
516 "prep",[Lemma;Cat;Case;]; 532 "prep",[Lemma;Cat;Case;];
517 "compar",[Lemma;Cat;Case;]; 533 "compar",[Lemma;Cat;Case;];
518 - "num",[Lemma;Number;Case;Gender;Person;Acm;];  
519 - "intnum",[Lemma;Number;Case;Gender;Person;Acm;];  
520 - "realnum",[Lemma;Number;Case;Gender;Person;Acm;];  
521 - "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;];  
522 - "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;]; 534 + "num",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  535 + "intnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  536 + "realnum",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  537 + "intnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
  538 + "realnum-interval",[Lemma;Number;Case;Gender;Person;Acm;Nsem;];
523 "symbol",[Lemma;Number;Case;Gender;Person;]; 539 "symbol",[Lemma;Number;Case;Gender;Person;];
524 "ordnum",[Lemma;Number;Case;Gender;Grad;]; 540 "ordnum",[Lemma;Number;Case;Gender;Grad;];
525 "date",[Lemma;Nsyn;Nsem;]; 541 "date",[Lemma;Nsyn;Nsem;];
LCGlexicon/TODO
  1 +- dodać uzgodnienie policzalności liczebnika i rzeczownika
  2 +
1 - dodać prepncp 3 - dodać prepncp
2 4
3 - dodać podniesione comprepy 5 - dodać podniesione comprepy
LCGlexicon/resources/num.tab 0 → 100644
  1 +zero num count 0
  2 +jeden num count 1 stress
  3 +dwa num count 2
  4 +trzy num count 3
  5 +cztery num count 4
  6 +pięć num count 5
  7 +sześć num count 6
  8 +siedem num count 7
  9 +osiem num count 8
  10 +dziewięć num count 9
  11 +dziesięć num count 10
  12 +jedenaście num count 11
  13 +dwanaście num count 12
  14 +trzynaście num count 13
  15 +czternaście num count 14
  16 +piętnaście num count 15
  17 +szesnaście num count 16
  18 +siedemnaście num count 17
  19 +osiemnaście num count 18
  20 +dziewiętnaście num count 19
  21 +dwadzieścia num count 20
  22 +trzydzieści num count 30
  23 +czterdzieści num count 40
  24 +pięćdziesiąt num count 50
  25 +sześćdziesiąt num count 60
  26 +siedemdziesiąt num count 70
  27 +osiemdziesiąt num count 80
  28 +dziewięćdziesiąt num count 90
  29 +sto num count 100
  30 +dwieście num count 200
  31 +trzysta num count 300
  32 +czterysta num count 400
  33 +pięćset num count 500
  34 +sześćset num count 600
  35 +siedemset num count 700
  36 +osiemset num count 800
  37 +dziewięćset num count 900
  38 +tysiąc num count 1000
  39 +milion num count 1000000
  40 +miliard num count 1000000000
  41 +bilion num count 1000000000000
  42 +biliard num count 1000000000000000
  43 +trylion num count 1E+018
  44 +tryliard num count 1E+021
  45 +kwadrylion num count 1E+024
  46 +mniej num count,mass comparative
  47 +najmniej num count,mass comparative
  48 +najwięcej num count,mass comparative
  49 +więcej num count,mass comparative
  50 +oba num count ep???
  51 +obydwa num count ep???
  52 +tyle num count,mass indexical,coreferential,deictic ???
  53 +ile num count,mass interrogative,rel
  54 +dużo num count,mass relational
  55 +mało num count,mass relational
  56 +mnóstwo num-NKJP1M count,mass relational
  57 +moc num count,mass relational
  58 +nieco num count,mass relational
  59 +niedużo num count,mass relational
  60 +niemało num count,mass relational
  61 +niewiele num count,mass relational
  62 +sporo num count,mass relational
  63 +trochę num count,mass relational
  64 +troszeczkę num-NKJP1M count,mass relational
  65 +troszkę num-NKJP1M count,mass relational
  66 +wiele num count,mass relational
  67 +ileż num count,mass stress
  68 +tyleż num count,mass stress,indexical,coreferential,deictic ???
  69 +wieleż num count,mass stress,relational
  70 +ćwierć num count
  71 +gros num count
  72 +ilekolwiek num count,mass
  73 +ileś num count,mass
  74 +kilkadziesiąt num count
  75 +kilkanaście num count
  76 +kilka num count
  77 +kilkaset num count
  78 +kupa num-NKJP1M count
  79 +parędziesiąt num count
  80 +paręnaście num count
  81 +parę num count
  82 +paręset num count
  83 +pół num count
  84 +półtora num count
  85 +tysiące num count
  86 +wieledziesiąt num count
  87 +wieleset num count
lexSemantics/ENIAMlexSemantics.ml
@@ -164,6 +164,17 @@ let semantize lemma pos (selectors,schema) = @@ -164,6 +164,17 @@ let semantize lemma pos (selectors,schema) =
164 {empty_frame with selectors=sel @ selectors; positions=schema; 164 {empty_frame with selectors=sel @ selectors; positions=schema;
165 arole=arole; arole_attr=arole_attr; arev=arev}) 165 arole=arole; arole_attr=arole_attr; arev=arev})
166 166
  167 +let add_sem_args lemma pos frame =
  168 + {frame with sem_args =
  169 + match pos with
  170 + "subst" | "depr" -> (try StringMap.find ENIAMlexSemanticsData.noun_sem_args lemma with Not_found -> [])
  171 + | "adj" | "adjc" | "adjp" -> (try StringMap.find ENIAMlexSemanticsData.adj_sem_args lemma with Not_found -> [])
  172 + | "adv" -> (try StringMap.find ENIAMlexSemanticsData.adv_sem_args lemma with Not_found -> [])
  173 + | "qub" -> (try StringMap.find ENIAMlexSemanticsData.qub_sem_args lemma with Not_found -> [])
  174 + | "ppron12" | "ppron3" | "siebie" -> (try StringMap.find ENIAMlexSemanticsData.pron_sem_args lemma with Not_found -> [])
  175 + | "num" -> (try StringMap.find !num_sem_args lemma with Not_found -> [])
  176 + | _ -> []}
  177 +
167 let assign_prep_semantics lemma = 178 let assign_prep_semantics lemma =
168 let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in 179 let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in
169 Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); 180 Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles);
@@ -226,10 +237,14 @@ let assign_valence tokens lex_sems group = @@ -226,10 +237,14 @@ let assign_valence tokens lex_sems group =
226 let connected = if pos = "prep" then 237 let connected = if pos = "prep" then
227 if connected <> [] then failwith "assign_valence" else 238 if connected <> [] then failwith "assign_valence" else
228 assign_prep_semantics lemma else connected in 239 assign_prep_semantics lemma else connected in
  240 + let connected = if pos = "num" then
  241 + if connected <> [] then failwith "assign_valence" else
  242 + assign_num_semantics lemma else connected in
229 (* Printf.printf "J %s |connected|=%d\n" lemma (Xlist.size connected); *) 243 (* Printf.printf "J %s |connected|=%d\n" lemma (Xlist.size connected); *)
230 let connected = if connected = [] then 244 let connected = if connected = [] then
231 Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) -> 245 Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) ->
232 {empty_frame with selectors=sel; arole=arole; arole_attr=arole_attr; arev=arev}) else connected in 246 {empty_frame with selectors=sel; arole=arole; arole_attr=arole_attr; arev=arev}) else connected in
  247 + let connected = Xlist.rev_map connected (add_sem_args lemma pos) in
233 (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *) 248 (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *)
234 ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with 249 ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with
235 schemata=schemata; lex_entries=entries; frames=connected}) 250 schemata=schemata; lex_entries=entries; frames=connected})
lexSemantics/ENIAMlexSemanticsData.ml
@@ -380,8 +380,8 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr @@ -380,8 +380,8 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
380 "według","gen","Manr"; 380 "według","gen","Manr";
381 "wobec","gen","Dat";*) 381 "wobec","gen","Dat";*)
382 382
383 -(*  
384 -let subst_special_lexemes = Xlist.fold [ 383 +
  384 +let noun_sem_args = Xlist.fold [
385 "jutro", ["indexical"];(*"dzień"*) 385 "jutro", ["indexical"];(*"dzień"*)
386 "pojutrze", ["indexical"];(*"dzień"*) 386 "pojutrze", ["indexical"];(*"dzień"*)
387 "cóż", ["interrogative"]; 387 "cóż", ["interrogative"];
@@ -397,9 +397,9 @@ let subst_special_lexemes = Xlist.fold [ @@ -397,9 +397,9 @@ let subst_special_lexemes = Xlist.fold [
397 "cokolwiek", []; 397 "cokolwiek", [];
398 "ktokolwiek", []; 398 "ktokolwiek", [];
399 "ktoś", []; 399 "ktoś", [];
400 - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) 400 + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
401 401
402 -let adj_special_lexemes = Xlist.fold [ 402 +let adj_sem_args = Xlist.fold [
403 (* "1935", "=",[],"name",[]); (*"rok"*) 403 (* "1935", "=",[],"name",[]); (*"rok"*)
404 "1998", "=",[],"name",[]); (*"rok"*) 404 "1998", "=",[],"name",[]); (*"rok"*)
405 "25", "=",[],"name",[]); (*"dzień"*) 405 "25", "=",[],"name",[]); (*"dzień"*)
@@ -424,9 +424,9 @@ let adj_special_lexemes = Xlist.fold [ @@ -424,9 +424,9 @@ let adj_special_lexemes = Xlist.fold [
424 "taki", ["indexical"]; 424 "taki", ["indexical"];
425 "czyj", ["interrogative"]; 425 "czyj", ["interrogative"];
426 "który", ["interrogative"];(* FIXME: dodać relative *) 426 "który", ["interrogative"];(* FIXME: dodać relative *)
427 - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) 427 + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
428 428
429 -let adv_special_lexemes = Xlist.fold [ 429 +let adv_sem_args = Xlist.fold [
430 "tymczasem", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) 430 "tymczasem", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*)
431 "wtedy", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) 431 "wtedy", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*)
432 "wówczas", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*) 432 "wówczas", ["coreferential"(*czas wypowiedzenia*)];(*"czas"*)
@@ -452,9 +452,9 @@ let adv_special_lexemes = Xlist.fold [ @@ -452,9 +452,9 @@ let adv_special_lexemes = Xlist.fold [
452 "kiedy", ["interrogative"]; 452 "kiedy", ["interrogative"];
453 "dlatego", ["coreferential"]; (* odniesieniem argumentu jest sytuacji/kontekst *) 453 "dlatego", ["coreferential"]; (* odniesieniem argumentu jest sytuacji/kontekst *)
454 "tak", ["coreferential"]; (* odniesieniem argumentu jest sytuacji/kontekst, byc może deiktyczny *) 454 "tak", ["coreferential"]; (* odniesieniem argumentu jest sytuacji/kontekst, byc może deiktyczny *)
455 - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) 455 + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
456 456
457 -let qub_special_lexemes = Xlist.fold [ 457 +let qub_sem_args = Xlist.fold [
458 "tylko", []; (* przyrematyczny (wskazuje remat) *) 458 "tylko", []; (* przyrematyczny (wskazuje remat) *)
459 "jeszcze", ["order"(*relacja porządkująca*)]; (* reprezentacja: określamy obiekt ktory jest w skali i stwierdzamy że istnieje inny obiekt wcześniej w skali, który spełnia ten sam event w tej samej roli *) (* operator: restrykcja zadana przez remat, zakres przez temat; semantycznie: isnieje porządek, sąd jest prawdziwy dla pewnego obiektu mniejszego w porządku i dla aktualnego obiektu ; pragmatycznie: mówca spodziewa się, że sąd nie jest prawdziwy dla elementu większego w porządku (np. przestanie być w późniejszym momencie) *) 459 "jeszcze", ["order"(*relacja porządkująca*)]; (* reprezentacja: określamy obiekt ktory jest w skali i stwierdzamy że istnieje inny obiekt wcześniej w skali, który spełnia ten sam event w tej samej roli *) (* operator: restrykcja zadana przez remat, zakres przez temat; semantycznie: isnieje porządek, sąd jest prawdziwy dla pewnego obiektu mniejszego w porządku i dla aktualnego obiektu ; pragmatycznie: mówca spodziewa się, że sąd nie jest prawdziwy dla elementu większego w porządku (np. przestanie być w późniejszym momencie) *)
460 "już", ["order"(*relacja porządkująca*)]; (* reprezentacja: określamy obiekt ktory jest w skali i stwierdzamy że istnieje inny obiekt wcześniej w skali, który nie spełnia tego eventu w tej roli *) (* dualny do jeszcze *) 460 "już", ["order"(*relacja porządkująca*)]; (* reprezentacja: określamy obiekt ktory jest w skali i stwierdzamy że istnieje inny obiekt wcześniej w skali, który nie spełnia tego eventu w tej roli *) (* dualny do jeszcze *)
@@ -478,9 +478,9 @@ let qub_special_lexemes = Xlist.fold [ @@ -478,9 +478,9 @@ let qub_special_lexemes = Xlist.fold [
478 "ponad", []; 478 "ponad", [];
479 "prawie", []; 479 "prawie", [];
480 "przynajmniej", []; 480 "przynajmniej", [];
481 - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) 481 + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
482 482
483 -let pron_lexemes = Xlist.fold [ 483 +let pron_sem_args = Xlist.fold [
484 "ja", ["indexical"]; (* elementy zdarzenia komunikacyjnego; wyrażenie okazjonalne; kontekst komunikacji oznaczamy przez "indexical" *) 484 "ja", ["indexical"]; (* elementy zdarzenia komunikacyjnego; wyrażenie okazjonalne; kontekst komunikacji oznaczamy przez "indexical" *)
485 "my", ["indexical"(*; "zbiór indywiduów"*)]; 485 "my", ["indexical"(*; "zbiór indywiduów"*)];
486 "pro1", ["indexical"(*; "zbiór indywiduów"*)]; 486 "pro1", ["indexical"(*; "zbiór indywiduów"*)];
@@ -500,7 +500,7 @@ let pron_lexemes = Xlist.fold [ @@ -500,7 +500,7 @@ let pron_lexemes = Xlist.fold [
500 "pro3pl",["coreferential";"deictic"]; 500 "pro3pl",["coreferential";"deictic"];
501 "pro", ["indexical";"coreferential";"deictic"]; 501 "pro", ["indexical";"coreferential";"deictic"];
502 "siebie",["coreferential"]; 502 "siebie",["coreferential"];
503 - ] StringMap.empty (fun map (k,l) -> StringMap.add map k (Special l)) 503 + ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
504 504
505 (*let num_lexemes = Xlist.fold [ 505 (*let num_lexemes = Xlist.fold [
506 "10", "10",[],"exact",[]); 506 "10", "10",[],"exact",[]);
@@ -529,27 +529,3 @@ let pron_lexemes = Xlist.fold [ @@ -529,27 +529,3 @@ let pron_lexemes = Xlist.fold [
529 "trochę", "trochę",[],"approx",[]); 529 "trochę", "trochę",[],"approx",[]);
530 "wiele", "wiele",[],"approx",[]); 530 "wiele", "wiele",[],"approx",[]);
531 ] StringMap.empty (fun map (k,v,w) -> StringMap.add map k (SpecialMod(v,w)))*) 531 ] StringMap.empty (fun map (k,v,w) -> StringMap.add map k (SpecialMod(v,w)))*)
532 -  
533 -(* UWAGA: przy przetwarzaniu danych zdezambiguowanych ta procedura nie zmienia liczby tokenów *)  
534 -let assign_semantics tokens lex_sems group =  
535 - Xlist.iter group (fun id ->  
536 - let token = (ExtArray.get tokens id).token in  
537 - let t = ExtArray.get lex_sems id in  
538 - let t = match token with  
539 - Lemma(lemma,"subst",_) -> {t with lroles=(try StringMap.find subst_inst_roles lemma with Not_found -> "",""); semantics=try StringMap.find subst_special_lexemes lemma with Not_found -> Normal}  
540 - | Lemma(lemma,"depr",_) -> {t with semantics=try StringMap.find subst_special_lexemes lemma with Not_found -> Normal}  
541 - | Lemma(lemma,"adj",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal}  
542 - | Lemma(lemma,"adjc",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal}  
543 - | Lemma(lemma,"adjp",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal}  
544 - | Lemma(lemma,"adv",_) -> {t with lroles=(try StringMap.find adv_roles lemma with Not_found -> "",""); semantics=try StringMap.find adv_special_lexemes lemma with Not_found -> Normal}  
545 - | Lemma(lemma,"qub",_) -> {t with lroles=(try StringMap.find qub_roles lemma with Not_found -> "",""); semantics=try StringMap.find qub_special_lexemes lemma with Not_found -> Normal}  
546 -(* | Lemma(lemma,"num",_) -> [{t with semantics=try StringMap.find num_lexemes lemma with Not_found -> Normal}] *)  
547 - | Lemma(lemma,"ppron12",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal}  
548 - | Lemma(lemma,"ppron3",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal}  
549 - | Lemma(lemma,"siebie",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal}  
550 - | Lemma(lemma,"prep",l) ->  
551 - let cases = Xlist.fold l StringSet.empty (fun set -> function cases :: _ -> Xlist.fold cases set StringSet.add | _ -> set) in  
552 - {t with semantics=assign_prep_semantics lemma (StringSet.to_list cases) t}  
553 - | _ -> t in  
554 - ExtArray.set lex_sems id t)  
555 -*)  
lexSemantics/ENIAMlexSemanticsTypes.ml
@@ -20,22 +20,6 @@ @@ -20,22 +20,6 @@
20 open ENIAMtokenizerTypes 20 open ENIAMtokenizerTypes
21 open Xstd 21 open Xstd
22 22
23 -(* FIXME: usunąć *)  
24 -(*type labels = {  
25 - number: string;  
26 - case: string;  
27 - gender: string;  
28 - person: string;  
29 - aspect: string;  
30 - }*)  
31 -  
32 -type semantics =  
33 - Normal  
34 - | Special of string list  
35 -(* | SpecialNoun of type_arg list * type_term  
36 - | SpecialMod of string * (type_arg list * type_term)*)  
37 - | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *)  
38 -  
39 type frame = { 23 type frame = {
40 selectors: (ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list; 24 selectors: (ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list;
41 meanings: ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list; 25 meanings: ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list;
@@ -43,11 +27,12 @@ type frame = { @@ -43,11 +27,12 @@ type frame = {
43 arole: string; 27 arole: string;
44 arole_attr: string; 28 arole_attr: string;
45 arev: bool; 29 arev: bool;
  30 + sem_args: string list;
46 sopinion: ENIAMwalTypes.opinion; 31 sopinion: ENIAMwalTypes.opinion;
47 fopinion: ENIAMwalTypes.opinion; 32 fopinion: ENIAMwalTypes.opinion;
48 } 33 }
49 34
50 -let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; 35 +let empty_frame = {selectors=[]; meanings=[]; positions=[]; arole=""; arole_attr=""; arev=false; sem_args=[];
51 sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} 36 sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony}
52 37
53 type lex_sem = { 38 type lex_sem = {
@@ -57,39 +42,10 @@ type lex_sem = { @@ -57,39 +42,10 @@ type lex_sem = {
57 ENIAM_LCGtypes.grammar_symbol) list; 42 ENIAM_LCGtypes.grammar_symbol) list;
58 frames: frame list; 43 frames: frame list;
59 cats: string list; 44 cats: string list;
60 - (* e: labels; *)  
61 - (* valence: (int * ENIAMwalTypes.frame) list;  
62 - simple_valence: (int * ENIAMwalTypes.frame) list;  
63 - very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *)  
64 - (* senses: (string * (string * int) list * float) list; *)  
65 - (* lroles: string * string; *)  
66 - semantics: semantics;  
67 } 45 }
68 46
69 -(*let empty_labels = {  
70 - number="";  
71 - case="";  
72 - gender="";  
73 - person="";  
74 - aspect="";  
75 - }*)  
76 -  
77 let empty_lex_sem = { 47 let empty_lex_sem = {
78 - schemata=[]; lex_entries=[]; frames=[]; cats=["X"];  
79 - (*e=empty_labels;*) (*valence=[]; simple_valence=[]; very_simple_valence=[];*) (*senses=[];*)  
80 - (*lroles="","";*) semantics=Normal}  
81 -  
82 -(* FIXME: poprawić katalog *)  
83 -(*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat"  
84 -let subst_uncountable_lexemes_filename2 = resource_path ^ "/lexSemantics/subst_uncountable_stare.dat"  
85 -let subst_container_lexemes_filename = resource_path ^ "/lexSemantics/subst_container.dat"  
86 -let subst_numeral_lexemes_filename = resource_path ^ "/lexSemantics/subst_numeral.dat"  
87 - let subst_time_lexemes_filename = resource_path ^ "/lexSemantics/subst_time.dat"*)  
88 -(* let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat"  
89 -let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncountable_stare.dat"  
90 -let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat"  
91 -let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat"  
92 -let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" *) 48 + schemata=[]; lex_entries=[]; frames=[]; cats=["X"]}
93 49
94 let hipero_threshold = 3 50 let hipero_threshold = 3
95 let unknown_meaning_weight = -1. 51 let unknown_meaning_weight = -1.
semsources/num.tab deleted
1 -zero num Count 0  
2 -jeden num Count 1 stress  
3 -dwa num Count 2  
4 -trzy num Count 3  
5 -cztery num Count 4  
6 -pięć num Count 5  
7 -sześć num Count 6  
8 -siedem num Count 7  
9 -osiem num Count 8  
10 -dziewięć num Count 9  
11 -dziesięć num Count 10  
12 -jedenaście num Count 11  
13 -dwanaście num Count 12  
14 -trzynaście num Count 13  
15 -czternaście num Count 14  
16 -piętnaście num Count 15  
17 -szesnaście num Count 16  
18 -siedemnaście num Count 17  
19 -osiemnaście num Count 18  
20 -dziewiętnaście num Count 19  
21 -dwadzieścia num Count 20  
22 -trzydzieści num Count 30  
23 -czterdzieści num Count 40  
24 -pięćdziesiąt num Count 50  
25 -sześćdziesiąt num Count 60  
26 -siedemdziesiąt num Count 70  
27 -osiemdziesiąt num Count 80  
28 -dziewięćdziesiąt num Count 90  
29 -sto num Count 100  
30 -dwieście num Count 200  
31 -trzysta num Count 300  
32 -czterysta num Count 400  
33 -pięćset num Count 500  
34 -sześćset num Count 600  
35 -siedemset num Count 700  
36 -osiemset num Count 800  
37 -dziewięćset num Count 900  
38 -tysiąc num Count 1000  
39 -milion num Count 1000000  
40 -miliard num Count 1000000000  
41 -bilion num Count 1000000000000  
42 -biliard num Count 1000000000000000  
43 -trylion num Count 1E+018  
44 -tryliard num Count 1E+021  
45 -kwadrylion num Count 1E+024  
46 -mniej num Count,Measure comparative  
47 -najmniej num Count,Measure comparative  
48 -najwięcej num Count,Measure comparative  
49 -więcej num Count,Measure comparative  
50 -oba num Count ep???  
51 -obydwa num Count ep???  
52 -tyle num Count,Measure indexical, correferential, deictic ???  
53 -ile num Count,Measure interrogative, rel  
54 -dużo num Count,Measure relational  
55 -mało num Count,Measure relational  
56 -mnóstwo num-NKJP1M Count,Measure relational  
57 -moc num Count,Measure relational  
58 -nieco num Count,Measure relational  
59 -niedużo num Count,Measure relational  
60 -niemało num Count,Measure relational  
61 -niewiele num Count,Measure relational  
62 -sporo num Count,Measure relational  
63 -trochę num Count,Measure relational  
64 -troszeczkę num-NKJP1M Count,Measure relational  
65 -troszkę num-NKJP1M Count,Measure relational  
66 -wiele num Count,Measure relational  
67 -ileż num Count,Measure stress  
68 -tyleż num Count,Measure stress, indexical, correferential, deictic ???  
69 -wieleż num Count,Measure stress, relational  
70 -ćwierć num Count  
71 -gros num Count  
72 -ilekolwiek num Count,Measure  
73 -ileś num Count,Measure  
74 -kilkadziesiąt num Count  
75 -kilkanaście num Count  
76 -kilka num Count  
77 -kilkaset num Count  
78 -kupa num-NKJP1M Count  
79 -parędziesiąt num Count  
80 -paręnaście num Count  
81 -parę num Count  
82 -paręset num Count  
83 -pół num Count  
84 -półtora num Count  
85 -tysiące num Count  
86 -wieledziesiąt num Count  
87 -wieleset num Count