Commit af8828aa74aa7370fc151618c82e73f25c3c6fc8
1 parent
7c67798f
redukcja schematów
Showing
6 changed files
with
433 additions
and
55 deletions
lexSemantics/ENIAMwalParser.ml
... | ... | @@ -146,14 +146,14 @@ let parse_aspect = function |
146 | 146 | [Text "perf"] -> Aspect "perf" |
147 | 147 | | [Text "imperf"] -> Aspect "imperf" |
148 | 148 | | [Text "_"] -> AspectUndef |
149 | - | [Text ""] -> AspectNA | |
149 | + (* | [Text ""] -> AspectNA *) | |
150 | 150 | | l -> failwith ("parse_aspect: " ^ string_of_token_list l) |
151 | 151 | |
152 | 152 | let parse_negation = function |
153 | 153 | [Text "_"] -> NegationUndef |
154 | 154 | | [Text "neg"] -> Negation |
155 | 155 | | [Text "aff"] -> Aff |
156 | - | [Text ""] -> NegationNA | |
156 | + (* | [Text ""] -> NegationNA *) | |
157 | 157 | | l -> failwith ("parse_negation: " ^ string_of_token_list l) |
158 | 158 | |
159 | 159 | let parse_refl = function |
... | ... | @@ -261,8 +261,8 @@ let rec parse_phrase = function |
261 | 261 | | "fixed",[[Text lemma]] -> FixedP lemma |
262 | 262 | | "fixed",[[Text lemma1];[Text lemma2]] -> FixedP (lemma1 ^ "," ^ lemma2) |
263 | 263 | | "or",[] -> Or |
264 | - | "refl",[] -> Refl | |
265 | - | "recip",[] -> Recip | |
264 | + (* | "refl",[] -> Refl | |
265 | + | "recip",[] -> Recip *) | |
266 | 266 | | "E",[morf] -> E(parse_morf morf) |
267 | 267 | | "advp",[] -> AdvP |
268 | 268 | | "null",[] -> Null |
... | ... |
lexSemantics/ENIAMwalReduce.ml
... | ... | @@ -20,6 +20,50 @@ |
20 | 20 | open ENIAMwalTypes |
21 | 21 | open Xstd |
22 | 22 | |
23 | +let create_phrase_reqs s (reqs,noreqs) = function | |
24 | + | PrepNP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
25 | + | PrepAdjP(prep,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
26 | + | PrepNCP(prep,_,_,_) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
27 | + | ComparP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
28 | + | FixedP(prep) -> StringMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
29 | + | SimpleLexArg(lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs | |
30 | + | LexArg(_,lex,_) -> StringMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs | |
31 | + | MorfId _ -> failwith "create_phrase_reqs" | |
32 | + | _ -> reqs, StringSet.add noreqs s | |
33 | + | |
34 | +let create_phrase_reqs2 s (reqs,noreqs) = function | |
35 | + | PrepNP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
36 | + | PrepAdjP(prep,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
37 | + | PrepNCP(prep,_,_,_) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
38 | + | ComparP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
39 | + | FixedP(prep) -> IntMap.add_inc reqs s (StringSet.singleton prep) (fun set -> StringSet.add set prep), noreqs | |
40 | + | SimpleLexArg(lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs | |
41 | + | LexArg(_,lex,_) -> IntMap.add_inc reqs s (StringSet.singleton lex) (fun set -> StringSet.add set lex), noreqs | |
42 | + | MorfId _ -> failwith "create_phrase_reqs2" | |
43 | + | _ -> reqs, IntSet.add noreqs s | |
44 | + | |
45 | +let create_comprep_reqs entries = | |
46 | + let reqs,noreqs,reqs2 = Entries.fold entries (StringMap.empty,StringSet.empty,StringMap.empty) (fun (reqs,noreqs,reqs2) _ lemma -> function | |
47 | + ComprepNPEntry(s,NoRestr,[p]) -> | |
48 | + let reqs,noreqs = Xlist.fold p.morfs (reqs,noreqs) (create_phrase_reqs s) in | |
49 | + reqs,noreqs,StringMap.add_inc reqs2 s (StringSet.singleton lemma) (fun set -> StringSet.add set lemma) | |
50 | + | ComprepNPEntry(s,NoRestr,_) -> reqs, StringSet.add noreqs s, reqs2 | |
51 | + | ComprepNPEntry _ -> failwith "create_comprep_reqs" | |
52 | + | _ -> reqs,noreqs,reqs2) in | |
53 | + StringMap.fold reqs StringMap.empty (fun reqs s l -> | |
54 | + if StringSet.mem noreqs s then reqs else StringMap.add reqs s l),reqs2 | |
55 | + | |
56 | +let create_lexarg_reqs entries = | |
57 | + let reqs,noreqs = Entries.fold entries (IntMap.empty,IntSet.empty) (fun (reqs,noreqs) _ _ -> function | |
58 | + LexEntry(id,_,_,NoRestr,[p]) -> Xlist.fold p.morfs (reqs,noreqs) (create_phrase_reqs2 id) | |
59 | + | LexEntry(id,_,_,NoRestr,_) -> reqs, IntSet.add noreqs id | |
60 | + | _ -> reqs,noreqs) in | |
61 | + IntMap.fold reqs IntMap.empty (fun reqs s l -> | |
62 | + if IntSet.mem noreqs s then reqs else IntMap.add reqs s l) | |
63 | + | |
64 | +let comprep_reqs,comprep_reqs2 = create_comprep_reqs ENIAMwalParser.entries | |
65 | +let lexarg_reqs = create_lexarg_reqs ENIAMwalParser.entries | |
66 | + | |
23 | 67 | (* let rec assign_pro_args schema = |
24 | 68 | Xlist.map schema (fun s -> |
25 | 69 | let morfs = match s.morfs with |
... | ... | @@ -51,53 +95,139 @@ open Xstd |
51 | 95 | |
52 | 96 | exception ImpossibleSchema |
53 | 97 | |
54 | -let rec reduce_comp lexemes = function | |
55 | - Comp s -> if StringMap.mem lexemes s then Comp s else raise Not_found | |
56 | - | Zeby -> if StringMap.mem lexemes "żeby" || StringMap.mem lexemes "że" then Zeby else raise Not_found | |
57 | - | Gdy -> if StringMap.mem lexemes "gdy" || StringMap.mem lexemes "gdyby" then Gdy else raise Not_found | |
98 | +let rec reduce_comp test_lexemes = function | |
99 | + Comp s -> if test_lexemes s then Comp s else raise Not_found | |
100 | + | Zeby -> if test_lexemes "żeby" || test_lexemes "że" then Zeby else raise Not_found | |
101 | + | Gdy -> if test_lexemes "gdy" || test_lexemes "gdyby" then Gdy else raise Not_found | |
58 | 102 | | CompUndef -> failwith "reduce_comp" |
59 | 103 | |
60 | -let reduce_phrase comprep_reqs lexemes = function | |
61 | - | PrepNP(prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
62 | - | PrepAdjP(prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
63 | - | ComprepNP(prep) as phrase -> if Xlist.fold (try StringMap.find comprep_reqs prep with Not_found -> []) true (fun b s -> b && StringMap.mem lexemes s) then phrase else raise Not_found | |
64 | - | ComparP(prep) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
65 | - | CP(ctype,comp) -> CP(ctype,reduce_comp lexemes comp) | |
66 | - | NCP(case,ctype,comp) -> if StringMap.mem lexemes "to" then NCP(case,ctype,reduce_comp lexemes comp) else raise Not_found | |
67 | - | PrepNCP(prep,case,ctype,comp) -> if StringMap.mem lexemes prep && StringMap.mem lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp lexemes comp) else raise Not_found | |
68 | - | SimpleLexArg(lemma,_) as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found | |
69 | - | LexArg(_,lemma,_) as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found | |
70 | - | FixedP lemma as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found | |
104 | +let reduce_phrase (test_comprep_reqs,test_comprep_reqs2,test_lexarg_reqs,test_lexemes) = function | |
105 | + | PrepNP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found | |
106 | + | PrepAdjP(prep,case) as phrase -> if test_lexemes prep then phrase else raise Not_found | |
107 | + | ComprepNP(prep) as phrase -> if test_comprep_reqs prep && test_comprep_reqs2 prep then phrase else raise Not_found | |
108 | + | ComparP(prep) as phrase -> if test_lexemes prep then phrase else raise Not_found | |
109 | + | CP(ctype,comp) -> CP(ctype,reduce_comp test_lexemes comp) | |
110 | + | NCP(case,ctype,comp) -> if test_lexemes "to" then NCP(case,ctype,reduce_comp test_lexemes comp) else raise Not_found | |
111 | + | PrepNCP(prep,case,ctype,comp) -> if test_lexemes prep && test_lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp test_lexemes comp) else raise Not_found | |
112 | + | SimpleLexArg(lemma,_) as phrase -> if test_lexemes lemma then phrase else raise Not_found | |
113 | + | LexArg(id,lemma,_) as phrase -> if test_lexemes lemma && test_lexarg_reqs id then phrase else raise Not_found | |
114 | + | FixedP lemma as phrase -> if test_lexemes lemma then phrase else raise Not_found | |
71 | 115 | | phrase -> phrase |
72 | 116 | |
73 | -let rec reduce_morfs comprep_reqs lexemes = function | |
117 | +let rec reduce_morfs tests = function | |
74 | 118 | [] -> [] |
75 | - | morf :: l -> (try [reduce_phrase comprep_reqs lexemes morf] with Not_found -> []) @ reduce_morfs comprep_reqs lexemes l | |
119 | + | morf :: l -> (try [reduce_phrase tests morf] | |
120 | + with Not_found -> []) @ reduce_morfs tests l | |
76 | 121 | |
77 | -let rec reduce_schema2 comprep_reqs lexemes = function | |
122 | +let rec reduce_schema2 tests = function | |
78 | 123 | [] -> [] |
79 | 124 | | s :: l -> |
80 | - let morfs = reduce_morfs comprep_reqs lexemes s.morfs in | |
81 | - if morfs = [] then reduce_schema2 comprep_reqs lexemes l else | |
82 | - {s with morfs=morfs} :: reduce_schema2 comprep_reqs lexemes l | |
125 | + let morfs = reduce_morfs tests s.morfs in | |
126 | + if morfs = [] then reduce_schema2 tests l else | |
127 | + {s with morfs=morfs} :: reduce_schema2 tests l | |
83 | 128 | |
84 | -let rec reduce_schema comprep_reqs lexemes = function | |
129 | +let rec reduce_schema tests = function | |
85 | 130 | [] -> [] |
86 | 131 | | s :: l -> |
87 | - let morfs = reduce_morfs comprep_reqs lexemes s.morfs in | |
132 | + let morfs = reduce_morfs tests s.morfs in | |
88 | 133 | if morfs = [] then raise ImpossibleSchema else |
89 | - {s with morfs=morfs} :: reduce_schema comprep_reqs lexemes l | |
90 | - | |
91 | -(* let reduce_schema_frame lexemes = function | |
92 | - Frame(atrs,schema) -> Frame(atrs,reduce_schema lexemes schema) | |
93 | - (* | ComprepFrame(s,morfs) -> | |
94 | - let morfs = reduce_morfs lexemes morfs in | |
95 | - if morfs = [] then raise ImpossibleSchema else ComprepFrame(s,morfs)*) | |
96 | - | _ -> failwith "reduce_schema_frame" *) | |
97 | - | |
134 | + {s with morfs=morfs} :: reduce_schema tests l | |
98 | 135 | |
99 | 136 | let reduce_entries lexemes entries = |
100 | 137 | StringMap.map entries (fun entries -> |
101 | 138 | StringSet.fold lexemes StringMap.empty (fun reduced lemma -> |
102 | 139 | try StringMap.add reduced lemma (StringMap.find entries lemma) |
103 | 140 | with Not_found -> reduced)) |
141 | + | |
142 | +let merge_schema phrases schema = | |
143 | + Xlist.map schema (fun p -> | |
144 | + let morfs = List.flatten (Xlist.map p.morfs (function | |
145 | + MorfId id -> (try IntMap.find phrases id with Not_found -> failwith "merge_schema") | |
146 | + | _ -> failwith "merge_schema")) in | |
147 | + {p with morfs=morfs}) | |
148 | + | |
149 | +let merge_entries phrases entries = | |
150 | + Entries.map entries (fun _ _ (opinion,neg,pred,aspect,schema) -> | |
151 | + opinion,neg,pred,aspect,merge_schema phrases schema) | |
152 | + | |
153 | +let merge_entries_conn phrases meanings entries = | |
154 | + Entries.map entries (fun _ _ (sopinion,fopinion,meaning_ids,neg,pred,aspect,schema) -> | |
155 | + let meanings = Xlist.map meaning_ids (fun id -> | |
156 | + try IntMap.find meanings id with Not_found -> failwith "merge_entries_conn") in | |
157 | + sopinion,fopinion,meanings,neg,pred,aspect,merge_schema phrases schema) | |
158 | + | |
159 | +let create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes = | |
160 | + (fun s -> | |
161 | + if StringMap.mem comprep_reqs s then | |
162 | + not (StringSet.is_empty (StringSet.intersection (StringMap.find comprep_reqs s) lexemes)) | |
163 | + else true), | |
164 | + (fun s -> | |
165 | + if StringMap.mem comprep_reqs s then | |
166 | + not (StringSet.is_empty (StringSet.intersection (StringMap.find comprep_reqs2 s) lexemes)) | |
167 | + else true), | |
168 | + (fun s -> | |
169 | + if IntMap.mem lexarg_reqs s then | |
170 | + not (StringSet.is_empty (StringSet.intersection (IntMap.find lexarg_reqs s) lexemes)) | |
171 | + else true), | |
172 | + StringSet.mem lexemes | |
173 | + | |
174 | + | |
175 | +let select_entries phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes = | |
176 | + let tests = create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes in | |
177 | + let entries = reduce_entries lexemes entries in | |
178 | + let schemata = reduce_entries lexemes schemata in | |
179 | + let connected = reduce_entries lexemes connected in | |
180 | + let schemata = merge_entries phrases schemata in | |
181 | + let entries = Entries.flatten_map entries (fun _ _ entry -> | |
182 | + try (match entry with | |
183 | + | LexEntry(id,lemma,pos,NoRestr,schema) -> [LexEntry(id,lemma,pos,NoRestr,reduce_schema tests schema)] | |
184 | + | ComprepNPEntry(s,NoRestr,schema) -> [ComprepNPEntry(s,NoRestr,reduce_schema tests schema)] | |
185 | + | _ -> [entry]) | |
186 | + with ImpossibleSchema -> []) in | |
187 | + let schemata = Entries.map schemata (fun _ _ (opinion,neg,pred,aspect,schema) -> | |
188 | + opinion,neg,pred,aspect,reduce_schema2 tests schema) in | |
189 | + let connected = merge_entries_conn phrases meanings connected in | |
190 | + entries,schemata,connected | |
191 | + | |
192 | +let select_all_entries phrases entries schemata connected meanings = | |
193 | + let schemata = merge_entries phrases schemata in | |
194 | + let connected = merge_entries_conn phrases meanings connected in | |
195 | + entries,schemata,connected | |
196 | + | |
197 | +let entries,schemata,connected = | |
198 | + (* let lexemes = StringSet.of_list ["Ala"; "ma"; "kot"] in *) | |
199 | + let lexemes = StringSet.of_list ["dorastać"; "dorobić"; "po"; "bok"; "na"] in | |
200 | + select_entries ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata | |
201 | + ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes | |
202 | + | |
203 | +(* TODO | |
204 | + - usunięcie adjunctów | |
205 | + - uwzględnienie cech morfoskładniowych | |
206 | + - scalenie schematów | |
207 | + - dodanie adjunctów | |
208 | +*) | |
209 | + | |
210 | +(* let _ = | |
211 | + StringMap.iter comprep_reqs (fun s set -> | |
212 | + Printf.printf "%s: %s\n" s (String.concat " " (StringSet.to_list set))) *) | |
213 | + | |
214 | +(* let _ = | |
215 | + StringMap.iter comprep_reqs2 (fun s set -> | |
216 | + Printf.printf "%s: %s\n" s (String.concat " " (StringSet.to_list set))) *) | |
217 | + | |
218 | +(* let _ = | |
219 | + IntMap.iter lexarg_reqs (fun s set -> | |
220 | + Printf.printf "%d: %s\n" s (String.concat " " (StringSet.to_list set))) *) | |
221 | + | |
222 | +(* let _ = | |
223 | + Entries.iter entries (fun pos lemma entry -> | |
224 | + Printf.printf "%s\t%s\t%s\n" pos lemma (ENIAMwalStringOf.lex_entry entry)); | |
225 | + Entries.iter schemata (fun pos lemma (_,_,_,_,schema) -> | |
226 | + Printf.printf "%s\t%s\t%s\n" pos lemma (ENIAMwalStringOf.schema schema)); | |
227 | + Xlist.iter (Entries.find ENIAMwalParser.schemata "verb" "dorobić") (fun (_,_,_,_,schema) -> | |
228 | + let schema = merge_schema ENIAMwalParser.phrases schema in | |
229 | + Printf.printf "%s\n" (ENIAMwalStringOf.schema schema)); | |
230 | + Xlist.iter (Entries.find ENIAMwalParser.schemata "verb" "dorastać") (fun (_,_,_,_,schema) -> | |
231 | + let schema = merge_schema ENIAMwalParser.phrases schema in | |
232 | + Printf.printf "%s\n" (ENIAMwalStringOf.schema schema)); | |
233 | + () *) | |
... | ... |
lexSemantics/ENIAMwalStringOf.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMwalenty, a converter for Polish Valence Dictionary "Walenty". | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAMwalTypes | |
21 | + | |
22 | +let opinion = function | |
23 | + Pewny -> "cer" | |
24 | + | Potoczny -> "col" | |
25 | + | Watpliwy -> "unc" | |
26 | + | Archaiczny -> "dat" | |
27 | + | Zly -> "bad" | |
28 | + | Wulgarny -> "vul" | |
29 | + | Dziedzinowy -> "dom" | |
30 | + | Nieokreslony -> "unk" | |
31 | + | Metaforyczny -> "met" | |
32 | + | Sporadyczny -> "rar" | |
33 | + (* | OpinionUndef -> failwith "ENIAMwalStringOf.opinion" *) | |
34 | + | |
35 | +let negation = function | |
36 | + Negation -> "neg" | |
37 | + | Aff -> "aff" | |
38 | + | NegationUndef -> "_" | |
39 | + (* | NegationNA -> "" *) | |
40 | + | |
41 | +let pred = function | |
42 | + (* PredNA -> "" *) | |
43 | + | PredTrue -> "pred" | |
44 | + | PredFalse -> "nopred" | |
45 | + | PredUndef -> "_" | |
46 | + | |
47 | +let aspect = function | |
48 | + Aspect s -> s | |
49 | + | AspectUndef -> "_" | |
50 | + (* | AspectNA -> "" *) | |
51 | + | |
52 | +let case = function | |
53 | + Case s -> s | |
54 | + | Str -> "str" | |
55 | + | Part -> "part" | |
56 | + | CaseAgr -> "agr" | |
57 | + (* | CaseUAgr -> "uagr" | |
58 | + | NomAgr -> "nomagr" | |
59 | + | GenAgr -> "genagr" | |
60 | + | AllAgr -> "allagr" | |
61 | + | AllUAgr -> "alluagr" *) | |
62 | + | CaseUndef -> "_" | |
63 | + | |
64 | +let rec comp = function | |
65 | + Comp s -> s | |
66 | + | Zeby -> "żeby2" | |
67 | + | Gdy -> "gdy" | |
68 | + | CompUndef -> "_" | |
69 | + | |
70 | +let rec comp_type = function | |
71 | + Int -> "int" | |
72 | + | Rel -> "rel" | |
73 | + | CompTypeUndef -> "_" | |
74 | + | |
75 | +let number = function | |
76 | + Number s -> s | |
77 | + | NumberAgr -> "agr" | |
78 | + | NumberUndef -> "_" | |
79 | + | |
80 | +let gender = function | |
81 | + Gender s -> s | |
82 | + | GenderUndef -> "_" | |
83 | + | GenderAgr -> "agr" | |
84 | + | Genders l -> String.concat "." l | |
85 | + | |
86 | +let grad = function | |
87 | + Grad s -> s | |
88 | + | GradUndef -> "_" | |
89 | + | |
90 | +let refl = function | |
91 | + (* ReflEmpty -> "" *) | |
92 | + | ReflTrue -> "się" | |
93 | + | ReflFalse -> "nosię" | |
94 | + | ReflUndef -> "_" | |
95 | + | |
96 | +let acm = function | |
97 | + Acm s -> s | |
98 | + | AcmUndef -> "_" | |
99 | + | |
100 | +let gf = function | |
101 | + SUBJ -> "subj" | |
102 | + | OBJ -> "obj" | |
103 | + | ARG -> "arg"(*""*) | |
104 | + | |
105 | +let pos = function | |
106 | + SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")" | |
107 | + | PPRON12(n,c) -> "PPRON12(" ^ number n ^ "," ^ case c ^ ")" | |
108 | + | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")" | |
109 | + | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")" | |
110 | + | PREP(c) -> "PREP(" ^ case c ^ ")" | |
111 | + | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")" | |
112 | + | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")" | |
113 | + | ADV(gr) -> "ADV(" ^ grad gr ^ ")" | |
114 | + | GER(n,c,g,a,neg,r) -> "GER(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" | |
115 | + | PACT(n,c,g,a,neg,r) -> "PACT(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ "," ^ refl r ^ ")" | |
116 | + | PPAS(n,c,g,a,neg) -> "PPAS(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ aspect a ^ "," ^ negation neg ^ ")" | |
117 | + | INF(a,n,r) -> "INF(" ^ aspect a ^ "," ^ negation n ^ "," ^ refl r ^ ")" | |
118 | + | QUB -> "QUB" | |
119 | + | COMPAR -> "COMPAR" | |
120 | + | COMP(c) -> "COMP(" ^ comp_type c ^ ")" | |
121 | + | PERS(n,r) -> "PERS(" ^ negation n ^ "," ^ refl r ^ ")" | |
122 | + | FIXED -> "FIXED" | |
123 | + | |
124 | +let rec phrase = function | |
125 | + NP c -> "np(" ^ case c ^ ")" | |
126 | + | PrepNP(prep,c) -> "prepnp(" ^ prep ^ "," ^ case c ^ ")" | |
127 | + | AdjP c -> "adjp(" ^ case c ^ ")" | |
128 | + | PrepAdjP(prep,c) -> "prepadjp(" ^ prep ^ "," ^ case c ^ ")" | |
129 | + (* | NumP(c) -> "nump(" ^ case c ^ ")" | |
130 | + | PrepNumP(prep,c) -> "prepnump(" ^ prep ^ "," ^ case c ^ ")" *) | |
131 | + | ComprepNP(prep) -> "comprepnp(" ^ prep ^ ")" | |
132 | + | ComparP(prep) -> "comparp(" ^ prep ^ ")" | |
133 | + | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")" | |
134 | + | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" | |
135 | + | PrepNCP(prep,c,ct,co) -> "prepncp(" ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" | |
136 | + | InfP(a) -> "infp(" ^ aspect a (*^ req r*) ^ ")" | |
137 | + | AdvP -> "advp" | |
138 | + | FixedP s -> "fixed(" ^ s ^ ")" | |
139 | + (* | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")" *) | |
140 | + | Or -> "or" | |
141 | + | Qub -> "qub" | |
142 | + (* | Pro -> "pro" | |
143 | + | ProNG -> "prong" *) | |
144 | + | Null -> "null" | |
145 | + (* | GerP c -> "gerp(" ^ case c ^ ")" | |
146 | + | PrepGerP(prep,c) -> "prepgerp(" ^ prep ^ "," ^ case c ^ ")" | |
147 | + | PpasP c -> "ppasp(" ^ case c ^ ")" | |
148 | + | PrepPpasP(prep,c) -> "prepppasp(" ^ prep ^ "," ^ case c ^ ")" | |
149 | + | PactP c -> "pactp(" ^ case c ^ ")" *) | |
150 | + | E p -> "E(" ^ phrase p ^ ")" | |
151 | + | MorfId id -> Printf.sprintf "id(%d)" id | |
152 | + | SimpleLexArg(le,p) -> "lex(" ^ le ^ "," ^ pos p ^ ")" | |
153 | + | LexArg(id,le,p) -> "lex(" ^ string_of_int id ^ "," ^ le ^ "," ^ pos p ^ ")" | |
154 | + | |
155 | + | |
156 | +let restr = function | |
157 | + Natr -> "natr" | |
158 | + | Atr -> "atr" | |
159 | + | Ratr -> "ratr" | |
160 | + | Ratrs -> "ratrs" | |
161 | + | Atr1 -> "atr1" | |
162 | + | Ratr1 -> "ratr1" | |
163 | + | NoRestr -> "" | |
164 | + | |
165 | +let controllers l = | |
166 | + Xlist.map l (function | |
167 | + "1" -> "controller" | |
168 | + | n -> "controller" ^ n) | |
169 | + | |
170 | +let controllees l = | |
171 | + Xlist.map l (function | |
172 | + "1" -> "controllee" | |
173 | + | n -> "controllee" ^ n) | |
174 | + | |
175 | +let rec schema schema = | |
176 | + String.concat "+" (Xlist.map schema (fun s -> | |
177 | + String.concat "," ( | |
178 | + (if s.gf = ARG then [] else [gf s.gf])@s.mode@ | |
179 | + (if s.role = "" then [] else [s.role])@ | |
180 | + (if s.role_attr = "" then [] else [s.role_attr])@ | |
181 | + (*s.sel_prefs@*)(controllers s.cr)@(controllees s.ce)) ^ "{" ^ String.concat ";" (Xlist.map s.morfs phrase) ^ "}")) | |
182 | +(* | |
183 | +and morf = function | |
184 | + Phrase p -> phrase p | |
185 | + | LexPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" | |
186 | + | PhraseAbbr(p,ml) -> phrase_abbr p ^ "[" ^ String.concat ";" (Xlist.map ml morf) ^ "]" | |
187 | + | PhraseComp(p,(ct,l)) -> phrase_comp p ^ "," ^ comp_type ct ^ "[" ^ String.concat ";" (Xlist.map l comp) ^ "]" | |
188 | + | |
189 | +let simple_morf = function | |
190 | + | MorfId id -> Printf.sprintf "%d" id | |
191 | + | _ -> failwith "ENIAMwalStringOf.simple_morf" | |
192 | + | |
193 | +let rec simple_schema schema = | |
194 | + String.concat "+" (Xlist.map schema (fun s -> | |
195 | + String.concat "," ( | |
196 | + (if s.gf = ARG then [] else [gf s.gf])@ | |
197 | + s.mode@(controllers s.cr)@(controllees s.ce)) ^ | |
198 | + "{" ^ String.concat ";" (Xlist.map s.morfs simple_morf) ^ "}")) | |
199 | + | |
200 | +let sel_prefs = function | |
201 | + SynsetId id -> Printf.sprintf "synset(%d)" id | |
202 | + | Predef id -> id | |
203 | + | RelationArgId _ -> failwith "sel_prefs" | |
204 | + | RelationRole(rel,role,role_attr) -> | |
205 | + if role_attr = "" then Printf.sprintf "relation(%s,%s)" rel role | |
206 | + else Printf.sprintf "relation(%s,%s,%s)" rel role role_attr | |
207 | + | |
208 | + | |
209 | +let sem_frame (s:position) = | |
210 | + s.role ^ | |
211 | + (if s.role_attr="" then "" else "," ^ s.role_attr) ^ | |
212 | + (if s.sel_prefs = [] then "" else | |
213 | + "[" ^ String.concat ";" (Xlist.map s.sel_prefs sel_prefs) ^ "]") | |
214 | + | |
215 | +let rec connected_schema schema = | |
216 | + String.concat "+" (Xlist.map schema (fun s -> | |
217 | + String.concat "," ( | |
218 | + (if s.gf = ARG then [] else [gf s.gf])@ | |
219 | + s.mode@(controllers s.cr)@(controllees s.ce)) ^ | |
220 | + "{" ^ String.concat ";" (Xlist.map s.morfs simple_morf) ^ "}:" ^ sem_frame s)) | |
221 | +*) | |
222 | + | |
223 | +(* let meaning m = | |
224 | + m.name ^ "-" ^ m.variant *) | |
225 | + | |
226 | +let lex_entry = function | |
227 | + SimpleLexEntry(le,p) -> | |
228 | + Printf.sprintf "lex(%s,%s)" le p | |
229 | + | LexEntry(id,le,p,NoRestr,s) -> | |
230 | + Printf.sprintf "lex(%d,%s,%s)\t%s" id le p (schema s) | |
231 | + | LexEntry(id,le,p,r,[]) -> | |
232 | + Printf.sprintf "lex(%d,%s,%s)\t%s" id le p (restr r) | |
233 | + | ComprepNPEntry(le,NoRestr,s) -> | |
234 | + Printf.sprintf "comprepnp(%s)\t%s" le (schema s) | |
235 | + | ComprepNPEntry(le,r,[]) -> | |
236 | + Printf.sprintf "comprepnp(%s)\t%s" le (restr r) | |
237 | + | _ -> failwith "ENIAMwalStringOf.lex_entry" | |
... | ... |
lexSemantics/ENIAMwalTypes.ml
... | ... | @@ -21,23 +21,23 @@ open Xstd |
21 | 21 | |
22 | 22 | type opinion = Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Nieokreslony |
23 | 23 | | Metaforyczny | Dziedzinowy | Sporadyczny |
24 | -type negation = Negation | Aff | NegationUndef | NegationNA | |
25 | -type pred = PredTrue | PredFalse | PredUndef | PredNA | |
26 | -type aspect = Aspect of string | AspectUndef | AspectNA | |
27 | -type case = Case of string | Str | Part | CaseAgr | NomAgr | GenAgr | AllAgr | CaseUndef | AllUAgr | CaseUAgr | |
24 | +type negation = Negation | Aff | NegationUndef (*| NegationNA*) | |
25 | +type pred = PredTrue | PredFalse | PredUndef (*| PredNA*) | |
26 | +type aspect = Aspect of string | AspectUndef (*| AspectNA*) | |
27 | +type case = Case of string | Str | Part | CaseAgr (*| NomAgr | GenAgr | AllAgr*) | CaseUndef (*| AllUAgr | CaseUAgr*) | |
28 | 28 | type comp = Comp of string | Zeby | Gdy | CompUndef |
29 | 29 | type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*) |
30 | 30 | type number = Number of string | NumberUndef | NumberAgr |
31 | 31 | type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list |
32 | 32 | type grad = Grad of string | GradUndef |
33 | -type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef | |
33 | +type refl = (*ReflEmpty |*) ReflTrue | ReflFalse | ReflUndef | |
34 | 34 | type acm = Acm of string | AcmUndef |
35 | 35 | |
36 | -type mood = (*Mood of*) string (*| MoodUndef*) | |
36 | +(*type mood = (*Mood of*) string (*| MoodUndef*) | |
37 | 37 | type tense = string |
38 | 38 | type aux = NoAux | PastAux | FutAux | ImpAux |
39 | 39 | |
40 | -type nsem = Common of string | Time | |
40 | + type nsem = Common of string | Time*) | |
41 | 41 | |
42 | 42 | type gf = SUBJ | OBJ | ARG |
43 | 43 | |
... | ... | @@ -65,8 +65,8 @@ type phrase = |
65 | 65 | | PrepNP of string * case |
66 | 66 | | AdjP of case |
67 | 67 | | PrepAdjP of string * case |
68 | - | NumP of case | |
69 | - | PrepNumP of string * case | |
68 | + (* | NumP of case | |
69 | + | PrepNumP of string * case *) | |
70 | 70 | | ComprepNP of string |
71 | 71 | | ComparP of string (** case*) |
72 | 72 | | CP of comp_type * comp |
... | ... | @@ -75,19 +75,19 @@ type phrase = |
75 | 75 | | InfP of aspect |
76 | 76 | | AdvP |
77 | 77 | | FixedP of string |
78 | - | Num of case * acm | |
78 | + (* | Num of case * acm *) | |
79 | 79 | | Or |
80 | - | Refl | |
81 | - | Recip | |
80 | + (* | Refl | |
81 | + | Recip *) | |
82 | 82 | | Qub |
83 | - | Pro | |
84 | - | ProNG | |
83 | + (* | Pro | |
84 | + | ProNG *) | |
85 | 85 | | Null |
86 | - | GerP of case | |
86 | + (* | GerP of case | |
87 | 87 | | PrepGerP of string * case |
88 | 88 | | PpasP of case |
89 | 89 | | PrepPpasP of string * case |
90 | - | PactP of case | |
90 | + | PactP of case *) | |
91 | 91 | | SimpleLexArg of string * pos |
92 | 92 | | LexArg of int * string * pos |
93 | 93 | | E of phrase |
... | ... |
lexSemantics/entries.ml
... | ... | @@ -31,6 +31,12 @@ let add_inc_list pos_map pos lemma entries = |
31 | 31 | let lemma_map = StringMap.add_inc lemma_map lemma entries (fun l -> entries @ l) in |
32 | 32 | StringMap.add pos_map pos lemma_map |
33 | 33 | |
34 | +let map pos_map f = | |
35 | + StringMap.mapi pos_map (fun pos lemma_map -> | |
36 | + StringMap.mapi lemma_map (fun lemma entries -> | |
37 | + Xlist.rev_map entries (fun entry -> | |
38 | + f pos lemma entry))) | |
39 | + | |
34 | 40 | let flatten_map pos_map f = |
35 | 41 | StringMap.mapi pos_map (fun pos lemma_map -> |
36 | 42 | StringMap.mapi lemma_map (fun lemma entries -> |
... | ... | @@ -53,3 +59,8 @@ let fold pos_map s f = |
53 | 59 | StringMap.fold lemma_map s (fun s lemma entries -> |
54 | 60 | Xlist.fold entries s (fun s entry -> |
55 | 61 | f s pos lemma entry))) |
62 | + | |
63 | +let find pos_map pos lemma = | |
64 | + try | |
65 | + StringMap.find (StringMap.find pos_map pos) lemma | |
66 | + with Not_found -> [] | |
... | ... |
lexSemantics/makefile
... | ... | @@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | 4 | INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-lcg-parser.cmxa eniam-lcg-grammar-pl.cmxa #eniam-lexSemantics.cmxa | |
6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-lcg-parser.cmxa #eniam-lcg-grammar-pl.cmxa #eniam-lexSemantics.cmxa | |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | 9 | SOURCES= ENIAMlexSemanticsTypes.ml ENIAMcategories.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml |
... | ... | @@ -28,8 +28,8 @@ eniam-lexSemantics.cmxa: $(SOURCES) |
28 | 28 | |
29 | 29 | # test: test.ml |
30 | 30 | # $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml |
31 | -test: entries.ml ENIAMwalTypes.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml | |
32 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml | |
31 | +test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml | |
32 | + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml | |
33 | 33 | |
34 | 34 | |
35 | 35 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
... | ... |