Commit d794672dd3115aec142d353e08d987ffd833d32c
1 parent
ae919ded
parsowanie przetworzonego Walentego
Showing
15 changed files
with
1139 additions
and
1224 deletions
walenty/.gitignore
walenty/ENIAMwalConnect.ml
... | ... | @@ -18,17 +18,7 @@ |
18 | 18 | *) |
19 | 19 | |
20 | 20 | open Xstd |
21 | -open ENIAMwalTypes | |
22 | - | |
23 | -(* let process_morfs = function | |
24 | - MorfId id -> id | |
25 | - | _ -> failwith "process_morfs" | |
26 | - | |
27 | -let process_positions positions = | |
28 | - Xlist.fold positions IntMap.empty (fun positions position -> | |
29 | - let r,cr,ce = ENIAMwalParser.parse_roles (position.gf :: position.control) in | |
30 | - let phrases = List.rev (Xlist.rev_map position.morfs process_morfs) in | |
31 | - IntMap.add positions position.psn_id (r,cr,ce,phrases)) *) | |
21 | +open ENIAMwalTypes2 | |
32 | 22 | |
33 | 23 | let process_morfs morfs = |
34 | 24 | Xlist.fold morfs IntMap.empty (fun morfs -> function |
... | ... | @@ -41,19 +31,18 @@ let process_positions positions = |
41 | 31 | |
42 | 32 | let process_schemata schemata = |
43 | 33 | Xlist.fold schemata IntMap.empty (fun schemata schema -> |
44 | - let schema_atrs = DefaultAtrs([],schema.reflexiveMark, | |
45 | - schema.opinion, schema.negativity, schema.predicativity, schema.aspect) in | |
34 | + let atrs = schema.negativity, schema.predicativity, schema.aspect in | |
46 | 35 | let positions = process_positions schema.positions in |
47 | - IntMap.add schemata schema.sch_id (schema_atrs,positions)) | |
36 | + IntMap.add schemata schema.sch_id (schema.reflexiveMark,schema.opinion,atrs,positions)) | |
48 | 37 | |
49 | 38 | let process_arguments arguments = |
50 | 39 | Xlist.fold arguments IntMap.empty (fun arguments argument -> |
51 | 40 | IntMap.add arguments argument.arg_id argument) |
52 | 41 | |
53 | -let process_frames frames = (* FIXME: pomijam opinię *) | |
42 | +let process_frames frames = | |
54 | 43 | Xlist.fold frames IntMap.empty (fun frames frame -> |
55 | 44 | let arguments = process_arguments frame.arguments in |
56 | - IntMap.add frames frame.frm_id (frame.meanings,arguments)) | |
45 | + IntMap.add frames frame.frm_id (frame,arguments)) | |
57 | 46 | |
58 | 47 | let process_meanings meanings = |
59 | 48 | Xlist.fold meanings IntMap.empty (fun meanings meaning -> |
... | ... | @@ -67,18 +56,15 @@ let process_sel_pref arguments = function |
67 | 56 | RelationRole(s,arg.role,arg.role_attribute) |
68 | 57 | | RelationRole _ -> failwith "process_sel_pref" |
69 | 58 | |
70 | -let add_meanings meanings = function | |
71 | - DefaultAtrs(_,r,o,n,p,a) -> DefaultAtrs(meanings,r,o,n,p,a) | |
72 | - | _ -> failwith "add_meanings" | |
73 | - | |
74 | 59 | let connect entry = |
75 | 60 | let schemata = process_schemata entry.schemata in |
76 | 61 | let frames = process_frames entry.frames in |
77 | 62 | let meanings = process_meanings entry.meanings in |
78 | 63 | Xlist.fold entry.alternations [] (fun found alt -> |
79 | - let schema_atrs,positions = IntMap.find schemata alt.schema in | |
80 | - let meaning_ids,arguments = IntMap.find frames alt.frame in | |
81 | - let conn_positions = Xlist.fold alt.connections [] (fun conn_positions conn -> | |
64 | + let refl,opinion,schema_atrs,positions = IntMap.find schemata alt.schema in | |
65 | + let frame,arguments = IntMap.find frames alt.frame in | |
66 | + let conn_positions = if refl then [ENIAMwalTEI.refl_position] else [] in | |
67 | + let conn_positions = Xlist.fold alt.connections conn_positions (fun conn_positions conn -> | |
82 | 68 | let arg = IntMap.find arguments conn.argument in |
83 | 69 | let sel_prefs = Xlist.map arg.sel_prefs (process_sel_pref arguments) in |
84 | 70 | Xlist.fold conn.phrases conn_positions (fun conn_positions (position_id,phrase_ids) -> |
... | ... | @@ -89,191 +75,13 @@ let connect entry = |
89 | 75 | with Not_found -> Printf.printf "%s\n%!" entry.form_orth;morfs) in |
90 | 76 | {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs; |
91 | 77 | morfs=List.rev morfs} :: conn_positions)) in |
92 | - let meanings = List.rev (Xlist.rev_map meaning_ids (fun id -> | |
78 | + let meanings = List.rev (Xlist.rev_map frame.meanings (fun id -> | |
93 | 79 | IntMap.find meanings id)) in |
94 | - let schema_atrs = add_meanings meanings schema_atrs in | |
95 | - (Frame(schema_atrs,conn_positions)) :: found) | |
80 | + (opinion,frame.opinion,meanings,schema_atrs,conn_positions) :: found) | |
96 | 81 | |
97 | 82 | let schemata entry = |
98 | 83 | let schemata = process_schemata entry.schemata in |
99 | - IntMap.fold schemata [] (fun found _ (schema_atrs,positions) -> | |
84 | + IntMap.fold schemata [] (fun found _ (refl,opinion,schema_atrs,positions) -> | |
100 | 85 | let positions = IntMap.fold positions [] (fun positions _ position -> position :: positions) in |
101 | - (Frame(schema_atrs,positions)) :: found) | |
102 | - | |
103 | -(* Test wczytywania *) | |
104 | -(* let _ = | |
105 | - let n = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = connect e in n + Xlist.size l) in | |
106 | - let m = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = schemata e in n + Xlist.size l) in | |
107 | - Printf.printf "%d connected\n%d schemata\n" n m; | |
108 | - () *) | |
109 | - | |
110 | -let connected_walenty = | |
111 | - Xlist.fold ENIAMwalTEI.walenty StringMap.empty (fun pos_map e -> | |
112 | - let orth_map = try StringMap.find pos_map e.form_pos with Not_found -> StringMap.empty in | |
113 | - let frames = connect e in | |
114 | - let orth_map = StringMap.add_inc orth_map e.form_orth frames (fun l -> frames @ l) in | |
115 | - StringMap.add pos_map e.form_pos orth_map) | |
116 | - | |
117 | -let schemata_walenty = | |
118 | - Xlist.fold ENIAMwalTEI.walenty StringMap.empty (fun pos_map e -> | |
119 | - let orth_map = try StringMap.find pos_map e.form_pos with Not_found -> StringMap.empty in | |
120 | - let frames = schemata e in | |
121 | - let orth_map = StringMap.add_inc orth_map e.form_orth frames (fun l -> frames @ l) in | |
122 | - StringMap.add pos_map e.form_pos orth_map) | |
123 | - | |
124 | -let insert_phrases phrases = function | |
125 | - Frame(atrs,s) -> Frame(atrs,Xlist.map s (fun p -> | |
126 | - {p with morfs=Xlist.map p.morfs (function | |
127 | - MorfId id -> (try IntMap.find phrases id with Not_found -> failwith "insert_phrases") | |
128 | - | _ -> failwith "insert_phrases")})) | |
129 | - | _ -> failwith "insert_phrases: ni" | |
130 | - | |
131 | -let print_entry pos_map pos orth = | |
132 | - let orth_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in | |
133 | - let frames = try StringMap.find orth_map orth with Not_found -> [] in | |
134 | - Xlist.iter frames (fun frame -> | |
135 | - let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
136 | - print_endline (ENIAMwalStringOf.frame orth frame)) | |
137 | - | |
138 | -(* Wypisanie hasła *) | |
139 | -(* let _ = | |
140 | - print_entry connected_walenty "verb" "brudzić"; | |
141 | - () *) | |
142 | - | |
143 | -let has_nontrivial_lex = function | |
144 | - Frame(atrs,s) -> Xlist.fold s false (fun b p -> | |
145 | - if p.role = "Lemma" && p.role_attr = "" then b else | |
146 | - Xlist.fold p.morfs b (fun b -> function | |
147 | - MorfId id -> failwith "has_nontrivial_lex" | |
148 | - | LexPhrase _ -> true | |
149 | - (* | LexRPhrase _ -> true | |
150 | - | LexPhraseMode _ -> true *) | |
151 | - | _ -> b)) | |
152 | - | _ -> failwith "has_nontrivial_lex: ni" | |
153 | - | |
154 | -(* Leksykalizacje nie wchodzące do lematu *) | |
155 | -(* let _ = | |
156 | - StringMap.iter connected_walenty (fun _ orth_map -> | |
157 | - StringMap.iter orth_map (fun orth frames -> | |
158 | - Xlist.iter frames (fun frame -> | |
159 | - let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
160 | - if has_nontrivial_lex frame then | |
161 | - print_endline (ENIAMwalStringOf.frame orth frame)))) *) | |
162 | - | |
163 | -let simplify_frame_verb = function | |
164 | - Phrase(NP(Case "dat")) -> [] | |
165 | - | Phrase(NP(Case "inst")) -> [] | |
166 | - | Phrase(PrepNP _) -> [] | |
167 | - | Phrase(ComprepNP _) -> [] | |
168 | - | Phrase(AdvP) -> [] | |
169 | - | t -> [t] | |
170 | - | |
171 | -let simplify_frame_noun = function | |
172 | - Phrase(NP(Case "gen")) -> [] | |
173 | - | Phrase(NP(Case "nom")) -> [] | |
174 | - | Phrase(NP(CaseAgr)) -> [] | |
175 | - | Phrase(PrepNP _) -> [] | |
176 | - | Phrase(ComprepNP _) -> [] | |
177 | - | Phrase(AdjP CaseAgr) -> [] | |
178 | - | PhraseComp(Ncp(Case "gen"),_) | |
179 | - | PhraseComp(Prepncp(_,_),_) -> [] | |
180 | - | PhraseAbbr(Possp,[]) -> [] | |
181 | - | t -> [t] | |
182 | - | |
183 | -let simplify_frame_adj = function | |
184 | - | t -> [t] | |
185 | - | |
186 | -let simplify_frame_adv = function | |
187 | - | t -> [t] | |
188 | - | |
189 | - | |
190 | -let simplify_frame pos = function | |
191 | - Frame(atrs,s) -> | |
192 | - let schema = Xlist.fold s [] (fun schema p -> | |
193 | - let morfs = Xlist.fold p.morfs [] (fun morfs morf -> | |
194 | - match pos with | |
195 | - "verb" -> simplify_frame_verb morf @ morfs | |
196 | - | "noun" -> simplify_frame_noun morf @ morfs | |
197 | - | "adj" -> simplify_frame_adj morf @ morfs | |
198 | - | "adv" -> simplify_frame_adv morf @ morfs | |
199 | - | _ -> failwith "simplify_frame") in | |
200 | - if morfs = [] then schema else | |
201 | - {p with ce=[]; cr=[]; morfs=morfs} :: schema) in | |
202 | - if schema = [] then [] else [Frame(atrs,schema)] | |
203 | - | _ -> failwith "simplify_frame: ni" | |
204 | - | |
205 | - | |
206 | -(* Uproszczone schematy *) | |
207 | -(* let _ = | |
208 | - StringMap.iter schemata_walenty (fun pos orth_map -> | |
209 | - if pos = "noun" then | |
210 | - StringMap.iter orth_map (fun orth frames -> | |
211 | - Xlist.iter frames (fun frame -> | |
212 | - let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
213 | - let frames = simplify_frame pos frame in | |
214 | - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame orth frame))))) *) | |
215 | - | |
216 | -let has_mode_coordination = function | |
217 | - Frame(atrs,s) -> Xlist.fold s false (fun b p -> | |
218 | - let n = Xlist.fold p.morfs 0 (fun n -> function | |
219 | - MorfId id -> failwith "has_nontrivial_lex" | |
220 | - | PhraseAbbr(Advp _,_) -> n+1 | |
221 | - | PhraseAbbr(Xp _,_) -> n+1 | |
222 | - (* | LexPhraseMode _ -> n+1 FIXME*) | |
223 | - | _ -> n) in | |
224 | - if n>1 then true else b) | |
225 | - | _ -> failwith "has_nontrivial_lex: ni" | |
226 | - | |
227 | -(* Koordynacja z mode *) | |
228 | -(* let _ = | |
229 | - StringMap.iter schemata_walenty(*connected_walenty*) (fun _ orth_map -> | |
230 | - StringMap.iter orth_map (fun orth frames -> | |
231 | - Xlist.iter frames (fun frame -> | |
232 | - let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
233 | - if has_mode_coordination frame then | |
234 | - print_endline (ENIAMwalStringOf.frame orth frame)))) *) | |
235 | - | |
236 | - | |
237 | -(* let get_entry orth pos *) | |
238 | - (* | |
239 | -let load_walenty2 () = | |
240 | - let walenty = load_walenty walenty_filename in | |
241 | - Xlist.fold walenty StringMap.empty (fun walenty entry -> | |
242 | - if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) -> | |
243 | - let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in | |
244 | - let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in | |
245 | - StringMap.add walenty pos map) | |
246 | - else Xlist.fold (connect entry) walenty (fun walenty (lemma,pos,frame) -> | |
247 | - let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in | |
248 | - let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in | |
249 | - StringMap.add walenty pos map)) | |
250 | - | |
251 | - | |
252 | -let print_stringqmap filename qmap = | |
253 | - let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in | |
254 | - File.file_out filename (fun file -> | |
255 | - Xlist.iter (Xlist.sort l compare) (fun (v,k) -> | |
256 | - Printf.fprintf file "%5d %s\n" v k)) | |
257 | - | |
258 | -let sel_prefs_quantities walenty = | |
259 | - Xlist.fold walenty StringQMap.empty (fun quant e -> | |
260 | - Xlist.fold e.frames quant (fun quant f -> | |
261 | - Xlist.fold f.arguments quant (fun quant a -> | |
262 | - Xlist.fold a.sel_prefs quant (fun quant l -> | |
263 | - Xlist.fold l quant (fun quant -> function | |
264 | - Numeric s -> | |
265 | - let name = try ENIAMplWordnet.synset_name s with Not_found -> "unknown" in | |
266 | - StringQMap.add quant ("N " ^ s ^ " " ^ name) | |
267 | - | Symbol s -> StringQMap.add quant ("S " ^ s) | |
268 | - | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t)))))) | |
269 | -*) | |
270 | -(*let _ = | |
271 | - let walenty = load_walenty walenty_filename in | |
272 | - let quant = sel_prefs_quantities walenty in | |
273 | - print_stringqmap "results/quant_sel_prefs.txt" quant*) | |
274 | - | |
275 | -(*let _ = | |
276 | - let walenty = load_walenty2 () in | |
277 | - let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in | |
278 | - Xlist.iter frames_sem (fun frame -> | |
279 | - print_endline (WalStringOf.frame "bębnić" frame))*) | |
86 | + let positions = if refl then ENIAMwalTEI.refl_position :: positions else positions in | |
87 | + (opinion,schema_atrs,positions) :: found) | |
... | ... |
walenty/ENIAMwalFrames.ml
... | ... | @@ -217,35 +217,6 @@ and assign_role_and_sense_morfs morfs = |
217 | 217 | | LexPhraseMode(_,pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,assign_role_and_sense schema))] |
218 | 218 | | _ -> failwith "assign_role_and_sense_morfs")) |
219 | 219 | |
220 | -let rec assign_pro_args schema = | |
221 | - Xlist.map schema (fun s -> | |
222 | - let morfs = match s.morfs with | |
223 | - (E p) :: l -> E Pro :: (E p) :: l | |
224 | - | [LexPhrase _] as morfs -> morfs | |
225 | - | [Phrase(FixedP _)] as morfs -> morfs | |
226 | - | [Phrase(Lex _)] as morfs -> morfs | |
227 | -(* | [Phrase Refl] as morfs -> morfs | |
228 | - | [Phrase Recip] as morfs -> morfs*) | |
229 | - | Phrase Null :: _ as morfs -> morfs | |
230 | - | Phrase Pro :: _ as morfs -> morfs | |
231 | - | morfs -> if s.gf <> SUBJ && s.cr = [] && s.ce = [] then (Phrase Null) :: morfs else (Phrase Pro) :: morfs in (* FIXME: ustalić czy są inne przypadki uzgodnienia *) | |
232 | -(* let morfs = assign_pro_args_lex morfs in *) (* bez pro wewnątrz leksykalizacji *) | |
233 | - {s with morfs=morfs}) | |
234 | - | |
235 | -(*let assign_pro_args_lex morfs = | |
236 | - Xlist.map morfs (function | |
237 | - Lex(morf,specs,lex,restr) -> LexN(morf,specs,lex,assign_pro_args_restr restr) | |
238 | - | LexNum(morf,lex1,lex2,restr) -> LexNum(morf,lex1,lex2,assign_pro_args_restr restr) | |
239 | - | LexCompar(morf,l) -> LexCompar(morf,make_gfs_lex l) | |
240 | - | morf -> morf) | |
241 | - | |
242 | -and assign_pro_args_restr = function | |
243 | - Natr -> Natr | |
244 | - | Ratr1 schema -> Ratr1(assign_pro_args schema) | |
245 | - | Atr1 schema -> Atr1(assign_pro_args schema) | |
246 | - | Ratr schema -> Ratr(assign_pro_args schema) | |
247 | - | Atr schema -> Atr(assign_pro_args schema)*) | |
248 | - | |
249 | 220 | (*let _ = |
250 | 221 | Xlist.iter walenty_filenames (fun filename -> |
251 | 222 | print_endline filename; |
... | ... | @@ -258,92 +229,6 @@ and assign_pro_args_restr = function |
258 | 229 | ignore (process_aspect [Text aspect]); |
259 | 230 | ignore (assign_pro_args (make_gfs (process_schema expands subtypes equivs schema))))))*) |
260 | 231 | |
261 | -exception ImpossibleSchema | |
262 | - | |
263 | -let rec reduce_comp lexemes = function | |
264 | - Comp s -> if StringMap.mem lexemes s then Comp s else raise Not_found | |
265 | - | Zeby -> if StringMap.mem lexemes "żeby" || StringMap.mem lexemes "że" then Zeby else raise Not_found | |
266 | - | Gdy -> if StringMap.mem lexemes "gdy" || StringMap.mem lexemes "gdyby" then Gdy else raise Not_found | |
267 | - | CompUndef -> failwith "reduce_comp" | |
268 | - | |
269 | -let reduce_phrase lexemes = function | |
270 | - | PrepNP(_,prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
271 | - | PrepAdjP(_,prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
272 | - | PrepNumP(_,prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
273 | - | ComprepNP(_,prep) as phrase -> if Xlist.fold (try StringMap.find comprep_reqs prep with Not_found -> []) true (fun b s -> b && StringMap.mem lexemes s) then phrase else raise Not_found | |
274 | - | ComparNP(_,prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
275 | - | ComparPP(_,prep) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
276 | - | CP(ctype,comp) -> CP(ctype,reduce_comp lexemes comp) | |
277 | - | NCP(case,ctype,comp) -> if StringMap.mem lexemes "to" then NCP(case,ctype,reduce_comp lexemes comp) else raise Not_found | |
278 | - | PrepNCP(sem,prep,case,ctype,comp) -> if StringMap.mem lexemes prep && StringMap.mem lexemes "to" then PrepNCP(sem,prep,case,ctype,reduce_comp lexemes comp) else raise Not_found | |
279 | - | phrase -> phrase | |
280 | - | |
281 | -let rec reduce_lex lexemes = function | |
282 | - Lexeme s -> if StringMap.mem lexemes s then Lexeme s else raise Not_found | |
283 | - | ORconcat l -> | |
284 | - let l = List.rev (Xlist.fold l [] (fun l lex -> try reduce_lex lexemes lex :: l with Not_found -> l)) in | |
285 | - (match l with | |
286 | - [] -> raise Not_found | |
287 | - | [x] -> x | |
288 | - | l -> ORconcat l) | |
289 | - | ORcoord l -> | |
290 | - let l = List.rev (Xlist.fold l [] (fun l lex -> try reduce_lex lexemes lex :: l with Not_found -> l)) in | |
291 | - (match l with | |
292 | - [] -> raise Not_found | |
293 | - | [x] -> x | |
294 | - | l -> ORcoord l) | |
295 | - | XOR l -> | |
296 | - let l = List.rev (Xlist.fold l [] (fun l lex -> try reduce_lex lexemes lex :: l with Not_found -> l)) in | |
297 | - (match l with | |
298 | - [] -> raise Not_found | |
299 | - | [x] -> x | |
300 | - | l -> XOR l) | |
301 | - | Elexeme gender -> Elexeme gender | |
302 | - | |
303 | -let rec reduce_restr lexemes = function (* leksykalizacje wewnątrz leksykalizacji są w niektórych sytuacjach opcjonalne *) | |
304 | - Natr,[] -> Natr,[] | |
305 | - | Atr,[] -> Atr,[] | |
306 | - | Ratr,[] -> Ratr,[] | |
307 | - | Atr1,[] -> Atr1,[] | |
308 | - | Ratr1,[] -> Ratr1,[] | |
309 | - | Ratr1,schema -> let schema = reduce_schema2 lexemes schema in if schema = [] then raise Not_found else Ratr1,schema | |
310 | - | Atr1,schema -> let schema = reduce_schema2 lexemes schema in if schema = [] then Natr,[] else Atr1,schema | |
311 | - | Ratr,schema -> let schema = reduce_schema2 lexemes schema in if schema = [] then raise Not_found else Ratr,schema | |
312 | - | Atr,schema -> let schema = reduce_schema2 lexemes schema in if schema = [] then Natr,[] else Atr,schema | |
313 | - | Ratrs,schema -> Ratrs,reduce_schema lexemes schema | |
314 | - | _ -> failwith "reduce_restr" | |
315 | - | |
316 | -and reduce_morf lexemes = function (* leksykalizacje, które się z czymś koordynują nie są obowiązakowe *) | |
317 | - Phrase phrase -> Phrase(reduce_phrase lexemes phrase) | |
318 | - | E phrases -> E phrases (* FIXME: uproszczenie *) | |
319 | - | LexPhrase(pos_lex,restr) -> LexPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, reduce_lex lexemes lex),reduce_restr lexemes restr) | |
320 | - | morf -> failwith ("reduce_morf: " ^ ENIAMwalStringOf.morf morf) | |
321 | - | |
322 | -and reduce_morfs lexemes = function | |
323 | - [] -> [] | |
324 | - | morf :: l -> (try [reduce_morf lexemes morf] with Not_found -> []) @ reduce_morfs lexemes l | |
325 | - | |
326 | -and reduce_schema2 lexemes = function | |
327 | - [] -> [] | |
328 | - | s :: l -> | |
329 | - let morfs = reduce_morfs lexemes s.morfs in | |
330 | - if morfs = [] then reduce_schema2 lexemes l else | |
331 | - {s with morfs=morfs} :: reduce_schema2 lexemes l | |
332 | - | |
333 | -and reduce_schema lexemes = function | |
334 | - [] -> [] | |
335 | - | s :: l -> | |
336 | - let morfs = reduce_morfs lexemes s.morfs in | |
337 | - if morfs = [] then raise ImpossibleSchema else | |
338 | - {s with morfs=morfs} :: reduce_schema lexemes l | |
339 | - | |
340 | -let reduce_schema_frame lexemes = function | |
341 | - Frame(atrs,schema) -> Frame(atrs,reduce_schema lexemes schema) | |
342 | -(* | ComprepFrame(s,morfs) -> | |
343 | - let morfs = reduce_morfs lexemes morfs in | |
344 | - if morfs = [] then raise ImpossibleSchema else ComprepFrame(s,morfs)*) | |
345 | - | _ -> failwith "reduce_schema_frame" | |
346 | - | |
347 | 232 | let remove_trivial_args schema = |
348 | 233 | Xlist.fold schema [] (fun l (_,_,_,morfs) -> |
349 | 234 | let morfs = Xlist.fold morfs [] (fun morfs -> function |
... | ... |
walenty/ENIAMwalGenerate.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAMwalTypes2 | |
21 | +open Xstd | |
22 | + | |
23 | +let load_walenty walenty_filename expands_filename = | |
24 | + let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in | |
25 | + let expands = ENIAMwalTEI.load_expands expands_filename in | |
26 | + let connected_walenty = | |
27 | + Xlist.fold walenty Entries.empty (fun connected_walenty e -> | |
28 | + let entries = ENIAMwalConnect.connect e in | |
29 | + Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in | |
30 | + let schemata_walenty = | |
31 | + Xlist.fold walenty Entries.empty (fun schemata_walenty e -> | |
32 | + let entries = ENIAMwalConnect.schemata e in | |
33 | + Entries.add_inc_list schemata_walenty e.form_pos e.form_orth entries) in | |
34 | + let expands,compreps,subtypes,equivs = | |
35 | + ENIAMwalRealizations.load_realizations (expands,ENIAMwalTEI.subtypes,ENIAMwalTEI.equivs) in | |
36 | + let phrases = | |
37 | + IntMap.map phrases (fun morf -> | |
38 | + let morf = ENIAMwalRealizations.expand_schema_morf expands morf in | |
39 | + let morfs = ENIAMwalRealizations.expand_subtypes_morf subtypes morf in | |
40 | + let morf = List.flatten (Xlist.map morfs (ENIAMwalRealizations.expand_equivs_morf equivs)) in | |
41 | + morf) in | |
42 | + let compreps = Xlist.map compreps (fun (lemma,morfs) -> | |
43 | + lemma, ENIAMwalLex.expand_lexicalizations_morfs morfs) in | |
44 | + let entries = ENIAMwalLex.extract_lex_entries_comprepnp [] compreps in | |
45 | + let phrases,entries = | |
46 | + IntMap.fold phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs -> | |
47 | + let morfs = ENIAMwalLex.expand_lexicalizations_morfs morfs in | |
48 | + let morfs,entries = Xlist.fold morfs ([],entries) ENIAMwalLex.extract_lex_entries in | |
49 | + IntMap.add phrases id morfs, entries) in | |
50 | + let entries = Xlist.fold entries Entries.empty (fun entries (pos,lemma,entry) -> | |
51 | + Entries.add_inc entries pos lemma entry) in | |
52 | + let entries = Entries.map2 entries (fun pos lemma entries -> EntrySet.to_list (EntrySet.of_list entries)) in | |
53 | + let entries = Entries.flatten_map entries (fun pos lemma entry -> | |
54 | + ENIAMwalLex.expand_restr [] lemma pos entry) in | |
55 | + (* let entries = | |
56 | + StringMap.mapi entries (fun pos entries2 -> | |
57 | + StringMap.mapi entries2 (fun lemma entries3 -> | |
58 | + EntrySet.fold entries3 [] (fun entries3 entry -> | |
59 | + (ENIAMwalLex.expand_restr [] lemma pos entry) @ entries3))) in *) | |
60 | + connected_walenty, schemata_walenty, phrases, entries | |
61 | + | |
62 | +let print_entries filename entries = | |
63 | + File.file_out filename (fun file -> | |
64 | + Entries.iter entries (fun pos lemma entry -> | |
65 | + Printf.fprintf file "%s\t%s\t%s\n" pos lemma (ENIAMwalStringOf.lex_entry entry))) | |
66 | + | |
67 | +let print_phrases filename phrases = | |
68 | + File.file_out filename (fun file -> | |
69 | + IntMap.iter phrases (fun id morfs -> | |
70 | + let morfs = Xlist.map morfs ENIAMwalStringOf.morf in | |
71 | + Printf.fprintf file "%d\t%s\n" id (String.concat "\t" morfs))) | |
72 | + | |
73 | +let print_schemata filename schemata = | |
74 | + File.file_out filename (fun file -> | |
75 | + Entries.iter schemata (fun pos lemma (opinion,(n,p,a),schema) -> | |
76 | + Printf.fprintf file "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" pos lemma | |
77 | + (ENIAMwalStringOf.opinion opinion) | |
78 | + (ENIAMwalStringOf.negation n) | |
79 | + (ENIAMwalStringOf.pred p) | |
80 | + (ENIAMwalStringOf.aspect a) | |
81 | + (ENIAMwalStringOf.simple_schema schema))) | |
82 | + | |
83 | +let split_tokens s = | |
84 | + let l = List.flatten (Xlist.map (Str.full_split (Str.regexp " \\|,\\|-") s) (function | |
85 | + Str.Delim " " -> [] | |
86 | + | Str.Delim s -> [s] | |
87 | + | Str.Text s -> [s])) in | |
88 | + String.concat " " l | |
89 | + | |
90 | +let print_fixed filename fixed = | |
91 | + File.file_out filename (fun file -> | |
92 | + StringSet.iter fixed (fun s -> | |
93 | + let t = split_tokens s in | |
94 | + Printf.fprintf file "%s\t%s\tfixed\n" t s)) | |
95 | + | |
96 | + | |
97 | +let add_fixed fixed = function | |
98 | + Phrase (FixedP s) -> StringSet.add fixed s | |
99 | + | SimpleLexArg(s,FIXED) -> StringSet.add fixed s | |
100 | + | LexArg(_,s,FIXED) -> StringSet.add fixed s | |
101 | + | _ -> fixed | |
102 | + | |
103 | +let find_fixed_schema fixed schema = | |
104 | + Xlist.fold schema fixed (fun schema p -> | |
105 | + Xlist.fold p.morfs fixed add_fixed) | |
106 | + | |
107 | +let find_fixed phrases entries = | |
108 | + let fixed = IntMap.fold phrases StringSet.empty (fun fixed _ morfs -> | |
109 | + Xlist.fold morfs fixed add_fixed) in | |
110 | + Entries.fold entries fixed (fun fixed pos lemma -> function | |
111 | + SimpleLexEntry(s,"fixed") -> StringSet.add fixed s | |
112 | + | SimpleLexEntry(s,_) -> fixed | |
113 | + | LexEntry(_,s,"fixed",_,schema) -> find_fixed_schema (StringSet.add fixed s) schema | |
114 | + | LexEntry(_,_,_,_,schema) -> find_fixed_schema fixed schema | |
115 | + | ComprepNPEntry(_,_,schema) -> find_fixed_schema fixed schema) | |
116 | + | |
117 | +let connected_walenty, schemata_walenty, phrases, entries = load_walenty | |
118 | + "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml" | |
119 | + "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" | |
120 | + | |
121 | +(* FIXME: uporządkowanie "winien" *) | |
122 | +(* Generowanie zasobów *) | |
123 | +let _ = | |
124 | + print_entries "results/entries.tab" entries; | |
125 | + print_phrases "results/phrases.tab" phrases; | |
126 | + print_schemata "results/schemata.tab" schemata_walenty; | |
127 | + (* print_connected "results/connected.dic" connected_walenty); *) | |
128 | + print_fixed "results/fixed.tab" (find_fixed phrases entries); | |
129 | + () | |
130 | + | |
131 | +(* Test wczytywania Walentego TEI *) | |
132 | +(* let _ = | |
133 | + let walenty,phrases = ENIAMwalTEI.load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml" | |
134 | + let n = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = connect e in n + Xlist.size l) in | |
135 | + let m = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = schemata e in n + Xlist.size l) in | |
136 | + Printf.printf "%d connected\n%d schemata\n|phrases|=%d\n" n m (IntMap.size phrases); | |
137 | + () *) | |
138 | + | |
139 | +(* let insert_phrases phrases = function | |
140 | + Frame(atrs,s) -> Frame(atrs,Xlist.map s (fun p -> | |
141 | + {p with morfs=Xlist.map p.morfs (function | |
142 | + MorfId id -> (try IntMap.find phrases id with Not_found -> failwith "insert_phrases") | |
143 | + | _ -> failwith "insert_phrases")})) | |
144 | + | _ -> failwith "insert_phrases: ni" | |
145 | + | |
146 | +let print_entry pos_map pos orth = | |
147 | + let orth_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in | |
148 | + let frames = try StringMap.find orth_map orth with Not_found -> [] in | |
149 | + Xlist.iter frames (fun frame -> | |
150 | + let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
151 | + print_endline (ENIAMwalStringOf.frame orth frame)) *) | |
152 | + | |
153 | +(* Wypisanie hasła *) | |
154 | +(* let _ = | |
155 | + print_entry connected_walenty "verb" "brudzić"; | |
156 | + () *) | |
157 | + | |
158 | +(* let has_nontrivial_lex = function | |
159 | + Frame(atrs,s) -> Xlist.fold s false (fun b p -> | |
160 | + if p.role = "Lemma" && p.role_attr = "" then b else | |
161 | + Xlist.fold p.morfs b (fun b -> function | |
162 | + MorfId id -> failwith "has_nontrivial_lex" | |
163 | + | LexPhrase _ -> true | |
164 | + (* | LexRPhrase _ -> true | |
165 | + | LexPhraseMode _ -> true *) | |
166 | + | _ -> b)) | |
167 | + | _ -> failwith "has_nontrivial_lex: ni" *) | |
168 | + | |
169 | +(* Leksykalizacje nie wchodzące do lematu *) | |
170 | +(* let _ = | |
171 | + StringMap.iter connected_walenty (fun _ orth_map -> | |
172 | + StringMap.iter orth_map (fun orth frames -> | |
173 | + Xlist.iter frames (fun frame -> | |
174 | + let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
175 | + if has_nontrivial_lex frame then | |
176 | + print_endline (ENIAMwalStringOf.frame orth frame)))) *) | |
177 | + | |
178 | +let simplify_frame_verb = function | |
179 | + Phrase(NP(Case "dat")) -> [] | |
180 | + | Phrase(NP(Case "inst")) -> [] | |
181 | + | Phrase(PrepNP _) -> [] | |
182 | + | Phrase(ComprepNP _) -> [] | |
183 | + | Phrase(AdvP) -> [] | |
184 | + | t -> [t] | |
185 | + | |
186 | +let simplify_frame_noun = function | |
187 | + Phrase(NP(Case "gen")) -> [] | |
188 | + | Phrase(NP(Case "nom")) -> [] | |
189 | + | Phrase(NP(CaseAgr)) -> [] | |
190 | + | Phrase(PrepNP _) -> [] | |
191 | + | Phrase(ComprepNP _) -> [] | |
192 | + | Phrase(AdjP CaseAgr) -> [] | |
193 | + | PhraseComp(Ncp(Case "gen"),_) | |
194 | + | PhraseComp(Prepncp(_,_),_) -> [] | |
195 | + | PhraseAbbr(Possp,[]) -> [] | |
196 | + | t -> [t] | |
197 | + | |
198 | +let simplify_frame_adj = function | |
199 | + | t -> [t] | |
200 | + | |
201 | +let simplify_frame_adv = function | |
202 | + | t -> [t] | |
203 | + | |
204 | + | |
205 | +(* let simplify_frame pos = function | |
206 | + Frame(atrs,s) -> | |
207 | + let schema = Xlist.fold s [] (fun schema p -> | |
208 | + let morfs = Xlist.fold p.morfs [] (fun morfs morf -> | |
209 | + match pos with | |
210 | + "verb" -> simplify_frame_verb morf @ morfs | |
211 | + | "noun" -> simplify_frame_noun morf @ morfs | |
212 | + | "adj" -> simplify_frame_adj morf @ morfs | |
213 | + | "adv" -> simplify_frame_adv morf @ morfs | |
214 | + | _ -> failwith "simplify_frame") in | |
215 | + if morfs = [] then schema else | |
216 | + {p with ce=[]; cr=[]; morfs=morfs} :: schema) in | |
217 | + if schema = [] then [] else [Frame(atrs,schema)] | |
218 | + | _ -> failwith "simplify_frame: ni" *) | |
219 | + | |
220 | + | |
221 | +(* Uproszczone schematy *) | |
222 | +(* let _ = | |
223 | + StringMap.iter schemata_walenty (fun pos orth_map -> | |
224 | + if pos = "noun" then | |
225 | + StringMap.iter orth_map (fun orth frames -> | |
226 | + Xlist.iter frames (fun frame -> | |
227 | + let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
228 | + let frames = simplify_frame pos frame in | |
229 | + Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame orth frame))))) *) | |
230 | + | |
231 | +(* let has_mode_coordination = function | |
232 | + Frame(atrs,s) -> Xlist.fold s false (fun b p -> | |
233 | + let n = Xlist.fold p.morfs 0 (fun n -> function | |
234 | + MorfId id -> failwith "has_nontrivial_lex" | |
235 | + | PhraseAbbr(Advp _,_) -> n+1 | |
236 | + | PhraseAbbr(Xp _,_) -> n+1 | |
237 | + (* | LexPhraseMode _ -> n+1 FIXME*) | |
238 | + | _ -> n) in | |
239 | + if n>1 then true else b) | |
240 | + | _ -> failwith "has_nontrivial_lex: ni" *) | |
241 | + | |
242 | +(* Koordynacja z mode *) | |
243 | +(* let _ = | |
244 | + StringMap.iter schemata_walenty(*connected_walenty*) (fun _ orth_map -> | |
245 | + StringMap.iter orth_map (fun orth frames -> | |
246 | + Xlist.iter frames (fun frame -> | |
247 | + let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
248 | + if has_mode_coordination frame then | |
249 | + print_endline (ENIAMwalStringOf.frame orth frame)))) *) | |
250 | + | |
251 | + | |
252 | +(* let get_entry orth pos *) | |
253 | + (* | |
254 | +let load_walenty2 () = | |
255 | + let walenty = load_walenty walenty_filename in | |
256 | + Xlist.fold walenty StringMap.empty (fun walenty entry -> | |
257 | + if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) -> | |
258 | + let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in | |
259 | + let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in | |
260 | + StringMap.add walenty pos map) | |
261 | + else Xlist.fold (connect entry) walenty (fun walenty (lemma,pos,frame) -> | |
262 | + let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in | |
263 | + let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in | |
264 | + StringMap.add walenty pos map)) | |
265 | + | |
266 | + | |
267 | +let print_stringqmap filename qmap = | |
268 | + let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in | |
269 | + File.file_out filename (fun file -> | |
270 | + Xlist.iter (Xlist.sort l compare) (fun (v,k) -> | |
271 | + Printf.fprintf file "%5d %s\n" v k)) | |
272 | + | |
273 | +let sel_prefs_quantities walenty = | |
274 | + Xlist.fold walenty StringQMap.empty (fun quant e -> | |
275 | + Xlist.fold e.frames quant (fun quant f -> | |
276 | + Xlist.fold f.arguments quant (fun quant a -> | |
277 | + Xlist.fold a.sel_prefs quant (fun quant l -> | |
278 | + Xlist.fold l quant (fun quant -> function | |
279 | + Numeric s -> | |
280 | + let name = try ENIAMplWordnet.synset_name s with Not_found -> "unknown" in | |
281 | + StringQMap.add quant ("N " ^ s ^ " " ^ name) | |
282 | + | Symbol s -> StringQMap.add quant ("S " ^ s) | |
283 | + | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t)))))) | |
284 | +*) | |
285 | +(*let _ = | |
286 | + let walenty = load_walenty walenty_filename in | |
287 | + let quant = sel_prefs_quantities walenty in | |
288 | + print_stringqmap "results/quant_sel_prefs.txt" quant*) | |
289 | + | |
290 | +(*let _ = | |
291 | + let walenty = load_walenty2 () in | |
292 | + let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in | |
293 | + Xlist.iter frames_sem (fun frame -> | |
294 | + print_endline (WalStringOf.frame "bębnić" frame))*) | |
295 | + | |
296 | + | |
297 | +(* Wypisanie realizacji *) | |
298 | +(* let _ = | |
299 | + Xlist.iter ENIAMwalTEI.expands (fun (id,morf,l) -> | |
300 | + Printf.printf "%d %s:\n" id (ENIAMwalStringOf.morf morf); | |
301 | + Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *) | |
302 | + | |
303 | +(* Wypisanie realizacji po przetworzeniu *) | |
304 | +(* let _ = | |
305 | + AbbrMap.iter expands (fun morf l -> | |
306 | + Printf.printf "%s:\n" (ENIAMwalStringOf.phrase_abbr morf); | |
307 | + Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *) | |
308 | + | |
309 | +let has_realization = function | |
310 | + PhraseAbbr _ -> true | |
311 | + | PhraseComp _ -> true | |
312 | + | _ -> false | |
313 | + | |
314 | +(* Wypisanie fraz, które podlegają rozwijaniu *) | |
315 | +(*let _ = | |
316 | + IntMap.iter ENIAMwalTEI.phrases (fun i morf -> | |
317 | + if has_realization morf then | |
318 | + Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *) | |
319 | + | |
320 | +(* Wypisanie fraz, które podlegają rozwijaniu *) | |
321 | +(* let _ = | |
322 | + IntMap.iter phrases (fun i morf -> | |
323 | + if has_realization morf then | |
324 | + Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *) | |
325 | + | |
326 | +(* let test_phrases = [17088; 17133; 1642] | |
327 | + let _ = | |
328 | + Xlist.iter test_phrases (fun i -> | |
329 | + let m1 = IntMap.find ENIAMwalTEI.phrases i in | |
330 | + let m2 = IntMap.find phrases i in | |
331 | + Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m1); | |
332 | + Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m2)) *) | |
333 | + | |
334 | +(* let print_entries entries = | |
335 | + StringMap.iter entries (fun pos entries2 -> | |
336 | + StringMap.iter entries2 (fun lemma entries3 -> | |
337 | + EntrySet.iter entries3 (fun entry -> | |
338 | + Printf.printf "%s: %s: %s\n" pos lemma (ENIAMwalStringOf.entry entry)))) *) | |
339 | + | |
340 | +(* let _ = print_entries entries *) | |
... | ... |
walenty/ENIAMwalLex.ml
... | ... | @@ -17,7 +17,7 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open ENIAMwalTypes | |
20 | +open ENIAMwalTypes2 | |
21 | 21 | open Xstd |
22 | 22 | |
23 | 23 | let prep_arg_schema_field morfs = |
... | ... | @@ -63,7 +63,8 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen |
63 | 63 | (* | LexPhrase([PREP _,_;ADJ _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] |
64 | 64 | | LexPhrase([PREP _,_;PPAS _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] |
65 | 65 | | LexPhrase([PREP _,_;PACT _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *) |
66 | - | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[prep_arg_schema_field(*2*) [Phrase(NumP(case))]]))] | |
66 | + (* | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[prep_arg_schema_field(*2*) [Phrase(NumP(case))]]))] *) | |
67 | + | Phrase(PrepNumP(prep,case)) -> [Phrase(PrepNP(prep,case))] (* FIXME: celowe uproszczenie *) | |
67 | 68 | | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[{morfs=[LexPhrase([QUB,_],_)]} as s])) -> |
68 | 69 | (* print_endline (ENIAMwalStringOf.morf morf); *) |
69 | 70 | [LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([SUBST(n,c),slex],(Natr,[]))]])); |
... | ... | @@ -80,13 +81,13 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen |
80 | 81 | | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) -> |
81 | 82 | let genders,lexs = split_elexeme lex in |
82 | 83 | Xlist.map genders (fun gender -> |
83 | - LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [Phrase Pro]]))]]))) @ | |
84 | + LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [Phrase Null(*Pro*)]]))]]))) @ (*FIXME*) | |
84 | 85 | Xlist.map lexs (fun lex -> |
85 | 86 | LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [LexPhrase([pos,lex],restr)]]))]]))) |
86 | 87 | | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) -> |
87 | 88 | let genders,lexs = split_elexeme lex in |
88 | 89 | Xlist.map genders (fun gender -> |
89 | - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [Phrase Pro]]))) @ | |
90 | + LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [Phrase Null(*Pro*)]]))) @ | |
90 | 91 | Xlist.map lexs (fun lex -> |
91 | 92 | LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [LexPhrase([pos,lex],restr)]]))) |
92 | 93 | | LexPhrase([COMP ctype,clex;pos,lex],restr) -> |
... | ... | @@ -157,6 +158,20 @@ let get_pos lex = function |
157 | 158 | | COMPAR -> ["compar"] |
158 | 159 | | COMP _ -> ["comp"] |
159 | 160 | | FIXED -> ["fixed"] |
161 | + | _ -> failwith "get_pos" | |
162 | + | |
163 | +let map_pos lemma = function | |
164 | + SUBST(number,case) -> | |
165 | + (match lemma with | |
166 | + "ja" -> PPRON12(number,case) | |
167 | + | "my" -> PPRON12(number,case) | |
168 | + | "ty" -> PPRON12(number,case) | |
169 | + | "wy" -> PPRON12(number,case) | |
170 | + | "on" -> PPRON3(number,case) | |
171 | + | "siebie" -> SIEBIE case | |
172 | + | "się" -> QUB | |
173 | + | _ -> SUBST(number,case)) | |
174 | + | p -> p | |
160 | 175 | |
161 | 176 | let lex_id_counter = ref 0 |
162 | 177 | |
... | ... | @@ -166,16 +181,20 @@ let get_lex_id () = |
166 | 181 | |
167 | 182 | (* FIXME: to trzeba będzie poprawić przy unlike coordination *) |
168 | 183 | (* FIXME: słownik pos wywołuje redundancję *) |
184 | +(* FIXME: parametr refl z typu pos można przenieść do schematu *) | |
169 | 185 | let rec extract_lex_entries (morfs,entries) = function |
170 | 186 | LexPhrase([pos,lex],(Natr,[])) -> |
171 | 187 | let lexemes = get_lexemes lex in |
172 | 188 | let entries = Xlist.fold lexemes entries (fun entries lemma -> |
173 | 189 | Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> |
190 | + (pos2,lemma,SimpleLexEntry(lemma,pos2)) :: entries)) in | |
191 | + (* let entries = Xlist.fold lexemes entries (fun entries lemma -> | |
192 | + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> | |
174 | 193 | let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in |
175 | 194 | let entry = SimpleLexEntry(lemma,pos2) in |
176 | 195 | let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in |
177 | - StringMap.add entries pos2 entries2)) in | |
178 | - let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> SimpleLexArg(lemma,pos) :: morfs) in | |
196 | + StringMap.add entries pos2 entries2)) in *) | |
197 | + let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> SimpleLexArg(lemma,map_pos lemma pos) :: morfs) in | |
179 | 198 | morfs,entries |
180 | 199 | | LexPhrase([pos,lex],(restr,schema)) -> |
181 | 200 | let id = get_lex_id () in |
... | ... | @@ -183,11 +202,14 @@ let rec extract_lex_entries (morfs,entries) = function |
183 | 202 | let schema,entries = extract_lex_entries_schema entries schema in |
184 | 203 | let entries = Xlist.fold lexemes entries (fun entries lemma -> |
185 | 204 | Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> |
205 | + (pos2,lemma,LexEntry(id,lemma,pos2,restr,schema)) :: entries)) in | |
206 | + (* let entries = Xlist.fold lexemes entries (fun entries lemma -> | |
207 | + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> | |
186 | 208 | let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in |
187 | 209 | let entry = LexEntry(id,lemma,pos2,restr,schema) in |
188 | 210 | let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in |
189 | - StringMap.add entries pos2 entries2)) in | |
190 | - let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> LexArg(id,lemma,pos) :: morfs) in | |
211 | + StringMap.add entries pos2 entries2)) in *) | |
212 | + let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> LexArg(id,lemma,map_pos lemma pos) :: morfs) in | |
191 | 213 | morfs,entries |
192 | 214 | | LexPhrase _ as morf -> failwith ("extract_lex_entries: " ^ ENIAMwalStringOf.morf morf) |
193 | 215 | | morf -> morf :: morfs, entries |
... | ... | @@ -207,35 +229,21 @@ let extract_lex_entries_comprepnp entries compreps = |
207 | 229 | let schema,entries = extract_lex_entries_schema entries schema in |
208 | 230 | Xlist.fold lexemes entries (fun entries lemma -> |
209 | 231 | Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> |
232 | + (pos2,lemma,ComprepNPEntry(clemma,restr,schema)) :: entries)) | |
233 | + (* Xlist.fold lexemes entries (fun entries lemma -> | |
234 | + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 -> | |
210 | 235 | let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in |
211 | 236 | let entry = ComprepNPEntry(clemma,restr,schema) in |
212 | 237 | let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in |
213 | - StringMap.add entries pos2 entries2)) | |
238 | + StringMap.add entries pos2 entries2)) *) | |
214 | 239 | | _ -> failwith "extract_lex_entries_comprepnp")) |
215 | 240 | |
216 | -let phrases,entries = | |
217 | - let compreps = Xlist.map ENIAMwalRealizations.compreps (fun (lemma,morfs) -> | |
218 | - lemma, expand_lexicalizations_morfs morfs) in | |
219 | - let entries = extract_lex_entries_comprepnp StringMap.empty compreps in | |
220 | - IntMap.fold ENIAMwalRealizations.phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs -> | |
221 | - let morfs = expand_lexicalizations_morfs morfs in | |
222 | - let morfs,entries = Xlist.fold morfs ([],entries) extract_lex_entries in | |
223 | - IntMap.add phrases id morfs, entries) | |
224 | - | |
225 | -let print_entries entries = | |
226 | - StringMap.iter entries (fun pos entries2 -> | |
227 | - StringMap.iter entries2 (fun lemma entries3 -> | |
228 | - EntrySet.iter entries3 (fun entry -> | |
229 | - Printf.printf "%s: %s: %s\n" pos lemma (ENIAMwalStringOf.entry entry)))) | |
230 | - | |
231 | -(* let _ = print_entries entries *) | |
232 | - | |
233 | 241 | let rec expand_restr valence lexeme pos = function |
234 | 242 | SimpleLexEntry(lemma,pos2) -> [SimpleLexEntry(lemma,pos2)] |
235 | 243 | (* | LexEntry(id,lemma,pos2,Natr,[]) -> [LexEntry(id,lemma,pos2,NoRestr,[])] *) |
236 | 244 | | LexEntry(id,lemma,pos2,Natr,_) -> failwith "expand_restr" |
237 | 245 | | LexEntry(id,lemma,pos2,restr,[]) -> |
238 | - print_endline (lexeme ^ " " ^ pos); | |
246 | + (* print_endline (lexeme ^ " " ^ pos); *) | |
239 | 247 | [LexEntry(id,lemma,pos2,restr,[])] (* FIXME *) |
240 | 248 | (* (* print_endline "expand_restr"; *) |
241 | 249 | let frames = try StringMap.find (StringMap.find valence lexeme) pos |
... | ... | @@ -265,7 +273,7 @@ let rec expand_restr valence lexeme pos = function |
265 | 273 | | LexEntry(id,lemma,pos2,NoRestr,_) -> failwith "expand_restr" |
266 | 274 | (* | ComprepNPEntry(lemma,Natr,[]) -> [ComprepNPEntry(lemma,NoRestr,[])] *) |
267 | 275 | | ComprepNPEntry(lemma,Natr,_) -> failwith "expand_restr" |
268 | - | ComprepNPEntry(lemma,restr,[]) as entry -> failwith ("expand_restr: " ^ ENIAMwalStringOf.entry entry) | |
276 | + | ComprepNPEntry(lemma,restr,[]) as entry -> failwith ("expand_restr: " ^ ENIAMwalStringOf.lex_entry entry) | |
269 | 277 | | ComprepNPEntry(lemma,Atr,schema) -> |
270 | 278 | let schema = Xlist.map schema (fun p -> {p with morfs=Phrase Null :: p.morfs}) in |
271 | 279 | [ComprepNPEntry(lemma,NoRestr,schema)] |
... | ... | @@ -279,35 +287,4 @@ let rec expand_restr valence lexeme pos = function |
279 | 287 | Xlist.map schema (fun x -> ComprepNPEntry(lemma,NoRestr,[x])) |
280 | 288 | | ComprepNPEntry(lemma,Ratrs,schema) -> [ComprepNPEntry(lemma,NoRestr,schema)] |
281 | 289 | | ComprepNPEntry(lemma,NoRestr,_) -> failwith "expand_restr" |
282 | - (* | Frame _ as frame -> [frame] *) | |
283 | - | _ -> failwith "expand_restr" | |
284 | - | |
285 | -let entries = | |
286 | - StringMap.mapi entries (fun pos entries2 -> | |
287 | - StringMap.mapi entries2 (fun lemma entries3 -> | |
288 | - EntrySet.fold entries3 [] (fun entries3 entry -> | |
289 | - (expand_restr [] lemma pos entry) @ entries3))) | |
290 | - | |
291 | - | |
292 | -(* | |
293 | -let convert morfs = | |
294 | - let morfs = expand_lexicalizations_morfs morfs in | |
295 | - let morfs,frames = Xlist.fold morfs ([],[]) extract_lex_morf in | |
296 | - (*Xlist.fold frames(*extract_lex_frames lexeme pos [] frame*) valence (fun valence -> function | |
297 | - lexeme,pos,Frame(atrs,schema) -> | |
298 | - let schemas = simplify_lex (split_xor (split_or_coord schema)) in | |
299 | - Xlist.fold schemas valence (fun valence schema -> | |
300 | - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in | |
301 | - let poss = StringMap.add_inc poss pos [Frame(atrs,schema)] (fun l -> Frame(atrs,schema) :: l) in | |
302 | - StringMap.add valence lexeme poss) | |
303 | - | lexeme,pos,LexFrame(id,pos2,restr,schema) -> | |
304 | - let schemas = simplify_lex (split_xor (split_or_coord schema)) in | |
305 | - Xlist.fold schemas valence (fun valence schema -> | |
306 | - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in | |
307 | - let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in | |
308 | - StringMap.add valence lexeme poss) | |
309 | - | _ -> failwith "convert_frame") *)() | |
310 | - | |
311 | - | |
312 | -let phrases = IntMap.map ENIAMwalRealizations.phrases convert | |
313 | -*) | |
290 | + (* | _ -> failwith "expand_restr" *) | |
... | ... |
walenty/ENIAMwalParser.ml
... | ... | @@ -18,9 +18,16 @@ |
18 | 18 | *) |
19 | 19 | |
20 | 20 | open ENIAMwalTypes |
21 | - | |
22 | 21 | open Xstd |
23 | 22 | |
23 | +type token = | |
24 | + Text of string | |
25 | + | Paren of token list | |
26 | + | Bracet of token list | |
27 | + | SqBra of token list | |
28 | + | LParen | RParen | LBracet | RBracet | LSqBra | RSqBra | |
29 | + | Semic | Plus | Comma | Quot | |
30 | + | |
24 | 31 | let rec find_brackets = function |
25 | 32 | LParen :: l -> |
26 | 33 | let found,l = find_rbracket RParen [] l in |
... | ... | @@ -50,6 +57,21 @@ and find_rbracket bracket rev = function |
50 | 57 | | s :: l -> find_rbracket bracket (s :: rev) l |
51 | 58 | | [] -> failwith "find_rbracket" |
52 | 59 | |
60 | +let split_text schema = | |
61 | + find_brackets (Xlist.map (Str.full_split (Str.regexp "\\]\\|\\+\\|{\\|}\\|(\\|)\\|,\\|;\\|'\\|\\[") schema) (function | |
62 | + Str.Text s -> Text s | |
63 | + | Str.Delim "(" -> LParen | |
64 | + | Str.Delim ")" -> RParen | |
65 | + | Str.Delim "{" -> LBracet | |
66 | + | Str.Delim "}" -> RBracet | |
67 | + | Str.Delim "[" -> LSqBra | |
68 | + | Str.Delim "]" -> RSqBra | |
69 | + | Str.Delim ";" -> Semic | |
70 | + | Str.Delim "+" -> Plus | |
71 | + | Str.Delim "," -> Comma | |
72 | + | Str.Delim "'" -> Quot | |
73 | + | _ -> failwith "parse_text")) | |
74 | + | |
53 | 75 | let rec split_symbol symb rev = function |
54 | 76 | [] -> [List.rev rev](*failwith "split_symbol"*) |
55 | 77 | | s :: l -> |
... | ... | @@ -58,56 +80,24 @@ let rec split_symbol symb rev = function |
58 | 80 | else (List.rev rev) :: (split_symbol symb [] l) |
59 | 81 | else split_symbol symb (s :: rev) l |
60 | 82 | |
61 | -let parse_opinion = function | |
62 | - "pewny" -> Pewny | |
63 | - | "cer" -> Pewny | |
64 | - | "potoczny" -> Potoczny | |
65 | - | "col" -> Potoczny | |
66 | - | "wątpliwy" -> Watpliwy | |
67 | - | "unc" -> Watpliwy | |
68 | - | "archaiczny" -> Archaiczny | |
69 | - | "dat" -> Archaiczny | |
70 | - | "zły" -> Zly | |
71 | - | "bad" -> Zly | |
72 | - | "wulgarny" -> Wulgarny | |
73 | - | "vul" -> Wulgarny | |
74 | - | x -> failwith ("parse_opinion: " ^ x) | |
75 | - | |
76 | -let parse_roles l = | |
77 | - let r,cr,ce = Xlist.fold l ([],[],[]) (fun (r,controller,controllee) -> function | |
78 | - "subj" -> SUBJ :: r, controller, controllee | |
79 | - | "obj" -> OBJ :: r, controller, controllee | |
80 | - | "controller" -> r, "1" :: controller, controllee | |
81 | - | "controllee" -> r, controller, "1" :: controllee | |
82 | - | "controller2" -> r, "2" :: controller, controllee | |
83 | - | "controllee2" -> r, controller, "2" :: controllee | |
84 | - | "" -> r, controller, controllee | |
85 | - | x -> failwith ("parse_roles: " ^ x)) in | |
86 | - (match r with | |
87 | - [] -> ARG | |
88 | - | [x] -> x | |
89 | - | _ -> failwith "parse_roles"),cr,ce | |
90 | - | |
91 | -let parse_negation = function | |
92 | - [Text "_"] -> NegationUndef | |
93 | - | [Text "neg"] -> Negation | |
94 | - | [Text "aff"] -> Aff | |
95 | - | [Text ""] -> NegationNA | |
96 | - | l -> failwith ("parse_negation: " ^ ENIAMwalStringOf.token_list l) | |
97 | - | |
98 | -let parse_pred = function | |
99 | - [Text ""] -> PredNA | |
100 | - | [Text "pred"] -> Pred | |
101 | - | [Text "false"] -> PredNA | |
102 | - | [Text "true"] -> Pred | |
103 | - | l -> failwith ("parse_pred: " ^ ENIAMwalStringOf.token_list l) | |
104 | - | |
105 | -let parse_aspect = function | |
106 | - [Text "perf"] -> Aspect "perf" | |
107 | - | [Text "imperf"] -> Aspect "imperf" | |
108 | - | [Text "_"] -> AspectUndef | |
109 | - | [Text ""] -> AspectNA | |
110 | - | l -> failwith ("parse_aspect: " ^ ENIAMwalStringOf.token_list l) | |
83 | +let rec string_of_token = function | |
84 | + Text s -> s | |
85 | + | Paren l -> "(" ^ String.concat "" (Xlist.map l string_of_token) ^ ")" | |
86 | + | Bracet l -> "{" ^ String.concat "" (Xlist.map l string_of_token) ^ "}" | |
87 | + | SqBra l -> "[" ^ String.concat "" (Xlist.map l string_of_token) ^ "]" | |
88 | + | LParen -> "(" | |
89 | + | RParen -> ")" | |
90 | + | LBracet -> "{" | |
91 | + | RBracet -> "}" | |
92 | + | LSqBra -> "[" | |
93 | + | RSqBra -> "]" | |
94 | + | Semic -> ";" | |
95 | + | Plus -> "+" | |
96 | + | Comma -> "," | |
97 | + | Quot -> "'" | |
98 | + | |
99 | +let string_of_token_list l = | |
100 | + String.concat "" (Xlist.map l string_of_token) | |
111 | 101 | |
112 | 102 | let parse_case = function |
113 | 103 | [Text "nom"] -> Case "nom" |
... | ... | @@ -121,169 +111,15 @@ let parse_case = function |
121 | 111 | | [Text "part"] -> Part |
122 | 112 | | [Text "postp"] -> Case "postp" |
123 | 113 | | [Text "agr"] -> CaseAgr |
124 | - | l -> failwith ("parse_case: " ^ ENIAMwalStringOf.token_list l) | |
125 | - | |
126 | -let parse_prep = function | |
127 | - | [Text "niż"] -> "niż" | |
128 | - | [Text "w czasie"] -> "w czasie" | |
129 | - | [Text "podczas"] -> "podczas" | |
130 | - | [Text "w roli"] -> "w roli" | |
131 | - | [Text "pomiędzy"] -> "pomiędzy" | |
132 | - | [Text "według"] -> "według" | |
133 | - | [Text "zza"] -> "zza" | |
134 | - | [Text "poza"] -> "poza" | |
135 | - | [Text "spośród"] -> "spośród" | |
136 | - | [Text "spod"] -> "spod" | |
137 | - | [Text "koło"] -> "koło" | |
138 | - | [Text "względem"] -> "względem" | |
139 | - | [Text "wśród"] -> "wśród" | |
140 | - | [Text "wskutek"] -> "wskutek" | |
141 | - | [Text "przez"] -> "przez" | |
142 | - | [Text "co do"] -> "co do" | |
143 | - | [Text "wokół"] -> "wokół" | |
144 | - | [Text "między"] -> "między" | |
145 | - | [Text "bez"] -> "bez" | |
146 | - | [Text "przy"] -> "przy" | |
147 | - | [Text "na rzecz"] -> "na rzecz" | |
148 | - | [Text "na temat"] -> "na temat" | |
149 | - | [Text "po"] -> "po" | |
150 | - | [Text "u"] -> "u" | |
151 | - | [Text "pod"] -> "pod" | |
152 | - | [Text "ponad"] -> "ponad" | |
153 | - | [Text "jako"] -> "jako" | |
154 | - | [Text "w kwestii"] -> "w kwestii" | |
155 | - | [Text "w sprawie"] -> "w sprawie" | |
156 | - | [Text "ku"] -> "ku" | |
157 | - | [Text "przeciw"] -> "przeciw" | |
158 | - | [Text "nad"] -> "nad" | |
159 | - | [Text "z powodu"] -> "z powodu" | |
160 | - | [Text "przed"] -> "przed" | |
161 | - | [Text "od"] -> "od" | |
162 | - | [Text "o"] -> "o" | |
163 | - | [Text "w"] -> "w" | |
164 | - | [Text "za"] -> "za" | |
165 | - | [Text "dla"] -> "dla" | |
166 | - | [Text "na"] -> "na" | |
167 | - | [Text "z"] -> "z" | |
168 | - | [Text "do"] -> "do" | |
169 | - | [Text "wobec"] -> "wobec" | |
170 | - | [Text "jak"] -> "jak" | |
171 | - | [Text "spomiędzy"] -> "spomiędzy" | |
172 | - | [Text "sponad"] -> "sponad" | |
173 | - | [Text "spopod"] -> "spopod" | |
174 | - | [Text "spoza"] -> "spoza" | |
175 | - | [Text "sprzed"] -> "sprzed" | |
176 | - | [Text "znad"] -> "znad" | |
177 | - | [Text "dokoła"] -> "dokoła" | |
178 | - | [Text "dookoła"] -> "dookoła" | |
179 | - | [Text "naokoło"] -> "naokoło" | |
180 | - | [Text "naprzeciw"] -> "naprzeciw" | |
181 | - | [Text "obok"] -> "obok" | |
182 | - | [Text "poniżej"] -> "poniżej" | |
183 | - | [Text "popod"] -> "popod" | |
184 | - | [Text "pośrodku"] -> "pośrodku" | |
185 | - | [Text "pośród"] -> "pośród" | |
186 | - | [Text "powyżej"] -> "powyżej" | |
187 | - | [Text "wewnątrz"] -> "wewnątrz" | |
188 | - | [Text "wkoło"] -> "wkoło" | |
189 | - | [Text "wzdłuż"] -> "wzdłuż" | |
190 | - | [Text "naokół"] -> "naokół" | |
191 | - | [Text "śród"] -> "śród" | |
192 | - | [Text "wpośród"] -> "wpośród" | |
193 | - | [Text "poprzez"] -> "poprzez" | |
194 | - | [Text "około"] -> "około" | |
195 | - | [Text "na punkcie"] -> "na punkcie" | |
196 | - | [Text "pod względem"] -> "pod względem" | |
197 | - | [Text "pod wpływem"] -> "pod wpływem" | |
198 | - | [Text "na skutek"] -> "na skutek" | |
199 | - | [Text "na polu"] -> "na polu" | |
200 | - | [Text "na poczet"] -> "na poczet" | |
201 | - | [Text "na widok"] -> "na widok" | |
202 | - | [Text "w dziedzinie"] -> "w dziedzinie" | |
203 | - | [Text "pod pozorem"] -> "pod pozorem" | |
204 | - | [Text "pod pretekstem"] -> "pod pretekstem" | |
205 | - | [Text "za pomocą"] -> "za pomocą" | |
206 | - | [Text "pod adresem"] -> "pod adresem" | |
207 | - | [Text "w kierunku"] -> "w kierunku" | |
208 | - | [Text "w stronę"] -> "w stronę" | |
209 | - | [Text "w obliczu"] -> "w obliczu" | |
210 | - | [Text "u podstaw"] -> "u podstaw" | |
211 | - | [Text "pod kątem"] -> "pod kątem" | |
212 | - | [Text "na okoliczność"] -> "na okoliczność" | |
213 | - | [Text "w temacie"] -> "w temacie" | |
214 | - | [Text "od strony"] -> "od strony" | |
215 | - | [Text "ze strony"] -> "ze strony" | |
216 | - | [Text "po stronie"] -> "po stronie" | |
217 | - | [Text "na myśl"] -> "na myśl" | |
218 | - | [Text "w charakterze"] -> "w charakterze" | |
219 | - | [Text "w zakresie"] -> "w zakresie" | |
220 | - | [Text "ze względu na"] -> "ze względu na" | |
221 | - | [Text "na podstawie"] -> "na podstawie" | |
222 | - | [Text "w stosunku do"] -> "w stosunku do" | |
223 | - | [Text "z tytułu"] -> "z tytułu" | |
224 | - | [Text "z okazji"] -> "z okazji" | |
225 | - | [Text "z zakresu"] -> "z zakresu" | |
226 | - | [Text "w wysokości"] -> "w wysokości" | |
227 | - | [Text "na gruncie"] -> "na gruncie" | |
228 | - | [Text "z dziedziny"] -> "z dziedziny" | |
229 | - | [Text "na tle"] -> "na tle" | |
230 | - | [Text "na niwie"] -> "na niwie" | |
231 | - | [Text "w ramach"] -> "w ramach" | |
232 | - | [Text "na korzyść"] -> "na korzyść" | |
233 | - | [Text "w intencji"] -> "w intencji" | |
234 | - | [Text "na kanwie"] -> "na kanwie" | |
235 | - | [Text "na niekorzyść"] -> "na niekorzyść" | |
236 | - | [Text "z ramienia"] -> "z ramienia" | |
237 | - | [Text "w wyniku"] -> "w wyniku" | |
238 | - | [Text "za sprawą"] -> "za sprawą" | |
239 | - | [Text "w imię"] -> "w imię" | |
240 | - | [Text "w celu"] -> "w celu" | |
241 | - | [Text "z pomocą"] -> "z pomocą" | |
242 | - | [Text "per"] -> "per" | |
243 | - | [Text "co"] -> "co" | |
244 | - | [Text s] -> s | |
245 | - | l -> failwith ("parse_prep: " ^ ENIAMwalStringOf.token_list l) | |
246 | - | |
247 | -let rec parse_comp = function | |
248 | - [Text "int"] -> Int,[] | |
249 | - | [Text "rel"] -> Rel,[] | |
250 | - | [Text "int";SqBra l] -> Int,List.flatten (Xlist.map (split_symbol Semic [] l) (fun c -> snd (parse_comp c))) | |
251 | - | [Text "rel";SqBra l] -> Rel,List.flatten (Xlist.map (split_symbol Semic [] l) (fun c -> snd (parse_comp c))) | |
252 | - | [Text "co"] -> CompTypeUndef,[Comp "co"] (* subst qub prep comp *) | |
253 | - | [Text "kto"] -> CompTypeUndef,[Comp "kto"] (* subst *) | |
254 | - | [Text "ile"] -> CompTypeUndef,[Comp "ile"] (* num adv *) | |
255 | - | [Text "jaki"] -> CompTypeUndef,[Comp "jaki"] (* adj *) | |
256 | - | [Text "który"] -> CompTypeUndef,[Comp "który"] (* adj *) | |
257 | - | [Text "czyj"] -> CompTypeUndef,[Comp "czyj"] (* adj *) | |
258 | - | [Text "jak"] -> CompTypeUndef,[Comp "jak"] (* prep conj adv *) | |
259 | - | [Text "kiedy"] -> CompTypeUndef,[Comp "kiedy"] (* comp adv *) | |
260 | - | [Text "gdzie"] -> CompTypeUndef,[Comp "gdzie"] (* qub adv *) | |
261 | - | [Text "odkąd"] -> CompTypeUndef,[Comp "odkąd"] (* adv *) | |
262 | - | [Text "skąd"] -> CompTypeUndef,[Comp "skąd"] (* adv *) | |
263 | - | [Text "dokąd"] -> CompTypeUndef,[Comp "dokąd"] (* adv *) | |
264 | - | [Text "którędy"] -> CompTypeUndef,[Comp "którędy"] (* adv *) | |
265 | - | [Text "dlaczego"] -> CompTypeUndef,[Comp "dlaczego"] (* adv *) | |
266 | - | [Text "czemu"] -> CompTypeUndef,[Comp "czemu"] (* adv *) | |
267 | - | [Text "czy"] -> CompTypeUndef,[Comp "czy"] (* qub conj *) | |
268 | - | [Text "jakby"] -> CompTypeUndef,[Comp "jakby"] (* qub comp *) | |
269 | - | [Text "jakoby"] -> CompTypeUndef,[Comp "jakoby"] (* qub comp *) | |
270 | - | [Text "gdy"] -> CompTypeUndef,[Gdy] (* adv; gdyby: qub comp *) | |
271 | - | [Text "dopóki"] -> CompTypeUndef,[Comp "dopóki"] (* comp *) | |
272 | - | [Text "zanim"] -> CompTypeUndef,[Comp "zanim"] (* comp *) | |
273 | - | [Text "jeśli"] -> CompTypeUndef,[Comp "jeśli"] (* comp *) | |
274 | - | [Text "żeby2"] -> CompTypeUndef,[Zeby] | |
275 | - | [Text "żeby"] -> CompTypeUndef,[Comp "żeby"] (* qub comp *) | |
276 | - | [Text "że"] -> CompTypeUndef,[Comp "że"] (* qub comp *) | |
277 | - | [Text "aż"] -> CompTypeUndef,[Comp "aż"] (* qub comp *) | |
278 | - | [Text "bo"] -> CompTypeUndef,[Comp "bo"] (* qub comp *) | |
279 | - | l -> failwith ("parse_comp: " ^ ENIAMwalStringOf.token_list l) | |
114 | + | [Text "_"] -> CaseUndef | |
115 | + | l -> failwith ("parse_case: " ^ string_of_token_list l) | |
280 | 116 | |
281 | 117 | let parse_number = function |
282 | 118 | [Text "sg"] -> Number "sg" |
283 | 119 | | [Text "pl"] -> Number "pl" |
284 | 120 | | [Text "agr"] -> NumberAgr |
285 | 121 | | [Text "_"] -> NumberUndef |
286 | - | l -> failwith ("parse_number: " ^ ENIAMwalStringOf.token_list l) | |
122 | + | l -> failwith ("parse_number: " ^ string_of_token_list l) | |
287 | 123 | |
288 | 124 | let parse_gender = function |
289 | 125 | [Text "m1"] -> Gender "m1" |
... | ... | @@ -291,247 +127,273 @@ let parse_gender = function |
291 | 127 | | [Text "n"] -> Genders["n1";"n2"] |
292 | 128 | | [Text "f"] -> Gender "f" |
293 | 129 | | [Text "m1.n"] -> Genders["m1";"n1";"n2"] |
130 | + | [Text "m1.n1.n2"] -> Genders["m1";"n1";"n2"] | |
131 | + | [Text "n1.n2"] -> Genders["n1";"n2"] | |
294 | 132 | | [Text "_"] -> GenderUndef |
295 | 133 | | [Text "agr"] -> GenderAgr |
296 | - | l -> failwith ("parse_gender: " ^ ENIAMwalStringOf.token_list l) | |
134 | + | l -> failwith ("parse_gender: " ^ string_of_token_list l) | |
297 | 135 | |
298 | 136 | let parse_grad = function |
299 | 137 | [Text "pos"] -> Grad "pos" |
300 | 138 | | [Text "com"] -> Grad "com" |
301 | 139 | | [Text "sup"] -> Grad "sup" |
302 | 140 | | [Text "_"] -> GradUndef |
303 | - | l -> failwith ("parse_grad: " ^ ENIAMwalStringOf.token_list l) | |
141 | + | l -> failwith ("parse_grad: " ^ string_of_token_list l) | |
142 | + | |
143 | +let parse_aspect = function | |
144 | + [Text "perf"] -> Aspect "perf" | |
145 | + | [Text "imperf"] -> Aspect "imperf" | |
146 | + | [Text "_"] -> AspectUndef | |
147 | + | [Text ""] -> AspectNA | |
148 | + | l -> failwith ("parse_aspect: " ^ string_of_token_list l) | |
149 | + | |
150 | +let parse_negation = function | |
151 | + [Text "_"] -> NegationUndef | |
152 | + | [Text "neg"] -> Negation | |
153 | + | [Text "aff"] -> Aff | |
154 | + | [Text ""] -> NegationNA | |
155 | + | l -> failwith ("parse_negation: " ^ string_of_token_list l) | |
304 | 156 | |
305 | 157 | let parse_refl = function |
306 | - [] -> ReflEmpty | |
158 | + (* [] -> ReflEmpty | |
307 | 159 | | [Text "się"] -> ReflSie |
308 | 160 | | [Text ""] -> ReflEmpty |
309 | 161 | | [Text "false"] -> ReflEmpty |
310 | - | [Text "true"] -> ReflSie | |
311 | - | l -> failwith ("parse_refl: " ^ ENIAMwalStringOf.token_list l) | |
312 | - | |
313 | -let rec parse_lex = function | |
314 | - [Quot; Text "E"; Paren[Text gender]; Quot] -> Elexeme(parse_gender [Text gender]) | |
315 | - | [Quot; Text x; Quot] -> Lexeme x | |
316 | - | [Quot; Text x; Comma; Text y; Quot] -> Lexeme (x ^ "," ^ y) | |
317 | - | [Text "OR"; Paren l] -> | |
318 | - (match split_symbol Semic [] l with | |
319 | - [l] -> (match split_symbol Comma [] l with | |
320 | - [_] -> failwith "parse_lex OR" | |
321 | - | ll -> ORconcat(Xlist.map ll parse_lex)) | |
322 | - | ll -> ORcoord(Xlist.map ll parse_lex)) | |
323 | - | [Text "XOR"; Paren l] -> XOR(Xlist.map (List.flatten (Xlist.map (split_symbol Semic [] l) (split_symbol Comma []))) parse_lex) | |
324 | - | l -> failwith ("parse_lex: " ^ ENIAMwalStringOf.token_list l) | |
325 | - | |
326 | -let get_lexeme = function | |
327 | - Lexeme s -> s | |
328 | - | _ -> failwith "get_lexeme" | |
329 | - | |
330 | -let new_schema r cr ce morfs = | |
331 | - {gf=r; role=""; role_attr="";sel_prefs=[]; cr=cr; ce=ce; dir=Both; morfs=morfs} | |
332 | - | |
333 | -let rec parse_restr = function | |
334 | - [Text "natr"] -> Natr,[] | |
335 | - | [Text "atr"] -> Atr,[] | |
336 | - | [Text "ratr"] -> Ratr,[] | |
337 | - | [Text "atr1"] -> Atr1,[] | |
338 | - | [Text "ratr1"] -> Ratr1,[] | |
339 | - | [Text "ratr1"; Paren schema] -> Ratr1, parse_schema_simple schema | |
340 | - | [Text "ratr"; Paren schema] -> Ratr, parse_schema_simple schema | |
341 | - | [Text "atr1"; Paren schema] -> Atr1, parse_schema_simple schema | |
342 | - | [Text "atr"; Paren schema] -> Atr, parse_schema_simple schema | |
343 | - | l -> failwith ("parse_restr: " ^ ENIAMwalStringOf.token_list l) | |
344 | - | |
345 | -and parse_schema_simple schema = | |
346 | - Xlist.map (split_symbol Plus [] schema) (function | |
347 | - [Bracet b] -> let r,cr,ce = parse_roles [] in new_schema r cr ce (parse_morfs b) | |
348 | - | [Text s1;Bracet b] -> let r,cr,ce = parse_roles [s1] in new_schema r cr ce (parse_morfs b) | |
349 | - | _ -> failwith "parse_schema_simple") | |
350 | - | |
351 | -and parse_mode = function | |
352 | - [Text "abl"] -> "abl",[] | |
353 | - | [Text "adl"] -> "adl",[] | |
354 | - | [Text "caus"] -> "caus",[] | |
355 | - | [Text "dest"] -> "dest",[] | |
356 | - | [Text "dur"] -> "dur",[] | |
357 | - | [Text "instr"] -> "instr",[] | |
358 | - | [Text "locat"] -> "locat",[] | |
359 | - | [Text "perl"] -> "perl",[] | |
360 | - | [Text "temp"] -> "temp",[] | |
361 | - | [Text "abl";SqBra l] -> "abl",parse_morfs l | |
362 | - | [Text "adl";SqBra l] -> "adl",parse_morfs l | |
363 | - | [Text "caus";SqBra l] -> "caus",parse_morfs l | |
364 | - | [Text "dest";SqBra l] -> "dest",parse_morfs l | |
365 | - | [Text "dur";SqBra l] -> "dur",parse_morfs l | |
366 | - | [Text "instr";SqBra l] -> "instr",parse_morfs l | |
367 | - | [Text "locat";SqBra l] -> "locat",parse_morfs l | |
368 | - | [Text "perl";SqBra l] -> "perl",parse_morfs l | |
369 | - | [Text "temp";SqBra l] -> "temp",parse_morfs l | |
370 | - | [Text "mod"] -> "mod",[] | |
371 | - | [Text "mod";SqBra l] -> "mod",parse_morfs l | |
372 | - | [Text "pron"] -> "pron",[] | |
373 | - | [Text "misc"] -> "misc",[] | |
374 | - | l -> failwith ("parse_mode: " ^ ENIAMwalStringOf.token_list l) | |
162 | + | [Text "true"] -> ReflSie *) | |
163 | + | [Text "nosię"] -> ReflFalse | |
164 | + | [Text "się"] -> ReflTrue | |
165 | + | l -> failwith ("parse_refl: " ^ string_of_token_list l) | |
166 | + | |
167 | +let parse_ctype = function | |
168 | + [Text "int"] -> Int | |
169 | + | [Text "rel"] -> Rel | |
170 | + | [Text "_"] -> CompTypeUndef | |
171 | + | l -> failwith ("parse_ctype: " ^ string_of_token_list l) | |
172 | + | |
173 | +let parse_acm = function | |
174 | + (* [Text "int"] -> Int | |
175 | + | [Text "rel"] -> Rel *) | |
176 | + | [Text "_"] -> AcmUndef | |
177 | + | l -> failwith ("parse_acm: " ^ string_of_token_list l) | |
178 | + | |
179 | +let parse_comp = function | |
180 | + | [Text "co"] -> Comp "co" (* subst qub prep comp *) | |
181 | + | [Text "kto"] -> Comp "kto" (* subst *) | |
182 | + | [Text "ile"] -> Comp "ile" (* num adv *) | |
183 | + | [Text "jaki"] -> Comp "jaki" (* adj *) | |
184 | + | [Text "który"] -> Comp "który" (* adj *) | |
185 | + | [Text "czyj"] -> Comp "czyj" (* adj *) | |
186 | + | [Text "jak"] -> Comp "jak" (* prep conj adv *) | |
187 | + | [Text "kiedy"] -> Comp "kiedy" (* comp adv *) | |
188 | + | [Text "gdzie"] -> Comp "gdzie" (* qub adv *) | |
189 | + | [Text "odkąd"] -> Comp "odkąd" (* adv *) | |
190 | + | [Text "skąd"] -> Comp "skąd" (* adv *) | |
191 | + | [Text "dokąd"] -> Comp "dokąd" (* adv *) | |
192 | + | [Text "którędy"] -> Comp "którędy" (* adv *) | |
193 | + | [Text "dlaczego"] -> Comp "dlaczego" (* adv *) | |
194 | + | [Text "czemu"] -> Comp "czemu" (* adv *) | |
195 | + | [Text "czy"] -> Comp "czy" (* qub conj *) | |
196 | + | [Text "jakby"] -> Comp "jakby" (* qub comp *) | |
197 | + | [Text "jakoby"] -> Comp "jakoby" (* qub comp *) | |
198 | + | [Text "gdy"] -> Gdy (* adv; gdyby: qub comp *) | |
199 | + | [Text "dopóki"] -> Comp "dopóki" (* comp *) | |
200 | + | [Text "zanim"] -> Comp "zanim" (* comp *) | |
201 | + | [Text "jeśli"] -> Comp "jeśli" (* comp *) | |
202 | + | [Text "żeby2"] -> Zeby | |
203 | + | [Text "żeby"] -> Comp "żeby" (* qub comp *) | |
204 | + | [Text "że"] -> Comp "że" (* qub comp *) | |
205 | + | [Text "aż"] -> Comp "aż" (* qub comp *) | |
206 | + | [Text "bo"] -> Comp "bo" (* qub comp *) | |
207 | + | [Text "niczym"] -> Comp "niczym" | |
208 | + | [Text "_"] -> CompUndef | |
209 | + | l -> failwith ("parse_comp: " ^ string_of_token_list l) | |
210 | + | |
211 | +let parse_opinion = function | |
212 | + "pewny" -> Pewny | |
213 | + (* | "cer" -> Pewny *) | |
214 | + | "potoczny" -> Potoczny | |
215 | + (* | "col" -> Potoczny *) | |
216 | + | "wątpliwy" -> Watpliwy | |
217 | + (* | "unc" -> Watpliwy *) | |
218 | + | "archaiczny" -> Archaiczny | |
219 | + (* | "dat" -> Archaiczny *) | |
220 | + | "zły" -> Zly | |
221 | + (* | "bad" -> Zly *) | |
222 | + | "wulgarny" -> Wulgarny | |
223 | + (* | "vul" -> Wulgarny *) | |
224 | + | x -> failwith ("parse_opinion: " ^ x) | |
225 | + | |
226 | +let parse_pred = function | |
227 | + "pred" -> PredTrue | |
228 | + | "nopred" -> PredFalse | |
229 | + | s -> failwith ("parse_pred: " ^ s) | |
230 | + | |
231 | +let parse_pos = function | |
232 | + "SUBST",[number;case] -> SUBST(parse_number number,parse_case case) | |
233 | + | "PPRON12",[number;case] -> PPRON12(parse_number number,parse_case case) | |
234 | + | "PPRON3",[number;case] -> PPRON3(parse_number number,parse_case case) | |
235 | + | "SIEBIE",[case] -> SIEBIE(parse_case case) | |
236 | + | "PREP",[case] -> PREP(parse_case case) | |
237 | + | "NUM",[case;gender;acm] -> NUM(parse_case case,parse_gender gender,parse_acm acm) | |
238 | + | "ADJ",[number;case;gender;grad] -> ADJ(parse_number number,parse_case case,parse_gender gender,parse_grad grad) | |
239 | + | "ADV",[grad] -> ADV(parse_grad grad) | |
240 | + | "GER",[number;case;gender;aspect;negation;refl] -> GER(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl) | |
241 | + | "PPAS",[number;case;gender;aspect;negation] -> PPAS(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation) | |
242 | + | "PACT",[number;case;gender;aspect;negation;refl] -> PACT(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl) | |
243 | + | "INF",[aspect;negation;refl] -> INF(parse_aspect aspect,parse_negation negation,parse_refl refl) | |
244 | + | "QUB",[] -> QUB | |
245 | + | "COMPAR",[] -> COMPAR | |
246 | + | "COMP",[ctype] -> COMP(parse_ctype ctype) | |
247 | + | "PERS",[negation;refl] -> PERS(parse_negation negation,parse_refl refl) | |
248 | + | s,ll -> print_endline ("parse_pos: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll string_of_token_list) ^ ")"); FIXED | |
249 | + | |
250 | +let rec parse_phrase = function | |
251 | + "np",[case] -> NP(parse_case case) | |
252 | + | "prepnp",[[Text prep]; case] -> PrepNP(prep,parse_case case) | |
253 | + | "adjp",[case] -> AdjP(parse_case case) | |
254 | + | "prepadjp",[[Text prep]; case] -> PrepAdjP(prep,parse_case case) | |
255 | + | "comprepnp",[[Text prep]] -> ComprepNP prep | |
256 | + | "comparp",[[Text prep]] -> ComparP prep | |
257 | + | "cp",[ctype;comp] -> CP(parse_ctype ctype,parse_comp comp) | |
258 | + | "ncp",[case;ctype;comp] -> NCP(parse_case case,parse_ctype ctype,parse_comp comp) | |
259 | + | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(prep,parse_case case,parse_ctype ctype,parse_comp comp) | |
260 | + | "infp",[aspect] -> InfP(parse_aspect aspect) | |
261 | + | "fixed",[[Text lemma]] -> FixedP lemma | |
262 | + | "fixed",[[Text lemma1];[Text lemma2]] -> FixedP (lemma1 ^ "," ^ lemma2) | |
263 | + | "or",[] -> Or | |
264 | + | "refl",[] -> Refl | |
265 | + | "recip",[] -> Recip | |
266 | + | "E",[morf] -> E(parse_morf morf) | |
267 | + | "advp",[] -> AdvP | |
268 | + | "null",[] -> Null | |
269 | + | "lex",[[Text lemma];[Text pos; Paren p]] -> SimpleLexArg(lemma,parse_pos (pos, split_symbol Comma [] p)) | |
270 | + | "lex",[[Text lemma];[Text pos]] -> SimpleLexArg(lemma,parse_pos (pos, [])) | |
271 | + | "lex",[[Text id];[Text lemma];[Text pos; Paren p]] -> LexArg(int_of_string id,lemma,parse_pos (pos, split_symbol Comma [] p)) | |
272 | + | "lex",[[Text id];[Text lemma];[Text pos]] -> LexArg(int_of_string id,lemma,parse_pos (pos, [])) | |
273 | + | s,ll -> print_endline ("parse_phrase: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll string_of_token_list) ^ ")"); Null | |
375 | 274 | |
376 | 275 | and parse_morf = function |
377 | - "np",[case] -> Phrase(NP(parse_case case)) | |
378 | - | "prepnp",[prep; case] -> Phrase(PrepNP(Sem,parse_prep prep,parse_case case)) | |
379 | - | "adjp",[case] -> Phrase(AdjP(parse_case case)) | |
380 | - | "prepadjp",[prep; case] -> Phrase(PrepAdjP(Sem,parse_prep prep,parse_case case)) | |
381 | - | "nump",[case] -> Phrase(NumP(parse_case case)) | |
382 | - | "prepnump",[prep; case] -> Phrase(PrepNumP(Sem,parse_prep prep,parse_case case)) | |
383 | - | "comprepnp",[prep] -> Phrase(ComprepNP(Sem,parse_prep prep)) | |
384 | - | "compar",[prep] -> PhraseAbbr(ComparP(parse_prep prep),[]) | |
385 | - | "cp",[comp] -> PhraseComp(Cp,parse_comp comp) | |
386 | - | "ncp",[case; comp] -> PhraseComp(Ncp(parse_case case),parse_comp comp) | |
387 | - | "prepncp",[prep; case; comp] -> PhraseComp(Prepncp(parse_prep prep,parse_case case),parse_comp comp) | |
388 | - | "infp",[aspect] -> Phrase(InfP(parse_aspect aspect(*,ReqUndef*))) | |
389 | - | "fixed",[morf;lex] -> Phrase(FixedP((*parse_morf_single morf,*) get_lexeme (parse_lex lex))) | |
390 | - | "fixed",[morf;lex;lex2] -> Phrase(FixedP((*parse_morf_single morf,*) get_lexeme (parse_lex (lex @ [Comma] @ lex2)))) | |
391 | - | "or",[] -> Phrase Or | |
392 | - | "refl",[] -> Phrase (*Refl*)(Lex "się") | |
393 | - | "recip",[] -> Phrase (*Recip*)(Lex "się") (* FIXME *) | |
394 | - | "E",[] -> E Null | |
395 | - | "advp",[mode] -> let mode,morfs = parse_mode mode in PhraseAbbr(Advp mode,morfs) | |
396 | - | "xp",[mode] -> let mode,morfs = parse_mode mode in PhraseAbbr(Xp mode,morfs) | |
397 | - | "nonch",[] -> PhraseAbbr(Nonch,[]) | |
398 | - | "distrp",[] -> PhraseAbbr(Distrp,[]) | |
399 | - | "possp",[] -> PhraseAbbr(Possp,[]) | |
400 | - | "null",[] -> Phrase Null | |
401 | - | "lex",[Text a; Paren p] :: ll -> parse_lex_morf (a, split_symbol Comma [] p, ll) | |
402 | - | "lex",[Text a] :: ll -> parse_lex_morf (a, [], ll) | |
403 | - | s,ll -> failwith ("parse_morf: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll ENIAMwalStringOf.token_list) ^ ")") | |
404 | - | |
405 | -and parse_lex_morf = function | |
406 | - | "np",[case],[num;lex;restr] -> LexPhrase([SUBST(parse_number num,parse_case case),parse_lex lex], parse_restr restr) | |
407 | - | "prepnp",[prep; case],[num;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);SUBST(parse_number num,parse_case case),parse_lex lex], parse_restr restr) | |
408 | - | "adjp",[case],[num;gender;grad;lex;restr] -> LexPhrase([ADJ(parse_number num,parse_case case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr) | |
409 | - | "prepadjp",[prep; case],[num;gender;grad;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);ADJ(parse_number num,parse_case case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr) | |
410 | - | "ppasp",[case],[num;gender;negation;lex;restr] -> LexPhrase([PPAS(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation),parse_lex lex], parse_restr restr) | |
411 | - | "prepppasp",[prep;case],[num;gender;negation;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);PPAS(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation),parse_lex lex], parse_restr restr) | |
412 | - | "pactp",[case],[num;gender;negation;lex;refl;restr] -> LexPhrase([PACT(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
413 | - | "preppactp",[prep;case],[num;gender;negation;lex;refl;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);PACT(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
414 | - | "gerp",[case],[num;negation;lex;refl;restr] -> LexPhrase([GER(parse_number num,parse_case case,GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
415 | - | "prepgerp",[prep;case],[num;negation;lex;refl;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);GER(parse_number num,parse_case case,GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
416 | - | "nump",[case],[lex1;lex2;restr] -> LexPhrase([NUM(parse_case case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr) | |
417 | - | "prepnump",[prep;case],[lex1;lex2;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);NUM(parse_case case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr) | |
418 | - | "compar",[prep],[morfs] -> LexPhrase([COMPAR,Lexeme(parse_prep prep)],(Ratrs,Xlist.map (split_symbol Plus [] morfs) (fun morf -> new_schema ARG [] [] [parse_morf_single morf]))) | |
419 | - | "infp",[aspect],[negation;lex;refl;restr] -> LexPhrase([INF(parse_aspect aspect,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
420 | - | "qub",[],[lex;restr] -> LexPhrase([QUB,parse_lex lex], parse_restr restr) | |
421 | - | "advp",[mode],[grad;lex;restr] -> | |
422 | - (match parse_mode mode with | |
423 | - mode, [] -> LexPhraseMode(mode,[ADV(parse_grad grad),parse_lex lex], parse_restr restr) | |
424 | - | _ -> failwith "parse_lex_morf") | |
425 | - | "xp",[[Text mode;SqBra [Text "prepgerp"; Paren [prep;Comma;case]]]],[num;negation;lex;refl;restr] -> | |
426 | - LexPhraseMode(mode,[PREP(parse_case [case]),Lexeme(parse_prep [prep]);GER(parse_number num,parse_case [case],GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
427 | - | "xp",[mode],ll -> | |
428 | - (match parse_mode mode,ll with | |
429 | - (mode,[Phrase(NP case)]),[num;lex;restr] -> LexPhraseMode(mode,[SUBST(parse_number num,case),parse_lex lex], parse_restr restr) | |
430 | - | (mode,[Phrase(PrepNP(_,prep,case))]),[num;lex;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;SUBST(parse_number num,case),parse_lex lex], parse_restr restr) | |
431 | - | (mode,[Phrase(PrepAdjP(_,prep,case))]),[num;gender;grad;lex;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;ADJ(parse_number num,case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr) | |
432 | - | (mode,[Phrase(NumP case)]),[lex1;lex2;restr] -> LexPhraseMode(mode,[NUM(case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr) | |
433 | - | (mode,[Phrase(PrepNumP(_,prep,case))]),[lex1;lex2;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;NUM(case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr) | |
434 | - | (mode,[PhraseAbbr(Advp _,[])]),[grad;lex;restr] -> LexPhraseMode(mode,[ADV(parse_grad grad),parse_lex lex], parse_restr restr) | |
435 | - | (mode,[PhraseAbbr(ComparP prep,[])]),[morfs] -> LexPhraseMode(mode,[COMPAR,Lexeme prep],(Ratrs,Xlist.map (split_symbol Plus [] morfs) (fun morf -> new_schema ARG [] [] [parse_morf_single morf]))) | |
436 | - | (mode,[PhraseComp(Cp,(ctype,[Comp comp]))]),[negation;lex;refl;restr] -> LexPhrase([COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
437 | - | _ -> failwith ("parse_lex_morf: xp(" ^ ENIAMwalStringOf.token_list mode ^ ")," ^ String.concat "," (Xlist.map ll ENIAMwalStringOf.token_list) ^ ")")) | |
438 | - | "cp",[comp],[negation;lex;refl;restr] -> | |
439 | - (match parse_comp comp with | |
440 | - ctype,[Comp comp] -> LexPhrase([COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
441 | - | ctype,[Comp comp1;Comp comp2] -> LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
442 | - | _,comp -> failwith ("parse_lex_morf comp: " (*^ ENIAMwalStringOf.comp comp*))) | |
443 | - | "ncp",[case;comp],[negation;lex;refl;restr] -> | |
444 | - (match parse_comp comp with | |
445 | - ctype,[Comp comp] -> LexPhrase([SUBST(Number "sg",parse_case case),Lexeme "to";COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr) | |
446 | - | _,comp -> failwith ("parse_lex_morf comp: " (*^ ENIAMwalStringOf.comp comp*))) | |
447 | - | s,ll,ll2 -> failwith ("parse_lex_morf: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll ENIAMwalStringOf.token_list) ^ ")," ^ String.concat "," (Xlist.map ll2 ENIAMwalStringOf.token_list) ^ ")") | |
448 | - | |
449 | -and parse_morfs l = | |
450 | - let morfs = Xlist.map (split_symbol Semic [] l) (function | |
451 | - [Text a; Paren p] -> parse_morf (a, split_symbol Comma [] p) | |
452 | - | [Text a] -> parse_morf (a, []) | |
453 | - | l -> failwith ("parse_morfs: " ^ ENIAMwalStringOf.token_list l)) in | |
454 | -(* expand_prep_morfs equivs (expand_comp_morfs equivs morfs) *)morfs | |
455 | - | |
456 | -and parse_morf_single l = (* FIXME: problem z equivs *) | |
457 | - match parse_morfs l with | |
458 | - [x] -> x | |
459 | - | l -> failwith ("parse_morf_single: " ^ String.concat ";" (Xlist.map l ENIAMwalStringOf.morf)) | |
460 | - | |
461 | -let split_schema schema = | |
462 | - find_brackets (Xlist.map (Str.full_split (Str.regexp "\\]\\|\\+\\|{\\|}\\|(\\|)\\|,\\|;\\|'\\|\\[") schema) (function | |
463 | - Str.Text s -> Text s | |
464 | - | Str.Delim "(" -> LParen | |
465 | - | Str.Delim ")" -> RParen | |
466 | - | Str.Delim "{" -> LBracet | |
467 | - | Str.Delim "}" -> RBracet | |
468 | - | Str.Delim "[" -> LSqBra | |
469 | - | Str.Delim "]" -> RSqBra | |
470 | - | Str.Delim ";" -> Semic | |
471 | - | Str.Delim "+" -> Plus | |
472 | - | Str.Delim "," -> Comma | |
473 | - | Str.Delim "'" -> Quot | |
474 | - | _ -> failwith "parse_schema")) | |
276 | + [Text a; Paren p] -> parse_phrase (a, split_symbol Comma [] p) | |
277 | + | [Text a] -> parse_phrase (a, []) | |
278 | + | l -> failwith ("parse_morf: " ^ string_of_token_list l) | |
475 | 279 | |
476 | -let parse_schema schema = | |
477 | - if schema = "" then [] else ( | |
478 | - let l = split_schema schema in | |
479 | -(* print_endline schema; *) | |
280 | +let parse_roles l = | |
281 | + let r,cr,ce,m = Xlist.fold l ([],[],[],[]) (fun (r,controller,controllee,m) -> function | |
282 | + "subj" -> SUBJ :: r, controller, controllee, m | |
283 | + | "obj" -> OBJ :: r, controller, controllee, m | |
284 | + | "controller" -> r, "1" :: controller, controllee, m | |
285 | + | "controllee" -> r, controller, "1" :: controllee, m | |
286 | + | "controller2" -> r, "2" :: controller, controllee, m | |
287 | + | "controllee2" -> r, controller, "2" :: controllee, m | |
288 | + | "misc" -> r, controller, controllee, "misc" :: m | |
289 | + | "locat" -> r, controller, controllee, "locat" :: m | |
290 | + | "abl" -> r, controller, controllee, "abl" :: m | |
291 | + | "adl" -> r, controller, controllee, "adl" :: m | |
292 | + | "caus" -> r, controller, controllee, "caus" :: m | |
293 | + | "mod" -> r, controller, controllee, "mod" :: m | |
294 | + | "temp" -> r, controller, controllee, "temp" :: m | |
295 | + | "dur" -> r, controller, controllee, "dur" :: m | |
296 | + | "possp" -> r, controller, controllee, "possp" :: m | |
297 | + | "perl" -> r, controller, controllee, "perl" :: m | |
298 | + | "instr" -> r, controller, controllee, "instr" :: m | |
299 | + | "dest" -> r, controller, controllee, "dest" :: m | |
300 | + | "distrp" -> r, controller, controllee, "distrp" :: m | |
301 | + | "lemma" -> r, controller, controllee, "lemma" :: m | |
302 | + | "refl" -> r, controller, controllee, "refl" :: m | |
303 | + | "recip" -> r, controller, controllee, "recip" :: m | |
304 | + | "nonch" -> r, controller, controllee, "nonch" :: m | |
305 | + | "pron" -> r, controller, controllee, "pron" :: m | |
306 | + | "" -> r, controller, controllee, m | |
307 | + | x -> failwith ("parse_roles: " ^ x)) in | |
308 | + (match r with | |
309 | + [] -> ARG | |
310 | + | [x] -> x | |
311 | + | _ -> failwith "parse_roles"),cr,ce,m | |
312 | + | |
313 | +let parse_schema = function | |
314 | + [] -> NoRestr,[] | |
315 | + | [Text "atr"] -> Atr,[] | |
316 | + | [Text "ratr"] -> Ratr,[] | |
317 | + | [Text "atr1"] -> Atr1,[] | |
318 | + | [Text "ratr1"] -> Ratr1,[] | |
319 | + | l -> NoRestr,Xlist.map (split_symbol Plus [] l) (function | |
320 | + [Bracet l] -> {empty_position with morfs=Xlist.map (split_symbol Semic [] l) parse_morf} | |
321 | + | [Text s; Bracet l] -> | |
322 | + let gf,cr,ce,m = parse_roles [s] in | |
323 | + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=Xlist.map (split_symbol Semic [] l) parse_morf} | |
324 | + | l -> print_endline ("parse_schema: " ^ string_of_token_list l); empty_position) | |
325 | + | |
326 | +let parse_simple_morf = function | |
327 | + [Text id] -> MorfId (int_of_string id) | |
328 | + | l -> print_endline ("parse_simple_morf: " ^ string_of_token_list l); Null | |
329 | + | |
330 | +let parse_simple_schema l = | |
331 | + if l = [] then [] else | |
480 | 332 | Xlist.map (split_symbol Plus [] l) (function |
481 | - [Bracet b] -> let r,cr,ce = parse_roles [] in new_schema r cr ce (parse_morfs b) | |
482 | - | [Text s1;Bracet b] -> let r,cr,ce = parse_roles [s1] in new_schema r cr ce (parse_morfs b) | |
483 | - | [Text s1;Comma;Text s2;Bracet b] -> let r,cr,ce = parse_roles [s1;s2] in new_schema r cr ce (parse_morfs b) | |
484 | - | [Text s1;Comma;Text s2;Comma;Text s3;Bracet b] -> let r,cr,ce = parse_roles [s1;s2;s3] in new_schema r cr ce (parse_morfs b) | |
485 | - | _ -> failwith ("parse_schema: " ^ schema))) | |
486 | - | |
487 | -let parse_lexeme s = | |
488 | - match Str.split (Str.regexp " ") s with | |
489 | - [s] -> s,"" | |
490 | - | [s;"się"] -> s,"się" | |
491 | - | _ -> failwith ("parse_lexeme: " ^ s) | |
492 | - | |
493 | -let load_frames filename = | |
494 | - Xlist.fold (List.tl (Str.split (Str.regexp "\n") (File.load_file filename))) StringMap.empty (fun schemas line -> | |
495 | - if String.sub line 0 1 = "%" then schemas else | |
496 | - match Str.split_delim (Str.regexp ": ") line with | |
497 | - [lexeme;opinion;negation;pred;aspect;schema] -> | |
498 | - let lexeme,refl = parse_lexeme lexeme in | |
499 | - StringMap.add_inc schemas lexeme [refl,opinion,negation,pred,aspect,schema] (fun l -> (refl,opinion,negation,pred,aspect,schema) :: l) | |
500 | - | _ -> failwith ("load_frames: " ^ line)) | |
501 | - | |
502 | - | |
503 | - | |
504 | -let rec extract_fixed_schema fixed schema = | |
505 | - Xlist.fold schema fixed (fun fixed s -> | |
506 | - Xlist.fold s.morfs fixed (fun fixed -> function | |
507 | - Phrase(FixedP s) -> StringSet.add fixed s | |
508 | - | LexPhrase(_,(_,schema)) -> extract_fixed_schema fixed schema | |
509 | - | LexPhraseMode(_,_,(_,schema)) -> extract_fixed_schema fixed schema | |
510 | - | _ -> fixed)) | |
511 | - | |
512 | -(*let extract_fixed fixed_filename = (* FIXME: nie wykrywa fixed w argumentach leksykalizacji *) | |
513 | -(* let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in *) | |
514 | - let fixed = Xlist.fold Paths.walenty_filenames StringSet.empty (fun fixed filename -> | |
515 | -(* print_endline filename; *) | |
516 | - let frames = load_frames (Paths.walenty_path ^ filename) in | |
517 | - StringMap.fold frames fixed (fun fixed _ l -> | |
518 | - Xlist.fold l fixed (fun fixed (refl,opinion,negation,pred,aspect,schema) -> | |
519 | - extract_fixed_schema fixed (parse_schema schema)))) in | |
520 | - let entries = StringSet.fold fixed StringSet.empty (fun entries s -> | |
521 | - let tokens = List.rev (Xlist.fold (Str.full_split (Str.regexp " \\|,") s) [] (fun l -> function | |
522 | - Str.Text t -> t :: l | |
523 | - | Str.Delim " " -> l | |
524 | - | Str.Delim t -> t :: l)) in | |
525 | - let tokens_string = String.concat " " tokens in | |
526 | - Xlist.fold tokens entries (fun entries token -> | |
527 | - StringSet.add entries (Printf.sprintf "%s\t%s\t%s:fixed\tfixed" token s tokens_string))) in | |
528 | - File.file_out fixed_filename (fun file -> | |
529 | - StringSet.iter entries (fun entry -> | |
530 | - Printf.fprintf file "%s\n" entry)) | |
531 | - | |
532 | -(* generowanie fixed.tab *) | |
533 | -(* let _ = extract_fixed "data/fixed.tab" *) | |
333 | + [Bracet l] -> | |
334 | + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in | |
335 | + {empty_position with morfs=morfs} | |
336 | + | [Text s; Bracet l] -> | |
337 | + let gf,cr,ce,m = parse_roles [s] in | |
338 | + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in | |
339 | + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=morfs} | |
340 | + | [Text s1; Comma; Text s2; Bracet l] -> | |
341 | + let gf,cr,ce,m = parse_roles [s1;s2] in | |
342 | + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in | |
343 | + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=morfs} | |
344 | + | [Text s1; Comma; Text s2; Comma; Text s3; Bracet l] -> | |
345 | + let gf,cr,ce,m = parse_roles [s1;s2;s3] in | |
346 | + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in | |
347 | + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=morfs} | |
348 | + | [Text s1; Comma; Text s2; Comma; Text s3; Comma; Text s4; Bracet l] -> | |
349 | + let gf,cr,ce,m = parse_roles [s1;s2;s3;s4] in | |
350 | + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in | |
351 | + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=morfs} | |
352 | + | l -> print_endline ("parse_simple_schema: " ^ string_of_token_list l); empty_position) | |
353 | + | |
354 | +let parse_entry (restr,schema) = function | |
355 | + [Text "lex"; Paren[Text lemma;Comma;Text pos]] -> SimpleLexEntry(lemma,pos) | |
356 | + | [Text "lex"; Paren[Text id;Comma;Text lemma;Comma;Text pos]] -> LexEntry(int_of_string id,lemma,pos,restr,schema) | |
357 | + | [Text "comprepnp"; Paren[Text lemma]] -> ComprepNPEntry(lemma,restr,schema) | |
358 | + | l -> print_endline ("parse_entry: " ^ string_of_token_list l); SimpleLexEntry("","") | |
359 | + | |
360 | +let load_entries filename = | |
361 | + let l = File.load_tab filename (function | |
362 | + [pos; lemma; entry; schema] -> pos, lemma, entry, schema | |
363 | + | [pos; lemma; entry] -> pos, lemma, entry, "" | |
364 | + | _ -> failwith "load_entries") in | |
365 | + Xlist.fold l Entries.empty (fun entries (pos,lemma,entry,schema) -> | |
366 | + let schema = parse_schema (split_text schema) in | |
367 | + let entry = parse_entry schema (split_text entry) in | |
368 | + Entries.add_inc entries pos lemma entry) | |
369 | + | |
370 | +let load_phrases filename = | |
371 | + let l = File.load_tab filename (function | |
372 | + id :: morfs -> int_of_string id, morfs | |
373 | + | _ -> failwith "load_phrases") in | |
374 | + Xlist.fold l IntMap.empty (fun phrases (id,morfs) -> | |
375 | + (* print_endline (string_of_int id); *) | |
376 | + let morfs = Xlist.map morfs (fun morf -> parse_morf (split_text morf)) in | |
377 | + IntMap.add phrases id morfs) | |
378 | + | |
379 | +let load_schemata filename = | |
380 | + let l = File.load_tab filename (function | |
381 | + [pos; lemma; opinion; neg; pred; aspect; schema] -> pos, lemma, opinion, neg, pred, aspect, schema | |
382 | + | _ -> failwith "load_schemata") in | |
383 | + Xlist.fold l Entries.empty (fun entries (pos,lemma,opinion,neg,pred,aspect,schema) -> | |
384 | + let opinion = parse_opinion opinion in | |
385 | + let neg = parse_negation [Text neg] in | |
386 | + let pred = parse_pred pred in | |
387 | + let aspect = parse_aspect [Text aspect] in | |
388 | + let schema = parse_simple_schema (split_text schema) in | |
389 | + let entry = opinion,neg,pred,aspect,schema in | |
390 | + Entries.add_inc entries pos lemma entry) | |
391 | + | |
392 | +let phrases = load_phrases "results/phrases.tab" | |
393 | +let entries = load_entries "results/entries.tab" | |
394 | +let schemata = load_schemata "results/schemata.tab" | |
534 | 395 | |
396 | +(* | |
535 | 397 | let print_subjs () = |
536 | 398 | (* let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in *) |
537 | 399 | let subjs = Xlist.fold Paths.walenty_filenames StringQMap.empty (fun subjs filename -> |
... | ... | @@ -560,18 +422,4 @@ let print_ctrls () = |
560 | 422 | Printf.printf "%5d %s\n" v s) |
561 | 423 | |
562 | 424 | (* let _ = print_ctrls () *) |
563 | - | |
564 | -(* Test parsowania *) | |
565 | -(*let _ = | |
566 | - let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in | |
567 | - Xlist.iter Paths.walenty_filenames (fun filename -> | |
568 | - print_endline filename; | |
569 | - let frames = load_frames (Paths.walenty_path ^ filename) in | |
570 | - StringMap.iter frames (fun _ l -> | |
571 | - Xlist.iter l (fun (refl,opinion,negation,pred,aspect,schema) -> | |
572 | -(* print_endline schema; *) | |
573 | - ignore (parse_opinion opinion); | |
574 | - ignore (parse_negation [Text negation]); | |
575 | - ignore (parse_pred [Text pred]); | |
576 | - ignore (parse_aspect [Text aspect]); | |
577 | - ignore (expand_equivs_schema equivs (expand_subtypes subtypes (expand_schema expands (parse_schema schema)))))))*)*) | |
425 | +*) | |
... | ... |
walenty/ENIAMwalRealizations.ml
... | ... | @@ -17,31 +17,21 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open ENIAMwalTypes | |
20 | +open ENIAMwalTypes2 | |
21 | 21 | open Xstd |
22 | 22 | |
23 | 23 | let rec expand_schema_morf expands = function |
24 | - PhraseAbbr(Advp "misc",[]) -> PhraseAbbr(Advp "misc",[]) | |
25 | - | PhraseAbbr(Advp "mod",[]) -> PhraseAbbr(Advp "mod",[]) | |
26 | - (* | PhraseAbbr(ComparP s,[]) -> PhraseAbbr(ComparP s,[Phrase(ComparNP(s,Str));Phrase(ComparPP(s))]) (* FIXME: albo do walTEI albo usunąć *) *) | |
24 | + PhraseAbbr(Advp "misc",[]) -> PhraseAbbr(Advp "misc",[Phrase AdvP]) | |
25 | + | PhraseAbbr(Advp "mod",[]) -> PhraseAbbr(Advp "mod",[Phrase AdvP]) | |
27 | 26 | | PhraseAbbr(abbr,[]) -> (try PhraseAbbr(abbr,AbbrMap.find expands abbr) with Not_found -> failwith "expand_schema_morf") |
28 | 27 | | PhraseAbbr(abbr,morfs) -> PhraseAbbr(abbr,Xlist.map morfs (expand_schema_morf expands)) |
29 | 28 | | LexPhrase(pos_lex,(restr,schema)) -> LexPhrase(pos_lex,(restr,expand_schema expands schema)) |
30 | - (* | LexPhraseMode(mode,pos_lex,(restr,schema)) -> LexPhraseMode(mode,pos_lex,(restr,expand_schema expands schema)) *) | |
31 | 29 | | morf -> morf |
32 | 30 | |
33 | 31 | and expand_schema expands schema = |
34 | 32 | Xlist.map schema (fun s -> |
35 | 33 | {s with morfs=Xlist.map s.morfs (expand_schema_morf expands)}) |
36 | 34 | |
37 | -(* let get_mode = function | |
38 | - Xp(m) -> m | |
39 | - | Advp(m) -> m | |
40 | - (* | ComparP prep -> "compar" *) | |
41 | - | Nonch -> "nonch" | |
42 | - | Distrp -> "distrp" | |
43 | - | Possp -> "possp" *) | |
44 | - | |
45 | 35 | let rec expand_subtypes_morf subtypes = function |
46 | 36 | PhraseComp(comp_morf,(ctype,comps)) -> |
47 | 37 | let comps = if comps = [] then (try CompMap.find subtypes ctype with Not_found -> failwith "expand_subtypes_schema") else comps in |
... | ... | @@ -50,18 +40,8 @@ let rec expand_subtypes_morf subtypes = function |
50 | 40 | | Ncp case -> NCP(case,ctype,comp) |
51 | 41 | | Prepncp(prep,case) -> PrepNCP(prep,case,ctype,comp))) |
52 | 42 | | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,expand_subtypes subtypes schema))] |
53 | - (* | LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,pos_lex,(restr,expand_subtypes subtypes schema))] *) | |
54 | 43 | | PhraseAbbr(abbr,morfs) -> |
55 | - (* let mode = get_mode abbr in *) | |
56 | 44 | List.flatten (Xlist.map morfs (expand_subtypes_morf subtypes)) |
57 | - (* Xlist.map morfs (function | |
58 | - Phrase p -> PhraseMode(mode,p) | |
59 | - | PhraseMode(_,p) -> PhraseMode(mode,p) | |
60 | - | LexPhrase(pos_lex,r) -> LexPhraseMode(mode,pos_lex,r) | |
61 | - | LexRPhrase(pos_lex,r) -> LexRPhraseMode(mode,pos_lex,r) | |
62 | - | LexPhraseMode(m,pos_lex,r) -> LexPhraseMode(mode,pos_lex,r) | |
63 | - | LexRPhraseMode(m,pos_lex,r) -> LexRPhraseMode(mode,pos_lex,r) | |
64 | - | _ -> failwith "expand_subtypes_morf") *) | |
65 | 45 | | E Null -> [E(NP(Str));E(NCP(Str,CompTypeUndef,CompUndef));E(CP(CompTypeUndef,CompUndef)); E(Or)] |
66 | 46 | | morf -> [morf] |
67 | 47 | |
... | ... | @@ -74,7 +54,6 @@ let expand_equivs_phrase equivs = function |
74 | 54 | | PrepAdjP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepAdjP(prep,case)) |
75 | 55 | | PrepNumP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepNumP(prep,case)) |
76 | 56 | | ComprepNP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComprepNP(prep)) |
77 | - (* | ComparNP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparNP(prep,case)) *) | |
78 | 57 | | ComparP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparP(prep)) |
79 | 58 | | CP(ctype,Comp comp) -> Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp -> CP(ctype,Comp comp)) |
80 | 59 | | NCP(case,ctype,Comp comp) -> Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp -> NCP(case,ctype,Comp comp)) |
... | ... | @@ -93,80 +72,27 @@ let rec expand_equivs_lex equivs = function |
93 | 72 | |
94 | 73 | let rec expand_equivs_morf equivs = function |
95 | 74 | Phrase phrase -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> Phrase phrase) |
96 | - (* | PhraseMode(mode,phrase) -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> PhraseMode(mode,phrase)) *) | |
97 | 75 | | E phrase -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> E phrase) |
98 | 76 | | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] |
99 | - (* | LexRPhrase(pos_lex,(restr,schema)) -> [LexRPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] | |
100 | - | LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] | |
101 | - | LexRPhraseMode(mode,pos_lex,(restr,schema)) -> [LexRPhraseMode(mode,Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] *) | |
102 | -(* | PhraseAbbr(abbr,morfs) -> [PhraseAbbr(abbr,List.flatten (Xlist.map morfs (expand_equivs_morf equivs)))] *) | |
103 | 77 | | morf -> failwith ("expand_equivs_morf: " ^ ENIAMwalStringOf.morf morf) |
104 | 78 | |
105 | 79 | and expand_equivs_schema equivs schema = |
106 | 80 | Xlist.map schema (fun s -> |
107 | 81 | {s with morfs=List.flatten (Xlist.map s.morfs (expand_equivs_morf equivs))}) |
108 | 82 | |
109 | - | |
110 | -let rec load_realizations_rec (expands,subtypes,equivs) found rev = function | |
111 | - [] -> if rev <> [] || found <> [] then failwith "load_realizations_rec" else expands,subtypes,equivs | |
112 | - | [Str.Text s; Str.Delim "-->"] :: l -> load_realizations_rec (expands,subtypes,equivs) ((s,rev) :: found) [] l | |
113 | - | [Str.Delim " "; Str.Text s; Str.Delim "\t"; Str.Text t] :: l -> | |
114 | - load_realizations_rec (expands,subtypes,equivs) found ((s,t) :: rev) l | |
115 | - | [Str.Delim " "; Str.Text s] :: l -> | |
116 | - load_realizations_rec (expands,subtypes,equivs) found ((s,"") :: rev) l | |
117 | - | [Str.Delim "% "; Str.Text "Phrase types expand:"] :: l -> load_realizations_rec (found,subtypes,equivs) [] rev l | |
118 | - | [Str.Delim "% "; Str.Text "Attributes subtypes:"] :: l -> load_realizations_rec (expands,found,equivs) [] rev l | |
119 | - | [Str.Delim "% "; Str.Text "Attributes equivalents:"] :: l -> load_realizations_rec (expands,subtypes,found) [] rev l | |
120 | -(* | [Str.Delim "% "; Str.Text s] :: l -> print_endline s; load_realizations_rec found rev l *) | |
121 | - | [] :: l -> load_realizations_rec (expands,subtypes,equivs) found rev l | |
122 | - | _ -> failwith "load_realizations_rec" | |
123 | - | |
124 | -(* let find_comprep_reqs compreps = | |
125 | - Xlist.fold compreps StringMap.empty (fun comprep_reqs (s,l) -> | |
126 | - let l = Xlist.map l (function | |
127 | - LexPhrase(pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function | |
128 | - _,Lexeme s -> StringSet.add set s | |
129 | - | _ -> set) | |
130 | - (* | LexPhraseMode(_,pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function | |
131 | - _,Lexeme s -> StringSet.add set s | |
132 | - | _ -> set) *) | |
133 | - | morf -> failwith ("find_compreps_reqs: " ^ ENIAMwalStringOf.morf morf)) in | |
134 | - if l = [] then failwith "find_compreps_reqs"; | |
135 | - StringMap.add comprep_reqs s (StringSet.to_list (Xlist.fold (List.tl l) (List.hd l) StringSet.union))) *) | |
136 | - | |
137 | -(* let create_comprep_dict compreps = | |
138 | - Xlist.fold compreps StringMap.empty (fun compreps (s,l) -> | |
139 | - Xlist.fold l compreps (fun compreps -> function | |
140 | - LexPhrase([PREP _,_;SUBST _,lex],_) as morf -> | |
141 | - let lexemes = get_lexemes lex in | |
142 | - Xlist.fold lexemes compreps (fun compreps lexeme -> | |
143 | - StringMap.add_inc compreps lexeme ["subst",(s,morf)] (fun l -> ("subst",(s,morf)) :: l)) | |
144 | - (* | LexPhraseMode("misc",[ADV grad,lex],restr) -> | |
145 | - let morf = LexPhrase([ADV grad,lex],restr) in | |
146 | - let lexemes = get_lexemes lex in | |
147 | - Xlist.fold lexemes compreps (fun compreps lexeme -> | |
148 | - StringMap.add_inc compreps lexeme ["adv",(s,morf)] (fun l -> ("adv",(s,morf)) :: l)) *) | |
149 | - | LexPhrase([PREP _,_;NUM _,_;SUBST _,lex],_) as morf -> | |
150 | - let lexemes = get_lexemes lex in | |
151 | - Xlist.fold lexemes compreps (fun compreps lexeme -> | |
152 | - StringMap.add_inc compreps lexeme ["subst",(s,morf)] (fun l -> ("subst",(s,morf)) :: l)) | |
153 | - | morf -> failwith ("create_comprep_dict: " ^ ENIAMwalStringOf.morf morf))) *) | |
154 | - | |
155 | -let load_realizations () = | |
156 | - (* let lines = Str.split (Str.regexp "\n") (File.load_file realizations_filename) in | |
157 | - let lines = Xlist.rev_map lines (fun line -> Str.full_split (Str.regexp "% \\|-->\\| \\|\t") line) in | |
158 | - let expands,subtypes,equivs = load_realizations_rec ([],[],[]) [] [] lines in *) | |
159 | - let subtypes = Xlist.fold ENIAMwalTEI.subtypes CompMap.empty (fun subtypes -> function | |
83 | +(* UWAGA: aktualnie equivs nie są wstawiane do expands *) | |
84 | +let load_realizations (expands,subtypes,equivs) = | |
85 | + let subtypes = Xlist.fold subtypes CompMap.empty (fun subtypes -> function | |
160 | 86 | "int",l -> CompMap.add subtypes Int (List.flatten (Xlist.map l (fun v -> snd(ENIAMwalTEI.parse_comp v)))) |
161 | 87 | | "rel",l -> CompMap.add subtypes Rel (List.flatten (Xlist.map l (fun v -> snd(ENIAMwalTEI.parse_comp v)))) |
162 | 88 | | _ -> failwith "load_realizations 1") in |
163 | - let equivs = Xlist.fold ENIAMwalTEI.equivs StringMap.empty (fun equivs (k,l) -> StringMap.add equivs k (k :: l)) in | |
164 | - let expands,compreps = Xlist.fold ENIAMwalTEI.expands (AbbrMap.empty,[]) (fun (expands, compreps) (id,k,l) -> | |
89 | + let equivs = Xlist.fold equivs StringMap.empty (fun equivs (k,l) -> StringMap.add equivs k (k :: l)) in | |
90 | + let expands,compreps = Xlist.fold expands (AbbrMap.empty,[]) (fun (expands, compreps) (id,k,l) -> | |
165 | 91 | match k with |
166 | 92 | PhraseAbbr(Advp m,[]) -> AbbrMap.add expands (Advp m) l, compreps |
167 | 93 | | PhraseAbbr(Nonch,[]) -> AbbrMap.add expands Nonch l, compreps |
168 | 94 | | PhraseAbbr(Xp m,[]) -> AbbrMap.add expands (Xp m) (List.flatten (Xlist.map l (function |
169 | - PhraseAbbr(Advp m,[]) -> (try AbbrMap.find expands (Advp m) with Not_found -> [PhraseAbbr(Advp m,[])]) (* FIXME: zakładam, że advp się nie rozmnoży *) | |
95 | + PhraseAbbr(Advp m,[]) -> (try AbbrMap.find expands (Advp m) with Not_found -> [PhraseAbbr(Advp m,[])]) (* zakładam, że advp się nie rozmnoży *) | |
170 | 96 | | morf -> [morf]))), compreps |
171 | 97 | | Phrase(ComprepNP s) -> expands, (s, l) :: compreps |
172 | 98 | | PhraseAbbr(Distrp,[]) -> AbbrMap.add expands Distrp l, compreps |
... | ... | @@ -174,53 +100,4 @@ let load_realizations () = |
174 | 100 | | _ -> failwith "load_realizations 2") in |
175 | 101 | let compreps = Xlist.map compreps (fun (s,morfs) -> |
176 | 102 | s, List.flatten (List.flatten (Xlist.map morfs (fun morf -> Xlist.map (expand_subtypes_morf subtypes (expand_schema_morf expands morf)) (expand_equivs_morf equivs))))) in |
177 | - (* let comprep_reqs = find_comprep_reqs compreps in *) | |
178 | - (* let compreps = create_comprep_dict compreps in *) | |
179 | - expands,compreps,(*comprep_reqs,*)subtypes,equivs | |
180 | - | |
181 | -let expands,compreps,(*comprep_reqs,*)subtypes,equivs = load_realizations () | |
182 | - | |
183 | - | |
184 | -(* Wypisanie realizacji *) | |
185 | -(* let _ = | |
186 | - Xlist.iter ENIAMwalTEI.expands (fun (id,morf,l) -> | |
187 | - Printf.printf "%d %s:\n" id (ENIAMwalStringOf.morf morf); | |
188 | - Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *) | |
189 | - | |
190 | -(* Wypisanie realizacji po przetworzeniu *) | |
191 | -(* let _ = | |
192 | - AbbrMap.iter expands (fun morf l -> | |
193 | - Printf.printf "%s:\n" (ENIAMwalStringOf.phrase_abbr morf); | |
194 | - Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *) | |
195 | - | |
196 | -let has_realization = function | |
197 | - PhraseAbbr _ -> true | |
198 | - | PhraseComp _ -> true | |
199 | - | _ -> false | |
200 | - | |
201 | -(* Wypisanie fraz, które podlegają rozwijaniu *) | |
202 | -(*let _ = | |
203 | - IntMap.iter ENIAMwalTEI.phrases (fun i morf -> | |
204 | - if has_realization morf then | |
205 | - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *) | |
206 | - | |
207 | -let phrases = | |
208 | - IntMap.map ENIAMwalTEI.phrases (fun morf -> | |
209 | - let morf = expand_schema_morf expands morf in | |
210 | - let morfs = expand_subtypes_morf subtypes morf in | |
211 | - let morf = List.flatten (Xlist.map morfs (expand_equivs_morf equivs)) in | |
212 | - morf) | |
213 | - | |
214 | -(* Wypisanie fraz, które podlegają rozwijaniu *) | |
215 | -(* let _ = | |
216 | - IntMap.iter phrases (fun i morf -> | |
217 | - if has_realization morf then | |
218 | - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *) | |
219 | - | |
220 | -(* let test_phrases = [17088; 17133; 1642] | |
221 | -let _ = | |
222 | - Xlist.iter test_phrases (fun i -> | |
223 | - let m1 = IntMap.find ENIAMwalTEI.phrases i in | |
224 | - let m2 = IntMap.find phrases i in | |
225 | - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m1); | |
226 | - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m2)) *) | |
103 | + expands,compreps,subtypes,equivs | |
... | ... |
walenty/ENIAMwalReduce.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAMwalTypes | |
21 | +open Xstd | |
22 | + | |
23 | +(* let rec assign_pro_args schema = | |
24 | + Xlist.map schema (fun s -> | |
25 | + let morfs = match s.morfs with | |
26 | + (E p) :: l -> E Pro :: (E p) :: l | |
27 | + | [LexPhrase _] as morfs -> morfs | |
28 | + | [Phrase(FixedP _)] as morfs -> morfs | |
29 | + | [Phrase(Lex _)] as morfs -> morfs | |
30 | + (* | [Phrase Refl] as morfs -> morfs | |
31 | + | [Phrase Recip] as morfs -> morfs*) | |
32 | + | Phrase Null :: _ as morfs -> morfs | |
33 | + | Phrase Pro :: _ as morfs -> morfs | |
34 | + | morfs -> if s.gf <> SUBJ && s.cr = [] && s.ce = [] then (Phrase Null) :: morfs else (Phrase Pro) :: morfs in (* FIXME: ustalić czy są inne przypadki uzgodnienia *) | |
35 | + (* let morfs = assign_pro_args_lex morfs in *) (* bez pro wewnątrz leksykalizacji *) | |
36 | + {s with morfs=morfs}) *) | |
37 | + | |
38 | +(*let assign_pro_args_lex morfs = | |
39 | + Xlist.map morfs (function | |
40 | + Lex(morf,specs,lex,restr) -> LexN(morf,specs,lex,assign_pro_args_restr restr) | |
41 | + | LexNum(morf,lex1,lex2,restr) -> LexNum(morf,lex1,lex2,assign_pro_args_restr restr) | |
42 | + | LexCompar(morf,l) -> LexCompar(morf,make_gfs_lex l) | |
43 | + | morf -> morf) | |
44 | + | |
45 | + and assign_pro_args_restr = function | |
46 | + Natr -> Natr | |
47 | + | Ratr1 schema -> Ratr1(assign_pro_args schema) | |
48 | + | Atr1 schema -> Atr1(assign_pro_args schema) | |
49 | + | Ratr schema -> Ratr(assign_pro_args schema) | |
50 | + | Atr schema -> Atr(assign_pro_args schema)*) | |
51 | + | |
52 | +exception ImpossibleSchema | |
53 | + | |
54 | +let rec reduce_comp lexemes = function | |
55 | + Comp s -> if StringMap.mem lexemes s then Comp s else raise Not_found | |
56 | + | Zeby -> if StringMap.mem lexemes "żeby" || StringMap.mem lexemes "że" then Zeby else raise Not_found | |
57 | + | Gdy -> if StringMap.mem lexemes "gdy" || StringMap.mem lexemes "gdyby" then Gdy else raise Not_found | |
58 | + | CompUndef -> failwith "reduce_comp" | |
59 | + | |
60 | +let reduce_phrase comprep_reqs lexemes = function | |
61 | + | PrepNP(prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
62 | + | PrepAdjP(prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
63 | + | ComprepNP(prep) as phrase -> if Xlist.fold (try StringMap.find comprep_reqs prep with Not_found -> []) true (fun b s -> b && StringMap.mem lexemes s) then phrase else raise Not_found | |
64 | + | ComparP(prep) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found | |
65 | + | CP(ctype,comp) -> CP(ctype,reduce_comp lexemes comp) | |
66 | + | NCP(case,ctype,comp) -> if StringMap.mem lexemes "to" then NCP(case,ctype,reduce_comp lexemes comp) else raise Not_found | |
67 | + | PrepNCP(prep,case,ctype,comp) -> if StringMap.mem lexemes prep && StringMap.mem lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp lexemes comp) else raise Not_found | |
68 | + | SimpleLexArg(lemma,_) as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found | |
69 | + | LexArg(_,lemma,_) as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found | |
70 | + | FixedP lemma as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found | |
71 | + | phrase -> phrase | |
72 | + | |
73 | +let rec reduce_morfs comprep_reqs lexemes = function | |
74 | + [] -> [] | |
75 | + | morf :: l -> (try [reduce_phrase comprep_reqs lexemes morf] with Not_found -> []) @ reduce_morfs comprep_reqs lexemes l | |
76 | + | |
77 | +let rec reduce_schema2 comprep_reqs lexemes = function | |
78 | + [] -> [] | |
79 | + | s :: l -> | |
80 | + let morfs = reduce_morfs comprep_reqs lexemes s.morfs in | |
81 | + if morfs = [] then reduce_schema2 comprep_reqs lexemes l else | |
82 | + {s with morfs=morfs} :: reduce_schema2 comprep_reqs lexemes l | |
83 | + | |
84 | +let rec reduce_schema comprep_reqs lexemes = function | |
85 | + [] -> [] | |
86 | + | s :: l -> | |
87 | + let morfs = reduce_morfs comprep_reqs lexemes s.morfs in | |
88 | + if morfs = [] then raise ImpossibleSchema else | |
89 | + {s with morfs=morfs} :: reduce_schema comprep_reqs lexemes l | |
90 | + | |
91 | +(* let reduce_schema_frame lexemes = function | |
92 | + Frame(atrs,schema) -> Frame(atrs,reduce_schema lexemes schema) | |
93 | + (* | ComprepFrame(s,morfs) -> | |
94 | + let morfs = reduce_morfs lexemes morfs in | |
95 | + if morfs = [] then raise ImpossibleSchema else ComprepFrame(s,morfs)*) | |
96 | + | _ -> failwith "reduce_schema_frame" *) | |
97 | + | |
98 | + | |
99 | +let reduce_entries lexemes entries = | |
100 | + StringMap.map entries (fun entries -> | |
101 | + StringSet.fold lexemes StringMap.empty (fun reduced lemma -> | |
102 | + try StringMap.add reduced lemma (StringMap.find entries lemma) | |
103 | + with Not_found -> reduced)) | |
... | ... |
walenty/ENIAMwalStringOf.ml
... | ... | @@ -17,26 +17,7 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open ENIAMwalTypes | |
21 | - | |
22 | -(*let rec token = function | |
23 | - Text s -> s | |
24 | - | Paren l -> "(" ^ String.concat "" (Xlist.map l token) ^ ")" | |
25 | - | Bracet l -> "{" ^ String.concat "" (Xlist.map l token) ^ "}" | |
26 | - | SqBra l -> "[" ^ String.concat "" (Xlist.map l token) ^ "]" | |
27 | - | LParen -> "(" | |
28 | - | RParen -> ")" | |
29 | - | LBracet -> "{" | |
30 | - | RBracet -> "}" | |
31 | - | LSqBra -> "[" | |
32 | - | RSqBra -> "]" | |
33 | - | Semic -> ";" | |
34 | - | Plus -> "+" | |
35 | - | Comma -> "," | |
36 | - | Quot -> "'" | |
37 | - | |
38 | -let token_list l = | |
39 | - String.concat "" (Xlist.map l token)*) | |
20 | +open ENIAMwalTypes2 | |
40 | 21 | |
41 | 22 | let opinion = function |
42 | 23 | Pewny -> "pewny" |
... | ... | @@ -87,10 +68,7 @@ let rec comp = function |
87 | 68 | let rec comp_type = function |
88 | 69 | Int -> "int" |
89 | 70 | | Rel -> "rel" |
90 | - (* | Sub -> "sub" | |
91 | - | Coord -> "coord" *) | |
92 | 71 | | CompTypeUndef -> "_" |
93 | - (* | CompTypeAgr -> "agr" *) | |
94 | 72 | |
95 | 73 | let number = function |
96 | 74 | Number s -> s |
... | ... | @@ -117,29 +95,16 @@ let acm = function |
117 | 95 | Acm s -> s |
118 | 96 | | AcmUndef -> "_" |
119 | 97 | |
120 | -(* let sem = function | |
121 | - Sem -> "sem" | |
122 | - | NoSem -> "nosem" *) | |
123 | - | |
124 | -(*let req = function | |
125 | - Req -> ",req" | |
126 | - | NReq -> ",nreq" | |
127 | - | ReqUndef -> ""*) | |
128 | - | |
129 | 98 | let gf = function |
130 | 99 | SUBJ -> "subj" |
131 | 100 | | OBJ -> "obj" |
132 | 101 | | ARG -> "arg"(*""*) |
133 | - (* | CORE -> "core" | |
134 | - | NOSEM -> "nosem" | |
135 | - | NOGF -> "nogf" | |
136 | - | ADJUNCT -> "adjunct" | |
137 | - | RAISED -> "raised" | |
138 | - | CLAUSE -> "clause" | |
139 | - | SENTENCE -> "sentence" *) | |
140 | 102 | |
141 | 103 | let pos = function |
142 | 104 | SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")" |
105 | + | PPRON12(n,c) -> "PPRON12(" ^ number n ^ "," ^ case c ^ ")" | |
106 | + | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")" | |
107 | + | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")" | |
143 | 108 | | PREP(c) -> "PREP(" ^ case c ^ ")" |
144 | 109 | | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")" |
145 | 110 | | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")" |
... | ... | @@ -162,7 +127,6 @@ let phrase = function |
162 | 127 | | NumP(c) -> "nump(" ^ case c ^ ")" |
163 | 128 | | PrepNumP(prep,c) -> "prepnump(" ^ prep ^ "," ^ case c ^ ")" |
164 | 129 | | ComprepNP(prep) -> "comprepnp(" ^ prep ^ ")" |
165 | - (* | ComparNP(prep,c) -> "comparnp(" ^ prep ^ "," ^ case c ^ ")" *) | |
166 | 130 | | ComparP(prep) -> "comparp(" ^ prep ^ ")" |
167 | 131 | | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")" |
168 | 132 | | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" |
... | ... | @@ -172,11 +136,9 @@ let phrase = function |
172 | 136 | | FixedP s -> "fixed(" ^ s ^ ")" |
173 | 137 | | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")" |
174 | 138 | | Or -> "or" |
175 | - | Refl -> "refl" | |
176 | - | Recip -> "recip" | |
177 | 139 | | Qub -> "qub" |
178 | - | Pro -> "pro" | |
179 | - | ProNG -> "prong" | |
140 | + (* | Pro -> "pro" | |
141 | + | ProNG -> "prong" *) | |
180 | 142 | | Null -> "null" |
181 | 143 | | GerP c -> "gerp(" ^ case c ^ ")" |
182 | 144 | | PrepGerP(prep,c) -> "prepgerp(" ^ prep ^ "," ^ case c ^ ")" |
... | ... | @@ -188,7 +150,6 @@ let phrase = function |
188 | 150 | let phrase_abbr = function |
189 | 151 | Xp(m) -> "xp(" ^ m ^ ")" |
190 | 152 | | Advp(m) -> "advp(" ^ m ^ ")" |
191 | - (* | ComparP prep -> "compar(" ^ prep ^ ")" *) | |
192 | 153 | | Nonch -> "nonch" |
193 | 154 | | Distrp -> "distrp" |
194 | 155 | | Possp -> "possp" |
... | ... | @@ -212,13 +173,7 @@ let restr = function |
212 | 173 | | Ratrs -> "ratrs" |
213 | 174 | | Atr1 -> "atr1" |
214 | 175 | | Ratr1 -> "ratr1" |
215 | -(* | Ratr1,s -> "ratr1(" ^ schema s ^ ")" | |
216 | - | Atr1,s -> "atr1(" ^ schema s ^ ")" | |
217 | - | Ratr,s -> "ratr(" ^ schema s ^ ")" | |
218 | - | Atr,s -> "atr(" ^ schema s ^ ")" | |
219 | - | Ratrs,s -> "ratrs(" ^ schema s ^ ")" | |
220 | - *) | NoRestr -> "" | |
221 | -(* | NoRestr,s -> schema s *) | |
176 | + | NoRestr -> "" | |
222 | 177 | |
223 | 178 | let controllers l = |
224 | 179 | Xlist.map l (function |
... | ... | @@ -230,42 +185,10 @@ let controllees l = |
230 | 185 | "1" -> "controllee" |
231 | 186 | | n -> "controllee" ^ n) |
232 | 187 | |
233 | -(*let lex_specs = function | |
234 | - NSpecs num -> number num | |
235 | - | AdvSpecs gr -> grad gr | |
236 | - | AdjSpecs(num,g,gr) -> number num ^ "," ^ gender g ^ "," ^ grad gr | |
237 | - | PpasSpecs(num,g,neg) -> number num ^ "," ^ gender g ^ "," ^ negation neg | |
238 | - | PactSpecs(num,g,neg,r) -> number num ^ "," ^ gender g ^ "," ^ negation neg ^ "," ^ refl r | |
239 | - | GerSpecs(num,neg,r) -> number num ^ "," ^ negation neg ^ "," ^ refl r | |
240 | - | CSpecs(neg,r) -> negation neg ^ "," ^ refl r | |
241 | - | NumSpecs g -> gender g | |
242 | - | EmptySpecs -> ""*) | |
243 | - | |
244 | -let mood = function | |
245 | - (*Mood*) s -> s | |
246 | - (*| MoodUndef -> "_"*) | |
247 | - | |
248 | -let tense t = t | |
249 | - | |
250 | -let aux = function | |
251 | - NoAux -> "-" | |
252 | - | PastAux -> "aux-past" | |
253 | - | FutAux -> "aux-fut" | |
254 | - | ImpAux -> "aux-imp" | |
255 | - | |
256 | -let nsem = function | |
257 | - Common s -> s | |
258 | - | Time -> "time" | |
259 | - | |
260 | -(* let direction = function | |
261 | - Forward -> "/" | |
262 | - | Backward -> "\\" | |
263 | - | Both -> "|" *) | |
264 | - | |
265 | 188 | let rec schema schema = |
266 | 189 | String.concat "+" (Xlist.map schema (fun s -> |
267 | 190 | String.concat "," ( |
268 | - (if s.gf = ARG then [] else [gf s.gf])@ | |
191 | + (if s.gf = ARG then [] else [gf s.gf])@s.mode@ | |
269 | 192 | (if s.role = "" then [] else [s.role])@ |
270 | 193 | (if s.role_attr = "" then [] else [s.role_attr])@ |
271 | 194 | (*s.sel_prefs@*)(controllers s.cr)@(controllees s.ce)) ^ "{" ^ String.concat ";" (Xlist.map s.morfs morf) ^ "}")) |
... | ... | @@ -276,82 +199,38 @@ let rec schema schema = |
276 | 199 | |
277 | 200 | and morf = function |
278 | 201 | Phrase p -> phrase p |
279 | - (* | PhraseMode(m,p) -> m ^ ":" ^ phrase p *) | |
280 | 202 | | E p -> "E(" ^ phrase p ^ ")" |
281 | 203 | | LexPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" |
282 | - (* | LexRPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *) | |
283 | - (* | LexPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *) | |
284 | - (* | LexRPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *) | |
285 | 204 | | PhraseAbbr(p,ml) -> phrase_abbr p ^ "[" ^ String.concat ";" (Xlist.map ml morf) ^ "]" |
286 | 205 | | PhraseComp(p,(ct,l)) -> phrase_comp p ^ "," ^ comp_type ct ^ "[" ^ String.concat ";" (Xlist.map l comp) ^ "]" |
287 | - (* | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")" | |
288 | - | LexArg(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ le ^ ")" *) | |
289 | - (* | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")" *) | |
290 | 206 | | MorfId id -> Printf.sprintf "id(%d)" id |
291 | 207 | | SimpleLexArg(le,p) -> "lex(" ^ le ^ "," ^ pos p ^ ")" |
292 | 208 | | LexArg(id,le,p) -> "lex(" ^ string_of_int id ^ "," ^ le ^ "," ^ pos p ^ ")" |
293 | -(* | LexRealization(mrf,le) -> "lex(" ^ morf mrf ^ "," ^ le ^ ")"*) | |
294 | - (* | Raised(mrf1,dir,mrf2) -> "raised([" ^ String.concat ";" mrf1 ^ "]," ^ direction dir ^ "[" ^ String.concat ";" mrf2 ^ "])" | |
295 | - | Multi l -> "multi(" ^ String.concat ";" (Xlist.map l phrase) ^ ")" *) | |
296 | - | |
297 | -(*and mode = function | |
298 | - Mode(s,[]) -> s | |
299 | - | Mode(s,l) -> s ^ "[" ^ "..."(*String.concat ";" (Xlist.map l morf)*) ^ "]" | |
300 | -(* | Mod l -> "mod[...]" *) | |
301 | - | Pron [] -> "pron" | |
302 | - | Pron l -> "pron" ^ "[" ^ "..."(*String.concat ";" (Xlist.map l morf)*) ^ "]" | |
303 | - | Misc -> "misc"*) | |
304 | - | |
305 | -let meaning m = | |
306 | - m.name ^ "-" ^ m.variant | |
307 | 209 | |
308 | -let frame_atrs = function | |
309 | - DefaultAtrs(m,r,o,neg,p,a) -> Printf.sprintf "%s: %s: %s: %s: %s: %s" (String.concat "; " (Xlist.map m meaning)) (refl r) (opinion o) (negation neg) (pred p) (aspect a) | |
310 | - | EmptyAtrs m -> Printf.sprintf "%s" (String.concat "; " (Xlist.map m meaning)) | |
311 | - | NounAtrs(m,nsyn,s(*,typ*)) -> Printf.sprintf "%s: %s: %s" (String.concat "; " (Xlist.map m meaning)) nsyn (nsem s) (*(String.concat ";" typ)*) | |
312 | - | AdjAtrs(m,c,adjsyn(*,adjsem,typ*)) -> Printf.sprintf "%s: %s: %s" (String.concat "; " (Xlist.map m meaning)) (case c) adjsyn (*adjsem (String.concat ";" typ)*) | |
313 | - | PersAtrs(m,le,neg,mo,t,au,a) -> Printf.sprintf "%s: %s: %s: %s: %s: %s: %s" (String.concat "; " (Xlist.map m meaning)) le (negation neg) (mood mo) (tense t) (aux au) (aspect a) | |
314 | - | GerAtrs(m,le,neg,a) -> Printf.sprintf "%s: %s: %s: %s" (String.concat "; " (Xlist.map m meaning)) le (negation neg) (aspect a) | |
315 | - | NonPersAtrs(m,le,role,role_attr,neg,a) -> Printf.sprintf "%s: %s: %s,%s: %s: %s" (String.concat "; " (Xlist.map m meaning)) le role role_attr (negation neg) (aspect a) | |
316 | - | _ -> failwith "WalStringOf.frame_atrs" | |
210 | +let simple_morf = function | |
211 | + | MorfId id -> Printf.sprintf "%d" id | |
212 | + | _ -> failwith "ENIAMwalStringOf.simple_morf" | |
317 | 213 | |
318 | -let frame lexeme = function | |
319 | - Frame(atrs,s) -> | |
320 | - Printf.sprintf "%s: %s: %s" lexeme (frame_atrs atrs) (schema s) | |
321 | - | SimpleLexEntry(le,p) -> | |
322 | - Printf.sprintf "%s: %s" le p | |
323 | - | LexEntry(id,le,p,r,s) -> | |
324 | - Printf.sprintf "%d: %s: %s: %s: %s" id le p (restr r) (schema s) | |
325 | - | ComprepNPEntry(le,r,s) -> | |
326 | - Printf.sprintf "%s: %s: %s" le (restr r) (schema s) | |
327 | - (* | LexFrame(id,p,r,s) -> | |
328 | - Printf.sprintf "%s: %s: %s: %s: %s" lexeme id (pos p) (restr r) (schema s) | |
329 | - | ComprepFrame(le,p,r,s) -> | |
330 | - Printf.sprintf "%s: %s: %s: %s: %s" lexeme le (pos p) (restr r) (schema s) *) | |
331 | -(* | FrameR(atrs,s) -> | |
332 | - Printf.sprintf "%s: %s: %s" lexeme (frame_atrs atrs) (schema_role s) | |
333 | - | LexFrameR(id,p,r,s) -> | |
334 | - Printf.sprintf "%s: %s: %s: %s: %s" lexeme id (pos p) (restr r) (schema_role s) | |
335 | - | ComprepFrameR(le,p,r,s) -> | |
336 | - Printf.sprintf "%s: %s: %s: %s: %s" lexeme le (pos p) (restr r) (schema_role s)*) | |
337 | -(* | _ -> failwith "WalStringOf.frame" *) | |
214 | +let rec simple_schema schema = | |
215 | + String.concat "+" (Xlist.map schema (fun s -> | |
216 | + String.concat "," ( | |
217 | + (if s.gf = ARG then [] else [gf s.gf])@ | |
218 | + s.mode@(controllers s.cr)@(controllees s.ce)) ^ | |
219 | + "{" ^ String.concat ";" (Xlist.map s.morfs simple_morf) ^ "}")) | |
338 | 220 | |
339 | -let entry = function | |
340 | - Frame(atrs,s) -> | |
341 | - Printf.sprintf "%s: %s: %s" "lexeme" (frame_atrs atrs) (schema s) | |
342 | - | SimpleLexEntry(le,p) -> | |
343 | - Printf.sprintf "%s: %s" le p | |
344 | - | LexEntry(id,le,p,r,s) -> | |
345 | - Printf.sprintf "%d: %s: %s: %s: %s" id le p (restr r) (schema s) | |
346 | - | ComprepNPEntry(le,r,s) -> | |
347 | - Printf.sprintf "%s: %s: %s" le (restr r) (schema s) | |
348 | 221 | |
349 | -(* let fnum_frame lexeme = function | |
350 | - fnum,Frame(atrs,s) -> | |
351 | - Printf.sprintf "%d: %s: %s: %s" fnum lexeme (frame_atrs atrs) (schema s) | |
352 | - | fnum,LexFrame(id,p,r,s) -> | |
353 | - Printf.sprintf "%d: %s: %s: %s: %s: %s" fnum lexeme id (pos p) (restr r) (schema s) | |
354 | - | fnum,ComprepFrame(le,p,r,s) -> | |
355 | - Printf.sprintf "%d: %s: %s: %s: %s: %s" fnum lexeme le (pos p) (restr r) (schema s) *) | |
222 | +let meaning m = | |
223 | + m.name ^ "-" ^ m.variant | |
356 | 224 | |
357 | -let unparsed_frame lexeme (r,o,neg,p,a,s) = lexeme ^ " " ^ String.concat ": " [r;o;neg;p;a;s] | |
225 | +let lex_entry = function | |
226 | + SimpleLexEntry(le,p) -> | |
227 | + Printf.sprintf "lex(%s,%s)" le p | |
228 | + | LexEntry(id,le,p,NoRestr,s) -> | |
229 | + Printf.sprintf "lex(%d,%s,%s)\t%s" id le p (schema s) | |
230 | + | LexEntry(id,le,p,r,[]) -> | |
231 | + Printf.sprintf "lex(%d,%s,%s)\t%s" id le p (restr r) | |
232 | + | ComprepNPEntry(le,NoRestr,s) -> | |
233 | + Printf.sprintf "comprepnp(%s)\t%s" le (schema s) | |
234 | + | ComprepNPEntry(le,r,[]) -> | |
235 | + Printf.sprintf "comprepnp(%s)\t%s" le (restr r) | |
236 | + | _ -> failwith "ENIAMwalStringOf.lex_entry" | |
... | ... |
walenty/ENIAMwalTEI.ml
... | ... | @@ -18,7 +18,7 @@ |
18 | 18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 | *) |
20 | 20 | |
21 | -open ENIAMwalTypes | |
21 | +open ENIAMwalTypes2 | |
22 | 22 | open Xstd |
23 | 23 | |
24 | 24 | type id = {hash: bool; suffix: string; numbers: int list} |
... | ... | @@ -261,6 +261,10 @@ let rec process_lex lex = function |
261 | 261 | (ENIAMwalStringOf.lex lemma) (ENIAMwalStringOf.lex numeral_lemma) in |
262 | 262 | failwith ("process_lex: " ^ s) |
263 | 263 | |
264 | +(* UWAGA: refl_id może się zmienić wraz z wersją Walentego *) | |
265 | +let refl_id = 25 | |
266 | +let refl_position = {empty_position with role="Lemma"; mode=["lemma"]; morfs=[MorfId refl_id]} | |
267 | + | |
264 | 268 | let rec load_category = function |
265 | 269 | | F("category",Fs("category_def",x)) -> |
266 | 270 | (match x with |
... | ... | @@ -321,7 +325,7 @@ and load_phrase mode = function |
321 | 325 | | Fs("advp", [e;F("category",Symbol a)]) -> mode:=a :: !mode; PhraseAbbr(Advp(a),[]) |
322 | 326 | | Fs("nonch", []) -> mode:="nonch" :: !mode; PhraseAbbr(Nonch,[]) |
323 | 327 | | Fs("or", []) -> Phrase Or |
324 | - | Fs("refl", []) -> Phrase Refl | |
328 | + | Fs("refl", []) -> mode:="refl" :: !mode; LexPhrase([QUB,Lexeme "się"],(Natr,[])) | |
325 | 329 | | Fs("E", []) -> E Null |
326 | 330 | | Fs("lex", x) -> |
327 | 331 | let lex = Xlist.fold x empty_lex load_lex in |
... | ... | @@ -330,7 +334,7 @@ and load_phrase mode = function |
330 | 334 | | Fs("fixed", [F("argument",a);F("string",TEIstring b)]) -> Phrase (FixedP((*snd (load_phrase a),*)b)) |
331 | 335 | | Fs("possp", [e]) -> mode:="possp" :: !mode; PhraseAbbr(Possp,[]) |
332 | 336 | | Fs("possp", []) -> mode:="possp" :: !mode; PhraseAbbr(Possp,[]) |
333 | - | Fs("recip", []) -> Phrase Recip | |
337 | + | Fs("recip", []) -> mode:="recip" :: !mode; LexPhrase([QUB,Lexeme "się"],(Natr,[])) | |
334 | 338 | | Fs("distrp", [e]) -> mode:="distrp" :: !mode; PhraseAbbr(Distrp,[]) |
335 | 339 | | Fs("distrp", []) -> mode:="distrp" :: !mode; PhraseAbbr(Distrp,[]) |
336 | 340 | | Fs("compar", [F("compar_category",Symbol value)]) -> Phrase(ComparP value) |
... | ... | @@ -395,8 +399,7 @@ let parse_opinion = function |
395 | 399 | |
396 | 400 | let load_schema_info ent phrases (arg:schema) = function |
397 | 401 | | F("opinion",Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value} |
398 | - | F("inherent_sie",Binary true) -> {arg with reflexiveMark = ReflTrue} | |
399 | - | F("inherent_sie",Binary false) -> {arg with reflexiveMark = ReflFalse} | |
402 | + | F("inherent_sie",Binary b) -> {arg with reflexiveMark = b} | |
400 | 403 | | F("aspect",Symbol aspect_value) -> {arg with aspect = parse_aspect aspect_value} |
401 | 404 | | Fset("aspect", []) -> arg |
402 | 405 | | F("negativity",Symbol negativity_value) -> {arg with negativity = parse_negation negativity_value} |
... | ... | @@ -411,7 +414,7 @@ let load_schema_info ent phrases (arg:schema) = function |
411 | 414 | |
412 | 415 | let load_schema ent phrases = function |
413 | 416 | Fs("schema", schema) -> |
414 | - let result = {sch_id = (-1); opinion = OpinionUndef; reflexiveMark = ReflUndef; aspect = AspectUndef; | |
417 | + let result = {sch_id = (-1); opinion = OpinionUndef; reflexiveMark = false; aspect = AspectUndef; | |
415 | 418 | negativity = NegationUndef; predicativity = PredUndef; positions = []; text_rep=""} in |
416 | 419 | let result = Xlist.fold schema result (load_schema_info ent phrases) in |
417 | 420 | result |
... | ... | @@ -587,22 +590,6 @@ let load_rentry = function |
587 | 590 | id,morf,expansions |
588 | 591 | | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml) |
589 | 592 | |
590 | -let load_expands filename = | |
591 | - begin | |
592 | - match Xml.parse_file filename with | |
593 | - Xml.Element("TEI", _, | |
594 | - [Xml.Element("teiHeader",_,_) ; | |
595 | - Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> | |
596 | - List.rev (Xlist.rev_map entries load_rentry) | |
597 | - | _ -> failwith "load_walenty" | |
598 | - end | |
599 | - | |
600 | - | |
601 | - (*let walenty = load_walenty Paths.walenty_filename *) | |
602 | -let walenty,phrases = load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml" | |
603 | - | |
604 | -(* let _ = Printf.printf "|phrases|=%d\n" (IntMap.size phrases) *) | |
605 | - | |
606 | 593 | let expands_supplement = [ |
607 | 594 | (-2), PhraseAbbr(Nonch,[]), [ |
608 | 595 | LexPhrase([SUBST(NumberUndef,Str),Lexeme "co"],(Natr,[])); |
... | ... | @@ -615,7 +602,15 @@ let expands_supplement = [ |
615 | 602 | LexPhrase([ADV (Grad "pos"),Lexeme "jak"],(Natr,[])) |
616 | 603 | ]] |
617 | 604 | |
618 | -let expands = expands_supplement @ load_expands "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" | |
605 | +let load_expands filename = | |
606 | + begin | |
607 | + match Xml.parse_file filename with | |
608 | + Xml.Element("TEI", _, | |
609 | + [Xml.Element("teiHeader",_,_) ; | |
610 | + Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> | |
611 | + expands_supplement @ List.rev (Xlist.rev_map entries load_rentry) | |
612 | + | _ -> failwith "load_walenty" | |
613 | + end | |
619 | 614 | |
620 | 615 | let subtypes = [ |
621 | 616 | "int",[ |
... | ... |
walenty/ENIAMwalTypes.ml
... | ... | @@ -42,6 +42,9 @@ type gf = SUBJ | OBJ | ARG |
42 | 42 | |
43 | 43 | type pos = |
44 | 44 | SUBST of number * case |
45 | + | PPRON12 of number * case | |
46 | + | PPRON3 of number * case | |
47 | + | SIEBIE of case | |
45 | 48 | | PREP of case |
46 | 49 | | NUM of case * gender * acm |
47 | 50 | | ADJ of number * case * gender * grad |
... | ... | @@ -84,39 +87,13 @@ type phrase = |
84 | 87 | | PpasP of case |
85 | 88 | | PrepPpasP of string * case |
86 | 89 | | PactP of case |
87 | - | |
88 | -type phrase_abbr = | |
89 | - Xp of string | |
90 | - | Advp of string | |
91 | - | Nonch | |
92 | - | Distrp | |
93 | - | Possp | |
94 | - | |
95 | -type phrase_comp = | |
96 | - Cp | |
97 | - | Ncp of case | |
98 | - | Prepncp of string * case | |
99 | - | |
100 | -type lex = | |
101 | - Lexeme of string | |
102 | - | ORconcat of lex list | |
103 | - | ORcoord of lex list | |
104 | - | XOR of lex list | |
105 | - | Elexeme of gender | |
90 | + | SimpleLexArg of string * pos | |
91 | + | LexArg of int * string * pos | |
92 | + | E of phrase | |
93 | + | MorfId of int | |
106 | 94 | |
107 | 95 | type restr = Natr | Ratr | Ratrs | Ratr1 | Atr | Atr1 | NoRestr |
108 | 96 | |
109 | -(*type lex_specs = | |
110 | - NSpecs of number | |
111 | - | AdvSpecs of grad | |
112 | - | AdjSpecs of number * gender * grad | |
113 | - | PpasSpecs of number * gender * negation | |
114 | - | PactSpecs of number * gender * negation * refl | |
115 | - | GerSpecs of number * negation * refl | |
116 | - | CSpecs of negation * refl | |
117 | - | NumSpecs of gender | |
118 | - | EmptySpecs *) | |
119 | - | |
120 | 97 | type sel_prefs = |
121 | 98 | SynsetId of int |
122 | 99 | | Predef of string |
... | ... | @@ -124,49 +101,11 @@ type sel_prefs = |
124 | 101 | | RelationRole of string * string * string (* relacji * rola * atrybut roli *) |
125 | 102 | |
126 | 103 | type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: sel_prefs list; |
127 | - mode: string list; cr: string list; ce: string list; morfs: morf list} | |
128 | - | |
129 | -and morf = | |
130 | - Phrase of phrase | |
131 | - (* | PhraseMode of string * phrase *) | |
132 | - | E of phrase | |
133 | - | LexPhrase of (pos * lex) list * (restr * position list) | |
134 | - (* | LexRPhrase of (pos * lex) list * (restr * position list) *) | |
135 | - (* | LexPhraseMode of string * (pos * lex) list * (restr * position list) *) | |
136 | - (* | LexRPhraseMode of string * (pos * lex) list * (restr * position list) *) | |
137 | - | PhraseAbbr of phrase_abbr * morf list | |
138 | - | PhraseComp of phrase_comp * (comp_type * comp list) | |
139 | - (* | LexPhraseId of string * pos * lex | |
140 | - | LexArg of string * pos * string *) | |
141 | - | MorfId of int | |
142 | -(* | LexRealization of morf * string*) | |
143 | -(* | Raised of string list * direction * string list | |
144 | - | Multi of phrase list*) | |
145 | - | SimpleLexArg of string * pos | |
146 | - | LexArg of int * string * pos | |
104 | + mode: string list; cr: string list; ce: string list; morfs: phrase list} | |
147 | 105 | |
148 | 106 | let empty_position = |
149 | 107 | {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; morfs=[]} |
150 | 108 | |
151 | -type lex_record = { | |
152 | - lex_argument: morf; | |
153 | - lex_arguments: morf list; | |
154 | - lex_lemma: lex; | |
155 | - lex_numeral_lemma: lex; | |
156 | - lex_mode: string list; | |
157 | - lex_negation: negation; | |
158 | - lex_degree: grad; | |
159 | - lex_number: number; | |
160 | - lex_reflex: refl; | |
161 | - lex_gender: gender; | |
162 | - lex_modification: restr * position list; | |
163 | -} | |
164 | - | |
165 | -let empty_lex = {lex_argument=Phrase Null; lex_arguments=[]; lex_lemma=Lexeme ""; | |
166 | - lex_numeral_lemma=Lexeme ""; lex_mode=[]; lex_negation=NegationUndef; | |
167 | - lex_degree=GradUndef; lex_number=NumberUndef; lex_reflex=ReflUndef; | |
168 | - lex_gender=GenderUndef; lex_modification = Natr,[]} | |
169 | - | |
170 | 109 | type meaning = {mng_id: int; |
171 | 110 | name: string; |
172 | 111 | variant: string; |
... | ... | @@ -211,21 +150,6 @@ end |
211 | 150 | |
212 | 151 | module EntrySet = Xset.Make(OrderedEntry) |
213 | 152 | |
214 | -module OrderedAbbr = struct | |
215 | - type t = phrase_abbr | |
216 | - let compare = compare | |
217 | -end | |
218 | - | |
219 | -module AbbrMap = Xmap.Make(OrderedAbbr) | |
220 | - | |
221 | -module OrderedComp = struct | |
222 | - type t = comp_type | |
223 | - let compare = compare | |
224 | -end | |
225 | - | |
226 | -(* module MorfSet = Xset.Make(OrderedMorf) *) | |
227 | -module CompMap = Xmap.Make(OrderedComp) | |
228 | - | |
229 | 153 | let resource_path = |
230 | 154 | try Sys.getenv "ENIAM_RESOURCE_PATH" |
231 | 155 | with Not_found -> "/usr/share/eniam" |
... | ... |
walenty/ENIAMwalTypes2.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open Xstd | |
21 | + | |
22 | +type opinion = Dobry | Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Domyslny | OpinionUndef | |
23 | +type negation = Negation | Aff | NegationUndef | NegationNA | |
24 | +type pred = PredTrue | PredFalse | PredUndef | PredNA | |
25 | +type aspect = Aspect of string | AspectUndef | AspectNA | |
26 | +type case = Case of string | Str | Part | CaseAgr | NomAgr | GenAgr | AllAgr | CaseUndef | AllUAgr | CaseUAgr | |
27 | +type comp = Comp of string | Zeby | Gdy | CompUndef | |
28 | +type comp_type = Int | Rel | CompTypeUndef | |
29 | +type number = Number of string | NumberUndef | NumberAgr | |
30 | +type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list | |
31 | +type grad = Grad of string | GradUndef | |
32 | +type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef | |
33 | +type acm = Acm of string | AcmUndef | |
34 | + | |
35 | +type gf = SUBJ | OBJ | ARG | |
36 | + | |
37 | +type pos = | |
38 | + SUBST of number * case | |
39 | + | PPRON12 of number * case | |
40 | + | PPRON3 of number * case | |
41 | + | SIEBIE of case | |
42 | + | PREP of case | |
43 | + | NUM of case * gender * acm | |
44 | + | ADJ of number * case * gender * grad | |
45 | + | ADV of grad | |
46 | + | GER of number * case * gender * aspect * negation * refl | |
47 | + | PACT of number * case * gender * aspect * negation * refl | |
48 | + | PPAS of number * case * gender * aspect * negation | |
49 | + | INF of aspect * negation * refl | |
50 | + | QUB | |
51 | + | COMPAR | |
52 | + | COMP of comp_type | |
53 | + | PERS of (*number * gender * aspect * person * *)negation * refl | |
54 | + | FIXED | |
55 | + | |
56 | +type phrase = | |
57 | + NP of case | |
58 | + | PrepNP of string * case | |
59 | + | AdjP of case | |
60 | + | PrepAdjP of string * case | |
61 | + | NumP of case | |
62 | + | PrepNumP of string * case | |
63 | + | ComprepNP of string | |
64 | + | ComparP of string (** case*) | |
65 | + | CP of comp_type * comp | |
66 | + | NCP of case * comp_type * comp | |
67 | + | PrepNCP of string * case * comp_type * comp | |
68 | + | InfP of aspect | |
69 | + | AdvP | |
70 | + | FixedP of string | |
71 | + | Num of case * acm | |
72 | + | Or | |
73 | + | Qub | |
74 | + (* | Pro | |
75 | + | ProNG *) | |
76 | + | Null | |
77 | + | GerP of case | |
78 | + | PrepGerP of string * case | |
79 | + | PpasP of case | |
80 | + | PrepPpasP of string * case | |
81 | + | PactP of case | |
82 | + | |
83 | +type phrase_abbr = | |
84 | + Xp of string | |
85 | + | Advp of string | |
86 | + | Nonch | |
87 | + | Distrp | |
88 | + | Possp | |
89 | + | |
90 | +type phrase_comp = | |
91 | + Cp | |
92 | + | Ncp of case | |
93 | + | Prepncp of string * case | |
94 | + | |
95 | +type lex = | |
96 | + Lexeme of string | |
97 | + | ORconcat of lex list | |
98 | + | ORcoord of lex list | |
99 | + | XOR of lex list | |
100 | + | Elexeme of gender | |
101 | + | |
102 | +type restr = Natr | Ratr | Ratrs | Ratr1 | Atr | Atr1 | NoRestr | |
103 | + | |
104 | +type sel_prefs = | |
105 | + SynsetId of int | |
106 | + | Predef of string | |
107 | + | RelationArgId of string * int (* nazwa relacji * id argumentu ramy *) | |
108 | + | RelationRole of string * string * string (* relacji * rola * atrybut roli *) | |
109 | + | |
110 | +type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: sel_prefs list; | |
111 | + mode: string list; cr: string list; ce: string list; morfs: morf list} | |
112 | + | |
113 | +and morf = | |
114 | + Phrase of phrase | |
115 | + | E of phrase | |
116 | + | LexPhrase of (pos * lex) list * (restr * position list) | |
117 | + | PhraseAbbr of phrase_abbr * morf list | |
118 | + | PhraseComp of phrase_comp * (comp_type * comp list) | |
119 | + | MorfId of int | |
120 | + | SimpleLexArg of string * pos | |
121 | + | LexArg of int * string * pos | |
122 | + | |
123 | +let empty_position = | |
124 | + {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; morfs=[]} | |
125 | + | |
126 | +type lex_record = { | |
127 | + lex_argument: morf; | |
128 | + lex_arguments: morf list; | |
129 | + lex_lemma: lex; | |
130 | + lex_numeral_lemma: lex; | |
131 | + lex_mode: string list; | |
132 | + lex_negation: negation; | |
133 | + lex_degree: grad; | |
134 | + lex_number: number; | |
135 | + lex_reflex: refl; | |
136 | + lex_gender: gender; | |
137 | + lex_modification: restr * position list; | |
138 | +} | |
139 | + | |
140 | +let empty_lex = {lex_argument=Phrase Null; lex_arguments=[]; lex_lemma=Lexeme ""; | |
141 | + lex_numeral_lemma=Lexeme ""; lex_mode=[]; lex_negation=NegationUndef; | |
142 | + lex_degree=GradUndef; lex_number=NumberUndef; lex_reflex=ReflUndef; | |
143 | + lex_gender=GenderUndef; lex_modification = Natr,[]} | |
144 | + | |
145 | +type meaning = {mng_id: int; | |
146 | + name: string; | |
147 | + variant: string; | |
148 | + plwnluid: int; | |
149 | + gloss: string} | |
150 | + | |
151 | +let empty_meaning = {mng_id = (-1); | |
152 | + name = ""; | |
153 | + variant = ""; | |
154 | + plwnluid = (-1); | |
155 | + gloss = ""} | |
156 | + | |
157 | +type schema = {sch_id: int; opinion: opinion; reflexiveMark: bool; aspect: aspect; | |
158 | + negativity: negation; predicativity: pred; positions: position list; text_rep: string} | |
159 | + | |
160 | +type lex_entry = | |
161 | + SimpleLexEntry of string * string | |
162 | + | LexEntry of int * string * string * restr * position list | |
163 | + | ComprepNPEntry of string * restr * position list | |
164 | + | |
165 | + | |
166 | +module OrderedEntry = struct | |
167 | + type t = lex_entry | |
168 | + let compare = compare | |
169 | +end | |
170 | + | |
171 | +module EntrySet = Xset.Make(OrderedEntry) | |
172 | + | |
173 | +module OrderedAbbr = struct | |
174 | + type t = phrase_abbr | |
175 | + let compare = compare | |
176 | +end | |
177 | + | |
178 | +module AbbrMap = Xmap.Make(OrderedAbbr) | |
179 | + | |
180 | +module OrderedComp = struct | |
181 | + type t = comp_type | |
182 | + let compare = compare | |
183 | +end | |
184 | + | |
185 | +module CompMap = Xmap.Make(OrderedComp) | |
186 | + | |
187 | +type example = {exm_id: int; | |
188 | + meaning: int; | |
189 | + phrases: (int * int * int) list; | |
190 | + sentence: string; | |
191 | + source: string; | |
192 | + opinion: opinion; | |
193 | + note: string} | |
194 | + | |
195 | +type argument = {arg_id: int; | |
196 | + role: string; | |
197 | + role_attribute: string; | |
198 | + sel_prefs: sel_prefs list} | |
199 | + | |
200 | +type frame = {frm_id: int; | |
201 | + opinion: string; | |
202 | + meanings: int list; | |
203 | + arguments: argument list} | |
204 | + | |
205 | +type connection = {argument: int; | |
206 | + phrases: (int * int list) list} | |
207 | + | |
208 | +type alternation = {schema: int; frame: int; connections: connection list} | |
209 | + | |
210 | +type entry = {ent_id: int; | |
211 | + status: string; | |
212 | + form_orth: string; | |
213 | + form_pos: string; | |
214 | + schemata: schema list; | |
215 | + examples: example list; | |
216 | + frames: frame list; | |
217 | + meanings: meaning list; | |
218 | + alternations: alternation list} | |
219 | + | |
220 | +let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[]; | |
221 | + frames=[]; meanings=[]; alternations=[]} | |
... | ... |
walenty/entries.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty". | |
3 | + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open Xstd | |
21 | + | |
22 | +let empty = StringMap.empty | |
23 | + | |
24 | +let add_inc pos_map pos lemma entry = | |
25 | + let lemma_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in | |
26 | + let lemma_map = StringMap.add_inc lemma_map lemma [entry] (fun l -> entry :: l) in | |
27 | + StringMap.add pos_map pos lemma_map | |
28 | + | |
29 | +let add_inc_list pos_map pos lemma entries = | |
30 | + let lemma_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in | |
31 | + let lemma_map = StringMap.add_inc lemma_map lemma entries (fun l -> entries @ l) in | |
32 | + StringMap.add pos_map pos lemma_map | |
33 | + | |
34 | +let flatten_map pos_map f = | |
35 | + StringMap.mapi pos_map (fun pos lemma_map -> | |
36 | + StringMap.mapi lemma_map (fun lemma entries -> | |
37 | + List.flatten (Xlist.rev_map entries (fun entry -> | |
38 | + f pos lemma entry)))) | |
39 | + | |
40 | +let map2 pos_map f = | |
41 | + StringMap.mapi pos_map (fun pos lemma_map -> | |
42 | + StringMap.mapi lemma_map (fun lemma entries -> | |
43 | + f pos lemma entries)) | |
44 | + | |
45 | +let iter pos_map f = | |
46 | + StringMap.iter pos_map (fun pos lemma_map -> | |
47 | + StringMap.iter lemma_map (fun lemma entries -> | |
48 | + Xlist.iter entries (fun entry -> | |
49 | + f pos lemma entry))) | |
50 | + | |
51 | +let fold pos_map s f = | |
52 | + StringMap.fold pos_map s (fun s pos lemma_map -> | |
53 | + StringMap.fold lemma_map s (fun s lemma entries -> | |
54 | + Xlist.fold entries s (fun s entry -> | |
55 | + f s pos lemma entry))) | |
... | ... |
walenty/makefile
... | ... | @@ -25,11 +25,12 @@ eniam-walenty.cma: $(SOURCES) |
25 | 25 | eniam-walenty.cmxa: $(SOURCES) |
26 | 26 | ocamlopt -linkall -a -o eniam-walenty.cmxa $(INCLUDES) $^ |
27 | 27 | |
28 | -test: test.ml | |
29 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml | |
28 | +test: entries.ml ENIAMwalTypes.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml | |
29 | + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml | |
30 | 30 | |
31 | -loader: ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml | |
32 | - $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml | |
31 | +loader: entries.ml ENIAMwalTypes2.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml ENIAMwalGenerate.ml | |
32 | + mkdir -p results | |
33 | + $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes2.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml ENIAMwalGenerate.ml | |
33 | 34 | |
34 | 35 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
35 | 36 | |
... | ... |
walenty/test.ml