Commit d794672dd3115aec142d353e08d987ffd833d32c

Authored by Wojciech Jaworski
1 parent ae919ded

parsowanie przetworzonego Walentego

walenty/.gitignore
1 1 loader
  2 +test
... ...
walenty/ENIAMwalConnect.ml
... ... @@ -18,17 +18,7 @@
18 18 *)
19 19  
20 20 open Xstd
21   -open ENIAMwalTypes
22   -
23   -(* let process_morfs = function
24   - MorfId id -> id
25   - | _ -> failwith "process_morfs"
26   -
27   -let process_positions positions =
28   - Xlist.fold positions IntMap.empty (fun positions position ->
29   - let r,cr,ce = ENIAMwalParser.parse_roles (position.gf :: position.control) in
30   - let phrases = List.rev (Xlist.rev_map position.morfs process_morfs) in
31   - IntMap.add positions position.psn_id (r,cr,ce,phrases)) *)
  21 +open ENIAMwalTypes2
32 22  
33 23 let process_morfs morfs =
34 24 Xlist.fold morfs IntMap.empty (fun morfs -> function
... ... @@ -41,19 +31,18 @@ let process_positions positions =
41 31  
42 32 let process_schemata schemata =
43 33 Xlist.fold schemata IntMap.empty (fun schemata schema ->
44   - let schema_atrs = DefaultAtrs([],schema.reflexiveMark,
45   - schema.opinion, schema.negativity, schema.predicativity, schema.aspect) in
  34 + let atrs = schema.negativity, schema.predicativity, schema.aspect in
46 35 let positions = process_positions schema.positions in
47   - IntMap.add schemata schema.sch_id (schema_atrs,positions))
  36 + IntMap.add schemata schema.sch_id (schema.reflexiveMark,schema.opinion,atrs,positions))
48 37  
49 38 let process_arguments arguments =
50 39 Xlist.fold arguments IntMap.empty (fun arguments argument ->
51 40 IntMap.add arguments argument.arg_id argument)
52 41  
53   -let process_frames frames = (* FIXME: pomijam opinię *)
  42 +let process_frames frames =
54 43 Xlist.fold frames IntMap.empty (fun frames frame ->
55 44 let arguments = process_arguments frame.arguments in
56   - IntMap.add frames frame.frm_id (frame.meanings,arguments))
  45 + IntMap.add frames frame.frm_id (frame,arguments))
57 46  
58 47 let process_meanings meanings =
59 48 Xlist.fold meanings IntMap.empty (fun meanings meaning ->
... ... @@ -67,18 +56,15 @@ let process_sel_pref arguments = function
67 56 RelationRole(s,arg.role,arg.role_attribute)
68 57 | RelationRole _ -> failwith "process_sel_pref"
69 58  
70   -let add_meanings meanings = function
71   - DefaultAtrs(_,r,o,n,p,a) -> DefaultAtrs(meanings,r,o,n,p,a)
72   - | _ -> failwith "add_meanings"
73   -
74 59 let connect entry =
75 60 let schemata = process_schemata entry.schemata in
76 61 let frames = process_frames entry.frames in
77 62 let meanings = process_meanings entry.meanings in
78 63 Xlist.fold entry.alternations [] (fun found alt ->
79   - let schema_atrs,positions = IntMap.find schemata alt.schema in
80   - let meaning_ids,arguments = IntMap.find frames alt.frame in
81   - let conn_positions = Xlist.fold alt.connections [] (fun conn_positions conn ->
  64 + let refl,opinion,schema_atrs,positions = IntMap.find schemata alt.schema in
  65 + let frame,arguments = IntMap.find frames alt.frame in
  66 + let conn_positions = if refl then [ENIAMwalTEI.refl_position] else [] in
  67 + let conn_positions = Xlist.fold alt.connections conn_positions (fun conn_positions conn ->
82 68 let arg = IntMap.find arguments conn.argument in
83 69 let sel_prefs = Xlist.map arg.sel_prefs (process_sel_pref arguments) in
84 70 Xlist.fold conn.phrases conn_positions (fun conn_positions (position_id,phrase_ids) ->
... ... @@ -89,191 +75,13 @@ let connect entry =
89 75 with Not_found -> Printf.printf "%s\n%!" entry.form_orth;morfs) in
90 76 {position with role=arg.role; role_attr=arg.role_attribute; sel_prefs=sel_prefs;
91 77 morfs=List.rev morfs} :: conn_positions)) in
92   - let meanings = List.rev (Xlist.rev_map meaning_ids (fun id ->
  78 + let meanings = List.rev (Xlist.rev_map frame.meanings (fun id ->
93 79 IntMap.find meanings id)) in
94   - let schema_atrs = add_meanings meanings schema_atrs in
95   - (Frame(schema_atrs,conn_positions)) :: found)
  80 + (opinion,frame.opinion,meanings,schema_atrs,conn_positions) :: found)
96 81  
97 82 let schemata entry =
98 83 let schemata = process_schemata entry.schemata in
99   - IntMap.fold schemata [] (fun found _ (schema_atrs,positions) ->
  84 + IntMap.fold schemata [] (fun found _ (refl,opinion,schema_atrs,positions) ->
100 85 let positions = IntMap.fold positions [] (fun positions _ position -> position :: positions) in
101   - (Frame(schema_atrs,positions)) :: found)
102   -
103   -(* Test wczytywania *)
104   -(* let _ =
105   - let n = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = connect e in n + Xlist.size l) in
106   - let m = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = schemata e in n + Xlist.size l) in
107   - Printf.printf "%d connected\n%d schemata\n" n m;
108   - () *)
109   -
110   -let connected_walenty =
111   - Xlist.fold ENIAMwalTEI.walenty StringMap.empty (fun pos_map e ->
112   - let orth_map = try StringMap.find pos_map e.form_pos with Not_found -> StringMap.empty in
113   - let frames = connect e in
114   - let orth_map = StringMap.add_inc orth_map e.form_orth frames (fun l -> frames @ l) in
115   - StringMap.add pos_map e.form_pos orth_map)
116   -
117   -let schemata_walenty =
118   - Xlist.fold ENIAMwalTEI.walenty StringMap.empty (fun pos_map e ->
119   - let orth_map = try StringMap.find pos_map e.form_pos with Not_found -> StringMap.empty in
120   - let frames = schemata e in
121   - let orth_map = StringMap.add_inc orth_map e.form_orth frames (fun l -> frames @ l) in
122   - StringMap.add pos_map e.form_pos orth_map)
123   -
124   -let insert_phrases phrases = function
125   - Frame(atrs,s) -> Frame(atrs,Xlist.map s (fun p ->
126   - {p with morfs=Xlist.map p.morfs (function
127   - MorfId id -> (try IntMap.find phrases id with Not_found -> failwith "insert_phrases")
128   - | _ -> failwith "insert_phrases")}))
129   - | _ -> failwith "insert_phrases: ni"
130   -
131   -let print_entry pos_map pos orth =
132   - let orth_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in
133   - let frames = try StringMap.find orth_map orth with Not_found -> [] in
134   - Xlist.iter frames (fun frame ->
135   - let frame = insert_phrases ENIAMwalTEI.phrases frame in
136   - print_endline (ENIAMwalStringOf.frame orth frame))
137   -
138   -(* Wypisanie hasła *)
139   -(* let _ =
140   - print_entry connected_walenty "verb" "brudzić";
141   - () *)
142   -
143   -let has_nontrivial_lex = function
144   - Frame(atrs,s) -> Xlist.fold s false (fun b p ->
145   - if p.role = "Lemma" && p.role_attr = "" then b else
146   - Xlist.fold p.morfs b (fun b -> function
147   - MorfId id -> failwith "has_nontrivial_lex"
148   - | LexPhrase _ -> true
149   - (* | LexRPhrase _ -> true
150   - | LexPhraseMode _ -> true *)
151   - | _ -> b))
152   - | _ -> failwith "has_nontrivial_lex: ni"
153   -
154   -(* Leksykalizacje nie wchodzące do lematu *)
155   -(* let _ =
156   - StringMap.iter connected_walenty (fun _ orth_map ->
157   - StringMap.iter orth_map (fun orth frames ->
158   - Xlist.iter frames (fun frame ->
159   - let frame = insert_phrases ENIAMwalTEI.phrases frame in
160   - if has_nontrivial_lex frame then
161   - print_endline (ENIAMwalStringOf.frame orth frame)))) *)
162   -
163   -let simplify_frame_verb = function
164   - Phrase(NP(Case "dat")) -> []
165   - | Phrase(NP(Case "inst")) -> []
166   - | Phrase(PrepNP _) -> []
167   - | Phrase(ComprepNP _) -> []
168   - | Phrase(AdvP) -> []
169   - | t -> [t]
170   -
171   -let simplify_frame_noun = function
172   - Phrase(NP(Case "gen")) -> []
173   - | Phrase(NP(Case "nom")) -> []
174   - | Phrase(NP(CaseAgr)) -> []
175   - | Phrase(PrepNP _) -> []
176   - | Phrase(ComprepNP _) -> []
177   - | Phrase(AdjP CaseAgr) -> []
178   - | PhraseComp(Ncp(Case "gen"),_)
179   - | PhraseComp(Prepncp(_,_),_) -> []
180   - | PhraseAbbr(Possp,[]) -> []
181   - | t -> [t]
182   -
183   -let simplify_frame_adj = function
184   - | t -> [t]
185   -
186   -let simplify_frame_adv = function
187   - | t -> [t]
188   -
189   -
190   -let simplify_frame pos = function
191   - Frame(atrs,s) ->
192   - let schema = Xlist.fold s [] (fun schema p ->
193   - let morfs = Xlist.fold p.morfs [] (fun morfs morf ->
194   - match pos with
195   - "verb" -> simplify_frame_verb morf @ morfs
196   - | "noun" -> simplify_frame_noun morf @ morfs
197   - | "adj" -> simplify_frame_adj morf @ morfs
198   - | "adv" -> simplify_frame_adv morf @ morfs
199   - | _ -> failwith "simplify_frame") in
200   - if morfs = [] then schema else
201   - {p with ce=[]; cr=[]; morfs=morfs} :: schema) in
202   - if schema = [] then [] else [Frame(atrs,schema)]
203   - | _ -> failwith "simplify_frame: ni"
204   -
205   -
206   -(* Uproszczone schematy *)
207   -(* let _ =
208   - StringMap.iter schemata_walenty (fun pos orth_map ->
209   - if pos = "noun" then
210   - StringMap.iter orth_map (fun orth frames ->
211   - Xlist.iter frames (fun frame ->
212   - let frame = insert_phrases ENIAMwalTEI.phrases frame in
213   - let frames = simplify_frame pos frame in
214   - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame orth frame))))) *)
215   -
216   -let has_mode_coordination = function
217   - Frame(atrs,s) -> Xlist.fold s false (fun b p ->
218   - let n = Xlist.fold p.morfs 0 (fun n -> function
219   - MorfId id -> failwith "has_nontrivial_lex"
220   - | PhraseAbbr(Advp _,_) -> n+1
221   - | PhraseAbbr(Xp _,_) -> n+1
222   - (* | LexPhraseMode _ -> n+1 FIXME*)
223   - | _ -> n) in
224   - if n>1 then true else b)
225   - | _ -> failwith "has_nontrivial_lex: ni"
226   -
227   -(* Koordynacja z mode *)
228   -(* let _ =
229   - StringMap.iter schemata_walenty(*connected_walenty*) (fun _ orth_map ->
230   - StringMap.iter orth_map (fun orth frames ->
231   - Xlist.iter frames (fun frame ->
232   - let frame = insert_phrases ENIAMwalTEI.phrases frame in
233   - if has_mode_coordination frame then
234   - print_endline (ENIAMwalStringOf.frame orth frame)))) *)
235   -
236   -
237   -(* let get_entry orth pos *)
238   - (*
239   -let load_walenty2 () =
240   - let walenty = load_walenty walenty_filename in
241   - Xlist.fold walenty StringMap.empty (fun walenty entry ->
242   - if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) ->
243   - let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in
244   - let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in
245   - StringMap.add walenty pos map)
246   - else Xlist.fold (connect entry) walenty (fun walenty (lemma,pos,frame) ->
247   - let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in
248   - let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in
249   - StringMap.add walenty pos map))
250   -
251   -
252   -let print_stringqmap filename qmap =
253   - let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in
254   - File.file_out filename (fun file ->
255   - Xlist.iter (Xlist.sort l compare) (fun (v,k) ->
256   - Printf.fprintf file "%5d %s\n" v k))
257   -
258   -let sel_prefs_quantities walenty =
259   - Xlist.fold walenty StringQMap.empty (fun quant e ->
260   - Xlist.fold e.frames quant (fun quant f ->
261   - Xlist.fold f.arguments quant (fun quant a ->
262   - Xlist.fold a.sel_prefs quant (fun quant l ->
263   - Xlist.fold l quant (fun quant -> function
264   - Numeric s ->
265   - let name = try ENIAMplWordnet.synset_name s with Not_found -> "unknown" in
266   - StringQMap.add quant ("N " ^ s ^ " " ^ name)
267   - | Symbol s -> StringQMap.add quant ("S " ^ s)
268   - | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t))))))
269   -*)
270   -(*let _ =
271   - let walenty = load_walenty walenty_filename in
272   - let quant = sel_prefs_quantities walenty in
273   - print_stringqmap "results/quant_sel_prefs.txt" quant*)
274   -
275   -(*let _ =
276   - let walenty = load_walenty2 () in
277   - let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in
278   - Xlist.iter frames_sem (fun frame ->
279   - print_endline (WalStringOf.frame "bębnić" frame))*)
  86 + let positions = if refl then ENIAMwalTEI.refl_position :: positions else positions in
  87 + (opinion,schema_atrs,positions) :: found)
... ...
walenty/ENIAMwalFrames.ml
... ... @@ -217,35 +217,6 @@ and assign_role_and_sense_morfs morfs =
217 217 | LexPhraseMode(_,pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,assign_role_and_sense schema))]
218 218 | _ -> failwith "assign_role_and_sense_morfs"))
219 219  
220   -let rec assign_pro_args schema =
221   - Xlist.map schema (fun s ->
222   - let morfs = match s.morfs with
223   - (E p) :: l -> E Pro :: (E p) :: l
224   - | [LexPhrase _] as morfs -> morfs
225   - | [Phrase(FixedP _)] as morfs -> morfs
226   - | [Phrase(Lex _)] as morfs -> morfs
227   -(* | [Phrase Refl] as morfs -> morfs
228   - | [Phrase Recip] as morfs -> morfs*)
229   - | Phrase Null :: _ as morfs -> morfs
230   - | Phrase Pro :: _ as morfs -> morfs
231   - | morfs -> if s.gf <> SUBJ && s.cr = [] && s.ce = [] then (Phrase Null) :: morfs else (Phrase Pro) :: morfs in (* FIXME: ustalić czy są inne przypadki uzgodnienia *)
232   -(* let morfs = assign_pro_args_lex morfs in *) (* bez pro wewnątrz leksykalizacji *)
233   - {s with morfs=morfs})
234   -
235   -(*let assign_pro_args_lex morfs =
236   - Xlist.map morfs (function
237   - Lex(morf,specs,lex,restr) -> LexN(morf,specs,lex,assign_pro_args_restr restr)
238   - | LexNum(morf,lex1,lex2,restr) -> LexNum(morf,lex1,lex2,assign_pro_args_restr restr)
239   - | LexCompar(morf,l) -> LexCompar(morf,make_gfs_lex l)
240   - | morf -> morf)
241   -
242   -and assign_pro_args_restr = function
243   - Natr -> Natr
244   - | Ratr1 schema -> Ratr1(assign_pro_args schema)
245   - | Atr1 schema -> Atr1(assign_pro_args schema)
246   - | Ratr schema -> Ratr(assign_pro_args schema)
247   - | Atr schema -> Atr(assign_pro_args schema)*)
248   -
249 220 (*let _ =
250 221 Xlist.iter walenty_filenames (fun filename ->
251 222 print_endline filename;
... ... @@ -258,92 +229,6 @@ and assign_pro_args_restr = function
258 229 ignore (process_aspect [Text aspect]);
259 230 ignore (assign_pro_args (make_gfs (process_schema expands subtypes equivs schema))))))*)
260 231  
261   -exception ImpossibleSchema
262   -
263   -let rec reduce_comp lexemes = function
264   - Comp s -> if StringMap.mem lexemes s then Comp s else raise Not_found
265   - | Zeby -> if StringMap.mem lexemes "żeby" || StringMap.mem lexemes "że" then Zeby else raise Not_found
266   - | Gdy -> if StringMap.mem lexemes "gdy" || StringMap.mem lexemes "gdyby" then Gdy else raise Not_found
267   - | CompUndef -> failwith "reduce_comp"
268   -
269   -let reduce_phrase lexemes = function
270   - | PrepNP(_,prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found
271   - | PrepAdjP(_,prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found
272   - | PrepNumP(_,prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found
273   - | ComprepNP(_,prep) as phrase -> if Xlist.fold (try StringMap.find comprep_reqs prep with Not_found -> []) true (fun b s -> b && StringMap.mem lexemes s) then phrase else raise Not_found
274   - | ComparNP(_,prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found
275   - | ComparPP(_,prep) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found
276   - | CP(ctype,comp) -> CP(ctype,reduce_comp lexemes comp)
277   - | NCP(case,ctype,comp) -> if StringMap.mem lexemes "to" then NCP(case,ctype,reduce_comp lexemes comp) else raise Not_found
278   - | PrepNCP(sem,prep,case,ctype,comp) -> if StringMap.mem lexemes prep && StringMap.mem lexemes "to" then PrepNCP(sem,prep,case,ctype,reduce_comp lexemes comp) else raise Not_found
279   - | phrase -> phrase
280   -
281   -let rec reduce_lex lexemes = function
282   - Lexeme s -> if StringMap.mem lexemes s then Lexeme s else raise Not_found
283   - | ORconcat l ->
284   - let l = List.rev (Xlist.fold l [] (fun l lex -> try reduce_lex lexemes lex :: l with Not_found -> l)) in
285   - (match l with
286   - [] -> raise Not_found
287   - | [x] -> x
288   - | l -> ORconcat l)
289   - | ORcoord l ->
290   - let l = List.rev (Xlist.fold l [] (fun l lex -> try reduce_lex lexemes lex :: l with Not_found -> l)) in
291   - (match l with
292   - [] -> raise Not_found
293   - | [x] -> x
294   - | l -> ORcoord l)
295   - | XOR l ->
296   - let l = List.rev (Xlist.fold l [] (fun l lex -> try reduce_lex lexemes lex :: l with Not_found -> l)) in
297   - (match l with
298   - [] -> raise Not_found
299   - | [x] -> x
300   - | l -> XOR l)
301   - | Elexeme gender -> Elexeme gender
302   -
303   -let rec reduce_restr lexemes = function (* leksykalizacje wewnątrz leksykalizacji są w niektórych sytuacjach opcjonalne *)
304   - Natr,[] -> Natr,[]
305   - | Atr,[] -> Atr,[]
306   - | Ratr,[] -> Ratr,[]
307   - | Atr1,[] -> Atr1,[]
308   - | Ratr1,[] -> Ratr1,[]
309   - | Ratr1,schema -> let schema = reduce_schema2 lexemes schema in if schema = [] then raise Not_found else Ratr1,schema
310   - | Atr1,schema -> let schema = reduce_schema2 lexemes schema in if schema = [] then Natr,[] else Atr1,schema
311   - | Ratr,schema -> let schema = reduce_schema2 lexemes schema in if schema = [] then raise Not_found else Ratr,schema
312   - | Atr,schema -> let schema = reduce_schema2 lexemes schema in if schema = [] then Natr,[] else Atr,schema
313   - | Ratrs,schema -> Ratrs,reduce_schema lexemes schema
314   - | _ -> failwith "reduce_restr"
315   -
316   -and reduce_morf lexemes = function (* leksykalizacje, które się z czymś koordynują nie są obowiązakowe *)
317   - Phrase phrase -> Phrase(reduce_phrase lexemes phrase)
318   - | E phrases -> E phrases (* FIXME: uproszczenie *)
319   - | LexPhrase(pos_lex,restr) -> LexPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, reduce_lex lexemes lex),reduce_restr lexemes restr)
320   - | morf -> failwith ("reduce_morf: " ^ ENIAMwalStringOf.morf morf)
321   -
322   -and reduce_morfs lexemes = function
323   - [] -> []
324   - | morf :: l -> (try [reduce_morf lexemes morf] with Not_found -> []) @ reduce_morfs lexemes l
325   -
326   -and reduce_schema2 lexemes = function
327   - [] -> []
328   - | s :: l ->
329   - let morfs = reduce_morfs lexemes s.morfs in
330   - if morfs = [] then reduce_schema2 lexemes l else
331   - {s with morfs=morfs} :: reduce_schema2 lexemes l
332   -
333   -and reduce_schema lexemes = function
334   - [] -> []
335   - | s :: l ->
336   - let morfs = reduce_morfs lexemes s.morfs in
337   - if morfs = [] then raise ImpossibleSchema else
338   - {s with morfs=morfs} :: reduce_schema lexemes l
339   -
340   -let reduce_schema_frame lexemes = function
341   - Frame(atrs,schema) -> Frame(atrs,reduce_schema lexemes schema)
342   -(* | ComprepFrame(s,morfs) ->
343   - let morfs = reduce_morfs lexemes morfs in
344   - if morfs = [] then raise ImpossibleSchema else ComprepFrame(s,morfs)*)
345   - | _ -> failwith "reduce_schema_frame"
346   -
347 232 let remove_trivial_args schema =
348 233 Xlist.fold schema [] (fun l (_,_,_,morfs) ->
349 234 let morfs = Xlist.fold morfs [] (fun morfs -> function
... ...
walenty/ENIAMwalGenerate.ml 0 → 100644
  1 +(*
  2 + * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
  3 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open ENIAMwalTypes2
  21 +open Xstd
  22 +
  23 +let load_walenty walenty_filename expands_filename =
  24 + let walenty,phrases = ENIAMwalTEI.load_walenty walenty_filename in
  25 + let expands = ENIAMwalTEI.load_expands expands_filename in
  26 + let connected_walenty =
  27 + Xlist.fold walenty Entries.empty (fun connected_walenty e ->
  28 + let entries = ENIAMwalConnect.connect e in
  29 + Entries.add_inc_list connected_walenty e.form_pos e.form_orth entries) in
  30 + let schemata_walenty =
  31 + Xlist.fold walenty Entries.empty (fun schemata_walenty e ->
  32 + let entries = ENIAMwalConnect.schemata e in
  33 + Entries.add_inc_list schemata_walenty e.form_pos e.form_orth entries) in
  34 + let expands,compreps,subtypes,equivs =
  35 + ENIAMwalRealizations.load_realizations (expands,ENIAMwalTEI.subtypes,ENIAMwalTEI.equivs) in
  36 + let phrases =
  37 + IntMap.map phrases (fun morf ->
  38 + let morf = ENIAMwalRealizations.expand_schema_morf expands morf in
  39 + let morfs = ENIAMwalRealizations.expand_subtypes_morf subtypes morf in
  40 + let morf = List.flatten (Xlist.map morfs (ENIAMwalRealizations.expand_equivs_morf equivs)) in
  41 + morf) in
  42 + let compreps = Xlist.map compreps (fun (lemma,morfs) ->
  43 + lemma, ENIAMwalLex.expand_lexicalizations_morfs morfs) in
  44 + let entries = ENIAMwalLex.extract_lex_entries_comprepnp [] compreps in
  45 + let phrases,entries =
  46 + IntMap.fold phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs ->
  47 + let morfs = ENIAMwalLex.expand_lexicalizations_morfs morfs in
  48 + let morfs,entries = Xlist.fold morfs ([],entries) ENIAMwalLex.extract_lex_entries in
  49 + IntMap.add phrases id morfs, entries) in
  50 + let entries = Xlist.fold entries Entries.empty (fun entries (pos,lemma,entry) ->
  51 + Entries.add_inc entries pos lemma entry) in
  52 + let entries = Entries.map2 entries (fun pos lemma entries -> EntrySet.to_list (EntrySet.of_list entries)) in
  53 + let entries = Entries.flatten_map entries (fun pos lemma entry ->
  54 + ENIAMwalLex.expand_restr [] lemma pos entry) in
  55 + (* let entries =
  56 + StringMap.mapi entries (fun pos entries2 ->
  57 + StringMap.mapi entries2 (fun lemma entries3 ->
  58 + EntrySet.fold entries3 [] (fun entries3 entry ->
  59 + (ENIAMwalLex.expand_restr [] lemma pos entry) @ entries3))) in *)
  60 + connected_walenty, schemata_walenty, phrases, entries
  61 +
  62 +let print_entries filename entries =
  63 + File.file_out filename (fun file ->
  64 + Entries.iter entries (fun pos lemma entry ->
  65 + Printf.fprintf file "%s\t%s\t%s\n" pos lemma (ENIAMwalStringOf.lex_entry entry)))
  66 +
  67 +let print_phrases filename phrases =
  68 + File.file_out filename (fun file ->
  69 + IntMap.iter phrases (fun id morfs ->
  70 + let morfs = Xlist.map morfs ENIAMwalStringOf.morf in
  71 + Printf.fprintf file "%d\t%s\n" id (String.concat "\t" morfs)))
  72 +
  73 +let print_schemata filename schemata =
  74 + File.file_out filename (fun file ->
  75 + Entries.iter schemata (fun pos lemma (opinion,(n,p,a),schema) ->
  76 + Printf.fprintf file "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" pos lemma
  77 + (ENIAMwalStringOf.opinion opinion)
  78 + (ENIAMwalStringOf.negation n)
  79 + (ENIAMwalStringOf.pred p)
  80 + (ENIAMwalStringOf.aspect a)
  81 + (ENIAMwalStringOf.simple_schema schema)))
  82 +
  83 +let split_tokens s =
  84 + let l = List.flatten (Xlist.map (Str.full_split (Str.regexp " \\|,\\|-") s) (function
  85 + Str.Delim " " -> []
  86 + | Str.Delim s -> [s]
  87 + | Str.Text s -> [s])) in
  88 + String.concat " " l
  89 +
  90 +let print_fixed filename fixed =
  91 + File.file_out filename (fun file ->
  92 + StringSet.iter fixed (fun s ->
  93 + let t = split_tokens s in
  94 + Printf.fprintf file "%s\t%s\tfixed\n" t s))
  95 +
  96 +
  97 +let add_fixed fixed = function
  98 + Phrase (FixedP s) -> StringSet.add fixed s
  99 + | SimpleLexArg(s,FIXED) -> StringSet.add fixed s
  100 + | LexArg(_,s,FIXED) -> StringSet.add fixed s
  101 + | _ -> fixed
  102 +
  103 +let find_fixed_schema fixed schema =
  104 + Xlist.fold schema fixed (fun schema p ->
  105 + Xlist.fold p.morfs fixed add_fixed)
  106 +
  107 +let find_fixed phrases entries =
  108 + let fixed = IntMap.fold phrases StringSet.empty (fun fixed _ morfs ->
  109 + Xlist.fold morfs fixed add_fixed) in
  110 + Entries.fold entries fixed (fun fixed pos lemma -> function
  111 + SimpleLexEntry(s,"fixed") -> StringSet.add fixed s
  112 + | SimpleLexEntry(s,_) -> fixed
  113 + | LexEntry(_,s,"fixed",_,schema) -> find_fixed_schema (StringSet.add fixed s) schema
  114 + | LexEntry(_,_,_,_,schema) -> find_fixed_schema fixed schema
  115 + | ComprepNPEntry(_,_,schema) -> find_fixed_schema fixed schema)
  116 +
  117 +let connected_walenty, schemata_walenty, phrases, entries = load_walenty
  118 + "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml"
  119 + "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml"
  120 +
  121 +(* FIXME: uporządkowanie "winien" *)
  122 +(* Generowanie zasobów *)
  123 +let _ =
  124 + print_entries "results/entries.tab" entries;
  125 + print_phrases "results/phrases.tab" phrases;
  126 + print_schemata "results/schemata.tab" schemata_walenty;
  127 + (* print_connected "results/connected.dic" connected_walenty); *)
  128 + print_fixed "results/fixed.tab" (find_fixed phrases entries);
  129 + ()
  130 +
  131 +(* Test wczytywania Walentego TEI *)
  132 +(* let _ =
  133 + let walenty,phrases = ENIAMwalTEI.load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml"
  134 + let n = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = connect e in n + Xlist.size l) in
  135 + let m = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = schemata e in n + Xlist.size l) in
  136 + Printf.printf "%d connected\n%d schemata\n|phrases|=%d\n" n m (IntMap.size phrases);
  137 + () *)
  138 +
  139 +(* let insert_phrases phrases = function
  140 + Frame(atrs,s) -> Frame(atrs,Xlist.map s (fun p ->
  141 + {p with morfs=Xlist.map p.morfs (function
  142 + MorfId id -> (try IntMap.find phrases id with Not_found -> failwith "insert_phrases")
  143 + | _ -> failwith "insert_phrases")}))
  144 + | _ -> failwith "insert_phrases: ni"
  145 +
  146 +let print_entry pos_map pos orth =
  147 + let orth_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in
  148 + let frames = try StringMap.find orth_map orth with Not_found -> [] in
  149 + Xlist.iter frames (fun frame ->
  150 + let frame = insert_phrases ENIAMwalTEI.phrases frame in
  151 + print_endline (ENIAMwalStringOf.frame orth frame)) *)
  152 +
  153 +(* Wypisanie hasła *)
  154 +(* let _ =
  155 + print_entry connected_walenty "verb" "brudzić";
  156 + () *)
  157 +
  158 +(* let has_nontrivial_lex = function
  159 + Frame(atrs,s) -> Xlist.fold s false (fun b p ->
  160 + if p.role = "Lemma" && p.role_attr = "" then b else
  161 + Xlist.fold p.morfs b (fun b -> function
  162 + MorfId id -> failwith "has_nontrivial_lex"
  163 + | LexPhrase _ -> true
  164 + (* | LexRPhrase _ -> true
  165 + | LexPhraseMode _ -> true *)
  166 + | _ -> b))
  167 + | _ -> failwith "has_nontrivial_lex: ni" *)
  168 +
  169 +(* Leksykalizacje nie wchodzące do lematu *)
  170 +(* let _ =
  171 + StringMap.iter connected_walenty (fun _ orth_map ->
  172 + StringMap.iter orth_map (fun orth frames ->
  173 + Xlist.iter frames (fun frame ->
  174 + let frame = insert_phrases ENIAMwalTEI.phrases frame in
  175 + if has_nontrivial_lex frame then
  176 + print_endline (ENIAMwalStringOf.frame orth frame)))) *)
  177 +
  178 +let simplify_frame_verb = function
  179 + Phrase(NP(Case "dat")) -> []
  180 + | Phrase(NP(Case "inst")) -> []
  181 + | Phrase(PrepNP _) -> []
  182 + | Phrase(ComprepNP _) -> []
  183 + | Phrase(AdvP) -> []
  184 + | t -> [t]
  185 +
  186 +let simplify_frame_noun = function
  187 + Phrase(NP(Case "gen")) -> []
  188 + | Phrase(NP(Case "nom")) -> []
  189 + | Phrase(NP(CaseAgr)) -> []
  190 + | Phrase(PrepNP _) -> []
  191 + | Phrase(ComprepNP _) -> []
  192 + | Phrase(AdjP CaseAgr) -> []
  193 + | PhraseComp(Ncp(Case "gen"),_)
  194 + | PhraseComp(Prepncp(_,_),_) -> []
  195 + | PhraseAbbr(Possp,[]) -> []
  196 + | t -> [t]
  197 +
  198 +let simplify_frame_adj = function
  199 + | t -> [t]
  200 +
  201 +let simplify_frame_adv = function
  202 + | t -> [t]
  203 +
  204 +
  205 +(* let simplify_frame pos = function
  206 + Frame(atrs,s) ->
  207 + let schema = Xlist.fold s [] (fun schema p ->
  208 + let morfs = Xlist.fold p.morfs [] (fun morfs morf ->
  209 + match pos with
  210 + "verb" -> simplify_frame_verb morf @ morfs
  211 + | "noun" -> simplify_frame_noun morf @ morfs
  212 + | "adj" -> simplify_frame_adj morf @ morfs
  213 + | "adv" -> simplify_frame_adv morf @ morfs
  214 + | _ -> failwith "simplify_frame") in
  215 + if morfs = [] then schema else
  216 + {p with ce=[]; cr=[]; morfs=morfs} :: schema) in
  217 + if schema = [] then [] else [Frame(atrs,schema)]
  218 + | _ -> failwith "simplify_frame: ni" *)
  219 +
  220 +
  221 +(* Uproszczone schematy *)
  222 +(* let _ =
  223 + StringMap.iter schemata_walenty (fun pos orth_map ->
  224 + if pos = "noun" then
  225 + StringMap.iter orth_map (fun orth frames ->
  226 + Xlist.iter frames (fun frame ->
  227 + let frame = insert_phrases ENIAMwalTEI.phrases frame in
  228 + let frames = simplify_frame pos frame in
  229 + Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame orth frame))))) *)
  230 +
  231 +(* let has_mode_coordination = function
  232 + Frame(atrs,s) -> Xlist.fold s false (fun b p ->
  233 + let n = Xlist.fold p.morfs 0 (fun n -> function
  234 + MorfId id -> failwith "has_nontrivial_lex"
  235 + | PhraseAbbr(Advp _,_) -> n+1
  236 + | PhraseAbbr(Xp _,_) -> n+1
  237 + (* | LexPhraseMode _ -> n+1 FIXME*)
  238 + | _ -> n) in
  239 + if n>1 then true else b)
  240 + | _ -> failwith "has_nontrivial_lex: ni" *)
  241 +
  242 +(* Koordynacja z mode *)
  243 +(* let _ =
  244 + StringMap.iter schemata_walenty(*connected_walenty*) (fun _ orth_map ->
  245 + StringMap.iter orth_map (fun orth frames ->
  246 + Xlist.iter frames (fun frame ->
  247 + let frame = insert_phrases ENIAMwalTEI.phrases frame in
  248 + if has_mode_coordination frame then
  249 + print_endline (ENIAMwalStringOf.frame orth frame)))) *)
  250 +
  251 +
  252 +(* let get_entry orth pos *)
  253 + (*
  254 +let load_walenty2 () =
  255 + let walenty = load_walenty walenty_filename in
  256 + Xlist.fold walenty StringMap.empty (fun walenty entry ->
  257 + if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) ->
  258 + let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in
  259 + let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in
  260 + StringMap.add walenty pos map)
  261 + else Xlist.fold (connect entry) walenty (fun walenty (lemma,pos,frame) ->
  262 + let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in
  263 + let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in
  264 + StringMap.add walenty pos map))
  265 +
  266 +
  267 +let print_stringqmap filename qmap =
  268 + let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in
  269 + File.file_out filename (fun file ->
  270 + Xlist.iter (Xlist.sort l compare) (fun (v,k) ->
  271 + Printf.fprintf file "%5d %s\n" v k))
  272 +
  273 +let sel_prefs_quantities walenty =
  274 + Xlist.fold walenty StringQMap.empty (fun quant e ->
  275 + Xlist.fold e.frames quant (fun quant f ->
  276 + Xlist.fold f.arguments quant (fun quant a ->
  277 + Xlist.fold a.sel_prefs quant (fun quant l ->
  278 + Xlist.fold l quant (fun quant -> function
  279 + Numeric s ->
  280 + let name = try ENIAMplWordnet.synset_name s with Not_found -> "unknown" in
  281 + StringQMap.add quant ("N " ^ s ^ " " ^ name)
  282 + | Symbol s -> StringQMap.add quant ("S " ^ s)
  283 + | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t))))))
  284 +*)
  285 +(*let _ =
  286 + let walenty = load_walenty walenty_filename in
  287 + let quant = sel_prefs_quantities walenty in
  288 + print_stringqmap "results/quant_sel_prefs.txt" quant*)
  289 +
  290 +(*let _ =
  291 + let walenty = load_walenty2 () in
  292 + let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in
  293 + Xlist.iter frames_sem (fun frame ->
  294 + print_endline (WalStringOf.frame "bębnić" frame))*)
  295 +
  296 +
  297 +(* Wypisanie realizacji *)
  298 +(* let _ =
  299 + Xlist.iter ENIAMwalTEI.expands (fun (id,morf,l) ->
  300 + Printf.printf "%d %s:\n" id (ENIAMwalStringOf.morf morf);
  301 + Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *)
  302 +
  303 +(* Wypisanie realizacji po przetworzeniu *)
  304 +(* let _ =
  305 + AbbrMap.iter expands (fun morf l ->
  306 + Printf.printf "%s:\n" (ENIAMwalStringOf.phrase_abbr morf);
  307 + Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *)
  308 +
  309 +let has_realization = function
  310 + PhraseAbbr _ -> true
  311 + | PhraseComp _ -> true
  312 + | _ -> false
  313 +
  314 +(* Wypisanie fraz, które podlegają rozwijaniu *)
  315 +(*let _ =
  316 + IntMap.iter ENIAMwalTEI.phrases (fun i morf ->
  317 + if has_realization morf then
  318 + Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *)
  319 +
  320 +(* Wypisanie fraz, które podlegają rozwijaniu *)
  321 +(* let _ =
  322 + IntMap.iter phrases (fun i morf ->
  323 + if has_realization morf then
  324 + Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *)
  325 +
  326 +(* let test_phrases = [17088; 17133; 1642]
  327 + let _ =
  328 + Xlist.iter test_phrases (fun i ->
  329 + let m1 = IntMap.find ENIAMwalTEI.phrases i in
  330 + let m2 = IntMap.find phrases i in
  331 + Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m1);
  332 + Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m2)) *)
  333 +
  334 +(* let print_entries entries =
  335 + StringMap.iter entries (fun pos entries2 ->
  336 + StringMap.iter entries2 (fun lemma entries3 ->
  337 + EntrySet.iter entries3 (fun entry ->
  338 + Printf.printf "%s: %s: %s\n" pos lemma (ENIAMwalStringOf.entry entry)))) *)
  339 +
  340 +(* let _ = print_entries entries *)
... ...
walenty/ENIAMwalLex.ml
... ... @@ -17,7 +17,7 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
20   -open ENIAMwalTypes
  20 +open ENIAMwalTypes2
21 21 open Xstd
22 22  
23 23 let prep_arg_schema_field morfs =
... ... @@ -63,7 +63,8 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen
63 63 (* | LexPhrase([PREP _,_;ADJ _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf]
64 64 | LexPhrase([PREP _,_;PPAS _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf]
65 65 | LexPhrase([PREP _,_;PACT _,_],(_,_::_)) -> print_endline (ENIAMwalStringOf.morf morf); [morf] *)
66   - | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[prep_arg_schema_field(*2*) [Phrase(NumP(case))]]))]
  66 + (* | Phrase(PrepNumP(prep,case)) -> [LexPhrase([PREP case,Lexeme prep],(Ratrs,[prep_arg_schema_field(*2*) [Phrase(NumP(case))]]))] *)
  67 + | Phrase(PrepNumP(prep,case)) -> [Phrase(PrepNP(prep,case))] (* FIXME: celowe uproszczenie *)
67 68 | LexPhrase([PREP pcase,plex;SUBST(n,c),slex],(Atr1,[{morfs=[LexPhrase([QUB,_],_)]} as s])) ->
68 69 (* print_endline (ENIAMwalStringOf.morf morf); *)
69 70 [LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([SUBST(n,c),slex],(Natr,[]))]]));
... ... @@ -80,13 +81,13 @@ and expand_lexicalizations_morfs morfs = (* uproszczenie polegające na zezwolen
80 81 | LexPhrase([PREP pcase,plex;NUM(c,g,a),nlex;pos,lex],restr) ->
81 82 let genders,lexs = split_elexeme lex in
82 83 Xlist.map genders (fun gender ->
83   - LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [Phrase Pro]]))]]))) @
  84 + LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [Phrase Null(*Pro*)]]))]]))) @ (*FIXME*)
84 85 Xlist.map lexs (fun lex ->
85 86 LexPhrase([PREP pcase,plex],(Ratrs,[prep_arg_schema_field [LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [LexPhrase([pos,lex],restr)]]))]])))
86 87 | LexPhrase([NUM(c,g,a),nlex;pos,lex],restr) ->
87 88 let genders,lexs = split_elexeme lex in
88 89 Xlist.map genders (fun gender ->
89   - LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [Phrase Pro]]))) @
  90 + LexPhrase([NUM(c,gender,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [Phrase Null(*Pro*)]]))) @
90 91 Xlist.map lexs (fun lex ->
91 92 LexPhrase([NUM(c,g,a),nlex],(Ratrs,[(*num*)prep_arg_schema_field [LexPhrase([pos,lex],restr)]])))
92 93 | LexPhrase([COMP ctype,clex;pos,lex],restr) ->
... ... @@ -157,6 +158,20 @@ let get_pos lex = function
157 158 | COMPAR -> ["compar"]
158 159 | COMP _ -> ["comp"]
159 160 | FIXED -> ["fixed"]
  161 + | _ -> failwith "get_pos"
  162 +
  163 +let map_pos lemma = function
  164 + SUBST(number,case) ->
  165 + (match lemma with
  166 + "ja" -> PPRON12(number,case)
  167 + | "my" -> PPRON12(number,case)
  168 + | "ty" -> PPRON12(number,case)
  169 + | "wy" -> PPRON12(number,case)
  170 + | "on" -> PPRON3(number,case)
  171 + | "siebie" -> SIEBIE case
  172 + | "się" -> QUB
  173 + | _ -> SUBST(number,case))
  174 + | p -> p
160 175  
161 176 let lex_id_counter = ref 0
162 177  
... ... @@ -166,16 +181,20 @@ let get_lex_id () =
166 181  
167 182 (* FIXME: to trzeba będzie poprawić przy unlike coordination *)
168 183 (* FIXME: słownik pos wywołuje redundancję *)
  184 +(* FIXME: parametr refl z typu pos można przenieść do schematu *)
169 185 let rec extract_lex_entries (morfs,entries) = function
170 186 LexPhrase([pos,lex],(Natr,[])) ->
171 187 let lexemes = get_lexemes lex in
172 188 let entries = Xlist.fold lexemes entries (fun entries lemma ->
173 189 Xlist.fold (get_pos lemma pos) entries (fun entries pos2 ->
  190 + (pos2,lemma,SimpleLexEntry(lemma,pos2)) :: entries)) in
  191 + (* let entries = Xlist.fold lexemes entries (fun entries lemma ->
  192 + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 ->
174 193 let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in
175 194 let entry = SimpleLexEntry(lemma,pos2) in
176 195 let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in
177   - StringMap.add entries pos2 entries2)) in
178   - let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> SimpleLexArg(lemma,pos) :: morfs) in
  196 + StringMap.add entries pos2 entries2)) in *)
  197 + let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> SimpleLexArg(lemma,map_pos lemma pos) :: morfs) in
179 198 morfs,entries
180 199 | LexPhrase([pos,lex],(restr,schema)) ->
181 200 let id = get_lex_id () in
... ... @@ -183,11 +202,14 @@ let rec extract_lex_entries (morfs,entries) = function
183 202 let schema,entries = extract_lex_entries_schema entries schema in
184 203 let entries = Xlist.fold lexemes entries (fun entries lemma ->
185 204 Xlist.fold (get_pos lemma pos) entries (fun entries pos2 ->
  205 + (pos2,lemma,LexEntry(id,lemma,pos2,restr,schema)) :: entries)) in
  206 + (* let entries = Xlist.fold lexemes entries (fun entries lemma ->
  207 + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 ->
186 208 let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in
187 209 let entry = LexEntry(id,lemma,pos2,restr,schema) in
188 210 let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in
189   - StringMap.add entries pos2 entries2)) in
190   - let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> LexArg(id,lemma,pos) :: morfs) in
  211 + StringMap.add entries pos2 entries2)) in *)
  212 + let morfs = Xlist.fold lexemes morfs (fun morfs lemma -> LexArg(id,lemma,map_pos lemma pos) :: morfs) in
191 213 morfs,entries
192 214 | LexPhrase _ as morf -> failwith ("extract_lex_entries: " ^ ENIAMwalStringOf.morf morf)
193 215 | morf -> morf :: morfs, entries
... ... @@ -207,35 +229,21 @@ let extract_lex_entries_comprepnp entries compreps =
207 229 let schema,entries = extract_lex_entries_schema entries schema in
208 230 Xlist.fold lexemes entries (fun entries lemma ->
209 231 Xlist.fold (get_pos lemma pos) entries (fun entries pos2 ->
  232 + (pos2,lemma,ComprepNPEntry(clemma,restr,schema)) :: entries))
  233 + (* Xlist.fold lexemes entries (fun entries lemma ->
  234 + Xlist.fold (get_pos lemma pos) entries (fun entries pos2 ->
210 235 let entries2 = try StringMap.find entries pos2 with Not_found -> StringMap.empty in
211 236 let entry = ComprepNPEntry(clemma,restr,schema) in
212 237 let entries2 = StringMap.add_inc entries2 lemma (EntrySet.singleton entry) (fun set -> EntrySet.add set entry) in
213   - StringMap.add entries pos2 entries2))
  238 + StringMap.add entries pos2 entries2)) *)
214 239 | _ -> failwith "extract_lex_entries_comprepnp"))
215 240  
216   -let phrases,entries =
217   - let compreps = Xlist.map ENIAMwalRealizations.compreps (fun (lemma,morfs) ->
218   - lemma, expand_lexicalizations_morfs morfs) in
219   - let entries = extract_lex_entries_comprepnp StringMap.empty compreps in
220   - IntMap.fold ENIAMwalRealizations.phrases (IntMap.empty,entries) (fun (phrases,entries) id morfs ->
221   - let morfs = expand_lexicalizations_morfs morfs in
222   - let morfs,entries = Xlist.fold morfs ([],entries) extract_lex_entries in
223   - IntMap.add phrases id morfs, entries)
224   -
225   -let print_entries entries =
226   - StringMap.iter entries (fun pos entries2 ->
227   - StringMap.iter entries2 (fun lemma entries3 ->
228   - EntrySet.iter entries3 (fun entry ->
229   - Printf.printf "%s: %s: %s\n" pos lemma (ENIAMwalStringOf.entry entry))))
230   -
231   -(* let _ = print_entries entries *)
232   -
233 241 let rec expand_restr valence lexeme pos = function
234 242 SimpleLexEntry(lemma,pos2) -> [SimpleLexEntry(lemma,pos2)]
235 243 (* | LexEntry(id,lemma,pos2,Natr,[]) -> [LexEntry(id,lemma,pos2,NoRestr,[])] *)
236 244 | LexEntry(id,lemma,pos2,Natr,_) -> failwith "expand_restr"
237 245 | LexEntry(id,lemma,pos2,restr,[]) ->
238   - print_endline (lexeme ^ " " ^ pos);
  246 + (* print_endline (lexeme ^ " " ^ pos); *)
239 247 [LexEntry(id,lemma,pos2,restr,[])] (* FIXME *)
240 248 (* (* print_endline "expand_restr"; *)
241 249 let frames = try StringMap.find (StringMap.find valence lexeme) pos
... ... @@ -265,7 +273,7 @@ let rec expand_restr valence lexeme pos = function
265 273 | LexEntry(id,lemma,pos2,NoRestr,_) -> failwith "expand_restr"
266 274 (* | ComprepNPEntry(lemma,Natr,[]) -> [ComprepNPEntry(lemma,NoRestr,[])] *)
267 275 | ComprepNPEntry(lemma,Natr,_) -> failwith "expand_restr"
268   - | ComprepNPEntry(lemma,restr,[]) as entry -> failwith ("expand_restr: " ^ ENIAMwalStringOf.entry entry)
  276 + | ComprepNPEntry(lemma,restr,[]) as entry -> failwith ("expand_restr: " ^ ENIAMwalStringOf.lex_entry entry)
269 277 | ComprepNPEntry(lemma,Atr,schema) ->
270 278 let schema = Xlist.map schema (fun p -> {p with morfs=Phrase Null :: p.morfs}) in
271 279 [ComprepNPEntry(lemma,NoRestr,schema)]
... ... @@ -279,35 +287,4 @@ let rec expand_restr valence lexeme pos = function
279 287 Xlist.map schema (fun x -> ComprepNPEntry(lemma,NoRestr,[x]))
280 288 | ComprepNPEntry(lemma,Ratrs,schema) -> [ComprepNPEntry(lemma,NoRestr,schema)]
281 289 | ComprepNPEntry(lemma,NoRestr,_) -> failwith "expand_restr"
282   - (* | Frame _ as frame -> [frame] *)
283   - | _ -> failwith "expand_restr"
284   -
285   -let entries =
286   - StringMap.mapi entries (fun pos entries2 ->
287   - StringMap.mapi entries2 (fun lemma entries3 ->
288   - EntrySet.fold entries3 [] (fun entries3 entry ->
289   - (expand_restr [] lemma pos entry) @ entries3)))
290   -
291   -
292   -(*
293   -let convert morfs =
294   - let morfs = expand_lexicalizations_morfs morfs in
295   - let morfs,frames = Xlist.fold morfs ([],[]) extract_lex_morf in
296   - (*Xlist.fold frames(*extract_lex_frames lexeme pos [] frame*) valence (fun valence -> function
297   - lexeme,pos,Frame(atrs,schema) ->
298   - let schemas = simplify_lex (split_xor (split_or_coord schema)) in
299   - Xlist.fold schemas valence (fun valence schema ->
300   - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
301   - let poss = StringMap.add_inc poss pos [Frame(atrs,schema)] (fun l -> Frame(atrs,schema) :: l) in
302   - StringMap.add valence lexeme poss)
303   - | lexeme,pos,LexFrame(id,pos2,restr,schema) ->
304   - let schemas = simplify_lex (split_xor (split_or_coord schema)) in
305   - Xlist.fold schemas valence (fun valence schema ->
306   - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
307   - let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in
308   - StringMap.add valence lexeme poss)
309   - | _ -> failwith "convert_frame") *)()
310   -
311   -
312   -let phrases = IntMap.map ENIAMwalRealizations.phrases convert
313   -*)
  290 + (* | _ -> failwith "expand_restr" *)
... ...
walenty/ENIAMwalParser.ml
... ... @@ -18,9 +18,16 @@
18 18 *)
19 19  
20 20 open ENIAMwalTypes
21   -
22 21 open Xstd
23 22  
  23 +type token =
  24 + Text of string
  25 + | Paren of token list
  26 + | Bracet of token list
  27 + | SqBra of token list
  28 + | LParen | RParen | LBracet | RBracet | LSqBra | RSqBra
  29 + | Semic | Plus | Comma | Quot
  30 +
24 31 let rec find_brackets = function
25 32 LParen :: l ->
26 33 let found,l = find_rbracket RParen [] l in
... ... @@ -50,6 +57,21 @@ and find_rbracket bracket rev = function
50 57 | s :: l -> find_rbracket bracket (s :: rev) l
51 58 | [] -> failwith "find_rbracket"
52 59  
  60 +let split_text schema =
  61 + find_brackets (Xlist.map (Str.full_split (Str.regexp "\\]\\|\\+\\|{\\|}\\|(\\|)\\|,\\|;\\|'\\|\\[") schema) (function
  62 + Str.Text s -> Text s
  63 + | Str.Delim "(" -> LParen
  64 + | Str.Delim ")" -> RParen
  65 + | Str.Delim "{" -> LBracet
  66 + | Str.Delim "}" -> RBracet
  67 + | Str.Delim "[" -> LSqBra
  68 + | Str.Delim "]" -> RSqBra
  69 + | Str.Delim ";" -> Semic
  70 + | Str.Delim "+" -> Plus
  71 + | Str.Delim "," -> Comma
  72 + | Str.Delim "'" -> Quot
  73 + | _ -> failwith "parse_text"))
  74 +
53 75 let rec split_symbol symb rev = function
54 76 [] -> [List.rev rev](*failwith "split_symbol"*)
55 77 | s :: l ->
... ... @@ -58,56 +80,24 @@ let rec split_symbol symb rev = function
58 80 else (List.rev rev) :: (split_symbol symb [] l)
59 81 else split_symbol symb (s :: rev) l
60 82  
61   -let parse_opinion = function
62   - "pewny" -> Pewny
63   - | "cer" -> Pewny
64   - | "potoczny" -> Potoczny
65   - | "col" -> Potoczny
66   - | "wątpliwy" -> Watpliwy
67   - | "unc" -> Watpliwy
68   - | "archaiczny" -> Archaiczny
69   - | "dat" -> Archaiczny
70   - | "zły" -> Zly
71   - | "bad" -> Zly
72   - | "wulgarny" -> Wulgarny
73   - | "vul" -> Wulgarny
74   - | x -> failwith ("parse_opinion: " ^ x)
75   -
76   -let parse_roles l =
77   - let r,cr,ce = Xlist.fold l ([],[],[]) (fun (r,controller,controllee) -> function
78   - "subj" -> SUBJ :: r, controller, controllee
79   - | "obj" -> OBJ :: r, controller, controllee
80   - | "controller" -> r, "1" :: controller, controllee
81   - | "controllee" -> r, controller, "1" :: controllee
82   - | "controller2" -> r, "2" :: controller, controllee
83   - | "controllee2" -> r, controller, "2" :: controllee
84   - | "" -> r, controller, controllee
85   - | x -> failwith ("parse_roles: " ^ x)) in
86   - (match r with
87   - [] -> ARG
88   - | [x] -> x
89   - | _ -> failwith "parse_roles"),cr,ce
90   -
91   -let parse_negation = function
92   - [Text "_"] -> NegationUndef
93   - | [Text "neg"] -> Negation
94   - | [Text "aff"] -> Aff
95   - | [Text ""] -> NegationNA
96   - | l -> failwith ("parse_negation: " ^ ENIAMwalStringOf.token_list l)
97   -
98   -let parse_pred = function
99   - [Text ""] -> PredNA
100   - | [Text "pred"] -> Pred
101   - | [Text "false"] -> PredNA
102   - | [Text "true"] -> Pred
103   - | l -> failwith ("parse_pred: " ^ ENIAMwalStringOf.token_list l)
104   -
105   -let parse_aspect = function
106   - [Text "perf"] -> Aspect "perf"
107   - | [Text "imperf"] -> Aspect "imperf"
108   - | [Text "_"] -> AspectUndef
109   - | [Text ""] -> AspectNA
110   - | l -> failwith ("parse_aspect: " ^ ENIAMwalStringOf.token_list l)
  83 +let rec string_of_token = function
  84 + Text s -> s
  85 + | Paren l -> "(" ^ String.concat "" (Xlist.map l string_of_token) ^ ")"
  86 + | Bracet l -> "{" ^ String.concat "" (Xlist.map l string_of_token) ^ "}"
  87 + | SqBra l -> "[" ^ String.concat "" (Xlist.map l string_of_token) ^ "]"
  88 + | LParen -> "("
  89 + | RParen -> ")"
  90 + | LBracet -> "{"
  91 + | RBracet -> "}"
  92 + | LSqBra -> "["
  93 + | RSqBra -> "]"
  94 + | Semic -> ";"
  95 + | Plus -> "+"
  96 + | Comma -> ","
  97 + | Quot -> "'"
  98 +
  99 +let string_of_token_list l =
  100 + String.concat "" (Xlist.map l string_of_token)
111 101  
112 102 let parse_case = function
113 103 [Text "nom"] -> Case "nom"
... ... @@ -121,169 +111,15 @@ let parse_case = function
121 111 | [Text "part"] -> Part
122 112 | [Text "postp"] -> Case "postp"
123 113 | [Text "agr"] -> CaseAgr
124   - | l -> failwith ("parse_case: " ^ ENIAMwalStringOf.token_list l)
125   -
126   -let parse_prep = function
127   - | [Text "niż"] -> "niż"
128   - | [Text "w czasie"] -> "w czasie"
129   - | [Text "podczas"] -> "podczas"
130   - | [Text "w roli"] -> "w roli"
131   - | [Text "pomiędzy"] -> "pomiędzy"
132   - | [Text "według"] -> "według"
133   - | [Text "zza"] -> "zza"
134   - | [Text "poza"] -> "poza"
135   - | [Text "spośród"] -> "spośród"
136   - | [Text "spod"] -> "spod"
137   - | [Text "koło"] -> "koło"
138   - | [Text "względem"] -> "względem"
139   - | [Text "wśród"] -> "wśród"
140   - | [Text "wskutek"] -> "wskutek"
141   - | [Text "przez"] -> "przez"
142   - | [Text "co do"] -> "co do"
143   - | [Text "wokół"] -> "wokół"
144   - | [Text "między"] -> "między"
145   - | [Text "bez"] -> "bez"
146   - | [Text "przy"] -> "przy"
147   - | [Text "na rzecz"] -> "na rzecz"
148   - | [Text "na temat"] -> "na temat"
149   - | [Text "po"] -> "po"
150   - | [Text "u"] -> "u"
151   - | [Text "pod"] -> "pod"
152   - | [Text "ponad"] -> "ponad"
153   - | [Text "jako"] -> "jako"
154   - | [Text "w kwestii"] -> "w kwestii"
155   - | [Text "w sprawie"] -> "w sprawie"
156   - | [Text "ku"] -> "ku"
157   - | [Text "przeciw"] -> "przeciw"
158   - | [Text "nad"] -> "nad"
159   - | [Text "z powodu"] -> "z powodu"
160   - | [Text "przed"] -> "przed"
161   - | [Text "od"] -> "od"
162   - | [Text "o"] -> "o"
163   - | [Text "w"] -> "w"
164   - | [Text "za"] -> "za"
165   - | [Text "dla"] -> "dla"
166   - | [Text "na"] -> "na"
167   - | [Text "z"] -> "z"
168   - | [Text "do"] -> "do"
169   - | [Text "wobec"] -> "wobec"
170   - | [Text "jak"] -> "jak"
171   - | [Text "spomiędzy"] -> "spomiędzy"
172   - | [Text "sponad"] -> "sponad"
173   - | [Text "spopod"] -> "spopod"
174   - | [Text "spoza"] -> "spoza"
175   - | [Text "sprzed"] -> "sprzed"
176   - | [Text "znad"] -> "znad"
177   - | [Text "dokoła"] -> "dokoła"
178   - | [Text "dookoła"] -> "dookoła"
179   - | [Text "naokoło"] -> "naokoło"
180   - | [Text "naprzeciw"] -> "naprzeciw"
181   - | [Text "obok"] -> "obok"
182   - | [Text "poniżej"] -> "poniżej"
183   - | [Text "popod"] -> "popod"
184   - | [Text "pośrodku"] -> "pośrodku"
185   - | [Text "pośród"] -> "pośród"
186   - | [Text "powyżej"] -> "powyżej"
187   - | [Text "wewnątrz"] -> "wewnątrz"
188   - | [Text "wkoło"] -> "wkoło"
189   - | [Text "wzdłuż"] -> "wzdłuż"
190   - | [Text "naokół"] -> "naokół"
191   - | [Text "śród"] -> "śród"
192   - | [Text "wpośród"] -> "wpośród"
193   - | [Text "poprzez"] -> "poprzez"
194   - | [Text "około"] -> "około"
195   - | [Text "na punkcie"] -> "na punkcie"
196   - | [Text "pod względem"] -> "pod względem"
197   - | [Text "pod wpływem"] -> "pod wpływem"
198   - | [Text "na skutek"] -> "na skutek"
199   - | [Text "na polu"] -> "na polu"
200   - | [Text "na poczet"] -> "na poczet"
201   - | [Text "na widok"] -> "na widok"
202   - | [Text "w dziedzinie"] -> "w dziedzinie"
203   - | [Text "pod pozorem"] -> "pod pozorem"
204   - | [Text "pod pretekstem"] -> "pod pretekstem"
205   - | [Text "za pomocą"] -> "za pomocą"
206   - | [Text "pod adresem"] -> "pod adresem"
207   - | [Text "w kierunku"] -> "w kierunku"
208   - | [Text "w stronę"] -> "w stronę"
209   - | [Text "w obliczu"] -> "w obliczu"
210   - | [Text "u podstaw"] -> "u podstaw"
211   - | [Text "pod kątem"] -> "pod kątem"
212   - | [Text "na okoliczność"] -> "na okoliczność"
213   - | [Text "w temacie"] -> "w temacie"
214   - | [Text "od strony"] -> "od strony"
215   - | [Text "ze strony"] -> "ze strony"
216   - | [Text "po stronie"] -> "po stronie"
217   - | [Text "na myśl"] -> "na myśl"
218   - | [Text "w charakterze"] -> "w charakterze"
219   - | [Text "w zakresie"] -> "w zakresie"
220   - | [Text "ze względu na"] -> "ze względu na"
221   - | [Text "na podstawie"] -> "na podstawie"
222   - | [Text "w stosunku do"] -> "w stosunku do"
223   - | [Text "z tytułu"] -> "z tytułu"
224   - | [Text "z okazji"] -> "z okazji"
225   - | [Text "z zakresu"] -> "z zakresu"
226   - | [Text "w wysokości"] -> "w wysokości"
227   - | [Text "na gruncie"] -> "na gruncie"
228   - | [Text "z dziedziny"] -> "z dziedziny"
229   - | [Text "na tle"] -> "na tle"
230   - | [Text "na niwie"] -> "na niwie"
231   - | [Text "w ramach"] -> "w ramach"
232   - | [Text "na korzyść"] -> "na korzyść"
233   - | [Text "w intencji"] -> "w intencji"
234   - | [Text "na kanwie"] -> "na kanwie"
235   - | [Text "na niekorzyść"] -> "na niekorzyść"
236   - | [Text "z ramienia"] -> "z ramienia"
237   - | [Text "w wyniku"] -> "w wyniku"
238   - | [Text "za sprawą"] -> "za sprawą"
239   - | [Text "w imię"] -> "w imię"
240   - | [Text "w celu"] -> "w celu"
241   - | [Text "z pomocą"] -> "z pomocą"
242   - | [Text "per"] -> "per"
243   - | [Text "co"] -> "co"
244   - | [Text s] -> s
245   - | l -> failwith ("parse_prep: " ^ ENIAMwalStringOf.token_list l)
246   -
247   -let rec parse_comp = function
248   - [Text "int"] -> Int,[]
249   - | [Text "rel"] -> Rel,[]
250   - | [Text "int";SqBra l] -> Int,List.flatten (Xlist.map (split_symbol Semic [] l) (fun c -> snd (parse_comp c)))
251   - | [Text "rel";SqBra l] -> Rel,List.flatten (Xlist.map (split_symbol Semic [] l) (fun c -> snd (parse_comp c)))
252   - | [Text "co"] -> CompTypeUndef,[Comp "co"] (* subst qub prep comp *)
253   - | [Text "kto"] -> CompTypeUndef,[Comp "kto"] (* subst *)
254   - | [Text "ile"] -> CompTypeUndef,[Comp "ile"] (* num adv *)
255   - | [Text "jaki"] -> CompTypeUndef,[Comp "jaki"] (* adj *)
256   - | [Text "który"] -> CompTypeUndef,[Comp "który"] (* adj *)
257   - | [Text "czyj"] -> CompTypeUndef,[Comp "czyj"] (* adj *)
258   - | [Text "jak"] -> CompTypeUndef,[Comp "jak"] (* prep conj adv *)
259   - | [Text "kiedy"] -> CompTypeUndef,[Comp "kiedy"] (* comp adv *)
260   - | [Text "gdzie"] -> CompTypeUndef,[Comp "gdzie"] (* qub adv *)
261   - | [Text "odkąd"] -> CompTypeUndef,[Comp "odkąd"] (* adv *)
262   - | [Text "skąd"] -> CompTypeUndef,[Comp "skąd"] (* adv *)
263   - | [Text "dokąd"] -> CompTypeUndef,[Comp "dokąd"] (* adv *)
264   - | [Text "którędy"] -> CompTypeUndef,[Comp "którędy"] (* adv *)
265   - | [Text "dlaczego"] -> CompTypeUndef,[Comp "dlaczego"] (* adv *)
266   - | [Text "czemu"] -> CompTypeUndef,[Comp "czemu"] (* adv *)
267   - | [Text "czy"] -> CompTypeUndef,[Comp "czy"] (* qub conj *)
268   - | [Text "jakby"] -> CompTypeUndef,[Comp "jakby"] (* qub comp *)
269   - | [Text "jakoby"] -> CompTypeUndef,[Comp "jakoby"] (* qub comp *)
270   - | [Text "gdy"] -> CompTypeUndef,[Gdy] (* adv; gdyby: qub comp *)
271   - | [Text "dopóki"] -> CompTypeUndef,[Comp "dopóki"] (* comp *)
272   - | [Text "zanim"] -> CompTypeUndef,[Comp "zanim"] (* comp *)
273   - | [Text "jeśli"] -> CompTypeUndef,[Comp "jeśli"] (* comp *)
274   - | [Text "żeby2"] -> CompTypeUndef,[Zeby]
275   - | [Text "żeby"] -> CompTypeUndef,[Comp "żeby"] (* qub comp *)
276   - | [Text "że"] -> CompTypeUndef,[Comp "że"] (* qub comp *)
277   - | [Text "aż"] -> CompTypeUndef,[Comp "aż"] (* qub comp *)
278   - | [Text "bo"] -> CompTypeUndef,[Comp "bo"] (* qub comp *)
279   - | l -> failwith ("parse_comp: " ^ ENIAMwalStringOf.token_list l)
  114 + | [Text "_"] -> CaseUndef
  115 + | l -> failwith ("parse_case: " ^ string_of_token_list l)
280 116  
281 117 let parse_number = function
282 118 [Text "sg"] -> Number "sg"
283 119 | [Text "pl"] -> Number "pl"
284 120 | [Text "agr"] -> NumberAgr
285 121 | [Text "_"] -> NumberUndef
286   - | l -> failwith ("parse_number: " ^ ENIAMwalStringOf.token_list l)
  122 + | l -> failwith ("parse_number: " ^ string_of_token_list l)
287 123  
288 124 let parse_gender = function
289 125 [Text "m1"] -> Gender "m1"
... ... @@ -291,247 +127,273 @@ let parse_gender = function
291 127 | [Text "n"] -> Genders["n1";"n2"]
292 128 | [Text "f"] -> Gender "f"
293 129 | [Text "m1.n"] -> Genders["m1";"n1";"n2"]
  130 + | [Text "m1.n1.n2"] -> Genders["m1";"n1";"n2"]
  131 + | [Text "n1.n2"] -> Genders["n1";"n2"]
294 132 | [Text "_"] -> GenderUndef
295 133 | [Text "agr"] -> GenderAgr
296   - | l -> failwith ("parse_gender: " ^ ENIAMwalStringOf.token_list l)
  134 + | l -> failwith ("parse_gender: " ^ string_of_token_list l)
297 135  
298 136 let parse_grad = function
299 137 [Text "pos"] -> Grad "pos"
300 138 | [Text "com"] -> Grad "com"
301 139 | [Text "sup"] -> Grad "sup"
302 140 | [Text "_"] -> GradUndef
303   - | l -> failwith ("parse_grad: " ^ ENIAMwalStringOf.token_list l)
  141 + | l -> failwith ("parse_grad: " ^ string_of_token_list l)
  142 +
  143 +let parse_aspect = function
  144 + [Text "perf"] -> Aspect "perf"
  145 + | [Text "imperf"] -> Aspect "imperf"
  146 + | [Text "_"] -> AspectUndef
  147 + | [Text ""] -> AspectNA
  148 + | l -> failwith ("parse_aspect: " ^ string_of_token_list l)
  149 +
  150 +let parse_negation = function
  151 + [Text "_"] -> NegationUndef
  152 + | [Text "neg"] -> Negation
  153 + | [Text "aff"] -> Aff
  154 + | [Text ""] -> NegationNA
  155 + | l -> failwith ("parse_negation: " ^ string_of_token_list l)
304 156  
305 157 let parse_refl = function
306   - [] -> ReflEmpty
  158 + (* [] -> ReflEmpty
307 159 | [Text "się"] -> ReflSie
308 160 | [Text ""] -> ReflEmpty
309 161 | [Text "false"] -> ReflEmpty
310   - | [Text "true"] -> ReflSie
311   - | l -> failwith ("parse_refl: " ^ ENIAMwalStringOf.token_list l)
312   -
313   -let rec parse_lex = function
314   - [Quot; Text "E"; Paren[Text gender]; Quot] -> Elexeme(parse_gender [Text gender])
315   - | [Quot; Text x; Quot] -> Lexeme x
316   - | [Quot; Text x; Comma; Text y; Quot] -> Lexeme (x ^ "," ^ y)
317   - | [Text "OR"; Paren l] ->
318   - (match split_symbol Semic [] l with
319   - [l] -> (match split_symbol Comma [] l with
320   - [_] -> failwith "parse_lex OR"
321   - | ll -> ORconcat(Xlist.map ll parse_lex))
322   - | ll -> ORcoord(Xlist.map ll parse_lex))
323   - | [Text "XOR"; Paren l] -> XOR(Xlist.map (List.flatten (Xlist.map (split_symbol Semic [] l) (split_symbol Comma []))) parse_lex)
324   - | l -> failwith ("parse_lex: " ^ ENIAMwalStringOf.token_list l)
325   -
326   -let get_lexeme = function
327   - Lexeme s -> s
328   - | _ -> failwith "get_lexeme"
329   -
330   -let new_schema r cr ce morfs =
331   - {gf=r; role=""; role_attr="";sel_prefs=[]; cr=cr; ce=ce; dir=Both; morfs=morfs}
332   -
333   -let rec parse_restr = function
334   - [Text "natr"] -> Natr,[]
335   - | [Text "atr"] -> Atr,[]
336   - | [Text "ratr"] -> Ratr,[]
337   - | [Text "atr1"] -> Atr1,[]
338   - | [Text "ratr1"] -> Ratr1,[]
339   - | [Text "ratr1"; Paren schema] -> Ratr1, parse_schema_simple schema
340   - | [Text "ratr"; Paren schema] -> Ratr, parse_schema_simple schema
341   - | [Text "atr1"; Paren schema] -> Atr1, parse_schema_simple schema
342   - | [Text "atr"; Paren schema] -> Atr, parse_schema_simple schema
343   - | l -> failwith ("parse_restr: " ^ ENIAMwalStringOf.token_list l)
344   -
345   -and parse_schema_simple schema =
346   - Xlist.map (split_symbol Plus [] schema) (function
347   - [Bracet b] -> let r,cr,ce = parse_roles [] in new_schema r cr ce (parse_morfs b)
348   - | [Text s1;Bracet b] -> let r,cr,ce = parse_roles [s1] in new_schema r cr ce (parse_morfs b)
349   - | _ -> failwith "parse_schema_simple")
350   -
351   -and parse_mode = function
352   - [Text "abl"] -> "abl",[]
353   - | [Text "adl"] -> "adl",[]
354   - | [Text "caus"] -> "caus",[]
355   - | [Text "dest"] -> "dest",[]
356   - | [Text "dur"] -> "dur",[]
357   - | [Text "instr"] -> "instr",[]
358   - | [Text "locat"] -> "locat",[]
359   - | [Text "perl"] -> "perl",[]
360   - | [Text "temp"] -> "temp",[]
361   - | [Text "abl";SqBra l] -> "abl",parse_morfs l
362   - | [Text "adl";SqBra l] -> "adl",parse_morfs l
363   - | [Text "caus";SqBra l] -> "caus",parse_morfs l
364   - | [Text "dest";SqBra l] -> "dest",parse_morfs l
365   - | [Text "dur";SqBra l] -> "dur",parse_morfs l
366   - | [Text "instr";SqBra l] -> "instr",parse_morfs l
367   - | [Text "locat";SqBra l] -> "locat",parse_morfs l
368   - | [Text "perl";SqBra l] -> "perl",parse_morfs l
369   - | [Text "temp";SqBra l] -> "temp",parse_morfs l
370   - | [Text "mod"] -> "mod",[]
371   - | [Text "mod";SqBra l] -> "mod",parse_morfs l
372   - | [Text "pron"] -> "pron",[]
373   - | [Text "misc"] -> "misc",[]
374   - | l -> failwith ("parse_mode: " ^ ENIAMwalStringOf.token_list l)
  162 + | [Text "true"] -> ReflSie *)
  163 + | [Text "nosię"] -> ReflFalse
  164 + | [Text "się"] -> ReflTrue
  165 + | l -> failwith ("parse_refl: " ^ string_of_token_list l)
  166 +
  167 +let parse_ctype = function
  168 + [Text "int"] -> Int
  169 + | [Text "rel"] -> Rel
  170 + | [Text "_"] -> CompTypeUndef
  171 + | l -> failwith ("parse_ctype: " ^ string_of_token_list l)
  172 +
  173 +let parse_acm = function
  174 + (* [Text "int"] -> Int
  175 + | [Text "rel"] -> Rel *)
  176 + | [Text "_"] -> AcmUndef
  177 + | l -> failwith ("parse_acm: " ^ string_of_token_list l)
  178 +
  179 +let parse_comp = function
  180 + | [Text "co"] -> Comp "co" (* subst qub prep comp *)
  181 + | [Text "kto"] -> Comp "kto" (* subst *)
  182 + | [Text "ile"] -> Comp "ile" (* num adv *)
  183 + | [Text "jaki"] -> Comp "jaki" (* adj *)
  184 + | [Text "który"] -> Comp "który" (* adj *)
  185 + | [Text "czyj"] -> Comp "czyj" (* adj *)
  186 + | [Text "jak"] -> Comp "jak" (* prep conj adv *)
  187 + | [Text "kiedy"] -> Comp "kiedy" (* comp adv *)
  188 + | [Text "gdzie"] -> Comp "gdzie" (* qub adv *)
  189 + | [Text "odkąd"] -> Comp "odkąd" (* adv *)
  190 + | [Text "skąd"] -> Comp "skąd" (* adv *)
  191 + | [Text "dokąd"] -> Comp "dokąd" (* adv *)
  192 + | [Text "którędy"] -> Comp "którędy" (* adv *)
  193 + | [Text "dlaczego"] -> Comp "dlaczego" (* adv *)
  194 + | [Text "czemu"] -> Comp "czemu" (* adv *)
  195 + | [Text "czy"] -> Comp "czy" (* qub conj *)
  196 + | [Text "jakby"] -> Comp "jakby" (* qub comp *)
  197 + | [Text "jakoby"] -> Comp "jakoby" (* qub comp *)
  198 + | [Text "gdy"] -> Gdy (* adv; gdyby: qub comp *)
  199 + | [Text "dopóki"] -> Comp "dopóki" (* comp *)
  200 + | [Text "zanim"] -> Comp "zanim" (* comp *)
  201 + | [Text "jeśli"] -> Comp "jeśli" (* comp *)
  202 + | [Text "żeby2"] -> Zeby
  203 + | [Text "żeby"] -> Comp "żeby" (* qub comp *)
  204 + | [Text "że"] -> Comp "że" (* qub comp *)
  205 + | [Text "aż"] -> Comp "aż" (* qub comp *)
  206 + | [Text "bo"] -> Comp "bo" (* qub comp *)
  207 + | [Text "niczym"] -> Comp "niczym"
  208 + | [Text "_"] -> CompUndef
  209 + | l -> failwith ("parse_comp: " ^ string_of_token_list l)
  210 +
  211 +let parse_opinion = function
  212 + "pewny" -> Pewny
  213 + (* | "cer" -> Pewny *)
  214 + | "potoczny" -> Potoczny
  215 + (* | "col" -> Potoczny *)
  216 + | "wątpliwy" -> Watpliwy
  217 + (* | "unc" -> Watpliwy *)
  218 + | "archaiczny" -> Archaiczny
  219 + (* | "dat" -> Archaiczny *)
  220 + | "zły" -> Zly
  221 + (* | "bad" -> Zly *)
  222 + | "wulgarny" -> Wulgarny
  223 + (* | "vul" -> Wulgarny *)
  224 + | x -> failwith ("parse_opinion: " ^ x)
  225 +
  226 +let parse_pred = function
  227 + "pred" -> PredTrue
  228 + | "nopred" -> PredFalse
  229 + | s -> failwith ("parse_pred: " ^ s)
  230 +
  231 +let parse_pos = function
  232 + "SUBST",[number;case] -> SUBST(parse_number number,parse_case case)
  233 + | "PPRON12",[number;case] -> PPRON12(parse_number number,parse_case case)
  234 + | "PPRON3",[number;case] -> PPRON3(parse_number number,parse_case case)
  235 + | "SIEBIE",[case] -> SIEBIE(parse_case case)
  236 + | "PREP",[case] -> PREP(parse_case case)
  237 + | "NUM",[case;gender;acm] -> NUM(parse_case case,parse_gender gender,parse_acm acm)
  238 + | "ADJ",[number;case;gender;grad] -> ADJ(parse_number number,parse_case case,parse_gender gender,parse_grad grad)
  239 + | "ADV",[grad] -> ADV(parse_grad grad)
  240 + | "GER",[number;case;gender;aspect;negation;refl] -> GER(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl)
  241 + | "PPAS",[number;case;gender;aspect;negation] -> PPAS(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation)
  242 + | "PACT",[number;case;gender;aspect;negation;refl] -> PACT(parse_number number,parse_case case,parse_gender gender,parse_aspect aspect,parse_negation negation,parse_refl refl)
  243 + | "INF",[aspect;negation;refl] -> INF(parse_aspect aspect,parse_negation negation,parse_refl refl)
  244 + | "QUB",[] -> QUB
  245 + | "COMPAR",[] -> COMPAR
  246 + | "COMP",[ctype] -> COMP(parse_ctype ctype)
  247 + | "PERS",[negation;refl] -> PERS(parse_negation negation,parse_refl refl)
  248 + | s,ll -> print_endline ("parse_pos: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll string_of_token_list) ^ ")"); FIXED
  249 +
  250 +let rec parse_phrase = function
  251 + "np",[case] -> NP(parse_case case)
  252 + | "prepnp",[[Text prep]; case] -> PrepNP(prep,parse_case case)
  253 + | "adjp",[case] -> AdjP(parse_case case)
  254 + | "prepadjp",[[Text prep]; case] -> PrepAdjP(prep,parse_case case)
  255 + | "comprepnp",[[Text prep]] -> ComprepNP prep
  256 + | "comparp",[[Text prep]] -> ComparP prep
  257 + | "cp",[ctype;comp] -> CP(parse_ctype ctype,parse_comp comp)
  258 + | "ncp",[case;ctype;comp] -> NCP(parse_case case,parse_ctype ctype,parse_comp comp)
  259 + | "prepncp",[[Text prep];case;ctype;comp] -> PrepNCP(prep,parse_case case,parse_ctype ctype,parse_comp comp)
  260 + | "infp",[aspect] -> InfP(parse_aspect aspect)
  261 + | "fixed",[[Text lemma]] -> FixedP lemma
  262 + | "fixed",[[Text lemma1];[Text lemma2]] -> FixedP (lemma1 ^ "," ^ lemma2)
  263 + | "or",[] -> Or
  264 + | "refl",[] -> Refl
  265 + | "recip",[] -> Recip
  266 + | "E",[morf] -> E(parse_morf morf)
  267 + | "advp",[] -> AdvP
  268 + | "null",[] -> Null
  269 + | "lex",[[Text lemma];[Text pos; Paren p]] -> SimpleLexArg(lemma,parse_pos (pos, split_symbol Comma [] p))
  270 + | "lex",[[Text lemma];[Text pos]] -> SimpleLexArg(lemma,parse_pos (pos, []))
  271 + | "lex",[[Text id];[Text lemma];[Text pos; Paren p]] -> LexArg(int_of_string id,lemma,parse_pos (pos, split_symbol Comma [] p))
  272 + | "lex",[[Text id];[Text lemma];[Text pos]] -> LexArg(int_of_string id,lemma,parse_pos (pos, []))
  273 + | s,ll -> print_endline ("parse_phrase: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll string_of_token_list) ^ ")"); Null
375 274  
376 275 and parse_morf = function
377   - "np",[case] -> Phrase(NP(parse_case case))
378   - | "prepnp",[prep; case] -> Phrase(PrepNP(Sem,parse_prep prep,parse_case case))
379   - | "adjp",[case] -> Phrase(AdjP(parse_case case))
380   - | "prepadjp",[prep; case] -> Phrase(PrepAdjP(Sem,parse_prep prep,parse_case case))
381   - | "nump",[case] -> Phrase(NumP(parse_case case))
382   - | "prepnump",[prep; case] -> Phrase(PrepNumP(Sem,parse_prep prep,parse_case case))
383   - | "comprepnp",[prep] -> Phrase(ComprepNP(Sem,parse_prep prep))
384   - | "compar",[prep] -> PhraseAbbr(ComparP(parse_prep prep),[])
385   - | "cp",[comp] -> PhraseComp(Cp,parse_comp comp)
386   - | "ncp",[case; comp] -> PhraseComp(Ncp(parse_case case),parse_comp comp)
387   - | "prepncp",[prep; case; comp] -> PhraseComp(Prepncp(parse_prep prep,parse_case case),parse_comp comp)
388   - | "infp",[aspect] -> Phrase(InfP(parse_aspect aspect(*,ReqUndef*)))
389   - | "fixed",[morf;lex] -> Phrase(FixedP((*parse_morf_single morf,*) get_lexeme (parse_lex lex)))
390   - | "fixed",[morf;lex;lex2] -> Phrase(FixedP((*parse_morf_single morf,*) get_lexeme (parse_lex (lex @ [Comma] @ lex2))))
391   - | "or",[] -> Phrase Or
392   - | "refl",[] -> Phrase (*Refl*)(Lex "się")
393   - | "recip",[] -> Phrase (*Recip*)(Lex "się") (* FIXME *)
394   - | "E",[] -> E Null
395   - | "advp",[mode] -> let mode,morfs = parse_mode mode in PhraseAbbr(Advp mode,morfs)
396   - | "xp",[mode] -> let mode,morfs = parse_mode mode in PhraseAbbr(Xp mode,morfs)
397   - | "nonch",[] -> PhraseAbbr(Nonch,[])
398   - | "distrp",[] -> PhraseAbbr(Distrp,[])
399   - | "possp",[] -> PhraseAbbr(Possp,[])
400   - | "null",[] -> Phrase Null
401   - | "lex",[Text a; Paren p] :: ll -> parse_lex_morf (a, split_symbol Comma [] p, ll)
402   - | "lex",[Text a] :: ll -> parse_lex_morf (a, [], ll)
403   - | s,ll -> failwith ("parse_morf: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll ENIAMwalStringOf.token_list) ^ ")")
404   -
405   -and parse_lex_morf = function
406   - | "np",[case],[num;lex;restr] -> LexPhrase([SUBST(parse_number num,parse_case case),parse_lex lex], parse_restr restr)
407   - | "prepnp",[prep; case],[num;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);SUBST(parse_number num,parse_case case),parse_lex lex], parse_restr restr)
408   - | "adjp",[case],[num;gender;grad;lex;restr] -> LexPhrase([ADJ(parse_number num,parse_case case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr)
409   - | "prepadjp",[prep; case],[num;gender;grad;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);ADJ(parse_number num,parse_case case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr)
410   - | "ppasp",[case],[num;gender;negation;lex;restr] -> LexPhrase([PPAS(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation),parse_lex lex], parse_restr restr)
411   - | "prepppasp",[prep;case],[num;gender;negation;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);PPAS(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation),parse_lex lex], parse_restr restr)
412   - | "pactp",[case],[num;gender;negation;lex;refl;restr] -> LexPhrase([PACT(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
413   - | "preppactp",[prep;case],[num;gender;negation;lex;refl;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);PACT(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
414   - | "gerp",[case],[num;negation;lex;refl;restr] -> LexPhrase([GER(parse_number num,parse_case case,GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
415   - | "prepgerp",[prep;case],[num;negation;lex;refl;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);GER(parse_number num,parse_case case,GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
416   - | "nump",[case],[lex1;lex2;restr] -> LexPhrase([NUM(parse_case case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr)
417   - | "prepnump",[prep;case],[lex1;lex2;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);NUM(parse_case case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr)
418   - | "compar",[prep],[morfs] -> LexPhrase([COMPAR,Lexeme(parse_prep prep)],(Ratrs,Xlist.map (split_symbol Plus [] morfs) (fun morf -> new_schema ARG [] [] [parse_morf_single morf])))
419   - | "infp",[aspect],[negation;lex;refl;restr] -> LexPhrase([INF(parse_aspect aspect,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
420   - | "qub",[],[lex;restr] -> LexPhrase([QUB,parse_lex lex], parse_restr restr)
421   - | "advp",[mode],[grad;lex;restr] ->
422   - (match parse_mode mode with
423   - mode, [] -> LexPhraseMode(mode,[ADV(parse_grad grad),parse_lex lex], parse_restr restr)
424   - | _ -> failwith "parse_lex_morf")
425   - | "xp",[[Text mode;SqBra [Text "prepgerp"; Paren [prep;Comma;case]]]],[num;negation;lex;refl;restr] ->
426   - LexPhraseMode(mode,[PREP(parse_case [case]),Lexeme(parse_prep [prep]);GER(parse_number num,parse_case [case],GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
427   - | "xp",[mode],ll ->
428   - (match parse_mode mode,ll with
429   - (mode,[Phrase(NP case)]),[num;lex;restr] -> LexPhraseMode(mode,[SUBST(parse_number num,case),parse_lex lex], parse_restr restr)
430   - | (mode,[Phrase(PrepNP(_,prep,case))]),[num;lex;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;SUBST(parse_number num,case),parse_lex lex], parse_restr restr)
431   - | (mode,[Phrase(PrepAdjP(_,prep,case))]),[num;gender;grad;lex;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;ADJ(parse_number num,case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr)
432   - | (mode,[Phrase(NumP case)]),[lex1;lex2;restr] -> LexPhraseMode(mode,[NUM(case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr)
433   - | (mode,[Phrase(PrepNumP(_,prep,case))]),[lex1;lex2;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;NUM(case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr)
434   - | (mode,[PhraseAbbr(Advp _,[])]),[grad;lex;restr] -> LexPhraseMode(mode,[ADV(parse_grad grad),parse_lex lex], parse_restr restr)
435   - | (mode,[PhraseAbbr(ComparP prep,[])]),[morfs] -> LexPhraseMode(mode,[COMPAR,Lexeme prep],(Ratrs,Xlist.map (split_symbol Plus [] morfs) (fun morf -> new_schema ARG [] [] [parse_morf_single morf])))
436   - | (mode,[PhraseComp(Cp,(ctype,[Comp comp]))]),[negation;lex;refl;restr] -> LexPhrase([COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
437   - | _ -> failwith ("parse_lex_morf: xp(" ^ ENIAMwalStringOf.token_list mode ^ ")," ^ String.concat "," (Xlist.map ll ENIAMwalStringOf.token_list) ^ ")"))
438   - | "cp",[comp],[negation;lex;refl;restr] ->
439   - (match parse_comp comp with
440   - ctype,[Comp comp] -> LexPhrase([COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
441   - | ctype,[Comp comp1;Comp comp2] -> LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
442   - | _,comp -> failwith ("parse_lex_morf comp: " (*^ ENIAMwalStringOf.comp comp*)))
443   - | "ncp",[case;comp],[negation;lex;refl;restr] ->
444   - (match parse_comp comp with
445   - ctype,[Comp comp] -> LexPhrase([SUBST(Number "sg",parse_case case),Lexeme "to";COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
446   - | _,comp -> failwith ("parse_lex_morf comp: " (*^ ENIAMwalStringOf.comp comp*)))
447   - | s,ll,ll2 -> failwith ("parse_lex_morf: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll ENIAMwalStringOf.token_list) ^ ")," ^ String.concat "," (Xlist.map ll2 ENIAMwalStringOf.token_list) ^ ")")
448   -
449   -and parse_morfs l =
450   - let morfs = Xlist.map (split_symbol Semic [] l) (function
451   - [Text a; Paren p] -> parse_morf (a, split_symbol Comma [] p)
452   - | [Text a] -> parse_morf (a, [])
453   - | l -> failwith ("parse_morfs: " ^ ENIAMwalStringOf.token_list l)) in
454   -(* expand_prep_morfs equivs (expand_comp_morfs equivs morfs) *)morfs
455   -
456   -and parse_morf_single l = (* FIXME: problem z equivs *)
457   - match parse_morfs l with
458   - [x] -> x
459   - | l -> failwith ("parse_morf_single: " ^ String.concat ";" (Xlist.map l ENIAMwalStringOf.morf))
460   -
461   -let split_schema schema =
462   - find_brackets (Xlist.map (Str.full_split (Str.regexp "\\]\\|\\+\\|{\\|}\\|(\\|)\\|,\\|;\\|'\\|\\[") schema) (function
463   - Str.Text s -> Text s
464   - | Str.Delim "(" -> LParen
465   - | Str.Delim ")" -> RParen
466   - | Str.Delim "{" -> LBracet
467   - | Str.Delim "}" -> RBracet
468   - | Str.Delim "[" -> LSqBra
469   - | Str.Delim "]" -> RSqBra
470   - | Str.Delim ";" -> Semic
471   - | Str.Delim "+" -> Plus
472   - | Str.Delim "," -> Comma
473   - | Str.Delim "'" -> Quot
474   - | _ -> failwith "parse_schema"))
  276 + [Text a; Paren p] -> parse_phrase (a, split_symbol Comma [] p)
  277 + | [Text a] -> parse_phrase (a, [])
  278 + | l -> failwith ("parse_morf: " ^ string_of_token_list l)
475 279  
476   -let parse_schema schema =
477   - if schema = "" then [] else (
478   - let l = split_schema schema in
479   -(* print_endline schema; *)
  280 +let parse_roles l =
  281 + let r,cr,ce,m = Xlist.fold l ([],[],[],[]) (fun (r,controller,controllee,m) -> function
  282 + "subj" -> SUBJ :: r, controller, controllee, m
  283 + | "obj" -> OBJ :: r, controller, controllee, m
  284 + | "controller" -> r, "1" :: controller, controllee, m
  285 + | "controllee" -> r, controller, "1" :: controllee, m
  286 + | "controller2" -> r, "2" :: controller, controllee, m
  287 + | "controllee2" -> r, controller, "2" :: controllee, m
  288 + | "misc" -> r, controller, controllee, "misc" :: m
  289 + | "locat" -> r, controller, controllee, "locat" :: m
  290 + | "abl" -> r, controller, controllee, "abl" :: m
  291 + | "adl" -> r, controller, controllee, "adl" :: m
  292 + | "caus" -> r, controller, controllee, "caus" :: m
  293 + | "mod" -> r, controller, controllee, "mod" :: m
  294 + | "temp" -> r, controller, controllee, "temp" :: m
  295 + | "dur" -> r, controller, controllee, "dur" :: m
  296 + | "possp" -> r, controller, controllee, "possp" :: m
  297 + | "perl" -> r, controller, controllee, "perl" :: m
  298 + | "instr" -> r, controller, controllee, "instr" :: m
  299 + | "dest" -> r, controller, controllee, "dest" :: m
  300 + | "distrp" -> r, controller, controllee, "distrp" :: m
  301 + | "lemma" -> r, controller, controllee, "lemma" :: m
  302 + | "refl" -> r, controller, controllee, "refl" :: m
  303 + | "recip" -> r, controller, controllee, "recip" :: m
  304 + | "nonch" -> r, controller, controllee, "nonch" :: m
  305 + | "pron" -> r, controller, controllee, "pron" :: m
  306 + | "" -> r, controller, controllee, m
  307 + | x -> failwith ("parse_roles: " ^ x)) in
  308 + (match r with
  309 + [] -> ARG
  310 + | [x] -> x
  311 + | _ -> failwith "parse_roles"),cr,ce,m
  312 +
  313 +let parse_schema = function
  314 + [] -> NoRestr,[]
  315 + | [Text "atr"] -> Atr,[]
  316 + | [Text "ratr"] -> Ratr,[]
  317 + | [Text "atr1"] -> Atr1,[]
  318 + | [Text "ratr1"] -> Ratr1,[]
  319 + | l -> NoRestr,Xlist.map (split_symbol Plus [] l) (function
  320 + [Bracet l] -> {empty_position with morfs=Xlist.map (split_symbol Semic [] l) parse_morf}
  321 + | [Text s; Bracet l] ->
  322 + let gf,cr,ce,m = parse_roles [s] in
  323 + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=Xlist.map (split_symbol Semic [] l) parse_morf}
  324 + | l -> print_endline ("parse_schema: " ^ string_of_token_list l); empty_position)
  325 +
  326 +let parse_simple_morf = function
  327 + [Text id] -> MorfId (int_of_string id)
  328 + | l -> print_endline ("parse_simple_morf: " ^ string_of_token_list l); Null
  329 +
  330 +let parse_simple_schema l =
  331 + if l = [] then [] else
480 332 Xlist.map (split_symbol Plus [] l) (function
481   - [Bracet b] -> let r,cr,ce = parse_roles [] in new_schema r cr ce (parse_morfs b)
482   - | [Text s1;Bracet b] -> let r,cr,ce = parse_roles [s1] in new_schema r cr ce (parse_morfs b)
483   - | [Text s1;Comma;Text s2;Bracet b] -> let r,cr,ce = parse_roles [s1;s2] in new_schema r cr ce (parse_morfs b)
484   - | [Text s1;Comma;Text s2;Comma;Text s3;Bracet b] -> let r,cr,ce = parse_roles [s1;s2;s3] in new_schema r cr ce (parse_morfs b)
485   - | _ -> failwith ("parse_schema: " ^ schema)))
486   -
487   -let parse_lexeme s =
488   - match Str.split (Str.regexp " ") s with
489   - [s] -> s,""
490   - | [s;"się"] -> s,"się"
491   - | _ -> failwith ("parse_lexeme: " ^ s)
492   -
493   -let load_frames filename =
494   - Xlist.fold (List.tl (Str.split (Str.regexp "\n") (File.load_file filename))) StringMap.empty (fun schemas line ->
495   - if String.sub line 0 1 = "%" then schemas else
496   - match Str.split_delim (Str.regexp ": ") line with
497   - [lexeme;opinion;negation;pred;aspect;schema] ->
498   - let lexeme,refl = parse_lexeme lexeme in
499   - StringMap.add_inc schemas lexeme [refl,opinion,negation,pred,aspect,schema] (fun l -> (refl,opinion,negation,pred,aspect,schema) :: l)
500   - | _ -> failwith ("load_frames: " ^ line))
501   -
502   -
503   -
504   -let rec extract_fixed_schema fixed schema =
505   - Xlist.fold schema fixed (fun fixed s ->
506   - Xlist.fold s.morfs fixed (fun fixed -> function
507   - Phrase(FixedP s) -> StringSet.add fixed s
508   - | LexPhrase(_,(_,schema)) -> extract_fixed_schema fixed schema
509   - | LexPhraseMode(_,_,(_,schema)) -> extract_fixed_schema fixed schema
510   - | _ -> fixed))
511   -
512   -(*let extract_fixed fixed_filename = (* FIXME: nie wykrywa fixed w argumentach leksykalizacji *)
513   -(* let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in *)
514   - let fixed = Xlist.fold Paths.walenty_filenames StringSet.empty (fun fixed filename ->
515   -(* print_endline filename; *)
516   - let frames = load_frames (Paths.walenty_path ^ filename) in
517   - StringMap.fold frames fixed (fun fixed _ l ->
518   - Xlist.fold l fixed (fun fixed (refl,opinion,negation,pred,aspect,schema) ->
519   - extract_fixed_schema fixed (parse_schema schema)))) in
520   - let entries = StringSet.fold fixed StringSet.empty (fun entries s ->
521   - let tokens = List.rev (Xlist.fold (Str.full_split (Str.regexp " \\|,") s) [] (fun l -> function
522   - Str.Text t -> t :: l
523   - | Str.Delim " " -> l
524   - | Str.Delim t -> t :: l)) in
525   - let tokens_string = String.concat " " tokens in
526   - Xlist.fold tokens entries (fun entries token ->
527   - StringSet.add entries (Printf.sprintf "%s\t%s\t%s:fixed\tfixed" token s tokens_string))) in
528   - File.file_out fixed_filename (fun file ->
529   - StringSet.iter entries (fun entry ->
530   - Printf.fprintf file "%s\n" entry))
531   -
532   -(* generowanie fixed.tab *)
533   -(* let _ = extract_fixed "data/fixed.tab" *)
  333 + [Bracet l] ->
  334 + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in
  335 + {empty_position with morfs=morfs}
  336 + | [Text s; Bracet l] ->
  337 + let gf,cr,ce,m = parse_roles [s] in
  338 + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in
  339 + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=morfs}
  340 + | [Text s1; Comma; Text s2; Bracet l] ->
  341 + let gf,cr,ce,m = parse_roles [s1;s2] in
  342 + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in
  343 + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=morfs}
  344 + | [Text s1; Comma; Text s2; Comma; Text s3; Bracet l] ->
  345 + let gf,cr,ce,m = parse_roles [s1;s2;s3] in
  346 + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in
  347 + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=morfs}
  348 + | [Text s1; Comma; Text s2; Comma; Text s3; Comma; Text s4; Bracet l] ->
  349 + let gf,cr,ce,m = parse_roles [s1;s2;s3;s4] in
  350 + let morfs = Xlist.map (split_symbol Semic [] l) parse_simple_morf in
  351 + {empty_position with gf=gf; cr=cr; ce=ce; mode=m; morfs=morfs}
  352 + | l -> print_endline ("parse_simple_schema: " ^ string_of_token_list l); empty_position)
  353 +
  354 +let parse_entry (restr,schema) = function
  355 + [Text "lex"; Paren[Text lemma;Comma;Text pos]] -> SimpleLexEntry(lemma,pos)
  356 + | [Text "lex"; Paren[Text id;Comma;Text lemma;Comma;Text pos]] -> LexEntry(int_of_string id,lemma,pos,restr,schema)
  357 + | [Text "comprepnp"; Paren[Text lemma]] -> ComprepNPEntry(lemma,restr,schema)
  358 + | l -> print_endline ("parse_entry: " ^ string_of_token_list l); SimpleLexEntry("","")
  359 +
  360 +let load_entries filename =
  361 + let l = File.load_tab filename (function
  362 + [pos; lemma; entry; schema] -> pos, lemma, entry, schema
  363 + | [pos; lemma; entry] -> pos, lemma, entry, ""
  364 + | _ -> failwith "load_entries") in
  365 + Xlist.fold l Entries.empty (fun entries (pos,lemma,entry,schema) ->
  366 + let schema = parse_schema (split_text schema) in
  367 + let entry = parse_entry schema (split_text entry) in
  368 + Entries.add_inc entries pos lemma entry)
  369 +
  370 +let load_phrases filename =
  371 + let l = File.load_tab filename (function
  372 + id :: morfs -> int_of_string id, morfs
  373 + | _ -> failwith "load_phrases") in
  374 + Xlist.fold l IntMap.empty (fun phrases (id,morfs) ->
  375 + (* print_endline (string_of_int id); *)
  376 + let morfs = Xlist.map morfs (fun morf -> parse_morf (split_text morf)) in
  377 + IntMap.add phrases id morfs)
  378 +
  379 +let load_schemata filename =
  380 + let l = File.load_tab filename (function
  381 + [pos; lemma; opinion; neg; pred; aspect; schema] -> pos, lemma, opinion, neg, pred, aspect, schema
  382 + | _ -> failwith "load_schemata") in
  383 + Xlist.fold l Entries.empty (fun entries (pos,lemma,opinion,neg,pred,aspect,schema) ->
  384 + let opinion = parse_opinion opinion in
  385 + let neg = parse_negation [Text neg] in
  386 + let pred = parse_pred pred in
  387 + let aspect = parse_aspect [Text aspect] in
  388 + let schema = parse_simple_schema (split_text schema) in
  389 + let entry = opinion,neg,pred,aspect,schema in
  390 + Entries.add_inc entries pos lemma entry)
  391 +
  392 +let phrases = load_phrases "results/phrases.tab"
  393 +let entries = load_entries "results/entries.tab"
  394 +let schemata = load_schemata "results/schemata.tab"
534 395  
  396 +(*
535 397 let print_subjs () =
536 398 (* let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in *)
537 399 let subjs = Xlist.fold Paths.walenty_filenames StringQMap.empty (fun subjs filename ->
... ... @@ -560,18 +422,4 @@ let print_ctrls () =
560 422 Printf.printf "%5d %s\n" v s)
561 423  
562 424 (* let _ = print_ctrls () *)
563   -
564   -(* Test parsowania *)
565   -(*let _ =
566   - let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in
567   - Xlist.iter Paths.walenty_filenames (fun filename ->
568   - print_endline filename;
569   - let frames = load_frames (Paths.walenty_path ^ filename) in
570   - StringMap.iter frames (fun _ l ->
571   - Xlist.iter l (fun (refl,opinion,negation,pred,aspect,schema) ->
572   -(* print_endline schema; *)
573   - ignore (parse_opinion opinion);
574   - ignore (parse_negation [Text negation]);
575   - ignore (parse_pred [Text pred]);
576   - ignore (parse_aspect [Text aspect]);
577   - ignore (expand_equivs_schema equivs (expand_subtypes subtypes (expand_schema expands (parse_schema schema)))))))*)*)
  425 +*)
... ...
walenty/ENIAMwalRealizations.ml
... ... @@ -17,31 +17,21 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
20   -open ENIAMwalTypes
  20 +open ENIAMwalTypes2
21 21 open Xstd
22 22  
23 23 let rec expand_schema_morf expands = function
24   - PhraseAbbr(Advp "misc",[]) -> PhraseAbbr(Advp "misc",[])
25   - | PhraseAbbr(Advp "mod",[]) -> PhraseAbbr(Advp "mod",[])
26   - (* | PhraseAbbr(ComparP s,[]) -> PhraseAbbr(ComparP s,[Phrase(ComparNP(s,Str));Phrase(ComparPP(s))]) (* FIXME: albo do walTEI albo usunąć *) *)
  24 + PhraseAbbr(Advp "misc",[]) -> PhraseAbbr(Advp "misc",[Phrase AdvP])
  25 + | PhraseAbbr(Advp "mod",[]) -> PhraseAbbr(Advp "mod",[Phrase AdvP])
27 26 | PhraseAbbr(abbr,[]) -> (try PhraseAbbr(abbr,AbbrMap.find expands abbr) with Not_found -> failwith "expand_schema_morf")
28 27 | PhraseAbbr(abbr,morfs) -> PhraseAbbr(abbr,Xlist.map morfs (expand_schema_morf expands))
29 28 | LexPhrase(pos_lex,(restr,schema)) -> LexPhrase(pos_lex,(restr,expand_schema expands schema))
30   - (* | LexPhraseMode(mode,pos_lex,(restr,schema)) -> LexPhraseMode(mode,pos_lex,(restr,expand_schema expands schema)) *)
31 29 | morf -> morf
32 30  
33 31 and expand_schema expands schema =
34 32 Xlist.map schema (fun s ->
35 33 {s with morfs=Xlist.map s.morfs (expand_schema_morf expands)})
36 34  
37   -(* let get_mode = function
38   - Xp(m) -> m
39   - | Advp(m) -> m
40   - (* | ComparP prep -> "compar" *)
41   - | Nonch -> "nonch"
42   - | Distrp -> "distrp"
43   - | Possp -> "possp" *)
44   -
45 35 let rec expand_subtypes_morf subtypes = function
46 36 PhraseComp(comp_morf,(ctype,comps)) ->
47 37 let comps = if comps = [] then (try CompMap.find subtypes ctype with Not_found -> failwith "expand_subtypes_schema") else comps in
... ... @@ -50,18 +40,8 @@ let rec expand_subtypes_morf subtypes = function
50 40 | Ncp case -> NCP(case,ctype,comp)
51 41 | Prepncp(prep,case) -> PrepNCP(prep,case,ctype,comp)))
52 42 | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,expand_subtypes subtypes schema))]
53   - (* | LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,pos_lex,(restr,expand_subtypes subtypes schema))] *)
54 43 | PhraseAbbr(abbr,morfs) ->
55   - (* let mode = get_mode abbr in *)
56 44 List.flatten (Xlist.map morfs (expand_subtypes_morf subtypes))
57   - (* Xlist.map morfs (function
58   - Phrase p -> PhraseMode(mode,p)
59   - | PhraseMode(_,p) -> PhraseMode(mode,p)
60   - | LexPhrase(pos_lex,r) -> LexPhraseMode(mode,pos_lex,r)
61   - | LexRPhrase(pos_lex,r) -> LexRPhraseMode(mode,pos_lex,r)
62   - | LexPhraseMode(m,pos_lex,r) -> LexPhraseMode(mode,pos_lex,r)
63   - | LexRPhraseMode(m,pos_lex,r) -> LexRPhraseMode(mode,pos_lex,r)
64   - | _ -> failwith "expand_subtypes_morf") *)
65 45 | E Null -> [E(NP(Str));E(NCP(Str,CompTypeUndef,CompUndef));E(CP(CompTypeUndef,CompUndef)); E(Or)]
66 46 | morf -> [morf]
67 47  
... ... @@ -74,7 +54,6 @@ let expand_equivs_phrase equivs = function
74 54 | PrepAdjP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepAdjP(prep,case))
75 55 | PrepNumP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepNumP(prep,case))
76 56 | ComprepNP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComprepNP(prep))
77   - (* | ComparNP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparNP(prep,case)) *)
78 57 | ComparP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparP(prep))
79 58 | CP(ctype,Comp comp) -> Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp -> CP(ctype,Comp comp))
80 59 | NCP(case,ctype,Comp comp) -> Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp -> NCP(case,ctype,Comp comp))
... ... @@ -93,80 +72,27 @@ let rec expand_equivs_lex equivs = function
93 72  
94 73 let rec expand_equivs_morf equivs = function
95 74 Phrase phrase -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> Phrase phrase)
96   - (* | PhraseMode(mode,phrase) -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> PhraseMode(mode,phrase)) *)
97 75 | E phrase -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> E phrase)
98 76 | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))]
99   - (* | LexRPhrase(pos_lex,(restr,schema)) -> [LexRPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))]
100   - | LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))]
101   - | LexRPhraseMode(mode,pos_lex,(restr,schema)) -> [LexRPhraseMode(mode,Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] *)
102   -(* | PhraseAbbr(abbr,morfs) -> [PhraseAbbr(abbr,List.flatten (Xlist.map morfs (expand_equivs_morf equivs)))] *)
103 77 | morf -> failwith ("expand_equivs_morf: " ^ ENIAMwalStringOf.morf morf)
104 78  
105 79 and expand_equivs_schema equivs schema =
106 80 Xlist.map schema (fun s ->
107 81 {s with morfs=List.flatten (Xlist.map s.morfs (expand_equivs_morf equivs))})
108 82  
109   -
110   -let rec load_realizations_rec (expands,subtypes,equivs) found rev = function
111   - [] -> if rev <> [] || found <> [] then failwith "load_realizations_rec" else expands,subtypes,equivs
112   - | [Str.Text s; Str.Delim "-->"] :: l -> load_realizations_rec (expands,subtypes,equivs) ((s,rev) :: found) [] l
113   - | [Str.Delim " "; Str.Text s; Str.Delim "\t"; Str.Text t] :: l ->
114   - load_realizations_rec (expands,subtypes,equivs) found ((s,t) :: rev) l
115   - | [Str.Delim " "; Str.Text s] :: l ->
116   - load_realizations_rec (expands,subtypes,equivs) found ((s,"") :: rev) l
117   - | [Str.Delim "% "; Str.Text "Phrase types expand:"] :: l -> load_realizations_rec (found,subtypes,equivs) [] rev l
118   - | [Str.Delim "% "; Str.Text "Attributes subtypes:"] :: l -> load_realizations_rec (expands,found,equivs) [] rev l
119   - | [Str.Delim "% "; Str.Text "Attributes equivalents:"] :: l -> load_realizations_rec (expands,subtypes,found) [] rev l
120   -(* | [Str.Delim "% "; Str.Text s] :: l -> print_endline s; load_realizations_rec found rev l *)
121   - | [] :: l -> load_realizations_rec (expands,subtypes,equivs) found rev l
122   - | _ -> failwith "load_realizations_rec"
123   -
124   -(* let find_comprep_reqs compreps =
125   - Xlist.fold compreps StringMap.empty (fun comprep_reqs (s,l) ->
126   - let l = Xlist.map l (function
127   - LexPhrase(pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function
128   - _,Lexeme s -> StringSet.add set s
129   - | _ -> set)
130   - (* | LexPhraseMode(_,pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function
131   - _,Lexeme s -> StringSet.add set s
132   - | _ -> set) *)
133   - | morf -> failwith ("find_compreps_reqs: " ^ ENIAMwalStringOf.morf morf)) in
134   - if l = [] then failwith "find_compreps_reqs";
135   - StringMap.add comprep_reqs s (StringSet.to_list (Xlist.fold (List.tl l) (List.hd l) StringSet.union))) *)
136   -
137   -(* let create_comprep_dict compreps =
138   - Xlist.fold compreps StringMap.empty (fun compreps (s,l) ->
139   - Xlist.fold l compreps (fun compreps -> function
140   - LexPhrase([PREP _,_;SUBST _,lex],_) as morf ->
141   - let lexemes = get_lexemes lex in
142   - Xlist.fold lexemes compreps (fun compreps lexeme ->
143   - StringMap.add_inc compreps lexeme ["subst",(s,morf)] (fun l -> ("subst",(s,morf)) :: l))
144   - (* | LexPhraseMode("misc",[ADV grad,lex],restr) ->
145   - let morf = LexPhrase([ADV grad,lex],restr) in
146   - let lexemes = get_lexemes lex in
147   - Xlist.fold lexemes compreps (fun compreps lexeme ->
148   - StringMap.add_inc compreps lexeme ["adv",(s,morf)] (fun l -> ("adv",(s,morf)) :: l)) *)
149   - | LexPhrase([PREP _,_;NUM _,_;SUBST _,lex],_) as morf ->
150   - let lexemes = get_lexemes lex in
151   - Xlist.fold lexemes compreps (fun compreps lexeme ->
152   - StringMap.add_inc compreps lexeme ["subst",(s,morf)] (fun l -> ("subst",(s,morf)) :: l))
153   - | morf -> failwith ("create_comprep_dict: " ^ ENIAMwalStringOf.morf morf))) *)
154   -
155   -let load_realizations () =
156   - (* let lines = Str.split (Str.regexp "\n") (File.load_file realizations_filename) in
157   - let lines = Xlist.rev_map lines (fun line -> Str.full_split (Str.regexp "% \\|-->\\| \\|\t") line) in
158   - let expands,subtypes,equivs = load_realizations_rec ([],[],[]) [] [] lines in *)
159   - let subtypes = Xlist.fold ENIAMwalTEI.subtypes CompMap.empty (fun subtypes -> function
  83 +(* UWAGA: aktualnie equivs nie są wstawiane do expands *)
  84 +let load_realizations (expands,subtypes,equivs) =
  85 + let subtypes = Xlist.fold subtypes CompMap.empty (fun subtypes -> function
160 86 "int",l -> CompMap.add subtypes Int (List.flatten (Xlist.map l (fun v -> snd(ENIAMwalTEI.parse_comp v))))
161 87 | "rel",l -> CompMap.add subtypes Rel (List.flatten (Xlist.map l (fun v -> snd(ENIAMwalTEI.parse_comp v))))
162 88 | _ -> failwith "load_realizations 1") in
163   - let equivs = Xlist.fold ENIAMwalTEI.equivs StringMap.empty (fun equivs (k,l) -> StringMap.add equivs k (k :: l)) in
164   - let expands,compreps = Xlist.fold ENIAMwalTEI.expands (AbbrMap.empty,[]) (fun (expands, compreps) (id,k,l) ->
  89 + let equivs = Xlist.fold equivs StringMap.empty (fun equivs (k,l) -> StringMap.add equivs k (k :: l)) in
  90 + let expands,compreps = Xlist.fold expands (AbbrMap.empty,[]) (fun (expands, compreps) (id,k,l) ->
165 91 match k with
166 92 PhraseAbbr(Advp m,[]) -> AbbrMap.add expands (Advp m) l, compreps
167 93 | PhraseAbbr(Nonch,[]) -> AbbrMap.add expands Nonch l, compreps
168 94 | PhraseAbbr(Xp m,[]) -> AbbrMap.add expands (Xp m) (List.flatten (Xlist.map l (function
169   - PhraseAbbr(Advp m,[]) -> (try AbbrMap.find expands (Advp m) with Not_found -> [PhraseAbbr(Advp m,[])]) (* FIXME: zakładam, że advp się nie rozmnoży *)
  95 + PhraseAbbr(Advp m,[]) -> (try AbbrMap.find expands (Advp m) with Not_found -> [PhraseAbbr(Advp m,[])]) (* zakładam, że advp się nie rozmnoży *)
170 96 | morf -> [morf]))), compreps
171 97 | Phrase(ComprepNP s) -> expands, (s, l) :: compreps
172 98 | PhraseAbbr(Distrp,[]) -> AbbrMap.add expands Distrp l, compreps
... ... @@ -174,53 +100,4 @@ let load_realizations () =
174 100 | _ -> failwith "load_realizations 2") in
175 101 let compreps = Xlist.map compreps (fun (s,morfs) ->
176 102 s, List.flatten (List.flatten (Xlist.map morfs (fun morf -> Xlist.map (expand_subtypes_morf subtypes (expand_schema_morf expands morf)) (expand_equivs_morf equivs))))) in
177   - (* let comprep_reqs = find_comprep_reqs compreps in *)
178   - (* let compreps = create_comprep_dict compreps in *)
179   - expands,compreps,(*comprep_reqs,*)subtypes,equivs
180   -
181   -let expands,compreps,(*comprep_reqs,*)subtypes,equivs = load_realizations ()
182   -
183   -
184   -(* Wypisanie realizacji *)
185   -(* let _ =
186   - Xlist.iter ENIAMwalTEI.expands (fun (id,morf,l) ->
187   - Printf.printf "%d %s:\n" id (ENIAMwalStringOf.morf morf);
188   - Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *)
189   -
190   -(* Wypisanie realizacji po przetworzeniu *)
191   -(* let _ =
192   - AbbrMap.iter expands (fun morf l ->
193   - Printf.printf "%s:\n" (ENIAMwalStringOf.phrase_abbr morf);
194   - Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *)
195   -
196   -let has_realization = function
197   - PhraseAbbr _ -> true
198   - | PhraseComp _ -> true
199   - | _ -> false
200   -
201   -(* Wypisanie fraz, które podlegają rozwijaniu *)
202   -(*let _ =
203   - IntMap.iter ENIAMwalTEI.phrases (fun i morf ->
204   - if has_realization morf then
205   - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *)
206   -
207   -let phrases =
208   - IntMap.map ENIAMwalTEI.phrases (fun morf ->
209   - let morf = expand_schema_morf expands morf in
210   - let morfs = expand_subtypes_morf subtypes morf in
211   - let morf = List.flatten (Xlist.map morfs (expand_equivs_morf equivs)) in
212   - morf)
213   -
214   -(* Wypisanie fraz, które podlegają rozwijaniu *)
215   -(* let _ =
216   - IntMap.iter phrases (fun i morf ->
217   - if has_realization morf then
218   - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *)
219   -
220   -(* let test_phrases = [17088; 17133; 1642]
221   -let _ =
222   - Xlist.iter test_phrases (fun i ->
223   - let m1 = IntMap.find ENIAMwalTEI.phrases i in
224   - let m2 = IntMap.find phrases i in
225   - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m1);
226   - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m2)) *)
  103 + expands,compreps,subtypes,equivs
... ...
walenty/ENIAMwalReduce.ml 0 → 100644
  1 +(*
  2 + * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
  3 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open ENIAMwalTypes
  21 +open Xstd
  22 +
  23 +(* let rec assign_pro_args schema =
  24 + Xlist.map schema (fun s ->
  25 + let morfs = match s.morfs with
  26 + (E p) :: l -> E Pro :: (E p) :: l
  27 + | [LexPhrase _] as morfs -> morfs
  28 + | [Phrase(FixedP _)] as morfs -> morfs
  29 + | [Phrase(Lex _)] as morfs -> morfs
  30 + (* | [Phrase Refl] as morfs -> morfs
  31 + | [Phrase Recip] as morfs -> morfs*)
  32 + | Phrase Null :: _ as morfs -> morfs
  33 + | Phrase Pro :: _ as morfs -> morfs
  34 + | morfs -> if s.gf <> SUBJ && s.cr = [] && s.ce = [] then (Phrase Null) :: morfs else (Phrase Pro) :: morfs in (* FIXME: ustalić czy są inne przypadki uzgodnienia *)
  35 + (* let morfs = assign_pro_args_lex morfs in *) (* bez pro wewnątrz leksykalizacji *)
  36 + {s with morfs=morfs}) *)
  37 +
  38 +(*let assign_pro_args_lex morfs =
  39 + Xlist.map morfs (function
  40 + Lex(morf,specs,lex,restr) -> LexN(morf,specs,lex,assign_pro_args_restr restr)
  41 + | LexNum(morf,lex1,lex2,restr) -> LexNum(morf,lex1,lex2,assign_pro_args_restr restr)
  42 + | LexCompar(morf,l) -> LexCompar(morf,make_gfs_lex l)
  43 + | morf -> morf)
  44 +
  45 + and assign_pro_args_restr = function
  46 + Natr -> Natr
  47 + | Ratr1 schema -> Ratr1(assign_pro_args schema)
  48 + | Atr1 schema -> Atr1(assign_pro_args schema)
  49 + | Ratr schema -> Ratr(assign_pro_args schema)
  50 + | Atr schema -> Atr(assign_pro_args schema)*)
  51 +
  52 +exception ImpossibleSchema
  53 +
  54 +let rec reduce_comp lexemes = function
  55 + Comp s -> if StringMap.mem lexemes s then Comp s else raise Not_found
  56 + | Zeby -> if StringMap.mem lexemes "żeby" || StringMap.mem lexemes "że" then Zeby else raise Not_found
  57 + | Gdy -> if StringMap.mem lexemes "gdy" || StringMap.mem lexemes "gdyby" then Gdy else raise Not_found
  58 + | CompUndef -> failwith "reduce_comp"
  59 +
  60 +let reduce_phrase comprep_reqs lexemes = function
  61 + | PrepNP(prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found
  62 + | PrepAdjP(prep,case) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found
  63 + | ComprepNP(prep) as phrase -> if Xlist.fold (try StringMap.find comprep_reqs prep with Not_found -> []) true (fun b s -> b && StringMap.mem lexemes s) then phrase else raise Not_found
  64 + | ComparP(prep) as phrase -> if StringMap.mem lexemes prep then phrase else raise Not_found
  65 + | CP(ctype,comp) -> CP(ctype,reduce_comp lexemes comp)
  66 + | NCP(case,ctype,comp) -> if StringMap.mem lexemes "to" then NCP(case,ctype,reduce_comp lexemes comp) else raise Not_found
  67 + | PrepNCP(prep,case,ctype,comp) -> if StringMap.mem lexemes prep && StringMap.mem lexemes "to" then PrepNCP(prep,case,ctype,reduce_comp lexemes comp) else raise Not_found
  68 + | SimpleLexArg(lemma,_) as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found
  69 + | LexArg(_,lemma,_) as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found
  70 + | FixedP lemma as phrase -> if StringMap.mem lexemes lemma then phrase else raise Not_found
  71 + | phrase -> phrase
  72 +
  73 +let rec reduce_morfs comprep_reqs lexemes = function
  74 + [] -> []
  75 + | morf :: l -> (try [reduce_phrase comprep_reqs lexemes morf] with Not_found -> []) @ reduce_morfs comprep_reqs lexemes l
  76 +
  77 +let rec reduce_schema2 comprep_reqs lexemes = function
  78 + [] -> []
  79 + | s :: l ->
  80 + let morfs = reduce_morfs comprep_reqs lexemes s.morfs in
  81 + if morfs = [] then reduce_schema2 comprep_reqs lexemes l else
  82 + {s with morfs=morfs} :: reduce_schema2 comprep_reqs lexemes l
  83 +
  84 +let rec reduce_schema comprep_reqs lexemes = function
  85 + [] -> []
  86 + | s :: l ->
  87 + let morfs = reduce_morfs comprep_reqs lexemes s.morfs in
  88 + if morfs = [] then raise ImpossibleSchema else
  89 + {s with morfs=morfs} :: reduce_schema comprep_reqs lexemes l
  90 +
  91 +(* let reduce_schema_frame lexemes = function
  92 + Frame(atrs,schema) -> Frame(atrs,reduce_schema lexemes schema)
  93 + (* | ComprepFrame(s,morfs) ->
  94 + let morfs = reduce_morfs lexemes morfs in
  95 + if morfs = [] then raise ImpossibleSchema else ComprepFrame(s,morfs)*)
  96 + | _ -> failwith "reduce_schema_frame" *)
  97 +
  98 +
  99 +let reduce_entries lexemes entries =
  100 + StringMap.map entries (fun entries ->
  101 + StringSet.fold lexemes StringMap.empty (fun reduced lemma ->
  102 + try StringMap.add reduced lemma (StringMap.find entries lemma)
  103 + with Not_found -> reduced))
... ...
walenty/ENIAMwalStringOf.ml
... ... @@ -17,26 +17,7 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
20   -open ENIAMwalTypes
21   -
22   -(*let rec token = function
23   - Text s -> s
24   - | Paren l -> "(" ^ String.concat "" (Xlist.map l token) ^ ")"
25   - | Bracet l -> "{" ^ String.concat "" (Xlist.map l token) ^ "}"
26   - | SqBra l -> "[" ^ String.concat "" (Xlist.map l token) ^ "]"
27   - | LParen -> "("
28   - | RParen -> ")"
29   - | LBracet -> "{"
30   - | RBracet -> "}"
31   - | LSqBra -> "["
32   - | RSqBra -> "]"
33   - | Semic -> ";"
34   - | Plus -> "+"
35   - | Comma -> ","
36   - | Quot -> "'"
37   -
38   -let token_list l =
39   - String.concat "" (Xlist.map l token)*)
  20 +open ENIAMwalTypes2
40 21  
41 22 let opinion = function
42 23 Pewny -> "pewny"
... ... @@ -87,10 +68,7 @@ let rec comp = function
87 68 let rec comp_type = function
88 69 Int -> "int"
89 70 | Rel -> "rel"
90   - (* | Sub -> "sub"
91   - | Coord -> "coord" *)
92 71 | CompTypeUndef -> "_"
93   - (* | CompTypeAgr -> "agr" *)
94 72  
95 73 let number = function
96 74 Number s -> s
... ... @@ -117,29 +95,16 @@ let acm = function
117 95 Acm s -> s
118 96 | AcmUndef -> "_"
119 97  
120   -(* let sem = function
121   - Sem -> "sem"
122   - | NoSem -> "nosem" *)
123   -
124   -(*let req = function
125   - Req -> ",req"
126   - | NReq -> ",nreq"
127   - | ReqUndef -> ""*)
128   -
129 98 let gf = function
130 99 SUBJ -> "subj"
131 100 | OBJ -> "obj"
132 101 | ARG -> "arg"(*""*)
133   - (* | CORE -> "core"
134   - | NOSEM -> "nosem"
135   - | NOGF -> "nogf"
136   - | ADJUNCT -> "adjunct"
137   - | RAISED -> "raised"
138   - | CLAUSE -> "clause"
139   - | SENTENCE -> "sentence" *)
140 102  
141 103 let pos = function
142 104 SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")"
  105 + | PPRON12(n,c) -> "PPRON12(" ^ number n ^ "," ^ case c ^ ")"
  106 + | PPRON3(n,c) -> "PPRON3(" ^ number n ^ "," ^ case c ^ ")"
  107 + | SIEBIE(c) -> "SIEBIE(" ^ case c ^ ")"
143 108 | PREP(c) -> "PREP(" ^ case c ^ ")"
144 109 | NUM(c,g,a) -> "NUM(" ^ case c ^ "," ^ gender g ^ "," ^ acm a ^ ")"
145 110 | ADJ(n,c,g,gr) -> "ADJ(" ^ number n ^ "," ^ case c ^ "," ^ gender g ^ "," ^ grad gr ^ ")"
... ... @@ -162,7 +127,6 @@ let phrase = function
162 127 | NumP(c) -> "nump(" ^ case c ^ ")"
163 128 | PrepNumP(prep,c) -> "prepnump(" ^ prep ^ "," ^ case c ^ ")"
164 129 | ComprepNP(prep) -> "comprepnp(" ^ prep ^ ")"
165   - (* | ComparNP(prep,c) -> "comparnp(" ^ prep ^ "," ^ case c ^ ")" *)
166 130 | ComparP(prep) -> "comparp(" ^ prep ^ ")"
167 131 | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")"
168 132 | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
... ... @@ -172,11 +136,9 @@ let phrase = function
172 136 | FixedP s -> "fixed(" ^ s ^ ")"
173 137 | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")"
174 138 | Or -> "or"
175   - | Refl -> "refl"
176   - | Recip -> "recip"
177 139 | Qub -> "qub"
178   - | Pro -> "pro"
179   - | ProNG -> "prong"
  140 + (* | Pro -> "pro"
  141 + | ProNG -> "prong" *)
180 142 | Null -> "null"
181 143 | GerP c -> "gerp(" ^ case c ^ ")"
182 144 | PrepGerP(prep,c) -> "prepgerp(" ^ prep ^ "," ^ case c ^ ")"
... ... @@ -188,7 +150,6 @@ let phrase = function
188 150 let phrase_abbr = function
189 151 Xp(m) -> "xp(" ^ m ^ ")"
190 152 | Advp(m) -> "advp(" ^ m ^ ")"
191   - (* | ComparP prep -> "compar(" ^ prep ^ ")" *)
192 153 | Nonch -> "nonch"
193 154 | Distrp -> "distrp"
194 155 | Possp -> "possp"
... ... @@ -212,13 +173,7 @@ let restr = function
212 173 | Ratrs -> "ratrs"
213 174 | Atr1 -> "atr1"
214 175 | Ratr1 -> "ratr1"
215   -(* | Ratr1,s -> "ratr1(" ^ schema s ^ ")"
216   - | Atr1,s -> "atr1(" ^ schema s ^ ")"
217   - | Ratr,s -> "ratr(" ^ schema s ^ ")"
218   - | Atr,s -> "atr(" ^ schema s ^ ")"
219   - | Ratrs,s -> "ratrs(" ^ schema s ^ ")"
220   - *) | NoRestr -> ""
221   -(* | NoRestr,s -> schema s *)
  176 + | NoRestr -> ""
222 177  
223 178 let controllers l =
224 179 Xlist.map l (function
... ... @@ -230,42 +185,10 @@ let controllees l =
230 185 "1" -> "controllee"
231 186 | n -> "controllee" ^ n)
232 187  
233   -(*let lex_specs = function
234   - NSpecs num -> number num
235   - | AdvSpecs gr -> grad gr
236   - | AdjSpecs(num,g,gr) -> number num ^ "," ^ gender g ^ "," ^ grad gr
237   - | PpasSpecs(num,g,neg) -> number num ^ "," ^ gender g ^ "," ^ negation neg
238   - | PactSpecs(num,g,neg,r) -> number num ^ "," ^ gender g ^ "," ^ negation neg ^ "," ^ refl r
239   - | GerSpecs(num,neg,r) -> number num ^ "," ^ negation neg ^ "," ^ refl r
240   - | CSpecs(neg,r) -> negation neg ^ "," ^ refl r
241   - | NumSpecs g -> gender g
242   - | EmptySpecs -> ""*)
243   -
244   -let mood = function
245   - (*Mood*) s -> s
246   - (*| MoodUndef -> "_"*)
247   -
248   -let tense t = t
249   -
250   -let aux = function
251   - NoAux -> "-"
252   - | PastAux -> "aux-past"
253   - | FutAux -> "aux-fut"
254   - | ImpAux -> "aux-imp"
255   -
256   -let nsem = function
257   - Common s -> s
258   - | Time -> "time"
259   -
260   -(* let direction = function
261   - Forward -> "/"
262   - | Backward -> "\\"
263   - | Both -> "|" *)
264   -
265 188 let rec schema schema =
266 189 String.concat "+" (Xlist.map schema (fun s ->
267 190 String.concat "," (
268   - (if s.gf = ARG then [] else [gf s.gf])@
  191 + (if s.gf = ARG then [] else [gf s.gf])@s.mode@
269 192 (if s.role = "" then [] else [s.role])@
270 193 (if s.role_attr = "" then [] else [s.role_attr])@
271 194 (*s.sel_prefs@*)(controllers s.cr)@(controllees s.ce)) ^ "{" ^ String.concat ";" (Xlist.map s.morfs morf) ^ "}"))
... ... @@ -276,82 +199,38 @@ let rec schema schema =
276 199  
277 200 and morf = function
278 201 Phrase p -> phrase p
279   - (* | PhraseMode(m,p) -> m ^ ":" ^ phrase p *)
280 202 | E p -> "E(" ^ phrase p ^ ")"
281 203 | LexPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])"
282   - (* | LexRPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *)
283   - (* | LexPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *)
284   - (* | LexRPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *)
285 204 | PhraseAbbr(p,ml) -> phrase_abbr p ^ "[" ^ String.concat ";" (Xlist.map ml morf) ^ "]"
286 205 | PhraseComp(p,(ct,l)) -> phrase_comp p ^ "," ^ comp_type ct ^ "[" ^ String.concat ";" (Xlist.map l comp) ^ "]"
287   - (* | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")"
288   - | LexArg(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ le ^ ")" *)
289   - (* | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")" *)
290 206 | MorfId id -> Printf.sprintf "id(%d)" id
291 207 | SimpleLexArg(le,p) -> "lex(" ^ le ^ "," ^ pos p ^ ")"
292 208 | LexArg(id,le,p) -> "lex(" ^ string_of_int id ^ "," ^ le ^ "," ^ pos p ^ ")"
293   -(* | LexRealization(mrf,le) -> "lex(" ^ morf mrf ^ "," ^ le ^ ")"*)
294   - (* | Raised(mrf1,dir,mrf2) -> "raised([" ^ String.concat ";" mrf1 ^ "]," ^ direction dir ^ "[" ^ String.concat ";" mrf2 ^ "])"
295   - | Multi l -> "multi(" ^ String.concat ";" (Xlist.map l phrase) ^ ")" *)
296   -
297   -(*and mode = function
298   - Mode(s,[]) -> s
299   - | Mode(s,l) -> s ^ "[" ^ "..."(*String.concat ";" (Xlist.map l morf)*) ^ "]"
300   -(* | Mod l -> "mod[...]" *)
301   - | Pron [] -> "pron"
302   - | Pron l -> "pron" ^ "[" ^ "..."(*String.concat ";" (Xlist.map l morf)*) ^ "]"
303   - | Misc -> "misc"*)
304   -
305   -let meaning m =
306   - m.name ^ "-" ^ m.variant
307 209  
308   -let frame_atrs = function
309   - DefaultAtrs(m,r,o,neg,p,a) -> Printf.sprintf "%s: %s: %s: %s: %s: %s" (String.concat "; " (Xlist.map m meaning)) (refl r) (opinion o) (negation neg) (pred p) (aspect a)
310   - | EmptyAtrs m -> Printf.sprintf "%s" (String.concat "; " (Xlist.map m meaning))
311   - | NounAtrs(m,nsyn,s(*,typ*)) -> Printf.sprintf "%s: %s: %s" (String.concat "; " (Xlist.map m meaning)) nsyn (nsem s) (*(String.concat ";" typ)*)
312   - | AdjAtrs(m,c,adjsyn(*,adjsem,typ*)) -> Printf.sprintf "%s: %s: %s" (String.concat "; " (Xlist.map m meaning)) (case c) adjsyn (*adjsem (String.concat ";" typ)*)
313   - | PersAtrs(m,le,neg,mo,t,au,a) -> Printf.sprintf "%s: %s: %s: %s: %s: %s: %s" (String.concat "; " (Xlist.map m meaning)) le (negation neg) (mood mo) (tense t) (aux au) (aspect a)
314   - | GerAtrs(m,le,neg,a) -> Printf.sprintf "%s: %s: %s: %s" (String.concat "; " (Xlist.map m meaning)) le (negation neg) (aspect a)
315   - | NonPersAtrs(m,le,role,role_attr,neg,a) -> Printf.sprintf "%s: %s: %s,%s: %s: %s" (String.concat "; " (Xlist.map m meaning)) le role role_attr (negation neg) (aspect a)
316   - | _ -> failwith "WalStringOf.frame_atrs"
  210 +let simple_morf = function
  211 + | MorfId id -> Printf.sprintf "%d" id
  212 + | _ -> failwith "ENIAMwalStringOf.simple_morf"
317 213  
318   -let frame lexeme = function
319   - Frame(atrs,s) ->
320   - Printf.sprintf "%s: %s: %s" lexeme (frame_atrs atrs) (schema s)
321   - | SimpleLexEntry(le,p) ->
322   - Printf.sprintf "%s: %s" le p
323   - | LexEntry(id,le,p,r,s) ->
324   - Printf.sprintf "%d: %s: %s: %s: %s" id le p (restr r) (schema s)
325   - | ComprepNPEntry(le,r,s) ->
326   - Printf.sprintf "%s: %s: %s" le (restr r) (schema s)
327   - (* | LexFrame(id,p,r,s) ->
328   - Printf.sprintf "%s: %s: %s: %s: %s" lexeme id (pos p) (restr r) (schema s)
329   - | ComprepFrame(le,p,r,s) ->
330   - Printf.sprintf "%s: %s: %s: %s: %s" lexeme le (pos p) (restr r) (schema s) *)
331   -(* | FrameR(atrs,s) ->
332   - Printf.sprintf "%s: %s: %s" lexeme (frame_atrs atrs) (schema_role s)
333   - | LexFrameR(id,p,r,s) ->
334   - Printf.sprintf "%s: %s: %s: %s: %s" lexeme id (pos p) (restr r) (schema_role s)
335   - | ComprepFrameR(le,p,r,s) ->
336   - Printf.sprintf "%s: %s: %s: %s: %s" lexeme le (pos p) (restr r) (schema_role s)*)
337   -(* | _ -> failwith "WalStringOf.frame" *)
  214 +let rec simple_schema schema =
  215 + String.concat "+" (Xlist.map schema (fun s ->
  216 + String.concat "," (
  217 + (if s.gf = ARG then [] else [gf s.gf])@
  218 + s.mode@(controllers s.cr)@(controllees s.ce)) ^
  219 + "{" ^ String.concat ";" (Xlist.map s.morfs simple_morf) ^ "}"))
338 220  
339   -let entry = function
340   - Frame(atrs,s) ->
341   - Printf.sprintf "%s: %s: %s" "lexeme" (frame_atrs atrs) (schema s)
342   - | SimpleLexEntry(le,p) ->
343   - Printf.sprintf "%s: %s" le p
344   - | LexEntry(id,le,p,r,s) ->
345   - Printf.sprintf "%d: %s: %s: %s: %s" id le p (restr r) (schema s)
346   - | ComprepNPEntry(le,r,s) ->
347   - Printf.sprintf "%s: %s: %s" le (restr r) (schema s)
348 221  
349   -(* let fnum_frame lexeme = function
350   - fnum,Frame(atrs,s) ->
351   - Printf.sprintf "%d: %s: %s: %s" fnum lexeme (frame_atrs atrs) (schema s)
352   - | fnum,LexFrame(id,p,r,s) ->
353   - Printf.sprintf "%d: %s: %s: %s: %s: %s" fnum lexeme id (pos p) (restr r) (schema s)
354   - | fnum,ComprepFrame(le,p,r,s) ->
355   - Printf.sprintf "%d: %s: %s: %s: %s: %s" fnum lexeme le (pos p) (restr r) (schema s) *)
  222 +let meaning m =
  223 + m.name ^ "-" ^ m.variant
356 224  
357   -let unparsed_frame lexeme (r,o,neg,p,a,s) = lexeme ^ " " ^ String.concat ": " [r;o;neg;p;a;s]
  225 +let lex_entry = function
  226 + SimpleLexEntry(le,p) ->
  227 + Printf.sprintf "lex(%s,%s)" le p
  228 + | LexEntry(id,le,p,NoRestr,s) ->
  229 + Printf.sprintf "lex(%d,%s,%s)\t%s" id le p (schema s)
  230 + | LexEntry(id,le,p,r,[]) ->
  231 + Printf.sprintf "lex(%d,%s,%s)\t%s" id le p (restr r)
  232 + | ComprepNPEntry(le,NoRestr,s) ->
  233 + Printf.sprintf "comprepnp(%s)\t%s" le (schema s)
  234 + | ComprepNPEntry(le,r,[]) ->
  235 + Printf.sprintf "comprepnp(%s)\t%s" le (restr r)
  236 + | _ -> failwith "ENIAMwalStringOf.lex_entry"
... ...
walenty/ENIAMwalTEI.ml
... ... @@ -18,7 +18,7 @@
18 18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19 *)
20 20  
21   -open ENIAMwalTypes
  21 +open ENIAMwalTypes2
22 22 open Xstd
23 23  
24 24 type id = {hash: bool; suffix: string; numbers: int list}
... ... @@ -261,6 +261,10 @@ let rec process_lex lex = function
261 261 (ENIAMwalStringOf.lex lemma) (ENIAMwalStringOf.lex numeral_lemma) in
262 262 failwith ("process_lex: " ^ s)
263 263  
  264 +(* UWAGA: refl_id może się zmienić wraz z wersją Walentego *)
  265 +let refl_id = 25
  266 +let refl_position = {empty_position with role="Lemma"; mode=["lemma"]; morfs=[MorfId refl_id]}
  267 +
264 268 let rec load_category = function
265 269 | F("category",Fs("category_def",x)) ->
266 270 (match x with
... ... @@ -321,7 +325,7 @@ and load_phrase mode = function
321 325 | Fs("advp", [e;F("category",Symbol a)]) -> mode:=a :: !mode; PhraseAbbr(Advp(a),[])
322 326 | Fs("nonch", []) -> mode:="nonch" :: !mode; PhraseAbbr(Nonch,[])
323 327 | Fs("or", []) -> Phrase Or
324   - | Fs("refl", []) -> Phrase Refl
  328 + | Fs("refl", []) -> mode:="refl" :: !mode; LexPhrase([QUB,Lexeme "się"],(Natr,[]))
325 329 | Fs("E", []) -> E Null
326 330 | Fs("lex", x) ->
327 331 let lex = Xlist.fold x empty_lex load_lex in
... ... @@ -330,7 +334,7 @@ and load_phrase mode = function
330 334 | Fs("fixed", [F("argument",a);F("string",TEIstring b)]) -> Phrase (FixedP((*snd (load_phrase a),*)b))
331 335 | Fs("possp", [e]) -> mode:="possp" :: !mode; PhraseAbbr(Possp,[])
332 336 | Fs("possp", []) -> mode:="possp" :: !mode; PhraseAbbr(Possp,[])
333   - | Fs("recip", []) -> Phrase Recip
  337 + | Fs("recip", []) -> mode:="recip" :: !mode; LexPhrase([QUB,Lexeme "się"],(Natr,[]))
334 338 | Fs("distrp", [e]) -> mode:="distrp" :: !mode; PhraseAbbr(Distrp,[])
335 339 | Fs("distrp", []) -> mode:="distrp" :: !mode; PhraseAbbr(Distrp,[])
336 340 | Fs("compar", [F("compar_category",Symbol value)]) -> Phrase(ComparP value)
... ... @@ -395,8 +399,7 @@ let parse_opinion = function
395 399  
396 400 let load_schema_info ent phrases (arg:schema) = function
397 401 | F("opinion",Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}
398   - | F("inherent_sie",Binary true) -> {arg with reflexiveMark = ReflTrue}
399   - | F("inherent_sie",Binary false) -> {arg with reflexiveMark = ReflFalse}
  402 + | F("inherent_sie",Binary b) -> {arg with reflexiveMark = b}
400 403 | F("aspect",Symbol aspect_value) -> {arg with aspect = parse_aspect aspect_value}
401 404 | Fset("aspect", []) -> arg
402 405 | F("negativity",Symbol negativity_value) -> {arg with negativity = parse_negation negativity_value}
... ... @@ -411,7 +414,7 @@ let load_schema_info ent phrases (arg:schema) = function
411 414  
412 415 let load_schema ent phrases = function
413 416 Fs("schema", schema) ->
414   - let result = {sch_id = (-1); opinion = OpinionUndef; reflexiveMark = ReflUndef; aspect = AspectUndef;
  417 + let result = {sch_id = (-1); opinion = OpinionUndef; reflexiveMark = false; aspect = AspectUndef;
415 418 negativity = NegationUndef; predicativity = PredUndef; positions = []; text_rep=""} in
416 419 let result = Xlist.fold schema result (load_schema_info ent phrases) in
417 420 result
... ... @@ -587,22 +590,6 @@ let load_rentry = function
587 590 id,morf,expansions
588 591 | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
589 592  
590   -let load_expands filename =
591   - begin
592   - match Xml.parse_file filename with
593   - Xml.Element("TEI", _,
594   - [Xml.Element("teiHeader",_,_) ;
595   - Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
596   - List.rev (Xlist.rev_map entries load_rentry)
597   - | _ -> failwith "load_walenty"
598   - end
599   -
600   -
601   - (*let walenty = load_walenty Paths.walenty_filename *)
602   -let walenty,phrases = load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml"
603   -
604   -(* let _ = Printf.printf "|phrases|=%d\n" (IntMap.size phrases) *)
605   -
606 593 let expands_supplement = [
607 594 (-2), PhraseAbbr(Nonch,[]), [
608 595 LexPhrase([SUBST(NumberUndef,Str),Lexeme "co"],(Natr,[]));
... ... @@ -615,7 +602,15 @@ let expands_supplement = [
615 602 LexPhrase([ADV (Grad "pos"),Lexeme "jak"],(Natr,[]))
616 603 ]]
617 604  
618   -let expands = expands_supplement @ load_expands "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml"
  605 +let load_expands filename =
  606 + begin
  607 + match Xml.parse_file filename with
  608 + Xml.Element("TEI", _,
  609 + [Xml.Element("teiHeader",_,_) ;
  610 + Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
  611 + expands_supplement @ List.rev (Xlist.rev_map entries load_rentry)
  612 + | _ -> failwith "load_walenty"
  613 + end
619 614  
620 615 let subtypes = [
621 616 "int",[
... ...
walenty/ENIAMwalTypes.ml
... ... @@ -42,6 +42,9 @@ type gf = SUBJ | OBJ | ARG
42 42  
43 43 type pos =
44 44 SUBST of number * case
  45 + | PPRON12 of number * case
  46 + | PPRON3 of number * case
  47 + | SIEBIE of case
45 48 | PREP of case
46 49 | NUM of case * gender * acm
47 50 | ADJ of number * case * gender * grad
... ... @@ -84,39 +87,13 @@ type phrase =
84 87 | PpasP of case
85 88 | PrepPpasP of string * case
86 89 | PactP of case
87   -
88   -type phrase_abbr =
89   - Xp of string
90   - | Advp of string
91   - | Nonch
92   - | Distrp
93   - | Possp
94   -
95   -type phrase_comp =
96   - Cp
97   - | Ncp of case
98   - | Prepncp of string * case
99   -
100   -type lex =
101   - Lexeme of string
102   - | ORconcat of lex list
103   - | ORcoord of lex list
104   - | XOR of lex list
105   - | Elexeme of gender
  90 + | SimpleLexArg of string * pos
  91 + | LexArg of int * string * pos
  92 + | E of phrase
  93 + | MorfId of int
106 94  
107 95 type restr = Natr | Ratr | Ratrs | Ratr1 | Atr | Atr1 | NoRestr
108 96  
109   -(*type lex_specs =
110   - NSpecs of number
111   - | AdvSpecs of grad
112   - | AdjSpecs of number * gender * grad
113   - | PpasSpecs of number * gender * negation
114   - | PactSpecs of number * gender * negation * refl
115   - | GerSpecs of number * negation * refl
116   - | CSpecs of negation * refl
117   - | NumSpecs of gender
118   - | EmptySpecs *)
119   -
120 97 type sel_prefs =
121 98 SynsetId of int
122 99 | Predef of string
... ... @@ -124,49 +101,11 @@ type sel_prefs =
124 101 | RelationRole of string * string * string (* relacji * rola * atrybut roli *)
125 102  
126 103 type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: sel_prefs list;
127   - mode: string list; cr: string list; ce: string list; morfs: morf list}
128   -
129   -and morf =
130   - Phrase of phrase
131   - (* | PhraseMode of string * phrase *)
132   - | E of phrase
133   - | LexPhrase of (pos * lex) list * (restr * position list)
134   - (* | LexRPhrase of (pos * lex) list * (restr * position list) *)
135   - (* | LexPhraseMode of string * (pos * lex) list * (restr * position list) *)
136   - (* | LexRPhraseMode of string * (pos * lex) list * (restr * position list) *)
137   - | PhraseAbbr of phrase_abbr * morf list
138   - | PhraseComp of phrase_comp * (comp_type * comp list)
139   - (* | LexPhraseId of string * pos * lex
140   - | LexArg of string * pos * string *)
141   - | MorfId of int
142   -(* | LexRealization of morf * string*)
143   -(* | Raised of string list * direction * string list
144   - | Multi of phrase list*)
145   - | SimpleLexArg of string * pos
146   - | LexArg of int * string * pos
  104 + mode: string list; cr: string list; ce: string list; morfs: phrase list}
147 105  
148 106 let empty_position =
149 107 {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; morfs=[]}
150 108  
151   -type lex_record = {
152   - lex_argument: morf;
153   - lex_arguments: morf list;
154   - lex_lemma: lex;
155   - lex_numeral_lemma: lex;
156   - lex_mode: string list;
157   - lex_negation: negation;
158   - lex_degree: grad;
159   - lex_number: number;
160   - lex_reflex: refl;
161   - lex_gender: gender;
162   - lex_modification: restr * position list;
163   -}
164   -
165   -let empty_lex = {lex_argument=Phrase Null; lex_arguments=[]; lex_lemma=Lexeme "";
166   - lex_numeral_lemma=Lexeme ""; lex_mode=[]; lex_negation=NegationUndef;
167   - lex_degree=GradUndef; lex_number=NumberUndef; lex_reflex=ReflUndef;
168   - lex_gender=GenderUndef; lex_modification = Natr,[]}
169   -
170 109 type meaning = {mng_id: int;
171 110 name: string;
172 111 variant: string;
... ... @@ -211,21 +150,6 @@ end
211 150  
212 151 module EntrySet = Xset.Make(OrderedEntry)
213 152  
214   -module OrderedAbbr = struct
215   - type t = phrase_abbr
216   - let compare = compare
217   -end
218   -
219   -module AbbrMap = Xmap.Make(OrderedAbbr)
220   -
221   -module OrderedComp = struct
222   - type t = comp_type
223   - let compare = compare
224   -end
225   -
226   -(* module MorfSet = Xset.Make(OrderedMorf) *)
227   -module CompMap = Xmap.Make(OrderedComp)
228   -
229 153 let resource_path =
230 154 try Sys.getenv "ENIAM_RESOURCE_PATH"
231 155 with Not_found -> "/usr/share/eniam"
... ...
walenty/ENIAMwalTypes2.ml 0 → 100644
  1 +(*
  2 + * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
  3 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open Xstd
  21 +
  22 +type opinion = Dobry | Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Domyslny | OpinionUndef
  23 +type negation = Negation | Aff | NegationUndef | NegationNA
  24 +type pred = PredTrue | PredFalse | PredUndef | PredNA
  25 +type aspect = Aspect of string | AspectUndef | AspectNA
  26 +type case = Case of string | Str | Part | CaseAgr | NomAgr | GenAgr | AllAgr | CaseUndef | AllUAgr | CaseUAgr
  27 +type comp = Comp of string | Zeby | Gdy | CompUndef
  28 +type comp_type = Int | Rel | CompTypeUndef
  29 +type number = Number of string | NumberUndef | NumberAgr
  30 +type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list
  31 +type grad = Grad of string | GradUndef
  32 +type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef
  33 +type acm = Acm of string | AcmUndef
  34 +
  35 +type gf = SUBJ | OBJ | ARG
  36 +
  37 +type pos =
  38 + SUBST of number * case
  39 + | PPRON12 of number * case
  40 + | PPRON3 of number * case
  41 + | SIEBIE of case
  42 + | PREP of case
  43 + | NUM of case * gender * acm
  44 + | ADJ of number * case * gender * grad
  45 + | ADV of grad
  46 + | GER of number * case * gender * aspect * negation * refl
  47 + | PACT of number * case * gender * aspect * negation * refl
  48 + | PPAS of number * case * gender * aspect * negation
  49 + | INF of aspect * negation * refl
  50 + | QUB
  51 + | COMPAR
  52 + | COMP of comp_type
  53 + | PERS of (*number * gender * aspect * person * *)negation * refl
  54 + | FIXED
  55 +
  56 +type phrase =
  57 + NP of case
  58 + | PrepNP of string * case
  59 + | AdjP of case
  60 + | PrepAdjP of string * case
  61 + | NumP of case
  62 + | PrepNumP of string * case
  63 + | ComprepNP of string
  64 + | ComparP of string (** case*)
  65 + | CP of comp_type * comp
  66 + | NCP of case * comp_type * comp
  67 + | PrepNCP of string * case * comp_type * comp
  68 + | InfP of aspect
  69 + | AdvP
  70 + | FixedP of string
  71 + | Num of case * acm
  72 + | Or
  73 + | Qub
  74 + (* | Pro
  75 + | ProNG *)
  76 + | Null
  77 + | GerP of case
  78 + | PrepGerP of string * case
  79 + | PpasP of case
  80 + | PrepPpasP of string * case
  81 + | PactP of case
  82 +
  83 +type phrase_abbr =
  84 + Xp of string
  85 + | Advp of string
  86 + | Nonch
  87 + | Distrp
  88 + | Possp
  89 +
  90 +type phrase_comp =
  91 + Cp
  92 + | Ncp of case
  93 + | Prepncp of string * case
  94 +
  95 +type lex =
  96 + Lexeme of string
  97 + | ORconcat of lex list
  98 + | ORcoord of lex list
  99 + | XOR of lex list
  100 + | Elexeme of gender
  101 +
  102 +type restr = Natr | Ratr | Ratrs | Ratr1 | Atr | Atr1 | NoRestr
  103 +
  104 +type sel_prefs =
  105 + SynsetId of int
  106 + | Predef of string
  107 + | RelationArgId of string * int (* nazwa relacji * id argumentu ramy *)
  108 + | RelationRole of string * string * string (* relacji * rola * atrybut roli *)
  109 +
  110 +type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: sel_prefs list;
  111 + mode: string list; cr: string list; ce: string list; morfs: morf list}
  112 +
  113 +and morf =
  114 + Phrase of phrase
  115 + | E of phrase
  116 + | LexPhrase of (pos * lex) list * (restr * position list)
  117 + | PhraseAbbr of phrase_abbr * morf list
  118 + | PhraseComp of phrase_comp * (comp_type * comp list)
  119 + | MorfId of int
  120 + | SimpleLexArg of string * pos
  121 + | LexArg of int * string * pos
  122 +
  123 +let empty_position =
  124 + {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; morfs=[]}
  125 +
  126 +type lex_record = {
  127 + lex_argument: morf;
  128 + lex_arguments: morf list;
  129 + lex_lemma: lex;
  130 + lex_numeral_lemma: lex;
  131 + lex_mode: string list;
  132 + lex_negation: negation;
  133 + lex_degree: grad;
  134 + lex_number: number;
  135 + lex_reflex: refl;
  136 + lex_gender: gender;
  137 + lex_modification: restr * position list;
  138 +}
  139 +
  140 +let empty_lex = {lex_argument=Phrase Null; lex_arguments=[]; lex_lemma=Lexeme "";
  141 + lex_numeral_lemma=Lexeme ""; lex_mode=[]; lex_negation=NegationUndef;
  142 + lex_degree=GradUndef; lex_number=NumberUndef; lex_reflex=ReflUndef;
  143 + lex_gender=GenderUndef; lex_modification = Natr,[]}
  144 +
  145 +type meaning = {mng_id: int;
  146 + name: string;
  147 + variant: string;
  148 + plwnluid: int;
  149 + gloss: string}
  150 +
  151 +let empty_meaning = {mng_id = (-1);
  152 + name = "";
  153 + variant = "";
  154 + plwnluid = (-1);
  155 + gloss = ""}
  156 +
  157 +type schema = {sch_id: int; opinion: opinion; reflexiveMark: bool; aspect: aspect;
  158 + negativity: negation; predicativity: pred; positions: position list; text_rep: string}
  159 +
  160 +type lex_entry =
  161 + SimpleLexEntry of string * string
  162 + | LexEntry of int * string * string * restr * position list
  163 + | ComprepNPEntry of string * restr * position list
  164 +
  165 +
  166 +module OrderedEntry = struct
  167 + type t = lex_entry
  168 + let compare = compare
  169 +end
  170 +
  171 +module EntrySet = Xset.Make(OrderedEntry)
  172 +
  173 +module OrderedAbbr = struct
  174 + type t = phrase_abbr
  175 + let compare = compare
  176 +end
  177 +
  178 +module AbbrMap = Xmap.Make(OrderedAbbr)
  179 +
  180 +module OrderedComp = struct
  181 + type t = comp_type
  182 + let compare = compare
  183 +end
  184 +
  185 +module CompMap = Xmap.Make(OrderedComp)
  186 +
  187 +type example = {exm_id: int;
  188 + meaning: int;
  189 + phrases: (int * int * int) list;
  190 + sentence: string;
  191 + source: string;
  192 + opinion: opinion;
  193 + note: string}
  194 +
  195 +type argument = {arg_id: int;
  196 + role: string;
  197 + role_attribute: string;
  198 + sel_prefs: sel_prefs list}
  199 +
  200 +type frame = {frm_id: int;
  201 + opinion: string;
  202 + meanings: int list;
  203 + arguments: argument list}
  204 +
  205 +type connection = {argument: int;
  206 + phrases: (int * int list) list}
  207 +
  208 +type alternation = {schema: int; frame: int; connections: connection list}
  209 +
  210 +type entry = {ent_id: int;
  211 + status: string;
  212 + form_orth: string;
  213 + form_pos: string;
  214 + schemata: schema list;
  215 + examples: example list;
  216 + frames: frame list;
  217 + meanings: meaning list;
  218 + alternations: alternation list}
  219 +
  220 +let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[];
  221 + frames=[]; meanings=[]; alternations=[]}
... ...
walenty/entries.ml 0 → 100644
  1 +(*
  2 + * ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
  3 + * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open Xstd
  21 +
  22 +let empty = StringMap.empty
  23 +
  24 +let add_inc pos_map pos lemma entry =
  25 + let lemma_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in
  26 + let lemma_map = StringMap.add_inc lemma_map lemma [entry] (fun l -> entry :: l) in
  27 + StringMap.add pos_map pos lemma_map
  28 +
  29 +let add_inc_list pos_map pos lemma entries =
  30 + let lemma_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in
  31 + let lemma_map = StringMap.add_inc lemma_map lemma entries (fun l -> entries @ l) in
  32 + StringMap.add pos_map pos lemma_map
  33 +
  34 +let flatten_map pos_map f =
  35 + StringMap.mapi pos_map (fun pos lemma_map ->
  36 + StringMap.mapi lemma_map (fun lemma entries ->
  37 + List.flatten (Xlist.rev_map entries (fun entry ->
  38 + f pos lemma entry))))
  39 +
  40 +let map2 pos_map f =
  41 + StringMap.mapi pos_map (fun pos lemma_map ->
  42 + StringMap.mapi lemma_map (fun lemma entries ->
  43 + f pos lemma entries))
  44 +
  45 +let iter pos_map f =
  46 + StringMap.iter pos_map (fun pos lemma_map ->
  47 + StringMap.iter lemma_map (fun lemma entries ->
  48 + Xlist.iter entries (fun entry ->
  49 + f pos lemma entry)))
  50 +
  51 +let fold pos_map s f =
  52 + StringMap.fold pos_map s (fun s pos lemma_map ->
  53 + StringMap.fold lemma_map s (fun s lemma entries ->
  54 + Xlist.fold entries s (fun s entry ->
  55 + f s pos lemma entry)))
... ...
walenty/makefile
... ... @@ -25,11 +25,12 @@ eniam-walenty.cma: $(SOURCES)
25 25 eniam-walenty.cmxa: $(SOURCES)
26 26 ocamlopt -linkall -a -o eniam-walenty.cmxa $(INCLUDES) $^
27 27  
28   -test: test.ml
29   - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml
  28 +test: entries.ml ENIAMwalTypes.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml
  29 + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalParser.ml ENIAMwalReduce.ml test.ml
30 30  
31   -loader: ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml
32   - $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml
  31 +loader: entries.ml ENIAMwalTypes2.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml ENIAMwalGenerate.ml
  32 + mkdir -p results
  33 + $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes2.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml ENIAMwalGenerate.ml
33 34  
34 35 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
35 36  
... ...
walenty/test.ml
... ... @@ -18,7 +18,7 @@
18 18 *)
19 19  
20 20 open Xstd
21   -
  21 +(*
22 22 let test_strings = [
23 23 ["chłopiec","subst"; "maić","fin"; "ulica","subst"; "kwiat","subst"]
24 24 ]
... ... @@ -50,3 +50,4 @@ let _ =
50 50 s := read_line ()
51 51 done;*)
52 52 ()
  53 +*)
... ...