Commit 1c6cd30492543c60379151bba86b8a8a1d084476

Authored by Wojciech Jaworski
1 parent edbd3bf6

uporządkowanie tei id

walenty/ENIAMwalConnect.ml
... ... @@ -18,123 +18,39 @@
18 18 *)
19 19  
20 20 open Xstd
21   -
22   -let rec parse_comp = function
23   - "int",[] -> ENIAMwalTypes.Int,[]
24   - | "rel",[] -> ENIAMwalTypes.Rel,[]
25   - | "int",l -> ENIAMwalTypes.Int, Xlist.map l (fun s -> ENIAMwalTypes.Comp s)
26   - | "rel",l -> ENIAMwalTypes.Rel, Xlist.map l (fun s -> ENIAMwalTypes.Comp s)
27   - | s,[] -> ENIAMwalTypes.CompTypeUndef,[ENIAMwalTypes.Comp s]
28   - | _ -> failwith "parse_comp"
29   -
30   -
31   -let rec morf_of_phrase = function
32   - NP c -> ENIAMwalTypes.Phrase (ENIAMwalTypes.NP(ENIAMwalParser.parse_case [ENIAMwalTypes.Text c]))
33   - | PrepNP(prep,c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.PrepNP(ENIAMwalTypes.Sem,ENIAMwalParser.parse_prep [ENIAMwalTypes.Text prep],ENIAMwalParser.parse_case [ENIAMwalTypes.Text c]))
34   - | AdjP c -> ENIAMwalTypes.Phrase (ENIAMwalTypes.AdjP(ENIAMwalParser.parse_case [ENIAMwalTypes.Text c]))
35   - | PrepAdjP(prep,c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.PrepAdjP(ENIAMwalTypes.Sem,ENIAMwalParser.parse_prep [ENIAMwalTypes.Text prep],ENIAMwalParser.parse_case [ENIAMwalTypes.Text c]))
36   - | ComprepNP prep -> ENIAMwalTypes.Phrase (ENIAMwalTypes.ComprepNP(ENIAMwalTypes.Sem,ENIAMwalParser.parse_prep [ENIAMwalTypes.Text prep]))
37   - | CP(co) -> ENIAMwalTypes.PhraseComp(ENIAMwalTypes.Cp,parse_comp co)
38   - | NCP(c,co) -> ENIAMwalTypes.PhraseComp(ENIAMwalTypes.Ncp(ENIAMwalParser.parse_case [ENIAMwalTypes.Text c]),parse_comp co)
39   - | PrepNCP(prep,c,co) -> ENIAMwalTypes.PhraseComp(ENIAMwalTypes.Prepncp(ENIAMwalParser.parse_prep [ENIAMwalTypes.Text prep],ENIAMwalParser.parse_case [ENIAMwalTypes.Text c]),parse_comp co)
40   - | InfP(a) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.InfP(ENIAMwalParser.parse_aspect [ENIAMwalTypes.Text a]))
41   - | XP(mode,phrases) -> ENIAMwalTypes.PhraseAbbr(ENIAMwalTypes.Xp(fst (ENIAMwalParser.parse_mode [ENIAMwalTypes.Text mode])), Xlist.map phrases morf_of_phrase)
42   - | AdvP mode -> ENIAMwalTypes.PhraseAbbr(ENIAMwalTypes.Advp(fst (ENIAMwalParser.parse_mode [ENIAMwalTypes.Text mode])), [])
43   - | NumP(c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.NumP(ENIAMwalParser.parse_case [ENIAMwalTypes.Text c]))
44   - | PrepNumP(prep,c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.PrepNumP(ENIAMwalTypes.Sem,ENIAMwalParser.parse_prep [ENIAMwalTypes.Text prep],ENIAMwalParser.parse_case [ENIAMwalTypes.Text c]))
45   - | ComparP prep -> ENIAMwalTypes.PhraseAbbr (ENIAMwalTypes.ComparP(ENIAMwalParser.parse_prep [ENIAMwalTypes.Text prep]),[])
46   - | Nonch -> ENIAMwalTypes.PhraseAbbr (ENIAMwalTypes.Nonch,[])
47   - | Or -> ENIAMwalTypes.Phrase ENIAMwalTypes.Or
48   - | Refl -> ENIAMwalTypes.Phrase (ENIAMwalTypes.Lex "się")
49   - | Recip -> ENIAMwalTypes.Phrase (ENIAMwalTypes.Lex "się")
50   - | E -> ENIAMwalTypes.E ENIAMwalTypes.Null
51   - | DistrP -> ENIAMwalTypes.PhraseAbbr (ENIAMwalTypes.Distrp,[])
52   - | PossP -> ENIAMwalTypes.PhraseAbbr (ENIAMwalTypes.Possp,[])
53   - | FixedP(_,s) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.FixedP s)
54   - | Lex lex -> (*print_endline "lex";*) ENIAMwalTypes.Phrase (ENIAMwalTypes.Null) (* FIXME: ni *)
55   - | Null -> ENIAMwalTypes.Phrase (ENIAMwalTypes.Null)
56   - | _ -> failwith "morf_of_phrase"
57   -
58   -(* | GerP(c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.
59   - | PrepGerP(prep,c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.
60   - | PpasP(c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.
61   - | PrepPpasP(prep,c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.
62   - | PPact(c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.
63   - | PrepPactP(prep,c) -> ENIAMwalTypes.Phrase (ENIAMwalTypes.
64   - | Qub -> ENIAMwalTypes.Phrase (ENIAMwalTypes.*)
65   -
66   -
67 21 open ENIAMwalTypes
68 22  
69   -let process_phrases phrases =
70   - Xlist.fold phrases StringMap.empty (fun phrases (id,phrase) ->
71   - let id =
72   - match id with
73   - {hash=false; numbers=[(*_;_;_;*)id]; suffix="phr"} -> id
74   - | _ -> failwith "process_phrases" in
75   - StringMap.add phrases id phrase)
  23 +let process_morfs = function
  24 + MorfId id -> id
  25 + | _ -> failwith "process_morfs"
76 26  
77 27 let process_positions positions =
78 28 Xlist.fold positions StringMap.empty (fun positions position ->
79   - let id =
80   - match position.psn_id with
81   - {hash=false; numbers=[(*_;_;*)id]; suffix="psn"} -> id
82   - | _ -> failwith "process_positions" in
  29 + let id = position.psn_id in
83 30 let r,cr,ce = ENIAMwalParser.parse_roles (position.gf :: position.control) in
84   - let phrases = process_phrases position.phrases in
  31 + let phrases = List.rev (Xlist.rev_map position.morfs process_morfs) in
85 32 StringMap.add positions id (r,cr,ce,phrases))
86 33  
87 34 let process_schemata schemata =
88 35 Xlist.fold schemata StringMap.empty (fun schemata schema ->
89   - let id =
90   - match schema.sch_id with
91   - {hash=false; numbers=[(*_;*)id]; suffix="sch"} -> id
92   - | _ -> failwith "process_schemata" in
93   - let schema_atrs = DefaultAtrs([],
94   - ENIAMwalParser.parse_refl [Text schema.reflexiveMark],
95   - ENIAMwalParser.parse_opinion schema.opinion,
96   - ENIAMwalParser.parse_negation [Text schema.negativity],
97   - ENIAMwalParser.parse_pred [Text schema.predicativity],
98   - ENIAMwalParser.parse_aspect [Text schema.aspect]) in
  36 + let id = schema.sch_id in
  37 + let schema_atrs = DefaultAtrs([],schema.reflexiveMark,
  38 + schema.opinion, schema.negativity, schema.predicativity, schema.aspect) in
99 39 let positions = process_positions schema.positions in
100 40 StringMap.add schemata id (schema_atrs,positions))
101 41  
  42 +
  43 +
  44 +
102 45 let add_meanings meanings = function
103 46 DefaultAtrs(_,r,o,n,p,a) -> DefaultAtrs(meanings,r,o,n,p,a)
104 47 | _ -> failwith "add_meanings"
105 48  
106 49 let process_arguments arguments =
107 50 Xlist.fold arguments StringMap.empty (fun arguments argument ->
108   - let id =
109   - match argument.arg_id with
110   - {hash=false; numbers=[(*_;_;*)id]; suffix="arg"} -> id
111   - | _ -> failwith "process_arguments" in
  51 + let id = argument.arg_id in
112 52 StringMap.add arguments id (argument.role,argument.role_attribute,argument.sel_prefs))
113 53  
114   -let get_meaning_id meaning =
115   - match parse_full_id meaning with
116   - {hash=true; numbers=[_;id]; suffix="mng"} -> id
117   - | _ -> failwith "get_meaning_id"
118   -
119   -let get_schema_id alt =
120   - try
121   - match parse_full_id (List.hd ((List.hd alt.connections).phrases)) with
122   - {hash=true; numbers=[_;id;_;_]; suffix="phr"} -> id
123   - | _ -> failwith "get_schema_id 1"
124   - with _ -> failwith "get_schema_id 2"
125   -
126   -let get_frame_id alt =
127   - try
128   - match parse_full_id ((List.hd alt.connections).argument) with
129   - {hash=true; numbers=[_;id;_]; suffix="arg"} -> id
130   - | _ -> failwith "get_frame_id"
131   - with _ -> failwith "get_frame_id"
132   -
133   -let get_argument_id arg =
134   - match parse_full_id arg with
135   - {hash=true; numbers=[_;_;id]; suffix="arg"} -> id
136   - | _ -> failwith "get_argument_id"
137   -
138 54 let get_position_id phrases =
139 55 try
140 56 match parse_full_id (List.hd phrases) with
... ... @@ -154,8 +70,7 @@ let process_frames frames =
154 70 {hash=false; numbers=[(*_;*)id]; suffix="frm"} -> id
155 71 | _ -> failwith "process_frames" in
156 72 let arguments = process_arguments frame.arguments in
157   - let meaning_ids = Xlist.map frame.meanings get_meaning_id in
158   - StringMap.add frames id (meaning_ids,arguments))
  73 + StringMap.add frames id (frame.meanings,arguments))
159 74  
160 75 let process_meanings meanings =
161 76 Xlist.fold meanings StringMap.empty (fun meanings meaning ->
... ... @@ -175,23 +90,18 @@ let connect entry =
175 90 let frames = process_frames entry.frames in
176 91 let meanings = process_meanings entry.meanings in
177 92 Xlist.fold entry.alternations [] (fun found alt ->
178   - if alt.connections = [] then found else
179   - let schema_id = get_schema_id alt in
180   - let frame_id = get_frame_id alt in
181   - let schema_atrs,positions = StringMap.find schemata schema_id in
182   - let meaning_ids,arguments = StringMap.find frames frame_id in
  93 + let schema_atrs,positions = StringMap.find schemata alt.schema in
  94 + let meaning_ids,arguments = StringMap.find frames alt.frame in
183 95 let positions = Xlist.fold alt.connections [] (fun positions2 conn ->
184   - let argument_id = get_argument_id conn.argument in
185   - let position_id = get_position_id conn.phrases in
186   - let r,cr,ce,phrases = StringMap.find positions position_id in
187   - let phrases = Xlist.fold conn.phrases [] (fun phrases2 id ->
188   - let phrase_id = get_phrase_id id in
189   - try StringMap.find phrases phrase_id :: phrases2
190   - with Not_found -> (*Printf.printf "%s\n%!" entry.form_orth;*)phrases2) in
191   - let role,role_attribute,sel_prefs = StringMap.find arguments argument_id in
  96 + let role,role_attribute,sel_prefs = StringMap.find arguments conn.argument in
192 97 let sel_prefs = Xlist.map (List.flatten sel_prefs) process_sel_pref in
193   - {gf=r; role=role; role_attr=role_attribute; sel_prefs=sel_prefs;
194   - cr=cr; ce=ce; dir=Both; morfs=Xlist.map phrases morf_of_phrase} :: positions2) in
  98 + let positions2 = Xlist.fold conn.phrases positions2 (fun positions2 (position_id,phrase_ids) ->
  99 + let r,cr,ce,phrases = StringMap.find positions position_id in
  100 + let morfs = Xlist.fold phrase_ids [] (fun morfs phrase_id ->
  101 + try StringMap.find phrases phrase_id :: morfs
  102 + with Not_found -> Printf.printf "%s\n%!" entry.form_orth;morfs) in
  103 + {gf=r; role=role; role_attr=role_attribute; sel_prefs=sel_prefs;
  104 + cr=cr; ce=ce; dir=Both; morfs=List.rev morfs} :: positions2) in
195 105 let meanings = List.rev (Xlist.fold meaning_ids [] (fun l id ->
196 106 (StringMap.find meanings id) :: l)) in
197 107 let schema_atrs = add_meanings meanings schema_atrs in
... ... @@ -242,36 +152,6 @@ let sel_prefs_quantities walenty =
242 152 let quant = sel_prefs_quantities walenty in
243 153 print_stringqmap "results/quant_sel_prefs.txt" quant*)
244 154  
245   -let print_entry filename lex =
246   - match Xml.parse_file filename with
247   - Xml.Element("TEI", _,
248   - [Xml.Element("teiHeader",_,_) ;
249   - Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
250   - Xlist.iter entries (function
251   - Xml.Element("entry",_,Xml.Element("form", [], [Xml.Element("orth",[],[Xml.PCData orth]);_]) :: xml :: _) ->
252   - if orth = lex then print_endline (Xml.to_string_fmt xml)
253   - | _ -> failwith "print_entry")
254   - | _ -> failwith "print_entry"
255   -
256   -
257   -(*let _ =
258   - print_entry walenty_filename "bębnić"*)
259   -
260   -let print_full_entry filename lex =
261   - match Xml.parse_file filename with
262   - Xml.Element("TEI", _,
263   - [Xml.Element("teiHeader",_,_) ;
264   - Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
265   - Xlist.iter entries (function
266   - Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: _ :: l) ->
267   - let xml = Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: l) in
268   - if orth = lex then print_endline (Xml.to_string_fmt xml)
269   - | _ -> failwith "print_full_entry")
270   - | _ -> failwith "print_full_entry"
271   -
272   -(*let _ =
273   - print_full_entry walenty_filename "bębnić"*)
274   -
275 155 (*let _ =
276 156 let walenty = load_walenty2 () in
277 157 let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in
... ...
walenty/ENIAMwalStringOf.ml
... ... @@ -267,7 +267,7 @@ let rec schema schema =
267 267 (if s.gf = ARG then [] else [gf s.gf])@
268 268 (if s.role = "" then [] else [s.role])@
269 269 (if s.role_attr = "" then [] else [s.role_attr])@
270   - s.sel_prefs@(controllers s.cr)@(controllees s.ce)) ^ direction s.dir ^ "{" ^ String.concat ";" (Xlist.map s.morfs (fun (_,m) -> morf m)) ^ "}"))
  270 + s.sel_prefs@(controllers s.cr)@(controllees s.ce)) ^ direction s.dir ^ "{" ^ String.concat ";" (Xlist.map s.morfs morf) ^ "}"))
271 271  
272 272 (*and schema_role schema =
273 273 String.concat "+" (Xlist.map schema (fun (r,role,cr,ce,morfs) ->
... ... @@ -277,11 +277,13 @@ and morf = function
277 277 Phrase p -> phrase p
278 278 | E p -> "E(" ^ phrase p ^ ")"
279 279 | LexPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])"
  280 + | LexRPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])"
280 281 | LexPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])"
281 282 | PhraseAbbr(p,ml) -> phrase_abbr p ^ "[" ^ String.concat ";" (Xlist.map ml morf) ^ "]"
282 283 | PhraseComp(p,(ct,l)) -> phrase_comp p ^ "," ^ comp_type ct ^ "[" ^ String.concat ";" (Xlist.map l comp) ^ "]"
283 284 | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")"
284 285 | LexArg(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ le ^ ")"
  286 + | MorfId id -> Printf.sprintf "id(%d)" id
285 287 (* | LexRealization(mrf,le) -> "lex(" ^ morf mrf ^ "," ^ le ^ ")"*)
286 288 (* | Raised(mrf1,dir,mrf2) -> "raised([" ^ String.concat ";" mrf1 ^ "]," ^ direction dir ^ "[" ^ String.concat ";" mrf2 ^ "])"
287 289 | Multi l -> "multi(" ^ String.concat ";" (Xlist.map l phrase) ^ ")" *)
... ...
walenty/ENIAMwalTEI.ml
... ... @@ -19,6 +19,11 @@
19 19 *)
20 20  
21 21 open ENIAMwalTypes
  22 +open Xstd
  23 +
  24 +type id = {hash: bool; suffix: string; numbers: int list}
  25 +
  26 +let empty_id = {hash = false; suffix = ""; numbers = []}
22 27  
23 28 let parse_id s =
24 29 if String.length s = 0 then empty_id else
... ... @@ -35,17 +40,6 @@ let parse_id s =
35 40 let string_of_id id =
36 41 (if id.hash then "#" else "") ^ "wal_" ^ (String.concat "." (Xlist.map id.numbers string_of_int)) ^ "-" ^ id.suffix
37 42  
38   -(*let parse_id s =
39   - if String.length s = 0 then empty_id else
40   - if String.length s < 6 then failwith "za krótkie id" else
41   - let hash,s = if (String.get s 0) = '#' then true, String.sub s 1 (String.length s - 1) else false, s in
42   - if String.sub s 0 4 <> "wal_" then failwith "id nie ma wal" else
43   - let s,suf = match Str.split (Str.regexp "-") s with
44   - [s;suf] -> s,suf
45   - | _ -> failwith "zła ilość '-'" in
46   - let id = {hash = hash; suffix = suf; numbers = (Str.split (Str.regexp "\\.") s)} in
47   - {id with numbers = [last id.numbers]}*)
48   -
49 43 type tei =
50 44 Symbol of string
51 45 | TEIstring of string
... ... @@ -208,7 +202,6 @@ let load_ctype = function
208 202 | _ -> failwith "load_ctype")
209 203 | l -> failwith ("load_ctype 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string)))
210 204 | xml -> failwith ("load_ctype:\n " ^ tei_to_string xml)
211   -(*Printf.printf "%s\n" (tei_to_string xml)*)
212 205  
213 206 let load_lemmas_set = function
214 207 | TEIstring mstring -> mstring
... ... @@ -244,11 +237,11 @@ let process_lex_phrase lemma = function
244 237 (ENIAMwalStringOf.gender gender) (ENIAMwalStringOf.grad grad) (ENIAMwalStringOf.negation negation) (ENIAMwalStringOf.refl reflex); []
245 238  
246 239 let new_schema r cr ce morfs =
247   - {psn_id=empty_id; gf=r; role=""; role_attr="";sel_prefs=[]; cr=cr; ce=ce; dir=Both; morfs=morfs}
  240 + {psn_id=(-1); gf=r; role=""; role_attr="";sel_prefs=[]; cr=cr; ce=ce; dir=Both; morfs=morfs}
248 241  
249 242 let rec process_lex lex = function
250 243 | PhraseAbbr(ComparP prep,[]),arguments,Lexeme "",Lexeme "" ->
251   - LexPhrase([COMPAR,Lexeme prep],(Ratrs,Xlist.map arguments (fun morf -> new_schema ARG [] [] [empty_id,morf])))
  244 + LexPhrase([COMPAR,Lexeme prep],(Ratrs,Xlist.map arguments (fun morf -> new_schema ARG [] [] [morf])))
252 245 | PhraseAbbr(Xp mode,[argument]),_,_,_ ->
253 246 let lex = {lex with lex_argument=argument} in
254 247 (match process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma) with
... ... @@ -277,21 +270,21 @@ let rec load_category = function
277 270 (match x with
278 271 | [F("name",Symbol value)] -> value, []
279 272 | [F("name",Symbol value);Fset("constraints",set)] ->
280   - value, List.rev (Xlist.rev_map set (fun s -> snd (load_phrase s)))
  273 + value, List.rev (Xlist.rev_map set load_phrase)
281 274 | l -> failwith ("load_category 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string)))
282 275 | xml -> failwith ("load_category:\n " ^ tei_to_string xml)
283 276  
284 277 and load_modification_def = function (*pomocnicza do load_lex *)
285 278 | [F("type",Symbol value)] -> parse_restr value, []
286 279 | [F("type",Symbol value); Fset("positions",set)] ->
287   - parse_restr value, List.rev (Xlist.rev_map set load_position)
  280 + parse_restr value, List.rev (Xlist.rev_map set (load_position (-1) (-1) (ref IntMap.empty)))
288 281 | x -> Printf.printf "%s\n" (tei_to_string (List.hd x));
289 282 failwith "load_modification_def:\n"
290 283  
291 284 and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*)
292   - | F("argument",set) -> {arg with lex_argument = snd (load_phrase set)}
  285 + | F("argument",set) -> {arg with lex_argument = load_phrase set}
293 286 | Fset("arguments",set) ->
294   - {arg with lex_arguments=List.rev (Xlist.fold set [] (fun l s -> (snd (load_phrase s)) :: l))}
  287 + {arg with lex_arguments=List.rev (Xlist.rev_map set load_phrase)}
295 288 | F("modification",Fs("modification_def",x)) -> {arg with lex_modification = load_modification_def x}
296 289 | F("lemma",Fs("lemma_def",[F("selection_mode",Symbol value1);
297 290 F("cooccurrence",Symbol value2);
... ... @@ -312,68 +305,74 @@ and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*)
312 305 Printf.printf "%s\n" (tei_to_string xml);
313 306 failwith "load_lex:\n "
314 307  
315   -and load_phrase xml:id * morf =
316   - let id, idtype, x =
317   - match xml with
318   - | Fs(_idtype,Id _id :: _x) -> (_id, _idtype, _x)
319   - | Fs(_idtype, _x) -> (empty_id, _idtype, _x)
320   - | _ -> failwith "load_phrase let id,idtype...\n" in
321   - match idtype, x with
322   - | "np",[F("case",Symbol a)] -> id, Phrase (NP(parse_case a));
323   - | "prepnp", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepNP(a, parse_case b))
324   - | "adjp", [F("case",Symbol a)] -> id, Phrase (AdjP(parse_case a))
325   - | "prepadjp", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepAdjP(a, parse_case b))
326   - | "comprepnp", [e;F("complex_preposition",TEIstring a)] -> id, Phrase (ComprepNP(a))
327   - | "comprepnp", [F("complex_preposition",TEIstring a)] -> id, Phrase (ComprepNP(a))
328   - | "cp", [a] -> id, PhraseComp(Cp,load_ctype a)
329   - | "ncp", [F("case",Symbol a);b] -> id, PhraseComp(Ncp(parse_case a),load_ctype b)
330   - | "prepncp", [F("preposition",Symbol a);F("case",Symbol b);c] -> id, PhraseComp(Prepncp(a, parse_case b),load_ctype c)
331   - | "infp", [F("aspect",Symbol a)] -> id, Phrase (InfP(parse_aspect a))
332   - | "xp", [a] -> let x,y = load_category a in id, PhraseAbbr(Xp x,y)
333   - | "xp", [e;a] -> let x,y = load_category a in id, PhraseAbbr(Xp x,y)
334   - | "advp", [F("category",Symbol a)] -> id, PhraseAbbr(Advp(a),[])
335   - | "advp", [e;F("category",Symbol a)] -> id, PhraseAbbr(Advp(a),[])
336   - | "nonch", [] -> id, PhraseAbbr(Nonch,[])
337   - | "or", [] -> id, Phrase Or
338   - | "refl", [] -> id, Phrase Refl
339   - | "E", [] -> id, E Null
340   - | "lex", x ->
  308 +and load_phrase = function
  309 + | Fs("np",[F("case",Symbol a)]) -> Phrase (NP(parse_case a));
  310 + | Fs("prepnp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepNP(a, parse_case b))
  311 + | Fs("adjp", [F("case",Symbol a)]) -> Phrase (AdjP(parse_case a))
  312 + | Fs("prepadjp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepAdjP(a, parse_case b))
  313 + | Fs("comprepnp", [e;F("complex_preposition",TEIstring a)]) -> Phrase (ComprepNP(a))
  314 + | Fs("comprepnp", [F("complex_preposition",TEIstring a)]) -> Phrase (ComprepNP(a))
  315 + | Fs("cp", [a]) -> PhraseComp(Cp,load_ctype a)
  316 + | Fs("ncp", [F("case",Symbol a);b]) -> PhraseComp(Ncp(parse_case a),load_ctype b)
  317 + | Fs("prepncp", [F("preposition",Symbol a);F("case",Symbol b);c]) -> PhraseComp(Prepncp(a, parse_case b),load_ctype c)
  318 + | Fs("infp", [F("aspect",Symbol a)]) -> Phrase (InfP(parse_aspect a))
  319 + | Fs("xp", [a]) -> let x,y = load_category a in PhraseAbbr(Xp x,y)
  320 + | Fs("xp", [e;a]) -> let x,y = load_category a in PhraseAbbr(Xp x,y)
  321 + | Fs("advp", [F("category",Symbol a)]) -> PhraseAbbr(Advp(a),[])
  322 + | Fs("advp", [e;F("category",Symbol a)]) -> PhraseAbbr(Advp(a),[])
  323 + | Fs("nonch", []) -> PhraseAbbr(Nonch,[])
  324 + | Fs("or", []) -> Phrase Or
  325 + | Fs("refl", []) -> Phrase Refl
  326 + | Fs("E", []) -> E Null
  327 + | Fs("lex", x) ->
341 328 let lex = Xlist.fold x empty_lex load_lex in
342   - id, process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma)
343   - | "fixed", [F("argument",a);F("string",TEIstring b)] -> id, Phrase (FixedP((*snd (load_phrase a),*)b))
344   - | "possp", [e] -> id, PhraseAbbr(Possp,[])
345   - | "possp", [] -> id, PhraseAbbr(Possp,[])
346   - | "recip", [] -> id, Phrase Recip
347   - | "distrp", [e] -> id, PhraseAbbr(Distrp,[])
348   - | "distrp", [] -> id, PhraseAbbr(Distrp,[])
349   - | "compar", [F("compar_category",Symbol value)] -> id, PhraseAbbr(ComparP value,[])
350   - | "gerp", [F("case",Symbol a)] -> id, Phrase (GerP(parse_case a))
351   - | "prepgerp", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepGerP(a, parse_case b))
352   - | "nump", [F("case",Symbol a)] -> id, Phrase (NumP(parse_case a))
353   - | "prepnump", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepNumP(a, parse_case b))
354   - | "ppasp", [F("case",Symbol a)] -> id, Phrase (PpasP(parse_case a))
355   - | "prepppasp", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepPpasP(a, parse_case b))
356   - | "qub", [] -> id, Phrase Qub
357   - | "pactp", [F("case",Symbol a)] -> id, Phrase (PactP(parse_case a))
358   - | "adverb",[F("adverb",Symbol s)] -> id, LexPhrase([ADV (Grad "pos"),Lexeme s],(Natr,[]))
359   - | _ -> failwith ("load_phrase match:\n " ^ tei_to_string xml)
  329 + process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma)
  330 + | Fs("fixed", [F("argument",a);F("string",TEIstring b)]) -> Phrase (FixedP((*snd (load_phrase a),*)b))
  331 + | Fs("possp", [e]) -> PhraseAbbr(Possp,[])
  332 + | Fs("possp", []) -> PhraseAbbr(Possp,[])
  333 + | Fs("recip", []) -> Phrase Recip
  334 + | Fs("distrp", [e]) -> PhraseAbbr(Distrp,[])
  335 + | Fs("distrp", []) -> PhraseAbbr(Distrp,[])
  336 + | Fs("compar", [F("compar_category",Symbol value)]) -> PhraseAbbr(ComparP value,[])
  337 + | Fs("gerp", [F("case",Symbol a)]) -> Phrase (GerP(parse_case a))
  338 + | Fs("prepgerp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepGerP(a, parse_case b))
  339 + | Fs("nump", [F("case",Symbol a)]) -> Phrase (NumP(parse_case a))
  340 + | Fs("prepnump", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepNumP(a, parse_case b))
  341 + | Fs("ppasp", [F("case",Symbol a)]) -> Phrase (PpasP(parse_case a))
  342 + | Fs("prepppasp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepPpasP(a, parse_case b))
  343 + | Fs("qub", []) -> Phrase Qub
  344 + | Fs("pactp", [F("case",Symbol a)]) -> Phrase (PactP(parse_case a))
  345 + | Fs("adverb",[F("adverb",Symbol s)]) -> LexRPhrase([ADV (Grad "pos"),Lexeme s],(Natr,[]))
  346 + | xml -> failwith ("load_phrase match:\n " ^ tei_to_string xml)
  347 +
  348 +and load_phrase_id ent sch psn phrases = function
  349 + | Fs(morf,Id{hash=false; numbers=[ent_id;sch_id;psn_id;id]; suffix="phr"} :: l) ->
  350 + if ent_id = ent && sch_id = sch && psn_id = psn then
  351 + let morf = load_phrase (Fs(morf, l)) in
  352 + phrases := IntMap.add_inc (!phrases) id morf (fun morf2 -> if morf = morf2 then morf else failwith "load_phrase_id");
  353 + MorfId id
  354 + else failwith (Printf.sprintf "load_phrase %d %d" ent ent_id)
  355 + | Fs(morf, l) -> load_phrase (Fs(morf, l))
  356 + | _ -> failwith "load_phrase_id"
360 357  
361 358  
362 359 and load_control arg = function
363 360 | Symbol value -> parse_control arg value
364 361 | xml -> failwith ("load_control:\n " ^ tei_to_string xml)
365 362  
366   -and load_position_info arg = function
  363 +and load_position_info ent sch phrases arg = function
367 364 | F("function",Symbol value) -> {arg with gf = parse_gf value}
368 365 | Fset("phrases",phrases_set) ->
369   - {arg with morfs = List.rev (Xlist.rev_map phrases_set load_phrase)}
  366 + {arg with morfs = List.rev (Xlist.rev_map phrases_set (load_phrase_id ent sch arg.psn_id phrases))}
370 367 | Fset("control",control_set) -> Xlist.fold control_set arg load_control
371   - | Id id -> {arg with psn_id=id}
  368 + | Id{hash=false; numbers=[ent_id;sch_id;id]; suffix="psn"} ->
  369 + if ent_id = ent && sch_id = sch then {arg with psn_id = id}
  370 + else failwith (Printf.sprintf "load_position_info %d %d" ent ent_id)
372 371 | xml -> failwith ("load_position_info:\n " ^ tei_to_string xml)
373 372  
374   -and load_position = function
  373 +and load_position ent sch phrases = function
375 374 | Fs("position", listt) ->
376   - Xlist.fold listt empty_position load_position_info
  375 + Xlist.fold listt empty_position (load_position_info ent sch phrases)
377 376 | xml -> failwith ("load_position:\n " ^ tei_to_string xml)
378 377  
379 378 let parse_opinion = function
... ... @@ -392,7 +391,7 @@ let parse_opinion = function
392 391 | "dobry" -> Dobry
393 392 | x -> failwith ("parse_opinion: " ^ x)
394 393  
395   -let load_schema_info ent (arg:schema) = function
  394 +let load_schema_info ent phrases (arg:schema) = function
396 395 | F("opinion",Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}
397 396 | F("inherent_sie",Binary true) -> {arg with reflexiveMark = ReflTrue}
398 397 | F("inherent_sie",Binary false) -> {arg with reflexiveMark = ReflFalse}
... ... @@ -403,73 +402,79 @@ let load_schema_info ent (arg:schema) = function
403 402 | F("predicativity",Binary true) -> {arg with predicativity = PredTrue}
404 403 | F("predicativity",Binary false) -> {arg with predicativity = PredFalse}
405 404 | Fset("positions", positions) ->
406   - {arg with positions = List.rev (Xlist.rev_map positions load_position)}
  405 + {arg with positions = List.rev (Xlist.rev_map positions (load_position ent arg.sch_id phrases))}
407 406 | F("text_rep",TEIstring text_rep) -> {arg with text_rep = text_rep}
408   - (* | Id id -> {arg with sch_id = id} *)
409 407 | Id{hash=false; numbers=[ent_id;id]; suffix="sch"} -> if ent_id = ent then {arg with sch_id = id} else failwith (Printf.sprintf "load_schema_info %d %d" ent ent_id)
410 408 | xml -> failwith ("load_schema_info\n " ^ tei_to_string xml)
411 409  
412   -let load_schema ent = function
  410 +let load_schema ent phrases = function
413 411 Fs("schema", schema) ->
414 412 let result = {sch_id = (-1); opinion = OpinionUndef; reflexiveMark = ReflUndef; aspect = AspectUndef;
415 413 negativity = NegationUndef; predicativity = PredUndef; positions = []; text_rep=""} in
416   - let result = Xlist.fold schema result (load_schema_info ent) in
  414 + let result = Xlist.fold schema result (load_schema_info ent phrases) in
417 415 result
418 416 | xml -> failwith ("load_schema:\n " ^ tei_to_string xml)
419 417  
420   -let load_phrases_set = function
421   - | SameAs(same_as,"phrase") -> {same_as with numbers = List.tl same_as.numbers}
  418 +let load_phrases_set ent = function
  419 + | SameAs({hash=true; numbers=[ent_id;sch_id;psn_id;phr_id]; suffix="phr"},"phrase") ->
  420 + if ent_id <> ent then failwith (Printf.sprintf "load_phrases_set %d %d" ent ent_id) else
  421 + sch_id,psn_id,phr_id
422 422 | xml -> failwith ("load_phrases_set :\n " ^ tei_to_string xml)
423 423  
424 424 let load_example_info ent arg = function
425   - | F("meaning",SameAs(same_as,"lexical_unit")) -> {arg with meaning = same_as}
  425 + | F("meaning",SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit")) ->
  426 + if ent_id = ent then {arg with meaning = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id)
426 427 | Fset("phrases",phrases_set) ->
427   - {arg with phrases = List.rev (Xlist.rev_map phrases_set load_phrases_set)}
  428 + {arg with phrases = List.rev (Xlist.rev_map phrases_set (load_phrases_set ent))}
428 429 | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string}
429 430 | F("source",Symbol source_value) -> {arg with source = source_value}
430 431 | F("opinion",Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}
431 432 | F("note",TEIstring note_string) -> {arg with note = note_string}
432   - (* | Id id -> {arg with exm_id = id} *)
433 433 | Id{hash=false; numbers=[ent_id;id]; suffix="exm"} -> if ent_id = ent then {arg with exm_id = id} else failwith (Printf.sprintf "load_example_info %d %d" ent ent_id)
434 434 | xml -> failwith ("load_example_info :\n " ^ tei_to_string xml)
435 435  
436 436 let load_example ent = function
437 437 | Fs("example",example_elements) ->
438   - let result = {exm_id = (-1); meaning = empty_id; phrases = []; sentence = "";
  438 + let result = {exm_id = (-1); meaning = (-1); phrases = []; sentence = "";
439 439 source = ""; opinion = OpinionUndef; note = "";} in
440 440 let result = Xlist.fold example_elements result (load_example_info ent) in
441 441 result
442 442 | xml -> failwith ("load_example :\n " ^ tei_to_string xml)
443 443  
444   -let load_self_prefs_sets = function
  444 +let load_self_prefs_sets ent = function
445 445 | Numeric value -> NumericP(value)
446 446 | Symbol value -> SymbolP(value)
447   - | Fs("relation",[F("type",Symbol value);F("to",SameAs(same_as, "argument"))]) ->
448   - RelationP(value,same_as)
  447 + | Fs("relation",[F("type",Symbol value);F("to",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"}, "argument"))]) ->
  448 + if ent_id <> ent then failwith (Printf.sprintf "load_self_prefs_sets %d %d" ent ent_id)
  449 + else RelationP(value,frm_id,arg_id)
449 450 | xml -> failwith ("load_self_prefs_sets :\n " ^ tei_to_string xml)
450 451  
451   -let load_argument_self_prefs = function
  452 +let load_argument_self_prefs ent = function
452 453 | Fset(name,self_prefs_set) ->
453   - List.rev (Xlist.rev_map self_prefs_set load_self_prefs_sets)
  454 + List.rev (Xlist.rev_map self_prefs_set (load_self_prefs_sets ent))
454 455 | xml -> failwith ("load_argument_self_prefs :\n " ^ tei_to_string xml)
455 456  
456   -let load_argument_info arg = function
  457 +let load_argument_info ent frm arg = function
457 458 | F("role",Symbol value) -> {arg with role = value}
458 459 | F("role_attribute",Symbol value) -> {arg with role_attribute = value}
459 460 | F("sel_prefs",Fs("sel_prefs_groups", self_prefs)) ->
460   - {arg with sel_prefs = List.rev (Xlist.rev_map self_prefs load_argument_self_prefs)}
461   - | Id id -> {arg with arg_id = id}
  461 + {arg with sel_prefs = List.rev (Xlist.rev_map self_prefs (load_argument_self_prefs ent))}
  462 + (* | Id id -> {arg with arg_id = id} *)
  463 + | Id{hash=false; numbers=[ent_id;frm_id;id]; suffix="arg"} ->
  464 + if ent_id = ent && frm_id = frm then {arg with arg_id = id}
  465 + else failwith (Printf.sprintf "load_argument_info %d %d" ent ent_id)
462 466 | xml -> failwith ("load_argument_info :\n " ^ tei_to_string xml)
463 467  
464   -let load_arguments_set = function
  468 +let load_arguments_set ent frm = function
465 469 | Fs("argument", info) ->
466   - let result = {arg_id = empty_id; role = ""; role_attribute = ""; sel_prefs = []} in
467   - let result = Xlist.fold info result load_argument_info in
  470 + let result = {arg_id = (-1); role = ""; role_attribute = ""; sel_prefs = []} in
  471 + let result = Xlist.fold info result (load_argument_info ent frm) in
468 472 result
469 473 | xml -> failwith ("load_arguments_set :\n " ^ tei_to_string xml)
470 474  
471   -let load_meanings_set = function
472   - | SameAs(same_as,"lexical_unit") -> same_as
  475 +let load_meanings_set ent = function
  476 + | SameAs({hash=true; numbers=[ent_id;id]; suffix="mng"},"lexical_unit") ->
  477 + if ent_id = ent then id else failwith (Printf.sprintf "load_meanings_set %d %d" ent ent_id)
473 478 | xml -> failwith ("load_meanings_set :\n " ^ tei_to_string xml)
474 479  
475 480 let load_frame ent = function
... ... @@ -481,8 +486,8 @@ let load_frame ent = function
481 486 if ent_id <> ent then failwith (Printf.sprintf "load_frame %d %d" ent ent_id) else
482 487 {frm_id = id;
483 488 opinion = opinion;
484   - meanings = List.rev (Xlist.rev_map meanings_set load_meanings_set);
485   - arguments = List.rev (Xlist.rev_map arguments_set load_arguments_set)}
  489 + meanings = List.rev (Xlist.rev_map meanings_set (load_meanings_set ent));
  490 + arguments = List.rev (Xlist.rev_map arguments_set (load_arguments_set ent id))}
486 491 | xml -> failwith ("load_frame :\n " ^ tei_to_string xml)
487 492  
488 493 let load_meaning_info ent arg = function
... ... @@ -499,25 +504,33 @@ let load_meaning ent = function
499 504 Xlist.fold meaning_info empty_meaning (load_meaning_info ent)
500 505 | xml -> failwith ("load_meaning:\n " ^ tei_to_string xml)
501 506  
502   -let load_phrases_connections = function
503   - | SameAs(same_as,"phrase") -> same_as
504   - | xml -> failwith ("load_phrases_connections: \n " ^ tei_to_string xml)
505   -
506   -let load_alter_connection = function
  507 +let load_alter_connection ent = function
507 508 | Fs("connection", [
508   - F("argument",SameAs(same_as,"argument"));
  509 + F("argument",SameAs({hash=true; numbers=[ent_id;frm_id;arg_id]; suffix="arg"},"argument"));
509 510 Fset("phrases",phrases)]) ->
510   - {argument = same_as; phrases = List.rev (Xlist.rev_map phrases load_phrases_connections)}
  511 + if ent_id <> ent then failwith (Printf.sprintf "load_alter_connection %d %d" ent ent_id) else
  512 + let phrases,sch_set = Xlist.fold phrases (IntMap.empty,IntSet.empty) (fun (phrases,sch_set) phrase ->
  513 + let sch_id,psn_id,phr_id = load_phrases_set ent phrase in
  514 + IntMap.add_inc phrases psn_id [phr_id] (fun l -> phr_id :: l),
  515 + IntSet.add sch_set sch_id) in
  516 + if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alter_connection: |sch_set|=%d" (IntSet.size sch_set)) else
  517 + IntSet.min_elt sch_set, frm_id,
  518 + {argument = arg_id; phrases = IntMap.fold phrases [] (fun l psn phrs -> (psn,phrs) :: l)}
511 519 | xml -> failwith ("load_alter_connections: \n " ^ tei_to_string xml)
512 520  
513 521 let load_alternations ent = function
514 522 | Fs("alternation",[Fset("connections",connections_set)]) ->
515   - List.rev (Xlist.rev_map connections_set load_alter_connection)
  523 + let conns,sch_set,frm_set = Xlist.fold connections_set ([],IntSet.empty,IntSet.empty) (fun (conns,sch_set,frm_set) conn ->
  524 + let sch_id,frm_id,conn = load_alter_connection ent conn in
  525 + conn :: conns, IntSet.add sch_set sch_id, IntSet.add frm_set frm_id) in
  526 + if IntSet.size sch_set <> 1 then failwith (Printf.sprintf "load_alternations: |sch_set|=%d" (IntSet.size sch_set)) else
  527 + if IntSet.size frm_set <> 1 then failwith (Printf.sprintf "load_alternations: |frm_set|=%d" (IntSet.size sch_set)) else
  528 + {schema=IntSet.min_elt sch_set; frame=IntSet.min_elt frm_set; connections=List.rev conns}
516 529 | xml -> failwith ("load_alternations: \n " ^ tei_to_string xml)
517 530  
518   -let load_entry = function
  531 +let load_entry phrases = function
519 532 | Xml.Element("entry",["xml:id",id], l) ->
520   - print_endline id;
  533 + (* print_endline id; *)
521 534 let id = match parse_id id with
522 535 {hash=false; numbers=[id]; suffix="ent"} -> id
523 536 | _ -> failwith "process_meanings" in
... ... @@ -525,9 +538,9 @@ let load_entry = function
525 538 Xlist.fold l entry (fun e -> function
526 539 Xml.Element("form", [], [
527 540 Xml.Element("orth",[],[Xml.PCData orth]);
528   - Xml.Element("pos",[],[Xml.PCData pos])]) -> {e with form_orth=orth; form_pos=pos}
  541 + Xml.Element("pos",[],[Xml.PCData pos])]) -> (*print_endline orth;*) {e with form_orth=orth; form_pos=pos}
529 542 | xml -> (match parse_tei xml with
530   - | Fs("syntactic_layer", [Fset("schemata",schemata_set)]) -> {e with schemata = List.rev (Xlist.rev_map schemata_set (load_schema id))}
  543 + | Fs("syntactic_layer", [Fset("schemata",schemata_set)]) -> {e with schemata = List.rev (Xlist.rev_map schemata_set (load_schema id phrases))}
531 544 | Fs("examples_layer", [Fset("examples",examples_set)]) -> {e with examples = List.rev (Xlist.rev_map examples_set (load_example id))}
532 545 | Fs("semantic_layer", [Fset("frames",frame_set)]) -> {e with frames = List.rev (Xlist.rev_map frame_set (load_frame id))}
533 546 | Fs("meanings_layer", [Fset("meanings",meanings_set)]) -> {e with meanings = List.rev (Xlist.rev_map meanings_set (load_meaning id))}
... ... @@ -536,27 +549,31 @@ let load_entry = function
536 549 | xml -> failwith ("load_entry: \n" ^ tei_to_string xml)))
537 550 | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
538 551  
539   -let load_walenty filename:entry list =
  552 +let load_walenty filename =
540 553 begin
541 554 match Xml.parse_file filename with
542 555 Xml.Element("TEI", _,
543 556 [Xml.Element("teiHeader",_,_) ;
544 557 Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
545   - List.rev (Xlist.rev_map entries load_entry)
  558 + let phrases = ref IntMap.empty in
  559 + let walenty = List.rev (Xlist.rev_map entries (load_entry phrases)) in
  560 + walenty, !phrases
546 561 | _ -> failwith "load_walenty"
547 562 end
548 563  
549 564 type expansion = Phrases of morf list | Positions of position list
550 565  
551 566 let load_expansion = function
552   - Fs("expansion",[F("opinion",Symbol opinion);Fset("phrases",set)]) -> Phrases(List.rev (Xlist.map set (fun p -> snd (load_phrase p))))
553   - | Fs("expansion",[F("opinion",Symbol opinion);Fset("positions",set)]) -> Positions(List.rev (Xlist.map set load_position))
  567 + Fs("expansion",[F("opinion",Symbol opinion);Fset("phrases",set)]) -> Phrases(List.rev (Xlist.rev_map set load_phrase))
  568 + | Fs("expansion",[F("opinion",Symbol opinion);Fset("positions",set)]) -> Positions(List.rev (Xlist.rev_map set (load_position (-1) (-1) (ref IntMap.empty))))
554 569 | tei -> failwith ("load_expansion: \n" ^ tei_to_string tei)
555 570  
556 571 let load_rentry = function
557 572 | Xml.Element("entry",["xml:id",id], [phrase;exp]) ->
558   - let id = parse_id id in
559   - let morf = snd (load_phrase (parse_tei phrase)) in
  573 + let id = match parse_id id with
  574 + {hash=false; numbers=[id]; suffix="exp"} -> id
  575 + | _ -> failwith "process_meanings" in
  576 + let morf = load_phrase (parse_tei phrase) in
560 577 let expansions = match parse_tei exp with
561 578 | Fs("phrase_type_expansions", [Fset("expansions",expansions)]) -> List.rev (Xlist.map expansions load_expansion)
562 579 | Fs("phrase_type_expansions", [F("expansions",expansion)]) -> [load_expansion expansion]
... ... @@ -576,18 +593,20 @@ let load_expands filename =
576 593  
577 594  
578 595 (*let walenty = load_walenty Paths.walenty_filename *)
579   -let walenty = load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170304.xml"
  596 +let walenty,phrases = load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170304.xml"
  597 +
  598 +(* let _ = Printf.printf "|phrases|=%d\n" (IntMap.size phrases) *)
580 599  
581 600 let expands_supplement = [
582   - empty_id, PhraseAbbr(Nonch,[]), [Phrases[
583   - LexPhrase([SUBST(NumberUndef,Str),Lexeme "co"],(Natr,[]));
584   - LexPhrase([SUBST(NumberUndef,Str),Lexeme "coś"],(Natr,[]));
585   - LexPhrase([SUBST(NumberUndef,Str),Lexeme "nic"],(Natr,[]));
586   - LexPhrase([SUBST(NumberUndef,Str),Lexeme "to"],(Natr,[]));
  601 + (-2), PhraseAbbr(Nonch,[]), [Phrases[
  602 + LexRPhrase([SUBST(NumberUndef,Str),Lexeme "co"],(Natr,[]));
  603 + LexRPhrase([SUBST(NumberUndef,Str),Lexeme "coś"],(Natr,[]));
  604 + LexRPhrase([SUBST(NumberUndef,Str),Lexeme "nic"],(Natr,[]));
  605 + LexRPhrase([SUBST(NumberUndef,Str),Lexeme "to"],(Natr,[]));
587 606 ]];
588   - empty_id, PhraseAbbr(Advp "pron",[]), [Phrases[
589   - LexPhrase([ADV (Grad "pos"),Lexeme "tak"],(Natr,[]));
590   - LexPhrase([ADV (Grad "pos"),Lexeme "jak"],(Natr,[]))
  607 + (-3), PhraseAbbr(Advp "pron",[]), [Phrases[
  608 + LexRPhrase([ADV (Grad "pos"),Lexeme "tak"],(Natr,[]));
  609 + LexRPhrase([ADV (Grad "pos"),Lexeme "jak"],(Natr,[]))
591 610 ]]]
592 611  
593 612 let expands = expands_supplement @ load_expands "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170304.xml"
... ... @@ -601,330 +620,3 @@ let subtypes = [
601 620 "którędy"; "który"; "odkąd"; "skąd"]]
602 621  
603 622 let equivs = ["jak",["niczym"]; "przeciw",["przeciwko"]]
604   -
605   -(*
606   -let przejdz funkcja poczym =
607   - let _ = List.rev (List.fold_left (fun l nazwa -> funkcja nazwa :: l) [] poczym) in
608   - ()
609   -
610   -(*zwraca liste zwróconych wartosci przez funkcje*)
611   -let przejdz_lista funkcja poczym =
612   - List.rev (List.fold_left (fun l nazwa -> funkcja nazwa :: l) [] poczym)
613   -
614   -let przejdz_lista_second funkcja poczym =
615   - List.rev (List.fold_left (fun l nazwa -> (snd (funkcja nazwa)) :: l) [] poczym)
616   -
617   -(*łączy listy zwróconych wartości przez funkcje*)
618   -let przejdz_scal funkcja poczym =
619   - List.rev (List.fold_left (fun l nazwa -> funkcja nazwa @ l) [] poczym)
620   -
621   -(*zapisuje wynik wywołania do zmiennej i wywołuje ze zmienną*)
622   -let przejdz_zapisz funkcja zmienna poczym =
623   - List.fold_left (fun zmienna nazwa -> funkcja zmienna nazwa) zmienna poczym
624   -
625   -
626   -let rec last l =
627   - match l with
628   - | [a] -> a
629   - | a::b -> last b
630   - | _ -> failwith "pusta lista"
631   -
632   -let parse_full_id s =
633   - if String.length s = 0 then empty_id else
634   - if String.length s < 6 then failwith "za krótkie id" else
635   - let hash,s = if (String.get s 0) = '#' then true, String.sub s 1 (String.length s - 1) else false, s in
636   - if String.sub s 0 4 <> "wal_" then failwith "id nie ma wal" else
637   - let s,suf = match Str.split (Str.regexp "-") s with
638   - [s;suf] -> s,suf
639   - | _ -> failwith "zła ilość '-'" in
640   - let id = {hash = hash; suffix = suf; numbers = (Str.split (Str.regexp "\\.") s)} in
641   - id
642   -
643   -let parse_id s =
644   - if String.length s = 0 then empty_id else
645   - if String.length s < 6 then failwith "za krótkie id" else
646   - let hash,s = if (String.get s 0) = '#' then true, String.sub s 1 (String.length s - 1) else false, s in
647   - if String.sub s 0 4 <> "wal_" then failwith "id nie ma wal" else
648   - let s,suf = match Str.split (Str.regexp "-") s with
649   - [s;suf] -> s,suf
650   - | _ -> failwith "zła ilość '-'" in
651   - let id = {hash = hash; suffix = suf; numbers = (Str.split (Str.regexp "\\.") s)} in
652   - {id with numbers = [last id.numbers]}
653   -*)
654   -
655   -(* ******************************************* *)
656   -
657   -(****
658   -
659   -
660   -(*
661   -
662   -
663   -(*sprawdzanie czy id jednoznacznie definiuje zawartość typu*)
664   -
665   -
666   -(*meaningsLayer*)
667   -module StringMap = Map.Make(String)
668   -
669   -let cnt = ref 0;;
670   -
671   -let add_new map meaning =
672   - let num_id = match meaning.mng_id with
673   - {hash=false; numbers=[num_id]; suffix="mng"} -> num_id
674   - | _ -> failwith "zła składnia id" in
675   - if StringMap.mem num_id map then
676   - (Printf.printf "okkk\n";
677   - let meaning2 = StringMap.find num_id map in
678   - if meaning = meaning2 then map else
679   - failwith "różne1111")
680   - else (cnt:=!cnt+1;StringMap.add num_id meaning map)
681   -
682   -let check_entry_menaings mapa entry =
683   - przejdz_zapisz add_new mapa entry.meanings
684   -
685   -let check_meanings walenty =
686   - przejdz_zapisz check_entry_menaings StringMap.empty walenty
687   -
688   -(*
689   -let _ = check_meanings walenty
690   -let _ = Printf.printf "meaning map.size: %d\n" !cnt
691   -*)
692   -
693   -(*semanticLayer*)
694   -
695   -let cnt = ref 0;;
696   -
697   -(*arg_id*)
698   -let add_new map argument =
699   - let arg_id = match argument.arg_id with
700   - {hash=false; numbers=[num_id]; suffix="arg"} -> num_id
701   - | _ -> failwith "zła składnia id" in
702   - if StringMap.mem arg_id map then
703   - (Printf.printf "okkk\n";
704   - let val2 = StringMap.find arg_id map in
705   - let val1 = argument in
706   - if val1 = val2 then map else
707   - failwith "różne1111")
708   - else (cnt:=!cnt+1; StringMap.add arg_id argument map)
709   -
710   -
711   -
712   -let check_frame mapa frame =
713   - przejdz_zapisz add_new mapa frame.arguments
714   -
715   -let check_entry_frames mapa entry =
716   - przejdz_zapisz check_frame mapa entry.frames
717   -
718   -let check_meanings walenty =
719   - przejdz_zapisz check_entry_frames StringMap.empty walenty
720   -
721   -(*
722   -let _ = check_meanings walenty
723   -let _ = Printf.printf "entry.frame.argument map.size: %d\n" !cnt
724   -*)
725   -(*arg_id done*)
726   -
727   -
728   -let cnt = ref 0;;
729   -(*frm_id*)
730   -let add_new map frame =
731   - let id = match frame.frm_id with
732   - {hash=false; numbers=[num_id]; suffix="frm"} -> num_id
733   - | _ -> failwith "zła składnia id" in
734   - if StringMap.mem id map then
735   - (Printf.printf "okkk\n";
736   - let val2 = StringMap.find id map in
737   - let val1 = frame in
738   - if val1 = val2 then map else
739   - failwith "różne1111")
740   - else (cnt:=!cnt+1; StringMap.add id frame map)
741   -
742   -let check_entry_frames mapa entry =
743   - przejdz_zapisz add_new mapa entry.frames
744   -
745   -let check_meanings walenty =
746   - przejdz_zapisz check_entry_frames StringMap.empty walenty
747   -
748   -(*
749   -let _ = check_meanings walenty
750   -let _ = Printf.printf "entry.frame map.size: %d\n" !cnt
751   -*)
752   -
753   -(*frm.id done*)
754   -
755   -(*examplesLayer*)
756   -(*na razie zeruje phrases!!!!*)
757   -let print_example example =
758   - Printf.printf "meaning: %s\n phrases: " example.meaning;
759   -(* print_endline (String.concat "; " example.phrases);*)
760   - Printf.printf "sentence: %s\n" example.sentence;
761   - Printf.printf "source: %s\n" example.source;
762   - Printf.printf "opinion: %s\n" example.opinion;
763   - Printf.printf "note: %s\n\n" example.note
764   -
765   -
766   -
767   -let cnt = ref 0;;
768   -let takiesame = ref 0;;
769   -
770   -let add_new map example =
771   - let id = match example.exm_id with
772   - {hash=false; numbers=[num_id]; suffix="exm"} -> num_id
773   - | _ -> failwith "zła składnia id" in
774   -(* let example = {example with phrases = []} in (*uwaga!!!! zeruje phrases!!!*)*)
775   - let example = {example with meaning = ""} in (*uwaga!!!! zeruje meaning!!!*)
776   - if StringMap.mem id map then
777   - (takiesame:=!takiesame+1;
778   - (* Printf.printf "okkk\n";*)
779   - let val2 = StringMap.find id map in
780   - let val1 = example in
781   - if val1 = val2 then map else
782   - let _ = print_example val1 in
783   - let _ = print_example val2 in
784   - failwith "różne1111")
785   - else (cnt:=!cnt+1; StringMap.add id example map)
786   -
787   -let check_entry_example mapa entry =
788   - przejdz_zapisz add_new mapa entry.examples
789   -
790   -let check_meanings walenty =
791   - przejdz_zapisz check_entry_example StringMap.empty walenty
792   -
793   -(*
794   -let _ = check_meanings walenty
795   -let _ = Printf.printf "examples map.size: %d takich samych: %d\n" !cnt !takiesame
796   -*)
797   -
798   -
799   -(*syntatcticLayer position*)
800   -
801   -
802   -let cnt = ref 0;;
803   -let takiesame = ref 0;;
804   -
805   -let add_new map position =
806   - let id = match position.psn_id with
807   - {hash=false; numbers=[num_id]; suffix="psn"} -> num_id
808   - | _ -> failwith "zła składnia id" in
809   -(* let position = {position with phrases = przejdz_lista (fun (x,y) -> (parse_id "",y)) position.phrases} in*) (*uwaga!!!!*)
810   - if StringMap.mem id map then
811   - (takiesame:=!takiesame+1;
812   - (* Printf.printf "okkk\n";*)
813   - let val2 = StringMap.find id map in
814   - let val1 = position in
815   - if val1 = val2 then map else
816   - failwith "różne1111")
817   - else (cnt:=!cnt+1; StringMap.add id position map)
818   -
819   -let check_schema mapa schema =
820   - przejdz_zapisz add_new mapa schema.positions
821   -
822   -let check_entry mapa entry =
823   - przejdz_zapisz check_schema mapa entry.schemata
824   -
825   -let check walenty =
826   - przejdz_zapisz check_entry StringMap.empty walenty
827   -
828   -(*
829   -let _ = check walenty
830   -let _ = Printf.printf "syntactic...position map.size: %d takich samych: %d\n" !cnt !takiesame
831   -*)
832   -
833   -(* schema *)
834   -
835   -let cnt = ref 0;;
836   -let takiesame = ref 0;;
837   -
838   -(*let clear_id (position:position) =
839   - let position = {position with phrases = [](*przejdz_lista (fun (x,y) -> (empty_id,y)) position.phrases*)} in (*uwaga!!!!*)
840   - let position = {position with psn_id = empty_id} in
841   - position*)
842   -
843   -let print_schema (schema:schema) =
844   - Printf.printf "schema.opinion= %s\n" schema.opinion;
845   - Printf.printf "schema.reflexiveMark= %s\n" schema.reflexiveMark;
846   - Printf.printf "schema.aspect= %s\n" schema.aspect;
847   - Printf.printf "schema.negativity= %s\n" schema.negativity;
848   - Printf.printf "schema.predicativity= %s\n___________________\n" schema.predicativity
849   -
850   -let add_new map schema =
851   - let id = match schema.sch_id with
852   - {hash=false; numbers=[num_id]; suffix="sch"} -> num_id
853   - | _ -> failwith "zła składnia id" in
854   - let schema = {schema with opinion = ""} in (*uwaga, zeruje opinie!!!*)
855   - if StringMap.mem id map then
856   - (takiesame:=!takiesame+1;
857   - (* Printf.printf "okkk\n";*)
858   - let val2 = StringMap.find id map in
859   - let val1 = schema in
860   - if val1 = val2 then map else
861   - let _ = print_schema val1 in
862   - let _ = print_schema val2 in
863   - failwith "różne1111")
864   - else (cnt:=!cnt+1; StringMap.add id schema map)
865   -
866   -
867   -
868   -let check_schema mapa schema =
869   - add_new mapa schema
870   -
871   -let check_entry mapa entry =
872   - przejdz_zapisz check_schema mapa entry.schemata
873   -
874   -let check walenty =
875   - przejdz_zapisz check_entry StringMap.empty walenty
876   -
877   -(*
878   -let _ = check walenty
879   -let _ = Printf.printf "syntactic...schema map.size: %d takich samych: %d\n" !cnt !takiesame
880   -*)
881   -
882   -(*phrases*)
883   -
884   -let cnt = ref 0;;
885   -let takiesame = ref 0;;
886   -
887   -let add_new map (id, phrase) =
888   - let id = match id with
889   - {hash=false; numbers=[num_id]; suffix="phr"} -> num_id
890   - | _ -> failwith "zła składnia id" in
891   - if StringMap.mem id map then
892   - (takiesame:=!takiesame+1;
893   - (* Printf.printf "okkk\n";*)
894   - let val2 = StringMap.find id map in
895   - let val1 = phrase in
896   - if val1 = val2 then map else
897   - failwith "różne1111")
898   - else (cnt:=!cnt+1; StringMap.add id phrase map)
899   -
900   -let check_pos mapa (position:position) =
901   - przejdz_zapisz add_new mapa position.phrases
902   -
903   -let check_schema mapa schema =
904   - przejdz_zapisz check_pos mapa schema.positions
905   -
906   -let check_entry mapa entry =
907   - przejdz_zapisz check_schema mapa entry.schemata
908   -
909   -let check walenty =
910   - przejdz_zapisz check_entry StringMap.empty walenty
911   -
912   -*)
913   -
914   -(*
915   -let _ = check walenty
916   -let _ = Printf.printf "syntactic...phrases map.size: %d takich samych: %d\n" !cnt !takiesame
917   -*)
918   -
919   -
920   -(*
921   -loading: OK
922   -meaning map.size: 32962
923   -entry.frame.argument map.size: 10475
924   -entry.frame map.size: 3463
925   -examples map.size: 146536 takich samych: 64
926   -syntactic...position map.size: 7021 takich samych: 195288
927   -syntactic...schema map.size: 21247 takich samych: 51241
928   -*)
929   -
930   - ****)
... ...
walenty/ENIAMwalTypes.ml
... ... @@ -120,28 +120,26 @@ type direction = Forward | Backward | Both
120 120 | NumSpecs of gender
121 121 | EmptySpecs *)
122 122  
123   -type id = {hash: bool; suffix: string; numbers: int list}
124   -
125   -let empty_id = {hash = false; suffix = ""; numbers = []}
126   -
127   -type position = {psn_id: id; gf: gf; role: string; role_attr: string; sel_prefs: string list;
128   - cr: string list; ce: string list; dir: direction; morfs: (id * morf) list}
  123 +type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: string list;
  124 + cr: string list; ce: string list; dir: direction; morfs: morf list}
129 125  
130 126 and morf =
131 127 Phrase of phrase
132 128 | E of phrase
133 129 | LexPhrase of (pos * lex) list * (restr * position list)
  130 + | LexRPhrase of (pos * lex) list * (restr * position list)
134 131 | LexPhraseMode of string * (pos * lex) list * (restr * position list)
135 132 | PhraseAbbr of phrase_abbr * morf list
136 133 | PhraseComp of phrase_comp * (comp_type * comp list)
137 134 | LexPhraseId of string * pos * lex
138 135 | LexArg of string * pos * string
  136 + | MorfId of int
139 137 (* | LexRealization of morf * string*)
140 138 (* | Raised of string list * direction * string list
141 139 | Multi of phrase list*)
142 140  
143 141 let empty_position =
144   - {psn_id=empty_id; gf=ARG; role=""; role_attr="";sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]}
  142 + {psn_id=(-1); gf=ARG; role=""; role_attr="";sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]}
145 143  
146 144 type lex_record = {
147 145 lex_argument: morf;
... ... @@ -218,8 +216,8 @@ let subst_time_lexemes_filename = resource_path ^ &quot;/Walenty/subst_time.dat&quot;
218 216  
219 217  
220 218 type example = {exm_id: int;
221   - meaning: id;
222   - phrases: id list;
  219 + meaning: int;
  220 + phrases: (int * int * int) list;
223 221 sentence: string;
224 222 source: string;
225 223 opinion: opinion;
... ... @@ -228,16 +226,16 @@ type example = {exm_id: int;
228 226 type sel_prefs =
229 227 NumericP of int
230 228 | SymbolP of string
231   - | RelationP of string * id
  229 + | RelationP of string * int * int
232 230  
233   -type argument = {arg_id: id;
  231 +type argument = {arg_id: int;
234 232 role: string;
235 233 role_attribute: string;
236 234 sel_prefs: sel_prefs list list}
237 235  
238 236 type frame = {frm_id: int;
239 237 opinion: string;
240   - meanings: id list;
  238 + meanings: int list;
241 239 arguments: argument list}
242 240  
243 241 type meaning = {mng_id: int;
... ... @@ -252,8 +250,10 @@ let empty_meaning = {mng_id = (-1);
252 250 plwnluid = (-1);
253 251 gloss = ""}
254 252  
255   -type connection = {argument: id;
256   - phrases: id list}
  253 +type connection = {argument: int;
  254 + phrases: (int * int list) list}
  255 +
  256 +type alternation = {schema: int; frame: int; connections: connection list}
257 257  
258 258 type entry = {ent_id: int;
259 259 status: string;
... ... @@ -263,7 +263,7 @@ type entry = {ent_id: int;
263 263 examples: example list;
264 264 frames: frame list;
265 265 meanings: meaning list;
266   - alternations: connection list list}
  266 + alternations: alternation list}
267 267  
268 268 let empty_entry = {ent_id=(-1); status=""; form_orth=""; form_pos=""; schemata=[]; examples=[];
269 269 frames=[]; meanings=[]; alternations=[]}
... ...