Commit 479c42bcdbe0c67eba3dea6876b89ffeb40d5cbc

Authored by Wojciech Jaworski
1 parent 099c6f48

dostosowanie pre do pracy z morphology

config 0 โ†’ 100644
  1 +# General resource path
  2 +RESOURCES_PATH=../resources/
  3 +
  4 +# Localization of Walenty in TEI format
  5 +WALENTY=/usr/share/walenty/walenty_20160412.xml
  6 +
  7 +# Port number for pre server
  8 +PRE_PORT=3258
  9 +
  10 +# Host name for pre server
  11 +PRE_HOST=localhost
  12 +
  13 +# Path to the directory for parsed sentences
  14 +RESULTS_PATH=../results/
  15 +
  16 +# Maximum number of generated solutions
  17 +MAX_NO_SOLUTIONS=10
  18 +
  19 +# LCG parser timeout in seconds
  20 +LCG_TIMEOUT=100
  21 +
  22 +# LCG parser memory size (maximum number of nodes of parsed term)
  23 +LCG_NO_NODES=10000000
  24 +
  25 +# Number of parser processes
  26 +NO_PROCESSES=4
... ...
morphology/dict.ml
... ... @@ -315,9 +315,9 @@ let exceptional_lemmata = StringSet.of_list ([
315 315 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
316 316 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
317 317 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*)
318   - ] @ File.load_lines "data/obce.tab" (* @
319   - File.load_lines "data/validated_adj.tab" @ File.load_lines "data/validated_noun.tab" @
320   - File.load_lines "data/validated_verb.tab" @ File.load_lines "data/adv_nieodprzymiotnikowe.tab" *))
  318 + ] @ File.load_lines "../morphology/data/obce.tab" (* @
  319 + File.load_lines "../morphology/data/validated_adj.tab" @ File.load_lines "../morphology/data/validated_noun.tab" @
  320 + File.load_lines "../morphology/data/validated_verb.tab" @ File.load_lines "../morphology/data/adv_nieodprzymiotnikowe.tab" *))
321 321  
322 322 let remove_exceptional_lemmata dict =
323 323 Xlist.fold dict [] (fun dict entry ->
... ...
morphology/inflexion.a 0 โ†’ 100644
No preview for this file type
morphology/inflexion.cmxa 0 โ†’ 100644
No preview for this file type
morphology/ruleGenerator.ml
... ... @@ -2,7 +2,7 @@ open Xstd
2 2 open Printf
3 3 open Types
4 4  
5   -let alternation_map = Rules.alternation_map
  5 +(* let alternation_map = Rules.alternation_map *)
6 6  
7 7 let rule_types = Xlist.fold [
8 8 (* Xlist.map (StringMap.find alternation_map "obce_ch") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{x}ych\t{x}";
... ...
morphology/rules.ml
... ... @@ -44,7 +44,7 @@ let load_alternations filename =
44 44 | _ -> failwith "load_alternations") in
45 45 (name,List.rev alts) :: alternations
46 46  
47   -let alternations () = load_alternations "data/alternations.dic"
  47 +let alternations () = load_alternations "../morphology/data/alternations.dic"
48 48  
49 49 let revert_alternations l =
50 50 Xlist.map l (fun a -> {a with afind=a.aset; aset=a.afind})
... ... @@ -76,9 +76,9 @@ let load_pref_rules filename =
76 76 | _ -> failwith "load_pref_rules") in
77 77 (name,List.rev rules) :: pref_rules
78 78  
79   -let rules () = load_suf_rules "data/rules.dic"
80   -let rev_rules () = load_suf_rules "data/rev_rules.dic"
81   -let pref_rules () = load_pref_rules "data/pref_rules.dic"
  79 +let rules () = load_suf_rules "../morphology/data/rules.dic"
  80 +let rev_rules () = load_suf_rules "../morphology/data/rev_rules.dic"
  81 +let pref_rules () = load_pref_rules "../morphology/data/pref_rules.dic"
82 82  
83 83 let load_freq_rules filename =
84 84 File.fold_tab filename [] (fun rules -> function
... ... @@ -120,7 +120,7 @@ let rule_map alternation_map rev_alternation_map rules rev_rules pref_rules =
120 120 let map = Xlist.fold rev_rules map (fun map (k,v) -> StringMap.add map k (prepare_rev_rules rev_alternation_map v)) in
121 121 Xlist.fold pref_rules map (fun map (k,v) -> StringMap.add map k (prepare_pref_rules v))
122 122  
123   -let schemata () = File.load_tab "data/schemata.dic" (fun l -> l)
  123 +let schemata () = File.load_tab "../morphology/data/schemata.dic" (fun l -> l)
124 124  
125 125 (**********************************************************************************************)
126 126  
... ... @@ -236,7 +236,7 @@ module InterpTree = struct
236 236  
237 237 end
238 238  
239   -let interp_tree () = InterpTree.create (load_interp_rules "data/interp_rules.dic")
  239 +let interp_tree () = InterpTree.create (load_interp_rules "../morphology/data/interp_rules.dic")
240 240  
241 241 (**********************************************************************************************)
242 242  
... ...
parser/.gitignore 0 โ†’ 100644
  1 +pipe
  2 +results/*
... ...
parser/makefile
1 1 OCAMLC=ocamlc
2 2 OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4   -#INCLUDES=-I +xml-light -I +xlib -I ../../lib/latexvis -I ../lib/xt -I ../../Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre
5   -INCLUDES=--I +xml-light -I +xlib -I ../pre
  4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre
  5 +#INCLUDES=-I +xml-light -I +xlib -I ../pre
6 6 OCAMLFLAGS=$(INCLUDES) -g
7   -#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa latexvis.cmxa nkjp.cmxa
8   -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa
  7 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa
  8 +#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa
9 9  
10 10 PRE= ../pre/paths.ml ../pre/walTypes.ml ../pre/preTypes.ml ../pre/walStringOf.ml
11   -#LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml
12   -LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml
  11 +LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml
  12 +#LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml
13 13 DISAMB= disambSelPref.ml disambLemma.ml
14   -#SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
15   -SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml
16   -EXEC= execTypes.ml visualization.ml exec.ml
17   -
18   -all:
19   -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
  14 +SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
  15 +#SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml
  16 +EXEC= execTypes.ml visualization.ml exec.ml
  17 +
  18 +all:
  19 + $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
20 20 # $(OCAMLOPT) -o server $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
21   -# $(OCAMLOPT) -o parser.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
22   - $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml
23   - $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml
24   -# $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml
25   - $(OCAMLOPT) -o eniam $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) singleInterface.ml
26   -
27   -# pipe:
28   -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(PROC_F)
29   -#
  21 +# $(OCAMLOPT) -o parser.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
  22 +# $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml
  23 +# $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml
  24 +# $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml
  25 +# $(OCAMLOPT) -o eniam $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) singleInterface.ml
  26 +
  27 +# pipe:
  28 +# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(PROC_F)
  29 +#
30 30 # of_xml:
31 31 # $(OCAMLOPT) -o of_xml $(OCAMLOPTFLAGS) LCGofXml.ml
32 32  
... ...
parser/visualization.ml
... ... @@ -22,11 +22,11 @@ open Xstd
22 22 open Printf
23 23 open PreTypes
24 24  
25   -let string_of_interps interps =
  25 +let string_of_interps interps =
26 26 String.concat "|" (Xlist.map interps (fun interp ->
27 27 (String.concat ":" (Xlist.map interp (fun interp2 ->
28 28 (String.concat "." interp2))))))
29   -
  29 +
30 30 let rec string_of_token = function
31 31 PreTypes.SmallLetter orth -> sprintf "SmallLetter(%s)" orth
32 32 | PreTypes.CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc
... ... @@ -43,22 +43,22 @@ let rec string_of_token = function
43 43 | PreTypes.Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses)
44 44 | PreTypes.Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token))
45 45  
46   -let rec spaces i =
  46 +let rec spaces i =
47 47 if i = 0 then "" else " " ^ spaces (i-1)
48   -
  48 +
49 49 let rec string_of_tokens i = function
50   - PreTypes.Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;id=%d;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token)
  50 + PreTypes.Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;id=%d;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token)
51 51 t.PreTypes.id t.PreTypes.weight (String.concat ";" t.PreTypes.attrs) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight)))
52 52 (spaces i) (String.concat ";" (Xlist.map t.PreTypes.valence (WalStringOf.fnum_frame ""))) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.simple_valence (WalStringOf.fnum_frame ""))) (fst t.lroles) (snd t.lroles)
53 53 | PreTypes.Variant l -> sprintf "%sVariant[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))))
54   - | PreTypes.Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))))
55   -
56   -let paths_to_string_indexed (paths,last,next_id) =
57   - String.concat "\n" (Xlist.map paths (fun (i,j,t) ->
  54 + | PreTypes.Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))))
  55 +
  56 +let paths_to_string_indexed (paths,last,next_id) =
  57 + String.concat "\n" (Xlist.map paths (fun (i,j,t) ->
58 58 Printf.sprintf "%2d %2d %s" i j (string_of_tokens 0 (PreTypes.Token t))))
59 59 ^ Printf.sprintf "\nlast=%d next_id=%d" last next_id
60   -
61   -let rec xml_of_graph = function
  60 +
  61 +let rec xml_of_graph = function
62 62 Node t -> Xml.Element("node",["pred",t.pred;"cat",t.cat;"weight",string_of_float t.weight;"id",string_of_int t.id],[
63 63 Xml.Element("gs",[],[xml_of_graph t.gs]);
64 64 Xml.Element("agf",[],[Xml.PCData (WalStringOf.gf t.agf)]);
... ... @@ -75,11 +75,11 @@ let rec xml_of_graph = function
75 75 Xml.Element("relations",[],[xml_of_graph c.cx_relations])])
76 76 | Relation(r,a,t) -> Xml.Element("relation",[],[
77 77 Xml.Element("role",[],[xml_of_graph r]);
78   - Xml.Element("role_attr",[],[xml_of_graph r]);
  78 + Xml.Element("role_attr",[],[xml_of_graph r]);
79 79 xml_of_graph t])
80 80 | RevRelation(r,a,t) -> Xml.Element("revrelation",[],[
81 81 Xml.Element("role",[],[xml_of_graph r]);
82   - Xml.Element("role_attr",[],[xml_of_graph r]);
  82 + Xml.Element("role_attr",[],[xml_of_graph r]);
83 83 xml_of_graph t])
84 84 | SingleRelation(r) -> Xml.Element("singlerelation",[],[xml_of_graph r])
85 85 | Tuple l -> Xml.Element("tuple",[],Xlist.map l xml_of_graph)
... ... @@ -90,18 +90,18 @@ let rec xml_of_graph = function
90 90 | Ref i -> Xml.Element("ref",["id",string_of_int i],[])
91 91 | Morf _ -> Xml.Element("dot",[],[]) (* FIXME!!! *)
92 92 | t -> failwith ("xml_of_graph: " ^ LCGstringOf.linear_term 0 t)
93   -
94   -let print_xml_graph path name references =
  93 +
  94 +let print_xml_graph path name references =
95 95 let l = Int.fold 0 (Array.length references - 1) [] (fun l i ->
96 96 (i, xml_of_graph references.(i)) :: l) in
97 97 let xml = Xml.Element("graph",[],Xlist.rev_map l (fun (i,xml) ->
98 98 Xml.Element("graph_node",["id",string_of_int i],[xml]))) in
99   - File.file_out (path ^ name ^ ".xml") (fun file ->
  99 + File.file_out (path ^ name ^ ".xml") (fun file ->
100 100 fprintf file "%s\n" (Xml.to_string_fmt xml))
101 101  
102   -let print_xml_tree path name tree =
  102 +let print_xml_tree path name tree =
103 103 let xml = xml_of_graph tree in
104   - File.file_out (path ^ name ^ ".xml") (fun file ->
  104 + File.file_out (path ^ name ^ ".xml") (fun file ->
105 105 fprintf file "%s\n" (Xml.to_string_fmt xml))
106 106  
107 107  
... ... @@ -112,14 +112,14 @@ let rec get_refs rev = function
112 112 | Variant(e,l) -> Xlist.fold l rev (fun rev (i,t) -> get_refs rev t)
113 113 | Dot -> rev
114 114 | _ -> (*failwith*)print_endline "get_refs"; rev
115   -
116   -let escape_string s =
  115 +
  116 +let escape_string s =
117 117 Int.fold 0 (String.length s - 1) "" (fun t i ->
118 118 match String.sub s i 1 with
119 119 "<" -> t ^ "โŒฉ"
120 120 | ">" -> t ^ "โŒช"
121 121 | c -> t ^ c)
122   -
  122 +
123 123 let string_of_node t =
124 124 let l = [
125 125 "PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs;
... ... @@ -130,64 +130,64 @@ let string_of_node t =
130 130 "{ " ^ String.concat " | " (Xlist.map l (fun (e,t) -> "{ " ^ e ^ " | " ^ escape_string (LCGstringOf.linear_term 0 t) ^ " }")) ^ " }"
131 131  
132 132 let single_rel_id_count = ref 0
133   -
  133 +
134 134 let get_single_rel_id () =
135 135 let id = !single_rel_id_count in
136 136 incr single_rel_id_count;
137 137 "s" ^ string_of_int id
138   -
  138 +
139 139 let print_edge file label upper id =
140   - if upper <> "" then
  140 + if upper <> "" then
141 141 if label = "" then fprintf file " %s -> %s\n" upper id
142 142 else fprintf file " %s -> %s [label=\"%s\"]\n" upper id label
143   -
144   -(*let rec print_graph_rec2 file edge upper = function
  143 +
  144 +(*let rec print_graph_rec2 file edge upper = function
145 145 Tuple l -> Xlist.iter l (print_graph_rec2 file edge upper)
146   - | Node t ->
  146 + | Node t ->
147 147 let id = get_single_rel_id () in
148 148 fprintf file " %s [label=\"%s\"]\n" id (string_of_node t);
149 149 print_edge file edge upper id;
150 150 print_graph_rec2 file "" id t.args
151   - | Concept t ->
  151 + | Concept t ->
152 152 let id = get_single_rel_id () in
153   - fprintf file " %s [shape=box,label=\"%s %s\"]\n" id
154   - (LCGchart.string_of_linear_term 0 t.c_sense)
  153 + fprintf file " %s [shape=box,label=\"%s %s\"]\n" id
  154 + (LCGchart.string_of_linear_term 0 t.c_sense)
155 155 (if t.c_name=Dot then "" else "โ€ž" ^ LCGchart.string_of_linear_term 0 t.c_name ^ "โ€"); (* FIXME *)
156 156 print_edge file edge upper id;
157 157 print_graph_rec2 file "" id t.c_relations
158   - | SingleRelation(role) ->
  158 + | SingleRelation(role) ->
159 159 let id = get_single_rel_id () in
160 160 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGchart.string_of_linear_term 0 role);
161 161 if upper <> "" then fprintf file " %s -> %s\n" upper id
162   - | Variant(e,l) ->
  162 + | Variant(e,l) ->
163 163 fprintf file " %s [shape=diamond]\n" e;
164 164 print_edge file edge upper e;
165 165 Xlist.iter l (fun (i,t) -> print_graph_rec2 file i e t)
166 166 | Dot -> ()
167 167 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i)
168 168 | t -> failwith ("print_graph_rec2: " ^ LCGchart.string_of_linear_term 0 t) *)
169   -
  169 +
170 170 let rec string_of_quant_rec quant = function
171 171 Tuple l -> Xlist.fold l quant string_of_quant_rec
172 172 | Variant(e,l) -> (LCGstringOf.linear_term 0 (Variant(e,l))) :: quant
173 173 | Dot -> quant
174 174 | Val s -> s :: quant
175 175 | _ -> failwith "string_of_quant_rec"
176   -
  176 +
177 177 let string_of_quant t =
178 178 let l = string_of_quant_rec [] t in
179 179 let s = String.concat " " l in
180 180 if s = "" then "" else "<I>" ^ s ^ "</I> "
181   -
182   -let rec print_graph_rec file edge upper id = function
183   - Node t ->
  181 +
  182 +let rec print_graph_rec file edge upper id = function
  183 + Node t ->
184 184 fprintf file " %s [label=\"%s\"]\n" id (string_of_node t);
185 185 print_edge file edge upper id;
186 186 print_graph_rec2 file "" id t.args
187   - | Concept t ->
188   - fprintf file " %s [shape=box,label=<%s%s %s>]\n" id
189   - (string_of_quant t.c_quant)
190   - (LCGstringOf.linear_term 0 t.c_sense)
  187 + | Concept t ->
  188 + fprintf file " %s [shape=box,label=<%s%s %s>]\n" id
  189 + (string_of_quant t.c_quant)
  190 + (LCGstringOf.linear_term 0 t.c_sense)
191 191 (if t.c_name=Dot then "" else "โ€ž" ^ LCGstringOf.linear_term 0 t.c_name ^ "โ€"); (* FIXME *)
192 192 print_edge file edge upper id;
193 193 print_graph_rec2 file "" id t.c_relations
... ... @@ -197,7 +197,7 @@ let rec print_graph_rec file edge upper id = function
197 197 print_edge file edge upper id;
198 198 print_graph_rec2 file "" id t.cx_contents;
199 199 print_graph_rec2 file "" id t.cx_relations;
200   - | Relation(role,role_attr,t) ->
  200 + | Relation(role,role_attr,t) ->
201 201 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
202 202 print_edge file edge upper id;
203 203 print_graph_rec2 file "" id t
... ... @@ -205,31 +205,31 @@ let rec print_graph_rec file edge upper id = function
205 205 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
206 206 print_edge file edge upper id;
207 207 print_graph_rec2 file "" id t
208   - | SingleRelation(role) ->
  208 + | SingleRelation(role) ->
209 209 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role);
210 210 print_edge file edge upper id
211   - | AddRelation(t,role,role_attr,s) ->
  211 + | AddRelation(t,role,role_attr,s) ->
212 212 fprintf file " %s [shape=circle,label=\"AddRelation\\n%s\\n%s\"]\n" id role role_attr;
213 213 print_edge file edge upper id;
214 214 print_graph_rec2 file "" id t;
215 215 print_graph_rec2 file "" id s;
216   - | SetContextName(s,t) ->
  216 + | SetContextName(s,t) ->
217 217 fprintf file " %s [shape=circle,label=\"SetContextName\\n%s\"]\n" id s;
218 218 print_edge file edge upper id;
219 219 print_graph_rec2 file "" id t
220   - | RemoveRelation t ->
  220 + | RemoveRelation t ->
221 221 fprintf file " %s [shape=circle,label=\"RemoveRelation\"]\n" id;
222 222 print_edge file edge upper id;
223 223 print_graph_rec2 file "" id t
224   - | Variant(e,l) ->
  224 + | Variant(e,l) ->
225 225 fprintf file " %s [shape=diamond,label=\"%s\"]\n" id e;
226 226 print_edge file edge upper id;
227 227 Xlist.iter l (fun (i,t) -> print_graph_rec2 file i id t)
228   - | Choice choice ->
  228 + | Choice choice ->
229 229 fprintf file " %s [shape=Mdiamond,label=\"%s\"]\n" id "";
230 230 print_edge file edge upper id;
231 231 StringMap.iter choice (fun ei t -> print_graph_rec2 file ei id t)
232   - | Val s ->
  232 + | Val s ->
233 233 fprintf file " %s [shape=box,label=\"%s\"]\n" id s;
234 234 print_edge file edge upper id
235 235 | Dot -> ()
... ... @@ -237,63 +237,63 @@ let rec print_graph_rec file edge upper id = function
237 237 print_edge file edge upper id*)
238 238 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i)
239 239 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t)
240   -
  240 +
241 241 and print_graph_rec2 file edge upper = function
242 242 Tuple l -> Xlist.iter l (print_graph_rec2 file edge upper)
243 243 | t -> print_graph_rec file edge upper (get_single_rel_id ()) t
244   -
  244 +
245 245 (*let rec print_graph_rec file is_rev upper i = function (* FIXME: dokoล„czyฤ‡ is_rev *)
246   - Node t ->
  246 + Node t ->
247 247 (* let orth = if t.id = 0 then "" else.(t.id).PreTypes.orth in
248 248 fprintf file " %s [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat;*)
249 249 fprintf file " %s [label=\"%s\"]\n" i (string_of_node t);
250   - if upper <> "" then
  250 + if upper <> "" then
251 251 if is_rev then fprintf file " %s -> %s\n" i upper
252 252 else fprintf file " %s -> %s\n" upper i;
253 253 print_graph_rec file false i i t.args
254   - | Concept t ->
255   - fprintf file " %s [shape=box,label=\"%s %s\"]\n" ("c" ^ i)
256   - (LCGstringOf.linear_term 0 t.c_sense)
  254 + | Concept t ->
  255 + fprintf file " %s [shape=box,label=\"%s %s\"]\n" ("c" ^ i)
  256 + (LCGstringOf.linear_term 0 t.c_sense)
257 257 (if t.c_name=Dot then "" else "โ€ž" ^ LCGstringOf.linear_term 0 t.c_name ^ "โ€"); (* FIXME *)
258   - if upper <> "" then
  258 + if upper <> "" then
259 259 if is_rev then fprintf file " %s -> %s\n" ("c" ^ i) upper
260 260 else fprintf file " %s -> %s\n" upper ("c" ^ i);
261 261 print_graph_rec file false ("c" ^ i) i t.c_relations
262 262 | Context t ->
263 263 fprintf file " %s [shape=Msquare,label=\"\"]\n" ("i" ^ i);
264   - if upper <> "" then
  264 + if upper <> "" then
265 265 if is_rev then fprintf file " %s -> %s\n" ("i" ^ i) upper
266 266 else fprintf file " %s -> %s\n" upper ("i" ^ i);
267 267 print_graph_rec file false ("i" ^ i) i t.cx_contents
268   - | SingleRelation(role) ->
  268 + | SingleRelation(role) ->
269 269 let id = get_single_rel_id () in
270 270 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role);
271 271 if upper <> "" then fprintf file " %s -> %s\n" upper id
272   - | Relation(role,role_attr,t) ->
  272 + | Relation(role,role_attr,t) ->
273 273 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" i (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
274 274 if upper <> "" then fprintf file " %s -> %s\n" upper i;
275 275 print_graph_rec file false i i t
276   - | RevRelation(role,role_attr,t) ->
  276 + | RevRelation(role,role_attr,t) ->
277 277 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" i (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
278 278 if upper <> "" then fprintf file " %s -> %s\n" i upper;
279 279 print_graph_rec file true i i t
280 280 | Tuple l -> Xlist.iter l (print_graph_rec file is_rev upper i)
281   - | Variant(e,l) ->
  281 + | Variant(e,l) ->
282 282 fprintf file " %s [shape=diamond]\n" e;
283 283 if upper <> "" then fprintf file " %s -> %s\n" upper e;
284 284 Xlist.iter l (fun (i2,t) -> print_graph_rec file false e ("x" ^ i ^ "y" ^ i2) t)
285 285 | Dot -> ()
286 286 | Ref i2 -> fprintf file " %s -> %d\n" upper i2
287 287 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t)*)
288   -
289   -let print_graph path name references =
  288 +
  289 +let print_graph path name references =
290 290 single_rel_id_count := 0;
291   - File.file_out (path ^ name ^ ".gv") (fun file ->
  291 + File.file_out (path ^ name ^ ".gv") (fun file ->
292 292 fprintf file "digraph G {\n node [shape=record]\n";
293 293 Int.iter 0 (Array.length references - 1) (fun i -> print_graph_rec file (*false*) "" "" ("x" ^ string_of_int i) references.(i));
294   -(* Int.iter 0 (Array.length references - 1) (fun i ->
  294 +(* Int.iter 0 (Array.length references - 1) (fun i ->
295 295 match references.(i) with
296   - Node t ->
  296 + Node t ->
297 297 fprintf file " %d [label=\"%s\"]\n" i (string_of_node t);
298 298 let refs = get_refs [] t.args in
299 299 Xlist.iter refs (fun r ->
... ... @@ -304,31 +304,31 @@ let print_graph path name references =
304 304 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
305 305 Sys.chdir ".."
306 306  
307   -let id_counter = ref 0
308   -
  307 +let id_counter = ref 0
  308 +
309 309 let print_edge2 file edge_rev edge_label edge_head edge_tail upper id =
310 310 let edge_head,edge_tail,upper,id = if edge_rev then edge_tail,edge_head,id,upper else edge_head,edge_tail,upper,id in
311   - let l =
  311 + let l =
312 312 (if edge_label = "" then [] else ["label=\"" ^ edge_label ^ "\""]) @
313 313 (if edge_head = "" then [] else ["ltail=\"" ^ edge_head ^ "\""]) @
314 314 (if edge_tail = "" then [] else ["lhead=\"" ^ edge_tail ^ "\""]) in
315   - if upper <> 0 then
  315 + if upper <> 0 then
316 316 if l = [] then fprintf file " %d -> %d\n" upper id
317 317 else fprintf file " %d -> %d [%s]\n" upper id (String.concat "," l)
318   -
319   -let rec print_graph2_rec file edge_rev edge_label edge_head upper = function
320   - Node t ->
  318 +
  319 +let rec print_graph2_rec file edge_rev edge_label edge_head upper = function
  320 + Node t ->
321 321 let id = !id_counter in
322 322 incr id_counter;
323 323 fprintf file " %d [label=\"%s\"]\n" id (string_of_node t);
324 324 print_edge2 file edge_rev edge_label edge_head "" upper id;
325 325 print_graph2_rec file false "" "" id t.args
326   - | Concept t ->
  326 + | Concept t ->
327 327 let id = !id_counter in
328 328 incr id_counter;
329   - fprintf file " %d [shape=box,label=<%s%s %s>]\n" id
330   - (string_of_quant t.c_quant)
331   - (LCGstringOf.linear_term 0 t.c_sense)
  329 + fprintf file " %d [shape=box,label=<%s%s %s>]\n" id
  330 + (string_of_quant t.c_quant)
  331 + (LCGstringOf.linear_term 0 t.c_sense)
332 332 (if t.c_name=Dot then "" else "โ€ž" ^ LCGstringOf.linear_term 0 t.c_name ^ "โ€"); (* FIXME *)
333 333 print_edge2 file edge_rev edge_label edge_head "" upper id;
334 334 print_graph2_rec file false "" "" id t.c_relations
... ... @@ -338,64 +338,64 @@ let rec print_graph2_rec file edge_rev edge_label edge_head upper = function
338 338 if t.cx_sense = Dot then fprintf file " subgraph cluster%d {\nlabel=\"\"\n" id
339 339 else fprintf file " subgraph cluster%d {\nlabel=\"%s\"\n" id (LCGstringOf.linear_term 0 t.cx_sense);
340 340 print_graph2_rec file false "" "" 0 t.cx_contents;
341   - fprintf file " }\n";
  341 + fprintf file " }\n";
342 342 print_edge2 file edge_rev edge_label edge_head ("cluster" ^ string_of_int id) upper (id+1);
343 343 print_graph2_rec file false "" ("cluster" ^ string_of_int id) (id+1) t.cx_relations;
344   - | Relation(role,role_attr,t) ->
  344 + | Relation(role,role_attr,t) ->
345 345 let id = !id_counter in
346 346 incr id_counter;
347 347 fprintf file " %d [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
348 348 print_edge2 file false edge_label edge_head "" upper id;
349 349 print_graph2_rec file false "" "" id t
350   - | RevRelation(role,role_attr,t) ->
  350 + | RevRelation(role,role_attr,t) ->
351 351 let id = !id_counter in
352 352 incr id_counter;
353 353 fprintf file " %d [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
354 354 print_edge2 file true edge_label edge_head "" upper id;
355 355 print_graph2_rec file true "" "" id t
356   - | SingleRelation(role) ->
  356 + | SingleRelation(role) ->
357 357 let id = !id_counter in
358 358 incr id_counter;
359 359 fprintf file " %d [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role);
360 360 print_edge2 file false edge_label edge_head "" upper id
361   - | AddRelation(t,role,role_attr,s) ->
  361 + | AddRelation(t,role,role_attr,s) ->
362 362 let id = !id_counter in
363 363 incr id_counter;
364 364 fprintf file " %d [shape=circle,label=\"AddRelation\\n%s\\n%s\"]\n" id role role_attr;
365 365 print_edge2 file edge_rev edge_label edge_head "" upper id;
366 366 print_graph2_rec file false "" "" id t;
367 367 print_graph2_rec file false "" "" id s
368   - | RemoveRelation t ->
  368 + | RemoveRelation t ->
369 369 let id = !id_counter in
370 370 incr id_counter;
371 371 fprintf file " %d [shape=circle,label=\"RemoveRelation\"]\n" id;
372 372 print_edge2 file edge_rev edge_label edge_head "" upper id;
373 373 print_graph2_rec file false "" "" id t
374   - | SetContextName(s,t) ->
  374 + | SetContextName(s,t) ->
375 375 let id = !id_counter in
376 376 incr id_counter;
377 377 fprintf file " %d [shape=circle,label=\"SetContextName\\n%s\"]\n" id s;
378 378 print_edge2 file edge_rev edge_label edge_head "" upper id;
379 379 print_graph2_rec file false "" "" id t;
380 380 | Tuple l -> Xlist.iter l (print_graph2_rec file edge_rev edge_label edge_head upper)
381   - | Variant(e,l) ->
  381 + | Variant(e,l) ->
382 382 let id = !id_counter in
383 383 incr id_counter;
384 384 fprintf file " %d [shape=diamond,label=\"%s\"]\n" id e;
385 385 print_edge2 file edge_rev edge_label edge_head "" upper id;
386 386 Xlist.iter l (fun (i,t) -> print_graph2_rec file edge_rev i "" id t)
387   - | Val s ->
  387 + | Val s ->
388 388 let id = !id_counter in
389 389 incr id_counter;
390 390 fprintf file " %d [shape=box,label=\"%s\"]\n" id s;
391 391 print_edge2 file edge_rev edge_label edge_head "" upper id
392 392 | Dot -> ()
393 393 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t)
394   -
395   -let print_graph2 path name query t =
  394 +
  395 +let print_graph2 path name query t =
396 396 (* print_endline *)
397 397 id_counter := 1;
398   - File.file_out (path ^ name ^ ".gv") (fun file ->
  398 + File.file_out (path ^ name ^ ".gv") (fun file ->
399 399 fprintf file "digraph G {\n compound=true\n node [shape=record]\n";
400 400 print_graph2_rec file false "" "" 0 t;
401 401 fprintf file "label=\"%s\"\n }\n" query);
... ... @@ -407,11 +407,11 @@ let rec get_lemma = function
407 407 PreTypes.Interp orth -> orth
408 408 | PreTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat
409 409 | _ -> ""
410   -
411   -let print_paths path name paths =
412   - File.file_out (path ^ name ^ ".gv") (fun file ->
  410 +
  411 +let print_paths path name paths =
  412 + File.file_out (path ^ name ^ ".gv") (fun file ->
413 413 fprintf file "digraph G {\n";
414   - Array.iter (fun t ->
  414 + Array.iter (fun t ->
415 415 let lemma = get_lemma t.PreTypes.token in
416 416 if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.PreTypes.beg t.PreTypes.next t.PreTypes.orth lemma) paths;
417 417 fprintf file "}\n");
... ... @@ -419,39 +419,39 @@ let print_paths path name paths =
419 419 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
420 420 Sys.chdir ".."
421 421  
422   -let rec print_tree_rec2 file paths edge upper = function
  422 +let rec print_tree_rec2 file paths edge upper = function
423 423 Tuple l -> Xlist.iter l (print_tree_rec2 file paths edge upper)
424   - | Variant(e,l) ->
  424 + | Variant(e,l) ->
425 425 fprintf file " %s [shape=diamond]\n" e;
426 426 print_edge file edge upper e;
427 427 Xlist.iter l (fun (i,t) -> print_tree_rec2 file paths i e t)
428 428 | Dot -> ()
429 429 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i)
430 430 | t -> failwith ("print_tree_rec: " ^ LCGstringOf.linear_term 0 t)
431   -
432   -let rec print_tree_rec file paths edge upper id = function
433   - Node t ->
  431 +
  432 +let rec print_tree_rec file paths edge upper id = function
  433 + Node t ->
434 434 let orth = if t.id = 0 then "" else paths.(t.id).PreTypes.orth in
435 435 fprintf file " %s [label=\"%s\\n%s\\n%s:%s\\n%f\"]\n" id (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat t.weight;
436 436 print_edge file edge upper id;
437 437 print_tree_rec2 file paths "" id t.args
438   - | Variant(e,l) ->
  438 + | Variant(e,l) ->
439 439 fprintf file " %s [shape=diamond,label=\"%s\"]\n" id e;
440 440 print_edge file edge upper id;
441 441 Xlist.iter l (fun (i,t) -> print_tree_rec file paths i id (id ^ "y" ^ i) t)
442   - | Choice choice ->
  442 + | Choice choice ->
443 443 fprintf file " %s [shape=Mdiamond,label=\"%s\"]\n" id "";
444 444 print_edge file edge upper id;
445 445 StringMap.iter choice (fun ei t -> print_tree_rec file paths ei id (id ^ "b" ^ ei) t)
446 446 | Dot -> ()
447 447 | t -> failwith ("print_tree_rec: " ^ LCGstringOf.linear_term 0 t)
448   -
449   -let print_tree path name paths references =
450   - File.file_out (path ^ name ^ ".gv") (fun file ->
  448 +
  449 +let print_tree path name paths references =
  450 + File.file_out (path ^ name ^ ".gv") (fun file ->
451 451 fprintf file "digraph G {\n node [shape=box]\n";
452 452 Int.iter 0 (Array.length references - 1) (fun i -> print_tree_rec file paths "" "" ("x" ^ string_of_int i) references.(i));
453 453 (* match references.(i) with
454   - Node t ->
  454 + Node t ->
455 455 let orth = if t.id = 0 then "" else paths.(t.id).PreTypes.orth in
456 456 fprintf file " %d [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat;
457 457 let refs = get_refs [] t.args in
... ... @@ -463,13 +463,13 @@ let print_tree path name paths references =
463 463 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
464 464 Sys.chdir ".."
465 465  
466   -(*let print_tree filename paths references =
467   - File.file_out filename (fun file ->
  466 +(*let print_tree filename paths references =
  467 + File.file_out filename (fun file ->
468 468 fprintf file "digraph G {\n";
469 469 let set = Xlist.fold paths IntSet.empty (fun set t ->
470 470 IntSet.add (IntSet.add set t.PreTypes.beg) t.PreTypes.next) in
471 471 IntSet.iter set (fun i -> fprintf file " %d [width=0; height=0; label=\"\"]\n" i);
472   - Xlist.iter paths (fun t ->
  472 + Xlist.iter paths (fun t ->
473 473 let lemma = get_lemma t.PreTypes.token in
474 474 if lemma <> "" then (
475 475 let s = if t.PreTypes.orth = "" then lemma else t.PreTypes.orth ^ "\n" ^ lemma in
... ... @@ -480,8 +480,8 @@ let print_tree path name paths references =
480 480 ignore (Sys.command "dot -Tpng tree.gv -o tree.png");
481 481 Sys.chdir ".."*)
482 482  
483   -(*let print_tree filename paths references =
484   - File.file_out filename (fun file ->
  483 +(*let print_tree filename paths references =
  484 + File.file_out filename (fun file ->
485 485 fprintf file "digraph G {\n";
486 486 fprintf file " subgraph {\n ordering=out\n";
487 487 let same = Xlist.fold (Xlist.sort paths (fun s t -> compare s.PreTypes.beg t.PreTypes.beg)) [] (fun same t ->
... ... @@ -494,9 +494,9 @@ let print_tree path name paths references =
494 494 else same) in
495 495 fprintf file " }\n";
496 496 fprintf file " { rank = same; %s }\n" (String.concat "; " (Xlist.map same (fun i -> sprintf "\"i%d\"" i)));
497   - Int.iter 0 (Array.length references - 1) (fun i ->
  497 + Int.iter 0 (Array.length references - 1) (fun i ->
498 498 match references.(i) with
499   - Node t ->
  499 + Node t ->
500 500 fprintf file " %d [label=\"%s\"]\n" i t.pred;
501 501 fprintf file " %d -> i%d\n" i t.id;
502 502 let refs = get_refs [] t.args in
... ... @@ -508,9 +508,9 @@ let print_tree path name paths references =
508 508 ignore (Sys.command "dot -Tpng tree.gv -o tree.png");
509 509 Sys.chdir ".."*)
510 510  
511   -(*let rec schema_latex schema =
512   - "\\begin{tabular}{l}" ^
513   - String.concat "\\\\" (Xlist.map schema (fun s ->
  511 +let rec schema_latex schema =
  512 + "\\begin{tabular}{l}" ^
  513 + String.concat "\\\\" (Xlist.map schema (fun s ->
514 514 LatexMain.escape_string (String.concat "," (
515 515 (if s.WalTypes.gf = WalTypes.ARG then [] else [WalStringOf.gf s.WalTypes.gf])@
516 516 (if s.WalTypes.role = "" then [] else [s.WalTypes.role])@
... ... @@ -519,32 +519,32 @@ let print_tree path name paths references =
519 519 "\\end{tabular}"
520 520  
521 521 let fnum_frame_latex = function
522   - fnum,WalTypes.Frame(atrs,s) ->
  522 + fnum,WalTypes.Frame(atrs,s) ->
523 523 Printf.sprintf "%d: %s: %s" fnum (LatexMain.escape_string (WalStringOf.frame_atrs atrs)) (schema_latex s)
524   - | fnum,WalTypes.LexFrame(id,p,r,s) ->
  524 + | fnum,WalTypes.LexFrame(id,p,r,s) ->
525 525 Printf.sprintf "%d: %s: %s: %s: %s" fnum id (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s)
526   - | fnum,WalTypes.ComprepFrame(le,p,r,s) ->
  526 + | fnum,WalTypes.ComprepFrame(le,p,r,s) ->
527 527 Printf.sprintf "%d: %s: %s: %s: %s" fnum le (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s)
528   -
  528 +
529 529 let print_paths_latex name paths =
530 530 LatexMain.latex_file_out "results/" name "a0" false (fun file ->
531 531 fprintf file "\\begin{longtable}{|l|l|l|l|l|l|l|p{4cm}|l|l|l|l|}\n\\hline\north & beg & len & next & token & id & weight & attrs & lroles & senses & simple valence & valence\\\\\n";
532 532 Int.iter 0 (Array.length paths - 1) (fun i ->
533 533 let t = paths.(i) in
534   - fprintf file "%s & %d & %d & %d & %s & %d & %.4f & %s & %s %s &\\begin{tabular}{l|l|p{4cm}}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular}\\\\\n\\hline\n"
  534 + fprintf file "%s & %d & %d & %d & %s & %d & %.4f & %s & %s %s &\\begin{tabular}{l|l|p{4cm}}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular}\\\\\n\\hline\n"
535 535 t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (LatexMain.escape_string (string_of_token t.PreTypes.token)) t.PreTypes.id t.PreTypes.weight
536   - (String.concat ";" t.PreTypes.attrs) (fst t.PreTypes.lroles) (snd t.PreTypes.lroles)
  536 + (String.concat ";" t.PreTypes.attrs) (fst t.PreTypes.lroles) (snd t.PreTypes.lroles)
537 537 (String.concat "\\\\\n" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s & %.2f & %s" sense weight (String.concat "," hipero))))
538   - (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.simple_valence (fun x -> fnum_frame_latex x)))
  538 + (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.simple_valence (fun x -> fnum_frame_latex x)))
539 539 (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.valence (fun x -> fnum_frame_latex x))));
540 540 fprintf file "\\end{longtable}");
541   - LatexMain.latex_compile_and_clean "results/" name *)
542   -
  541 + LatexMain.latex_compile_and_clean "results/" name
  542 +
543 543 let print_mml path name mml =
544 544 File.file_out (path ^ name ^ ".mml") (fun file ->
545 545 fprintf file "<!DOCTYPE math PUBLIC \"-//W3C//DTD MathML 2.0//EN\" \"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd\">\n";
546 546 fprintf file "%s\n" (Xml.to_string_fmt mml))
547   -
  547 +
548 548 let page_header path =
549 549 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">
550 550 <html>
... ... @@ -556,11 +556,11 @@ let page_header path =
556 556  
557 557 <body>
558 558 <center>
559   - <h1>ENIAM: Kategorialny Parser Skล‚adniowo-Semantyczny</h1>
560   - <h3>Podaj tekst:</h3>
561   - <form method=POST action=\"" ^ path ^ "parser.cgi\">
562   - <p><input type=\"text\" name=\"text0\" value=\"\" size=\"40\"></p>
563   - <p><input type=\"submit\" value=\"Analizuj\" size=\"60\"></p>
  559 + <h1>ENIAM: Kategorialny Parser Skล‚adniowo-Semantyczny</h1>
  560 + <h3>Podaj tekst:</h3>
  561 + <form method=POST action=\"" ^ path ^ "parser.cgi\">
  562 + <p><input type=\"text\" name=\"text0\" value=\"\" size=\"40\"></p>
  563 + <p><input type=\"submit\" value=\"Analizuj\" size=\"60\"></p>
564 564 </form>"
565 565  
566 566 let page_trailer =
... ... @@ -569,25 +569,25 @@ let page_trailer =
569 569 Copyright &copy; 2016 Institute of Computer Science Polish Academy of Sciences<BR>
570 570 </center>
571 571 </body>
572   -</html>"
  572 +</html>"
573 573  
574 574 let print_webpage file cg_bin_path html_path id query n max_n mml =
575 575 fprintf file "%s\n" (page_header cg_bin_path);
576 576 fprintf file "\n<H3>%s</H3>\n" query;
577   - fprintf file "<P>%s %s\n"
  577 + fprintf file "<P>%s %s\n"
578 578 (if n = 1 then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Poprzednia interpretacja</A>" html_path id (n-1))
579 579 (if n = max_n then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Nastฤ™pna interpretacja</A>" html_path id (n+1));
580 580 fprintf file "<P><IMG SRC=\"%stree%s_%d.png\">\n" html_path id n;
581 581 fprintf file "<P>%s\n" (Xml.to_string_fmt mml);
582 582 fprintf file "<P><A HREF=\"%stree%s_%d.xml\">Graf w formacie XML</A>\n" html_path id n;
583 583 fprintf file "<P><A HREF=\"%sformula%s_%d.mml\">Formuล‚a w formacie MathML</A>\n" html_path id n;
584   - fprintf file "<P>%s %s\n"
  584 + fprintf file "<P>%s %s\n"
585 585 (if n = 1 then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Poprzednia interpretacja</A>" html_path id (n-1))
586 586 (if n = max_n then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Nastฤ™pna interpretacja</A>" html_path id (n+1));
587 587 fprintf file "%s\n" page_trailer
588 588  
589   -open ExecTypes
590   -
  589 +open ExecTypes
  590 +
591 591 let generate_status_message result = function
592 592 Idle -> "Server error: " ^ result.msg
593 593 | PreprocessingError -> "Error during preprocessing: " ^ result.msg
... ... @@ -602,13 +602,8 @@ let generate_status_message result = function
602 602 | NotTranslated -> "Unable to generate logical form"
603 603 | Parsed -> "parsed"
604 604  
605   -let print_other_result file cg_bin_path query result =
  605 +let print_other_result file cg_bin_path query result =
606 606 fprintf file "%s\n" (page_header cg_bin_path);
607 607 fprintf file "\n<H3>%s</H3>\n" query;
608 608 fprintf file "\n<P>%s\n" (generate_status_message result result.status);
609 609 fprintf file "%s\n" page_trailer
610   -
611   -
612   -
613   -
614   -
615 610 \ No newline at end of file
... ...
pre/inflexion.ml deleted
1   -(*
2   - * ENIAM: Categorial Syntactic-Semantic Parser for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This program is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This program is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open Xstd
21   -open Printf
22   -
23   -let load_alt filename =
24   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
25   - List.rev (Xlist.fold l [] (fun l line ->
26   - if String.length line = 0 then l else
27   - if String.get line 0 = '#' then l else
28   - match Str.split_delim (Str.regexp "\t") line with
29   - [orth; lemma; interp] -> (orth,lemma,interp) :: l
30   - | _ -> failwith ("load_alt: " ^ line)))
31   -
32   -let load_dict filename =
33   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
34   - List.rev (Xlist.fold l [] (fun l line ->
35   - if String.length line = 0 then l else
36   - if String.get line 0 = '#' then l else
37   - match Str.split_delim (Str.regexp "\t") line with
38   - [stem; lemma_suf2; rule_names] -> (stem,lemma_suf2,Str.split (Str.regexp " ") rule_names) :: l
39   - | _ -> failwith ("load_dict: " ^ line)))
40   -
41   -let load_rules filename =
42   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
43   - List.rev (Xlist.fold l [] (fun l line ->
44   - if String.length line = 0 then l else
45   - if String.get line 0 = '#' then l else
46   - match Str.split_delim (Str.regexp "\t") line with
47   - [rule_name; quantity; lemma_suf; orth_suf; interp] -> (rule_name,int_of_string quantity,lemma_suf,orth_suf,interp) :: l
48   - | _ -> failwith ("load_rules: " ^ line)))
49   -
50   -let make_rules_map rules =
51   - Xlist.fold rules StringMap.empty (fun rules (rule_name,quantity,lemma_suf,orth_suf,interp) ->
52   - let rules2 = try StringMap.find rules orth_suf with Not_found -> StringMap.empty in
53   - let rules2 = StringMap.add rules2 rule_name (lemma_suf,interp) in
54   - StringMap.add rules orth_suf rules2)
55   -
56   -module OrderedChar = struct
57   -
58   - type t = char
59   -
60   - let compare = compare
61   -
62   -end
63   -
64   -module CharMap = Xmap.Make(OrderedChar)
65   -
66   -type char_tree = N of char_tree CharMap.t * (string * string list) list * (string * string) list
67   - (* nastฤ™pne moลผliwoล›ci * (lemma_suf2 * lista reguล‚) list * lista alt *)
68   -
69   -type char_tree_suf = M of char_tree_suf CharMap.t * (string * int * string * string) list
70   - (* nastฤ™pne moลผliwoล›ci * (rule_name * lemma_suf * interp) list *)
71   -
72   -let empty_char_tree = N(CharMap.empty,[],[])
73   -let empty_char_tree_suf = M(CharMap.empty,[])
74   -
75   -let rec add_path_dict stem i n lemma_suf2 rule_names (N(map,rules,alts)) =
76   - if i = n then N(map,(lemma_suf2,rule_names) :: rules,alts) else
77   - let tree = try CharMap.find map (String.get stem i) with Not_found -> empty_char_tree in
78   - let tree = add_path_dict stem (i+1) n lemma_suf2 rule_names tree in
79   - N(CharMap.add map (String.get stem i) tree,rules,alts)
80   -
81   -let rec add_path_alt stem i n lemma interp (N(map,rules,alts)) =
82   - if i = n then N(map,rules,(lemma,interp) :: alts) else
83   - let tree = try CharMap.find map (String.get stem i) with Not_found -> empty_char_tree in
84   - let tree = add_path_alt stem (i+1) n lemma interp tree in
85   - N(CharMap.add map (String.get stem i) tree,rules,alts)
86   -
87   -let make_char_tree dict alt =
88   - let tree = Xlist.fold dict empty_char_tree (fun tree (stem,lemma_suf2,rule_names) ->
89   - add_path_dict stem 0 (String.length stem) lemma_suf2 rule_names tree) in
90   - Xlist.fold alt tree (fun tree (orth,lemma,interp) ->
91   - add_path_alt orth 0 (String.length orth) lemma interp tree)
92   -
93   -let rec add_path_rules rule_name quantity orth_suf i lemma_suf interp (M(map,rules)) =
94   - if i = -1 then M(map,(rule_name,quantity,lemma_suf,interp) :: rules) else
95   - let tree = try CharMap.find map (String.get orth_suf i) with Not_found -> empty_char_tree_suf in
96   - let tree = add_path_rules rule_name quantity orth_suf (i-1) lemma_suf interp tree in
97   - M(CharMap.add map (String.get orth_suf i) tree,rules)
98   -
99   -let make_char_tree_suf rules =
100   - let tree = Xlist.fold rules empty_char_tree_suf (fun tree (rule_name,quantity,lemma_suf,orth_suf,interp) ->
101   - add_path_rules rule_name quantity orth_suf (String.length orth_suf - 1) lemma_suf interp tree) in
102   - tree
103   -
104   -let rec find_char_tree_rec i n orth (N(map,rules,alts)) =
105   - if i = n then [orth,"",rules,alts] else
106   - let l = try find_char_tree_rec (i+1) n orth (CharMap.find map (String.get orth i)) with Not_found -> [] in
107   - (String.sub orth 0 i,String.sub orth i (n-i),rules,[]) :: l
108   -
109   -let find_char_tree tree rules orth =
110   - let l = find_char_tree_rec 0 (String.length orth) orth tree in
111   - Xlist.fold l [] (fun found (stem,suf,rule_names,alts) ->
112   - let rules2 = try StringMap.find rules suf with Not_found -> StringMap.empty in
113   - let found = alts @ found in
114   - Xlist.fold rule_names found (fun found (lemma_suf2,rule_names2) ->
115   - Xlist.fold rule_names2 found (fun found rule_name ->
116   - try
117   - let lemma_suf,interp = StringMap.find rules2 rule_name in
118   - let lemma = if lemma_suf2 = "" then stem ^ lemma_suf else stem ^ lemma_suf ^ ":" ^ lemma_suf2 in
119   - (lemma,interp) :: found
120   - with Not_found -> found)))
121   -
122   -let rec find_char_tree_suf_rec i orth (M(map,rules)) =
123   - if i = 0 then Xlist.fold rules [] (fun l (rule_name,quantity,lemma_suf,interp) -> ("", rule_name, quantity, lemma_suf, interp, i) :: l) else
124   - let l = try find_char_tree_suf_rec (i-1) orth (CharMap.find map (String.get orth (i-1))) with Not_found -> [] in
125   - Xlist.fold rules l (fun l (rule_name, quantity, lemma_suf,interp) ->
126   - (String.sub orth 0 i, rule_name, quantity, String.sub orth 0 i ^ lemma_suf, interp, i) :: l)
127   -
128   -let find_char_tree_suf rules_tree stem_map alt_map orth =
129   - let alt_l = Xlist.rev_map (try StringMap.find alt_map orth with Not_found -> []) (fun (lemma,interp) -> lemma,interp,1,[]) in
130   - let l = find_char_tree_suf_rec (String.length orth) orth rules_tree in
131   - let found = Xlist.fold l alt_l (fun found (stem,rule_name,quantity,lemma,interp,i) ->
132   - try
133   - let rule_names = StringMap.find stem_map stem in
134   - if StringSet.mem rule_names rule_name then (lemma,interp,1,[]) :: found else found (* FIXME: czy na pewno nie dodawaฤ‡ reguล‚ niepasujฤ…cych? to powoduje ลผe lemat tak samo brzmiฤ…cy a majฤ…cy innฤ… odmianฤ™ nie zostanie rozpoznany *)
135   - with Not_found -> if quantity < 100 || (String.length orth = i && stem = lemma) then found else (lemma,interp,quantity,["lemma not validated"]) :: found) in (* FIXME: uciฤ™cie ลผadkich reguล‚ powinno byฤ‡ inaczej sterowane *)
136   -(* if found = [] then [orth,"unk",1,["token not found"]] else *)
137   - let found = (orth,"unk",1,["token not found"]) :: found in
138   - let valid = Xlist.fold found [] (fun valid -> function
139   - lemma,interp,quantity,[] -> (lemma,interp,quantity,[]) :: valid
140   - | _ -> valid) in
141   - if valid = [] then found else valid
142   -
143   -let prepare_inflexion alt_filename dict_filename rules_filename =
144   - let alt = load_alt alt_filename in
145   - let dict = load_dict dict_filename in
146   - let rules = load_rules rules_filename in
147   - let tree = make_char_tree dict alt in
148   - let rules = make_rules_map rules in
149   - tree,rules
150   -
151   -let tree,rules =
152   -(* prepare_inflexion (morfeusz_path ^ Paths.alt_adj) (morfeusz_path ^ Paths.dict_adj) (morfeusz_path ^ Paths.rules_adj) *)
153   -(* prepare_inflexion (morfeusz_path ^ Paths.alt_all) (morfeusz_path ^ Paths.dict_all) (morfeusz_path ^ Paths.rules_all) *)
154   - empty_char_tree,StringMap.empty
155   -
156   -let make_alt_map alt =
157   - Xlist.fold alt StringMap.empty (fun alt_map (orth,lemma,interp) ->
158   - StringMap.add_inc alt_map orth [lemma,interp] (fun l -> (lemma,interp) :: l))
159   -
160   -let prepare_inflexion_suf alt_filename dict_filename rules_filename =
161   - let alt = load_alt alt_filename in
162   - let rules = load_rules rules_filename in
163   - let rules_tree = make_char_tree_suf rules in
164   - let alt_map = make_alt_map alt in
165   - let dict = load_dict dict_filename in
166   - let stem_map = Xlist.fold dict StringMap.empty (fun stem_map (stem,lemma_suf2,rule_names) ->
167   - StringMap.add_inc stem_map stem (StringSet.of_list rule_names) (fun set -> Xlist.fold rule_names set StringSet.add)) in
168   - alt_map,rules_tree,stem_map
169   -
170   -let alt_map,rules_tree,stem_map =
171   - prepare_inflexion_suf (Paths.sgjp_path ^ Paths.alt_all) (Paths.sgjp_path ^ Paths.dict_all) (Paths.sgjp_path ^ Paths.rules_all)
172   -
173   -let check_prefix pat s =
174   - let n = String.length pat in
175   - if n > String.length s then false else
176   - String.sub s 0 n = pat
177   -
178   -let cut_prefix pat s =
179   - let i = String.length pat in
180   - let n = String.length s in
181   - if i >= n then "" else
182   - try String.sub s i (n-i) with _ -> failwith ("cut_prefix: " ^ s ^ " " ^ string_of_int i)
183   -
184   -let check_sufix pat s =
185   - let n = String.length pat in
186   - let m = String.length s in
187   - if n > m then false else
188   - String.sub s (m-n) n = pat
189   -
190   -let cut_sufix pat s =
191   - let i = String.length pat in
192   - let n = String.length s in
193   - try String.sub s 0 (n-i) with _ -> failwith ("cut_sufix: " ^ s)
194   -
195   -let rec select_interp_sufix pat = function
196   - [] -> []
197   - | (lemma,interp) :: l -> if check_sufix pat interp then (lemma,interp) :: (select_interp_sufix pat l) else select_interp_sufix pat l
198   -
199   -let rec select_interp_sufix_suf pat = function
200   - [] -> []
201   - | (lemma,interp,quantity,attrs) :: l -> if check_sufix pat interp then (lemma,interp,quantity,attrs) :: (select_interp_sufix_suf pat l) else select_interp_sufix_suf pat l
202   -
203   -let rec remove_interp_sufix pat = function
204   - [] -> []
205   - | (lemma,interp) :: l -> if check_sufix pat interp then remove_interp_sufix pat l else (lemma,interp) :: (remove_interp_sufix pat l)
206   -
207   -let rec remove_interp_sufix_suf pat = function
208   - [] -> []
209   - | (lemma,interp,quantity,attrs) :: l ->
210   - if interp = "adv:sup" then (lemma,interp,quantity,attrs) :: (remove_interp_sufix_suf pat l) else (* FIXME: zaล›lepka, wymaga poprawienia algorytmu generowania sล‚ownikรณw *)
211   - if check_sufix pat interp then remove_interp_sufix_suf pat l else (lemma,interp,quantity,attrs) :: (remove_interp_sufix_suf pat l)
212   -
213   -let get_interpretations orth =
214   - (if check_prefix "naj" orth then select_interp_sufix ":sup" (find_char_tree tree rules (cut_prefix "naj" orth)) else []) @
215   - (if check_prefix "nie" orth then select_interp_sufix ":neg" (find_char_tree tree rules (cut_prefix "nie" orth)) else []) @
216   - (remove_interp_sufix ":neg" (remove_interp_sufix ":sup" (find_char_tree tree rules orth)))
217   -
218   -let get_interpretations_suf orth = (* FIXME: nie dziaล‚a dla adv:sup pisanych z wielkiej litery np Najdoskonalej Najlepiej *)
219   - if orth = "siebie" then ["siebie","siebie:acc.gen",1,[]] else
220   - if orth = "sobie" then ["siebie","siebie:dat.loc",1,[]] else
221   - if orth = "sobฤ…" then ["siebie","siebie:inst",1,[]] else
222   - (if check_prefix "naj" orth then select_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "naj" orth)) else []) @
223   - (if check_prefix "nie" orth then select_interp_sufix_suf ":neg" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "nie" orth)) else []) @
224   - (if check_prefix "Naj" orth then select_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "Naj" orth)) else []) @
225   - (if check_prefix "Nie" orth then select_interp_sufix_suf ":neg" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "Nie" orth)) else []) @
226   - (remove_interp_sufix_suf ":neg" (remove_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map orth)))
227   -
228   -(* Testy *)
229   -
230   -let print_interpretations l =
231   - Xlist.iter (Xlist.sort l compare) (fun (lemma,interp) ->
232   - printf "%s\t%s\n" lemma interp)
233   -
234   -(*let _ =
235   - let l = get_interpretations "ลผyczliwฤ…" in
236   - print_interpretations l;
237   - let l = get_interpretations "ลผyลบniejszego" in
238   - print_interpretations l;
239   - let l = get_interpretations "zwiล›li" in
240   - print_interpretations l;
241   - let l = get_interpretations "najzieleล„sza" in
242   - print_interpretations l;
243   - let l = get_interpretations "najtandetniejsza" in
244   - print_interpretations l;
245   - let l = get_interpretations "nieลผelazny" in
246   - print_interpretations l;
247   - ()*)
248   -
249   -(*let sgjp_filename = "sgjp-20151020.tab"
250   -let polimorf_filename = "polimorf-20151020.tab"
251   -
252   -let _ =
253   - let interp_sel = Morf.load_interp_sel "data/interps.tab" in
254   - print_endline "loading sgjp";
255   - let sgjp = Morf.load_tab (morfeusz_path ^ sgjp_filename) in
256   - print_endline "loading polimorf";
257   - let polimorf = Morf.load_tab (morfeusz_path ^ polimorf_filename) in
258   - print_endline "merging";
259   - let dicts = Morf.merge_dicts [sgjp;polimorf] in
260   - let adj_interp_sel = StringMap.find interp_sel "adj" in
261   - let adj_sup_interp_sel = StringMap.find interp_sel "adj-sup" in
262   -(* let dicts = Morf.remove_prefix dicts "naj" adj_sup_interp_sel in *)
263   - print_endline "preparing queries";
264   - let queries = StringMap.fold dicts StringMap.empty (fun queries lemma interps ->
265   - let interps = Morf.select_interps interps (adj_interp_sel @ adj_sup_interp_sel) in
266   - StringMap.fold interps queries (fun queries interp orths ->
267   - Xlist.fold orths queries (fun queries orth ->
268   - let s = lemma ^ "\t" ^ interp in
269   - StringMap.add_inc queries orth (StringSet.singleton s) (fun set -> StringSet.add set s)))) in
270   - print_endline "testing";
271   - StringMap.iter queries (fun orth set ->
272   - let set = Xlist.fold (get_interpretations orth) set (fun set (lemma,interp) ->
273   - let s = lemma ^ "\t" ^ interp in
274   - if StringSet.mem set s then StringSet.remove set s else (
275   - printf "excessing interpretation: %s\t%s" orth s;
276   - set)) in
277   - if StringSet.is_empty set then () else
278   - StringSet.iter set (fun s ->
279   - printf "lacking interpretation: %s\t%s" orth s))*)
280   -
281   -
282   -
283   -
284   -
285   -
286   -
287   -
288   -
289   -
290   -
291   -
292   -
293   -
294   -
295   -
296   -
297   -
298   -
299   -
300   -
301   -
302 0 \ No newline at end of file
pre/makefile
1 1 OCAMLC=ocamlc
2 2 OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4   -INCLUDES=-I +xml-light -I +xlib
  4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology
5 5 OCAMLFLAGS=$(INCLUDES) -g
6   -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa
  6 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa inflexion.cmxa
7 7 INSTALLDIR=`ocamlc -where`
8 8  
9 9 WAL= paths.ml walTypes.ml walStringOf.ml preTypes.ml preWordnet.ml walParser.ml walTEI.ml walFrames.ml
10   -PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml morf.ml inflexion.ml prePaths.ml preMWE.ml preSemantics.ml preProcessing.ml
11   -
12   -all:
  10 +PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preProcessing.ml
  11 +
  12 +all:
13 13 $(OCAMLOPT) -o pre $(OCAMLOPTFLAGS) $(WAL) $(PRE)
14 14  
15 15 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
... ...
pre/morf.ml deleted
1   -(*
2   - * ENIAM: Categorial Syntactic-Semantic Parser for Polish
3   - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This program is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This program is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open Xstd
21   -
22   -let load_tab filename =
23   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
24   - List.rev (Xlist.fold l [] (fun l line ->
25   - if String.length line = 0 then l else
26   - if String.get line 0 = '#' then l else
27   - match Str.split (Str.regexp "\t") line with
28   - orth :: lemma :: interp :: _ -> (orth,lemma,interp) :: l
29   - | _ -> failwith ("load_tab: " ^ line)))
30   -
31   -let load_tab_full filename =
32   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
33   - List.rev (Xlist.fold l [] (fun l line ->
34   - if String.length line = 0 then l else
35   - if String.get line 0 = '#' then l else
36   - match Str.split (Str.regexp "\t") line with
37   - [orth; lemma; interp] -> (orth,lemma,interp,"","") :: l
38   - | [orth; lemma; interp; cl] -> (orth,lemma,interp,cl,"") :: l
39   - | [orth; lemma; interp; cl; cl2] -> (orth,lemma,interp,cl,cl2) :: l
40   -(* | orth :: lemma :: interp :: cl :: cl2 -> (orth,lemma,interp,cl,String.concat ";" cl2) :: l *)
41   - | _ -> failwith ("load_tab_full: " ^ line)))
42   -
43   -let merge_dicts l =
44   - Xlist.fold l StringMap.empty (fun dicts tab ->
45   - Xlist.fold tab dicts (fun dicts (orth,lemma,interp) ->
46   - let interps = try StringMap.find dicts lemma with Not_found -> StringMap.empty in
47   - let interps = StringMap.add_inc interps interp [orth] (fun orths ->
48   - if Xlist.mem orths orth then orths else orth :: orths) in
49   - StringMap.add dicts lemma interps))
50   -
51   -let load_interp_sel filename =
52   - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in
53   - Xlist.fold l StringMap.empty (fun interp_sel line ->
54   - if String.length line = 0 then interp_sel else
55   - if String.get line 0 = '#' then interp_sel else
56   - match Str.split (Str.regexp "\t") line with
57   - [group;interp;label] -> StringMap.add_inc interp_sel group [interp,label] (fun l -> (interp,label) :: l)
58   - | _ -> failwith ("load_interp_sel: " ^ line))
59   -
60   -
61   -let rec merge_digraph = function
62   - [] -> []
63   - | "c" :: "h" :: l -> "ch" :: (merge_digraph l)
64   - | "c" :: "z" :: l -> "cz" :: (merge_digraph l)
65   - | "d" :: "z" :: l -> "dz" :: (merge_digraph l)
66   - | "d" :: "ลบ" :: l -> "dลบ" :: (merge_digraph l)
67   - | "d" :: "ลผ" :: l -> "dลผ" :: (merge_digraph l)
68   - | "r" :: "z" :: l -> "rz" :: (merge_digraph l)
69   - | "s" :: "z" :: l -> "sz" :: (merge_digraph l)
70   - | "b" :: "'" :: l -> "b'" :: (merge_digraph l)
71   - | "f" :: "'" :: l -> "f'" :: (merge_digraph l)
72   - | s :: l -> s :: (merge_digraph l)
73   -
74   -let text_to_chars s = Xunicode.classified_chars_of_utf8_string s
75   -(* (try UTF8.validate s with UTF8.Malformed_code -> failwith ("Invalid UTF8 string: " ^ s));
76   - let r = ref [] in
77   - UTF8.iter (fun c ->
78   - r := (UTF8.init 1 (fun _ -> c)) :: (!r)) s;
79   - merge_digraph (List.rev (!r))*)
80   -
81   -
82   -let check_prefix pat s =
83   - let n = String.length pat in
84   - if n > String.length s then false else
85   - String.sub s 0 n = pat
86   -
87   -let cut_prefix pat s =
88   - let i = String.length pat in
89   - let n = String.length s in
90   - if i >= n then "" else
91   - try String.sub s i (n-i) with _ -> failwith ("cut_prefix: " ^ s ^ " " ^ string_of_int i)
92   -
93   -let check_sufix pat s =
94   - let n = String.length pat in
95   - let m = String.length s in
96   - if n > m then false else
97   - String.sub s (m-n) n = pat
98   -
99   -let cut_sufix pat s =
100   - let i = String.length pat in
101   - let n = String.length s in
102   - try String.sub s 0 (n-i) with _ -> failwith ("cut_sufix: " ^ s)
103   -
104   -let apply_transform orth (s,t) =
105   - if check_sufix s orth then cut_sufix s orth ^ t else raise Not_found
106   -
107   -let split_colon s =
108   - match Str.split_delim (Str.regexp ":") s with
109   - [s] -> s, ""
110   - | [s;t] -> s, t
111   - | _ -> failwith "split_colon"
112   -
113   -let get_cat s =
114   - match Str.split_delim (Str.regexp ":") s with
115   - cat :: _ -> cat
116   - | _ -> failwith "get_cat"
117   -
118   -let select_interps interps interp_sel =
119   - Xlist.fold interp_sel StringMap.empty (fun new_interps (interp,_) ->
120   - try
121   - StringMap.add new_interps interp (StringMap.find interps interp)
122   - with Not_found -> new_interps)
pre/prePaths.ml
... ... @@ -20,17 +20,17 @@
20 20 open Xstd
21 21 open PreTypes
22 22  
23   -let to_string (paths,last) =
  23 +let to_string (paths,last) =
24 24 String.concat "\n" (Xlist.map paths (fun t -> PreTokenizer.string_of_tokens 0 (Token t)))
25 25 ^ Printf.sprintf "\nlast=%d" last
26 26  
27   -let to_string_indexed (paths,last) =
28   - String.concat "\n" (Xlist.map paths (fun (i,j,t) ->
  27 +let to_string_indexed (paths,last) =
  28 + String.concat "\n" (Xlist.map paths (fun (i,j,t) ->
29 29 Printf.sprintf "%2d %2d %s" i j (PreTokenizer.string_of_tokens 0 (Token t))))
30 30 ^ Printf.sprintf "\nlast=%d" last
31 31  
32 32 (*let indexed_token_record_to_xml i j t =
33   - let lemma,pos,tags =
  33 + let lemma,pos,tags =
34 34 match t.token with
35 35 Lemma(lemma,pos,tags) -> lemma,pos,tags
36 36 | _ -> failwith "indexed_token_record_to_xml" in
... ... @@ -46,11 +46,11 @@ let to_string_indexed (paths,last) =
46 46 Xlist.map t.senses (fun (sense,hipero,weight) ->
47 47 Xml.Element("sense",["name",sense;"weight",string_of_float weight],
48 48 Xlist.map hipero (fun s -> Xml.Element("hipero",[],[Xml.PCData s])))))])
49   -
50   -let to_xml (paths,last) =
  49 +
  50 +let to_xml (paths,last) =
51 51 Xml.Element("paths",["last",string_of_int last],
52 52 Xlist.map paths (fun (i,j,t) -> indexed_token_record_to_xml i j t)) *)
53   -
  53 +
54 54 let compare_token_record p r =
55 55 let v = compare p.beg r.beg in
56 56 if v <> 0 then v else
... ... @@ -58,30 +58,30 @@ let compare_token_record p r =
58 58 if v <> 0 then v else
59 59 compare p r
60 60  
61   -let sort (paths,last) =
  61 +let sort (paths,last) =
62 62 Xlist.sort paths compare_token_record, last
63 63  
64 64 let rec uniq_rec rev = function
65 65 [] -> List.rev rev
66 66 | [p] -> List.rev (p :: rev)
67 67 | p :: r :: l -> if p = r then uniq_rec rev (r :: l) else uniq_rec (p :: rev) (r :: l)
68   -
69   -let uniq (paths,last) =
  68 +
  69 +let uniq (paths,last) =
70 70 uniq_rec [] paths, last
71   -
  71 +
72 72 let rec translate_into_paths_rec paths = function
73 73 Token t -> t :: paths
74 74 | Seq l -> Xlist.fold l paths translate_into_paths_rec
75 75 | Variant l -> Xlist.fold l paths translate_into_paths_rec
76   -
77   -let translate_into_paths tokens =
78   - let paths = Xlist.fold tokens [] (fun paths token ->
  76 +
  77 +let translate_into_paths tokens =
  78 + let paths = Xlist.fold tokens [] (fun paths token ->
79 79 translate_into_paths_rec paths token) in
80 80 let last = if paths = [] then 0 else (List.hd paths).next in
81 81 let paths = sort (paths,last) in
82 82 let paths = uniq paths in
83   - paths
84   -
  83 + paths
  84 +
85 85 (**********************************************************************************)
86 86  
87 87 let excluded_interps = StringSet.of_list [
... ... @@ -214,7 +214,7 @@ let transformed_interps = Xlist.fold [
214 214 "praet:sg:n1.n2:imperf.perf:nagl","praet:sg:n1.n2:imperf.perf";
215 215 "praet:sg:n1.n2:imperf:nagl","praet:sg:n1.n2:imperf";
216 216 ] StringMap.empty (fun map (k,v) -> StringMap.add map k v)
217   -
  217 +
218 218 let merge_lemmata l =
219 219 let map = Xlist.fold l StringMap.empty (fun map (lemma,interp,quantity,attrs) ->
220 220 let interp = if interp = "num:comp" then "numc" else interp in
... ... @@ -229,88 +229,88 @@ let merge_lemmata l =
229 229 StringMap.add_inc map pos [tags] (fun l -> tags :: l))),
230 230 max 1 (quantity / Xlist.size interps),
231 231 attrs) in
232   - StringMap.fold map [] (fun l _ (lemma,map,quantity,attrs) ->
  232 + StringMap.fold map [] (fun l _ (lemma,map,quantity,attrs) ->
233 233 StringMap.fold map l (fun l cat interp ->
234 234 (lemma,cat,interp,quantity,attrs) :: l))
235 235  
236 236 let uppercase lemma cl ll =
237 237 let n = String.length lemma in
238 238 let nll = String.length ll in
239   - cl ^ String.sub lemma nll (n - nll)
240   -
241   -let quant_mod quantity =
  239 + cl ^ String.sub lemma nll (n - nll)
  240 +
  241 +let quant_mod quantity =
242 242 log10 (float quantity)
243   -
244   -let lemmatize_token = function
245   - | {token=AllSmall s} as t ->
246   - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) ->
  243 +
  244 +let lemmatize_token = function
  245 + | {token=AllSmall s} as t ->
  246 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
247 247 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs=attrs @ t.attrs}))
248   - | {token=SmallLetter s} as t ->
249   - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) ->
  248 + | {token=SmallLetter s} as t ->
  249 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
250 250 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs=attrs @ t.attrs}))
251   - | {token=FirstCap(s,lower,cl,ll)} as t ->
252   - let l = Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) ->
  251 + | {token=FirstCap(s,lower,cl,ll)} as t ->
  252 + let l = Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
253 253 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}) in
254 254 let b = Xlist.fold l false (fun b t -> if Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found" then b else true) in
255   - if b then t :: l else
256   - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf lower)) (fun (lemma,cat,interp,quantity,attrs) ->
  255 + if b then t :: l else
  256 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations lower)) (fun (lemma,cat,interp,quantity,attrs) ->
257 257 {t with token=Lemma(uppercase lemma cl ll,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: "lemmatized as lowercase" :: attrs @ t.attrs}))
258   - | {token=AllCap(s,_,_)} as t ->
259   - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) ->
  258 + | {token=AllCap(s,_,_)} as t ->
  259 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
260 260 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}))
261   - | {token=CapLetter(s,_)} as t ->
262   - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) ->
  261 + | {token=CapLetter(s,_)} as t ->
  262 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
263 263 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}))
264   - | {token=SomeCap s} as t ->
265   - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) ->
  264 + | {token=SomeCap s} as t ->
  265 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
266 266 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}))
267 267 | t -> [t]
268   -
269   -let rec lemmatize_rec rev = function
270   - [] -> List.rev rev
  268 +
  269 +let rec lemmatize_rec rev = function
  270 + [] -> List.rev rev
271 271 | t :: l -> lemmatize_rec (lemmatize_token t @ rev) l
272 272  
273 273 let lemmatize (paths,last) =
274 274 List.rev (Xlist.fold (lemmatize_rec [] paths) [] (fun paths t ->
275 275 match t.token with
276   - Lemma _ -> if (Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found") &&
  276 + Lemma _ -> if (Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found") &&
277 277 (Xlist.mem t.attrs "required validated lemmatization") then paths
278 278 else t(*{t with attrs=List.remove "required validated lemmatization" t.attrs}*) :: paths
279 279 | _ -> t :: paths)), last
280   -
  280 +
281 281 (* TODO: docelowa lematyzacja:
282 282 - lematyzacja za pomocฤ… pรณล‚rฤ™cznie wytworzonych reguล‚ lematyzacji i listy wyjฤ…tkรณw
283 283 - walidacja lematรณw za pomocฤ… listy znanych lematรณw zawierajฤ…cej lemat, kategoriฤ™, rodzaj subst, aspekt verb (obejmuje teลผ walidacjฤ™ akronimรณw)
284 284 - rozpoznawanie wyraลผeล„ wielosล‚ownych (mwe i mte) za pomocฤ… listy zawierajฤ…cej ich lematy i szablony odmiany
285 285 *)
286   -
  286 +
287 287 (**********************************************************************************)
288   -
  288 +
289 289  
290 290 (**********************************************************************************)
291   -
  291 +
292 292 (**********************************************************************************)
293   -
294   -
295   -
  293 +
  294 +
  295 +
296 296 (*let rec get_beg_id = function
297 297 Token t -> t.beg
298 298 | Seq(t :: _) -> get_beg_id t
299 299 | Variant(t :: _) -> get_beg_id t
300 300 | _ -> failwith "get_beg_id"
301   -
  301 +
302 302 let rec get_end_id = function
303 303 Token t -> t.beg + t.len
304 304 | Seq [] -> failwith "get_end_id"
305 305 | Seq l -> get_end_id (List.hd (List.rev l))
306 306 | Variant(t :: _) -> get_end_id t
307 307 | _ -> failwith "get_end_id"*)
308   -
  308 +
309 309 (*let rec lemmatize_tokens paths next_id = function
310 310 Token({token=Dig(v,cat)} as t)-> PrePaths.add_edge paths t.beg next_id t.orth v (parse_postags cat) t.beg t.len
311 311 | Token({token=Lemma(lemma,interp)} as t) -> PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags interp) t.beg t.len
312 312 | Token({token=Interp lemma} as t) -> PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags "interp") t.beg t.len
313   - | Token({token=AllSmall s} as t) ->
  313 + | Token({token=AllSmall s} as t) ->
314 314 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) ->
315 315 PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags postags) t.beg t.len)
316 316 | Token({token=FirstCap(s,s2)} as t) -> (* FIXME: dodaฤ‡ wersjฤ™ z s2 ; uporzฤ…dkowaฤ‡ sล‚ownik; dodaฤ‡ akronimy *)
... ... @@ -321,19 +321,19 @@ let rec get_end_id = function
321 321 | Seq(t :: next :: l) -> lemmatize_tokens (lemmatize_tokens paths (get_beg_id next) t) next_id (Seq(next :: l))
322 322 | Seq [] -> failwith "lemmatize_tokens"
323 323 | Variant l -> Xlist.fold l paths (fun paths t -> lemmatize_tokens paths next_id t)*)
324   -
  324 +
325 325 (*let rec lemmatize_paths_tokens paths = function (* FIXME: uzgodniฤ‡ postaฤ‡ lematรณw *)
326 326 Token({token=Dig(v,cat)} as t)-> PrePaths.add_edge paths t.beg t.next t.orth v (parse_postags cat) t.attrs t.beg t.len
327   - | Token({token=Lemma(lemma,interp)} as t) ->
  327 + | Token({token=Lemma(lemma,interp)} as t) ->
328 328 if Xlist.mem t.attrs "lemmatized as lowercase" || Xlist.mem t.attrs "lemma not validated" then paths else (* FIXME *)
329 329 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags interp) t.attrs t.beg t.len
330 330 | Token({token=Interp lemma} as t) -> PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags "interp") t.attrs t.beg t.len
331 331 | Token({token=Proper _} as t) -> failwith "lemmatize_paths_tokens: ni"
332 332 | Token({token=Compound _} as t) -> failwith "lemmatize_paths_tokens: ni"
333   -(* | Token({token=AllSmall s} as t) ->
  333 +(* | Token({token=AllSmall s} as t) ->
334 334 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) ->
335 335 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags postags) t.beg t.len)
336   - | Token({token=SmallLetter s} as t) ->
  336 + | Token({token=SmallLetter s} as t) ->
337 337 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) ->
338 338 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags postags) t.beg t.len)
339 339 | Token({token=FirstCap(s,s2)} as t) -> (* FIXME: dodaฤ‡ wersjฤ™ z s2 ; uporzฤ…dkowaฤ‡ sล‚ownik; dodaฤ‡ akronimy *)
... ... @@ -345,19 +345,19 @@ let rec get_end_id = function
345 345 | Token _ -> paths
346 346 | Seq l -> Xlist.fold l paths (fun paths t -> lemmatize_paths_tokens paths t)
347 347 | Variant l -> Xlist.fold l paths (fun paths t -> lemmatize_paths_tokens paths t)
348   -
  348 +
349 349 (*let rec lemmatize paths = function
350 350 t :: next :: l -> lemmatize (lemmatize_tokens paths (get_beg_id next) t) (next :: l)
351 351 | [t] -> lemmatize_tokens paths (get_end_id t) t
352 352 | [] -> failwith "lemmatize"*)
353   -
  353 +
354 354 let rec lemmatize_paths paths = function
355 355 t :: l -> lemmatize_paths (lemmatize_paths_tokens paths t) l
356 356 | [] -> paths *)
357   -
  357 +
358 358 (* FIXME: dodaฤ‡ 'co do' prep:gen *)
359   -
360   -
  359 +
  360 +
361 361 (* Dane do przekazania:
362 362 - lematy i interpretacje: generowanie typรณw i termรณw
363 363 - orths
... ... @@ -365,8 +365,8 @@ let rec lemmatize_paths paths = function
365 365 - struktura grafu: wyrรณลผniki przy niejednoznacznoล›ci
366 366 - sensy wraz z hiperonimami
367 367 - <indent> *)
368   -
369   -(*
  368 +
  369 +(*
370 370 Ala zjadล‚a kota.
371 371 Ala subst:sg:nom:f imiฤ™ -> istota
372 372 kot subst:sg:nom:m2 pospolita - kot 2 - istota 1 istota ลผywa 1 zwierzฤ™ 1 strunowiec 1 czaszkowiec 1 krฤ™gowiec 1 tetrapod 1 owodniowiec 1 ssak 1 ssak ลผyworodny 1 ล‚oลผyskowiec 1 ssak drapieลผny 1 kot 1 kot 2
... ... @@ -392,10 +392,10 @@ czas 3 godzina 4
392 392 do opisu czasu trwania:
393 393 jednostka czasu 1: godzina 3, sekunda 2, (minuta 4 - nie podล‚ฤ…czona) dzieล„ 2, miesiฤ…c 1, rok 1/2
394 394  
395   -*)
396   -
397   -(**
398   -
  395 +*)
  396 +
  397 +(**
  398 +
399 399 (* empty *)
400 400  
401 401 let empty = IntMap.empty, 0, 0
... ... @@ -406,11 +406,11 @@ let poss_record_empty = {interp=[]; attrs=[]; proper=[]; senses=[]}
406 406  
407 407 (* add *)
408 408  
409   -let dict_add dict lemma postags attrs beg len =
  409 +let dict_add dict lemma postags attrs beg len =
410 410 if postags = [] then dict else
411 411 let interps = try StringMap.find dict.lemmas lemma with Not_found -> StringMap.empty in
412 412 let interps = Xlist.fold postags interps (fun interps (pos,tags) ->
413   - StringMap.add_inc interps pos {poss_record_empty with interp=[tags]; attrs=attrs} (fun l ->
  413 + StringMap.add_inc interps pos {poss_record_empty with interp=[tags]; attrs=attrs} (fun l ->
414 414 {l with interp=tags :: l.interp; attrs=StringSet.to_list (StringSet.union (StringSet.of_list l.attrs) (StringSet.of_list attrs))})) in
415 415 if dict.dbeg <> beg && dict.dbeg <> -1 then failwith "dict_add" else
416 416 if dict.dlen <> len && dict.dlen <> -1 then failwith "dict_add" else
... ... @@ -424,52 +424,52 @@ let add_simple map i j orth lemma postags attrs beg len =
424 424 let orths = StringMap.add orths orth dict in
425 425 let map2 = IntMap.add map2 j orths in
426 426 IntMap.add map i map2
427   -
  427 +
428 428 let add_edge (map,last,n) i j orth lemma postags attrs beg len =
429 429 add_simple map i j orth lemma postags attrs beg len, max j last, max j n
430   -
  430 +
431 431 let rec add_path (map,last,n) i j = function
432 432 [] -> failwith "add_path"
433   - | [orth,lemma,postags,beg,len] ->
  433 + | [orth,lemma,postags,beg,len] ->
434 434 add_simple map i j orth lemma postags [] beg len, last, n
435   - | (orth,lemma,postags,beg,len) :: l ->
  435 + | (orth,lemma,postags,beg,len) :: l ->
436 436 add_path (add_simple map i (n+1) orth lemma postags [] beg len, last, n+1) (n+1) j l
437   -(*
  437 +(*
438 438 let insert (map,last,n) i j orth dict =
439 439 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in
440 440 let orths = try IntMap.find map2 j with Not_found -> StringMap.empty in
441 441 let orths = StringMap.add orths orth dict in
442   - let map2 = IntMap.add map2 j orths in
  442 + let map2 = IntMap.add map2 j orths in
443 443 IntMap.add map i map2, last, n
444   -
  444 +
445 445 let rec insert_path (map,last,n) i j = function
446 446 [] -> failwith "add_path"
447   - | [orth,dict] ->
  447 + | [orth,dict] ->
448 448 insert (map,last,n) i j orth dict
449   - | (orth,dict) :: l ->
  449 + | (orth,dict) :: l ->
450 450 insert_path (insert (map,last,n+1) i (n+1) orth dict) (n+1) j l
451   -
452   -let set_sentence_begin (map,last,n) i j orth =
  451 +
  452 +let set_sentence_begin (map,last,n) i j orth =
453 453 try
454 454 let map2 = IntMap.find map i in
455 455 let orths = IntMap.find map2 j in
456 456 let dict = StringMap.find orths orth in
457 457 let orths = StringMap.add orths orth {dict with sentence_begin=true} in
458 458 let map2 = IntMap.add map2 j orths in
459   - IntMap.add map i map2, last, n
  459 + IntMap.add map i map2, last, n
460 460 with Not_found -> failwith "set_sentence_begin"
461 461  
462   -let set_sentence_end (map,last,n) i j orth =
  462 +let set_sentence_end (map,last,n) i j orth =
463 463 try
464 464 let map2 = IntMap.find map i in
465 465 let orths = IntMap.find map2 j in
466 466 let dict = StringMap.find orths orth in
467 467 let orths = StringMap.add orths orth {dict with sentence_end=true} in
468 468 let map2 = IntMap.add map2 j orths in
469   - IntMap.add map i map2, last, n
  469 + IntMap.add map i map2, last, n
470 470 with Not_found -> failwith "set_sentence_end"
471 471  
472   -let is_sentence_end (map,last,n) i j orth =
  472 +let is_sentence_end (map,last,n) i j orth =
473 473 try
474 474 let map2 = IntMap.find map i in
475 475 let orths = IntMap.find map2 j in
... ... @@ -503,7 +503,7 @@ let rec find_paths_bound (map,last,n) k i =
503 503 let tails = find_paths_bound (map,last,n) (k-1) j in
504 504 StringMap.fold set paths (fun paths s _ ->
505 505 Xlist.fold tails paths (fun paths tail -> (s :: tail) :: paths)))
506   -
  506 +
507 507 let rec find_paths_rec (map,last,n) i =
508 508 if i = last then [[]] else
509 509 if not (IntMap.mem map i) then failwith "find_paths_rec" else
... ... @@ -511,14 +511,14 @@ let rec find_paths_rec (map,last,n) i =
511 511 let tails = find_paths_rec (map,last,n) j in
512 512 StringMap.fold set paths (fun paths s _ ->
513 513 Xlist.fold tails paths (fun paths tail -> (s :: tail) :: paths)))
514   -
  514 +
515 515 let find_paths (map,last,n) =
516 516 find_paths_rec (map,last,n) 0
517   -*)
518   -let has_lemma orths =
  517 +*)
  518 +let has_lemma orths =
519 519 StringMap.fold orths false (fun b _ dict ->
520 520 if StringMap.is_empty dict.lemmas then b else true)
521   -
  521 +
522 522 let rec no_possible_path_rec map last i =
523 523 if last = i then false else
524 524 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in
... ... @@ -526,27 +526,27 @@ let rec no_possible_path_rec map last i =
526 526 if has_lemma orths then
527 527 b && no_possible_path_rec map last j
528 528 else b)
529   -
  529 +
530 530 let no_possible_path (map,last,n) =
531 531 no_possible_path_rec map last 0
532   -(*
  532 +(*
533 533 let rec match_path_rec map found i rev = function
534   - [] -> (i :: rev) :: found
  534 + [] -> (i :: rev) :: found
535 535 | s :: l ->
536 536 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in
537 537 let found2 = IntMap.fold map2 [] (fun found2 j set ->
538 538 if StringMap.mem set s then j :: found2 else found2) in
539 539 Xlist.fold found2 found (fun found j -> match_path_rec map found j (i :: rev) l)
540   -
  540 +
541 541 let match_path (map,last,n) = function
542 542 [] -> failwith "match_path"
543   - | s :: l ->
  543 + | s :: l ->
544 544 let found = IntMap.fold map [] (fun found i map2 ->
545 545 IntMap.fold map2 found (fun found j set ->
546 546 if StringMap.mem set s then (i,j) :: found else found)) in
547 547 Xlist.fold found [] (fun found (i,j) -> match_path_rec map found j [i] l)
548 548  
549   -let get_matched orths = function
  549 +let get_matched orths = function
550 550 Orth s -> if StringMap.mem orths s then [s] else []
551 551 | Pos s -> (*print_endline ("a1 " ^ s);*) StringSet.to_list (StringMap.fold orths StringSet.empty (fun set orth dict ->
552 552 StringMap.fold dict.lemmas set (fun set lemma interps ->
... ... @@ -554,19 +554,19 @@ let get_matched orths = function
554 554 (* print_endline ("a2 " ^ pos); *)
555 555 if s = pos then StringSet.add set orth else set))))
556 556 (* | All -> orths *)
557   -
  557 +
558 558 let rec match_path_ex_rec map found i rev = function
559   - [] -> ((i,[]) :: rev) :: found
  559 + [] -> ((i,[]) :: rev) :: found
560 560 | s :: l ->
561 561 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in
562 562 let found2 = IntMap.fold map2 [] (fun found2 j orths ->
563 563 let l = get_matched orths s in
564 564 if l <> [] then (j,l) :: found2 else found2) in
565 565 Xlist.fold found2 found (fun found (j,l2) -> match_path_ex_rec map found j ((i,l2) :: rev) l)
566   -
  566 +
567 567 let match_path_ex (map,last,n) = function
568 568 [] -> failwith "match_path_ex"
569   - | s :: l ->
  569 + | s :: l ->
570 570 let found = IntMap.fold map [] (fun found i map2 ->
571 571 IntMap.fold map2 found (fun found j orths ->
572 572 let l = get_matched orths s in
... ... @@ -578,49 +578,49 @@ let last_node (_,last,_) = last
578 578 let set_last_node (map,last,n) new_last = map, new_last, n
579 579  
580 580 let find (map,last,n) i =
581   - try
  581 + try
582 582 IntMap.fold (IntMap.find map i) [] (fun found j orths ->
583 583 StringMap.fold orths found (fun found orth _ ->
584 584 (i,j,orth) :: found))
585 585 with Not_found -> []
586 586  
587 587 let find_full (map,last,n) i =
588   - try
  588 + try
589 589 IntMap.fold (IntMap.find map i) [] (fun found j orths ->
590 590 StringMap.fold orths found (fun found orth dict ->
591 591 (i,j,orth,dict) :: found))
592 592 with Not_found -> []
593 593 *)
594   -let fold (map,last,n) s f =
  594 +let fold (map,last,n) s f =
595 595 IntMap.fold map s (fun s i map2 ->
596 596 IntMap.fold map2 s (fun s j set ->
597 597 StringMap.fold set s (fun s orth lemmas ->
598 598 f s orth i j lemmas)))
599   -(*
600   -let map (map,last,n) f =
  599 +(*
  600 +let map (map,last,n) f =
601 601 IntMap.map map (fun map2 ->
602 602 IntMap.map map2 (fun orths ->
603 603 StringMap.map orths (fun lemmas ->
604 604 f lemmas))), last, n
605   -
606   -let mapi (map,last,n) f =
  605 +
  606 +let mapi (map,last,n) f =
607 607 IntMap.mapi map (fun i map2 ->
608 608 IntMap.mapi map2 (fun j orths ->
609 609 StringMap.mapi orths (fun orth lemmas ->
610 610 f orth i j lemmas))), last, n
611   -
612   -let get_edges (map,_,_) i j =
  611 +
  612 +let get_edges (map,_,_) i j =
613 613 IntMap.find (IntMap.find map i) j
614   -
615   -let get_edges_from (map,_,_) i =
  614 +
  615 +let get_edges_from (map,_,_) i =
616 616 IntMap.find map i
617   -*)
  617 +*)
618 618 let rec topological_sort_rec map visited l i =
619   - if IntSet.mem visited i then (l,visited) else
  619 + if IntSet.mem visited i then (l,visited) else
620 620 let l, visited = IntMap.fold (try IntMap.find map i with Not_found -> IntMap.empty) (l,IntSet.add visited i) (fun (l,visited) j _ ->
621 621 topological_sort_rec map visited l j) in
622 622 i :: l, visited
623   -
  623 +
624 624 let topological_sort (map,last,n) =
625 625 let l, _ = topological_sort_rec map IntSet.empty [] 0 in
626 626 let translation, k = Xlist.fold l (IntMap.empty,0) (fun (translation,k) i ->
... ... @@ -630,27 +630,27 @@ let topological_sort (map,last,n) =
630 630 try IntMap.add map2 (IntMap.find translation j) orths with Not_found -> map2) in
631 631 try IntMap.add map (IntMap.find translation i) map2 with Not_found -> map) in
632 632 map, (try IntMap.find translation last with Not_found -> failwith "topological_sort 3"), k-1
633   -
  633 +
634 634 (*let interp_to_string interp =
635 635 String.concat " " (Xlist.fold interp.interp [] (fun l tags ->
636 636 (String.concat ":" (Xlist.map tags (String.concat "."))) :: l))
637   -
  637 +
638 638 let interps_to_string interps =
639 639 String.concat " " (StringMap.fold interps [] (fun l pos interp ->
640 640 (pos ^ "[" ^ interp_to_string interp ^ "]") :: l))
641   -
  641 +
642 642 let lemmas_to_string lemmas =
643 643 String.concat " " (StringMap.fold lemmas [] (fun l lemma interps ->
644 644 (lemma ^ "[" ^ interps_to_string interps ^ "]") :: l))
645   -
646   -let to_string (map,last,n) =
  645 +
  646 +let to_string (map,last,n) =
647 647 let l = IntMap.fold map [] (fun l i map2 ->
648 648 IntMap.fold map2 l (fun l j orths ->
649   - (Printf.sprintf "%5d %5d %s" i j (String.concat " " (StringMap.fold orths [] (fun l2 orths dict ->
  649 + (Printf.sprintf "%5d %5d %s" i j (String.concat " " (StringMap.fold orths [] (fun l2 orths dict ->
650 650 (Printf.sprintf "%s %5d %5d [%s]" orths dict.dbeg dict.dlen (lemmas_to_string dict.lemmas)) :: l2)))) :: l)) in
651 651 Printf.sprintf "last=%d n=%d\n %s" last n (String.concat "\n " (List.sort compare l))*)
652   - (*
653   -let make_unique_orths (map,last,n) =
  652 + (*
  653 +let make_unique_orths (map,last,n) =
654 654 let names = fold (map,last,n) StringQMap.empty (fun names orth _ _ _ ->
655 655 StringQMap.add names orth) in
656 656 let names = StringQMap.fold names StringSet.empty (fun names name n ->
... ... @@ -658,10 +658,10 @@ let make_unique_orths (map,last,n) =
658 658 let map,_ = IntMap.fold map (IntMap.empty,StringMap.empty) (fun (map,used) i map2 ->
659 659 let map2,used = IntMap.fold map2 (IntMap.empty,used) (fun (map2,used) j orths ->
660 660 let orths,used = StringMap.fold orths (StringMap.empty,used) (fun (orths,used) orth lemmas ->
661   - let orth,used =
  661 + let orth,used =
662 662 if StringSet.mem names orth then
663   - let n =
664   - try StringMap.find used orth + 1
  663 + let n =
  664 + try StringMap.find used orth + 1
665 665 with Not_found -> 1 in
666 666 orth ^ "-" ^ string_of_int n, StringMap.add used orth n
667 667 else orth,used in
... ... @@ -670,6 +670,6 @@ let make_unique_orths (map,last,n) =
670 670 IntMap.add map i map2, used) in
671 671 map,last,n
672 672  
673   -*)
674   -
675   -**)
676 673 \ No newline at end of file
  674 +*)
  675 +
  676 +**)
... ...