Commit 479c42bcdbe0c67eba3dea6876b89ffeb40d5cbc
1 parent
099c6f48
dostosowanie pre do pracy z morphology
Showing
13 changed files
with
326 additions
and
726 deletions
config
0 โ 100644
1 | +# General resource path | |
2 | +RESOURCES_PATH=../resources/ | |
3 | + | |
4 | +# Localization of Walenty in TEI format | |
5 | +WALENTY=/usr/share/walenty/walenty_20160412.xml | |
6 | + | |
7 | +# Port number for pre server | |
8 | +PRE_PORT=3258 | |
9 | + | |
10 | +# Host name for pre server | |
11 | +PRE_HOST=localhost | |
12 | + | |
13 | +# Path to the directory for parsed sentences | |
14 | +RESULTS_PATH=../results/ | |
15 | + | |
16 | +# Maximum number of generated solutions | |
17 | +MAX_NO_SOLUTIONS=10 | |
18 | + | |
19 | +# LCG parser timeout in seconds | |
20 | +LCG_TIMEOUT=100 | |
21 | + | |
22 | +# LCG parser memory size (maximum number of nodes of parsed term) | |
23 | +LCG_NO_NODES=10000000 | |
24 | + | |
25 | +# Number of parser processes | |
26 | +NO_PROCESSES=4 | |
... | ... |
morphology/dict.ml
... | ... | @@ -315,9 +315,9 @@ let exceptional_lemmata = StringSet.of_list ([ |
315 | 315 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; |
316 | 316 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; |
317 | 317 | ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*) |
318 | - ] @ File.load_lines "data/obce.tab" (* @ | |
319 | - File.load_lines "data/validated_adj.tab" @ File.load_lines "data/validated_noun.tab" @ | |
320 | - File.load_lines "data/validated_verb.tab" @ File.load_lines "data/adv_nieodprzymiotnikowe.tab" *)) | |
318 | + ] @ File.load_lines "../morphology/data/obce.tab" (* @ | |
319 | + File.load_lines "../morphology/data/validated_adj.tab" @ File.load_lines "../morphology/data/validated_noun.tab" @ | |
320 | + File.load_lines "../morphology/data/validated_verb.tab" @ File.load_lines "../morphology/data/adv_nieodprzymiotnikowe.tab" *)) | |
321 | 321 | |
322 | 322 | let remove_exceptional_lemmata dict = |
323 | 323 | Xlist.fold dict [] (fun dict entry -> |
... | ... |
morphology/inflexion.a
0 โ 100644
No preview for this file type
morphology/inflexion.cmxa
0 โ 100644
No preview for this file type
morphology/ruleGenerator.ml
... | ... | @@ -2,7 +2,7 @@ open Xstd |
2 | 2 | open Printf |
3 | 3 | open Types |
4 | 4 | |
5 | -let alternation_map = Rules.alternation_map | |
5 | +(* let alternation_map = Rules.alternation_map *) | |
6 | 6 | |
7 | 7 | let rule_types = Xlist.fold [ |
8 | 8 | (* Xlist.map (StringMap.find alternation_map "obce_ch") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{x}ych\t{x}"; |
... | ... |
morphology/rules.ml
... | ... | @@ -44,7 +44,7 @@ let load_alternations filename = |
44 | 44 | | _ -> failwith "load_alternations") in |
45 | 45 | (name,List.rev alts) :: alternations |
46 | 46 | |
47 | -let alternations () = load_alternations "data/alternations.dic" | |
47 | +let alternations () = load_alternations "../morphology/data/alternations.dic" | |
48 | 48 | |
49 | 49 | let revert_alternations l = |
50 | 50 | Xlist.map l (fun a -> {a with afind=a.aset; aset=a.afind}) |
... | ... | @@ -76,9 +76,9 @@ let load_pref_rules filename = |
76 | 76 | | _ -> failwith "load_pref_rules") in |
77 | 77 | (name,List.rev rules) :: pref_rules |
78 | 78 | |
79 | -let rules () = load_suf_rules "data/rules.dic" | |
80 | -let rev_rules () = load_suf_rules "data/rev_rules.dic" | |
81 | -let pref_rules () = load_pref_rules "data/pref_rules.dic" | |
79 | +let rules () = load_suf_rules "../morphology/data/rules.dic" | |
80 | +let rev_rules () = load_suf_rules "../morphology/data/rev_rules.dic" | |
81 | +let pref_rules () = load_pref_rules "../morphology/data/pref_rules.dic" | |
82 | 82 | |
83 | 83 | let load_freq_rules filename = |
84 | 84 | File.fold_tab filename [] (fun rules -> function |
... | ... | @@ -120,7 +120,7 @@ let rule_map alternation_map rev_alternation_map rules rev_rules pref_rules = |
120 | 120 | let map = Xlist.fold rev_rules map (fun map (k,v) -> StringMap.add map k (prepare_rev_rules rev_alternation_map v)) in |
121 | 121 | Xlist.fold pref_rules map (fun map (k,v) -> StringMap.add map k (prepare_pref_rules v)) |
122 | 122 | |
123 | -let schemata () = File.load_tab "data/schemata.dic" (fun l -> l) | |
123 | +let schemata () = File.load_tab "../morphology/data/schemata.dic" (fun l -> l) | |
124 | 124 | |
125 | 125 | (**********************************************************************************************) |
126 | 126 | |
... | ... | @@ -236,7 +236,7 @@ module InterpTree = struct |
236 | 236 | |
237 | 237 | end |
238 | 238 | |
239 | -let interp_tree () = InterpTree.create (load_interp_rules "data/interp_rules.dic") | |
239 | +let interp_tree () = InterpTree.create (load_interp_rules "../morphology/data/interp_rules.dic") | |
240 | 240 | |
241 | 241 | (**********************************************************************************************) |
242 | 242 | |
... | ... |
parser/.gitignore
0 โ 100644
parser/makefile
1 | 1 | OCAMLC=ocamlc |
2 | 2 | OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | -#INCLUDES=-I +xml-light -I +xlib -I ../../lib/latexvis -I ../lib/xt -I ../../Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre | |
5 | -INCLUDES=--I +xml-light -I +xlib -I ../pre | |
4 | +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre | |
5 | +#INCLUDES=-I +xml-light -I +xlib -I ../pre | |
6 | 6 | OCAMLFLAGS=$(INCLUDES) -g |
7 | -#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa latexvis.cmxa nkjp.cmxa | |
8 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa | |
7 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa | |
8 | +#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa | |
9 | 9 | |
10 | 10 | PRE= ../pre/paths.ml ../pre/walTypes.ml ../pre/preTypes.ml ../pre/walStringOf.ml |
11 | -#LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml | |
12 | -LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml | |
11 | +LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml | |
12 | +#LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml | |
13 | 13 | DISAMB= disambSelPref.ml disambLemma.ml |
14 | -#SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml | |
15 | -SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml | |
16 | -EXEC= execTypes.ml visualization.ml exec.ml | |
17 | - | |
18 | -all: | |
19 | -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml | |
14 | +SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml | |
15 | +#SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml | |
16 | +EXEC= execTypes.ml visualization.ml exec.ml | |
17 | + | |
18 | +all: | |
19 | + $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml | |
20 | 20 | # $(OCAMLOPT) -o server $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml |
21 | -# $(OCAMLOPT) -o parser.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml | |
22 | - $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml | |
23 | - $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml | |
24 | -# $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml | |
25 | - $(OCAMLOPT) -o eniam $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) singleInterface.ml | |
26 | - | |
27 | -# pipe: | |
28 | -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(PROC_F) | |
29 | -# | |
21 | +# $(OCAMLOPT) -o parser.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml | |
22 | +# $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml | |
23 | +# $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml | |
24 | +# $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml | |
25 | +# $(OCAMLOPT) -o eniam $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) singleInterface.ml | |
26 | + | |
27 | +# pipe: | |
28 | +# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(PROC_F) | |
29 | +# | |
30 | 30 | # of_xml: |
31 | 31 | # $(OCAMLOPT) -o of_xml $(OCAMLOPTFLAGS) LCGofXml.ml |
32 | 32 | |
... | ... |
parser/visualization.ml
... | ... | @@ -22,11 +22,11 @@ open Xstd |
22 | 22 | open Printf |
23 | 23 | open PreTypes |
24 | 24 | |
25 | -let string_of_interps interps = | |
25 | +let string_of_interps interps = | |
26 | 26 | String.concat "|" (Xlist.map interps (fun interp -> |
27 | 27 | (String.concat ":" (Xlist.map interp (fun interp2 -> |
28 | 28 | (String.concat "." interp2)))))) |
29 | - | |
29 | + | |
30 | 30 | let rec string_of_token = function |
31 | 31 | PreTypes.SmallLetter orth -> sprintf "SmallLetter(%s)" orth |
32 | 32 | | PreTypes.CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc |
... | ... | @@ -43,22 +43,22 @@ let rec string_of_token = function |
43 | 43 | | PreTypes.Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses) |
44 | 44 | | PreTypes.Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token)) |
45 | 45 | |
46 | -let rec spaces i = | |
46 | +let rec spaces i = | |
47 | 47 | if i = 0 then "" else " " ^ spaces (i-1) |
48 | - | |
48 | + | |
49 | 49 | let rec string_of_tokens i = function |
50 | - PreTypes.Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;id=%d;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token) | |
50 | + PreTypes.Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;id=%d;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token) | |
51 | 51 | t.PreTypes.id t.PreTypes.weight (String.concat ";" t.PreTypes.attrs) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight))) |
52 | 52 | (spaces i) (String.concat ";" (Xlist.map t.PreTypes.valence (WalStringOf.fnum_frame ""))) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.simple_valence (WalStringOf.fnum_frame ""))) (fst t.lroles) (snd t.lroles) |
53 | 53 | | PreTypes.Variant l -> sprintf "%sVariant[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) |
54 | - | PreTypes.Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) | |
55 | - | |
56 | -let paths_to_string_indexed (paths,last,next_id) = | |
57 | - String.concat "\n" (Xlist.map paths (fun (i,j,t) -> | |
54 | + | PreTypes.Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) | |
55 | + | |
56 | +let paths_to_string_indexed (paths,last,next_id) = | |
57 | + String.concat "\n" (Xlist.map paths (fun (i,j,t) -> | |
58 | 58 | Printf.sprintf "%2d %2d %s" i j (string_of_tokens 0 (PreTypes.Token t)))) |
59 | 59 | ^ Printf.sprintf "\nlast=%d next_id=%d" last next_id |
60 | - | |
61 | -let rec xml_of_graph = function | |
60 | + | |
61 | +let rec xml_of_graph = function | |
62 | 62 | Node t -> Xml.Element("node",["pred",t.pred;"cat",t.cat;"weight",string_of_float t.weight;"id",string_of_int t.id],[ |
63 | 63 | Xml.Element("gs",[],[xml_of_graph t.gs]); |
64 | 64 | Xml.Element("agf",[],[Xml.PCData (WalStringOf.gf t.agf)]); |
... | ... | @@ -75,11 +75,11 @@ let rec xml_of_graph = function |
75 | 75 | Xml.Element("relations",[],[xml_of_graph c.cx_relations])]) |
76 | 76 | | Relation(r,a,t) -> Xml.Element("relation",[],[ |
77 | 77 | Xml.Element("role",[],[xml_of_graph r]); |
78 | - Xml.Element("role_attr",[],[xml_of_graph r]); | |
78 | + Xml.Element("role_attr",[],[xml_of_graph r]); | |
79 | 79 | xml_of_graph t]) |
80 | 80 | | RevRelation(r,a,t) -> Xml.Element("revrelation",[],[ |
81 | 81 | Xml.Element("role",[],[xml_of_graph r]); |
82 | - Xml.Element("role_attr",[],[xml_of_graph r]); | |
82 | + Xml.Element("role_attr",[],[xml_of_graph r]); | |
83 | 83 | xml_of_graph t]) |
84 | 84 | | SingleRelation(r) -> Xml.Element("singlerelation",[],[xml_of_graph r]) |
85 | 85 | | Tuple l -> Xml.Element("tuple",[],Xlist.map l xml_of_graph) |
... | ... | @@ -90,18 +90,18 @@ let rec xml_of_graph = function |
90 | 90 | | Ref i -> Xml.Element("ref",["id",string_of_int i],[]) |
91 | 91 | | Morf _ -> Xml.Element("dot",[],[]) (* FIXME!!! *) |
92 | 92 | | t -> failwith ("xml_of_graph: " ^ LCGstringOf.linear_term 0 t) |
93 | - | |
94 | -let print_xml_graph path name references = | |
93 | + | |
94 | +let print_xml_graph path name references = | |
95 | 95 | let l = Int.fold 0 (Array.length references - 1) [] (fun l i -> |
96 | 96 | (i, xml_of_graph references.(i)) :: l) in |
97 | 97 | let xml = Xml.Element("graph",[],Xlist.rev_map l (fun (i,xml) -> |
98 | 98 | Xml.Element("graph_node",["id",string_of_int i],[xml]))) in |
99 | - File.file_out (path ^ name ^ ".xml") (fun file -> | |
99 | + File.file_out (path ^ name ^ ".xml") (fun file -> | |
100 | 100 | fprintf file "%s\n" (Xml.to_string_fmt xml)) |
101 | 101 | |
102 | -let print_xml_tree path name tree = | |
102 | +let print_xml_tree path name tree = | |
103 | 103 | let xml = xml_of_graph tree in |
104 | - File.file_out (path ^ name ^ ".xml") (fun file -> | |
104 | + File.file_out (path ^ name ^ ".xml") (fun file -> | |
105 | 105 | fprintf file "%s\n" (Xml.to_string_fmt xml)) |
106 | 106 | |
107 | 107 | |
... | ... | @@ -112,14 +112,14 @@ let rec get_refs rev = function |
112 | 112 | | Variant(e,l) -> Xlist.fold l rev (fun rev (i,t) -> get_refs rev t) |
113 | 113 | | Dot -> rev |
114 | 114 | | _ -> (*failwith*)print_endline "get_refs"; rev |
115 | - | |
116 | -let escape_string s = | |
115 | + | |
116 | +let escape_string s = | |
117 | 117 | Int.fold 0 (String.length s - 1) "" (fun t i -> |
118 | 118 | match String.sub s i 1 with |
119 | 119 | "<" -> t ^ "โฉ" |
120 | 120 | | ">" -> t ^ "โช" |
121 | 121 | | c -> t ^ c) |
122 | - | |
122 | + | |
123 | 123 | let string_of_node t = |
124 | 124 | let l = [ |
125 | 125 | "PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs; |
... | ... | @@ -130,64 +130,64 @@ let string_of_node t = |
130 | 130 | "{ " ^ String.concat " | " (Xlist.map l (fun (e,t) -> "{ " ^ e ^ " | " ^ escape_string (LCGstringOf.linear_term 0 t) ^ " }")) ^ " }" |
131 | 131 | |
132 | 132 | let single_rel_id_count = ref 0 |
133 | - | |
133 | + | |
134 | 134 | let get_single_rel_id () = |
135 | 135 | let id = !single_rel_id_count in |
136 | 136 | incr single_rel_id_count; |
137 | 137 | "s" ^ string_of_int id |
138 | - | |
138 | + | |
139 | 139 | let print_edge file label upper id = |
140 | - if upper <> "" then | |
140 | + if upper <> "" then | |
141 | 141 | if label = "" then fprintf file " %s -> %s\n" upper id |
142 | 142 | else fprintf file " %s -> %s [label=\"%s\"]\n" upper id label |
143 | - | |
144 | -(*let rec print_graph_rec2 file edge upper = function | |
143 | + | |
144 | +(*let rec print_graph_rec2 file edge upper = function | |
145 | 145 | Tuple l -> Xlist.iter l (print_graph_rec2 file edge upper) |
146 | - | Node t -> | |
146 | + | Node t -> | |
147 | 147 | let id = get_single_rel_id () in |
148 | 148 | fprintf file " %s [label=\"%s\"]\n" id (string_of_node t); |
149 | 149 | print_edge file edge upper id; |
150 | 150 | print_graph_rec2 file "" id t.args |
151 | - | Concept t -> | |
151 | + | Concept t -> | |
152 | 152 | let id = get_single_rel_id () in |
153 | - fprintf file " %s [shape=box,label=\"%s %s\"]\n" id | |
154 | - (LCGchart.string_of_linear_term 0 t.c_sense) | |
153 | + fprintf file " %s [shape=box,label=\"%s %s\"]\n" id | |
154 | + (LCGchart.string_of_linear_term 0 t.c_sense) | |
155 | 155 | (if t.c_name=Dot then "" else "โ" ^ LCGchart.string_of_linear_term 0 t.c_name ^ "โ"); (* FIXME *) |
156 | 156 | print_edge file edge upper id; |
157 | 157 | print_graph_rec2 file "" id t.c_relations |
158 | - | SingleRelation(role) -> | |
158 | + | SingleRelation(role) -> | |
159 | 159 | let id = get_single_rel_id () in |
160 | 160 | fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGchart.string_of_linear_term 0 role); |
161 | 161 | if upper <> "" then fprintf file " %s -> %s\n" upper id |
162 | - | Variant(e,l) -> | |
162 | + | Variant(e,l) -> | |
163 | 163 | fprintf file " %s [shape=diamond]\n" e; |
164 | 164 | print_edge file edge upper e; |
165 | 165 | Xlist.iter l (fun (i,t) -> print_graph_rec2 file i e t) |
166 | 166 | | Dot -> () |
167 | 167 | | Ref i -> print_edge file edge upper ("x" ^ string_of_int i) |
168 | 168 | | t -> failwith ("print_graph_rec2: " ^ LCGchart.string_of_linear_term 0 t) *) |
169 | - | |
169 | + | |
170 | 170 | let rec string_of_quant_rec quant = function |
171 | 171 | Tuple l -> Xlist.fold l quant string_of_quant_rec |
172 | 172 | | Variant(e,l) -> (LCGstringOf.linear_term 0 (Variant(e,l))) :: quant |
173 | 173 | | Dot -> quant |
174 | 174 | | Val s -> s :: quant |
175 | 175 | | _ -> failwith "string_of_quant_rec" |
176 | - | |
176 | + | |
177 | 177 | let string_of_quant t = |
178 | 178 | let l = string_of_quant_rec [] t in |
179 | 179 | let s = String.concat " " l in |
180 | 180 | if s = "" then "" else "<I>" ^ s ^ "</I> " |
181 | - | |
182 | -let rec print_graph_rec file edge upper id = function | |
183 | - Node t -> | |
181 | + | |
182 | +let rec print_graph_rec file edge upper id = function | |
183 | + Node t -> | |
184 | 184 | fprintf file " %s [label=\"%s\"]\n" id (string_of_node t); |
185 | 185 | print_edge file edge upper id; |
186 | 186 | print_graph_rec2 file "" id t.args |
187 | - | Concept t -> | |
188 | - fprintf file " %s [shape=box,label=<%s%s %s>]\n" id | |
189 | - (string_of_quant t.c_quant) | |
190 | - (LCGstringOf.linear_term 0 t.c_sense) | |
187 | + | Concept t -> | |
188 | + fprintf file " %s [shape=box,label=<%s%s %s>]\n" id | |
189 | + (string_of_quant t.c_quant) | |
190 | + (LCGstringOf.linear_term 0 t.c_sense) | |
191 | 191 | (if t.c_name=Dot then "" else "โ" ^ LCGstringOf.linear_term 0 t.c_name ^ "โ"); (* FIXME *) |
192 | 192 | print_edge file edge upper id; |
193 | 193 | print_graph_rec2 file "" id t.c_relations |
... | ... | @@ -197,7 +197,7 @@ let rec print_graph_rec file edge upper id = function |
197 | 197 | print_edge file edge upper id; |
198 | 198 | print_graph_rec2 file "" id t.cx_contents; |
199 | 199 | print_graph_rec2 file "" id t.cx_relations; |
200 | - | Relation(role,role_attr,t) -> | |
200 | + | Relation(role,role_attr,t) -> | |
201 | 201 | fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); |
202 | 202 | print_edge file edge upper id; |
203 | 203 | print_graph_rec2 file "" id t |
... | ... | @@ -205,31 +205,31 @@ let rec print_graph_rec file edge upper id = function |
205 | 205 | fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); |
206 | 206 | print_edge file edge upper id; |
207 | 207 | print_graph_rec2 file "" id t |
208 | - | SingleRelation(role) -> | |
208 | + | SingleRelation(role) -> | |
209 | 209 | fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role); |
210 | 210 | print_edge file edge upper id |
211 | - | AddRelation(t,role,role_attr,s) -> | |
211 | + | AddRelation(t,role,role_attr,s) -> | |
212 | 212 | fprintf file " %s [shape=circle,label=\"AddRelation\\n%s\\n%s\"]\n" id role role_attr; |
213 | 213 | print_edge file edge upper id; |
214 | 214 | print_graph_rec2 file "" id t; |
215 | 215 | print_graph_rec2 file "" id s; |
216 | - | SetContextName(s,t) -> | |
216 | + | SetContextName(s,t) -> | |
217 | 217 | fprintf file " %s [shape=circle,label=\"SetContextName\\n%s\"]\n" id s; |
218 | 218 | print_edge file edge upper id; |
219 | 219 | print_graph_rec2 file "" id t |
220 | - | RemoveRelation t -> | |
220 | + | RemoveRelation t -> | |
221 | 221 | fprintf file " %s [shape=circle,label=\"RemoveRelation\"]\n" id; |
222 | 222 | print_edge file edge upper id; |
223 | 223 | print_graph_rec2 file "" id t |
224 | - | Variant(e,l) -> | |
224 | + | Variant(e,l) -> | |
225 | 225 | fprintf file " %s [shape=diamond,label=\"%s\"]\n" id e; |
226 | 226 | print_edge file edge upper id; |
227 | 227 | Xlist.iter l (fun (i,t) -> print_graph_rec2 file i id t) |
228 | - | Choice choice -> | |
228 | + | Choice choice -> | |
229 | 229 | fprintf file " %s [shape=Mdiamond,label=\"%s\"]\n" id ""; |
230 | 230 | print_edge file edge upper id; |
231 | 231 | StringMap.iter choice (fun ei t -> print_graph_rec2 file ei id t) |
232 | - | Val s -> | |
232 | + | Val s -> | |
233 | 233 | fprintf file " %s [shape=box,label=\"%s\"]\n" id s; |
234 | 234 | print_edge file edge upper id |
235 | 235 | | Dot -> () |
... | ... | @@ -237,63 +237,63 @@ let rec print_graph_rec file edge upper id = function |
237 | 237 | print_edge file edge upper id*) |
238 | 238 | | Ref i -> print_edge file edge upper ("x" ^ string_of_int i) |
239 | 239 | | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t) |
240 | - | |
240 | + | |
241 | 241 | and print_graph_rec2 file edge upper = function |
242 | 242 | Tuple l -> Xlist.iter l (print_graph_rec2 file edge upper) |
243 | 243 | | t -> print_graph_rec file edge upper (get_single_rel_id ()) t |
244 | - | |
244 | + | |
245 | 245 | (*let rec print_graph_rec file is_rev upper i = function (* FIXME: dokoลczyฤ is_rev *) |
246 | - Node t -> | |
246 | + Node t -> | |
247 | 247 | (* let orth = if t.id = 0 then "" else.(t.id).PreTypes.orth in |
248 | 248 | fprintf file " %s [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat;*) |
249 | 249 | fprintf file " %s [label=\"%s\"]\n" i (string_of_node t); |
250 | - if upper <> "" then | |
250 | + if upper <> "" then | |
251 | 251 | if is_rev then fprintf file " %s -> %s\n" i upper |
252 | 252 | else fprintf file " %s -> %s\n" upper i; |
253 | 253 | print_graph_rec file false i i t.args |
254 | - | Concept t -> | |
255 | - fprintf file " %s [shape=box,label=\"%s %s\"]\n" ("c" ^ i) | |
256 | - (LCGstringOf.linear_term 0 t.c_sense) | |
254 | + | Concept t -> | |
255 | + fprintf file " %s [shape=box,label=\"%s %s\"]\n" ("c" ^ i) | |
256 | + (LCGstringOf.linear_term 0 t.c_sense) | |
257 | 257 | (if t.c_name=Dot then "" else "โ" ^ LCGstringOf.linear_term 0 t.c_name ^ "โ"); (* FIXME *) |
258 | - if upper <> "" then | |
258 | + if upper <> "" then | |
259 | 259 | if is_rev then fprintf file " %s -> %s\n" ("c" ^ i) upper |
260 | 260 | else fprintf file " %s -> %s\n" upper ("c" ^ i); |
261 | 261 | print_graph_rec file false ("c" ^ i) i t.c_relations |
262 | 262 | | Context t -> |
263 | 263 | fprintf file " %s [shape=Msquare,label=\"\"]\n" ("i" ^ i); |
264 | - if upper <> "" then | |
264 | + if upper <> "" then | |
265 | 265 | if is_rev then fprintf file " %s -> %s\n" ("i" ^ i) upper |
266 | 266 | else fprintf file " %s -> %s\n" upper ("i" ^ i); |
267 | 267 | print_graph_rec file false ("i" ^ i) i t.cx_contents |
268 | - | SingleRelation(role) -> | |
268 | + | SingleRelation(role) -> | |
269 | 269 | let id = get_single_rel_id () in |
270 | 270 | fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role); |
271 | 271 | if upper <> "" then fprintf file " %s -> %s\n" upper id |
272 | - | Relation(role,role_attr,t) -> | |
272 | + | Relation(role,role_attr,t) -> | |
273 | 273 | fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" i (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); |
274 | 274 | if upper <> "" then fprintf file " %s -> %s\n" upper i; |
275 | 275 | print_graph_rec file false i i t |
276 | - | RevRelation(role,role_attr,t) -> | |
276 | + | RevRelation(role,role_attr,t) -> | |
277 | 277 | fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" i (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); |
278 | 278 | if upper <> "" then fprintf file " %s -> %s\n" i upper; |
279 | 279 | print_graph_rec file true i i t |
280 | 280 | | Tuple l -> Xlist.iter l (print_graph_rec file is_rev upper i) |
281 | - | Variant(e,l) -> | |
281 | + | Variant(e,l) -> | |
282 | 282 | fprintf file " %s [shape=diamond]\n" e; |
283 | 283 | if upper <> "" then fprintf file " %s -> %s\n" upper e; |
284 | 284 | Xlist.iter l (fun (i2,t) -> print_graph_rec file false e ("x" ^ i ^ "y" ^ i2) t) |
285 | 285 | | Dot -> () |
286 | 286 | | Ref i2 -> fprintf file " %s -> %d\n" upper i2 |
287 | 287 | | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t)*) |
288 | - | |
289 | -let print_graph path name references = | |
288 | + | |
289 | +let print_graph path name references = | |
290 | 290 | single_rel_id_count := 0; |
291 | - File.file_out (path ^ name ^ ".gv") (fun file -> | |
291 | + File.file_out (path ^ name ^ ".gv") (fun file -> | |
292 | 292 | fprintf file "digraph G {\n node [shape=record]\n"; |
293 | 293 | Int.iter 0 (Array.length references - 1) (fun i -> print_graph_rec file (*false*) "" "" ("x" ^ string_of_int i) references.(i)); |
294 | -(* Int.iter 0 (Array.length references - 1) (fun i -> | |
294 | +(* Int.iter 0 (Array.length references - 1) (fun i -> | |
295 | 295 | match references.(i) with |
296 | - Node t -> | |
296 | + Node t -> | |
297 | 297 | fprintf file " %d [label=\"%s\"]\n" i (string_of_node t); |
298 | 298 | let refs = get_refs [] t.args in |
299 | 299 | Xlist.iter refs (fun r -> |
... | ... | @@ -304,31 +304,31 @@ let print_graph path name references = |
304 | 304 | ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); |
305 | 305 | Sys.chdir ".." |
306 | 306 | |
307 | -let id_counter = ref 0 | |
308 | - | |
307 | +let id_counter = ref 0 | |
308 | + | |
309 | 309 | let print_edge2 file edge_rev edge_label edge_head edge_tail upper id = |
310 | 310 | let edge_head,edge_tail,upper,id = if edge_rev then edge_tail,edge_head,id,upper else edge_head,edge_tail,upper,id in |
311 | - let l = | |
311 | + let l = | |
312 | 312 | (if edge_label = "" then [] else ["label=\"" ^ edge_label ^ "\""]) @ |
313 | 313 | (if edge_head = "" then [] else ["ltail=\"" ^ edge_head ^ "\""]) @ |
314 | 314 | (if edge_tail = "" then [] else ["lhead=\"" ^ edge_tail ^ "\""]) in |
315 | - if upper <> 0 then | |
315 | + if upper <> 0 then | |
316 | 316 | if l = [] then fprintf file " %d -> %d\n" upper id |
317 | 317 | else fprintf file " %d -> %d [%s]\n" upper id (String.concat "," l) |
318 | - | |
319 | -let rec print_graph2_rec file edge_rev edge_label edge_head upper = function | |
320 | - Node t -> | |
318 | + | |
319 | +let rec print_graph2_rec file edge_rev edge_label edge_head upper = function | |
320 | + Node t -> | |
321 | 321 | let id = !id_counter in |
322 | 322 | incr id_counter; |
323 | 323 | fprintf file " %d [label=\"%s\"]\n" id (string_of_node t); |
324 | 324 | print_edge2 file edge_rev edge_label edge_head "" upper id; |
325 | 325 | print_graph2_rec file false "" "" id t.args |
326 | - | Concept t -> | |
326 | + | Concept t -> | |
327 | 327 | let id = !id_counter in |
328 | 328 | incr id_counter; |
329 | - fprintf file " %d [shape=box,label=<%s%s %s>]\n" id | |
330 | - (string_of_quant t.c_quant) | |
331 | - (LCGstringOf.linear_term 0 t.c_sense) | |
329 | + fprintf file " %d [shape=box,label=<%s%s %s>]\n" id | |
330 | + (string_of_quant t.c_quant) | |
331 | + (LCGstringOf.linear_term 0 t.c_sense) | |
332 | 332 | (if t.c_name=Dot then "" else "โ" ^ LCGstringOf.linear_term 0 t.c_name ^ "โ"); (* FIXME *) |
333 | 333 | print_edge2 file edge_rev edge_label edge_head "" upper id; |
334 | 334 | print_graph2_rec file false "" "" id t.c_relations |
... | ... | @@ -338,64 +338,64 @@ let rec print_graph2_rec file edge_rev edge_label edge_head upper = function |
338 | 338 | if t.cx_sense = Dot then fprintf file " subgraph cluster%d {\nlabel=\"\"\n" id |
339 | 339 | else fprintf file " subgraph cluster%d {\nlabel=\"%s\"\n" id (LCGstringOf.linear_term 0 t.cx_sense); |
340 | 340 | print_graph2_rec file false "" "" 0 t.cx_contents; |
341 | - fprintf file " }\n"; | |
341 | + fprintf file " }\n"; | |
342 | 342 | print_edge2 file edge_rev edge_label edge_head ("cluster" ^ string_of_int id) upper (id+1); |
343 | 343 | print_graph2_rec file false "" ("cluster" ^ string_of_int id) (id+1) t.cx_relations; |
344 | - | Relation(role,role_attr,t) -> | |
344 | + | Relation(role,role_attr,t) -> | |
345 | 345 | let id = !id_counter in |
346 | 346 | incr id_counter; |
347 | 347 | fprintf file " %d [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); |
348 | 348 | print_edge2 file false edge_label edge_head "" upper id; |
349 | 349 | print_graph2_rec file false "" "" id t |
350 | - | RevRelation(role,role_attr,t) -> | |
350 | + | RevRelation(role,role_attr,t) -> | |
351 | 351 | let id = !id_counter in |
352 | 352 | incr id_counter; |
353 | 353 | fprintf file " %d [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); |
354 | 354 | print_edge2 file true edge_label edge_head "" upper id; |
355 | 355 | print_graph2_rec file true "" "" id t |
356 | - | SingleRelation(role) -> | |
356 | + | SingleRelation(role) -> | |
357 | 357 | let id = !id_counter in |
358 | 358 | incr id_counter; |
359 | 359 | fprintf file " %d [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role); |
360 | 360 | print_edge2 file false edge_label edge_head "" upper id |
361 | - | AddRelation(t,role,role_attr,s) -> | |
361 | + | AddRelation(t,role,role_attr,s) -> | |
362 | 362 | let id = !id_counter in |
363 | 363 | incr id_counter; |
364 | 364 | fprintf file " %d [shape=circle,label=\"AddRelation\\n%s\\n%s\"]\n" id role role_attr; |
365 | 365 | print_edge2 file edge_rev edge_label edge_head "" upper id; |
366 | 366 | print_graph2_rec file false "" "" id t; |
367 | 367 | print_graph2_rec file false "" "" id s |
368 | - | RemoveRelation t -> | |
368 | + | RemoveRelation t -> | |
369 | 369 | let id = !id_counter in |
370 | 370 | incr id_counter; |
371 | 371 | fprintf file " %d [shape=circle,label=\"RemoveRelation\"]\n" id; |
372 | 372 | print_edge2 file edge_rev edge_label edge_head "" upper id; |
373 | 373 | print_graph2_rec file false "" "" id t |
374 | - | SetContextName(s,t) -> | |
374 | + | SetContextName(s,t) -> | |
375 | 375 | let id = !id_counter in |
376 | 376 | incr id_counter; |
377 | 377 | fprintf file " %d [shape=circle,label=\"SetContextName\\n%s\"]\n" id s; |
378 | 378 | print_edge2 file edge_rev edge_label edge_head "" upper id; |
379 | 379 | print_graph2_rec file false "" "" id t; |
380 | 380 | | Tuple l -> Xlist.iter l (print_graph2_rec file edge_rev edge_label edge_head upper) |
381 | - | Variant(e,l) -> | |
381 | + | Variant(e,l) -> | |
382 | 382 | let id = !id_counter in |
383 | 383 | incr id_counter; |
384 | 384 | fprintf file " %d [shape=diamond,label=\"%s\"]\n" id e; |
385 | 385 | print_edge2 file edge_rev edge_label edge_head "" upper id; |
386 | 386 | Xlist.iter l (fun (i,t) -> print_graph2_rec file edge_rev i "" id t) |
387 | - | Val s -> | |
387 | + | Val s -> | |
388 | 388 | let id = !id_counter in |
389 | 389 | incr id_counter; |
390 | 390 | fprintf file " %d [shape=box,label=\"%s\"]\n" id s; |
391 | 391 | print_edge2 file edge_rev edge_label edge_head "" upper id |
392 | 392 | | Dot -> () |
393 | 393 | | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t) |
394 | - | |
395 | -let print_graph2 path name query t = | |
394 | + | |
395 | +let print_graph2 path name query t = | |
396 | 396 | (* print_endline *) |
397 | 397 | id_counter := 1; |
398 | - File.file_out (path ^ name ^ ".gv") (fun file -> | |
398 | + File.file_out (path ^ name ^ ".gv") (fun file -> | |
399 | 399 | fprintf file "digraph G {\n compound=true\n node [shape=record]\n"; |
400 | 400 | print_graph2_rec file false "" "" 0 t; |
401 | 401 | fprintf file "label=\"%s\"\n }\n" query); |
... | ... | @@ -407,11 +407,11 @@ let rec get_lemma = function |
407 | 407 | PreTypes.Interp orth -> orth |
408 | 408 | | PreTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat |
409 | 409 | | _ -> "" |
410 | - | |
411 | -let print_paths path name paths = | |
412 | - File.file_out (path ^ name ^ ".gv") (fun file -> | |
410 | + | |
411 | +let print_paths path name paths = | |
412 | + File.file_out (path ^ name ^ ".gv") (fun file -> | |
413 | 413 | fprintf file "digraph G {\n"; |
414 | - Array.iter (fun t -> | |
414 | + Array.iter (fun t -> | |
415 | 415 | let lemma = get_lemma t.PreTypes.token in |
416 | 416 | if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.PreTypes.beg t.PreTypes.next t.PreTypes.orth lemma) paths; |
417 | 417 | fprintf file "}\n"); |
... | ... | @@ -419,39 +419,39 @@ let print_paths path name paths = |
419 | 419 | ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); |
420 | 420 | Sys.chdir ".." |
421 | 421 | |
422 | -let rec print_tree_rec2 file paths edge upper = function | |
422 | +let rec print_tree_rec2 file paths edge upper = function | |
423 | 423 | Tuple l -> Xlist.iter l (print_tree_rec2 file paths edge upper) |
424 | - | Variant(e,l) -> | |
424 | + | Variant(e,l) -> | |
425 | 425 | fprintf file " %s [shape=diamond]\n" e; |
426 | 426 | print_edge file edge upper e; |
427 | 427 | Xlist.iter l (fun (i,t) -> print_tree_rec2 file paths i e t) |
428 | 428 | | Dot -> () |
429 | 429 | | Ref i -> print_edge file edge upper ("x" ^ string_of_int i) |
430 | 430 | | t -> failwith ("print_tree_rec: " ^ LCGstringOf.linear_term 0 t) |
431 | - | |
432 | -let rec print_tree_rec file paths edge upper id = function | |
433 | - Node t -> | |
431 | + | |
432 | +let rec print_tree_rec file paths edge upper id = function | |
433 | + Node t -> | |
434 | 434 | let orth = if t.id = 0 then "" else paths.(t.id).PreTypes.orth in |
435 | 435 | fprintf file " %s [label=\"%s\\n%s\\n%s:%s\\n%f\"]\n" id (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat t.weight; |
436 | 436 | print_edge file edge upper id; |
437 | 437 | print_tree_rec2 file paths "" id t.args |
438 | - | Variant(e,l) -> | |
438 | + | Variant(e,l) -> | |
439 | 439 | fprintf file " %s [shape=diamond,label=\"%s\"]\n" id e; |
440 | 440 | print_edge file edge upper id; |
441 | 441 | Xlist.iter l (fun (i,t) -> print_tree_rec file paths i id (id ^ "y" ^ i) t) |
442 | - | Choice choice -> | |
442 | + | Choice choice -> | |
443 | 443 | fprintf file " %s [shape=Mdiamond,label=\"%s\"]\n" id ""; |
444 | 444 | print_edge file edge upper id; |
445 | 445 | StringMap.iter choice (fun ei t -> print_tree_rec file paths ei id (id ^ "b" ^ ei) t) |
446 | 446 | | Dot -> () |
447 | 447 | | t -> failwith ("print_tree_rec: " ^ LCGstringOf.linear_term 0 t) |
448 | - | |
449 | -let print_tree path name paths references = | |
450 | - File.file_out (path ^ name ^ ".gv") (fun file -> | |
448 | + | |
449 | +let print_tree path name paths references = | |
450 | + File.file_out (path ^ name ^ ".gv") (fun file -> | |
451 | 451 | fprintf file "digraph G {\n node [shape=box]\n"; |
452 | 452 | Int.iter 0 (Array.length references - 1) (fun i -> print_tree_rec file paths "" "" ("x" ^ string_of_int i) references.(i)); |
453 | 453 | (* match references.(i) with |
454 | - Node t -> | |
454 | + Node t -> | |
455 | 455 | let orth = if t.id = 0 then "" else paths.(t.id).PreTypes.orth in |
456 | 456 | fprintf file " %d [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat; |
457 | 457 | let refs = get_refs [] t.args in |
... | ... | @@ -463,13 +463,13 @@ let print_tree path name paths references = |
463 | 463 | ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); |
464 | 464 | Sys.chdir ".." |
465 | 465 | |
466 | -(*let print_tree filename paths references = | |
467 | - File.file_out filename (fun file -> | |
466 | +(*let print_tree filename paths references = | |
467 | + File.file_out filename (fun file -> | |
468 | 468 | fprintf file "digraph G {\n"; |
469 | 469 | let set = Xlist.fold paths IntSet.empty (fun set t -> |
470 | 470 | IntSet.add (IntSet.add set t.PreTypes.beg) t.PreTypes.next) in |
471 | 471 | IntSet.iter set (fun i -> fprintf file " %d [width=0; height=0; label=\"\"]\n" i); |
472 | - Xlist.iter paths (fun t -> | |
472 | + Xlist.iter paths (fun t -> | |
473 | 473 | let lemma = get_lemma t.PreTypes.token in |
474 | 474 | if lemma <> "" then ( |
475 | 475 | let s = if t.PreTypes.orth = "" then lemma else t.PreTypes.orth ^ "\n" ^ lemma in |
... | ... | @@ -480,8 +480,8 @@ let print_tree path name paths references = |
480 | 480 | ignore (Sys.command "dot -Tpng tree.gv -o tree.png"); |
481 | 481 | Sys.chdir ".."*) |
482 | 482 | |
483 | -(*let print_tree filename paths references = | |
484 | - File.file_out filename (fun file -> | |
483 | +(*let print_tree filename paths references = | |
484 | + File.file_out filename (fun file -> | |
485 | 485 | fprintf file "digraph G {\n"; |
486 | 486 | fprintf file " subgraph {\n ordering=out\n"; |
487 | 487 | let same = Xlist.fold (Xlist.sort paths (fun s t -> compare s.PreTypes.beg t.PreTypes.beg)) [] (fun same t -> |
... | ... | @@ -494,9 +494,9 @@ let print_tree path name paths references = |
494 | 494 | else same) in |
495 | 495 | fprintf file " }\n"; |
496 | 496 | fprintf file " { rank = same; %s }\n" (String.concat "; " (Xlist.map same (fun i -> sprintf "\"i%d\"" i))); |
497 | - Int.iter 0 (Array.length references - 1) (fun i -> | |
497 | + Int.iter 0 (Array.length references - 1) (fun i -> | |
498 | 498 | match references.(i) with |
499 | - Node t -> | |
499 | + Node t -> | |
500 | 500 | fprintf file " %d [label=\"%s\"]\n" i t.pred; |
501 | 501 | fprintf file " %d -> i%d\n" i t.id; |
502 | 502 | let refs = get_refs [] t.args in |
... | ... | @@ -508,9 +508,9 @@ let print_tree path name paths references = |
508 | 508 | ignore (Sys.command "dot -Tpng tree.gv -o tree.png"); |
509 | 509 | Sys.chdir ".."*) |
510 | 510 | |
511 | -(*let rec schema_latex schema = | |
512 | - "\\begin{tabular}{l}" ^ | |
513 | - String.concat "\\\\" (Xlist.map schema (fun s -> | |
511 | +let rec schema_latex schema = | |
512 | + "\\begin{tabular}{l}" ^ | |
513 | + String.concat "\\\\" (Xlist.map schema (fun s -> | |
514 | 514 | LatexMain.escape_string (String.concat "," ( |
515 | 515 | (if s.WalTypes.gf = WalTypes.ARG then [] else [WalStringOf.gf s.WalTypes.gf])@ |
516 | 516 | (if s.WalTypes.role = "" then [] else [s.WalTypes.role])@ |
... | ... | @@ -519,32 +519,32 @@ let print_tree path name paths references = |
519 | 519 | "\\end{tabular}" |
520 | 520 | |
521 | 521 | let fnum_frame_latex = function |
522 | - fnum,WalTypes.Frame(atrs,s) -> | |
522 | + fnum,WalTypes.Frame(atrs,s) -> | |
523 | 523 | Printf.sprintf "%d: %s: %s" fnum (LatexMain.escape_string (WalStringOf.frame_atrs atrs)) (schema_latex s) |
524 | - | fnum,WalTypes.LexFrame(id,p,r,s) -> | |
524 | + | fnum,WalTypes.LexFrame(id,p,r,s) -> | |
525 | 525 | Printf.sprintf "%d: %s: %s: %s: %s" fnum id (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s) |
526 | - | fnum,WalTypes.ComprepFrame(le,p,r,s) -> | |
526 | + | fnum,WalTypes.ComprepFrame(le,p,r,s) -> | |
527 | 527 | Printf.sprintf "%d: %s: %s: %s: %s" fnum le (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s) |
528 | - | |
528 | + | |
529 | 529 | let print_paths_latex name paths = |
530 | 530 | LatexMain.latex_file_out "results/" name "a0" false (fun file -> |
531 | 531 | fprintf file "\\begin{longtable}{|l|l|l|l|l|l|l|p{4cm}|l|l|l|l|}\n\\hline\north & beg & len & next & token & id & weight & attrs & lroles & senses & simple valence & valence\\\\\n"; |
532 | 532 | Int.iter 0 (Array.length paths - 1) (fun i -> |
533 | 533 | let t = paths.(i) in |
534 | - fprintf file "%s & %d & %d & %d & %s & %d & %.4f & %s & %s %s &\\begin{tabular}{l|l|p{4cm}}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular}\\\\\n\\hline\n" | |
534 | + fprintf file "%s & %d & %d & %d & %s & %d & %.4f & %s & %s %s &\\begin{tabular}{l|l|p{4cm}}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular}\\\\\n\\hline\n" | |
535 | 535 | t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (LatexMain.escape_string (string_of_token t.PreTypes.token)) t.PreTypes.id t.PreTypes.weight |
536 | - (String.concat ";" t.PreTypes.attrs) (fst t.PreTypes.lroles) (snd t.PreTypes.lroles) | |
536 | + (String.concat ";" t.PreTypes.attrs) (fst t.PreTypes.lroles) (snd t.PreTypes.lroles) | |
537 | 537 | (String.concat "\\\\\n" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s & %.2f & %s" sense weight (String.concat "," hipero)))) |
538 | - (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.simple_valence (fun x -> fnum_frame_latex x))) | |
538 | + (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.simple_valence (fun x -> fnum_frame_latex x))) | |
539 | 539 | (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.valence (fun x -> fnum_frame_latex x)))); |
540 | 540 | fprintf file "\\end{longtable}"); |
541 | - LatexMain.latex_compile_and_clean "results/" name *) | |
542 | - | |
541 | + LatexMain.latex_compile_and_clean "results/" name | |
542 | + | |
543 | 543 | let print_mml path name mml = |
544 | 544 | File.file_out (path ^ name ^ ".mml") (fun file -> |
545 | 545 | fprintf file "<!DOCTYPE math PUBLIC \"-//W3C//DTD MathML 2.0//EN\" \"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd\">\n"; |
546 | 546 | fprintf file "%s\n" (Xml.to_string_fmt mml)) |
547 | - | |
547 | + | |
548 | 548 | let page_header path = |
549 | 549 | "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"> |
550 | 550 | <html> |
... | ... | @@ -556,11 +556,11 @@ let page_header path = |
556 | 556 | |
557 | 557 | <body> |
558 | 558 | <center> |
559 | - <h1>ENIAM: Kategorialny Parser Skลadniowo-Semantyczny</h1> | |
560 | - <h3>Podaj tekst:</h3> | |
561 | - <form method=POST action=\"" ^ path ^ "parser.cgi\"> | |
562 | - <p><input type=\"text\" name=\"text0\" value=\"\" size=\"40\"></p> | |
563 | - <p><input type=\"submit\" value=\"Analizuj\" size=\"60\"></p> | |
559 | + <h1>ENIAM: Kategorialny Parser Skลadniowo-Semantyczny</h1> | |
560 | + <h3>Podaj tekst:</h3> | |
561 | + <form method=POST action=\"" ^ path ^ "parser.cgi\"> | |
562 | + <p><input type=\"text\" name=\"text0\" value=\"\" size=\"40\"></p> | |
563 | + <p><input type=\"submit\" value=\"Analizuj\" size=\"60\"></p> | |
564 | 564 | </form>" |
565 | 565 | |
566 | 566 | let page_trailer = |
... | ... | @@ -569,25 +569,25 @@ let page_trailer = |
569 | 569 | Copyright © 2016 Institute of Computer Science Polish Academy of Sciences<BR> |
570 | 570 | </center> |
571 | 571 | </body> |
572 | -</html>" | |
572 | +</html>" | |
573 | 573 | |
574 | 574 | let print_webpage file cg_bin_path html_path id query n max_n mml = |
575 | 575 | fprintf file "%s\n" (page_header cg_bin_path); |
576 | 576 | fprintf file "\n<H3>%s</H3>\n" query; |
577 | - fprintf file "<P>%s %s\n" | |
577 | + fprintf file "<P>%s %s\n" | |
578 | 578 | (if n = 1 then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Poprzednia interpretacja</A>" html_path id (n-1)) |
579 | 579 | (if n = max_n then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Nastฤpna interpretacja</A>" html_path id (n+1)); |
580 | 580 | fprintf file "<P><IMG SRC=\"%stree%s_%d.png\">\n" html_path id n; |
581 | 581 | fprintf file "<P>%s\n" (Xml.to_string_fmt mml); |
582 | 582 | fprintf file "<P><A HREF=\"%stree%s_%d.xml\">Graf w formacie XML</A>\n" html_path id n; |
583 | 583 | fprintf file "<P><A HREF=\"%sformula%s_%d.mml\">Formuลa w formacie MathML</A>\n" html_path id n; |
584 | - fprintf file "<P>%s %s\n" | |
584 | + fprintf file "<P>%s %s\n" | |
585 | 585 | (if n = 1 then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Poprzednia interpretacja</A>" html_path id (n-1)) |
586 | 586 | (if n = max_n then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Nastฤpna interpretacja</A>" html_path id (n+1)); |
587 | 587 | fprintf file "%s\n" page_trailer |
588 | 588 | |
589 | -open ExecTypes | |
590 | - | |
589 | +open ExecTypes | |
590 | + | |
591 | 591 | let generate_status_message result = function |
592 | 592 | Idle -> "Server error: " ^ result.msg |
593 | 593 | | PreprocessingError -> "Error during preprocessing: " ^ result.msg |
... | ... | @@ -602,13 +602,8 @@ let generate_status_message result = function |
602 | 602 | | NotTranslated -> "Unable to generate logical form" |
603 | 603 | | Parsed -> "parsed" |
604 | 604 | |
605 | -let print_other_result file cg_bin_path query result = | |
605 | +let print_other_result file cg_bin_path query result = | |
606 | 606 | fprintf file "%s\n" (page_header cg_bin_path); |
607 | 607 | fprintf file "\n<H3>%s</H3>\n" query; |
608 | 608 | fprintf file "\n<P>%s\n" (generate_status_message result result.status); |
609 | 609 | fprintf file "%s\n" page_trailer |
610 | - | |
611 | - | |
612 | - | |
613 | - | |
614 | - | |
615 | 610 | \ No newline at end of file |
... | ... |
pre/inflexion.ml deleted
1 | -(* | |
2 | - * ENIAM: Categorial Syntactic-Semantic Parser for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This program is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This program is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open Xstd | |
21 | -open Printf | |
22 | - | |
23 | -let load_alt filename = | |
24 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
25 | - List.rev (Xlist.fold l [] (fun l line -> | |
26 | - if String.length line = 0 then l else | |
27 | - if String.get line 0 = '#' then l else | |
28 | - match Str.split_delim (Str.regexp "\t") line with | |
29 | - [orth; lemma; interp] -> (orth,lemma,interp) :: l | |
30 | - | _ -> failwith ("load_alt: " ^ line))) | |
31 | - | |
32 | -let load_dict filename = | |
33 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
34 | - List.rev (Xlist.fold l [] (fun l line -> | |
35 | - if String.length line = 0 then l else | |
36 | - if String.get line 0 = '#' then l else | |
37 | - match Str.split_delim (Str.regexp "\t") line with | |
38 | - [stem; lemma_suf2; rule_names] -> (stem,lemma_suf2,Str.split (Str.regexp " ") rule_names) :: l | |
39 | - | _ -> failwith ("load_dict: " ^ line))) | |
40 | - | |
41 | -let load_rules filename = | |
42 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
43 | - List.rev (Xlist.fold l [] (fun l line -> | |
44 | - if String.length line = 0 then l else | |
45 | - if String.get line 0 = '#' then l else | |
46 | - match Str.split_delim (Str.regexp "\t") line with | |
47 | - [rule_name; quantity; lemma_suf; orth_suf; interp] -> (rule_name,int_of_string quantity,lemma_suf,orth_suf,interp) :: l | |
48 | - | _ -> failwith ("load_rules: " ^ line))) | |
49 | - | |
50 | -let make_rules_map rules = | |
51 | - Xlist.fold rules StringMap.empty (fun rules (rule_name,quantity,lemma_suf,orth_suf,interp) -> | |
52 | - let rules2 = try StringMap.find rules orth_suf with Not_found -> StringMap.empty in | |
53 | - let rules2 = StringMap.add rules2 rule_name (lemma_suf,interp) in | |
54 | - StringMap.add rules orth_suf rules2) | |
55 | - | |
56 | -module OrderedChar = struct | |
57 | - | |
58 | - type t = char | |
59 | - | |
60 | - let compare = compare | |
61 | - | |
62 | -end | |
63 | - | |
64 | -module CharMap = Xmap.Make(OrderedChar) | |
65 | - | |
66 | -type char_tree = N of char_tree CharMap.t * (string * string list) list * (string * string) list | |
67 | - (* nastฤpne moลผliwoลci * (lemma_suf2 * lista reguล) list * lista alt *) | |
68 | - | |
69 | -type char_tree_suf = M of char_tree_suf CharMap.t * (string * int * string * string) list | |
70 | - (* nastฤpne moลผliwoลci * (rule_name * lemma_suf * interp) list *) | |
71 | - | |
72 | -let empty_char_tree = N(CharMap.empty,[],[]) | |
73 | -let empty_char_tree_suf = M(CharMap.empty,[]) | |
74 | - | |
75 | -let rec add_path_dict stem i n lemma_suf2 rule_names (N(map,rules,alts)) = | |
76 | - if i = n then N(map,(lemma_suf2,rule_names) :: rules,alts) else | |
77 | - let tree = try CharMap.find map (String.get stem i) with Not_found -> empty_char_tree in | |
78 | - let tree = add_path_dict stem (i+1) n lemma_suf2 rule_names tree in | |
79 | - N(CharMap.add map (String.get stem i) tree,rules,alts) | |
80 | - | |
81 | -let rec add_path_alt stem i n lemma interp (N(map,rules,alts)) = | |
82 | - if i = n then N(map,rules,(lemma,interp) :: alts) else | |
83 | - let tree = try CharMap.find map (String.get stem i) with Not_found -> empty_char_tree in | |
84 | - let tree = add_path_alt stem (i+1) n lemma interp tree in | |
85 | - N(CharMap.add map (String.get stem i) tree,rules,alts) | |
86 | - | |
87 | -let make_char_tree dict alt = | |
88 | - let tree = Xlist.fold dict empty_char_tree (fun tree (stem,lemma_suf2,rule_names) -> | |
89 | - add_path_dict stem 0 (String.length stem) lemma_suf2 rule_names tree) in | |
90 | - Xlist.fold alt tree (fun tree (orth,lemma,interp) -> | |
91 | - add_path_alt orth 0 (String.length orth) lemma interp tree) | |
92 | - | |
93 | -let rec add_path_rules rule_name quantity orth_suf i lemma_suf interp (M(map,rules)) = | |
94 | - if i = -1 then M(map,(rule_name,quantity,lemma_suf,interp) :: rules) else | |
95 | - let tree = try CharMap.find map (String.get orth_suf i) with Not_found -> empty_char_tree_suf in | |
96 | - let tree = add_path_rules rule_name quantity orth_suf (i-1) lemma_suf interp tree in | |
97 | - M(CharMap.add map (String.get orth_suf i) tree,rules) | |
98 | - | |
99 | -let make_char_tree_suf rules = | |
100 | - let tree = Xlist.fold rules empty_char_tree_suf (fun tree (rule_name,quantity,lemma_suf,orth_suf,interp) -> | |
101 | - add_path_rules rule_name quantity orth_suf (String.length orth_suf - 1) lemma_suf interp tree) in | |
102 | - tree | |
103 | - | |
104 | -let rec find_char_tree_rec i n orth (N(map,rules,alts)) = | |
105 | - if i = n then [orth,"",rules,alts] else | |
106 | - let l = try find_char_tree_rec (i+1) n orth (CharMap.find map (String.get orth i)) with Not_found -> [] in | |
107 | - (String.sub orth 0 i,String.sub orth i (n-i),rules,[]) :: l | |
108 | - | |
109 | -let find_char_tree tree rules orth = | |
110 | - let l = find_char_tree_rec 0 (String.length orth) orth tree in | |
111 | - Xlist.fold l [] (fun found (stem,suf,rule_names,alts) -> | |
112 | - let rules2 = try StringMap.find rules suf with Not_found -> StringMap.empty in | |
113 | - let found = alts @ found in | |
114 | - Xlist.fold rule_names found (fun found (lemma_suf2,rule_names2) -> | |
115 | - Xlist.fold rule_names2 found (fun found rule_name -> | |
116 | - try | |
117 | - let lemma_suf,interp = StringMap.find rules2 rule_name in | |
118 | - let lemma = if lemma_suf2 = "" then stem ^ lemma_suf else stem ^ lemma_suf ^ ":" ^ lemma_suf2 in | |
119 | - (lemma,interp) :: found | |
120 | - with Not_found -> found))) | |
121 | - | |
122 | -let rec find_char_tree_suf_rec i orth (M(map,rules)) = | |
123 | - if i = 0 then Xlist.fold rules [] (fun l (rule_name,quantity,lemma_suf,interp) -> ("", rule_name, quantity, lemma_suf, interp, i) :: l) else | |
124 | - let l = try find_char_tree_suf_rec (i-1) orth (CharMap.find map (String.get orth (i-1))) with Not_found -> [] in | |
125 | - Xlist.fold rules l (fun l (rule_name, quantity, lemma_suf,interp) -> | |
126 | - (String.sub orth 0 i, rule_name, quantity, String.sub orth 0 i ^ lemma_suf, interp, i) :: l) | |
127 | - | |
128 | -let find_char_tree_suf rules_tree stem_map alt_map orth = | |
129 | - let alt_l = Xlist.rev_map (try StringMap.find alt_map orth with Not_found -> []) (fun (lemma,interp) -> lemma,interp,1,[]) in | |
130 | - let l = find_char_tree_suf_rec (String.length orth) orth rules_tree in | |
131 | - let found = Xlist.fold l alt_l (fun found (stem,rule_name,quantity,lemma,interp,i) -> | |
132 | - try | |
133 | - let rule_names = StringMap.find stem_map stem in | |
134 | - if StringSet.mem rule_names rule_name then (lemma,interp,1,[]) :: found else found (* FIXME: czy na pewno nie dodawaฤ reguล niepasujฤ cych? to powoduje ลผe lemat tak samo brzmiฤ cy a majฤ cy innฤ odmianฤ nie zostanie rozpoznany *) | |
135 | - with Not_found -> if quantity < 100 || (String.length orth = i && stem = lemma) then found else (lemma,interp,quantity,["lemma not validated"]) :: found) in (* FIXME: uciฤcie ลผadkich reguล powinno byฤ inaczej sterowane *) | |
136 | -(* if found = [] then [orth,"unk",1,["token not found"]] else *) | |
137 | - let found = (orth,"unk",1,["token not found"]) :: found in | |
138 | - let valid = Xlist.fold found [] (fun valid -> function | |
139 | - lemma,interp,quantity,[] -> (lemma,interp,quantity,[]) :: valid | |
140 | - | _ -> valid) in | |
141 | - if valid = [] then found else valid | |
142 | - | |
143 | -let prepare_inflexion alt_filename dict_filename rules_filename = | |
144 | - let alt = load_alt alt_filename in | |
145 | - let dict = load_dict dict_filename in | |
146 | - let rules = load_rules rules_filename in | |
147 | - let tree = make_char_tree dict alt in | |
148 | - let rules = make_rules_map rules in | |
149 | - tree,rules | |
150 | - | |
151 | -let tree,rules = | |
152 | -(* prepare_inflexion (morfeusz_path ^ Paths.alt_adj) (morfeusz_path ^ Paths.dict_adj) (morfeusz_path ^ Paths.rules_adj) *) | |
153 | -(* prepare_inflexion (morfeusz_path ^ Paths.alt_all) (morfeusz_path ^ Paths.dict_all) (morfeusz_path ^ Paths.rules_all) *) | |
154 | - empty_char_tree,StringMap.empty | |
155 | - | |
156 | -let make_alt_map alt = | |
157 | - Xlist.fold alt StringMap.empty (fun alt_map (orth,lemma,interp) -> | |
158 | - StringMap.add_inc alt_map orth [lemma,interp] (fun l -> (lemma,interp) :: l)) | |
159 | - | |
160 | -let prepare_inflexion_suf alt_filename dict_filename rules_filename = | |
161 | - let alt = load_alt alt_filename in | |
162 | - let rules = load_rules rules_filename in | |
163 | - let rules_tree = make_char_tree_suf rules in | |
164 | - let alt_map = make_alt_map alt in | |
165 | - let dict = load_dict dict_filename in | |
166 | - let stem_map = Xlist.fold dict StringMap.empty (fun stem_map (stem,lemma_suf2,rule_names) -> | |
167 | - StringMap.add_inc stem_map stem (StringSet.of_list rule_names) (fun set -> Xlist.fold rule_names set StringSet.add)) in | |
168 | - alt_map,rules_tree,stem_map | |
169 | - | |
170 | -let alt_map,rules_tree,stem_map = | |
171 | - prepare_inflexion_suf (Paths.sgjp_path ^ Paths.alt_all) (Paths.sgjp_path ^ Paths.dict_all) (Paths.sgjp_path ^ Paths.rules_all) | |
172 | - | |
173 | -let check_prefix pat s = | |
174 | - let n = String.length pat in | |
175 | - if n > String.length s then false else | |
176 | - String.sub s 0 n = pat | |
177 | - | |
178 | -let cut_prefix pat s = | |
179 | - let i = String.length pat in | |
180 | - let n = String.length s in | |
181 | - if i >= n then "" else | |
182 | - try String.sub s i (n-i) with _ -> failwith ("cut_prefix: " ^ s ^ " " ^ string_of_int i) | |
183 | - | |
184 | -let check_sufix pat s = | |
185 | - let n = String.length pat in | |
186 | - let m = String.length s in | |
187 | - if n > m then false else | |
188 | - String.sub s (m-n) n = pat | |
189 | - | |
190 | -let cut_sufix pat s = | |
191 | - let i = String.length pat in | |
192 | - let n = String.length s in | |
193 | - try String.sub s 0 (n-i) with _ -> failwith ("cut_sufix: " ^ s) | |
194 | - | |
195 | -let rec select_interp_sufix pat = function | |
196 | - [] -> [] | |
197 | - | (lemma,interp) :: l -> if check_sufix pat interp then (lemma,interp) :: (select_interp_sufix pat l) else select_interp_sufix pat l | |
198 | - | |
199 | -let rec select_interp_sufix_suf pat = function | |
200 | - [] -> [] | |
201 | - | (lemma,interp,quantity,attrs) :: l -> if check_sufix pat interp then (lemma,interp,quantity,attrs) :: (select_interp_sufix_suf pat l) else select_interp_sufix_suf pat l | |
202 | - | |
203 | -let rec remove_interp_sufix pat = function | |
204 | - [] -> [] | |
205 | - | (lemma,interp) :: l -> if check_sufix pat interp then remove_interp_sufix pat l else (lemma,interp) :: (remove_interp_sufix pat l) | |
206 | - | |
207 | -let rec remove_interp_sufix_suf pat = function | |
208 | - [] -> [] | |
209 | - | (lemma,interp,quantity,attrs) :: l -> | |
210 | - if interp = "adv:sup" then (lemma,interp,quantity,attrs) :: (remove_interp_sufix_suf pat l) else (* FIXME: zaลlepka, wymaga poprawienia algorytmu generowania sลownikรณw *) | |
211 | - if check_sufix pat interp then remove_interp_sufix_suf pat l else (lemma,interp,quantity,attrs) :: (remove_interp_sufix_suf pat l) | |
212 | - | |
213 | -let get_interpretations orth = | |
214 | - (if check_prefix "naj" orth then select_interp_sufix ":sup" (find_char_tree tree rules (cut_prefix "naj" orth)) else []) @ | |
215 | - (if check_prefix "nie" orth then select_interp_sufix ":neg" (find_char_tree tree rules (cut_prefix "nie" orth)) else []) @ | |
216 | - (remove_interp_sufix ":neg" (remove_interp_sufix ":sup" (find_char_tree tree rules orth))) | |
217 | - | |
218 | -let get_interpretations_suf orth = (* FIXME: nie dziaลa dla adv:sup pisanych z wielkiej litery np Najdoskonalej Najlepiej *) | |
219 | - if orth = "siebie" then ["siebie","siebie:acc.gen",1,[]] else | |
220 | - if orth = "sobie" then ["siebie","siebie:dat.loc",1,[]] else | |
221 | - if orth = "sobฤ " then ["siebie","siebie:inst",1,[]] else | |
222 | - (if check_prefix "naj" orth then select_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "naj" orth)) else []) @ | |
223 | - (if check_prefix "nie" orth then select_interp_sufix_suf ":neg" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "nie" orth)) else []) @ | |
224 | - (if check_prefix "Naj" orth then select_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "Naj" orth)) else []) @ | |
225 | - (if check_prefix "Nie" orth then select_interp_sufix_suf ":neg" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "Nie" orth)) else []) @ | |
226 | - (remove_interp_sufix_suf ":neg" (remove_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map orth))) | |
227 | - | |
228 | -(* Testy *) | |
229 | - | |
230 | -let print_interpretations l = | |
231 | - Xlist.iter (Xlist.sort l compare) (fun (lemma,interp) -> | |
232 | - printf "%s\t%s\n" lemma interp) | |
233 | - | |
234 | -(*let _ = | |
235 | - let l = get_interpretations "ลผyczliwฤ " in | |
236 | - print_interpretations l; | |
237 | - let l = get_interpretations "ลผyลบniejszego" in | |
238 | - print_interpretations l; | |
239 | - let l = get_interpretations "zwiลli" in | |
240 | - print_interpretations l; | |
241 | - let l = get_interpretations "najzieleลsza" in | |
242 | - print_interpretations l; | |
243 | - let l = get_interpretations "najtandetniejsza" in | |
244 | - print_interpretations l; | |
245 | - let l = get_interpretations "nieลผelazny" in | |
246 | - print_interpretations l; | |
247 | - ()*) | |
248 | - | |
249 | -(*let sgjp_filename = "sgjp-20151020.tab" | |
250 | -let polimorf_filename = "polimorf-20151020.tab" | |
251 | - | |
252 | -let _ = | |
253 | - let interp_sel = Morf.load_interp_sel "data/interps.tab" in | |
254 | - print_endline "loading sgjp"; | |
255 | - let sgjp = Morf.load_tab (morfeusz_path ^ sgjp_filename) in | |
256 | - print_endline "loading polimorf"; | |
257 | - let polimorf = Morf.load_tab (morfeusz_path ^ polimorf_filename) in | |
258 | - print_endline "merging"; | |
259 | - let dicts = Morf.merge_dicts [sgjp;polimorf] in | |
260 | - let adj_interp_sel = StringMap.find interp_sel "adj" in | |
261 | - let adj_sup_interp_sel = StringMap.find interp_sel "adj-sup" in | |
262 | -(* let dicts = Morf.remove_prefix dicts "naj" adj_sup_interp_sel in *) | |
263 | - print_endline "preparing queries"; | |
264 | - let queries = StringMap.fold dicts StringMap.empty (fun queries lemma interps -> | |
265 | - let interps = Morf.select_interps interps (adj_interp_sel @ adj_sup_interp_sel) in | |
266 | - StringMap.fold interps queries (fun queries interp orths -> | |
267 | - Xlist.fold orths queries (fun queries orth -> | |
268 | - let s = lemma ^ "\t" ^ interp in | |
269 | - StringMap.add_inc queries orth (StringSet.singleton s) (fun set -> StringSet.add set s)))) in | |
270 | - print_endline "testing"; | |
271 | - StringMap.iter queries (fun orth set -> | |
272 | - let set = Xlist.fold (get_interpretations orth) set (fun set (lemma,interp) -> | |
273 | - let s = lemma ^ "\t" ^ interp in | |
274 | - if StringSet.mem set s then StringSet.remove set s else ( | |
275 | - printf "excessing interpretation: %s\t%s" orth s; | |
276 | - set)) in | |
277 | - if StringSet.is_empty set then () else | |
278 | - StringSet.iter set (fun s -> | |
279 | - printf "lacking interpretation: %s\t%s" orth s))*) | |
280 | - | |
281 | - | |
282 | - | |
283 | - | |
284 | - | |
285 | - | |
286 | - | |
287 | - | |
288 | - | |
289 | - | |
290 | - | |
291 | - | |
292 | - | |
293 | - | |
294 | - | |
295 | - | |
296 | - | |
297 | - | |
298 | - | |
299 | - | |
300 | - | |
301 | - | |
302 | 0 | \ No newline at end of file |
pre/makefile
1 | 1 | OCAMLC=ocamlc |
2 | 2 | OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | -INCLUDES=-I +xml-light -I +xlib | |
4 | +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology | |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa | |
6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa inflexion.cmxa | |
7 | 7 | INSTALLDIR=`ocamlc -where` |
8 | 8 | |
9 | 9 | WAL= paths.ml walTypes.ml walStringOf.ml preTypes.ml preWordnet.ml walParser.ml walTEI.ml walFrames.ml |
10 | -PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml morf.ml inflexion.ml prePaths.ml preMWE.ml preSemantics.ml preProcessing.ml | |
11 | - | |
12 | -all: | |
10 | +PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preProcessing.ml | |
11 | + | |
12 | +all: | |
13 | 13 | $(OCAMLOPT) -o pre $(OCAMLOPTFLAGS) $(WAL) $(PRE) |
14 | 14 | |
15 | 15 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
... | ... |
pre/morf.ml deleted
1 | -(* | |
2 | - * ENIAM: Categorial Syntactic-Semantic Parser for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This program is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This program is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open Xstd | |
21 | - | |
22 | -let load_tab filename = | |
23 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
24 | - List.rev (Xlist.fold l [] (fun l line -> | |
25 | - if String.length line = 0 then l else | |
26 | - if String.get line 0 = '#' then l else | |
27 | - match Str.split (Str.regexp "\t") line with | |
28 | - orth :: lemma :: interp :: _ -> (orth,lemma,interp) :: l | |
29 | - | _ -> failwith ("load_tab: " ^ line))) | |
30 | - | |
31 | -let load_tab_full filename = | |
32 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
33 | - List.rev (Xlist.fold l [] (fun l line -> | |
34 | - if String.length line = 0 then l else | |
35 | - if String.get line 0 = '#' then l else | |
36 | - match Str.split (Str.regexp "\t") line with | |
37 | - [orth; lemma; interp] -> (orth,lemma,interp,"","") :: l | |
38 | - | [orth; lemma; interp; cl] -> (orth,lemma,interp,cl,"") :: l | |
39 | - | [orth; lemma; interp; cl; cl2] -> (orth,lemma,interp,cl,cl2) :: l | |
40 | -(* | orth :: lemma :: interp :: cl :: cl2 -> (orth,lemma,interp,cl,String.concat ";" cl2) :: l *) | |
41 | - | _ -> failwith ("load_tab_full: " ^ line))) | |
42 | - | |
43 | -let merge_dicts l = | |
44 | - Xlist.fold l StringMap.empty (fun dicts tab -> | |
45 | - Xlist.fold tab dicts (fun dicts (orth,lemma,interp) -> | |
46 | - let interps = try StringMap.find dicts lemma with Not_found -> StringMap.empty in | |
47 | - let interps = StringMap.add_inc interps interp [orth] (fun orths -> | |
48 | - if Xlist.mem orths orth then orths else orth :: orths) in | |
49 | - StringMap.add dicts lemma interps)) | |
50 | - | |
51 | -let load_interp_sel filename = | |
52 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in | |
53 | - Xlist.fold l StringMap.empty (fun interp_sel line -> | |
54 | - if String.length line = 0 then interp_sel else | |
55 | - if String.get line 0 = '#' then interp_sel else | |
56 | - match Str.split (Str.regexp "\t") line with | |
57 | - [group;interp;label] -> StringMap.add_inc interp_sel group [interp,label] (fun l -> (interp,label) :: l) | |
58 | - | _ -> failwith ("load_interp_sel: " ^ line)) | |
59 | - | |
60 | - | |
61 | -let rec merge_digraph = function | |
62 | - [] -> [] | |
63 | - | "c" :: "h" :: l -> "ch" :: (merge_digraph l) | |
64 | - | "c" :: "z" :: l -> "cz" :: (merge_digraph l) | |
65 | - | "d" :: "z" :: l -> "dz" :: (merge_digraph l) | |
66 | - | "d" :: "ลบ" :: l -> "dลบ" :: (merge_digraph l) | |
67 | - | "d" :: "ลผ" :: l -> "dลผ" :: (merge_digraph l) | |
68 | - | "r" :: "z" :: l -> "rz" :: (merge_digraph l) | |
69 | - | "s" :: "z" :: l -> "sz" :: (merge_digraph l) | |
70 | - | "b" :: "'" :: l -> "b'" :: (merge_digraph l) | |
71 | - | "f" :: "'" :: l -> "f'" :: (merge_digraph l) | |
72 | - | s :: l -> s :: (merge_digraph l) | |
73 | - | |
74 | -let text_to_chars s = Xunicode.classified_chars_of_utf8_string s | |
75 | -(* (try UTF8.validate s with UTF8.Malformed_code -> failwith ("Invalid UTF8 string: " ^ s)); | |
76 | - let r = ref [] in | |
77 | - UTF8.iter (fun c -> | |
78 | - r := (UTF8.init 1 (fun _ -> c)) :: (!r)) s; | |
79 | - merge_digraph (List.rev (!r))*) | |
80 | - | |
81 | - | |
82 | -let check_prefix pat s = | |
83 | - let n = String.length pat in | |
84 | - if n > String.length s then false else | |
85 | - String.sub s 0 n = pat | |
86 | - | |
87 | -let cut_prefix pat s = | |
88 | - let i = String.length pat in | |
89 | - let n = String.length s in | |
90 | - if i >= n then "" else | |
91 | - try String.sub s i (n-i) with _ -> failwith ("cut_prefix: " ^ s ^ " " ^ string_of_int i) | |
92 | - | |
93 | -let check_sufix pat s = | |
94 | - let n = String.length pat in | |
95 | - let m = String.length s in | |
96 | - if n > m then false else | |
97 | - String.sub s (m-n) n = pat | |
98 | - | |
99 | -let cut_sufix pat s = | |
100 | - let i = String.length pat in | |
101 | - let n = String.length s in | |
102 | - try String.sub s 0 (n-i) with _ -> failwith ("cut_sufix: " ^ s) | |
103 | - | |
104 | -let apply_transform orth (s,t) = | |
105 | - if check_sufix s orth then cut_sufix s orth ^ t else raise Not_found | |
106 | - | |
107 | -let split_colon s = | |
108 | - match Str.split_delim (Str.regexp ":") s with | |
109 | - [s] -> s, "" | |
110 | - | [s;t] -> s, t | |
111 | - | _ -> failwith "split_colon" | |
112 | - | |
113 | -let get_cat s = | |
114 | - match Str.split_delim (Str.regexp ":") s with | |
115 | - cat :: _ -> cat | |
116 | - | _ -> failwith "get_cat" | |
117 | - | |
118 | -let select_interps interps interp_sel = | |
119 | - Xlist.fold interp_sel StringMap.empty (fun new_interps (interp,_) -> | |
120 | - try | |
121 | - StringMap.add new_interps interp (StringMap.find interps interp) | |
122 | - with Not_found -> new_interps) |
pre/prePaths.ml
... | ... | @@ -20,17 +20,17 @@ |
20 | 20 | open Xstd |
21 | 21 | open PreTypes |
22 | 22 | |
23 | -let to_string (paths,last) = | |
23 | +let to_string (paths,last) = | |
24 | 24 | String.concat "\n" (Xlist.map paths (fun t -> PreTokenizer.string_of_tokens 0 (Token t))) |
25 | 25 | ^ Printf.sprintf "\nlast=%d" last |
26 | 26 | |
27 | -let to_string_indexed (paths,last) = | |
28 | - String.concat "\n" (Xlist.map paths (fun (i,j,t) -> | |
27 | +let to_string_indexed (paths,last) = | |
28 | + String.concat "\n" (Xlist.map paths (fun (i,j,t) -> | |
29 | 29 | Printf.sprintf "%2d %2d %s" i j (PreTokenizer.string_of_tokens 0 (Token t)))) |
30 | 30 | ^ Printf.sprintf "\nlast=%d" last |
31 | 31 | |
32 | 32 | (*let indexed_token_record_to_xml i j t = |
33 | - let lemma,pos,tags = | |
33 | + let lemma,pos,tags = | |
34 | 34 | match t.token with |
35 | 35 | Lemma(lemma,pos,tags) -> lemma,pos,tags |
36 | 36 | | _ -> failwith "indexed_token_record_to_xml" in |
... | ... | @@ -46,11 +46,11 @@ let to_string_indexed (paths,last) = |
46 | 46 | Xlist.map t.senses (fun (sense,hipero,weight) -> |
47 | 47 | Xml.Element("sense",["name",sense;"weight",string_of_float weight], |
48 | 48 | Xlist.map hipero (fun s -> Xml.Element("hipero",[],[Xml.PCData s])))))]) |
49 | - | |
50 | -let to_xml (paths,last) = | |
49 | + | |
50 | +let to_xml (paths,last) = | |
51 | 51 | Xml.Element("paths",["last",string_of_int last], |
52 | 52 | Xlist.map paths (fun (i,j,t) -> indexed_token_record_to_xml i j t)) *) |
53 | - | |
53 | + | |
54 | 54 | let compare_token_record p r = |
55 | 55 | let v = compare p.beg r.beg in |
56 | 56 | if v <> 0 then v else |
... | ... | @@ -58,30 +58,30 @@ let compare_token_record p r = |
58 | 58 | if v <> 0 then v else |
59 | 59 | compare p r |
60 | 60 | |
61 | -let sort (paths,last) = | |
61 | +let sort (paths,last) = | |
62 | 62 | Xlist.sort paths compare_token_record, last |
63 | 63 | |
64 | 64 | let rec uniq_rec rev = function |
65 | 65 | [] -> List.rev rev |
66 | 66 | | [p] -> List.rev (p :: rev) |
67 | 67 | | p :: r :: l -> if p = r then uniq_rec rev (r :: l) else uniq_rec (p :: rev) (r :: l) |
68 | - | |
69 | -let uniq (paths,last) = | |
68 | + | |
69 | +let uniq (paths,last) = | |
70 | 70 | uniq_rec [] paths, last |
71 | - | |
71 | + | |
72 | 72 | let rec translate_into_paths_rec paths = function |
73 | 73 | Token t -> t :: paths |
74 | 74 | | Seq l -> Xlist.fold l paths translate_into_paths_rec |
75 | 75 | | Variant l -> Xlist.fold l paths translate_into_paths_rec |
76 | - | |
77 | -let translate_into_paths tokens = | |
78 | - let paths = Xlist.fold tokens [] (fun paths token -> | |
76 | + | |
77 | +let translate_into_paths tokens = | |
78 | + let paths = Xlist.fold tokens [] (fun paths token -> | |
79 | 79 | translate_into_paths_rec paths token) in |
80 | 80 | let last = if paths = [] then 0 else (List.hd paths).next in |
81 | 81 | let paths = sort (paths,last) in |
82 | 82 | let paths = uniq paths in |
83 | - paths | |
84 | - | |
83 | + paths | |
84 | + | |
85 | 85 | (**********************************************************************************) |
86 | 86 | |
87 | 87 | let excluded_interps = StringSet.of_list [ |
... | ... | @@ -214,7 +214,7 @@ let transformed_interps = Xlist.fold [ |
214 | 214 | "praet:sg:n1.n2:imperf.perf:nagl","praet:sg:n1.n2:imperf.perf"; |
215 | 215 | "praet:sg:n1.n2:imperf:nagl","praet:sg:n1.n2:imperf"; |
216 | 216 | ] StringMap.empty (fun map (k,v) -> StringMap.add map k v) |
217 | - | |
217 | + | |
218 | 218 | let merge_lemmata l = |
219 | 219 | let map = Xlist.fold l StringMap.empty (fun map (lemma,interp,quantity,attrs) -> |
220 | 220 | let interp = if interp = "num:comp" then "numc" else interp in |
... | ... | @@ -229,88 +229,88 @@ let merge_lemmata l = |
229 | 229 | StringMap.add_inc map pos [tags] (fun l -> tags :: l))), |
230 | 230 | max 1 (quantity / Xlist.size interps), |
231 | 231 | attrs) in |
232 | - StringMap.fold map [] (fun l _ (lemma,map,quantity,attrs) -> | |
232 | + StringMap.fold map [] (fun l _ (lemma,map,quantity,attrs) -> | |
233 | 233 | StringMap.fold map l (fun l cat interp -> |
234 | 234 | (lemma,cat,interp,quantity,attrs) :: l)) |
235 | 235 | |
236 | 236 | let uppercase lemma cl ll = |
237 | 237 | let n = String.length lemma in |
238 | 238 | let nll = String.length ll in |
239 | - cl ^ String.sub lemma nll (n - nll) | |
240 | - | |
241 | -let quant_mod quantity = | |
239 | + cl ^ String.sub lemma nll (n - nll) | |
240 | + | |
241 | +let quant_mod quantity = | |
242 | 242 | log10 (float quantity) |
243 | - | |
244 | -let lemmatize_token = function | |
245 | - | {token=AllSmall s} as t -> | |
246 | - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
243 | + | |
244 | +let lemmatize_token = function | |
245 | + | {token=AllSmall s} as t -> | |
246 | + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
247 | 247 | {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs=attrs @ t.attrs})) |
248 | - | {token=SmallLetter s} as t -> | |
249 | - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
248 | + | {token=SmallLetter s} as t -> | |
249 | + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
250 | 250 | {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs=attrs @ t.attrs})) |
251 | - | {token=FirstCap(s,lower,cl,ll)} as t -> | |
252 | - let l = Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
251 | + | {token=FirstCap(s,lower,cl,ll)} as t -> | |
252 | + let l = Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
253 | 253 | {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}) in |
254 | 254 | let b = Xlist.fold l false (fun b t -> if Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found" then b else true) in |
255 | - if b then t :: l else | |
256 | - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf lower)) (fun (lemma,cat,interp,quantity,attrs) -> | |
255 | + if b then t :: l else | |
256 | + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations lower)) (fun (lemma,cat,interp,quantity,attrs) -> | |
257 | 257 | {t with token=Lemma(uppercase lemma cl ll,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: "lemmatized as lowercase" :: attrs @ t.attrs})) |
258 | - | {token=AllCap(s,_,_)} as t -> | |
259 | - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
258 | + | {token=AllCap(s,_,_)} as t -> | |
259 | + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
260 | 260 | {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs})) |
261 | - | {token=CapLetter(s,_)} as t -> | |
262 | - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
261 | + | {token=CapLetter(s,_)} as t -> | |
262 | + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
263 | 263 | {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs})) |
264 | - | {token=SomeCap s} as t -> | |
265 | - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
264 | + | {token=SomeCap s} as t -> | |
265 | + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) -> | |
266 | 266 | {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs})) |
267 | 267 | | t -> [t] |
268 | - | |
269 | -let rec lemmatize_rec rev = function | |
270 | - [] -> List.rev rev | |
268 | + | |
269 | +let rec lemmatize_rec rev = function | |
270 | + [] -> List.rev rev | |
271 | 271 | | t :: l -> lemmatize_rec (lemmatize_token t @ rev) l |
272 | 272 | |
273 | 273 | let lemmatize (paths,last) = |
274 | 274 | List.rev (Xlist.fold (lemmatize_rec [] paths) [] (fun paths t -> |
275 | 275 | match t.token with |
276 | - Lemma _ -> if (Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found") && | |
276 | + Lemma _ -> if (Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found") && | |
277 | 277 | (Xlist.mem t.attrs "required validated lemmatization") then paths |
278 | 278 | else t(*{t with attrs=List.remove "required validated lemmatization" t.attrs}*) :: paths |
279 | 279 | | _ -> t :: paths)), last |
280 | - | |
280 | + | |
281 | 281 | (* TODO: docelowa lematyzacja: |
282 | 282 | - lematyzacja za pomocฤ pรณลrฤcznie wytworzonych reguล lematyzacji i listy wyjฤ tkรณw |
283 | 283 | - walidacja lematรณw za pomocฤ listy znanych lematรณw zawierajฤ cej lemat, kategoriฤ, rodzaj subst, aspekt verb (obejmuje teลผ walidacjฤ akronimรณw) |
284 | 284 | - rozpoznawanie wyraลผeล wielosลownych (mwe i mte) za pomocฤ listy zawierajฤ cej ich lematy i szablony odmiany |
285 | 285 | *) |
286 | - | |
286 | + | |
287 | 287 | (**********************************************************************************) |
288 | - | |
288 | + | |
289 | 289 | |
290 | 290 | (**********************************************************************************) |
291 | - | |
291 | + | |
292 | 292 | (**********************************************************************************) |
293 | - | |
294 | - | |
295 | - | |
293 | + | |
294 | + | |
295 | + | |
296 | 296 | (*let rec get_beg_id = function |
297 | 297 | Token t -> t.beg |
298 | 298 | | Seq(t :: _) -> get_beg_id t |
299 | 299 | | Variant(t :: _) -> get_beg_id t |
300 | 300 | | _ -> failwith "get_beg_id" |
301 | - | |
301 | + | |
302 | 302 | let rec get_end_id = function |
303 | 303 | Token t -> t.beg + t.len |
304 | 304 | | Seq [] -> failwith "get_end_id" |
305 | 305 | | Seq l -> get_end_id (List.hd (List.rev l)) |
306 | 306 | | Variant(t :: _) -> get_end_id t |
307 | 307 | | _ -> failwith "get_end_id"*) |
308 | - | |
308 | + | |
309 | 309 | (*let rec lemmatize_tokens paths next_id = function |
310 | 310 | Token({token=Dig(v,cat)} as t)-> PrePaths.add_edge paths t.beg next_id t.orth v (parse_postags cat) t.beg t.len |
311 | 311 | | Token({token=Lemma(lemma,interp)} as t) -> PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags interp) t.beg t.len |
312 | 312 | | Token({token=Interp lemma} as t) -> PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags "interp") t.beg t.len |
313 | - | Token({token=AllSmall s} as t) -> | |
313 | + | Token({token=AllSmall s} as t) -> | |
314 | 314 | Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) -> |
315 | 315 | PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags postags) t.beg t.len) |
316 | 316 | | Token({token=FirstCap(s,s2)} as t) -> (* FIXME: dodaฤ wersjฤ z s2 ; uporzฤ dkowaฤ sลownik; dodaฤ akronimy *) |
... | ... | @@ -321,19 +321,19 @@ let rec get_end_id = function |
321 | 321 | | Seq(t :: next :: l) -> lemmatize_tokens (lemmatize_tokens paths (get_beg_id next) t) next_id (Seq(next :: l)) |
322 | 322 | | Seq [] -> failwith "lemmatize_tokens" |
323 | 323 | | Variant l -> Xlist.fold l paths (fun paths t -> lemmatize_tokens paths next_id t)*) |
324 | - | |
324 | + | |
325 | 325 | (*let rec lemmatize_paths_tokens paths = function (* FIXME: uzgodniฤ postaฤ lematรณw *) |
326 | 326 | Token({token=Dig(v,cat)} as t)-> PrePaths.add_edge paths t.beg t.next t.orth v (parse_postags cat) t.attrs t.beg t.len |
327 | - | Token({token=Lemma(lemma,interp)} as t) -> | |
327 | + | Token({token=Lemma(lemma,interp)} as t) -> | |
328 | 328 | if Xlist.mem t.attrs "lemmatized as lowercase" || Xlist.mem t.attrs "lemma not validated" then paths else (* FIXME *) |
329 | 329 | PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags interp) t.attrs t.beg t.len |
330 | 330 | | Token({token=Interp lemma} as t) -> PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags "interp") t.attrs t.beg t.len |
331 | 331 | | Token({token=Proper _} as t) -> failwith "lemmatize_paths_tokens: ni" |
332 | 332 | | Token({token=Compound _} as t) -> failwith "lemmatize_paths_tokens: ni" |
333 | -(* | Token({token=AllSmall s} as t) -> | |
333 | +(* | Token({token=AllSmall s} as t) -> | |
334 | 334 | Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) -> |
335 | 335 | PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags postags) t.beg t.len) |
336 | - | Token({token=SmallLetter s} as t) -> | |
336 | + | Token({token=SmallLetter s} as t) -> | |
337 | 337 | Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) -> |
338 | 338 | PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags postags) t.beg t.len) |
339 | 339 | | Token({token=FirstCap(s,s2)} as t) -> (* FIXME: dodaฤ wersjฤ z s2 ; uporzฤ dkowaฤ sลownik; dodaฤ akronimy *) |
... | ... | @@ -345,19 +345,19 @@ let rec get_end_id = function |
345 | 345 | | Token _ -> paths |
346 | 346 | | Seq l -> Xlist.fold l paths (fun paths t -> lemmatize_paths_tokens paths t) |
347 | 347 | | Variant l -> Xlist.fold l paths (fun paths t -> lemmatize_paths_tokens paths t) |
348 | - | |
348 | + | |
349 | 349 | (*let rec lemmatize paths = function |
350 | 350 | t :: next :: l -> lemmatize (lemmatize_tokens paths (get_beg_id next) t) (next :: l) |
351 | 351 | | [t] -> lemmatize_tokens paths (get_end_id t) t |
352 | 352 | | [] -> failwith "lemmatize"*) |
353 | - | |
353 | + | |
354 | 354 | let rec lemmatize_paths paths = function |
355 | 355 | t :: l -> lemmatize_paths (lemmatize_paths_tokens paths t) l |
356 | 356 | | [] -> paths *) |
357 | - | |
357 | + | |
358 | 358 | (* FIXME: dodaฤ 'co do' prep:gen *) |
359 | - | |
360 | - | |
359 | + | |
360 | + | |
361 | 361 | (* Dane do przekazania: |
362 | 362 | - lematy i interpretacje: generowanie typรณw i termรณw |
363 | 363 | - orths |
... | ... | @@ -365,8 +365,8 @@ let rec lemmatize_paths paths = function |
365 | 365 | - struktura grafu: wyrรณลผniki przy niejednoznacznoลci |
366 | 366 | - sensy wraz z hiperonimami |
367 | 367 | - <indent> *) |
368 | - | |
369 | -(* | |
368 | + | |
369 | +(* | |
370 | 370 | Ala zjadลa kota. |
371 | 371 | Ala subst:sg:nom:f imiฤ -> istota |
372 | 372 | kot subst:sg:nom:m2 pospolita - kot 2 - istota 1 istota ลผywa 1 zwierzฤ 1 strunowiec 1 czaszkowiec 1 krฤgowiec 1 tetrapod 1 owodniowiec 1 ssak 1 ssak ลผyworodny 1 ลoลผyskowiec 1 ssak drapieลผny 1 kot 1 kot 2 |
... | ... | @@ -392,10 +392,10 @@ czas 3 godzina 4 |
392 | 392 | do opisu czasu trwania: |
393 | 393 | jednostka czasu 1: godzina 3, sekunda 2, (minuta 4 - nie podลฤ czona) dzieล 2, miesiฤ c 1, rok 1/2 |
394 | 394 | |
395 | -*) | |
396 | - | |
397 | -(** | |
398 | - | |
395 | +*) | |
396 | + | |
397 | +(** | |
398 | + | |
399 | 399 | (* empty *) |
400 | 400 | |
401 | 401 | let empty = IntMap.empty, 0, 0 |
... | ... | @@ -406,11 +406,11 @@ let poss_record_empty = {interp=[]; attrs=[]; proper=[]; senses=[]} |
406 | 406 | |
407 | 407 | (* add *) |
408 | 408 | |
409 | -let dict_add dict lemma postags attrs beg len = | |
409 | +let dict_add dict lemma postags attrs beg len = | |
410 | 410 | if postags = [] then dict else |
411 | 411 | let interps = try StringMap.find dict.lemmas lemma with Not_found -> StringMap.empty in |
412 | 412 | let interps = Xlist.fold postags interps (fun interps (pos,tags) -> |
413 | - StringMap.add_inc interps pos {poss_record_empty with interp=[tags]; attrs=attrs} (fun l -> | |
413 | + StringMap.add_inc interps pos {poss_record_empty with interp=[tags]; attrs=attrs} (fun l -> | |
414 | 414 | {l with interp=tags :: l.interp; attrs=StringSet.to_list (StringSet.union (StringSet.of_list l.attrs) (StringSet.of_list attrs))})) in |
415 | 415 | if dict.dbeg <> beg && dict.dbeg <> -1 then failwith "dict_add" else |
416 | 416 | if dict.dlen <> len && dict.dlen <> -1 then failwith "dict_add" else |
... | ... | @@ -424,52 +424,52 @@ let add_simple map i j orth lemma postags attrs beg len = |
424 | 424 | let orths = StringMap.add orths orth dict in |
425 | 425 | let map2 = IntMap.add map2 j orths in |
426 | 426 | IntMap.add map i map2 |
427 | - | |
427 | + | |
428 | 428 | let add_edge (map,last,n) i j orth lemma postags attrs beg len = |
429 | 429 | add_simple map i j orth lemma postags attrs beg len, max j last, max j n |
430 | - | |
430 | + | |
431 | 431 | let rec add_path (map,last,n) i j = function |
432 | 432 | [] -> failwith "add_path" |
433 | - | [orth,lemma,postags,beg,len] -> | |
433 | + | [orth,lemma,postags,beg,len] -> | |
434 | 434 | add_simple map i j orth lemma postags [] beg len, last, n |
435 | - | (orth,lemma,postags,beg,len) :: l -> | |
435 | + | (orth,lemma,postags,beg,len) :: l -> | |
436 | 436 | add_path (add_simple map i (n+1) orth lemma postags [] beg len, last, n+1) (n+1) j l |
437 | -(* | |
437 | +(* | |
438 | 438 | let insert (map,last,n) i j orth dict = |
439 | 439 | let map2 = try IntMap.find map i with Not_found -> IntMap.empty in |
440 | 440 | let orths = try IntMap.find map2 j with Not_found -> StringMap.empty in |
441 | 441 | let orths = StringMap.add orths orth dict in |
442 | - let map2 = IntMap.add map2 j orths in | |
442 | + let map2 = IntMap.add map2 j orths in | |
443 | 443 | IntMap.add map i map2, last, n |
444 | - | |
444 | + | |
445 | 445 | let rec insert_path (map,last,n) i j = function |
446 | 446 | [] -> failwith "add_path" |
447 | - | [orth,dict] -> | |
447 | + | [orth,dict] -> | |
448 | 448 | insert (map,last,n) i j orth dict |
449 | - | (orth,dict) :: l -> | |
449 | + | (orth,dict) :: l -> | |
450 | 450 | insert_path (insert (map,last,n+1) i (n+1) orth dict) (n+1) j l |
451 | - | |
452 | -let set_sentence_begin (map,last,n) i j orth = | |
451 | + | |
452 | +let set_sentence_begin (map,last,n) i j orth = | |
453 | 453 | try |
454 | 454 | let map2 = IntMap.find map i in |
455 | 455 | let orths = IntMap.find map2 j in |
456 | 456 | let dict = StringMap.find orths orth in |
457 | 457 | let orths = StringMap.add orths orth {dict with sentence_begin=true} in |
458 | 458 | let map2 = IntMap.add map2 j orths in |
459 | - IntMap.add map i map2, last, n | |
459 | + IntMap.add map i map2, last, n | |
460 | 460 | with Not_found -> failwith "set_sentence_begin" |
461 | 461 | |
462 | -let set_sentence_end (map,last,n) i j orth = | |
462 | +let set_sentence_end (map,last,n) i j orth = | |
463 | 463 | try |
464 | 464 | let map2 = IntMap.find map i in |
465 | 465 | let orths = IntMap.find map2 j in |
466 | 466 | let dict = StringMap.find orths orth in |
467 | 467 | let orths = StringMap.add orths orth {dict with sentence_end=true} in |
468 | 468 | let map2 = IntMap.add map2 j orths in |
469 | - IntMap.add map i map2, last, n | |
469 | + IntMap.add map i map2, last, n | |
470 | 470 | with Not_found -> failwith "set_sentence_end" |
471 | 471 | |
472 | -let is_sentence_end (map,last,n) i j orth = | |
472 | +let is_sentence_end (map,last,n) i j orth = | |
473 | 473 | try |
474 | 474 | let map2 = IntMap.find map i in |
475 | 475 | let orths = IntMap.find map2 j in |
... | ... | @@ -503,7 +503,7 @@ let rec find_paths_bound (map,last,n) k i = |
503 | 503 | let tails = find_paths_bound (map,last,n) (k-1) j in |
504 | 504 | StringMap.fold set paths (fun paths s _ -> |
505 | 505 | Xlist.fold tails paths (fun paths tail -> (s :: tail) :: paths))) |
506 | - | |
506 | + | |
507 | 507 | let rec find_paths_rec (map,last,n) i = |
508 | 508 | if i = last then [[]] else |
509 | 509 | if not (IntMap.mem map i) then failwith "find_paths_rec" else |
... | ... | @@ -511,14 +511,14 @@ let rec find_paths_rec (map,last,n) i = |
511 | 511 | let tails = find_paths_rec (map,last,n) j in |
512 | 512 | StringMap.fold set paths (fun paths s _ -> |
513 | 513 | Xlist.fold tails paths (fun paths tail -> (s :: tail) :: paths))) |
514 | - | |
514 | + | |
515 | 515 | let find_paths (map,last,n) = |
516 | 516 | find_paths_rec (map,last,n) 0 |
517 | -*) | |
518 | -let has_lemma orths = | |
517 | +*) | |
518 | +let has_lemma orths = | |
519 | 519 | StringMap.fold orths false (fun b _ dict -> |
520 | 520 | if StringMap.is_empty dict.lemmas then b else true) |
521 | - | |
521 | + | |
522 | 522 | let rec no_possible_path_rec map last i = |
523 | 523 | if last = i then false else |
524 | 524 | let map2 = try IntMap.find map i with Not_found -> IntMap.empty in |
... | ... | @@ -526,27 +526,27 @@ let rec no_possible_path_rec map last i = |
526 | 526 | if has_lemma orths then |
527 | 527 | b && no_possible_path_rec map last j |
528 | 528 | else b) |
529 | - | |
529 | + | |
530 | 530 | let no_possible_path (map,last,n) = |
531 | 531 | no_possible_path_rec map last 0 |
532 | -(* | |
532 | +(* | |
533 | 533 | let rec match_path_rec map found i rev = function |
534 | - [] -> (i :: rev) :: found | |
534 | + [] -> (i :: rev) :: found | |
535 | 535 | | s :: l -> |
536 | 536 | let map2 = try IntMap.find map i with Not_found -> IntMap.empty in |
537 | 537 | let found2 = IntMap.fold map2 [] (fun found2 j set -> |
538 | 538 | if StringMap.mem set s then j :: found2 else found2) in |
539 | 539 | Xlist.fold found2 found (fun found j -> match_path_rec map found j (i :: rev) l) |
540 | - | |
540 | + | |
541 | 541 | let match_path (map,last,n) = function |
542 | 542 | [] -> failwith "match_path" |
543 | - | s :: l -> | |
543 | + | s :: l -> | |
544 | 544 | let found = IntMap.fold map [] (fun found i map2 -> |
545 | 545 | IntMap.fold map2 found (fun found j set -> |
546 | 546 | if StringMap.mem set s then (i,j) :: found else found)) in |
547 | 547 | Xlist.fold found [] (fun found (i,j) -> match_path_rec map found j [i] l) |
548 | 548 | |
549 | -let get_matched orths = function | |
549 | +let get_matched orths = function | |
550 | 550 | Orth s -> if StringMap.mem orths s then [s] else [] |
551 | 551 | | Pos s -> (*print_endline ("a1 " ^ s);*) StringSet.to_list (StringMap.fold orths StringSet.empty (fun set orth dict -> |
552 | 552 | StringMap.fold dict.lemmas set (fun set lemma interps -> |
... | ... | @@ -554,19 +554,19 @@ let get_matched orths = function |
554 | 554 | (* print_endline ("a2 " ^ pos); *) |
555 | 555 | if s = pos then StringSet.add set orth else set)))) |
556 | 556 | (* | All -> orths *) |
557 | - | |
557 | + | |
558 | 558 | let rec match_path_ex_rec map found i rev = function |
559 | - [] -> ((i,[]) :: rev) :: found | |
559 | + [] -> ((i,[]) :: rev) :: found | |
560 | 560 | | s :: l -> |
561 | 561 | let map2 = try IntMap.find map i with Not_found -> IntMap.empty in |
562 | 562 | let found2 = IntMap.fold map2 [] (fun found2 j orths -> |
563 | 563 | let l = get_matched orths s in |
564 | 564 | if l <> [] then (j,l) :: found2 else found2) in |
565 | 565 | Xlist.fold found2 found (fun found (j,l2) -> match_path_ex_rec map found j ((i,l2) :: rev) l) |
566 | - | |
566 | + | |
567 | 567 | let match_path_ex (map,last,n) = function |
568 | 568 | [] -> failwith "match_path_ex" |
569 | - | s :: l -> | |
569 | + | s :: l -> | |
570 | 570 | let found = IntMap.fold map [] (fun found i map2 -> |
571 | 571 | IntMap.fold map2 found (fun found j orths -> |
572 | 572 | let l = get_matched orths s in |
... | ... | @@ -578,49 +578,49 @@ let last_node (_,last,_) = last |
578 | 578 | let set_last_node (map,last,n) new_last = map, new_last, n |
579 | 579 | |
580 | 580 | let find (map,last,n) i = |
581 | - try | |
581 | + try | |
582 | 582 | IntMap.fold (IntMap.find map i) [] (fun found j orths -> |
583 | 583 | StringMap.fold orths found (fun found orth _ -> |
584 | 584 | (i,j,orth) :: found)) |
585 | 585 | with Not_found -> [] |
586 | 586 | |
587 | 587 | let find_full (map,last,n) i = |
588 | - try | |
588 | + try | |
589 | 589 | IntMap.fold (IntMap.find map i) [] (fun found j orths -> |
590 | 590 | StringMap.fold orths found (fun found orth dict -> |
591 | 591 | (i,j,orth,dict) :: found)) |
592 | 592 | with Not_found -> [] |
593 | 593 | *) |
594 | -let fold (map,last,n) s f = | |
594 | +let fold (map,last,n) s f = | |
595 | 595 | IntMap.fold map s (fun s i map2 -> |
596 | 596 | IntMap.fold map2 s (fun s j set -> |
597 | 597 | StringMap.fold set s (fun s orth lemmas -> |
598 | 598 | f s orth i j lemmas))) |
599 | -(* | |
600 | -let map (map,last,n) f = | |
599 | +(* | |
600 | +let map (map,last,n) f = | |
601 | 601 | IntMap.map map (fun map2 -> |
602 | 602 | IntMap.map map2 (fun orths -> |
603 | 603 | StringMap.map orths (fun lemmas -> |
604 | 604 | f lemmas))), last, n |
605 | - | |
606 | -let mapi (map,last,n) f = | |
605 | + | |
606 | +let mapi (map,last,n) f = | |
607 | 607 | IntMap.mapi map (fun i map2 -> |
608 | 608 | IntMap.mapi map2 (fun j orths -> |
609 | 609 | StringMap.mapi orths (fun orth lemmas -> |
610 | 610 | f orth i j lemmas))), last, n |
611 | - | |
612 | -let get_edges (map,_,_) i j = | |
611 | + | |
612 | +let get_edges (map,_,_) i j = | |
613 | 613 | IntMap.find (IntMap.find map i) j |
614 | - | |
615 | -let get_edges_from (map,_,_) i = | |
614 | + | |
615 | +let get_edges_from (map,_,_) i = | |
616 | 616 | IntMap.find map i |
617 | -*) | |
617 | +*) | |
618 | 618 | let rec topological_sort_rec map visited l i = |
619 | - if IntSet.mem visited i then (l,visited) else | |
619 | + if IntSet.mem visited i then (l,visited) else | |
620 | 620 | let l, visited = IntMap.fold (try IntMap.find map i with Not_found -> IntMap.empty) (l,IntSet.add visited i) (fun (l,visited) j _ -> |
621 | 621 | topological_sort_rec map visited l j) in |
622 | 622 | i :: l, visited |
623 | - | |
623 | + | |
624 | 624 | let topological_sort (map,last,n) = |
625 | 625 | let l, _ = topological_sort_rec map IntSet.empty [] 0 in |
626 | 626 | let translation, k = Xlist.fold l (IntMap.empty,0) (fun (translation,k) i -> |
... | ... | @@ -630,27 +630,27 @@ let topological_sort (map,last,n) = |
630 | 630 | try IntMap.add map2 (IntMap.find translation j) orths with Not_found -> map2) in |
631 | 631 | try IntMap.add map (IntMap.find translation i) map2 with Not_found -> map) in |
632 | 632 | map, (try IntMap.find translation last with Not_found -> failwith "topological_sort 3"), k-1 |
633 | - | |
633 | + | |
634 | 634 | (*let interp_to_string interp = |
635 | 635 | String.concat " " (Xlist.fold interp.interp [] (fun l tags -> |
636 | 636 | (String.concat ":" (Xlist.map tags (String.concat "."))) :: l)) |
637 | - | |
637 | + | |
638 | 638 | let interps_to_string interps = |
639 | 639 | String.concat " " (StringMap.fold interps [] (fun l pos interp -> |
640 | 640 | (pos ^ "[" ^ interp_to_string interp ^ "]") :: l)) |
641 | - | |
641 | + | |
642 | 642 | let lemmas_to_string lemmas = |
643 | 643 | String.concat " " (StringMap.fold lemmas [] (fun l lemma interps -> |
644 | 644 | (lemma ^ "[" ^ interps_to_string interps ^ "]") :: l)) |
645 | - | |
646 | -let to_string (map,last,n) = | |
645 | + | |
646 | +let to_string (map,last,n) = | |
647 | 647 | let l = IntMap.fold map [] (fun l i map2 -> |
648 | 648 | IntMap.fold map2 l (fun l j orths -> |
649 | - (Printf.sprintf "%5d %5d %s" i j (String.concat " " (StringMap.fold orths [] (fun l2 orths dict -> | |
649 | + (Printf.sprintf "%5d %5d %s" i j (String.concat " " (StringMap.fold orths [] (fun l2 orths dict -> | |
650 | 650 | (Printf.sprintf "%s %5d %5d [%s]" orths dict.dbeg dict.dlen (lemmas_to_string dict.lemmas)) :: l2)))) :: l)) in |
651 | 651 | Printf.sprintf "last=%d n=%d\n %s" last n (String.concat "\n " (List.sort compare l))*) |
652 | - (* | |
653 | -let make_unique_orths (map,last,n) = | |
652 | + (* | |
653 | +let make_unique_orths (map,last,n) = | |
654 | 654 | let names = fold (map,last,n) StringQMap.empty (fun names orth _ _ _ -> |
655 | 655 | StringQMap.add names orth) in |
656 | 656 | let names = StringQMap.fold names StringSet.empty (fun names name n -> |
... | ... | @@ -658,10 +658,10 @@ let make_unique_orths (map,last,n) = |
658 | 658 | let map,_ = IntMap.fold map (IntMap.empty,StringMap.empty) (fun (map,used) i map2 -> |
659 | 659 | let map2,used = IntMap.fold map2 (IntMap.empty,used) (fun (map2,used) j orths -> |
660 | 660 | let orths,used = StringMap.fold orths (StringMap.empty,used) (fun (orths,used) orth lemmas -> |
661 | - let orth,used = | |
661 | + let orth,used = | |
662 | 662 | if StringSet.mem names orth then |
663 | - let n = | |
664 | - try StringMap.find used orth + 1 | |
663 | + let n = | |
664 | + try StringMap.find used orth + 1 | |
665 | 665 | with Not_found -> 1 in |
666 | 666 | orth ^ "-" ^ string_of_int n, StringMap.add used orth n |
667 | 667 | else orth,used in |
... | ... | @@ -670,6 +670,6 @@ let make_unique_orths (map,last,n) = |
670 | 670 | IntMap.add map i map2, used) in |
671 | 671 | map,last,n |
672 | 672 | |
673 | -*) | |
674 | - | |
675 | -**) | |
676 | 673 | \ No newline at end of file |
674 | +*) | |
675 | + | |
676 | +**) | |
... | ... |