Commit 479c42bcdbe0c67eba3dea6876b89ffeb40d5cbc

Authored by Wojciech Jaworski
1 parent 099c6f48

dostosowanie pre do pracy z morphology

config 0 → 100644
  1 +# General resource path
  2 +RESOURCES_PATH=../resources/
  3 +
  4 +# Localization of Walenty in TEI format
  5 +WALENTY=/usr/share/walenty/walenty_20160412.xml
  6 +
  7 +# Port number for pre server
  8 +PRE_PORT=3258
  9 +
  10 +# Host name for pre server
  11 +PRE_HOST=localhost
  12 +
  13 +# Path to the directory for parsed sentences
  14 +RESULTS_PATH=../results/
  15 +
  16 +# Maximum number of generated solutions
  17 +MAX_NO_SOLUTIONS=10
  18 +
  19 +# LCG parser timeout in seconds
  20 +LCG_TIMEOUT=100
  21 +
  22 +# LCG parser memory size (maximum number of nodes of parsed term)
  23 +LCG_NO_NODES=10000000
  24 +
  25 +# Number of parser processes
  26 +NO_PROCESSES=4
morphology/dict.ml
@@ -315,9 +315,9 @@ let exceptional_lemmata = StringSet.of_list ([ @@ -315,9 +315,9 @@ let exceptional_lemmata = StringSet.of_list ([
315 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; 315 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
316 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; 316 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";
317 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*) 317 ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; ""; "";*)
318 - ] @ File.load_lines "data/obce.tab" (* @  
319 - File.load_lines "data/validated_adj.tab" @ File.load_lines "data/validated_noun.tab" @  
320 - File.load_lines "data/validated_verb.tab" @ File.load_lines "data/adv_nieodprzymiotnikowe.tab" *)) 318 + ] @ File.load_lines "../morphology/data/obce.tab" (* @
  319 + File.load_lines "../morphology/data/validated_adj.tab" @ File.load_lines "../morphology/data/validated_noun.tab" @
  320 + File.load_lines "../morphology/data/validated_verb.tab" @ File.load_lines "../morphology/data/adv_nieodprzymiotnikowe.tab" *))
321 321
322 let remove_exceptional_lemmata dict = 322 let remove_exceptional_lemmata dict =
323 Xlist.fold dict [] (fun dict entry -> 323 Xlist.fold dict [] (fun dict entry ->
morphology/inflexion.a 0 → 100644
No preview for this file type
morphology/inflexion.cmxa 0 → 100644
No preview for this file type
morphology/ruleGenerator.ml
@@ -2,7 +2,7 @@ open Xstd @@ -2,7 +2,7 @@ open Xstd
2 open Printf 2 open Printf
3 open Types 3 open Types
4 4
5 -let alternation_map = Rules.alternation_map 5 +(* let alternation_map = Rules.alternation_map *)
6 6
7 let rule_types = Xlist.fold [ 7 let rule_types = Xlist.fold [
8 (* Xlist.map (StringMap.find alternation_map "obce_ch") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{x}ych\t{x}"; 8 (* Xlist.map (StringMap.find alternation_map "obce_ch") (fun (_,s,t) -> sprintf "%sch\t%s" s t), "{x}ych\t{x}";
morphology/rules.ml
@@ -44,7 +44,7 @@ let load_alternations filename = @@ -44,7 +44,7 @@ let load_alternations filename =
44 | _ -> failwith "load_alternations") in 44 | _ -> failwith "load_alternations") in
45 (name,List.rev alts) :: alternations 45 (name,List.rev alts) :: alternations
46 46
47 -let alternations () = load_alternations "data/alternations.dic" 47 +let alternations () = load_alternations "../morphology/data/alternations.dic"
48 48
49 let revert_alternations l = 49 let revert_alternations l =
50 Xlist.map l (fun a -> {a with afind=a.aset; aset=a.afind}) 50 Xlist.map l (fun a -> {a with afind=a.aset; aset=a.afind})
@@ -76,9 +76,9 @@ let load_pref_rules filename = @@ -76,9 +76,9 @@ let load_pref_rules filename =
76 | _ -> failwith "load_pref_rules") in 76 | _ -> failwith "load_pref_rules") in
77 (name,List.rev rules) :: pref_rules 77 (name,List.rev rules) :: pref_rules
78 78
79 -let rules () = load_suf_rules "data/rules.dic"  
80 -let rev_rules () = load_suf_rules "data/rev_rules.dic"  
81 -let pref_rules () = load_pref_rules "data/pref_rules.dic" 79 +let rules () = load_suf_rules "../morphology/data/rules.dic"
  80 +let rev_rules () = load_suf_rules "../morphology/data/rev_rules.dic"
  81 +let pref_rules () = load_pref_rules "../morphology/data/pref_rules.dic"
82 82
83 let load_freq_rules filename = 83 let load_freq_rules filename =
84 File.fold_tab filename [] (fun rules -> function 84 File.fold_tab filename [] (fun rules -> function
@@ -120,7 +120,7 @@ let rule_map alternation_map rev_alternation_map rules rev_rules pref_rules = @@ -120,7 +120,7 @@ let rule_map alternation_map rev_alternation_map rules rev_rules pref_rules =
120 let map = Xlist.fold rev_rules map (fun map (k,v) -> StringMap.add map k (prepare_rev_rules rev_alternation_map v)) in 120 let map = Xlist.fold rev_rules map (fun map (k,v) -> StringMap.add map k (prepare_rev_rules rev_alternation_map v)) in
121 Xlist.fold pref_rules map (fun map (k,v) -> StringMap.add map k (prepare_pref_rules v)) 121 Xlist.fold pref_rules map (fun map (k,v) -> StringMap.add map k (prepare_pref_rules v))
122 122
123 -let schemata () = File.load_tab "data/schemata.dic" (fun l -> l) 123 +let schemata () = File.load_tab "../morphology/data/schemata.dic" (fun l -> l)
124 124
125 (**********************************************************************************************) 125 (**********************************************************************************************)
126 126
@@ -236,7 +236,7 @@ module InterpTree = struct @@ -236,7 +236,7 @@ module InterpTree = struct
236 236
237 end 237 end
238 238
239 -let interp_tree () = InterpTree.create (load_interp_rules "data/interp_rules.dic") 239 +let interp_tree () = InterpTree.create (load_interp_rules "../morphology/data/interp_rules.dic")
240 240
241 (**********************************************************************************************) 241 (**********************************************************************************************)
242 242
parser/.gitignore 0 → 100644
  1 +pipe
  2 +results/*
parser/makefile
1 OCAMLC=ocamlc 1 OCAMLC=ocamlc
2 OCAMLOPT=ocamlopt 2 OCAMLOPT=ocamlopt
3 OCAMLDEP=ocamldep 3 OCAMLDEP=ocamldep
4 -#INCLUDES=-I +xml-light -I +xlib -I ../../lib/latexvis -I ../lib/xt -I ../../Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre  
5 -INCLUDES=--I +xml-light -I +xlib -I ../pre 4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre
  5 +#INCLUDES=-I +xml-light -I +xlib -I ../pre
6 OCAMLFLAGS=$(INCLUDES) -g 6 OCAMLFLAGS=$(INCLUDES) -g
7 -#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa latexvis.cmxa nkjp.cmxa  
8 -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa 7 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa
  8 +#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa
9 9
10 PRE= ../pre/paths.ml ../pre/walTypes.ml ../pre/preTypes.ml ../pre/walStringOf.ml 10 PRE= ../pre/paths.ml ../pre/walTypes.ml ../pre/preTypes.ml ../pre/walStringOf.ml
11 -#LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml  
12 -LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml 11 +LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml
  12 +#LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml
13 DISAMB= disambSelPref.ml disambLemma.ml 13 DISAMB= disambSelPref.ml disambLemma.ml
14 -#SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml  
15 -SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml  
16 -EXEC= execTypes.ml visualization.ml exec.ml  
17 -  
18 -all:  
19 -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml 14 +SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
  15 +#SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml
  16 +EXEC= execTypes.ml visualization.ml exec.ml
  17 +
  18 +all:
  19 + $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
20 # $(OCAMLOPT) -o server $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml 20 # $(OCAMLOPT) -o server $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
21 -# $(OCAMLOPT) -o parser.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml  
22 - $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml  
23 - $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml  
24 -# $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml  
25 - $(OCAMLOPT) -o eniam $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) singleInterface.ml  
26 -  
27 -# pipe:  
28 -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(PROC_F)  
29 -# 21 +# $(OCAMLOPT) -o parser.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
  22 +# $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml
  23 +# $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml
  24 +# $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml
  25 +# $(OCAMLOPT) -o eniam $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) singleInterface.ml
  26 +
  27 +# pipe:
  28 +# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(PROC_F)
  29 +#
30 # of_xml: 30 # of_xml:
31 # $(OCAMLOPT) -o of_xml $(OCAMLOPTFLAGS) LCGofXml.ml 31 # $(OCAMLOPT) -o of_xml $(OCAMLOPTFLAGS) LCGofXml.ml
32 32
parser/visualization.ml
@@ -22,11 +22,11 @@ open Xstd @@ -22,11 +22,11 @@ open Xstd
22 open Printf 22 open Printf
23 open PreTypes 23 open PreTypes
24 24
25 -let string_of_interps interps = 25 +let string_of_interps interps =
26 String.concat "|" (Xlist.map interps (fun interp -> 26 String.concat "|" (Xlist.map interps (fun interp ->
27 (String.concat ":" (Xlist.map interp (fun interp2 -> 27 (String.concat ":" (Xlist.map interp (fun interp2 ->
28 (String.concat "." interp2)))))) 28 (String.concat "." interp2))))))
29 - 29 +
30 let rec string_of_token = function 30 let rec string_of_token = function
31 PreTypes.SmallLetter orth -> sprintf "SmallLetter(%s)" orth 31 PreTypes.SmallLetter orth -> sprintf "SmallLetter(%s)" orth
32 | PreTypes.CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc 32 | PreTypes.CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc
@@ -43,22 +43,22 @@ let rec string_of_token = function @@ -43,22 +43,22 @@ let rec string_of_token = function
43 | PreTypes.Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses) 43 | PreTypes.Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses)
44 | PreTypes.Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token)) 44 | PreTypes.Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token))
45 45
46 -let rec spaces i = 46 +let rec spaces i =
47 if i = 0 then "" else " " ^ spaces (i-1) 47 if i = 0 then "" else " " ^ spaces (i-1)
48 - 48 +
49 let rec string_of_tokens i = function 49 let rec string_of_tokens i = function
50 - PreTypes.Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;id=%d;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token) 50 + PreTypes.Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;id=%d;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token)
51 t.PreTypes.id t.PreTypes.weight (String.concat ";" t.PreTypes.attrs) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight))) 51 t.PreTypes.id t.PreTypes.weight (String.concat ";" t.PreTypes.attrs) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight)))
52 (spaces i) (String.concat ";" (Xlist.map t.PreTypes.valence (WalStringOf.fnum_frame ""))) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.simple_valence (WalStringOf.fnum_frame ""))) (fst t.lroles) (snd t.lroles) 52 (spaces i) (String.concat ";" (Xlist.map t.PreTypes.valence (WalStringOf.fnum_frame ""))) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.simple_valence (WalStringOf.fnum_frame ""))) (fst t.lroles) (snd t.lroles)
53 | PreTypes.Variant l -> sprintf "%sVariant[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) 53 | PreTypes.Variant l -> sprintf "%sVariant[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))))
54 - | PreTypes.Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))))  
55 -  
56 -let paths_to_string_indexed (paths,last,next_id) =  
57 - String.concat "\n" (Xlist.map paths (fun (i,j,t) -> 54 + | PreTypes.Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1))))
  55 +
  56 +let paths_to_string_indexed (paths,last,next_id) =
  57 + String.concat "\n" (Xlist.map paths (fun (i,j,t) ->
58 Printf.sprintf "%2d %2d %s" i j (string_of_tokens 0 (PreTypes.Token t)))) 58 Printf.sprintf "%2d %2d %s" i j (string_of_tokens 0 (PreTypes.Token t))))
59 ^ Printf.sprintf "\nlast=%d next_id=%d" last next_id 59 ^ Printf.sprintf "\nlast=%d next_id=%d" last next_id
60 -  
61 -let rec xml_of_graph = function 60 +
  61 +let rec xml_of_graph = function
62 Node t -> Xml.Element("node",["pred",t.pred;"cat",t.cat;"weight",string_of_float t.weight;"id",string_of_int t.id],[ 62 Node t -> Xml.Element("node",["pred",t.pred;"cat",t.cat;"weight",string_of_float t.weight;"id",string_of_int t.id],[
63 Xml.Element("gs",[],[xml_of_graph t.gs]); 63 Xml.Element("gs",[],[xml_of_graph t.gs]);
64 Xml.Element("agf",[],[Xml.PCData (WalStringOf.gf t.agf)]); 64 Xml.Element("agf",[],[Xml.PCData (WalStringOf.gf t.agf)]);
@@ -75,11 +75,11 @@ let rec xml_of_graph = function @@ -75,11 +75,11 @@ let rec xml_of_graph = function
75 Xml.Element("relations",[],[xml_of_graph c.cx_relations])]) 75 Xml.Element("relations",[],[xml_of_graph c.cx_relations])])
76 | Relation(r,a,t) -> Xml.Element("relation",[],[ 76 | Relation(r,a,t) -> Xml.Element("relation",[],[
77 Xml.Element("role",[],[xml_of_graph r]); 77 Xml.Element("role",[],[xml_of_graph r]);
78 - Xml.Element("role_attr",[],[xml_of_graph r]); 78 + Xml.Element("role_attr",[],[xml_of_graph r]);
79 xml_of_graph t]) 79 xml_of_graph t])
80 | RevRelation(r,a,t) -> Xml.Element("revrelation",[],[ 80 | RevRelation(r,a,t) -> Xml.Element("revrelation",[],[
81 Xml.Element("role",[],[xml_of_graph r]); 81 Xml.Element("role",[],[xml_of_graph r]);
82 - Xml.Element("role_attr",[],[xml_of_graph r]); 82 + Xml.Element("role_attr",[],[xml_of_graph r]);
83 xml_of_graph t]) 83 xml_of_graph t])
84 | SingleRelation(r) -> Xml.Element("singlerelation",[],[xml_of_graph r]) 84 | SingleRelation(r) -> Xml.Element("singlerelation",[],[xml_of_graph r])
85 | Tuple l -> Xml.Element("tuple",[],Xlist.map l xml_of_graph) 85 | Tuple l -> Xml.Element("tuple",[],Xlist.map l xml_of_graph)
@@ -90,18 +90,18 @@ let rec xml_of_graph = function @@ -90,18 +90,18 @@ let rec xml_of_graph = function
90 | Ref i -> Xml.Element("ref",["id",string_of_int i],[]) 90 | Ref i -> Xml.Element("ref",["id",string_of_int i],[])
91 | Morf _ -> Xml.Element("dot",[],[]) (* FIXME!!! *) 91 | Morf _ -> Xml.Element("dot",[],[]) (* FIXME!!! *)
92 | t -> failwith ("xml_of_graph: " ^ LCGstringOf.linear_term 0 t) 92 | t -> failwith ("xml_of_graph: " ^ LCGstringOf.linear_term 0 t)
93 -  
94 -let print_xml_graph path name references = 93 +
  94 +let print_xml_graph path name references =
95 let l = Int.fold 0 (Array.length references - 1) [] (fun l i -> 95 let l = Int.fold 0 (Array.length references - 1) [] (fun l i ->
96 (i, xml_of_graph references.(i)) :: l) in 96 (i, xml_of_graph references.(i)) :: l) in
97 let xml = Xml.Element("graph",[],Xlist.rev_map l (fun (i,xml) -> 97 let xml = Xml.Element("graph",[],Xlist.rev_map l (fun (i,xml) ->
98 Xml.Element("graph_node",["id",string_of_int i],[xml]))) in 98 Xml.Element("graph_node",["id",string_of_int i],[xml]))) in
99 - File.file_out (path ^ name ^ ".xml") (fun file -> 99 + File.file_out (path ^ name ^ ".xml") (fun file ->
100 fprintf file "%s\n" (Xml.to_string_fmt xml)) 100 fprintf file "%s\n" (Xml.to_string_fmt xml))
101 101
102 -let print_xml_tree path name tree = 102 +let print_xml_tree path name tree =
103 let xml = xml_of_graph tree in 103 let xml = xml_of_graph tree in
104 - File.file_out (path ^ name ^ ".xml") (fun file -> 104 + File.file_out (path ^ name ^ ".xml") (fun file ->
105 fprintf file "%s\n" (Xml.to_string_fmt xml)) 105 fprintf file "%s\n" (Xml.to_string_fmt xml))
106 106
107 107
@@ -112,14 +112,14 @@ let rec get_refs rev = function @@ -112,14 +112,14 @@ let rec get_refs rev = function
112 | Variant(e,l) -> Xlist.fold l rev (fun rev (i,t) -> get_refs rev t) 112 | Variant(e,l) -> Xlist.fold l rev (fun rev (i,t) -> get_refs rev t)
113 | Dot -> rev 113 | Dot -> rev
114 | _ -> (*failwith*)print_endline "get_refs"; rev 114 | _ -> (*failwith*)print_endline "get_refs"; rev
115 -  
116 -let escape_string s = 115 +
  116 +let escape_string s =
117 Int.fold 0 (String.length s - 1) "" (fun t i -> 117 Int.fold 0 (String.length s - 1) "" (fun t i ->
118 match String.sub s i 1 with 118 match String.sub s i 1 with
119 "<" -> t ^ "〈" 119 "<" -> t ^ "〈"
120 | ">" -> t ^ "〉" 120 | ">" -> t ^ "〉"
121 | c -> t ^ c) 121 | c -> t ^ c)
122 - 122 +
123 let string_of_node t = 123 let string_of_node t =
124 let l = [ 124 let l = [
125 "PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs; 125 "PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs;
@@ -130,64 +130,64 @@ let string_of_node t = @@ -130,64 +130,64 @@ let string_of_node t =
130 "{ " ^ String.concat " | " (Xlist.map l (fun (e,t) -> "{ " ^ e ^ " | " ^ escape_string (LCGstringOf.linear_term 0 t) ^ " }")) ^ " }" 130 "{ " ^ String.concat " | " (Xlist.map l (fun (e,t) -> "{ " ^ e ^ " | " ^ escape_string (LCGstringOf.linear_term 0 t) ^ " }")) ^ " }"
131 131
132 let single_rel_id_count = ref 0 132 let single_rel_id_count = ref 0
133 - 133 +
134 let get_single_rel_id () = 134 let get_single_rel_id () =
135 let id = !single_rel_id_count in 135 let id = !single_rel_id_count in
136 incr single_rel_id_count; 136 incr single_rel_id_count;
137 "s" ^ string_of_int id 137 "s" ^ string_of_int id
138 - 138 +
139 let print_edge file label upper id = 139 let print_edge file label upper id =
140 - if upper <> "" then 140 + if upper <> "" then
141 if label = "" then fprintf file " %s -> %s\n" upper id 141 if label = "" then fprintf file " %s -> %s\n" upper id
142 else fprintf file " %s -> %s [label=\"%s\"]\n" upper id label 142 else fprintf file " %s -> %s [label=\"%s\"]\n" upper id label
143 -  
144 -(*let rec print_graph_rec2 file edge upper = function 143 +
  144 +(*let rec print_graph_rec2 file edge upper = function
145 Tuple l -> Xlist.iter l (print_graph_rec2 file edge upper) 145 Tuple l -> Xlist.iter l (print_graph_rec2 file edge upper)
146 - | Node t -> 146 + | Node t ->
147 let id = get_single_rel_id () in 147 let id = get_single_rel_id () in
148 fprintf file " %s [label=\"%s\"]\n" id (string_of_node t); 148 fprintf file " %s [label=\"%s\"]\n" id (string_of_node t);
149 print_edge file edge upper id; 149 print_edge file edge upper id;
150 print_graph_rec2 file "" id t.args 150 print_graph_rec2 file "" id t.args
151 - | Concept t -> 151 + | Concept t ->
152 let id = get_single_rel_id () in 152 let id = get_single_rel_id () in
153 - fprintf file " %s [shape=box,label=\"%s %s\"]\n" id  
154 - (LCGchart.string_of_linear_term 0 t.c_sense) 153 + fprintf file " %s [shape=box,label=\"%s %s\"]\n" id
  154 + (LCGchart.string_of_linear_term 0 t.c_sense)
155 (if t.c_name=Dot then "" else "„" ^ LCGchart.string_of_linear_term 0 t.c_name ^ "”"); (* FIXME *) 155 (if t.c_name=Dot then "" else "„" ^ LCGchart.string_of_linear_term 0 t.c_name ^ "”"); (* FIXME *)
156 print_edge file edge upper id; 156 print_edge file edge upper id;
157 print_graph_rec2 file "" id t.c_relations 157 print_graph_rec2 file "" id t.c_relations
158 - | SingleRelation(role) -> 158 + | SingleRelation(role) ->
159 let id = get_single_rel_id () in 159 let id = get_single_rel_id () in
160 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGchart.string_of_linear_term 0 role); 160 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGchart.string_of_linear_term 0 role);
161 if upper <> "" then fprintf file " %s -> %s\n" upper id 161 if upper <> "" then fprintf file " %s -> %s\n" upper id
162 - | Variant(e,l) -> 162 + | Variant(e,l) ->
163 fprintf file " %s [shape=diamond]\n" e; 163 fprintf file " %s [shape=diamond]\n" e;
164 print_edge file edge upper e; 164 print_edge file edge upper e;
165 Xlist.iter l (fun (i,t) -> print_graph_rec2 file i e t) 165 Xlist.iter l (fun (i,t) -> print_graph_rec2 file i e t)
166 | Dot -> () 166 | Dot -> ()
167 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i) 167 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i)
168 | t -> failwith ("print_graph_rec2: " ^ LCGchart.string_of_linear_term 0 t) *) 168 | t -> failwith ("print_graph_rec2: " ^ LCGchart.string_of_linear_term 0 t) *)
169 - 169 +
170 let rec string_of_quant_rec quant = function 170 let rec string_of_quant_rec quant = function
171 Tuple l -> Xlist.fold l quant string_of_quant_rec 171 Tuple l -> Xlist.fold l quant string_of_quant_rec
172 | Variant(e,l) -> (LCGstringOf.linear_term 0 (Variant(e,l))) :: quant 172 | Variant(e,l) -> (LCGstringOf.linear_term 0 (Variant(e,l))) :: quant
173 | Dot -> quant 173 | Dot -> quant
174 | Val s -> s :: quant 174 | Val s -> s :: quant
175 | _ -> failwith "string_of_quant_rec" 175 | _ -> failwith "string_of_quant_rec"
176 - 176 +
177 let string_of_quant t = 177 let string_of_quant t =
178 let l = string_of_quant_rec [] t in 178 let l = string_of_quant_rec [] t in
179 let s = String.concat " " l in 179 let s = String.concat " " l in
180 if s = "" then "" else "<I>" ^ s ^ "</I> " 180 if s = "" then "" else "<I>" ^ s ^ "</I> "
181 -  
182 -let rec print_graph_rec file edge upper id = function  
183 - Node t -> 181 +
  182 +let rec print_graph_rec file edge upper id = function
  183 + Node t ->
184 fprintf file " %s [label=\"%s\"]\n" id (string_of_node t); 184 fprintf file " %s [label=\"%s\"]\n" id (string_of_node t);
185 print_edge file edge upper id; 185 print_edge file edge upper id;
186 print_graph_rec2 file "" id t.args 186 print_graph_rec2 file "" id t.args
187 - | Concept t ->  
188 - fprintf file " %s [shape=box,label=<%s%s %s>]\n" id  
189 - (string_of_quant t.c_quant)  
190 - (LCGstringOf.linear_term 0 t.c_sense) 187 + | Concept t ->
  188 + fprintf file " %s [shape=box,label=<%s%s %s>]\n" id
  189 + (string_of_quant t.c_quant)
  190 + (LCGstringOf.linear_term 0 t.c_sense)
191 (if t.c_name=Dot then "" else "„" ^ LCGstringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *) 191 (if t.c_name=Dot then "" else "„" ^ LCGstringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *)
192 print_edge file edge upper id; 192 print_edge file edge upper id;
193 print_graph_rec2 file "" id t.c_relations 193 print_graph_rec2 file "" id t.c_relations
@@ -197,7 +197,7 @@ let rec print_graph_rec file edge upper id = function @@ -197,7 +197,7 @@ let rec print_graph_rec file edge upper id = function
197 print_edge file edge upper id; 197 print_edge file edge upper id;
198 print_graph_rec2 file "" id t.cx_contents; 198 print_graph_rec2 file "" id t.cx_contents;
199 print_graph_rec2 file "" id t.cx_relations; 199 print_graph_rec2 file "" id t.cx_relations;
200 - | Relation(role,role_attr,t) -> 200 + | Relation(role,role_attr,t) ->
201 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); 201 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
202 print_edge file edge upper id; 202 print_edge file edge upper id;
203 print_graph_rec2 file "" id t 203 print_graph_rec2 file "" id t
@@ -205,31 +205,31 @@ let rec print_graph_rec file edge upper id = function @@ -205,31 +205,31 @@ let rec print_graph_rec file edge upper id = function
205 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); 205 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
206 print_edge file edge upper id; 206 print_edge file edge upper id;
207 print_graph_rec2 file "" id t 207 print_graph_rec2 file "" id t
208 - | SingleRelation(role) -> 208 + | SingleRelation(role) ->
209 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role); 209 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role);
210 print_edge file edge upper id 210 print_edge file edge upper id
211 - | AddRelation(t,role,role_attr,s) -> 211 + | AddRelation(t,role,role_attr,s) ->
212 fprintf file " %s [shape=circle,label=\"AddRelation\\n%s\\n%s\"]\n" id role role_attr; 212 fprintf file " %s [shape=circle,label=\"AddRelation\\n%s\\n%s\"]\n" id role role_attr;
213 print_edge file edge upper id; 213 print_edge file edge upper id;
214 print_graph_rec2 file "" id t; 214 print_graph_rec2 file "" id t;
215 print_graph_rec2 file "" id s; 215 print_graph_rec2 file "" id s;
216 - | SetContextName(s,t) -> 216 + | SetContextName(s,t) ->
217 fprintf file " %s [shape=circle,label=\"SetContextName\\n%s\"]\n" id s; 217 fprintf file " %s [shape=circle,label=\"SetContextName\\n%s\"]\n" id s;
218 print_edge file edge upper id; 218 print_edge file edge upper id;
219 print_graph_rec2 file "" id t 219 print_graph_rec2 file "" id t
220 - | RemoveRelation t -> 220 + | RemoveRelation t ->
221 fprintf file " %s [shape=circle,label=\"RemoveRelation\"]\n" id; 221 fprintf file " %s [shape=circle,label=\"RemoveRelation\"]\n" id;
222 print_edge file edge upper id; 222 print_edge file edge upper id;
223 print_graph_rec2 file "" id t 223 print_graph_rec2 file "" id t
224 - | Variant(e,l) -> 224 + | Variant(e,l) ->
225 fprintf file " %s [shape=diamond,label=\"%s\"]\n" id e; 225 fprintf file " %s [shape=diamond,label=\"%s\"]\n" id e;
226 print_edge file edge upper id; 226 print_edge file edge upper id;
227 Xlist.iter l (fun (i,t) -> print_graph_rec2 file i id t) 227 Xlist.iter l (fun (i,t) -> print_graph_rec2 file i id t)
228 - | Choice choice -> 228 + | Choice choice ->
229 fprintf file " %s [shape=Mdiamond,label=\"%s\"]\n" id ""; 229 fprintf file " %s [shape=Mdiamond,label=\"%s\"]\n" id "";
230 print_edge file edge upper id; 230 print_edge file edge upper id;
231 StringMap.iter choice (fun ei t -> print_graph_rec2 file ei id t) 231 StringMap.iter choice (fun ei t -> print_graph_rec2 file ei id t)
232 - | Val s -> 232 + | Val s ->
233 fprintf file " %s [shape=box,label=\"%s\"]\n" id s; 233 fprintf file " %s [shape=box,label=\"%s\"]\n" id s;
234 print_edge file edge upper id 234 print_edge file edge upper id
235 | Dot -> () 235 | Dot -> ()
@@ -237,63 +237,63 @@ let rec print_graph_rec file edge upper id = function @@ -237,63 +237,63 @@ let rec print_graph_rec file edge upper id = function
237 print_edge file edge upper id*) 237 print_edge file edge upper id*)
238 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i) 238 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i)
239 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t) 239 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t)
240 - 240 +
241 and print_graph_rec2 file edge upper = function 241 and print_graph_rec2 file edge upper = function
242 Tuple l -> Xlist.iter l (print_graph_rec2 file edge upper) 242 Tuple l -> Xlist.iter l (print_graph_rec2 file edge upper)
243 | t -> print_graph_rec file edge upper (get_single_rel_id ()) t 243 | t -> print_graph_rec file edge upper (get_single_rel_id ()) t
244 - 244 +
245 (*let rec print_graph_rec file is_rev upper i = function (* FIXME: dokończyć is_rev *) 245 (*let rec print_graph_rec file is_rev upper i = function (* FIXME: dokończyć is_rev *)
246 - Node t -> 246 + Node t ->
247 (* let orth = if t.id = 0 then "" else.(t.id).PreTypes.orth in 247 (* let orth = if t.id = 0 then "" else.(t.id).PreTypes.orth in
248 fprintf file " %s [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat;*) 248 fprintf file " %s [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat;*)
249 fprintf file " %s [label=\"%s\"]\n" i (string_of_node t); 249 fprintf file " %s [label=\"%s\"]\n" i (string_of_node t);
250 - if upper <> "" then 250 + if upper <> "" then
251 if is_rev then fprintf file " %s -> %s\n" i upper 251 if is_rev then fprintf file " %s -> %s\n" i upper
252 else fprintf file " %s -> %s\n" upper i; 252 else fprintf file " %s -> %s\n" upper i;
253 print_graph_rec file false i i t.args 253 print_graph_rec file false i i t.args
254 - | Concept t ->  
255 - fprintf file " %s [shape=box,label=\"%s %s\"]\n" ("c" ^ i)  
256 - (LCGstringOf.linear_term 0 t.c_sense) 254 + | Concept t ->
  255 + fprintf file " %s [shape=box,label=\"%s %s\"]\n" ("c" ^ i)
  256 + (LCGstringOf.linear_term 0 t.c_sense)
257 (if t.c_name=Dot then "" else "„" ^ LCGstringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *) 257 (if t.c_name=Dot then "" else "„" ^ LCGstringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *)
258 - if upper <> "" then 258 + if upper <> "" then
259 if is_rev then fprintf file " %s -> %s\n" ("c" ^ i) upper 259 if is_rev then fprintf file " %s -> %s\n" ("c" ^ i) upper
260 else fprintf file " %s -> %s\n" upper ("c" ^ i); 260 else fprintf file " %s -> %s\n" upper ("c" ^ i);
261 print_graph_rec file false ("c" ^ i) i t.c_relations 261 print_graph_rec file false ("c" ^ i) i t.c_relations
262 | Context t -> 262 | Context t ->
263 fprintf file " %s [shape=Msquare,label=\"\"]\n" ("i" ^ i); 263 fprintf file " %s [shape=Msquare,label=\"\"]\n" ("i" ^ i);
264 - if upper <> "" then 264 + if upper <> "" then
265 if is_rev then fprintf file " %s -> %s\n" ("i" ^ i) upper 265 if is_rev then fprintf file " %s -> %s\n" ("i" ^ i) upper
266 else fprintf file " %s -> %s\n" upper ("i" ^ i); 266 else fprintf file " %s -> %s\n" upper ("i" ^ i);
267 print_graph_rec file false ("i" ^ i) i t.cx_contents 267 print_graph_rec file false ("i" ^ i) i t.cx_contents
268 - | SingleRelation(role) -> 268 + | SingleRelation(role) ->
269 let id = get_single_rel_id () in 269 let id = get_single_rel_id () in
270 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role); 270 fprintf file " %s [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role);
271 if upper <> "" then fprintf file " %s -> %s\n" upper id 271 if upper <> "" then fprintf file " %s -> %s\n" upper id
272 - | Relation(role,role_attr,t) -> 272 + | Relation(role,role_attr,t) ->
273 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" i (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); 273 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" i (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
274 if upper <> "" then fprintf file " %s -> %s\n" upper i; 274 if upper <> "" then fprintf file " %s -> %s\n" upper i;
275 print_graph_rec file false i i t 275 print_graph_rec file false i i t
276 - | RevRelation(role,role_attr,t) -> 276 + | RevRelation(role,role_attr,t) ->
277 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" i (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); 277 fprintf file " %s [shape=circle,label=\"%s\\n%s\"]\n" i (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
278 if upper <> "" then fprintf file " %s -> %s\n" i upper; 278 if upper <> "" then fprintf file " %s -> %s\n" i upper;
279 print_graph_rec file true i i t 279 print_graph_rec file true i i t
280 | Tuple l -> Xlist.iter l (print_graph_rec file is_rev upper i) 280 | Tuple l -> Xlist.iter l (print_graph_rec file is_rev upper i)
281 - | Variant(e,l) -> 281 + | Variant(e,l) ->
282 fprintf file " %s [shape=diamond]\n" e; 282 fprintf file " %s [shape=diamond]\n" e;
283 if upper <> "" then fprintf file " %s -> %s\n" upper e; 283 if upper <> "" then fprintf file " %s -> %s\n" upper e;
284 Xlist.iter l (fun (i2,t) -> print_graph_rec file false e ("x" ^ i ^ "y" ^ i2) t) 284 Xlist.iter l (fun (i2,t) -> print_graph_rec file false e ("x" ^ i ^ "y" ^ i2) t)
285 | Dot -> () 285 | Dot -> ()
286 | Ref i2 -> fprintf file " %s -> %d\n" upper i2 286 | Ref i2 -> fprintf file " %s -> %d\n" upper i2
287 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t)*) 287 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t)*)
288 -  
289 -let print_graph path name references = 288 +
  289 +let print_graph path name references =
290 single_rel_id_count := 0; 290 single_rel_id_count := 0;
291 - File.file_out (path ^ name ^ ".gv") (fun file -> 291 + File.file_out (path ^ name ^ ".gv") (fun file ->
292 fprintf file "digraph G {\n node [shape=record]\n"; 292 fprintf file "digraph G {\n node [shape=record]\n";
293 Int.iter 0 (Array.length references - 1) (fun i -> print_graph_rec file (*false*) "" "" ("x" ^ string_of_int i) references.(i)); 293 Int.iter 0 (Array.length references - 1) (fun i -> print_graph_rec file (*false*) "" "" ("x" ^ string_of_int i) references.(i));
294 -(* Int.iter 0 (Array.length references - 1) (fun i -> 294 +(* Int.iter 0 (Array.length references - 1) (fun i ->
295 match references.(i) with 295 match references.(i) with
296 - Node t -> 296 + Node t ->
297 fprintf file " %d [label=\"%s\"]\n" i (string_of_node t); 297 fprintf file " %d [label=\"%s\"]\n" i (string_of_node t);
298 let refs = get_refs [] t.args in 298 let refs = get_refs [] t.args in
299 Xlist.iter refs (fun r -> 299 Xlist.iter refs (fun r ->
@@ -304,31 +304,31 @@ let print_graph path name references = @@ -304,31 +304,31 @@ let print_graph path name references =
304 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); 304 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
305 Sys.chdir ".." 305 Sys.chdir ".."
306 306
307 -let id_counter = ref 0  
308 - 307 +let id_counter = ref 0
  308 +
309 let print_edge2 file edge_rev edge_label edge_head edge_tail upper id = 309 let print_edge2 file edge_rev edge_label edge_head edge_tail upper id =
310 let edge_head,edge_tail,upper,id = if edge_rev then edge_tail,edge_head,id,upper else edge_head,edge_tail,upper,id in 310 let edge_head,edge_tail,upper,id = if edge_rev then edge_tail,edge_head,id,upper else edge_head,edge_tail,upper,id in
311 - let l = 311 + let l =
312 (if edge_label = "" then [] else ["label=\"" ^ edge_label ^ "\""]) @ 312 (if edge_label = "" then [] else ["label=\"" ^ edge_label ^ "\""]) @
313 (if edge_head = "" then [] else ["ltail=\"" ^ edge_head ^ "\""]) @ 313 (if edge_head = "" then [] else ["ltail=\"" ^ edge_head ^ "\""]) @
314 (if edge_tail = "" then [] else ["lhead=\"" ^ edge_tail ^ "\""]) in 314 (if edge_tail = "" then [] else ["lhead=\"" ^ edge_tail ^ "\""]) in
315 - if upper <> 0 then 315 + if upper <> 0 then
316 if l = [] then fprintf file " %d -> %d\n" upper id 316 if l = [] then fprintf file " %d -> %d\n" upper id
317 else fprintf file " %d -> %d [%s]\n" upper id (String.concat "," l) 317 else fprintf file " %d -> %d [%s]\n" upper id (String.concat "," l)
318 -  
319 -let rec print_graph2_rec file edge_rev edge_label edge_head upper = function  
320 - Node t -> 318 +
  319 +let rec print_graph2_rec file edge_rev edge_label edge_head upper = function
  320 + Node t ->
321 let id = !id_counter in 321 let id = !id_counter in
322 incr id_counter; 322 incr id_counter;
323 fprintf file " %d [label=\"%s\"]\n" id (string_of_node t); 323 fprintf file " %d [label=\"%s\"]\n" id (string_of_node t);
324 print_edge2 file edge_rev edge_label edge_head "" upper id; 324 print_edge2 file edge_rev edge_label edge_head "" upper id;
325 print_graph2_rec file false "" "" id t.args 325 print_graph2_rec file false "" "" id t.args
326 - | Concept t -> 326 + | Concept t ->
327 let id = !id_counter in 327 let id = !id_counter in
328 incr id_counter; 328 incr id_counter;
329 - fprintf file " %d [shape=box,label=<%s%s %s>]\n" id  
330 - (string_of_quant t.c_quant)  
331 - (LCGstringOf.linear_term 0 t.c_sense) 329 + fprintf file " %d [shape=box,label=<%s%s %s>]\n" id
  330 + (string_of_quant t.c_quant)
  331 + (LCGstringOf.linear_term 0 t.c_sense)
332 (if t.c_name=Dot then "" else "„" ^ LCGstringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *) 332 (if t.c_name=Dot then "" else "„" ^ LCGstringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *)
333 print_edge2 file edge_rev edge_label edge_head "" upper id; 333 print_edge2 file edge_rev edge_label edge_head "" upper id;
334 print_graph2_rec file false "" "" id t.c_relations 334 print_graph2_rec file false "" "" id t.c_relations
@@ -338,64 +338,64 @@ let rec print_graph2_rec file edge_rev edge_label edge_head upper = function @@ -338,64 +338,64 @@ let rec print_graph2_rec file edge_rev edge_label edge_head upper = function
338 if t.cx_sense = Dot then fprintf file " subgraph cluster%d {\nlabel=\"\"\n" id 338 if t.cx_sense = Dot then fprintf file " subgraph cluster%d {\nlabel=\"\"\n" id
339 else fprintf file " subgraph cluster%d {\nlabel=\"%s\"\n" id (LCGstringOf.linear_term 0 t.cx_sense); 339 else fprintf file " subgraph cluster%d {\nlabel=\"%s\"\n" id (LCGstringOf.linear_term 0 t.cx_sense);
340 print_graph2_rec file false "" "" 0 t.cx_contents; 340 print_graph2_rec file false "" "" 0 t.cx_contents;
341 - fprintf file " }\n"; 341 + fprintf file " }\n";
342 print_edge2 file edge_rev edge_label edge_head ("cluster" ^ string_of_int id) upper (id+1); 342 print_edge2 file edge_rev edge_label edge_head ("cluster" ^ string_of_int id) upper (id+1);
343 print_graph2_rec file false "" ("cluster" ^ string_of_int id) (id+1) t.cx_relations; 343 print_graph2_rec file false "" ("cluster" ^ string_of_int id) (id+1) t.cx_relations;
344 - | Relation(role,role_attr,t) -> 344 + | Relation(role,role_attr,t) ->
345 let id = !id_counter in 345 let id = !id_counter in
346 incr id_counter; 346 incr id_counter;
347 fprintf file " %d [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); 347 fprintf file " %d [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
348 print_edge2 file false edge_label edge_head "" upper id; 348 print_edge2 file false edge_label edge_head "" upper id;
349 print_graph2_rec file false "" "" id t 349 print_graph2_rec file false "" "" id t
350 - | RevRelation(role,role_attr,t) -> 350 + | RevRelation(role,role_attr,t) ->
351 let id = !id_counter in 351 let id = !id_counter in
352 incr id_counter; 352 incr id_counter;
353 fprintf file " %d [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr); 353 fprintf file " %d [shape=circle,label=\"%s\\n%s\"]\n" id (LCGstringOf.linear_term 0 role) (LCGstringOf.linear_term 0 role_attr);
354 print_edge2 file true edge_label edge_head "" upper id; 354 print_edge2 file true edge_label edge_head "" upper id;
355 print_graph2_rec file true "" "" id t 355 print_graph2_rec file true "" "" id t
356 - | SingleRelation(role) -> 356 + | SingleRelation(role) ->
357 let id = !id_counter in 357 let id = !id_counter in
358 incr id_counter; 358 incr id_counter;
359 fprintf file " %d [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role); 359 fprintf file " %d [shape=circle,label=\"%s\"]\n" id (LCGstringOf.linear_term 0 role);
360 print_edge2 file false edge_label edge_head "" upper id 360 print_edge2 file false edge_label edge_head "" upper id
361 - | AddRelation(t,role,role_attr,s) -> 361 + | AddRelation(t,role,role_attr,s) ->
362 let id = !id_counter in 362 let id = !id_counter in
363 incr id_counter; 363 incr id_counter;
364 fprintf file " %d [shape=circle,label=\"AddRelation\\n%s\\n%s\"]\n" id role role_attr; 364 fprintf file " %d [shape=circle,label=\"AddRelation\\n%s\\n%s\"]\n" id role role_attr;
365 print_edge2 file edge_rev edge_label edge_head "" upper id; 365 print_edge2 file edge_rev edge_label edge_head "" upper id;
366 print_graph2_rec file false "" "" id t; 366 print_graph2_rec file false "" "" id t;
367 print_graph2_rec file false "" "" id s 367 print_graph2_rec file false "" "" id s
368 - | RemoveRelation t -> 368 + | RemoveRelation t ->
369 let id = !id_counter in 369 let id = !id_counter in
370 incr id_counter; 370 incr id_counter;
371 fprintf file " %d [shape=circle,label=\"RemoveRelation\"]\n" id; 371 fprintf file " %d [shape=circle,label=\"RemoveRelation\"]\n" id;
372 print_edge2 file edge_rev edge_label edge_head "" upper id; 372 print_edge2 file edge_rev edge_label edge_head "" upper id;
373 print_graph2_rec file false "" "" id t 373 print_graph2_rec file false "" "" id t
374 - | SetContextName(s,t) -> 374 + | SetContextName(s,t) ->
375 let id = !id_counter in 375 let id = !id_counter in
376 incr id_counter; 376 incr id_counter;
377 fprintf file " %d [shape=circle,label=\"SetContextName\\n%s\"]\n" id s; 377 fprintf file " %d [shape=circle,label=\"SetContextName\\n%s\"]\n" id s;
378 print_edge2 file edge_rev edge_label edge_head "" upper id; 378 print_edge2 file edge_rev edge_label edge_head "" upper id;
379 print_graph2_rec file false "" "" id t; 379 print_graph2_rec file false "" "" id t;
380 | Tuple l -> Xlist.iter l (print_graph2_rec file edge_rev edge_label edge_head upper) 380 | Tuple l -> Xlist.iter l (print_graph2_rec file edge_rev edge_label edge_head upper)
381 - | Variant(e,l) -> 381 + | Variant(e,l) ->
382 let id = !id_counter in 382 let id = !id_counter in
383 incr id_counter; 383 incr id_counter;
384 fprintf file " %d [shape=diamond,label=\"%s\"]\n" id e; 384 fprintf file " %d [shape=diamond,label=\"%s\"]\n" id e;
385 print_edge2 file edge_rev edge_label edge_head "" upper id; 385 print_edge2 file edge_rev edge_label edge_head "" upper id;
386 Xlist.iter l (fun (i,t) -> print_graph2_rec file edge_rev i "" id t) 386 Xlist.iter l (fun (i,t) -> print_graph2_rec file edge_rev i "" id t)
387 - | Val s -> 387 + | Val s ->
388 let id = !id_counter in 388 let id = !id_counter in
389 incr id_counter; 389 incr id_counter;
390 fprintf file " %d [shape=box,label=\"%s\"]\n" id s; 390 fprintf file " %d [shape=box,label=\"%s\"]\n" id s;
391 print_edge2 file edge_rev edge_label edge_head "" upper id 391 print_edge2 file edge_rev edge_label edge_head "" upper id
392 | Dot -> () 392 | Dot -> ()
393 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t) 393 | t -> failwith ("print_graph_rec: " ^ LCGstringOf.linear_term 0 t)
394 -  
395 -let print_graph2 path name query t = 394 +
  395 +let print_graph2 path name query t =
396 (* print_endline *) 396 (* print_endline *)
397 id_counter := 1; 397 id_counter := 1;
398 - File.file_out (path ^ name ^ ".gv") (fun file -> 398 + File.file_out (path ^ name ^ ".gv") (fun file ->
399 fprintf file "digraph G {\n compound=true\n node [shape=record]\n"; 399 fprintf file "digraph G {\n compound=true\n node [shape=record]\n";
400 print_graph2_rec file false "" "" 0 t; 400 print_graph2_rec file false "" "" 0 t;
401 fprintf file "label=\"%s\"\n }\n" query); 401 fprintf file "label=\"%s\"\n }\n" query);
@@ -407,11 +407,11 @@ let rec get_lemma = function @@ -407,11 +407,11 @@ let rec get_lemma = function
407 PreTypes.Interp orth -> orth 407 PreTypes.Interp orth -> orth
408 | PreTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat 408 | PreTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat
409 | _ -> "" 409 | _ -> ""
410 -  
411 -let print_paths path name paths =  
412 - File.file_out (path ^ name ^ ".gv") (fun file -> 410 +
  411 +let print_paths path name paths =
  412 + File.file_out (path ^ name ^ ".gv") (fun file ->
413 fprintf file "digraph G {\n"; 413 fprintf file "digraph G {\n";
414 - Array.iter (fun t -> 414 + Array.iter (fun t ->
415 let lemma = get_lemma t.PreTypes.token in 415 let lemma = get_lemma t.PreTypes.token in
416 if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.PreTypes.beg t.PreTypes.next t.PreTypes.orth lemma) paths; 416 if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.PreTypes.beg t.PreTypes.next t.PreTypes.orth lemma) paths;
417 fprintf file "}\n"); 417 fprintf file "}\n");
@@ -419,39 +419,39 @@ let print_paths path name paths = @@ -419,39 +419,39 @@ let print_paths path name paths =
419 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); 419 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
420 Sys.chdir ".." 420 Sys.chdir ".."
421 421
422 -let rec print_tree_rec2 file paths edge upper = function 422 +let rec print_tree_rec2 file paths edge upper = function
423 Tuple l -> Xlist.iter l (print_tree_rec2 file paths edge upper) 423 Tuple l -> Xlist.iter l (print_tree_rec2 file paths edge upper)
424 - | Variant(e,l) -> 424 + | Variant(e,l) ->
425 fprintf file " %s [shape=diamond]\n" e; 425 fprintf file " %s [shape=diamond]\n" e;
426 print_edge file edge upper e; 426 print_edge file edge upper e;
427 Xlist.iter l (fun (i,t) -> print_tree_rec2 file paths i e t) 427 Xlist.iter l (fun (i,t) -> print_tree_rec2 file paths i e t)
428 | Dot -> () 428 | Dot -> ()
429 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i) 429 | Ref i -> print_edge file edge upper ("x" ^ string_of_int i)
430 | t -> failwith ("print_tree_rec: " ^ LCGstringOf.linear_term 0 t) 430 | t -> failwith ("print_tree_rec: " ^ LCGstringOf.linear_term 0 t)
431 -  
432 -let rec print_tree_rec file paths edge upper id = function  
433 - Node t -> 431 +
  432 +let rec print_tree_rec file paths edge upper id = function
  433 + Node t ->
434 let orth = if t.id = 0 then "" else paths.(t.id).PreTypes.orth in 434 let orth = if t.id = 0 then "" else paths.(t.id).PreTypes.orth in
435 fprintf file " %s [label=\"%s\\n%s\\n%s:%s\\n%f\"]\n" id (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat t.weight; 435 fprintf file " %s [label=\"%s\\n%s\\n%s:%s\\n%f\"]\n" id (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat t.weight;
436 print_edge file edge upper id; 436 print_edge file edge upper id;
437 print_tree_rec2 file paths "" id t.args 437 print_tree_rec2 file paths "" id t.args
438 - | Variant(e,l) -> 438 + | Variant(e,l) ->
439 fprintf file " %s [shape=diamond,label=\"%s\"]\n" id e; 439 fprintf file " %s [shape=diamond,label=\"%s\"]\n" id e;
440 print_edge file edge upper id; 440 print_edge file edge upper id;
441 Xlist.iter l (fun (i,t) -> print_tree_rec file paths i id (id ^ "y" ^ i) t) 441 Xlist.iter l (fun (i,t) -> print_tree_rec file paths i id (id ^ "y" ^ i) t)
442 - | Choice choice -> 442 + | Choice choice ->
443 fprintf file " %s [shape=Mdiamond,label=\"%s\"]\n" id ""; 443 fprintf file " %s [shape=Mdiamond,label=\"%s\"]\n" id "";
444 print_edge file edge upper id; 444 print_edge file edge upper id;
445 StringMap.iter choice (fun ei t -> print_tree_rec file paths ei id (id ^ "b" ^ ei) t) 445 StringMap.iter choice (fun ei t -> print_tree_rec file paths ei id (id ^ "b" ^ ei) t)
446 | Dot -> () 446 | Dot -> ()
447 | t -> failwith ("print_tree_rec: " ^ LCGstringOf.linear_term 0 t) 447 | t -> failwith ("print_tree_rec: " ^ LCGstringOf.linear_term 0 t)
448 -  
449 -let print_tree path name paths references =  
450 - File.file_out (path ^ name ^ ".gv") (fun file -> 448 +
  449 +let print_tree path name paths references =
  450 + File.file_out (path ^ name ^ ".gv") (fun file ->
451 fprintf file "digraph G {\n node [shape=box]\n"; 451 fprintf file "digraph G {\n node [shape=box]\n";
452 Int.iter 0 (Array.length references - 1) (fun i -> print_tree_rec file paths "" "" ("x" ^ string_of_int i) references.(i)); 452 Int.iter 0 (Array.length references - 1) (fun i -> print_tree_rec file paths "" "" ("x" ^ string_of_int i) references.(i));
453 (* match references.(i) with 453 (* match references.(i) with
454 - Node t -> 454 + Node t ->
455 let orth = if t.id = 0 then "" else paths.(t.id).PreTypes.orth in 455 let orth = if t.id = 0 then "" else paths.(t.id).PreTypes.orth in
456 fprintf file " %d [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat; 456 fprintf file " %d [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat;
457 let refs = get_refs [] t.args in 457 let refs = get_refs [] t.args in
@@ -463,13 +463,13 @@ let print_tree path name paths references = @@ -463,13 +463,13 @@ let print_tree path name paths references =
463 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); 463 ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png"));
464 Sys.chdir ".." 464 Sys.chdir ".."
465 465
466 -(*let print_tree filename paths references =  
467 - File.file_out filename (fun file -> 466 +(*let print_tree filename paths references =
  467 + File.file_out filename (fun file ->
468 fprintf file "digraph G {\n"; 468 fprintf file "digraph G {\n";
469 let set = Xlist.fold paths IntSet.empty (fun set t -> 469 let set = Xlist.fold paths IntSet.empty (fun set t ->
470 IntSet.add (IntSet.add set t.PreTypes.beg) t.PreTypes.next) in 470 IntSet.add (IntSet.add set t.PreTypes.beg) t.PreTypes.next) in
471 IntSet.iter set (fun i -> fprintf file " %d [width=0; height=0; label=\"\"]\n" i); 471 IntSet.iter set (fun i -> fprintf file " %d [width=0; height=0; label=\"\"]\n" i);
472 - Xlist.iter paths (fun t -> 472 + Xlist.iter paths (fun t ->
473 let lemma = get_lemma t.PreTypes.token in 473 let lemma = get_lemma t.PreTypes.token in
474 if lemma <> "" then ( 474 if lemma <> "" then (
475 let s = if t.PreTypes.orth = "" then lemma else t.PreTypes.orth ^ "\n" ^ lemma in 475 let s = if t.PreTypes.orth = "" then lemma else t.PreTypes.orth ^ "\n" ^ lemma in
@@ -480,8 +480,8 @@ let print_tree path name paths references = @@ -480,8 +480,8 @@ let print_tree path name paths references =
480 ignore (Sys.command "dot -Tpng tree.gv -o tree.png"); 480 ignore (Sys.command "dot -Tpng tree.gv -o tree.png");
481 Sys.chdir ".."*) 481 Sys.chdir ".."*)
482 482
483 -(*let print_tree filename paths references =  
484 - File.file_out filename (fun file -> 483 +(*let print_tree filename paths references =
  484 + File.file_out filename (fun file ->
485 fprintf file "digraph G {\n"; 485 fprintf file "digraph G {\n";
486 fprintf file " subgraph {\n ordering=out\n"; 486 fprintf file " subgraph {\n ordering=out\n";
487 let same = Xlist.fold (Xlist.sort paths (fun s t -> compare s.PreTypes.beg t.PreTypes.beg)) [] (fun same t -> 487 let same = Xlist.fold (Xlist.sort paths (fun s t -> compare s.PreTypes.beg t.PreTypes.beg)) [] (fun same t ->
@@ -494,9 +494,9 @@ let print_tree path name paths references = @@ -494,9 +494,9 @@ let print_tree path name paths references =
494 else same) in 494 else same) in
495 fprintf file " }\n"; 495 fprintf file " }\n";
496 fprintf file " { rank = same; %s }\n" (String.concat "; " (Xlist.map same (fun i -> sprintf "\"i%d\"" i))); 496 fprintf file " { rank = same; %s }\n" (String.concat "; " (Xlist.map same (fun i -> sprintf "\"i%d\"" i)));
497 - Int.iter 0 (Array.length references - 1) (fun i -> 497 + Int.iter 0 (Array.length references - 1) (fun i ->
498 match references.(i) with 498 match references.(i) with
499 - Node t -> 499 + Node t ->
500 fprintf file " %d [label=\"%s\"]\n" i t.pred; 500 fprintf file " %d [label=\"%s\"]\n" i t.pred;
501 fprintf file " %d -> i%d\n" i t.id; 501 fprintf file " %d -> i%d\n" i t.id;
502 let refs = get_refs [] t.args in 502 let refs = get_refs [] t.args in
@@ -508,9 +508,9 @@ let print_tree path name paths references = @@ -508,9 +508,9 @@ let print_tree path name paths references =
508 ignore (Sys.command "dot -Tpng tree.gv -o tree.png"); 508 ignore (Sys.command "dot -Tpng tree.gv -o tree.png");
509 Sys.chdir ".."*) 509 Sys.chdir ".."*)
510 510
511 -(*let rec schema_latex schema =  
512 - "\\begin{tabular}{l}" ^  
513 - String.concat "\\\\" (Xlist.map schema (fun s -> 511 +let rec schema_latex schema =
  512 + "\\begin{tabular}{l}" ^
  513 + String.concat "\\\\" (Xlist.map schema (fun s ->
514 LatexMain.escape_string (String.concat "," ( 514 LatexMain.escape_string (String.concat "," (
515 (if s.WalTypes.gf = WalTypes.ARG then [] else [WalStringOf.gf s.WalTypes.gf])@ 515 (if s.WalTypes.gf = WalTypes.ARG then [] else [WalStringOf.gf s.WalTypes.gf])@
516 (if s.WalTypes.role = "" then [] else [s.WalTypes.role])@ 516 (if s.WalTypes.role = "" then [] else [s.WalTypes.role])@
@@ -519,32 +519,32 @@ let print_tree path name paths references = @@ -519,32 +519,32 @@ let print_tree path name paths references =
519 "\\end{tabular}" 519 "\\end{tabular}"
520 520
521 let fnum_frame_latex = function 521 let fnum_frame_latex = function
522 - fnum,WalTypes.Frame(atrs,s) -> 522 + fnum,WalTypes.Frame(atrs,s) ->
523 Printf.sprintf "%d: %s: %s" fnum (LatexMain.escape_string (WalStringOf.frame_atrs atrs)) (schema_latex s) 523 Printf.sprintf "%d: %s: %s" fnum (LatexMain.escape_string (WalStringOf.frame_atrs atrs)) (schema_latex s)
524 - | fnum,WalTypes.LexFrame(id,p,r,s) -> 524 + | fnum,WalTypes.LexFrame(id,p,r,s) ->
525 Printf.sprintf "%d: %s: %s: %s: %s" fnum id (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s) 525 Printf.sprintf "%d: %s: %s: %s: %s" fnum id (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s)
526 - | fnum,WalTypes.ComprepFrame(le,p,r,s) -> 526 + | fnum,WalTypes.ComprepFrame(le,p,r,s) ->
527 Printf.sprintf "%d: %s: %s: %s: %s" fnum le (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s) 527 Printf.sprintf "%d: %s: %s: %s: %s" fnum le (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s)
528 - 528 +
529 let print_paths_latex name paths = 529 let print_paths_latex name paths =
530 LatexMain.latex_file_out "results/" name "a0" false (fun file -> 530 LatexMain.latex_file_out "results/" name "a0" false (fun file ->
531 fprintf file "\\begin{longtable}{|l|l|l|l|l|l|l|p{4cm}|l|l|l|l|}\n\\hline\north & beg & len & next & token & id & weight & attrs & lroles & senses & simple valence & valence\\\\\n"; 531 fprintf file "\\begin{longtable}{|l|l|l|l|l|l|l|p{4cm}|l|l|l|l|}\n\\hline\north & beg & len & next & token & id & weight & attrs & lroles & senses & simple valence & valence\\\\\n";
532 Int.iter 0 (Array.length paths - 1) (fun i -> 532 Int.iter 0 (Array.length paths - 1) (fun i ->
533 let t = paths.(i) in 533 let t = paths.(i) in
534 - fprintf file "%s & %d & %d & %d & %s & %d & %.4f & %s & %s %s &\\begin{tabular}{l|l|p{4cm}}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular}\\\\\n\\hline\n" 534 + fprintf file "%s & %d & %d & %d & %s & %d & %.4f & %s & %s %s &\\begin{tabular}{l|l|p{4cm}}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular}\\\\\n\\hline\n"
535 t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (LatexMain.escape_string (string_of_token t.PreTypes.token)) t.PreTypes.id t.PreTypes.weight 535 t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (LatexMain.escape_string (string_of_token t.PreTypes.token)) t.PreTypes.id t.PreTypes.weight
536 - (String.concat ";" t.PreTypes.attrs) (fst t.PreTypes.lroles) (snd t.PreTypes.lroles) 536 + (String.concat ";" t.PreTypes.attrs) (fst t.PreTypes.lroles) (snd t.PreTypes.lroles)
537 (String.concat "\\\\\n" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s & %.2f & %s" sense weight (String.concat "," hipero)))) 537 (String.concat "\\\\\n" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s & %.2f & %s" sense weight (String.concat "," hipero))))
538 - (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.simple_valence (fun x -> fnum_frame_latex x))) 538 + (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.simple_valence (fun x -> fnum_frame_latex x)))
539 (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.valence (fun x -> fnum_frame_latex x)))); 539 (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.valence (fun x -> fnum_frame_latex x))));
540 fprintf file "\\end{longtable}"); 540 fprintf file "\\end{longtable}");
541 - LatexMain.latex_compile_and_clean "results/" name *)  
542 - 541 + LatexMain.latex_compile_and_clean "results/" name
  542 +
543 let print_mml path name mml = 543 let print_mml path name mml =
544 File.file_out (path ^ name ^ ".mml") (fun file -> 544 File.file_out (path ^ name ^ ".mml") (fun file ->
545 fprintf file "<!DOCTYPE math PUBLIC \"-//W3C//DTD MathML 2.0//EN\" \"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd\">\n"; 545 fprintf file "<!DOCTYPE math PUBLIC \"-//W3C//DTD MathML 2.0//EN\" \"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd\">\n";
546 fprintf file "%s\n" (Xml.to_string_fmt mml)) 546 fprintf file "%s\n" (Xml.to_string_fmt mml))
547 - 547 +
548 let page_header path = 548 let page_header path =
549 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"> 549 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">
550 <html> 550 <html>
@@ -556,11 +556,11 @@ let page_header path = @@ -556,11 +556,11 @@ let page_header path =
556 556
557 <body> 557 <body>
558 <center> 558 <center>
559 - <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1>  
560 - <h3>Podaj tekst:</h3>  
561 - <form method=POST action=\"" ^ path ^ "parser.cgi\">  
562 - <p><input type=\"text\" name=\"text0\" value=\"\" size=\"40\"></p>  
563 - <p><input type=\"submit\" value=\"Analizuj\" size=\"60\"></p> 559 + <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1>
  560 + <h3>Podaj tekst:</h3>
  561 + <form method=POST action=\"" ^ path ^ "parser.cgi\">
  562 + <p><input type=\"text\" name=\"text0\" value=\"\" size=\"40\"></p>
  563 + <p><input type=\"submit\" value=\"Analizuj\" size=\"60\"></p>
564 </form>" 564 </form>"
565 565
566 let page_trailer = 566 let page_trailer =
@@ -569,25 +569,25 @@ let page_trailer = @@ -569,25 +569,25 @@ let page_trailer =
569 Copyright &copy; 2016 Institute of Computer Science Polish Academy of Sciences<BR> 569 Copyright &copy; 2016 Institute of Computer Science Polish Academy of Sciences<BR>
570 </center> 570 </center>
571 </body> 571 </body>
572 -</html>" 572 +</html>"
573 573
574 let print_webpage file cg_bin_path html_path id query n max_n mml = 574 let print_webpage file cg_bin_path html_path id query n max_n mml =
575 fprintf file "%s\n" (page_header cg_bin_path); 575 fprintf file "%s\n" (page_header cg_bin_path);
576 fprintf file "\n<H3>%s</H3>\n" query; 576 fprintf file "\n<H3>%s</H3>\n" query;
577 - fprintf file "<P>%s %s\n" 577 + fprintf file "<P>%s %s\n"
578 (if n = 1 then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Poprzednia interpretacja</A>" html_path id (n-1)) 578 (if n = 1 then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Poprzednia interpretacja</A>" html_path id (n-1))
579 (if n = max_n then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Następna interpretacja</A>" html_path id (n+1)); 579 (if n = max_n then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Następna interpretacja</A>" html_path id (n+1));
580 fprintf file "<P><IMG SRC=\"%stree%s_%d.png\">\n" html_path id n; 580 fprintf file "<P><IMG SRC=\"%stree%s_%d.png\">\n" html_path id n;
581 fprintf file "<P>%s\n" (Xml.to_string_fmt mml); 581 fprintf file "<P>%s\n" (Xml.to_string_fmt mml);
582 fprintf file "<P><A HREF=\"%stree%s_%d.xml\">Graf w formacie XML</A>\n" html_path id n; 582 fprintf file "<P><A HREF=\"%stree%s_%d.xml\">Graf w formacie XML</A>\n" html_path id n;
583 fprintf file "<P><A HREF=\"%sformula%s_%d.mml\">Formuła w formacie MathML</A>\n" html_path id n; 583 fprintf file "<P><A HREF=\"%sformula%s_%d.mml\">Formuła w formacie MathML</A>\n" html_path id n;
584 - fprintf file "<P>%s %s\n" 584 + fprintf file "<P>%s %s\n"
585 (if n = 1 then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Poprzednia interpretacja</A>" html_path id (n-1)) 585 (if n = 1 then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Poprzednia interpretacja</A>" html_path id (n-1))
586 (if n = max_n then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Następna interpretacja</A>" html_path id (n+1)); 586 (if n = max_n then "" else sprintf "<A HREF=\"%spage%s_%d.html\">Następna interpretacja</A>" html_path id (n+1));
587 fprintf file "%s\n" page_trailer 587 fprintf file "%s\n" page_trailer
588 588
589 -open ExecTypes  
590 - 589 +open ExecTypes
  590 +
591 let generate_status_message result = function 591 let generate_status_message result = function
592 Idle -> "Server error: " ^ result.msg 592 Idle -> "Server error: " ^ result.msg
593 | PreprocessingError -> "Error during preprocessing: " ^ result.msg 593 | PreprocessingError -> "Error during preprocessing: " ^ result.msg
@@ -602,13 +602,8 @@ let generate_status_message result = function @@ -602,13 +602,8 @@ let generate_status_message result = function
602 | NotTranslated -> "Unable to generate logical form" 602 | NotTranslated -> "Unable to generate logical form"
603 | Parsed -> "parsed" 603 | Parsed -> "parsed"
604 604
605 -let print_other_result file cg_bin_path query result = 605 +let print_other_result file cg_bin_path query result =
606 fprintf file "%s\n" (page_header cg_bin_path); 606 fprintf file "%s\n" (page_header cg_bin_path);
607 fprintf file "\n<H3>%s</H3>\n" query; 607 fprintf file "\n<H3>%s</H3>\n" query;
608 fprintf file "\n<P>%s\n" (generate_status_message result result.status); 608 fprintf file "\n<P>%s\n" (generate_status_message result result.status);
609 fprintf file "%s\n" page_trailer 609 fprintf file "%s\n" page_trailer
610 -  
611 -  
612 -  
613 -  
614 -  
615 \ No newline at end of file 610 \ No newline at end of file
pre/inflexion.ml deleted
1 -(*  
2 - * ENIAM: Categorial Syntactic-Semantic Parser for Polish  
3 - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This program is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This program is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -open Xstd  
21 -open Printf  
22 -  
23 -let load_alt filename =  
24 - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in  
25 - List.rev (Xlist.fold l [] (fun l line ->  
26 - if String.length line = 0 then l else  
27 - if String.get line 0 = '#' then l else  
28 - match Str.split_delim (Str.regexp "\t") line with  
29 - [orth; lemma; interp] -> (orth,lemma,interp) :: l  
30 - | _ -> failwith ("load_alt: " ^ line)))  
31 -  
32 -let load_dict filename =  
33 - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in  
34 - List.rev (Xlist.fold l [] (fun l line ->  
35 - if String.length line = 0 then l else  
36 - if String.get line 0 = '#' then l else  
37 - match Str.split_delim (Str.regexp "\t") line with  
38 - [stem; lemma_suf2; rule_names] -> (stem,lemma_suf2,Str.split (Str.regexp " ") rule_names) :: l  
39 - | _ -> failwith ("load_dict: " ^ line)))  
40 -  
41 -let load_rules filename =  
42 - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in  
43 - List.rev (Xlist.fold l [] (fun l line ->  
44 - if String.length line = 0 then l else  
45 - if String.get line 0 = '#' then l else  
46 - match Str.split_delim (Str.regexp "\t") line with  
47 - [rule_name; quantity; lemma_suf; orth_suf; interp] -> (rule_name,int_of_string quantity,lemma_suf,orth_suf,interp) :: l  
48 - | _ -> failwith ("load_rules: " ^ line)))  
49 -  
50 -let make_rules_map rules =  
51 - Xlist.fold rules StringMap.empty (fun rules (rule_name,quantity,lemma_suf,orth_suf,interp) ->  
52 - let rules2 = try StringMap.find rules orth_suf with Not_found -> StringMap.empty in  
53 - let rules2 = StringMap.add rules2 rule_name (lemma_suf,interp) in  
54 - StringMap.add rules orth_suf rules2)  
55 -  
56 -module OrderedChar = struct  
57 -  
58 - type t = char  
59 -  
60 - let compare = compare  
61 -  
62 -end  
63 -  
64 -module CharMap = Xmap.Make(OrderedChar)  
65 -  
66 -type char_tree = N of char_tree CharMap.t * (string * string list) list * (string * string) list  
67 - (* następne możliwości * (lemma_suf2 * lista reguł) list * lista alt *)  
68 -  
69 -type char_tree_suf = M of char_tree_suf CharMap.t * (string * int * string * string) list  
70 - (* następne możliwości * (rule_name * lemma_suf * interp) list *)  
71 -  
72 -let empty_char_tree = N(CharMap.empty,[],[])  
73 -let empty_char_tree_suf = M(CharMap.empty,[])  
74 -  
75 -let rec add_path_dict stem i n lemma_suf2 rule_names (N(map,rules,alts)) =  
76 - if i = n then N(map,(lemma_suf2,rule_names) :: rules,alts) else  
77 - let tree = try CharMap.find map (String.get stem i) with Not_found -> empty_char_tree in  
78 - let tree = add_path_dict stem (i+1) n lemma_suf2 rule_names tree in  
79 - N(CharMap.add map (String.get stem i) tree,rules,alts)  
80 -  
81 -let rec add_path_alt stem i n lemma interp (N(map,rules,alts)) =  
82 - if i = n then N(map,rules,(lemma,interp) :: alts) else  
83 - let tree = try CharMap.find map (String.get stem i) with Not_found -> empty_char_tree in  
84 - let tree = add_path_alt stem (i+1) n lemma interp tree in  
85 - N(CharMap.add map (String.get stem i) tree,rules,alts)  
86 -  
87 -let make_char_tree dict alt =  
88 - let tree = Xlist.fold dict empty_char_tree (fun tree (stem,lemma_suf2,rule_names) ->  
89 - add_path_dict stem 0 (String.length stem) lemma_suf2 rule_names tree) in  
90 - Xlist.fold alt tree (fun tree (orth,lemma,interp) ->  
91 - add_path_alt orth 0 (String.length orth) lemma interp tree)  
92 -  
93 -let rec add_path_rules rule_name quantity orth_suf i lemma_suf interp (M(map,rules)) =  
94 - if i = -1 then M(map,(rule_name,quantity,lemma_suf,interp) :: rules) else  
95 - let tree = try CharMap.find map (String.get orth_suf i) with Not_found -> empty_char_tree_suf in  
96 - let tree = add_path_rules rule_name quantity orth_suf (i-1) lemma_suf interp tree in  
97 - M(CharMap.add map (String.get orth_suf i) tree,rules)  
98 -  
99 -let make_char_tree_suf rules =  
100 - let tree = Xlist.fold rules empty_char_tree_suf (fun tree (rule_name,quantity,lemma_suf,orth_suf,interp) ->  
101 - add_path_rules rule_name quantity orth_suf (String.length orth_suf - 1) lemma_suf interp tree) in  
102 - tree  
103 -  
104 -let rec find_char_tree_rec i n orth (N(map,rules,alts)) =  
105 - if i = n then [orth,"",rules,alts] else  
106 - let l = try find_char_tree_rec (i+1) n orth (CharMap.find map (String.get orth i)) with Not_found -> [] in  
107 - (String.sub orth 0 i,String.sub orth i (n-i),rules,[]) :: l  
108 -  
109 -let find_char_tree tree rules orth =  
110 - let l = find_char_tree_rec 0 (String.length orth) orth tree in  
111 - Xlist.fold l [] (fun found (stem,suf,rule_names,alts) ->  
112 - let rules2 = try StringMap.find rules suf with Not_found -> StringMap.empty in  
113 - let found = alts @ found in  
114 - Xlist.fold rule_names found (fun found (lemma_suf2,rule_names2) ->  
115 - Xlist.fold rule_names2 found (fun found rule_name ->  
116 - try  
117 - let lemma_suf,interp = StringMap.find rules2 rule_name in  
118 - let lemma = if lemma_suf2 = "" then stem ^ lemma_suf else stem ^ lemma_suf ^ ":" ^ lemma_suf2 in  
119 - (lemma,interp) :: found  
120 - with Not_found -> found)))  
121 -  
122 -let rec find_char_tree_suf_rec i orth (M(map,rules)) =  
123 - if i = 0 then Xlist.fold rules [] (fun l (rule_name,quantity,lemma_suf,interp) -> ("", rule_name, quantity, lemma_suf, interp, i) :: l) else  
124 - let l = try find_char_tree_suf_rec (i-1) orth (CharMap.find map (String.get orth (i-1))) with Not_found -> [] in  
125 - Xlist.fold rules l (fun l (rule_name, quantity, lemma_suf,interp) ->  
126 - (String.sub orth 0 i, rule_name, quantity, String.sub orth 0 i ^ lemma_suf, interp, i) :: l)  
127 -  
128 -let find_char_tree_suf rules_tree stem_map alt_map orth =  
129 - let alt_l = Xlist.rev_map (try StringMap.find alt_map orth with Not_found -> []) (fun (lemma,interp) -> lemma,interp,1,[]) in  
130 - let l = find_char_tree_suf_rec (String.length orth) orth rules_tree in  
131 - let found = Xlist.fold l alt_l (fun found (stem,rule_name,quantity,lemma,interp,i) ->  
132 - try  
133 - let rule_names = StringMap.find stem_map stem in  
134 - if StringSet.mem rule_names rule_name then (lemma,interp,1,[]) :: found else found (* FIXME: czy na pewno nie dodawać reguł niepasujących? to powoduje że lemat tak samo brzmiący a mający inną odmianę nie zostanie rozpoznany *)  
135 - with Not_found -> if quantity < 100 || (String.length orth = i && stem = lemma) then found else (lemma,interp,quantity,["lemma not validated"]) :: found) in (* FIXME: ucięcie żadkich reguł powinno być inaczej sterowane *)  
136 -(* if found = [] then [orth,"unk",1,["token not found"]] else *)  
137 - let found = (orth,"unk",1,["token not found"]) :: found in  
138 - let valid = Xlist.fold found [] (fun valid -> function  
139 - lemma,interp,quantity,[] -> (lemma,interp,quantity,[]) :: valid  
140 - | _ -> valid) in  
141 - if valid = [] then found else valid  
142 -  
143 -let prepare_inflexion alt_filename dict_filename rules_filename =  
144 - let alt = load_alt alt_filename in  
145 - let dict = load_dict dict_filename in  
146 - let rules = load_rules rules_filename in  
147 - let tree = make_char_tree dict alt in  
148 - let rules = make_rules_map rules in  
149 - tree,rules  
150 -  
151 -let tree,rules =  
152 -(* prepare_inflexion (morfeusz_path ^ Paths.alt_adj) (morfeusz_path ^ Paths.dict_adj) (morfeusz_path ^ Paths.rules_adj) *)  
153 -(* prepare_inflexion (morfeusz_path ^ Paths.alt_all) (morfeusz_path ^ Paths.dict_all) (morfeusz_path ^ Paths.rules_all) *)  
154 - empty_char_tree,StringMap.empty  
155 -  
156 -let make_alt_map alt =  
157 - Xlist.fold alt StringMap.empty (fun alt_map (orth,lemma,interp) ->  
158 - StringMap.add_inc alt_map orth [lemma,interp] (fun l -> (lemma,interp) :: l))  
159 -  
160 -let prepare_inflexion_suf alt_filename dict_filename rules_filename =  
161 - let alt = load_alt alt_filename in  
162 - let rules = load_rules rules_filename in  
163 - let rules_tree = make_char_tree_suf rules in  
164 - let alt_map = make_alt_map alt in  
165 - let dict = load_dict dict_filename in  
166 - let stem_map = Xlist.fold dict StringMap.empty (fun stem_map (stem,lemma_suf2,rule_names) ->  
167 - StringMap.add_inc stem_map stem (StringSet.of_list rule_names) (fun set -> Xlist.fold rule_names set StringSet.add)) in  
168 - alt_map,rules_tree,stem_map  
169 -  
170 -let alt_map,rules_tree,stem_map =  
171 - prepare_inflexion_suf (Paths.sgjp_path ^ Paths.alt_all) (Paths.sgjp_path ^ Paths.dict_all) (Paths.sgjp_path ^ Paths.rules_all)  
172 -  
173 -let check_prefix pat s =  
174 - let n = String.length pat in  
175 - if n > String.length s then false else  
176 - String.sub s 0 n = pat  
177 -  
178 -let cut_prefix pat s =  
179 - let i = String.length pat in  
180 - let n = String.length s in  
181 - if i >= n then "" else  
182 - try String.sub s i (n-i) with _ -> failwith ("cut_prefix: " ^ s ^ " " ^ string_of_int i)  
183 -  
184 -let check_sufix pat s =  
185 - let n = String.length pat in  
186 - let m = String.length s in  
187 - if n > m then false else  
188 - String.sub s (m-n) n = pat  
189 -  
190 -let cut_sufix pat s =  
191 - let i = String.length pat in  
192 - let n = String.length s in  
193 - try String.sub s 0 (n-i) with _ -> failwith ("cut_sufix: " ^ s)  
194 -  
195 -let rec select_interp_sufix pat = function  
196 - [] -> []  
197 - | (lemma,interp) :: l -> if check_sufix pat interp then (lemma,interp) :: (select_interp_sufix pat l) else select_interp_sufix pat l  
198 -  
199 -let rec select_interp_sufix_suf pat = function  
200 - [] -> []  
201 - | (lemma,interp,quantity,attrs) :: l -> if check_sufix pat interp then (lemma,interp,quantity,attrs) :: (select_interp_sufix_suf pat l) else select_interp_sufix_suf pat l  
202 -  
203 -let rec remove_interp_sufix pat = function  
204 - [] -> []  
205 - | (lemma,interp) :: l -> if check_sufix pat interp then remove_interp_sufix pat l else (lemma,interp) :: (remove_interp_sufix pat l)  
206 -  
207 -let rec remove_interp_sufix_suf pat = function  
208 - [] -> []  
209 - | (lemma,interp,quantity,attrs) :: l ->  
210 - if interp = "adv:sup" then (lemma,interp,quantity,attrs) :: (remove_interp_sufix_suf pat l) else (* FIXME: zaślepka, wymaga poprawienia algorytmu generowania słowników *)  
211 - if check_sufix pat interp then remove_interp_sufix_suf pat l else (lemma,interp,quantity,attrs) :: (remove_interp_sufix_suf pat l)  
212 -  
213 -let get_interpretations orth =  
214 - (if check_prefix "naj" orth then select_interp_sufix ":sup" (find_char_tree tree rules (cut_prefix "naj" orth)) else []) @  
215 - (if check_prefix "nie" orth then select_interp_sufix ":neg" (find_char_tree tree rules (cut_prefix "nie" orth)) else []) @  
216 - (remove_interp_sufix ":neg" (remove_interp_sufix ":sup" (find_char_tree tree rules orth)))  
217 -  
218 -let get_interpretations_suf orth = (* FIXME: nie działa dla adv:sup pisanych z wielkiej litery np Najdoskonalej Najlepiej *)  
219 - if orth = "siebie" then ["siebie","siebie:acc.gen",1,[]] else  
220 - if orth = "sobie" then ["siebie","siebie:dat.loc",1,[]] else  
221 - if orth = "sobą" then ["siebie","siebie:inst",1,[]] else  
222 - (if check_prefix "naj" orth then select_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "naj" orth)) else []) @  
223 - (if check_prefix "nie" orth then select_interp_sufix_suf ":neg" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "nie" orth)) else []) @  
224 - (if check_prefix "Naj" orth then select_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "Naj" orth)) else []) @  
225 - (if check_prefix "Nie" orth then select_interp_sufix_suf ":neg" (find_char_tree_suf rules_tree stem_map alt_map (cut_prefix "Nie" orth)) else []) @  
226 - (remove_interp_sufix_suf ":neg" (remove_interp_sufix_suf ":sup" (find_char_tree_suf rules_tree stem_map alt_map orth)))  
227 -  
228 -(* Testy *)  
229 -  
230 -let print_interpretations l =  
231 - Xlist.iter (Xlist.sort l compare) (fun (lemma,interp) ->  
232 - printf "%s\t%s\n" lemma interp)  
233 -  
234 -(*let _ =  
235 - let l = get_interpretations "życzliwą" in  
236 - print_interpretations l;  
237 - let l = get_interpretations "żyźniejszego" in  
238 - print_interpretations l;  
239 - let l = get_interpretations "zwiśli" in  
240 - print_interpretations l;  
241 - let l = get_interpretations "najzieleńsza" in  
242 - print_interpretations l;  
243 - let l = get_interpretations "najtandetniejsza" in  
244 - print_interpretations l;  
245 - let l = get_interpretations "nieżelazny" in  
246 - print_interpretations l;  
247 - ()*)  
248 -  
249 -(*let sgjp_filename = "sgjp-20151020.tab"  
250 -let polimorf_filename = "polimorf-20151020.tab"  
251 -  
252 -let _ =  
253 - let interp_sel = Morf.load_interp_sel "data/interps.tab" in  
254 - print_endline "loading sgjp";  
255 - let sgjp = Morf.load_tab (morfeusz_path ^ sgjp_filename) in  
256 - print_endline "loading polimorf";  
257 - let polimorf = Morf.load_tab (morfeusz_path ^ polimorf_filename) in  
258 - print_endline "merging";  
259 - let dicts = Morf.merge_dicts [sgjp;polimorf] in  
260 - let adj_interp_sel = StringMap.find interp_sel "adj" in  
261 - let adj_sup_interp_sel = StringMap.find interp_sel "adj-sup" in  
262 -(* let dicts = Morf.remove_prefix dicts "naj" adj_sup_interp_sel in *)  
263 - print_endline "preparing queries";  
264 - let queries = StringMap.fold dicts StringMap.empty (fun queries lemma interps ->  
265 - let interps = Morf.select_interps interps (adj_interp_sel @ adj_sup_interp_sel) in  
266 - StringMap.fold interps queries (fun queries interp orths ->  
267 - Xlist.fold orths queries (fun queries orth ->  
268 - let s = lemma ^ "\t" ^ interp in  
269 - StringMap.add_inc queries orth (StringSet.singleton s) (fun set -> StringSet.add set s)))) in  
270 - print_endline "testing";  
271 - StringMap.iter queries (fun orth set ->  
272 - let set = Xlist.fold (get_interpretations orth) set (fun set (lemma,interp) ->  
273 - let s = lemma ^ "\t" ^ interp in  
274 - if StringSet.mem set s then StringSet.remove set s else (  
275 - printf "excessing interpretation: %s\t%s" orth s;  
276 - set)) in  
277 - if StringSet.is_empty set then () else  
278 - StringSet.iter set (fun s ->  
279 - printf "lacking interpretation: %s\t%s" orth s))*)  
280 -  
281 -  
282 -  
283 -  
284 -  
285 -  
286 -  
287 -  
288 -  
289 -  
290 -  
291 -  
292 -  
293 -  
294 -  
295 -  
296 -  
297 -  
298 -  
299 -  
300 -  
301 -  
302 \ No newline at end of file 0 \ No newline at end of file
pre/makefile
1 OCAMLC=ocamlc 1 OCAMLC=ocamlc
2 OCAMLOPT=ocamlopt 2 OCAMLOPT=ocamlopt
3 OCAMLDEP=ocamldep 3 OCAMLDEP=ocamldep
4 -INCLUDES=-I +xml-light -I +xlib 4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology
5 OCAMLFLAGS=$(INCLUDES) -g 5 OCAMLFLAGS=$(INCLUDES) -g
6 -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa 6 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa inflexion.cmxa
7 INSTALLDIR=`ocamlc -where` 7 INSTALLDIR=`ocamlc -where`
8 8
9 WAL= paths.ml walTypes.ml walStringOf.ml preTypes.ml preWordnet.ml walParser.ml walTEI.ml walFrames.ml 9 WAL= paths.ml walTypes.ml walStringOf.ml preTypes.ml preWordnet.ml walParser.ml walTEI.ml walFrames.ml
10 -PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml morf.ml inflexion.ml prePaths.ml preMWE.ml preSemantics.ml preProcessing.ml  
11 -  
12 -all: 10 +PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preProcessing.ml
  11 +
  12 +all:
13 $(OCAMLOPT) -o pre $(OCAMLOPTFLAGS) $(WAL) $(PRE) 13 $(OCAMLOPT) -o pre $(OCAMLOPTFLAGS) $(WAL) $(PRE)
14 14
15 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx 15 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
pre/morf.ml deleted
1 -(*  
2 - * ENIAM: Categorial Syntactic-Semantic Parser for Polish  
3 - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>  
4 - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences  
5 - *  
6 - * This program is free software: you can redistribute it and/or modify  
7 - * it under the terms of the GNU General Public License as published by  
8 - * the Free Software Foundation, either version 3 of the License, or  
9 - * (at your option) any later version.  
10 - *  
11 - * This program is distributed in the hope that it will be useful,  
12 - * but WITHOUT ANY WARRANTY; without even the implied warranty of  
13 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  
14 - * GNU General Public License for more details.  
15 - *  
16 - * You should have received a copy of the GNU General Public License  
17 - * along with this program. If not, see <http://www.gnu.org/licenses/>.  
18 - *)  
19 -  
20 -open Xstd  
21 -  
22 -let load_tab filename =  
23 - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in  
24 - List.rev (Xlist.fold l [] (fun l line ->  
25 - if String.length line = 0 then l else  
26 - if String.get line 0 = '#' then l else  
27 - match Str.split (Str.regexp "\t") line with  
28 - orth :: lemma :: interp :: _ -> (orth,lemma,interp) :: l  
29 - | _ -> failwith ("load_tab: " ^ line)))  
30 -  
31 -let load_tab_full filename =  
32 - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in  
33 - List.rev (Xlist.fold l [] (fun l line ->  
34 - if String.length line = 0 then l else  
35 - if String.get line 0 = '#' then l else  
36 - match Str.split (Str.regexp "\t") line with  
37 - [orth; lemma; interp] -> (orth,lemma,interp,"","") :: l  
38 - | [orth; lemma; interp; cl] -> (orth,lemma,interp,cl,"") :: l  
39 - | [orth; lemma; interp; cl; cl2] -> (orth,lemma,interp,cl,cl2) :: l  
40 -(* | orth :: lemma :: interp :: cl :: cl2 -> (orth,lemma,interp,cl,String.concat ";" cl2) :: l *)  
41 - | _ -> failwith ("load_tab_full: " ^ line)))  
42 -  
43 -let merge_dicts l =  
44 - Xlist.fold l StringMap.empty (fun dicts tab ->  
45 - Xlist.fold tab dicts (fun dicts (orth,lemma,interp) ->  
46 - let interps = try StringMap.find dicts lemma with Not_found -> StringMap.empty in  
47 - let interps = StringMap.add_inc interps interp [orth] (fun orths ->  
48 - if Xlist.mem orths orth then orths else orth :: orths) in  
49 - StringMap.add dicts lemma interps))  
50 -  
51 -let load_interp_sel filename =  
52 - let l = Str.split_delim (Str.regexp "\n") (File.load_file filename) in  
53 - Xlist.fold l StringMap.empty (fun interp_sel line ->  
54 - if String.length line = 0 then interp_sel else  
55 - if String.get line 0 = '#' then interp_sel else  
56 - match Str.split (Str.regexp "\t") line with  
57 - [group;interp;label] -> StringMap.add_inc interp_sel group [interp,label] (fun l -> (interp,label) :: l)  
58 - | _ -> failwith ("load_interp_sel: " ^ line))  
59 -  
60 -  
61 -let rec merge_digraph = function  
62 - [] -> []  
63 - | "c" :: "h" :: l -> "ch" :: (merge_digraph l)  
64 - | "c" :: "z" :: l -> "cz" :: (merge_digraph l)  
65 - | "d" :: "z" :: l -> "dz" :: (merge_digraph l)  
66 - | "d" :: "ź" :: l -> "dź" :: (merge_digraph l)  
67 - | "d" :: "ż" :: l -> "dż" :: (merge_digraph l)  
68 - | "r" :: "z" :: l -> "rz" :: (merge_digraph l)  
69 - | "s" :: "z" :: l -> "sz" :: (merge_digraph l)  
70 - | "b" :: "'" :: l -> "b'" :: (merge_digraph l)  
71 - | "f" :: "'" :: l -> "f'" :: (merge_digraph l)  
72 - | s :: l -> s :: (merge_digraph l)  
73 -  
74 -let text_to_chars s = Xunicode.classified_chars_of_utf8_string s  
75 -(* (try UTF8.validate s with UTF8.Malformed_code -> failwith ("Invalid UTF8 string: " ^ s));  
76 - let r = ref [] in  
77 - UTF8.iter (fun c ->  
78 - r := (UTF8.init 1 (fun _ -> c)) :: (!r)) s;  
79 - merge_digraph (List.rev (!r))*)  
80 -  
81 -  
82 -let check_prefix pat s =  
83 - let n = String.length pat in  
84 - if n > String.length s then false else  
85 - String.sub s 0 n = pat  
86 -  
87 -let cut_prefix pat s =  
88 - let i = String.length pat in  
89 - let n = String.length s in  
90 - if i >= n then "" else  
91 - try String.sub s i (n-i) with _ -> failwith ("cut_prefix: " ^ s ^ " " ^ string_of_int i)  
92 -  
93 -let check_sufix pat s =  
94 - let n = String.length pat in  
95 - let m = String.length s in  
96 - if n > m then false else  
97 - String.sub s (m-n) n = pat  
98 -  
99 -let cut_sufix pat s =  
100 - let i = String.length pat in  
101 - let n = String.length s in  
102 - try String.sub s 0 (n-i) with _ -> failwith ("cut_sufix: " ^ s)  
103 -  
104 -let apply_transform orth (s,t) =  
105 - if check_sufix s orth then cut_sufix s orth ^ t else raise Not_found  
106 -  
107 -let split_colon s =  
108 - match Str.split_delim (Str.regexp ":") s with  
109 - [s] -> s, ""  
110 - | [s;t] -> s, t  
111 - | _ -> failwith "split_colon"  
112 -  
113 -let get_cat s =  
114 - match Str.split_delim (Str.regexp ":") s with  
115 - cat :: _ -> cat  
116 - | _ -> failwith "get_cat"  
117 -  
118 -let select_interps interps interp_sel =  
119 - Xlist.fold interp_sel StringMap.empty (fun new_interps (interp,_) ->  
120 - try  
121 - StringMap.add new_interps interp (StringMap.find interps interp)  
122 - with Not_found -> new_interps)  
pre/prePaths.ml
@@ -20,17 +20,17 @@ @@ -20,17 +20,17 @@
20 open Xstd 20 open Xstd
21 open PreTypes 21 open PreTypes
22 22
23 -let to_string (paths,last) = 23 +let to_string (paths,last) =
24 String.concat "\n" (Xlist.map paths (fun t -> PreTokenizer.string_of_tokens 0 (Token t))) 24 String.concat "\n" (Xlist.map paths (fun t -> PreTokenizer.string_of_tokens 0 (Token t)))
25 ^ Printf.sprintf "\nlast=%d" last 25 ^ Printf.sprintf "\nlast=%d" last
26 26
27 -let to_string_indexed (paths,last) =  
28 - String.concat "\n" (Xlist.map paths (fun (i,j,t) -> 27 +let to_string_indexed (paths,last) =
  28 + String.concat "\n" (Xlist.map paths (fun (i,j,t) ->
29 Printf.sprintf "%2d %2d %s" i j (PreTokenizer.string_of_tokens 0 (Token t)))) 29 Printf.sprintf "%2d %2d %s" i j (PreTokenizer.string_of_tokens 0 (Token t))))
30 ^ Printf.sprintf "\nlast=%d" last 30 ^ Printf.sprintf "\nlast=%d" last
31 31
32 (*let indexed_token_record_to_xml i j t = 32 (*let indexed_token_record_to_xml i j t =
33 - let lemma,pos,tags = 33 + let lemma,pos,tags =
34 match t.token with 34 match t.token with
35 Lemma(lemma,pos,tags) -> lemma,pos,tags 35 Lemma(lemma,pos,tags) -> lemma,pos,tags
36 | _ -> failwith "indexed_token_record_to_xml" in 36 | _ -> failwith "indexed_token_record_to_xml" in
@@ -46,11 +46,11 @@ let to_string_indexed (paths,last) = @@ -46,11 +46,11 @@ let to_string_indexed (paths,last) =
46 Xlist.map t.senses (fun (sense,hipero,weight) -> 46 Xlist.map t.senses (fun (sense,hipero,weight) ->
47 Xml.Element("sense",["name",sense;"weight",string_of_float weight], 47 Xml.Element("sense",["name",sense;"weight",string_of_float weight],
48 Xlist.map hipero (fun s -> Xml.Element("hipero",[],[Xml.PCData s])))))]) 48 Xlist.map hipero (fun s -> Xml.Element("hipero",[],[Xml.PCData s])))))])
49 -  
50 -let to_xml (paths,last) = 49 +
  50 +let to_xml (paths,last) =
51 Xml.Element("paths",["last",string_of_int last], 51 Xml.Element("paths",["last",string_of_int last],
52 Xlist.map paths (fun (i,j,t) -> indexed_token_record_to_xml i j t)) *) 52 Xlist.map paths (fun (i,j,t) -> indexed_token_record_to_xml i j t)) *)
53 - 53 +
54 let compare_token_record p r = 54 let compare_token_record p r =
55 let v = compare p.beg r.beg in 55 let v = compare p.beg r.beg in
56 if v <> 0 then v else 56 if v <> 0 then v else
@@ -58,30 +58,30 @@ let compare_token_record p r = @@ -58,30 +58,30 @@ let compare_token_record p r =
58 if v <> 0 then v else 58 if v <> 0 then v else
59 compare p r 59 compare p r
60 60
61 -let sort (paths,last) = 61 +let sort (paths,last) =
62 Xlist.sort paths compare_token_record, last 62 Xlist.sort paths compare_token_record, last
63 63
64 let rec uniq_rec rev = function 64 let rec uniq_rec rev = function
65 [] -> List.rev rev 65 [] -> List.rev rev
66 | [p] -> List.rev (p :: rev) 66 | [p] -> List.rev (p :: rev)
67 | p :: r :: l -> if p = r then uniq_rec rev (r :: l) else uniq_rec (p :: rev) (r :: l) 67 | p :: r :: l -> if p = r then uniq_rec rev (r :: l) else uniq_rec (p :: rev) (r :: l)
68 -  
69 -let uniq (paths,last) = 68 +
  69 +let uniq (paths,last) =
70 uniq_rec [] paths, last 70 uniq_rec [] paths, last
71 - 71 +
72 let rec translate_into_paths_rec paths = function 72 let rec translate_into_paths_rec paths = function
73 Token t -> t :: paths 73 Token t -> t :: paths
74 | Seq l -> Xlist.fold l paths translate_into_paths_rec 74 | Seq l -> Xlist.fold l paths translate_into_paths_rec
75 | Variant l -> Xlist.fold l paths translate_into_paths_rec 75 | Variant l -> Xlist.fold l paths translate_into_paths_rec
76 -  
77 -let translate_into_paths tokens =  
78 - let paths = Xlist.fold tokens [] (fun paths token -> 76 +
  77 +let translate_into_paths tokens =
  78 + let paths = Xlist.fold tokens [] (fun paths token ->
79 translate_into_paths_rec paths token) in 79 translate_into_paths_rec paths token) in
80 let last = if paths = [] then 0 else (List.hd paths).next in 80 let last = if paths = [] then 0 else (List.hd paths).next in
81 let paths = sort (paths,last) in 81 let paths = sort (paths,last) in
82 let paths = uniq paths in 82 let paths = uniq paths in
83 - paths  
84 - 83 + paths
  84 +
85 (**********************************************************************************) 85 (**********************************************************************************)
86 86
87 let excluded_interps = StringSet.of_list [ 87 let excluded_interps = StringSet.of_list [
@@ -214,7 +214,7 @@ let transformed_interps = Xlist.fold [ @@ -214,7 +214,7 @@ let transformed_interps = Xlist.fold [
214 "praet:sg:n1.n2:imperf.perf:nagl","praet:sg:n1.n2:imperf.perf"; 214 "praet:sg:n1.n2:imperf.perf:nagl","praet:sg:n1.n2:imperf.perf";
215 "praet:sg:n1.n2:imperf:nagl","praet:sg:n1.n2:imperf"; 215 "praet:sg:n1.n2:imperf:nagl","praet:sg:n1.n2:imperf";
216 ] StringMap.empty (fun map (k,v) -> StringMap.add map k v) 216 ] StringMap.empty (fun map (k,v) -> StringMap.add map k v)
217 - 217 +
218 let merge_lemmata l = 218 let merge_lemmata l =
219 let map = Xlist.fold l StringMap.empty (fun map (lemma,interp,quantity,attrs) -> 219 let map = Xlist.fold l StringMap.empty (fun map (lemma,interp,quantity,attrs) ->
220 let interp = if interp = "num:comp" then "numc" else interp in 220 let interp = if interp = "num:comp" then "numc" else interp in
@@ -229,88 +229,88 @@ let merge_lemmata l = @@ -229,88 +229,88 @@ let merge_lemmata l =
229 StringMap.add_inc map pos [tags] (fun l -> tags :: l))), 229 StringMap.add_inc map pos [tags] (fun l -> tags :: l))),
230 max 1 (quantity / Xlist.size interps), 230 max 1 (quantity / Xlist.size interps),
231 attrs) in 231 attrs) in
232 - StringMap.fold map [] (fun l _ (lemma,map,quantity,attrs) -> 232 + StringMap.fold map [] (fun l _ (lemma,map,quantity,attrs) ->
233 StringMap.fold map l (fun l cat interp -> 233 StringMap.fold map l (fun l cat interp ->
234 (lemma,cat,interp,quantity,attrs) :: l)) 234 (lemma,cat,interp,quantity,attrs) :: l))
235 235
236 let uppercase lemma cl ll = 236 let uppercase lemma cl ll =
237 let n = String.length lemma in 237 let n = String.length lemma in
238 let nll = String.length ll in 238 let nll = String.length ll in
239 - cl ^ String.sub lemma nll (n - nll)  
240 -  
241 -let quant_mod quantity = 239 + cl ^ String.sub lemma nll (n - nll)
  240 +
  241 +let quant_mod quantity =
242 log10 (float quantity) 242 log10 (float quantity)
243 -  
244 -let lemmatize_token = function  
245 - | {token=AllSmall s} as t ->  
246 - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> 243 +
  244 +let lemmatize_token = function
  245 + | {token=AllSmall s} as t ->
  246 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
247 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs=attrs @ t.attrs})) 247 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs=attrs @ t.attrs}))
248 - | {token=SmallLetter s} as t ->  
249 - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> 248 + | {token=SmallLetter s} as t ->
  249 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
250 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs=attrs @ t.attrs})) 250 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs=attrs @ t.attrs}))
251 - | {token=FirstCap(s,lower,cl,ll)} as t ->  
252 - let l = Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> 251 + | {token=FirstCap(s,lower,cl,ll)} as t ->
  252 + let l = Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
253 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}) in 253 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}) in
254 let b = Xlist.fold l false (fun b t -> if Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found" then b else true) in 254 let b = Xlist.fold l false (fun b t -> if Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found" then b else true) in
255 - if b then t :: l else  
256 - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf lower)) (fun (lemma,cat,interp,quantity,attrs) -> 255 + if b then t :: l else
  256 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations lower)) (fun (lemma,cat,interp,quantity,attrs) ->
257 {t with token=Lemma(uppercase lemma cl ll,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: "lemmatized as lowercase" :: attrs @ t.attrs})) 257 {t with token=Lemma(uppercase lemma cl ll,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: "lemmatized as lowercase" :: attrs @ t.attrs}))
258 - | {token=AllCap(s,_,_)} as t ->  
259 - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> 258 + | {token=AllCap(s,_,_)} as t ->
  259 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
260 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs})) 260 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}))
261 - | {token=CapLetter(s,_)} as t ->  
262 - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> 261 + | {token=CapLetter(s,_)} as t ->
  262 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
263 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs})) 263 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}))
264 - | {token=SomeCap s} as t ->  
265 - t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations_suf s)) (fun (lemma,cat,interp,quantity,attrs) -> 264 + | {token=SomeCap s} as t ->
  265 + t :: (Xlist.map (merge_lemmata (Inflexion.get_interpretations s)) (fun (lemma,cat,interp,quantity,attrs) ->
266 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs})) 266 {t with token=Lemma(lemma,cat,interp); weight=t.weight+.(quant_mod quantity); attrs="notvalidated proper" :: attrs @ t.attrs}))
267 | t -> [t] 267 | t -> [t]
268 -  
269 -let rec lemmatize_rec rev = function  
270 - [] -> List.rev rev 268 +
  269 +let rec lemmatize_rec rev = function
  270 + [] -> List.rev rev
271 | t :: l -> lemmatize_rec (lemmatize_token t @ rev) l 271 | t :: l -> lemmatize_rec (lemmatize_token t @ rev) l
272 272
273 let lemmatize (paths,last) = 273 let lemmatize (paths,last) =
274 List.rev (Xlist.fold (lemmatize_rec [] paths) [] (fun paths t -> 274 List.rev (Xlist.fold (lemmatize_rec [] paths) [] (fun paths t ->
275 match t.token with 275 match t.token with
276 - Lemma _ -> if (Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found") && 276 + Lemma _ -> if (Xlist.mem t.attrs "lemma not validated" || Xlist.mem t.attrs "token not found") &&
277 (Xlist.mem t.attrs "required validated lemmatization") then paths 277 (Xlist.mem t.attrs "required validated lemmatization") then paths
278 else t(*{t with attrs=List.remove "required validated lemmatization" t.attrs}*) :: paths 278 else t(*{t with attrs=List.remove "required validated lemmatization" t.attrs}*) :: paths
279 | _ -> t :: paths)), last 279 | _ -> t :: paths)), last
280 - 280 +
281 (* TODO: docelowa lematyzacja: 281 (* TODO: docelowa lematyzacja:
282 - lematyzacja za pomocą półręcznie wytworzonych reguł lematyzacji i listy wyjątków 282 - lematyzacja za pomocą półręcznie wytworzonych reguł lematyzacji i listy wyjątków
283 - walidacja lematów za pomocą listy znanych lematów zawierającej lemat, kategorię, rodzaj subst, aspekt verb (obejmuje też walidację akronimów) 283 - walidacja lematów za pomocą listy znanych lematów zawierającej lemat, kategorię, rodzaj subst, aspekt verb (obejmuje też walidację akronimów)
284 - rozpoznawanie wyrażeń wielosłownych (mwe i mte) za pomocą listy zawierającej ich lematy i szablony odmiany 284 - rozpoznawanie wyrażeń wielosłownych (mwe i mte) za pomocą listy zawierającej ich lematy i szablony odmiany
285 *) 285 *)
286 - 286 +
287 (**********************************************************************************) 287 (**********************************************************************************)
288 - 288 +
289 289
290 (**********************************************************************************) 290 (**********************************************************************************)
291 - 291 +
292 (**********************************************************************************) 292 (**********************************************************************************)
293 -  
294 -  
295 - 293 +
  294 +
  295 +
296 (*let rec get_beg_id = function 296 (*let rec get_beg_id = function
297 Token t -> t.beg 297 Token t -> t.beg
298 | Seq(t :: _) -> get_beg_id t 298 | Seq(t :: _) -> get_beg_id t
299 | Variant(t :: _) -> get_beg_id t 299 | Variant(t :: _) -> get_beg_id t
300 | _ -> failwith "get_beg_id" 300 | _ -> failwith "get_beg_id"
301 - 301 +
302 let rec get_end_id = function 302 let rec get_end_id = function
303 Token t -> t.beg + t.len 303 Token t -> t.beg + t.len
304 | Seq [] -> failwith "get_end_id" 304 | Seq [] -> failwith "get_end_id"
305 | Seq l -> get_end_id (List.hd (List.rev l)) 305 | Seq l -> get_end_id (List.hd (List.rev l))
306 | Variant(t :: _) -> get_end_id t 306 | Variant(t :: _) -> get_end_id t
307 | _ -> failwith "get_end_id"*) 307 | _ -> failwith "get_end_id"*)
308 - 308 +
309 (*let rec lemmatize_tokens paths next_id = function 309 (*let rec lemmatize_tokens paths next_id = function
310 Token({token=Dig(v,cat)} as t)-> PrePaths.add_edge paths t.beg next_id t.orth v (parse_postags cat) t.beg t.len 310 Token({token=Dig(v,cat)} as t)-> PrePaths.add_edge paths t.beg next_id t.orth v (parse_postags cat) t.beg t.len
311 | Token({token=Lemma(lemma,interp)} as t) -> PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags interp) t.beg t.len 311 | Token({token=Lemma(lemma,interp)} as t) -> PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags interp) t.beg t.len
312 | Token({token=Interp lemma} as t) -> PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags "interp") t.beg t.len 312 | Token({token=Interp lemma} as t) -> PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags "interp") t.beg t.len
313 - | Token({token=AllSmall s} as t) -> 313 + | Token({token=AllSmall s} as t) ->
314 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) -> 314 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) ->
315 PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags postags) t.beg t.len) 315 PrePaths.add_edge paths t.beg next_id t.orth lemma (parse_postags postags) t.beg t.len)
316 | Token({token=FirstCap(s,s2)} as t) -> (* FIXME: dodać wersję z s2 ; uporządkować słownik; dodać akronimy *) 316 | Token({token=FirstCap(s,s2)} as t) -> (* FIXME: dodać wersję z s2 ; uporządkować słownik; dodać akronimy *)
@@ -321,19 +321,19 @@ let rec get_end_id = function @@ -321,19 +321,19 @@ let rec get_end_id = function
321 | Seq(t :: next :: l) -> lemmatize_tokens (lemmatize_tokens paths (get_beg_id next) t) next_id (Seq(next :: l)) 321 | Seq(t :: next :: l) -> lemmatize_tokens (lemmatize_tokens paths (get_beg_id next) t) next_id (Seq(next :: l))
322 | Seq [] -> failwith "lemmatize_tokens" 322 | Seq [] -> failwith "lemmatize_tokens"
323 | Variant l -> Xlist.fold l paths (fun paths t -> lemmatize_tokens paths next_id t)*) 323 | Variant l -> Xlist.fold l paths (fun paths t -> lemmatize_tokens paths next_id t)*)
324 - 324 +
325 (*let rec lemmatize_paths_tokens paths = function (* FIXME: uzgodnić postać lematów *) 325 (*let rec lemmatize_paths_tokens paths = function (* FIXME: uzgodnić postać lematów *)
326 Token({token=Dig(v,cat)} as t)-> PrePaths.add_edge paths t.beg t.next t.orth v (parse_postags cat) t.attrs t.beg t.len 326 Token({token=Dig(v,cat)} as t)-> PrePaths.add_edge paths t.beg t.next t.orth v (parse_postags cat) t.attrs t.beg t.len
327 - | Token({token=Lemma(lemma,interp)} as t) -> 327 + | Token({token=Lemma(lemma,interp)} as t) ->
328 if Xlist.mem t.attrs "lemmatized as lowercase" || Xlist.mem t.attrs "lemma not validated" then paths else (* FIXME *) 328 if Xlist.mem t.attrs "lemmatized as lowercase" || Xlist.mem t.attrs "lemma not validated" then paths else (* FIXME *)
329 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags interp) t.attrs t.beg t.len 329 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags interp) t.attrs t.beg t.len
330 | Token({token=Interp lemma} as t) -> PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags "interp") t.attrs t.beg t.len 330 | Token({token=Interp lemma} as t) -> PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags "interp") t.attrs t.beg t.len
331 | Token({token=Proper _} as t) -> failwith "lemmatize_paths_tokens: ni" 331 | Token({token=Proper _} as t) -> failwith "lemmatize_paths_tokens: ni"
332 | Token({token=Compound _} as t) -> failwith "lemmatize_paths_tokens: ni" 332 | Token({token=Compound _} as t) -> failwith "lemmatize_paths_tokens: ni"
333 -(* | Token({token=AllSmall s} as t) -> 333 +(* | Token({token=AllSmall s} as t) ->
334 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) -> 334 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) ->
335 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags postags) t.beg t.len) 335 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags postags) t.beg t.len)
336 - | Token({token=SmallLetter s} as t) -> 336 + | Token({token=SmallLetter s} as t) ->
337 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) -> 337 Xlist.fold (Inflexion.get_interpretations s) paths (fun paths (lemma,postags) ->
338 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags postags) t.beg t.len) 338 PrePaths.add_edge paths t.beg t.next t.orth lemma (parse_postags postags) t.beg t.len)
339 | Token({token=FirstCap(s,s2)} as t) -> (* FIXME: dodać wersję z s2 ; uporządkować słownik; dodać akronimy *) 339 | Token({token=FirstCap(s,s2)} as t) -> (* FIXME: dodać wersję z s2 ; uporządkować słownik; dodać akronimy *)
@@ -345,19 +345,19 @@ let rec get_end_id = function @@ -345,19 +345,19 @@ let rec get_end_id = function
345 | Token _ -> paths 345 | Token _ -> paths
346 | Seq l -> Xlist.fold l paths (fun paths t -> lemmatize_paths_tokens paths t) 346 | Seq l -> Xlist.fold l paths (fun paths t -> lemmatize_paths_tokens paths t)
347 | Variant l -> Xlist.fold l paths (fun paths t -> lemmatize_paths_tokens paths t) 347 | Variant l -> Xlist.fold l paths (fun paths t -> lemmatize_paths_tokens paths t)
348 - 348 +
349 (*let rec lemmatize paths = function 349 (*let rec lemmatize paths = function
350 t :: next :: l -> lemmatize (lemmatize_tokens paths (get_beg_id next) t) (next :: l) 350 t :: next :: l -> lemmatize (lemmatize_tokens paths (get_beg_id next) t) (next :: l)
351 | [t] -> lemmatize_tokens paths (get_end_id t) t 351 | [t] -> lemmatize_tokens paths (get_end_id t) t
352 | [] -> failwith "lemmatize"*) 352 | [] -> failwith "lemmatize"*)
353 - 353 +
354 let rec lemmatize_paths paths = function 354 let rec lemmatize_paths paths = function
355 t :: l -> lemmatize_paths (lemmatize_paths_tokens paths t) l 355 t :: l -> lemmatize_paths (lemmatize_paths_tokens paths t) l
356 | [] -> paths *) 356 | [] -> paths *)
357 - 357 +
358 (* FIXME: dodać 'co do' prep:gen *) 358 (* FIXME: dodać 'co do' prep:gen *)
359 -  
360 - 359 +
  360 +
361 (* Dane do przekazania: 361 (* Dane do przekazania:
362 - lematy i interpretacje: generowanie typów i termów 362 - lematy i interpretacje: generowanie typów i termów
363 - orths 363 - orths
@@ -365,8 +365,8 @@ let rec lemmatize_paths paths = function @@ -365,8 +365,8 @@ let rec lemmatize_paths paths = function
365 - struktura grafu: wyróżniki przy niejednoznaczności 365 - struktura grafu: wyróżniki przy niejednoznaczności
366 - sensy wraz z hiperonimami 366 - sensy wraz z hiperonimami
367 - <indent> *) 367 - <indent> *)
368 -  
369 -(* 368 +
  369 +(*
370 Ala zjadła kota. 370 Ala zjadła kota.
371 Ala subst:sg:nom:f imię -> istota 371 Ala subst:sg:nom:f imię -> istota
372 kot subst:sg:nom:m2 pospolita - kot 2 - istota 1 istota żywa 1 zwierzę 1 strunowiec 1 czaszkowiec 1 kręgowiec 1 tetrapod 1 owodniowiec 1 ssak 1 ssak żyworodny 1 łożyskowiec 1 ssak drapieżny 1 kot 1 kot 2 372 kot subst:sg:nom:m2 pospolita - kot 2 - istota 1 istota żywa 1 zwierzę 1 strunowiec 1 czaszkowiec 1 kręgowiec 1 tetrapod 1 owodniowiec 1 ssak 1 ssak żyworodny 1 łożyskowiec 1 ssak drapieżny 1 kot 1 kot 2
@@ -392,10 +392,10 @@ czas 3 godzina 4 @@ -392,10 +392,10 @@ czas 3 godzina 4
392 do opisu czasu trwania: 392 do opisu czasu trwania:
393 jednostka czasu 1: godzina 3, sekunda 2, (minuta 4 - nie podłączona) dzień 2, miesiąc 1, rok 1/2 393 jednostka czasu 1: godzina 3, sekunda 2, (minuta 4 - nie podłączona) dzień 2, miesiąc 1, rok 1/2
394 394
395 -*)  
396 -  
397 -(**  
398 - 395 +*)
  396 +
  397 +(**
  398 +
399 (* empty *) 399 (* empty *)
400 400
401 let empty = IntMap.empty, 0, 0 401 let empty = IntMap.empty, 0, 0
@@ -406,11 +406,11 @@ let poss_record_empty = {interp=[]; attrs=[]; proper=[]; senses=[]} @@ -406,11 +406,11 @@ let poss_record_empty = {interp=[]; attrs=[]; proper=[]; senses=[]}
406 406
407 (* add *) 407 (* add *)
408 408
409 -let dict_add dict lemma postags attrs beg len = 409 +let dict_add dict lemma postags attrs beg len =
410 if postags = [] then dict else 410 if postags = [] then dict else
411 let interps = try StringMap.find dict.lemmas lemma with Not_found -> StringMap.empty in 411 let interps = try StringMap.find dict.lemmas lemma with Not_found -> StringMap.empty in
412 let interps = Xlist.fold postags interps (fun interps (pos,tags) -> 412 let interps = Xlist.fold postags interps (fun interps (pos,tags) ->
413 - StringMap.add_inc interps pos {poss_record_empty with interp=[tags]; attrs=attrs} (fun l -> 413 + StringMap.add_inc interps pos {poss_record_empty with interp=[tags]; attrs=attrs} (fun l ->
414 {l with interp=tags :: l.interp; attrs=StringSet.to_list (StringSet.union (StringSet.of_list l.attrs) (StringSet.of_list attrs))})) in 414 {l with interp=tags :: l.interp; attrs=StringSet.to_list (StringSet.union (StringSet.of_list l.attrs) (StringSet.of_list attrs))})) in
415 if dict.dbeg <> beg && dict.dbeg <> -1 then failwith "dict_add" else 415 if dict.dbeg <> beg && dict.dbeg <> -1 then failwith "dict_add" else
416 if dict.dlen <> len && dict.dlen <> -1 then failwith "dict_add" else 416 if dict.dlen <> len && dict.dlen <> -1 then failwith "dict_add" else
@@ -424,52 +424,52 @@ let add_simple map i j orth lemma postags attrs beg len = @@ -424,52 +424,52 @@ let add_simple map i j orth lemma postags attrs beg len =
424 let orths = StringMap.add orths orth dict in 424 let orths = StringMap.add orths orth dict in
425 let map2 = IntMap.add map2 j orths in 425 let map2 = IntMap.add map2 j orths in
426 IntMap.add map i map2 426 IntMap.add map i map2
427 - 427 +
428 let add_edge (map,last,n) i j orth lemma postags attrs beg len = 428 let add_edge (map,last,n) i j orth lemma postags attrs beg len =
429 add_simple map i j orth lemma postags attrs beg len, max j last, max j n 429 add_simple map i j orth lemma postags attrs beg len, max j last, max j n
430 - 430 +
431 let rec add_path (map,last,n) i j = function 431 let rec add_path (map,last,n) i j = function
432 [] -> failwith "add_path" 432 [] -> failwith "add_path"
433 - | [orth,lemma,postags,beg,len] -> 433 + | [orth,lemma,postags,beg,len] ->
434 add_simple map i j orth lemma postags [] beg len, last, n 434 add_simple map i j orth lemma postags [] beg len, last, n
435 - | (orth,lemma,postags,beg,len) :: l -> 435 + | (orth,lemma,postags,beg,len) :: l ->
436 add_path (add_simple map i (n+1) orth lemma postags [] beg len, last, n+1) (n+1) j l 436 add_path (add_simple map i (n+1) orth lemma postags [] beg len, last, n+1) (n+1) j l
437 -(* 437 +(*
438 let insert (map,last,n) i j orth dict = 438 let insert (map,last,n) i j orth dict =
439 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in 439 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in
440 let orths = try IntMap.find map2 j with Not_found -> StringMap.empty in 440 let orths = try IntMap.find map2 j with Not_found -> StringMap.empty in
441 let orths = StringMap.add orths orth dict in 441 let orths = StringMap.add orths orth dict in
442 - let map2 = IntMap.add map2 j orths in 442 + let map2 = IntMap.add map2 j orths in
443 IntMap.add map i map2, last, n 443 IntMap.add map i map2, last, n
444 - 444 +
445 let rec insert_path (map,last,n) i j = function 445 let rec insert_path (map,last,n) i j = function
446 [] -> failwith "add_path" 446 [] -> failwith "add_path"
447 - | [orth,dict] -> 447 + | [orth,dict] ->
448 insert (map,last,n) i j orth dict 448 insert (map,last,n) i j orth dict
449 - | (orth,dict) :: l -> 449 + | (orth,dict) :: l ->
450 insert_path (insert (map,last,n+1) i (n+1) orth dict) (n+1) j l 450 insert_path (insert (map,last,n+1) i (n+1) orth dict) (n+1) j l
451 -  
452 -let set_sentence_begin (map,last,n) i j orth = 451 +
  452 +let set_sentence_begin (map,last,n) i j orth =
453 try 453 try
454 let map2 = IntMap.find map i in 454 let map2 = IntMap.find map i in
455 let orths = IntMap.find map2 j in 455 let orths = IntMap.find map2 j in
456 let dict = StringMap.find orths orth in 456 let dict = StringMap.find orths orth in
457 let orths = StringMap.add orths orth {dict with sentence_begin=true} in 457 let orths = StringMap.add orths orth {dict with sentence_begin=true} in
458 let map2 = IntMap.add map2 j orths in 458 let map2 = IntMap.add map2 j orths in
459 - IntMap.add map i map2, last, n 459 + IntMap.add map i map2, last, n
460 with Not_found -> failwith "set_sentence_begin" 460 with Not_found -> failwith "set_sentence_begin"
461 461
462 -let set_sentence_end (map,last,n) i j orth = 462 +let set_sentence_end (map,last,n) i j orth =
463 try 463 try
464 let map2 = IntMap.find map i in 464 let map2 = IntMap.find map i in
465 let orths = IntMap.find map2 j in 465 let orths = IntMap.find map2 j in
466 let dict = StringMap.find orths orth in 466 let dict = StringMap.find orths orth in
467 let orths = StringMap.add orths orth {dict with sentence_end=true} in 467 let orths = StringMap.add orths orth {dict with sentence_end=true} in
468 let map2 = IntMap.add map2 j orths in 468 let map2 = IntMap.add map2 j orths in
469 - IntMap.add map i map2, last, n 469 + IntMap.add map i map2, last, n
470 with Not_found -> failwith "set_sentence_end" 470 with Not_found -> failwith "set_sentence_end"
471 471
472 -let is_sentence_end (map,last,n) i j orth = 472 +let is_sentence_end (map,last,n) i j orth =
473 try 473 try
474 let map2 = IntMap.find map i in 474 let map2 = IntMap.find map i in
475 let orths = IntMap.find map2 j in 475 let orths = IntMap.find map2 j in
@@ -503,7 +503,7 @@ let rec find_paths_bound (map,last,n) k i = @@ -503,7 +503,7 @@ let rec find_paths_bound (map,last,n) k i =
503 let tails = find_paths_bound (map,last,n) (k-1) j in 503 let tails = find_paths_bound (map,last,n) (k-1) j in
504 StringMap.fold set paths (fun paths s _ -> 504 StringMap.fold set paths (fun paths s _ ->
505 Xlist.fold tails paths (fun paths tail -> (s :: tail) :: paths))) 505 Xlist.fold tails paths (fun paths tail -> (s :: tail) :: paths)))
506 - 506 +
507 let rec find_paths_rec (map,last,n) i = 507 let rec find_paths_rec (map,last,n) i =
508 if i = last then [[]] else 508 if i = last then [[]] else
509 if not (IntMap.mem map i) then failwith "find_paths_rec" else 509 if not (IntMap.mem map i) then failwith "find_paths_rec" else
@@ -511,14 +511,14 @@ let rec find_paths_rec (map,last,n) i = @@ -511,14 +511,14 @@ let rec find_paths_rec (map,last,n) i =
511 let tails = find_paths_rec (map,last,n) j in 511 let tails = find_paths_rec (map,last,n) j in
512 StringMap.fold set paths (fun paths s _ -> 512 StringMap.fold set paths (fun paths s _ ->
513 Xlist.fold tails paths (fun paths tail -> (s :: tail) :: paths))) 513 Xlist.fold tails paths (fun paths tail -> (s :: tail) :: paths)))
514 - 514 +
515 let find_paths (map,last,n) = 515 let find_paths (map,last,n) =
516 find_paths_rec (map,last,n) 0 516 find_paths_rec (map,last,n) 0
517 -*)  
518 -let has_lemma orths = 517 +*)
  518 +let has_lemma orths =
519 StringMap.fold orths false (fun b _ dict -> 519 StringMap.fold orths false (fun b _ dict ->
520 if StringMap.is_empty dict.lemmas then b else true) 520 if StringMap.is_empty dict.lemmas then b else true)
521 - 521 +
522 let rec no_possible_path_rec map last i = 522 let rec no_possible_path_rec map last i =
523 if last = i then false else 523 if last = i then false else
524 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in 524 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in
@@ -526,27 +526,27 @@ let rec no_possible_path_rec map last i = @@ -526,27 +526,27 @@ let rec no_possible_path_rec map last i =
526 if has_lemma orths then 526 if has_lemma orths then
527 b && no_possible_path_rec map last j 527 b && no_possible_path_rec map last j
528 else b) 528 else b)
529 - 529 +
530 let no_possible_path (map,last,n) = 530 let no_possible_path (map,last,n) =
531 no_possible_path_rec map last 0 531 no_possible_path_rec map last 0
532 -(* 532 +(*
533 let rec match_path_rec map found i rev = function 533 let rec match_path_rec map found i rev = function
534 - [] -> (i :: rev) :: found 534 + [] -> (i :: rev) :: found
535 | s :: l -> 535 | s :: l ->
536 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in 536 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in
537 let found2 = IntMap.fold map2 [] (fun found2 j set -> 537 let found2 = IntMap.fold map2 [] (fun found2 j set ->
538 if StringMap.mem set s then j :: found2 else found2) in 538 if StringMap.mem set s then j :: found2 else found2) in
539 Xlist.fold found2 found (fun found j -> match_path_rec map found j (i :: rev) l) 539 Xlist.fold found2 found (fun found j -> match_path_rec map found j (i :: rev) l)
540 - 540 +
541 let match_path (map,last,n) = function 541 let match_path (map,last,n) = function
542 [] -> failwith "match_path" 542 [] -> failwith "match_path"
543 - | s :: l -> 543 + | s :: l ->
544 let found = IntMap.fold map [] (fun found i map2 -> 544 let found = IntMap.fold map [] (fun found i map2 ->
545 IntMap.fold map2 found (fun found j set -> 545 IntMap.fold map2 found (fun found j set ->
546 if StringMap.mem set s then (i,j) :: found else found)) in 546 if StringMap.mem set s then (i,j) :: found else found)) in
547 Xlist.fold found [] (fun found (i,j) -> match_path_rec map found j [i] l) 547 Xlist.fold found [] (fun found (i,j) -> match_path_rec map found j [i] l)
548 548
549 -let get_matched orths = function 549 +let get_matched orths = function
550 Orth s -> if StringMap.mem orths s then [s] else [] 550 Orth s -> if StringMap.mem orths s then [s] else []
551 | Pos s -> (*print_endline ("a1 " ^ s);*) StringSet.to_list (StringMap.fold orths StringSet.empty (fun set orth dict -> 551 | Pos s -> (*print_endline ("a1 " ^ s);*) StringSet.to_list (StringMap.fold orths StringSet.empty (fun set orth dict ->
552 StringMap.fold dict.lemmas set (fun set lemma interps -> 552 StringMap.fold dict.lemmas set (fun set lemma interps ->
@@ -554,19 +554,19 @@ let get_matched orths = function @@ -554,19 +554,19 @@ let get_matched orths = function
554 (* print_endline ("a2 " ^ pos); *) 554 (* print_endline ("a2 " ^ pos); *)
555 if s = pos then StringSet.add set orth else set)))) 555 if s = pos then StringSet.add set orth else set))))
556 (* | All -> orths *) 556 (* | All -> orths *)
557 - 557 +
558 let rec match_path_ex_rec map found i rev = function 558 let rec match_path_ex_rec map found i rev = function
559 - [] -> ((i,[]) :: rev) :: found 559 + [] -> ((i,[]) :: rev) :: found
560 | s :: l -> 560 | s :: l ->
561 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in 561 let map2 = try IntMap.find map i with Not_found -> IntMap.empty in
562 let found2 = IntMap.fold map2 [] (fun found2 j orths -> 562 let found2 = IntMap.fold map2 [] (fun found2 j orths ->
563 let l = get_matched orths s in 563 let l = get_matched orths s in
564 if l <> [] then (j,l) :: found2 else found2) in 564 if l <> [] then (j,l) :: found2 else found2) in
565 Xlist.fold found2 found (fun found (j,l2) -> match_path_ex_rec map found j ((i,l2) :: rev) l) 565 Xlist.fold found2 found (fun found (j,l2) -> match_path_ex_rec map found j ((i,l2) :: rev) l)
566 - 566 +
567 let match_path_ex (map,last,n) = function 567 let match_path_ex (map,last,n) = function
568 [] -> failwith "match_path_ex" 568 [] -> failwith "match_path_ex"
569 - | s :: l -> 569 + | s :: l ->
570 let found = IntMap.fold map [] (fun found i map2 -> 570 let found = IntMap.fold map [] (fun found i map2 ->
571 IntMap.fold map2 found (fun found j orths -> 571 IntMap.fold map2 found (fun found j orths ->
572 let l = get_matched orths s in 572 let l = get_matched orths s in
@@ -578,49 +578,49 @@ let last_node (_,last,_) = last @@ -578,49 +578,49 @@ let last_node (_,last,_) = last
578 let set_last_node (map,last,n) new_last = map, new_last, n 578 let set_last_node (map,last,n) new_last = map, new_last, n
579 579
580 let find (map,last,n) i = 580 let find (map,last,n) i =
581 - try 581 + try
582 IntMap.fold (IntMap.find map i) [] (fun found j orths -> 582 IntMap.fold (IntMap.find map i) [] (fun found j orths ->
583 StringMap.fold orths found (fun found orth _ -> 583 StringMap.fold orths found (fun found orth _ ->
584 (i,j,orth) :: found)) 584 (i,j,orth) :: found))
585 with Not_found -> [] 585 with Not_found -> []
586 586
587 let find_full (map,last,n) i = 587 let find_full (map,last,n) i =
588 - try 588 + try
589 IntMap.fold (IntMap.find map i) [] (fun found j orths -> 589 IntMap.fold (IntMap.find map i) [] (fun found j orths ->
590 StringMap.fold orths found (fun found orth dict -> 590 StringMap.fold orths found (fun found orth dict ->
591 (i,j,orth,dict) :: found)) 591 (i,j,orth,dict) :: found))
592 with Not_found -> [] 592 with Not_found -> []
593 *) 593 *)
594 -let fold (map,last,n) s f = 594 +let fold (map,last,n) s f =
595 IntMap.fold map s (fun s i map2 -> 595 IntMap.fold map s (fun s i map2 ->
596 IntMap.fold map2 s (fun s j set -> 596 IntMap.fold map2 s (fun s j set ->
597 StringMap.fold set s (fun s orth lemmas -> 597 StringMap.fold set s (fun s orth lemmas ->
598 f s orth i j lemmas))) 598 f s orth i j lemmas)))
599 -(*  
600 -let map (map,last,n) f = 599 +(*
  600 +let map (map,last,n) f =
601 IntMap.map map (fun map2 -> 601 IntMap.map map (fun map2 ->
602 IntMap.map map2 (fun orths -> 602 IntMap.map map2 (fun orths ->
603 StringMap.map orths (fun lemmas -> 603 StringMap.map orths (fun lemmas ->
604 f lemmas))), last, n 604 f lemmas))), last, n
605 -  
606 -let mapi (map,last,n) f = 605 +
  606 +let mapi (map,last,n) f =
607 IntMap.mapi map (fun i map2 -> 607 IntMap.mapi map (fun i map2 ->
608 IntMap.mapi map2 (fun j orths -> 608 IntMap.mapi map2 (fun j orths ->
609 StringMap.mapi orths (fun orth lemmas -> 609 StringMap.mapi orths (fun orth lemmas ->
610 f orth i j lemmas))), last, n 610 f orth i j lemmas))), last, n
611 -  
612 -let get_edges (map,_,_) i j = 611 +
  612 +let get_edges (map,_,_) i j =
613 IntMap.find (IntMap.find map i) j 613 IntMap.find (IntMap.find map i) j
614 -  
615 -let get_edges_from (map,_,_) i = 614 +
  615 +let get_edges_from (map,_,_) i =
616 IntMap.find map i 616 IntMap.find map i
617 -*) 617 +*)
618 let rec topological_sort_rec map visited l i = 618 let rec topological_sort_rec map visited l i =
619 - if IntSet.mem visited i then (l,visited) else 619 + if IntSet.mem visited i then (l,visited) else
620 let l, visited = IntMap.fold (try IntMap.find map i with Not_found -> IntMap.empty) (l,IntSet.add visited i) (fun (l,visited) j _ -> 620 let l, visited = IntMap.fold (try IntMap.find map i with Not_found -> IntMap.empty) (l,IntSet.add visited i) (fun (l,visited) j _ ->
621 topological_sort_rec map visited l j) in 621 topological_sort_rec map visited l j) in
622 i :: l, visited 622 i :: l, visited
623 - 623 +
624 let topological_sort (map,last,n) = 624 let topological_sort (map,last,n) =
625 let l, _ = topological_sort_rec map IntSet.empty [] 0 in 625 let l, _ = topological_sort_rec map IntSet.empty [] 0 in
626 let translation, k = Xlist.fold l (IntMap.empty,0) (fun (translation,k) i -> 626 let translation, k = Xlist.fold l (IntMap.empty,0) (fun (translation,k) i ->
@@ -630,27 +630,27 @@ let topological_sort (map,last,n) = @@ -630,27 +630,27 @@ let topological_sort (map,last,n) =
630 try IntMap.add map2 (IntMap.find translation j) orths with Not_found -> map2) in 630 try IntMap.add map2 (IntMap.find translation j) orths with Not_found -> map2) in
631 try IntMap.add map (IntMap.find translation i) map2 with Not_found -> map) in 631 try IntMap.add map (IntMap.find translation i) map2 with Not_found -> map) in
632 map, (try IntMap.find translation last with Not_found -> failwith "topological_sort 3"), k-1 632 map, (try IntMap.find translation last with Not_found -> failwith "topological_sort 3"), k-1
633 - 633 +
634 (*let interp_to_string interp = 634 (*let interp_to_string interp =
635 String.concat " " (Xlist.fold interp.interp [] (fun l tags -> 635 String.concat " " (Xlist.fold interp.interp [] (fun l tags ->
636 (String.concat ":" (Xlist.map tags (String.concat "."))) :: l)) 636 (String.concat ":" (Xlist.map tags (String.concat "."))) :: l))
637 - 637 +
638 let interps_to_string interps = 638 let interps_to_string interps =
639 String.concat " " (StringMap.fold interps [] (fun l pos interp -> 639 String.concat " " (StringMap.fold interps [] (fun l pos interp ->
640 (pos ^ "[" ^ interp_to_string interp ^ "]") :: l)) 640 (pos ^ "[" ^ interp_to_string interp ^ "]") :: l))
641 - 641 +
642 let lemmas_to_string lemmas = 642 let lemmas_to_string lemmas =
643 String.concat " " (StringMap.fold lemmas [] (fun l lemma interps -> 643 String.concat " " (StringMap.fold lemmas [] (fun l lemma interps ->
644 (lemma ^ "[" ^ interps_to_string interps ^ "]") :: l)) 644 (lemma ^ "[" ^ interps_to_string interps ^ "]") :: l))
645 -  
646 -let to_string (map,last,n) = 645 +
  646 +let to_string (map,last,n) =
647 let l = IntMap.fold map [] (fun l i map2 -> 647 let l = IntMap.fold map [] (fun l i map2 ->
648 IntMap.fold map2 l (fun l j orths -> 648 IntMap.fold map2 l (fun l j orths ->
649 - (Printf.sprintf "%5d %5d %s" i j (String.concat " " (StringMap.fold orths [] (fun l2 orths dict -> 649 + (Printf.sprintf "%5d %5d %s" i j (String.concat " " (StringMap.fold orths [] (fun l2 orths dict ->
650 (Printf.sprintf "%s %5d %5d [%s]" orths dict.dbeg dict.dlen (lemmas_to_string dict.lemmas)) :: l2)))) :: l)) in 650 (Printf.sprintf "%s %5d %5d [%s]" orths dict.dbeg dict.dlen (lemmas_to_string dict.lemmas)) :: l2)))) :: l)) in
651 Printf.sprintf "last=%d n=%d\n %s" last n (String.concat "\n " (List.sort compare l))*) 651 Printf.sprintf "last=%d n=%d\n %s" last n (String.concat "\n " (List.sort compare l))*)
652 - (*  
653 -let make_unique_orths (map,last,n) = 652 + (*
  653 +let make_unique_orths (map,last,n) =
654 let names = fold (map,last,n) StringQMap.empty (fun names orth _ _ _ -> 654 let names = fold (map,last,n) StringQMap.empty (fun names orth _ _ _ ->
655 StringQMap.add names orth) in 655 StringQMap.add names orth) in
656 let names = StringQMap.fold names StringSet.empty (fun names name n -> 656 let names = StringQMap.fold names StringSet.empty (fun names name n ->
@@ -658,10 +658,10 @@ let make_unique_orths (map,last,n) = @@ -658,10 +658,10 @@ let make_unique_orths (map,last,n) =
658 let map,_ = IntMap.fold map (IntMap.empty,StringMap.empty) (fun (map,used) i map2 -> 658 let map,_ = IntMap.fold map (IntMap.empty,StringMap.empty) (fun (map,used) i map2 ->
659 let map2,used = IntMap.fold map2 (IntMap.empty,used) (fun (map2,used) j orths -> 659 let map2,used = IntMap.fold map2 (IntMap.empty,used) (fun (map2,used) j orths ->
660 let orths,used = StringMap.fold orths (StringMap.empty,used) (fun (orths,used) orth lemmas -> 660 let orths,used = StringMap.fold orths (StringMap.empty,used) (fun (orths,used) orth lemmas ->
661 - let orth,used = 661 + let orth,used =
662 if StringSet.mem names orth then 662 if StringSet.mem names orth then
663 - let n =  
664 - try StringMap.find used orth + 1 663 + let n =
  664 + try StringMap.find used orth + 1
665 with Not_found -> 1 in 665 with Not_found -> 1 in
666 orth ^ "-" ^ string_of_int n, StringMap.add used orth n 666 orth ^ "-" ^ string_of_int n, StringMap.add used orth n
667 else orth,used in 667 else orth,used in
@@ -670,6 +670,6 @@ let make_unique_orths (map,last,n) = @@ -670,6 +670,6 @@ let make_unique_orths (map,last,n) =
670 IntMap.add map i map2, used) in 670 IntMap.add map i map2, used) in
671 map,last,n 671 map,last,n
672 672
673 -*)  
674 -  
675 -**)  
676 \ No newline at end of file 673 \ No newline at end of file
  674 +*)
  675 +
  676 +**)