Commit 24f2f00f018f263fe6613522fb47b6fbabec6f49

Authored by Wojciech Jaworski
2 parents 541993e4 03a0301f

Merge branch 'integration'

... ... @@ -5,7 +5,8 @@ RESOURCES_PATH=../resources/
5 5 WALENTY=/usr/share/walenty/walenty_20160412.xml
6 6  
7 7 # Port number for pre server
8   -PRE_PORT=3258
  8 +PRE_PORT=3158
  9 +#PRE_PORT=3258
9 10  
10 11 # Host name for pre server
11 12 PRE_HOST=localhost
... ... @@ -22,5 +23,26 @@ LCG_TIMEOUT=100
22 23 # LCG parser memory size (maximum number of nodes of parsed term)
23 24 LCG_NO_NODES=10000000
24 25  
25   -# Number of parser processes
  26 +# Number of parser processes
26 27 NO_PROCESSES=4
  28 +
  29 +# Is Concraft enabled
  30 +CONCRAFT_ENABLED=false
  31 +
  32 +# Path to Concraft
  33 +CONCRAFT_PATH=../concraft/
  34 +
  35 +# Is MateParser enabled
  36 +MATE_PARSER_ENABLED=false
  37 +
  38 +# Path to MateParser
  39 +MATE_PARSER_PATH=../dependencyParser/basic/mate-tools/
  40 +
  41 +# Is Świgra enabled
  42 +SWIGRA_ENABLED=false
  43 +
  44 +# Path to Świgra
  45 +SWIGRA_PATH=../swigra/parser/
  46 +
  47 +# Is sentence selection enabled
  48 +SENTENCE_SELECTION_ENABLED=true
... ...
corpora/CONLL.ml
1 1 open Xstd
2 2 open PreTypes
3 3  
4   -let alternative_string f mode alts = if List.exists (fun (m,_) -> mode = m) alts
5   - then f (snd @@ List.find (fun (m,_) -> m = mode) alts)
6   - else f (snd @@ List.find (fun (m,_) -> m = Struct) alts)
7   -
8   -let string_of_token mode token id super label =
9   - let decompose_lemma = function
10   - | Lemma(a,b,c) -> a,b,if c = [[]]
11   - then "_"
12   - else String.concat "][" @@ Xlist.map c (fun x ->
13   - String.concat "|" @@ Xlist.map x ( fun y ->
14   - String.concat "." y))
15   - | _ -> failwith ("string_of_token: not Lemma") in
16   - match mode with
17   - | Raw -> token.orth
18   - | Struct -> failwith ("function string_of_token for mode Struct is not defined")
19   - | CONLL -> let lemma,cat,interp = decompose_lemma token.token in
20   - String.concat "\t" [id;
21   - token.orth; lemma; cat; cat; interp; "_"; "_";
22   - string_of_int token.beg; string_of_int token.len]
23   - | Mate -> let lemma,cat,interp = decompose_lemma token.token in
24   - String.concat "\t" [id;
25   - token.orth; lemma; lemma; cat; cat; interp; interp; "_"; "_"; "_"; "_"; "_"; "_"]
26   - | _ -> failwith "string_of_token: ni"
27   -
28   -let string_of_paths mode tokens paths =
29   - Array.fold_left (fun acc (id,super,label) ->
30   - acc ^ "\n" ^ (string_of_token mode (ExtArray.get tokens id) (string_of_int id) super label)) "" paths
31   -
32   -let rec string_of_sentence mode tokens = function
33   - RawSentence s -> if mode = Raw then s else ""
34   - | StructSentence (_,tokens, _) -> failwith ("string_of_sentence: StructSentence") (*String.concat "\n" @@ Xlist.map tokens (fun x -> string_of_token mode x)*)
35   - | DepSentence (_, paths) -> string_of_paths mode tokens paths
36   - | QuotedSentences _ -> failwith ("string_of_sentence: QuotedSentences")
37   - | AltSentence alts -> alternative_string (string_of_sentence mode tokens) mode alts
38   -
39   -let string_of_p_record mode tokens p_record =
40   - (if p_record.pid = "" then "" else p_record.pid ^ "\n") ^
41   - string_of_sentence mode tokens p_record.psentence
42   -
43   -(*let rec string_of_paragraph mode tokens = function
44   - RawParagraph s -> if mode = Raw then s else ""
45   - | StructParagraph p_records -> String.concat "\n\n" @@ Xlist.map p_records (string_of_p_record mode tokens)
46   - | AltParagraph alts -> alternative_string (string_of_paragraph mode) mode alts
47   -
48   -let rec string_of_text mode tokens = function
49   - RawText s -> if mode = Raw then s else ""
50   - | StructText (paragraphs,_) -> String.concat "\n\n" @@ Xlist.map paragraphs (string_of_paragraph mode tokens)
51   - | AltText alts -> alternative_string (string_of_text mode) mode alts*)
52   -
53   -
54   -(******************)
55   -
56   -let establish_next tokens paths =
57   - let n = ExtArray.size tokens in
58   - Int.iter 1 (n - 2) (fun i ->
59   - let f = ExtArray.get tokens i in
60   - let s = ExtArray.get tokens (i+1) in
61   - ExtArray.set tokens i {f with next = s.beg});
62   - let last = ExtArray.get tokens (n-1) in
63   - ExtArray.set tokens (n-1) {last with next = last.beg + last.len}
64   -
65   -
66   - (*let rec pom res = function
67   - h :: t -> let next = if res = []
68   - then h.beg+h.len
69   - else (List.hd res).beg in
70   - pom ({h with next = next} :: res) t
71   - | [] -> res in
72   - pom [] rev_tokens*)
73   -
74   -let rec establish_for_token i text tokens = function
75   - (id,_,_) :: t as l->
76   - let h = ExtArray.get tokens id in
77   - if Xstring.check_prefix " " text
78   - then establish_for_token (i+100) (Xstring.cut_prefix " " text) tokens l
79   - else if Xstring.check_prefix h.orth text
80   - then
81   - let n = (List.length @@ Xunicode.utf8_chars_of_utf8_string h.orth) * 100 in
82   - let n_h = {h with beg = i ; len = n} in
83   - ExtArray.set tokens id n_h;
84   - establish_for_token (i+n) (Xstring.cut_prefix h.orth text) tokens t
85   - else failwith ("establish_for_token :" ^ h.orth ^ " " ^ text)
86   - | [] -> 100, i
87   -
88   -let rec establish_lengths text paths tokens =
89   - let pbeg, plen = establish_for_token 100 text tokens (List.tl (Array.to_list paths)) in
90   - establish_next tokens paths;
91   - pbeg, plen-100
92   -
93   -(******************)
94   -
95   -exception ErrorInfoFile of string
96   -
97   -let info_file = "../corpora/info_sentences.txt"
98   -
99   -let info = Xstring.split "\n\n" @@ File.load_file_gen info_file
100   -
101   -let add_to_map map info_str =
102   - match Xstring.split "\n" info_str with
103   - [id; text; info_token] -> StringMap.add map info_token (id, text)
104   - | _ -> raise (ErrorInfoFile info_str)
105   -
106   -let info_map =
107   - Xlist.fold info StringMap.empty add_to_map
108   -
109   -let match_sentence (p_record,tokens) =
110   - let rec info_token s = match s with
111   - RawSentence text -> failwith ("match_sentence: " ^ text)
112   - | StructSentence (_, tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*)
113   - | DepSentence (_, paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths
114   - | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences")
115   - | AltSentence alts -> failwith ("match_sentence: AltSentence")
116   - (*if List.exists (fun (mode, s) -> mode = CONLL) alts
117   - then info_token (snd (List.find (fun (mode, s) -> mode = CONLL) alts))
118   - else failwith ("match_sentence: no CONLL mode in AltSentence")*) in
119   - let info_token, paths = info_token p_record.psentence in
120   - try
121   - let id, text = StringMap.find info_map info_token in
122   - let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in
123   - AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len;
124   - psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence("", paths)]}]],tokens)]
125   -(* {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *)
126   - with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)]
127   -
128   -let match_corpus corpus =
  4 +let alternative_string f mode alts = if List.exists (fun (m,_) -> mode = m) alts
  5 + then f (snd @@ List.find (fun (m,_) -> m = mode) alts)
  6 + else f (snd @@ List.find (fun (m,_) -> m = PreTypes.Struct) alts)
  7 +
  8 +let string_of_token mode token conll_id super label =
  9 + let decompose_lemma = function
  10 + | PreTypes.Lemma(a,b,c) -> a,b,if c = [[]]
  11 + then "_"
  12 + else String.concat "][" @@ Xlist.map c (fun x ->
  13 + String.concat "|" @@ Xlist.map x ( fun y ->
  14 + String.concat "." y))
  15 + | t -> failwith ("string_of_token: not Lemma") in
  16 + match mode with
  17 + | PreTypes.Raw -> token.PreTypes.orth
  18 + | PreTypes.Struct -> failwith ("function string_of_token for mode Struct is not defined")
  19 + | PreTypes.CONLL -> let lemma,cat,interp = decompose_lemma token.PreTypes.token in
  20 + String.concat "\t" [string_of_int conll_id;
  21 + token.PreTypes.orth; lemma; cat; cat; interp; "_"; "_";
  22 + string_of_int token.PreTypes.beg; string_of_int token.PreTypes.len]
  23 + | PreTypes.Mate -> let lemma,cat,interp = decompose_lemma token.PreTypes.token in
  24 + String.concat "\t" [string_of_int conll_id;
  25 + token.PreTypes.orth; lemma; lemma; cat; cat; interp; interp; "_"; "_"; "_"; "_"; "_"; "_"]
  26 + | _ -> failwith "string_of_token: ni"
  27 +
  28 +let string_of_paths mode tokens paths =
  29 + let l = Int.fold 1 (Array.length paths - 1) [] (fun l conll_id ->
  30 + let id,super,label = paths.(conll_id) in
  31 + (string_of_token mode (ExtArray.get tokens id) conll_id super label) :: l) in
  32 + String.concat "\n" (List.rev l) ^ "\n\n"
  33 +
  34 +let rec string_of_sentence mode tokens = function
  35 + RawSentence s -> if mode = Raw then s else ""
  36 + | StructSentence (tokens, _) -> failwith ("string_of_sentence: StructSentence") (*String.concat "\n" @@ Xlist.map tokens (fun x -> string_of_token mode x)*)
  37 + | DepSentence (paths) -> string_of_paths mode tokens paths
  38 + | QuotedSentences _ -> failwith ("string_of_sentence: QuotedSentences")
  39 + | AltSentence alts -> alternative_string (string_of_sentence mode tokens) mode alts
  40 +
  41 +let string_of_p_record mode tokens p_record =
  42 + (if p_record.pid = "" then "" else p_record.pid ^ "\n") ^
  43 + string_of_sentence mode tokens p_record.psentence
  44 +
  45 +(*let rec string_of_paragraph mode tokens = function
  46 + RawParagraph s -> if mode = Raw then s else ""
  47 + | StructParagraph p_records -> String.concat "\n\n" @@ Xlist.map p_records (string_of_p_record mode tokens)
  48 + | AltParagraph alts -> alternative_string (string_of_paragraph mode) mode alts
  49 +
  50 +let rec string_of_text mode tokens = function
  51 + RawText s -> if mode = Raw then s else ""
  52 + | StructText (paragraphs,_) -> String.concat "\n\n" @@ Xlist.map paragraphs (string_of_paragraph mode tokens)
  53 + | AltText alts -> alternative_string (string_of_text mode) mode alts*)
  54 +
  55 +
  56 +(******************)
  57 +
  58 +let establish_next tokens paths =
  59 + let n = ExtArray.size tokens in
  60 + Int.iter 1 (n - 2) (fun i ->
  61 + let f = ExtArray.get tokens i in
  62 + let s = ExtArray.get tokens (i+1) in
  63 + ExtArray.set tokens i {f with next = s.beg});
  64 + let last = ExtArray.get tokens (n-1) in
  65 + ExtArray.set tokens (n-1) {last with next = last.beg + last.len}
  66 +
  67 +
  68 + (*let rec pom res = function
  69 + h :: t -> let next = if res = []
  70 + then h.beg+h.len
  71 + else (List.hd res).beg in
  72 + pom ({h with next = next} :: res) t
  73 + | [] -> res in
  74 + pom [] rev_tokens*)
  75 +
  76 +let rec establish_for_token i text tokens = function
  77 + (id,_,_) :: t as l->
  78 + let h = ExtArray.get tokens id in
  79 + if Xstring.check_prefix " " text
  80 + then establish_for_token (i+100) (Xstring.cut_prefix " " text) tokens l
  81 + else if Xstring.check_prefix h.orth text
  82 + then
  83 + let n = (List.length @@ Xunicode.utf8_chars_of_utf8_string h.orth) * 100 in
  84 + let n_h = {h with beg = i ; len = n} in
  85 + ExtArray.set tokens id n_h;
  86 + establish_for_token (i+n) (Xstring.cut_prefix h.orth text) tokens t
  87 + else failwith ("establish_for_token :" ^ h.orth ^ " " ^ text)
  88 + | [] -> 100, i
  89 +
  90 +let rec establish_lengths text paths tokens =
  91 + let pbeg, plen = establish_for_token 100 text tokens (List.tl (Array.to_list paths)) in
  92 + establish_next tokens paths;
  93 + pbeg, plen-100
  94 +
  95 +(******************)
  96 +
  97 +exception ErrorInfoFile of string
  98 +
  99 +let info_file = "../corpora/info_sentences.txt"
  100 +
  101 +let info = Xstring.split "\n\n" @@ File.load_file_gen info_file
  102 +
  103 +let add_to_map map info_str =
  104 + match Xstring.split "\n" info_str with
  105 + [id; text; info_token] -> StringMap.add map info_token (id, text)
  106 + | _ -> raise (ErrorInfoFile info_str)
  107 +
  108 +let info_map =
  109 + Xlist.fold info StringMap.empty add_to_map
  110 +
  111 +let match_sentence (p_record,tokens) =
  112 + let rec info_token s = match s with
  113 + RawSentence text -> failwith ("match_sentence: " ^ text)
  114 + | StructSentence (tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*)
  115 + | DepSentence (paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths
  116 + | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences")
  117 + | AltSentence alts -> failwith ("match_sentence: AltSentence")
  118 + (*if List.exists (fun (mode, s) -> mode = CONLL) alts
  119 + then info_token (snd (List.find (fun (mode, s) -> mode = CONLL) alts))
  120 + else failwith ("match_sentence: no CONLL mode in AltSentence")*) in
  121 + let info_token, paths = info_token p_record.psentence in
  122 + try
  123 + let id, text = StringMap.find info_map info_token in
  124 + let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in
  125 + AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; pfile_prefix="";
  126 + psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence paths]}]],tokens)]
  127 +(* {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *)
  128 + with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)]
  129 +
  130 +let match_corpus corpus =
129 131 Xlist.map corpus match_sentence
130 132  
131 133 (******************)
... ... @@ -186,8 +188,8 @@ let info_map =
186 188 let match_sentence (p_record,tokens) =
187 189 let rec info_token s = match s with
188 190 RawSentence text -> failwith ("match_sentence: " ^ text)
189   - | StructSentence (_, tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*)
190   - | DepSentence (_, paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths
  191 + | StructSentence (tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*)
  192 + | DepSentence (paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths
191 193 | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences")
192 194 | AltSentence alts -> failwith ("match_sentence: AltSentence")
193 195 (*if List.exists (fun (mode, s) -> mode = CONLL) alts
... ... @@ -197,8 +199,8 @@ let match_sentence (p_record,tokens) =
197 199 try
198 200 let id, text = StringMap.find info_map info_token in
199 201 let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in
200   - AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len;
201   - psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence("", paths)]}]],tokens)]
  202 + AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; pfile_prefix="";
  203 + psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence paths]}]],tokens)]
202 204 (* {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *)
203 205 with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)]
204 206  
... ... @@ -224,6 +226,7 @@ let load_token in_channel =
224 226 else [Xlist.map (Xstring.split_delim "|" interp) (fun tag -> [tag])] in
225 227 {empty_token with orth = orth; token = Lemma(lemma,cat,interp);}, int_of_string id, int_of_super super, label in
226 228 let line = input_line in_channel in
  229 + (* print_endline ("load_token: " ^ line); *)
227 230 if line = ""
228 231 then raise Empty_line
229 232 else if line.[0] = '#'
... ... @@ -272,7 +275,7 @@ let load_sentence in_channel =
272 275 then raise End_of_file
273 276 else rev_paths, id in
274 277 let rev_paths, id = pom [] "" in
275   - {pid = id; pbeg = -1; plen = -1; pnext = -1; psentence = DepSentence("",Array.of_list ((0,-1,"") :: List.rev rev_paths))}, tokens
  278 + {pid = id; pbeg = -1; plen = -1; pnext = -1; pfile_prefix = ""; psentence = DepSentence(Array.of_list ((0,-1,"") :: List.rev rev_paths))}, tokens
276 279 (* {s_id = id; s_text = ""; s_paths = (List.rev rev_paths)} *)
277 280  
278 281 let load_corpus in_channel =
... ...
parser/.gitignore
1 1 pipe
  2 +swigra_test
2 3 results/*
... ...
parser/LCGchart.ml
... ... @@ -197,7 +197,7 @@ let is_dep_parsed = function
197 197 let get_parsed_term tokens chart =
198 198 let n = last_node chart in
199 199 let l = Xlist.fold (find chart 0 n) [] (fun l -> function
200   - LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<root>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l
  200 + LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<root>"]), sem -> (LCGtypes.Cut(LCGtypes.Tuple[sem])) :: l
201 201 (* | LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<ors-sentence>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l *)
202 202 | _ -> l) in
203 203 let id = ExtArray.add tokens {PreTypes.empty_token with PreTypes.token=PreTypes.Lemma("<root>","interp",[])} in
... ...
parser/LCGlatexOf.ml
... ... @@ -224,7 +224,7 @@ let parsed_dep_chart l =
224 224 "\\end{longtable}"
225 225  
226 226 let not_parsed_dep_chart (id,left,l,right) =
227   - Printf.sprintf "conll_id=%d\\\\" id ^
  227 + Printf.sprintf "conll\\_id=%d\\\\" id ^
228 228 "\\begin{longtable}{|l|p{20cm}|}\n\\hline\n" ^
229 229 String.concat "\\hline\n" (Xlist.map left table_entries_of_symbol_term_list) ^
230 230 "\\hline\n\\hline\n\\hline\n" ^
... ... @@ -234,7 +234,7 @@ let not_parsed_dep_chart (id,left,l,right) =
234 234 "\\end{longtable}"
235 235  
236 236 let rec dep_chart_rec (DepNode(id,left,l,right)) =
237   - printf "dep_chart_rec: %d\n" id;
  237 + (* printf "dep_chart_rec: %d\n" id; *)
238 238 String.concat "" (Xlist.map left dep_chart_rec) ^
239 239 String.concat "" (Xlist.rev_map l (fun (symbol,sem) ->
240 240 let symbol = grammar_symbol 0 symbol in
... ...
parser/LCGlexicon.ml
... ... @@ -216,6 +216,7 @@ let make_node lemma cat weight fnum l =
216 216  
217 217 (* FIXME: "Można było" - brakuje uzgodnienia rodzaju przymiotnika w przypadku predykatywnym, i ogólnie kontroli składniowej *)
218 218 let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja siebie i się *)
  219 + if d.simple_valence = [] then print_endline "empty simple_valence";
219 220 let numbers = expand_numbers numbers in
220 221 let cases = expand_cases cases in
221 222 let genders = expand_genders genders in (* FIXME: trzeba dodać konstrukcję w marcu 2000 i leksykalizowane nazwy własne przy miesiącach *)
... ... @@ -1207,9 +1208,9 @@ let process_bracket_lemma (d:PreTypes.token_record) = function
1207 1208 LCGrenderer.make_frame x_flag tokens [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]];[schema_field OBJ "Theme" Forward [Phrase (Lex "</query>")]]] (["<colon>"]) {d with orth=""} batrs]
1208 1209 (*| "<or>" -> [LCGrenderer.make_frame x_flag tokens [] [[nosem_schema_field Forward [Phrase (Lex "</or>")]]] (["or"]) {d with orth=""} (make_node "<or>" "interp" d.weight 0 [])]*)
1209 1210 | "<or>" -> []
1210   - | "<or-sentence>" ->
  1211 + | "<or-sentence>" -> (* FIXME: dodać mówcę jako pro *)
1211 1212 let batrs = make_node "pro-komunikować" "pro" d.weight 0 [] in
1212   - [LCGrenderer.make_frame x_flag tokens [] [[schema_field OBJ "Theme" Forward [Phrase (Lex "s")]]] ["<root>"] {d with orth=""} batrs;
  1213 + [LCGrenderer.make_frame x_flag tokens [] [[schema_field ARG ""(*"Theme"*) Forward [Phrase (Lex "s")]]] ["<root>"] {d with orth=""} batrs;
1213 1214 LCGrenderer.or_frame (make_node "<sentence>" "interp" d.weight 0 [])]
1214 1215 | "</or-sentence>" ->
1215 1216 let t = ["or2"] in
... ... @@ -1257,6 +1258,7 @@ let get_labels () = {
1257 1258 let symbol,sem = LCGrenderer.make_frame_simple [] t d ( batrs) in
1258 1259 [Bracket(true,true,symbol),sem]
1259 1260 | {token = Lemma(lemma,pos,interp)} ->
  1261 + (* print_endline (lemma ^ " " ^ pos); *)
1260 1262 Xlist.fold interp [] (fun l tags ->
1261 1263 let d = {d with e=get_labels (); valence=LCGrenderer.make_controll d.valence} in
1262 1264 let entries = process_interp d (lemma,pos,tags) in
... ... @@ -1294,7 +1296,7 @@ let rec dep_create_rec nodes sons conll_id =
1294 1296 let node = IntMap.find nodes conll_id in
1295 1297 let l = try IntMap.find sons conll_id with Not_found -> [] in
1296 1298 let left,right = split_sons [] conll_id [] l in
1297   - (* Printf.printf "dep_create_rec [%s] %s [%s]\n" (String.concat ";" left) id (String.concat ";" right); *)
  1299 + (* Printf.printf "dep_create_rec [%s] %d [%s]\n" (String.concat ";" (Xlist.map left string_of_int)) conll_id (String.concat ";" (Xlist.map right string_of_int)); *)
1298 1300 DepNode(conll_id, Xlist.map left (dep_create_rec nodes sons), node, Xlist.map right (dep_create_rec nodes sons))
1299 1301  
1300 1302 let dep_create paths tokens =
... ...
parser/LCGreductions.ml
... ... @@ -224,7 +224,7 @@ let linear_term_beta_reduction4 references =
224 224 | Variant(e2,l) -> Variant(e2,Xlist.map l (fun (i,t) -> i,linear_term_beta_reduction subst (SetAttr(e,s,t))))
225 225 | t -> SetAttr(e,s,t))
226 226 | Node t ->
227   - if !size > Paths.lcg_no_nodes then raise SemTooBig;
  227 + if !size > Paths.config.Paths.lcg_no_nodes then raise SemTooBig;
228 228 incr size;
229 229 Node{t with attrs=Xlist.map t.attrs (fun (e,t) -> e, linear_term_beta_reduction subst t);
230 230 gs=linear_term_beta_reduction subst t.gs;
... ...
parser/LCGrenderer.ml
... ... @@ -440,7 +440,7 @@ let or_frame node =
440 440 Imp(Tensor[Atom "ip"; Top; Top; Top],Forward,Tensor[Atom "or"])),Forward,
441 441 Tensor[Atom "or2"]),
442 442 Lambda("x",Lambda("y",Lambda("z",Node{node with gs=make_gs [] ["<root>"]; args=Tuple[
443   - Cut(App(Var "y",Var "x"))]})))
  443 + Cut(SetAttr("AROLE",Val "Clause",SetAttr("GF",Gf CLAUSE,App(Var "y",Var "x"))))]})))
444 444  
445 445  
446 446 let label_counter = ref 0
... ...
parser/LCGvalence.ml
... ... @@ -22,22 +22,22 @@ open LCGtypes
22 22 open Printf
23 23 open Xstd
24 24  
25   -let rec list_assoc2 x = function
  25 +let rec list_assoc2 x = function
26 26 (s,a,b) :: l -> if x = s then a,b else list_assoc2 x l
27   - | [] -> raise Not_found
28   -
29   -let meaning_weight = -1.
30   -
  27 + | [] -> raise Not_found
  28 +
  29 +let meaning_weight = -1.
  30 +
31 31 let prepare_senses lemma meanings senses =
32 32 match meanings,senses with
33 33 [],[] -> [lemma, ["ALL"],0.] (* FIXME *)
34   - | [],_ ->
35   - Xlist.map senses (fun (sense,hipero,weight) ->
36   - if hipero = ["0"] then sense,["0"],weight else
  34 + | [],_ ->
  35 + Xlist.map senses (fun (sense,hipero,weight) ->
  36 + if hipero = ["0"] then sense,["0"],weight else
37 37 sense,(if hipero = [] then ["ALL"] else hipero),weight)
38 38 | _,[] -> Xlist.map meanings (fun meaning -> meaning, ["ALL"],meaning_weight)
39   - | _,_ ->
40   - Xlist.map meanings (fun meaning ->
  39 + | _,_ ->
  40 + Xlist.map meanings (fun meaning ->
41 41 let hipero,weight = try list_assoc2 meaning senses with Not_found -> [],meaning_weight in
42 42 if hipero = ["0"] then meaning,["0"],weight else meaning,(if hipero = [] then ["ALL"] else hipero),weight)
43 43  
... ... @@ -50,89 +50,91 @@ let extract_meaning lemma = function
50 50 | GerAtrs(m,le,neg,a) -> m,le,GerAtrs([],le,neg,a)
51 51 | NonPersAtrs(m,le,role,role_attr,neg,a) -> m,le,NonPersAtrs([],le,role,role_attr,neg,a)
52 52 | _ -> failwith "extract_meaning"
53   -
  53 +
54 54 let extract_roles = function
55 55 NonPersAtrs(m,le,role,role_attr,neg,a) -> role,role_attr
56 56 | _ -> failwith "extract_roles"
57   -
  57 +
58 58 let get_lemma = function
59 59 PreTypes.Lemma(lemma,cat,_) -> lemma,cat
60 60 | PreTypes.Interp lemma -> lemma,"interp"
61 61 | _ -> "",""
62   -
63   -let prepare_valence paths_array =
64   - let valence = Array.map (fun d ->
  62 +
  63 +let prepare_valence tokens =
  64 + let valence = Array.make (ExtArray.size tokens) [] in
  65 + Int.iter 1 (ExtArray.size tokens - 1) (fun id ->
  66 + let d = ExtArray.get tokens id in
65 67 let lemma,cat = get_lemma d.PreTypes.token in
66   - let lemma = if lemma = "<ors>" || lemma = ":s" || lemma = "„s" then "pro-komunikować" else lemma in
67   - if lemma = "" then [] else
68   - let prep_valence =
69   - if cat = "prep" then
  68 + let lemma = if lemma = "<or-sentence>" (*|| lemma = ":s" || lemma = "„s"*) then "pro-komunikować" else lemma in
  69 + if lemma = "" then () else
  70 + let prep_valence =
  71 + if cat = "prep" then
70 72 (* (0,lemma,StringSet.empty,0.,"NOSEM","",Frame(EmptyAtrs[],[])) :: *)
71 73 match d.PreTypes.semantics with
72 74 PreTypes.Normal -> []
73   - | PreTypes.PrepSemantics l ->
74   - Xlist.rev_map l (fun (lrole,lrole_attr,hipero,sel_prefs) ->
  75 + | PreTypes.PrepSemantics l ->
  76 + Xlist.rev_map l (fun (lrole,lrole_attr,hipero,sel_prefs) ->
75 77 0,lemma,hipero,0.,lrole,lrole_attr,Frame(EmptyAtrs[],[]))
76 78 | _ -> failwith "prepare_valence"
77 79 else [] in
78   - let valence = if d.PreTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.PreTypes.valence in
  80 + let valence2 = if d.PreTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.PreTypes.valence in
79 81 let lrole,lrole_attr = d.PreTypes.lroles in
80   - prep_valence @ List.flatten (Xlist.map valence (function
  82 + valence.(id) <- prep_valence @ List.flatten (Xlist.map valence2 (function
81 83 fnum,Frame(attrs,schema) ->
82 84 let meanings,lemma,attrs = extract_meaning lemma attrs in
83   - let lrole,lrole_attr =
84   - if cat = "pact" || cat = "ppas" then extract_roles attrs else
85   - if cat = "pcon" then "Con","" else
  85 + let lrole,lrole_attr =
  86 + if cat = "pact" || cat = "ppas" then extract_roles attrs else
  87 + if cat = "pcon" then "Con","" else
86 88 if cat = "pant" then "Ant","" else
87 89 d.PreTypes.lroles in
88 90 Xlist.map (prepare_senses lemma meanings d.PreTypes.senses) (fun (meaning,hipero,weight) ->
89 91 let hipero = if cat = "conj" then ["0"] else hipero in
90 92 fnum,meaning,StringSet.of_list hipero,weight,lrole,lrole_attr,
91   - Frame(attrs,Xlist.map schema (fun s ->
  93 + Frame(attrs,Xlist.map schema (fun s ->
92 94 (* let s = if s.sel_prefs=[] then (print_endline ("prepare_valence empty sel_prefs: " ^ lemma ^ " " ^ cat); {s with sel_prefs=["ALL"]}) else s in *)
93 95 if s.role="" && s.gf <> ADJUNCT && s.gf <> NOSEM then (
94 96 printf "%d: %s\n%!" fnum (WalStringOf.frame lemma (Frame(attrs,schema)));
95 97 failwith ("prepare_valence empty role: " ^ lemma ^ " " ^ cat)) else
96 98 {s with morfs=List.sort compare s.morfs})))
97 99 | fnum,(LexFrame _ as frame) -> [fnum,"lex",StringSet.empty,0.,lrole,lrole_attr,frame]
98   - | fnum,(ComprepFrame _ as frame) -> [fnum,"comprep",StringSet.empty,0.,lrole,lrole_attr,frame]))) paths_array in
  100 + | fnum,(ComprepFrame _ as frame) -> [fnum,"comprep",StringSet.empty,0.,lrole,lrole_attr,frame])));
99 101 valence
100   -
  102 +
101 103 (*let create_pro_frames t =
102 104 [0,t.pred,StringSet.singleton "0",0.,"","",Frame(EmptyAtrs[],[])]*)
103   -
104   -let get_fnum t =
  105 +
  106 +let get_fnum t =
105 107 let x = try Xlist.assoc t.attrs "FNUM" with Not_found -> Val "0" in
106 108 (match x with
107 109 Val s -> (try int_of_string s with _ -> failwith "get_fnum 1")
108 110 | _ -> failwith "get_fnum 2")
109   -
110   -let select_frames l t =
  111 +
  112 +let select_frames l t =
111 113 (* printf "a1 pred=%s\n" t.pred; *)
112 114 let fnum = get_fnum t in
113 115 let l = Xlist.fold l [] (fun l (n,meaning,hipero,weight,lrole,lrole_attr,frame) ->
114 116 if n = fnum then (meaning,hipero,weight,lrole,lrole_attr,frame) :: l else l) in
115 117 (* printf "a2 pred=%s\n" t.pred; *)
116   - match l with
  118 + match l with
117 119 [] -> "",[]
118 120 | [_,_,_,_,_,LexFrame _] -> "",[]
119 121 | [_,_,_,_,_,ComprepFrame _] -> "",[]
120 122 | _ -> LCGreductions.get_variant_label (),
121 123 fst (Xlist.fold l ([],1) (fun (l,i) t -> (string_of_int i, t) :: l, i+1))
122   -
  124 +
123 125 let rec get_arg_refs found = function
124 126 Variant(_,l) -> Xlist.fold l found (fun found (_,t) -> get_arg_refs found t)
125 127 | Ref i -> i :: found
126 128 | t -> failwith ("get_arg_refs: " ^ LCGstringOf.linear_term 0 t)
127   -
  129 +
128 130 let rec match_position = function
129   - a :: la, b :: lb ->
  131 + a :: la, b :: lb ->
130 132 if a = b then match_position (la,b :: lb) else
131 133 if a > b then match_position (a :: la,lb)
132 134 else false
133 135 | [],_ -> true
134 136 | _,[] -> false
135   -
  137 +
136 138 let mark_sem_morfs morfs =
137 139 Xlist.map morfs (function
138 140 | Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(Sem,prep,c))
... ... @@ -143,7 +145,7 @@ let mark_sem_morfs morfs =
143 145 | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(Sem,prep))
144 146 | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(Sem,prep,c,ct,co))
145 147 | t -> t)
146   -
  148 +
147 149 let mark_nosem_morf = function
148 150 Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(NoSem,prep,c))
149 151 | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(NoSem,prep,c))
... ... @@ -153,7 +155,7 @@ let mark_nosem_morf = function
153 155 | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(NoSem,prep))
154 156 | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(NoSem,prep,c,ct,co))
155 157 | t -> failwith "mark_nosem_morf"
156   -
  158 +
157 159 let rec is_nosem_morf = function
158 160 Phrase(PrepNP(NoSem,prep,c)) -> true
159 161 | Phrase(PrepAdjP(NoSem,prep,c)) -> true
... ... @@ -163,7 +165,7 @@ let rec is_nosem_morf = function
163 165 | Phrase(ComparPP(NoSem,prep)) -> true
164 166 | Phrase(PrepNCP(NoSem,prep,c,ct,co)) -> true
165 167 | _ -> false
166   -
  168 +
167 169 let rec is_sem_morf = function
168 170 Phrase(PrepNP(Sem,prep,c)) -> true
169 171 | Phrase(PrepAdjP(Sem,prep,c)) -> true
... ... @@ -173,12 +175,12 @@ let rec is_sem_morf = function
173 175 | Phrase(ComparPP(Sem,prep)) -> true
174 176 | Phrase(PrepNCP(Sem,prep,c,ct,co)) -> true
175 177 | _ -> false
176   -
  178 +
177 179 let rec exclude_sem_morfs = function
178 180 [] -> []
179 181 | morf :: morfs -> if is_sem_morf morf then exclude_sem_morfs morfs else morf :: exclude_sem_morfs morfs
180   -
181   -(* UWAGA: dopasowywane ramy są preselekcjonowane, więc wszystkie argumenty muszą się maczować *)
  182 +
  183 +(* UWAGA: dopasowywane ramy są preselekcjonowane, więc wszystkie argumenty muszą się maczować *)
182 184 let match_args_pos modifications nodes e i schema t =
183 185 (* printf "match_args_pos\n"; *)
184 186 (* if schema = [] then schema else *)
... ... @@ -189,9 +191,9 @@ let match_args_pos modifications nodes e i schema t =
189 191 (* if gf = NOSEM || gf = NOGF then schema else
190 192 if gf = CORE then schema else (* FIXME: semantyka dla core *)*)
191 193 let morfs = exclude_sem_morfs morfs in
192   - let schema,selected =
  194 + let schema,selected =
193 195 if morfs = [] then schema,[] else
194   - let morfs = List.sort compare morfs in
  196 + let morfs = List.sort compare morfs in
195 197 (* printf "gf=%s morfs=%s\n%!" (WalStringOf.gf gf) (String.concat ";" (Xlist.map morfs WalStringOf.morf)); *)
196 198 Xlist.fold schema ([],[]) (fun (schema,selected) pos ->
197 199 (* printf "pos.gf=%s pos.morfs=%s\n%!" (WalStringOf.gf pos.gf) (String.concat ";" (Xlist.map pos.morfs WalStringOf.morf)); *)
... ... @@ -201,9 +203,9 @@ let match_args_pos modifications nodes e i schema t =
201 203 (match selected with
202 204 [] -> (*if gf = ARG then failwith "match_args_pos 3" else*)
203 205 Xlist.iter refs (fun r ->
204   - modifications.(r) <- StringMap.add modifications.(r) (e ^ i)
  206 + modifications.(r) <- StringMap.add modifications.(r) (e ^ i)
205 207 LCGrenderer.empty_schema_field(*{gf=ADJUNCT; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]}*) )
206   -(* FIXME: przy kilku pasujących pozycjach wybieram pierwszą a nich, do poprawienia przy okazji porządków z walencją
  208 +(* FIXME: przy kilku pasujących pozycjach wybieram pierwszą a nich, do poprawienia przy okazji porządków z walencją
207 209 np walencja leksemu "godzina":
208 210 1: : : common: time: Poss,T|{null;np(gen);nump(gen)}+Arg,T|{null;np(gen);nump(gen)};
209 211 1: : : common: time: Temp,T|{null;np(gen);nump(gen)}]; *)
... ... @@ -211,19 +213,19 @@ np walencja leksemu &quot;godzina&quot;:
211 213 modifications.(r) <- StringMap.add modifications.(r) (e ^ i) pos)
212 214 (*| _ -> failwith "match_args_pos 4"*));
213 215 schema
214   -
  216 +
215 217 let rec match_args_tuple modifications nodes e i schema = function
216   - Tuple l ->
  218 + Tuple l ->
217 219 Xlist.fold l schema (fun schema t ->
218 220 match_args_tuple modifications nodes e i schema t)
219 221 | t -> match_args_pos modifications nodes e i schema t
220   -
221   -let match_args modifications nodes e i t = function
  222 +
  223 +let match_args modifications nodes e i t = function
222 224 Frame(_,schema) -> ignore (match_args_tuple modifications nodes e i schema t.args)
223 225 | LexFrame _ -> failwith "match_args"
224 226 | ComprepFrame _ -> failwith "match_args"
225   -
226   -let rec assign_frames_and_senses_rec modifications valence nodes t =
  227 +
  228 +let rec assign_frames_and_senses_rec modifications valence nodes t =
227 229 (* printf "pred=%s id=%d\n" t.pred t.id; *)
228 230 if t.id = 0 then failwith ("assign_frames_and_senses_rec: t.id=0 pred=" ^ t.pred) else
229 231 let e,node_valence = select_frames ((*if t.id >= Array.length valence then create_pro_frames t else*) valence.(t.id)) t in
... ... @@ -235,23 +237,23 @@ let rec assign_frames_and_senses_rec modifications valence nodes t =
235 237 (* printf "meaning=%s\n" meaning; *)
236 238 i,Node{t with meaning=meaning;
237 239 hipero=hipero;
238   - meaning_weight=weight}) in
  240 + meaning_weight=weight}) in
239 241 Variant(e,l)
240 242  
241   -let rec is_nosem_morfs morfs =
  243 +let rec is_nosem_morfs morfs =
242 244 let sem = Xlist.fold morfs false (fun b m -> b || is_sem_morf m) in
243 245 let nosem = Xlist.fold morfs false (fun b m -> b || is_nosem_morf m) in
244 246 nosem && not sem (* FIXME: pewne uproszczenie, ale liczę, że nie ma wielu ramek z xp koordynowanym z prep *)
245   -
  247 +
246 248 let has_tuple = function
247 249 Tuple _ -> true
248 250 | _ -> false
249   -
250   -let rec apply_modifications_rec (*paths_array*) pos = function
251   - Variant(e,l) -> Variant(e,List.rev (Xlist.fold l [] (fun l (i,t) ->
252   - let t = apply_modifications_rec (*paths_array*) pos t in
253   - (*if t = Dot then l else*) (i, (*apply_modifications_rec (*paths_array*) pos*) t) :: l)))
254   - | Node t ->
  251 +
  252 +let rec apply_modifications_rec (*tokens*) pos = function
  253 + Variant(e,l) -> Variant(e,List.rev (Xlist.fold l [] (fun l (i,t) ->
  254 + let t = apply_modifications_rec (*tokens*) pos t in
  255 + (*if t = Dot then l else*) (i, (*apply_modifications_rec (*tokens*) pos*) t) :: l)))
  256 + | Node t ->
255 257 if is_sem_morf t.amorf then Node t else (* FIXME: czy to jest poprawne? *)
256 258 let t = if is_nosem_morf t.amorf then {t with agf=ARG} else t in
257 259 Node{t with position=pos}
... ... @@ -259,41 +261,42 @@ let rec apply_modifications_rec (*paths_array*) pos = function
259 261  
260 262 (* let empty_pos = {gf=ADJUNCT; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]} (* FIXME: jaka GF? *) *)
261 263  
262   -let apply_modifications (*paths_array*) modifications nodes references =
  264 +let apply_modifications (*tokens*) modifications nodes references =
263 265 Int.iter 1 (Array.length references - 1) (fun r ->
264 266 (* if StringMap.is_empty modifications.(r) then failwith ("apply_modifications: " ^ nodes.(r).pred) else *)
265   - references.(r) <- Choice(StringMap.map modifications.(r) (fun pos ->
266   - apply_modifications_rec (*paths_array*) pos references.(r))))
  267 + references.(r) <- Choice(StringMap.map modifications.(r) (fun pos ->
  268 + apply_modifications_rec (*tokens*) pos references.(r))))
267 269  
268 270 let rec extract_nosem rev = function
269 271 [] -> List.rev rev, false
270 272 | ("NOSEM",Val "+") :: l -> (List.rev rev) @ l, true
271 273 | x :: l -> extract_nosem (x :: rev) l
272   -
  274 +
273 275 let get_nodes = function
274   - Node t ->
  276 + Node t ->
275 277 let attrs,b = extract_nosem [] t.attrs in
276   - let t = if t.pred = "<query1>" || t.pred = "<query2>" || t.pred = "<query3>" || t.pred = "<query4>" || t.pred = "<query5>" || t.pred = "<query6>" then {t with agf=CORE} else t in
  278 + (* let t = if t.pred = "<query1>" || t.pred = "<query2>" || t.pred = "<query3>" || t.pred = "<query4>" || t.pred = "<query5>" || t.pred = "<query6>" then {t with agf=CORE} else t in *)
  279 + let t = if t.pred = "<sentence>" || t.pred = "pro-komunikować" then {t with agf=CORE} else t in (* FIXME: przetestować na mowie niezależnej *)
277 280 if t.agf = NOGF then failwith ("get_nodes agf=NOGF: " ^ t.pred) else
278 281 if b then {t with amorf=mark_nosem_morf t.amorf; attrs=attrs} else t
279   - | _ -> failwith "get_nodes"
280   -
281   -let rec propagate_nosem_selprefs modifications ei = function
  282 + | _ -> failwith "get_nodes"
  283 +
  284 +let rec propagate_nosem_selprefs modifications ei = function
282 285 Choice choice -> Choice(StringMap.map choice (propagate_nosem_selprefs modifications ""))
283 286 | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, propagate_nosem_selprefs modifications (e ^ i) t))
284   - | Node t ->
285   - if (t.cat = "prep" && t.arole = "NOSEM") || t.cat = "num" then
  287 + | Node t ->
  288 + if (t.cat = "prep" && t.arole = "NOSEM") || t.cat = "num" then
286 289 let refs = IntSet.of_list (get_arg_refs [] t.args) in
287   - IntSet.iter refs (fun r ->
288   - modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.WalTypes.sel_prefs (fun l ->
  290 + IntSet.iter refs (fun r ->
  291 + modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.WalTypes.sel_prefs (fun l ->
289 292 if l = t.position.WalTypes.sel_prefs then l else failwith ("propagate_nosem_selprefs 1: [" ^ String.concat ";" l ^ "] [" ^ String.concat ";" t.position.WalTypes.sel_prefs ^ "]")));
290 293 Node{t with position= {t.position with WalTypes.sel_prefs = []}}
291 294 else Node t
292 295 | _ -> failwith "propagate_nosem_selprefs 2"
293   -
  296 +
294 297 let rec apply_modifications2_rec mods = function
295 298 Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, apply_modifications2_rec mods t))
296   - | Node t ->
  299 + | Node t ->
297 300 if t.position.WalTypes.sel_prefs <> [] then failwith "apply_modifications2_rec" else
298 301 Node{t with position={t.position with WalTypes.sel_prefs=mods}}
299 302 | _ -> failwith "apply_modifications2_rec"
... ... @@ -302,57 +305,22 @@ let apply_modifications2 modifications references =
302 305 Int.iter 1 (Array.length references - 1) (fun r ->
303 306 if not (StringMap.is_empty modifications.(r)) then
304 307 match references.(r) with
305   - Choice choice ->
306   - references.(r) <- Choice(StringMap.mapi choice (fun ei t ->
  308 + Choice choice ->
  309 + references.(r) <- Choice(StringMap.mapi choice (fun ei t ->
307 310 try apply_modifications2_rec (StringMap.find modifications.(r) ei) t with Not_found -> t))
308 311 | _ -> failwith "apply_modifications2")
309   -
310   -let assign_frames_and_senses paths_array references =
  312 +
  313 +let assign_frames_and_senses tokens references =
311 314 let modifications = Array.make (Array.length references) StringMap.empty in
312   - let valence = prepare_valence paths_array in
  315 + let valence = prepare_valence tokens in
313 316 let nodes = Array.map get_nodes references in
314 317 let references = Array.map (assign_frames_and_senses_rec modifications valence nodes) nodes in
315   - apply_modifications (*paths_array*) modifications nodes references;
  318 + apply_modifications (*tokens*) modifications nodes references;
316 319 (* let modifications = Array.make (Array.length references) StringMap.empty in
317 320 Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r)); (* FIXME: propagowanie preferencji selekcyjnych więcej niż jeden poziom w głąb nie działa *)
318   - apply_modifications2 modifications references;
  321 + apply_modifications2 modifications references;
319 322 Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r));
320   - apply_modifications2 modifications references;
  323 + apply_modifications2 modifications references;
321 324 Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r));
322 325 apply_modifications2 modifications references; *)
323 326 references
324   -
325   -
326   -
327   -
328   -
329   -
330   -
331   -
332   -
333   -
334   -
335   -
336   -
337   -
338   -
339   -
340   -
341   -
342   -
343   -
344   -
345   -
346   -
347   -
348   -
349   -
350   -
351   -
352   -
353   -
354   -
355   -
356   -
357   -
358   -
359 327 \ No newline at end of file
... ...
parser/exec.ml
... ... @@ -27,23 +27,16 @@ let empty_result = {
27 27 msg="";
28 28 pre_time1=0.;
29 29 pre_time2=0.;
30   - (* lex_time=0.; *)
31 30 parse_time=0.;
32 31 parsed_text=RawText "";
33   -(* reduction_time=0.;
34   - sem_time=0.;
35   - paths_size=0;
36   - disamb=[| |];
37   - sem=[| |];
38   - sem2=[| |];
39   - sem3=LCGtypes.Dot;
40   - trees=[];
41   - mrls=[];
42   - paths=[| |];*)
43   - (*structs=SemTypes.Atom "",SemTypes.Label "",SemTypes.Label "",[],""*)}
  32 + semantic_time=0.;
  33 + selected_sent_text=RawText "";
  34 + semantic_text=RawText "";
  35 + selected_semantic_text=RawText "";
  36 + }
44 37  
45 38 let empty_eniam_parse_result = {
46   - id="";
  39 + file_prefix="";
47 40 status=Idle;
48 41 msg="";
49 42 lex_time=0.;
... ... @@ -58,7 +51,7 @@ let empty_eniam_parse_result = {
58 51 }
59 52  
60 53 let empty_conll_parse_result = {
61   - id="";
  54 + file_prefix="";
62 55 status=Idle;
63 56 msg="";
64 57 lex_time=0.;
... ... @@ -67,12 +60,26 @@ let empty_conll_parse_result = {
67 60 sem_time=0.;
68 61 paths_size=0;
69 62 dependency_tree_size=0;
  63 + paths=[| |];
70 64 dep_chart=DepNode(-100,[],[],[]);
71 65 parsed_dep_chart=[];
72 66 not_parsed_dep_chart=(-100,[],[],[]);
73 67 dependency_tree=[| |];
74 68 }
75 69  
  70 +let empty_semantic_processing_result = {
  71 + file_prefix="";
  72 + status=Idle;
  73 + msg="";
  74 + sem_time=0.;
  75 + disamb=[| |];
  76 + sem=[| |];
  77 + sem2=[| |];
  78 + sem3=LCGtypes.Dot;
  79 + trees=[];
  80 + mrls=[];
  81 + }
  82 +
76 83 let empty_sum_result = {
77 84 no_queries=0;
78 85 no_pre_error=0;
... ... @@ -102,14 +109,16 @@ let translate_mode = function
102 109 | PreTypes.CONLL -> CONLL
103 110 | PreTypes.ENIAM -> ENIAM
104 111 | PreTypes.Mate -> Mate
  112 + | PreTypes.Swigra -> Swigra
  113 + | PreTypes.POLFIE -> POLFIE
105 114  
106 115 let rec translate_sentence = function
107 116 PreTypes.RawSentence s -> RawSentence s
108   - | PreTypes.StructSentence(id,paths,last) -> StructSentence(id,paths,last)
109   - | PreTypes.DepSentence(id,paths) -> DepSentence(id,paths)
  117 + | PreTypes.StructSentence(paths,last) -> StructSentence(paths,last)
  118 + | PreTypes.DepSentence(paths) -> DepSentence(paths)
110 119 | PreTypes.QuotedSentences sentences ->
111 120 QuotedSentences(Xlist.map sentences (fun p ->
112   - {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext;
  121 + {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix;
113 122 psentence=translate_sentence p.PreTypes.psentence}))
114 123 | PreTypes.AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) ->
115 124 translate_mode mode, translate_sentence sentence))
... ... @@ -118,7 +127,7 @@ let rec translate_paragraph = function
118 127 PreTypes.RawParagraph s -> RawParagraph s
119 128 | PreTypes.StructParagraph sentences ->
120 129 StructParagraph(Xlist.map sentences (fun p ->
121   - {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext;
  130 + {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix;
122 131 psentence=translate_sentence p.PreTypes.psentence}))
123 132 | PreTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) ->
124 133 translate_mode mode, translate_paragraph paragraph))
... ... @@ -130,8 +139,8 @@ let rec translate_text = function
130 139 | PreTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) ->
131 140 translate_mode mode, translate_text text))
132 141  
133   -let eniam_parse_sentence timeout test_only_flag id paths last tokens =
134   - let result = {empty_eniam_parse_result with id=id} in
  142 +let eniam_parse_sentence timeout test_only_flag paths last tokens =
  143 + let result = empty_eniam_parse_result in
135 144 let time2 = time_fun () in
136 145 try
137 146 let chart = LCGlexicon.create (paths,last) tokens in
... ... @@ -187,8 +196,8 @@ let eniam_parse_sentence timeout test_only_flag id paths last tokens =
187 196 let time3 = time_fun () in
188 197 {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}
189 198  
190   -let conll_parse_sentence timeout test_only_flag id paths tokens =
191   - let result = {empty_conll_parse_result with id=id} in
  199 +let conll_parse_sentence timeout test_only_flag paths tokens =
  200 + let result = empty_conll_parse_result in
192 201 let time2 = time_fun () in
193 202 try
194 203 let dep_chart = LCGlexicon.dep_create paths tokens in
... ... @@ -198,10 +207,10 @@ let conll_parse_sentence timeout test_only_flag id paths tokens =
198 207 let result = {result with lex_time=time3 -. time2} in
199 208 try
200 209 (* print_endline "conll_parse_sentence 1"; *)
201   - LCGlatexOf.print_references "results/" "references1" references;
  210 + (* LCGlatexOf.print_references "results/" "references1" references; *)
202 211 let parsed_dep_chart = LCGchart.dep_parse dep_chart references timeout time_fun in (* uwaga: niejawna zmiana imperatywna w references *)
203 212 (* print_endline "conll_parse_sentence 2"; *)
204   - LCGlatexOf.print_references "results/" "references2" references;
  213 + (* LCGlatexOf.print_references "results/" "references2" references; *)
205 214 let time4 = time_fun () in
206 215 let result = if test_only_flag then result else {result with parsed_dep_chart=parsed_dep_chart} in
207 216 let result = {result with parse_time=time4 -. time3} in
... ... @@ -251,20 +260,58 @@ let conll_parse_sentence timeout test_only_flag id paths tokens =
251 260 {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}
252 261  
253 262  
254   -let mate_in, mate_out = Unix.open_process "java -jar ../dependencyParser/basic/mate-tools/dist/anna-3.5.jar -model ../dependencyParser/basic/mate-tools/examples/160622_Polish_MateParser.mdl -test"
  263 +let mate_in, mate_out = (*Unix.open_process "java -jar ../dependencyParser/basic/mate-tools/dist/anna-3.5.jar -model ../dependencyParser/basic/mate-tools/examples/160622_Polish_MateParser.mdl -test"*)
  264 + if Paths.config.Paths.mate_parser_enabled then
  265 + Unix.open_process ("java -jar " ^ Paths.config.Paths.mate_parser_path ^ "dist/anna-3.5.jar -model " ^
  266 + Paths.config.Paths.mate_parser_path ^ "examples/160622_Polish_MateParser.mdl -test")
  267 + else stdin, stdout
255 268  
256   -let rec parse_sentence timeout test_only_flag mode tokens = function
257   - RawSentence s -> RawSentence s
258   - | StructSentence(id,paths,last) ->
  269 +let swigra_in, swigra_out = (*Unix.open_process "../swigra/parser/run.sh"*)
  270 + if Paths.config.Paths.swigra_enabled then
  271 + Unix.open_process (Paths.config.Paths.swigra_path ^ "run.sh")
  272 + else stdin, stdout
  273 +
  274 +let file_prefix_of_mode = function
  275 + Raw -> "R"
  276 + | Struct -> "St"
  277 + | CONLL -> "C"
  278 + | ENIAM -> "E"
  279 + | Mate -> "M"
  280 + | Swigra -> "S"
  281 + | POLFIE -> "P"
  282 +
  283 +let get_paths old_paths = function
  284 + {PreTypes.psentence=PreTypes.DepSentence(paths)},_ ->
  285 + Int.iter 0 (Array.length paths - 1) (fun i ->
  286 + let id,_,_ = old_paths.(i) in
  287 + let _,super,label = paths.(i) in
  288 + paths.(i) <- id,super,label);
  289 + paths
  290 + | _ -> failwith "get_paths"
  291 +
  292 +let rec parse_sentence timeout test_only_flag mode file_prefix tokens = function
  293 + RawSentence s ->
  294 + (match mode with
  295 + Swigra ->
  296 + if not Paths.config.Paths.swigra_enabled then RawSentence s else (
  297 + Printf.fprintf swigra_out "%s\n%!" s;
  298 + print_endline ("swigra: " ^ input_line swigra_in);
  299 + RawSentence s)
  300 + | _ -> RawSentence s)
  301 + | StructSentence(paths,last) ->
259 302 (match mode with
260 303 ENIAM ->
261   - let result = eniam_parse_sentence timeout test_only_flag id paths last tokens in
  304 + let result = eniam_parse_sentence timeout test_only_flag paths last tokens in
  305 + let result = {result with file_prefix = file_prefix_of_mode mode ^ file_prefix} in
262 306 ENIAMSentence result
263 307 | _ -> failwith "parse_sentence")
264   - | DepSentence(id,paths) ->
  308 + | DepSentence(paths) ->
265 309 (match mode with
266 310 CONLL ->
267   - let result = conll_parse_sentence timeout test_only_flag id paths tokens in
  311 + let result = conll_parse_sentence timeout test_only_flag paths tokens in
  312 + let result = {result with
  313 + file_prefix = file_prefix_of_mode mode ^ file_prefix;
  314 + paths = paths} in
268 315 CONLLSentence result
269 316 (* let xml = DepTree.conll_to_xml paths in
270 317 let graph = XmlPrinter.graph_of_xml xml in (* FIXME: do poprawy *)
... ... @@ -272,28 +319,31 @@ let rec parse_sentence timeout test_only_flag mode tokens = function
272 319 let result = {empty_eniam_parse_result with status=Parsed; term=graph} in
273 320 ENIAMSentence result, next_id *)
274 321 | Mate ->
275   - (*print_endline "parse_sentence 1";
276   - let conll = CONLL.string_of_sentence PreTypes.Mate (PreTypes.StructSentence(paths,last)) in
  322 + if not Paths.config.Paths.mate_parser_enabled then DepSentence paths else (
  323 + print_endline "parse_sentence 1";
  324 + (* print_endline (Visualization.html_of_dep_sentence tokens paths); *)
  325 + let conll = CONLL.string_of_paths PreTypes.Mate tokens paths in
277 326 print_endline "parse_sentence 2";
278   - printf "%s\n" conll;
279   - Printf.fprintf mate_out "%s\n\n%!" conll;
  327 + (* printf "|%s|\n" conll; *)
  328 + Printf.fprintf mate_out "%s%!" conll;
280 329 print_endline "parse_sentence 3";
281   - let conll = CONLL.load_sentence mate_in in
282   - print_endline "parse_sentence 4";*)
283   - (*konwersja na strukturę danych*)
284   - (* FIXME: tu trzeba wstawić konwersję na tekstowy format CONLL,
285   - uruchomienie MateParser i
286   - powtórną konwersję wyniku. *)
287   - RawSentence ""
  330 + let new_paths = get_paths paths (CONLL.load_sentence mate_in) in
  331 + print_endline "parse_sentence 4";
  332 + (* print_endline (Visualization.html_of_dep_sentence tokens new_paths); *)
  333 + let result = conll_parse_sentence timeout test_only_flag new_paths tokens in
  334 + let result = {result with
  335 + file_prefix = file_prefix_of_mode mode ^ file_prefix;
  336 + paths=new_paths} in
  337 + CONLLSentence result)
288 338 | _ -> failwith "parse_sentence")
289 339 | QuotedSentences sentences ->
290 340 let sentences = Xlist.rev_map sentences (fun p ->
291   - let sentence = parse_sentence timeout test_only_flag mode tokens p.psentence in
  341 + let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens p.psentence in
292 342 {p with psentence=sentence}) in
293 343 QuotedSentences(List.rev sentences)
294 344 | AltSentence l ->
295 345 let l = Xlist.rev_map l (fun (mode,sentence) ->
296   - mode, parse_sentence timeout test_only_flag mode tokens sentence) in
  346 + mode, parse_sentence timeout test_only_flag mode file_prefix tokens sentence) in
297 347 AltSentence(List.rev l)
298 348 | _ -> failwith "parse_sentence"
299 349  
... ... @@ -301,7 +351,7 @@ let rec parse_paragraph timeout test_only_flag mode tokens = function
301 351 RawParagraph s -> RawParagraph s
302 352 | StructParagraph sentences ->
303 353 let sentences = Xlist.rev_map sentences (fun p ->
304   - let sentence = parse_sentence timeout test_only_flag mode tokens p.psentence in
  354 + let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens p.psentence in
305 355 {p with psentence=sentence}) in
306 356 StructParagraph(List.rev sentences)
307 357 | AltParagraph l ->
... ... @@ -318,30 +368,186 @@ let rec parse_text timeout test_only_flag mode = function
318 368 | AltText l -> AltText(Xlist.map l (fun (mode,text) ->
319 369 mode, parse_text timeout test_only_flag mode text))
320 370  
  371 +let select_mode = function
  372 + (Raw,_),_ -> failwith "select_mode"
  373 + | _,(Raw,_) -> failwith "select_mode"
  374 + | (Struct,_),_ -> failwith "select_mode"
  375 + | _,(Struct,_) -> failwith "select_mode"
  376 + | (CONLL,s),_ -> CONLL,s
  377 + | _,(CONLL,s) -> CONLL,s
  378 + | (ENIAM,s),_ -> ENIAM,s
  379 + | _,(ENIAM,s) -> ENIAM,s
  380 + | (Swigra,s),_ -> Swigra,s
  381 + | _,(Swigra,s) -> Swigra,s
  382 + | (Mate,s),_ -> Mate,s
  383 + | _,(Mate,s) -> Mate,s
  384 + | _ -> failwith "select_mode: ni"
  385 +
  386 +let rec select_sentences_sentence = function
  387 + RawSentence s -> failwith "select_sentences_sentence"
  388 + | StructSentence(paths,last) -> failwith "select_sentences_sentence"
  389 + | DepSentence paths -> failwith "select_sentences_sentence"
  390 + | QuotedSentences sentences ->
  391 + let sentences = Xlist.rev_map sentences (fun p ->
  392 + let sentence,_ = select_sentences_sentence p.psentence in
  393 + {p with psentence=sentence}) in
  394 + QuotedSentences(List.rev sentences), Parsed
  395 + | AltSentence l ->
  396 + let raw,selected = Xlist.fold l ([],[]) (fun (raw,selected) (mode,sentence) ->
  397 + if mode = Raw then (mode,sentence) :: raw, selected else
  398 + let sentence,status = select_sentences_sentence sentence in
  399 + if status <> Parsed && status <> NotTranslated then raw,selected else
  400 + match selected with
  401 + [] -> raw,[mode,sentence]
  402 + | [mode2,sentence2] -> raw,[select_mode ((mode,sentence),(mode2,sentence2))]
  403 + | _ -> failwith "select_sentences_sentence") in
  404 + AltSentence(raw @ selected), Parsed
  405 + | ENIAMSentence result -> ENIAMSentence result, result.status
  406 + | CONLLSentence result -> CONLLSentence result, result.status
  407 + | SemSentence result -> SemSentence result, result.status
  408 +
  409 +let rec select_sentences_paragraph = function
  410 + RawParagraph s -> RawParagraph s
  411 + | StructParagraph sentences ->
  412 + let sentences = Xlist.rev_map sentences (fun p ->
  413 + let sentence,_ = select_sentences_sentence p.psentence in
  414 + {p with psentence=sentence}) in
  415 + StructParagraph(List.rev sentences)
  416 + | AltParagraph l ->
  417 + let l = Xlist.rev_map l (fun (mode,paragraph) ->
  418 + mode, select_sentences_paragraph paragraph) in
  419 + AltParagraph(List.rev l)
  420 +
  421 +let rec select_sentences_text = function
  422 + RawText s -> RawText s
  423 + | StructText(paragraphs,tokens) ->
  424 + let paragraphs = Xlist.rev_map paragraphs (fun paragraph ->
  425 + select_sentences_paragraph paragraph) in
  426 + StructText(List.rev paragraphs, tokens)
  427 + | AltText l -> AltText(Xlist.map l (fun (mode,text) ->
  428 + mode, select_sentences_text text))
  429 +
  430 +let semantic_processing timeout test_only_flag file_prefix tokens max_n dependency_tree =
  431 + let time5 = time_fun () in
  432 + let result = {empty_semantic_processing_result with file_prefix=file_prefix} in
  433 + try
  434 + let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses tokens dependency_tree in
  435 + let disamb(*sem*) = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node1 (*dependency_tree2*)disamb in
  436 + let (*sem*)disamb = DisambLemma.disambiguate_nodes (*dependency_tree*)(*sem*)disamb in
  437 + let (*sem*)disamb = DisambLemma.remove_unused(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in
  438 + let (*sem*)disamb = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in
  439 + let (*disamb*)sem = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node2 (*dependency_tree2*)disamb in
  440 + let result = if test_only_flag then result else {result with disamb=disamb} in
  441 + let sem = DisambLemma.disambiguate_meanings (*dependency_tree*)sem in
  442 + let sem(*disamb*) = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)sem(*disamb*) in
  443 + let result = if test_only_flag then result else {result with sem=sem} in
  444 + let sem2 = SemGraph.translate tokens (*disamb*)sem in
  445 + let result = if test_only_flag then result else {result with sem2=sem2} in
  446 + let sem3(*disamb*) = SemGraph.make_tree(*disambiguate_nodes*) (*dependency_tree*)sem2(*disamb*) in
  447 + let sem3(*disamb*) = SemGraph.simplify_tree(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in
  448 +(* let sem3(*disamb*) = SemGraph.manage_quantification(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in *)
  449 + let sem3(*disamb*) = SemGraph.simplify_gender(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in
  450 +(* if Array.length disamb < 10000 then print_xml_dependency_tree "results/trees/" (id ^ "dis") disamb; *)
  451 + let result = if test_only_flag then result else {result with sem3=sem3} in
  452 + let time6 = time_fun () in
  453 + if SemGraph.validate_semantics sem3 then
  454 + let trees = SemGraph.draw_trees max_n sem3 in
  455 + let trees2 = Xlist.map trees SemMrl.variable_alpha_convertion in
  456 + let mrls = Xlist.map trees2 SemMrl.make_mrl in
  457 + let mrls = Xlist.map mrls SemMrl.move_requirements in
  458 + let mrss = Xlist.map mrls SemMrl.make_mrs_of_mrl in
  459 + let mrss = Xlist.map mrss SemMrl.mrs_handle_alpha_convertion in
  460 + let fols = Xlist.map mrss (fun mrs ->
  461 + let l = SemMrl.foll_of_mrs_greedy mrs in
  462 + if l = [] then failwith "empty fol" else
  463 + List.hd l) in
  464 + let result = if test_only_flag then result else {result with trees=trees; mrls=fols(*mrls*)} in
  465 + {result with status=Parsed; sem_time=time6 -. time5}
  466 + else {result with status=NotTranslated; sem_time=time6 -. time5}
  467 + with e ->
  468 + let time6 = time_fun () in
  469 + {result with status=SemError; msg=Printexc.to_string e; sem_time=time6 -. time5}
  470 +
  471 +
  472 +let rec semantic_processing_sentence timeout test_only_flag tokens max_n = function
  473 + RawSentence s -> RawSentence s
  474 + | ENIAMSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens max_n result.dependency_tree)
  475 + | CONLLSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens max_n result.dependency_tree)
  476 + | QuotedSentences sentences ->
  477 + let sentences = Xlist.rev_map sentences (fun p ->
  478 + let sentence = semantic_processing_sentence timeout test_only_flag tokens max_n p.psentence in
  479 + {p with psentence=sentence}) in
  480 + QuotedSentences(List.rev sentences)
  481 + | AltSentence l ->
  482 + let l = Xlist.rev_map l (fun (mode,sentence) ->
  483 + mode, semantic_processing_sentence timeout test_only_flag tokens max_n sentence) in
  484 + AltSentence(List.rev l)
  485 + | _ -> failwith "semantic_processing_sentence"
  486 +
  487 +let rec semantic_processing_paragraph timeout test_only_flag tokens max_n = function
  488 + RawParagraph s -> RawParagraph s
  489 + | StructParagraph sentences ->
  490 + let sentences = Xlist.rev_map sentences (fun p ->
  491 + let sentence = semantic_processing_sentence timeout test_only_flag tokens max_n p.psentence in
  492 + {p with psentence=sentence}) in
  493 + StructParagraph(List.rev sentences)
  494 + | AltParagraph l ->
  495 + let l = Xlist.rev_map l (fun (mode,paragraph) ->
  496 + mode, semantic_processing_paragraph timeout test_only_flag tokens max_n paragraph) in
  497 + AltParagraph(List.rev l)
  498 +
  499 +let rec semantic_processing_text timeout test_only_flag max_n = function
  500 + RawText s -> RawText s
  501 + | StructText(paragraphs,tokens) ->
  502 + let paragraphs = Xlist.rev_map paragraphs (fun paragraph ->
  503 + semantic_processing_paragraph timeout test_only_flag tokens max_n paragraph) in
  504 + StructText(List.rev paragraphs, tokens)
  505 + | AltText l -> AltText(Xlist.map l (fun (mode,text) ->
  506 + mode, semantic_processing_text timeout test_only_flag max_n text))
  507 +
321 508 let rec extract_query_text = function
322 509 RawText s -> s
323 510 | AltText l -> (try extract_query_text (Xlist.assoc l Raw) with Not_found -> failwith "extract_query_text")
324 511 | _ -> failwith "extract_query_text"
325 512  
326 513 let process_query pre_in pre_out timeout test_only_flag id full_query max_n =
327   - print_endline "process_query 0";
  514 + (* print_endline "process_query 0"; *)
328 515 let result = {empty_result with input_text=translate_text full_query} in
329 516 let time1 = time_fun () in
330   - print_endline "process_query 1";
  517 + (* print_endline "process_query 1"; *)
331 518 Marshal.to_channel pre_out full_query [];
332 519 flush pre_out;
333   - print_endline "process_query 2";
  520 + (* print_endline "process_query 2"; *)
334 521 let pre_text,msg,pre_time1 = (Marshal.from_channel pre_in : PreTypes.text * string * float) in
335 522 let time2 = time_fun () in
336 523 let result = if test_only_flag then result else {result with pre_text=translate_text pre_text} in
337 524 let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in
338 525 if msg <> "" then {result with status=PreprocessingError; msg=msg} else (
339   - print_endline "process_query 3";
  526 + (* print_endline "process_query 3"; *)
340 527 let parsed_text = parse_text timeout test_only_flag Struct (translate_text pre_text) in
341   - print_endline "process_query 4";
  528 + (* print_endline "process_query 4"; *)
342 529 let time3 = time_fun () in
343 530 let result = if test_only_flag then result else {result with status=Parsed; parsed_text=parsed_text} in
344 531 let result = {result with parse_time=time3 -. time2} in
  532 + (* print_endline "process_query 5"; *)
  533 + let selected_sent_text =
  534 + if not Paths.config.Paths.sentence_selection_enabled then parsed_text
  535 + else select_sentences_text parsed_text in
  536 + (* print_endline "process_query 6"; *)
  537 + let result = if test_only_flag then result else {result with status=Parsed; selected_sent_text=selected_sent_text} in
  538 + let semantic_text = semantic_processing_text timeout test_only_flag max_n selected_sent_text in
  539 + (* print_endline "process_query 7"; *)
  540 + let selected_semantic_text =
  541 + if not Paths.config.Paths.sentence_selection_enabled then semantic_text
  542 + else select_sentences_text semantic_text in
  543 + (* print_endline "process_query 8"; *)
  544 + let time4 = time_fun () in
  545 + let result =
  546 + if test_only_flag then result
  547 + else {result with status=Parsed;
  548 + semantic_text=semantic_text;
  549 + selected_semantic_text=selected_semantic_text} in
  550 + let result = {result with semantic_time=time4 -. time3} in
345 551 result)
346 552 (** (* let text,msg,pre_time1 = PreProcessing.mail_loop2 query in *)
347 553 (* let text = PreTypes.RawText "" in
... ...
parser/execTypes.ml
... ... @@ -20,7 +20,7 @@
20 20 type status = Idle | PreprocessingError | LexiconError | ParseError | ParseTimeout | Parsed | TooManyNodes | NotParsed | NotReduced | ReductionError | SemError | NotTranslated
21 21  
22 22 type eniam_parse_result = {
23   - id: string;
  23 + file_prefix: string;
24 24 status: status;
25 25 msg: string;
26 26 lex_time: float;
... ... @@ -35,7 +35,7 @@ type eniam_parse_result = {
35 35 }
36 36  
37 37 type conll_parse_result = {
38   - id: string;
  38 + file_prefix: string;
39 39 status: status;
40 40 msg: string;
41 41 lex_time: float;
... ... @@ -44,6 +44,7 @@ type conll_parse_result = {
44 44 sem_time: float;
45 45 paths_size: int;
46 46 dependency_tree_size: int;
  47 + paths: (int * int * string) array;
47 48 dep_chart: LCGtypes.dep_tree;
48 49 parsed_dep_chart: (LCGtypes.SymbolMap.key * LCGtypes.linear_term) list;
49 50 not_parsed_dep_chart: int *
... ... @@ -53,22 +54,36 @@ type conll_parse_result = {
53 54 dependency_tree: LCGtypes.linear_term array;
54 55 }
55 56  
  57 +type semantic_processing_result = {
  58 + file_prefix: string;
  59 + status: status;
  60 + msg: string;
  61 + sem_time: float;
  62 + disamb: LCGtypes.linear_term array;
  63 + sem: LCGtypes.linear_term array;
  64 + sem2: LCGtypes.linear_term array;
  65 + sem3: LCGtypes.linear_term;
  66 + trees: LCGtypes.linear_term list;
  67 + mrls: SemTypes.mrl_formula list;
  68 + }
  69 +
56 70 type mode =
57   - Raw | Struct | CONLL | ENIAM | Mate
  71 + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE
58 72  
59 73 type sentence =
60 74 RawSentence of string
61 75 (* | CONLL of conll list *)
62   - | StructSentence of string * (int * int * int) list * int (* file_prefix * (id * lnode * rnode) list * last *)
63   - | DepSentence of string * (int * int * string) array (* file_prefix * (id * super * label) conll_id *)
  76 + | StructSentence of (int * int * int) list * int (* (id * lnode * rnode) list * last *)
  77 + | DepSentence of (int * int * string) array (* (id * super * label) conll_id *)
64 78 | QuotedSentences of paragraph_record list
65 79 (* | NKJP1M of nkjp1m list *)
66 80 (* | Skladnica of skladnica_tree *)
67 81 | AltSentence of (mode * sentence) list (* string = etykieta np raw, nkjp, krzaki *)
68 82 | ENIAMSentence of eniam_parse_result
69 83 | CONLLSentence of conll_parse_result
  84 + | SemSentence of semantic_processing_result
70 85  
71   -and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence} (* beg i len liczone po znakach unicode ( * 100 ???) *)
  86 +and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence; pfile_prefix: string} (* beg i len liczone po znakach unicode ( * 100 ???) *)
72 87  
73 88 and paragraph =
74 89 RawParagraph of string
... ... @@ -88,19 +103,12 @@ type result = {
88 103 pre_time2: float;
89 104 status: status;
90 105 msg: string;
91   - (* lex_time: float; *)
92 106 parse_time: float;
93 107 parsed_text: text;
94   - (* reduction_time: float;
95   - sem_time: float;
96   - paths_size: int;
97   - disamb: LCGtypes.linear_term array;
98   - sem: LCGtypes.linear_term array;
99   - sem2: LCGtypes.linear_term array;
100   - sem3: LCGtypes.linear_term;
101   - trees: LCGtypes.linear_term list;
102   - mrls: SemTypes.mrl_formula list;
103   - paths: PreTypes.token_record array; *)
  108 + semantic_time: float;
  109 + selected_sent_text: text;
  110 + semantic_text: text;
  111 + selected_semantic_text: text;
104 112 }
105 113  
106 114 type sum_result = {
... ...
parser/index.html
... ... @@ -8,11 +8,12 @@
8 8  
9 9 <body>
10 10 <center>
11   - <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1>
12   - <h3>Podaj tekst:</h3>
13   - <form method=POST action="cgi-bin/parser.cgi">
14   - <p><input type="text" name="text0" value="" size="40"></p>
15   - <p><input type="submit" value="Analizuj" size="60"></p>
  11 + <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1>
  12 + <h3>Podaj tekst:</h3>
  13 + <form method=POST action="cgi-bin/parser2.cgi">
  14 + <p><textarea name="text0" cols="100" rows="10"></textarea></p>
  15 + <!--<p><input type="text" name="text0" value="" size="100"></p> -->
  16 + <p><input type="submit" value="Analizuj" size="60"></p>
16 17 </form>
17 18 <BR><BR>
18 19 <hr align="center" size="2" width="800" />
... ...
parser/makefile
... ... @@ -16,9 +16,9 @@ SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
16 16 EXEC= execTypes.ml visualization.ml exec.ml
17 17  
18 18 all:
19   - $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
20   -# $(OCAMLOPT) -o server $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
21   -# $(OCAMLOPT) -o parser.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
  19 +# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
  20 + $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
  21 + $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
22 22 # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml
23 23 # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml
24 24 # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml
... ... @@ -30,6 +30,10 @@ all:
30 30 # of_xml:
31 31 # $(OCAMLOPT) -o of_xml $(OCAMLOPTFLAGS) LCGofXml.ml
32 32  
  33 +swigra_test: swigra_test.ml
  34 + $(OCAMLOPT) -o swigra_test $(OCAMLOPTFLAGS) swigra_test.ml
  35 +
  36 +
33 37 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
34 38  
35 39 .mll.ml:
... ... @@ -51,4 +55,4 @@ all:
51 55 $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
52 56  
53 57 clean:
54   - rm -f *~ *.cm[oix] *.o eniam eniam.distr eniam.worker
  58 + rm -f *~ *.cm[oix] *.o eniam eniam.distr eniam.worker server2 parser2.cgi
... ...
parser/pipe.ml
... ... @@ -50,6 +50,10 @@ let lcg_process query =
50 50 Visualization.print_html_text path "input_text" result.input_text;
51 51 Visualization.print_html_text path "pre_text" result.pre_text;
52 52 Visualization.print_html_text path "parsed_text" result.parsed_text;
  53 + Visualization.print_html_text path "selected_sent_text" result.selected_sent_text;
  54 + Visualization.print_html_text path "semantic_text" result.semantic_text;
  55 + Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text;
  56 + Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.selected_semantic_text;
53 57 Exec.print_result stdout result;
54 58 (*Visualization.print_paths "results/" "paths" result.paths;
55 59 Visualization.print_paths_latex "paths" result.paths;
... ... @@ -118,9 +122,9 @@ let lcg_process query =
118 122 let _ = Unix.shutdown_connection ic in
119 123 ()
120 124  
121   -(* let _ =
  125 +let _ =
122 126 if Array.length Sys.argv < 2 then print_endline "missing argument" else
123   - lcg_process Sys.argv.(1) *)
  127 + lcg_process Sys.argv.(1)
124 128  
125 129  
126 130 (* FIXME: parser dziwnie się zachowuje dla 'ścieżki anomalia.' 'ścieżki anomalia. GG' itp. - nie parsuje '.' a jak sparsuje to nie chce redukować *)
... ... @@ -210,24 +214,20 @@ let process_conll_corpus filename =
210 214 let _ =
211 215 (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *)
212 216 (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *)
213   - process_conll_corpus "../testy/skladnica-test1.conll";
  217 + (* process_conll_corpus "../testy/skladnica-test1.conll"; *)
214 218 ()
215 219  
216 220 (* TO DO:
217 221 - współbieżne uruchamianie parserów
218   - - wybór tego które włączać (w pre i w parserze)
219 222 - nkjp jako źródło danych
220 223 - concraft, wcrft2
221 224 - dopasowanie do siebie tokenów w różnych wersjach tokenizacji
222   - - dopasowanie do siebie akapitów i zdań
223 225 2016.10.15
224 226 - nadmiar węzłów pro po parsowaniu
225 227 2016.10.16
226   - - zgubione węzły przy przetwarzaniu dep
227   - - assign_not_parsed
228 228 - sprawdzenie zerowania globalnych referencji przy parsowaniu korpusu
229   - - mateParser
230   - 2016.10.19
  229 + 2016.10.22
  230 + - instalacja Świgry
231 231 *)
232 232  
233 233  
... ...
parser/semGraph.ml
... ... @@ -21,11 +21,11 @@ open LCGtypes
21 21 open Xstd
22 22 open Printf
23 23  
24   -let pro_id_counter = ref 100000 (* FIXME: to trzeba usunąć !!! *)
  24 +(*let pro_id_counter = ref 100000 (* FIXME: to trzeba usunąć !!! *)
25 25  
26 26 let get_pro_id () =
27 27 incr pro_id_counter;
28   - !pro_id_counter
  28 + !pro_id_counter*)
29 29  
30 30 let empty_concept =
31 31 {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list;
... ... @@ -61,7 +61,7 @@ let rec get_person = function
61 61 | _ :: l -> get_person l
62 62 | [] -> ""
63 63  
64   -let rec create_normal_concept (*roles role_attrs*) paths t sem_args =
  64 +let rec create_normal_concept (*roles role_attrs*) tokens t sem_args =
65 65 let sem_args = if t.cat = "pro" then
66 66 match get_person t.attrs with
67 67 "pri" -> ["indexical"]
... ... @@ -73,10 +73,10 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args =
73 73 if t.agf = WalTypes.NOSEM then t.args else
74 74 let c = {empty_concept with
75 75 c_sense = Val t.meaning;
76   - c_relations=(*create_concepts paths*) t.args;
  76 + c_relations=(*create_concepts tokens*) t.args;
77 77 c_quant=make_sem_args sem_args;
78 78 c_variable=string_of_int t.id,"";
79   - c_pos=(*if t.id >= Array.length paths then -1 else*) paths.(t.id).PreTypes.beg;
  79 + c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).PreTypes.beg;
80 80 c_local_quant=true} in
81 81 if t.cat = "subst" || t.cat = "depr" || t.cat = "ger" || t.cat = "unk" || StringSet.mem symbols t.cat then (* FIXME: wykrywanie plurale tantum *)
82 82 let c = {c with c_local_quant=false} in
... ... @@ -103,7 +103,8 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args =
103 103 | e,t -> failwith ("create_normal_concept noun: " ^ e)) in
104 104 let c = if t.cat = "depr" then {c with c_relations=Tuple[c.c_relations;SingleRelation(Val "depr")]} else c in
105 105 if cx_flag then
106   - Context{empty_context with cx_contents=Concept c; cx_variable=string_of_int (get_pro_id ()),""; cx_pos=c.c_pos}
  106 + let id = ExtArray.add tokens PreTypes.empty_token in
  107 + Context{empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos}
107 108 else Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else
108 109 if t.cat = "fin" || t.cat = "bedzie" || t.cat = "praet" || t.cat = "winien" || t.cat = "impt" || t.cat = "imps" || t.cat = "pred" || t.pred = "pro-komunikować" then
109 110 let c = {c with c_local_quant=false} in
... ... @@ -120,7 +121,9 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args =
120 121 | "MOOD",Val "imperative" -> {c with c_relations=Tuple[c.c_relations;SingleRelation(Val "impt")]} (* FIXME *)
121 122 | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]}
122 123 | e,t -> failwith ("create_normal_concept verb: " ^ e)) in
123   - let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int (get_pro_id ()),""; cx_pos=c.c_pos} in
  124 + let c = if t.pred = "pro-komunikować" then {c with c_relations=Relation(Val "Theme",Val "",c.c_relations)} else c in (* FIXME: to by trzeba przesunąć na wcześniej *)
  125 + let id = ExtArray.add tokens PreTypes.empty_token in
  126 + let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} in
124 127 if t.position.WalTypes.role <> "" || t.position.WalTypes.role_attr <> "" then failwith "create_normal_concept: verb" else
125 128 (* Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context cx) else *)
126 129 Context cx else
... ... @@ -131,7 +134,8 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args =
131 134 | "TENSE",t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]}
132 135 | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]}
133 136 | e,t -> failwith ("create_normal_concept verb: " ^ e)) in
134   - let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int (get_pro_id ()),""; cx_pos=c.c_pos} in
  137 + let id = ExtArray.add tokens PreTypes.empty_token in
  138 + let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} in
135 139 Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context cx) else
136 140 if t.cat = "adj" || t.cat = "adjc" || t.cat = "adjp" || t.cat = "adja" || t.cat = "pact" || t.cat = "ppas" || t.cat = "ordnum" || t.cat = "roman-adj" then
137 141 let c = if t.cat = "pact" || t.cat = "ppas" then {c with c_local_quant=false} else c in
... ... @@ -249,42 +253,45 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args =
249 253 if t.cat = "interp" && t.pred = "?" then SingleRelation(Val "int") else
250 254 if t.cat = "interp" && t.pred = "„" then
251 255 Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,RemoveRelation t.args) else
  256 + if t.cat = "interp" || t.pred = "</or-sentence>" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else (
252 257 if t.cat = "interp" then Node t else
253 258 if t.cat = "" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else
254   - Node t
  259 + (* print_endline t.pred; *)
  260 + Node t)
255 261  
256   -and create_concepts paths = function
  262 +and create_concepts tokens = function
257 263 Node t ->
  264 + (* print_endline ("cc " ^ t.pred); *)
258 265 (* let agf = t.agf in *)
259 266 let attrs = remove_unimportant_attrs t.attrs in
260 267 (* let attrs,roles,role_attrs = get_roles attrs in *)
261 268 let t = {t with attrs=attrs} in
262   - (match paths.(t.id).PreTypes.semantics with
  269 + (match (ExtArray.get tokens t.id).PreTypes.semantics with
263 270 PreTypes.Normal ->
264   - let t = create_normal_concept paths t [] in
  271 + let t = create_normal_concept tokens t [] in
265 272 (* if agf = WalTypes.CORE then Core t else *) t
266 273 | PreTypes.PrepSemantics _ ->
267   - let t = create_normal_concept paths t [] in
  274 + let t = create_normal_concept tokens t [] in
268 275 (* if agf = WalTypes.CORE then Core t else *) t
269 276 | PreTypes.Special l ->
270   - let t = create_normal_concept paths t l in
  277 + let t = create_normal_concept tokens t l in
271 278 (* if agf = WalTypes.CORE then Core t else *) t
272 279 (* | PreTypes.SpecialNoun(lemma,_) ->
273   - let t = create_normal_concept paths t in*)
  280 + let t = create_normal_concept tokens t in*)
274 281 (* if agf = WalTypes.CORE then Core t else t*)
275 282 (*| _ -> failwith "create_concepts: ni"*))
276   - | Tuple l -> Tuple(Xlist.map l (create_concepts paths))
277   - | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts paths t))
  283 + | Tuple l -> Tuple(Xlist.map l (create_concepts tokens))
  284 + | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens t))
278 285 | Dot -> Dot
279 286 | Ref i -> Ref i
280   - | Choice choices -> Choice(StringMap.map choices (create_concepts paths))
  287 + | Choice choices -> Choice(StringMap.map choices (create_concepts tokens))
281 288 | t -> failwith ("create_concepts: " ^ LCGstringOf.linear_term 0 t)
282 289  
283 290  
284   -let translate paths term =
  291 +let translate tokens term =
285 292 let sem = Array.copy term in
286 293 Int.iter 0 (Array.length sem - 1) (fun i ->
287   - sem.(i) <- create_concepts paths sem.(i));
  294 + sem.(i) <- create_concepts tokens sem.(i));
288 295 sem
289 296  
290 297 (***************************************************************************************)
... ...
parser/server.ml
... ... @@ -16,7 +16,7 @@
16 16 * You should have received a copy of the GNU General Public License
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19   -
  19 +
20 20 open ExecTypes
21 21  
22 22 let logfile = open_out_gen [Open_wronly; Open_append; Open_creat] ((6*8+4)*8+4) "results/queries.log"
... ... @@ -26,35 +26,66 @@ let get_sock_addr host_name port =
26 26 let addr = he.Unix.h_addr_list in
27 27 Unix.ADDR_INET(addr.(0),port)
28 28  
  29 +let rec clean_result_sentence = function
  30 + SemSentence result -> SemSentence {result with disamb=[| |]; sem=[| |]; sem2=[| |]; sem3=LCGtypes.Dot}
  31 + | AltSentence l ->
  32 + let l = Xlist.rev_map l (fun (mode,sentence) ->
  33 + mode, clean_result_sentence sentence) in
  34 + AltSentence(List.rev l)
  35 + | t -> t
  36 +
  37 +let rec clean_result_paragraph = function
  38 + RawParagraph s -> RawParagraph s
  39 + | StructParagraph sentences ->
  40 + let sentences = Xlist.rev_map sentences (fun p ->
  41 + let sentence = clean_result_sentence p.psentence in
  42 + {p with psentence=sentence}) in
  43 + StructParagraph(List.rev sentences)
  44 + | AltParagraph l ->
  45 + let l = Xlist.rev_map l (fun (mode,paragraph) ->
  46 + mode, clean_result_paragraph paragraph) in
  47 + AltParagraph(List.rev l)
  48 +
  49 +let rec clean_result_text = function
  50 + RawText s -> RawText s
  51 + | StructText(paragraphs,tokens) ->
  52 + let paragraphs = Xlist.rev_map paragraphs (fun paragraph ->
  53 + clean_result_paragraph paragraph) in
  54 + StructText(List.rev paragraphs, tokens)
  55 + | AltText l -> AltText(Xlist.map l (fun (mode,text) ->
  56 + mode, clean_result_text text))
  57 +
29 58 let parse query =
30 59 let max_n = 10 in
31 60 let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in
32   - let result = Exec.process_query ic oc 100. false "x" query max_n in
  61 + let result = Exec.process_query ic oc 30. false "x" (PreTypes.RawText query) max_n in
33 62 Printf.fprintf oc "\n%!";
34 63 let _ = Unix.shutdown_connection ic in
35   - {result with
36   - graph=[| |];
37   - term=[| |];
38   - disamb=[| |];
39   - sem=[| |];
40   - sem2=[| |];
41   - sem3=LCGtypes.Dot;
42   - paths=[| |]}
  64 + {result with
  65 + pre_text = RawText "";
  66 + parsed_text = RawText "";
  67 + selected_sent_text = RawText "";
  68 + semantic_text = RawText "";
  69 + selected_semantic_text = clean_result_text result.selected_semantic_text}
43 70  
44 71 let rec main_loop in_chan out_chan =
45   - let query = input_line in_chan in
46   - if query = "" then Marshal.to_channel out_chan {Exec.empty_result with msg="Empty query"} [] else
47   - (try
  72 + (* let query = input_line in_chan in *)
  73 + let query = (Marshal.from_channel in_chan : PreTypes.text) in
  74 + let query = match query with PreTypes.RawText q -> q | _ -> failwith "bad query format" in
  75 + (* Printf.fprintf logfile "raw query: '%s'\n" query; *)
  76 + if query = "" then Marshal.to_channel out_chan {Exec.empty_result with msg="Empty query"} [] else
  77 + (try
48 78 let result : ExecTypes.result = parse query in
49 79 Exec.print_result logfile result;
50 80 Marshal.to_channel out_chan result []
51   - with e ->
  81 + with e ->
52 82 Printf.fprintf logfile "query: %s\nerror_other: %s\n%!" query (Printexc.to_string e);
53 83 Marshal.to_channel out_chan {Exec.empty_result with msg=Printexc.to_string e} []);
54   - flush out_chan
  84 + (* flush out_chan *)
  85 + ()
55 86  
56 87  
57 88 let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,Paths.server_port)
58 89  
59   -let _ =
60   - Unix.establish_server main_loop sockaddr
  90 +let _ =
  91 + Unix.establish_server main_loop sockaddr
... ...
parser/swigra_test.ml 0 → 100644
  1 +
  2 +let swigra_in, swigra_out = Unix.open_process "../swigra/parser/run.sh"
  3 +
  4 +let _ =
  5 + Printf.fprintf swigra_out "Ala ma kota\n%!";
  6 + print_endline (input_line swigra_in);
  7 + (* powyższą linię należy zastąpić przez:
  8 + let xml = Xml.parse_in swigra_in in
  9 + print_endline (Xml.to_string_fmt xml; *)
  10 + Printf.fprintf swigra_out "Ela ma kota\n%!";
  11 + print_endline (input_line swigra_in);
  12 + (* powyższą linię należy zastąpić przez:
  13 + let xml = Xml.parse_in swigra_in in
  14 + print_endline (Xml.to_string_fmt xml; *)
  15 + ()
... ...
parser/visualization.ml
... ... @@ -44,6 +44,23 @@ let rec string_of_token = function
44 44 | PreTypes.Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token))
45 45 | PreTypes.Tokens(cat,l) -> sprintf "Tokens(%s,%s)" cat (String.concat ";" (Xlist.map l string_of_int))
46 46  
  47 +let lemma_of_token = function
  48 + PreTypes.SmallLetter orth -> orth
  49 + | PreTypes.CapLetter(orth,lc) -> orth
  50 + | PreTypes.AllSmall orth -> orth
  51 + | PreTypes.AllCap(orth,lc,lc2) -> orth
  52 + | PreTypes.FirstCap(orth,lc,cl,ll) -> orth
  53 + | PreTypes.SomeCap orth -> orth
  54 + | PreTypes.RomanDig(v,t) -> v
  55 + | PreTypes.Interp orth -> orth
  56 + | PreTypes.Symbol orth -> orth
  57 + | PreTypes.Dig(v,t) -> v
  58 + | PreTypes.Other2 orth -> orth
  59 + | PreTypes.Lemma(lemma,cat,interps) -> lemma
  60 + | PreTypes.Proper(lemma,cat,interps,senses) -> lemma
  61 + | PreTypes.Compound(sense,l) -> "Compound"
  62 + | PreTypes.Tokens(cat,l) -> "Tokens"
  63 +
47 64 let rec spaces i =
48 65 if i = 0 then "" else " " ^ spaces (i-1)
49 66  
... ... @@ -354,7 +371,7 @@ let rec print_graph2_rec file edge_rev edge_label edge_head upper = function
354 371 incr id_counter;
355 372 fprintf file " %d [shape=box,label=<%s%s %s>]\n" id
356 373 (string_of_quant t.c_quant)
357   - (LCGstringOf.linear_term 0 t.c_sense)
  374 + (escape_string (LCGstringOf.linear_term 0 t.c_sense))
358 375 (if t.c_name=Dot then "" else "„" ^ LCGstringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *)
359 376 print_edge2 file edge_rev edge_label edge_head "" upper id;
360 377 print_graph2_rec file false "" "" id t.c_relations
... ... @@ -584,11 +601,13 @@ let page_header path =
584 601 <center>
585 602 <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1>
586 603 <h3>Podaj tekst:</h3>
587   - <form method=POST action=\"" ^ path ^ "parser.cgi\">
588   - <p><input type=\"text\" name=\"text0\" value=\"\" size=\"40\"></p>
  604 + <form method=POST action=\"" ^ path ^ "parser2.cgi\">
  605 + <p><input type=\"text\" name=\"text0\" value=\"\" size=\"100\"></p>
589 606 <p><input type=\"submit\" value=\"Analizuj\" size=\"60\"></p>
590 607 </form>"
591 608  
  609 +(*<textarea name="text0" cols="100" rows="10"></textarea>*)
  610 +
592 611 let page_trailer =
593 612 "<BR><BR>
594 613 <hr align=\"center\" size=\"2\" width=\"800\" />
... ... @@ -640,6 +659,8 @@ let string_of_mode = function
640 659 | CONLL -> "CONLL"
641 660 | ENIAM -> "ENIAM"
642 661 | Mate -> "Mate"
  662 + | Swigra -> "Swigra"
  663 + | POLFIE -> "POLFIE"
643 664  
644 665 (*let rec string_of_sentence = function
645 666 RawSentence s -> sprintf "RawSentence(%s)" s
... ... @@ -716,6 +737,24 @@ let html_of_tokens tokens =
716 737 (String.concat "; " t.PreTypes.attrs)) :: l))) ^
717 738 "</table>"
718 739  
  740 +let html_of_tokens_simple_valence tokens =
  741 + "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>simple_valence</b></td></tr>" ^
  742 + String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id ->
  743 + let t = ExtArray.get tokens id in
  744 + Xlist.fold t.simple_valence l (fun l (fnum,frame) ->
  745 + (sprintf "<tr><td>%d</td><td>%s</td><td>%s</td></tr>"
  746 + id t.PreTypes.orth (WalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^
  747 + "</table>"
  748 +
  749 +let html_of_tokens_valence tokens =
  750 + "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>simple_valence</b></td></tr>" ^
  751 + String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id ->
  752 + let t = ExtArray.get tokens id in
  753 + Xlist.fold t.valence l (fun l (fnum,frame) ->
  754 + (sprintf "<tr><td>%d</td><td>%s</td><td>%s</td></tr>"
  755 + id t.PreTypes.orth (WalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^
  756 + "</table>"
  757 +
719 758 let create_latex_chart path name chart =
720 759 LatexMain.latex_file_out path name "a1" false (fun file ->
721 760 Printf.fprintf file "%s\n" (LCGlatexOf.chart chart));
... ... @@ -742,30 +781,30 @@ let html_of_eniam_sentence path tokens (result : eniam_parse_result) =
742 781 (* | PreprocessingError -> "error_pre: %s\n" result.msg *)
743 782 | LexiconError -> sprintf "error_lex: %s\n" result.msg
744 783 | ParseError ->
745   - create_latex_chart path (result.id ^ "_chart") result.chart;
  784 + create_latex_chart path (result.file_prefix ^ "_chart") result.chart;
746 785 sprintf "error_parse: %s\n" result.msg ^
747   - sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id
  786 + sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix
748 787 | ParseTimeout ->
749   - create_latex_chart path (result.id ^ "_chart") result.chart;
  788 + create_latex_chart path (result.file_prefix ^ "_chart") result.chart;
750 789 sprintf "timeout: %s\n" result.msg ^
751   - sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id
  790 + sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix
752 791 | NotParsed ->
753   - create_latex_chart path (result.id ^ "_chart") result.chart;
  792 + create_latex_chart path (result.file_prefix ^ "_chart") result.chart;
754 793 sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^
755   - sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id
  794 + sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix
756 795 | ReductionError -> sprintf "error_reduction: %s\n" result.msg
757 796 | TooManyNodes -> sprintf "to_many_nodes: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size
758 797 | NotReduced -> sprintf "not_reduced: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size
759 798 | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size
760 799 (* | NotTranslated -> "not_translated: \n" *)
761 800 | Parsed ->
762   - print_simplified_dependency_tree path (result.id ^ "_simplified_dependency_tree") tokens result.dependency_tree;
763   - print_dependency_tree path (result.id ^ "_dependency_tree") result.dependency_tree;
764   - LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree;
  801 + print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree;
  802 + print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
  803 + (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *)
765 804 sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^
766   - sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.id ^
767   - sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.id ^
768   - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id
  805 + sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^
  806 + sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix (* ^
  807 + sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
769 808 | _ -> failwith "html_of_eniam_sentence"
770 809  
771 810 let html_of_conll_sentence path tokens (result : conll_parse_result) =
... ... @@ -774,48 +813,74 @@ let html_of_conll_sentence path tokens (result : conll_parse_result) =
774 813 (* | PreprocessingError -> "error_pre: %s\n" result.msg *)
775 814 | LexiconError -> sprintf "error_lex: %s\n" result.msg
776 815 | ParseError ->
777   - create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart;
778   - create_latex_parsed_dep_chart path (result.id ^ "_parsed_dep_chart") result.parsed_dep_chart;
  816 + create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart;
  817 + create_latex_parsed_dep_chart path (result.file_prefix ^ "_parsed_dep_chart") result.parsed_dep_chart;
779 818 sprintf "error_parse: %s\n" result.msg ^
780   - sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^
781   - sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.id
  819 + sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^
  820 + sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.file_prefix
782 821 | ParseTimeout ->
783   - create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart;
784   - create_latex_parsed_dep_chart path (result.id ^ "_parsed_dep_chart") result.parsed_dep_chart;
  822 + create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart;
  823 + create_latex_parsed_dep_chart path (result.file_prefix ^ "_parsed_dep_chart") result.parsed_dep_chart;
785 824 sprintf "timeout: %s\n" result.msg ^
786   - sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^
787   - sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.id
  825 + sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^
  826 + sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.file_prefix
788 827 | NotParsed ->
789   - create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart;
790   - create_latex_not_parsed_dep_chart path (result.id ^ "_not_parsed_dep_chart") result.not_parsed_dep_chart;
  828 + create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart;
  829 + create_latex_not_parsed_dep_chart path (result.file_prefix ^ "_not_parsed_dep_chart") result.not_parsed_dep_chart;
791 830 sprintf "not_parsed\n" ^
792   - sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^
793   - sprintf "<BR><A HREF=\"%s_not_parsed_dep_chart.pdf\">Not Parsed Chart</A>\n" result.id
  831 + html_of_dep_sentence tokens result.paths ^
  832 + sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^
  833 + sprintf "<BR><A HREF=\"%s_not_parsed_dep_chart.pdf\">Not Parsed Chart</A>\n" result.file_prefix
794 834 | ReductionError -> sprintf "error_reduction: %s\n" result.msg
795 835 | TooManyNodes -> sprintf "to_many_nodes: paths_size=%d\n" result.paths_size
796 836 | NotReduced ->
797   - LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree;
  837 + LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree;
798 838 sprintf "not_reduced: paths_size=%d\n" result.paths_size ^
799   - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id
  839 + sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix
800 840 | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size
801 841 (* | NotTranslated -> "not_translated: \n" *)
802 842 | Parsed ->
803   - print_simplified_dependency_tree path (result.id ^ "_simplified_dependency_tree") tokens result.dependency_tree;
804   - print_dependency_tree path (result.id ^ "_dependency_tree") result.dependency_tree;
805   - LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree;
  843 + print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree;
  844 + print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
  845 + LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree;
806 846 sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size ^
807   - sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.id ^
808   - sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.id ^
809   - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id
  847 + sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^
  848 + sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^
  849 + sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix
810 850 | _ -> failwith "html_of_conll_sentence"
811 851  
  852 +let html_of_sem_sentence path tokens (result : semantic_processing_result) =
  853 + match result.status with
  854 + Idle -> "idle\n"
  855 + | SemError -> sprintf "error_sem: %s\n" result.msg
  856 + (* print_dependency_tree path (result.file_prefix ^ "_disamb") result.disamb;
  857 + print_dependency_tree path (result.file_prefix ^ "_sem") result.sem;
  858 + print_dependency_tree path (result.file_prefix ^ "_sem2") result.sem2;
  859 + print_graph2 "results/" "sem3" query result.sem3; *)
  860 + | NotTranslated ->
  861 + (* print_dependency_tree path (result.file_prefix ^ "_disamb") result.disamb;
  862 + print_dependency_tree path (result.file_prefix ^ "_sem") result.sem;
  863 + print_dependency_tree path (result.file_prefix ^ "_sem2") result.sem2; *)
  864 + print_graph2 path (result.file_prefix ^ "_sem3") "" result.sem3;
  865 + sprintf "not_translated \n" ^
  866 + (* sprintf "<BR><A HREF=\"%s_disamb.png\">Disambiguated Dependency Tree</A>\n" result.file_prefix ^
  867 + sprintf "<BR><A HREF=\"%s_sem.png\">Semantic Graph 1</A>\n" result.file_prefix ^
  868 + sprintf "<BR><A HREF=\"%s_sem2.png\">Semantic Graph 2</A>\n" result.file_prefix ^ *)
  869 + sprintf "<BR><A HREF=\"%s_sem3.png\">Semantic Graph</A>\n" result.file_prefix
  870 + | Parsed ->
  871 + print_graph2 path (result.file_prefix ^ "_sem3") "" result.sem3;
  872 + sprintf "parsed \n" ^
  873 + sprintf "<BR><A HREF=\"%s_sem3.png\">Semantic Graph</A>\n" result.file_prefix
  874 + | _ -> failwith "html_of_sem_sentence"
  875 +
812 876  
813 877 let rec html_of_sentence path tokens = function
814 878 RawSentence s -> s
815   - | StructSentence(_,paths,last) -> html_of_struct_sentence tokens paths last
816   - | DepSentence(_,paths) -> html_of_dep_sentence tokens paths
  879 + | StructSentence(paths,last) -> html_of_struct_sentence tokens paths last
  880 + | DepSentence paths -> html_of_dep_sentence tokens paths
817 881 | ENIAMSentence result -> html_of_eniam_sentence path tokens result
818 882 | CONLLSentence result -> html_of_conll_sentence path tokens result
  883 + | SemSentence result -> html_of_sem_sentence path tokens result
819 884 | QuotedSentences sentences ->
820 885 String.concat "<BR>\n" (Xlist.map sentences (fun p ->
821 886 sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence)))
... ... @@ -840,9 +905,9 @@ let rec html_of_paragraph path tokens = function
840 905 let rec html_of_text path = function
841 906 RawText s -> s
842 907 | StructText(paragraphs,tokens) ->
843   - sprintf "%s<BR>\n%s"
  908 + sprintf "%s<BR>\n%s\n%s\n%s"
844 909 (String.concat "<BR>\n" (Xlist.map paragraphs (html_of_paragraph path tokens)))
845   - (html_of_tokens tokens)
  910 + (html_of_tokens tokens) (html_of_tokens_simple_valence tokens) (html_of_tokens_valence tokens)
846 911 | AltText l ->
847 912 "<table border=3>" ^
848 913 String.concat "\n" (Xlist.map l (fun (mode,text) ->
... ... @@ -854,3 +919,136 @@ let print_html_text path name text =
854 919 fprintf file "%s\n" html_header;
855 920 fprintf file "%s\n" (html_of_text path text);
856 921 fprintf file "%s\n" html_trailer)
  922 +
  923 +let rec find_prev_next_sentence pid rev = function
  924 + AltSentence[Raw,_;Struct,QuotedSentences sentences] ->
  925 + Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.pid rev p.psentence)
  926 + | AltSentence[Raw,RawSentence s; mode,SemSentence result] -> result.file_prefix :: rev
  927 + | AltSentence[Raw,RawSentence s] -> ("p" ^ pid) :: rev
  928 + | _ -> failwith "find_prev_next_sentence: ni"
  929 +
  930 +let rec find_prev_next_paragraph rev = function
  931 + RawParagraph s -> rev
  932 + | StructParagraph sentences ->
  933 + Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.pid rev p.psentence)
  934 + | AltParagraph l -> Xlist.fold l rev (fun rev (mode,paragraph) -> find_prev_next_paragraph rev paragraph)
  935 +
  936 +let rec make_prev_next_map map prev = function
  937 + [x] -> StringMap.add map x (prev,"")
  938 + | x :: next :: l -> make_prev_next_map (StringMap.add map x (prev,next)) x (next :: l)
  939 + | [] -> failwith "make_prev_next_map"
  940 +
  941 +let print_main_result cg_bin_path mode path id tokens query result prev_next_map =
  942 + let prev,next = try StringMap.find prev_next_map result.file_prefix with Not_found -> failwith "print_main_result" in
  943 + File.file_out (path ^ "page" ^ id ^ "_" ^ result.file_prefix ^ ".html") (fun file ->
  944 + fprintf file "%s\n" (page_header cg_bin_path);
  945 + if prev <> "" then fprintf file "<A HREF=\"page%s_%s.html\">Poprzednie zdanie</A> " id prev;
  946 + if next <> "" then fprintf file " <A HREF=\"page%s_%s.html\">Następne zdanie</A>" id next;
  947 + fprintf file "\n<H3>%s</H3>\n" query;
  948 + if mode <> ENIAM then fprintf file "<P>Parsed by %s\n" (string_of_mode mode);
  949 + ignore (Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl ->
  950 + print_graph2 path ("tree" ^ id ^ "_" ^ result.file_prefix ^ "_" ^ string_of_int n) "" tree;
  951 + print_xml_tree path ("tree" ^ id ^ "_" ^ result.file_prefix ^ "_" ^ string_of_int n) tree;
  952 + let mml = SemMmlOf.mml_of_mrl mrl in
  953 + print_mml path ("formula" ^ id ^ "_" ^ result.file_prefix ^ "_" ^ string_of_int n) mml;
  954 + fprintf file "<P><IMG SRC=\"tree%s_%s_%d.png\">\n" id result.file_prefix n;
  955 + fprintf file "<P>%s\n" (Xml.to_string_fmt mml);
  956 + fprintf file "<P><A HREF=\"tree%s_%s_%d.xml\">Graf w formacie XML</A>\n" id result.file_prefix n;
  957 + fprintf file "<P><A HREF=\"formula%s_%s_%d.mml\">Formuła w formacie MathML</A>\n" id result.file_prefix n;
  958 + n+1));
  959 + fprintf file "<P>";
  960 + if prev <> "" then fprintf file "<A HREF=\"page%s_%s.html\">Poprzednie zdanie</A> " id prev;
  961 + if next <> "" then fprintf file " <A HREF=\"page%s_%s.html\">Następne zdanie</A>" id next;
  962 + fprintf file "%s\n" page_trailer)
  963 +
  964 +let print_not_parsed_main_result cg_bin_path path id query pid prev_next_map =
  965 + let pid = "p" ^ pid in
  966 + let prev,next = try StringMap.find prev_next_map pid with Not_found -> failwith "print_not_parsed_main_result" in
  967 + File.file_out (path ^ "page" ^ id ^ "_" ^ pid ^ ".html") (fun file ->
  968 + fprintf file "%s\n" (page_header cg_bin_path);
  969 + if prev <> "" then fprintf file "<A HREF=\"page%s_%s.html\">Poprzednie zdanie</A> " id prev;
  970 + if next <> "" then fprintf file " <A HREF=\"page%s_%s.html\">Następne zdanie</A>" id next;
  971 + fprintf file "\n<H3>%s</H3>\n" query;
  972 + fprintf file "<P>Not parsed\n";
  973 + fprintf file "<P>";
  974 + if prev <> "" then fprintf file "<A HREF=\"page%s_%s.html\">Poprzednie zdanie</A> " id prev;
  975 + if next <> "" then fprintf file " <A HREF=\"page%s_%s.html\">Następne zdanie</A>" id next;
  976 + fprintf file "%s\n" page_trailer)
  977 +
  978 +let rec print_main_result_sentence cg_bin_path path id tokens pid prev_next_map = function
  979 + AltSentence[Raw,_;Struct,QuotedSentences sentences] ->
  980 + Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence)
  981 + | AltSentence[Raw,RawSentence query; mode,SemSentence result] ->
  982 + print_main_result cg_bin_path mode path id tokens query result prev_next_map
  983 + | AltSentence[Raw,RawSentence query] -> print_not_parsed_main_result cg_bin_path path id query pid prev_next_map
  984 + | _ -> failwith "print_main_result_sentence: ni"
  985 +
  986 +let rec print_main_result_paragraph cg_bin_path path id tokens prev_next_map = function
  987 + RawParagraph s -> ()
  988 + | StructParagraph sentences ->
  989 + Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence)
  990 + | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph cg_bin_path path id tokens prev_next_map paragraph)
  991 +
  992 +let rec print_main_result_text cg_bin_path path id = function
  993 + RawText s -> ()
  994 + | StructText(paragraphs,tokens) ->
  995 + let prev_next_map = make_prev_next_map StringMap.empty ""
  996 + (List.rev (Xlist.fold paragraphs [] find_prev_next_paragraph)) in
  997 + Xlist.iter paragraphs (print_main_result_paragraph cg_bin_path path id tokens prev_next_map)
  998 + | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_text cg_bin_path path id text)
  999 +
  1000 +let print_main_result_first_page cg_bin_path mode path id tokens query result prev_next_map =
  1001 + let prev,next = try StringMap.find prev_next_map result.file_prefix with Not_found -> failwith "print_main_result" in
  1002 + printf "%s\n" (page_header cg_bin_path);
  1003 + if prev <> "" then printf "<A HREF=\"%spage%s_%s.html\">Poprzednie zdanie</A> " path id prev;
  1004 + if next <> "" then printf " <A HREF=\"%spage%s_%s.html\">Następne zdanie</A>" path id next;
  1005 + printf "\n<H3>%s</H3>\n" query;
  1006 + if mode <> ENIAM then printf "<P>Parsed by %s\n" (string_of_mode mode);
  1007 + ignore (Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl ->
  1008 + let mml = SemMmlOf.mml_of_mrl mrl in
  1009 + printf "<P><IMG SRC=\"%stree%s_%s_%d.png\">\n" path id result.file_prefix n;
  1010 + printf "<P>%s\n" (Xml.to_string_fmt mml);
  1011 + printf "<P><A HREF=\"%stree%s_%s_%d.xml\">Graf w formacie XML</A>\n" path id result.file_prefix n;
  1012 + printf "<P><A HREF=\"%sformula%s_%s_%d.mml\">Formuła w formacie MathML</A>\n" path id result.file_prefix n;
  1013 + n+1));
  1014 + printf "<P>";
  1015 + if prev <> "" then printf "<A HREF=\"%spage%s_%s.html\">Poprzednie zdanie</A> " path id prev;
  1016 + if next <> "" then printf " <A HREF=\"%spage%s_%s.html\">Następne zdanie</A>" path id next;
  1017 + printf "%s\n" page_trailer
  1018 +
  1019 +let print_not_parsed_main_result_first_page cg_bin_path path id query pid prev_next_map =
  1020 + let pid = "p" ^ pid in
  1021 + let prev,next = try StringMap.find prev_next_map pid with Not_found -> failwith "print_not_parsed_main_result" in
  1022 + printf "%s\n" (page_header cg_bin_path);
  1023 + if prev <> "" then printf "<A HREF=\"%spage%s_%s.html\">Poprzednie zdanie</A> " path id prev;
  1024 + if next <> "" then printf " <A HREF=\"%spage%s_%s.html\">Następne zdanie</A>" path id next;
  1025 + printf "\n<H3>%s</H3>\n" query;
  1026 + printf "<P>Not parsed\n";
  1027 + printf "<P>";
  1028 + if prev <> "" then printf "<A HREF=\"%spage%s_%s.html\">Poprzednie zdanie</A> " path id prev;
  1029 + if next <> "" then printf " <A HREF=\"%spage%s_%s.html\">Następne zdanie</A>" path id next;
  1030 + printf "%s\n" page_trailer
  1031 +
  1032 +let rec print_main_result_first_page_sentence cg_bin_path path id tokens pid prev_next_map = function
  1033 + AltSentence[Raw,_;Struct,QuotedSentences sentences] ->
  1034 + let p = List.hd sentences in
  1035 + print_main_result_first_page_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence
  1036 + | AltSentence[Raw,RawSentence query; mode,SemSentence result] ->
  1037 + print_main_result_first_page cg_bin_path mode path id tokens query result prev_next_map
  1038 + | AltSentence[Raw,RawSentence query] -> print_not_parsed_main_result_first_page cg_bin_path path id query pid prev_next_map
  1039 + | _ -> failwith "print_main_result_first_page_sentence: ni"
  1040 +
  1041 +let rec print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map = function
  1042 + RawParagraph s -> ()
  1043 + | StructParagraph sentences ->
  1044 + let p = List.hd sentences in
  1045 + print_main_result_first_page_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence
  1046 + | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map paragraph)
  1047 +
  1048 +let rec print_main_result_first_page_text cg_bin_path path id = function
  1049 + RawText s -> ()
  1050 + | StructText(paragraphs,tokens) ->
  1051 + let prev_next_map = make_prev_next_map StringMap.empty ""
  1052 + (List.rev (Xlist.fold paragraphs [] find_prev_next_paragraph)) in
  1053 + print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map (List.hd paragraphs)
  1054 + | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_first_page_text cg_bin_path path id text)
... ...
parser/webInterface.ml
... ... @@ -22,44 +22,40 @@ open LCGtypes
22 22 open ExecTypes
23 23  
24 24 let get_sock_addr host_name port =
25   - let he =
26   - try Unix.gethostbyname host_name
  25 + let he =
  26 + try Unix.gethostbyname host_name
27 27 with Not_found -> failwith ("get_sock_addr: host " ^ host_name ^ " not found") in
28 28 let addr = he.Unix.h_addr_list in
29 29 Unix.ADDR_INET(addr.(0),port)
30 30  
31 31 let ala = Relation(Val "Initiator",Val "",Concept{c_sense=Val "osoba 1"; c_name=Val "Ala"; c_local_quant=false; c_quant=Val "sg"; c_relations=Dot; c_variable="a",""; c_pos=1})
32 32 let kot = Relation(Val "Theme",Val "",Concept{c_sense=Val "kot 1"; c_name=Dot; c_local_quant=false; c_quant=Val "sg"; c_relations=Dot; c_variable="k",""; c_pos=3})
33   -let ala_ma_kota n = Context{cx_contents=Concept{c_sense=Val ("mieć " ^ n); c_name=Dot; c_local_quant=false; c_quant=Dot; c_variable="m",""; c_pos=2; c_relations=Tuple[ala;kot]};
  33 +let ala_ma_kota n = Context{cx_contents=Concept{c_sense=Val ("mieć " ^ n); c_name=Dot; c_local_quant=false; c_quant=Dot; c_variable="m",""; c_pos=2; c_relations=Tuple[ala;kot]};
34 34 cx_sense=Dot; cx_relations=Dot; cx_variable="x",""; cx_pos=2}
35 35  
36 36 let process_query id query =
37 37 let sock = get_sock_addr Paths.server_host Paths.server_port in
38   - let ic,oc =
  38 + let ic,oc =
39 39 try Unix.open_connection sock
40 40 with e -> failwith ("server connection error: " ^ Printexc.to_string e) in
41   - Printf.fprintf oc "%s\n%!" query;
  41 + (* Printf.fprintf oc "%s\n%!" query; *)
  42 + Marshal.to_channel oc (PreTypes.RawText query) [];
  43 + flush oc;
42 44 let result = (Marshal.from_channel ic : ExecTypes.result) in
43   - Printf.fprintf oc "\n%!";
  45 + (* Printf.fprintf oc "\n%!"; *)
  46 + (* Marshal.to_channel oc (PreTypes.RawText "") [];
  47 + flush oc; *)
44 48 let _ = Unix.shutdown_connection ic in
45 49 if result.status <> Parsed then Visualization.print_other_result stdout "" query result else
46 50 let path = "results/web/" in
47 51 let id = "_" ^ id in
48   - ignore(Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl ->
49   - Visualization.print_graph2 path ("tree" ^ id ^ "_" ^ string_of_int n) "" tree;
50   - Visualization.print_xml_tree path ("tree" ^ id ^ "_" ^ string_of_int n) tree;
51   - let mml = SemMmlOf.mml_of_mrl mrl in
52   - Visualization.print_mml path ("formula" ^ id ^ "_" ^ string_of_int n) mml;
53   - File.file_out (path ^ "page" ^ id ^ "_" ^ string_of_int n ^ ".html") (fun file ->
54   - Visualization.print_webpage file "../../" "" id query n (Xlist.size result.trees) mml);
55   - n+1));
56   - Visualization.print_webpage stdout "" "results/web/" id query 1 (Xlist.size result.trees) (SemMmlOf.mml_of_mrl (List.hd result.mrls))
57   -
  52 + Visualization.print_main_result_text "../../" path id result.selected_semantic_text;
  53 + Visualization.print_main_result_first_page_text "" "results/web/" id result.selected_semantic_text
58 54  
59 55 let get_input () =
60 56 let r = ref [] in
61 57 (try
62   - while true do
  58 + while true do
63 59 r := (input_line stdin) :: (!r)
64 60 done;
65 61 !r
... ... @@ -67,31 +63,31 @@ let get_input () =
67 63  
68 64 let rec translate_input_rec buf i size query =
69 65 if i >= size then Buffer.contents buf else (
70   - let c,i =
  66 + let c,i =
71 67 if String.get query i = '%' then
72 68 Scanf.sscanf (String.sub query (i+1) 2) "%x" (fun a -> Char.chr a), i+3 else
73 69 if String.get query i = '+' then ' ', i+1 else
74 70 String.get query i, i+1 in
75 71 Buffer.add_char buf c;
76 72 translate_input_rec buf i size query)
77   -
  73 +
78 74 let translate_input query =
79 75 match query with
80 76 [query] ->
81   - if String.sub query 0 6 = "text0=" then
  77 + if String.sub query 0 6 = "text0=" then
82 78 let buf = Buffer.create (String.length query) in
83 79 translate_input_rec buf 6 (String.length query) query
84 80 else failwith "translate_input 1"
85 81 | _ -> failwith "translate_input 2"
86 82  
87   -let get_query_id () =
  83 +let get_query_id () =
88 84 let filename = Filename.temp_file ~temp_dir:"results/web/" "page_" "" in
89 85 (* print_endline filename; *)
90 86 let n = String.length "results/web/" + String.length "page_" in
91 87 let id = String.sub filename n (String.length filename - n) in
92 88 (* print_endline id; *)
93 89 id
94   -
  90 +
95 91 let generate_header () =
96 92 Printf.printf "Content-type: text/html\n";
97 93 Printf.printf "\n"
... ... @@ -106,7 +102,7 @@ let generate_trailer () =
106 102 (*Printf.printf "</BODY>\n</HTML>\n"*)()
107 103  
108 104 let generate_error_message e =
109   - Printf.printf
  105 + Printf.printf
110 106 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">
111 107 <html>
112 108 <head>
... ... @@ -117,24 +113,25 @@ let generate_error_message e =
117 113  
118 114 <body>
119 115 <center>
120   - <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1>
121   - <h3>%s</h3>
  116 + <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1>
  117 + <h3>%s</h3>
122 118 </center>
123 119 </body>
124 120 </html>" e
125   -
126   -let _ =
  121 +
  122 +let _ =
127 123 generate_header ();
128 124 (try
129 125 let query = get_input () in
130 126 let query = translate_input query in
131 127 let id = get_query_id () in
132 128 process_query id query;
133   - with
  129 + with
134 130 Failure e -> generate_error_message e
135 131 | e -> generate_error_message (Printexc.to_string e));
136 132 generate_trailer ()
137 133  
138 134 (* testowanie z linii poleceń:
139   -echo "text0=Ala ma kota." | ./parser.cgi
140   -*)
141 135 \ No newline at end of file
  136 +echo "text0=Ala ma kota. Ela ma psa." | ./parser2.cgi
  137 +echo "text0=Szpak powiedział: „Frunę. Kiszę.”" | ./parser2.cgi
  138 +*)
... ...
pre/.gitignore
1 1 pre
  2 +concraft_test
... ...
pre/concraft_test.ml 0 → 100644
  1 +
  2 +(* let concraft_in, concraft_out, concraft_err = Unix.open_process_full "../../../.local/bin/concraft-pl tag ../concraft/nkjp-model-0.2.gz" [| |] *)
  3 +(*let concraft_in, concraft_out, concraft_err =
  4 + Unix.open_process_full "concraft-pl tag ../concraft/nkjp-model-0.2.gz"
  5 + [|"PATH=" ^ Sys.getenv "PATH"|]
  6 +
  7 +let _ =
  8 + print_endline "out";
  9 + Printf.fprintf concraft_out "Ala ma kota.\n\n%!";
  10 + print_endline "in";
  11 + print_endline ("concraft error message: " ^ input_line concraft_err);
  12 + ()*)
  13 +
  14 +(**********************)
  15 +
  16 +(*
  17 +Aby korzystać z concrafta trzeba najpierw postawić serwer wpisując z linii poleceń:
  18 +concraft-pl server --inmodel ../concraft/nkjp-model-0.2.gz
  19 +*)
  20 +
  21 +let read_whole_channel c =
  22 + let r = ref [] in
  23 + try
  24 + while true do
  25 + r := (input_line c) :: !r
  26 + done;
  27 + !r
  28 + with End_of_file -> List.rev (!r)
  29 +
  30 +(* Gdy serwer jest już włączony na concraft_err trafia komunikat:
  31 +concraft-pl: bind: resource busy (Address already in use)
  32 +w przeciwnym przypadku program wiesza się na czytaniu concraft_in
  33 +*)
  34 +
  35 +(*let _ =
  36 + print_endline "Starting concraft server 1";
  37 + let concraft_in, concraft_out, concraft_err =
  38 + Unix.open_process_full "concraft-pl server --inmodel ../concraft/nkjp-model-0.2.gz"
  39 + [|"PATH=" ^ Sys.getenv "PATH"; "LANG=en_GB.UTF-8"|] in
  40 + (* let err_msg = String.concat "\n" (read_whole_channel concraft_err) in
  41 + let result = read_whole_channel concraft_in in *)
  42 + print_endline "Starting concraft server 2";
  43 + print_endline (input_line concraft_err);
  44 + print_endline "Starting concraft server 3";
  45 + print_endline (input_line concraft_err);
  46 + print_endline "Starting concraft server 3";
  47 + (* print_endline err_msg;
  48 + print_endline "Starting concraft server 3";
  49 + print_endline (String.concat "\n" result);
  50 + print_endline "Starting concraft server 4"; *)
  51 + ()*)
  52 +
  53 +let rec process_concraft_result orth lemma interp others rev = function
  54 + [] -> List.rev ((orth,(lemma,interp) :: others) :: rev)
  55 + | "" :: l -> process_concraft_result orth lemma interp others rev l
  56 + | line :: l ->
  57 + (match Xstring.split_delim "\t" line with
  58 + [orth2;s] when s = "none" || s = "space" ->
  59 + if orth = "" then process_concraft_result orth2 lemma interp others rev l
  60 + else process_concraft_result orth2 "" "" [] ((orth,(lemma,interp) :: others) :: rev) l
  61 + | ["";lemma2;interp2] -> process_concraft_result orth lemma interp ((lemma2,interp2) :: others) rev l
  62 + | ["";lemma;interp;"disamb"] -> process_concraft_result orth lemma interp others rev l
  63 + | _ -> failwith ("process_concraft_result: " ^ line))
  64 +
  65 +let concraft_parse s =
  66 + let concraft_in, concraft_out, concraft_err =
  67 + Unix.open_process_full ("echo \"" ^ s ^ "\" | concraft-pl client")
  68 + [|"PATH=" ^ Sys.getenv "PATH"; "LANG=en_GB.UTF-8"|] in
  69 + let err_msg = String.concat "\n" (read_whole_channel concraft_err) in
  70 + let result = read_whole_channel concraft_in in
  71 + if err_msg <> "" then failwith err_msg else
  72 + process_concraft_result "" "" "" [] [] result
  73 +
  74 +let print_parsed_tokens l =
  75 + Xlist.iter l (fun (orth,l) ->
  76 + if l = [] then failwith "print_parsed_tokens" else
  77 + let lemma,interp = List.hd l in
  78 + print_endline (orth ^ "\t" ^ lemma ^ "\t" ^ interp))
  79 +
  80 +let _ =
  81 + print_parsed_tokens (concraft_parse "Ala ma kota.");
  82 + print_parsed_tokens (concraft_parse "Szpak frunie.");
  83 + print_parsed_tokens (concraft_parse "Miałem miał.");
  84 + print_parsed_tokens (concraft_parse "Kiedyś miałem kota.");
  85 + print_parsed_tokens (concraft_parse "Kiadyś kupiłem kota.");
  86 + ()
... ...
pre/makefile
1 1 OCAMLC=ocamlc
2 2 OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4   -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology -I ../parser
  4 +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology -I ../parser -I ../corpora
5 5 OCAMLFLAGS=$(INCLUDES) -g
6 6 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa inflexion.cmxa
7 7 INSTALLDIR=`ocamlc -where`
8 8  
9 9 WAL= paths.ml walTypes.ml walStringOf.ml preTypes.ml preWordnet.ml walParser.ml walTEI.ml walFrames.ml
10   -PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preSentences.ml preProcessing.ml
  10 +PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preSentences.ml ../corpora/CONLL.ml preProcessing.ml
11 11  
12 12 all:
13 13 $(OCAMLOPT) -o pre $(OCAMLOPTFLAGS) $(WAL) $(PRE)
14 14  
  15 +concraft_test: concraft_test.ml
  16 + $(OCAMLOPT) -o concraft_test $(OCAMLOPTFLAGS) concraft_test.ml
  17 +
15 18 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
16 19  
17 20 .mll.ml:
... ... @@ -33,4 +36,4 @@ all:
33 36 $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
34 37  
35 38 clean:
36   - rm -f *~ *.cm[oix] *.o pre
  39 + rm -f *~ *.cm[oix] *.o pre concraft_test
... ...
pre/paths.ml
... ... @@ -17,61 +17,83 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
  20 +type config =
  21 + {resources_path: string; walenty_filename: string; pre_port: int; pre_host: string;
  22 + results_path: string; max_no_solutions: int; lcg_timeout: float; lcg_no_nodes: int; no_processes: int;
  23 + concraft_enabled: bool; concraft_path: string;
  24 + mate_parser_enabled: bool; mate_parser_path: string;
  25 + swigra_enabled: bool; swigra_path: string;
  26 + sentence_selection_enabled: bool
  27 + }
20 28  
21   -let resources_path,walenty_filename,pre_port,pre_host,
22   - results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes =
23   - Xlist.fold (File.load_lines "../config") ("../resources/","/usr/share/walenty/walenty.xml",3258,"localhost","../resuls/",10,100.,10000000,4)
24   - (fun (resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes) s ->
  29 +let empty_config =
  30 + {resources_path="../resources/"; walenty_filename="/usr/share/walenty/walenty.xml"; pre_port=3258; pre_host="localhost";
  31 + results_path="../resuls/"; max_no_solutions=10; lcg_timeout=100.; lcg_no_nodes=10000000; no_processes=4;
  32 + concraft_enabled=false; concraft_path="";
  33 + mate_parser_enabled=false; mate_parser_path="";
  34 + swigra_enabled=false; swigra_path="";
  35 + sentence_selection_enabled=true
  36 + }
  37 +
  38 +let config = Xlist.fold (File.load_lines "../config") empty_config
  39 + (fun config s ->
25 40 match Str.split (Str.regexp "=") s with
26   - ["RESOURCES_PATH";resources_path] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes
27   - | ["WALENTY";walenty_filename] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes
28   - | ["PRE_PORT";pre_port] -> resources_path,walenty_filename,int_of_string pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes
29   - | ["PRE_HOST";pre_host] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes
30   - | ["RESULTS_PATH";results_path] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes
31   - | ["MAX_NO_SOLUTIONS";max_no_solutions] -> resources_path,walenty_filename,pre_port,pre_host,results_path,int_of_string max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes
32   - | ["LCG_TIMEOUT";lcg_timeout] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,float_of_string lcg_timeout,lcg_no_nodes,no_processes
33   - | ["LCG_NO_NODES";lcg_no_nodes] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,int_of_string lcg_no_nodes,no_processes
34   - | ["NO_PROCESSES";no_processes] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,int_of_string no_processes
35   - | [] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes
36   - | _ -> failwith ("invalid config format: " ^ s))
37   -
  41 + ["RESOURCES_PATH";resources_path] -> {config with resources_path=resources_path}
  42 + | ["WALENTY";walenty_filename] -> {config with walenty_filename=walenty_filename}
  43 + | ["PRE_PORT";pre_port] ->{config with pre_port=int_of_string pre_port}
  44 + | ["PRE_HOST";pre_host] -> {config with pre_host=pre_host}
  45 + | ["RESULTS_PATH";results_path] -> {config with results_path=results_path}
  46 + | ["MAX_NO_SOLUTIONS";max_no_solutions] -> {config with max_no_solutions=int_of_string max_no_solutions}
  47 + | ["LCG_TIMEOUT";lcg_timeout] -> {config with lcg_timeout=float_of_string lcg_timeout}
  48 + | ["LCG_NO_NODES";lcg_no_nodes] -> {config with lcg_no_nodes=int_of_string lcg_no_nodes}
  49 + | ["NO_PROCESSES";no_processes] -> {config with no_processes=int_of_string no_processes}
  50 + | ["CONCRAFT_ENABLED";concraft_enabled] -> {config with concraft_enabled=bool_of_string concraft_enabled}
  51 + | ["CONCRAFT_PATH";concraft_path] -> {config with concraft_path=concraft_path}
  52 + | ["MATE_PARSER_ENABLED";mate_parser_enabled] -> {config with mate_parser_enabled=bool_of_string mate_parser_enabled}
  53 + | ["MATE_PARSER_PATH";mate_parser_path] -> {config with mate_parser_path=mate_parser_path}
  54 + | ["SWIGRA_ENABLED";swigra_enabled] -> {config with swigra_enabled=bool_of_string swigra_enabled}
  55 + | ["SWIGRA_PATH";swigra_path] -> {config with swigra_path=swigra_path}
  56 + | ["SENTENCE_SELECTION_ENABLED";sentence_selection_enabled] -> {config with sentence_selection_enabled=bool_of_string sentence_selection_enabled}
  57 + | [] -> config
  58 + | l -> failwith ("invalid config format: " ^ s ^ " $" ^ String.concat "#" l ^ "$"))
  59 +
38 60 let alt_all = "alt1.tab"
39 61 let dict_all = "dict1.tab"
40 62 let rules_all = "rules1.tab"
41 63  
42 64 (* let resources_path = "../resources/" *)
43   -let sgjp_path = resources_path ^ "SGJP/"
  65 +let sgjp_path = config.resources_path ^ "SGJP/"
44 66  
45 67 (* let walenty_filename = "/usr/share/walenty/walenty.xml" *)
46 68  
47   -(* UWAGA: w razie wymiany słownika trzeba przekopiować definicję adv(pron),nonch,possp oraz wygenerować fixed.tab *)
48   -let realizations_filename = resources_path ^ "Walenty/phrase_types_expand_20150909.txt"
  69 +(* UWAGA: w razie wymiany słownika trzeba przekopiować definicję adv(pron),nonch,possp oraz wygenerować fixed.tab *)
  70 +let realizations_filename = config.resources_path ^ "Walenty/phrase_types_expand_20150909.txt"
49 71  
50   -let pre_port = 3258
  72 +let pre_port = (*3258*)3158
51 73 let pre_host = "localhost"
52 74 (* let pre_host = "wloczykij" *)
53   -(* let server_port = 3259 *)
  75 +let server_port = (*3259*)3159
54 76 (* let server_host = "localhost" *)
55   -(* let server_host = "wloczykij" *)
  77 +let server_host = "wloczykij"
56 78  
57 79 let proper_names_filename = sgjp_path ^ "proper_names_sgjp_polimorf_20151020.tab"
58   -let proper_names_filename2 = resources_path ^ "proper_names_20160104.tab"
59   -
60   -let subst_uncountable_lexemes_filename = resources_path ^ "subst_uncountable.dat"
61   -let subst_uncountable_lexemes_filename2 = resources_path ^ "subst_uncountable_stare.dat"
62   -let subst_container_lexemes_filename = resources_path ^ "subst_container.dat"
63   -let subst_numeral_lexemes_filename = resources_path ^ "subst_numeral.dat"
64   -let subst_time_lexemes_filename = resources_path ^ "subst_time.dat"
  80 +let proper_names_filename2 = config.resources_path ^ "proper_names_20160104.tab"
  81 +
  82 +let subst_uncountable_lexemes_filename = config.resources_path ^ "subst_uncountable.dat"
  83 +let subst_uncountable_lexemes_filename2 = config.resources_path ^ "subst_uncountable_stare.dat"
  84 +let subst_container_lexemes_filename = config.resources_path ^ "subst_container.dat"
  85 +let subst_numeral_lexemes_filename = config.resources_path ^ "subst_numeral.dat"
  86 +let subst_time_lexemes_filename = config.resources_path ^ "subst_time.dat"
65 87  
66   -let rzeczownik_filename = resources_path ^ "plWordnet/rzeczownik.tab"
67   -let czasownik_filename = resources_path ^ "plWordnet/czasownik.tab"
68   -let przymiotnik_filename = resources_path ^ "plWordnet/przymiotnik.tab"
69   -let synsets_filename = resources_path ^ "plWordnet/synsets.tab"
70   -let hipero_filename = resources_path ^ "plWordnet/hipero.tab"
71   -let predef_filename = resources_path ^ "predef_prefs.tab"
72   -let proper_classes_filename = resources_path ^ "proper_classes.tab"
  88 +let rzeczownik_filename = config.resources_path ^ "plWordnet/rzeczownik.tab"
  89 +let czasownik_filename = config.resources_path ^ "plWordnet/czasownik.tab"
  90 +let przymiotnik_filename = config.resources_path ^ "plWordnet/przymiotnik.tab"
  91 +let synsets_filename = config.resources_path ^ "plWordnet/synsets.tab"
  92 +let hipero_filename = config.resources_path ^ "plWordnet/hipero.tab"
  93 +let predef_filename = config.resources_path ^ "predef_prefs.tab"
  94 +let proper_classes_filename = config.resources_path ^ "proper_classes.tab"
73 95  
74   -let brev_filename = resources_path ^ "brev.tab"
75   -let lemma_frequencies_filename = resources_path ^ "NKJP1M/NKJP1M-lemma-freq.tab"
  96 +let brev_filename = config.resources_path ^ "brev.tab"
  97 +let lemma_frequencies_filename = config.resources_path ^ "NKJP1M/NKJP1M-lemma-freq.tab"
76 98  
77 99 let mte_filename = sgjp_path ^ "mte_20151215.tab"
... ...
pre/preProcessing.ml
... ... @@ -582,9 +582,10 @@ let parse query =
582 582 let paths = PreLemmatization.combine_interps paths in
583 583 (* print_endline (PrePaths.to_string paths); *)*)
584 584  
585   -let parse_conll tokens dep_paths =
  585 +let parse_conll tokens dep_paths = (* FIXME: sprawdzić, czy zachowana jest kolejność elementów paths !!! *)
586 586 let paths = List.rev (Int.fold 1 (Array.length dep_paths - 1) [] (fun paths conll_id ->
587   - ExtArray.get tokens conll_id :: paths)) in
  587 + let id,_,_ = dep_paths.(conll_id) in
  588 + ExtArray.get tokens id :: paths)) in
588 589 (* print_endline "a12"; *)
589 590 let paths = find_proper_names paths in
590 591 (* print_endline "a13"; *)
... ... @@ -600,26 +601,120 @@ let parse_conll tokens dep_paths =
600 601 let paths = PreSemantics.assign_semantics paths in
601 602 (* print_endline "a16"; *)
602 603 let _ = Xlist.fold paths 1 (fun conll_id t ->
603   - ExtArray.set tokens conll_id t;
  604 + let id,_,_ = dep_paths.(conll_id) in
  605 + ExtArray.set tokens id t;
604 606 conll_id + 1) in
605 607 ()
606 608  
  609 +(*
  610 +UWAGA: Aby korzytać z concrafta trzeba najpierw postawić serwer wpisując z linii poleceń:
  611 +concraft-pl server --inmodel ../concraft/nkjp-model-0.2.gz
  612 +*)
  613 +
  614 +let read_whole_channel c =
  615 + let r = ref [] in
  616 + try
  617 + while true do
  618 + r := (input_line c) :: !r
  619 + done;
  620 + !r
  621 + with End_of_file -> List.rev (!r)
  622 +
  623 +let rec process_concraft_result orth lemma interp others rev = function
  624 + [] -> List.rev ((orth,(lemma,interp) :: others) :: rev)
  625 + | "" :: l -> process_concraft_result orth lemma interp others rev l
  626 + | line :: l ->
  627 + (match Xstring.split_delim "\t" line with
  628 + [orth2;s] when s = "none" || s = "space" ->
  629 + if orth = "" then process_concraft_result orth2 lemma interp others rev l
  630 + else process_concraft_result orth2 "" "" [] ((orth,(lemma,interp) :: others) :: rev) l
  631 + | ["";lemma2;interp2] -> process_concraft_result orth lemma interp ((lemma2,interp2) :: others) rev l
  632 + | ["";lemma;interp;"disamb"] -> process_concraft_result orth lemma interp others rev l
  633 + | _ -> failwith ("process_concraft_result: " ^ line))
  634 +
  635 +let concraft_parse s =
  636 + let concraft_in, concraft_out, concraft_err =
  637 + Unix.open_process_full ("echo \"" ^ s ^ "\" | concraft-pl client")
  638 + [|"PATH=" ^ Sys.getenv "PATH"; "LANG=en_GB.UTF-8"|] in
  639 + let err_msg = String.concat "\n" (read_whole_channel concraft_err) in
  640 + let result = read_whole_channel concraft_in in
  641 + if err_msg <> "" then failwith err_msg else
  642 + process_concraft_result "" "" "" [] [] result
  643 +
  644 +(*let rec load_concraft_sentence white orth rev ic =
  645 + (* print_endline "load_concraft_sentence 1"; *)
  646 + (* print_endline ("concraft error message: " ^ input_line concraft_err); *)
  647 + let s = input_line ic in
  648 + (* print_endline ("load_concraft_sentence: " ^ s); *)
  649 + if s = "" then List.rev rev else
  650 + match Xstring.split_delim "\t" s with
  651 + [""; lemma; interp; "disamb"] -> load_concraft_sentence "" "" ((white,orth,lemma,interp) :: rev) ic
  652 + | [""; lemma; interp] -> load_concraft_sentence white orth rev ic
  653 + | [orth; white] -> load_concraft_sentence white orth rev ic
  654 + | _ -> failwith ("load_concraft_sentence: " ^ s)*)
  655 +
  656 +let make_token (orth,l) =
  657 + if l = [] then failwith "make_token 1" else
  658 + let lemma,interp = List.hd l in
  659 + let cat,interp = match Xstring.split ":" interp with
  660 + cat :: l -> cat, [Xlist.map l (fun tag -> [tag])]
  661 + | _ -> failwith ("make_token 2: " ^ orth ^ " " ^ lemma ^ " " ^ interp) in
  662 + {empty_token with orth = orth; token = Lemma(lemma,cat,interp)}
  663 +
  664 +let parse_mate tokens pbeg s =
  665 + (* print_endline ("parse_mate: " ^ s); *)
  666 + (* Printf.fprintf concraft_out "%s\n\n%!" s;
  667 + let l = load_concraft_sentence "" "" [] concraft_in in *)
  668 + let l = concraft_parse s in
  669 + let l = Xlist.map l make_token in
  670 + let l = {empty_token with token = Interp "<conll_root>"} :: l in
  671 + let l = Xlist.map l (fun t -> ExtArray.add tokens t,-1,"") in
  672 + let _ = CONLL.establish_for_token pbeg s tokens (List.tl l) in
  673 + let dep_paths = Array.of_list l in
  674 + parse_conll tokens dep_paths;
  675 + dep_paths
  676 +
  677 +let rec parse_mate_sentence tokens mode pbeg = function
  678 + RawSentence s -> if mode <> Mate || not Paths.config.Paths.concraft_enabled then RawSentence s else DepSentence (parse_mate tokens pbeg s)
  679 + | StructSentence(paths,last) -> StructSentence(paths,last)
  680 + | DepSentence(paths) -> DepSentence(paths)
  681 + | QuotedSentences sentences ->
  682 + QuotedSentences(Xlist.map sentences (fun p ->
  683 + {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix;
  684 + psentence=parse_mate_sentence tokens mode pbeg p.PreTypes.psentence}))
  685 + | AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) ->
  686 + mode, parse_mate_sentence tokens mode pbeg sentence))
  687 +
  688 +let parse_mate_sentences tokens sentences =
  689 + Xlist.map sentences (fun p ->
  690 + {p with psentence=parse_mate_sentence tokens Struct p.pbeg p.psentence})
  691 +
607 692 let parse_text = function
608 693 RawText query ->
  694 + (* print_endline ("parse_text: " ^ query); *)
609 695 let tokens = ExtArray.make 100 empty_token in
610 696 let _ = ExtArray.add tokens empty_token in (* id=0 jest zarezerwowane dla pro; FIXME: czy to jest jeszcze aktualne? *)
611   - let paragraphs = Xlist.map (Xstring.split "\n" query) (fun paragraph ->
  697 + let paragraphs = Xstring.split "\n\\|\r" query in
  698 + let paragraphs = List.rev (Xlist.fold paragraphs [] (fun l -> function "" -> l | s -> s :: l)) in
  699 + let n = if Xlist.size paragraphs = 1 then 0 else 1 in
  700 + let paragraphs,_ = Xlist.fold paragraphs ([],n) (fun (paragraphs,n) paragraph ->
612 701 let paths = parse paragraph in
613   - let sentences = PreSentences.split_into_sentences paragraph tokens paths in
614   - AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) in
  702 + (* print_endline "parse_text 1"; *)
  703 + let pid = if n = 0 then "" else string_of_int n ^ "_" in
  704 + let sentences = PreSentences.split_into_sentences pid paragraph tokens paths in
  705 + (* print_endline "parse_text 2"; *)
  706 + let sentences = parse_mate_sentences tokens sentences in
  707 + (* print_endline "parse_text 3"; *)
  708 + (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1) in
615 709 AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs, tokens)]
616 710 | AltText[Raw,RawText query;CONLL,StructText([
617   - StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence(_,dep_paths)]} as p]],tokens)] ->
  711 + StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]],tokens)] ->
618 712 parse_conll tokens dep_paths;
619 713 let paths = parse query in
620   - let sentences = PreSentences.split_into_sentences query tokens paths in
621   - let conll = StructParagraph[{p with psentence = AltSentence[Raw, RawSentence text;
622   - Mate, DepSentence("M",dep_paths); CONLL, DepSentence("C",dep_paths)]}] in
  714 + let sentences = PreSentences.split_into_sentences "" query tokens paths in
  715 + let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in
  716 + let conll = StructParagraph[{p with psentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths]
  717 + @ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else [])}] in
623 718 AltText[Raw,RawText query; Struct, StructText([
624 719 AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]],tokens)]
625 720 | _ -> failwith "parse_text: not implemented"
... ... @@ -632,9 +727,9 @@ let rec main_loop in_chan out_chan =
632 727 (try
633 728 (* let time0 = Sys.time () in *)
634 729 let utime0 = Unix.gettimeofday () in
635   - (* print_endline "main_loop 3a"; *)
  730 + (* print_endline "main_loop 3a"; *)
636 731 let text = parse_text query in
637   - (* print_endline "main_loop 4a"; *)
  732 + (* print_endline "main_loop 4a"; *)
638 733 (* let time2 = Sys.time () in *)
639 734 let utime2 = Unix.gettimeofday () in
640 735 (* Printf.printf "time=%f utime=%f\n%!" (time2 -. time0) (utime2 -. utime0); *)
... ...
pre/preSentences.ml
... ... @@ -145,20 +145,18 @@ let find_tokens_in_chart tokens chart lnode rnode cat =
145 145 id
146 146 (id,lnode,rnode) :: paths)) in*)
147 147  
148   -let rec add_struct_sentence_ids_rec n sentences =
  148 +let rec add_struct_sentence_ids_rec pid n sentences =
149 149 Xlist.fold sentences ([],n) (fun (l,n) -> function
150   - {psentence=AltSentence[Raw,s;ENIAM,StructSentence(_,paths,last)]} as p ->
151   - {p with psentence=AltSentence[Raw,s;ENIAM,StructSentence("E" ^ string_of_int n,paths,last)]} :: l, n+1
152   - | {psentence=AltSentence[Raw,s;ENIAM,QuotedSentences sentences]} as p ->
153   - let sentences, n = add_struct_sentence_ids_rec n sentences in
154   - {p with psentence=AltSentence[Raw,s;ENIAM,QuotedSentences (List.rev sentences)]} :: l, n+1
155   - | _ -> failwith "add_struct_sentence_ids")
  150 + {psentence=AltSentence[Raw,s;Struct,QuotedSentences sentences]} as p ->
  151 + let sentences, n = add_struct_sentence_ids_rec pid n sentences in
  152 + {p with psentence=AltSentence[Raw,s;Struct,QuotedSentences (List.rev sentences)]} :: l, n
  153 + | p -> {p with pfile_prefix=pid ^ string_of_int n} :: l, n+1)
156 154  
157   -let add_struct_sentence_ids sentences =
  155 +let add_struct_sentence_ids pid sentences =
158 156 match sentences with
159   - [{psentence=AltSentence[Raw,s;ENIAM,StructSentence(_,paths,last)]} as p] ->
160   - [{p with psentence=AltSentence[Raw,s;ENIAM,StructSentence("E",paths,last)]}]
161   - | _ -> List.rev (fst (add_struct_sentence_ids_rec 1 sentences))
  157 + [{psentence=AltSentence[Raw,_;Struct,QuotedSentences _]}] -> List.rev (fst (add_struct_sentence_ids_rec pid 1 sentences))
  158 + | [p] -> [{p with pfile_prefix=pid}]
  159 + | _ -> List.rev (fst (add_struct_sentence_ids_rec pid 1 sentences))
162 160  
163 161 let prepare_indexes paths =
164 162 let set = Xlist.fold paths IntSet.empty (fun set (_,beg,next) ->
... ... @@ -181,20 +179,22 @@ let rec extract_sentences_rec tokens id =
181 179 match t.token with
182 180 Tokens("sentence",ids) ->
183 181 let paths,last = make_paths tokens ids in
184   - [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next;
185   - psentence=AltSentence[Raw,RawSentence t.orth;
186   - ENIAM,StructSentence("",paths,last)]}]
  182 + [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; pfile_prefix="";
  183 + psentence=AltSentence([Raw,RawSentence t.orth; ENIAM,StructSentence(paths,last)] @
  184 + (if Paths.config.Paths.mate_parser_enabled then [Mate,RawSentence t.orth] else []) @
  185 + (if Paths.config.Paths.swigra_enabled then [Swigra,RawSentence t.orth] else [])(* @
  186 + (if Paths.config.Paths.polfie_enabled then [POLFIE,RawSentence t.orth] else []) *))}]
187 187 | Tokens("quoted_sentences",ids) ->
188   - [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next;
  188 + [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; pfile_prefix="";
189 189 psentence=AltSentence[Raw,RawSentence t.orth;
190   - ENIAM,QuotedSentences(List.sort par_compare (List.flatten (Xlist.rev_map ids (extract_sentences_rec tokens))))]}]
  190 + Struct,QuotedSentences(List.sort par_compare (List.flatten (Xlist.rev_map ids (extract_sentences_rec tokens))))]}]
191 191 | _ -> []
192 192  
193   -let extract_sentences tokens chart last =
  193 +let extract_sentences pid tokens chart last =
194 194 let ids = find_tokens_in_chart tokens chart 0 last "query" in
195 195 let sentences = List.sort par_compare (List.flatten (Xlist.rev_map ids (fun id ->
196 196 extract_sentences_rec tokens id))) in
197   - add_struct_sentence_ids sentences
  197 + add_struct_sentence_ids pid sentences
198 198 (* let paths = Int.fold 0 last [] (fun paths lnode ->
199 199 Xlist.fold chart.(lnode) paths (fun paths (id,rnode) ->
200 200 (id,lnode,rnode) :: paths)) in
... ... @@ -255,7 +255,7 @@ let make_chart paths last =
255 255 chart.(beg) <- (id,next) :: chart.(beg));
256 256 chart
257 257  
258   -let split_into_sentences paragraph tokens paths =
  258 +let split_into_sentences pid paragraph tokens paths =
259 259 let paths = make_ids tokens paths in
260 260 let paths,last = prepare_indexes paths in
261 261 let chart = make_chart paths last in
... ... @@ -265,4 +265,4 @@ let split_into_sentences paragraph tokens paths =
265 265 find_sentence par tokens chart last;
266 266 find_quoted_sentences par tokens chart last;
267 267 find_query par tokens chart last;
268   - extract_sentences tokens chart last
  268 + extract_sentences pid tokens chart last
... ...
pre/preTypes.ml
... ... @@ -80,6 +80,7 @@ type token =
80 80 informacje o poszczególnych tokenach *)
81 81 and token_record = {
82 82 orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token *)
  83 + corr_orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token z poprawionymi błędami *)
83 84 beg: int; (* pozycja początkowa tokenu względem początku akapitu *)
84 85 len: int; (* długość tokenu *)
85 86 next: int; (* pozycja początkowa następnego tokenu względem początku akapitu *)
... ... @@ -112,12 +113,12 @@ let empty_labels = {
112 113 }
113 114  
114 115 let empty_token = {
115   - orth="";beg=0;len=0;next=0; token=Symbol ""; weight=0.; e=empty_labels;
  116 + orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; weight=0.; e=empty_labels;
116 117 attrs=[]; valence=[]; simple_valence=[]; senses=[];
117 118 lroles="",""; semantics=Normal}
118 119  
119 120 type mode =
120   - Raw | Struct | CONLL | ENIAM | Mate
  121 + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE
121 122  
122 123 (* warstwy nkjp1m do analizy:
123 124 header
... ... @@ -133,14 +134,14 @@ ann_named
133 134 type sentence =
134 135 RawSentence of string
135 136 (* | CONLL of conll list *)
136   - | StructSentence of string * (int * int * int) list * int (* file_prefix * (id * lnode * rnode) list * last *)
137   - | DepSentence of string * (int * int * string) array (* file_prefix * (id * super * label) conll_id *)
  137 + | StructSentence of (int * int * int) list * int (* (id * lnode * rnode) list * last *)
  138 + | DepSentence of (int * int * string) array (* (id * super * label) conll_id *)
138 139 | QuotedSentences of paragraph_record list
139 140 (* | NKJP1M of nkjp1m list *)
140 141 (* | Skladnica of skladnica_tree *)
141 142 | AltSentence of (mode * sentence) list (* string = etykieta np raw, nkjp, krzaki *)
142 143  
143   -and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence} (* beg i len liczone po znakach unicode ( * 100 ???) *)
  144 +and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence; pfile_prefix: string} (* beg i len liczone po znakach unicode ( * 100 ???) *)
144 145  
145 146 and paragraph =
146 147 RawParagraph of string
... ...
pre/walTEI.ml
... ... @@ -21,23 +21,23 @@
21 21 Autor: Maciej Hołubowicz
22 22 *)
23 23  
24   -let przejdz funkcja poczym =
  24 +let przejdz funkcja poczym =
25 25 let _ = List.rev (List.fold_left (fun l nazwa -> funkcja nazwa :: l) [] poczym) in
26 26 ()
27 27  
28 28 (*zwraca liste zwróconych wartosci przez funkcje*)
29   -let przejdz_lista funkcja poczym =
  29 +let przejdz_lista funkcja poczym =
30 30 List.rev (List.fold_left (fun l nazwa -> funkcja nazwa :: l) [] poczym)
31 31  
32   -let przejdz_lista_second funkcja poczym =
  32 +let przejdz_lista_second funkcja poczym =
33 33 List.rev (List.fold_left (fun l nazwa -> (snd (funkcja nazwa)) :: l) [] poczym)
34   -
  34 +
35 35 (*łączy listy zwróconych wartości przez funkcje*)
36   -let przejdz_scal funkcja poczym =
  36 +let przejdz_scal funkcja poczym =
37 37 List.rev (List.fold_left (fun l nazwa -> funkcja nazwa @ l) [] poczym)
38 38  
39 39 (*zapisuje wynik wywołania do zmiennej i wywołuje ze zmienną*)
40   -let przejdz_zapisz funkcja zmienna poczym =
  40 +let przejdz_zapisz funkcja zmienna poczym =
41 41 List.fold_left (fun zmienna nazwa -> funkcja zmienna nazwa) zmienna poczym
42 42  
43 43  
... ... @@ -49,7 +49,7 @@ let rec last l =
49 49 match l with
50 50 | [a] -> a
51 51 | a::b -> last b
52   - | _ -> failwith "pusta lista"
  52 + | _ -> failwith "pusta lista"
53 53  
54 54 let parse_full_id s =
55 55 if String.length s = 0 then empty_id else
... ... @@ -71,10 +71,10 @@ let parse_id s =
71 71 [s;suf] -> s,suf
72 72 | _ -> failwith "zła ilość '-'" in
73 73 let id = {hash = hash; suffix = suf; numbers = (Str.split (Str.regexp "\\.") s)} in
74   - {id with numbers = [last id.numbers]}
  74 + {id with numbers = [last id.numbers]}
  75 +
  76 +
75 77  
76   -
77   -
78 78 (* Początek kodu do wczytywania syntaticLayer *)
79 79  
80 80 type preposition = string
... ... @@ -94,7 +94,7 @@ and phrase =
94 94 | ComprepNP of preposition
95 95 | CP of comp
96 96 | NCP of case * comp
97   - | PrepNCP of preposition * case * comp
  97 + | PrepNCP of preposition * case * comp
98 98 | InfP of aspect
99 99 | XP of category
100 100 | AdvP of string
... ... @@ -110,15 +110,15 @@ and phrase =
110 110 | ComparP of string
111 111 | Nonch
112 112 | Or
113   - | Refl
114   - | Recip
  113 + | Refl
  114 + | Recip
115 115 | E
116 116 | DistrP
117   - | PossP
  117 + | PossP
118 118 | FixedP of phrase list * string
119 119 | Lex of lex
120 120 | Null
121   -
  121 +
122 122 and lex = {
123 123 phrases_list: phrase list;
124 124 lemma: string * string * lemmas;
... ... @@ -132,30 +132,30 @@ and lex = {
132 132 }
133 133 and position = {psn_id: id; gf: string; phrases: (id * phrase) list; control: string list}
134 134  
135   -let empty_lex = {phrases_list=[]; lemma="","",[]; numeral_lemma="","",[]; negation="";
  135 +let empty_lex = {phrases_list=[]; lemma="","",[]; numeral_lemma="","",[]; negation="";
136 136 degree=""; number=""; reflex=""; gender=""; modification = "",[]}
137 137  
138   -let load_case = function
  138 +let load_case = function
139 139 | Xml.Element("f", ["name","case"], [Xml.Element("symbol", ["value",value], [])]) ->
140 140 value
141 141 | xml -> failwith ("load_case:\n " ^ Xml.to_string_fmt xml)
142 142  
143   -let load_preposition = function
  143 +let load_preposition = function
144 144 | Xml.Element("f", ["name","preposition"], [Xml.Element("symbol", ["value",value], [])]) ->
145 145 value
146 146 | xml -> failwith ("load_preposition:\n " ^ Xml.to_string_fmt xml)
147   -
148   -let load_complex_preposition = function
  147 +
  148 +let load_complex_preposition = function
149 149 | Xml.Element("f", ["name","complex_preposition"], [Xml.Element("symbol", ["value",value], [])]) ->
150 150 value
151 151 | xml -> failwith ("load_complex_preposition:\n " ^ Xml.to_string_fmt xml)
152 152  
153   -let load_aspect = function
  153 +let load_aspect = function
154 154 | Xml.Element("f", ["name","aspect"], [Xml.Element("symbol", ["value",value], [])]) ->
155 155 value
156 156 | xml -> failwith ("load_aspect:\n " ^ Xml.to_string_fmt xml)
157 157  
158   -let load_advp = function
  158 +let load_advp = function
159 159 | Xml.Element("f", ["name","category"], [Xml.Element("symbol", ["value",value], [])]) ->
160 160 value
161 161 | xml -> failwith ("load_advp:\n " ^ Xml.to_string_fmt xml)
... ... @@ -164,14 +164,14 @@ let load_type_constrains = function
164 164 | Xml.Element("symbol", ["value",value], []) ->
165 165 value
166 166 | xml -> failwith ("load_type_constrains:\n " ^ Xml.to_string_fmt xml)
167   -
168   -
169   - (*type może mieć dodatkowo "constraints", czego chyba nie ma w dokumentacji,
  167 +
  168 +
  169 + (*type może mieć dodatkowo "constraints", czego chyba nie ma w dokumentacji,
170 170 jest to lista elementów w stylu: Xml.Element("symbol", ["value",value], []) *)
171 171 let load_type = function
172 172 | Xml.Element("f", ["name","type"],[Xml.Element("fs", ["type","type_def"], x)]) ->
173   - begin
174   - match x with
  173 + begin
  174 + match x with
175 175 | [Xml.Element("f",["name","conjunction"],
176 176 [Xml.Element("symbol",["value",value],[])])] ->
177 177 value, []
... ... @@ -184,23 +184,23 @@ let load_type = function
184 184 end
185 185 | xml -> failwith ("load_type:\n " ^ Xml.to_string_fmt xml)
186 186 (*Printf.printf "%s\n" (Xml.to_string_fmt xml)*)
187   -
  187 +
188 188 (*używam w load_lex*)
189 189 let load_lemmas_set = function
190 190 | Xml.Element("string",[], [Xml.PCData mstring]) ->
191 191 mstring
192   - | xml -> failwith ("load_lemmas_set:\n " ^ Xml.to_string_fmt xml)
193   -
  192 + | xml -> failwith ("load_lemmas_set:\n " ^ Xml.to_string_fmt xml)
  193 +
194 194 (* category ma dodakowo "constrains", czego chyba nie ma w dokumentacji
195 195 co więcej constrains zawiera w sobie zbiór typu phrases, więc jest rekurencyjne*)
196   -let rec load_category = function
  196 +let rec load_category = function
197 197 | Xml.Element("f", ["name","category"], [Xml.Element("fs", ["type","category_def"], x)]) ->
198   - begin
199   - match x with
200   - | [Xml.Element("f",["name","name"],
  198 + begin
  199 + match x with
  200 + | [Xml.Element("f",["name","name"],
201 201 [Xml.Element("symbol",["value",value],[])])] ->
202 202 value, []
203   - | [Xml.Element("f",["name","name"],
  203 + | [Xml.Element("f",["name","name"],
204 204 [Xml.Element("symbol",["value",value],[])]);
205 205 Xml.Element("f",["name","constraints"],
206 206 [Xml.Element("vColl",["org","set"],set)])] ->
... ... @@ -215,25 +215,25 @@ and load_fixed = function (* również wzajemnie rekurencyjne z load_phrase*)
215 215 FixedP(przejdz_lista_second load_phrase set, stringg)
216 216 | _ -> failwith "load_fixed:\n "
217 217  
218   -
  218 +
219 219 and load_modification_def = function (*pomocnicza do load_lex *)
220   - | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])])] ->
  220 + | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])])] ->
221 221 value, []
222 222 | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])]);
223   - Xml.Element("f",["name","positions"],[Xml.Element("vColl",["org","set"],set)])] ->
  223 + Xml.Element("f",["name","positions"],[Xml.Element("vColl",["org","set"],set)])] ->
224 224 value, przejdz_lista load_position set
225 225 | x -> Printf.printf "%s\n" (Xml.to_string_fmt (List.hd x));
226   - failwith "load_modification_def:\n"
227   -
  226 + failwith "load_modification_def:\n"
  227 +
228 228 and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*)
229   - | Xml.Element("f", ["name","argument"],[set]) ->
  229 + | Xml.Element("f", ["name","argument"],[set]) ->
230 230 {arg with phrases_list = [snd (load_phrase set)]}
231 231 (* to samo co wyżej, tylko lista*)
232 232 | Xml.Element("f", ["name","arguments"],[Xml.Element("vColl",["org","set"],set)]) ->
233 233 {arg with phrases_list = przejdz_lista_second load_phrase set}
234 234 | Xml.Element("f", ["name","modification"],[Xml.Element("fs", ["type","modification_def"],x)]) ->
235 235 {arg with modification = load_modification_def x}
236   -
  236 +
237 237 | Xml.Element("f", ["name","lemma"],[Xml.Element("fs", ["type","lemma_def"],
238 238 [Xml.Element("f",["name","selection_mode"],[Xml.Element("symbol", ["value",value1],[])]);
239 239 Xml.Element("f",["name","cooccurrence"],[Xml.Element("symbol", ["value",value2],[])]);
... ... @@ -244,113 +244,113 @@ and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*)
244 244 Xml.Element("f",["name","cooccurrence"],[Xml.Element("symbol", ["value",value2],[])]);
245 245 Xml.Element("f",["name","lemmas"],[Xml.Element("vColl", ["org","set"],lemmas)])])]) ->
246 246 {arg with numeral_lemma = value1, value2, przejdz_lista load_lemmas_set lemmas}
247   -
  247 +
248 248 | Xml.Element("f", ["name","negation"],[Xml.Element("symbol",["value",value],[])]) ->
249 249 {arg with negation = value}
250 250 | Xml.Element("f", ["name","degree"],[Xml.Element("symbol",["value",value],[])]) ->
251 251 {arg with degree = value}
252   - | Xml.Element("f", ["name","number"],[Xml.Element("symbol",["value",value],[])]) ->
  252 + | Xml.Element("f", ["name","number"],[Xml.Element("symbol",["value",value],[])]) ->
253 253 {arg with number = value}
254 254 | Xml.Element("f", ["name","reflex"],[Xml.Element("symbol",["value",value],[])]) ->
255 255 {arg with reflex = value}
256 256 | Xml.Element("f", ["name","reflex"],[]) ->
257 257 {arg with reflex = ""}
258 258 (*niby set, ale zawsze jest jeden element*)
259   - | Xml.Element("f", ["name","gender"],
260   - [Xml.Element("vColl", ["org","set"],[Xml.Element("symbol",["value",value],[])])]) ->
  259 + | Xml.Element("f", ["name","gender"],
  260 + [Xml.Element("vColl", ["org","set"],[Xml.Element("symbol",["value",value],[])])]) ->
261 261 {arg with gender = value}
262   - | xml ->
  262 + | xml ->
263 263 Printf.printf "%s\n" (Xml.to_string_fmt xml);
264 264 failwith "load_lex:\n "
265 265  
266   -and load_phrase xml:id * phrase =
267   - let id, idtype, x =
  266 +and load_phrase xml:id * phrase =
  267 + let id, idtype, x =
268 268 begin
269   - match xml with
  269 + match xml with
270 270 | Xml.Element("fs", ["xml:id", _id; "type", _idtype], _x) -> (_id, _idtype, _x)
271 271 | Xml.Element("fs", ["type", _idtype], _x) -> ("", _idtype, _x)
272 272 | _ -> failwith "load_phrase let id,idtype...\n"
273 273 end;
274 274 in
275 275 let id = parse_id id in
276   - match idtype, x with
  276 + match idtype, x with
277 277 | "np", [a] ->
278 278 id, NP(load_case a);
279   - | "prepnp", [a;b] ->
  279 + | "prepnp", [a;b] ->
280 280 id, PrepNP(load_preposition a, load_case b)
281   - | "adjp", [a] ->
  281 + | "adjp", [a] ->
282 282 id, AdjP(load_case a)
283   - | "prepadjp", [a;b] ->
  283 + | "prepadjp", [a;b] ->
284 284 id, PrepAdjP(load_preposition a, load_case b)
285   - | "comprepnp", [a] ->
  285 + | "comprepnp", [a] ->
286 286 id, ComprepNP(load_complex_preposition a)
287   - | "cp", [a] ->
  287 + | "cp", [a] ->
288 288 id, CP(load_type a)
289   - | "ncp", [a;b] ->
  289 + | "ncp", [a;b] ->
290 290 id, NCP(load_case a, load_type b)
291   - | "prepncp", [a;b;c] ->
  291 + | "prepncp", [a;b;c] ->
292 292 id, PrepNCP(load_preposition a, load_case b, load_type c)
293   - | "infp", [a] ->
  293 + | "infp", [a] ->
294 294 id, InfP(load_aspect a)
295   - | "xp", [a] ->
  295 + | "xp", [a] ->
296 296 id, XP(load_category a)
297   - | "advp", [a] ->
  297 + | "advp", [a] ->
298 298 id, AdvP(load_advp a)
299   -
  299 +
300 300 | "nonch", [] -> id, Nonch
301 301 | "or", [] -> id, Or
302 302 | "refl", [] -> id, Refl
303 303 | "E", [] -> id, E
304   -
305   - | "lex", x ->
  304 +
  305 + | "lex", x ->
306 306 id, Lex(przejdz_zapisz load_lex empty_lex x)
307 307 (*
308 308 Printf.printf "%d\n" (List.length x);
309 309 Printf.printf "%s\n" (Xml.to_string_fmt xml);
310 310 *)
311   - | "fixed", x ->
  311 + | "fixed", x ->
312 312 id, load_fixed x
313   -
  313 +
314 314 (*dodatkowe, nie ma ich w dokmentacji a są na poziomie 0 load_phrase*)
315 315 | "possp", [] -> id, PossP
316 316 | "recip", [] -> id, Recip
317 317 | "distrp", [] -> id, DistrP
318 318 | "compar", [Xml.Element("f",["name","compar_category"],
319 319 [Xml.Element("symbol",["value",value],[])])] -> id, ComparP(value)
320   -
  320 +
321 321 (* dodatkowe: (gerp i prepgerp) są w dokumentacji,
322 322 i pojawiają się po rekurencyjnym wywołaniu z funkcji load_lex
323 323 podobne kolejno do: np, prepnp*)
324 324 | "gerp", [a] ->
325 325 id, GerP(load_case a)
326   - | "prepgerp", [a;b] ->
  326 + | "prepgerp", [a;b] ->
327 327 id, PrepGerP(load_preposition a, load_case b)
328 328 (*inne dodatkowe które też są powywołaniu z load_lex*)
329   - | "nump", [a] ->
  329 + | "nump", [a] ->
330 330 id, NumP(load_case a)
331 331 | "prepnump", [a;b] ->
332 332 id, PrepNumP(load_preposition a, load_case b)
333   - | "ppasp", [a] ->
  333 + | "ppasp", [a] ->
334 334 id, PpasP(load_case a)
335 335 | "prepppasp", [a;b] ->
336 336 id, PrepPpasP(load_preposition a, load_case b)
337 337 | "qub", [] ->
338 338 id, Qub
339   -
  339 +
340 340 (*dodatkowe, po wywołaniu z load_position *)
341   - | "pactp", [a] ->
  341 + | "pactp", [a] ->
342 342 id, PactP(load_case a)
343   -
344   -
  343 +
  344 +
345 345 | _ -> failwith ("load_phrase match:\n " ^ Xml.to_string_fmt xml)
346   -
347   -
  346 +
  347 +
348 348 and load_control = function
349 349 | Xml.Element("symbol", ["value", value], []) ->
350 350 value
351   - | xml -> failwith ("load_control:\n " ^ Xml.to_string_fmt xml)
352   -
353   -and load_position_info arg = function
  351 + | xml -> failwith ("load_control:\n " ^ Xml.to_string_fmt xml)
  352 +
  353 +and load_position_info arg = function
354 354 | Xml.Element("f",["name", "function"], [Xml.Element("symbol",["value", value],[])]) ->
355 355 {arg with gf = value}
356 356 | Xml.Element("f",["name", "phrases"], [Xml.Element("vColl",["org", "set"], phrases_set)]) ->
... ... @@ -358,8 +358,8 @@ and load_position_info arg = function
358 358 | Xml.Element("f",["name", "control"], [Xml.Element("vColl",["org", "set"], control_set)]) ->
359 359 {arg with control = (przejdz_lista load_control control_set)}
360 360 | xml -> failwith ("load_position_info:\n " ^ Xml.to_string_fmt xml)
361   -
362   -and load_position = function
  361 +
  362 +and load_position = function
363 363 | Xml.Element("fs", ["xml:id", id; "type","position"], listt) ->
364 364 let id = parse_id id in
365 365 let result = {psn_id = id; gf = ""; phrases = []; control = []} in
... ... @@ -385,7 +385,7 @@ let load_schema_info arg = function
385 385 | Xml.Element("f", ["name","aspect"], [Xml.Element("symbol", ["value", aspect_value],[])]) ->
386 386 {arg with aspect = aspect_value}
387 387 | Xml.Element("f", ["name","aspect"], []) ->
388   - arg
  388 + arg
389 389 | Xml.Element("f", ["name","negativity"], [Xml.Element("symbol", ["value", negativity_value],[])]) ->
390 390 {arg with negativity = negativity_value}
391 391 | Xml.Element("f", ["name","negativity"], []) ->
... ... @@ -406,18 +406,18 @@ let load_schema = function
406 406 let result = przejdz_zapisz load_schema_info result schema in
407 407 result
408 408 | xml -> failwith ("load_schema:\n " ^ Xml.to_string_fmt xml)
409   -
  409 +
410 410 let load_syntactic = function
411   - Xml.Element("f", ["name", "schemata"],
  411 + Xml.Element("f", ["name", "schemata"],
412 412 [Xml.Element("vColl", ["org","set"], schemata_set)]) ->
413 413 przejdz_lista load_schema schemata_set
414 414 | xml -> failwith ("load_syntactic:\n " ^ Xml.to_string_fmt xml)
415   -(* Koniec kodu do wczytywania syntaticLayer *)
  415 +(* Koniec kodu do wczytywania syntaticLayer *)
416 416  
417 417  
418   -(* Początek kodu do wczytywania examplesLayer *)
  418 +(* Początek kodu do wczytywania examplesLayer *)
419 419  
420   -type example = {exm_id: id;
  420 +type example = {exm_id: id;
421 421 meaning: string; (*id*)
422 422 phrases: id list; (*zbiór id!!!*)
423 423 sentence: string;
... ... @@ -426,7 +426,7 @@ type example = {exm_id: id;
426 426 note: string}
427 427  
428 428  
429   -let load_phrases_set = function
  429 +let load_phrases_set = function
430 430 | Xml.Element("fs", ["sameAs", same_as; "type","phrase"], []) ->
431 431 let p = parse_full_id same_as in
432 432 {p with numbers = List.tl p.numbers}
... ... @@ -451,42 +451,42 @@ let load_example_info arg = function
451 451  
452 452 let load_example = function
453 453 | Xml.Element("fs", ["xml:id", id; "type", "example"], example_elements) ->
454   - let id = parse_id id in
455   - let result = {exm_id = id; meaning = ""; phrases = []; sentence = "";
  454 + let id = parse_id id in
  455 + let result = {exm_id = id; meaning = ""; phrases = []; sentence = "";
456 456 source = ""; opinion = ""; note = "";} in
457 457 let result = przejdz_zapisz load_example_info result example_elements in
458 458 result
459 459 | xml -> failwith ("load_example :\n " ^ Xml.to_string_fmt xml)
460   -
  460 +
461 461 let load_examples = function
462 462 | Xml.Element("f", ["name","examples"], [Xml.Element("vColl", ["org", "set"], examples_set)]) ->
463   - przejdz_lista load_example examples_set
  463 + przejdz_lista load_example examples_set
464 464 | xml -> failwith ("load_examples:\n " ^ Xml.to_string_fmt xml)
465   -
  465 +
466 466 (* Koniec kodu do wczytywania examplesLayer *)
467   -
468 467  
469 468  
470   -(* Początek kodu do wczytywania semanticLayer *)
471 469  
472   -type sel_prefs =
  470 +(* Początek kodu do wczytywania semanticLayer *)
  471 +
  472 +type sel_prefs =
473 473 Numeric of string
474 474 | Symbol of string
475 475 | Relation of string * string
476   -
  476 +
477 477 type argument = {arg_id: id;
478 478 role: string;
479 479 role_attribute: string;
480 480 sel_prefs: sel_prefs list list}
481   -
  481 +
482 482 type frame = {frm_id: id;
483 483 opinion: string;
484 484 meanings: string list;
485 485 arguments: argument list}
486 486  
487 487  
488   -let load_self_prefs_sets = function
489   - | Xml.Element("numeric", ["value",value],[]) ->
  488 +let load_self_prefs_sets = function
  489 + | Xml.Element("numeric", ["value",value],[]) ->
490 490 Numeric(value)
491 491 | Xml.Element("symbol", ["value",value],[]) ->
492 492 Symbol(value)
... ... @@ -495,54 +495,54 @@ let load_self_prefs_sets = function
495 495 Xml.Element("f", ["name", "to"],[Xml.Element("fs", ["sameAs", same_as; "type", "argument"], [])])]) ->
496 496 Relation(value,same_as)
497 497 | xml -> failwith ("load_self_prefs_sets :\n " ^ Xml.to_string_fmt xml)
498   -
  498 +
499 499 let load_argument_self_prefs = function
500   - | Xml.Element("f", ["name", name], [Xml.Element("vColl", ["org","set"], self_prefs_set)]) ->
  500 + | Xml.Element("f", ["name", name], [Xml.Element("vColl", ["org","set"], self_prefs_set)]) ->
501 501 przejdz_lista load_self_prefs_sets self_prefs_set
502 502 | xml -> failwith ("load_argument_self_prefs :\n " ^ Xml.to_string_fmt xml)
503   -
  503 +
504 504 let load_argument_info arg = function
505   - | Xml.Element("f", ["name","role"], [Xml.Element("symbol", ["value",value],[])]) ->
  505 + | Xml.Element("f", ["name","role"], [Xml.Element("symbol", ["value",value],[])]) ->
506 506 {arg with role = value}
507   - | Xml.Element("f", ["name","role_attribute"], [Xml.Element("symbol", ["value",value],[])]) ->
  507 + | Xml.Element("f", ["name","role_attribute"], [Xml.Element("symbol", ["value",value],[])]) ->
508 508 {arg with role_attribute = value}
509   - | Xml.Element("f", ["name","sel_prefs"], [Xml.Element("fs", ["type","sel_prefs_groups"], self_prefs)]) ->
  509 + | Xml.Element("f", ["name","sel_prefs"], [Xml.Element("fs", ["type","sel_prefs_groups"], self_prefs)]) ->
510 510 {arg with sel_prefs = przejdz_lista load_argument_self_prefs self_prefs}
511 511 | xml -> failwith ("load_argument_info :\n " ^ Xml.to_string_fmt xml)
512 512  
513 513 let load_arguments_set = function
514   - | Xml.Element("fs", ["xml:id", id; "type", "argument"], info) ->
  514 + | Xml.Element("fs", ["xml:id", id; "type", "argument"], info) ->
515 515 let id = parse_id id in
516 516 let result = {arg_id = id; role = ""; role_attribute = ""; sel_prefs = []} in
517   - let result = przejdz_zapisz load_argument_info result info in
  517 + let result = przejdz_zapisz load_argument_info result info in
518 518 result
519 519 | xml -> failwith ("load_arguments_set :\n " ^ Xml.to_string_fmt xml)
520 520  
521 521 let load_meanings_set = function
522   - | Xml.Element("fs", ["sameAs", same_As; "type", "lexical_unit"], []) ->
  522 + | Xml.Element("fs", ["sameAs", same_As; "type", "lexical_unit"], []) ->
523 523 same_As
524 524 | xml -> failwith ("load_meanings_set :\n " ^ Xml.to_string_fmt xml)
525   -
  525 +
526 526 let load_frame = function
527 527 | Xml.Element("fs", ["xml:id", id; "type", "frame"], [
528 528 Xml.Element("f", ["name", "opinion"], [Xml.Element("symbol", ["value", opinion],[])]);
529 529 Xml.Element("f", ["name", "meanings"], [Xml.Element("vColl", ["org", "set"], meanings_set)]);
530   - Xml.Element("f", ["name", "arguments"], [Xml.Element("vColl", ["org", "set"], arguments_set)])]) ->
  530 + Xml.Element("f", ["name", "arguments"], [Xml.Element("vColl", ["org", "set"], arguments_set)])]) ->
531 531 let id = parse_id id in
532 532 {frm_id = id;
533 533 opinion = opinion;
534 534 meanings = przejdz_lista load_meanings_set meanings_set;
535 535 arguments = przejdz_lista load_arguments_set arguments_set}
536 536 | xml -> failwith ("load_frame :\n " ^ Xml.to_string_fmt xml)
537   -
  537 +
538 538 let load_semantic = function
539   - | Xml.Element("f", ["name","frames"], [Xml.Element("vColl", ["org", "set"], frame_set)]) ->
  539 + | Xml.Element("f", ["name","frames"], [Xml.Element("vColl", ["org", "set"], frame_set)]) ->
540 540 przejdz_lista load_frame frame_set
541 541 | xml -> failwith ("load_semantic:\n " ^ Xml.to_string_fmt xml)
542   -(* Koniec kodu do wczytywania semanticLayer *)
543   -
  542 +(* Koniec kodu do wczytywania semanticLayer *)
544 543  
545   -(* Początek do wczytywania meaningsLayer *)
  544 +
  545 +(* Początek do wczytywania meaningsLayer *)
546 546  
547 547 type meaning = {mng_id: id;
548 548 name: string;
... ... @@ -558,21 +558,21 @@ let empty_meaning = {mng_id = empty_id;
558 558  
559 559  
560 560  
561   -let load_meaning_info arg = function
562   - | Xml.Element("f", ["name", "name"], [Xml.Element("string", [], [Xml.PCData name_string])]) ->
  561 +let load_meaning_info arg = function
  562 + | Xml.Element("f", ["name", "name"], [Xml.Element("string", [], [Xml.PCData name_string])]) ->
563 563 {arg with name = name_string}
564 564 | Xml.Element("f", ["name", "variant"], [Xml.Element("string", [], [Xml.PCData variant_string])]) ->
565 565 {arg with variant = variant_string}
566   - | Xml.Element("f", ["name", "plwnluid"], [Xml.Element("numeric", ["value",value],[])]) ->
  566 + | Xml.Element("f", ["name", "plwnluid"], [Xml.Element("numeric", ["value",value],[])]) ->
567 567 {arg with plwnluid = value}
568   - | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [Xml.PCData gloss_string])]) ->
  568 + | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [Xml.PCData gloss_string])]) ->
569 569 {arg with gloss = gloss_string}
570   - | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [])]) ->
  570 + | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [])]) ->
571 571 arg
572 572 | xml -> failwith ("load_meaning_info:\n " ^ Xml.to_string_fmt xml)
573 573  
574 574  
575   -let load_meaning = function
  575 +let load_meaning = function
576 576 | Xml.Element("fs", ["xml:id", id; "type", "lexical_unit"], meaning_info) ->
577 577 let id = parse_id id in
578 578 let result = empty_meaning in
... ... @@ -581,44 +581,44 @@ let load_meaning = function
581 581 result
582 582 | xml -> failwith ("load_meaning:\n " ^ Xml.to_string_fmt xml)
583 583  
584   -(* Koniec kodu do wczytywania meaningsLayer *)
  584 +(* Koniec kodu do wczytywania meaningsLayer *)
585 585  
586   -(* Początek kodu do wczytywania connectionsLayer *)
  586 +(* Początek kodu do wczytywania connectionsLayer *)
587 587  
588 588 type connection = {argument: string;
589 589 phrases: string list}
590   -
591   -type alternation = {connections: connection list}
  590 +
  591 +type alternation = {connections: connection list}
592 592  
593 593  
594 594 let load_phrases_connections = function
595   - | Xml.Element("fs", ["sameAs",sameAs; "type", "phrase"], []) ->
  595 + | Xml.Element("fs", ["sameAs",sameAs; "type", "phrase"], []) ->
596 596 sameAs
597 597 | xml -> failwith ("load_phrases_connections: \n " ^ Xml.to_string_fmt xml)
598 598  
599   -let load_alter_connection = function
  599 +let load_alter_connection = function
600 600 | Xml.Element("fs", ["type","connection"], [
601 601 Xml.Element("f", ["name", "argument"], [Xml.Element("fs", ["sameAs",sameAs; "type","argument"],[])]);
602 602 Xml.Element("f", ["name", "phrases"], [Xml.Element("vColl", ["org","set";], phrases)])]) ->
603 603 {argument = sameAs; phrases = (przejdz_lista load_phrases_connections phrases)}
604 604 | xml -> failwith ("load_alter_connections: \n " ^ Xml.to_string_fmt xml)
605 605  
606   -let load_alternations = function
607   - | Xml.Element("fs", ["type","alternation"],
  606 +let load_alternations = function
  607 + | Xml.Element("fs", ["type","alternation"],
608 608 [Xml.Element("f", ["name", "connections"], [Xml.Element("vColl", ["org", "set"], connections_set)])]) ->
609 609 {connections = przejdz_lista load_alter_connection connections_set}
610 610 | xml -> failwith ("load_alternations: \n " ^ Xml.to_string_fmt xml)
611 611  
612 612  
613   -let load_connections = function
  613 +let load_connections = function
614 614 | Xml.Element("f", ["name","alternations"], [Xml.Element("vColl", ["org", "set"], alternations)]) ->
615 615 przejdz_lista load_alternations alternations
616 616 | xml -> failwith ("load_connections: \n " ^ Xml.to_string_fmt xml)
617 617  
618 618  
619   -(* Koniec kodu do wczytywania connectionsLayer *)
  619 +(* Koniec kodu do wczytywania connectionsLayer *)
620 620  
621   -type entry = {ent_id: id;
  621 +type entry = {ent_id: id;
622 622 form_orth: string;
623 623 form_pos: string;
624 624 schemata: schema list;
... ... @@ -627,7 +627,7 @@ type entry = {ent_id: id;
627 627 meanings: meaning list;
628 628 alternations: alternation list}
629 629  
630   -let load_entry = function
  630 +let load_entry = function
631 631 | Xml.Element("entry",["xml:id",id], [
632 632 Xml.Element("form", [], [
633 633 Xml.Element("orth",[],[Xml.PCData orth]);
... ... @@ -635,10 +635,10 @@ let load_entry = function
635 635 Xml.Element("fs", ["type","syntactic_layer"], [syntactics]);
636 636 Xml.Element("fs", ["type","examples_layer"], [examples]);
637 637 Xml.Element("fs", ["type","semantic_layer"], [semantic]);
638   - Xml.Element("fs", ["type","meanings_layer"],
639   - [Xml.Element("f", ["name","meanings"],
  638 + Xml.Element("fs", ["type","meanings_layer"],
  639 + [Xml.Element("f", ["name","meanings"],
640 640 [Xml.Element("vColl", ["org", "set"], meanings_set)])]);
641   - Xml.Element("fs", ["type","connections_layer"],[connections])]) ->
  641 + Xml.Element("fs", ["type","connections_layer"],[connections])]) ->
642 642 let id = parse_id id in
643 643 {ent_id = id;
644 644 form_orth = orth;
... ... @@ -653,7 +653,7 @@ let load_entry = function
653 653 Xml.Element("orth",[],[Xml.PCData orth]);
654 654 Xml.Element("pos",[],[Xml.PCData pos])]);
655 655 Xml.Element("fs", ["type","syntactic_layer"], [syntactics]);
656   - Xml.Element("fs", ["type","examples_layer"], [examples])]) ->
  656 + Xml.Element("fs", ["type","examples_layer"], [examples])]) ->
657 657 let id = parse_id id in
658 658 {ent_id = id;
659 659 form_orth = orth;
... ... @@ -669,8 +669,8 @@ let load_entry = function
669 669 let load_walenty filename:entry list =
670 670 begin
671 671 match Xml.parse_file filename with
672   - Xml.Element("TEI", _,
673   - [Xml.Element("teiHeader",_,_) ;
  672 + Xml.Element("TEI", _,
  673 + [Xml.Element("teiHeader",_,_) ;
674 674 Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
675 675 przejdz_lista load_entry entries
676 676 | _ -> failwith "load_walenty"
... ... @@ -682,7 +682,7 @@ let load_walenty filename:entry list =
682 682 (* let _ = Printf.printf "loading: OK\n" *)
683 683  
684 684 (* ******************************************* *)
685   -
  685 +
686 686  
687 687  
688 688  
... ... @@ -697,25 +697,25 @@ module StringMap = Map.Make(String)
697 697  
698 698 let cnt = ref 0;;
699 699  
700   -let add_new map meaning =
  700 +let add_new map meaning =
701 701 let num_id = match meaning.mng_id with
702 702 {hash=false; numbers=[num_id]; suffix="mng"} -> num_id
703   - | _ -> failwith "zła składnia id" in
704   - if StringMap.mem num_id map then
  703 + | _ -> failwith "zła składnia id" in
  704 + if StringMap.mem num_id map then
705 705 (Printf.printf "okkk\n";
706 706 let meaning2 = StringMap.find num_id map in
707 707 if meaning = meaning2 then map else
708 708 failwith "różne1111")
709 709 else (cnt:=!cnt+1;StringMap.add num_id meaning map)
710   -
  710 +
711 711 let check_entry_menaings mapa entry =
712 712 przejdz_zapisz add_new mapa entry.meanings
713 713  
714   -let check_meanings walenty =
  714 +let check_meanings walenty =
715 715 przejdz_zapisz check_entry_menaings StringMap.empty walenty
716 716  
717 717 (*
718   -let _ = check_meanings walenty
  718 +let _ = check_meanings walenty
719 719 let _ = Printf.printf "meaning map.size: %d\n" !cnt
720 720 *)
721 721  
... ... @@ -724,27 +724,27 @@ let _ = Printf.printf &quot;meaning map.size: %d\n&quot; !cnt
724 724 let cnt = ref 0;;
725 725  
726 726 (*arg_id*)
727   -let add_new map argument =
  727 +let add_new map argument =
728 728 let arg_id = match argument.arg_id with
729 729 {hash=false; numbers=[num_id]; suffix="arg"} -> num_id
730   - | _ -> failwith "zła składnia id" in
731   - if StringMap.mem arg_id map then
  730 + | _ -> failwith "zła składnia id" in
  731 + if StringMap.mem arg_id map then
732 732 (Printf.printf "okkk\n";
733 733 let val2 = StringMap.find arg_id map in
734 734 let val1 = argument in
735 735 if val1 = val2 then map else
736 736 failwith "różne1111")
737 737 else (cnt:=!cnt+1; StringMap.add arg_id argument map)
738   -
739 738  
740 739  
741   -let check_frame mapa frame =
742   - przejdz_zapisz add_new mapa frame.arguments
  740 +
  741 +let check_frame mapa frame =
  742 + przejdz_zapisz add_new mapa frame.arguments
743 743  
744 744 let check_entry_frames mapa entry =
745 745 przejdz_zapisz check_frame mapa entry.frames
746 746  
747   -let check_meanings walenty =
  747 +let check_meanings walenty =
748 748 przejdz_zapisz check_entry_frames StringMap.empty walenty
749 749  
750 750 (*
... ... @@ -756,22 +756,22 @@ let _ = Printf.printf &quot;entry.frame.argument map.size: %d\n&quot; !cnt
756 756  
757 757 let cnt = ref 0;;
758 758 (*frm_id*)
759   -let add_new map frame =
  759 +let add_new map frame =
760 760 let id = match frame.frm_id with
761 761 {hash=false; numbers=[num_id]; suffix="frm"} -> num_id
762   - | _ -> failwith "zła składnia id" in
763   - if StringMap.mem id map then
  762 + | _ -> failwith "zła składnia id" in
  763 + if StringMap.mem id map then
764 764 (Printf.printf "okkk\n";
765 765 let val2 = StringMap.find id map in
766 766 let val1 = frame in
767 767 if val1 = val2 then map else
768 768 failwith "różne1111")
769 769 else (cnt:=!cnt+1; StringMap.add id frame map)
770   -
  770 +
771 771 let check_entry_frames mapa entry =
772 772 przejdz_zapisz add_new mapa entry.frames
773 773  
774   -let check_meanings walenty =
  774 +let check_meanings walenty =
775 775 przejdz_zapisz check_entry_frames StringMap.empty walenty
776 776  
777 777 (*
... ... @@ -783,7 +783,7 @@ let _ = Printf.printf &quot;entry.frame map.size: %d\n&quot; !cnt
783 783  
784 784 (*examplesLayer*)
785 785 (*na razie zeruje phrases!!!!*)
786   -let print_example example =
  786 +let print_example example =
787 787 Printf.printf "meaning: %s\n phrases: " example.meaning;
788 788 (* print_endline (String.concat "; " example.phrases);*)
789 789 Printf.printf "sentence: %s\n" example.sentence;
... ... @@ -796,31 +796,31 @@ let print_example example =
796 796 let cnt = ref 0;;
797 797 let takiesame = ref 0;;
798 798  
799   -let add_new map example =
  799 +let add_new map example =
800 800 let id = match example.exm_id with
801 801 {hash=false; numbers=[num_id]; suffix="exm"} -> num_id
802   - | _ -> failwith "zła składnia id" in
  802 + | _ -> failwith "zła składnia id" in
803 803 (* let example = {example with phrases = []} in (*uwaga!!!! zeruje phrases!!!*)*)
804 804 let example = {example with meaning = ""} in (*uwaga!!!! zeruje meaning!!!*)
805   - if StringMap.mem id map then
  805 + if StringMap.mem id map then
806 806 (takiesame:=!takiesame+1;
807 807 (* Printf.printf "okkk\n";*)
808 808 let val2 = StringMap.find id map in
809 809 let val1 = example in
810 810 if val1 = val2 then map else
811   - let _ = print_example val1 in
  811 + let _ = print_example val1 in
812 812 let _ = print_example val2 in
813 813 failwith "różne1111")
814 814 else (cnt:=!cnt+1; StringMap.add id example map)
815   -
  815 +
816 816 let check_entry_example mapa entry =
817 817 przejdz_zapisz add_new mapa entry.examples
818 818  
819   -let check_meanings walenty =
  819 +let check_meanings walenty =
820 820 przejdz_zapisz check_entry_example StringMap.empty walenty
821 821  
822 822 (*
823   -let _ = check_meanings walenty
  823 +let _ = check_meanings walenty
824 824 let _ = Printf.printf "examples map.size: %d takich samych: %d\n" !cnt !takiesame
825 825 *)
826 826  
... ... @@ -831,12 +831,12 @@ let _ = Printf.printf &quot;examples map.size: %d takich samych: %d\n&quot; !cnt !takiesam
831 831 let cnt = ref 0;;
832 832 let takiesame = ref 0;;
833 833  
834   -let add_new map position =
  834 +let add_new map position =
835 835 let id = match position.psn_id with
836 836 {hash=false; numbers=[num_id]; suffix="psn"} -> num_id
837   - | _ -> failwith "zła składnia id" in
838   -(* let position = {position with phrases = przejdz_lista (fun (x,y) -> (parse_id "",y)) position.phrases} in*) (*uwaga!!!!*)
839   - if StringMap.mem id map then
  837 + | _ -> failwith "zła składnia id" in
  838 +(* let position = {position with phrases = przejdz_lista (fun (x,y) -> (parse_id "",y)) position.phrases} in*) (*uwaga!!!!*)
  839 + if StringMap.mem id map then
840 840 (takiesame:=!takiesame+1;
841 841 (* Printf.printf "okkk\n";*)
842 842 let val2 = StringMap.find id map in
... ... @@ -844,18 +844,18 @@ let add_new map position =
844 844 if val1 = val2 then map else
845 845 failwith "różne1111")
846 846 else (cnt:=!cnt+1; StringMap.add id position map)
847   -
848   -let check_schema mapa schema =
  847 +
  848 +let check_schema mapa schema =
849 849 przejdz_zapisz add_new mapa schema.positions
850   -
  850 +
851 851 let check_entry mapa entry =
852 852 przejdz_zapisz check_schema mapa entry.schemata
853 853  
854   -let check walenty =
  854 +let check walenty =
855 855 przejdz_zapisz check_entry StringMap.empty walenty
856 856  
857 857 (*
858   -let _ = check walenty
  858 +let _ = check walenty
859 859 let _ = Printf.printf "syntactic...position map.size: %d takich samych: %d\n" !cnt !takiesame
860 860 *)
861 861  
... ... @@ -864,8 +864,8 @@ let _ = Printf.printf &quot;syntactic...position map.size: %d takich samych: %d\n&quot; !c
864 864 let cnt = ref 0;;
865 865 let takiesame = ref 0;;
866 866  
867   -(*let clear_id (position:position) =
868   - let position = {position with phrases = [](*przejdz_lista (fun (x,y) -> (empty_id,y)) position.phrases*)} in (*uwaga!!!!*)
  867 +(*let clear_id (position:position) =
  868 + let position = {position with phrases = [](*przejdz_lista (fun (x,y) -> (empty_id,y)) position.phrases*)} in (*uwaga!!!!*)
869 869 let position = {position with psn_id = empty_id} in
870 870 position*)
871 871  
... ... @@ -876,12 +876,12 @@ let print_schema (schema:schema) =
876 876 Printf.printf "schema.negativity= %s\n" schema.negativity;
877 877 Printf.printf "schema.predicativity= %s\n___________________\n" schema.predicativity
878 878  
879   -let add_new map schema =
  879 +let add_new map schema =
880 880 let id = match schema.sch_id with
881 881 {hash=false; numbers=[num_id]; suffix="sch"} -> num_id
882   - | _ -> failwith "zła składnia id" in
  882 + | _ -> failwith "zła składnia id" in
883 883 let schema = {schema with opinion = ""} in (*uwaga, zeruje opinie!!!*)
884   - if StringMap.mem id map then
  884 + if StringMap.mem id map then
885 885 (takiesame:=!takiesame+1;
886 886 (* Printf.printf "okkk\n";*)
887 887 let val2 = StringMap.find id map in
... ... @@ -893,14 +893,14 @@ let add_new map schema =
893 893 else (cnt:=!cnt+1; StringMap.add id schema map)
894 894  
895 895  
896   -
897   -let check_schema mapa schema =
  896 +
  897 +let check_schema mapa schema =
898 898 add_new mapa schema
899   -
  899 +
900 900 let check_entry mapa entry =
901 901 przejdz_zapisz check_schema mapa entry.schemata
902 902  
903   -let check walenty =
  903 +let check walenty =
904 904 przejdz_zapisz check_entry StringMap.empty walenty
905 905  
906 906 (*
... ... @@ -913,11 +913,11 @@ let _ = Printf.printf &quot;syntactic...schema map.size: %d takich samych: %d\n&quot; !cnt
913 913 let cnt = ref 0;;
914 914 let takiesame = ref 0;;
915 915  
916   -let add_new map (id, phrase) =
  916 +let add_new map (id, phrase) =
917 917 let id = match id with
918 918 {hash=false; numbers=[num_id]; suffix="phr"} -> num_id
919   - | _ -> failwith "zła składnia id" in
920   - if StringMap.mem id map then
  919 + | _ -> failwith "zła składnia id" in
  920 + if StringMap.mem id map then
921 921 (takiesame:=!takiesame+1;
922 922 (* Printf.printf "okkk\n";*)
923 923 let val2 = StringMap.find id map in
... ... @@ -926,20 +926,20 @@ let add_new map (id, phrase) =
926 926 failwith "różne1111")
927 927 else (cnt:=!cnt+1; StringMap.add id phrase map)
928 928  
929   -let check_pos mapa (position:position) =
  929 +let check_pos mapa (position:position) =
930 930 przejdz_zapisz add_new mapa position.phrases
931   -
932   -let check_schema mapa schema =
  931 +
  932 +let check_schema mapa schema =
933 933 przejdz_zapisz check_pos mapa schema.positions
934   -
  934 +
935 935 let check_entry mapa entry =
936 936 przejdz_zapisz check_schema mapa entry.schemata
937 937  
938   -let check walenty =
  938 +let check walenty =
939 939 przejdz_zapisz check_entry StringMap.empty walenty
940   -
  940 +
941 941 *)
942   -
  942 +
943 943 (*
944 944 let _ = check walenty
945 945 let _ = Printf.printf "syntactic...phrases map.size: %d takich samych: %d\n" !cnt !takiesame
... ... @@ -967,7 +967,7 @@ let rec parse_comp = function
967 967 | "rel",l -> WalTypes.Rel, Xlist.map l (fun s -> WalTypes.Comp s)
968 968 | s,[] -> WalTypes.CompTypeUndef,[WalTypes.Comp s]
969 969 | _ -> failwith "parse_comp"
970   -
  970 +
971 971  
972 972 let rec morf_of_phrase = function
973 973 NP c -> WalTypes.Phrase (WalTypes.NP(WalParser.parse_case [WalTypes.Text c]))
... ... @@ -995,7 +995,7 @@ let rec morf_of_phrase = function
995 995 | Lex lex -> (*print_endline "lex";*) WalTypes.Phrase (WalTypes.Null) (* FIXME: ni *)
996 996 | Null -> WalTypes.Phrase (WalTypes.Null)
997 997 | _ -> failwith "morf_of_phrase"
998   -
  998 +
999 999 (* | GerP(c) -> WalTypes.Phrase (WalTypes.
1000 1000 | PrepGerP(prep,c) -> WalTypes.Phrase (WalTypes.
1001 1001 | PpasP(c) -> WalTypes.Phrase (WalTypes.
... ... @@ -1003,21 +1003,21 @@ let rec morf_of_phrase = function
1003 1003 | PPact(c) -> WalTypes.Phrase (WalTypes.
1004 1004 | PrepPactP(prep,c) -> WalTypes.Phrase (WalTypes.
1005 1005 | Qub -> WalTypes.Phrase (WalTypes.*)
1006   -
  1006 +
1007 1007  
1008 1008 open WalTypes
1009 1009  
1010   -let process_phrases phrases =
  1010 +let process_phrases phrases =
1011 1011 Xlist.fold phrases StringMap.empty (fun phrases (id,phrase) ->
1012   - let id =
  1012 + let id =
1013 1013 match id with
1014 1014 {hash=false; numbers=[(*_;_;_;*)id]; suffix="phr"} -> id
1015   - | _ -> failwith "process_phrases" in
  1015 + | _ -> failwith "process_phrases" in
1016 1016 StringMap.add phrases id phrase)
1017 1017  
1018 1018 let process_positions positions =
1019   - Xlist.fold positions StringMap.empty (fun positions position ->
1020   - let id =
  1019 + Xlist.fold positions StringMap.empty (fun positions position ->
  1020 + let id =
1021 1021 match position.psn_id with
1022 1022 {hash=false; numbers=[(*_;_;*)id]; suffix="psn"} -> id
1023 1023 | _ -> failwith "process_positions" in
... ... @@ -1027,7 +1027,7 @@ let process_positions positions =
1027 1027  
1028 1028 let process_schemata schemata =
1029 1029 Xlist.fold schemata StringMap.empty (fun schemata schema ->
1030   - let id =
  1030 + let id =
1031 1031 match schema.sch_id with
1032 1032 {hash=false; numbers=[(*_;*)id]; suffix="sch"} -> id
1033 1033 | _ -> failwith "process_schemata" in
... ... @@ -1043,75 +1043,75 @@ let process_schemata schemata =
1043 1043 let add_meanings meanings = function
1044 1044 DefaultAtrs(_,r,o,n,p,a) -> DefaultAtrs(meanings,r,o,n,p,a)
1045 1045 | _ -> failwith "add_meanings"
1046   -
  1046 +
1047 1047 let process_arguments arguments =
1048   - Xlist.fold arguments StringMap.empty (fun arguments argument ->
1049   - let id =
  1048 + Xlist.fold arguments StringMap.empty (fun arguments argument ->
  1049 + let id =
1050 1050 match argument.arg_id with
1051 1051 {hash=false; numbers=[(*_;_;*)id]; suffix="arg"} -> id
1052 1052 | _ -> failwith "process_arguments" in
1053   - StringMap.add arguments id (argument.role,argument.role_attribute,argument.sel_prefs))
1054   -
  1053 + StringMap.add arguments id (argument.role,argument.role_attribute,argument.sel_prefs))
  1054 +
1055 1055 let get_meaning_id meaning =
1056 1056 match parse_full_id meaning with
1057 1057 {hash=true; numbers=[_;id]; suffix="mng"} -> id
1058 1058 | _ -> failwith "get_meaning_id"
1059   -
1060   -let get_schema_id alt =
  1059 +
  1060 +let get_schema_id alt =
1061 1061 try
1062 1062 match parse_full_id (List.hd ((List.hd alt.connections).phrases)) with
1063 1063 {hash=true; numbers=[_;id;_;_]; suffix="phr"} -> id
1064 1064 | _ -> failwith "get_schema_id 1"
1065 1065 with _ -> failwith "get_schema_id 2"
1066   -
1067   -let get_frame_id alt =
  1066 +
  1067 +let get_frame_id alt =
1068 1068 try
1069 1069 match parse_full_id ((List.hd alt.connections).argument) with
1070 1070 {hash=true; numbers=[_;id;_]; suffix="arg"} -> id
1071 1071 | _ -> failwith "get_frame_id"
1072 1072 with _ -> failwith "get_frame_id"
1073   -
1074   -let get_argument_id arg =
  1073 +
  1074 +let get_argument_id arg =
1075 1075 match parse_full_id arg with
1076 1076 {hash=true; numbers=[_;_;id]; suffix="arg"} -> id
1077 1077 | _ -> failwith "get_argument_id"
1078   -
1079   -let get_position_id phrases =
  1078 +
  1079 +let get_position_id phrases =
1080 1080 try
1081 1081 match parse_full_id (List.hd phrases) with
1082 1082 {hash=true; numbers=[_;_;id;_]; suffix="phr"} -> id
1083 1083 | _ -> failwith "get_position_id"
1084 1084 with _ -> failwith "get_position_id"
1085   -
1086   -let get_phrase_id arg =
  1085 +
  1086 +let get_phrase_id arg =
1087 1087 match parse_full_id arg with
1088 1088 {hash=true; numbers=[_;_;_;id]; suffix="phr"} -> id
1089 1089 | _ -> failwith "get_phrase_id"
1090   -
  1090 +
1091 1091 let process_frames frames =
1092 1092 Xlist.fold frames StringMap.empty (fun frames frame ->
1093   - let id =
  1093 + let id =
1094 1094 match frame.frm_id with
1095 1095 {hash=false; numbers=[(*_;*)id]; suffix="frm"} -> id
1096 1096 | _ -> failwith "process_frames" in
1097 1097 let arguments = process_arguments frame.arguments in
1098 1098 let meaning_ids = Xlist.map frame.meanings get_meaning_id in
1099 1099 StringMap.add frames id (meaning_ids,arguments))
1100   -
1101   -let process_meanings meanings =
  1100 +
  1101 +let process_meanings meanings =
1102 1102 Xlist.fold meanings StringMap.empty (fun meanings meaning ->
1103   - let id =
  1103 + let id =
1104 1104 match meaning.mng_id with
1105 1105 {hash=false; numbers=[(*_;*)id]; suffix="mng"} -> id
1106 1106 | _ -> failwith "process_meanings" in
1107 1107 StringMap.add meanings id (meaning.name ^ " " ^ meaning.variant))
1108   -
  1108 +
1109 1109 let process_sel_pref = function
1110 1110 Numeric s -> (try PreWordnet.synset_name s with Not_found -> "unknown")
1111   - | Symbol s -> s
  1111 + | Symbol s -> s
1112 1112 | Relation(s,t) -> "REL" (* FIXME *)
1113   -
1114   -let connect entry =
  1113 +
  1114 +let connect entry =
1115 1115 let schemata = process_schemata entry.schemata in
1116 1116 let frames = process_frames entry.frames in
1117 1117 let meanings = process_meanings entry.meanings in
... ... @@ -1133,12 +1133,12 @@ let connect entry =
1133 1133 let sel_prefs = Xlist.map (List.flatten sel_prefs) process_sel_pref in
1134 1134 {gf=r; role=role; role_attr=role_attribute; sel_prefs=sel_prefs;
1135 1135 cr=cr; ce=ce; dir=Both; morfs=Xlist.map phrases morf_of_phrase} :: positions2) in
1136   - let meanings = List.rev (Xlist.fold meaning_ids [] (fun l id ->
  1136 + let meanings = List.rev (Xlist.fold meaning_ids [] (fun l id ->
1137 1137 (StringMap.find meanings id) :: l)) in
1138 1138 let schema_atrs = add_meanings meanings schema_atrs in
1139 1139 (entry.form_orth,entry.form_pos,Frame(schema_atrs,positions)) :: found)
1140   -
1141   -let connect2 entry =
  1140 +
  1141 +let connect2 entry =
1142 1142 let schemata = process_schemata entry.schemata in
1143 1143 StringMap.fold schemata [] (fun found _ (schema_atrs,positions) ->
1144 1144 let positions = StringMap.fold positions [] (fun positions2 _ (r,cr,ce,phrases) ->
... ... @@ -1146,9 +1146,9 @@ let connect2 entry =
1146 1146 {gf=r; role=""; role_attr=""; sel_prefs=[];
1147 1147 cr=cr; ce=ce; dir=Both; morfs=Xlist.map phrases morf_of_phrase} :: positions2) in
1148 1148 (entry.form_orth,entry.form_pos,Frame(schema_atrs,positions)) :: found)
1149   -
1150   -let load_walenty2 () =
1151   - let walenty = load_walenty Paths.walenty_filename in
  1149 +
  1150 +let load_walenty2 () =
  1151 + let walenty = load_walenty Paths.config.Paths.walenty_filename in
1152 1152 Xlist.fold walenty StringMap.empty (fun walenty entry ->
1153 1153 if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) ->
1154 1154 let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in
... ... @@ -1158,63 +1158,63 @@ let load_walenty2 () =
1158 1158 let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in
1159 1159 let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in
1160 1160 StringMap.add walenty pos map))
1161   -
1162 1161  
1163   -let print_stringqmap filename qmap =
  1162 +
  1163 +let print_stringqmap filename qmap =
1164 1164 let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in
1165 1165 File.file_out filename (fun file ->
1166 1166 Xlist.iter (Xlist.sort l compare) (fun (v,k) ->
1167 1167 Printf.fprintf file "%5d %s\n" v k))
1168   -
  1168 +
1169 1169 let sel_prefs_quantities walenty =
1170 1170 Xlist.fold walenty StringQMap.empty (fun quant e ->
1171 1171 Xlist.fold e.frames quant (fun quant f ->
1172 1172 Xlist.fold f.arguments quant (fun quant a ->
1173 1173 Xlist.fold a.sel_prefs quant (fun quant l ->
1174 1174 Xlist.fold l quant (fun quant -> function
1175   - Numeric s ->
  1175 + Numeric s ->
1176 1176 let name = try PreWordnet.synset_name s with Not_found -> "unknown" in
1177 1177 StringQMap.add quant ("N " ^ s ^ " " ^ name)
1178 1178 | Symbol s -> StringQMap.add quant ("S " ^ s)
1179 1179 | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t))))))
1180   -
1181   -(*let _ =
  1180 +
  1181 +(*let _ =
1182 1182 let walenty = load_walenty walenty_filename in
1183 1183 let quant = sel_prefs_quantities walenty in
1184 1184 print_stringqmap "results/quant_sel_prefs.txt" quant*)
1185   -
  1185 +
1186 1186 let print_entry filename lex =
1187 1187 match Xml.parse_file filename with
1188   - Xml.Element("TEI", _,
1189   - [Xml.Element("teiHeader",_,_) ;
  1188 + Xml.Element("TEI", _,
  1189 + [Xml.Element("teiHeader",_,_) ;
1190 1190 Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
1191 1191 Xlist.iter entries (function
1192   - Xml.Element("entry",_,Xml.Element("form", [], [Xml.Element("orth",[],[Xml.PCData orth]);_]) :: xml :: _) ->
  1192 + Xml.Element("entry",_,Xml.Element("form", [], [Xml.Element("orth",[],[Xml.PCData orth]);_]) :: xml :: _) ->
1193 1193 if orth = lex then print_endline (Xml.to_string_fmt xml)
1194 1194 | _ -> failwith "print_entry")
1195 1195 | _ -> failwith "print_entry"
1196 1196  
1197 1197  
1198   -(*let _ =
  1198 +(*let _ =
1199 1199 print_entry walenty_filename "bębnić"*)
1200 1200  
1201 1201 let print_full_entry filename lex =
1202 1202 match Xml.parse_file filename with
1203   - Xml.Element("TEI", _,
1204   - [Xml.Element("teiHeader",_,_) ;
  1203 + Xml.Element("TEI", _,
  1204 + [Xml.Element("teiHeader",_,_) ;
1205 1205 Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
1206 1206 Xlist.iter entries (function
1207   - Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: _ :: l) ->
  1207 + Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: _ :: l) ->
1208 1208 let xml = Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: l) in
1209 1209 if orth = lex then print_endline (Xml.to_string_fmt xml)
1210 1210 | _ -> failwith "print_full_entry")
1211 1211 | _ -> failwith "print_full_entry"
1212 1212  
1213   -(*let _ =
  1213 +(*let _ =
1214 1214 print_full_entry walenty_filename "bębnić"*)
1215 1215  
1216 1216 (*let _ =
1217 1217 let walenty = load_walenty2 () in
1218 1218 let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in
1219 1219 Xlist.iter frames_sem (fun frame ->
1220   - print_endline (WalStringOf.frame "bębnić" frame))*)
1221 1220 \ No newline at end of file
  1221 + print_endline (WalStringOf.frame "bębnić" frame))*)
... ...
swigra/parser/morfeusz2-swi.so 0 → 100755
No preview for this file type
testy/testy podziału na zdania.tab renamed to testy/testy podzialu na zdania.tab
1 1 - Frunę. or
2 2 - Frunę. Kiszę. or
3 3 - Frunę - powiedział szpak. or
4   -- Frunę! - powiedział szpak. or
5   -- Frunę - powiedział szpak. - Kiszę. or
  4 +- Frunę! - powiedział szpak. or -sem
  5 +- Frunę - powiedział szpak. - Kiszę. or -amb
6 6 - Ten szpak - powiedział - frunie. or -
7 7 Szpak powiedział: or
8   -Szpak powiedział: frunę. or
9   -Szpak powiedział: „Frunę”. or
  8 +Szpak powiedział: frunę. or -amb
  9 +Szpak powiedział: „Frunę”. or -amb
10 10 Szpak powiedział: „Frunę. Kiszę.” or
11   -Szpak powiedział: - Frunę. or
  11 +Szpak powiedział: - Frunę. or -vis
12 12 Szpak powiedział: - Frunę. - i bryknął. or -
13 13 Szpak powiedział: - Frunę. - i dodał: - Brykam. or -
14 14 Szpak bada sprawę: - Frunę - powiada - i brykam. or -
15   -SZPAK: Frunę. or
16   -SZPAK: - Frunę. or
  15 +SZPAK: Frunę. or -popr
  16 +SZPAK: - Frunę. or -amb
17 17 Frunę. Kiszę. sentence multi
18   -W XX w. Szpak frunie. sentence amb multi
  18 +W XX w. Szpak frunie. sentence amb multi -amb
  19 +Ala ma kota. Ela ma psa. Kaziu ma żółwika. Blablabla... selekcja parserów
... ...