Commit 24f2f00f018f263fe6613522fb47b6fbabec6f49
Merge branch 'integration'
Showing 29 changed files with 1,460 additions and 788 deletions
config
... | ... | @@ -5,7 +5,8 @@ RESOURCES_PATH=../resources/ |
5 | 5 | WALENTY=/usr/share/walenty/walenty_20160412.xml |
6 | 6 | |
7 | 7 | # Port number for pre server |
8 | -PRE_PORT=3258 | |
8 | +PRE_PORT=3158 | |
9 | +#PRE_PORT=3258 | |
9 | 10 | |
10 | 11 | # Host name for pre server |
11 | 12 | PRE_HOST=localhost |
... | ... | @@ -22,5 +23,26 @@ LCG_TIMEOUT=100 |
22 | 23 | # LCG parser memory size (maximum number of nodes of parsed term) |
23 | 24 | LCG_NO_NODES=10000000 |
24 | 25 | |
25 | -# Number of parser processes | |
26 | +# Number of parser processes | |
26 | 27 | NO_PROCESSES=4 |
28 | + | |
29 | +# Is Concraft enabled | |
30 | +CONCRAFT_ENABLED=false | |
31 | + | |
32 | +# Path to Concraft | |
33 | +CONCRAFT_PATH=../concraft/ | |
34 | + | |
35 | +# Is MateParser enabled | |
36 | +MATE_PARSER_ENABLED=false | |
37 | + | |
38 | +# Path to MateParser | |
39 | +MATE_PARSER_PATH=../dependencyParser/basic/mate-tools/ | |
40 | + | |
41 | +# Is Świgra enabled | |
42 | +SWIGRA_ENABLED=false | |
43 | + | |
44 | +# Path to Świgra | |
45 | +SWIGRA_PATH=../swigra/parser/ | |
46 | + | |
47 | +# Is sentence selection enabled | |
48 | +SENTENCE_SELECTION_ENABLED=true | |
... | ... |
corpora/CONLL.ml
1 | 1 | open Xstd |
2 | 2 | open PreTypes |
3 | 3 | |
4 | -let alternative_string f mode alts = if List.exists (fun (m,_) -> mode = m) alts | |
5 | - then f (snd @@ List.find (fun (m,_) -> m = mode) alts) | |
6 | - else f (snd @@ List.find (fun (m,_) -> m = Struct) alts) | |
7 | - | |
8 | -let string_of_token mode token id super label = | |
9 | - let decompose_lemma = function | |
10 | - | Lemma(a,b,c) -> a,b,if c = [[]] | |
11 | - then "_" | |
12 | - else String.concat "][" @@ Xlist.map c (fun x -> | |
13 | - String.concat "|" @@ Xlist.map x ( fun y -> | |
14 | - String.concat "." y)) | |
15 | - | _ -> failwith ("string_of_token: not Lemma") in | |
16 | - match mode with | |
17 | - | Raw -> token.orth | |
18 | - | Struct -> failwith ("function string_of_token for mode Struct is not defined") | |
19 | - | CONLL -> let lemma,cat,interp = decompose_lemma token.token in | |
20 | - String.concat "\t" [id; | |
21 | - token.orth; lemma; cat; cat; interp; "_"; "_"; | |
22 | - string_of_int token.beg; string_of_int token.len] | |
23 | - | Mate -> let lemma,cat,interp = decompose_lemma token.token in | |
24 | - String.concat "\t" [id; | |
25 | - token.orth; lemma; lemma; cat; cat; interp; interp; "_"; "_"; "_"; "_"; "_"; "_"] | |
26 | - | _ -> failwith "string_of_token: ni" | |
27 | - | |
28 | -let string_of_paths mode tokens paths = | |
29 | - Array.fold_left (fun acc (id,super,label) -> | |
30 | - acc ^ "\n" ^ (string_of_token mode (ExtArray.get tokens id) (string_of_int id) super label)) "" paths | |
31 | - | |
32 | -let rec string_of_sentence mode tokens = function | |
33 | - RawSentence s -> if mode = Raw then s else "" | |
34 | - | StructSentence (_,tokens, _) -> failwith ("string_of_sentence: StructSentence") (*String.concat "\n" @@ Xlist.map tokens (fun x -> string_of_token mode x)*) | |
35 | - | DepSentence (_, paths) -> string_of_paths mode tokens paths | |
36 | - | QuotedSentences _ -> failwith ("string_of_sentence: QuotedSentences") | |
37 | - | AltSentence alts -> alternative_string (string_of_sentence mode tokens) mode alts | |
38 | - | |
39 | -let string_of_p_record mode tokens p_record = | |
40 | - (if p_record.pid = "" then "" else p_record.pid ^ "\n") ^ | |
41 | - string_of_sentence mode tokens p_record.psentence | |
42 | - | |
43 | -(*let rec string_of_paragraph mode tokens = function | |
44 | - RawParagraph s -> if mode = Raw then s else "" | |
45 | - | StructParagraph p_records -> String.concat "\n\n" @@ Xlist.map p_records (string_of_p_record mode tokens) | |
46 | - | AltParagraph alts -> alternative_string (string_of_paragraph mode) mode alts | |
47 | - | |
48 | -let rec string_of_text mode tokens = function | |
49 | - RawText s -> if mode = Raw then s else "" | |
50 | - | StructText (paragraphs,_) -> String.concat "\n\n" @@ Xlist.map paragraphs (string_of_paragraph mode tokens) | |
51 | - | AltText alts -> alternative_string (string_of_text mode) mode alts*) | |
52 | - | |
53 | - | |
54 | -(******************) | |
55 | - | |
56 | -let establish_next tokens paths = | |
57 | - let n = ExtArray.size tokens in | |
58 | - Int.iter 1 (n - 2) (fun i -> | |
59 | - let f = ExtArray.get tokens i in | |
60 | - let s = ExtArray.get tokens (i+1) in | |
61 | - ExtArray.set tokens i {f with next = s.beg}); | |
62 | - let last = ExtArray.get tokens (n-1) in | |
63 | - ExtArray.set tokens (n-1) {last with next = last.beg + last.len} | |
64 | - | |
65 | - | |
66 | - (*let rec pom res = function | |
67 | - h :: t -> let next = if res = [] | |
68 | - then h.beg+h.len | |
69 | - else (List.hd res).beg in | |
70 | - pom ({h with next = next} :: res) t | |
71 | - | [] -> res in | |
72 | - pom [] rev_tokens*) | |
73 | - | |
74 | -let rec establish_for_token i text tokens = function | |
75 | - (id,_,_) :: t as l-> | |
76 | - let h = ExtArray.get tokens id in | |
77 | - if Xstring.check_prefix " " text | |
78 | - then establish_for_token (i+100) (Xstring.cut_prefix " " text) tokens l | |
79 | - else if Xstring.check_prefix h.orth text | |
80 | - then | |
81 | - let n = (List.length @@ Xunicode.utf8_chars_of_utf8_string h.orth) * 100 in | |
82 | - let n_h = {h with beg = i ; len = n} in | |
83 | - ExtArray.set tokens id n_h; | |
84 | - establish_for_token (i+n) (Xstring.cut_prefix h.orth text) tokens t | |
85 | - else failwith ("establish_for_token :" ^ h.orth ^ " " ^ text) | |
86 | - | [] -> 100, i | |
87 | - | |
88 | -let rec establish_lengths text paths tokens = | |
89 | - let pbeg, plen = establish_for_token 100 text tokens (List.tl (Array.to_list paths)) in | |
90 | - establish_next tokens paths; | |
91 | - pbeg, plen-100 | |
92 | - | |
93 | -(******************) | |
94 | - | |
95 | -exception ErrorInfoFile of string | |
96 | - | |
97 | -let info_file = "../corpora/info_sentences.txt" | |
98 | - | |
99 | -let info = Xstring.split "\n\n" @@ File.load_file_gen info_file | |
100 | - | |
101 | -let add_to_map map info_str = | |
102 | - match Xstring.split "\n" info_str with | |
103 | - [id; text; info_token] -> StringMap.add map info_token (id, text) | |
104 | - | _ -> raise (ErrorInfoFile info_str) | |
105 | - | |
106 | -let info_map = | |
107 | - Xlist.fold info StringMap.empty add_to_map | |
108 | - | |
109 | -let match_sentence (p_record,tokens) = | |
110 | - let rec info_token s = match s with | |
111 | - RawSentence text -> failwith ("match_sentence: " ^ text) | |
112 | - | StructSentence (_, tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*) | |
113 | - | DepSentence (_, paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths | |
114 | - | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences") | |
115 | - | AltSentence alts -> failwith ("match_sentence: AltSentence") | |
116 | - (*if List.exists (fun (mode, s) -> mode = CONLL) alts | |
117 | - then info_token (snd (List.find (fun (mode, s) -> mode = CONLL) alts)) | |
118 | - else failwith ("match_sentence: no CONLL mode in AltSentence")*) in | |
119 | - let info_token, paths = info_token p_record.psentence in | |
120 | - try | |
121 | - let id, text = StringMap.find info_map info_token in | |
122 | - let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in | |
123 | - AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; | |
124 | - psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence("", paths)]}]],tokens)] | |
125 | -(* {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *) | |
126 | - with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)] | |
127 | - | |
128 | -let match_corpus corpus = | |
4 | +let alternative_string f mode alts = if List.exists (fun (m,_) -> mode = m) alts | |
5 | + then f (snd @@ List.find (fun (m,_) -> m = mode) alts) | |
6 | + else f (snd @@ List.find (fun (m,_) -> m = PreTypes.Struct) alts) | |
7 | + | |
8 | +let string_of_token mode token conll_id super label = | |
9 | + let decompose_lemma = function | |
10 | + | PreTypes.Lemma(a,b,c) -> a,b,if c = [[]] | |
11 | + then "_" | |
12 | + else String.concat "][" @@ Xlist.map c (fun x -> | |
13 | + String.concat "|" @@ Xlist.map x ( fun y -> | |
14 | + String.concat "." y)) | |
15 | + | t -> failwith ("string_of_token: not Lemma") in | |
16 | + match mode with | |
17 | + | PreTypes.Raw -> token.PreTypes.orth | |
18 | + | PreTypes.Struct -> failwith ("function string_of_token for mode Struct is not defined") | |
19 | + | PreTypes.CONLL -> let lemma,cat,interp = decompose_lemma token.PreTypes.token in | |
20 | + String.concat "\t" [string_of_int conll_id; | |
21 | + token.PreTypes.orth; lemma; cat; cat; interp; "_"; "_"; | |
22 | + string_of_int token.PreTypes.beg; string_of_int token.PreTypes.len] | |
23 | + | PreTypes.Mate -> let lemma,cat,interp = decompose_lemma token.PreTypes.token in | |
24 | + String.concat "\t" [string_of_int conll_id; | |
25 | + token.PreTypes.orth; lemma; lemma; cat; cat; interp; interp; "_"; "_"; "_"; "_"; "_"; "_"] | |
26 | + | _ -> failwith "string_of_token: ni" | |
27 | + | |
28 | +let string_of_paths mode tokens paths = | |
29 | + let l = Int.fold 1 (Array.length paths - 1) [] (fun l conll_id -> | |
30 | + let id,super,label = paths.(conll_id) in | |
31 | + (string_of_token mode (ExtArray.get tokens id) conll_id super label) :: l) in | |
32 | + String.concat "\n" (List.rev l) ^ "\n\n" | |
33 | + | |
34 | +let rec string_of_sentence mode tokens = function | |
35 | + RawSentence s -> if mode = Raw then s else "" | |
36 | + | StructSentence (tokens, _) -> failwith ("string_of_sentence: StructSentence") (*String.concat "\n" @@ Xlist.map tokens (fun x -> string_of_token mode x)*) | |
37 | + | DepSentence (paths) -> string_of_paths mode tokens paths | |
38 | + | QuotedSentences _ -> failwith ("string_of_sentence: QuotedSentences") | |
39 | + | AltSentence alts -> alternative_string (string_of_sentence mode tokens) mode alts | |
40 | + | |
41 | +let string_of_p_record mode tokens p_record = | |
42 | + (if p_record.pid = "" then "" else p_record.pid ^ "\n") ^ | |
43 | + string_of_sentence mode tokens p_record.psentence | |
44 | + | |
45 | +(*let rec string_of_paragraph mode tokens = function | |
46 | + RawParagraph s -> if mode = Raw then s else "" | |
47 | + | StructParagraph p_records -> String.concat "\n\n" @@ Xlist.map p_records (string_of_p_record mode tokens) | |
48 | + | AltParagraph alts -> alternative_string (string_of_paragraph mode) mode alts | |
49 | + | |
50 | +let rec string_of_text mode tokens = function | |
51 | + RawText s -> if mode = Raw then s else "" | |
52 | + | StructText (paragraphs,_) -> String.concat "\n\n" @@ Xlist.map paragraphs (string_of_paragraph mode tokens) | |
53 | + | AltText alts -> alternative_string (string_of_text mode) mode alts*) | |
54 | + | |
55 | + | |
56 | +(******************) | |
57 | + | |
58 | +let establish_next tokens paths = | |
59 | + let n = ExtArray.size tokens in | |
60 | + Int.iter 1 (n - 2) (fun i -> | |
61 | + let f = ExtArray.get tokens i in | |
62 | + let s = ExtArray.get tokens (i+1) in | |
63 | + ExtArray.set tokens i {f with next = s.beg}); | |
64 | + let last = ExtArray.get tokens (n-1) in | |
65 | + ExtArray.set tokens (n-1) {last with next = last.beg + last.len} | |
66 | + | |
67 | + | |
68 | + (*let rec pom res = function | |
69 | + h :: t -> let next = if res = [] | |
70 | + then h.beg+h.len | |
71 | + else (List.hd res).beg in | |
72 | + pom ({h with next = next} :: res) t | |
73 | + | [] -> res in | |
74 | + pom [] rev_tokens*) | |
75 | + | |
76 | +let rec establish_for_token i text tokens = function | |
77 | + (id,_,_) :: t as l-> | |
78 | + let h = ExtArray.get tokens id in | |
79 | + if Xstring.check_prefix " " text | |
80 | + then establish_for_token (i+100) (Xstring.cut_prefix " " text) tokens l | |
81 | + else if Xstring.check_prefix h.orth text | |
82 | + then | |
83 | + let n = (List.length @@ Xunicode.utf8_chars_of_utf8_string h.orth) * 100 in | |
84 | + let n_h = {h with beg = i ; len = n} in | |
85 | + ExtArray.set tokens id n_h; | |
86 | + establish_for_token (i+n) (Xstring.cut_prefix h.orth text) tokens t | |
87 | + else failwith ("establish_for_token :" ^ h.orth ^ " " ^ text) | |
88 | + | [] -> 100, i | |
89 | + | |
90 | +let rec establish_lengths text paths tokens = | |
91 | + let pbeg, plen = establish_for_token 100 text tokens (List.tl (Array.to_list paths)) in | |
92 | + establish_next tokens paths; | |
93 | + pbeg, plen-100 | |
94 | + | |
95 | +(******************) | |
96 | + | |
97 | +exception ErrorInfoFile of string | |
98 | + | |
99 | +let info_file = "../corpora/info_sentences.txt" | |
100 | + | |
101 | +let info = Xstring.split "\n\n" @@ File.load_file_gen info_file | |
102 | + | |
103 | +let add_to_map map info_str = | |
104 | + match Xstring.split "\n" info_str with | |
105 | + [id; text; info_token] -> StringMap.add map info_token (id, text) | |
106 | + | _ -> raise (ErrorInfoFile info_str) | |
107 | + | |
108 | +let info_map = | |
109 | + Xlist.fold info StringMap.empty add_to_map | |
110 | + | |
111 | +let match_sentence (p_record,tokens) = | |
112 | + let rec info_token s = match s with | |
113 | + RawSentence text -> failwith ("match_sentence: " ^ text) | |
114 | + | StructSentence (tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*) | |
115 | + | DepSentence (paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths | |
116 | + | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences") | |
117 | + | AltSentence alts -> failwith ("match_sentence: AltSentence") | |
118 | + (*if List.exists (fun (mode, s) -> mode = CONLL) alts | |
119 | + then info_token (snd (List.find (fun (mode, s) -> mode = CONLL) alts)) | |
120 | + else failwith ("match_sentence: no CONLL mode in AltSentence")*) in | |
121 | + let info_token, paths = info_token p_record.psentence in | |
122 | + try | |
123 | + let id, text = StringMap.find info_map info_token in | |
124 | + let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in | |
125 | + AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; pfile_prefix=""; | |
126 | + psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence paths]}]],tokens)] | |
127 | +(* {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *) | |
128 | + with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)] | |
129 | + | |
130 | +let match_corpus corpus = | |
129 | 131 | Xlist.map corpus match_sentence |
130 | 132 | |
131 | 133 | (******************) |
... | ... | @@ -186,8 +188,8 @@ let info_map = |
186 | 188 | let match_sentence (p_record,tokens) = |
187 | 189 | let rec info_token s = match s with |
188 | 190 | RawSentence text -> failwith ("match_sentence: " ^ text) |
189 | - | StructSentence (_, tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*) | |
190 | - | DepSentence (_, paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths | |
191 | + | StructSentence (tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*) | |
192 | + | DepSentence (paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths | |
191 | 193 | | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences") |
192 | 194 | | AltSentence alts -> failwith ("match_sentence: AltSentence") |
193 | 195 | (*if List.exists (fun (mode, s) -> mode = CONLL) alts |
... | ... | @@ -197,8 +199,8 @@ let match_sentence (p_record,tokens) = |
197 | 199 | try |
198 | 200 | let id, text = StringMap.find info_map info_token in |
199 | 201 | let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in |
200 | - AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; | |
201 | - psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence("", paths)]}]],tokens)] | |
202 | + AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; pfile_prefix=""; | |
203 | + psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence paths]}]],tokens)] | |
202 | 204 | (* {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *) |
203 | 205 | with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)] |
204 | 206 | |
... | ... | @@ -224,6 +226,7 @@ let load_token in_channel = |
224 | 226 | else [Xlist.map (Xstring.split_delim "|" interp) (fun tag -> [tag])] in |
225 | 227 | {empty_token with orth = orth; token = Lemma(lemma,cat,interp);}, int_of_string id, int_of_super super, label in |
226 | 228 | let line = input_line in_channel in |
229 | + (* print_endline ("load_token: " ^ line); *) | |
227 | 230 | if line = "" |
228 | 231 | then raise Empty_line |
229 | 232 | else if line.[0] = '#' |
... | ... | @@ -272,7 +275,7 @@ let load_sentence in_channel = |
272 | 275 | then raise End_of_file |
273 | 276 | else rev_paths, id in |
274 | 277 | let rev_paths, id = pom [] "" in |
275 | - {pid = id; pbeg = -1; plen = -1; pnext = -1; psentence = DepSentence("",Array.of_list ((0,-1,"") :: List.rev rev_paths))}, tokens | |
278 | + {pid = id; pbeg = -1; plen = -1; pnext = -1; pfile_prefix = ""; psentence = DepSentence(Array.of_list ((0,-1,"") :: List.rev rev_paths))}, tokens | |
276 | 279 | (* {s_id = id; s_text = ""; s_paths = (List.rev rev_paths)} *) |
277 | 280 | |
278 | 281 | let load_corpus in_channel = |
... | ... |
parser/LCGchart.ml
... | ... | @@ -197,7 +197,7 @@ let is_dep_parsed = function |
197 | 197 | let get_parsed_term tokens chart = |
198 | 198 | let n = last_node chart in |
199 | 199 | let l = Xlist.fold (find chart 0 n) [] (fun l -> function |
200 | - LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<root>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l | |
200 | + LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<root>"]), sem -> (LCGtypes.Cut(LCGtypes.Tuple[sem])) :: l | |
201 | 201 | (* | LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<ors-sentence>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l *) |
202 | 202 | | _ -> l) in |
203 | 203 | let id = ExtArray.add tokens {PreTypes.empty_token with PreTypes.token=PreTypes.Lemma("<root>","interp",[])} in |
... | ... |
parser/LCGlatexOf.ml
... | ... | @@ -224,7 +224,7 @@ let parsed_dep_chart l = |
224 | 224 | "\\end{longtable}" |
225 | 225 | |
226 | 226 | let not_parsed_dep_chart (id,left,l,right) = |
227 | - Printf.sprintf "conll_id=%d\\\\" id ^ | |
227 | + Printf.sprintf "conll\\_id=%d\\\\" id ^ | |
228 | 228 | "\\begin{longtable}{|l|p{20cm}|}\n\\hline\n" ^ |
229 | 229 | String.concat "\\hline\n" (Xlist.map left table_entries_of_symbol_term_list) ^ |
230 | 230 | "\\hline\n\\hline\n\\hline\n" ^ |
... | ... | @@ -234,7 +234,7 @@ let not_parsed_dep_chart (id,left,l,right) = |
234 | 234 | "\\end{longtable}" |
235 | 235 | |
236 | 236 | let rec dep_chart_rec (DepNode(id,left,l,right)) = |
237 | - printf "dep_chart_rec: %d\n" id; | |
237 | + (* printf "dep_chart_rec: %d\n" id; *) | |
238 | 238 | String.concat "" (Xlist.map left dep_chart_rec) ^ |
239 | 239 | String.concat "" (Xlist.rev_map l (fun (symbol,sem) -> |
240 | 240 | let symbol = grammar_symbol 0 symbol in |
... | ... |
parser/LCGlexicon.ml
... | ... | @@ -216,6 +216,7 @@ let make_node lemma cat weight fnum l = |
216 | 216 | |
217 | 217 | (* FIXME: "Można było" - brakuje uzgodnienia rodzaju przymiotnika w przypadku predykatywnym, i ogólnie kontroli składniowej *) |
218 | 218 | let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja siebie i się *) |
219 | + if d.simple_valence = [] then print_endline "empty simple_valence"; | |
219 | 220 | let numbers = expand_numbers numbers in |
220 | 221 | let cases = expand_cases cases in |
221 | 222 | let genders = expand_genders genders in (* FIXME: trzeba dodać konstrukcję w marcu 2000 i leksykalizowane nazwy własne przy miesiącach *) |
... | ... | @@ -1207,9 +1208,9 @@ let process_bracket_lemma (d:PreTypes.token_record) = function |
1207 | 1208 | LCGrenderer.make_frame x_flag tokens [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]];[schema_field OBJ "Theme" Forward [Phrase (Lex "</query>")]]] (["<colon>"]) {d with orth=""} batrs] |
1208 | 1209 | (*| "<or>" -> [LCGrenderer.make_frame x_flag tokens [] [[nosem_schema_field Forward [Phrase (Lex "</or>")]]] (["or"]) {d with orth=""} (make_node "<or>" "interp" d.weight 0 [])]*) |
1209 | 1210 | | "<or>" -> [] |
1210 | - | "<or-sentence>" -> | |
1211 | + | "<or-sentence>" -> (* FIXME: dodać mówcę jako pro *) | |
1211 | 1212 | let batrs = make_node "pro-komunikować" "pro" d.weight 0 [] in |
1212 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field OBJ "Theme" Forward [Phrase (Lex "s")]]] ["<root>"] {d with orth=""} batrs; | |
1213 | + [LCGrenderer.make_frame x_flag tokens [] [[schema_field ARG ""(*"Theme"*) Forward [Phrase (Lex "s")]]] ["<root>"] {d with orth=""} batrs; | |
1213 | 1214 | LCGrenderer.or_frame (make_node "<sentence>" "interp" d.weight 0 [])] |
1214 | 1215 | | "</or-sentence>" -> |
1215 | 1216 | let t = ["or2"] in |
... | ... | @@ -1257,6 +1258,7 @@ let get_labels () = { |
1257 | 1258 | let symbol,sem = LCGrenderer.make_frame_simple [] t d ( batrs) in |
1258 | 1259 | [Bracket(true,true,symbol),sem] |
1259 | 1260 | | {token = Lemma(lemma,pos,interp)} -> |
1261 | + (* print_endline (lemma ^ " " ^ pos); *) | |
1260 | 1262 | Xlist.fold interp [] (fun l tags -> |
1261 | 1263 | let d = {d with e=get_labels (); valence=LCGrenderer.make_controll d.valence} in |
1262 | 1264 | let entries = process_interp d (lemma,pos,tags) in |
... | ... | @@ -1294,7 +1296,7 @@ let rec dep_create_rec nodes sons conll_id = |
1294 | 1296 | let node = IntMap.find nodes conll_id in |
1295 | 1297 | let l = try IntMap.find sons conll_id with Not_found -> [] in |
1296 | 1298 | let left,right = split_sons [] conll_id [] l in |
1297 | - (* Printf.printf "dep_create_rec [%s] %s [%s]\n" (String.concat ";" left) id (String.concat ";" right); *) | |
1299 | + (* Printf.printf "dep_create_rec [%s] %d [%s]\n" (String.concat ";" (Xlist.map left string_of_int)) conll_id (String.concat ";" (Xlist.map right string_of_int)); *) | |
1298 | 1300 | DepNode(conll_id, Xlist.map left (dep_create_rec nodes sons), node, Xlist.map right (dep_create_rec nodes sons)) |
1299 | 1301 | |
1300 | 1302 | let dep_create paths tokens = |
... | ... |
parser/LCGreductions.ml
... | ... | @@ -224,7 +224,7 @@ let linear_term_beta_reduction4 references = |
224 | 224 | | Variant(e2,l) -> Variant(e2,Xlist.map l (fun (i,t) -> i,linear_term_beta_reduction subst (SetAttr(e,s,t)))) |
225 | 225 | | t -> SetAttr(e,s,t)) |
226 | 226 | | Node t -> |
227 | - if !size > Paths.lcg_no_nodes then raise SemTooBig; | |
227 | + if !size > Paths.config.Paths.lcg_no_nodes then raise SemTooBig; | |
228 | 228 | incr size; |
229 | 229 | Node{t with attrs=Xlist.map t.attrs (fun (e,t) -> e, linear_term_beta_reduction subst t); |
230 | 230 | gs=linear_term_beta_reduction subst t.gs; |
... | ... |
parser/LCGrenderer.ml
... | ... | @@ -440,7 +440,7 @@ let or_frame node = |
440 | 440 | Imp(Tensor[Atom "ip"; Top; Top; Top],Forward,Tensor[Atom "or"])),Forward, |
441 | 441 | Tensor[Atom "or2"]), |
442 | 442 | Lambda("x",Lambda("y",Lambda("z",Node{node with gs=make_gs [] ["<root>"]; args=Tuple[ |
443 | - Cut(App(Var "y",Var "x"))]}))) | |
443 | + Cut(SetAttr("AROLE",Val "Clause",SetAttr("GF",Gf CLAUSE,App(Var "y",Var "x"))))]}))) | |
444 | 444 | |
445 | 445 | |
446 | 446 | let label_counter = ref 0 |
... | ... |
parser/LCGvalence.ml
... | ... | @@ -22,22 +22,22 @@ open LCGtypes |
22 | 22 | open Printf |
23 | 23 | open Xstd |
24 | 24 | |
25 | -let rec list_assoc2 x = function | |
25 | +let rec list_assoc2 x = function | |
26 | 26 | (s,a,b) :: l -> if x = s then a,b else list_assoc2 x l |
27 | - | [] -> raise Not_found | |
28 | - | |
29 | -let meaning_weight = -1. | |
30 | - | |
27 | + | [] -> raise Not_found | |
28 | + | |
29 | +let meaning_weight = -1. | |
30 | + | |
31 | 31 | let prepare_senses lemma meanings senses = |
32 | 32 | match meanings,senses with |
33 | 33 | [],[] -> [lemma, ["ALL"],0.] (* FIXME *) |
34 | - | [],_ -> | |
35 | - Xlist.map senses (fun (sense,hipero,weight) -> | |
36 | - if hipero = ["0"] then sense,["0"],weight else | |
34 | + | [],_ -> | |
35 | + Xlist.map senses (fun (sense,hipero,weight) -> | |
36 | + if hipero = ["0"] then sense,["0"],weight else | |
37 | 37 | sense,(if hipero = [] then ["ALL"] else hipero),weight) |
38 | 38 | | _,[] -> Xlist.map meanings (fun meaning -> meaning, ["ALL"],meaning_weight) |
39 | - | _,_ -> | |
40 | - Xlist.map meanings (fun meaning -> | |
39 | + | _,_ -> | |
40 | + Xlist.map meanings (fun meaning -> | |
41 | 41 | let hipero,weight = try list_assoc2 meaning senses with Not_found -> [],meaning_weight in |
42 | 42 | if hipero = ["0"] then meaning,["0"],weight else meaning,(if hipero = [] then ["ALL"] else hipero),weight) |
43 | 43 | |
... | ... | @@ -50,89 +50,91 @@ let extract_meaning lemma = function |
50 | 50 | | GerAtrs(m,le,neg,a) -> m,le,GerAtrs([],le,neg,a) |
51 | 51 | | NonPersAtrs(m,le,role,role_attr,neg,a) -> m,le,NonPersAtrs([],le,role,role_attr,neg,a) |
52 | 52 | | _ -> failwith "extract_meaning" |
53 | - | |
53 | + | |
54 | 54 | let extract_roles = function |
55 | 55 | NonPersAtrs(m,le,role,role_attr,neg,a) -> role,role_attr |
56 | 56 | | _ -> failwith "extract_roles" |
57 | - | |
57 | + | |
58 | 58 | let get_lemma = function |
59 | 59 | PreTypes.Lemma(lemma,cat,_) -> lemma,cat |
60 | 60 | | PreTypes.Interp lemma -> lemma,"interp" |
61 | 61 | | _ -> "","" |
62 | - | |
63 | -let prepare_valence paths_array = | |
64 | - let valence = Array.map (fun d -> | |
62 | + | |
63 | +let prepare_valence tokens = | |
64 | + let valence = Array.make (ExtArray.size tokens) [] in | |
65 | + Int.iter 1 (ExtArray.size tokens - 1) (fun id -> | |
66 | + let d = ExtArray.get tokens id in | |
65 | 67 | let lemma,cat = get_lemma d.PreTypes.token in |
66 | - let lemma = if lemma = "<ors>" || lemma = ":s" || lemma = "„s" then "pro-komunikować" else lemma in | |
67 | - if lemma = "" then [] else | |
68 | - let prep_valence = | |
69 | - if cat = "prep" then | |
68 | + let lemma = if lemma = "<or-sentence>" (*|| lemma = ":s" || lemma = "„s"*) then "pro-komunikować" else lemma in | |
69 | + if lemma = "" then () else | |
70 | + let prep_valence = | |
71 | + if cat = "prep" then | |
70 | 72 | (* (0,lemma,StringSet.empty,0.,"NOSEM","",Frame(EmptyAtrs[],[])) :: *) |
71 | 73 | match d.PreTypes.semantics with |
72 | 74 | PreTypes.Normal -> [] |
73 | - | PreTypes.PrepSemantics l -> | |
74 | - Xlist.rev_map l (fun (lrole,lrole_attr,hipero,sel_prefs) -> | |
75 | + | PreTypes.PrepSemantics l -> | |
76 | + Xlist.rev_map l (fun (lrole,lrole_attr,hipero,sel_prefs) -> | |
75 | 77 | 0,lemma,hipero,0.,lrole,lrole_attr,Frame(EmptyAtrs[],[])) |
76 | 78 | | _ -> failwith "prepare_valence" |
77 | 79 | else [] in |
78 | - let valence = if d.PreTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.PreTypes.valence in | |
80 | + let valence2 = if d.PreTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.PreTypes.valence in | |
79 | 81 | let lrole,lrole_attr = d.PreTypes.lroles in |
80 | - prep_valence @ List.flatten (Xlist.map valence (function | |
82 | + valence.(id) <- prep_valence @ List.flatten (Xlist.map valence2 (function | |
81 | 83 | fnum,Frame(attrs,schema) -> |
82 | 84 | let meanings,lemma,attrs = extract_meaning lemma attrs in |
83 | - let lrole,lrole_attr = | |
84 | - if cat = "pact" || cat = "ppas" then extract_roles attrs else | |
85 | - if cat = "pcon" then "Con","" else | |
85 | + let lrole,lrole_attr = | |
86 | + if cat = "pact" || cat = "ppas" then extract_roles attrs else | |
87 | + if cat = "pcon" then "Con","" else | |
86 | 88 | if cat = "pant" then "Ant","" else |
87 | 89 | d.PreTypes.lroles in |
88 | 90 | Xlist.map (prepare_senses lemma meanings d.PreTypes.senses) (fun (meaning,hipero,weight) -> |
89 | 91 | let hipero = if cat = "conj" then ["0"] else hipero in |
90 | 92 | fnum,meaning,StringSet.of_list hipero,weight,lrole,lrole_attr, |
91 | - Frame(attrs,Xlist.map schema (fun s -> | |
93 | + Frame(attrs,Xlist.map schema (fun s -> | |
92 | 94 | (* let s = if s.sel_prefs=[] then (print_endline ("prepare_valence empty sel_prefs: " ^ lemma ^ " " ^ cat); {s with sel_prefs=["ALL"]}) else s in *) |
93 | 95 | if s.role="" && s.gf <> ADJUNCT && s.gf <> NOSEM then ( |
94 | 96 | printf "%d: %s\n%!" fnum (WalStringOf.frame lemma (Frame(attrs,schema))); |
95 | 97 | failwith ("prepare_valence empty role: " ^ lemma ^ " " ^ cat)) else |
96 | 98 | {s with morfs=List.sort compare s.morfs}))) |
97 | 99 | | fnum,(LexFrame _ as frame) -> [fnum,"lex",StringSet.empty,0.,lrole,lrole_attr,frame] |
98 | - | fnum,(ComprepFrame _ as frame) -> [fnum,"comprep",StringSet.empty,0.,lrole,lrole_attr,frame]))) paths_array in | |
100 | + | fnum,(ComprepFrame _ as frame) -> [fnum,"comprep",StringSet.empty,0.,lrole,lrole_attr,frame]))); | |
99 | 101 | valence |
100 | - | |
102 | + | |
101 | 103 | (*let create_pro_frames t = |
102 | 104 | [0,t.pred,StringSet.singleton "0",0.,"","",Frame(EmptyAtrs[],[])]*) |
103 | - | |
104 | -let get_fnum t = | |
105 | + | |
106 | +let get_fnum t = | |
105 | 107 | let x = try Xlist.assoc t.attrs "FNUM" with Not_found -> Val "0" in |
106 | 108 | (match x with |
107 | 109 | Val s -> (try int_of_string s with _ -> failwith "get_fnum 1") |
108 | 110 | | _ -> failwith "get_fnum 2") |
109 | - | |
110 | -let select_frames l t = | |
111 | + | |
112 | +let select_frames l t = | |
111 | 113 | (* printf "a1 pred=%s\n" t.pred; *) |
112 | 114 | let fnum = get_fnum t in |
113 | 115 | let l = Xlist.fold l [] (fun l (n,meaning,hipero,weight,lrole,lrole_attr,frame) -> |
114 | 116 | if n = fnum then (meaning,hipero,weight,lrole,lrole_attr,frame) :: l else l) in |
115 | 117 | (* printf "a2 pred=%s\n" t.pred; *) |
116 | - match l with | |
118 | + match l with | |
117 | 119 | [] -> "",[] |
118 | 120 | | [_,_,_,_,_,LexFrame _] -> "",[] |
119 | 121 | | [_,_,_,_,_,ComprepFrame _] -> "",[] |
120 | 122 | | _ -> LCGreductions.get_variant_label (), |
121 | 123 | fst (Xlist.fold l ([],1) (fun (l,i) t -> (string_of_int i, t) :: l, i+1)) |
122 | - | |
124 | + | |
123 | 125 | let rec get_arg_refs found = function |
124 | 126 | Variant(_,l) -> Xlist.fold l found (fun found (_,t) -> get_arg_refs found t) |
125 | 127 | | Ref i -> i :: found |
126 | 128 | | t -> failwith ("get_arg_refs: " ^ LCGstringOf.linear_term 0 t) |
127 | - | |
129 | + | |
128 | 130 | let rec match_position = function |
129 | - a :: la, b :: lb -> | |
131 | + a :: la, b :: lb -> | |
130 | 132 | if a = b then match_position (la,b :: lb) else |
131 | 133 | if a > b then match_position (a :: la,lb) |
132 | 134 | else false |
133 | 135 | | [],_ -> true |
134 | 136 | | _,[] -> false |
135 | - | |
137 | + | |
136 | 138 | let mark_sem_morfs morfs = |
137 | 139 | Xlist.map morfs (function |
138 | 140 | | Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(Sem,prep,c)) |
... | ... | @@ -143,7 +145,7 @@ let mark_sem_morfs morfs = |
143 | 145 | | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(Sem,prep)) |
144 | 146 | | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(Sem,prep,c,ct,co)) |
145 | 147 | | t -> t) |
146 | - | |
148 | + | |
147 | 149 | let mark_nosem_morf = function |
148 | 150 | Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(NoSem,prep,c)) |
149 | 151 | | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(NoSem,prep,c)) |
... | ... | @@ -153,7 +155,7 @@ let mark_nosem_morf = function |
153 | 155 | | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(NoSem,prep)) |
154 | 156 | | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(NoSem,prep,c,ct,co)) |
155 | 157 | | t -> failwith "mark_nosem_morf" |
156 | - | |
158 | + | |
157 | 159 | let rec is_nosem_morf = function |
158 | 160 | Phrase(PrepNP(NoSem,prep,c)) -> true |
159 | 161 | | Phrase(PrepAdjP(NoSem,prep,c)) -> true |
... | ... | @@ -163,7 +165,7 @@ let rec is_nosem_morf = function |
163 | 165 | | Phrase(ComparPP(NoSem,prep)) -> true |
164 | 166 | | Phrase(PrepNCP(NoSem,prep,c,ct,co)) -> true |
165 | 167 | | _ -> false |
166 | - | |
168 | + | |
167 | 169 | let rec is_sem_morf = function |
168 | 170 | Phrase(PrepNP(Sem,prep,c)) -> true |
169 | 171 | | Phrase(PrepAdjP(Sem,prep,c)) -> true |
... | ... | @@ -173,12 +175,12 @@ let rec is_sem_morf = function |
173 | 175 | | Phrase(ComparPP(Sem,prep)) -> true |
174 | 176 | | Phrase(PrepNCP(Sem,prep,c,ct,co)) -> true |
175 | 177 | | _ -> false |
176 | - | |
178 | + | |
177 | 179 | let rec exclude_sem_morfs = function |
178 | 180 | [] -> [] |
179 | 181 | | morf :: morfs -> if is_sem_morf morf then exclude_sem_morfs morfs else morf :: exclude_sem_morfs morfs |
180 | - | |
181 | -(* UWAGA: dopasowywane ramy są preselekcjonowane, więc wszystkie argumenty muszą się maczować *) | |
182 | + | |
183 | +(* UWAGA: dopasowywane ramy są preselekcjonowane, więc wszystkie argumenty muszą się maczować *) | |
182 | 184 | let match_args_pos modifications nodes e i schema t = |
183 | 185 | (* printf "match_args_pos\n"; *) |
184 | 186 | (* if schema = [] then schema else *) |
... | ... | @@ -189,9 +191,9 @@ let match_args_pos modifications nodes e i schema t = |
189 | 191 | (* if gf = NOSEM || gf = NOGF then schema else |
190 | 192 | if gf = CORE then schema else (* FIXME: semantyka dla core *)*) |
191 | 193 | let morfs = exclude_sem_morfs morfs in |
192 | - let schema,selected = | |
194 | + let schema,selected = | |
193 | 195 | if morfs = [] then schema,[] else |
194 | - let morfs = List.sort compare morfs in | |
196 | + let morfs = List.sort compare morfs in | |
195 | 197 | (* printf "gf=%s morfs=%s\n%!" (WalStringOf.gf gf) (String.concat ";" (Xlist.map morfs WalStringOf.morf)); *) |
196 | 198 | Xlist.fold schema ([],[]) (fun (schema,selected) pos -> |
197 | 199 | (* printf "pos.gf=%s pos.morfs=%s\n%!" (WalStringOf.gf pos.gf) (String.concat ";" (Xlist.map pos.morfs WalStringOf.morf)); *) |
... | ... | @@ -201,9 +203,9 @@ let match_args_pos modifications nodes e i schema t = |
201 | 203 | (match selected with |
202 | 204 | [] -> (*if gf = ARG then failwith "match_args_pos 3" else*) |
203 | 205 | Xlist.iter refs (fun r -> |
204 | - modifications.(r) <- StringMap.add modifications.(r) (e ^ i) | |
206 | + modifications.(r) <- StringMap.add modifications.(r) (e ^ i) | |
205 | 207 | LCGrenderer.empty_schema_field(*{gf=ADJUNCT; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]}*) ) |
206 | -(* FIXME: przy kilku pasujących pozycjach wybieram pierwszą a nich, do poprawienia przy okazji porządków z walencją | |
208 | +(* FIXME: przy kilku pasujących pozycjach wybieram pierwszą a nich, do poprawienia przy okazji porządków z walencją | |
207 | 209 | np walencja leksemu "godzina": |
208 | 210 | 1: : : common: time: Poss,T|{null;np(gen);nump(gen)}+Arg,T|{null;np(gen);nump(gen)}; |
209 | 211 | 1: : : common: time: Temp,T|{null;np(gen);nump(gen)}]; *) |
... | ... | @@ -211,19 +213,19 @@ np walencja leksemu "godzina": |
211 | 213 | modifications.(r) <- StringMap.add modifications.(r) (e ^ i) pos) |
212 | 214 | (*| _ -> failwith "match_args_pos 4"*)); |
213 | 215 | schema |
214 | - | |
216 | + | |
215 | 217 | let rec match_args_tuple modifications nodes e i schema = function |
216 | - Tuple l -> | |
218 | + Tuple l -> | |
217 | 219 | Xlist.fold l schema (fun schema t -> |
218 | 220 | match_args_tuple modifications nodes e i schema t) |
219 | 221 | | t -> match_args_pos modifications nodes e i schema t |
220 | - | |
221 | -let match_args modifications nodes e i t = function | |
222 | + | |
223 | +let match_args modifications nodes e i t = function | |
222 | 224 | Frame(_,schema) -> ignore (match_args_tuple modifications nodes e i schema t.args) |
223 | 225 | | LexFrame _ -> failwith "match_args" |
224 | 226 | | ComprepFrame _ -> failwith "match_args" |
225 | - | |
226 | -let rec assign_frames_and_senses_rec modifications valence nodes t = | |
227 | + | |
228 | +let rec assign_frames_and_senses_rec modifications valence nodes t = | |
227 | 229 | (* printf "pred=%s id=%d\n" t.pred t.id; *) |
228 | 230 | if t.id = 0 then failwith ("assign_frames_and_senses_rec: t.id=0 pred=" ^ t.pred) else |
229 | 231 | let e,node_valence = select_frames ((*if t.id >= Array.length valence then create_pro_frames t else*) valence.(t.id)) t in |
... | ... | @@ -235,23 +237,23 @@ let rec assign_frames_and_senses_rec modifications valence nodes t = |
235 | 237 | (* printf "meaning=%s\n" meaning; *) |
236 | 238 | i,Node{t with meaning=meaning; |
237 | 239 | hipero=hipero; |
238 | - meaning_weight=weight}) in | |
240 | + meaning_weight=weight}) in | |
239 | 241 | Variant(e,l) |
240 | 242 | |
241 | -let rec is_nosem_morfs morfs = | |
243 | +let rec is_nosem_morfs morfs = | |
242 | 244 | let sem = Xlist.fold morfs false (fun b m -> b || is_sem_morf m) in |
243 | 245 | let nosem = Xlist.fold morfs false (fun b m -> b || is_nosem_morf m) in |
244 | 246 | nosem && not sem (* FIXME: pewne uproszczenie, ale liczę, że nie ma wielu ramek z xp koordynowanym z prep *) |
245 | - | |
247 | + | |
246 | 248 | let has_tuple = function |
247 | 249 | Tuple _ -> true |
248 | 250 | | _ -> false |
249 | - | |
250 | -let rec apply_modifications_rec (*paths_array*) pos = function | |
251 | - Variant(e,l) -> Variant(e,List.rev (Xlist.fold l [] (fun l (i,t) -> | |
252 | - let t = apply_modifications_rec (*paths_array*) pos t in | |
253 | - (*if t = Dot then l else*) (i, (*apply_modifications_rec (*paths_array*) pos*) t) :: l))) | |
254 | - | Node t -> | |
251 | + | |
252 | +let rec apply_modifications_rec (*tokens*) pos = function | |
253 | + Variant(e,l) -> Variant(e,List.rev (Xlist.fold l [] (fun l (i,t) -> | |
254 | + let t = apply_modifications_rec (*tokens*) pos t in | |
255 | + (*if t = Dot then l else*) (i, (*apply_modifications_rec (*tokens*) pos*) t) :: l))) | |
256 | + | Node t -> | |
255 | 257 | if is_sem_morf t.amorf then Node t else (* FIXME: czy to jest poprawne? *) |
256 | 258 | let t = if is_nosem_morf t.amorf then {t with agf=ARG} else t in |
257 | 259 | Node{t with position=pos} |
... | ... | @@ -259,41 +261,42 @@ let rec apply_modifications_rec (*paths_array*) pos = function |
259 | 261 | |
260 | 262 | (* let empty_pos = {gf=ADJUNCT; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]} (* FIXME: jaka GF? *) *) |
261 | 263 | |
262 | -let apply_modifications (*paths_array*) modifications nodes references = | |
264 | +let apply_modifications (*tokens*) modifications nodes references = | |
263 | 265 | Int.iter 1 (Array.length references - 1) (fun r -> |
264 | 266 | (* if StringMap.is_empty modifications.(r) then failwith ("apply_modifications: " ^ nodes.(r).pred) else *) |
265 | - references.(r) <- Choice(StringMap.map modifications.(r) (fun pos -> | |
266 | - apply_modifications_rec (*paths_array*) pos references.(r)))) | |
267 | + references.(r) <- Choice(StringMap.map modifications.(r) (fun pos -> | |
268 | + apply_modifications_rec (*tokens*) pos references.(r)))) | |
267 | 269 | |
268 | 270 | let rec extract_nosem rev = function |
269 | 271 | [] -> List.rev rev, false |
270 | 272 | | ("NOSEM",Val "+") :: l -> (List.rev rev) @ l, true |
271 | 273 | | x :: l -> extract_nosem (x :: rev) l |
272 | - | |
274 | + | |
273 | 275 | let get_nodes = function |
274 | - Node t -> | |
276 | + Node t -> | |
275 | 277 | let attrs,b = extract_nosem [] t.attrs in |
276 | - let t = if t.pred = "<query1>" || t.pred = "<query2>" || t.pred = "<query3>" || t.pred = "<query4>" || t.pred = "<query5>" || t.pred = "<query6>" then {t with agf=CORE} else t in | |
278 | + (* let t = if t.pred = "<query1>" || t.pred = "<query2>" || t.pred = "<query3>" || t.pred = "<query4>" || t.pred = "<query5>" || t.pred = "<query6>" then {t with agf=CORE} else t in *) | |
279 | + let t = if t.pred = "<sentence>" || t.pred = "pro-komunikować" then {t with agf=CORE} else t in (* FIXME: przetestować na mowie niezależnej *) | |
277 | 280 | if t.agf = NOGF then failwith ("get_nodes agf=NOGF: " ^ t.pred) else |
278 | 281 | if b then {t with amorf=mark_nosem_morf t.amorf; attrs=attrs} else t |
279 | - | _ -> failwith "get_nodes" | |
280 | - | |
281 | -let rec propagate_nosem_selprefs modifications ei = function | |
282 | + | _ -> failwith "get_nodes" | |
283 | + | |
284 | +let rec propagate_nosem_selprefs modifications ei = function | |
282 | 285 | Choice choice -> Choice(StringMap.map choice (propagate_nosem_selprefs modifications "")) |
283 | 286 | | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, propagate_nosem_selprefs modifications (e ^ i) t)) |
284 | - | Node t -> | |
285 | - if (t.cat = "prep" && t.arole = "NOSEM") || t.cat = "num" then | |
287 | + | Node t -> | |
288 | + if (t.cat = "prep" && t.arole = "NOSEM") || t.cat = "num" then | |
286 | 289 | let refs = IntSet.of_list (get_arg_refs [] t.args) in |
287 | - IntSet.iter refs (fun r -> | |
288 | - modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.WalTypes.sel_prefs (fun l -> | |
290 | + IntSet.iter refs (fun r -> | |
291 | + modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.WalTypes.sel_prefs (fun l -> | |
289 | 292 | if l = t.position.WalTypes.sel_prefs then l else failwith ("propagate_nosem_selprefs 1: [" ^ String.concat ";" l ^ "] [" ^ String.concat ";" t.position.WalTypes.sel_prefs ^ "]"))); |
290 | 293 | Node{t with position= {t.position with WalTypes.sel_prefs = []}} |
291 | 294 | else Node t |
292 | 295 | | _ -> failwith "propagate_nosem_selprefs 2" |
293 | - | |
296 | + | |
294 | 297 | let rec apply_modifications2_rec mods = function |
295 | 298 | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, apply_modifications2_rec mods t)) |
296 | - | Node t -> | |
299 | + | Node t -> | |
297 | 300 | if t.position.WalTypes.sel_prefs <> [] then failwith "apply_modifications2_rec" else |
298 | 301 | Node{t with position={t.position with WalTypes.sel_prefs=mods}} |
299 | 302 | | _ -> failwith "apply_modifications2_rec" |
... | ... | @@ -302,57 +305,22 @@ let apply_modifications2 modifications references = |
302 | 305 | Int.iter 1 (Array.length references - 1) (fun r -> |
303 | 306 | if not (StringMap.is_empty modifications.(r)) then |
304 | 307 | match references.(r) with |
305 | - Choice choice -> | |
306 | - references.(r) <- Choice(StringMap.mapi choice (fun ei t -> | |
308 | + Choice choice -> | |
309 | + references.(r) <- Choice(StringMap.mapi choice (fun ei t -> | |
307 | 310 | try apply_modifications2_rec (StringMap.find modifications.(r) ei) t with Not_found -> t)) |
308 | 311 | | _ -> failwith "apply_modifications2") |
309 | - | |
310 | -let assign_frames_and_senses paths_array references = | |
312 | + | |
313 | +let assign_frames_and_senses tokens references = | |
311 | 314 | let modifications = Array.make (Array.length references) StringMap.empty in |
312 | - let valence = prepare_valence paths_array in | |
315 | + let valence = prepare_valence tokens in | |
313 | 316 | let nodes = Array.map get_nodes references in |
314 | 317 | let references = Array.map (assign_frames_and_senses_rec modifications valence nodes) nodes in |
315 | - apply_modifications (*paths_array*) modifications nodes references; | |
318 | + apply_modifications (*tokens*) modifications nodes references; | |
316 | 319 | (* let modifications = Array.make (Array.length references) StringMap.empty in |
317 | 320 | Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r)); (* FIXME: propagowanie preferencji selekcyjnych więcej niż jeden poziom w głąb nie działa *) |
318 | - apply_modifications2 modifications references; | |
321 | + apply_modifications2 modifications references; | |
319 | 322 | Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r)); |
320 | - apply_modifications2 modifications references; | |
323 | + apply_modifications2 modifications references; | |
321 | 324 | Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r)); |
322 | 325 | apply_modifications2 modifications references; *) |
323 | 326 | references |
324 | - | |
325 | - | |
326 | - | |
327 | - | |
328 | - | |
329 | - | |
330 | - | |
331 | - | |
332 | - | |
333 | - | |
334 | - | |
335 | - | |
336 | - | |
337 | - | |
338 | - | |
339 | - | |
340 | - | |
341 | - | |
342 | - | |
343 | - | |
344 | - | |
345 | - | |
346 | - | |
347 | - | |
348 | - | |
349 | - | |
350 | - | |
351 | - | |
352 | - | |
353 | - | |
354 | - | |
355 | - | |
356 | - | |
357 | - | |
358 | - | |
359 | 327 | \ No newline at end of file |
... | ... |
parser/exec.ml
... | ... | @@ -27,23 +27,16 @@ let empty_result = { |
27 | 27 | msg=""; |
28 | 28 | pre_time1=0.; |
29 | 29 | pre_time2=0.; |
30 | - (* lex_time=0.; *) | |
31 | 30 | parse_time=0.; |
32 | 31 | parsed_text=RawText ""; |
33 | -(* reduction_time=0.; | |
34 | - sem_time=0.; | |
35 | - paths_size=0; | |
36 | - disamb=[| |]; | |
37 | - sem=[| |]; | |
38 | - sem2=[| |]; | |
39 | - sem3=LCGtypes.Dot; | |
40 | - trees=[]; | |
41 | - mrls=[]; | |
42 | - paths=[| |];*) | |
43 | - (*structs=SemTypes.Atom "",SemTypes.Label "",SemTypes.Label "",[],""*)} | |
32 | + semantic_time=0.; | |
33 | + selected_sent_text=RawText ""; | |
34 | + semantic_text=RawText ""; | |
35 | + selected_semantic_text=RawText ""; | |
36 | + } | |
44 | 37 | |
45 | 38 | let empty_eniam_parse_result = { |
46 | - id=""; | |
39 | + file_prefix=""; | |
47 | 40 | status=Idle; |
48 | 41 | msg=""; |
49 | 42 | lex_time=0.; |
... | ... | @@ -58,7 +51,7 @@ let empty_eniam_parse_result = { |
58 | 51 | } |
59 | 52 | |
60 | 53 | let empty_conll_parse_result = { |
61 | - id=""; | |
54 | + file_prefix=""; | |
62 | 55 | status=Idle; |
63 | 56 | msg=""; |
64 | 57 | lex_time=0.; |
... | ... | @@ -67,12 +60,26 @@ let empty_conll_parse_result = { |
67 | 60 | sem_time=0.; |
68 | 61 | paths_size=0; |
69 | 62 | dependency_tree_size=0; |
63 | + paths=[| |]; | |
70 | 64 | dep_chart=DepNode(-100,[],[],[]); |
71 | 65 | parsed_dep_chart=[]; |
72 | 66 | not_parsed_dep_chart=(-100,[],[],[]); |
73 | 67 | dependency_tree=[| |]; |
74 | 68 | } |
75 | 69 | |
70 | +let empty_semantic_processing_result = { | |
71 | + file_prefix=""; | |
72 | + status=Idle; | |
73 | + msg=""; | |
74 | + sem_time=0.; | |
75 | + disamb=[| |]; | |
76 | + sem=[| |]; | |
77 | + sem2=[| |]; | |
78 | + sem3=LCGtypes.Dot; | |
79 | + trees=[]; | |
80 | + mrls=[]; | |
81 | + } | |
82 | + | |
76 | 83 | let empty_sum_result = { |
77 | 84 | no_queries=0; |
78 | 85 | no_pre_error=0; |
... | ... | @@ -102,14 +109,16 @@ let translate_mode = function |
102 | 109 | | PreTypes.CONLL -> CONLL |
103 | 110 | | PreTypes.ENIAM -> ENIAM |
104 | 111 | | PreTypes.Mate -> Mate |
112 | + | PreTypes.Swigra -> Swigra | |
113 | + | PreTypes.POLFIE -> POLFIE | |
105 | 114 | |
106 | 115 | let rec translate_sentence = function |
107 | 116 | PreTypes.RawSentence s -> RawSentence s |
108 | - | PreTypes.StructSentence(id,paths,last) -> StructSentence(id,paths,last) | |
109 | - | PreTypes.DepSentence(id,paths) -> DepSentence(id,paths) | |
117 | + | PreTypes.StructSentence(paths,last) -> StructSentence(paths,last) | |
118 | + | PreTypes.DepSentence(paths) -> DepSentence(paths) | |
110 | 119 | | PreTypes.QuotedSentences sentences -> |
111 | 120 | QuotedSentences(Xlist.map sentences (fun p -> |
112 | - {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; | |
121 | + {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix; | |
113 | 122 | psentence=translate_sentence p.PreTypes.psentence})) |
114 | 123 | | PreTypes.AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) -> |
115 | 124 | translate_mode mode, translate_sentence sentence)) |
... | ... | @@ -118,7 +127,7 @@ let rec translate_paragraph = function |
118 | 127 | PreTypes.RawParagraph s -> RawParagraph s |
119 | 128 | | PreTypes.StructParagraph sentences -> |
120 | 129 | StructParagraph(Xlist.map sentences (fun p -> |
121 | - {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; | |
130 | + {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix; | |
122 | 131 | psentence=translate_sentence p.PreTypes.psentence})) |
123 | 132 | | PreTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) -> |
124 | 133 | translate_mode mode, translate_paragraph paragraph)) |
... | ... | @@ -130,8 +139,8 @@ let rec translate_text = function |
130 | 139 | | PreTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) -> |
131 | 140 | translate_mode mode, translate_text text)) |
132 | 141 | |
133 | -let eniam_parse_sentence timeout test_only_flag id paths last tokens = | |
134 | - let result = {empty_eniam_parse_result with id=id} in | |
142 | +let eniam_parse_sentence timeout test_only_flag paths last tokens = | |
143 | + let result = empty_eniam_parse_result in | |
135 | 144 | let time2 = time_fun () in |
136 | 145 | try |
137 | 146 | let chart = LCGlexicon.create (paths,last) tokens in |
... | ... | @@ -187,8 +196,8 @@ let eniam_parse_sentence timeout test_only_flag id paths last tokens = |
187 | 196 | let time3 = time_fun () in |
188 | 197 | {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2} |
189 | 198 | |
190 | -let conll_parse_sentence timeout test_only_flag id paths tokens = | |
191 | - let result = {empty_conll_parse_result with id=id} in | |
199 | +let conll_parse_sentence timeout test_only_flag paths tokens = | |
200 | + let result = empty_conll_parse_result in | |
192 | 201 | let time2 = time_fun () in |
193 | 202 | try |
194 | 203 | let dep_chart = LCGlexicon.dep_create paths tokens in |
... | ... | @@ -198,10 +207,10 @@ let conll_parse_sentence timeout test_only_flag id paths tokens = |
198 | 207 | let result = {result with lex_time=time3 -. time2} in |
199 | 208 | try |
200 | 209 | (* print_endline "conll_parse_sentence 1"; *) |
201 | - LCGlatexOf.print_references "results/" "references1" references; | |
210 | + (* LCGlatexOf.print_references "results/" "references1" references; *) | |
202 | 211 | let parsed_dep_chart = LCGchart.dep_parse dep_chart references timeout time_fun in (* uwaga: niejawna zmiana imperatywna w references *) |
203 | 212 | (* print_endline "conll_parse_sentence 2"; *) |
204 | - LCGlatexOf.print_references "results/" "references2" references; | |
213 | + (* LCGlatexOf.print_references "results/" "references2" references; *) | |
205 | 214 | let time4 = time_fun () in |
206 | 215 | let result = if test_only_flag then result else {result with parsed_dep_chart=parsed_dep_chart} in |
207 | 216 | let result = {result with parse_time=time4 -. time3} in |
... | ... | @@ -251,20 +260,58 @@ let conll_parse_sentence timeout test_only_flag id paths tokens = |
251 | 260 | {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2} |
252 | 261 | |
253 | 262 | |
254 | -let mate_in, mate_out = Unix.open_process "java -jar ../dependencyParser/basic/mate-tools/dist/anna-3.5.jar -model ../dependencyParser/basic/mate-tools/examples/160622_Polish_MateParser.mdl -test" | |
263 | +let mate_in, mate_out = (*Unix.open_process "java -jar ../dependencyParser/basic/mate-tools/dist/anna-3.5.jar -model ../dependencyParser/basic/mate-tools/examples/160622_Polish_MateParser.mdl -test"*) | |
264 | + if Paths.config.Paths.mate_parser_enabled then | |
265 | + Unix.open_process ("java -jar " ^ Paths.config.Paths.mate_parser_path ^ "dist/anna-3.5.jar -model " ^ | |
266 | + Paths.config.Paths.mate_parser_path ^ "examples/160622_Polish_MateParser.mdl -test") | |
267 | + else stdin, stdout | |
255 | 268 | |
256 | -let rec parse_sentence timeout test_only_flag mode tokens = function | |
257 | - RawSentence s -> RawSentence s | |
258 | - | StructSentence(id,paths,last) -> | |
269 | +let swigra_in, swigra_out = (*Unix.open_process "../swigra/parser/run.sh"*) | |
270 | + if Paths.config.Paths.swigra_enabled then | |
271 | + Unix.open_process (Paths.config.Paths.swigra_path ^ "run.sh") | |
272 | + else stdin, stdout | |
273 | + | |
274 | +let file_prefix_of_mode = function | |
275 | + Raw -> "R" | |
276 | + | Struct -> "St" | |
277 | + | CONLL -> "C" | |
278 | + | ENIAM -> "E" | |
279 | + | Mate -> "M" | |
280 | + | Swigra -> "S" | |
281 | + | POLFIE -> "P" | |
282 | + | |
283 | +let get_paths old_paths = function | |
284 | + {PreTypes.psentence=PreTypes.DepSentence(paths)},_ -> | |
285 | + Int.iter 0 (Array.length paths - 1) (fun i -> | |
286 | + let id,_,_ = old_paths.(i) in | |
287 | + let _,super,label = paths.(i) in | |
288 | + paths.(i) <- id,super,label); | |
289 | + paths | |
290 | + | _ -> failwith "get_paths" | |
291 | + | |
292 | +let rec parse_sentence timeout test_only_flag mode file_prefix tokens = function | |
293 | + RawSentence s -> | |
294 | + (match mode with | |
295 | + Swigra -> | |
296 | + if not Paths.config.Paths.swigra_enabled then RawSentence s else ( | |
297 | + Printf.fprintf swigra_out "%s\n%!" s; | |
298 | + print_endline ("swigra: " ^ input_line swigra_in); | |
299 | + RawSentence s) | |
300 | + | _ -> RawSentence s) | |
301 | + | StructSentence(paths,last) -> | |
259 | 302 | (match mode with |
260 | 303 | ENIAM -> |
261 | - let result = eniam_parse_sentence timeout test_only_flag id paths last tokens in | |
304 | + let result = eniam_parse_sentence timeout test_only_flag paths last tokens in | |
305 | + let result = {result with file_prefix = file_prefix_of_mode mode ^ file_prefix} in | |
262 | 306 | ENIAMSentence result |
263 | 307 | | _ -> failwith "parse_sentence") |
264 | - | DepSentence(id,paths) -> | |
308 | + | DepSentence(paths) -> | |
265 | 309 | (match mode with |
266 | 310 | CONLL -> |
267 | - let result = conll_parse_sentence timeout test_only_flag id paths tokens in | |
311 | + let result = conll_parse_sentence timeout test_only_flag paths tokens in | |
312 | + let result = {result with | |
313 | + file_prefix = file_prefix_of_mode mode ^ file_prefix; | |
314 | + paths = paths} in | |
268 | 315 | CONLLSentence result |
269 | 316 | (* let xml = DepTree.conll_to_xml paths in |
270 | 317 | let graph = XmlPrinter.graph_of_xml xml in (* FIXME: do poprawy *) |
... | ... | @@ -272,28 +319,31 @@ let rec parse_sentence timeout test_only_flag mode tokens = function |
272 | 319 | let result = {empty_eniam_parse_result with status=Parsed; term=graph} in |
273 | 320 | ENIAMSentence result, next_id *) |
274 | 321 | | Mate -> |
275 | - (*print_endline "parse_sentence 1"; | |
276 | - let conll = CONLL.string_of_sentence PreTypes.Mate (PreTypes.StructSentence(paths,last)) in | |
322 | + if not Paths.config.Paths.mate_parser_enabled then DepSentence paths else ( | |
323 | + print_endline "parse_sentence 1"; | |
324 | + (* print_endline (Visualization.html_of_dep_sentence tokens paths); *) | |
325 | + let conll = CONLL.string_of_paths PreTypes.Mate tokens paths in | |
277 | 326 | print_endline "parse_sentence 2"; |
278 | - printf "%s\n" conll; | |
279 | - Printf.fprintf mate_out "%s\n\n%!" conll; | |
327 | + (* printf "|%s|\n" conll; *) | |
328 | + Printf.fprintf mate_out "%s%!" conll; | |
280 | 329 | print_endline "parse_sentence 3"; |
281 | - let conll = CONLL.load_sentence mate_in in | |
282 | - print_endline "parse_sentence 4";*) | |
283 | - (*konwersja na strukturę danych*) | |
284 | - (* FIXME: tu trzeba wstawić konwersję na tekstowy format CONLL, | |
285 | - uruchomienie MateParser i | |
286 | - powtórną konwersję wyniku. *) | |
287 | - RawSentence "" | |
330 | + let new_paths = get_paths paths (CONLL.load_sentence mate_in) in | |
331 | + print_endline "parse_sentence 4"; | |
332 | + (* print_endline (Visualization.html_of_dep_sentence tokens new_paths); *) | |
333 | + let result = conll_parse_sentence timeout test_only_flag new_paths tokens in | |
334 | + let result = {result with | |
335 | + file_prefix = file_prefix_of_mode mode ^ file_prefix; | |
336 | + paths=new_paths} in | |
337 | + CONLLSentence result) | |
288 | 338 | | _ -> failwith "parse_sentence") |
289 | 339 | | QuotedSentences sentences -> |
290 | 340 | let sentences = Xlist.rev_map sentences (fun p -> |
291 | - let sentence = parse_sentence timeout test_only_flag mode tokens p.psentence in | |
341 | + let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens p.psentence in | |
292 | 342 | {p with psentence=sentence}) in |
293 | 343 | QuotedSentences(List.rev sentences) |
294 | 344 | | AltSentence l -> |
295 | 345 | let l = Xlist.rev_map l (fun (mode,sentence) -> |
296 | - mode, parse_sentence timeout test_only_flag mode tokens sentence) in | |
346 | + mode, parse_sentence timeout test_only_flag mode file_prefix tokens sentence) in | |
297 | 347 | AltSentence(List.rev l) |
298 | 348 | | _ -> failwith "parse_sentence" |
299 | 349 | |
... | ... | @@ -301,7 +351,7 @@ let rec parse_paragraph timeout test_only_flag mode tokens = function |
301 | 351 | RawParagraph s -> RawParagraph s |
302 | 352 | | StructParagraph sentences -> |
303 | 353 | let sentences = Xlist.rev_map sentences (fun p -> |
304 | - let sentence = parse_sentence timeout test_only_flag mode tokens p.psentence in | |
354 | + let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens p.psentence in | |
305 | 355 | {p with psentence=sentence}) in |
306 | 356 | StructParagraph(List.rev sentences) |
307 | 357 | | AltParagraph l -> |
... | ... | @@ -318,30 +368,186 @@ let rec parse_text timeout test_only_flag mode = function |
318 | 368 | | AltText l -> AltText(Xlist.map l (fun (mode,text) -> |
319 | 369 | mode, parse_text timeout test_only_flag mode text)) |
320 | 370 | |
371 | +let select_mode = function | |
372 | + (Raw,_),_ -> failwith "select_mode" | |
373 | + | _,(Raw,_) -> failwith "select_mode" | |
374 | + | (Struct,_),_ -> failwith "select_mode" | |
375 | + | _,(Struct,_) -> failwith "select_mode" | |
376 | + | (CONLL,s),_ -> CONLL,s | |
377 | + | _,(CONLL,s) -> CONLL,s | |
378 | + | (ENIAM,s),_ -> ENIAM,s | |
379 | + | _,(ENIAM,s) -> ENIAM,s | |
380 | + | (Swigra,s),_ -> Swigra,s | |
381 | + | _,(Swigra,s) -> Swigra,s | |
382 | + | (Mate,s),_ -> Mate,s | |
383 | + | _,(Mate,s) -> Mate,s | |
384 | + | _ -> failwith "select_mode: ni" | |
385 | + | |
386 | +let rec select_sentences_sentence = function | |
387 | + RawSentence s -> failwith "select_sentences_sentence" | |
388 | + | StructSentence(paths,last) -> failwith "select_sentences_sentence" | |
389 | + | DepSentence paths -> failwith "select_sentences_sentence" | |
390 | + | QuotedSentences sentences -> | |
391 | + let sentences = Xlist.rev_map sentences (fun p -> | |
392 | + let sentence,_ = select_sentences_sentence p.psentence in | |
393 | + {p with psentence=sentence}) in | |
394 | + QuotedSentences(List.rev sentences), Parsed | |
395 | + | AltSentence l -> | |
396 | + let raw,selected = Xlist.fold l ([],[]) (fun (raw,selected) (mode,sentence) -> | |
397 | + if mode = Raw then (mode,sentence) :: raw, selected else | |
398 | + let sentence,status = select_sentences_sentence sentence in | |
399 | + if status <> Parsed && status <> NotTranslated then raw,selected else | |
400 | + match selected with | |
401 | + [] -> raw,[mode,sentence] | |
402 | + | [mode2,sentence2] -> raw,[select_mode ((mode,sentence),(mode2,sentence2))] | |
403 | + | _ -> failwith "select_sentences_sentence") in | |
404 | + AltSentence(raw @ selected), Parsed | |
405 | + | ENIAMSentence result -> ENIAMSentence result, result.status | |
406 | + | CONLLSentence result -> CONLLSentence result, result.status | |
407 | + | SemSentence result -> SemSentence result, result.status | |
408 | + | |
409 | +let rec select_sentences_paragraph = function | |
410 | + RawParagraph s -> RawParagraph s | |
411 | + | StructParagraph sentences -> | |
412 | + let sentences = Xlist.rev_map sentences (fun p -> | |
413 | + let sentence,_ = select_sentences_sentence p.psentence in | |
414 | + {p with psentence=sentence}) in | |
415 | + StructParagraph(List.rev sentences) | |
416 | + | AltParagraph l -> | |
417 | + let l = Xlist.rev_map l (fun (mode,paragraph) -> | |
418 | + mode, select_sentences_paragraph paragraph) in | |
419 | + AltParagraph(List.rev l) | |
420 | + | |
421 | +let rec select_sentences_text = function | |
422 | + RawText s -> RawText s | |
423 | + | StructText(paragraphs,tokens) -> | |
424 | + let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> | |
425 | + select_sentences_paragraph paragraph) in | |
426 | + StructText(List.rev paragraphs, tokens) | |
427 | + | AltText l -> AltText(Xlist.map l (fun (mode,text) -> | |
428 | + mode, select_sentences_text text)) | |
429 | + | |
430 | +let semantic_processing timeout test_only_flag file_prefix tokens max_n dependency_tree = | |
431 | + let time5 = time_fun () in | |
432 | + let result = {empty_semantic_processing_result with file_prefix=file_prefix} in | |
433 | + try | |
434 | + let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses tokens dependency_tree in | |
435 | + let disamb(*sem*) = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node1 (*dependency_tree2*)disamb in | |
436 | + let (*sem*)disamb = DisambLemma.disambiguate_nodes (*dependency_tree*)(*sem*)disamb in | |
437 | + let (*sem*)disamb = DisambLemma.remove_unused(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in | |
438 | + let (*sem*)disamb = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in | |
439 | + let (*disamb*)sem = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node2 (*dependency_tree2*)disamb in | |
440 | + let result = if test_only_flag then result else {result with disamb=disamb} in | |
441 | + let sem = DisambLemma.disambiguate_meanings (*dependency_tree*)sem in | |
442 | + let sem(*disamb*) = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)sem(*disamb*) in | |
443 | + let result = if test_only_flag then result else {result with sem=sem} in | |
444 | + let sem2 = SemGraph.translate tokens (*disamb*)sem in | |
445 | + let result = if test_only_flag then result else {result with sem2=sem2} in | |
446 | + let sem3(*disamb*) = SemGraph.make_tree(*disambiguate_nodes*) (*dependency_tree*)sem2(*disamb*) in | |
447 | + let sem3(*disamb*) = SemGraph.simplify_tree(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in | |
448 | +(* let sem3(*disamb*) = SemGraph.manage_quantification(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in *) | |
449 | + let sem3(*disamb*) = SemGraph.simplify_gender(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in | |
450 | +(* if Array.length disamb < 10000 then print_xml_dependency_tree "results/trees/" (id ^ "dis") disamb; *) | |
451 | + let result = if test_only_flag then result else {result with sem3=sem3} in | |
452 | + let time6 = time_fun () in | |
453 | + if SemGraph.validate_semantics sem3 then | |
454 | + let trees = SemGraph.draw_trees max_n sem3 in | |
455 | + let trees2 = Xlist.map trees SemMrl.variable_alpha_convertion in | |
456 | + let mrls = Xlist.map trees2 SemMrl.make_mrl in | |
457 | + let mrls = Xlist.map mrls SemMrl.move_requirements in | |
458 | + let mrss = Xlist.map mrls SemMrl.make_mrs_of_mrl in | |
459 | + let mrss = Xlist.map mrss SemMrl.mrs_handle_alpha_convertion in | |
460 | + let fols = Xlist.map mrss (fun mrs -> | |
461 | + let l = SemMrl.foll_of_mrs_greedy mrs in | |
462 | + if l = [] then failwith "empty fol" else | |
463 | + List.hd l) in | |
464 | + let result = if test_only_flag then result else {result with trees=trees; mrls=fols(*mrls*)} in | |
465 | + {result with status=Parsed; sem_time=time6 -. time5} | |
466 | + else {result with status=NotTranslated; sem_time=time6 -. time5} | |
467 | + with e -> | |
468 | + let time6 = time_fun () in | |
469 | + {result with status=SemError; msg=Printexc.to_string e; sem_time=time6 -. time5} | |
470 | + | |
471 | + | |
472 | +let rec semantic_processing_sentence timeout test_only_flag tokens max_n = function | |
473 | + RawSentence s -> RawSentence s | |
474 | + | ENIAMSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens max_n result.dependency_tree) | |
475 | + | CONLLSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens max_n result.dependency_tree) | |
476 | + | QuotedSentences sentences -> | |
477 | + let sentences = Xlist.rev_map sentences (fun p -> | |
478 | + let sentence = semantic_processing_sentence timeout test_only_flag tokens max_n p.psentence in | |
479 | + {p with psentence=sentence}) in | |
480 | + QuotedSentences(List.rev sentences) | |
481 | + | AltSentence l -> | |
482 | + let l = Xlist.rev_map l (fun (mode,sentence) -> | |
483 | + mode, semantic_processing_sentence timeout test_only_flag tokens max_n sentence) in | |
484 | + AltSentence(List.rev l) | |
485 | + | _ -> failwith "semantic_processing_sentence" | |
486 | + | |
487 | +let rec semantic_processing_paragraph timeout test_only_flag tokens max_n = function | |
488 | + RawParagraph s -> RawParagraph s | |
489 | + | StructParagraph sentences -> | |
490 | + let sentences = Xlist.rev_map sentences (fun p -> | |
491 | + let sentence = semantic_processing_sentence timeout test_only_flag tokens max_n p.psentence in | |
492 | + {p with psentence=sentence}) in | |
493 | + StructParagraph(List.rev sentences) | |
494 | + | AltParagraph l -> | |
495 | + let l = Xlist.rev_map l (fun (mode,paragraph) -> | |
496 | + mode, semantic_processing_paragraph timeout test_only_flag tokens max_n paragraph) in | |
497 | + AltParagraph(List.rev l) | |
498 | + | |
499 | +let rec semantic_processing_text timeout test_only_flag max_n = function | |
500 | + RawText s -> RawText s | |
501 | + | StructText(paragraphs,tokens) -> | |
502 | + let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> | |
503 | + semantic_processing_paragraph timeout test_only_flag tokens max_n paragraph) in | |
504 | + StructText(List.rev paragraphs, tokens) | |
505 | + | AltText l -> AltText(Xlist.map l (fun (mode,text) -> | |
506 | + mode, semantic_processing_text timeout test_only_flag max_n text)) | |
507 | + | |
321 | 508 | let rec extract_query_text = function |
322 | 509 | RawText s -> s |
323 | 510 | | AltText l -> (try extract_query_text (Xlist.assoc l Raw) with Not_found -> failwith "extract_query_text") |
324 | 511 | | _ -> failwith "extract_query_text" |
325 | 512 | |
326 | 513 | let process_query pre_in pre_out timeout test_only_flag id full_query max_n = |
327 | - print_endline "process_query 0"; | |
514 | + (* print_endline "process_query 0"; *) | |
328 | 515 | let result = {empty_result with input_text=translate_text full_query} in |
329 | 516 | let time1 = time_fun () in |
330 | - print_endline "process_query 1"; | |
517 | + (* print_endline "process_query 1"; *) | |
331 | 518 | Marshal.to_channel pre_out full_query []; |
332 | 519 | flush pre_out; |
333 | - print_endline "process_query 2"; | |
520 | + (* print_endline "process_query 2"; *) | |
334 | 521 | let pre_text,msg,pre_time1 = (Marshal.from_channel pre_in : PreTypes.text * string * float) in |
335 | 522 | let time2 = time_fun () in |
336 | 523 | let result = if test_only_flag then result else {result with pre_text=translate_text pre_text} in |
337 | 524 | let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in |
338 | 525 | if msg <> "" then {result with status=PreprocessingError; msg=msg} else ( |
339 | - print_endline "process_query 3"; | |
526 | + (* print_endline "process_query 3"; *) | |
340 | 527 | let parsed_text = parse_text timeout test_only_flag Struct (translate_text pre_text) in |
341 | - print_endline "process_query 4"; | |
528 | + (* print_endline "process_query 4"; *) | |
342 | 529 | let time3 = time_fun () in |
343 | 530 | let result = if test_only_flag then result else {result with status=Parsed; parsed_text=parsed_text} in |
344 | 531 | let result = {result with parse_time=time3 -. time2} in |
532 | + (* print_endline "process_query 5"; *) | |
533 | + let selected_sent_text = | |
534 | + if not Paths.config.Paths.sentence_selection_enabled then parsed_text | |
535 | + else select_sentences_text parsed_text in | |
536 | + (* print_endline "process_query 6"; *) | |
537 | + let result = if test_only_flag then result else {result with status=Parsed; selected_sent_text=selected_sent_text} in | |
538 | + let semantic_text = semantic_processing_text timeout test_only_flag max_n selected_sent_text in | |
539 | + (* print_endline "process_query 7"; *) | |
540 | + let selected_semantic_text = | |
541 | + if not Paths.config.Paths.sentence_selection_enabled then semantic_text | |
542 | + else select_sentences_text semantic_text in | |
543 | + (* print_endline "process_query 8"; *) | |
544 | + let time4 = time_fun () in | |
545 | + let result = | |
546 | + if test_only_flag then result | |
547 | + else {result with status=Parsed; | |
548 | + semantic_text=semantic_text; | |
549 | + selected_semantic_text=selected_semantic_text} in | |
550 | + let result = {result with semantic_time=time4 -. time3} in | |
345 | 551 | result) |
346 | 552 | (** (* let text,msg,pre_time1 = PreProcessing.mail_loop2 query in *) |
347 | 553 | (* let text = PreTypes.RawText "" in |
... | ... |
parser/execTypes.ml
... | ... | @@ -20,7 +20,7 @@ |
20 | 20 | type status = Idle | PreprocessingError | LexiconError | ParseError | ParseTimeout | Parsed | TooManyNodes | NotParsed | NotReduced | ReductionError | SemError | NotTranslated |
21 | 21 | |
22 | 22 | type eniam_parse_result = { |
23 | - id: string; | |
23 | + file_prefix: string; | |
24 | 24 | status: status; |
25 | 25 | msg: string; |
26 | 26 | lex_time: float; |
... | ... | @@ -35,7 +35,7 @@ type eniam_parse_result = { |
35 | 35 | } |
36 | 36 | |
37 | 37 | type conll_parse_result = { |
38 | - id: string; | |
38 | + file_prefix: string; | |
39 | 39 | status: status; |
40 | 40 | msg: string; |
41 | 41 | lex_time: float; |
... | ... | @@ -44,6 +44,7 @@ type conll_parse_result = { |
44 | 44 | sem_time: float; |
45 | 45 | paths_size: int; |
46 | 46 | dependency_tree_size: int; |
47 | + paths: (int * int * string) array; | |
47 | 48 | dep_chart: LCGtypes.dep_tree; |
48 | 49 | parsed_dep_chart: (LCGtypes.SymbolMap.key * LCGtypes.linear_term) list; |
49 | 50 | not_parsed_dep_chart: int * |
... | ... | @@ -53,22 +54,36 @@ type conll_parse_result = { |
53 | 54 | dependency_tree: LCGtypes.linear_term array; |
54 | 55 | } |
55 | 56 | |
57 | +type semantic_processing_result = { | |
58 | + file_prefix: string; | |
59 | + status: status; | |
60 | + msg: string; | |
61 | + sem_time: float; | |
62 | + disamb: LCGtypes.linear_term array; | |
63 | + sem: LCGtypes.linear_term array; | |
64 | + sem2: LCGtypes.linear_term array; | |
65 | + sem3: LCGtypes.linear_term; | |
66 | + trees: LCGtypes.linear_term list; | |
67 | + mrls: SemTypes.mrl_formula list; | |
68 | + } | |
69 | + | |
56 | 70 | type mode = |
57 | - Raw | Struct | CONLL | ENIAM | Mate | |
71 | + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | |
58 | 72 | |
59 | 73 | type sentence = |
60 | 74 | RawSentence of string |
61 | 75 | (* | CONLL of conll list *) |
62 | - | StructSentence of string * (int * int * int) list * int (* file_prefix * (id * lnode * rnode) list * last *) | |
63 | - | DepSentence of string * (int * int * string) array (* file_prefix * (id * super * label) conll_id *) | |
76 | + | StructSentence of (int * int * int) list * int (* (id * lnode * rnode) list * last *) | |
77 | + | DepSentence of (int * int * string) array (* (id * super * label) conll_id *) | |
64 | 78 | | QuotedSentences of paragraph_record list |
65 | 79 | (* | NKJP1M of nkjp1m list *) |
66 | 80 | (* | Skladnica of skladnica_tree *) |
67 | 81 | | AltSentence of (mode * sentence) list (* string = etykieta np raw, nkjp, krzaki *) |
68 | 82 | | ENIAMSentence of eniam_parse_result |
69 | 83 | | CONLLSentence of conll_parse_result |
84 | + | SemSentence of semantic_processing_result | |
70 | 85 | |
71 | -and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence} (* beg i len liczone po znakach unicode ( * 100 ???) *) | |
86 | +and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence; pfile_prefix: string} (* beg i len liczone po znakach unicode ( * 100 ???) *) | |
72 | 87 | |
73 | 88 | and paragraph = |
74 | 89 | RawParagraph of string |
... | ... | @@ -88,19 +103,12 @@ type result = { |
88 | 103 | pre_time2: float; |
89 | 104 | status: status; |
90 | 105 | msg: string; |
91 | - (* lex_time: float; *) | |
92 | 106 | parse_time: float; |
93 | 107 | parsed_text: text; |
94 | - (* reduction_time: float; | |
95 | - sem_time: float; | |
96 | - paths_size: int; | |
97 | - disamb: LCGtypes.linear_term array; | |
98 | - sem: LCGtypes.linear_term array; | |
99 | - sem2: LCGtypes.linear_term array; | |
100 | - sem3: LCGtypes.linear_term; | |
101 | - trees: LCGtypes.linear_term list; | |
102 | - mrls: SemTypes.mrl_formula list; | |
103 | - paths: PreTypes.token_record array; *) | |
108 | + semantic_time: float; | |
109 | + selected_sent_text: text; | |
110 | + semantic_text: text; | |
111 | + selected_semantic_text: text; | |
104 | 112 | } |
105 | 113 | |
106 | 114 | type sum_result = { |
... | ... |
parser/index.html
... | ... | @@ -8,11 +8,12 @@ |
8 | 8 | |
9 | 9 | <body> |
10 | 10 | <center> |
11 | - <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1> | |
12 | - <h3>Podaj tekst:</h3> | |
13 | - <form method=POST action="cgi-bin/parser.cgi"> | |
14 | - <p><input type="text" name="text0" value="" size="40"></p> | |
15 | - <p><input type="submit" value="Analizuj" size="60"></p> | |
11 | + <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1> | |
12 | + <h3>Podaj tekst:</h3> | |
13 | + <form method=POST action="cgi-bin/parser2.cgi"> | |
14 | + <p><textarea name="text0" cols="100" rows="10"></textarea></p> | |
15 | + <!--<p><input type="text" name="text0" value="" size="100"></p> --> | |
16 | + <p><input type="submit" value="Analizuj" size="60"></p> | |
16 | 17 | </form> |
17 | 18 | <BR><BR> |
18 | 19 | <hr align="center" size="2" width="800" /> |
... | ... |
parser/makefile
... | ... | @@ -16,9 +16,9 @@ SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml |
16 | 16 | EXEC= execTypes.ml visualization.ml exec.ml |
17 | 17 | |
18 | 18 | all: |
19 | - $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml | |
20 | -# $(OCAMLOPT) -o server $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml | |
21 | -# $(OCAMLOPT) -o parser.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml | |
19 | +# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml | |
20 | + $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml | |
21 | + $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml | |
22 | 22 | # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml |
23 | 23 | # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml |
24 | 24 | # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml |
... | ... | @@ -30,6 +30,10 @@ all: |
30 | 30 | # of_xml: |
31 | 31 | # $(OCAMLOPT) -o of_xml $(OCAMLOPTFLAGS) LCGofXml.ml |
32 | 32 | |
33 | +swigra_test: swigra_test.ml | |
34 | + $(OCAMLOPT) -o swigra_test $(OCAMLOPTFLAGS) swigra_test.ml | |
35 | + | |
36 | + | |
33 | 37 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
34 | 38 | |
35 | 39 | .mll.ml: |
... | ... | @@ -51,4 +55,4 @@ all: |
51 | 55 | $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< |
52 | 56 | |
53 | 57 | clean: |
54 | - rm -f *~ *.cm[oix] *.o eniam eniam.distr eniam.worker | |
58 | + rm -f *~ *.cm[oix] *.o eniam eniam.distr eniam.worker server2 parser2.cgi | |
... | ... |
parser/pipe.ml
... | ... | @@ -50,6 +50,10 @@ let lcg_process query = |
50 | 50 | Visualization.print_html_text path "input_text" result.input_text; |
51 | 51 | Visualization.print_html_text path "pre_text" result.pre_text; |
52 | 52 | Visualization.print_html_text path "parsed_text" result.parsed_text; |
53 | + Visualization.print_html_text path "selected_sent_text" result.selected_sent_text; | |
54 | + Visualization.print_html_text path "semantic_text" result.semantic_text; | |
55 | + Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text; | |
56 | + Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.selected_semantic_text; | |
53 | 57 | Exec.print_result stdout result; |
54 | 58 | (*Visualization.print_paths "results/" "paths" result.paths; |
55 | 59 | Visualization.print_paths_latex "paths" result.paths; |
... | ... | @@ -118,9 +122,9 @@ let lcg_process query = |
118 | 122 | let _ = Unix.shutdown_connection ic in |
119 | 123 | () |
120 | 124 | |
121 | -(* let _ = | |
125 | +let _ = | |
122 | 126 | if Array.length Sys.argv < 2 then print_endline "missing argument" else |
123 | - lcg_process Sys.argv.(1) *) | |
127 | + lcg_process Sys.argv.(1) | |
124 | 128 | |
125 | 129 | |
126 | 130 | (* FIXME: parser dziwnie się zachowuje dla 'ścieżki anomalia.' 'ścieżki anomalia. GG' itp. - nie parsuje '.' a jak sparsuje to nie chce redukować *) |
... | ... | @@ -210,24 +214,20 @@ let process_conll_corpus filename = |
210 | 214 | let _ = |
211 | 215 | (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *) |
212 | 216 | (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *) |
213 | - process_conll_corpus "../testy/skladnica-test1.conll"; | |
217 | + (* process_conll_corpus "../testy/skladnica-test1.conll"; *) | |
214 | 218 | () |
215 | 219 | |
216 | 220 | (* TO DO: |
217 | 221 | - współbieżne uruchamianie parserów |
218 | - - wybór tego które włączać (w pre i w parserze) | |
219 | 222 | - nkjp jako źródło danych |
220 | 223 | - concraft, wcrft2 |
221 | 224 | - dopasowanie do siebie tokenów w różnych wersjach tokenizacji |
222 | - - dopasowanie do siebie akapitów i zdań | |
223 | 225 | 2016.10.15 |
224 | 226 | - nadmiar węzłów pro po parsowaniu |
225 | 227 | 2016.10.16 |
226 | - - zgubione węzły przy przetwarzaniu dep | |
227 | - - assign_not_parsed | |
228 | 228 | - sprawdzenie zerowania globalnych referencji przy parsowaniu korpusu |
229 | - - mateParser | |
230 | - 2016.10.19 | |
229 | + 2016.10.22 | |
230 | + - instalacja Świgry | |
231 | 231 | *) |
232 | 232 | |
233 | 233 | |
... | ... |
parser/semGraph.ml
... | ... | @@ -21,11 +21,11 @@ open LCGtypes |
21 | 21 | open Xstd |
22 | 22 | open Printf |
23 | 23 | |
24 | -let pro_id_counter = ref 100000 (* FIXME: to trzeba usunąć !!! *) | |
24 | +(*let pro_id_counter = ref 100000 (* FIXME: to trzeba usunąć !!! *) | |
25 | 25 | |
26 | 26 | let get_pro_id () = |
27 | 27 | incr pro_id_counter; |
28 | - !pro_id_counter | |
28 | + !pro_id_counter*) | |
29 | 29 | |
30 | 30 | let empty_concept = |
31 | 31 | {c_sense=Dot;c_name=Dot;(* c_variable: string; c_visible_var: bool;*) c_quant=Dot; c_local_quant=true; (*c_modalities: (string * type_term) list; |
... | ... | @@ -61,7 +61,7 @@ let rec get_person = function |
61 | 61 | | _ :: l -> get_person l |
62 | 62 | | [] -> "" |
63 | 63 | |
64 | -let rec create_normal_concept (*roles role_attrs*) paths t sem_args = | |
64 | +let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | |
65 | 65 | let sem_args = if t.cat = "pro" then |
66 | 66 | match get_person t.attrs with |
67 | 67 | "pri" -> ["indexical"] |
... | ... | @@ -73,10 +73,10 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args = |
73 | 73 | if t.agf = WalTypes.NOSEM then t.args else |
74 | 74 | let c = {empty_concept with |
75 | 75 | c_sense = Val t.meaning; |
76 | - c_relations=(*create_concepts paths*) t.args; | |
76 | + c_relations=(*create_concepts tokens*) t.args; | |
77 | 77 | c_quant=make_sem_args sem_args; |
78 | 78 | c_variable=string_of_int t.id,""; |
79 | - c_pos=(*if t.id >= Array.length paths then -1 else*) paths.(t.id).PreTypes.beg; | |
79 | + c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).PreTypes.beg; | |
80 | 80 | c_local_quant=true} in |
81 | 81 | if t.cat = "subst" || t.cat = "depr" || t.cat = "ger" || t.cat = "unk" || StringSet.mem symbols t.cat then (* FIXME: wykrywanie plurale tantum *) |
82 | 82 | let c = {c with c_local_quant=false} in |
... | ... | @@ -103,7 +103,8 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args = |
103 | 103 | | e,t -> failwith ("create_normal_concept noun: " ^ e)) in |
104 | 104 | let c = if t.cat = "depr" then {c with c_relations=Tuple[c.c_relations;SingleRelation(Val "depr")]} else c in |
105 | 105 | if cx_flag then |
106 | - Context{empty_context with cx_contents=Concept c; cx_variable=string_of_int (get_pro_id ()),""; cx_pos=c.c_pos} | |
106 | + let id = ExtArray.add tokens PreTypes.empty_token in | |
107 | + Context{empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} | |
107 | 108 | else Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else |
108 | 109 | if t.cat = "fin" || t.cat = "bedzie" || t.cat = "praet" || t.cat = "winien" || t.cat = "impt" || t.cat = "imps" || t.cat = "pred" || t.pred = "pro-komunikować" then |
109 | 110 | let c = {c with c_local_quant=false} in |
... | ... | @@ -120,7 +121,9 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args = |
120 | 121 | | "MOOD",Val "imperative" -> {c with c_relations=Tuple[c.c_relations;SingleRelation(Val "impt")]} (* FIXME *) |
121 | 122 | | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
122 | 123 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in |
123 | - let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int (get_pro_id ()),""; cx_pos=c.c_pos} in | |
124 | + let c = if t.pred = "pro-komunikować" then {c with c_relations=Relation(Val "Theme",Val "",c.c_relations)} else c in (* FIXME: to by trzeba przesunąć na wcześniej *) | |
125 | + let id = ExtArray.add tokens PreTypes.empty_token in | |
126 | + let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} in | |
124 | 127 | if t.position.WalTypes.role <> "" || t.position.WalTypes.role_attr <> "" then failwith "create_normal_concept: verb" else |
125 | 128 | (* Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context cx) else *) |
126 | 129 | Context cx else |
... | ... | @@ -131,7 +134,8 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args = |
131 | 134 | | "TENSE",t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]} |
132 | 135 | | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
133 | 136 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in |
134 | - let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int (get_pro_id ()),""; cx_pos=c.c_pos} in | |
137 | + let id = ExtArray.add tokens PreTypes.empty_token in | |
138 | + let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} in | |
135 | 139 | Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context cx) else |
136 | 140 | if t.cat = "adj" || t.cat = "adjc" || t.cat = "adjp" || t.cat = "adja" || t.cat = "pact" || t.cat = "ppas" || t.cat = "ordnum" || t.cat = "roman-adj" then |
137 | 141 | let c = if t.cat = "pact" || t.cat = "ppas" then {c with c_local_quant=false} else c in |
... | ... | @@ -249,42 +253,45 @@ let rec create_normal_concept (*roles role_attrs*) paths t sem_args = |
249 | 253 | if t.cat = "interp" && t.pred = "?" then SingleRelation(Val "int") else |
250 | 254 | if t.cat = "interp" && t.pred = "„" then |
251 | 255 | Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,RemoveRelation t.args) else |
256 | + if t.cat = "interp" || t.pred = "</or-sentence>" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else ( | |
252 | 257 | if t.cat = "interp" then Node t else |
253 | 258 | if t.cat = "" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else |
254 | - Node t | |
259 | + (* print_endline t.pred; *) | |
260 | + Node t) | |
255 | 261 | |
256 | -and create_concepts paths = function | |
262 | +and create_concepts tokens = function | |
257 | 263 | Node t -> |
264 | + (* print_endline ("cc " ^ t.pred); *) | |
258 | 265 | (* let agf = t.agf in *) |
259 | 266 | let attrs = remove_unimportant_attrs t.attrs in |
260 | 267 | (* let attrs,roles,role_attrs = get_roles attrs in *) |
261 | 268 | let t = {t with attrs=attrs} in |
262 | - (match paths.(t.id).PreTypes.semantics with | |
269 | + (match (ExtArray.get tokens t.id).PreTypes.semantics with | |
263 | 270 | PreTypes.Normal -> |
264 | - let t = create_normal_concept paths t [] in | |
271 | + let t = create_normal_concept tokens t [] in | |
265 | 272 | (* if agf = WalTypes.CORE then Core t else *) t |
266 | 273 | | PreTypes.PrepSemantics _ -> |
267 | - let t = create_normal_concept paths t [] in | |
274 | + let t = create_normal_concept tokens t [] in | |
268 | 275 | (* if agf = WalTypes.CORE then Core t else *) t |
269 | 276 | | PreTypes.Special l -> |
270 | - let t = create_normal_concept paths t l in | |
277 | + let t = create_normal_concept tokens t l in | |
271 | 278 | (* if agf = WalTypes.CORE then Core t else *) t |
272 | 279 | (* | PreTypes.SpecialNoun(lemma,_) -> |
273 | - let t = create_normal_concept paths t in*) | |
280 | + let t = create_normal_concept tokens t in*) | |
274 | 281 | (* if agf = WalTypes.CORE then Core t else t*) |
275 | 282 | (*| _ -> failwith "create_concepts: ni"*)) |
276 | - | Tuple l -> Tuple(Xlist.map l (create_concepts paths)) | |
277 | - | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts paths t)) | |
283 | + | Tuple l -> Tuple(Xlist.map l (create_concepts tokens)) | |
284 | + | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens t)) | |
278 | 285 | | Dot -> Dot |
279 | 286 | | Ref i -> Ref i |
280 | - | Choice choices -> Choice(StringMap.map choices (create_concepts paths)) | |
287 | + | Choice choices -> Choice(StringMap.map choices (create_concepts tokens)) | |
281 | 288 | | t -> failwith ("create_concepts: " ^ LCGstringOf.linear_term 0 t) |
282 | 289 | |
283 | 290 | |
284 | -let translate paths term = | |
291 | +let translate tokens term = | |
285 | 292 | let sem = Array.copy term in |
286 | 293 | Int.iter 0 (Array.length sem - 1) (fun i -> |
287 | - sem.(i) <- create_concepts paths sem.(i)); | |
294 | + sem.(i) <- create_concepts tokens sem.(i)); | |
288 | 295 | sem |
289 | 296 | |
290 | 297 | (***************************************************************************************) |
... | ... |
parser/server.ml
... | ... | @@ -16,7 +16,7 @@ |
16 | 16 | * You should have received a copy of the GNU General Public License |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | - | |
19 | + | |
20 | 20 | open ExecTypes |
21 | 21 | |
22 | 22 | let logfile = open_out_gen [Open_wronly; Open_append; Open_creat] ((6*8+4)*8+4) "results/queries.log" |
... | ... | @@ -26,35 +26,66 @@ let get_sock_addr host_name port = |
26 | 26 | let addr = he.Unix.h_addr_list in |
27 | 27 | Unix.ADDR_INET(addr.(0),port) |
28 | 28 | |
29 | +let rec clean_result_sentence = function | |
30 | + SemSentence result -> SemSentence {result with disamb=[| |]; sem=[| |]; sem2=[| |]; sem3=LCGtypes.Dot} | |
31 | + | AltSentence l -> | |
32 | + let l = Xlist.rev_map l (fun (mode,sentence) -> | |
33 | + mode, clean_result_sentence sentence) in | |
34 | + AltSentence(List.rev l) | |
35 | + | t -> t | |
36 | + | |
37 | +let rec clean_result_paragraph = function | |
38 | + RawParagraph s -> RawParagraph s | |
39 | + | StructParagraph sentences -> | |
40 | + let sentences = Xlist.rev_map sentences (fun p -> | |
41 | + let sentence = clean_result_sentence p.psentence in | |
42 | + {p with psentence=sentence}) in | |
43 | + StructParagraph(List.rev sentences) | |
44 | + | AltParagraph l -> | |
45 | + let l = Xlist.rev_map l (fun (mode,paragraph) -> | |
46 | + mode, clean_result_paragraph paragraph) in | |
47 | + AltParagraph(List.rev l) | |
48 | + | |
49 | +let rec clean_result_text = function | |
50 | + RawText s -> RawText s | |
51 | + | StructText(paragraphs,tokens) -> | |
52 | + let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> | |
53 | + clean_result_paragraph paragraph) in | |
54 | + StructText(List.rev paragraphs, tokens) | |
55 | + | AltText l -> AltText(Xlist.map l (fun (mode,text) -> | |
56 | + mode, clean_result_text text)) | |
57 | + | |
29 | 58 | let parse query = |
30 | 59 | let max_n = 10 in |
31 | 60 | let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in |
32 | - let result = Exec.process_query ic oc 100. false "x" query max_n in | |
61 | + let result = Exec.process_query ic oc 30. false "x" (PreTypes.RawText query) max_n in | |
33 | 62 | Printf.fprintf oc "\n%!"; |
34 | 63 | let _ = Unix.shutdown_connection ic in |
35 | - {result with | |
36 | - graph=[| |]; | |
37 | - term=[| |]; | |
38 | - disamb=[| |]; | |
39 | - sem=[| |]; | |
40 | - sem2=[| |]; | |
41 | - sem3=LCGtypes.Dot; | |
42 | - paths=[| |]} | |
64 | + {result with | |
65 | + pre_text = RawText ""; | |
66 | + parsed_text = RawText ""; | |
67 | + selected_sent_text = RawText ""; | |
68 | + semantic_text = RawText ""; | |
69 | + selected_semantic_text = clean_result_text result.selected_semantic_text} | |
43 | 70 | |
44 | 71 | let rec main_loop in_chan out_chan = |
45 | - let query = input_line in_chan in | |
46 | - if query = "" then Marshal.to_channel out_chan {Exec.empty_result with msg="Empty query"} [] else | |
47 | - (try | |
72 | + (* let query = input_line in_chan in *) | |
73 | + let query = (Marshal.from_channel in_chan : PreTypes.text) in | |
74 | + let query = match query with PreTypes.RawText q -> q | _ -> failwith "bad query format" in | |
75 | + (* Printf.fprintf logfile "raw query: '%s'\n" query; *) | |
76 | + if query = "" then Marshal.to_channel out_chan {Exec.empty_result with msg="Empty query"} [] else | |
77 | + (try | |
48 | 78 | let result : ExecTypes.result = parse query in |
49 | 79 | Exec.print_result logfile result; |
50 | 80 | Marshal.to_channel out_chan result [] |
51 | - with e -> | |
81 | + with e -> | |
52 | 82 | Printf.fprintf logfile "query: %s\nerror_other: %s\n%!" query (Printexc.to_string e); |
53 | 83 | Marshal.to_channel out_chan {Exec.empty_result with msg=Printexc.to_string e} []); |
54 | - flush out_chan | |
84 | + (* flush out_chan *) | |
85 | + () | |
55 | 86 | |
56 | 87 | |
57 | 88 | let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,Paths.server_port) |
58 | 89 | |
59 | -let _ = | |
60 | - Unix.establish_server main_loop sockaddr | |
90 | +let _ = | |
91 | + Unix.establish_server main_loop sockaddr | |
... | ... |
parser/swigra_test.ml
0 → 100644
1 | + | |
2 | +let swigra_in, swigra_out = Unix.open_process "../swigra/parser/run.sh" | |
3 | + | |
4 | +let _ = | |
5 | + Printf.fprintf swigra_out "Ala ma kota\n%!"; | |
6 | + print_endline (input_line swigra_in); | |
7 | + (* powyższą linię należy zastąpić przez: | |
8 | + let xml = Xml.parse_in swigra_in in | |
9 | + print_endline (Xml.to_string_fmt xml) *) | |
10 | + Printf.fprintf swigra_out "Ela ma kota\n%!"; | |
11 | + print_endline (input_line swigra_in); | |
12 | + (* powyższą linię należy zastąpić przez: | |
13 | + let xml = Xml.parse_in swigra_in in | |
14 | + print_endline (Xml.to_string_fmt xml) *) | |
15 | + () | |
... | ... |
parser/visualization.ml
... | ... | @@ -44,6 +44,23 @@ let rec string_of_token = function |
44 | 44 | | PreTypes.Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token)) |
45 | 45 | | PreTypes.Tokens(cat,l) -> sprintf "Tokens(%s,%s)" cat (String.concat ";" (Xlist.map l string_of_int)) |
46 | 46 | |
47 | +let lemma_of_token = function | |
48 | + PreTypes.SmallLetter orth -> orth | |
49 | + | PreTypes.CapLetter(orth,lc) -> orth | |
50 | + | PreTypes.AllSmall orth -> orth | |
51 | + | PreTypes.AllCap(orth,lc,lc2) -> orth | |
52 | + | PreTypes.FirstCap(orth,lc,cl,ll) -> orth | |
53 | + | PreTypes.SomeCap orth -> orth | |
54 | + | PreTypes.RomanDig(v,t) -> v | |
55 | + | PreTypes.Interp orth -> orth | |
56 | + | PreTypes.Symbol orth -> orth | |
57 | + | PreTypes.Dig(v,t) -> v | |
58 | + | PreTypes.Other2 orth -> orth | |
59 | + | PreTypes.Lemma(lemma,cat,interps) -> lemma | |
60 | + | PreTypes.Proper(lemma,cat,interps,senses) -> lemma | |
61 | + | PreTypes.Compound(sense,l) -> "Compound" | |
62 | + | PreTypes.Tokens(cat,l) -> "Tokens" | |
63 | + | |
47 | 64 | let rec spaces i = |
48 | 65 | if i = 0 then "" else " " ^ spaces (i-1) |
49 | 66 | |
... | ... | @@ -354,7 +371,7 @@ let rec print_graph2_rec file edge_rev edge_label edge_head upper = function |
354 | 371 | incr id_counter; |
355 | 372 | fprintf file " %d [shape=box,label=<%s%s %s>]\n" id |
356 | 373 | (string_of_quant t.c_quant) |
357 | - (LCGstringOf.linear_term 0 t.c_sense) | |
374 | + (escape_string (LCGstringOf.linear_term 0 t.c_sense)) | |
358 | 375 | (if t.c_name=Dot then "" else "„" ^ LCGstringOf.linear_term 0 t.c_name ^ "”"); (* FIXME *) |
359 | 376 | print_edge2 file edge_rev edge_label edge_head "" upper id; |
360 | 377 | print_graph2_rec file false "" "" id t.c_relations |
... | ... | @@ -584,11 +601,13 @@ let page_header path = |
584 | 601 | <center> |
585 | 602 | <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1> |
586 | 603 | <h3>Podaj tekst:</h3> |
587 | - <form method=POST action=\"" ^ path ^ "parser.cgi\"> | |
588 | - <p><input type=\"text\" name=\"text0\" value=\"\" size=\"40\"></p> | |
604 | + <form method=POST action=\"" ^ path ^ "parser2.cgi\"> | |
605 | + <p><input type=\"text\" name=\"text0\" value=\"\" size=\"100\"></p> | |
589 | 606 | <p><input type=\"submit\" value=\"Analizuj\" size=\"60\"></p> |
590 | 607 | </form>" |
591 | 608 | |
609 | +(*<textarea name="text0" cols="100" rows="10"></textarea>*) | |
610 | + | |
592 | 611 | let page_trailer = |
593 | 612 | "<BR><BR> |
594 | 613 | <hr align=\"center\" size=\"2\" width=\"800\" /> |
... | ... | @@ -640,6 +659,8 @@ let string_of_mode = function |
640 | 659 | | CONLL -> "CONLL" |
641 | 660 | | ENIAM -> "ENIAM" |
642 | 661 | | Mate -> "Mate" |
662 | + | Swigra -> "Swigra" | |
663 | + | POLFIE -> "POLFIE" | |
643 | 664 | |
644 | 665 | (*let rec string_of_sentence = function |
645 | 666 | RawSentence s -> sprintf "RawSentence(%s)" s |
... | ... | @@ -716,6 +737,24 @@ let html_of_tokens tokens = |
716 | 737 | (String.concat "; " t.PreTypes.attrs)) :: l))) ^ |
717 | 738 | "</table>" |
718 | 739 | |
740 | +let html_of_tokens_simple_valence tokens = | |
741 | + "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>simple_valence</b></td></tr>" ^ | |
742 | + String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id -> | |
743 | + let t = ExtArray.get tokens id in | |
744 | + Xlist.fold t.simple_valence l (fun l (fnum,frame) -> | |
745 | + (sprintf "<tr><td>%d</td><td>%s</td><td>%s</td></tr>" | |
746 | + id t.PreTypes.orth (WalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ | |
747 | + "</table>" | |
748 | + | |
749 | +let html_of_tokens_valence tokens = | |
750 | + "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>simple_valence</b></td></tr>" ^ | |
751 | + String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id -> | |
752 | + let t = ExtArray.get tokens id in | |
753 | + Xlist.fold t.valence l (fun l (fnum,frame) -> | |
754 | + (sprintf "<tr><td>%d</td><td>%s</td><td>%s</td></tr>" | |
755 | + id t.PreTypes.orth (WalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ | |
756 | + "</table>" | |
757 | + | |
719 | 758 | let create_latex_chart path name chart = |
720 | 759 | LatexMain.latex_file_out path name "a1" false (fun file -> |
721 | 760 | Printf.fprintf file "%s\n" (LCGlatexOf.chart chart)); |
... | ... | @@ -742,30 +781,30 @@ let html_of_eniam_sentence path tokens (result : eniam_parse_result) = |
742 | 781 | (* | PreprocessingError -> "error_pre: %s\n" result.msg *) |
743 | 782 | | LexiconError -> sprintf "error_lex: %s\n" result.msg |
744 | 783 | | ParseError -> |
745 | - create_latex_chart path (result.id ^ "_chart") result.chart; | |
784 | + create_latex_chart path (result.file_prefix ^ "_chart") result.chart; | |
746 | 785 | sprintf "error_parse: %s\n" result.msg ^ |
747 | - sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id | |
786 | + sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix | |
748 | 787 | | ParseTimeout -> |
749 | - create_latex_chart path (result.id ^ "_chart") result.chart; | |
788 | + create_latex_chart path (result.file_prefix ^ "_chart") result.chart; | |
750 | 789 | sprintf "timeout: %s\n" result.msg ^ |
751 | - sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id | |
790 | + sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix | |
752 | 791 | | NotParsed -> |
753 | - create_latex_chart path (result.id ^ "_chart") result.chart; | |
792 | + create_latex_chart path (result.file_prefix ^ "_chart") result.chart; | |
754 | 793 | sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^ |
755 | - sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id | |
794 | + sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix | |
756 | 795 | | ReductionError -> sprintf "error_reduction: %s\n" result.msg |
757 | 796 | | TooManyNodes -> sprintf "to_many_nodes: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size |
758 | 797 | | NotReduced -> sprintf "not_reduced: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size |
759 | 798 | | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size |
760 | 799 | (* | NotTranslated -> "not_translated: \n" *) |
761 | 800 | | Parsed -> |
762 | - print_simplified_dependency_tree path (result.id ^ "_simplified_dependency_tree") tokens result.dependency_tree; | |
763 | - print_dependency_tree path (result.id ^ "_dependency_tree") result.dependency_tree; | |
764 | - LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree; | |
801 | + print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; | |
802 | + print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; | |
803 | + (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *) | |
765 | 804 | sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^ |
766 | - sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.id ^ | |
767 | - sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.id ^ | |
768 | - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id | |
805 | + sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ | |
806 | + sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix (* ^ | |
807 | + sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *) | |
769 | 808 | | _ -> failwith "html_of_eniam_sentence" |
770 | 809 | |
771 | 810 | let html_of_conll_sentence path tokens (result : conll_parse_result) = |
... | ... | @@ -774,48 +813,74 @@ let html_of_conll_sentence path tokens (result : conll_parse_result) = |
774 | 813 | (* | PreprocessingError -> "error_pre: %s\n" result.msg *) |
775 | 814 | | LexiconError -> sprintf "error_lex: %s\n" result.msg |
776 | 815 | | ParseError -> |
777 | - create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart; | |
778 | - create_latex_parsed_dep_chart path (result.id ^ "_parsed_dep_chart") result.parsed_dep_chart; | |
816 | + create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart; | |
817 | + create_latex_parsed_dep_chart path (result.file_prefix ^ "_parsed_dep_chart") result.parsed_dep_chart; | |
779 | 818 | sprintf "error_parse: %s\n" result.msg ^ |
780 | - sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^ | |
781 | - sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.id | |
819 | + sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^ | |
820 | + sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.file_prefix | |
782 | 821 | | ParseTimeout -> |
783 | - create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart; | |
784 | - create_latex_parsed_dep_chart path (result.id ^ "_parsed_dep_chart") result.parsed_dep_chart; | |
822 | + create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart; | |
823 | + create_latex_parsed_dep_chart path (result.file_prefix ^ "_parsed_dep_chart") result.parsed_dep_chart; | |
785 | 824 | sprintf "timeout: %s\n" result.msg ^ |
786 | - sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^ | |
787 | - sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.id | |
825 | + sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^ | |
826 | + sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.file_prefix | |
788 | 827 | | NotParsed -> |
789 | - create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart; | |
790 | - create_latex_not_parsed_dep_chart path (result.id ^ "_not_parsed_dep_chart") result.not_parsed_dep_chart; | |
828 | + create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart; | |
829 | + create_latex_not_parsed_dep_chart path (result.file_prefix ^ "_not_parsed_dep_chart") result.not_parsed_dep_chart; | |
791 | 830 | sprintf "not_parsed\n" ^ |
792 | - sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^ | |
793 | - sprintf "<BR><A HREF=\"%s_not_parsed_dep_chart.pdf\">Not Parsed Chart</A>\n" result.id | |
831 | + html_of_dep_sentence tokens result.paths ^ | |
832 | + sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^ | |
833 | + sprintf "<BR><A HREF=\"%s_not_parsed_dep_chart.pdf\">Not Parsed Chart</A>\n" result.file_prefix | |
794 | 834 | | ReductionError -> sprintf "error_reduction: %s\n" result.msg |
795 | 835 | | TooManyNodes -> sprintf "to_many_nodes: paths_size=%d\n" result.paths_size |
796 | 836 | | NotReduced -> |
797 | - LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree; | |
837 | + LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; | |
798 | 838 | sprintf "not_reduced: paths_size=%d\n" result.paths_size ^ |
799 | - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id | |
839 | + sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix | |
800 | 840 | | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size |
801 | 841 | (* | NotTranslated -> "not_translated: \n" *) |
802 | 842 | | Parsed -> |
803 | - print_simplified_dependency_tree path (result.id ^ "_simplified_dependency_tree") tokens result.dependency_tree; | |
804 | - print_dependency_tree path (result.id ^ "_dependency_tree") result.dependency_tree; | |
805 | - LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree; | |
843 | + print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; | |
844 | + print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; | |
845 | + LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; | |
806 | 846 | sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size ^ |
807 | - sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.id ^ | |
808 | - sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.id ^ | |
809 | - sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id | |
847 | + sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ | |
848 | + sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ | |
849 | + sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix | |
810 | 850 | | _ -> failwith "html_of_conll_sentence" |
811 | 851 | |
852 | +let html_of_sem_sentence path tokens (result : semantic_processing_result) = | |
853 | + match result.status with | |
854 | + Idle -> "idle\n" | |
855 | + | SemError -> sprintf "error_sem: %s\n" result.msg | |
856 | + (* print_dependency_tree path (result.file_prefix ^ "_disamb") result.disamb; | |
857 | + print_dependency_tree path (result.file_prefix ^ "_sem") result.sem; | |
858 | + print_dependency_tree path (result.file_prefix ^ "_sem2") result.sem2; | |
859 | + print_graph2 "results/" "sem3" query result.sem3; *) | |
860 | + | NotTranslated -> | |
861 | + (* print_dependency_tree path (result.file_prefix ^ "_disamb") result.disamb; | |
862 | + print_dependency_tree path (result.file_prefix ^ "_sem") result.sem; | |
863 | + print_dependency_tree path (result.file_prefix ^ "_sem2") result.sem2; *) | |
864 | + print_graph2 path (result.file_prefix ^ "_sem3") "" result.sem3; | |
865 | + sprintf "not_translated \n" ^ | |
866 | + (* sprintf "<BR><A HREF=\"%s_disamb.png\">Disambiguated Dependency Tree</A>\n" result.file_prefix ^ | |
867 | + sprintf "<BR><A HREF=\"%s_sem.png\">Semantic Graph 1</A>\n" result.file_prefix ^ | |
868 | + sprintf "<BR><A HREF=\"%s_sem2.png\">Semantic Graph 2</A>\n" result.file_prefix ^ *) | |
869 | + sprintf "<BR><A HREF=\"%s_sem3.png\">Semantic Graph</A>\n" result.file_prefix | |
870 | + | Parsed -> | |
871 | + print_graph2 path (result.file_prefix ^ "_sem3") "" result.sem3; | |
872 | + sprintf "parsed \n" ^ | |
873 | + sprintf "<BR><A HREF=\"%s_sem3.png\">Semantic Graph</A>\n" result.file_prefix | |
874 | + | _ -> failwith "html_of_sem_sentence" | |
875 | + | |
812 | 876 | |
813 | 877 | let rec html_of_sentence path tokens = function |
814 | 878 | RawSentence s -> s |
815 | - | StructSentence(_,paths,last) -> html_of_struct_sentence tokens paths last | |
816 | - | DepSentence(_,paths) -> html_of_dep_sentence tokens paths | |
879 | + | StructSentence(paths,last) -> html_of_struct_sentence tokens paths last | |
880 | + | DepSentence paths -> html_of_dep_sentence tokens paths | |
817 | 881 | | ENIAMSentence result -> html_of_eniam_sentence path tokens result |
818 | 882 | | CONLLSentence result -> html_of_conll_sentence path tokens result |
883 | + | SemSentence result -> html_of_sem_sentence path tokens result | |
819 | 884 | | QuotedSentences sentences -> |
820 | 885 | String.concat "<BR>\n" (Xlist.map sentences (fun p -> |
821 | 886 | sprintf "pid=%s pbeg=%d plen=%d pnext=%d<BR>%s" p.pid p.pbeg p.plen p.pnext (html_of_sentence path tokens p.psentence))) |
... | ... | @@ -840,9 +905,9 @@ let rec html_of_paragraph path tokens = function |
840 | 905 | let rec html_of_text path = function |
841 | 906 | RawText s -> s |
842 | 907 | | StructText(paragraphs,tokens) -> |
843 | - sprintf "%s<BR>\n%s" | |
908 | + sprintf "%s<BR>\n%s\n%s\n%s" | |
844 | 909 | (String.concat "<BR>\n" (Xlist.map paragraphs (html_of_paragraph path tokens))) |
845 | - (html_of_tokens tokens) | |
910 | + (html_of_tokens tokens) (html_of_tokens_simple_valence tokens) (html_of_tokens_valence tokens) | |
846 | 911 | | AltText l -> |
847 | 912 | "<table border=3>" ^ |
848 | 913 | String.concat "\n" (Xlist.map l (fun (mode,text) -> |
... | ... | @@ -854,3 +919,136 @@ let print_html_text path name text = |
854 | 919 | fprintf file "%s\n" html_header; |
855 | 920 | fprintf file "%s\n" (html_of_text path text); |
856 | 921 | fprintf file "%s\n" html_trailer) |
922 | + | |
923 | +let rec find_prev_next_sentence pid rev = function | |
924 | + AltSentence[Raw,_;Struct,QuotedSentences sentences] -> | |
925 | + Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.pid rev p.psentence) | |
926 | + | AltSentence[Raw,RawSentence s; mode,SemSentence result] -> result.file_prefix :: rev | |
927 | + | AltSentence[Raw,RawSentence s] -> ("p" ^ pid) :: rev | |
928 | + | _ -> failwith "find_prev_next_sentence: ni" | |
929 | + | |
930 | +let rec find_prev_next_paragraph rev = function | |
931 | + RawParagraph s -> rev | |
932 | + | StructParagraph sentences -> | |
933 | + Xlist.fold sentences rev (fun rev p -> find_prev_next_sentence p.pid rev p.psentence) | |
934 | + | AltParagraph l -> Xlist.fold l rev (fun rev (mode,paragraph) -> find_prev_next_paragraph rev paragraph) | |
935 | + | |
936 | +let rec make_prev_next_map map prev = function | |
937 | + [x] -> StringMap.add map x (prev,"") | |
938 | + | x :: next :: l -> make_prev_next_map (StringMap.add map x (prev,next)) x (next :: l) | |
939 | + | [] -> failwith "make_prev_next_map" | |
940 | + | |
941 | +let print_main_result cg_bin_path mode path id tokens query result prev_next_map = | |
942 | + let prev,next = try StringMap.find prev_next_map result.file_prefix with Not_found -> failwith "print_main_result" in | |
943 | + File.file_out (path ^ "page" ^ id ^ "_" ^ result.file_prefix ^ ".html") (fun file -> | |
944 | + fprintf file "%s\n" (page_header cg_bin_path); | |
945 | + if prev <> "" then fprintf file "<A HREF=\"page%s_%s.html\">Poprzednie zdanie</A> " id prev; | |
946 | + if next <> "" then fprintf file " <A HREF=\"page%s_%s.html\">Następne zdanie</A>" id next; | |
947 | + fprintf file "\n<H3>%s</H3>\n" query; | |
948 | + if mode <> ENIAM then fprintf file "<P>Parsed by %s\n" (string_of_mode mode); | |
949 | + ignore (Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl -> | |
950 | + print_graph2 path ("tree" ^ id ^ "_" ^ result.file_prefix ^ "_" ^ string_of_int n) "" tree; | |
951 | + print_xml_tree path ("tree" ^ id ^ "_" ^ result.file_prefix ^ "_" ^ string_of_int n) tree; | |
952 | + let mml = SemMmlOf.mml_of_mrl mrl in | |
953 | + print_mml path ("formula" ^ id ^ "_" ^ result.file_prefix ^ "_" ^ string_of_int n) mml; | |
954 | + fprintf file "<P><IMG SRC=\"tree%s_%s_%d.png\">\n" id result.file_prefix n; | |
955 | + fprintf file "<P>%s\n" (Xml.to_string_fmt mml); | |
956 | + fprintf file "<P><A HREF=\"tree%s_%s_%d.xml\">Graf w formacie XML</A>\n" id result.file_prefix n; | |
957 | + fprintf file "<P><A HREF=\"formula%s_%s_%d.mml\">Formuła w formacie MathML</A>\n" id result.file_prefix n; | |
958 | + n+1)); | |
959 | + fprintf file "<P>"; | |
960 | + if prev <> "" then fprintf file "<A HREF=\"page%s_%s.html\">Poprzednie zdanie</A> " id prev; | |
961 | + if next <> "" then fprintf file " <A HREF=\"page%s_%s.html\">Następne zdanie</A>" id next; | |
962 | + fprintf file "%s\n" page_trailer) | |
963 | + | |
964 | +let print_not_parsed_main_result cg_bin_path path id query pid prev_next_map = | |
965 | + let pid = "p" ^ pid in | |
966 | + let prev,next = try StringMap.find prev_next_map pid with Not_found -> failwith "print_not_parsed_main_result" in | |
967 | + File.file_out (path ^ "page" ^ id ^ "_" ^ pid ^ ".html") (fun file -> | |
968 | + fprintf file "%s\n" (page_header cg_bin_path); | |
969 | + if prev <> "" then fprintf file "<A HREF=\"page%s_%s.html\">Poprzednie zdanie</A> " id prev; | |
970 | + if next <> "" then fprintf file " <A HREF=\"page%s_%s.html\">Następne zdanie</A>" id next; | |
971 | + fprintf file "\n<H3>%s</H3>\n" query; | |
972 | + fprintf file "<P>Not parsed\n"; | |
973 | + fprintf file "<P>"; | |
974 | + if prev <> "" then fprintf file "<A HREF=\"page%s_%s.html\">Poprzednie zdanie</A> " id prev; | |
975 | + if next <> "" then fprintf file " <A HREF=\"page%s_%s.html\">Następne zdanie</A>" id next; | |
976 | + fprintf file "%s\n" page_trailer) | |
977 | + | |
978 | +let rec print_main_result_sentence cg_bin_path path id tokens pid prev_next_map = function | |
979 | + AltSentence[Raw,_;Struct,QuotedSentences sentences] -> | |
980 | + Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence) | |
981 | + | AltSentence[Raw,RawSentence query; mode,SemSentence result] -> | |
982 | + print_main_result cg_bin_path mode path id tokens query result prev_next_map | |
983 | + | AltSentence[Raw,RawSentence query] -> print_not_parsed_main_result cg_bin_path path id query pid prev_next_map | |
984 | + | _ -> failwith "print_main_result_sentence: ni" | |
985 | + | |
986 | +let rec print_main_result_paragraph cg_bin_path path id tokens prev_next_map = function | |
987 | + RawParagraph s -> () | |
988 | + | StructParagraph sentences -> | |
989 | + Xlist.iter sentences (fun p -> print_main_result_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence) | |
990 | + | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_paragraph cg_bin_path path id tokens prev_next_map paragraph) | |
991 | + | |
992 | +let rec print_main_result_text cg_bin_path path id = function | |
993 | + RawText s -> () | |
994 | + | StructText(paragraphs,tokens) -> | |
995 | + let prev_next_map = make_prev_next_map StringMap.empty "" | |
996 | + (List.rev (Xlist.fold paragraphs [] find_prev_next_paragraph)) in | |
997 | + Xlist.iter paragraphs (print_main_result_paragraph cg_bin_path path id tokens prev_next_map) | |
998 | + | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_text cg_bin_path path id text) | |
999 | + | |
1000 | +let print_main_result_first_page cg_bin_path mode path id tokens query result prev_next_map = | |
1001 | + let prev,next = try StringMap.find prev_next_map result.file_prefix with Not_found -> failwith "print_main_result" in | |
1002 | + printf "%s\n" (page_header cg_bin_path); | |
1003 | + if prev <> "" then printf "<A HREF=\"%spage%s_%s.html\">Poprzednie zdanie</A> " path id prev; | |
1004 | + if next <> "" then printf " <A HREF=\"%spage%s_%s.html\">Następne zdanie</A>" path id next; | |
1005 | + printf "\n<H3>%s</H3>\n" query; | |
1006 | + if mode <> ENIAM then printf "<P>Parsed by %s\n" (string_of_mode mode); | |
1007 | + ignore (Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl -> | |
1008 | + let mml = SemMmlOf.mml_of_mrl mrl in | |
1009 | + printf "<P><IMG SRC=\"%stree%s_%s_%d.png\">\n" path id result.file_prefix n; | |
1010 | + printf "<P>%s\n" (Xml.to_string_fmt mml); | |
1011 | + printf "<P><A HREF=\"%stree%s_%s_%d.xml\">Graf w formacie XML</A>\n" path id result.file_prefix n; | |
1012 | + printf "<P><A HREF=\"%sformula%s_%s_%d.mml\">Formuła w formacie MathML</A>\n" path id result.file_prefix n; | |
1013 | + n+1)); | |
1014 | + printf "<P>"; | |
1015 | + if prev <> "" then printf "<A HREF=\"%spage%s_%s.html\">Poprzednie zdanie</A> " path id prev; | |
1016 | + if next <> "" then printf " <A HREF=\"%spage%s_%s.html\">Następne zdanie</A>" path id next; | |
1017 | + printf "%s\n" page_trailer | |
1018 | + | |
1019 | +let print_not_parsed_main_result_first_page cg_bin_path path id query pid prev_next_map = | |
1020 | + let pid = "p" ^ pid in | |
1021 | + let prev,next = try StringMap.find prev_next_map pid with Not_found -> failwith "print_not_parsed_main_result" in | |
1022 | + printf "%s\n" (page_header cg_bin_path); | |
1023 | + if prev <> "" then printf "<A HREF=\"%spage%s_%s.html\">Poprzednie zdanie</A> " path id prev; | |
1024 | + if next <> "" then printf " <A HREF=\"%spage%s_%s.html\">Następne zdanie</A>" path id next; | |
1025 | + printf "\n<H3>%s</H3>\n" query; | |
1026 | + printf "<P>Not parsed\n"; | |
1027 | + printf "<P>"; | |
1028 | + if prev <> "" then printf "<A HREF=\"%spage%s_%s.html\">Poprzednie zdanie</A> " path id prev; | |
1029 | + if next <> "" then printf " <A HREF=\"%spage%s_%s.html\">Następne zdanie</A>" path id next; | |
1030 | + printf "%s\n" page_trailer | |
1031 | + | |
1032 | +let rec print_main_result_first_page_sentence cg_bin_path path id tokens pid prev_next_map = function | |
1033 | + AltSentence[Raw,_;Struct,QuotedSentences sentences] -> | |
1034 | + let p = List.hd sentences in | |
1035 | + print_main_result_first_page_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence | |
1036 | + | AltSentence[Raw,RawSentence query; mode,SemSentence result] -> | |
1037 | + print_main_result_first_page cg_bin_path mode path id tokens query result prev_next_map | |
1038 | + | AltSentence[Raw,RawSentence query] -> print_not_parsed_main_result_first_page cg_bin_path path id query pid prev_next_map | |
1039 | + | _ -> failwith "print_main_result_first_page_sentence: ni" | |
1040 | + | |
1041 | +let rec print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map = function | |
1042 | + RawParagraph s -> () | |
1043 | + | StructParagraph sentences -> | |
1044 | + let p = List.hd sentences in | |
1045 | + print_main_result_first_page_sentence cg_bin_path path id tokens p.pid prev_next_map p.psentence | |
1046 | + | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map paragraph) | |
1047 | + | |
1048 | +let rec print_main_result_first_page_text cg_bin_path path id = function | |
1049 | + RawText s -> () | |
1050 | + | StructText(paragraphs,tokens) -> | |
1051 | + let prev_next_map = make_prev_next_map StringMap.empty "" | |
1052 | + (List.rev (Xlist.fold paragraphs [] find_prev_next_paragraph)) in | |
1053 | + print_main_result_first_page_paragraph cg_bin_path path id tokens prev_next_map (List.hd paragraphs) | |
1054 | + | AltText l -> Xlist.iter l (fun (mode,text) -> print_main_result_first_page_text cg_bin_path path id text) | |
... | ... |
parser/webInterface.ml
... | ... | @@ -22,44 +22,40 @@ open LCGtypes |
22 | 22 | open ExecTypes |
23 | 23 | |
24 | 24 | let get_sock_addr host_name port = |
25 | - let he = | |
26 | - try Unix.gethostbyname host_name | |
25 | + let he = | |
26 | + try Unix.gethostbyname host_name | |
27 | 27 | with Not_found -> failwith ("get_sock_addr: host " ^ host_name ^ " not found") in |
28 | 28 | let addr = he.Unix.h_addr_list in |
29 | 29 | Unix.ADDR_INET(addr.(0),port) |
30 | 30 | |
31 | 31 | let ala = Relation(Val "Initiator",Val "",Concept{c_sense=Val "osoba 1"; c_name=Val "Ala"; c_local_quant=false; c_quant=Val "sg"; c_relations=Dot; c_variable="a",""; c_pos=1}) |
32 | 32 | let kot = Relation(Val "Theme",Val "",Concept{c_sense=Val "kot 1"; c_name=Dot; c_local_quant=false; c_quant=Val "sg"; c_relations=Dot; c_variable="k",""; c_pos=3}) |
33 | -let ala_ma_kota n = Context{cx_contents=Concept{c_sense=Val ("mieć " ^ n); c_name=Dot; c_local_quant=false; c_quant=Dot; c_variable="m",""; c_pos=2; c_relations=Tuple[ala;kot]}; | |
33 | +let ala_ma_kota n = Context{cx_contents=Concept{c_sense=Val ("mieć " ^ n); c_name=Dot; c_local_quant=false; c_quant=Dot; c_variable="m",""; c_pos=2; c_relations=Tuple[ala;kot]}; | |
34 | 34 | cx_sense=Dot; cx_relations=Dot; cx_variable="x",""; cx_pos=2} |
35 | 35 | |
36 | 36 | let process_query id query = |
37 | 37 | let sock = get_sock_addr Paths.server_host Paths.server_port in |
38 | - let ic,oc = | |
38 | + let ic,oc = | |
39 | 39 | try Unix.open_connection sock |
40 | 40 | with e -> failwith ("server connection error: " ^ Printexc.to_string e) in |
41 | - Printf.fprintf oc "%s\n%!" query; | |
41 | + (* Printf.fprintf oc "%s\n%!" query; *) | |
42 | + Marshal.to_channel oc (PreTypes.RawText query) []; | |
43 | + flush oc; | |
42 | 44 | let result = (Marshal.from_channel ic : ExecTypes.result) in |
43 | - Printf.fprintf oc "\n%!"; | |
45 | + (* Printf.fprintf oc "\n%!"; *) | |
46 | + (* Marshal.to_channel oc (PreTypes.RawText "") []; | |
47 | + flush oc; *) | |
44 | 48 | let _ = Unix.shutdown_connection ic in |
45 | 49 | if result.status <> Parsed then Visualization.print_other_result stdout "" query result else |
46 | 50 | let path = "results/web/" in |
47 | 51 | let id = "_" ^ id in |
48 | - ignore(Xlist.fold2 result.trees result.mrls 1 (fun n tree mrl -> | |
49 | - Visualization.print_graph2 path ("tree" ^ id ^ "_" ^ string_of_int n) "" tree; | |
50 | - Visualization.print_xml_tree path ("tree" ^ id ^ "_" ^ string_of_int n) tree; | |
51 | - let mml = SemMmlOf.mml_of_mrl mrl in | |
52 | - Visualization.print_mml path ("formula" ^ id ^ "_" ^ string_of_int n) mml; | |
53 | - File.file_out (path ^ "page" ^ id ^ "_" ^ string_of_int n ^ ".html") (fun file -> | |
54 | - Visualization.print_webpage file "../../" "" id query n (Xlist.size result.trees) mml); | |
55 | - n+1)); | |
56 | - Visualization.print_webpage stdout "" "results/web/" id query 1 (Xlist.size result.trees) (SemMmlOf.mml_of_mrl (List.hd result.mrls)) | |
57 | - | |
52 | + Visualization.print_main_result_text "../../" path id result.selected_semantic_text; | |
53 | + Visualization.print_main_result_first_page_text "" "results/web/" id result.selected_semantic_text | |
58 | 54 | |
59 | 55 | let get_input () = |
60 | 56 | let r = ref [] in |
61 | 57 | (try |
62 | - while true do | |
58 | + while true do | |
63 | 59 | r := (input_line stdin) :: (!r) |
64 | 60 | done; |
65 | 61 | !r |
... | ... | @@ -67,31 +63,31 @@ let get_input () = |
67 | 63 | |
68 | 64 | let rec translate_input_rec buf i size query = |
69 | 65 | if i >= size then Buffer.contents buf else ( |
70 | - let c,i = | |
66 | + let c,i = | |
71 | 67 | if String.get query i = '%' then |
72 | 68 | Scanf.sscanf (String.sub query (i+1) 2) "%x" (fun a -> Char.chr a), i+3 else |
73 | 69 | if String.get query i = '+' then ' ', i+1 else |
74 | 70 | String.get query i, i+1 in |
75 | 71 | Buffer.add_char buf c; |
76 | 72 | translate_input_rec buf i size query) |
77 | - | |
73 | + | |
78 | 74 | let translate_input query = |
79 | 75 | match query with |
80 | 76 | [query] -> |
81 | - if String.sub query 0 6 = "text0=" then | |
77 | + if String.sub query 0 6 = "text0=" then | |
82 | 78 | let buf = Buffer.create (String.length query) in |
83 | 79 | translate_input_rec buf 6 (String.length query) query |
84 | 80 | else failwith "translate_input 1" |
85 | 81 | | _ -> failwith "translate_input 2" |
86 | 82 | |
87 | -let get_query_id () = | |
83 | +let get_query_id () = | |
88 | 84 | let filename = Filename.temp_file ~temp_dir:"results/web/" "page_" "" in |
89 | 85 | (* print_endline filename; *) |
90 | 86 | let n = String.length "results/web/" + String.length "page_" in |
91 | 87 | let id = String.sub filename n (String.length filename - n) in |
92 | 88 | (* print_endline id; *) |
93 | 89 | id |
94 | - | |
90 | + | |
95 | 91 | let generate_header () = |
96 | 92 | Printf.printf "Content-type: text/html\n"; |
97 | 93 | Printf.printf "\n" |
... | ... | @@ -106,7 +102,7 @@ let generate_trailer () = |
106 | 102 | (*Printf.printf "</BODY>\n</HTML>\n"*)() |
107 | 103 | |
108 | 104 | let generate_error_message e = |
109 | - Printf.printf | |
105 | + Printf.printf | |
110 | 106 | "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"> |
111 | 107 | <html> |
112 | 108 | <head> |
... | ... | @@ -117,24 +113,25 @@ let generate_error_message e = |
117 | 113 | |
118 | 114 | <body> |
119 | 115 | <center> |
120 | - <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1> | |
121 | - <h3>%s</h3> | |
116 | + <h1>ENIAM: Kategorialny Parser Składniowo-Semantyczny</h1> | |
117 | + <h3>%s</h3> | |
122 | 118 | </center> |
123 | 119 | </body> |
124 | 120 | </html>" e |
125 | - | |
126 | -let _ = | |
121 | + | |
122 | +let _ = | |
127 | 123 | generate_header (); |
128 | 124 | (try |
129 | 125 | let query = get_input () in |
130 | 126 | let query = translate_input query in |
131 | 127 | let id = get_query_id () in |
132 | 128 | process_query id query; |
133 | - with | |
129 | + with | |
134 | 130 | Failure e -> generate_error_message e |
135 | 131 | | e -> generate_error_message (Printexc.to_string e)); |
136 | 132 | generate_trailer () |
137 | 133 | |
138 | 134 | (* testowanie z linii poleceń: |
139 | -echo "text0=Ala ma kota." | ./parser.cgi | |
140 | -*) | |
141 | 135 | \ No newline at end of file |
136 | +echo "text0=Ala ma kota. Ela ma psa." | ./parser2.cgi | |
137 | +echo "text0=Szpak powiedział: „Frunę. Kiszę.”" | ./parser2.cgi | |
138 | +*) | |
... | ... |
pre/.gitignore
pre/concraft_test.ml
0 → 100644
1 | + | |
2 | +(* let concraft_in, concraft_out, concraft_err = Unix.open_process_full "../../../.local/bin/concraft-pl tag ../concraft/nkjp-model-0.2.gz" [| |] *) | |
3 | +(*let concraft_in, concraft_out, concraft_err = | |
4 | + Unix.open_process_full "concraft-pl tag ../concraft/nkjp-model-0.2.gz" | |
5 | + [|"PATH=" ^ Sys.getenv "PATH"|] | |
6 | + | |
7 | +let _ = | |
8 | + print_endline "out"; | |
9 | + Printf.fprintf concraft_out "Ala ma kota.\n\n%!"; | |
10 | + print_endline "in"; | |
11 | + print_endline ("concraft error message: " ^ input_line concraft_err); | |
12 | + ()*) | |
13 | + | |
14 | +(**********************) | |
15 | + | |
16 | +(* | |
17 | +Aby korzytać z concrafta trzeba najpierw postawić serwer wpisując z linii poleceń: | |
18 | +concraft-pl server --inmodel ../concraft/nkjp-model-0.2.gz | |
19 | +*) | |
20 | + | |
21 | +let read_whole_channel c = | |
22 | + let r = ref [] in | |
23 | + try | |
24 | + while true do | |
25 | + r := (input_line c) :: !r | |
26 | + done; | |
27 | + !r | |
28 | + with End_of_file -> List.rev (!r) | |
29 | + | |
30 | +(* Gdy serwer jest już włączony na concraft_err trafia komunikat: | |
31 | +concraft-pl: bind: resource busy (Address already in use) | |
32 | +w przeciwnym przypadku się program wiesza się na czytaniu concraft_in | |
33 | +*) | |
34 | + | |
35 | +(*let _ = | |
36 | + print_endline "Starting concraft server 1"; | |
37 | + let concraft_in, concraft_out, concraft_err = | |
38 | + Unix.open_process_full "concraft-pl server --inmodel ../concraft/nkjp-model-0.2.gz" | |
39 | + [|"PATH=" ^ Sys.getenv "PATH"; "LANG=en_GB.UTF-8"|] in | |
40 | + (* let err_msg = String.concat "\n" (read_whole_channel concraft_err) in | |
41 | + let result = read_whole_channel concraft_in in *) | |
42 | + print_endline "Starting concraft server 2"; | |
43 | + print_endline (input_line concraft_err); | |
44 | + print_endline "Starting concraft server 3"; | |
45 | + print_endline (input_line concraft_err); | |
46 | + print_endline "Starting concraft server 3"; | |
47 | + (* print_endline err_msg; | |
48 | + print_endline "Starting concraft server 3"; | |
49 | + print_endline (String.concat "\n" result); | |
50 | + print_endline "Starting concraft server 4"; *) | |
51 | + ()*) | |
52 | + | |
53 | +let rec process_concraft_result orth lemma interp others rev = function | |
54 | + [] -> List.rev ((orth,(lemma,interp) :: others) :: rev) | |
55 | + | "" :: l -> process_concraft_result orth lemma interp others rev l | |
56 | + | line :: l -> | |
57 | + (match Xstring.split_delim "\t" line with | |
58 | + [orth2;s] when s = "none" || s = "space" -> | |
59 | + if orth = "" then process_concraft_result orth2 lemma interp others rev l | |
60 | + else process_concraft_result orth2 "" "" [] ((orth,(lemma,interp) :: others) :: rev) l | |
61 | + | ["";lemma2;interp2] -> process_concraft_result orth lemma interp ((lemma2,interp2) :: others) rev l | |
62 | + | ["";lemma;interp;"disamb"] -> process_concraft_result orth lemma interp others rev l | |
63 | + | _ -> failwith ("process_concraft_result: " ^ line)) | |
64 | + | |
65 | +let concraft_parse s = | |
66 | + let concraft_in, concraft_out, concraft_err = | |
67 | + Unix.open_process_full ("echo \"" ^ s ^ "\" | concraft-pl client") | |
68 | + [|"PATH=" ^ Sys.getenv "PATH"; "LANG=en_GB.UTF-8"|] in | |
69 | + let err_msg = String.concat "\n" (read_whole_channel concraft_err) in | |
70 | + let result = read_whole_channel concraft_in in | |
71 | + if err_msg <> "" then failwith err_msg else | |
72 | + process_concraft_result "" "" "" [] [] result | |
73 | + | |
74 | +let print_parsed_tokens l = | |
75 | + Xlist.iter l (fun (orth,l) -> | |
76 | + if l = [] then failwith "print_parsed_tokens" else | |
77 | + let lemma,interp = List.hd l in | |
78 | + print_endline (orth ^ "\t" ^ lemma ^ "\t" ^ interp)) | |
79 | + | |
80 | +let _ = | |
81 | + print_parsed_tokens (concraft_parse "Ala ma kota."); | |
82 | + print_parsed_tokens (concraft_parse "Szpak frunie."); | |
83 | + print_parsed_tokens (concraft_parse "Miałem miał."); | |
84 | + print_parsed_tokens (concraft_parse "Kiedyś miałem kota."); | |
85 | + print_parsed_tokens (concraft_parse "Kiadyś kupiłem kota."); | |
86 | + () | |
... | ... |
pre/makefile
1 | 1 | OCAMLC=ocamlc |
2 | 2 | OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology -I ../parser | |
4 | +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../morphology -I ../parser -I ../corpora | |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa inflexion.cmxa |
7 | 7 | INSTALLDIR=`ocamlc -where` |
8 | 8 | |
9 | 9 | WAL= paths.ml walTypes.ml walStringOf.ml preTypes.ml preWordnet.ml walParser.ml walTEI.ml walFrames.ml |
10 | -PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preSentences.ml preProcessing.ml | |
10 | +PRE= preTokenizer.ml preAcronyms.ml prePatterns.ml prePaths.ml preMWE.ml preSemantics.ml preSentences.ml ../corpora/CONLL.ml preProcessing.ml | |
11 | 11 | |
12 | 12 | all: |
13 | 13 | $(OCAMLOPT) -o pre $(OCAMLOPTFLAGS) $(WAL) $(PRE) |
14 | 14 | |
15 | +concraft_test: concraft_test.ml | |
16 | + $(OCAMLOPT) -o concraft_test $(OCAMLOPTFLAGS) concraft_test.ml | |
17 | + | |
15 | 18 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
16 | 19 | |
17 | 20 | .mll.ml: |
... | ... | @@ -33,4 +36,4 @@ all: |
33 | 36 | $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< |
34 | 37 | |
35 | 38 | clean: |
36 | - rm -f *~ *.cm[oix] *.o pre | |
39 | + rm -f *~ *.cm[oix] *.o pre concraft_test | |
... | ... |
pre/paths.ml
... | ... | @@ -17,61 +17,83 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | +type config = | |
21 | + {resources_path: string; walenty_filename: string; pre_port: int; pre_host: string; | |
22 | + results_path: string; max_no_solutions: int; lcg_timeout: float; lcg_no_nodes: int; no_processes: int; | |
23 | + concraft_enabled: bool; concraft_path: string; | |
24 | + mate_parser_enabled: bool; mate_parser_path: string; | |
25 | + swigra_enabled: bool; swigra_path: string; | |
26 | + sentence_selection_enabled: bool | |
27 | + } | |
20 | 28 | |
21 | -let resources_path,walenty_filename,pre_port,pre_host, | |
22 | - results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes = | |
23 | - Xlist.fold (File.load_lines "../config") ("../resources/","/usr/share/walenty/walenty.xml",3258,"localhost","../resuls/",10,100.,10000000,4) | |
24 | - (fun (resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes) s -> | |
29 | +let empty_config = | |
30 | + {resources_path="../resources/"; walenty_filename="/usr/share/walenty/walenty.xml"; pre_port=3258; pre_host="localhost"; | |
31 | + results_path="../resuls/"; max_no_solutions=10; lcg_timeout=100.; lcg_no_nodes=10000000; no_processes=4; | |
32 | + concraft_enabled=false; concraft_path=""; | |
33 | + mate_parser_enabled=false; mate_parser_path=""; | |
34 | + swigra_enabled=false; swigra_path=""; | |
35 | + sentence_selection_enabled=true | |
36 | + } | |
37 | + | |
38 | +let config = Xlist.fold (File.load_lines "../config") empty_config | |
39 | + (fun config s -> | |
25 | 40 | match Str.split (Str.regexp "=") s with |
26 | - ["RESOURCES_PATH";resources_path] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes | |
27 | - | ["WALENTY";walenty_filename] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes | |
28 | - | ["PRE_PORT";pre_port] -> resources_path,walenty_filename,int_of_string pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes | |
29 | - | ["PRE_HOST";pre_host] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes | |
30 | - | ["RESULTS_PATH";results_path] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes | |
31 | - | ["MAX_NO_SOLUTIONS";max_no_solutions] -> resources_path,walenty_filename,pre_port,pre_host,results_path,int_of_string max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes | |
32 | - | ["LCG_TIMEOUT";lcg_timeout] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,float_of_string lcg_timeout,lcg_no_nodes,no_processes | |
33 | - | ["LCG_NO_NODES";lcg_no_nodes] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,int_of_string lcg_no_nodes,no_processes | |
34 | - | ["NO_PROCESSES";no_processes] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,int_of_string no_processes | |
35 | - | [] -> resources_path,walenty_filename,pre_port,pre_host,results_path,max_no_solutions,lcg_timeout,lcg_no_nodes,no_processes | |
36 | - | _ -> failwith ("invalid config format: " ^ s)) | |
37 | - | |
41 | + ["RESOURCES_PATH";resources_path] -> {config with resources_path=resources_path} | |
42 | + | ["WALENTY";walenty_filename] -> {config with walenty_filename=walenty_filename} | |
43 | + | ["PRE_PORT";pre_port] ->{config with pre_port=int_of_string pre_port} | |
44 | + | ["PRE_HOST";pre_host] -> {config with pre_host=pre_host} | |
45 | + | ["RESULTS_PATH";results_path] -> {config with results_path=results_path} | |
46 | + | ["MAX_NO_SOLUTIONS";max_no_solutions] -> {config with max_no_solutions=int_of_string max_no_solutions} | |
47 | + | ["LCG_TIMEOUT";lcg_timeout] -> {config with lcg_timeout=float_of_string lcg_timeout} | |
48 | + | ["LCG_NO_NODES";lcg_no_nodes] -> {config with lcg_no_nodes=int_of_string lcg_no_nodes} | |
49 | + | ["NO_PROCESSES";no_processes] -> {config with no_processes=int_of_string no_processes} | |
50 | + | ["CONCRAFT_ENABLED";concraft_enabled] -> {config with concraft_enabled=bool_of_string concraft_enabled} | |
51 | + | ["CONCRAFT_PATH";concraft_path] -> {config with concraft_path=concraft_path} | |
52 | + | ["MATE_PARSER_ENABLED";mate_parser_enabled] -> {config with mate_parser_enabled=bool_of_string mate_parser_enabled} | |
53 | + | ["MATE_PARSER_PATH";mate_parser_path] -> {config with mate_parser_path=mate_parser_path} | |
54 | + | ["SWIGRA_ENABLED";swigra_enabled] -> {config with swigra_enabled=bool_of_string swigra_enabled} | |
55 | + | ["SWIGRA_PATH";swigra_path] -> {config with swigra_path=swigra_path} | |
56 | + | ["SENTENCE_SELECTION_ENABLED";sentence_selection_enabled] -> {config with sentence_selection_enabled=bool_of_string sentence_selection_enabled} | |
57 | + | [] -> config | |
58 | + | l -> failwith ("invalid config format: " ^ s ^ " $" ^ String.concat "#" l ^ "$")) | |
59 | + | |
38 | 60 | let alt_all = "alt1.tab" |
39 | 61 | let dict_all = "dict1.tab" |
40 | 62 | let rules_all = "rules1.tab" |
41 | 63 | |
42 | 64 | (* let resources_path = "../resources/" *) |
43 | -let sgjp_path = resources_path ^ "SGJP/" | |
65 | +let sgjp_path = config.resources_path ^ "SGJP/" | |
44 | 66 | |
45 | 67 | (* let walenty_filename = "/usr/share/walenty/walenty.xml" *) |
46 | 68 | |
47 | -(* UWAGA: w razie wymiany słownika trzeba przekopiować definicję adv(pron),nonch,possp oraz wygenerować fixed.tab *) | |
48 | -let realizations_filename = resources_path ^ "Walenty/phrase_types_expand_20150909.txt" | |
69 | +(* UWAGA: w razie wymiany słownika trzeba przekopiować definicję adv(pron),nonch,possp oraz wygenerować fixed.tab *) | |
70 | +let realizations_filename = config.resources_path ^ "Walenty/phrase_types_expand_20150909.txt" | |
49 | 71 | |
50 | -let pre_port = 3258 | |
72 | +let pre_port = (*3258*)3158 | |
51 | 73 | let pre_host = "localhost" |
52 | 74 | (* let pre_host = "wloczykij" *) |
53 | -(* let server_port = 3259 *) | |
75 | +let server_port = (*3259*)3159 | |
54 | 76 | (* let server_host = "localhost" *) |
55 | -(* let server_host = "wloczykij" *) | |
77 | +let server_host = "wloczykij" | |
56 | 78 | |
57 | 79 | let proper_names_filename = sgjp_path ^ "proper_names_sgjp_polimorf_20151020.tab" |
58 | -let proper_names_filename2 = resources_path ^ "proper_names_20160104.tab" | |
59 | - | |
60 | -let subst_uncountable_lexemes_filename = resources_path ^ "subst_uncountable.dat" | |
61 | -let subst_uncountable_lexemes_filename2 = resources_path ^ "subst_uncountable_stare.dat" | |
62 | -let subst_container_lexemes_filename = resources_path ^ "subst_container.dat" | |
63 | -let subst_numeral_lexemes_filename = resources_path ^ "subst_numeral.dat" | |
64 | -let subst_time_lexemes_filename = resources_path ^ "subst_time.dat" | |
80 | +let proper_names_filename2 = config.resources_path ^ "proper_names_20160104.tab" | |
81 | + | |
82 | +let subst_uncountable_lexemes_filename = config.resources_path ^ "subst_uncountable.dat" | |
83 | +let subst_uncountable_lexemes_filename2 = config.resources_path ^ "subst_uncountable_stare.dat" | |
84 | +let subst_container_lexemes_filename = config.resources_path ^ "subst_container.dat" | |
85 | +let subst_numeral_lexemes_filename = config.resources_path ^ "subst_numeral.dat" | |
86 | +let subst_time_lexemes_filename = config.resources_path ^ "subst_time.dat" | |
65 | 87 | |
66 | -let rzeczownik_filename = resources_path ^ "plWordnet/rzeczownik.tab" | |
67 | -let czasownik_filename = resources_path ^ "plWordnet/czasownik.tab" | |
68 | -let przymiotnik_filename = resources_path ^ "plWordnet/przymiotnik.tab" | |
69 | -let synsets_filename = resources_path ^ "plWordnet/synsets.tab" | |
70 | -let hipero_filename = resources_path ^ "plWordnet/hipero.tab" | |
71 | -let predef_filename = resources_path ^ "predef_prefs.tab" | |
72 | -let proper_classes_filename = resources_path ^ "proper_classes.tab" | |
88 | +let rzeczownik_filename = config.resources_path ^ "plWordnet/rzeczownik.tab" | |
89 | +let czasownik_filename = config.resources_path ^ "plWordnet/czasownik.tab" | |
90 | +let przymiotnik_filename = config.resources_path ^ "plWordnet/przymiotnik.tab" | |
91 | +let synsets_filename = config.resources_path ^ "plWordnet/synsets.tab" | |
92 | +let hipero_filename = config.resources_path ^ "plWordnet/hipero.tab" | |
93 | +let predef_filename = config.resources_path ^ "predef_prefs.tab" | |
94 | +let proper_classes_filename = config.resources_path ^ "proper_classes.tab" | |
73 | 95 | |
74 | -let brev_filename = resources_path ^ "brev.tab" | |
75 | -let lemma_frequencies_filename = resources_path ^ "NKJP1M/NKJP1M-lemma-freq.tab" | |
96 | +let brev_filename = config.resources_path ^ "brev.tab" | |
97 | +let lemma_frequencies_filename = config.resources_path ^ "NKJP1M/NKJP1M-lemma-freq.tab" | |
76 | 98 | |
77 | 99 | let mte_filename = sgjp_path ^ "mte_20151215.tab" |
... | ... |
pre/preProcessing.ml
... | ... | @@ -582,9 +582,10 @@ let parse query = |
582 | 582 | let paths = PreLemmatization.combine_interps paths in |
583 | 583 | (* print_endline (PrePaths.to_string paths); *)*) |
584 | 584 | |
585 | -let parse_conll tokens dep_paths = | |
585 | +let parse_conll tokens dep_paths = (* FIXME: sprawdzić, czy zachowana jest kolejność elementów paths !!! *) | |
586 | 586 | let paths = List.rev (Int.fold 1 (Array.length dep_paths - 1) [] (fun paths conll_id -> |
587 | - ExtArray.get tokens conll_id :: paths)) in | |
587 | + let id,_,_ = dep_paths.(conll_id) in | |
588 | + ExtArray.get tokens id :: paths)) in | |
588 | 589 | (* print_endline "a12"; *) |
589 | 590 | let paths = find_proper_names paths in |
590 | 591 | (* print_endline "a13"; *) |
... | ... | @@ -600,26 +601,120 @@ let parse_conll tokens dep_paths = |
600 | 601 | let paths = PreSemantics.assign_semantics paths in |
601 | 602 | (* print_endline "a16"; *) |
602 | 603 | let _ = Xlist.fold paths 1 (fun conll_id t -> |
603 | - ExtArray.set tokens conll_id t; | |
604 | + let id,_,_ = dep_paths.(conll_id) in | |
605 | + ExtArray.set tokens id t; | |
604 | 606 | conll_id + 1) in |
605 | 607 | () |
606 | 608 | |
609 | +(* | |
610 | +UWAGA: Aby korzystać z concrafta trzeba najpierw postawić serwer wpisując z linii poleceń: | |
611 | +concraft-pl server --inmodel ../concraft/nkjp-model-0.2.gz | |
612 | +*) | |
613 | + | |
614 | +let read_whole_channel c = | |
615 | + let r = ref [] in | |
616 | + try | |
617 | + while true do | |
618 | + r := (input_line c) :: !r | |
619 | + done; | |
620 | + !r | |
621 | + with End_of_file -> List.rev (!r) | |
622 | + | |
623 | +let rec process_concraft_result orth lemma interp others rev = function | |
624 | + [] -> List.rev ((orth,(lemma,interp) :: others) :: rev) | |
625 | + | "" :: l -> process_concraft_result orth lemma interp others rev l | |
626 | + | line :: l -> | |
627 | + (match Xstring.split_delim "\t" line with | |
628 | + [orth2;s] when s = "none" || s = "space" -> | |
629 | + if orth = "" then process_concraft_result orth2 lemma interp others rev l | |
630 | + else process_concraft_result orth2 "" "" [] ((orth,(lemma,interp) :: others) :: rev) l | |
631 | + | ["";lemma2;interp2] -> process_concraft_result orth lemma interp ((lemma2,interp2) :: others) rev l | |
632 | + | ["";lemma;interp;"disamb"] -> process_concraft_result orth lemma interp others rev l | |
633 | + | _ -> failwith ("process_concraft_result: " ^ line)) | |
634 | + | |
635 | +let concraft_parse s = | |
636 | + let concraft_in, concraft_out, concraft_err = | |
637 | + Unix.open_process_full ("echo \"" ^ s ^ "\" | concraft-pl client") | |
638 | + [|"PATH=" ^ Sys.getenv "PATH"; "LANG=en_GB.UTF-8"|] in | |
639 | + let err_msg = String.concat "\n" (read_whole_channel concraft_err) in | |
640 | + let result = read_whole_channel concraft_in in | |
641 | + if err_msg <> "" then failwith err_msg else | |
642 | + process_concraft_result "" "" "" [] [] result | |
643 | + | |
644 | +(*let rec load_concraft_sentence white orth rev ic = | |
645 | + (* print_endline "load_concraft_sentence 1"; *) | |
646 | + (* print_endline ("concraft error message: " ^ input_line concraft_err); *) | |
647 | + let s = input_line ic in | |
648 | + (* print_endline ("load_concraft_sentence: " ^ s); *) | |
649 | + if s = "" then List.rev rev else | |
650 | + match Xstring.split_delim "\t" s with | |
651 | + [""; lemma; interp; "disamb"] -> load_concraft_sentence "" "" ((white,orth,lemma,interp) :: rev) ic | |
652 | + | [""; lemma; interp] -> load_concraft_sentence white orth rev ic | |
653 | + | [orth; white] -> load_concraft_sentence white orth rev ic | |
654 | + | _ -> failwith ("load_concraft_sentence: " ^ s)*) | |
655 | + | |
656 | +let make_token (orth,l) = | |
657 | + if l = [] then failwith "make_token 1" else | |
658 | + let lemma,interp = List.hd l in | |
659 | + let cat,interp = match Xstring.split ":" interp with | |
660 | + cat :: l -> cat, [Xlist.map l (fun tag -> [tag])] | |
661 | + | _ -> failwith ("make_token 2: " ^ orth ^ " " ^ lemma ^ " " ^ interp) in | |
662 | + {empty_token with orth = orth; token = Lemma(lemma,cat,interp)} | |
663 | + | |
664 | +let parse_mate tokens pbeg s = | |
665 | + (* print_endline ("parse_mate: " ^ s); *) | |
666 | + (* Printf.fprintf concraft_out "%s\n\n%!" s; | |
667 | + let l = load_concraft_sentence "" "" [] concraft_in in *) | |
668 | + let l = concraft_parse s in | |
669 | + let l = Xlist.map l make_token in | |
670 | + let l = {empty_token with token = Interp "<conll_root>"} :: l in | |
671 | + let l = Xlist.map l (fun t -> ExtArray.add tokens t,-1,"") in | |
672 | + let _ = CONLL.establish_for_token pbeg s tokens (List.tl l) in | |
673 | + let dep_paths = Array.of_list l in | |
674 | + parse_conll tokens dep_paths; | |
675 | + dep_paths | |
676 | + | |
677 | +let rec parse_mate_sentence tokens mode pbeg = function | |
678 | + RawSentence s -> if mode <> Mate || not Paths.config.Paths.concraft_enabled then RawSentence s else DepSentence (parse_mate tokens pbeg s) | |
679 | + | StructSentence(paths,last) -> StructSentence(paths,last) | |
680 | + | DepSentence(paths) -> DepSentence(paths) | |
681 | + | QuotedSentences sentences -> | |
682 | + QuotedSentences(Xlist.map sentences (fun p -> | |
683 | + {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix; | |
684 | + psentence=parse_mate_sentence tokens mode pbeg p.PreTypes.psentence})) | |
685 | + | AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) -> | |
686 | + mode, parse_mate_sentence tokens mode pbeg sentence)) | |
687 | + | |
688 | +let parse_mate_sentences tokens sentences = | |
689 | + Xlist.map sentences (fun p -> | |
690 | + {p with psentence=parse_mate_sentence tokens Struct p.pbeg p.psentence}) | |
691 | + | |
607 | 692 | let parse_text = function |
608 | 693 | RawText query -> |
694 | + (* print_endline ("parse_text: " ^ query); *) | |
609 | 695 | let tokens = ExtArray.make 100 empty_token in |
610 | 696 | let _ = ExtArray.add tokens empty_token in (* id=0 jest zarezerwowane dla pro; FIXME: czy to jest jeszcze aktualne? *) |
611 | - let paragraphs = Xlist.map (Xstring.split "\n" query) (fun paragraph -> | |
697 | + let paragraphs = Xstring.split "\n\\|\r" query in | |
698 | + let paragraphs = List.rev (Xlist.fold paragraphs [] (fun l -> function "" -> l | s -> s :: l)) in | |
699 | + let n = if Xlist.size paragraphs = 1 then 0 else 1 in | |
700 | + let paragraphs,_ = Xlist.fold paragraphs ([],n) (fun (paragraphs,n) paragraph -> | |
612 | 701 | let paths = parse paragraph in |
613 | - let sentences = PreSentences.split_into_sentences paragraph tokens paths in | |
614 | - AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) in | |
702 | + (* print_endline "parse_text 1"; *) | |
703 | + let pid = if n = 0 then "" else string_of_int n ^ "_" in | |
704 | + let sentences = PreSentences.split_into_sentences pid paragraph tokens paths in | |
705 | + (* print_endline "parse_text 2"; *) | |
706 | + let sentences = parse_mate_sentences tokens sentences in | |
707 | + (* print_endline "parse_text 3"; *) | |
708 | + (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1) in | |
615 | 709 | AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs, tokens)] |
616 | 710 | | AltText[Raw,RawText query;CONLL,StructText([ |
617 | - StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence(_,dep_paths)]} as p]],tokens)] -> | |
711 | + StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]],tokens)] -> | |
618 | 712 | parse_conll tokens dep_paths; |
619 | 713 | let paths = parse query in |
620 | - let sentences = PreSentences.split_into_sentences query tokens paths in | |
621 | - let conll = StructParagraph[{p with psentence = AltSentence[Raw, RawSentence text; | |
622 | - Mate, DepSentence("M",dep_paths); CONLL, DepSentence("C",dep_paths)]}] in | |
714 | + let sentences = PreSentences.split_into_sentences "" query tokens paths in | |
715 | + let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in | |
716 | + let conll = StructParagraph[{p with psentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths] | |
717 | + @ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else [])}] in | |
623 | 718 | AltText[Raw,RawText query; Struct, StructText([ |
624 | 719 | AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]],tokens)] |
625 | 720 | | _ -> failwith "parse_text: not implemented" |
... | ... | @@ -632,9 +727,9 @@ let rec main_loop in_chan out_chan = |
632 | 727 | (try |
633 | 728 | (* let time0 = Sys.time () in *) |
634 | 729 | let utime0 = Unix.gettimeofday () in |
635 | - (* print_endline "main_loop 3a"; *) | |
730 | + (* print_endline "main_loop 3a"; *) | |
636 | 731 | let text = parse_text query in |
637 | - (* print_endline "main_loop 4a"; *) | |
732 | + (* print_endline "main_loop 4a"; *) | |
638 | 733 | (* let time2 = Sys.time () in *) |
639 | 734 | let utime2 = Unix.gettimeofday () in |
640 | 735 | (* Printf.printf "time=%f utime=%f\n%!" (time2 -. time0) (utime2 -. utime0); *) |
... | ... |
pre/preSentences.ml
... | ... | @@ -145,20 +145,18 @@ let find_tokens_in_chart tokens chart lnode rnode cat = |
145 | 145 | id |
146 | 146 | (id,lnode,rnode) :: paths)) in*) |
147 | 147 | |
148 | -let rec add_struct_sentence_ids_rec n sentences = | |
148 | +let rec add_struct_sentence_ids_rec pid n sentences = | |
149 | 149 | Xlist.fold sentences ([],n) (fun (l,n) -> function |
150 | - {psentence=AltSentence[Raw,s;ENIAM,StructSentence(_,paths,last)]} as p -> | |
151 | - {p with psentence=AltSentence[Raw,s;ENIAM,StructSentence("E" ^ string_of_int n,paths,last)]} :: l, n+1 | |
152 | - | {psentence=AltSentence[Raw,s;ENIAM,QuotedSentences sentences]} as p -> | |
153 | - let sentences, n = add_struct_sentence_ids_rec n sentences in | |
154 | - {p with psentence=AltSentence[Raw,s;ENIAM,QuotedSentences (List.rev sentences)]} :: l, n+1 | |
155 | - | _ -> failwith "add_struct_sentence_ids") | |
150 | + {psentence=AltSentence[Raw,s;Struct,QuotedSentences sentences]} as p -> | |
151 | + let sentences, n = add_struct_sentence_ids_rec pid n sentences in | |
152 | + {p with psentence=AltSentence[Raw,s;Struct,QuotedSentences (List.rev sentences)]} :: l, n | |
153 | + | p -> {p with pfile_prefix=pid ^ string_of_int n} :: l, n+1) | |
156 | 154 | |
157 | -let add_struct_sentence_ids sentences = | |
155 | +let add_struct_sentence_ids pid sentences = | |
158 | 156 | match sentences with |
159 | - [{psentence=AltSentence[Raw,s;ENIAM,StructSentence(_,paths,last)]} as p] -> | |
160 | - [{p with psentence=AltSentence[Raw,s;ENIAM,StructSentence("E",paths,last)]}] | |
161 | - | _ -> List.rev (fst (add_struct_sentence_ids_rec 1 sentences)) | |
157 | + [{psentence=AltSentence[Raw,_;Struct,QuotedSentences _]}] -> List.rev (fst (add_struct_sentence_ids_rec pid 1 sentences)) | |
158 | + | [p] -> [{p with pfile_prefix=pid}] | |
159 | + | _ -> List.rev (fst (add_struct_sentence_ids_rec pid 1 sentences)) | |
162 | 160 | |
163 | 161 | let prepare_indexes paths = |
164 | 162 | let set = Xlist.fold paths IntSet.empty (fun set (_,beg,next) -> |
... | ... | @@ -181,20 +179,22 @@ let rec extract_sentences_rec tokens id = |
181 | 179 | match t.token with |
182 | 180 | Tokens("sentence",ids) -> |
183 | 181 | let paths,last = make_paths tokens ids in |
184 | - [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; | |
185 | - psentence=AltSentence[Raw,RawSentence t.orth; | |
186 | - ENIAM,StructSentence("",paths,last)]}] | |
182 | + [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; pfile_prefix=""; | |
183 | + psentence=AltSentence([Raw,RawSentence t.orth; ENIAM,StructSentence(paths,last)] @ | |
184 | + (if Paths.config.Paths.mate_parser_enabled then [Mate,RawSentence t.orth] else []) @ | |
185 | + (if Paths.config.Paths.swigra_enabled then [Swigra,RawSentence t.orth] else [])(* @ | |
186 | + (if Paths.config.Paths.polfie_enabled then [POLFIE,RawSentence t.orth] else []) *))}] | |
187 | 187 | | Tokens("quoted_sentences",ids) -> |
188 | - [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; | |
188 | + [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; pfile_prefix=""; | |
189 | 189 | psentence=AltSentence[Raw,RawSentence t.orth; |
190 | - ENIAM,QuotedSentences(List.sort par_compare (List.flatten (Xlist.rev_map ids (extract_sentences_rec tokens))))]}] | |
190 | + Struct,QuotedSentences(List.sort par_compare (List.flatten (Xlist.rev_map ids (extract_sentences_rec tokens))))]}] | |
191 | 191 | | _ -> [] |
192 | 192 | |
193 | -let extract_sentences tokens chart last = | |
193 | +let extract_sentences pid tokens chart last = | |
194 | 194 | let ids = find_tokens_in_chart tokens chart 0 last "query" in |
195 | 195 | let sentences = List.sort par_compare (List.flatten (Xlist.rev_map ids (fun id -> |
196 | 196 | extract_sentences_rec tokens id))) in |
197 | - add_struct_sentence_ids sentences | |
197 | + add_struct_sentence_ids pid sentences | |
198 | 198 | (* let paths = Int.fold 0 last [] (fun paths lnode -> |
199 | 199 | Xlist.fold chart.(lnode) paths (fun paths (id,rnode) -> |
200 | 200 | (id,lnode,rnode) :: paths)) in |
... | ... | @@ -255,7 +255,7 @@ let make_chart paths last = |
255 | 255 | chart.(beg) <- (id,next) :: chart.(beg)); |
256 | 256 | chart |
257 | 257 | |
258 | -let split_into_sentences paragraph tokens paths = | |
258 | +let split_into_sentences pid paragraph tokens paths = | |
259 | 259 | let paths = make_ids tokens paths in |
260 | 260 | let paths,last = prepare_indexes paths in |
261 | 261 | let chart = make_chart paths last in |
... | ... | @@ -265,4 +265,4 @@ let split_into_sentences paragraph tokens paths = |
265 | 265 | find_sentence par tokens chart last; |
266 | 266 | find_quoted_sentences par tokens chart last; |
267 | 267 | find_query par tokens chart last; |
268 | - extract_sentences tokens chart last | |
268 | + extract_sentences pid tokens chart last | |
... | ... |
pre/preTypes.ml
... | ... | @@ -80,6 +80,7 @@ type token = |
80 | 80 | informacje o poszczególnych tokenach *) |
81 | 81 | and token_record = { |
82 | 82 | orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token *) |
83 | + corr_orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token z poprawionymi błędami *) | |
83 | 84 | beg: int; (* pozycja początkowa tokenu względem początku akapitu *) |
84 | 85 | len: int; (* długość tokenu *) |
85 | 86 | next: int; (* pozycja początkowa następnego tokenu względem początku akapitu *) |
... | ... | @@ -112,12 +113,12 @@ let empty_labels = { |
112 | 113 | } |
113 | 114 | |
114 | 115 | let empty_token = { |
115 | - orth="";beg=0;len=0;next=0; token=Symbol ""; weight=0.; e=empty_labels; | |
116 | + orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; weight=0.; e=empty_labels; | |
116 | 117 | attrs=[]; valence=[]; simple_valence=[]; senses=[]; |
117 | 118 | lroles="",""; semantics=Normal} |
118 | 119 | |
119 | 120 | type mode = |
120 | - Raw | Struct | CONLL | ENIAM | Mate | |
121 | + Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | |
121 | 122 | |
122 | 123 | (* warstwy nkjp1m do analizy: |
123 | 124 | header |
... | ... | @@ -133,14 +134,14 @@ ann_named |
133 | 134 | type sentence = |
134 | 135 | RawSentence of string |
135 | 136 | (* | CONLL of conll list *) |
136 | - | StructSentence of string * (int * int * int) list * int (* file_prefix * (id * lnode * rnode) list * last *) | |
137 | - | DepSentence of string * (int * int * string) array (* file_prefix * (id * super * label) conll_id *) | |
137 | + | StructSentence of (int * int * int) list * int (* (id * lnode * rnode) list * last *) | |
138 | + | DepSentence of (int * int * string) array (* (id * super * label) conll_id *) | |
138 | 139 | | QuotedSentences of paragraph_record list |
139 | 140 | (* | NKJP1M of nkjp1m list *) |
140 | 141 | (* | Skladnica of skladnica_tree *) |
141 | 142 | | AltSentence of (mode * sentence) list (* string = etykieta np raw, nkjp, krzaki *) |
142 | 143 | |
143 | -and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence} (* beg i len liczone po znakach unicode ( * 100 ???) *) | |
144 | +and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence; pfile_prefix: string} (* beg i len liczone po znakach unicode ( * 100 ???) *) | |
144 | 145 | |
145 | 146 | and paragraph = |
146 | 147 | RawParagraph of string |
... | ... |
pre/walTEI.ml
... | ... | @@ -21,23 +21,23 @@ |
21 | 21 | Autor: Maciej Hołubowicz |
22 | 22 | *) |
23 | 23 | |
24 | -let przejdz funkcja poczym = | |
24 | +let przejdz funkcja poczym = | |
25 | 25 | let _ = List.rev (List.fold_left (fun l nazwa -> funkcja nazwa :: l) [] poczym) in |
26 | 26 | () |
27 | 27 | |
28 | 28 | (*zwraca liste zwróconych wartosci przez funkcje*) |
29 | -let przejdz_lista funkcja poczym = | |
29 | +let przejdz_lista funkcja poczym = | |
30 | 30 | List.rev (List.fold_left (fun l nazwa -> funkcja nazwa :: l) [] poczym) |
31 | 31 | |
32 | -let przejdz_lista_second funkcja poczym = | |
32 | +let przejdz_lista_second funkcja poczym = | |
33 | 33 | List.rev (List.fold_left (fun l nazwa -> (snd (funkcja nazwa)) :: l) [] poczym) |
34 | - | |
34 | + | |
35 | 35 | (*łączy listy zwróconych wartości przez funkcje*) |
36 | -let przejdz_scal funkcja poczym = | |
36 | +let przejdz_scal funkcja poczym = | |
37 | 37 | List.rev (List.fold_left (fun l nazwa -> funkcja nazwa @ l) [] poczym) |
38 | 38 | |
39 | 39 | (*zapisuje wynik wywołania do zmiennej i wywołuje ze zmienną*) |
40 | -let przejdz_zapisz funkcja zmienna poczym = | |
40 | +let przejdz_zapisz funkcja zmienna poczym = | |
41 | 41 | List.fold_left (fun zmienna nazwa -> funkcja zmienna nazwa) zmienna poczym |
42 | 42 | |
43 | 43 | |
... | ... | @@ -49,7 +49,7 @@ let rec last l = |
49 | 49 | match l with |
50 | 50 | | [a] -> a |
51 | 51 | | a::b -> last b |
52 | - | _ -> failwith "pusta lista" | |
52 | + | _ -> failwith "pusta lista" | |
53 | 53 | |
54 | 54 | let parse_full_id s = |
55 | 55 | if String.length s = 0 then empty_id else |
... | ... | @@ -71,10 +71,10 @@ let parse_id s = |
71 | 71 | [s;suf] -> s,suf |
72 | 72 | | _ -> failwith "zła ilość '-'" in |
73 | 73 | let id = {hash = hash; suffix = suf; numbers = (Str.split (Str.regexp "\\.") s)} in |
74 | - {id with numbers = [last id.numbers]} | |
74 | + {id with numbers = [last id.numbers]} | |
75 | + | |
76 | + | |
75 | 77 | |
76 | - | |
77 | - | |
78 | 78 | (* Początek kodu do wczytywania syntaticLayer *) |
79 | 79 | |
80 | 80 | type preposition = string |
... | ... | @@ -94,7 +94,7 @@ and phrase = |
94 | 94 | | ComprepNP of preposition |
95 | 95 | | CP of comp |
96 | 96 | | NCP of case * comp |
97 | - | PrepNCP of preposition * case * comp | |
97 | + | PrepNCP of preposition * case * comp | |
98 | 98 | | InfP of aspect |
99 | 99 | | XP of category |
100 | 100 | | AdvP of string |
... | ... | @@ -110,15 +110,15 @@ and phrase = |
110 | 110 | | ComparP of string |
111 | 111 | | Nonch |
112 | 112 | | Or |
113 | - | Refl | |
114 | - | Recip | |
113 | + | Refl | |
114 | + | Recip | |
115 | 115 | | E |
116 | 116 | | DistrP |
117 | - | PossP | |
117 | + | PossP | |
118 | 118 | | FixedP of phrase list * string |
119 | 119 | | Lex of lex |
120 | 120 | | Null |
121 | - | |
121 | + | |
122 | 122 | and lex = { |
123 | 123 | phrases_list: phrase list; |
124 | 124 | lemma: string * string * lemmas; |
... | ... | @@ -132,30 +132,30 @@ and lex = { |
132 | 132 | } |
133 | 133 | and position = {psn_id: id; gf: string; phrases: (id * phrase) list; control: string list} |
134 | 134 | |
135 | -let empty_lex = {phrases_list=[]; lemma="","",[]; numeral_lemma="","",[]; negation=""; | |
135 | +let empty_lex = {phrases_list=[]; lemma="","",[]; numeral_lemma="","",[]; negation=""; | |
136 | 136 | degree=""; number=""; reflex=""; gender=""; modification = "",[]} |
137 | 137 | |
138 | -let load_case = function | |
138 | +let load_case = function | |
139 | 139 | | Xml.Element("f", ["name","case"], [Xml.Element("symbol", ["value",value], [])]) -> |
140 | 140 | value |
141 | 141 | | xml -> failwith ("load_case:\n " ^ Xml.to_string_fmt xml) |
142 | 142 | |
143 | -let load_preposition = function | |
143 | +let load_preposition = function | |
144 | 144 | | Xml.Element("f", ["name","preposition"], [Xml.Element("symbol", ["value",value], [])]) -> |
145 | 145 | value |
146 | 146 | | xml -> failwith ("load_preposition:\n " ^ Xml.to_string_fmt xml) |
147 | - | |
148 | -let load_complex_preposition = function | |
147 | + | |
148 | +let load_complex_preposition = function | |
149 | 149 | | Xml.Element("f", ["name","complex_preposition"], [Xml.Element("symbol", ["value",value], [])]) -> |
150 | 150 | value |
151 | 151 | | xml -> failwith ("load_complex_preposition:\n " ^ Xml.to_string_fmt xml) |
152 | 152 | |
153 | -let load_aspect = function | |
153 | +let load_aspect = function | |
154 | 154 | | Xml.Element("f", ["name","aspect"], [Xml.Element("symbol", ["value",value], [])]) -> |
155 | 155 | value |
156 | 156 | | xml -> failwith ("load_aspect:\n " ^ Xml.to_string_fmt xml) |
157 | 157 | |
158 | -let load_advp = function | |
158 | +let load_advp = function | |
159 | 159 | | Xml.Element("f", ["name","category"], [Xml.Element("symbol", ["value",value], [])]) -> |
160 | 160 | value |
161 | 161 | | xml -> failwith ("load_advp:\n " ^ Xml.to_string_fmt xml) |
... | ... | @@ -164,14 +164,14 @@ let load_type_constrains = function |
164 | 164 | | Xml.Element("symbol", ["value",value], []) -> |
165 | 165 | value |
166 | 166 | | xml -> failwith ("load_type_constrains:\n " ^ Xml.to_string_fmt xml) |
167 | - | |
168 | - | |
169 | - (*type może mieć dodatkowo "constraints", czego chyba nie ma w dokumentacji, | |
167 | + | |
168 | + | |
169 | + (*type może mieć dodatkowo "constraints", czego chyba nie ma w dokumentacji, | |
170 | 170 | jest to lista elementów w stylu: Xml.Element("symbol", ["value",value], []) *) |
171 | 171 | let load_type = function |
172 | 172 | | Xml.Element("f", ["name","type"],[Xml.Element("fs", ["type","type_def"], x)]) -> |
173 | - begin | |
174 | - match x with | |
173 | + begin | |
174 | + match x with | |
175 | 175 | | [Xml.Element("f",["name","conjunction"], |
176 | 176 | [Xml.Element("symbol",["value",value],[])])] -> |
177 | 177 | value, [] |
... | ... | @@ -184,23 +184,23 @@ let load_type = function |
184 | 184 | end |
185 | 185 | | xml -> failwith ("load_type:\n " ^ Xml.to_string_fmt xml) |
186 | 186 | (*Printf.printf "%s\n" (Xml.to_string_fmt xml)*) |
187 | - | |
187 | + | |
188 | 188 | (*używam w load_lex*) |
189 | 189 | let load_lemmas_set = function |
190 | 190 | | Xml.Element("string",[], [Xml.PCData mstring]) -> |
191 | 191 | mstring |
192 | - | xml -> failwith ("load_lemmas_set:\n " ^ Xml.to_string_fmt xml) | |
193 | - | |
192 | + | xml -> failwith ("load_lemmas_set:\n " ^ Xml.to_string_fmt xml) | |
193 | + | |
194 | 194 | (* category ma dodakowo "constrains", czego chyba nie ma w dokumentacji |
195 | 195 | co więcej constrains zawiera w sobie zbiór typu phrases, więc jest rekurencyjne*) |
196 | -let rec load_category = function | |
196 | +let rec load_category = function | |
197 | 197 | | Xml.Element("f", ["name","category"], [Xml.Element("fs", ["type","category_def"], x)]) -> |
198 | - begin | |
199 | - match x with | |
200 | - | [Xml.Element("f",["name","name"], | |
198 | + begin | |
199 | + match x with | |
200 | + | [Xml.Element("f",["name","name"], | |
201 | 201 | [Xml.Element("symbol",["value",value],[])])] -> |
202 | 202 | value, [] |
203 | - | [Xml.Element("f",["name","name"], | |
203 | + | [Xml.Element("f",["name","name"], | |
204 | 204 | [Xml.Element("symbol",["value",value],[])]); |
205 | 205 | Xml.Element("f",["name","constraints"], |
206 | 206 | [Xml.Element("vColl",["org","set"],set)])] -> |
... | ... | @@ -215,25 +215,25 @@ and load_fixed = function (* również wzajemnie rekurencyjne z load_phrase*) |
215 | 215 | FixedP(przejdz_lista_second load_phrase set, stringg) |
216 | 216 | | _ -> failwith "load_fixed:\n " |
217 | 217 | |
218 | - | |
218 | + | |
219 | 219 | and load_modification_def = function (*pomocnicza do load_lex *) |
220 | - | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])])] -> | |
220 | + | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])])] -> | |
221 | 221 | value, [] |
222 | 222 | | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])]); |
223 | - Xml.Element("f",["name","positions"],[Xml.Element("vColl",["org","set"],set)])] -> | |
223 | + Xml.Element("f",["name","positions"],[Xml.Element("vColl",["org","set"],set)])] -> | |
224 | 224 | value, przejdz_lista load_position set |
225 | 225 | | x -> Printf.printf "%s\n" (Xml.to_string_fmt (List.hd x)); |
226 | - failwith "load_modification_def:\n" | |
227 | - | |
226 | + failwith "load_modification_def:\n" | |
227 | + | |
228 | 228 | and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*) |
229 | - | Xml.Element("f", ["name","argument"],[set]) -> | |
229 | + | Xml.Element("f", ["name","argument"],[set]) -> | |
230 | 230 | {arg with phrases_list = [snd (load_phrase set)]} |
231 | 231 | (* to samo co wyżej, tylko lista*) |
232 | 232 | | Xml.Element("f", ["name","arguments"],[Xml.Element("vColl",["org","set"],set)]) -> |
233 | 233 | {arg with phrases_list = przejdz_lista_second load_phrase set} |
234 | 234 | | Xml.Element("f", ["name","modification"],[Xml.Element("fs", ["type","modification_def"],x)]) -> |
235 | 235 | {arg with modification = load_modification_def x} |
236 | - | |
236 | + | |
237 | 237 | | Xml.Element("f", ["name","lemma"],[Xml.Element("fs", ["type","lemma_def"], |
238 | 238 | [Xml.Element("f",["name","selection_mode"],[Xml.Element("symbol", ["value",value1],[])]); |
239 | 239 | Xml.Element("f",["name","cooccurrence"],[Xml.Element("symbol", ["value",value2],[])]); |
... | ... | @@ -244,113 +244,113 @@ and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*) |
244 | 244 | Xml.Element("f",["name","cooccurrence"],[Xml.Element("symbol", ["value",value2],[])]); |
245 | 245 | Xml.Element("f",["name","lemmas"],[Xml.Element("vColl", ["org","set"],lemmas)])])]) -> |
246 | 246 | {arg with numeral_lemma = value1, value2, przejdz_lista load_lemmas_set lemmas} |
247 | - | |
247 | + | |
248 | 248 | | Xml.Element("f", ["name","negation"],[Xml.Element("symbol",["value",value],[])]) -> |
249 | 249 | {arg with negation = value} |
250 | 250 | | Xml.Element("f", ["name","degree"],[Xml.Element("symbol",["value",value],[])]) -> |
251 | 251 | {arg with degree = value} |
252 | - | Xml.Element("f", ["name","number"],[Xml.Element("symbol",["value",value],[])]) -> | |
252 | + | Xml.Element("f", ["name","number"],[Xml.Element("symbol",["value",value],[])]) -> | |
253 | 253 | {arg with number = value} |
254 | 254 | | Xml.Element("f", ["name","reflex"],[Xml.Element("symbol",["value",value],[])]) -> |
255 | 255 | {arg with reflex = value} |
256 | 256 | | Xml.Element("f", ["name","reflex"],[]) -> |
257 | 257 | {arg with reflex = ""} |
258 | 258 | (*niby set, ale zawsze jest jeden element*) |
259 | - | Xml.Element("f", ["name","gender"], | |
260 | - [Xml.Element("vColl", ["org","set"],[Xml.Element("symbol",["value",value],[])])]) -> | |
259 | + | Xml.Element("f", ["name","gender"], | |
260 | + [Xml.Element("vColl", ["org","set"],[Xml.Element("symbol",["value",value],[])])]) -> | |
261 | 261 | {arg with gender = value} |
262 | - | xml -> | |
262 | + | xml -> | |
263 | 263 | Printf.printf "%s\n" (Xml.to_string_fmt xml); |
264 | 264 | failwith "load_lex:\n " |
265 | 265 | |
266 | -and load_phrase xml:id * phrase = | |
267 | - let id, idtype, x = | |
266 | +and load_phrase xml:id * phrase = | |
267 | + let id, idtype, x = | |
268 | 268 | begin |
269 | - match xml with | |
269 | + match xml with | |
270 | 270 | | Xml.Element("fs", ["xml:id", _id; "type", _idtype], _x) -> (_id, _idtype, _x) |
271 | 271 | | Xml.Element("fs", ["type", _idtype], _x) -> ("", _idtype, _x) |
272 | 272 | | _ -> failwith "load_phrase let id,idtype...\n" |
273 | 273 | end; |
274 | 274 | in |
275 | 275 | let id = parse_id id in |
276 | - match idtype, x with | |
276 | + match idtype, x with | |
277 | 277 | | "np", [a] -> |
278 | 278 | id, NP(load_case a); |
279 | - | "prepnp", [a;b] -> | |
279 | + | "prepnp", [a;b] -> | |
280 | 280 | id, PrepNP(load_preposition a, load_case b) |
281 | - | "adjp", [a] -> | |
281 | + | "adjp", [a] -> | |
282 | 282 | id, AdjP(load_case a) |
283 | - | "prepadjp", [a;b] -> | |
283 | + | "prepadjp", [a;b] -> | |
284 | 284 | id, PrepAdjP(load_preposition a, load_case b) |
285 | - | "comprepnp", [a] -> | |
285 | + | "comprepnp", [a] -> | |
286 | 286 | id, ComprepNP(load_complex_preposition a) |
287 | - | "cp", [a] -> | |
287 | + | "cp", [a] -> | |
288 | 288 | id, CP(load_type a) |
289 | - | "ncp", [a;b] -> | |
289 | + | "ncp", [a;b] -> | |
290 | 290 | id, NCP(load_case a, load_type b) |
291 | - | "prepncp", [a;b;c] -> | |
291 | + | "prepncp", [a;b;c] -> | |
292 | 292 | id, PrepNCP(load_preposition a, load_case b, load_type c) |
293 | - | "infp", [a] -> | |
293 | + | "infp", [a] -> | |
294 | 294 | id, InfP(load_aspect a) |
295 | - | "xp", [a] -> | |
295 | + | "xp", [a] -> | |
296 | 296 | id, XP(load_category a) |
297 | - | "advp", [a] -> | |
297 | + | "advp", [a] -> | |
298 | 298 | id, AdvP(load_advp a) |
299 | - | |
299 | + | |
300 | 300 | | "nonch", [] -> id, Nonch |
301 | 301 | | "or", [] -> id, Or |
302 | 302 | | "refl", [] -> id, Refl |
303 | 303 | | "E", [] -> id, E |
304 | - | |
305 | - | "lex", x -> | |
304 | + | |
305 | + | "lex", x -> | |
306 | 306 | id, Lex(przejdz_zapisz load_lex empty_lex x) |
307 | 307 | (* |
308 | 308 | Printf.printf "%d\n" (List.length x); |
309 | 309 | Printf.printf "%s\n" (Xml.to_string_fmt xml); |
310 | 310 | *) |
311 | - | "fixed", x -> | |
311 | + | "fixed", x -> | |
312 | 312 | id, load_fixed x |
313 | - | |
313 | + | |
314 | 314 | (*dodatkowe, nie ma ich w dokmentacji a są na poziomie 0 load_phrase*) |
315 | 315 | | "possp", [] -> id, PossP |
316 | 316 | | "recip", [] -> id, Recip |
317 | 317 | | "distrp", [] -> id, DistrP |
318 | 318 | | "compar", [Xml.Element("f",["name","compar_category"], |
319 | 319 | [Xml.Element("symbol",["value",value],[])])] -> id, ComparP(value) |
320 | - | |
320 | + | |
321 | 321 | (* dodatkowe: (gerp i prepgerp) są w dokumentacji, |
322 | 322 | i pojawiają się po rekurencyjnym wywołaniu z funkcji load_lex |
323 | 323 | podobne kolejno do: np, prepnp*) |
324 | 324 | | "gerp", [a] -> |
325 | 325 | id, GerP(load_case a) |
326 | - | "prepgerp", [a;b] -> | |
326 | + | "prepgerp", [a;b] -> | |
327 | 327 | id, PrepGerP(load_preposition a, load_case b) |
328 | 328 | (*inne dodatkowe które też są powywołaniu z load_lex*) |
329 | - | "nump", [a] -> | |
329 | + | "nump", [a] -> | |
330 | 330 | id, NumP(load_case a) |
331 | 331 | | "prepnump", [a;b] -> |
332 | 332 | id, PrepNumP(load_preposition a, load_case b) |
333 | - | "ppasp", [a] -> | |
333 | + | "ppasp", [a] -> | |
334 | 334 | id, PpasP(load_case a) |
335 | 335 | | "prepppasp", [a;b] -> |
336 | 336 | id, PrepPpasP(load_preposition a, load_case b) |
337 | 337 | | "qub", [] -> |
338 | 338 | id, Qub |
339 | - | |
339 | + | |
340 | 340 | (*dodatkowe, po wywołaniu z load_position *) |
341 | - | "pactp", [a] -> | |
341 | + | "pactp", [a] -> | |
342 | 342 | id, PactP(load_case a) |
343 | - | |
344 | - | |
343 | + | |
344 | + | |
345 | 345 | | _ -> failwith ("load_phrase match:\n " ^ Xml.to_string_fmt xml) |
346 | - | |
347 | - | |
346 | + | |
347 | + | |
348 | 348 | and load_control = function |
349 | 349 | | Xml.Element("symbol", ["value", value], []) -> |
350 | 350 | value |
351 | - | xml -> failwith ("load_control:\n " ^ Xml.to_string_fmt xml) | |
352 | - | |
353 | -and load_position_info arg = function | |
351 | + | xml -> failwith ("load_control:\n " ^ Xml.to_string_fmt xml) | |
352 | + | |
353 | +and load_position_info arg = function | |
354 | 354 | | Xml.Element("f",["name", "function"], [Xml.Element("symbol",["value", value],[])]) -> |
355 | 355 | {arg with gf = value} |
356 | 356 | | Xml.Element("f",["name", "phrases"], [Xml.Element("vColl",["org", "set"], phrases_set)]) -> |
... | ... | @@ -358,8 +358,8 @@ and load_position_info arg = function |
358 | 358 | | Xml.Element("f",["name", "control"], [Xml.Element("vColl",["org", "set"], control_set)]) -> |
359 | 359 | {arg with control = (przejdz_lista load_control control_set)} |
360 | 360 | | xml -> failwith ("load_position_info:\n " ^ Xml.to_string_fmt xml) |
361 | - | |
362 | -and load_position = function | |
361 | + | |
362 | +and load_position = function | |
363 | 363 | | Xml.Element("fs", ["xml:id", id; "type","position"], listt) -> |
364 | 364 | let id = parse_id id in |
365 | 365 | let result = {psn_id = id; gf = ""; phrases = []; control = []} in |
... | ... | @@ -385,7 +385,7 @@ let load_schema_info arg = function |
385 | 385 | | Xml.Element("f", ["name","aspect"], [Xml.Element("symbol", ["value", aspect_value],[])]) -> |
386 | 386 | {arg with aspect = aspect_value} |
387 | 387 | | Xml.Element("f", ["name","aspect"], []) -> |
388 | - arg | |
388 | + arg | |
389 | 389 | | Xml.Element("f", ["name","negativity"], [Xml.Element("symbol", ["value", negativity_value],[])]) -> |
390 | 390 | {arg with negativity = negativity_value} |
391 | 391 | | Xml.Element("f", ["name","negativity"], []) -> |
... | ... | @@ -406,18 +406,18 @@ let load_schema = function |
406 | 406 | let result = przejdz_zapisz load_schema_info result schema in |
407 | 407 | result |
408 | 408 | | xml -> failwith ("load_schema:\n " ^ Xml.to_string_fmt xml) |
409 | - | |
409 | + | |
410 | 410 | let load_syntactic = function |
411 | - Xml.Element("f", ["name", "schemata"], | |
411 | + Xml.Element("f", ["name", "schemata"], | |
412 | 412 | [Xml.Element("vColl", ["org","set"], schemata_set)]) -> |
413 | 413 | przejdz_lista load_schema schemata_set |
414 | 414 | | xml -> failwith ("load_syntactic:\n " ^ Xml.to_string_fmt xml) |
415 | -(* Koniec kodu do wczytywania syntaticLayer *) | |
415 | +(* Koniec kodu do wczytywania syntaticLayer *) | |
416 | 416 | |
417 | 417 | |
418 | -(* Początek kodu do wczytywania examplesLayer *) | |
418 | +(* Początek kodu do wczytywania examplesLayer *) | |
419 | 419 | |
420 | -type example = {exm_id: id; | |
420 | +type example = {exm_id: id; | |
421 | 421 | meaning: string; (*id*) |
422 | 422 | phrases: id list; (*zbiór id!!!*) |
423 | 423 | sentence: string; |
... | ... | @@ -426,7 +426,7 @@ type example = {exm_id: id; |
426 | 426 | note: string} |
427 | 427 | |
428 | 428 | |
429 | -let load_phrases_set = function | |
429 | +let load_phrases_set = function | |
430 | 430 | | Xml.Element("fs", ["sameAs", same_as; "type","phrase"], []) -> |
431 | 431 | let p = parse_full_id same_as in |
432 | 432 | {p with numbers = List.tl p.numbers} |
... | ... | @@ -451,42 +451,42 @@ let load_example_info arg = function |
451 | 451 | |
452 | 452 | let load_example = function |
453 | 453 | | Xml.Element("fs", ["xml:id", id; "type", "example"], example_elements) -> |
454 | - let id = parse_id id in | |
455 | - let result = {exm_id = id; meaning = ""; phrases = []; sentence = ""; | |
454 | + let id = parse_id id in | |
455 | + let result = {exm_id = id; meaning = ""; phrases = []; sentence = ""; | |
456 | 456 | source = ""; opinion = ""; note = "";} in |
457 | 457 | let result = przejdz_zapisz load_example_info result example_elements in |
458 | 458 | result |
459 | 459 | | xml -> failwith ("load_example :\n " ^ Xml.to_string_fmt xml) |
460 | - | |
460 | + | |
461 | 461 | let load_examples = function |
462 | 462 | | Xml.Element("f", ["name","examples"], [Xml.Element("vColl", ["org", "set"], examples_set)]) -> |
463 | - przejdz_lista load_example examples_set | |
463 | + przejdz_lista load_example examples_set | |
464 | 464 | | xml -> failwith ("load_examples:\n " ^ Xml.to_string_fmt xml) |
465 | - | |
465 | + | |
466 | 466 | (* Koniec kodu do wczytywania examplesLayer *) |
467 | - | |
468 | 467 | |
469 | 468 | |
470 | -(* Początek kodu do wczytywania semanticLayer *) | |
471 | 469 | |
472 | -type sel_prefs = | |
470 | +(* Początek kodu do wczytywania semanticLayer *) | |
471 | + | |
472 | +type sel_prefs = | |
473 | 473 | Numeric of string |
474 | 474 | | Symbol of string |
475 | 475 | | Relation of string * string |
476 | - | |
476 | + | |
477 | 477 | type argument = {arg_id: id; |
478 | 478 | role: string; |
479 | 479 | role_attribute: string; |
480 | 480 | sel_prefs: sel_prefs list list} |
481 | - | |
481 | + | |
482 | 482 | type frame = {frm_id: id; |
483 | 483 | opinion: string; |
484 | 484 | meanings: string list; |
485 | 485 | arguments: argument list} |
486 | 486 | |
487 | 487 | |
488 | -let load_self_prefs_sets = function | |
489 | - | Xml.Element("numeric", ["value",value],[]) -> | |
488 | +let load_self_prefs_sets = function | |
489 | + | Xml.Element("numeric", ["value",value],[]) -> | |
490 | 490 | Numeric(value) |
491 | 491 | | Xml.Element("symbol", ["value",value],[]) -> |
492 | 492 | Symbol(value) |
... | ... | @@ -495,54 +495,54 @@ let load_self_prefs_sets = function |
495 | 495 | Xml.Element("f", ["name", "to"],[Xml.Element("fs", ["sameAs", same_as; "type", "argument"], [])])]) -> |
496 | 496 | Relation(value,same_as) |
497 | 497 | | xml -> failwith ("load_self_prefs_sets :\n " ^ Xml.to_string_fmt xml) |
498 | - | |
498 | + | |
499 | 499 | let load_argument_self_prefs = function |
500 | - | Xml.Element("f", ["name", name], [Xml.Element("vColl", ["org","set"], self_prefs_set)]) -> | |
500 | + | Xml.Element("f", ["name", name], [Xml.Element("vColl", ["org","set"], self_prefs_set)]) -> | |
501 | 501 | przejdz_lista load_self_prefs_sets self_prefs_set |
502 | 502 | | xml -> failwith ("load_argument_self_prefs :\n " ^ Xml.to_string_fmt xml) |
503 | - | |
503 | + | |
504 | 504 | let load_argument_info arg = function |
505 | - | Xml.Element("f", ["name","role"], [Xml.Element("symbol", ["value",value],[])]) -> | |
505 | + | Xml.Element("f", ["name","role"], [Xml.Element("symbol", ["value",value],[])]) -> | |
506 | 506 | {arg with role = value} |
507 | - | Xml.Element("f", ["name","role_attribute"], [Xml.Element("symbol", ["value",value],[])]) -> | |
507 | + | Xml.Element("f", ["name","role_attribute"], [Xml.Element("symbol", ["value",value],[])]) -> | |
508 | 508 | {arg with role_attribute = value} |
509 | - | Xml.Element("f", ["name","sel_prefs"], [Xml.Element("fs", ["type","sel_prefs_groups"], self_prefs)]) -> | |
509 | + | Xml.Element("f", ["name","sel_prefs"], [Xml.Element("fs", ["type","sel_prefs_groups"], self_prefs)]) -> | |
510 | 510 | {arg with sel_prefs = przejdz_lista load_argument_self_prefs self_prefs} |
511 | 511 | | xml -> failwith ("load_argument_info :\n " ^ Xml.to_string_fmt xml) |
512 | 512 | |
513 | 513 | let load_arguments_set = function |
514 | - | Xml.Element("fs", ["xml:id", id; "type", "argument"], info) -> | |
514 | + | Xml.Element("fs", ["xml:id", id; "type", "argument"], info) -> | |
515 | 515 | let id = parse_id id in |
516 | 516 | let result = {arg_id = id; role = ""; role_attribute = ""; sel_prefs = []} in |
517 | - let result = przejdz_zapisz load_argument_info result info in | |
517 | + let result = przejdz_zapisz load_argument_info result info in | |
518 | 518 | result |
519 | 519 | | xml -> failwith ("load_arguments_set :\n " ^ Xml.to_string_fmt xml) |
520 | 520 | |
521 | 521 | let load_meanings_set = function |
522 | - | Xml.Element("fs", ["sameAs", same_As; "type", "lexical_unit"], []) -> | |
522 | + | Xml.Element("fs", ["sameAs", same_As; "type", "lexical_unit"], []) -> | |
523 | 523 | same_As |
524 | 524 | | xml -> failwith ("load_meanings_set :\n " ^ Xml.to_string_fmt xml) |
525 | - | |
525 | + | |
526 | 526 | let load_frame = function |
527 | 527 | | Xml.Element("fs", ["xml:id", id; "type", "frame"], [ |
528 | 528 | Xml.Element("f", ["name", "opinion"], [Xml.Element("symbol", ["value", opinion],[])]); |
529 | 529 | Xml.Element("f", ["name", "meanings"], [Xml.Element("vColl", ["org", "set"], meanings_set)]); |
530 | - Xml.Element("f", ["name", "arguments"], [Xml.Element("vColl", ["org", "set"], arguments_set)])]) -> | |
530 | + Xml.Element("f", ["name", "arguments"], [Xml.Element("vColl", ["org", "set"], arguments_set)])]) -> | |
531 | 531 | let id = parse_id id in |
532 | 532 | {frm_id = id; |
533 | 533 | opinion = opinion; |
534 | 534 | meanings = przejdz_lista load_meanings_set meanings_set; |
535 | 535 | arguments = przejdz_lista load_arguments_set arguments_set} |
536 | 536 | | xml -> failwith ("load_frame :\n " ^ Xml.to_string_fmt xml) |
537 | - | |
537 | + | |
538 | 538 | let load_semantic = function |
539 | - | Xml.Element("f", ["name","frames"], [Xml.Element("vColl", ["org", "set"], frame_set)]) -> | |
539 | + | Xml.Element("f", ["name","frames"], [Xml.Element("vColl", ["org", "set"], frame_set)]) -> | |
540 | 540 | przejdz_lista load_frame frame_set |
541 | 541 | | xml -> failwith ("load_semantic:\n " ^ Xml.to_string_fmt xml) |
542 | -(* Koniec kodu do wczytywania semanticLayer *) | |
543 | - | |
542 | +(* Koniec kodu do wczytywania semanticLayer *) | |
544 | 543 | |
545 | -(* Początek do wczytywania meaningsLayer *) | |
544 | + | |
545 | +(* Początek do wczytywania meaningsLayer *) | |
546 | 546 | |
547 | 547 | type meaning = {mng_id: id; |
548 | 548 | name: string; |
... | ... | @@ -558,21 +558,21 @@ let empty_meaning = {mng_id = empty_id; |
558 | 558 | |
559 | 559 | |
560 | 560 | |
561 | -let load_meaning_info arg = function | |
562 | - | Xml.Element("f", ["name", "name"], [Xml.Element("string", [], [Xml.PCData name_string])]) -> | |
561 | +let load_meaning_info arg = function | |
562 | + | Xml.Element("f", ["name", "name"], [Xml.Element("string", [], [Xml.PCData name_string])]) -> | |
563 | 563 | {arg with name = name_string} |
564 | 564 | | Xml.Element("f", ["name", "variant"], [Xml.Element("string", [], [Xml.PCData variant_string])]) -> |
565 | 565 | {arg with variant = variant_string} |
566 | - | Xml.Element("f", ["name", "plwnluid"], [Xml.Element("numeric", ["value",value],[])]) -> | |
566 | + | Xml.Element("f", ["name", "plwnluid"], [Xml.Element("numeric", ["value",value],[])]) -> | |
567 | 567 | {arg with plwnluid = value} |
568 | - | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [Xml.PCData gloss_string])]) -> | |
568 | + | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [Xml.PCData gloss_string])]) -> | |
569 | 569 | {arg with gloss = gloss_string} |
570 | - | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [])]) -> | |
570 | + | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [])]) -> | |
571 | 571 | arg |
572 | 572 | | xml -> failwith ("load_meaning_info:\n " ^ Xml.to_string_fmt xml) |
573 | 573 | |
574 | 574 | |
575 | -let load_meaning = function | |
575 | +let load_meaning = function | |
576 | 576 | | Xml.Element("fs", ["xml:id", id; "type", "lexical_unit"], meaning_info) -> |
577 | 577 | let id = parse_id id in |
578 | 578 | let result = empty_meaning in |
... | ... | @@ -581,44 +581,44 @@ let load_meaning = function |
581 | 581 | result |
582 | 582 | | xml -> failwith ("load_meaning:\n " ^ Xml.to_string_fmt xml) |
583 | 583 | |
584 | -(* Koniec kodu do wczytywania meaningsLayer *) | |
584 | +(* Koniec kodu do wczytywania meaningsLayer *) | |
585 | 585 | |
586 | -(* Początek kodu do wczytywania connectionsLayer *) | |
586 | +(* Początek kodu do wczytywania connectionsLayer *) | |
587 | 587 | |
588 | 588 | type connection = {argument: string; |
589 | 589 | phrases: string list} |
590 | - | |
591 | -type alternation = {connections: connection list} | |
590 | + | |
591 | +type alternation = {connections: connection list} | |
592 | 592 | |
593 | 593 | |
594 | 594 | let load_phrases_connections = function |
595 | - | Xml.Element("fs", ["sameAs",sameAs; "type", "phrase"], []) -> | |
595 | + | Xml.Element("fs", ["sameAs",sameAs; "type", "phrase"], []) -> | |
596 | 596 | sameAs |
597 | 597 | | xml -> failwith ("load_phrases_connections: \n " ^ Xml.to_string_fmt xml) |
598 | 598 | |
599 | -let load_alter_connection = function | |
599 | +let load_alter_connection = function | |
600 | 600 | | Xml.Element("fs", ["type","connection"], [ |
601 | 601 | Xml.Element("f", ["name", "argument"], [Xml.Element("fs", ["sameAs",sameAs; "type","argument"],[])]); |
602 | 602 | Xml.Element("f", ["name", "phrases"], [Xml.Element("vColl", ["org","set";], phrases)])]) -> |
603 | 603 | {argument = sameAs; phrases = (przejdz_lista load_phrases_connections phrases)} |
604 | 604 | | xml -> failwith ("load_alter_connections: \n " ^ Xml.to_string_fmt xml) |
605 | 605 | |
606 | -let load_alternations = function | |
607 | - | Xml.Element("fs", ["type","alternation"], | |
606 | +let load_alternations = function | |
607 | + | Xml.Element("fs", ["type","alternation"], | |
608 | 608 | [Xml.Element("f", ["name", "connections"], [Xml.Element("vColl", ["org", "set"], connections_set)])]) -> |
609 | 609 | {connections = przejdz_lista load_alter_connection connections_set} |
610 | 610 | | xml -> failwith ("load_alternations: \n " ^ Xml.to_string_fmt xml) |
611 | 611 | |
612 | 612 | |
613 | -let load_connections = function | |
613 | +let load_connections = function | |
614 | 614 | | Xml.Element("f", ["name","alternations"], [Xml.Element("vColl", ["org", "set"], alternations)]) -> |
615 | 615 | przejdz_lista load_alternations alternations |
616 | 616 | | xml -> failwith ("load_connections: \n " ^ Xml.to_string_fmt xml) |
617 | 617 | |
618 | 618 | |
619 | -(* Koniec kodu do wczytywania connectionsLayer *) | |
619 | +(* Koniec kodu do wczytywania connectionsLayer *) | |
620 | 620 | |
621 | -type entry = {ent_id: id; | |
621 | +type entry = {ent_id: id; | |
622 | 622 | form_orth: string; |
623 | 623 | form_pos: string; |
624 | 624 | schemata: schema list; |
... | ... | @@ -627,7 +627,7 @@ type entry = {ent_id: id; |
627 | 627 | meanings: meaning list; |
628 | 628 | alternations: alternation list} |
629 | 629 | |
630 | -let load_entry = function | |
630 | +let load_entry = function | |
631 | 631 | | Xml.Element("entry",["xml:id",id], [ |
632 | 632 | Xml.Element("form", [], [ |
633 | 633 | Xml.Element("orth",[],[Xml.PCData orth]); |
... | ... | @@ -635,10 +635,10 @@ let load_entry = function |
635 | 635 | Xml.Element("fs", ["type","syntactic_layer"], [syntactics]); |
636 | 636 | Xml.Element("fs", ["type","examples_layer"], [examples]); |
637 | 637 | Xml.Element("fs", ["type","semantic_layer"], [semantic]); |
638 | - Xml.Element("fs", ["type","meanings_layer"], | |
639 | - [Xml.Element("f", ["name","meanings"], | |
638 | + Xml.Element("fs", ["type","meanings_layer"], | |
639 | + [Xml.Element("f", ["name","meanings"], | |
640 | 640 | [Xml.Element("vColl", ["org", "set"], meanings_set)])]); |
641 | - Xml.Element("fs", ["type","connections_layer"],[connections])]) -> | |
641 | + Xml.Element("fs", ["type","connections_layer"],[connections])]) -> | |
642 | 642 | let id = parse_id id in |
643 | 643 | {ent_id = id; |
644 | 644 | form_orth = orth; |
... | ... | @@ -653,7 +653,7 @@ let load_entry = function |
653 | 653 | Xml.Element("orth",[],[Xml.PCData orth]); |
654 | 654 | Xml.Element("pos",[],[Xml.PCData pos])]); |
655 | 655 | Xml.Element("fs", ["type","syntactic_layer"], [syntactics]); |
656 | - Xml.Element("fs", ["type","examples_layer"], [examples])]) -> | |
656 | + Xml.Element("fs", ["type","examples_layer"], [examples])]) -> | |
657 | 657 | let id = parse_id id in |
658 | 658 | {ent_id = id; |
659 | 659 | form_orth = orth; |
... | ... | @@ -669,8 +669,8 @@ let load_entry = function |
669 | 669 | let load_walenty filename:entry list = |
670 | 670 | begin |
671 | 671 | match Xml.parse_file filename with |
672 | - Xml.Element("TEI", _, | |
673 | - [Xml.Element("teiHeader",_,_) ; | |
672 | + Xml.Element("TEI", _, | |
673 | + [Xml.Element("teiHeader",_,_) ; | |
674 | 674 | Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> |
675 | 675 | przejdz_lista load_entry entries |
676 | 676 | | _ -> failwith "load_walenty" |
... | ... | @@ -682,7 +682,7 @@ let load_walenty filename:entry list = |
682 | 682 | (* let _ = Printf.printf "loading: OK\n" *) |
683 | 683 | |
684 | 684 | (* ******************************************* *) |
685 | - | |
685 | + | |
686 | 686 | |
687 | 687 | |
688 | 688 | |
... | ... | @@ -697,25 +697,25 @@ module StringMap = Map.Make(String) |
697 | 697 | |
698 | 698 | let cnt = ref 0;; |
699 | 699 | |
700 | -let add_new map meaning = | |
700 | +let add_new map meaning = | |
701 | 701 | let num_id = match meaning.mng_id with |
702 | 702 | {hash=false; numbers=[num_id]; suffix="mng"} -> num_id |
703 | - | _ -> failwith "zła składnia id" in | |
704 | - if StringMap.mem num_id map then | |
703 | + | _ -> failwith "zła składnia id" in | |
704 | + if StringMap.mem num_id map then | |
705 | 705 | (Printf.printf "okkk\n"; |
706 | 706 | let meaning2 = StringMap.find num_id map in |
707 | 707 | if meaning = meaning2 then map else |
708 | 708 | failwith "różne1111") |
709 | 709 | else (cnt:=!cnt+1;StringMap.add num_id meaning map) |
710 | - | |
710 | + | |
711 | 711 | let check_entry_menaings mapa entry = |
712 | 712 | przejdz_zapisz add_new mapa entry.meanings |
713 | 713 | |
714 | -let check_meanings walenty = | |
714 | +let check_meanings walenty = | |
715 | 715 | przejdz_zapisz check_entry_menaings StringMap.empty walenty |
716 | 716 | |
717 | 717 | (* |
718 | -let _ = check_meanings walenty | |
718 | +let _ = check_meanings walenty | |
719 | 719 | let _ = Printf.printf "meaning map.size: %d\n" !cnt |
720 | 720 | *) |
721 | 721 | |
... | ... | @@ -724,27 +724,27 @@ let _ = Printf.printf "meaning map.size: %d\n" !cnt |
724 | 724 | let cnt = ref 0;; |
725 | 725 | |
726 | 726 | (*arg_id*) |
727 | -let add_new map argument = | |
727 | +let add_new map argument = | |
728 | 728 | let arg_id = match argument.arg_id with |
729 | 729 | {hash=false; numbers=[num_id]; suffix="arg"} -> num_id |
730 | - | _ -> failwith "zła składnia id" in | |
731 | - if StringMap.mem arg_id map then | |
730 | + | _ -> failwith "zła składnia id" in | |
731 | + if StringMap.mem arg_id map then | |
732 | 732 | (Printf.printf "okkk\n"; |
733 | 733 | let val2 = StringMap.find arg_id map in |
734 | 734 | let val1 = argument in |
735 | 735 | if val1 = val2 then map else |
736 | 736 | failwith "różne1111") |
737 | 737 | else (cnt:=!cnt+1; StringMap.add arg_id argument map) |
738 | - | |
739 | 738 | |
740 | 739 | |
741 | -let check_frame mapa frame = | |
742 | - przejdz_zapisz add_new mapa frame.arguments | |
740 | + | |
741 | +let check_frame mapa frame = | |
742 | + przejdz_zapisz add_new mapa frame.arguments | |
743 | 743 | |
744 | 744 | let check_entry_frames mapa entry = |
745 | 745 | przejdz_zapisz check_frame mapa entry.frames |
746 | 746 | |
747 | -let check_meanings walenty = | |
747 | +let check_meanings walenty = | |
748 | 748 | przejdz_zapisz check_entry_frames StringMap.empty walenty |
749 | 749 | |
750 | 750 | (* |
... | ... | @@ -756,22 +756,22 @@ let _ = Printf.printf "entry.frame.argument map.size: %d\n" !cnt |
756 | 756 | |
757 | 757 | let cnt = ref 0;; |
758 | 758 | (*frm_id*) |
759 | -let add_new map frame = | |
759 | +let add_new map frame = | |
760 | 760 | let id = match frame.frm_id with |
761 | 761 | {hash=false; numbers=[num_id]; suffix="frm"} -> num_id |
762 | - | _ -> failwith "zła składnia id" in | |
763 | - if StringMap.mem id map then | |
762 | + | _ -> failwith "zła składnia id" in | |
763 | + if StringMap.mem id map then | |
764 | 764 | (Printf.printf "okkk\n"; |
765 | 765 | let val2 = StringMap.find id map in |
766 | 766 | let val1 = frame in |
767 | 767 | if val1 = val2 then map else |
768 | 768 | failwith "różne1111") |
769 | 769 | else (cnt:=!cnt+1; StringMap.add id frame map) |
770 | - | |
770 | + | |
771 | 771 | let check_entry_frames mapa entry = |
772 | 772 | przejdz_zapisz add_new mapa entry.frames |
773 | 773 | |
774 | -let check_meanings walenty = | |
774 | +let check_meanings walenty = | |
775 | 775 | przejdz_zapisz check_entry_frames StringMap.empty walenty |
776 | 776 | |
777 | 777 | (* |
... | ... | @@ -783,7 +783,7 @@ let _ = Printf.printf "entry.frame map.size: %d\n" !cnt |
783 | 783 | |
784 | 784 | (*examplesLayer*) |
785 | 785 | (*na razie zeruje phrases!!!!*) |
786 | -let print_example example = | |
786 | +let print_example example = | |
787 | 787 | Printf.printf "meaning: %s\n phrases: " example.meaning; |
788 | 788 | (* print_endline (String.concat "; " example.phrases);*) |
789 | 789 | Printf.printf "sentence: %s\n" example.sentence; |
... | ... | @@ -796,31 +796,31 @@ let print_example example = |
796 | 796 | let cnt = ref 0;; |
797 | 797 | let takiesame = ref 0;; |
798 | 798 | |
799 | -let add_new map example = | |
799 | +let add_new map example = | |
800 | 800 | let id = match example.exm_id with |
801 | 801 | {hash=false; numbers=[num_id]; suffix="exm"} -> num_id |
802 | - | _ -> failwith "zła składnia id" in | |
802 | + | _ -> failwith "zła składnia id" in | |
803 | 803 | (* let example = {example with phrases = []} in (*uwaga!!!! zeruje phrases!!!*)*) |
804 | 804 | let example = {example with meaning = ""} in (*uwaga!!!! zeruje meaning!!!*) |
805 | - if StringMap.mem id map then | |
805 | + if StringMap.mem id map then | |
806 | 806 | (takiesame:=!takiesame+1; |
807 | 807 | (* Printf.printf "okkk\n";*) |
808 | 808 | let val2 = StringMap.find id map in |
809 | 809 | let val1 = example in |
810 | 810 | if val1 = val2 then map else |
811 | - let _ = print_example val1 in | |
811 | + let _ = print_example val1 in | |
812 | 812 | let _ = print_example val2 in |
813 | 813 | failwith "różne1111") |
814 | 814 | else (cnt:=!cnt+1; StringMap.add id example map) |
815 | - | |
815 | + | |
816 | 816 | let check_entry_example mapa entry = |
817 | 817 | przejdz_zapisz add_new mapa entry.examples |
818 | 818 | |
819 | -let check_meanings walenty = | |
819 | +let check_meanings walenty = | |
820 | 820 | przejdz_zapisz check_entry_example StringMap.empty walenty |
821 | 821 | |
822 | 822 | (* |
823 | -let _ = check_meanings walenty | |
823 | +let _ = check_meanings walenty | |
824 | 824 | let _ = Printf.printf "examples map.size: %d takich samych: %d\n" !cnt !takiesame |
825 | 825 | *) |
826 | 826 | |
... | ... | @@ -831,12 +831,12 @@ let _ = Printf.printf "examples map.size: %d takich samych: %d\n" !cnt !takiesam |
831 | 831 | let cnt = ref 0;; |
832 | 832 | let takiesame = ref 0;; |
833 | 833 | |
834 | -let add_new map position = | |
834 | +let add_new map position = | |
835 | 835 | let id = match position.psn_id with |
836 | 836 | {hash=false; numbers=[num_id]; suffix="psn"} -> num_id |
837 | - | _ -> failwith "zła składnia id" in | |
838 | -(* let position = {position with phrases = przejdz_lista (fun (x,y) -> (parse_id "",y)) position.phrases} in*) (*uwaga!!!!*) | |
839 | - if StringMap.mem id map then | |
837 | + | _ -> failwith "zła składnia id" in | |
838 | +(* let position = {position with phrases = przejdz_lista (fun (x,y) -> (parse_id "",y)) position.phrases} in*) (*uwaga!!!!*) | |
839 | + if StringMap.mem id map then | |
840 | 840 | (takiesame:=!takiesame+1; |
841 | 841 | (* Printf.printf "okkk\n";*) |
842 | 842 | let val2 = StringMap.find id map in |
... | ... | @@ -844,18 +844,18 @@ let add_new map position = |
844 | 844 | if val1 = val2 then map else |
845 | 845 | failwith "różne1111") |
846 | 846 | else (cnt:=!cnt+1; StringMap.add id position map) |
847 | - | |
848 | -let check_schema mapa schema = | |
847 | + | |
848 | +let check_schema mapa schema = | |
849 | 849 | przejdz_zapisz add_new mapa schema.positions |
850 | - | |
850 | + | |
851 | 851 | let check_entry mapa entry = |
852 | 852 | przejdz_zapisz check_schema mapa entry.schemata |
853 | 853 | |
854 | -let check walenty = | |
854 | +let check walenty = | |
855 | 855 | przejdz_zapisz check_entry StringMap.empty walenty |
856 | 856 | |
857 | 857 | (* |
858 | -let _ = check walenty | |
858 | +let _ = check walenty | |
859 | 859 | let _ = Printf.printf "syntactic...position map.size: %d takich samych: %d\n" !cnt !takiesame |
860 | 860 | *) |
861 | 861 | |
... | ... | @@ -864,8 +864,8 @@ let _ = Printf.printf "syntactic...position map.size: %d takich samych: %d\n" !c |
864 | 864 | let cnt = ref 0;; |
865 | 865 | let takiesame = ref 0;; |
866 | 866 | |
867 | -(*let clear_id (position:position) = | |
868 | - let position = {position with phrases = [](*przejdz_lista (fun (x,y) -> (empty_id,y)) position.phrases*)} in (*uwaga!!!!*) | |
867 | +(*let clear_id (position:position) = | |
868 | + let position = {position with phrases = [](*przejdz_lista (fun (x,y) -> (empty_id,y)) position.phrases*)} in (*uwaga!!!!*) | |
869 | 869 | let position = {position with psn_id = empty_id} in |
870 | 870 | position*) |
871 | 871 | |
... | ... | @@ -876,12 +876,12 @@ let print_schema (schema:schema) = |
876 | 876 | Printf.printf "schema.negativity= %s\n" schema.negativity; |
877 | 877 | Printf.printf "schema.predicativity= %s\n___________________\n" schema.predicativity |
878 | 878 | |
879 | -let add_new map schema = | |
879 | +let add_new map schema = | |
880 | 880 | let id = match schema.sch_id with |
881 | 881 | {hash=false; numbers=[num_id]; suffix="sch"} -> num_id |
882 | - | _ -> failwith "zła składnia id" in | |
882 | + | _ -> failwith "zła składnia id" in | |
883 | 883 | let schema = {schema with opinion = ""} in (*uwaga, zeruje opinie!!!*) |
884 | - if StringMap.mem id map then | |
884 | + if StringMap.mem id map then | |
885 | 885 | (takiesame:=!takiesame+1; |
886 | 886 | (* Printf.printf "okkk\n";*) |
887 | 887 | let val2 = StringMap.find id map in |
... | ... | @@ -893,14 +893,14 @@ let add_new map schema = |
893 | 893 | else (cnt:=!cnt+1; StringMap.add id schema map) |
894 | 894 | |
895 | 895 | |
896 | - | |
897 | -let check_schema mapa schema = | |
896 | + | |
897 | +let check_schema mapa schema = | |
898 | 898 | add_new mapa schema |
899 | - | |
899 | + | |
900 | 900 | let check_entry mapa entry = |
901 | 901 | przejdz_zapisz check_schema mapa entry.schemata |
902 | 902 | |
903 | -let check walenty = | |
903 | +let check walenty = | |
904 | 904 | przejdz_zapisz check_entry StringMap.empty walenty |
905 | 905 | |
906 | 906 | (* |
... | ... | @@ -913,11 +913,11 @@ let _ = Printf.printf "syntactic...schema map.size: %d takich samych: %d\n" !cnt |
913 | 913 | let cnt = ref 0;; |
914 | 914 | let takiesame = ref 0;; |
915 | 915 | |
916 | -let add_new map (id, phrase) = | |
916 | +let add_new map (id, phrase) = | |
917 | 917 | let id = match id with |
918 | 918 | {hash=false; numbers=[num_id]; suffix="phr"} -> num_id |
919 | - | _ -> failwith "zła składnia id" in | |
920 | - if StringMap.mem id map then | |
919 | + | _ -> failwith "zła składnia id" in | |
920 | + if StringMap.mem id map then | |
921 | 921 | (takiesame:=!takiesame+1; |
922 | 922 | (* Printf.printf "okkk\n";*) |
923 | 923 | let val2 = StringMap.find id map in |
... | ... | @@ -926,20 +926,20 @@ let add_new map (id, phrase) = |
926 | 926 | failwith "różne1111") |
927 | 927 | else (cnt:=!cnt+1; StringMap.add id phrase map) |
928 | 928 | |
929 | -let check_pos mapa (position:position) = | |
929 | +let check_pos mapa (position:position) = | |
930 | 930 | przejdz_zapisz add_new mapa position.phrases |
931 | - | |
932 | -let check_schema mapa schema = | |
931 | + | |
932 | +let check_schema mapa schema = | |
933 | 933 | przejdz_zapisz check_pos mapa schema.positions |
934 | - | |
934 | + | |
935 | 935 | let check_entry mapa entry = |
936 | 936 | przejdz_zapisz check_schema mapa entry.schemata |
937 | 937 | |
938 | -let check walenty = | |
938 | +let check walenty = | |
939 | 939 | przejdz_zapisz check_entry StringMap.empty walenty |
940 | - | |
940 | + | |
941 | 941 | *) |
942 | - | |
942 | + | |
943 | 943 | (* |
944 | 944 | let _ = check walenty |
945 | 945 | let _ = Printf.printf "syntactic...phrases map.size: %d takich samych: %d\n" !cnt !takiesame |
... | ... | @@ -967,7 +967,7 @@ let rec parse_comp = function |
967 | 967 | | "rel",l -> WalTypes.Rel, Xlist.map l (fun s -> WalTypes.Comp s) |
968 | 968 | | s,[] -> WalTypes.CompTypeUndef,[WalTypes.Comp s] |
969 | 969 | | _ -> failwith "parse_comp" |
970 | - | |
970 | + | |
971 | 971 | |
972 | 972 | let rec morf_of_phrase = function |
973 | 973 | NP c -> WalTypes.Phrase (WalTypes.NP(WalParser.parse_case [WalTypes.Text c])) |
... | ... | @@ -995,7 +995,7 @@ let rec morf_of_phrase = function |
995 | 995 | | Lex lex -> (*print_endline "lex";*) WalTypes.Phrase (WalTypes.Null) (* FIXME: ni *) |
996 | 996 | | Null -> WalTypes.Phrase (WalTypes.Null) |
997 | 997 | | _ -> failwith "morf_of_phrase" |
998 | - | |
998 | + | |
999 | 999 | (* | GerP(c) -> WalTypes.Phrase (WalTypes. |
1000 | 1000 | | PrepGerP(prep,c) -> WalTypes.Phrase (WalTypes. |
1001 | 1001 | | PpasP(c) -> WalTypes.Phrase (WalTypes. |
... | ... | @@ -1003,21 +1003,21 @@ let rec morf_of_phrase = function |
1003 | 1003 | | PPact(c) -> WalTypes.Phrase (WalTypes. |
1004 | 1004 | | PrepPactP(prep,c) -> WalTypes.Phrase (WalTypes. |
1005 | 1005 | | Qub -> WalTypes.Phrase (WalTypes.*) |
1006 | - | |
1006 | + | |
1007 | 1007 | |
1008 | 1008 | open WalTypes |
1009 | 1009 | |
1010 | -let process_phrases phrases = | |
1010 | +let process_phrases phrases = | |
1011 | 1011 | Xlist.fold phrases StringMap.empty (fun phrases (id,phrase) -> |
1012 | - let id = | |
1012 | + let id = | |
1013 | 1013 | match id with |
1014 | 1014 | {hash=false; numbers=[(*_;_;_;*)id]; suffix="phr"} -> id |
1015 | - | _ -> failwith "process_phrases" in | |
1015 | + | _ -> failwith "process_phrases" in | |
1016 | 1016 | StringMap.add phrases id phrase) |
1017 | 1017 | |
1018 | 1018 | let process_positions positions = |
1019 | - Xlist.fold positions StringMap.empty (fun positions position -> | |
1020 | - let id = | |
1019 | + Xlist.fold positions StringMap.empty (fun positions position -> | |
1020 | + let id = | |
1021 | 1021 | match position.psn_id with |
1022 | 1022 | {hash=false; numbers=[(*_;_;*)id]; suffix="psn"} -> id |
1023 | 1023 | | _ -> failwith "process_positions" in |
... | ... | @@ -1027,7 +1027,7 @@ let process_positions positions = |
1027 | 1027 | |
1028 | 1028 | let process_schemata schemata = |
1029 | 1029 | Xlist.fold schemata StringMap.empty (fun schemata schema -> |
1030 | - let id = | |
1030 | + let id = | |
1031 | 1031 | match schema.sch_id with |
1032 | 1032 | {hash=false; numbers=[(*_;*)id]; suffix="sch"} -> id |
1033 | 1033 | | _ -> failwith "process_schemata" in |
... | ... | @@ -1043,75 +1043,75 @@ let process_schemata schemata = |
1043 | 1043 | let add_meanings meanings = function |
1044 | 1044 | DefaultAtrs(_,r,o,n,p,a) -> DefaultAtrs(meanings,r,o,n,p,a) |
1045 | 1045 | | _ -> failwith "add_meanings" |
1046 | - | |
1046 | + | |
1047 | 1047 | let process_arguments arguments = |
1048 | - Xlist.fold arguments StringMap.empty (fun arguments argument -> | |
1049 | - let id = | |
1048 | + Xlist.fold arguments StringMap.empty (fun arguments argument -> | |
1049 | + let id = | |
1050 | 1050 | match argument.arg_id with |
1051 | 1051 | {hash=false; numbers=[(*_;_;*)id]; suffix="arg"} -> id |
1052 | 1052 | | _ -> failwith "process_arguments" in |
1053 | - StringMap.add arguments id (argument.role,argument.role_attribute,argument.sel_prefs)) | |
1054 | - | |
1053 | + StringMap.add arguments id (argument.role,argument.role_attribute,argument.sel_prefs)) | |
1054 | + | |
1055 | 1055 | let get_meaning_id meaning = |
1056 | 1056 | match parse_full_id meaning with |
1057 | 1057 | {hash=true; numbers=[_;id]; suffix="mng"} -> id |
1058 | 1058 | | _ -> failwith "get_meaning_id" |
1059 | - | |
1060 | -let get_schema_id alt = | |
1059 | + | |
1060 | +let get_schema_id alt = | |
1061 | 1061 | try |
1062 | 1062 | match parse_full_id (List.hd ((List.hd alt.connections).phrases)) with |
1063 | 1063 | {hash=true; numbers=[_;id;_;_]; suffix="phr"} -> id |
1064 | 1064 | | _ -> failwith "get_schema_id 1" |
1065 | 1065 | with _ -> failwith "get_schema_id 2" |
1066 | - | |
1067 | -let get_frame_id alt = | |
1066 | + | |
1067 | +let get_frame_id alt = | |
1068 | 1068 | try |
1069 | 1069 | match parse_full_id ((List.hd alt.connections).argument) with |
1070 | 1070 | {hash=true; numbers=[_;id;_]; suffix="arg"} -> id |
1071 | 1071 | | _ -> failwith "get_frame_id" |
1072 | 1072 | with _ -> failwith "get_frame_id" |
1073 | - | |
1074 | -let get_argument_id arg = | |
1073 | + | |
1074 | +let get_argument_id arg = | |
1075 | 1075 | match parse_full_id arg with |
1076 | 1076 | {hash=true; numbers=[_;_;id]; suffix="arg"} -> id |
1077 | 1077 | | _ -> failwith "get_argument_id" |
1078 | - | |
1079 | -let get_position_id phrases = | |
1078 | + | |
1079 | +let get_position_id phrases = | |
1080 | 1080 | try |
1081 | 1081 | match parse_full_id (List.hd phrases) with |
1082 | 1082 | {hash=true; numbers=[_;_;id;_]; suffix="phr"} -> id |
1083 | 1083 | | _ -> failwith "get_position_id" |
1084 | 1084 | with _ -> failwith "get_position_id" |
1085 | - | |
1086 | -let get_phrase_id arg = | |
1085 | + | |
1086 | +let get_phrase_id arg = | |
1087 | 1087 | match parse_full_id arg with |
1088 | 1088 | {hash=true; numbers=[_;_;_;id]; suffix="phr"} -> id |
1089 | 1089 | | _ -> failwith "get_phrase_id" |
1090 | - | |
1090 | + | |
1091 | 1091 | let process_frames frames = |
1092 | 1092 | Xlist.fold frames StringMap.empty (fun frames frame -> |
1093 | - let id = | |
1093 | + let id = | |
1094 | 1094 | match frame.frm_id with |
1095 | 1095 | {hash=false; numbers=[(*_;*)id]; suffix="frm"} -> id |
1096 | 1096 | | _ -> failwith "process_frames" in |
1097 | 1097 | let arguments = process_arguments frame.arguments in |
1098 | 1098 | let meaning_ids = Xlist.map frame.meanings get_meaning_id in |
1099 | 1099 | StringMap.add frames id (meaning_ids,arguments)) |
1100 | - | |
1101 | -let process_meanings meanings = | |
1100 | + | |
1101 | +let process_meanings meanings = | |
1102 | 1102 | Xlist.fold meanings StringMap.empty (fun meanings meaning -> |
1103 | - let id = | |
1103 | + let id = | |
1104 | 1104 | match meaning.mng_id with |
1105 | 1105 | {hash=false; numbers=[(*_;*)id]; suffix="mng"} -> id |
1106 | 1106 | | _ -> failwith "process_meanings" in |
1107 | 1107 | StringMap.add meanings id (meaning.name ^ " " ^ meaning.variant)) |
1108 | - | |
1108 | + | |
1109 | 1109 | let process_sel_pref = function |
1110 | 1110 | Numeric s -> (try PreWordnet.synset_name s with Not_found -> "unknown") |
1111 | - | Symbol s -> s | |
1111 | + | Symbol s -> s | |
1112 | 1112 | | Relation(s,t) -> "REL" (* FIXME *) |
1113 | - | |
1114 | -let connect entry = | |
1113 | + | |
1114 | +let connect entry = | |
1115 | 1115 | let schemata = process_schemata entry.schemata in |
1116 | 1116 | let frames = process_frames entry.frames in |
1117 | 1117 | let meanings = process_meanings entry.meanings in |
... | ... | @@ -1133,12 +1133,12 @@ let connect entry = |
1133 | 1133 | let sel_prefs = Xlist.map (List.flatten sel_prefs) process_sel_pref in |
1134 | 1134 | {gf=r; role=role; role_attr=role_attribute; sel_prefs=sel_prefs; |
1135 | 1135 | cr=cr; ce=ce; dir=Both; morfs=Xlist.map phrases morf_of_phrase} :: positions2) in |
1136 | - let meanings = List.rev (Xlist.fold meaning_ids [] (fun l id -> | |
1136 | + let meanings = List.rev (Xlist.fold meaning_ids [] (fun l id -> | |
1137 | 1137 | (StringMap.find meanings id) :: l)) in |
1138 | 1138 | let schema_atrs = add_meanings meanings schema_atrs in |
1139 | 1139 | (entry.form_orth,entry.form_pos,Frame(schema_atrs,positions)) :: found) |
1140 | - | |
1141 | -let connect2 entry = | |
1140 | + | |
1141 | +let connect2 entry = | |
1142 | 1142 | let schemata = process_schemata entry.schemata in |
1143 | 1143 | StringMap.fold schemata [] (fun found _ (schema_atrs,positions) -> |
1144 | 1144 | let positions = StringMap.fold positions [] (fun positions2 _ (r,cr,ce,phrases) -> |
... | ... | @@ -1146,9 +1146,9 @@ let connect2 entry = |
1146 | 1146 | {gf=r; role=""; role_attr=""; sel_prefs=[]; |
1147 | 1147 | cr=cr; ce=ce; dir=Both; morfs=Xlist.map phrases morf_of_phrase} :: positions2) in |
1148 | 1148 | (entry.form_orth,entry.form_pos,Frame(schema_atrs,positions)) :: found) |
1149 | - | |
1150 | -let load_walenty2 () = | |
1151 | - let walenty = load_walenty Paths.walenty_filename in | |
1149 | + | |
1150 | +let load_walenty2 () = | |
1151 | + let walenty = load_walenty Paths.config.Paths.walenty_filename in | |
1152 | 1152 | Xlist.fold walenty StringMap.empty (fun walenty entry -> |
1153 | 1153 | if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) -> |
1154 | 1154 | let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in |
... | ... | @@ -1158,63 +1158,63 @@ let load_walenty2 () = |
1158 | 1158 | let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in |
1159 | 1159 | let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in |
1160 | 1160 | StringMap.add walenty pos map)) |
1161 | - | |
1162 | 1161 | |
1163 | -let print_stringqmap filename qmap = | |
1162 | + | |
1163 | +let print_stringqmap filename qmap = | |
1164 | 1164 | let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in |
1165 | 1165 | File.file_out filename (fun file -> |
1166 | 1166 | Xlist.iter (Xlist.sort l compare) (fun (v,k) -> |
1167 | 1167 | Printf.fprintf file "%5d %s\n" v k)) |
1168 | - | |
1168 | + | |
1169 | 1169 | let sel_prefs_quantities walenty = |
1170 | 1170 | Xlist.fold walenty StringQMap.empty (fun quant e -> |
1171 | 1171 | Xlist.fold e.frames quant (fun quant f -> |
1172 | 1172 | Xlist.fold f.arguments quant (fun quant a -> |
1173 | 1173 | Xlist.fold a.sel_prefs quant (fun quant l -> |
1174 | 1174 | Xlist.fold l quant (fun quant -> function |
1175 | - Numeric s -> | |
1175 | + Numeric s -> | |
1176 | 1176 | let name = try PreWordnet.synset_name s with Not_found -> "unknown" in |
1177 | 1177 | StringQMap.add quant ("N " ^ s ^ " " ^ name) |
1178 | 1178 | | Symbol s -> StringQMap.add quant ("S " ^ s) |
1179 | 1179 | | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t)))))) |
1180 | - | |
1181 | -(*let _ = | |
1180 | + | |
1181 | +(*let _ = | |
1182 | 1182 | let walenty = load_walenty walenty_filename in |
1183 | 1183 | let quant = sel_prefs_quantities walenty in |
1184 | 1184 | print_stringqmap "results/quant_sel_prefs.txt" quant*) |
1185 | - | |
1185 | + | |
1186 | 1186 | let print_entry filename lex = |
1187 | 1187 | match Xml.parse_file filename with |
1188 | - Xml.Element("TEI", _, | |
1189 | - [Xml.Element("teiHeader",_,_) ; | |
1188 | + Xml.Element("TEI", _, | |
1189 | + [Xml.Element("teiHeader",_,_) ; | |
1190 | 1190 | Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> |
1191 | 1191 | Xlist.iter entries (function |
1192 | - Xml.Element("entry",_,Xml.Element("form", [], [Xml.Element("orth",[],[Xml.PCData orth]);_]) :: xml :: _) -> | |
1192 | + Xml.Element("entry",_,Xml.Element("form", [], [Xml.Element("orth",[],[Xml.PCData orth]);_]) :: xml :: _) -> | |
1193 | 1193 | if orth = lex then print_endline (Xml.to_string_fmt xml) |
1194 | 1194 | | _ -> failwith "print_entry") |
1195 | 1195 | | _ -> failwith "print_entry" |
1196 | 1196 | |
1197 | 1197 | |
1198 | -(*let _ = | |
1198 | +(*let _ = | |
1199 | 1199 | print_entry walenty_filename "bębnić"*) |
1200 | 1200 | |
1201 | 1201 | let print_full_entry filename lex = |
1202 | 1202 | match Xml.parse_file filename with |
1203 | - Xml.Element("TEI", _, | |
1204 | - [Xml.Element("teiHeader",_,_) ; | |
1203 | + Xml.Element("TEI", _, | |
1204 | + [Xml.Element("teiHeader",_,_) ; | |
1205 | 1205 | Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> |
1206 | 1206 | Xlist.iter entries (function |
1207 | - Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: _ :: l) -> | |
1207 | + Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: _ :: l) -> | |
1208 | 1208 | let xml = Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: l) in |
1209 | 1209 | if orth = lex then print_endline (Xml.to_string_fmt xml) |
1210 | 1210 | | _ -> failwith "print_full_entry") |
1211 | 1211 | | _ -> failwith "print_full_entry" |
1212 | 1212 | |
1213 | -(*let _ = | |
1213 | +(*let _ = | |
1214 | 1214 | print_full_entry walenty_filename "bębnić"*) |
1215 | 1215 | |
1216 | 1216 | (*let _ = |
1217 | 1217 | let walenty = load_walenty2 () in |
1218 | 1218 | let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in |
1219 | 1219 | Xlist.iter frames_sem (fun frame -> |
1220 | - print_endline (WalStringOf.frame "bębnić" frame))*) | |
1221 | 1220 | \ No newline at end of file |
1221 | + print_endline (WalStringOf.frame "bębnić" frame))*) | |
... | ... |
swigra/parser/morfeusz2-swi.so
0 → 100755
No preview for this file type
testy/testy podziału na zdania.tab renamed to testy/testy podzialu na zdania.tab
1 | 1 | - Frunę. or |
2 | 2 | - Frunę. Kiszę. or |
3 | 3 | - Frunę - powiedział szpak. or |
4 | -- Frunę! - powiedział szpak. or | |
5 | -- Frunę - powiedział szpak. - Kiszę. or | |
4 | +- Frunę! - powiedział szpak. or -sem | |
5 | +- Frunę - powiedział szpak. - Kiszę. or -amb | |
6 | 6 | - Ten szpak - powiedział - frunie. or - |
7 | 7 | Szpak powiedział: or |
8 | -Szpak powiedział: frunę. or | |
9 | -Szpak powiedział: „Frunę”. or | |
8 | +Szpak powiedział: frunę. or -amb | |
9 | +Szpak powiedział: „Frunę”. or -amb | |
10 | 10 | Szpak powiedział: „Frunę. Kiszę.” or |
11 | -Szpak powiedział: - Frunę. or | |
11 | +Szpak powiedział: - Frunę. or -vis | |
12 | 12 | Szpak powiedział: - Frunę. - i bryknął. or - |
13 | 13 | Szpak powiedział: - Frunę. - i dodał: - Brykam. or - |
14 | 14 | Szpak bada sprawę: - Frunę - powiada - i brykam. or - |
15 | -SZPAK: Frunę. or | |
16 | -SZPAK: - Frunę. or | |
15 | +SZPAK: Frunę. or -popr | |
16 | +SZPAK: - Frunę. or -amb | |
17 | 17 | Frunę. Kiszę. sentence multi |
18 | -W XX w. Szpak frunie. sentence amb multi | |
18 | +W XX w. Szpak frunie. sentence amb multi -amb | |
19 | +Ala ma kota. Ela ma psa. Kaziu ma żółwika. Blablabla... selekcja parserów | |
... | ... |