Commit 762b53f4fdd9ea2b9d3d25f317ef21b573e5f29e (1 parent: dd414d0b)
Informacja o błędach w leksykonie LCG
Showing 4 changed files with 228 additions and 189 deletions
LCGlexicon/ENIAM_LCGlexiconParser.ml
@@ -22,6 +22,8 @@ open ENIAM_LCGtypes | @@ -22,6 +22,8 @@ open ENIAM_LCGtypes | ||
22 | open ENIAM_LCGlexiconTypes | 22 | open ENIAM_LCGlexiconTypes |
23 | open ENIAMcategoriesPL | 23 | open ENIAMcategoriesPL |
24 | 24 | ||
25 | +exception ParseError of string * string * int | ||
26 | + | ||
25 | let rec get_first n = function | 27 | let rec get_first n = function |
26 | [] -> [] | 28 | [] -> [] |
27 | | s :: l -> if n = 0 then [] else s :: (get_first (n-1) l) | 29 | | s :: l -> if n = 0 then [] else s :: (get_first (n-1) l) |
@@ -35,98 +37,97 @@ let remove_comments line = | @@ -35,98 +37,97 @@ let remove_comments line = | ||
35 | String.sub line 0 n | 37 | String.sub line 0 n |
36 | with Not_found -> line | 38 | with Not_found -> line |
37 | 39 | ||
38 | -let rec parse_phrase_names_rec rev = function | ||
39 | - "@WEIGHTS" :: tokens -> List.rev rev, "@WEIGHTS" :: tokens | ||
40 | - | "@LEXICON" :: tokens -> List.rev rev, "@LEXICON" :: tokens | ||
41 | - | t :: tokens -> parse_phrase_names_rec (t :: rev) tokens | ||
42 | - | [] -> failwith "parse_phrase_names_rec: unexpexted end of input" | ||
43 | - | ||
44 | -let parse_phrase_names = function | ||
45 | - "@PHRASE_NAMES" :: tokens -> parse_phrase_names_rec [] tokens | ||
46 | - | s :: _ -> failwith ("parse_phrase_names: '@PHRASE_NAMES' expected while '" ^ s ^ "' found") | ||
47 | - | [] -> failwith "parse_phrase_names: unexpexted end of input" | ||
48 | - | ||
49 | -let rec parse_weights_rec weights = function | ||
50 | - "@LEXICON" :: tokens -> weights, "@LEXICON" :: tokens | ||
51 | - | w :: "=" :: n :: tokens -> parse_weights_rec (StringMap.add weights w (float_of_string n)) tokens | ||
52 | - | s :: _ -> failwith ("parse_weights_rec: unexpexted token '" ^ s ^ "'") | ||
53 | - | [] -> failwith "parse_weights_rec: unexpexted end of input" | ||
54 | - | ||
55 | -let parse_weights = function | ||
56 | - "@WEIGHTS" :: tokens -> parse_weights_rec StringMap.empty tokens | ||
57 | - | "@LEXICON" :: tokens -> StringMap.empty, "@LEXICON" :: tokens | ||
58 | - | s :: _ -> failwith ("parse_weights: '@WEIGHTS' expected while '" ^ s ^ "' found") | ||
59 | - | [] -> failwith "parse_weights: unexpexted end of input" | ||
60 | - | ||
61 | -let rec split_semic found rev = function | ||
62 | - "lemma" :: "=" :: ";" :: l -> split_semic found (";" :: "=" :: "lemma" :: rev) l | ||
63 | - | ";" :: l -> split_semic (List.rev rev :: found) [] l | ||
64 | - | s :: l -> split_semic found (s :: rev) l | ||
65 | - | [] -> if rev = [] then List.rev found else List.rev ((List.rev rev) :: found) | 40 | +let rec parse_phrase_names_rec i0 rev = function |
41 | + (i,"@WEIGHTS") :: tokens -> i, List.rev rev, (i,"@WEIGHTS") :: tokens | ||
42 | + | (i,"@LEXICON") :: tokens -> i, List.rev rev, (i,"@LEXICON") :: tokens | ||
43 | + | (i,t) :: tokens -> parse_phrase_names_rec i0 ((i,t) :: rev) tokens | ||
44 | + | [] -> raise (ParseError("parse_phrase_names_rec", "unexpexted end of input", i0)) | ||
45 | + | ||
46 | +let parse_phrase_names i0 = function | ||
47 | + (i,"@PHRASE_NAMES") :: tokens -> parse_phrase_names_rec i [] tokens | ||
48 | + | (i,s) :: _ -> raise (ParseError("parse_phrase_names", "'@PHRASE_NAMES' expected while '" ^ s ^ "' found", i)) | ||
49 | + | [] -> raise (ParseError("parse_phrase_names", "unexpexted end of input", i0)) | ||
50 | + | ||
51 | +let rec parse_weights_rec i0 weights = function | ||
52 | + (i,"@LEXICON") :: tokens -> i, weights, (i,"@LEXICON") :: tokens | ||
53 | + | (_,w) :: (_,"=") :: (i,n) :: tokens -> parse_weights_rec i (StringMap.add weights w (float_of_string n)) tokens | ||
54 | + | (i,s) :: _ -> raise (ParseError("parse_weights_rec", "unexpexted token '" ^ s ^ "'", i)) | ||
55 | + | [] -> raise (ParseError("parse_weights_rec", "unexpexted end of input", i0)) | ||
56 | + | ||
57 | +let parse_weights i0 = function | ||
58 | + (i,"@WEIGHTS") :: tokens -> parse_weights_rec i StringMap.empty tokens | ||
59 | + | (i,"@LEXICON") :: tokens -> i, StringMap.empty, (i,"@LEXICON") :: tokens | ||
60 | + | (i,s) :: _ -> raise (ParseError("parse_weights", "'@WEIGHTS' expected while '" ^ s ^ "' found", i)) | ||
61 | + | [] -> raise (ParseError("parse_weights", "unexpexted end of input", i0)) | ||
62 | + | ||
63 | +let rec split_semic i0 found rev = function | ||
64 | + (i1,"lemma") :: (i2,"=") :: (i3,";") :: l -> split_semic (if rev = [] then i1 else i0) found ((i1,";") :: (i2,"=") :: (i3,"lemma") :: rev) l | ||
65 | + | (i,";") :: l -> split_semic i ((i0, List.rev rev) :: found) [] l | ||
66 | + | (i,s) :: l -> split_semic (if rev = [] then i else i0) found ((i,s) :: rev) l | ||
67 | + | [] -> if rev = [] then List.rev found else List.rev ((i0, List.rev rev) :: found) | ||
66 | 68 | ||
67 | let rec split_colon found rev = function | 69 | let rec split_colon found rev = function |
68 | - "lemma" :: "=" :: ":" :: l -> split_colon found (":" :: "=" :: "lemma" :: rev) l | ||
69 | - | ":" :: l -> split_colon (List.rev rev :: found) [] l | ||
70 | - | s :: l -> split_colon found (s :: rev) l | 70 | + (i1,"lemma") :: (i2,"=") :: (i3,":") :: l -> split_colon found ((i1,":") :: (i2,"=") :: (i3,"lemma") :: rev) l |
71 | + | (_,":") :: l -> split_colon (List.rev rev :: found) [] l | ||
72 | + | (i,s) :: l -> split_colon found ((i,s) :: rev) l | ||
71 | | [] -> if rev = [] then List.rev found else List.rev ((List.rev rev) :: found) | 73 | | [] -> if rev = [] then List.rev found else List.rev ((List.rev rev) :: found) |
72 | 74 | ||
73 | -let rec split_comma found rev = function | ||
74 | - "lemma" :: "=" :: "," :: l -> split_comma found ("," :: "=" :: "lemma" :: rev) l | ||
75 | - | "," :: l -> split_comma (List.rev rev :: found) [] l | ||
76 | - | s :: l -> split_comma found (s :: rev) l | ||
77 | - | [] -> if rev = [] then List.rev found else List.rev ((List.rev rev) :: found) | 75 | +let rec split_comma i0 found rev = function |
76 | + (i1,"lemma") :: (i2,"=") :: (i3,",") :: l -> split_comma (if rev = [] then i1 else i0) found ((i1,",") :: (i2,"=") :: (i3,"lemma") :: rev) l | ||
77 | + | (i,",") :: l -> split_comma i ((i0, List.rev rev) :: found) [] l | ||
78 | + | (i,s) :: l -> split_comma (if rev = [] then i else i0) found ((i,s) :: rev) l | ||
79 | + | [] -> if rev = [] then List.rev found else List.rev ((i0, List.rev rev) :: found) | ||
78 | 80 | ||
79 | -let rec find_right_bracket rev = function | ||
80 | - "]" :: l -> List.rev rev, l | ||
81 | - | s :: l -> find_right_bracket (s :: rev) l | ||
82 | - | [] -> failwith "find_right_bracket" | 81 | +let catch_selector_of_string i proc s = |
82 | + try selector_of_string s | ||
83 | + with _ -> raise (ParseError(proc, "unknown selector: " ^ s, i)) | ||
83 | 84 | ||
84 | let match_selectors = function | 85 | let match_selectors = function |
85 | - s :: l -> (try selector_of_string s,l with _ -> failwith ("match_selectors: " ^ s)) | ||
86 | - | [] -> failwith "match_selectors: empty" | 86 | + i0,(i,s) :: l -> i,catch_selector_of_string i "match_selectors" s,l |
87 | + | i0,[] -> raise (ParseError("match_selectors", "empty", i0)) | ||
87 | 88 | ||
88 | let match_relation = function | 89 | let match_relation = function |
89 | (* cat,"=" :: "=" :: l -> cat,StrictEq,l *) | 90 | (* cat,"=" :: "=" :: l -> cat,StrictEq,l *) |
90 | - | cat,"!" :: "=" :: l -> cat,Neq,l | ||
91 | - | cat,"=" :: l -> cat,Eq,l | ||
92 | - | cat,s :: l -> failwith ("match_relation: " ^ (String.concat " " (s :: l))) | ||
93 | - | cat,[] -> failwith "match_relation: empty" | ||
94 | - | ||
95 | -let rec split_mid rev = function | ||
96 | - [s] -> List.rev (s :: rev) | ||
97 | - | s :: "|" :: l -> split_mid (s :: rev) l | ||
98 | - | [] -> failwith "split_mid: empty" | ||
99 | - | l -> failwith ("split_mid: " ^ (String.concat " " l)) | ||
100 | - | ||
101 | -let rec check_value selector l = | 91 | + | i,cat,(_,"!") :: (_,"=") :: l -> i,cat,Neq,l |
92 | + | i,cat,(_,"=") :: l -> i,cat,Eq,l | ||
93 | + | _,cat,(i,s) :: l -> raise (ParseError("match_relation", "relation symbol not found: " ^ String.concat " " (s :: Xlist.map l snd), i)) | ||
94 | + | i,cat,[] -> raise (ParseError("match_relation", "empty", i)) | ||
95 | + | ||
96 | +let rec split_mid i0 rev = function | ||
97 | + [i,s] -> List.rev ((i,s) :: rev) | ||
98 | + | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l | ||
99 | + | [] -> raise (ParseError("split_mid", "empty", i0)) | ||
100 | + | (i,s) :: l -> raise (ParseError("split_mid", "delimiter not found: " ^ String.concat " " (s :: Xlist.map l snd), i)) | ||
101 | + | ||
102 | +let rec check_value i0 selector l = | ||
102 | let vals = try SelectorMap.find selector_values selector | 103 | let vals = try SelectorMap.find selector_values selector |
103 | - with Not_found -> failwith ("check_value: invalid selector " ^ string_of_selector selector) in | 104 | + with Not_found -> raise (ParseError("check_value", "invalid selector: " ^ string_of_selector selector, i0)) in |
104 | if vals = [] then () else | 105 | if vals = [] then () else |
105 | - Xlist.iter l (fun s -> | 106 | + Xlist.iter l (fun (i,s) -> |
106 | if not (Xlist.mem vals s) then | 107 | if not (Xlist.mem vals s) then |
107 | - failwith ("check_value: invalid selector " ^ string_of_selector selector ^ "=" ^ s)); | ||
108 | - l | 108 | + raise (ParseError("check_value", "invalid selector: " ^ string_of_selector selector ^ "=" ^ s, i))); |
109 | + Xlist.map l snd | ||
109 | 110 | ||
110 | let match_value = function | 111 | let match_value = function |
111 | - cat,rel,[s] -> cat,rel,[s] | ||
112 | - | cat,rel,[] -> failwith "match_value: empty" | ||
113 | - | cat,rel,l -> cat,rel, check_value cat (split_mid [] l) | 112 | + i,cat,rel,[s] -> cat,rel, check_value i cat [s] |
113 | + | i,cat,rel,[] -> raise (ParseError("match_value", "empty", i)) | ||
114 | + | i,cat,rel,l -> cat,rel, check_value i cat (split_mid i [] l) | ||
114 | 115 | ||
115 | -let parse_selectors l = | 116 | +let parse_selectors i0 l = |
116 | (* print_endline s; *) | 117 | (* print_endline s; *) |
117 | (* let l = Xlist.map (Str.full_split (Str.regexp "|\\|,\\|=\\|!") s) (function | 118 | (* let l = Xlist.map (Str.full_split (Str.regexp "|\\|,\\|=\\|!") s) (function |
118 | Str.Text s -> s | 119 | Str.Text s -> s |
119 | | Str.Delim s -> s) in *) | 120 | | Str.Delim s -> s) in *) |
120 | - let ll = split_comma [] [] l in | 121 | + let ll = split_comma i0 [] [] l in |
121 | let l = Xlist.rev_map ll match_selectors in | 122 | let l = Xlist.rev_map ll match_selectors in |
122 | let l = Xlist.rev_map l match_relation in | 123 | let l = Xlist.rev_map l match_relation in |
123 | let l = Xlist.rev_map l match_value in | 124 | let l = Xlist.rev_map l match_value in |
124 | l | 125 | l |
125 | 126 | ||
126 | let manage_lemmata = function | 127 | let manage_lemmata = function |
127 | - "lemma" :: "=" :: ":" :: "," :: tokens -> ["lemma";"=";":";","],tokens | ||
128 | - | "lemma" :: "=" :: ":" :: s :: "," :: tokens -> ["lemma";"=";":"^s;","],tokens | ||
129 | - | "lemma" :: "=" :: "<" :: "/" :: s :: "," :: tokens -> ["lemma";"=";"</"^s;","],tokens | 128 | + (i1,"lemma") :: (i2,"=") :: (i3,":") :: (i4,",") :: tokens -> [i1,"lemma";i2,"=";i3,":";i4,","],tokens |
129 | + | (i1,"lemma") :: (i2,"=") :: (i3,":") :: (i4,s) :: (i5,",") :: tokens -> [i1,"lemma";i2,"=";i3,":"^s;i5,","],tokens | ||
130 | + | (i1,"lemma") :: (i2,"=") :: (i3,"<") :: (i4,"/") :: (i5,s) :: (i6,",") :: tokens -> [i1,"lemma";i2,"=";i3,"</"^s;i6,","],tokens | ||
130 | | tokens -> [],tokens | 131 | | tokens -> [],tokens |
131 | 132 | ||
132 | 133 | ||
@@ -138,81 +139,104 @@ type syntax = | @@ -138,81 +139,104 @@ type syntax = | ||
138 | | E of (direction * grammar_symbol) list | 139 | | E of (direction * grammar_symbol) list |
139 | 140 | ||
140 | let make_atoms phrase_names = | 141 | let make_atoms phrase_names = |
141 | - SelectorMap.fold selector_values (StringSet.of_list phrase_names) (fun atoms _ l -> | 142 | + SelectorMap.fold selector_values (StringSet.of_list (Xlist.rev_map phrase_names snd)) (fun atoms _ l -> |
142 | Xlist.fold l atoms StringSet.add) | 143 | Xlist.fold l atoms StringSet.add) |
143 | 144 | ||
145 | +let rec find_right_bracket i0 rev = function | ||
146 | + (_,"]") :: l -> List.rev rev, l | ||
147 | + | (i,s) :: l -> find_right_bracket i ((i,s) :: rev) l | ||
148 | + | [] -> raise (ParseError("find_right_bracket", "empty", i0)) | ||
149 | + | ||
144 | let operators = StringSet.of_list [ | 150 | let operators = StringSet.of_list [ |
145 | "*"; "+"; "/"; "|"; "\\"; "("; ")"; ","; "{"; "}"; "?"] | 151 | "*"; "+"; "/"; "|"; "\\"; "("; ")"; ","; "{"; "}"; "?"] |
146 | 152 | ||
147 | let find_internal_grammar_symbols atoms = function | 153 | let find_internal_grammar_symbols atoms = function |
148 | - | "T" -> B Top | ||
149 | - | "1" -> C One | ||
150 | - | "schema" -> D(Both,Tensor[AVar "schema"]) | ||
151 | - | "adjuncts" -> D(Both,Tensor[AVar "adjuncts"]) | ||
152 | - | s -> if StringSet.mem selector_names s then B (AVar s) else | ||
153 | - if StringSet.mem atoms s then B (Atom s) else | ||
154 | - if StringSet.mem operators s then A s else | ||
155 | - failwith ("find_internal_grammar_symbols: unknown symbol " ^ s) | 154 | + | i,"T" -> i,B Top |
155 | + | i,"1" -> i,C One | ||
156 | + | i,"schema" -> i,D(Both,Tensor[AVar "schema"]) | ||
157 | + | i,"adjuncts" -> i,D(Both,Tensor[AVar "adjuncts"]) | ||
158 | + | i,s -> if StringSet.mem selector_names s then i,B (AVar s) else | ||
159 | + if StringSet.mem atoms s then i,B (Atom s) else | ||
160 | + if StringSet.mem operators s then i,A s else | ||
161 | + raise (ParseError("find_internal_grammar_symbols", "unknown symbol " ^ s, i)) | ||
162 | + | ||
163 | +let rec find_tensor2 rev = function | ||
164 | + (_,B s1) :: (_,A "*") :: (i,B s2) :: l -> find_tensor2 (s1 :: rev) ((i,B s2) :: l) | ||
165 | + | (_,B s1) :: l -> List.rev (s1 :: rev), l | ||
166 | + | (i,t) :: l -> raise (ParseError("find_tensor2", "", i)) | ||
167 | + | [] -> failwith "find_tensor2" | ||
156 | 168 | ||
157 | let rec find_tensor = function | 169 | let rec find_tensor = function |
158 | - B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: A "*" :: B s8 :: l -> failwith "find_tensor 1" | 170 | + (* B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: A "*" :: B s8 :: l -> failwith "find_tensor 1" |
159 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6;s7]) :: find_tensor l | 171 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6;s7]) :: find_tensor l |
160 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6]) :: find_tensor l | 172 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6]) :: find_tensor l |
161 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: l -> C (Tensor[s1;s2;s3;s4;s5]) :: find_tensor l | 173 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: l -> C (Tensor[s1;s2;s3;s4;s5]) :: find_tensor l |
162 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: l -> C (Tensor[s1;s2;s3;s4]) :: find_tensor l | 174 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: l -> C (Tensor[s1;s2;s3;s4]) :: find_tensor l |
163 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: l -> C (Tensor[s1;s2;s3]) :: find_tensor l | 175 | | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: l -> C (Tensor[s1;s2;s3]) :: find_tensor l |
164 | - | B s1 :: A "*" :: B s2 :: l -> C (Tensor[s1;s2]) :: find_tensor l | ||
165 | - | B s1 :: l -> C (Tensor[s1]) :: find_tensor l | ||
166 | - | A "*" :: _ -> failwith "find_tensor 2: unexpected '*'" | 176 | + | B s1 :: A "*" :: B s2 :: l -> C (Tensor[s1;s2]) :: find_tensor l *) |
177 | + | (i,B s1) :: l -> let sl,l = find_tensor2 [] ((i,B s1) :: l) in (i,C (Tensor sl)) :: find_tensor l | ||
178 | + | (i,A "*") :: _ -> raise (ParseError("find_tensor", "unexpected '*'", i)) | ||
167 | | t :: l -> t :: find_tensor l | 179 | | t :: l -> t :: find_tensor l |
168 | | [] -> [] | 180 | | [] -> [] |
169 | 181 | ||
182 | +let rec find_plus2 rev = function | ||
183 | + (_,C s1) :: (_,A "+") :: (i,C s2) :: l -> find_plus2 (s1 :: rev) ((i,C s2) :: l) | ||
184 | + | (_,C s1) :: l -> List.rev (s1 :: rev), l | ||
185 | + | (i,t) :: l -> raise (ParseError("find_plus2", "", i)) | ||
186 | + | [] -> failwith "find_plus2" | ||
187 | + | ||
170 | let rec find_plus = function | 188 | let rec find_plus = function |
171 | - C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: A "+" :: C s7 :: l -> failwith "find_plus 1" | 189 | + (* C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: A "+" :: C s7 :: l -> failwith "find_plus 1" |
172 | | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: l -> C (Plus[s1;s2;s3;s4;s5;s6]) :: find_plus l | 190 | | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: l -> C (Plus[s1;s2;s3;s4;s5;s6]) :: find_plus l |
173 | | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: l -> C (Plus[s1;s2;s3;s4;s5]) :: find_plus l | 191 | | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: l -> C (Plus[s1;s2;s3;s4;s5]) :: find_plus l |
174 | | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: l -> C (Plus[s1;s2;s3;s4]) :: find_plus l | 192 | | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: l -> C (Plus[s1;s2;s3;s4]) :: find_plus l |
175 | - | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: l -> C (Plus[s1;s2;s3]) :: find_plus l | ||
176 | - | C s1 :: A "+" :: C s2 :: l -> C (Plus[s1;s2]) :: find_plus l | ||
177 | - | A "+" :: _ -> failwith "find_plus 2: unexpected '+'" | 193 | + | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: l -> C (Plus[s1;s2;s3]) :: find_plus l *) |
194 | + | (i1,C s1) :: (i2,A "+") :: (i3,C s2) :: l -> let sl,l = find_plus2 [] ((i1,C s1) :: (i2,A "+") :: (i3,C s2) :: l) in (i1,C (Plus sl)) :: find_plus l | ||
195 | + | (i,A "+") :: _ -> raise (ParseError("find_plus 2", "unexpected '+'", i)) | ||
178 | | t :: l -> t :: find_plus l | 196 | | t :: l -> t :: find_plus l |
179 | | [] -> [] | 197 | | [] -> [] |
180 | 198 | ||
181 | let rec find_paren = function | 199 | let rec find_paren = function |
182 | - A "(" :: C s :: A ")" :: l -> C s :: find_paren l | ||
183 | - | s :: l -> s :: find_paren l | 200 | + (_,A "(") :: (i,C s) :: (_,A ")") :: l -> (i,C s) :: find_paren l |
201 | + | (i,s) :: l -> (i,s) :: find_paren l | ||
184 | | [] -> [] | 202 | | [] -> [] |
185 | 203 | ||
186 | let rec find_imp = function | 204 | let rec find_imp = function |
187 | - | C s1 :: A "/" :: C s2 :: l -> C (Imp(s1,Forward,s2)) :: find_imp l | ||
188 | - | C s1 :: A "|" :: C s2 :: l -> C (Imp(s1,Both,s2)) :: find_imp l | ||
189 | - | C s1 :: A "\\" :: C s2 :: l -> C (Imp(s1,Backward,s2)) :: find_imp l | ||
190 | - | s :: l -> s :: find_imp l | 205 | + | (i,C s1) :: (_,A "/") :: (_,C s2) :: l -> (i,C (Imp(s1,Forward,s2))) :: find_imp l |
206 | + | (i,C s1) :: (_,A "|") :: (_,C s2) :: l -> (i,C (Imp(s1,Both,s2))) :: find_imp l | ||
207 | + | (i,C s1) :: (_,A "\\") :: (_,C s2) :: l -> (i,C (Imp(s1,Backward,s2))) :: find_imp l | ||
208 | + | (i,s) :: l -> (i,s) :: find_imp l | ||
191 | | [] -> [] | 209 | | [] -> [] |
192 | 210 | ||
193 | let rec find_maybe = function | 211 | let rec find_maybe = function |
194 | - | A "?" :: C s2 :: l -> C (Maybe s2) :: find_maybe l | ||
195 | - | A "?" :: _ -> failwith "find_maybe 1: unexpected '?'" | ||
196 | - | s :: l -> s :: find_maybe l | 212 | + | (i,A "?") :: (_,C s2) :: l -> (i,C (Maybe s2)) :: find_maybe l |
213 | + | (i,A "?") :: _ -> raise (ParseError("find_maybe 1", "unexpected '?'", i)) | ||
214 | + | (i,s) :: l -> (i,s) :: find_maybe l | ||
197 | | [] -> [] | 215 | | [] -> [] |
198 | 216 | ||
199 | let rec find_mult_imp = function | 217 | let rec find_mult_imp = function |
200 | - | A "{" :: A "/" :: C s2 :: l -> A "{" :: D (Forward,s2) :: find_mult_imp l | ||
201 | - | A "{" :: A "|" :: C s2 :: l -> A "{" :: D (Both,s2) :: find_mult_imp l | ||
202 | - | A "{" :: A "\\" :: C s2 :: l -> A "{" :: D (Backward,s2) :: find_mult_imp l | ||
203 | - | A "," :: A "/" :: C s2 :: l -> A "," :: D (Forward,s2) :: find_mult_imp l | ||
204 | - | A "," :: A "|" :: C s2 :: l -> A "," :: D (Both,s2) :: find_mult_imp l | ||
205 | - | A "," :: A "\\" :: C s2 :: l -> A "," :: D (Backward,s2) :: find_mult_imp l | ||
206 | - | A "/" :: _ -> failwith "find_mult_imp 1: unexpected '/'" | ||
207 | - | A "|" :: _ -> failwith "find_mult_imp 2: unexpected '|'" | ||
208 | - | A "\\" :: _ -> failwith "find_mult_imp 3: unexpected '\\'" | ||
209 | - | A "(" :: _ -> failwith "find_mult_imp 4: unexpected '('" | ||
210 | - | A ")" :: _ -> failwith "find_mult_imp 5: unexpected ')'" | ||
211 | - | s :: l -> s :: find_mult_imp l | 218 | + | (i1,A "{") :: (i2,A "/") :: (_,C s2) :: l -> (i1,A "{") :: (i2,D (Forward,s2)) :: find_mult_imp l |
219 | + | (i1,A "{") :: (i2,A "|") :: (_,C s2) :: l -> (i1,A "{") :: (i2,D (Both,s2)) :: find_mult_imp l | ||
220 | + | (i1,A "{") :: (i2,A "\\") :: (_,C s2) :: l -> (i1,A "{") :: (i2,D (Backward,s2)) :: find_mult_imp l | ||
221 | + | (i1,A ",") :: (i2,A "/") :: (_,C s2) :: l -> (i1,A ",") :: (i2,D (Forward,s2)) :: find_mult_imp l | ||
222 | + | (i1,A ",") :: (i2,A "|") :: (_,C s2) :: l -> (i1,A ",") :: (i2,D (Both,s2)) :: find_mult_imp l | ||
223 | + | (i1,A ",") :: (i2,A "\\") :: (_,C s2) :: l -> (i1,A ",") :: (i2,D (Backward,s2)) :: find_mult_imp l | ||
224 | + | (i,A "/") :: _ -> raise (ParseError("find_mult_imp 1", "unexpected '/'", i)) | ||
225 | + | (i,A "|") :: _ -> raise (ParseError("find_mult_imp 2", "unexpected '|'", i)) | ||
226 | + | (i,A "\\") :: _ -> raise (ParseError("find_mult_imp 3", "unexpected '\\'", i)) | ||
227 | + | (i,A "(") :: _ -> raise (ParseError("find_mult_imp 4", "unexpected '('", i)) | ||
228 | + | (i,A ")") :: _ -> raise (ParseError("find_mult_imp 5", "unexpected ')'", i)) | ||
229 | + | (i,s) :: l -> (i,s) :: find_mult_imp l | ||
212 | | [] -> [] | 230 | | [] -> [] |
213 | 231 | ||
232 | +let rec find_mult2 rev = function | ||
233 | + (_,D(s1,t1)) :: (_,A ",") :: (i,D(s2,t2)) :: l -> find_mult2 ((s1,t1) :: rev) ((i,D(s2,t2)) :: l) | ||
234 | + | (_,D(s1,t1)) :: (_,A "}") :: l -> List.rev ((s1,t1) :: rev), l | ||
235 | + | (i,t) :: l -> raise (ParseError("find_mult2", "", i)) | ||
236 | + | [] -> failwith "find_mult2" | ||
237 | + | ||
214 | let rec find_mult = function | 238 | let rec find_mult = function |
215 | - A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: to many elements in { }" | 239 | + (* A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: to many elements in { }" |
216 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l | 240 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l |
217 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l | 241 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l |
218 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l | 242 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l |
@@ -221,20 +245,20 @@ let rec find_mult = function | @@ -221,20 +245,20 @@ let rec find_mult = function | ||
221 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l | 245 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l |
222 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4] :: find_mult l | 246 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4] :: find_mult l |
223 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3] :: find_mult l | 247 | | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3] :: find_mult l |
224 | - | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "}" :: l -> E[s1,t1;s2,t2] :: find_mult l | ||
225 | - | A "{" :: D(s1,t1) :: A "}" :: l -> E[s1,t1] :: find_mult l | ||
226 | - | A "{" :: _ -> failwith "find_mult 2: unexpected '{'" | ||
227 | - | A "}" :: _ -> failwith "find_mult 3: unexpected '}'" | ||
228 | - | A "," :: _ -> failwith "find_mult 4: unexpected ','" | 248 | + | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "}" :: l -> E[s1,t1;s2,t2] :: find_mult l *) |
249 | + | (_,A "{") :: (i,D(s1,t1)) :: l -> let sl,l = find_mult2 [] ((i,D(s1,t1)) :: l) in (i,E sl) :: find_mult l | ||
250 | + | (i,A "{") :: _ -> raise (ParseError("find_mult 2", "unexpected '{'", i)) | ||
251 | + | (i,A "}") :: _ -> raise (ParseError("find_mult 3", "unexpected '}'", i)) | ||
252 | + | (i,A ",") :: _ -> raise (ParseError("find_mult 4", "unexpected ','", i)) | ||
229 | | t :: l -> t :: find_mult l | 253 | | t :: l -> t :: find_mult l |
230 | | [] -> [] | 254 | | [] -> [] |
231 | 255 | ||
232 | -let rec apply_mult = function | ||
233 | - C s :: E t :: l -> apply_mult (C (ImpSet(s,t)) :: l) | ||
234 | - | [C s] -> C s | ||
235 | - | _ -> failwith "apply_mult" | 256 | +let rec apply_mult i0 = function |
257 | + (i1,C s) :: (i2,E t) :: l -> apply_mult i2 ((i1,C (ImpSet(s,t))) :: l) | ||
258 | + | [i,C s] -> i,C s | ||
259 | + | _ -> raise (ParseError("apply_mult","",i0)) | ||
236 | 260 | ||
237 | -let parse_syntax atoms l = | 261 | +let parse_syntax i0 atoms l = |
238 | (* print_endline s; *) | 262 | (* print_endline s; *) |
239 | (* let l = Xlist.map (Str.full_split (Str.regexp "?\\|}\\|{\\|,\\|*\\|/\\|+\\|)\\|(\\||\\|\\") s) (function | 263 | (* let l = Xlist.map (Str.full_split (Str.regexp "?\\|}\\|{\\|,\\|*\\|/\\|+\\|)\\|(\\||\\|\\") s) (function |
240 | Str.Text s -> s | 264 | Str.Text s -> s |
@@ -252,107 +276,122 @@ let parse_syntax atoms l = | @@ -252,107 +276,122 @@ let parse_syntax atoms l = | ||
252 | let l = find_paren l in | 276 | let l = find_paren l in |
253 | let l = find_mult_imp l in | 277 | let l = find_mult_imp l in |
254 | let l = find_mult l in | 278 | let l = find_mult l in |
255 | - match apply_mult l with | ||
256 | - C s -> s | ||
257 | - | _ -> failwith "parse_syntax" | 279 | + match apply_mult i0 l with |
280 | + _,C s -> s | ||
281 | + | i,_ -> raise (ParseError("parse_syntax","",i)) | ||
258 | 282 | ||
259 | -let check_quant_range cat l = | 283 | +let check_quant_range i0 cat l = |
260 | let set = StringSet.of_list ( | 284 | let set = StringSet.of_list ( |
261 | try SelectorMap.find selector_values cat | 285 | try SelectorMap.find selector_values cat |
262 | - with Not_found -> failwith ("check_quant_range: " ^ string_of_selector cat)) in | 286 | + with Not_found -> raise (ParseError("check_quant_range", string_of_selector cat, i0))) in |
263 | if StringSet.is_empty set then () else | 287 | if StringSet.is_empty set then () else |
264 | Xlist.iter l (fun v -> | 288 | Xlist.iter l (fun v -> |
265 | if not (StringSet.mem set v) then | 289 | if not (StringSet.mem set v) then |
266 | - failwith ("check_quant_range: " ^ string_of_selector cat ^ "=" ^ v)) | ||
267 | - | ||
268 | -let parse_quant_range = function | ||
269 | - _,["0"] -> Zero | ||
270 | - | _,["T"] -> Top | ||
271 | - | _,["all_numbers"] -> ENIAM_LCGrenderer.make_quant_restriction all_numbers | ||
272 | - | _,["all_cases"] -> ENIAM_LCGrenderer.make_quant_restriction all_cases | ||
273 | - | _,["all_genders"] -> ENIAM_LCGrenderer.make_quant_restriction all_genders | ||
274 | - | _,["all_persons"] -> ENIAM_LCGrenderer.make_quant_restriction all_persons | 290 | + raise (ParseError("check_quant_range", string_of_selector cat ^ "=" ^ v, i0))) |
291 | + | ||
292 | +let parse_quant_range i0 = function | ||
293 | + _,[_,"0"] -> Zero | ||
294 | + | _,[_,"T"] -> Top | ||
295 | + | _,[_,"all_numbers"] -> ENIAM_LCGrenderer.make_quant_restriction all_numbers | ||
296 | + | _,[_,"all_cases"] -> ENIAM_LCGrenderer.make_quant_restriction all_cases | ||
297 | + | _,[_,"all_genders"] -> ENIAM_LCGrenderer.make_quant_restriction all_genders | ||
298 | + | _,[_,"all_persons"] -> ENIAM_LCGrenderer.make_quant_restriction all_persons | ||
275 | | cat,l -> | 299 | | cat,l -> |
276 | - let l = Xstring.split "&" (String.concat "" l) in | ||
277 | - check_quant_range cat l; | 300 | + let l = Xstring.split "&" (String.concat "" (Xlist.map l snd)) in |
301 | + check_quant_range i0 cat l; | ||
278 | ENIAM_LCGrenderer.make_quant_restriction l | 302 | ENIAM_LCGrenderer.make_quant_restriction l |
279 | 303 | ||
280 | -let parse_quantifiers tokens = | ||
281 | - Xlist.map (split_comma [] [] tokens) (function | ||
282 | - cat :: "=" :: tokens -> | ||
283 | - let cat = selector_of_string cat in | ||
284 | - cat, parse_quant_range (cat,tokens) | ||
285 | - | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'") | ||
286 | - | [] -> failwith "parse_quantifiers: no token") | ||
287 | - | ||
288 | -let parse_raised tokens = | ||
289 | - Xlist.map (split_comma [] [] tokens) (function | ||
290 | - [cat] -> selector_of_string cat | ||
291 | - | t :: _ -> failwith ("parse_raised: unexpected token '" ^ t ^ "'") | ||
292 | - | [] -> failwith "parse_raised: no token") | 304 | +let parse_quantifiers i0 tokens = |
305 | + Xlist.map (split_comma i0 [] [] tokens) (function | ||
306 | + _,(i,cat) :: (_,"=") :: [] -> raise (ParseError("parse_quantifiers", "empty range", i)) | ||
307 | + | _,(i,cat) :: (_,"=") :: tokens -> | ||
308 | + let cat = catch_selector_of_string i "parse_quantifiers" cat in | ||
309 | + cat, parse_quant_range i (cat,tokens) | ||
310 | + | _,(i,t) :: _ -> raise (ParseError("parse_quantifiers", "unexpected token '" ^ t ^ "'", i)) | ||
311 | + | i0,[] -> raise (ParseError("parse_quantifiers", "no token", i0))) | ||
312 | + | ||
313 | +let parse_raised i0 tokens = | ||
314 | + Xlist.map (split_comma i0 [] [] tokens) (function | ||
315 | + _,[i,cat] -> catch_selector_of_string i "parse_raised" cat | ||
316 | + | _,(i,t) :: _ -> raise (ParseError("parse_raised", "unexpected token '" ^ t ^ "'", i)) | ||
317 | + | i0,[] -> raise (ParseError("parse_raised", "no token", i0))) | ||
293 | 318 | ||
294 | let rec find_syntax_end rev = function | 319 | let rec find_syntax_end rev = function |
295 | - ("BRACKET" :: _) as tokens -> List.rev rev, tokens | ||
296 | - | ("QUANT" :: "[" :: _) as tokens -> List.rev rev, tokens | ||
297 | - | ("RAISED" :: "[" :: _) as tokens -> List.rev rev, tokens | ||
298 | - | ("SEM" :: "[" :: _) as tokens -> List.rev rev, tokens | 320 | + ((_,"BRACKET") :: _) as tokens -> List.rev rev, tokens |
321 | + | ((_,"QUANT") :: (_,"[") :: _) as tokens -> List.rev rev, tokens | ||
322 | + | ((_,"RAISED") :: (_,"[") :: _) as tokens -> List.rev rev, tokens | ||
323 | + | ((_,"SEM") :: (_,"[") :: _) as tokens -> List.rev rev, tokens | ||
299 | | s :: tokens -> find_syntax_end (s :: rev) tokens | 324 | | s :: tokens -> find_syntax_end (s :: rev) tokens |
300 | | [] -> List.rev rev, [] | 325 | | [] -> List.rev rev, [] |
301 | 326 | ||
302 | -let parse_sem_term sem_term = String.concat "" sem_term | 327 | +let parse_sem_term sem_term = String.concat "" (Xlist.map sem_term snd) |
303 | 328 | ||
304 | let rec parse_rule atoms = function | 329 | let rec parse_rule atoms = function |
305 | - "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens | ||
306 | - | "QUANT" :: "[" :: tokens -> | ||
307 | - let quant,tokens = find_right_bracket [] tokens in | ||
308 | - Quant(parse_quantifiers quant) :: parse_rule atoms tokens | ||
309 | - | "RAISED" :: "[" :: tokens -> | ||
310 | - let raised,tokens = find_right_bracket [] tokens in | ||
311 | - Raised(parse_raised raised) :: parse_rule atoms tokens | ||
312 | - | "SEM" :: "[" :: tokens -> | ||
313 | - let sem_term,tokens = find_right_bracket [] tokens in | 330 | + (_,"BRACKET") :: tokens -> Bracket :: parse_rule atoms tokens |
331 | + | (_,"QUANT") :: (i,"[") :: tokens -> | ||
332 | + let quant,tokens = find_right_bracket i [] tokens in | ||
333 | + Quant(parse_quantifiers i quant) :: parse_rule atoms tokens | ||
334 | + | (_,"RAISED") :: (i,"[") :: tokens -> | ||
335 | + let raised,tokens = find_right_bracket i [] tokens in | ||
336 | + Raised(parse_raised i raised) :: parse_rule atoms tokens | ||
337 | + | (_,"SEM") :: (i,"[") :: tokens -> | ||
338 | + let sem_term,tokens = find_right_bracket i [] tokens in | ||
314 | Sem(parse_sem_term sem_term) :: parse_rule atoms tokens | 339 | Sem(parse_sem_term sem_term) :: parse_rule atoms tokens |
315 | | [] -> [] | 340 | | [] -> [] |
316 | | tokens -> | 341 | | tokens -> |
342 | + let i = fst (List.hd tokens) in | ||
317 | let syntax,tokens = find_syntax_end [] tokens in | 343 | let syntax,tokens = find_syntax_end [] tokens in |
318 | (* print_prefix 100 tokens; *) | 344 | (* print_prefix 100 tokens; *) |
319 | - Syntax(parse_syntax atoms syntax) :: parse_rule atoms tokens | 345 | + Syntax(parse_syntax i atoms syntax) :: parse_rule atoms tokens |
320 | 346 | ||
321 | -let parse_entry atoms weights tokens = | 347 | +let parse_entry i0 atoms weights tokens = |
322 | let prefix,tokens = manage_lemmata tokens in | 348 | let prefix,tokens = manage_lemmata tokens in |
323 | let selectors, rule, weight = | 349 | let selectors, rule, weight = |
324 | match split_colon [] [] tokens with | 350 | match split_colon [] [] tokens with |
325 | [selectors;rule] -> selectors, rule, 0. | 351 | [selectors;rule] -> selectors, rule, 0. |
326 | - | [selectors;rule;[weight]] -> selectors, rule, | 352 | + | [selectors;rule;[i,weight]] -> selectors, rule, |
327 | (try StringMap.find weights weight | 353 | (try StringMap.find weights weight |
328 | - with Not_found -> failwith ("parse_entry: unknown weight symbol '" ^ weight ^ "'")) | ||
329 | - | _ -> failwith ("parse_entry: invalid number of ':' in entry " ^ (String.concat " " tokens)) in | ||
330 | - let selectors = parse_selectors (prefix @ selectors) in | 354 | + with Not_found -> raise (ParseError("parse_entry", "unknown weight symbol '" ^ weight ^ "'", i))) |
355 | + | _ -> raise (ParseError("parse_entry", "invalid number of ':' in entry " ^ String.concat " " (Xlist.map tokens snd), i0)) in | ||
356 | + let selectors = parse_selectors i0 (prefix @ selectors) in | ||
331 | let rule = parse_rule atoms rule in | 357 | let rule = parse_rule atoms rule in |
332 | selectors, rule, weight | 358 | selectors, rule, weight |
333 | 359 | ||
334 | -let parse_lexicon atoms weights = function | ||
335 | - "@LEXICON" :: tokens -> | ||
336 | - let entries = split_semic [] [] tokens in | ||
337 | - List.rev (Xlist.rev_map entries (parse_entry atoms weights)) | ||
338 | - | s :: _ -> failwith ("parse_lexicon: '@LEXICON' expected while '" ^ s ^ "' found") | ||
339 | - | [] -> failwith "parse_lexicon: unexpexted end of input" | 360 | +let string_of_parse_error proc s i line = |
361 | + Printf.sprintf "LCG lexicon error in line %d: %s\n%s: %s" i line proc s | ||
362 | + | ||
363 | +let parse_lexicon i0 a atoms weights = function | ||
364 | + (i,"@LEXICON") :: tokens -> | ||
365 | + let entries = split_semic i [] [] tokens in | ||
366 | + Xlist.fold entries ([],true) (fun (entries,is_correct) (i,entry) -> | ||
367 | + try (parse_entry i atoms weights entry) :: entries, is_correct | ||
368 | + with ParseError(proc,s,i) -> | ||
369 | + print_endline (string_of_parse_error proc s i a.(i-1)); | ||
370 | + entries,false) | ||
371 | + | (i,s) :: _ -> raise (ParseError("parse_lexicon", "'@LEXICON' expected while '" ^ s ^ "' found", i)) | ||
372 | + | [] -> raise (ParseError("parse_lexicon", "unexpexted end of input", i0)) | ||
340 | 373 | ||
341 | let load_lexicon filename = | 374 | let load_lexicon filename = |
342 | - let lines = File.load_lines filename in | ||
343 | - let lines = List.rev (Xlist.rev_map lines remove_comments) in | ||
344 | - let tokens = List.flatten (Xlist.rev_map lines (fun line -> | 375 | + let lines = Xstring.split "\n" (File.load_file filename) in |
376 | + let a = Array.of_list lines in | ||
377 | + let lines,no_lines = Xlist.fold lines ([],1) (fun (lines,i) line -> (i,line) :: lines, i+1) in | ||
378 | + let lines = Xlist.rev_map lines (fun (i,line) -> i, remove_comments line) in | ||
379 | + let tokens = List.flatten (Xlist.rev_map lines (fun (i,line) -> | ||
345 | Xlist.rev_map (Str.full_split | 380 | Xlist.rev_map (Str.full_split |
346 | (Str.regexp "\\]\\| \\|\t\\|\r\\|\\?\\|:\\|;\\|&\\|!\\|=\\|}\\|{\\|,\\|\\*\\|/\\|\\+\\|)\\|(\\||\\|\\[\\|\\") line) (function | 381 | (Str.regexp "\\]\\| \\|\t\\|\r\\|\\?\\|:\\|;\\|&\\|!\\|=\\|}\\|{\\|,\\|\\*\\|/\\|\\+\\|)\\|(\\||\\|\\[\\|\\") line) (function |
347 | - Str.Text s -> s | ||
348 | - | Str.Delim s -> s))) in | 382 | + Str.Text s -> i,s |
383 | + | Str.Delim s -> i,s))) in | ||
349 | let tokens = Xlist.fold tokens [] (fun tokens -> function | 384 | let tokens = Xlist.fold tokens [] (fun tokens -> function |
350 | - " " -> tokens | ||
351 | - | "\t" -> tokens | ||
352 | - | "\r" -> tokens | ||
353 | - | t -> t :: tokens) in | ||
354 | - let phrase_names,tokens = parse_phrase_names tokens in | ||
355 | - let atoms = make_atoms phrase_names in | ||
356 | - let weights,tokens = parse_weights tokens in | ||
357 | - let lexicon = parse_lexicon atoms weights tokens in | ||
358 | - lexicon | 385 | + _," " -> tokens |
386 | + | _,"\t" -> tokens | ||
387 | + | _,"\r" -> tokens | ||
388 | + | i,t -> (i,t) :: tokens) in | ||
389 | + try | ||
390 | + let i,phrase_names,tokens = parse_phrase_names 1 tokens in | ||
391 | + let atoms = make_atoms phrase_names in | ||
392 | + let i,weights,tokens = parse_weights i tokens in | ||
393 | + let lexicon,is_correct = parse_lexicon i a atoms weights tokens in | ||
394 | + if is_correct then List.rev lexicon else exit 0 | ||
395 | + with ParseError(proc,s,i) -> | ||
396 | + print_endline (string_of_parse_error proc s i a.(i-1)); | ||
397 | + exit 0 |
LCGlexicon/ENIAMcategoriesPL.ml
@@ -37,7 +37,7 @@ let selector_values = Xlist.fold [ | @@ -37,7 +37,7 @@ let selector_values = Xlist.fold [ | ||
37 | "match-result";"url";"email";"obj-id";"building-number";"adj";"adjc";"adjp";"adja"; | 37 | "match-result";"url";"email";"obj-id";"building-number";"adj";"adjc";"adjp";"adja"; |
38 | "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; | 38 | "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; |
39 | "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; | 39 | "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; |
40 | - "sinterj";"burk";"interp";"xxx";"unk";"html-tag"]; | 40 | + "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"]; |
41 | Pos2, []; | 41 | Pos2, []; |
42 | Cat, []; | 42 | Cat, []; |
43 | Proj, []; | 43 | Proj, []; |
LCGlexicon/test.ml
@@ -97,7 +97,7 @@ let create_chart valence tokens last = | @@ -97,7 +97,7 @@ let create_chart valence tokens last = | ||
97 | let chart = Xlist.fold tokens chart (fun chart (id,lnode,rnode,orth,lemma,pos,interp,proper) -> | 97 | let chart = Xlist.fold tokens chart (fun chart (id,lnode,rnode,orth,lemma,pos,interp,proper) -> |
98 | ENIAM_LCGrenderer.reset_variable_names (); | 98 | ENIAM_LCGrenderer.reset_variable_names (); |
99 | ENIAM_LCGrenderer.add_variable_numbers (); | 99 | ENIAM_LCGrenderer.add_variable_numbers (); |
100 | - let cats = ENIAMcategoriesPL.clarify_categories proper ["X"] (lemma,pos,interp) in | 100 | + let cats = ENIAMcategoriesPL.clarify_categories proper "X" ["X"] (lemma,pos,interp) in |
101 | let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence [] in | 101 | let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence [] in |
102 | ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in | 102 | ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in |
103 | chart | 103 | chart |
LCGparser/ENIAM_LCGlatexOf.ml
@@ -216,11 +216,11 @@ let chart page text_fragments g = | @@ -216,11 +216,11 @@ let chart page text_fragments g = | ||
216 | "\\end{longtable}" | 216 | "\\end{longtable}" |
217 | 217 | ||
218 | let chart2 page text_fragments g = | 218 | let chart2 page text_fragments g = |
219 | - let n = match page with "a4" -> "10" | "a1" -> "40" | _ -> "20" in | ||
220 | - "\\begin{longtable}{|l|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^ | 219 | + let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in |
220 | + "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^ | ||
221 | String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> | 221 | String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> |
222 | let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in | 222 | let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in |
223 | - (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" layer node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ | 223 | + (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ |
224 | "\\end{longtable}" | 224 | "\\end{longtable}" |
225 | 225 | ||
226 | let print_chart path name page text_fragments g = | 226 | let print_chart path name page text_fragments g = |