Commit 762b53f4fdd9ea2b9d3d25f317ef21b573e5f29e

Authored by Wojciech Jaworski
1 parent dd414d0b

Informacja o błędach w leksykonie LCG

LCGlexicon/ENIAM_LCGlexiconParser.ml
@@ -22,6 +22,8 @@ open ENIAM_LCGtypes @@ -22,6 +22,8 @@ open ENIAM_LCGtypes
22 open ENIAM_LCGlexiconTypes 22 open ENIAM_LCGlexiconTypes
23 open ENIAMcategoriesPL 23 open ENIAMcategoriesPL
24 24
  25 +exception ParseError of string * string * int
  26 +
25 let rec get_first n = function 27 let rec get_first n = function
26 [] -> [] 28 [] -> []
27 | s :: l -> if n = 0 then [] else s :: (get_first (n-1) l) 29 | s :: l -> if n = 0 then [] else s :: (get_first (n-1) l)
@@ -35,98 +37,97 @@ let remove_comments line = @@ -35,98 +37,97 @@ let remove_comments line =
35 String.sub line 0 n 37 String.sub line 0 n
36 with Not_found -> line 38 with Not_found -> line
37 39
38 -let rec parse_phrase_names_rec rev = function  
39 - "@WEIGHTS" :: tokens -> List.rev rev, "@WEIGHTS" :: tokens  
40 - | "@LEXICON" :: tokens -> List.rev rev, "@LEXICON" :: tokens  
41 - | t :: tokens -> parse_phrase_names_rec (t :: rev) tokens  
42 - | [] -> failwith "parse_phrase_names_rec: unexpexted end of input"  
43 -  
44 -let parse_phrase_names = function  
45 - "@PHRASE_NAMES" :: tokens -> parse_phrase_names_rec [] tokens  
46 - | s :: _ -> failwith ("parse_phrase_names: '@PHRASE_NAMES' expected while '" ^ s ^ "' found")  
47 - | [] -> failwith "parse_phrase_names: unexpexted end of input"  
48 -  
49 -let rec parse_weights_rec weights = function  
50 - "@LEXICON" :: tokens -> weights, "@LEXICON" :: tokens  
51 - | w :: "=" :: n :: tokens -> parse_weights_rec (StringMap.add weights w (float_of_string n)) tokens  
52 - | s :: _ -> failwith ("parse_weights_rec: unexpexted token '" ^ s ^ "'")  
53 - | [] -> failwith "parse_weights_rec: unexpexted end of input"  
54 -  
55 -let parse_weights = function  
56 - "@WEIGHTS" :: tokens -> parse_weights_rec StringMap.empty tokens  
57 - | "@LEXICON" :: tokens -> StringMap.empty, "@LEXICON" :: tokens  
58 - | s :: _ -> failwith ("parse_weights: '@WEIGHTS' expected while '" ^ s ^ "' found")  
59 - | [] -> failwith "parse_weights: unexpexted end of input"  
60 -  
61 -let rec split_semic found rev = function  
62 - "lemma" :: "=" :: ";" :: l -> split_semic found (";" :: "=" :: "lemma" :: rev) l  
63 - | ";" :: l -> split_semic (List.rev rev :: found) [] l  
64 - | s :: l -> split_semic found (s :: rev) l  
65 - | [] -> if rev = [] then List.rev found else List.rev ((List.rev rev) :: found) 40 +let rec parse_phrase_names_rec i0 rev = function
  41 + (i,"@WEIGHTS") :: tokens -> i, List.rev rev, (i,"@WEIGHTS") :: tokens
  42 + | (i,"@LEXICON") :: tokens -> i, List.rev rev, (i,"@LEXICON") :: tokens
  43 + | (i,t) :: tokens -> parse_phrase_names_rec i0 ((i,t) :: rev) tokens
  44 + | [] -> raise (ParseError("parse_phrase_names_rec", "unexpexted end of input", i0))
  45 +
  46 +let parse_phrase_names i0 = function
  47 + (i,"@PHRASE_NAMES") :: tokens -> parse_phrase_names_rec i [] tokens
  48 + | (i,s) :: _ -> raise (ParseError("parse_phrase_names", "'@PHRASE_NAMES' expected while '" ^ s ^ "' found", i))
  49 + | [] -> raise (ParseError("parse_phrase_names", "unexpexted end of input", i0))
  50 +
  51 +let rec parse_weights_rec i0 weights = function
  52 + (i,"@LEXICON") :: tokens -> i, weights, (i,"@LEXICON") :: tokens
  53 + | (_,w) :: (_,"=") :: (i,n) :: tokens -> parse_weights_rec i (StringMap.add weights w (float_of_string n)) tokens
  54 + | (i,s) :: _ -> raise (ParseError("parse_weights_rec", "unexpexted token '" ^ s ^ "'", i))
  55 + | [] -> raise (ParseError("parse_weights_rec", "unexpexted end of input", i0))
  56 +
  57 +let parse_weights i0 = function
  58 + (i,"@WEIGHTS") :: tokens -> parse_weights_rec i StringMap.empty tokens
  59 + | (i,"@LEXICON") :: tokens -> i, StringMap.empty, (i,"@LEXICON") :: tokens
  60 + | (i,s) :: _ -> raise (ParseError("parse_weights", "'@WEIGHTS' expected while '" ^ s ^ "' found", i))
  61 + | [] -> raise (ParseError("parse_weights", "unexpexted end of input", i0))
  62 +
  63 +let rec split_semic i0 found rev = function
  64 + (i1,"lemma") :: (i2,"=") :: (i3,";") :: l -> split_semic (if rev = [] then i1 else i0) found ((i1,";") :: (i2,"=") :: (i3,"lemma") :: rev) l
  65 + | (i,";") :: l -> split_semic i ((i0, List.rev rev) :: found) [] l
  66 + | (i,s) :: l -> split_semic (if rev = [] then i else i0) found ((i,s) :: rev) l
  67 + | [] -> if rev = [] then List.rev found else List.rev ((i0, List.rev rev) :: found)
66 68
67 let rec split_colon found rev = function 69 let rec split_colon found rev = function
68 - "lemma" :: "=" :: ":" :: l -> split_colon found (":" :: "=" :: "lemma" :: rev) l  
69 - | ":" :: l -> split_colon (List.rev rev :: found) [] l  
70 - | s :: l -> split_colon found (s :: rev) l 70 + (i1,"lemma") :: (i2,"=") :: (i3,":") :: l -> split_colon found ((i1,":") :: (i2,"=") :: (i3,"lemma") :: rev) l
  71 + | (_,":") :: l -> split_colon (List.rev rev :: found) [] l
  72 + | (i,s) :: l -> split_colon found ((i,s) :: rev) l
71 | [] -> if rev = [] then List.rev found else List.rev ((List.rev rev) :: found) 73 | [] -> if rev = [] then List.rev found else List.rev ((List.rev rev) :: found)
72 74
73 -let rec split_comma found rev = function  
74 - "lemma" :: "=" :: "," :: l -> split_comma found ("," :: "=" :: "lemma" :: rev) l  
75 - | "," :: l -> split_comma (List.rev rev :: found) [] l  
76 - | s :: l -> split_comma found (s :: rev) l  
77 - | [] -> if rev = [] then List.rev found else List.rev ((List.rev rev) :: found) 75 +let rec split_comma i0 found rev = function
  76 + (i1,"lemma") :: (i2,"=") :: (i3,",") :: l -> split_comma (if rev = [] then i1 else i0) found ((i1,",") :: (i2,"=") :: (i3,"lemma") :: rev) l
  77 + | (i,",") :: l -> split_comma i ((i0, List.rev rev) :: found) [] l
  78 + | (i,s) :: l -> split_comma (if rev = [] then i else i0) found ((i,s) :: rev) l
  79 + | [] -> if rev = [] then List.rev found else List.rev ((i0, List.rev rev) :: found)
78 80
79 -let rec find_right_bracket rev = function  
80 - "]" :: l -> List.rev rev, l  
81 - | s :: l -> find_right_bracket (s :: rev) l  
82 - | [] -> failwith "find_right_bracket" 81 +let catch_selector_of_string i proc s =
  82 + try selector_of_string s
  83 + with _ -> raise (ParseError(proc, "unknown selector: " ^ s, i))
83 84
84 let match_selectors = function 85 let match_selectors = function
85 - s :: l -> (try selector_of_string s,l with _ -> failwith ("match_selectors: " ^ s))  
86 - | [] -> failwith "match_selectors: empty" 86 + i0,(i,s) :: l -> i,catch_selector_of_string i "match_selectors" s,l
  87 + | i0,[] -> raise (ParseError("match_selectors", "empty", i0))
87 88
88 let match_relation = function 89 let match_relation = function
89 (* cat,"=" :: "=" :: l -> cat,StrictEq,l *) 90 (* cat,"=" :: "=" :: l -> cat,StrictEq,l *)
90 - | cat,"!" :: "=" :: l -> cat,Neq,l  
91 - | cat,"=" :: l -> cat,Eq,l  
92 - | cat,s :: l -> failwith ("match_relation: " ^ (String.concat " " (s :: l)))  
93 - | cat,[] -> failwith "match_relation: empty"  
94 -  
95 -let rec split_mid rev = function  
96 - [s] -> List.rev (s :: rev)  
97 - | s :: "|" :: l -> split_mid (s :: rev) l  
98 - | [] -> failwith "split_mid: empty"  
99 - | l -> failwith ("split_mid: " ^ (String.concat " " l))  
100 -  
101 -let rec check_value selector l = 91 + | i,cat,(_,"!") :: (_,"=") :: l -> i,cat,Neq,l
  92 + | i,cat,(_,"=") :: l -> i,cat,Eq,l
  93 + | _,cat,(i,s) :: l -> raise (ParseError("match_relation", "relation symbol not found: " ^ String.concat " " (s :: Xlist.map l snd), i))
  94 + | i,cat,[] -> raise (ParseError("match_relation", "empty", i))
  95 +
  96 +let rec split_mid i0 rev = function
  97 + [i,s] -> List.rev ((i,s) :: rev)
  98 + | (i1,s) :: (i2,"|") :: l -> split_mid i2 ((i1,s) :: rev) l
  99 + | [] -> raise (ParseError("split_mid", "empty", i0))
  100 + | (i,s) :: l -> raise (ParseError("split_mid", "delimiter not found: " ^ String.concat " " (s :: Xlist.map l snd), i))
  101 +
  102 +let rec check_value i0 selector l =
102 let vals = try SelectorMap.find selector_values selector 103 let vals = try SelectorMap.find selector_values selector
103 - with Not_found -> failwith ("check_value: invalid selector " ^ string_of_selector selector) in 104 + with Not_found -> raise (ParseError("check_value", "invalid selector: " ^ string_of_selector selector, i0)) in
104 if vals = [] then () else 105 if vals = [] then () else
105 - Xlist.iter l (fun s -> 106 + Xlist.iter l (fun (i,s) ->
106 if not (Xlist.mem vals s) then 107 if not (Xlist.mem vals s) then
107 - failwith ("check_value: invalid selector " ^ string_of_selector selector ^ "=" ^ s));  
108 - l 108 + raise (ParseError("check_value", "invalid selector: " ^ string_of_selector selector ^ "=" ^ s, i)));
  109 + Xlist.map l snd
109 110
110 let match_value = function 111 let match_value = function
111 - cat,rel,[s] -> cat,rel,[s]  
112 - | cat,rel,[] -> failwith "match_value: empty"  
113 - | cat,rel,l -> cat,rel, check_value cat (split_mid [] l) 112 + i,cat,rel,[s] -> cat,rel, check_value i cat [s]
  113 + | i,cat,rel,[] -> raise (ParseError("match_value", "empty", i))
  114 + | i,cat,rel,l -> cat,rel, check_value i cat (split_mid i [] l)
114 115
115 -let parse_selectors l = 116 +let parse_selectors i0 l =
116 (* print_endline s; *) 117 (* print_endline s; *)
117 (* let l = Xlist.map (Str.full_split (Str.regexp "|\\|,\\|=\\|!") s) (function 118 (* let l = Xlist.map (Str.full_split (Str.regexp "|\\|,\\|=\\|!") s) (function
118 Str.Text s -> s 119 Str.Text s -> s
119 | Str.Delim s -> s) in *) 120 | Str.Delim s -> s) in *)
120 - let ll = split_comma [] [] l in 121 + let ll = split_comma i0 [] [] l in
121 let l = Xlist.rev_map ll match_selectors in 122 let l = Xlist.rev_map ll match_selectors in
122 let l = Xlist.rev_map l match_relation in 123 let l = Xlist.rev_map l match_relation in
123 let l = Xlist.rev_map l match_value in 124 let l = Xlist.rev_map l match_value in
124 l 125 l
125 126
126 let manage_lemmata = function 127 let manage_lemmata = function
127 - "lemma" :: "=" :: ":" :: "," :: tokens -> ["lemma";"=";":";","],tokens  
128 - | "lemma" :: "=" :: ":" :: s :: "," :: tokens -> ["lemma";"=";":"^s;","],tokens  
129 - | "lemma" :: "=" :: "<" :: "/" :: s :: "," :: tokens -> ["lemma";"=";"</"^s;","],tokens 128 + (i1,"lemma") :: (i2,"=") :: (i3,":") :: (i4,",") :: tokens -> [i1,"lemma";i2,"=";i3,":";i4,","],tokens
  129 + | (i1,"lemma") :: (i2,"=") :: (i3,":") :: (i4,s) :: (i5,",") :: tokens -> [i1,"lemma";i2,"=";i3,":"^s;i5,","],tokens
  130 + | (i1,"lemma") :: (i2,"=") :: (i3,"<") :: (i4,"/") :: (i5,s) :: (i6,",") :: tokens -> [i1,"lemma";i2,"=";i3,"</"^s;i6,","],tokens
130 | tokens -> [],tokens 131 | tokens -> [],tokens
131 132
132 133
@@ -138,81 +139,104 @@ type syntax = @@ -138,81 +139,104 @@ type syntax =
138 | E of (direction * grammar_symbol) list 139 | E of (direction * grammar_symbol) list
139 140
140 let make_atoms phrase_names = 141 let make_atoms phrase_names =
141 - SelectorMap.fold selector_values (StringSet.of_list phrase_names) (fun atoms _ l -> 142 + SelectorMap.fold selector_values (StringSet.of_list (Xlist.rev_map phrase_names snd)) (fun atoms _ l ->
142 Xlist.fold l atoms StringSet.add) 143 Xlist.fold l atoms StringSet.add)
143 144
  145 +let rec find_right_bracket i0 rev = function
  146 + (_,"]") :: l -> List.rev rev, l
  147 + | (i,s) :: l -> find_right_bracket i ((i,s) :: rev) l
  148 + | [] -> raise (ParseError("find_right_bracket", "empty", i0))
  149 +
144 let operators = StringSet.of_list [ 150 let operators = StringSet.of_list [
145 "*"; "+"; "/"; "|"; "\\"; "("; ")"; ","; "{"; "}"; "?"] 151 "*"; "+"; "/"; "|"; "\\"; "("; ")"; ","; "{"; "}"; "?"]
146 152
147 let find_internal_grammar_symbols atoms = function 153 let find_internal_grammar_symbols atoms = function
148 - | "T" -> B Top  
149 - | "1" -> C One  
150 - | "schema" -> D(Both,Tensor[AVar "schema"])  
151 - | "adjuncts" -> D(Both,Tensor[AVar "adjuncts"])  
152 - | s -> if StringSet.mem selector_names s then B (AVar s) else  
153 - if StringSet.mem atoms s then B (Atom s) else  
154 - if StringSet.mem operators s then A s else  
155 - failwith ("find_internal_grammar_symbols: unknown symbol " ^ s) 154 + | i,"T" -> i,B Top
  155 + | i,"1" -> i,C One
  156 + | i,"schema" -> i,D(Both,Tensor[AVar "schema"])
  157 + | i,"adjuncts" -> i,D(Both,Tensor[AVar "adjuncts"])
  158 + | i,s -> if StringSet.mem selector_names s then i,B (AVar s) else
  159 + if StringSet.mem atoms s then i,B (Atom s) else
  160 + if StringSet.mem operators s then i,A s else
  161 + raise (ParseError("find_internal_grammar_symbols", "unknown symbol " ^ s, i))
  162 +
  163 +let rec find_tensor2 rev = function
  164 + (_,B s1) :: (_,A "*") :: (i,B s2) :: l -> find_tensor2 (s1 :: rev) ((i,B s2) :: l)
  165 + | (_,B s1) :: l -> List.rev (s1 :: rev), l
  166 + | (i,t) :: l -> raise (ParseError("find_tensor2", "", i))
  167 + | [] -> failwith "find_tensor2"
156 168
157 let rec find_tensor = function 169 let rec find_tensor = function
158 - B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: A "*" :: B s8 :: l -> failwith "find_tensor 1" 170 + (* B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: A "*" :: B s8 :: l -> failwith "find_tensor 1"
159 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6;s7]) :: find_tensor l 171 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: A "*" :: B s7 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6;s7]) :: find_tensor l
160 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6]) :: find_tensor l 172 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: A "*" :: B s6 :: l -> C (Tensor[s1;s2;s3;s4;s5;s6]) :: find_tensor l
161 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: l -> C (Tensor[s1;s2;s3;s4;s5]) :: find_tensor l 173 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: A "*" :: B s5 :: l -> C (Tensor[s1;s2;s3;s4;s5]) :: find_tensor l
162 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: l -> C (Tensor[s1;s2;s3;s4]) :: find_tensor l 174 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: A "*" :: B s4 :: l -> C (Tensor[s1;s2;s3;s4]) :: find_tensor l
163 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: l -> C (Tensor[s1;s2;s3]) :: find_tensor l 175 | B s1 :: A "*" :: B s2 :: A "*" :: B s3 :: l -> C (Tensor[s1;s2;s3]) :: find_tensor l
164 - | B s1 :: A "*" :: B s2 :: l -> C (Tensor[s1;s2]) :: find_tensor l  
165 - | B s1 :: l -> C (Tensor[s1]) :: find_tensor l  
166 - | A "*" :: _ -> failwith "find_tensor 2: unexpected '*'" 176 + | B s1 :: A "*" :: B s2 :: l -> C (Tensor[s1;s2]) :: find_tensor l *)
  177 + | (i,B s1) :: l -> let sl,l = find_tensor2 [] ((i,B s1) :: l) in (i,C (Tensor sl)) :: find_tensor l
  178 + | (i,A "*") :: _ -> raise (ParseError("find_tensor", "unexpected '*'", i))
167 | t :: l -> t :: find_tensor l 179 | t :: l -> t :: find_tensor l
168 | [] -> [] 180 | [] -> []
169 181
  182 +let rec find_plus2 rev = function
  183 + (_,C s1) :: (_,A "+") :: (i,C s2) :: l -> find_plus2 (s1 :: rev) ((i,C s2) :: l)
  184 + | (_,C s1) :: l -> List.rev (s1 :: rev), l
  185 + | (i,t) :: l -> raise (ParseError("find_plus2", "", i))
  186 + | [] -> failwith "find_plus2"
  187 +
170 let rec find_plus = function 188 let rec find_plus = function
171 - C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: A "+" :: C s7 :: l -> failwith "find_plus 1" 189 + (* C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: A "+" :: C s7 :: l -> failwith "find_plus 1"
172 | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: l -> C (Plus[s1;s2;s3;s4;s5;s6]) :: find_plus l 190 | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: A "+" :: C s6 :: l -> C (Plus[s1;s2;s3;s4;s5;s6]) :: find_plus l
173 | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: l -> C (Plus[s1;s2;s3;s4;s5]) :: find_plus l 191 | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: A "+" :: C s5 :: l -> C (Plus[s1;s2;s3;s4;s5]) :: find_plus l
174 | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: l -> C (Plus[s1;s2;s3;s4]) :: find_plus l 192 | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: A "+" :: C s4 :: l -> C (Plus[s1;s2;s3;s4]) :: find_plus l
175 - | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: l -> C (Plus[s1;s2;s3]) :: find_plus l  
176 - | C s1 :: A "+" :: C s2 :: l -> C (Plus[s1;s2]) :: find_plus l  
177 - | A "+" :: _ -> failwith "find_plus 2: unexpected '+'" 193 + | C s1 :: A "+" :: C s2 :: A "+" :: C s3 :: l -> C (Plus[s1;s2;s3]) :: find_plus l *)
  194 + | (i1,C s1) :: (i2,A "+") :: (i3,C s2) :: l -> let sl,l = find_plus2 [] ((i1,C s1) :: (i2,A "+") :: (i3,C s2) :: l) in (i1,C (Plus sl)) :: find_plus l
  195 + | (i,A "+") :: _ -> raise (ParseError("find_plus 2", "unexpected '+'", i))
178 | t :: l -> t :: find_plus l 196 | t :: l -> t :: find_plus l
179 | [] -> [] 197 | [] -> []
180 198
181 let rec find_paren = function 199 let rec find_paren = function
182 - A "(" :: C s :: A ")" :: l -> C s :: find_paren l  
183 - | s :: l -> s :: find_paren l 200 + (_,A "(") :: (i,C s) :: (_,A ")") :: l -> (i,C s) :: find_paren l
  201 + | (i,s) :: l -> (i,s) :: find_paren l
184 | [] -> [] 202 | [] -> []
185 203
186 let rec find_imp = function 204 let rec find_imp = function
187 - | C s1 :: A "/" :: C s2 :: l -> C (Imp(s1,Forward,s2)) :: find_imp l  
188 - | C s1 :: A "|" :: C s2 :: l -> C (Imp(s1,Both,s2)) :: find_imp l  
189 - | C s1 :: A "\\" :: C s2 :: l -> C (Imp(s1,Backward,s2)) :: find_imp l  
190 - | s :: l -> s :: find_imp l 205 + | (i,C s1) :: (_,A "/") :: (_,C s2) :: l -> (i,C (Imp(s1,Forward,s2))) :: find_imp l
  206 + | (i,C s1) :: (_,A "|") :: (_,C s2) :: l -> (i,C (Imp(s1,Both,s2))) :: find_imp l
  207 + | (i,C s1) :: (_,A "\\") :: (_,C s2) :: l -> (i,C (Imp(s1,Backward,s2))) :: find_imp l
  208 + | (i,s) :: l -> (i,s) :: find_imp l
191 | [] -> [] 209 | [] -> []
192 210
193 let rec find_maybe = function 211 let rec find_maybe = function
194 - | A "?" :: C s2 :: l -> C (Maybe s2) :: find_maybe l  
195 - | A "?" :: _ -> failwith "find_maybe 1: unexpected '?'"  
196 - | s :: l -> s :: find_maybe l 212 + | (i,A "?") :: (_,C s2) :: l -> (i,C (Maybe s2)) :: find_maybe l
  213 + | (i,A "?") :: _ -> raise (ParseError("find_maybe 1", "unexpected '?'", i))
  214 + | (i,s) :: l -> (i,s) :: find_maybe l
197 | [] -> [] 215 | [] -> []
198 216
199 let rec find_mult_imp = function 217 let rec find_mult_imp = function
200 - | A "{" :: A "/" :: C s2 :: l -> A "{" :: D (Forward,s2) :: find_mult_imp l  
201 - | A "{" :: A "|" :: C s2 :: l -> A "{" :: D (Both,s2) :: find_mult_imp l  
202 - | A "{" :: A "\\" :: C s2 :: l -> A "{" :: D (Backward,s2) :: find_mult_imp l  
203 - | A "," :: A "/" :: C s2 :: l -> A "," :: D (Forward,s2) :: find_mult_imp l  
204 - | A "," :: A "|" :: C s2 :: l -> A "," :: D (Both,s2) :: find_mult_imp l  
205 - | A "," :: A "\\" :: C s2 :: l -> A "," :: D (Backward,s2) :: find_mult_imp l  
206 - | A "/" :: _ -> failwith "find_mult_imp 1: unexpected '/'"  
207 - | A "|" :: _ -> failwith "find_mult_imp 2: unexpected '|'"  
208 - | A "\\" :: _ -> failwith "find_mult_imp 3: unexpected '\\'"  
209 - | A "(" :: _ -> failwith "find_mult_imp 4: unexpected '('"  
210 - | A ")" :: _ -> failwith "find_mult_imp 5: unexpected ')'"  
211 - | s :: l -> s :: find_mult_imp l 218 + | (i1,A "{") :: (i2,A "/") :: (_,C s2) :: l -> (i1,A "{") :: (i2,D (Forward,s2)) :: find_mult_imp l
  219 + | (i1,A "{") :: (i2,A "|") :: (_,C s2) :: l -> (i1,A "{") :: (i2,D (Both,s2)) :: find_mult_imp l
  220 + | (i1,A "{") :: (i2,A "\\") :: (_,C s2) :: l -> (i1,A "{") :: (i2,D (Backward,s2)) :: find_mult_imp l
  221 + | (i1,A ",") :: (i2,A "/") :: (_,C s2) :: l -> (i1,A ",") :: (i2,D (Forward,s2)) :: find_mult_imp l
  222 + | (i1,A ",") :: (i2,A "|") :: (_,C s2) :: l -> (i1,A ",") :: (i2,D (Both,s2)) :: find_mult_imp l
  223 + | (i1,A ",") :: (i2,A "\\") :: (_,C s2) :: l -> (i1,A ",") :: (i2,D (Backward,s2)) :: find_mult_imp l
  224 + | (i,A "/") :: _ -> raise (ParseError("find_mult_imp 1", "unexpected '/'", i))
  225 + | (i,A "|") :: _ -> raise (ParseError("find_mult_imp 2", "unexpected '|'", i))
  226 + | (i,A "\\") :: _ -> raise (ParseError("find_mult_imp 3", "unexpected '\\'", i))
  227 + | (i,A "(") :: _ -> raise (ParseError("find_mult_imp 4", "unexpected '('", i))
  228 + | (i,A ")") :: _ -> raise (ParseError("find_mult_imp 5", "unexpected ')'", i))
  229 + | (i,s) :: l -> (i,s) :: find_mult_imp l
212 | [] -> [] 230 | [] -> []
213 231
  232 +let rec find_mult2 rev = function
  233 + (_,D(s1,t1)) :: (_,A ",") :: (i,D(s2,t2)) :: l -> find_mult2 ((s1,t1) :: rev) ((i,D(s2,t2)) :: l)
  234 + | (_,D(s1,t1)) :: (_,A "}") :: l -> List.rev ((s1,t1) :: rev), l
  235 + | (i,t) :: l -> raise (ParseError("find_mult2", "", i))
  236 + | [] -> failwith "find_mult2"
  237 +
214 let rec find_mult = function 238 let rec find_mult = function
215 - A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: to many elements in { }" 239 + (* A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "," :: D _ :: l -> failwith "find_mult 1: to many elements in { }"
216 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l 240 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "," :: D(s10,t10) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9;s10,t10] :: find_mult l
217 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l 241 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "," :: D(s9,t9) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8;s9,t9] :: find_mult l
218 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l 242 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "," :: D(s6,t6) :: A "," :: D(s7,t7) :: A "," :: D(s8,t8) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5;s6,t6;s7,t7;s8,t8] :: find_mult l
@@ -221,20 +245,20 @@ let rec find_mult = function @@ -221,20 +245,20 @@ let rec find_mult = function
221 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l 245 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "," :: D(s5,t5) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4;s5,t5] :: find_mult l
222 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4] :: find_mult l 246 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "," :: D(s4,t4) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3;s4,t4] :: find_mult l
223 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3] :: find_mult l 247 | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "," :: D(s3,t3) :: A "}" :: l -> E[s1,t1;s2,t2;s3,t3] :: find_mult l
224 - | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "}" :: l -> E[s1,t1;s2,t2] :: find_mult l  
225 - | A "{" :: D(s1,t1) :: A "}" :: l -> E[s1,t1] :: find_mult l  
226 - | A "{" :: _ -> failwith "find_mult 2: unexpected '{'"  
227 - | A "}" :: _ -> failwith "find_mult 3: unexpected '}'"  
228 - | A "," :: _ -> failwith "find_mult 4: unexpected ','" 248 + | A "{" :: D(s1,t1) :: A "," :: D(s2,t2) :: A "}" :: l -> E[s1,t1;s2,t2] :: find_mult l *)
  249 + | (_,A "{") :: (i,D(s1,t1)) :: l -> let sl,l = find_mult2 [] ((i,D(s1,t1)) :: l) in (i,E sl) :: find_mult l
  250 + | (i,A "{") :: _ -> raise (ParseError("find_mult 2", "unexpected '{'", i))
  251 + | (i,A "}") :: _ -> raise (ParseError("find_mult 3", "unexpected '}'", i))
  252 + | (i,A ",") :: _ -> raise (ParseError("find_mult 4", "unexpected ','", i))
229 | t :: l -> t :: find_mult l 253 | t :: l -> t :: find_mult l
230 | [] -> [] 254 | [] -> []
231 255
232 -let rec apply_mult = function  
233 - C s :: E t :: l -> apply_mult (C (ImpSet(s,t)) :: l)  
234 - | [C s] -> C s  
235 - | _ -> failwith "apply_mult" 256 +let rec apply_mult i0 = function
  257 + (i1,C s) :: (i2,E t) :: l -> apply_mult i2 ((i1,C (ImpSet(s,t))) :: l)
  258 + | [i,C s] -> i,C s
  259 + | _ -> raise (ParseError("apply_mult","",i0))
236 260
237 -let parse_syntax atoms l = 261 +let parse_syntax i0 atoms l =
238 (* print_endline s; *) 262 (* print_endline s; *)
239 (* let l = Xlist.map (Str.full_split (Str.regexp "?\\|}\\|{\\|,\\|*\\|/\\|+\\|)\\|(\\||\\|\\") s) (function 263 (* let l = Xlist.map (Str.full_split (Str.regexp "?\\|}\\|{\\|,\\|*\\|/\\|+\\|)\\|(\\||\\|\\") s) (function
240 Str.Text s -> s 264 Str.Text s -> s
@@ -252,107 +276,122 @@ let parse_syntax atoms l = @@ -252,107 +276,122 @@ let parse_syntax atoms l =
252 let l = find_paren l in 276 let l = find_paren l in
253 let l = find_mult_imp l in 277 let l = find_mult_imp l in
254 let l = find_mult l in 278 let l = find_mult l in
255 - match apply_mult l with  
256 - C s -> s  
257 - | _ -> failwith "parse_syntax" 279 + match apply_mult i0 l with
  280 + _,C s -> s
  281 + | i,_ -> raise (ParseError("parse_syntax","",i))
258 282
259 -let check_quant_range cat l = 283 +let check_quant_range i0 cat l =
260 let set = StringSet.of_list ( 284 let set = StringSet.of_list (
261 try SelectorMap.find selector_values cat 285 try SelectorMap.find selector_values cat
262 - with Not_found -> failwith ("check_quant_range: " ^ string_of_selector cat)) in 286 + with Not_found -> raise (ParseError("check_quant_range", string_of_selector cat, i0))) in
263 if StringSet.is_empty set then () else 287 if StringSet.is_empty set then () else
264 Xlist.iter l (fun v -> 288 Xlist.iter l (fun v ->
265 if not (StringSet.mem set v) then 289 if not (StringSet.mem set v) then
266 - failwith ("check_quant_range: " ^ string_of_selector cat ^ "=" ^ v))  
267 -  
268 -let parse_quant_range = function  
269 - _,["0"] -> Zero  
270 - | _,["T"] -> Top  
271 - | _,["all_numbers"] -> ENIAM_LCGrenderer.make_quant_restriction all_numbers  
272 - | _,["all_cases"] -> ENIAM_LCGrenderer.make_quant_restriction all_cases  
273 - | _,["all_genders"] -> ENIAM_LCGrenderer.make_quant_restriction all_genders  
274 - | _,["all_persons"] -> ENIAM_LCGrenderer.make_quant_restriction all_persons 290 + raise (ParseError("check_quant_range", string_of_selector cat ^ "=" ^ v, i0)))
  291 +
  292 +let parse_quant_range i0 = function
  293 + _,[_,"0"] -> Zero
  294 + | _,[_,"T"] -> Top
  295 + | _,[_,"all_numbers"] -> ENIAM_LCGrenderer.make_quant_restriction all_numbers
  296 + | _,[_,"all_cases"] -> ENIAM_LCGrenderer.make_quant_restriction all_cases
  297 + | _,[_,"all_genders"] -> ENIAM_LCGrenderer.make_quant_restriction all_genders
  298 + | _,[_,"all_persons"] -> ENIAM_LCGrenderer.make_quant_restriction all_persons
275 | cat,l -> 299 | cat,l ->
276 - let l = Xstring.split "&" (String.concat "" l) in  
277 - check_quant_range cat l; 300 + let l = Xstring.split "&" (String.concat "" (Xlist.map l snd)) in
  301 + check_quant_range i0 cat l;
278 ENIAM_LCGrenderer.make_quant_restriction l 302 ENIAM_LCGrenderer.make_quant_restriction l
279 303
280 -let parse_quantifiers tokens =  
281 - Xlist.map (split_comma [] [] tokens) (function  
282 - cat :: "=" :: tokens ->  
283 - let cat = selector_of_string cat in  
284 - cat, parse_quant_range (cat,tokens)  
285 - | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'")  
286 - | [] -> failwith "parse_quantifiers: no token")  
287 -  
288 -let parse_raised tokens =  
289 - Xlist.map (split_comma [] [] tokens) (function  
290 - [cat] -> selector_of_string cat  
291 - | t :: _ -> failwith ("parse_raised: unexpected token '" ^ t ^ "'")  
292 - | [] -> failwith "parse_raised: no token") 304 +let parse_quantifiers i0 tokens =
  305 + Xlist.map (split_comma i0 [] [] tokens) (function
  306 + _,(i,cat) :: (_,"=") :: [] -> raise (ParseError("parse_quantifiers", "empty range", i))
  307 + | _,(i,cat) :: (_,"=") :: tokens ->
  308 + let cat = catch_selector_of_string i "parse_quantifiers" cat in
  309 + cat, parse_quant_range i (cat,tokens)
  310 + | _,(i,t) :: _ -> raise (ParseError("parse_quantifiers", "unexpected token '" ^ t ^ "'", i))
  311 + | i0,[] -> raise (ParseError("parse_quantifiers", "no token", i0)))
  312 +
  313 +let parse_raised i0 tokens =
  314 + Xlist.map (split_comma i0 [] [] tokens) (function
  315 + _,[i,cat] -> catch_selector_of_string i "parse_raised" cat
  316 + | _,(i,t) :: _ -> raise (ParseError("parse_raised", "unexpected token '" ^ t ^ "'", i))
  317 + | i0,[] -> raise (ParseError("parse_raised", "no token", i0)))
293 318
294 let rec find_syntax_end rev = function 319 let rec find_syntax_end rev = function
295 - ("BRACKET" :: _) as tokens -> List.rev rev, tokens  
296 - | ("QUANT" :: "[" :: _) as tokens -> List.rev rev, tokens  
297 - | ("RAISED" :: "[" :: _) as tokens -> List.rev rev, tokens  
298 - | ("SEM" :: "[" :: _) as tokens -> List.rev rev, tokens 320 + ((_,"BRACKET") :: _) as tokens -> List.rev rev, tokens
  321 + | ((_,"QUANT") :: (_,"[") :: _) as tokens -> List.rev rev, tokens
  322 + | ((_,"RAISED") :: (_,"[") :: _) as tokens -> List.rev rev, tokens
  323 + | ((_,"SEM") :: (_,"[") :: _) as tokens -> List.rev rev, tokens
299 | s :: tokens -> find_syntax_end (s :: rev) tokens 324 | s :: tokens -> find_syntax_end (s :: rev) tokens
300 | [] -> List.rev rev, [] 325 | [] -> List.rev rev, []
301 326
302 -let parse_sem_term sem_term = String.concat "" sem_term 327 +let parse_sem_term sem_term = String.concat "" (Xlist.map sem_term snd)
303 328
304 let rec parse_rule atoms = function 329 let rec parse_rule atoms = function
305 - "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens  
306 - | "QUANT" :: "[" :: tokens ->  
307 - let quant,tokens = find_right_bracket [] tokens in  
308 - Quant(parse_quantifiers quant) :: parse_rule atoms tokens  
309 - | "RAISED" :: "[" :: tokens ->  
310 - let raised,tokens = find_right_bracket [] tokens in  
311 - Raised(parse_raised raised) :: parse_rule atoms tokens  
312 - | "SEM" :: "[" :: tokens ->  
313 - let sem_term,tokens = find_right_bracket [] tokens in 330 + (_,"BRACKET") :: tokens -> Bracket :: parse_rule atoms tokens
  331 + | (_,"QUANT") :: (i,"[") :: tokens ->
  332 + let quant,tokens = find_right_bracket i [] tokens in
  333 + Quant(parse_quantifiers i quant) :: parse_rule atoms tokens
  334 + | (_,"RAISED") :: (i,"[") :: tokens ->
  335 + let raised,tokens = find_right_bracket i [] tokens in
  336 + Raised(parse_raised i raised) :: parse_rule atoms tokens
  337 + | (_,"SEM") :: (i,"[") :: tokens ->
  338 + let sem_term,tokens = find_right_bracket i [] tokens in
314 Sem(parse_sem_term sem_term) :: parse_rule atoms tokens 339 Sem(parse_sem_term sem_term) :: parse_rule atoms tokens
315 | [] -> [] 340 | [] -> []
316 | tokens -> 341 | tokens ->
  342 + let i = fst (List.hd tokens) in
317 let syntax,tokens = find_syntax_end [] tokens in 343 let syntax,tokens = find_syntax_end [] tokens in
318 (* print_prefix 100 tokens; *) 344 (* print_prefix 100 tokens; *)
319 - Syntax(parse_syntax atoms syntax) :: parse_rule atoms tokens 345 + Syntax(parse_syntax i atoms syntax) :: parse_rule atoms tokens
320 346
321 -let parse_entry atoms weights tokens = 347 +let parse_entry i0 atoms weights tokens =
322 let prefix,tokens = manage_lemmata tokens in 348 let prefix,tokens = manage_lemmata tokens in
323 let selectors, rule, weight = 349 let selectors, rule, weight =
324 match split_colon [] [] tokens with 350 match split_colon [] [] tokens with
325 [selectors;rule] -> selectors, rule, 0. 351 [selectors;rule] -> selectors, rule, 0.
326 - | [selectors;rule;[weight]] -> selectors, rule, 352 + | [selectors;rule;[i,weight]] -> selectors, rule,
327 (try StringMap.find weights weight 353 (try StringMap.find weights weight
328 - with Not_found -> failwith ("parse_entry: unknown weight symbol '" ^ weight ^ "'"))  
329 - | _ -> failwith ("parse_entry: invalid number of ':' in entry " ^ (String.concat " " tokens)) in  
330 - let selectors = parse_selectors (prefix @ selectors) in 354 + with Not_found -> raise (ParseError("parse_entry", "unknown weight symbol '" ^ weight ^ "'", i)))
  355 + | _ -> raise (ParseError("parse_entry", "invalid number of ':' in entry " ^ String.concat " " (Xlist.map tokens snd), i0)) in
  356 + let selectors = parse_selectors i0 (prefix @ selectors) in
331 let rule = parse_rule atoms rule in 357 let rule = parse_rule atoms rule in
332 selectors, rule, weight 358 selectors, rule, weight
333 359
334 -let parse_lexicon atoms weights = function  
335 - "@LEXICON" :: tokens ->  
336 - let entries = split_semic [] [] tokens in  
337 - List.rev (Xlist.rev_map entries (parse_entry atoms weights))  
338 - | s :: _ -> failwith ("parse_lexicon: '@LEXICON' expected while '" ^ s ^ "' found")  
339 - | [] -> failwith "parse_lexicon: unexpexted end of input" 360 +let string_of_parse_error proc s i line =
  361 + Printf.sprintf "LCG lexicon error in line %d: %s\n%s: %s" i line proc s
  362 +
  363 +let parse_lexicon i0 a atoms weights = function
  364 + (i,"@LEXICON") :: tokens ->
  365 + let entries = split_semic i [] [] tokens in
  366 + Xlist.fold entries ([],true) (fun (entries,is_correct) (i,entry) ->
  367 + try (parse_entry i atoms weights entry) :: entries, is_correct
  368 + with ParseError(proc,s,i) ->
  369 + print_endline (string_of_parse_error proc s i a.(i-1));
  370 + entries,false)
  371 + | (i,s) :: _ -> raise (ParseError("parse_lexicon", "'@LEXICON' expected while '" ^ s ^ "' found", i))
  372 + | [] -> raise (ParseError("parse_lexicon", "unexpexted end of input", i0))
340 373
341 let load_lexicon filename = 374 let load_lexicon filename =
342 - let lines = File.load_lines filename in  
343 - let lines = List.rev (Xlist.rev_map lines remove_comments) in  
344 - let tokens = List.flatten (Xlist.rev_map lines (fun line -> 375 + let lines = Xstring.split "\n" (File.load_file filename) in
  376 + let a = Array.of_list lines in
  377 + let lines,no_lines = Xlist.fold lines ([],1) (fun (lines,i) line -> (i,line) :: lines, i+1) in
  378 + let lines = Xlist.rev_map lines (fun (i,line) -> i, remove_comments line) in
  379 + let tokens = List.flatten (Xlist.rev_map lines (fun (i,line) ->
345 Xlist.rev_map (Str.full_split 380 Xlist.rev_map (Str.full_split
346 (Str.regexp "\\]\\| \\|\t\\|\r\\|\\?\\|:\\|;\\|&\\|!\\|=\\|}\\|{\\|,\\|\\*\\|/\\|\\+\\|)\\|(\\||\\|\\[\\|\\") line) (function 381 (Str.regexp "\\]\\| \\|\t\\|\r\\|\\?\\|:\\|;\\|&\\|!\\|=\\|}\\|{\\|,\\|\\*\\|/\\|\\+\\|)\\|(\\||\\|\\[\\|\\") line) (function
347 - Str.Text s -> s  
348 - | Str.Delim s -> s))) in 382 + Str.Text s -> i,s
  383 + | Str.Delim s -> i,s))) in
349 let tokens = Xlist.fold tokens [] (fun tokens -> function 384 let tokens = Xlist.fold tokens [] (fun tokens -> function
350 - " " -> tokens  
351 - | "\t" -> tokens  
352 - | "\r" -> tokens  
353 - | t -> t :: tokens) in  
354 - let phrase_names,tokens = parse_phrase_names tokens in  
355 - let atoms = make_atoms phrase_names in  
356 - let weights,tokens = parse_weights tokens in  
357 - let lexicon = parse_lexicon atoms weights tokens in  
358 - lexicon 385 + _," " -> tokens
  386 + | _,"\t" -> tokens
  387 + | _,"\r" -> tokens
  388 + | i,t -> (i,t) :: tokens) in
  389 + try
  390 + let i,phrase_names,tokens = parse_phrase_names 1 tokens in
  391 + let atoms = make_atoms phrase_names in
  392 + let i,weights,tokens = parse_weights i tokens in
  393 + let lexicon,is_correct = parse_lexicon i a atoms weights tokens in
  394 + if is_correct then List.rev lexicon else exit 0
  395 + with ParseError(proc,s,i) ->
  396 + print_endline (string_of_parse_error proc s i a.(i-1));
  397 + exit 0
LCGlexicon/ENIAMcategoriesPL.ml
@@ -37,7 +37,7 @@ let selector_values = Xlist.fold [ @@ -37,7 +37,7 @@ let selector_values = Xlist.fold [
37 "match-result";"url";"email";"obj-id";"building-number";"adj";"adjc";"adjp";"adja"; 37 "match-result";"url";"email";"obj-id";"building-number";"adj";"adjc";"adjp";"adja";
38 "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt"; 38 "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt";
39 "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj"; 39 "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj";
40 - "sinterj";"burk";"interp";"xxx";"unk";"html-tag"]; 40 + "sinterj";"burk";"interp";"xxx";"unk";"html-tag";"apron";"compar"];
41 Pos2, []; 41 Pos2, [];
42 Cat, []; 42 Cat, [];
43 Proj, []; 43 Proj, [];
LCGlexicon/test.ml
@@ -97,7 +97,7 @@ let create_chart valence tokens last = @@ -97,7 +97,7 @@ let create_chart valence tokens last =
97 let chart = Xlist.fold tokens chart (fun chart (id,lnode,rnode,orth,lemma,pos,interp,proper) -> 97 let chart = Xlist.fold tokens chart (fun chart (id,lnode,rnode,orth,lemma,pos,interp,proper) ->
98 ENIAM_LCGrenderer.reset_variable_names (); 98 ENIAM_LCGrenderer.reset_variable_names ();
99 ENIAM_LCGrenderer.add_variable_numbers (); 99 ENIAM_LCGrenderer.add_variable_numbers ();
100 - let cats = ENIAMcategoriesPL.clarify_categories proper ["X"] (lemma,pos,interp) in 100 + let cats = ENIAMcategoriesPL.clarify_categories proper "X" ["X"] (lemma,pos,interp) in
101 let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence [] in 101 let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence [] in
102 ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in 102 ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
103 chart 103 chart
LCGparser/ENIAM_LCGlatexOf.ml
@@ -216,11 +216,11 @@ let chart page text_fragments g = @@ -216,11 +216,11 @@ let chart page text_fragments g =
216 "\\end{longtable}" 216 "\\end{longtable}"
217 217
218 let chart2 page text_fragments g = 218 let chart2 page text_fragments g =
219 - let n = match page with "a4" -> "10" | "a1" -> "40" | _ -> "20" in  
220 - "\\begin{longtable}{|l|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^ 219 + let n = match page with "a4" -> "4" | "a1" -> "10" | _ -> "6" in
  220 + "\\begin{longtable}{|l|p{" ^ n ^ "cm}|l|}\n\\hline\n" ^
221 String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) -> 221 String.concat "" (List.rev (ENIAM_LCGchart.fold g [] (fun l (symbol,node1,node2,sem,layer) ->
222 let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in 222 let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
223 - (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" layer node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^ 223 + (Printf.sprintf "%d--%d & %s & $\\begin{array}{l}%s\\end{array}$\\\\\n\\hline\n" node1 node2 s (grammar_symbol 0 symbol)) :: l))) ^
224 "\\end{longtable}" 224 "\\end{longtable}"
225 225
226 let print_chart path name page text_fragments g = 226 let print_chart path name page text_fragments g =