Commit a1931d44d577a0bbf528654032ceb8468dde9ec3 (1 parent: 1af3e9b5)
przetwarzanie zdań z podnoszeniem typu za pomocą leksykonu
Showing 7 changed files with 191 additions and 136 deletions
LCGlexicon/ENIAM_LCGlexicon.ml
... | ... | @@ -42,6 +42,11 @@ let rec get_bracket rev = function |
42 | 42 | | t :: rule -> get_bracket (t :: rev) rule |
43 | 43 | | [] -> false, List.rev rev |
44 | 44 | |
45 | +let rec get_raised rev = function | |
46 | + Raised raised :: rule -> raised, (List.rev rev) @ rule | |
47 | + | t :: rule -> get_raised (t :: rev) rule | |
48 | + | [] -> raise Not_found | |
49 | + | |
45 | 50 | let merge_quant pos_quants quants = |
46 | 51 | let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in |
47 | 52 | let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> |
... | ... | @@ -61,9 +66,91 @@ let assign_quantifiers (selectors,rule,weight) = |
61 | 66 | let quant = merge_quant categories quant in |
62 | 67 | selectors, (bracket,quant,syntax),(rule,weight) |
63 | 68 | |
64 | -let _ = | |
65 | - let lexicon = ENIAM_LCGlexiconParser.load_lexicon "resources/lexicon-pl.dic" in | |
66 | - List.rev (Xlist.rev_map lexicon assign_quantifiers) | |
69 | +let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) = | |
70 | + let semantics = try | |
71 | + let raised,rule = get_raised [] rule in | |
72 | + if rule <> [] then failwith "assign_semantics" else | |
73 | + RaisedSem(Xlist.map quant fst, raised) | |
74 | + with Not_found -> BasicSem(Xlist.map quant fst) in | |
75 | + selectors,(bracket,quant,syntax),(semantics,weight) | |
76 | + | |
77 | +let rec extract_category pat rev = function | |
78 | + (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l | |
79 | + | [] -> raise Not_found | |
80 | + | |
81 | +let dict_of_grammar grammar = | |
82 | + (* print_endline "dict_of_grammar"; *) | |
83 | + Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax),semantics) -> | |
84 | + let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in | |
85 | + let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in | |
86 | + if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else | |
87 | + let rule = selectors,(bracket,quant,syntax),semantics in | |
88 | + Xlist.fold poss dict (fun dict pos -> | |
89 | + let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in | |
90 | + let dict2,l = | |
91 | + if lemmas = [] then dict2,rule :: l else | |
92 | + Xlist.fold lemmas dict2 (fun dict2 lemma -> | |
93 | + StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in | |
94 | + StringMap.add dict pos (dict2,l))) | |
95 | + | |
96 | +let make_rules filename = | |
97 | + let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in | |
98 | + let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in | |
99 | + let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in | |
100 | + dict_of_grammar lexicon | |
101 | + | |
102 | +let find_rules rules cats = | |
103 | + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith "find_rules 1" in | |
104 | + let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in | |
105 | + Xlist.fold rules [] (fun rules (selectors,syntax,semantics) -> | |
106 | + try | |
107 | + let cats = apply_selectors cats selectors in | |
108 | + (cats,syntax,semantics) :: rules | |
109 | + with Not_found -> rules) | |
110 | + | |
111 | +let assign_valence valence rules = | |
112 | + Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax),semantics) -> | |
113 | + (* Printf.printf "%s |valence|=%d\n" cats.lemma (Xlist.size valence); *) | |
114 | + if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then | |
115 | + Xlist.fold valence l (fun l (selectors,schema) -> | |
116 | + try | |
117 | + let cats = apply_selectors cats selectors in | |
118 | + (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l | |
119 | + with Not_found -> l) | |
120 | + else (cats,(bracket,quant,syntax),semantics) :: l) | |
121 | + | |
122 | +type labels = { | |
123 | + number: string; | |
124 | + case: string; | |
125 | + gender: string; | |
126 | + person: string; | |
127 | + aspect: string; | |
128 | +} | |
129 | + | |
130 | +let get_label e = function | |
131 | + Number -> e.number | |
132 | + | Case -> e.case | |
133 | + | Gender -> e.gender | |
134 | + | Person -> e.person | |
135 | + | Aspect -> e.aspect | |
136 | + | _ -> ENIAM_LCGreductions.get_variant_label () | |
137 | + | |
138 | +let get_labels () = { | |
139 | + number=ENIAM_LCGreductions.get_variant_label (); | |
140 | + case=ENIAM_LCGreductions.get_variant_label (); | |
141 | + gender=ENIAM_LCGreductions.get_variant_label (); | |
142 | + person=ENIAM_LCGreductions.get_variant_label (); | |
143 | + aspect=ENIAM_LCGreductions.get_variant_label (); | |
144 | +} | |
145 | + | |
146 | +let make_quantification e rules = | |
147 | + Xlist.map rules (fun (cats,(bracket,quant,syntax),semantics) -> | |
148 | + let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) -> | |
149 | + let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in | |
150 | + let category = string_of_selector cat in | |
151 | + WithVar(category,t,get_label e cat,syntax)) in | |
152 | + let syntax = if bracket then ENIAM_LCGtypes.Bracket(true,true,syntax) else ENIAM_LCGtypes.Bracket(false,false,syntax) in | |
153 | + cats,syntax,semantics) | |
67 | 154 | |
68 | 155 | (*** |
69 | 156 | type rule2 = |
... | ... | @@ -114,27 +201,6 @@ let parse_quants_range quant = |
114 | 201 | (selectors,rule,weight) :: grammar)) |
115 | 202 | **) |
116 | 203 | |
117 | -let rec extract_category pat rev = function | |
118 | - (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l | |
119 | - | [] -> raise Not_found | |
120 | - | |
121 | -let dict_of_grammar grammar = | |
122 | - (* print_endline "dict_of_grammar"; *) | |
123 | - Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax,semantics),weight) -> | |
124 | - let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in | |
125 | - let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in | |
126 | - if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else | |
127 | - let rule = selectors,(bracket,quant,syntax,semantics),weight in | |
128 | - Xlist.fold poss dict (fun dict pos -> | |
129 | - let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in | |
130 | - let dict2,l = | |
131 | - if lemmas = [] then dict2,rule :: l else | |
132 | - Xlist.fold lemmas dict2 (fun dict2 lemma -> | |
133 | - StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in | |
134 | - StringMap.add dict pos (dict2,l))) | |
135 | - | |
136 | -(* let rules = dict_of_grammar ENIAM_LCGlexiconPL.grammar *) | |
137 | - | |
138 | 204 | (* let translate_negation = function |
139 | 205 | (Negation:negation) -> ["neg"] |
140 | 206 | | Aff -> ["aff"] |
... | ... | @@ -165,16 +231,7 @@ let dict_of_grammar grammar = |
165 | 231 | | GerAtrs(m,le,neg,a) -> [Negation,Eq,translate_negation neg;Aspect,Eq,translate_aspect a] |
166 | 232 | | NonPersAtrs(m,le,role,role_attr,neg,a) -> [Negation,Eq,translate_negation neg;Aspect,Eq,translate_aspect a] |
167 | 233 | | ComprepAtrs _ -> failwith "apply_valence_selectors" *) |
168 | - | |
169 | -let find_rules rules cats = | |
170 | - let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith "find_rules 1" in | |
171 | - let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in | |
172 | - Xlist.fold rules [] (fun rules (selectors,(bracket,quant,syntax,semantics),weight) -> | |
173 | - try | |
174 | - let cats = apply_selectors cats selectors in | |
175 | - (cats,(bracket,quant,syntax,semantics),weight) :: rules | |
176 | - with Not_found -> rules) | |
177 | - | |
234 | + ***) | |
178 | 235 | (* FIXME: argumenty X i raised i inne *) |
179 | 236 | |
180 | 237 | (* let render_schema schema = |
... | ... | @@ -196,20 +253,9 @@ let find_rules rules cats = |
196 | 253 | | _ -> l) |
197 | 254 | else (cats,(bracket,quant,syntax,semantics),weight) :: l) *) |
198 | 255 | |
199 | -let assign_valence valence rules = | |
200 | - Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax,semantics),weight) -> | |
201 | - (* Printf.printf "%s |valence|=%d\n" cats.lemma (Xlist.size valence); *) | |
202 | - if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then | |
203 | - Xlist.fold valence l (fun l (selectors,schema) -> | |
204 | - try | |
205 | - let cats = apply_selectors cats selectors in | |
206 | - (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax,semantics),weight) :: l | |
207 | - with Not_found -> l) | |
208 | - else (cats,(bracket,quant,syntax,semantics),weight) :: l) | |
209 | - | |
210 | 256 | (* FIXME: ustawienie wartości symbol *) |
211 | 257 | (* FIXME: problem z atrybutami przy zamianie kolejności rzędników *) |
212 | -let make_node id orth lemma cat weight cat_list = | |
258 | +let make_node id orth lemma pos syntax weight cat_list is_raised = | |
213 | 259 | let attrs = Xlist.fold cat_list(*Xlist.rev_map quant fst*) [] (fun attrs -> function |
214 | 260 | | Lemma -> attrs |
215 | 261 | | Cat -> ("CAT",SubstVar "cat") :: attrs |
... | ... | @@ -230,53 +276,33 @@ let make_node id orth lemma cat weight cat_list = |
230 | 276 | | s -> (string_of_selector s, Dot) :: attrs) in |
231 | 277 | (* | "lex" -> ("LEX",Val "+") :: attrs *) |
232 | 278 | (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *) |
233 | - {ENIAM_LCGrenderer.empty_node with orth=orth; lemma=lemma; pos=cat; weight=weight; id=id; attrs=List.rev attrs; args=Dot} | |
234 | - | |
235 | -type labels = { | |
236 | - number: string; | |
237 | - case: string; | |
238 | - gender: string; | |
239 | - person: string; | |
240 | - aspect: string; | |
241 | -} | |
242 | - | |
243 | -let get_label e = function | |
244 | - Number -> e.number | |
245 | - | Case -> e.case | |
246 | - | Gender -> e.gender | |
247 | - | Person -> e.person | |
248 | - | Aspect -> e.aspect | |
249 | - | _ -> ENIAM_LCGreductions.get_variant_label () | |
250 | - | |
251 | -let get_labels () = { | |
252 | - number=ENIAM_LCGreductions.get_variant_label (); | |
253 | - case=ENIAM_LCGreductions.get_variant_label (); | |
254 | - gender=ENIAM_LCGreductions.get_variant_label (); | |
255 | - person=ENIAM_LCGreductions.get_variant_label (); | |
256 | - aspect=ENIAM_LCGreductions.get_variant_label (); | |
257 | -} | |
258 | - | |
259 | -let make_quantification e rules = | |
260 | - Xlist.map rules (fun (cats,(bracket,quant,syntax,semantics),weight) -> | |
261 | - let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) -> | |
262 | - let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in | |
263 | - let category = string_of_selector cat in | |
264 | - WithVar(category,t,get_label e cat,syntax)) in | |
265 | - let syntax = if bracket then Bracket(true,true,syntax) else Bracket(false,false,syntax) in | |
266 | - cats,syntax,semantics,weight) | |
279 | + let symbol = if is_raised then | |
280 | + ENIAM_LCGrenderer.make_raised_symbol syntax | |
281 | + else ENIAM_LCGrenderer.make_symbol syntax in | |
282 | + {ENIAM_LCGrenderer.empty_node with | |
283 | + orth=orth; lemma=lemma; pos=pos; symbol=symbol; | |
284 | + weight=weight; id=id; attrs=List.rev attrs; args=Dot} | |
267 | 285 | |
268 | 286 | let make_term id orth rules = |
269 | - Xlist.map rules (fun (cats,syntax,semantics,weight) -> | |
287 | + Xlist.map rules (fun (cats,syntax,(semantics,weight)) -> | |
288 | + ENIAM_LCGrenderer.reset_variable_names (); | |
289 | + ENIAM_LCGrenderer.add_variable_numbers (); | |
270 | 290 | match semantics with |
271 | 291 | BasicSem cat_list -> |
272 | - let node = make_node id orth cats.lemma cats.pos weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list in | |
292 | + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in | |
273 | 293 | let semantics = ENIAM_LCGrenderer.make_term node syntax in |
274 | 294 | ENIAM_LCGrenderer.simplify (syntax,semantics) |
295 | + | RaisedSem(cat_list,outer_cat_list) -> | |
296 | + (* FIXME: jakie atrybuty powinien mieć outer node (w szczególności jaką wagę?) *) | |
297 | + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list true in | |
298 | + let outer_node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) outer_cat_list false in | |
299 | + let semantics = ENIAM_LCGrenderer.make_raised_term node outer_node syntax in | |
300 | + ENIAM_LCGrenderer.simplify (syntax,semantics) | |
275 | 301 | | _ -> failwith "make_term: ni") |
276 | 302 | (*cats,bracket,quant,syntax,Dot*) |
277 | -(** | |
278 | - let create_entries id orth cats valence = | |
279 | - Xlist.fold cats [] (fun l cats -> | |
303 | + | |
304 | +let create_entries rules id orth cats valence = | |
305 | + Xlist.fold cats [] (fun l cats -> | |
280 | 306 | (* variable_name_ref := []; *) |
281 | 307 | if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else |
282 | 308 | if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else |
... | ... | @@ -291,7 +317,7 @@ let make_term id orth rules = |
291 | 317 | let rules = make_term id orth rules in |
292 | 318 | (* print_endline "create_entries 5"; *) |
293 | 319 | rules @ l) |
294 | - **)(* | |
320 | + (* | |
295 | 321 | (* FIXME: poprawić i dodać moduł testujący *) |
296 | 322 | module OrderedIntInt = struct |
297 | 323 | type t = int * int |
... | ... | @@ -314,4 +340,3 @@ let create (paths,last) tokens lex_sems = |
314 | 340 | let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in |
315 | 341 | chart |
316 | 342 | *) |
317 | - ***) | |
... | ... |
LCGlexicon/ENIAM_LCGlexiconParser.ml
... | ... | @@ -282,6 +282,12 @@ let parse_quantifiers tokens = |
282 | 282 | | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'") |
283 | 283 | | [] -> failwith "parse_quantifiers: no token") |
284 | 284 | |
285 | +let parse_raised tokens = | |
286 | + Xlist.map (split_comma [] [] tokens) (function | |
287 | + [cat] -> selector_of_string cat | |
288 | + | t :: _ -> failwith ("parse_raised: unexpected token '" ^ t ^ "'") | |
289 | + | [] -> failwith "parse_raised: no token") | |
290 | + | |
285 | 291 | let rec parse_rule atoms = function |
286 | 292 | "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens |
287 | 293 | | "QUANT" :: "[" :: tokens -> |
... | ... | @@ -289,7 +295,7 @@ let rec parse_rule atoms = function |
289 | 295 | Quant(parse_quantifiers quant) :: parse_rule atoms tokens |
290 | 296 | | "RAISED" :: "[" :: tokens -> |
291 | 297 | let raised,tokens = find_right_bracket [] tokens in |
292 | - Raised(raised) :: parse_rule atoms tokens | |
298 | + Raised(parse_raised raised) :: parse_rule atoms tokens | |
293 | 299 | | tokens -> |
294 | 300 | (* print_prefix 100 tokens; *) |
295 | 301 | [Syntax(parse_syntax atoms tokens)] |
... | ... |
LCGlexicon/ENIAM_LCGlexiconTypes.ml
... | ... | @@ -37,6 +37,12 @@ end |
37 | 37 | |
38 | 38 | module SelectorMap=Xmap.Make(OrderedSelector) |
39 | 39 | |
40 | +type rule = | |
41 | + Bracket | |
42 | + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list | |
43 | + | Raised of selector list | |
44 | + | Syntax of ENIAM_LCGtypes.grammar_symbol | |
45 | + | |
40 | 46 | type rule_sem = |
41 | 47 | BasicSem of selector list |
42 | 48 | | RaisedSem of selector list * selector list |
... | ... | @@ -46,12 +52,6 @@ type rule_sem = |
46 | 52 | |
47 | 53 | type selector_relation = Eq | Neq (*| StrictEq*) |
48 | 54 | |
49 | -type rule = | |
50 | - Bracket | |
51 | - | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list | |
52 | - | Raised of string list | |
53 | - | Syntax of ENIAM_LCGtypes.grammar_symbol | |
54 | - | |
55 | 55 | |
56 | 56 | (* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *) |
57 | 57 | (* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *) |
... | ... |
LCGlexicon/resources/lexicon-pl.dic
... | ... | @@ -151,36 +151,36 @@ pos=ger: np*number*case*gender*person{schema}{\(1+qub),/(1+inclusion)}; |
151 | 151 | pos=pact: adjp*number*case*gender{schema}{\(1+qub),/(1+inclusion)}; |
152 | 152 | pos=ppas: adjp*number*case*gender{schema}{\(1+qub),/(1+inclusion)}; |
153 | 153 | |
154 | -pos=fin|bedzie,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}; | |
155 | -pos=fin|bedzie,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie}; | |
156 | -pos=fin,negation=aff,mood=imperative: ip*number*gender*person{/(1+int)}{schema,|aux-imp}{\(1+qub),/(1+inclusion)}; | |
157 | -pos=fin,negation=neg,mood=imperative: ip*number*gender*person{/(1+int)}{schema,|aux-imp}{\(1+qub),/(1+inclusion)}{\nie}; | |
158 | -pos=impt|imps,negation=aff: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}; | |
159 | -pos=impt|imps,negation=neg: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie}; | |
160 | - | |
161 | -pos=pred,negation=aff,tense=pres: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}; | |
162 | -pos=pred,negation=neg,tense=pres: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie}; | |
163 | -pos=pred,negation=aff,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
164 | -pos=pred,negation=neg,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
165 | -pos=pred,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
166 | -pos=pred,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
167 | - | |
168 | -pos=praet|winien,person=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}; | |
169 | -pos=praet|winien,person=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie}; | |
170 | -pos=praet|winien,person!=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}; | |
171 | -pos=praet|winien,person!=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
172 | - | |
173 | -pos=praet|winien,person=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|by}{\(1+qub),/(1+inclusion)}; | |
174 | -pos=praet|winien,person=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|by}{\(1+qub),/(1+inclusion)}{\nie}; | |
175 | -pos=praet|winien,person!=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}; | |
176 | -pos=praet|winien,person!=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}{\nie}; | |
177 | - | |
178 | -pos=praet|winien,negation=aff,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
179 | - | |
180 | -pos=winien,person=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
181 | -pos=winien,person=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
182 | -pos=winien,person!=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
183 | -pos=winien,person!=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
154 | +pos=fin|bedzie,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}; | |
155 | +pos=fin|bedzie,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie}; | |
156 | +pos=fin,negation=aff,mood=imperative: ip*number*gender*person{/(1+int),schema,|aux-imp}{\(1+qub),/(1+inclusion)}; | |
157 | +pos=fin,negation=neg,mood=imperative: ip*number*gender*person{/(1+int),schema,|aux-imp}{\(1+qub),/(1+inclusion)}{\nie}; | |
158 | +pos=impt|imps,negation=aff: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}; | |
159 | +pos=impt|imps,negation=neg: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie}; | |
160 | + | |
161 | +pos=pred,negation=aff,tense=pres: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}; | |
162 | +pos=pred,negation=neg,tense=pres: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie}; | |
163 | +pos=pred,negation=aff,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
164 | +pos=pred,negation=neg,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
165 | +pos=pred,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
166 | +pos=pred,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
167 | + | |
168 | +pos=praet|winien,person=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}; | |
169 | +pos=praet|winien,person=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie}; | |
170 | +pos=praet|winien,person!=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}; | |
171 | +pos=praet|winien,person!=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
172 | + | |
173 | +pos=praet|winien,person=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int),schema,|by}{\(1+qub),/(1+inclusion)}; | |
174 | +pos=praet|winien,person=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int),schema,|by}{\(1+qub),/(1+inclusion)}{\nie}; | |
175 | +pos=praet|winien,person!=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}; | |
176 | +pos=praet|winien,person!=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}{\nie}; | |
177 | + | |
178 | +pos=praet|winien,negation=aff,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
179 | + | |
180 | +pos=winien,person=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
181 | +pos=winien,person=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
182 | +pos=winien,person!=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}; | |
183 | +pos=winien,person!=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; | |
184 | 184 | |
185 | 185 | pos=bedzie: aux-fut*number*gender*person; |
186 | 186 | lemma=być,pos=praet: aux-past*number*gender*person; |
... | ... |
LCGlexicon/test.ml
... | ... | @@ -20,8 +20,10 @@ |
20 | 20 | open ENIAM_LCGlexiconTypes |
21 | 21 | open ENIAM_LCGtypes |
22 | 22 | |
23 | +let rules = ENIAM_LCGlexicon.make_rules "resources/lexicon-pl.dic" | |
24 | + | |
23 | 25 | let examples = [ |
24 | - "kot",[ | |
26 | + (*"kot",[ | |
25 | 27 | 1, 0, 1, "","<sentence>","interp", [],false; |
26 | 28 | 2, 1, 2, "","<clause>","interp", [],false; |
27 | 29 | 3, 2, 3, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true; |
... | ... | @@ -40,7 +42,18 @@ let examples = [ |
40 | 42 | 7, 4, 5, "kota","kota","subst", [["sg"];["nom"];["f"]],false; |
41 | 43 | 8, 5, 6, "","</clause>","interp", [],false; |
42 | 44 | 9, 6, 7, ".","</sentence>","interp", [],false; |
43 | - ],7; | |
45 | + ],7;*) | |
46 | + "jaki",[ | |
47 | + 1, 0, 1, "","<sentence>","interp", [],false; | |
48 | + 2, 1, 2, "","<clause>","interp", [],false; | |
49 | + 3, 2, 3, "Jakiego","jaki","adj", [["sg"];["gen";"acc"];["m1";"m2"];["pos"]],false; | |
50 | + 4, 3, 4, "kota","kot","subst", [["sg"];["gen";"acc"];["m1";"m2"]],false; | |
51 | + 5, 4, 5, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true; | |
52 | + 6, 5, 6, "ma","mieć","fin", [["sg"];["ter"];["imperf"]],false; | |
53 | + 7, 6, 7, "?","?","interp", [],false; | |
54 | + 8, 7, 8, "","</clause>","interp", [],false; | |
55 | + 9, 8, 9, ".","</sentence>","interp", [],false; | |
56 | + ],9; | |
44 | 57 | ] |
45 | 58 | |
46 | 59 | let valence = [ |
... | ... | @@ -49,16 +62,18 @@ let valence = [ |
49 | 62 | Both,Plus[One;Tensor[Atom "np";Top;Atom "acc";Top;Top]]]; |
50 | 63 | [Lemma,Eq,["mieć"];Pos,Eq,["fin"];Negation,Eq,["neg"];Mood,Eq,["indicative"]],[Both,Plus[One;Tensor[Atom "np";AVar "number";Atom "nom";AVar "gender";AVar "person"]]; |
51 | 64 | Both,Plus[One;Tensor[Atom "np";Top;Atom "gen";Top;Top]]]; |
52 | - [Lemma,Eq,["kot"];Pos,Eq,["subst"]],[]; | |
65 | + [Lemma,Eq,["kot"];Pos,Eq,["subst"]],[Both,Plus[One;Tensor[Atom "adjp";AVar "number";AVar "case";AVar "gender"]]]; | |
53 | 66 | [Lemma,Eq,["kota"];Pos,Eq,["subst"]],[]; |
54 | 67 | ] |
55 | -(** | |
68 | + | |
56 | 69 | let create_chart valence tokens last = |
70 | + ENIAM_LCGrenderer.reset_variable_numbers (); | |
57 | 71 | let chart = ENIAM_LCGchart.make last in |
58 | 72 | let chart = Xlist.fold tokens chart (fun chart (id,lnode,rnode,orth,lemma,pos,interp,proper) -> |
59 | 73 | ENIAM_LCGrenderer.reset_variable_names (); |
74 | + ENIAM_LCGrenderer.add_variable_numbers (); | |
60 | 75 | let cats = ENIAMcategoriesPL.clarify_categories proper ["X"] (lemma,pos,interp) in |
61 | - let l = ENIAM_LCGrendererPL.create_entries id orth cats valence in | |
76 | + let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence in | |
62 | 77 | ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in |
63 | 78 | chart |
64 | 79 | |
... | ... | @@ -89,6 +104,6 @@ let test_example valence (name,tokens,last) = |
89 | 104 | ()) |
90 | 105 | else print_endline "not reduced") |
91 | 106 | else print_endline "not parsed" |
92 | - **) | |
93 | -(*let _ = | |
94 | - Xlist.iter examples (test_example valence)*) | |
107 | + | |
108 | +let _ = | |
109 | + Xlist.iter examples (test_example valence) | |
... | ... |
LCGparser/ENIAM_LCGrenderer.ml
... | ... | @@ -97,6 +97,14 @@ let rec substitute_substvar v g = function |
97 | 97 | let empty_node = { |
98 | 98 | orth=""; lemma=""; pos=""; weight=0.; id=0; symbol=Dot; arg_symbol=Dot; attrs=[]; args=Dot;} |
99 | 99 | |
100 | +let variable_num_ref = ref 0 | |
101 | + | |
102 | +let reset_variable_numbers () = | |
103 | + variable_num_ref := 0 | |
104 | + | |
105 | +let add_variable_numbers () = | |
106 | + incr variable_num_ref | |
107 | + | |
100 | 108 | let variable_name_ref = ref [] |
101 | 109 | |
102 | 110 | let reset_variable_names () = |
... | ... | @@ -109,7 +117,7 @@ let rec add_variable_name = function |
109 | 117 | |
110 | 118 | let get_variable_name () = |
111 | 119 | variable_name_ref := add_variable_name (!variable_name_ref); |
112 | - String.concat "" (List.rev (!variable_name_ref)) | |
120 | + String.concat "" (List.rev (!variable_name_ref)) ^ (string_of_int !variable_num_ref) | |
113 | 121 | |
114 | 122 | let make_arg_symbol l = |
115 | 123 | Tuple(Xlist.map l (function |
... | ... |
LCGparser/test.ml
... | ... | @@ -24,7 +24,7 @@ type entry = |
24 | 24 | | Raised of grammar_symbol |
25 | 25 | |
26 | 26 | let examples = [ |
27 | -(* "kot",[ | |
27 | + "kot",[ | |
28 | 28 | 0, 1, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); |
29 | 29 | 1, 2, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]])); |
30 | 30 | (* 1, 2, "ma","mieć","fin", Basic(Imp(Imp(Tensor[Atom "ip"],Backward,Tensor[Atom "np"; Atom "nom"]),Forward,Tensor[Atom "np"; Atom "nom"])); *) |
... | ... | @@ -39,7 +39,7 @@ let examples = [ |
39 | 39 | 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])]))); |
40 | 40 | (* 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *) |
41 | 41 | 4, 5, ".",".","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"])); |
42 | - ],5;*) | |
42 | + ],5; | |
43 | 43 | |
44 | 44 | "jaki",[ |
45 | 45 | 0, 1, "Jakiego","jaki","adj",Raised(WithVar("case",With[Atom "gen"; Atom "acc"],"A",ImpSet(ImpSet(Tensor[Atom "cp"; Atom "int"; Atom "jaki"], |
... | ... | @@ -49,16 +49,17 @@ let examples = [ |
49 | 49 | (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",Imp(Tensor[Atom "np"; AVar "case"],Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])))); *) |
50 | 50 | (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *) |
51 | 51 | 2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); |
52 | - 2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); | |
53 | 52 | 3, 4, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]])); |
54 | 53 | 4, 5, "?","?","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "cp";Atom "int";Top])); |
55 | 54 | ],5 |
56 | 55 | ] |
57 | 56 | |
58 | 57 | let create_chart tokens last = |
58 | + ENIAM_LCGrenderer.reset_variable_numbers (); | |
59 | 59 | let chart = ENIAM_LCGchart.make last in |
60 | 60 | let chart = Xlist.fold tokens chart (fun chart (lnode,rnode,orth,lemma,pos,entry) -> |
61 | 61 | ENIAM_LCGrenderer.reset_variable_names (); |
62 | + ENIAM_LCGrenderer.add_variable_numbers (); | |
62 | 63 | let syntax,semantics = match entry with |
63 | 64 | Basic syntax -> |
64 | 65 | let node = {ENIAM_LCGrenderer.empty_node with |
... | ... |