Commit a1931d44d577a0bbf528654032ceb8468dde9ec3

Authored by Wojciech Jaworski
1 parent 1af3e9b5

przetwarzanie zdań z podnoszeniem typu za pomocą leksykonu

LCGlexicon/ENIAM_LCGlexicon.ml
@@ -42,6 +42,11 @@ let rec get_bracket rev = function @@ -42,6 +42,11 @@ let rec get_bracket rev = function
42 | t :: rule -> get_bracket (t :: rev) rule 42 | t :: rule -> get_bracket (t :: rev) rule
43 | [] -> false, List.rev rev 43 | [] -> false, List.rev rev
44 44
  45 +let rec get_raised rev = function
  46 + Raised raised :: rule -> raised, (List.rev rev) @ rule
  47 + | t :: rule -> get_raised (t :: rev) rule
  48 + | [] -> raise Not_found
  49 +
45 let merge_quant pos_quants quants = 50 let merge_quant pos_quants quants =
46 let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in 51 let map = Xlist.fold quants SelectorMap.empty (fun map (k,v) -> SelectorMap.add map k v) in
47 let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) -> 52 let l,map = Xlist.fold pos_quants ([],map) (fun (l,map) (cat,v) ->
@@ -61,9 +66,91 @@ let assign_quantifiers (selectors,rule,weight) = @@ -61,9 +66,91 @@ let assign_quantifiers (selectors,rule,weight) =
61 let quant = merge_quant categories quant in 66 let quant = merge_quant categories quant in
62 selectors, (bracket,quant,syntax),(rule,weight) 67 selectors, (bracket,quant,syntax),(rule,weight)
63 68
64 -let _ =  
65 - let lexicon = ENIAM_LCGlexiconParser.load_lexicon "resources/lexicon-pl.dic" in  
66 - List.rev (Xlist.rev_map lexicon assign_quantifiers) 69 +let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) =
  70 + let semantics = try
  71 + let raised,rule = get_raised [] rule in
  72 + if rule <> [] then failwith "assign_semantics" else
  73 + RaisedSem(Xlist.map quant fst, raised)
  74 + with Not_found -> BasicSem(Xlist.map quant fst) in
  75 + selectors,(bracket,quant,syntax),(semantics,weight)
  76 +
  77 +let rec extract_category pat rev = function
  78 + (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l
  79 + | [] -> raise Not_found
  80 +
  81 +let dict_of_grammar grammar =
  82 + (* print_endline "dict_of_grammar"; *)
  83 + Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax),semantics) ->
  84 + let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in
  85 + let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in
  86 + if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else
  87 + let rule = selectors,(bracket,quant,syntax),semantics in
  88 + Xlist.fold poss dict (fun dict pos ->
  89 + let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in
  90 + let dict2,l =
  91 + if lemmas = [] then dict2,rule :: l else
  92 + Xlist.fold lemmas dict2 (fun dict2 lemma ->
  93 + StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in
  94 + StringMap.add dict pos (dict2,l)))
  95 +
  96 +let make_rules filename =
  97 + let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in
  98 + let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in
  99 + let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in
  100 + dict_of_grammar lexicon
  101 +
  102 +let find_rules rules cats =
  103 + let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith "find_rules 1" in
  104 + let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in
  105 + Xlist.fold rules [] (fun rules (selectors,syntax,semantics) ->
  106 + try
  107 + let cats = apply_selectors cats selectors in
  108 + (cats,syntax,semantics) :: rules
  109 + with Not_found -> rules)
  110 +
  111 +let assign_valence valence rules =
  112 + Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax),semantics) ->
  113 + (* Printf.printf "%s |valence|=%d\n" cats.lemma (Xlist.size valence); *)
  114 + if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then
  115 + Xlist.fold valence l (fun l (selectors,schema) ->
  116 + try
  117 + let cats = apply_selectors cats selectors in
  118 + (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax),semantics) :: l
  119 + with Not_found -> l)
  120 + else (cats,(bracket,quant,syntax),semantics) :: l)
  121 +
  122 +type labels = {
  123 + number: string;
  124 + case: string;
  125 + gender: string;
  126 + person: string;
  127 + aspect: string;
  128 +}
  129 +
  130 +let get_label e = function
  131 + Number -> e.number
  132 + | Case -> e.case
  133 + | Gender -> e.gender
  134 + | Person -> e.person
  135 + | Aspect -> e.aspect
  136 + | _ -> ENIAM_LCGreductions.get_variant_label ()
  137 +
  138 +let get_labels () = {
  139 + number=ENIAM_LCGreductions.get_variant_label ();
  140 + case=ENIAM_LCGreductions.get_variant_label ();
  141 + gender=ENIAM_LCGreductions.get_variant_label ();
  142 + person=ENIAM_LCGreductions.get_variant_label ();
  143 + aspect=ENIAM_LCGreductions.get_variant_label ();
  144 +}
  145 +
  146 +let make_quantification e rules =
  147 + Xlist.map rules (fun (cats,(bracket,quant,syntax),semantics) ->
  148 + let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) ->
  149 + let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in
  150 + let category = string_of_selector cat in
  151 + WithVar(category,t,get_label e cat,syntax)) in
  152 + let syntax = if bracket then ENIAM_LCGtypes.Bracket(true,true,syntax) else ENIAM_LCGtypes.Bracket(false,false,syntax) in
  153 + cats,syntax,semantics)
67 154
68 (*** 155 (***
69 type rule2 = 156 type rule2 =
@@ -114,27 +201,6 @@ let parse_quants_range quant = @@ -114,27 +201,6 @@ let parse_quants_range quant =
114 (selectors,rule,weight) :: grammar)) 201 (selectors,rule,weight) :: grammar))
115 **) 202 **)
116 203
117 -let rec extract_category pat rev = function  
118 - (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l  
119 - | [] -> raise Not_found  
120 -  
121 -let dict_of_grammar grammar =  
122 - (* print_endline "dict_of_grammar"; *)  
123 - Xlist.fold grammar StringMap.empty (fun dict (selectors,(bracket,quant,syntax,semantics),weight) ->  
124 - let pos_rel,poss,selectors = try extract_category Pos [] selectors with Not_found -> failwith "dict_of_grammar 1" in  
125 - let lemma_rel,lemmas,selectors = try extract_category Lemma [] selectors with Not_found -> Eq,[],selectors in  
126 - if pos_rel <> Eq || lemma_rel <> Eq then failwith "dict_of_grammar 2" else  
127 - let rule = selectors,(bracket,quant,syntax,semantics),weight in  
128 - Xlist.fold poss dict (fun dict pos ->  
129 - let dict2,l = try StringMap.find dict pos with Not_found -> StringMap.empty,[] in  
130 - let dict2,l =  
131 - if lemmas = [] then dict2,rule :: l else  
132 - Xlist.fold lemmas dict2 (fun dict2 lemma ->  
133 - StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in  
134 - StringMap.add dict pos (dict2,l)))  
135 -  
136 -(* let rules = dict_of_grammar ENIAM_LCGlexiconPL.grammar *)  
137 -  
138 (* let translate_negation = function 204 (* let translate_negation = function
139 (Negation:negation) -> ["neg"] 205 (Negation:negation) -> ["neg"]
140 | Aff -> ["aff"] 206 | Aff -> ["aff"]
@@ -165,16 +231,7 @@ let dict_of_grammar grammar = @@ -165,16 +231,7 @@ let dict_of_grammar grammar =
165 | GerAtrs(m,le,neg,a) -> [Negation,Eq,translate_negation neg;Aspect,Eq,translate_aspect a] 231 | GerAtrs(m,le,neg,a) -> [Negation,Eq,translate_negation neg;Aspect,Eq,translate_aspect a]
166 | NonPersAtrs(m,le,role,role_attr,neg,a) -> [Negation,Eq,translate_negation neg;Aspect,Eq,translate_aspect a] 232 | NonPersAtrs(m,le,role,role_attr,neg,a) -> [Negation,Eq,translate_negation neg;Aspect,Eq,translate_aspect a]
167 | ComprepAtrs _ -> failwith "apply_valence_selectors" *) 233 | ComprepAtrs _ -> failwith "apply_valence_selectors" *)
168 -  
169 -let find_rules rules cats =  
170 - let lex_rules,rules = try StringMap.find rules cats.pos with Not_found -> failwith "find_rules 1" in  
171 - let rules = try StringMap.find lex_rules cats.lemma @ rules with Not_found -> rules in  
172 - Xlist.fold rules [] (fun rules (selectors,(bracket,quant,syntax,semantics),weight) ->  
173 - try  
174 - let cats = apply_selectors cats selectors in  
175 - (cats,(bracket,quant,syntax,semantics),weight) :: rules  
176 - with Not_found -> rules)  
177 - 234 + ***)
178 (* FIXME: argumenty X i raised i inne *) 235 (* FIXME: argumenty X i raised i inne *)
179 236
180 (* let render_schema schema = 237 (* let render_schema schema =
@@ -196,20 +253,9 @@ let find_rules rules cats = @@ -196,20 +253,9 @@ let find_rules rules cats =
196 | _ -> l) 253 | _ -> l)
197 else (cats,(bracket,quant,syntax,semantics),weight) :: l) *) 254 else (cats,(bracket,quant,syntax,semantics),weight) :: l) *)
198 255
199 -let assign_valence valence rules =  
200 - Xlist.fold rules [] (fun l (cats,(bracket,quant,syntax,semantics),weight) ->  
201 - (* Printf.printf "%s |valence|=%d\n" cats.lemma (Xlist.size valence); *)  
202 - if ENIAM_LCGrenderer.count_avar "schema" syntax > 0 then  
203 - Xlist.fold valence l (fun l (selectors,schema) ->  
204 - try  
205 - let cats = apply_selectors cats selectors in  
206 - (cats,(bracket,quant,ENIAM_LCGrenderer.substitute_schema "schema" schema syntax,semantics),weight) :: l  
207 - with Not_found -> l)  
208 - else (cats,(bracket,quant,syntax,semantics),weight) :: l)  
209 -  
210 (* FIXME: ustawienie wartości symbol *) 256 (* FIXME: ustawienie wartości symbol *)
211 (* FIXME: problem z atrybutami przy zamianie kolejności rzędników *) 257 (* FIXME: problem z atrybutami przy zamianie kolejności rzędników *)
212 -let make_node id orth lemma cat weight cat_list = 258 +let make_node id orth lemma pos syntax weight cat_list is_raised =
213 let attrs = Xlist.fold cat_list(*Xlist.rev_map quant fst*) [] (fun attrs -> function 259 let attrs = Xlist.fold cat_list(*Xlist.rev_map quant fst*) [] (fun attrs -> function
214 | Lemma -> attrs 260 | Lemma -> attrs
215 | Cat -> ("CAT",SubstVar "cat") :: attrs 261 | Cat -> ("CAT",SubstVar "cat") :: attrs
@@ -230,53 +276,33 @@ let make_node id orth lemma cat weight cat_list = @@ -230,53 +276,33 @@ let make_node id orth lemma cat weight cat_list =
230 | s -> (string_of_selector s, Dot) :: attrs) in 276 | s -> (string_of_selector s, Dot) :: attrs) in
231 (* | "lex" -> ("LEX",Val "+") :: attrs *) 277 (* | "lex" -> ("LEX",Val "+") :: attrs *)
232 (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *) 278 (* | s -> failwith ("make_node: " ^ (string_of_selector s))) in *)
233 - {ENIAM_LCGrenderer.empty_node with orth=orth; lemma=lemma; pos=cat; weight=weight; id=id; attrs=List.rev attrs; args=Dot}  
234 -  
235 -type labels = {  
236 - number: string;  
237 - case: string;  
238 - gender: string;  
239 - person: string;  
240 - aspect: string;  
241 -}  
242 -  
243 -let get_label e = function  
244 - Number -> e.number  
245 - | Case -> e.case  
246 - | Gender -> e.gender  
247 - | Person -> e.person  
248 - | Aspect -> e.aspect  
249 - | _ -> ENIAM_LCGreductions.get_variant_label ()  
250 -  
251 -let get_labels () = {  
252 - number=ENIAM_LCGreductions.get_variant_label ();  
253 - case=ENIAM_LCGreductions.get_variant_label ();  
254 - gender=ENIAM_LCGreductions.get_variant_label ();  
255 - person=ENIAM_LCGreductions.get_variant_label ();  
256 - aspect=ENIAM_LCGreductions.get_variant_label ();  
257 -}  
258 -  
259 -let make_quantification e rules =  
260 - Xlist.map rules (fun (cats,(bracket,quant,syntax,semantics),weight) ->  
261 - let syntax = Xlist.fold (List.rev quant) syntax (fun syntax (cat,t) ->  
262 - let t = if t = Top then ENIAM_LCGrenderer.make_quant_restriction (match_selector cats cat) else t in  
263 - let category = string_of_selector cat in  
264 - WithVar(category,t,get_label e cat,syntax)) in  
265 - let syntax = if bracket then Bracket(true,true,syntax) else Bracket(false,false,syntax) in  
266 - cats,syntax,semantics,weight) 279 + let symbol = if is_raised then
  280 + ENIAM_LCGrenderer.make_raised_symbol syntax
  281 + else ENIAM_LCGrenderer.make_symbol syntax in
  282 + {ENIAM_LCGrenderer.empty_node with
  283 + orth=orth; lemma=lemma; pos=pos; symbol=symbol;
  284 + weight=weight; id=id; attrs=List.rev attrs; args=Dot}
267 285
268 let make_term id orth rules = 286 let make_term id orth rules =
269 - Xlist.map rules (fun (cats,syntax,semantics,weight) -> 287 + Xlist.map rules (fun (cats,syntax,(semantics,weight)) ->
  288 + ENIAM_LCGrenderer.reset_variable_names ();
  289 + ENIAM_LCGrenderer.add_variable_numbers ();
270 match semantics with 290 match semantics with
271 BasicSem cat_list -> 291 BasicSem cat_list ->
272 - let node = make_node id orth cats.lemma cats.pos weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list in 292 + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list false in
273 let semantics = ENIAM_LCGrenderer.make_term node syntax in 293 let semantics = ENIAM_LCGrenderer.make_term node syntax in
274 ENIAM_LCGrenderer.simplify (syntax,semantics) 294 ENIAM_LCGrenderer.simplify (syntax,semantics)
  295 + | RaisedSem(cat_list,outer_cat_list) ->
  296 + (* FIXME: jakie atrybuty powinien mieć outer node (w szczególności jaką wagę?) *)
  297 + let node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) cat_list true in
  298 + let outer_node = make_node id orth cats.lemma cats.pos syntax weight(*+.token.ENIAMtokenizerTypes.weight*) outer_cat_list false in
  299 + let semantics = ENIAM_LCGrenderer.make_raised_term node outer_node syntax in
  300 + ENIAM_LCGrenderer.simplify (syntax,semantics)
275 | _ -> failwith "make_term: ni") 301 | _ -> failwith "make_term: ni")
276 (*cats,bracket,quant,syntax,Dot*) 302 (*cats,bracket,quant,syntax,Dot*)
277 -(**  
278 - let create_entries id orth cats valence =  
279 - Xlist.fold cats [] (fun l cats -> 303 +
  304 +let create_entries rules id orth cats valence =
  305 + Xlist.fold cats [] (fun l cats ->
280 (* variable_name_ref := []; *) 306 (* variable_name_ref := []; *)
281 if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else 307 if cats.pos="interp" && cats.lemma="<clause>" then (BracketSet(Forward),Dot) :: l else
282 if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else 308 if cats.pos="interp" && cats.lemma="</clause>" then (BracketSet(Backward),Dot) :: l else
@@ -291,7 +317,7 @@ let make_term id orth rules = @@ -291,7 +317,7 @@ let make_term id orth rules =
291 let rules = make_term id orth rules in 317 let rules = make_term id orth rules in
292 (* print_endline "create_entries 5"; *) 318 (* print_endline "create_entries 5"; *)
293 rules @ l) 319 rules @ l)
294 - **)(* 320 + (*
295 (* FIXME: poprawić i dodać moduł testujący *) 321 (* FIXME: poprawić i dodać moduł testujący *)
296 module OrderedIntInt = struct 322 module OrderedIntInt = struct
297 type t = int * int 323 type t = int * int
@@ -314,4 +340,3 @@ let create (paths,last) tokens lex_sems = @@ -314,4 +340,3 @@ let create (paths,last) tokens lex_sems =
314 let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in 340 let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in
315 chart 341 chart
316 *) 342 *)
317 - ***)  
LCGlexicon/ENIAM_LCGlexiconParser.ml
@@ -282,6 +282,12 @@ let parse_quantifiers tokens = @@ -282,6 +282,12 @@ let parse_quantifiers tokens =
282 | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'") 282 | t :: _ -> failwith ("parse_quantifiers: unexpected token '" ^ t ^ "'")
283 | [] -> failwith "parse_quantifiers: no token") 283 | [] -> failwith "parse_quantifiers: no token")
284 284
  285 +let parse_raised tokens =
  286 + Xlist.map (split_comma [] [] tokens) (function
  287 + [cat] -> selector_of_string cat
  288 + | t :: _ -> failwith ("parse_raised: unexpected token '" ^ t ^ "'")
  289 + | [] -> failwith "parse_raised: no token")
  290 +
285 let rec parse_rule atoms = function 291 let rec parse_rule atoms = function
286 "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens 292 "BRACKET" :: tokens -> Bracket :: parse_rule atoms tokens
287 | "QUANT" :: "[" :: tokens -> 293 | "QUANT" :: "[" :: tokens ->
@@ -289,7 +295,7 @@ let rec parse_rule atoms = function @@ -289,7 +295,7 @@ let rec parse_rule atoms = function
289 Quant(parse_quantifiers quant) :: parse_rule atoms tokens 295 Quant(parse_quantifiers quant) :: parse_rule atoms tokens
290 | "RAISED" :: "[" :: tokens -> 296 | "RAISED" :: "[" :: tokens ->
291 let raised,tokens = find_right_bracket [] tokens in 297 let raised,tokens = find_right_bracket [] tokens in
292 - Raised(raised) :: parse_rule atoms tokens 298 + Raised(parse_raised raised) :: parse_rule atoms tokens
293 | tokens -> 299 | tokens ->
294 (* print_prefix 100 tokens; *) 300 (* print_prefix 100 tokens; *)
295 [Syntax(parse_syntax atoms tokens)] 301 [Syntax(parse_syntax atoms tokens)]
LCGlexicon/ENIAM_LCGlexiconTypes.ml
@@ -37,6 +37,12 @@ end @@ -37,6 +37,12 @@ end
37 37
38 module SelectorMap=Xmap.Make(OrderedSelector) 38 module SelectorMap=Xmap.Make(OrderedSelector)
39 39
  40 +type rule =
  41 + Bracket
  42 + | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list
  43 + | Raised of selector list
  44 + | Syntax of ENIAM_LCGtypes.grammar_symbol
  45 +
40 type rule_sem = 46 type rule_sem =
41 BasicSem of selector list 47 BasicSem of selector list
42 | RaisedSem of selector list * selector list 48 | RaisedSem of selector list * selector list
@@ -46,12 +52,6 @@ type rule_sem = @@ -46,12 +52,6 @@ type rule_sem =
46 52
47 type selector_relation = Eq | Neq (*| StrictEq*) 53 type selector_relation = Eq | Neq (*| StrictEq*)
48 54
49 -type rule =  
50 - Bracket  
51 - | Quant of (selector * ENIAM_LCGtypes.internal_grammar_symbol) list  
52 - | Raised of string list  
53 - | Syntax of ENIAM_LCGtypes.grammar_symbol  
54 -  
55 55
56 (* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *) 56 (* x="s" oznacza, że żeby reguła została użyta token musi mieć "s" jako jedną z wartości atrybutu x, reguła zostanie wykonana dla x z usuniętymi pozostałymi wartościami *)
57 (* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *) 57 (* x!="s" oznacza, że żeby reguła została użyta token musi mieć jako jedną z wartości atrybutu x symbol inny od "s", reguła zostanie wykonana dla x z usuniętą wartością "s" *)
LCGlexicon/resources/lexicon-pl.dic
@@ -151,36 +151,36 @@ pos=ger: np*number*case*gender*person{schema}{\(1+qub),/(1+inclusion)}; @@ -151,36 +151,36 @@ pos=ger: np*number*case*gender*person{schema}{\(1+qub),/(1+inclusion)};
151 pos=pact: adjp*number*case*gender{schema}{\(1+qub),/(1+inclusion)}; 151 pos=pact: adjp*number*case*gender{schema}{\(1+qub),/(1+inclusion)};
152 pos=ppas: adjp*number*case*gender{schema}{\(1+qub),/(1+inclusion)}; 152 pos=ppas: adjp*number*case*gender{schema}{\(1+qub),/(1+inclusion)};
153 153
154 -pos=fin|bedzie,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)};  
155 -pos=fin|bedzie,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie};  
156 -pos=fin,negation=aff,mood=imperative: ip*number*gender*person{/(1+int)}{schema,|aux-imp}{\(1+qub),/(1+inclusion)};  
157 -pos=fin,negation=neg,mood=imperative: ip*number*gender*person{/(1+int)}{schema,|aux-imp}{\(1+qub),/(1+inclusion)}{\nie};  
158 -pos=impt|imps,negation=aff: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)};  
159 -pos=impt|imps,negation=neg: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie};  
160 -  
161 -pos=pred,negation=aff,tense=pres: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)};  
162 -pos=pred,negation=neg,tense=pres: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie};  
163 -pos=pred,negation=aff,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)};  
164 -pos=pred,negation=neg,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie};  
165 -pos=pred,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)};  
166 -pos=pred,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie};  
167 -  
168 -pos=praet|winien,person=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)};  
169 -pos=praet|winien,person=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema}{\(1+qub),/(1+inclusion)}{\nie};  
170 -pos=praet|winien,person!=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{\(1+qub),/(1+inclusion)};  
171 -pos=praet|winien,person!=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}{\nie};  
172 -  
173 -pos=praet|winien,person=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|by}{\(1+qub),/(1+inclusion)};  
174 -pos=praet|winien,person=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|by}{\(1+qub),/(1+inclusion)}{\nie};  
175 -pos=praet|winien,person!=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)};  
176 -pos=praet|winien,person!=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}{\nie};  
177 -  
178 -pos=praet|winien,negation=aff,tense=fut: ip*number*gender*person{/(1+int)}{schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)};  
179 -  
180 -pos=winien,person=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)};  
181 -pos=winien,person=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie};  
182 -pos=winien,person!=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)};  
183 -pos=winien,person!=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int)}{schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie}; 154 +pos=fin|bedzie,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)};
  155 +pos=fin|bedzie,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie};
  156 +pos=fin,negation=aff,mood=imperative: ip*number*gender*person{/(1+int),schema,|aux-imp}{\(1+qub),/(1+inclusion)};
  157 +pos=fin,negation=neg,mood=imperative: ip*number*gender*person{/(1+int),schema,|aux-imp}{\(1+qub),/(1+inclusion)}{\nie};
  158 +pos=impt|imps,negation=aff: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)};
  159 +pos=impt|imps,negation=neg: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie};
  160 +
  161 +pos=pred,negation=aff,tense=pres: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)};
  162 +pos=pred,negation=neg,tense=pres: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie};
  163 +pos=pred,negation=aff,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)};
  164 +pos=pred,negation=neg,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie};
  165 +pos=pred,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)};
  166 +pos=pred,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie};
  167 +
  168 +pos=praet|winien,person=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)};
  169 +pos=praet|winien,person=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema}{\(1+qub),/(1+inclusion)}{\nie};
  170 +pos=praet|winien,person!=ter,negation=aff,mood=indicative: ip*number*gender*person{/(1+int),schema,|aglt*number*person}{\(1+qub),/(1+inclusion)};
  171 +pos=praet|winien,person!=ter,negation=neg,mood=indicative: ip*number*gender*person{/(1+int),schema,|aglt*number*person}{\(1+qub),/(1+inclusion)}{\nie};
  172 +
  173 +pos=praet|winien,person=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int),schema,|by}{\(1+qub),/(1+inclusion)};
  174 +pos=praet|winien,person=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int),schema,|by}{\(1+qub),/(1+inclusion)}{\nie};
  175 +pos=praet|winien,person!=ter,negation=aff,mood=conditional: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)};
  176 +pos=praet|winien,person!=ter,negation=neg,mood=conditional: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|by}{\(1+qub),/(1+inclusion)}{\nie};
  177 +
  178 +pos=praet|winien,negation=aff,tense=fut: ip*number*gender*person{/(1+int),schema,|aux-fut*number*gender*person}{\(1+qub),/(1+inclusion)};
  179 +
  180 +pos=winien,person=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)};
  181 +pos=winien,person=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie};
  182 +pos=winien,person!=ter,negation=aff,tense=past: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)};
  183 +pos=winien,person!=ter,negation=neg,tense=past: ip*number*gender*person{/(1+int),schema,|aglt*number*person,|aux-past*number*gender*person}{\(1+qub),/(1+inclusion)}{\nie};
184 184
185 pos=bedzie: aux-fut*number*gender*person; 185 pos=bedzie: aux-fut*number*gender*person;
186 lemma=być,pos=praet: aux-past*number*gender*person; 186 lemma=być,pos=praet: aux-past*number*gender*person;
LCGlexicon/test.ml
@@ -20,8 +20,10 @@ @@ -20,8 +20,10 @@
20 open ENIAM_LCGlexiconTypes 20 open ENIAM_LCGlexiconTypes
21 open ENIAM_LCGtypes 21 open ENIAM_LCGtypes
22 22
  23 +let rules = ENIAM_LCGlexicon.make_rules "resources/lexicon-pl.dic"
  24 +
23 let examples = [ 25 let examples = [
24 - "kot",[ 26 + (*"kot",[
25 1, 0, 1, "","<sentence>","interp", [],false; 27 1, 0, 1, "","<sentence>","interp", [],false;
26 2, 1, 2, "","<clause>","interp", [],false; 28 2, 1, 2, "","<clause>","interp", [],false;
27 3, 2, 3, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true; 29 3, 2, 3, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true;
@@ -40,7 +42,18 @@ let examples = [ @@ -40,7 +42,18 @@ let examples = [
40 7, 4, 5, "kota","kota","subst", [["sg"];["nom"];["f"]],false; 42 7, 4, 5, "kota","kota","subst", [["sg"];["nom"];["f"]],false;
41 8, 5, 6, "","</clause>","interp", [],false; 43 8, 5, 6, "","</clause>","interp", [],false;
42 9, 6, 7, ".","</sentence>","interp", [],false; 44 9, 6, 7, ".","</sentence>","interp", [],false;
43 - ],7; 45 + ],7;*)
  46 + "jaki",[
  47 + 1, 0, 1, "","<sentence>","interp", [],false;
  48 + 2, 1, 2, "","<clause>","interp", [],false;
  49 + 3, 2, 3, "Jakiego","jaki","adj", [["sg"];["gen";"acc"];["m1";"m2"];["pos"]],false;
  50 + 4, 3, 4, "kota","kot","subst", [["sg"];["gen";"acc"];["m1";"m2"]],false;
  51 + 5, 4, 5, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true;
  52 + 6, 5, 6, "ma","mieć","fin", [["sg"];["ter"];["imperf"]],false;
  53 + 7, 6, 7, "?","?","interp", [],false;
  54 + 8, 7, 8, "","</clause>","interp", [],false;
  55 + 9, 8, 9, ".","</sentence>","interp", [],false;
  56 + ],9;
44 ] 57 ]
45 58
46 let valence = [ 59 let valence = [
@@ -49,16 +62,18 @@ let valence = [ @@ -49,16 +62,18 @@ let valence = [
49 Both,Plus[One;Tensor[Atom "np";Top;Atom "acc";Top;Top]]]; 62 Both,Plus[One;Tensor[Atom "np";Top;Atom "acc";Top;Top]]];
50 [Lemma,Eq,["mieć"];Pos,Eq,["fin"];Negation,Eq,["neg"];Mood,Eq,["indicative"]],[Both,Plus[One;Tensor[Atom "np";AVar "number";Atom "nom";AVar "gender";AVar "person"]]; 63 [Lemma,Eq,["mieć"];Pos,Eq,["fin"];Negation,Eq,["neg"];Mood,Eq,["indicative"]],[Both,Plus[One;Tensor[Atom "np";AVar "number";Atom "nom";AVar "gender";AVar "person"]];
51 Both,Plus[One;Tensor[Atom "np";Top;Atom "gen";Top;Top]]]; 64 Both,Plus[One;Tensor[Atom "np";Top;Atom "gen";Top;Top]]];
52 - [Lemma,Eq,["kot"];Pos,Eq,["subst"]],[]; 65 + [Lemma,Eq,["kot"];Pos,Eq,["subst"]],[Both,Plus[One;Tensor[Atom "adjp";AVar "number";AVar "case";AVar "gender"]]];
53 [Lemma,Eq,["kota"];Pos,Eq,["subst"]],[]; 66 [Lemma,Eq,["kota"];Pos,Eq,["subst"]],[];
54 ] 67 ]
55 -(** 68 +
56 let create_chart valence tokens last = 69 let create_chart valence tokens last =
  70 + ENIAM_LCGrenderer.reset_variable_numbers ();
57 let chart = ENIAM_LCGchart.make last in 71 let chart = ENIAM_LCGchart.make last in
58 let chart = Xlist.fold tokens chart (fun chart (id,lnode,rnode,orth,lemma,pos,interp,proper) -> 72 let chart = Xlist.fold tokens chart (fun chart (id,lnode,rnode,orth,lemma,pos,interp,proper) ->
59 ENIAM_LCGrenderer.reset_variable_names (); 73 ENIAM_LCGrenderer.reset_variable_names ();
  74 + ENIAM_LCGrenderer.add_variable_numbers ();
60 let cats = ENIAMcategoriesPL.clarify_categories proper ["X"] (lemma,pos,interp) in 75 let cats = ENIAMcategoriesPL.clarify_categories proper ["X"] (lemma,pos,interp) in
61 - let l = ENIAM_LCGrendererPL.create_entries id orth cats valence in 76 + let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence in
62 ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in 77 ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
63 chart 78 chart
64 79
@@ -89,6 +104,6 @@ let test_example valence (name,tokens,last) = @@ -89,6 +104,6 @@ let test_example valence (name,tokens,last) =
89 ()) 104 ())
90 else print_endline "not reduced") 105 else print_endline "not reduced")
91 else print_endline "not parsed" 106 else print_endline "not parsed"
92 - **)  
93 -(*let _ =  
94 - Xlist.iter examples (test_example valence)*) 107 +
  108 +let _ =
  109 + Xlist.iter examples (test_example valence)
LCGparser/ENIAM_LCGrenderer.ml
@@ -97,6 +97,14 @@ let rec substitute_substvar v g = function @@ -97,6 +97,14 @@ let rec substitute_substvar v g = function
97 let empty_node = { 97 let empty_node = {
98 orth=""; lemma=""; pos=""; weight=0.; id=0; symbol=Dot; arg_symbol=Dot; attrs=[]; args=Dot;} 98 orth=""; lemma=""; pos=""; weight=0.; id=0; symbol=Dot; arg_symbol=Dot; attrs=[]; args=Dot;}
99 99
  100 +let variable_num_ref = ref 0
  101 +
  102 +let reset_variable_numbers () =
  103 + variable_num_ref := 0
  104 +
  105 +let add_variable_numbers () =
  106 + incr variable_num_ref
  107 +
100 let variable_name_ref = ref [] 108 let variable_name_ref = ref []
101 109
102 let reset_variable_names () = 110 let reset_variable_names () =
@@ -109,7 +117,7 @@ let rec add_variable_name = function @@ -109,7 +117,7 @@ let rec add_variable_name = function
109 117
110 let get_variable_name () = 118 let get_variable_name () =
111 variable_name_ref := add_variable_name (!variable_name_ref); 119 variable_name_ref := add_variable_name (!variable_name_ref);
112 - String.concat "" (List.rev (!variable_name_ref)) 120 + String.concat "" (List.rev (!variable_name_ref)) ^ (string_of_int !variable_num_ref)
113 121
114 let make_arg_symbol l = 122 let make_arg_symbol l =
115 Tuple(Xlist.map l (function 123 Tuple(Xlist.map l (function
LCGparser/test.ml
@@ -24,7 +24,7 @@ type entry = @@ -24,7 +24,7 @@ type entry =
24 | Raised of grammar_symbol 24 | Raised of grammar_symbol
25 25
26 let examples = [ 26 let examples = [
27 -(* "kot",[ 27 + "kot",[
28 0, 1, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); 28 0, 1, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]);
29 1, 2, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]])); 29 1, 2, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]]));
30 (* 1, 2, "ma","mieć","fin", Basic(Imp(Imp(Tensor[Atom "ip"],Backward,Tensor[Atom "np"; Atom "nom"]),Forward,Tensor[Atom "np"; Atom "nom"])); *) 30 (* 1, 2, "ma","mieć","fin", Basic(Imp(Imp(Tensor[Atom "ip"],Backward,Tensor[Atom "np"; Atom "nom"]),Forward,Tensor[Atom "np"; Atom "nom"])); *)
@@ -39,7 +39,7 @@ let examples = [ @@ -39,7 +39,7 @@ let examples = [
39 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])]))); 39 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])])));
40 (* 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *) 40 (* 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *)
41 4, 5, ".",".","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"])); 41 4, 5, ".",".","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"]));
42 - ],5;*) 42 + ],5;
43 43
44 "jaki",[ 44 "jaki",[
45 0, 1, "Jakiego","jaki","adj",Raised(WithVar("case",With[Atom "gen"; Atom "acc"],"A",ImpSet(ImpSet(Tensor[Atom "cp"; Atom "int"; Atom "jaki"], 45 0, 1, "Jakiego","jaki","adj",Raised(WithVar("case",With[Atom "gen"; Atom "acc"],"A",ImpSet(ImpSet(Tensor[Atom "cp"; Atom "int"; Atom "jaki"],
@@ -49,16 +49,17 @@ let examples = [ @@ -49,16 +49,17 @@ let examples = [
49 (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",Imp(Tensor[Atom "np"; AVar "case"],Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])))); *) 49 (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",Imp(Tensor[Atom "np"; AVar "case"],Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])))); *)
50 (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *) 50 (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *)
51 2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]); 51 2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]);
52 - 2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]);  
53 3, 4, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]])); 52 3, 4, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]]));
54 4, 5, "?","?","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "cp";Atom "int";Top])); 53 4, 5, "?","?","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "cp";Atom "int";Top]));
55 ],5 54 ],5
56 ] 55 ]
57 56
58 let create_chart tokens last = 57 let create_chart tokens last =
  58 + ENIAM_LCGrenderer.reset_variable_numbers ();
59 let chart = ENIAM_LCGchart.make last in 59 let chart = ENIAM_LCGchart.make last in
60 let chart = Xlist.fold tokens chart (fun chart (lnode,rnode,orth,lemma,pos,entry) -> 60 let chart = Xlist.fold tokens chart (fun chart (lnode,rnode,orth,lemma,pos,entry) ->
61 ENIAM_LCGrenderer.reset_variable_names (); 61 ENIAM_LCGrenderer.reset_variable_names ();
  62 + ENIAM_LCGrenderer.add_variable_numbers ();
62 let syntax,semantics = match entry with 63 let syntax,semantics = match entry with
63 Basic syntax -> 64 Basic syntax ->
64 let node = {ENIAM_LCGrenderer.empty_node with 65 let node = {ENIAM_LCGrenderer.empty_node with