Commit b4c2dd39d6fc44b3251cef441e2ee30f5f8a1a9c
1 parent
0b0d4af3
dodanie x-args do parsowania drzew zależnościowych
Showing 6 changed files with 20 additions and 14 deletions
LCGlexicon/ENIAM_LCGlexicon.ml
... | ... | @@ -74,6 +74,16 @@ let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) = |
74 | 74 | with Not_found -> BasicSem(Xlist.map quant fst) in |
75 | 75 | selectors,(bracket,quant,syntax),(semantics,weight) |
76 | 76 | |
77 | +let rec add_x_args_rec = function (* Rewrites a grammar symbol: follows the first component of Imp/ImpSet down to a Tensor and attaches Maybe-wrapped "X" arguments there. *) | |
78 | + Imp(s,d,t) -> Imp(add_x_args_rec s,d,t) (* only the first component s is rewritten; d and t are kept as they are *) | |
79 | + | ImpSet(s,l) -> ImpSet(add_x_args_rec s,l) (* likewise for ImpSet: s is rewritten, the list l is untouched *) | |
80 | + | Tensor[Atom "<conll_root>"] -> Tensor[Atom "<conll_root>"] (* the bare <conll_root> category is returned unchanged *) | |
81 | + | Tensor l -> ImpSet(Tensor l,[Backward,Maybe(Tensor[Atom "X"]);Forward,Maybe(Tensor[Atom "X"])]) (* wrap in an ImpSet with one Backward and one Forward Maybe "X" argument; Maybe presumably marks the argument as optional - confirm *) | |
82 | + | t -> failwith ("add_x_args_rec: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) (* any other constructor raises Failure with the printed symbol in the message *) | |
83 | + | |
84 | +let add_x_args (selectors,(bracket,quant,syntax),(semantics,weight)) = (* Applies add_x_args_rec to the syntax field of one lexicon entry. *) | |
85 | + (selectors,(bracket,quant,add_x_args_rec syntax),(semantics,weight)) (* every other field of the entry is passed through unchanged *) | |
86 | + | |
77 | 87 | let rec extract_category pat rev = function (* Finds the first (cat,rel,v) triple whose cat equals pat; rev accumulates the already skipped triples in reverse order. *) |
78 | 88 | (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l (* on a match returns rel, v and the list without the matched triple, original order restored *) |
79 | 89 | | [] -> raise Not_found (* no triple with category pat in the list *) |
... | ... | @@ -93,10 +103,11 @@ let dict_of_grammar grammar = |
93 | 103 | StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in |
94 | 104 | StringMap.add dict pos (dict2,l))) |
95 | 105 | |
96 | -let make_rules filename = | |
106 | +let make_rules x_flag filename = (* Builds the rule dictionary from the lexicon file filename; x_flag selects whether add_x_args is applied to every entry. *) | |
97 | 107 | let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in |
98 | 108 | let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in (* rev_map followed by List.rev: presumably an order-preserving map - confirm against Xlist *) |
99 | 109 | let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in |
110 | + let lexicon = if x_flag then List.rev (Xlist.rev_map lexicon add_x_args) else lexicon in (* with x_flag = false the lexicon is left exactly as before this change *) | |
100 | 111 | dict_of_grammar lexicon |
101 | 112 | |
102 | 113 | let find_rules rules cats = |
... | ... |
LCGlexicon/resources/lexicon-pl.dic
... | ... | @@ -265,13 +265,6 @@ lemma=),pos=interp: rparen; |
265 | 265 | lemma=],pos=interp: rparen2; |
266 | 266 | pos=unk: np*number*case*gender*person; |
267 | 267 | |
268 | -# | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] c (make_node "." "interp" c.weight 0 [])] # FIXME: to jest potrzebne przy CONLL | |
269 | -# | "<conll_root>","interp",[] -> | |
270 | -# let batrs = (make_node "<conll_root>" "interp" c.weight 0 []) in | |
271 | -# let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in | |
272 | -# [LCGrenderer.make_frame false tokens lex_sems [] schema_list ["<conll_root>"] d batrs] | |
273 | -# | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in | |
274 | - | |
275 | 268 | lemma=<conll_root>,pos=interp: <conll_root>/(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj); |
276 | 269 | |
277 | 270 | pos=sinterj: BRACKET interj; |
... | ... |
LCGlexicon/test.ml
... | ... | @@ -20,8 +20,8 @@ |
20 | 20 | open ENIAM_LCGlexiconTypes |
21 | 21 | open ENIAM_LCGtypes |
22 | 22 | |
23 | -let rules = ENIAM_LCGlexicon.make_rules ENIAM_LCGlexiconTypes.rules_filename | |
24 | -(* let rules = ENIAM_LCGlexicon.make_rules "resources/lexicon-pl.dic" *) | |
23 | +let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.rules_filename (* false is the x_flag argument: add_x_args is not applied to the lexicon *) | |
24 | +(* let rules = ENIAM_LCGlexicon.make_rules false "resources/lexicon-pl.dic" *) | |
25 | 25 | |
26 | 26 | let examples = [ |
27 | 27 | "kot",[ |
... | ... |
LCGparser/ENIAM_LCGrules.ml
... | ... | @@ -446,8 +446,8 @@ let backward_cross_composition references args functs = |
446 | 446 | let rules = [ (* list of rules in use: only the two application rules are active, both cross-composition rules are commented out *) |
447 | 447 | backward_application; |
448 | 448 | forward_application; |
449 | - (* backward_cross_composition; *) | |
450 | - (* forward_cross_composition; *) | |
449 | + (*backward_cross_composition; | |
450 | + forward_cross_composition;*) | |
451 | 451 | ] |
452 | 452 | |
453 | 453 | let rec flatten_functor2 l seml = function |
... | ... |
LCGparser/TODO
1 | +- sprawdzić czy krzyżowe kompozycje działają w zdaniu "Ponownie musiał użyć ręcznika" | |
... | ... |
corpora/test_conll2.ml
... | ... | @@ -22,7 +22,8 @@ open ENIAM_LCGlexiconTypes |
22 | 22 | open ENIAM_LCGtypes |
23 | 23 | open ENIAMsubsyntaxTypes |
24 | 24 | |
25 | -let rules = ENIAM_LCGlexicon.make_rules ENIAM_LCGlexiconTypes.rules_filename | |
25 | +let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.rules_filename (* x_flag = false: lexicon without the extra X arguments *) | |
26 | +let dep_rules = ENIAM_LCGlexicon.make_rules true ENIAM_LCGlexiconTypes.rules_filename (* x_flag = true: same lexicon file with add_x_args applied; passed to create_entries in create_dep_chart *) | |
26 | 27 | |
27 | 28 | let examples = [ |
28 | 29 | (* "Szpak","Szpak śpiewa.";*) |
... | ... | @@ -81,7 +82,7 @@ let create_dep_chart tokens lex_sems paths = |
81 | 82 | ENIAM_LCGrenderer.reset_variable_names (); |
82 | 83 | ENIAM_LCGrenderer.add_variable_numbers (); |
83 | 84 | let cats = clarify_categories ["X"] t in |
84 | - let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in | |
85 | + let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in | |
85 | 86 | IntMap.add nodes i l) in |
86 | 87 | dep_create_rec nodes sons 0 |
87 | 88 | |
... | ... |