Commit b4c2dd39d6fc44b3251cef441e2ee30f5f8a1a9c
1 parent
0b0d4af3
dodanie x-args do parsowania drzew zależnościowych
Showing
6 changed files
with
20 additions
and
14 deletions
LCGlexicon/ENIAM_LCGlexicon.ml
| ... | ... | @@ -74,6 +74,16 @@ let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) = |
| 74 | 74 | with Not_found -> BasicSem(Xlist.map quant fst) in |
| 75 | 75 | selectors,(bracket,quant,syntax),(semantics,weight) |
| 76 | 76 | |
(* NOTE(review): add_x_args_rec rewrites a grammar symbol so that its innermost
   Tensor accepts extra optional X arguments.
   - Imp and ImpSet: recurse into the first component only; direction,
     argument and argument list are kept as they are.
   - Tensor[Atom <conll_root>]: returned unchanged, so the CoNLL root gets
     no X arguments.
   - any other Tensor l: wrapped in an ImpSet with two Maybe(Tensor[Atom X])
     arguments, one Backward and one Forward.
   - every other constructor: raises Failure via failwith, with the symbol
     rendered by ENIAM_LCGstringOf.grammar_symbol in the message.
   Presumably the first component of Imp/ImpSet is the result category of the
   functor; confirm against ENIAM_LCGtypes. *)
| 77 | +let rec add_x_args_rec = function | |
| 78 | + Imp(s,d,t) -> Imp(add_x_args_rec s,d,t) | |
| 79 | + | ImpSet(s,l) -> ImpSet(add_x_args_rec s,l) | |
| 80 | + | Tensor[Atom "<conll_root>"] -> Tensor[Atom "<conll_root>"] | |
| 81 | + | Tensor l -> ImpSet(Tensor l,[Backward,Maybe(Tensor[Atom "X"]);Forward,Maybe(Tensor[Atom "X"])]) | |
| 82 | + | t -> failwith ("add_x_args_rec: " ^ ENIAM_LCGstringOf.grammar_symbol 0 t) | |
| 83 | + | |
(* NOTE(review): add_x_args lifts add_x_args_rec to a whole lexicon entry.
   Only the syntax component of the (bracket,quant,syntax) triple is
   rewritten; selectors, bracket, quant, semantics and weight pass through
   unchanged. The input shape matches the tuple returned by assign_semantics
   (selectors,(bracket,quant,syntax),(semantics,weight)). *)
| 84 | +let add_x_args (selectors,(bracket,quant,syntax),(semantics,weight)) = | |
| 85 | + (selectors,(bracket,quant,add_x_args_rec syntax),(semantics,weight)) | |
| 86 | + | |
(* NOTE(review): extract_category (unchanged context rows) scans a list of
   (cat,rel,v) triples for the first one whose cat equals pat. It returns
   rel, v and the list with that triple removed; rev accumulates the already
   visited triples in reverse, and List.rev rev @ l restores their order.
   Raises Not_found when no triple matches. *)
| 77 | 87 | let rec extract_category pat rev = function |
| 78 | 88 | (cat,rel,v) :: l -> if cat = pat then rel,v,(List.rev rev @ l) else extract_category pat ((cat,rel,v) :: rev) l |
| 79 | 89 | | [] -> raise Not_found |
| ... | ... | @@ -93,10 +103,11 @@ let dict_of_grammar grammar = |
| 93 | 103 | StringMap.add_inc dict2 lemma [rule] (fun l -> rule :: l)),l in |
| 94 | 104 | StringMap.add dict pos (dict2,l))) |
| 95 | 105 | |
(* NOTE(review): make_rules gains a leading x_flag parameter in this commit
   (the first row below is the removed one-argument signature).
   Pipeline after the change: load the lexicon from filename, apply
   assign_quantifiers, then assign_semantics, then add_x_args only when
   x_flag is true, and finally build the dictionary with dict_of_grammar.
   Each step is written as List.rev (Xlist.rev_map ...), which keeps the
   original entry order; presumably chosen to stay tail-recursive on a large
   lexicon - confirm against Xlist.
   This is an interface change: every existing caller must now pass the flag,
   and the visible call sites in this diff are updated accordingly. *)
| 96 | -let make_rules filename = | |
| 106 | + let make_rules x_flag filename = | |
| 97 | 107 | let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in |
| 98 | 108 | let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in |
| 99 | 109 | let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in |
| 110 | + let lexicon = if x_flag then List.rev (Xlist.rev_map lexicon add_x_args) else lexicon in | |
| 100 | 111 | dict_of_grammar lexicon |
| 101 | 112 | |
| 102 | 113 | let find_rules rules cats = |
| ... | ... |
LCGlexicon/resources/lexicon-pl.dic
| ... | ... | @@ -265,13 +265,6 @@ lemma=),pos=interp: rparen; |
| 265 | 265 | lemma=],pos=interp: rparen2; |
| 266 | 266 | pos=unk: np*number*case*gender*person; |
| 267 | 267 | |
| 268 | -# | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] c (make_node "." "interp" c.weight 0 [])] # FIXME: to jest potrzebne przy CONLL | |
| 269 | -# | "<conll_root>","interp",[] -> | |
| 270 | -# let batrs = (make_node "<conll_root>" "interp" c.weight 0 []) in | |
| 271 | -# let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in | |
| 272 | -# [LCGrenderer.make_frame false tokens lex_sems [] schema_list ["<conll_root>"] d batrs] | |
| 273 | -# | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in | |
| 274 | - | |
| 275 | 268 | lemma=<conll_root>,pos=interp: <conll_root>/(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj); |
| 276 | 269 | |
| 277 | 270 | pos=sinterj: BRACKET interj; |
| ... | ... |
LCGlexicon/test.ml
| ... | ... | @@ -20,8 +20,8 @@ |
| 20 | 20 | open ENIAM_LCGlexiconTypes |
| 21 | 21 | open ENIAM_LCGtypes |
| 22 | 22 | |
(* NOTE(review): call-site update for the new make_rules signature. The
   lexicon test passes false, so its rules are built without the add_x_args
   step; the commented-out alternative path is updated the same way. *)
| 23 | -let rules = ENIAM_LCGlexicon.make_rules ENIAM_LCGlexiconTypes.rules_filename | |
| 24 | -(* let rules = ENIAM_LCGlexicon.make_rules "resources/lexicon-pl.dic" *) | |
| 23 | +let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.rules_filename | |
| 24 | +(* let rules = ENIAM_LCGlexicon.make_rules false "resources/lexicon-pl.dic" *) | |
| 25 | 25 | |
| 26 | 26 | let examples = [ |
| 27 | 27 | "kot",[ |
| ... | ... |
LCGparser/ENIAM_LCGrules.ml
| ... | ... | @@ -446,8 +446,8 @@ let backward_cross_composition references args functs = |
(* NOTE(review): the active rule list still contains only backward_application
   and forward_application. Both cross-composition rules were commented out
   before this commit and remain commented out after it; the hunk only merges
   two single-line comments into one block comment, so parser behaviour is
   unchanged here. *)
| 446 | 446 | let rules = [ |
| 447 | 447 | backward_application; |
| 448 | 448 | forward_application; |
| 449 | - (* backward_cross_composition; *) | |
| 450 | - (* forward_cross_composition; *) | |
| 449 | + (*backward_cross_composition; | |
| 450 | + forward_cross_composition;*) | |
| 451 | 451 | ] |
| 452 | 452 | |
| 453 | 453 | let rec flatten_functor2 l seml = function |
| ... | ... |
LCGparser/TODO
| 1 | +- sprawdzić czy krzyżowe kompozycje działają w zdaniu "Ponownie musiał użyć ręcznika" | |
| ... | ... |
corpora/test_conll2.ml
| ... | ... | @@ -22,7 +22,8 @@ open ENIAM_LCGlexiconTypes |
| 22 | 22 | open ENIAM_LCGtypes |
| 23 | 23 | open ENIAMsubsyntaxTypes |
| 24 | 24 | |
(* NOTE(review): two rule dictionaries are now built from the same lexicon
   file. rules is built with x_flag = false, i.e. without the add_x_args
   step. dep_rules is built with x_flag = true and is the one passed to
   create_entries inside create_dep_chart further down in this diff.
   The lexicon file is loaded and processed twice at module initialisation. *)
| 25 | -let rules = ENIAM_LCGlexicon.make_rules ENIAM_LCGlexiconTypes.rules_filename | |
| 25 | +let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.rules_filename | |
| 26 | +let dep_rules = ENIAM_LCGlexicon.make_rules true ENIAM_LCGlexiconTypes.rules_filename | |
| 26 | 27 | |
| 27 | 28 | let examples = [ |
| 28 | 29 | (* "Szpak","Szpak śpiewa.";*) |
| ... | ... | @@ -81,7 +82,7 @@ let create_dep_chart tokens lex_sems paths = |
| 81 | 82 | ENIAM_LCGrenderer.reset_variable_names (); |
| 82 | 83 | ENIAM_LCGrenderer.add_variable_numbers (); |
| 83 | 84 | let cats = clarify_categories ["X"] t in |
| 84 | - let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in | |
| 85 | + let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in | |
| 85 | 86 | IntMap.add nodes i l) in |
| 86 | 87 | dep_create_rec nodes sons 0 |
| 87 | 88 | |
| ... | ... |