test.ml
6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
(*
* ENIAM_LCGlexicon is a library that provides LCG lexicon for Polish
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open ENIAM_LCGlexiconTypes
open ENIAM_LCGtypes
(** Lexicon rules produced by [ENIAM_LCGlexicon.make_rules] from the Polish
    lexicon file. The binding is evaluated once, when this module is
    initialised; [create_chart] passes it to
    [ENIAM_LCGlexicon.create_entries]. *)
let rules =
  let lexicon_path = "resources/lexicon-pl.dic" in
  ENIAM_LCGlexicon.make_rules lexicon_path
(** Hand-written token lattices used as test inputs.

    Every example is a triple [(name, tokens, last)]:
    - [name] is the prefix of the output file names written by
      [test_example];
    - [tokens] is a list of tuples
      [(id, lnode, rnode, orth, lemma, pos, interp, proper)], destructured
      in exactly this order by [create_chart];
    - [last] is handed to [ENIAM_LCGchart.make] (presumably the index of
      the final lattice node - TODO confirm). *)
let examples =
  (* Tokens of the only active example: Jakiego kota Ala ma ? *)
  let jaki_tokens = [
    1, 0, 1, "", "<sentence>", "interp", [], false;
    2, 1, 2, "", "<clause>", "interp", [], false;
    3, 2, 3, "Jakiego", "jaki", "adj", [["sg"]; ["gen"; "acc"]; ["m1"; "m2"]; ["pos"]], false;
    4, 3, 4, "kota", "kot", "subst", [["sg"]; ["gen"; "acc"]; ["m1"; "m2"]], false;
    5, 4, 5, "Ala", "Ala", "subst", [["sg"]; ["nom"]; ["f"]], true;
    6, 5, 6, "ma", "mieć", "fin", [["sg"]; ["ter"]; ["imperf"]], false;
    7, 6, 7, "?", "?", "interp", [], false;
    8, 7, 8, "", "</clause>", "interp", [], false;
    9, 8, 9, ".", "</sentence>", "interp", [], false;
  ] in
  [
    (* Disabled examples, kept for reference: *)
    (*"kot",[
    1, 0, 1, "","<sentence>","interp", [],false;
    2, 1, 2, "","<clause>","interp", [],false;
    3, 2, 3, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true;
    4, 3, 4, "ma","mieć","fin", [["sg"];["ter"];["imperf"]],false;
    5, 4, 5, "kota","kot","subst", [["sg"];["gen";"acc"];["m1";"m2"]],false;
    6, 5, 6, "","</clause>","interp", [],false;
    7, 6, 7, ".","</sentence>","interp", [],false;
    ],7;
    "kota",[
    1, 0, 1, "","<sentence>","interp", [],false;
    2, 1, 2, "","<clause>","interp", [],false;
    3, 2, 3, "Ala","Ala","subst", [["sg"];["nom"];["f"]],true;
    4, 2, 3, "Ala","Al","subst", [["sg"];["gen";"acc"];["m1"]],true;
    5, 3, 4, "ma","mieć","fin", [["sg"];["ter"];["imperf"]],false;
    6, 4, 5, "kota","kot","subst", [["sg"];["gen";"acc"];["m1";"m2"]],false;
    7, 4, 5, "kota","kota","subst", [["sg"];["nom"];["f"]],false;
    8, 5, 6, "","</clause>","interp", [],false;
    9, 6, 7, ".","</sentence>","interp", [],false;
    ],7;*)
    "jaki", jaki_tokens, 9;
  ]
(** Valence table passed (through [create_chart]) to
    [ENIAM_LCGlexicon.create_entries].

    Each entry is a pair: a list of [(selector, relation, values)]
    constraints and a list of [(direction, grammar_symbol)] argument
    schemata. Entries with an empty second component declare no arguments. *)
let valence =
  (* Schema shape shared by every argument below: direction [Both] and a
     [Plus] over [One] and a [Tensor] (presumably "the argument may be
     absent" - TODO confirm against the LCG grammar documentation). *)
  let one_or tensor_args = Both, Plus [One; Tensor tensor_args] in
  (* Nominative np whose number, gender and person are variables. *)
  let nom_subject =
    one_or [Atom "np"; AVar "number"; Atom "nom"; AVar "gender"; AVar "person"] in
  (* np in a fixed case with every other position set to [Top]. *)
  let np_in case = one_or [Atom "np"; Top; Atom case; Top; Top] in
  (* adjp whose number, case and gender are variables. *)
  let adjp_dependent =
    one_or [Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"] in
  (* Selectors for the indicative finite form of "mieć" with the given
     negation value. *)
  let miec_fin negation =
    [Lemma, Eq, ["mieć"]; Pos, Eq, ["fin"]; Negation, Eq, [negation]; Mood, Eq, ["indicative"]] in
  [
    [Lemma, Eq, ["Ala"; "Al"]; Pos, Eq, ["subst"]], [];
    miec_fin "aff", [nom_subject; np_in "acc"];
    miec_fin "neg", [nom_subject; np_in "gen"];
    [Lemma, Eq, ["kot"]; Pos, Eq, ["subst"]], [adjp_dependent];
    [Lemma, Eq, ["kota"]; Pos, Eq, ["subst"]], [];
  ]
(** [create_chart valence tokens last] builds the initial chart for one
    example.

    It resets the renderer variable numbering, creates a chart with
    [ENIAM_LCGchart.make last], and then, for every token in [tokens],
    adds the lexicon entries generated from the module-level [rules] and
    the given [valence].

    @param valence valence table forwarded to
           [ENIAM_LCGlexicon.create_entries]
    @param tokens list of
           [(id, lnode, rnode, orth, lemma, pos, interp, proper)] tuples
    @param last argument of [ENIAM_LCGchart.make]
    @return the chart after all tokens have been added *)
let create_chart valence tokens last =
  ENIAM_LCGrenderer.reset_variable_numbers ();
  (* Folding step: turn one token into lexicon entries and add them to
     [acc] between [lnode] and [rnode]. *)
  let insert_token acc (id, lnode, rnode, orth, lemma, pos, interp, proper) =
    (* Renderer state is refreshed per token, before entries are created. *)
    ENIAM_LCGrenderer.reset_variable_names ();
    ENIAM_LCGrenderer.add_variable_numbers ();
    let categories =
      ENIAMcategoriesPL.clarify_categories proper ["X"] (lemma, pos, interp) in
    let entries =
      ENIAM_LCGlexicon.create_entries rules id orth categories valence in
    (* NOTE(review): the meaning of the trailing 0 is not visible here. *)
    ENIAM_LCGchart.add_inc_list acc lnode rnode entries 0 in
  let empty_chart = ENIAM_LCGchart.make last in
  Xlist.fold tokens empty_chart insert_token
(** [test_example valence (name, tokens, last)] runs the whole pipeline on
    one example and writes every intermediate result under [results/],
    using [name] as the file-name prefix.

    Stages, in order: chart creation, lazification, parsing, extraction of
    the parsed term, reduction to a dependency tree, label assignment and
    cut removal. When the chart is not parsed it prints ["not parsed"];
    when the dependency tree is not reduced it prints ["not reduced"]; in
    both cases the remaining stages are skipped.

    @param valence valence table forwarded to [create_chart]
    @param name prefix of all output file names
    @param tokens token tuples forwarded to [create_chart]
    @param last forwarded to [create_chart] *)
let test_example valence (name,tokens,last) =
  let dir = "results/" in
  let out_name suffix = name ^ suffix in
  ENIAM_LCGreductions.reset_variant_label ();
  let initial = create_chart valence tokens last in
  ENIAM_LCGlatexOf.print_chart dir (out_name "1_chart") "a1" initial;
  let lazified, references = ENIAM_LCGchart.lazify initial in
  ENIAM_LCGlatexOf.print_chart dir (out_name "2_chart") "a4" lazified;
  ENIAM_LCGlatexOf.print_references dir (out_name "2_references") "a4" references;
  (* note: parsing implicitly mutates [references] in place *)
  let parsed = ENIAM_LCGchart.parse lazified references 30. Sys.time in
  ENIAM_LCGlatexOf.print_chart dir (out_name "3_chart") "a4" parsed;
  ENIAM_LCGlatexOf.print_references dir (out_name "3_references") "a4" references;
  if not (ENIAM_LCGchart.is_parsed parsed) then print_endline "not parsed"
  else begin
    let term = ENIAM_LCGchart.get_parsed_term parsed in
    Xlatex.latex_file_out dir (out_name "4_term") "a4" false (fun out ->
      Printf.fprintf out "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
    Xlatex.latex_compile_and_clean dir (out_name "4_term");
    let tree = ENIAM_LCGreductions.reduce term references in
    ENIAM_LCGlatexOf.print_dependency_tree dir (out_name "4_dependency_tree") "a0" tree;
    if not (ENIAM_LCGreductions.is_reduced_dependency_tree tree) then
      print_endline "not reduced"
    else begin
      (* note: label assignment implicitly mutates [tree] in place *)
      ENIAM_LCGreductions.assign_labels tree;
      ENIAM_LCGlatexOf.print_dependency_tree dir (out_name "5_dependency_tree") "a4" tree;
      (* note: cut removal implicitly mutates [tree] in place *)
      ENIAM_LCGreductions.remove_cuts tree;
      ENIAM_LCGlatexOf.print_dependency_tree dir (out_name "6_dependency_tree") "a4" tree;
      ENIAM_LCGgraphOf.print_dependency_tree dir (out_name "6_dependency_tree") tree;
      ENIAM_LCGgraphOf.print_simplified_dependency_tree dir (out_name "6_simple_dependency_tree") tree;
      ()
    end
  end
(* Entry point: runs [test_example] with the [valence] table on every
   element of [examples]. Bound with [let ()] rather than [let _] so the
   compiler checks that the expression is fully applied and returns unit. *)
let () =
  Xlist.iter examples (test_example valence)