(*
 *  ENIAM_LCGparser, a parser for Logical Categorial Grammar formalism
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)
open ENIAM_LCGtypes
|
|
21
|
open Xstd
|
|
22
23
24
|
(** Lexical entry attached to a test token: a grammar symbol tagged with the
    way its chart item is built in [create_chart]. *)
type entry =
    Basic of grammar_symbol
    (* rendered with ENIAM_LCGrenderer.make_symbol / make_term *)
  | Raised of grammar_symbol
    (* rendered with ENIAM_LCGrenderer.make_raised_symbol / make_raised_term *)
|
|
26
27
|
(** Test sentences.

    Each example is a triple [(name, tokens, last)]:
    - [name] prefixes the names of the files written by [test_example];
    - [tokens] is a list of [(lnode, rnode, orth, lemma, pos, entry)] tuples,
      i.e. the left and right node of the token, its orthographic form,
      its lemma, its part-of-speech tag and its lexical entry;
    - [last] is the right node of the final token of the sentence.

    Entries left in comments are alternative (disabled) lexical entries for
    the token on the line directly above them. *)
let examples = [
  "kot",[
    0, 1, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]);
    1, 2, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]]));
    (* 1, 2, "ma","mieć","fin", Basic(Imp(Imp(Tensor[Atom "ip"],Backward,Tensor[Atom "np"; Atom "nom"]),Forward,Tensor[Atom "np"; Atom "nom"])); *)
    2, 3, "kota","kot","subst", Basic(Tensor[Atom "np"; Atom "acc"]);
    3, 4, ".",".","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"]));
    ],4;
  "rudy",[
    0, 1, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]);
    1, 2, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]]));
    2, 3, "rudego","rudy","adj", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"A",Tensor[Atom "adjp"; AVar "case"]));
    3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])])));
    (* 3, 4, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *)
    4, 5, ".",".","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"]));
    ],5;
  "jaki",[
    0, 1, "Jakiego","jaki","adj",Raised(WithVar("case",With[Atom "gen"; Atom "acc"],"A",ImpSet(ImpSet(Tensor[Atom "cp"; Atom "int"; Atom "jaki"],
      [Forward,Imp(Tensor[Atom "ip"],Forward,Tensor[Atom "np"; AVar "case"])]),
      [Forward,Imp(Tensor[Atom "np"; AVar "case"],Backward,Tensor[Atom "adjp"; AVar "case"])])));
    1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])])));
    (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",Imp(Tensor[Atom "np"; AVar "case"],Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])))); *)
    (* 1, 2, "kota","kot","subst", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *)
    2, 3, "Ala","Ala","subst", Basic(Tensor[Atom "np"; Atom "nom"]);
    3, 4, "ma","mieć","fin", Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]]));
    4, 5, "?","?","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "cp";Atom "int";Top]));
    ],5;
  "ocean",[
    0, 1, "Wpłynąłem","wpłynąć","praet", Basic(Imp(Tensor[Atom "ip"],Forward,Tensor[Atom "prepnp"; Atom "acc"]));
    1, 2, "na","na","prep", Basic(Imp(Tensor[Atom "prepnp";Atom "acc"],Forward,Tensor[Atom "np"; Atom "acc"]));
    2, 3, "suchego","suchy","adj", Basic(Tensor[Atom "adjp"; Atom "gen"]);
    3, 4, "przestwór","przestwór","subst", Basic(Imp(Tensor[Atom "np"; Atom "acc"],Forward,Tensor[Atom "np"; Atom "gen"]));
    4, 5, "oceanu","ocean","subst", Basic(Imp(Tensor[Atom "np"; Atom "gen"],Backward,Tensor[Atom "adjp"; Atom "gen"]));
    5, 6, ".",".","interp", Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"]));
    ],6;
  ]
(** [create_chart tokens last] builds the initial chart for one sentence.

    A chart is created with [ENIAM_LCGchart.make last] and every token
    [(lnode, rnode, orth, lemma, pos, entry)] is added to it between [lnode]
    and [rnode]. For each token the renderer's variable names are reset and
    its variable numbers advanced before the symbol and term are built, and
    the resulting (syntax, semantics) pair is passed through
    [ENIAM_LCGrenderer.simplify].

    The item is wrapped in [Bracket(lf, rf, syntax)], where [lf] holds iff the
    token starts at node 0 and [rf] holds iff it ends at node [last].
    Returns the chart containing all tokens. *)
let create_chart tokens last =
  ENIAM_LCGrenderer.reset_variable_numbers ();
  let chart = ENIAM_LCGchart.make last in
  Xlist.fold tokens chart (fun chart (lnode,rnode,orth,lemma,pos,entry) ->
    (* The renderer keeps variable state; the call order below is preserved
       from the original code. *)
    ENIAM_LCGrenderer.reset_variable_names ();
    ENIAM_LCGrenderer.add_variable_numbers ();
    let syntax,semantics =
      (match entry with
        Basic syntax ->
          let node = {ENIAM_LCGrenderer.empty_node with
            orth=orth; lemma=lemma; pos=pos;
            symbol=ENIAM_LCGrenderer.make_symbol syntax} in
          let semantics = ENIAM_LCGrenderer.make_term node syntax in
          ENIAM_LCGrenderer.simplify (syntax,semantics)
      | Raised syntax ->
          (* A raised entry needs two nodes: the token itself and an outer
             node that keeps only the lemma (empty orth and pos). *)
          let node = {ENIAM_LCGrenderer.empty_node with
            orth=orth; lemma=lemma; pos=pos;
            symbol=ENIAM_LCGrenderer.make_raised_symbol syntax} in
          let outer_node = {ENIAM_LCGrenderer.empty_node with
            orth=""; lemma=lemma; pos="";
            symbol=ENIAM_LCGrenderer.make_symbol syntax} in
          let semantics = ENIAM_LCGrenderer.make_raised_term node outer_node syntax in
          ENIAM_LCGrenderer.simplify (syntax,semantics)) in
    let lf = lnode = 0 in
    let rf = rnode = last in
    ENIAM_LCGchart.add chart lnode rnode (Bracket(lf,rf,syntax),semantics) 0)
|
|
94
95
96
97
98
99
100
101
102
103
104
105
|
(** [create_text_fragments tokens last] returns an array of [last] maps.

    The map at index [i] is first seeded with [rnode -> orth] for every token
    whose left node is [i]. Then, for each start node visited by
    [Int.iter_down 0 (last - 1)], every fragment ending at a node [j <> last]
    is extended with each fragment stored at [j]; the two texts are joined
    with a single space and stored under the end node of the second one.
    NOTE(review): the result covers multi-token spans only if [iter_down]
    visits higher start nodes first - confirm against Xstd. *)
let create_text_fragments tokens last =
  let fragments = Array.make last IntMap.empty in
  (* Seed the table with the single-token fragments. *)
  Xlist.iter tokens (fun (left,right,orth,_,_,_) ->
    fragments.(left) <- IntMap.add fragments.(left) right orth);
  (* Extend a fragment that ends at [mid] with every fragment starting there;
     fragments that already reach [last] are left as they are. *)
  let extend acc mid prefix =
    if mid = last then acc
    else
      IntMap.fold fragments.(mid) acc (fun acc right suffix ->
        IntMap.add acc right (prefix ^ " " ^ suffix)) in
  Int.iter_down 0 (last - 1) (fun left ->
    let extended = IntMap.fold fragments.(left) fragments.(left) extend in
    fragments.(left) <- extended);
  fragments
|
|
106
107
108
|
(** [test_example (name, tokens, last)] runs the parsing pipeline on a single
    example and writes every intermediate artefact under ["results/"], with
    file names prefixed by [name].

    Stages, in order: build the chart, print it; lazify it, print the chart
    and references; parse, print the chart and references again; if the chart
    is parsed, print the linear term, reduce it to a dependency tree and print
    it; if the tree is reduced, assign labels, remove cuts and print the tree
    after each of those steps.

    Prints ["not parsed"] or ["not reduced"] on standard output when the
    corresponding stage fails. The statement order matters: [references] and
    [dependency_tree] are mutated in place between the print calls. *)
let test_example (name,tokens,last) =
  ENIAM_LCGreductions.reset_variant_label ();
  let chart = create_chart tokens last in
  let text_fragments = create_text_fragments tokens last in
  ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a3" text_fragments chart;
  let chart,references = ENIAM_LCGchart.lazify chart in
  ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" text_fragments chart;
  ENIAM_LCGlatexOf.print_references "results/" (name^"2_references") "a4" references;
  (* NOTE(review): 30. and Sys.time are presumably a time limit and the clock
     used to enforce it - confirm against ENIAM_LCGchart.parse. *)
  let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* note: references is implicitly mutated in place *)
  ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" text_fragments chart;
  ENIAM_LCGlatexOf.print_references "results/" (name^"3_references") "a4" references;
  if ENIAM_LCGchart.is_parsed chart then (
    let term = ENIAM_LCGchart.get_parsed_term chart in
    Xlatex.latex_file_out "results/" (name^"4_term") "a4" false (fun file ->
      Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
    Xlatex.latex_compile_and_clean "results/" (name^"4_term");
    let dependency_tree = ENIAM_LCGreductions.reduce term references in
    ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"4_dependency_tree") "a0" dependency_tree;
    if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then (
      ENIAM_LCGreductions.assign_labels dependency_tree; (* note: dependency_tree is implicitly mutated in place *)
      ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"5_dependency_tree") "a4" dependency_tree;
      ENIAM_LCGreductions.remove_cuts dependency_tree; (* note: dependency_tree is implicitly mutated in place *)
      ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"6_dependency_tree") "a4" dependency_tree;
      ENIAM_LCGgraphOf.print_dependency_tree "results/" (name^"6_dependency_tree") dependency_tree;
      ENIAM_LCGgraphOf.print_simplified_dependency_tree "results/" (name^"6_simple_dependency_tree") dependency_tree;
      ())
    else print_endline "not reduced")
  else print_endline "not parsed"
(* Program entry point: run [test_example] on every entry of [examples].
   [let ()] is used so the compiler checks that the result is [unit]. *)
let () =
  Xlist.iter examples test_example
|