treeChange.ml
4.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
open Xstd
open PreTypes
let if_lemma lemmas = function
Lemma(l,_,_) -> List.exists (fun x -> x = l) lemmas
| _ -> false
let if_cat cats = function
Lemma(_,cat,_) -> List.exists (fun x -> x = cat) cats
| _ -> false
let if_interps interps token =
let interp = match token with
Lemma(_,_,i) -> i
| _ -> [[[]]] in
let if_interp nr value =
List.exists (fun x ->
List.exists (fun y ->
y = value) (List.nth x nr)) interp in
Xlist.fold interps true (fun acc (nr,value) -> acc && (if_interp nr value))
let correct_coordination paths tokens =
let paths_ls = List.mapi (fun i (id,super,label) ->
(i,id,super,label)) (Array.to_list paths) in
(* let ps a sons =
print_endline a;
List.iter (fun (i,_,_,_) -> print_endline (ExtArray.get tokens i).orth) sons;
print_endline "" in *)
let rec correct_rec (i,id,super,label) sons =
let left_s, right_s = List.partition (fun (a,b,c,d) -> a < i) sons in
(* ps "left:" (List.rev left_s);
ps "right:" right_s; *)
find_father i (List.rev left_s);
find_father i right_s
and find_father i0 = function
[(i,id,super,label)] -> paths.(i) <- (id,i0,label)
| (a,b,c,d) :: (i,id,super,label) :: t ->
paths.(i) <- (id,i0,label);
if not (if_cat ["conj"; "interp"] (ExtArray.get tokens i).token)
then (prerr_endline "find_father"; failwith "find_father");
correct_rec (i,id,super,label) (if a < i
then (a,b,c,d) :: t
else List.rev @@ (a,b,c,d) :: t)
| _ -> prerr_endline "find_father"; failwith "find_father" in
Array.iteri (fun i (id,super,label) ->
if if_cat ["conj"; "interp"] (ExtArray.get tokens i).token
then (let sons = List.filter (fun (_,_,super,_) -> super = i) paths_ls in
if (List.length sons > 2)
then correct_rec (i,id,super,label) sons)) paths;
paths
let replace_tokens paths tokens =
(* for i = 0 to ExtArray.size tokens - 1 do
print_endline (string_of_int i ^ ": "^ (ExtArray.get tokens i).orth)
done; *)
let find_token orth = Int.fold 0 (ExtArray.size tokens - 1) 0 (fun acc i ->
if (ExtArray.get tokens i).orth = orth then i else acc) in
let multidot i id0 super0 label0 =
let id1, super1, label1 = paths.(super0) in
let id2, super2, label2 = paths.(super1) in
if (ExtArray.get tokens id1).orth = "." &&
(ExtArray.get tokens id2).orth = "."
then
(paths.(super1) <- (find_token "..." ,super2, label2);
paths.(super0) <- (0,-1,"");
paths.(i) <- (0,-1,"")) in
let brev i id super label =
let if_the_last_dot () =
let (id_dot, s_dot, l_dot) = List.find (fun (i2,s,l) ->
s = i && ((ExtArray.get tokens i2).orth = "." || (ExtArray.get tokens i2).orth = "...")) (Array.to_list paths) in
Array.fold_left (fun acc (i2,s,l) ->
acc && (ExtArray.get tokens i2).beg <= (ExtArray.get tokens id_dot).beg) true paths in
let dot = if if_interps [0,"npun"] (ExtArray.get tokens id).token || if_the_last_dot ()
then ""
else "." in
let n_orth = (ExtArray.get tokens id).orth ^ dot in
paths.(i) <- (find_token n_orth,super,label) in
Array.iteri (fun i (id,super,label) ->
if (ExtArray.get tokens id).orth = "."
then multidot i id super label;
if if_cat ["brev"] (ExtArray.get tokens id).token
then brev i id super label)
paths;
paths
let remove_interps interp paths tokens =
let paths_ls = Array.to_list paths in
Array.iteri (fun i (id,super,label) ->
if (ExtArray.get tokens id).orth = interp &&
not (List.exists (fun (_,super,_) -> super = i) paths_ls)
then paths.(i) <- (0,-1,"")) paths;
paths
let swap_dep paths tokens =
let change_dep i (id,super,label) =
let id_S, super_S, label_S = paths.(super) in
paths.(i) <- (id,super_S,label);
paths.(super) <- (id_S, id, label_S) in
let rec correct_dep i (id,super,label) =
let adv_relators = ["kto";"co";"ile";"czyj";"jaki";"który";
"jak";"skąd";"dokąd";"gdzie";"którędy";"kiedy";"odkąd";"dlaczego";"czemu";"gdy"] in
if (if_cat ["comp"] (ExtArray.get tokens id).token &&
if_cat ["fin"; "praet"; "winien"; "pred"; "imps"] (ExtArray.get tokens super).token) ||
(if_cat ["conj"] (ExtArray.get tokens id).token &&
if_cat ["fin"; "praet"; "winien"; "pred"; "imps"] (ExtArray.get tokens super).token &&
not (List.exists (fun (_,super,_) -> super = i) (Array.to_list paths))) ||
(if_cat ["ppron3"] (ExtArray.get tokens id).token &&
if_interps [5,"praep"] (ExtArray.get tokens id).token) ||
(if_lemma adv_relators (ExtArray.get tokens id).token &&
if_cat ["fin"; "praet"; "winien"; "imps"; "subst"; "pred"] (ExtArray.get tokens super).token)
then
change_dep i (id,super,label);
if (if_lemma adv_relators (ExtArray.get tokens id).token &&
if_cat ["subst"; "pred"] (ExtArray.get tokens super).token)
then correct_dep i paths.(i) in
Array.iteri correct_dep paths; paths