fonetics.ml
8.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
open Xstd
open Printf
type status = Idle | Symbols | Rules | RevSymbols | RevRules
type rule = {set: string; find: string; suf: string}
module CharTree = struct
type t = M of t CharMap.t * rule list
let empty = M(CharMap.empty,[])
let rec add_path_rules rule orth i (M(map,rules)) =
if i = String.length orth then M(map,rule :: rules) else
let tree = try CharMap.find map (String.get orth i) with Not_found -> empty in
let tree = add_path_rules rule orth (i+1) tree in
M(CharMap.add map (String.get orth i) tree,rules)
let create rules =
let tree = Xlist.fold rules empty (fun tree rule ->
add_path_rules rule (rule.find ^ rule.suf) 0 tree) in
tree
let rec find_rec l i orth (M(map,rules)) =
if i = String.length orth then Xlist.fold rules l (fun l rule -> ("", rule) :: l) else
let l = try find_rec l (i+1) orth (CharMap.find map (String.get orth i)) with Not_found -> l in
Xlist.fold rules l (fun l rule -> (String.sub orth i (String.length orth - i), rule) :: l)
let find tree orth =
let found = find_rec [] 0 orth tree in
(* printf "%d\n%!" (Xlist.size found); *)
(* Xlist.iter found (fun (stem,rule) -> printf "F %s\t%s\n" stem (string_of_rule rule)); *)
found
end
let load_rules filename =
let status,symbol_defs,rev_symbol_defs,rules,rev_rules =
File.fold_tab filename (Idle,StringMap.empty,StringMap.empty,[],[]) (fun (status,symbol_defs,rev_symbol_defs,rules,rev_rules) -> function
["@symbols"] -> Symbols,symbol_defs,rev_symbol_defs,rules,rev_rules
| ["@rev_symbols"] -> RevSymbols,symbol_defs,rev_symbol_defs,rules,rev_rules
| ["@rules"] -> Rules,symbol_defs,rev_symbol_defs,rules,rev_rules
| ["@rev_rules"] -> RevRules,symbol_defs,rev_symbol_defs,rules,rev_rules
| [key;vals] ->
(match status with
Symbols -> status, StringMap.add symbol_defs key (Xstring.split " " vals), rev_symbol_defs, rules, rev_rules
| RevSymbols -> status, symbol_defs, StringMap.add rev_symbol_defs key (Xstring.split " " vals), rules, rev_rules
| _ -> failwith "Fonetics.load_rules: status 1")
| [v;r;s] ->
(match status with
Rules -> status, symbol_defs, rev_symbol_defs, {set=v; find=r; suf=s} :: rules, rev_rules
| RevRules -> status, symbol_defs, rev_symbol_defs, rules, {set=r; find=v; suf=s} :: rev_rules
| _ -> failwith "Fonetics.load_rules: status 2")
| line -> failwith ("load_rules: " ^ (String.concat "\t" line))) in
if status <> Rules && status <> RevRules then failwith "Fonetics.load_rules: status 3" else
symbol_defs, rev_symbol_defs, rules, rev_rules
let prepare_rules symbol_defs rules =
let rules = List.flatten (Xlist.rev_map rules (fun r ->
let suf = Xunicode.utf8_chars_of_utf8_string r.suf in
let suf = Xlist.map suf (fun s ->
try StringMap.find symbol_defs s with Not_found -> [s]) in
Xlist.rev_map (Xlist.multiply_list suf) (fun l ->
{r with suf=String.concat "" l}))) in
CharTree.create rules
let rules, rev_rules =
(* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_acro.dic" in *)
(* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_pl.dic" in *)
(* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_en.dic" in *)
(* let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_fr.dic" in *)
let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics_de.dic" in
prepare_rules symbol_defs rules,
prepare_rules rev_symbol_defs rev_rules
let sufs = ["ω"; "iκ"; "ρ"; "δ"; "λ"; "i"(*; "zi"*)]
let make_key s =
match Xunicode.utf8_chars_of_utf8_string s with
["ʒ"] -> "d"
| ["ř"] -> "r"
| [c] -> c
| ["d";"j"] -> "dj"
| ["d";"ʲ"] -> "dj"
| ["t";"j"] -> "tj"
| ["t";"ʲ"] -> "tj"
| [c;"′"] -> c
| [c;"j"] -> c
| [c;"ʲ"] -> c
| [c;"ʲ";"j"] -> c
| [c;"′";"j"] -> c
| _ -> failwith ("make_key: " ^ s)
let latex_escape_char = function
"′" -> "$'$"
| "ʲ" -> "\\textipa{\\super{j}}"
| "ʒ" -> "\\textipa{Z}"
| "ǯ" -> "\\textipa{\\v{Z}}"
| c -> c
let latex_of_rule rule =
String.concat "" (Xlist.map (Xunicode.utf8_chars_of_utf8_string rule.set) latex_escape_char) ^ " $\\leftarrow$ " ^ rule.find
let is_excluded r =
match r.set, r.find, r.suf with
"r", "r", "zi" -> true
| "mar", "mar", "z" -> true
| "m′er", "mier", "z" -> true
| "n′e", "nie", "i" -> true
| _ -> false
let latex_of_rules filename =
let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules filename in
let map = Xlist.fold rules StringMap.empty (fun map rule ->
if is_excluded rule then map else
let key = make_key rule.set in
let map2 = try StringMap.find map key with Not_found -> StringMap.empty in
if not (Xlist.mem sufs rule.suf) then failwith ("latex_of_rules: " ^ rule.suf) else
let map2 = StringMap.add_inc map2 rule.suf rule (fun rule2 -> failwith ("latex_of_rules: " ^ key)) in
StringMap.add map key map2) in
StringMap.iter map (fun _ map2 ->
let line = Xlist.map sufs (fun suf ->
try latex_of_rule (StringMap.find map2 suf) with Not_found -> "") in
print_endline (String.concat " & " line ^ "\\\\"))
let rec translate_rec closure found found_maping rules s =
if s = "ε" then [List.rev found,List.rev found_maping] else
let l = CharTree.find rules s in
(* Xlist.iter l (fun (t,r) ->
printf "s=%s t=%s set=%s find=%s suf=%s\n%!" s t r.set r.find r.suf); *)
let l = if l = [] && closure then
let c,s = Xunicode.first_utf8_char_of_utf8_string s in
[s,{find=c; set=c; suf=""}] else l in
(* let n = String.length s in
let c = String.sub s 0 1 in
[String.sub s 1 (n-1),{find=c; set=c; suf=""}] else l in *)
List.flatten (Xlist.rev_map l (fun (t,r) ->
translate_rec closure (r.set :: found) ((r.find,r.set) :: found_maping) rules (r.suf ^ t)))
let translate closure rules s =
(* printf "translate 1: %s\n%!" s; *)
let ll = translate_rec closure [] [] rules (s ^ "ε") in
Xlist.rev_map ll (fun (phon,mapping) -> {Types.phon=String.concat "" phon; Types.mapping=mapping})
(* let s = String.concat "" l in
(* printf "translate 2: %s\n%!" s; *)
s*)
let string_of_phon p =
Printf.sprintf "%s %s" p.Types.phon (String.concat " " (Xlist.map p.Types.mapping (fun (a,b) -> a ^ "->" ^ b)))
let print_phon p = print_endline (string_of_phon p)
(*let _ = translate rules "blafickie"
let _ = translate rules "blafiacki"
let _ = translate rules "dudzia"*)
(* let _ = Xlist.iter (translate true rules "rzódża") print_phon
let _ = Xlist.iter (translate true rules "Mia") print_phon
let _ = Xlist.iter (translate true rules "mia") print_phon
let _ = Xlist.iter (translate true rules "łódź") print_phon
let _ = Xlist.iter (translate true rules "Łódź") print_phon *)
(* let _ = translate true rules "izolował" *)
exception NotFound of string * string
exception NotEqual of string * string * string
exception MulipleSolutions of string * string * string list
let translate_and_check closure rules rev_rules x =
let l = translate closure rules x in
Xlist.iter l (fun s ->
let y = translate closure rev_rules s.Types.phon in
let y = Xlist.map y (fun s -> s.Types.phon) in
match StringSet.to_list (StringSet.of_list y) with
[] -> raise (NotFound(x,s.Types.phon))
| [t] -> if t <> x then raise (NotEqual(x,s.Types.phon,t))
| l -> raise (MulipleSolutions(x,s.Types.phon,l)));
l
let translate_single closure rules x =
let y = translate closure rev_rules x in
let y = Xlist.map y (fun s -> s.Types.phon) in
match StringSet.to_list (StringSet.of_list y) with
[] -> raise (NotFound(x,""))
| [t] -> t
| l ->
Printf.printf "%s %s\n" x (String.concat " " l);
raise (MulipleSolutions(x,"",l))
let rec rev_translate_rec x s = function
[] -> x,s,[]
| (_,"") :: m -> rev_translate_rec x s m
| (a,b) :: m ->
if Xstring.check_prefix b s then rev_translate_rec (x^a) (Xstring.cut_prefix b s) m
else x,s,m
let rev_translate closure rev_rules s m =
let x,s,_ = rev_translate_rec "" s m in
if s = "" then x else
x ^ (translate_single closure rev_rules s)
let rev_translate2 closure rev_rules s m =
let x,s,_ = rev_translate_rec "" s m in
if s = "" then [x] else
let l = translate closure rev_rules s in
if l = [] then raise (NotFound(s,"")) else
Xlist.rev_map l (fun y -> x ^ y.Types.phon)