fonetics.ml
8.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
open Xstd
open Printf
type status = Idle | Symbols | Rules | RevSymbols | RevRules
type rule = {set: string; find: string; suf: string; lang: string}
let string_of_phon p =
Printf.sprintf "%s %s" p.Types.phon (String.concat " " (Xlist.map p.Types.mapping (fun (a,b) -> a ^ "->" ^ b)))
module CharTree = struct
type t = M of t CharMap.t * rule list
let empty = M(CharMap.empty,[])
let rec add_path_rules rule orth i (M(map,rules)) =
if i = String.length orth then M(map,rule :: rules) else
let tree = try CharMap.find map (String.get orth i) with Not_found -> empty in
let tree = add_path_rules rule orth (i+1) tree in
M(CharMap.add map (String.get orth i) tree,rules)
let create rules =
let tree = Xlist.fold rules empty (fun tree rule ->
add_path_rules rule (rule.find ^ rule.suf) 0 tree) in
tree
let rec find_rec l i orth (M(map,rules)) =
if i = String.length orth then Xlist.fold rules l (fun l rule -> ("", rule) :: l) else
let l = try find_rec l (i+1) orth (CharMap.find map (String.get orth i)) with Not_found -> l in
Xlist.fold rules l (fun l rule -> (String.sub orth i (String.length orth - i), rule) :: l)
let find tree orth =
let found = find_rec [] 0 orth tree in
(* printf "%d\n%!" (Xlist.size found); *)
(* Xlist.iter found (fun (stem,rule) -> printf "F %s\t%s\n" stem (string_of_rule rule)); *)
found
end
let load_rules filename =
let status,symbol_defs,rev_symbol_defs,rules,rev_rules =
File.fold_tab filename (Idle,StringMap.empty,StringMap.empty,[],[]) (fun (status,symbol_defs,rev_symbol_defs,rules,rev_rules) -> function
["@symbols"] -> Symbols,symbol_defs,rev_symbol_defs,rules,rev_rules
| ["@rev_symbols"] -> RevSymbols,symbol_defs,rev_symbol_defs,rules,rev_rules
| ["@rules"] -> Rules,symbol_defs,rev_symbol_defs,rules,rev_rules
| ["@rev_rules"] -> RevRules,symbol_defs,rev_symbol_defs,rules,rev_rules
| [key;vals] ->
(match status with
Symbols -> status, StringMap.add symbol_defs key (Xstring.split " " vals), rev_symbol_defs, rules, rev_rules
| RevSymbols -> status, symbol_defs, StringMap.add rev_symbol_defs key (Xstring.split " " vals), rules, rev_rules
| _ -> failwith ("Fonetics.load_rules status 1: " ^ key ^ "\t" ^ vals))
| [lang;v;r;s] ->
(match status with
Rules -> status, symbol_defs, rev_symbol_defs, {set=v; find=r; suf=s; lang=lang} :: rules, rev_rules
| RevRules -> status, symbol_defs, rev_symbol_defs, rules, {set=r; find=v; suf=s; lang=lang} :: rev_rules
| _ -> failwith "Fonetics.load_rules: status 2")
| line -> failwith ("load_rules: " ^ (String.concat "\t" line))) in
if status <> Rules && status <> RevRules then failwith "Fonetics.load_rules: status 3" else
symbol_defs, rev_symbol_defs, rules, rev_rules
let prepare_rules symbol_defs rules =
let rules = List.flatten (Xlist.rev_map rules (fun r ->
let suf = Xunicode.utf8_chars_of_utf8_string r.suf in
let suf = Xlist.map suf (fun s ->
try StringMap.find symbol_defs s with Not_found -> [s]) in
Xlist.rev_map (Xlist.multiply_list suf) (fun l ->
{r with suf=String.concat "" l}))) in
CharTree.create rules
let select_rules lang rules =
Xlist.fold rules [] (fun rules r ->
if r.lang = lang then r :: rules else rules)
let rules, rev_rules, core_rules, core_rev_rules =
let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules "data/fonetics.dic" in
let core_rules = select_rules "core" rules in
let core_rev_rules = select_rules "core" rev_rules in
prepare_rules symbol_defs rules,
prepare_rules rev_symbol_defs rev_rules,
prepare_rules symbol_defs core_rules,
prepare_rules rev_symbol_defs core_rev_rules
let sufs = ["ω"; "iκ"; "ρ"; "δ"; "λ"; "i"(*; "zi"*)]
let make_key s =
match Xunicode.utf8_chars_of_utf8_string s with
["ʒ"] -> "d"
| ["ř"] -> "r"
| [c] -> c
| ["d";"j"] -> "dj"
| ["d";"ʲ"] -> "dj"
| ["t";"j"] -> "tj"
| ["t";"ʲ"] -> "tj"
| [c;"′"] -> c
| [c;"j"] -> c
| [c;"ʲ"] -> c
| [c;"ʲ";"j"] -> c
| [c;"′";"j"] -> c
| _ -> failwith ("make_key: " ^ s)
let latex_escape_char = function
"′" -> "$'$"
| "ʲ" -> "\\textipa{\\super{j}}"
| "ʒ" -> "\\textipa{Z}"
| "ǯ" -> "\\textipa{\\v{Z}}"
| c -> c
let latex_of_rule rule =
String.concat "" (Xlist.map (Xunicode.utf8_chars_of_utf8_string rule.set) latex_escape_char) ^ " $\\leftarrow$ " ^ rule.find
let is_excluded r =
match r.set, r.find, r.suf with
"r", "r", "zi" -> true
| "mar", "mar", "z" -> true
| "m′er", "mier", "z" -> true
| "n′e", "nie", "i" -> true
| _ -> false
let latex_of_rules filename =
let symbol_defs,rev_symbol_defs,rules,rev_rules = load_rules filename in
let map = Xlist.fold rules StringMap.empty (fun map rule ->
if is_excluded rule then map else
let key = make_key rule.set in
let map2 = try StringMap.find map key with Not_found -> StringMap.empty in
if not (Xlist.mem sufs rule.suf) then failwith ("latex_of_rules: " ^ rule.suf) else
let map2 = StringMap.add_inc map2 rule.suf rule (fun rule2 -> failwith ("latex_of_rules: " ^ key)) in
StringMap.add map key map2) in
StringMap.iter map (fun _ map2 ->
let line = Xlist.map sufs (fun suf ->
try latex_of_rule (StringMap.find map2 suf) with Not_found -> "") in
print_endline (String.concat " & " line ^ "\\\\"))
let rec translate_rec closure found found_maping rules s =
if s = "ε" then [List.rev found,List.rev found_maping] else
let l = CharTree.find rules s in
(* Xlist.iter l (fun (t,r) ->
printf "s=%s t=%s set=%s find=%s suf=%s\n%!" s t r.set r.find r.suf); *)
let l = if l = [] && closure then
let c,s = Xunicode.first_utf8_char_of_utf8_string s in
[s,{find=c; set=c; suf=""; lang=""}] else l in
List.flatten (Xlist.rev_map l (fun (t,r) ->
translate_rec closure (r.set :: found) ((r.find,r.set) :: found_maping) rules (r.suf ^ t)))
let translate closure rules s =
(* printf "translate 1: %s\n%!" s; *)
let ll = translate_rec closure [] [] rules (s ^ "ε") in
Xlist.rev_map ll (fun (phon,mapping) -> {Types.phon=String.concat "" phon; Types.mapping=mapping})
exception NotFound of string * string
exception NotEqual of string * string * string
exception MulipleSolutions of string * string * string list
let translate_simple closure rules s =
let ll = translate_rec closure [] [] rules (s ^ "ε") in
if ll = [] then raise (NotFound(s,"")) else
Xlist.rev_map ll (fun (phon,mapping) -> String.concat "" phon)
let print_phon p = print_endline (string_of_phon p)
(*let _ = translate rules "blafickie"
let _ = translate rules "blafiacki"
let _ = translate rules "dudzia"*)
(* let _ = Xlist.iter (translate true rules "rzódża") print_phon
let _ = Xlist.iter (translate true rules "Mia") print_phon
let _ = Xlist.iter (translate true rules "mia") print_phon
let _ = Xlist.iter (translate true rules "łódź") print_phon
let _ = Xlist.iter (translate true rules "Łódź") print_phon *)
(* let _ = translate true rules "izolował" *)
let translate_and_check closure rules rev_rules orth =
let l = translate closure rules orth in
Xlist.iter l (fun s ->
let y = translate closure rev_rules s.Types.phon in
let y = Xlist.map y (fun s -> s.Types.phon) in
match StringSet.to_list (StringSet.of_list y) with
[] -> raise (NotFound(orth,s.Types.phon))
| [t] -> if t <> orth then raise (NotEqual(orth,s.Types.phon,t))
| l -> raise (MulipleSolutions(orth,s.Types.phon,l)))
(*let translate_single closure rules x =
let y = translate closure rev_rules x in
let y = Xlist.map y (fun s -> s.Types.phon) in
match StringSet.to_list (StringSet.of_list y) with
[] -> raise (NotFound(x,""))
| [t] -> t
| l ->
Printf.printf "%s %s\n" x (String.concat " " l);
raise (MulipleSolutions(x,"",l))
let rec rev_translate_rec x s = function
[] -> x,s,[]
| (_,"") :: m -> rev_translate_rec x s m
| (a,b) :: m ->
if Xstring.check_prefix b s then rev_translate_rec (x^a) (Xstring.cut_prefix b s) m
else x,s,m
let rev_translate closure rev_rules s m =
let x,s,_ = rev_translate_rec "" s m in
if s = "" then x else
x ^ (translate_single closure rev_rules s)*)
(*let rev_translate2 closure rev_rules s m =
let x,s,_ = rev_translate_rec "" s m in
if s = "" then [x] else
let l = translate closure rev_rules s in
if l = [] then raise (NotFound(s,"")) else
Xlist.rev_map l (fun y -> x ^ y.Types.phon)*)
let rec get_short_stem x s = function
[] -> if s = "" then x else failwith "get_short_stem"
| (a,b) :: m ->
if Xstring.check_prefix b s then get_short_stem (x^a) (Xstring.cut_prefix b s) m
else x