(* ENIAMmorphologyTypes.ml *)
(*
* ENIAMmorphology, a morphological analyser and a guesser for Polish
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
(** Marker stored in the [star] field of a [rule].
    NOTE(review): the meaning of the individual constructors is not
    visible in this file; confirm against the rule-loading code. *)
type star =
  | Productive
  | Star
  | Ndm
  | Dial
  | Acro
  | Aux
  | Aux2
(** A single phonetic rule; values of this type are the elements of the
    [mapping] list of [phon_orth].
    NOTE(review): the field names suggest a set/find pair, a suffix and a
    language code, but their exact semantics are not shown here; confirm. *)
type phon_rule = {
  pset: string;
  pfind: string;
  psuf: string;
  plang: string;
}
(** A morphological rule.

    [string_of_rule] prints [pref], [find], [set] and [tags];
    [string_of_tags] renders every element of [tags] as key=value.
    NOTE(review): the precise role of [pref]/[find]/[set] (presumably a
    prefix and a suffix find/replace pair) is not established in this
    file; confirm. *)
type rule = {
  star: star;
  pref: string;
  find: string;
  set: string;
  tags: (string * string) list;
  interp: string;
  id: string;
  freq: int;
}
(** A phonetic string together with the list of phonetic rules in
    [mapping].  The commented-out element type below is kept from the
    original declaration.
    NOTE(review): presumably [mapping] lists the rules relating [phon]
    to an orthographic form; confirm against the code that builds it. *)
type phon_orth = {
  phon: string;
  mapping: phon_rule (* (string * string) *) list;
}
(** One form belonging to an [entry] (see the [forms] field of [entry]).

    The labels [interp] and [freq] are also labels of [rule]; because
    [form] is declared later, an unqualified use of them resolves to
    [form] unless the surrounding types say otherwise.
    NOTE(review): the contents of the [candidates] tuples are not
    documented in this file; confirm with the code that fills them. *)
type form = {
  orth: string;
  phon_orth: phon_orth list;
  interp: string;
  freq: int;
  genre: string;
  validated: bool;
  candidates: (string * rule * phon_orth * phon_orth list) list;
}
(** A lexicon entry: a lemma with its list of forms.  The commented-out
    fields are kept from the original declaration. *)
type entry = {
  lemma: string;
  (* phon_lemma: string list; *)
  cat: string;
  forms: form list;
  proper_type: string;
  (* ndm: bool; *)
  stem: string;
  phon_stem: string list;
  aspect: string;
}
(** Default [form]: empty strings and lists, [validated] set to [false]
    and [freq] set to 1. *)
let empty_form = {
  orth = "";
  phon_orth = [];
  interp = "";
  freq = 1;
  genre = "";
  validated = false;
  candidates = [];
}
(** Default [entry]: every string field empty and every list field empty.
    The commented-out fields mirror those commented out in the type. *)
let empty_entry = {
  lemma = "";
  (* phon_lemma = []; *)
  cat = "";
  forms = [];
  proper_type = "";
  (* ndm = false; *)
  stem = "";
  phon_stem = [];
  aspect = "";
}
(** Default [rule]: [Productive] marker, empty strings, no tags and
    [freq] set to 0.  The field order is the one of the type declaration. *)
let empty_rule = {
  star = Productive;
  pref = "";
  find = "";
  set = "";
  tags = [];
  interp = "";
  id = "";
  freq = 0;
}
(** [string_of_tags tags] renders every pair of [tags] as key=value and
    joins the results with single spaces.  An empty list gives the empty
    string. *)
let string_of_tags tags =
  let render (key, value) = key ^ "=" ^ value in
  String.concat " " (Xlist.map tags render)
(** [string_of_rule r] is a tab-separated line made of [r.pref], [r.find],
    [r.set] and the tags of [r] formatted by [string_of_tags].  The
    [star], [interp], [id] and [freq] fields are not printed. *)
let string_of_rule r =
  let {pref; find; set; tags; _} = r in
  Printf.sprintf "%s\t%s\t%s\t%s" pref find set (string_of_tags tags)
(** A record pairing two lemmata with a relation identifier, a stem and
    three validation flags.
    NOTE(review): presumably [validated1] and [validated2] refer to
    [lemma1] and [lemma2] respectively and [lu_validated] to the pair as
    a whole; confirm against the code that creates these records. *)
type lu_entry = {
  lemma1: string;
  lemma2: string;
  rel_id: int;
  lu_stem: string;
  lu_validated: bool;
  validated1: bool;
  validated2: bool;
}
(* type form = {orth: string; interp: string; freq: int; genre: string; validated: bool}
type entry = {lemma: string; cat: string; forms: form list; proper_type: string; ndm: bool; stem: string}
type star = Productive | Star | Ndm
type rule = {star: star; pref: string; find: string; set: string; tags: (string * string) list;
interp: string; id: string; freq: int} *)
(** Root directory of the ENIAM resources.

    When the [ENIAM_RESOURCE_PATH] environment variable is set, its value
    is used as is (no existence check is made).  Otherwise the first path
    that exists among [/usr/share/eniam], [/usr/local/share/eniam] and
    [resources] (relative to the current working directory) is chosen.

    The value is computed once, when this module is initialised.
    @raise Failure if the variable is unset and none of the paths exists. *)
let resource_path =
  try Sys.getenv "ENIAM_RESOURCE_PATH"
  with Not_found ->
    (* Probed in this order; the first existing path wins. *)
    let candidates =
      ["/usr/share/eniam"; "/usr/local/share/eniam"; "resources"] in
    try List.find Sys.file_exists candidates
    with Not_found -> failwith "resource directory does not exist"
(** Paths of the morphology data files.  Each one is [resource_path]
    followed by a fixed name inside the [morphology] subdirectory. *)

(** Path of [alt.tab]. *)
let alt_filename =
  resource_path ^ "/morphology/alt.tab"

(** Path of [stem.tab]. *)
let stem_filename =
  resource_path ^ "/morphology/stem.tab"

(** Path of [freq_rules.tab]. *)
let rules_filename =
  resource_path ^ "/morphology/freq_rules.tab"

(** Path of [wyglos.tab]. *)
let wyglos_filename =
  resource_path ^ "/morphology/wyglos.tab"

(** Path of [lemmata.tab]. *)
let lemmata_filename =
  resource_path ^ "/morphology/lemmata.tab"

(** Path of [alt_supplement.tab]. *)
let alt_supplement_filename =
  resource_path ^ "/morphology/alt_supplement.tab"