generate.ml
1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
(** Directory holding the shared NLP resources, given as a relative path. *)
let nlp_resources_path = "../../NLP resources/"

(** Name of the SGJP input file (a gzip-compressed tab file, going by its
    extension). *)
let sgjp_filename = "sgjp-20170730.tab.gz"

(** SGJP subdirectory inside [nlp_resources_path]; keeps the trailing slash so
    a file name can be appended directly. *)
let sgjp_path = String.concat "" [ nlp_resources_path; "SGJP/" ]
(** Input files as [(directory, filename)] pairs: the SGJP file comes first,
    followed by the supplementary tab files that live under [data/]. *)
let sources =
  (* Every supplementary file shares the same directory, so only the file
     names are listed and the directory is attached below. *)
  let data_tabs = [
    "noun-supplement-acro.tab";
    "noun-supplement-polimorf.tab";
    "dial_ach.tab";
    "dial_ami2.tab";
    "dial_ami3.tab";
    "dial_ami4.tab";
    "dial_ami.tab";
    "dial_ą2.tab";
    "dial_ą.tab";
    "dial_ę.tab";
    "dial_my.tab";
    "dial_sz.tab";
    "dial_ym.tab";
  ] in
  (sgjp_path, sgjp_filename)
  :: List.map (fun tab_name -> ("data/", tab_name)) data_tabs
(** Compound rules as returned by [ENIAMmorphologyRules.make_compound_rules].
    Built once, when this module is initialised. *)
let compound_rules =
  ENIAMmorphologyRules.make_compound_rules ()

(** Rule trees obtained by feeding [compound_rules] to
    [ENIAMmorphologyRules.make_interp_compound_rule_trees]. *)
let interp_compound_rule_trees =
  compound_rules |> ENIAMmorphologyRules.make_interp_compound_rule_trees
(** [generate_alt rules_filename path filename out_filename] loads the
    frequency rules from [rules_filename] and the tab dictionary found at
    [path ^ filename], sends the dictionary through the [Dict] processing
    steps listed in the body, and finally hands the result to [Dict.print]
    together with [out_filename]. *)
let generate_alt rules_filename path filename out_filename =
  (* The rules are loaded and indexed before the dictionary is read. *)
  let rule_trees =
    rules_filename
    |> ENIAMmorphologyRules.load_freq_rules
    |> ENIAMmorphologyRules.CharTrees.create
  in
  (path ^ filename)
  |> Dict.load_tab
  |> Dict.merge_entries
  |> Dict.process_interps
  |> Dict.remove_cat "cond"
  (* NOTE: a Dict.mark_ndm step is deliberately left disabled here. *)
  |> Dict.validate_interp rule_trees
  |> Dict.remove_validated_forms
  |> Dict.print out_filename
(** Program entry point. Runs three generation steps in sequence:
    - [Dict.generate_rule_frequencies_list] over [sources], targeting
      [resources/freq_rules.tab];
    - [generate_alt] over the SGJP file, targeting [resources/alt.tab];
    - [Dict.generate_stem_dict] over the SGJP file, targeting
      [resources/stem.tab].

    Bound with [let ()] rather than [let _] so that the compiler checks the
    whole body really has type [unit]. *)
let () =
  (* The same rule file name is used by all three steps, so it is named once
     here. Presumably the first step produces the file that the other two
     read; confirm against the [Dict] implementation. *)
  let freq_rules_filename = "resources/freq_rules.tab" in
  Dict.generate_rule_frequencies_list
    interp_compound_rule_trees sources freq_rules_filename;
  generate_alt freq_rules_filename sgjp_path sgjp_filename "resources/alt.tab";
  Dict.generate_stem_dict
    freq_rules_filename sgjp_path sgjp_filename "resources/stem.tab";
  (* Explicit unit is kept because the return type of the last call is not
     visible from this file. *)
  ()