ENIAMrealizations.ml
11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
(*
* ENIAMwalenty, an interface for Polish Valence Dictionary "Walenty".
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open ENIAMwalTypes
let rec expand_schema_morf expands = function
PhraseAbbr(Advp "misc",[]) -> PhraseAbbr(Advp "misc",[])
| PhraseAbbr(Advp "mod",[]) -> PhraseAbbr(Advp "mod",[])
| PhraseAbbr(ComparP s,[]) -> PhraseAbbr(ComparP s,[Phrase(ComparNP(s,Str));Phrase(ComparPP(s))])
| PhraseAbbr(abbr,[]) -> (try PhraseAbbr(abbr,AbbrMap.find expands abbr) with Not_found -> failwith "expand_schema_morf")
| PhraseAbbr(abbr,morfs) -> PhraseAbbr(abbr,Xlist.map morfs (expand_schema_morf expands))
| LexPhrase(pos_lex,(restr,schema)) -> LexPhrase(pos_lex,(restr,expand_schema expands schema))
| LexPhraseMode(mode,pos_lex,(restr,schema)) -> LexPhraseMode(mode,pos_lex,(restr,expand_schema expands schema))
| morf -> morf
and expand_schema expands schema =
Xlist.map schema (fun s ->
{s with morfs=Xlist.map s.morfs (fun (id,morf) -> id,expand_schema_morf expands morf)})
let rec expand_subtypes_morf subtypes = function
PhraseComp(comp_morf,(ctype,comps)) ->
let comps = if comps = [] then (try CompMap.find subtypes ctype with Not_found -> failwith "expand_subtypes_schema") else comps in
Xlist.map comps (fun comp -> Phrase(match comp_morf with
Cp -> CP(ctype,comp)
| Ncp case -> NCP(case,ctype,comp)
| Prepncp(prep,case) -> PrepNCP(prep,case,ctype,comp)))
| LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,expand_subtypes subtypes schema))]
| LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,pos_lex,(restr,expand_subtypes subtypes schema))]
| PhraseAbbr(abbr,morfs) -> [PhraseAbbr(abbr,List.flatten (Xlist.map morfs (expand_subtypes_morf subtypes)))]
| E Null -> [E(NP(Str));E(NCP(Str,CompTypeUndef,CompUndef));E(CP(CompTypeUndef,CompUndef)); E(Or)]
| morf -> [morf]
and expand_subtypes subtypes schema =
Xlist.map schema (fun s ->
{s with morfs=List.flatten (Xlist.map s.morfs (fun (id,morf) -> id,expand_subtypes_morf subtypes morf))})
let expand_equivs_phrase equivs = function
| PrepNP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepNP(prep,case))
| PrepAdjP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepAdjP(prep,case))
| PrepNumP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepNumP(prep,case))
| ComprepNP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComprepNP(prep))
| ComparNP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparNP(prep,case))
| ComparPP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparPP(prep))
| CP(ctype,Comp comp) -> Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp -> CP(ctype,Comp comp))
| NCP(case,ctype,Comp comp) -> Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp -> NCP(case,ctype,Comp comp))
| PrepNCP(prep,case,ctype,Comp comp) -> List.flatten (
Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp ->
Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep ->
PrepNCP(prep,case,ctype,Comp comp))))
| phrase -> [phrase]
let rec expand_equivs_lex equivs = function
Lexeme s -> (try XOR(Xlist.map (StringMap.find equivs s) (fun s -> Lexeme s)) with Not_found -> Lexeme s)
| ORconcat l -> ORconcat(Xlist.map l (expand_equivs_lex equivs))
| ORcoord l -> ORcoord(Xlist.map l (expand_equivs_lex equivs))
| XOR l -> XOR(Xlist.map l (expand_equivs_lex equivs))
| Elexeme gender -> Elexeme gender
let rec expand_equivs_morf equivs = function
Phrase phrase -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> Phrase phrase)
| E phrase -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> E phrase)
| LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))]
| LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))]
| PhraseAbbr(abbr,morfs) -> [PhraseAbbr(abbr,List.flatten (Xlist.map morfs (expand_equivs_morf equivs)))]
| morf -> failwith ("expand_equivs_morf: " ^ ENIAMwalStringOf.morf morf)
and expand_equivs_schema equivs schema =
Xlist.map schema (fun s ->
{s with morfs=List.flatten (Xlist.map s.morfs (expand_equivs_morf equivs))})
let rec load_realizations_rec (expands,subtypes,equivs) found rev = function
[] -> if rev <> [] || found <> [] then failwith "load_realizations_rec" else expands,subtypes,equivs
| [Str.Text s; Str.Delim "-->"] :: l -> load_realizations_rec (expands,subtypes,equivs) ((s,rev) :: found) [] l
| [Str.Delim " "; Str.Text s; Str.Delim "\t"; Str.Text t] :: l ->
load_realizations_rec (expands,subtypes,equivs) found ((s,t) :: rev) l
| [Str.Delim " "; Str.Text s] :: l ->
load_realizations_rec (expands,subtypes,equivs) found ((s,"") :: rev) l
| [Str.Delim "% "; Str.Text "Phrase types expand:"] :: l -> load_realizations_rec (found,subtypes,equivs) [] rev l
| [Str.Delim "% "; Str.Text "Attributes subtypes:"] :: l -> load_realizations_rec (expands,found,equivs) [] rev l
| [Str.Delim "% "; Str.Text "Attributes equivalents:"] :: l -> load_realizations_rec (expands,subtypes,found) [] rev l
(* | [Str.Delim "% "; Str.Text s] :: l -> print_endline s; load_realizations_rec found rev l *)
| [] :: l -> load_realizations_rec (expands,subtypes,equivs) found rev l
| _ -> failwith "load_realizations_rec"
let rec get_lexemes = function
Lexeme s -> [s]
| ORconcat l -> List.flatten (Xlist.map l get_lexemes)
| ORcoord l -> List.flatten (Xlist.map l get_lexemes)
| XOR l -> List.flatten (Xlist.map l get_lexemes)
| Elexeme gender -> failwith "get_lexemes"
let find_comprep_reqs compreps =
Xlist.fold compreps StringMap.empty (fun comprep_reqs (s,l) ->
let l = Xlist.map l (function
LexPhrase(pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function
_,Lexeme s -> StringSet.add set s
| _ -> set)
| LexPhraseMode(_,pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function
_,Lexeme s -> StringSet.add set s
| _ -> set)
| morf -> failwith ("find_compreps_reqs: " ^ ENIAMwalStringOf.morf morf)) in
if l = [] then failwith "find_compreps_reqs";
StringMap.add comprep_reqs s (StringSet.to_list (Xlist.fold (List.tl l) (List.hd l) StringSet.union)))
let create_comprep_dict compreps =
Xlist.fold compreps StringMap.empty (fun compreps (s,l) ->
Xlist.fold l compreps (fun compreps -> function
LexPhrase([PREP _,_;SUBST _,lex],_) as morf ->
let lexemes = get_lexemes lex in
Xlist.fold lexemes compreps (fun compreps lexeme ->
StringMap.add_inc compreps lexeme ["subst",(s,morf)] (fun l -> ("subst",(s,morf)) :: l))
| LexPhraseMode("misc",[ADV grad,lex],restr) ->
let morf = LexPhrase([ADV grad,lex],restr) in
let lexemes = get_lexemes lex in
Xlist.fold lexemes compreps (fun compreps lexeme ->
StringMap.add_inc compreps lexeme ["adv",(s,morf)] (fun l -> ("adv",(s,morf)) :: l))
| LexPhrase([PREP _,_;NUM _,_;SUBST _,lex],_) as morf ->
let lexemes = get_lexemes lex in
Xlist.fold lexemes compreps (fun compreps lexeme ->
StringMap.add_inc compreps lexeme ["subst",(s,morf)] (fun l -> ("subst",(s,morf)) :: l))
| morf -> failwith ("create_comprep_dict: " ^ ENIAMwalStringOf.morf morf)))
let load_realizations () =
(* let lines = Str.split (Str.regexp "\n") (File.load_file realizations_filename) in
let lines = Xlist.rev_map lines (fun line -> Str.full_split (Str.regexp "% \\|-->\\| \\|\t") line) in
let expands,subtypes,equivs = load_realizations_rec ([],[],[]) [] [] lines in *)
let subtypes = Xlist.fold ENIAMwalTEI.subtypes CompMap.empty (fun subtypes -> function
"int",l -> CompMap.add subtypes Int (List.flatten (Xlist.map l (fun v -> snd(ENIAMwalTEI.parse_comp v))))
| "rel",l -> CompMap.add subtypes Rel (List.flatten (Xlist.map l (fun v -> snd(ENIAMwalTEI.parse_comp v))))
| _ -> failwith "load_realizations 1") in
let equivs = Xlist.fold ENIAMwalTEI.equivs StringMap.empty (fun equivs (k,l) -> StringMap.add equivs k (k :: l)) in
let expands,compreps = Xlist.fold ENIAMwalTEI.expands (AbbrMap.empty,[]) (fun (expands, compreps) (k,l) ->
match parse_morf_single (split_schema k) with
(* PhraseAbbr(Advp m,[]) -> AbbrMap.add expands (Advp m) (Xlist.map l (fun (v,_) ->
LexPhraseMode(m,[ADV GradUndef,Lexeme v],(Natr,[])))), compreps
| PhraseAbbr(Nonch,[]) -> AbbrMap.add expands Nonch (Xlist.map l (fun (v,_) ->
LexPhrase([SUBST(NumberUndef,Str),Lexeme v],(Natr,[])))), compreps
| PhraseAbbr(Xp m,[]) -> AbbrMap.add expands (Xp m) (List.flatten (Xlist.map l (fun (v,_) ->
match parse_morfs (split_schema v) with
[PhraseAbbr(Advp m,[])] -> (try AbbrMap.find expands (Advp m) with Not_found -> [PhraseAbbr(Advp m,[])]) (* FIXME: zakładam, że advp się nie rozmnoży *)
| morfs -> morfs))), compreps
| Phrase(ComprepNP(_,s)) -> expands, (s, Xlist.map l (fun (v,_) -> parse_morf_single (split_schema v))) :: compreps
| PhraseAbbr(Distrp,[]) -> AbbrMap.add expands Distrp (Xlist.map l (fun (v,_) -> parse_morf_single (split_schema v))), compreps
| PhraseAbbr(Possp,[]) -> AbbrMap.add expands Possp (Xlist.map l (fun (v,_) -> parse_morf_single (split_schema v))), compreps*)
| _ -> failwith "load_realizations 2") in
let compreps = Xlist.map compreps (fun (s,morfs) ->
s, List.flatten (List.flatten (Xlist.map morfs (fun morf -> Xlist.map (expand_subtypes_morf subtypes (expand_schema_morf expands morf)) (expand_equivs_morf equivs))))) in
let comprep_reqs = find_comprep_reqs compreps in
let compreps = create_comprep_dict compreps in
expands,compreps,comprep_reqs,subtypes,equivs
let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations ()