Commit 2128db1f0a5f1095fdf8009d31754baf4011d3b5 (1 parent: cc2d8f8f)
przetwarzanie mode
Showing 8 changed files with 88 additions and 85 deletions
subsyntax/ENIAMsubsyntaxTypes.ml
... | ... | @@ -51,8 +51,10 @@ let mwe_filename = resource_path ^ "/subsyntax/mwe.tab" |
51 | 51 | |
52 | 52 | let lemma_frequencies_filename = resource_path ^ "/subsyntax/NKJP1M-lemma-freq.tab" |
53 | 53 | |
54 | -let proper_names_filename = resource_path ^ "/subsyntax/proper_names_sgjp_polimorf.tab" | |
55 | -let proper_names_filename2 = resource_path ^ "/subsyntax/proper_names.tab" | |
54 | +(* let proper_names_filename = resource_path ^ "/subsyntax/proper_names_sgjp_polimorf.tab" | |
55 | +let proper_names_filename2 = resource_path ^ "/subsyntax/proper_names.tab" *) | |
56 | +let proper_names_filename = resource_path ^ "/subsyntax/proper_names_sgjp_polimorf_20151020.tab" | |
57 | +let proper_names_filename2 = resource_path ^ "/subsyntax/proper_names_20160104.tab" | |
56 | 58 | |
57 | 59 | let int_of_mode = function |
58 | 60 | Raw -> 0 |
... | ... |
subsyntax/interface.ml
... | ... | @@ -34,10 +34,10 @@ let spec_list = [ |
34 | 34 | "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled Ocaml data structure"; |
35 | 35 | "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML"; |
36 | 36 | "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off"; |
37 | - "-r", Arg.String (fun p -> | |
37 | + (* "-r", Arg.String (fun p -> | |
38 | 38 | ENIAMtokenizerTypes.set_resource_path p; |
39 | 39 | ENIAMmorphologyTypes.set_resource_path p; |
40 | - ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; | |
40 | + ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *) | |
41 | 41 | ] |
42 | 42 | |
43 | 43 | let usage_msg = |
... | ... |
walenty/ENIAMwalConnect.ml
... | ... | @@ -146,8 +146,8 @@ let has_nontrivial_lex = function |
146 | 146 | Xlist.fold p.morfs b (fun b -> function |
147 | 147 | MorfId id -> failwith "has_nontrivial_lex" |
148 | 148 | | LexPhrase _ -> true |
149 | - | LexRPhrase _ -> true | |
150 | - | LexPhraseMode _ -> true | |
149 | + (* | LexRPhrase _ -> true | |
150 | + | LexPhraseMode _ -> true *) | |
151 | 151 | | _ -> b)) |
152 | 152 | | _ -> failwith "has_nontrivial_lex: ni" |
153 | 153 | |
... | ... | @@ -219,7 +219,7 @@ let has_mode_coordination = function |
219 | 219 | MorfId id -> failwith "has_nontrivial_lex" |
220 | 220 | | PhraseAbbr(Advp _,_) -> n+1 |
221 | 221 | | PhraseAbbr(Xp _,_) -> n+1 |
222 | - | LexPhraseMode _ -> n+1 | |
222 | + (* | LexPhraseMode _ -> n+1 FIXME*) | |
223 | 223 | | _ -> n) in |
224 | 224 | if n>1 then true else b) |
225 | 225 | | _ -> failwith "has_nontrivial_lex: ni" |
... | ... |
walenty/ENIAMwalRealizations.ml
... | ... | @@ -23,24 +23,24 @@ open Xstd |
23 | 23 | let rec expand_schema_morf expands = function |
24 | 24 | PhraseAbbr(Advp "misc",[]) -> PhraseAbbr(Advp "misc",[]) |
25 | 25 | | PhraseAbbr(Advp "mod",[]) -> PhraseAbbr(Advp "mod",[]) |
26 | - | PhraseAbbr(ComparP s,[]) -> PhraseAbbr(ComparP s,[Phrase(ComparNP(s,Str));Phrase(ComparPP(s))]) (* FIXME: albo do walTEI albo usunąć *) | |
26 | + (* | PhraseAbbr(ComparP s,[]) -> PhraseAbbr(ComparP s,[Phrase(ComparNP(s,Str));Phrase(ComparPP(s))]) (* FIXME: albo do walTEI albo usunąć *) *) | |
27 | 27 | | PhraseAbbr(abbr,[]) -> (try PhraseAbbr(abbr,AbbrMap.find expands abbr) with Not_found -> failwith "expand_schema_morf") |
28 | 28 | | PhraseAbbr(abbr,morfs) -> PhraseAbbr(abbr,Xlist.map morfs (expand_schema_morf expands)) |
29 | 29 | | LexPhrase(pos_lex,(restr,schema)) -> LexPhrase(pos_lex,(restr,expand_schema expands schema)) |
30 | - | LexPhraseMode(mode,pos_lex,(restr,schema)) -> LexPhraseMode(mode,pos_lex,(restr,expand_schema expands schema)) | |
30 | + (* | LexPhraseMode(mode,pos_lex,(restr,schema)) -> LexPhraseMode(mode,pos_lex,(restr,expand_schema expands schema)) *) | |
31 | 31 | | morf -> morf |
32 | 32 | |
33 | 33 | and expand_schema expands schema = |
34 | 34 | Xlist.map schema (fun s -> |
35 | 35 | {s with morfs=Xlist.map s.morfs (expand_schema_morf expands)}) |
36 | 36 | |
37 | -let get_mode = function | |
37 | +(* let get_mode = function | |
38 | 38 | Xp(m) -> m |
39 | 39 | | Advp(m) -> m |
40 | - | ComparP prep -> "compar" | |
40 | + (* | ComparP prep -> "compar" *) | |
41 | 41 | | Nonch -> "nonch" |
42 | 42 | | Distrp -> "distrp" |
43 | - | Possp -> "possp" | |
43 | + | Possp -> "possp" *) | |
44 | 44 | |
45 | 45 | let rec expand_subtypes_morf subtypes = function |
46 | 46 | PhraseComp(comp_morf,(ctype,comps)) -> |
... | ... | @@ -50,18 +50,18 @@ let rec expand_subtypes_morf subtypes = function |
50 | 50 | | Ncp case -> NCP(case,ctype,comp) |
51 | 51 | | Prepncp(prep,case) -> PrepNCP(prep,case,ctype,comp))) |
52 | 52 | | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,expand_subtypes subtypes schema))] |
53 | - | LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,pos_lex,(restr,expand_subtypes subtypes schema))] | |
53 | + (* | LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,pos_lex,(restr,expand_subtypes subtypes schema))] *) | |
54 | 54 | | PhraseAbbr(abbr,morfs) -> |
55 | - let mode = get_mode abbr in | |
56 | - let morfs = List.flatten (Xlist.map morfs (expand_subtypes_morf subtypes)) in | |
57 | - Xlist.map morfs (function | |
55 | + (* let mode = get_mode abbr in *) | |
56 | + List.flatten (Xlist.map morfs (expand_subtypes_morf subtypes)) | |
57 | + (* Xlist.map morfs (function | |
58 | 58 | Phrase p -> PhraseMode(mode,p) |
59 | 59 | | PhraseMode(_,p) -> PhraseMode(mode,p) |
60 | 60 | | LexPhrase(pos_lex,r) -> LexPhraseMode(mode,pos_lex,r) |
61 | 61 | | LexRPhrase(pos_lex,r) -> LexRPhraseMode(mode,pos_lex,r) |
62 | 62 | | LexPhraseMode(m,pos_lex,r) -> LexPhraseMode(mode,pos_lex,r) |
63 | 63 | | LexRPhraseMode(m,pos_lex,r) -> LexRPhraseMode(mode,pos_lex,r) |
64 | - | _ -> failwith "expand_subtypes_morf") | |
64 | + | _ -> failwith "expand_subtypes_morf") *) | |
65 | 65 | | E Null -> [E(NP(Str));E(NCP(Str,CompTypeUndef,CompUndef));E(CP(CompTypeUndef,CompUndef)); E(Or)] |
66 | 66 | | morf -> [morf] |
67 | 67 | |
... | ... | @@ -74,8 +74,8 @@ let expand_equivs_phrase equivs = function |
74 | 74 | | PrepAdjP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepAdjP(prep,case)) |
75 | 75 | | PrepNumP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> PrepNumP(prep,case)) |
76 | 76 | | ComprepNP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComprepNP(prep)) |
77 | - | ComparNP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparNP(prep,case)) | |
78 | - | ComparPP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparPP(prep)) | |
77 | + (* | ComparNP(prep,case) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparNP(prep,case)) *) | |
78 | + | ComparP(prep) -> Xlist.map (try StringMap.find equivs prep with Not_found -> [prep]) (fun prep -> ComparP(prep)) | |
79 | 79 | | CP(ctype,Comp comp) -> Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp -> CP(ctype,Comp comp)) |
80 | 80 | | NCP(case,ctype,Comp comp) -> Xlist.map (try StringMap.find equivs comp with Not_found -> [comp]) (fun comp -> NCP(case,ctype,Comp comp)) |
81 | 81 | | PrepNCP(prep,case,ctype,Comp comp) -> List.flatten ( |
... | ... | @@ -93,12 +93,12 @@ let rec expand_equivs_lex equivs = function |
93 | 93 | |
94 | 94 | let rec expand_equivs_morf equivs = function |
95 | 95 | Phrase phrase -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> Phrase phrase) |
96 | - | PhraseMode(mode,phrase) -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> PhraseMode(mode,phrase)) | |
96 | + (* | PhraseMode(mode,phrase) -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> PhraseMode(mode,phrase)) *) | |
97 | 97 | | E phrase -> Xlist.map (expand_equivs_phrase equivs phrase) (fun phrase -> E phrase) |
98 | 98 | | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] |
99 | - | LexRPhrase(pos_lex,(restr,schema)) -> [LexRPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] | |
99 | + (* | LexRPhrase(pos_lex,(restr,schema)) -> [LexRPhrase(Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] | |
100 | 100 | | LexPhraseMode(mode,pos_lex,(restr,schema)) -> [LexPhraseMode(mode,Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] |
101 | - | LexRPhraseMode(mode,pos_lex,(restr,schema)) -> [LexRPhraseMode(mode,Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] | |
101 | + | LexRPhraseMode(mode,pos_lex,(restr,schema)) -> [LexRPhraseMode(mode,Xlist.map pos_lex (fun (pos,lex) -> pos, expand_equivs_lex equivs lex),(restr,expand_equivs_schema equivs schema))] *) | |
102 | 102 | (* | PhraseAbbr(abbr,morfs) -> [PhraseAbbr(abbr,List.flatten (Xlist.map morfs (expand_equivs_morf equivs)))] *) |
103 | 103 | | morf -> failwith ("expand_equivs_morf: " ^ ENIAMwalStringOf.morf morf) |
104 | 104 | |
... | ... | @@ -134,9 +134,9 @@ let find_comprep_reqs compreps = |
134 | 134 | LexPhrase(pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function |
135 | 135 | _,Lexeme s -> StringSet.add set s |
136 | 136 | | _ -> set) |
137 | - | LexPhraseMode(_,pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function | |
137 | + (* | LexPhraseMode(_,pos_lex,_) -> Xlist.fold pos_lex StringSet.empty (fun set -> function | |
138 | 138 | _,Lexeme s -> StringSet.add set s |
139 | - | _ -> set) | |
139 | + | _ -> set) *) | |
140 | 140 | | morf -> failwith ("find_compreps_reqs: " ^ ENIAMwalStringOf.morf morf)) in |
141 | 141 | if l = [] then failwith "find_compreps_reqs"; |
142 | 142 | StringMap.add comprep_reqs s (StringSet.to_list (Xlist.fold (List.tl l) (List.hd l) StringSet.union))) |
... | ... | @@ -148,11 +148,11 @@ let create_comprep_dict compreps = |
148 | 148 | let lexemes = get_lexemes lex in |
149 | 149 | Xlist.fold lexemes compreps (fun compreps lexeme -> |
150 | 150 | StringMap.add_inc compreps lexeme ["subst",(s,morf)] (fun l -> ("subst",(s,morf)) :: l)) |
151 | - | LexPhraseMode("misc",[ADV grad,lex],restr) -> | |
151 | + (* | LexPhraseMode("misc",[ADV grad,lex],restr) -> | |
152 | 152 | let morf = LexPhrase([ADV grad,lex],restr) in |
153 | 153 | let lexemes = get_lexemes lex in |
154 | 154 | Xlist.fold lexemes compreps (fun compreps lexeme -> |
155 | - StringMap.add_inc compreps lexeme ["adv",(s,morf)] (fun l -> ("adv",(s,morf)) :: l)) | |
155 | + StringMap.add_inc compreps lexeme ["adv",(s,morf)] (fun l -> ("adv",(s,morf)) :: l)) *) | |
156 | 156 | | LexPhrase([PREP _,_;NUM _,_;SUBST _,lex],_) as morf -> |
157 | 157 | let lexemes = get_lexemes lex in |
158 | 158 | Xlist.fold lexemes compreps (fun compreps lexeme -> |
... | ... |
walenty/ENIAMwalStringOf.ml
... | ... | @@ -162,8 +162,8 @@ let phrase = function |
162 | 162 | | NumP(c) -> "nump(" ^ case c ^ ")" |
163 | 163 | | PrepNumP(prep,c) -> "prepnump(" ^ prep ^ "," ^ case c ^ ")" |
164 | 164 | | ComprepNP(prep) -> "comprepnp(" ^ prep ^ ")" |
165 | - | ComparNP(prep,c) -> "comparnp(" ^ prep ^ "," ^ case c ^ ")" | |
166 | - | ComparPP(prep) -> "comparpp(" ^ prep ^ ")" | |
165 | + (* | ComparNP(prep,c) -> "comparnp(" ^ prep ^ "," ^ case c ^ ")" *) | |
166 | + | ComparP(prep) -> "comparp(" ^ prep ^ ")" | |
167 | 167 | | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")" |
168 | 168 | | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" |
169 | 169 | | PrepNCP(prep,c,ct,co) -> "prepncp(" ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")" |
... | ... | @@ -188,7 +188,7 @@ let phrase = function |
188 | 188 | let phrase_abbr = function |
189 | 189 | Xp(m) -> "xp(" ^ m ^ ")" |
190 | 190 | | Advp(m) -> "advp(" ^ m ^ ")" |
191 | - | ComparP prep -> "compar(" ^ prep ^ ")" | |
191 | + (* | ComparP prep -> "compar(" ^ prep ^ ")" *) | |
192 | 192 | | Nonch -> "nonch" |
193 | 193 | | Distrp -> "distrp" |
194 | 194 | | Possp -> "possp" |
... | ... | @@ -276,12 +276,12 @@ let rec schema schema = |
276 | 276 | |
277 | 277 | and morf = function |
278 | 278 | Phrase p -> phrase p |
279 | - | PhraseMode(m,p) -> m ^ ":" ^ phrase p | |
279 | + (* | PhraseMode(m,p) -> m ^ ":" ^ phrase p *) | |
280 | 280 | | E p -> "E(" ^ phrase p ^ ")" |
281 | 281 | | LexPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" |
282 | - | LexRPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" | |
283 | - | LexPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" | |
284 | - | LexRPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" | |
282 | + (* | LexRPhrase(pos_lex,(r,s)) -> "lex([" ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *) | |
283 | + (* | LexPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *) | |
284 | + (* | LexRPhraseMode(m,pos_lex,(r,s)) -> "lex([" ^ m ^ "," ^ String.concat ";" (Xlist.map pos_lex (fun (p,le) -> pos p ^ "," ^ lex le)) ^ "]," ^ restr r ^ "[" ^ schema s ^ "])" *) | |
285 | 285 | | PhraseAbbr(p,ml) -> phrase_abbr p ^ "[" ^ String.concat ";" (Xlist.map ml morf) ^ "]" |
286 | 286 | | PhraseComp(p,(ct,l)) -> phrase_comp p ^ "," ^ comp_type ct ^ "[" ^ String.concat ";" (Xlist.map l comp) ^ "]" |
287 | 287 | | LexPhraseId(id,p,le) -> "lex(" ^ id ^ "," ^ pos p ^ "," ^ lex le ^ ")" |
... | ... |
walenty/ENIAMwalTEI.ml
... | ... | @@ -237,21 +237,17 @@ let process_lex_phrase lemma = function |
237 | 237 | (ENIAMwalStringOf.gender gender) (ENIAMwalStringOf.grad grad) (ENIAMwalStringOf.negation negation) (ENIAMwalStringOf.refl reflex); [] |
238 | 238 | |
239 | 239 | let new_schema r cr ce morfs = |
240 | - {psn_id=(-1); gf=r; role=""; role_attr="";sel_prefs=[]; cr=cr; ce=ce; morfs=morfs} | |
240 | + {psn_id=(-1); gf=r; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=cr; ce=ce; morfs=morfs} | |
241 | 241 | |
242 | 242 | let rec process_lex lex = function |
243 | - | PhraseAbbr(ComparP prep,[]),arguments,Lexeme "",Lexeme "" -> | |
243 | + | Phrase(ComparP prep),arguments,Lexeme "",Lexeme "" -> | |
244 | 244 | LexPhrase([COMPAR,Lexeme prep],(Ratrs,Xlist.map arguments (fun morf -> new_schema ARG [] [] [morf]))) |
245 | 245 | | PhraseAbbr(Xp mode,[argument]),_,_,_ -> |
246 | - let lex = {lex with lex_argument=argument} in | |
247 | - (match process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma) with | |
248 | - LexPhrase(poss,mods) -> LexPhraseMode(mode,poss,mods) | |
249 | - | LexPhraseMode(mode2,poss,mods) -> | |
250 | - if mode <> mode2 then failwith "process_lex: multiple modes" else LexPhraseMode(mode,poss,mods) | |
251 | - | _ -> failwith "process_lex") | |
246 | + let lex = {lex with lex_argument=argument; lex_mode=mode :: lex.lex_mode} in | |
247 | + process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma) | |
252 | 248 | | PhraseAbbr(Advp mode,[]),[],lemma,Lexeme "" -> |
253 | 249 | let poss = process_lex_phrase lemma (AdvP,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in |
254 | - LexPhraseMode(mode,poss,lex.lex_modification) | |
250 | + LexPhrase(poss,lex.lex_modification) | |
255 | 251 | | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) |
256 | 252 | | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification) |
257 | 253 | | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification) |
... | ... | @@ -270,7 +266,7 @@ let rec load_category = function |
270 | 266 | (match x with |
271 | 267 | | [F("name",Symbol value)] -> value, [] |
272 | 268 | | [F("name",Symbol value);Fset("constraints",set)] -> |
273 | - value, List.rev (Xlist.rev_map set load_phrase) | |
269 | + value, List.rev (Xlist.rev_map set (load_phrase (ref []))) | |
274 | 270 | | l -> failwith ("load_category 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string))) |
275 | 271 | | xml -> failwith ("load_category:\n " ^ tei_to_string xml) |
276 | 272 | |
... | ... | @@ -281,10 +277,13 @@ and load_modification_def = function (*pomocnicza do load_lex *) |
281 | 277 | | x -> Printf.printf "%s\n" (tei_to_string (List.hd x)); |
282 | 278 | failwith "load_modification_def:\n" |
283 | 279 | |
284 | -and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*) | |
285 | - | F("argument",set) -> {arg with lex_argument = load_phrase set} | |
280 | +and load_lex arg xml = match xml with | |
281 | + | F("argument",set) -> | |
282 | + let mode = ref [] in | |
283 | + let a = load_phrase mode set in | |
284 | + {arg with lex_argument = a; lex_mode = !mode} | |
286 | 285 | | Fset("arguments",set) -> |
287 | - {arg with lex_arguments=List.rev (Xlist.rev_map set load_phrase)} | |
286 | + {arg with lex_arguments=List.rev (Xlist.rev_map set (load_phrase (ref [])))} | |
288 | 287 | | F("modification",Fs("modification_def",x)) -> {arg with lex_modification = load_modification_def x} |
289 | 288 | | F("lemma",Fs("lemma_def",[F("selection_mode",Symbol value1); |
290 | 289 | F("cooccurrence",Symbol value2); |
... | ... | @@ -305,7 +304,7 @@ and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*) |
305 | 304 | Printf.printf "%s\n" (tei_to_string xml); |
306 | 305 | failwith "load_lex:\n " |
307 | 306 | |
308 | -and load_phrase = function | |
307 | +and load_phrase mode = function | |
309 | 308 | | Fs("np",[F("case",Symbol a)]) -> Phrase (NP(parse_case a)); |
310 | 309 | | Fs("prepnp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepNP(a, parse_case b)) |
311 | 310 | | Fs("adjp", [F("case",Symbol a)]) -> Phrase (AdjP(parse_case a)) |
... | ... | @@ -316,24 +315,25 @@ and load_phrase = function |
316 | 315 | | Fs("ncp", [F("case",Symbol a);b]) -> PhraseComp(Ncp(parse_case a),load_ctype b) |
317 | 316 | | Fs("prepncp", [F("preposition",Symbol a);F("case",Symbol b);c]) -> PhraseComp(Prepncp(a, parse_case b),load_ctype c) |
318 | 317 | | Fs("infp", [F("aspect",Symbol a)]) -> Phrase (InfP(parse_aspect a)) |
319 | - | Fs("xp", [a]) -> let x,y = load_category a in PhraseAbbr(Xp x,y) | |
320 | - | Fs("xp", [e;a]) -> let x,y = load_category a in PhraseAbbr(Xp x,y) | |
321 | - | Fs("advp", [F("category",Symbol a)]) -> PhraseAbbr(Advp(a),[]) | |
322 | - | Fs("advp", [e;F("category",Symbol a)]) -> PhraseAbbr(Advp(a),[]) | |
323 | - | Fs("nonch", []) -> PhraseAbbr(Nonch,[]) | |
318 | + | Fs("xp", [a]) -> let x,y = load_category a in mode:=x :: !mode; PhraseAbbr(Xp x,y) | |
319 | + | Fs("xp", [e;a]) -> let x,y = load_category a in mode:=x :: !mode; PhraseAbbr(Xp x,y) | |
320 | + | Fs("advp", [F("category",Symbol a)]) -> mode:=a :: !mode; PhraseAbbr(Advp(a),[]) | |
321 | + | Fs("advp", [e;F("category",Symbol a)]) -> mode:=a :: !mode; PhraseAbbr(Advp(a),[]) | |
322 | + | Fs("nonch", []) -> mode:="nonch" :: !mode; PhraseAbbr(Nonch,[]) | |
324 | 323 | | Fs("or", []) -> Phrase Or |
325 | 324 | | Fs("refl", []) -> Phrase Refl |
326 | 325 | | Fs("E", []) -> E Null |
327 | 326 | | Fs("lex", x) -> |
328 | 327 | let lex = Xlist.fold x empty_lex load_lex in |
328 | + mode := lex.lex_mode @ !mode; | |
329 | 329 | process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma) |
330 | 330 | | Fs("fixed", [F("argument",a);F("string",TEIstring b)]) -> Phrase (FixedP((*snd (load_phrase a),*)b)) |
331 | - | Fs("possp", [e]) -> PhraseAbbr(Possp,[]) | |
332 | - | Fs("possp", []) -> PhraseAbbr(Possp,[]) | |
331 | + | Fs("possp", [e]) -> mode:="possp" :: !mode; PhraseAbbr(Possp,[]) | |
332 | + | Fs("possp", []) -> mode:="possp" :: !mode; PhraseAbbr(Possp,[]) | |
333 | 333 | | Fs("recip", []) -> Phrase Recip |
334 | - | Fs("distrp", [e]) -> PhraseAbbr(Distrp,[]) | |
335 | - | Fs("distrp", []) -> PhraseAbbr(Distrp,[]) | |
336 | - | Fs("compar", [F("compar_category",Symbol value)]) -> PhraseAbbr(ComparP value,[]) | |
334 | + | Fs("distrp", [e]) -> mode:="distrp" :: !mode; PhraseAbbr(Distrp,[]) | |
335 | + | Fs("distrp", []) -> mode:="distrp" :: !mode; PhraseAbbr(Distrp,[]) | |
336 | + | Fs("compar", [F("compar_category",Symbol value)]) -> Phrase(ComparP value) | |
337 | 337 | | Fs("gerp", [F("case",Symbol a)]) -> Phrase (GerP(parse_case a)) |
338 | 338 | | Fs("prepgerp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepGerP(a, parse_case b)) |
339 | 339 | | Fs("nump", [F("case",Symbol a)]) -> Phrase (NumP(parse_case a)) |
... | ... | @@ -342,17 +342,17 @@ and load_phrase = function |
342 | 342 | | Fs("prepppasp", [F("preposition",Symbol a);F("case",Symbol b)]) -> Phrase (PrepPpasP(a, parse_case b)) |
343 | 343 | | Fs("qub", []) -> Phrase Qub |
344 | 344 | | Fs("pactp", [F("case",Symbol a)]) -> Phrase (PactP(parse_case a)) |
345 | - | Fs("adverb",[F("adverb",Symbol s)]) -> LexRPhrase([ADV (Grad "pos"),Lexeme s],(Natr,[])) | |
345 | + | Fs("adverb",[F("adverb",Symbol s)]) -> LexPhrase([ADV (Grad "pos"),Lexeme s],(Natr,[])) | |
346 | 346 | | xml -> failwith ("load_phrase match:\n " ^ tei_to_string xml) |
347 | 347 | |
348 | -and load_phrase_id ent sch psn phrases = function | |
348 | +and load_phrase_id ent sch psn phrases mode = function | |
349 | 349 | | Fs(morf,Id{hash=false; numbers=[ent_id;sch_id;psn_id;id]; suffix="phr"} :: l) -> |
350 | - if ent_id = ent && sch_id = sch && psn_id = psn then | |
351 | - let morf = load_phrase (Fs(morf, l)) in | |
350 | + if ent_id = ent && sch_id = sch && psn_id = psn then | |
351 | + let morf = load_phrase mode (Fs(morf, l)) in | |
352 | 352 | phrases := IntMap.add_inc (!phrases) id morf (fun morf2 -> if morf = morf2 then morf else failwith "load_phrase_id"); |
353 | 353 | MorfId id |
354 | 354 | else failwith (Printf.sprintf "load_phrase %d %d" ent ent_id) |
355 | - | Fs(morf, l) -> load_phrase (Fs(morf, l)) | |
355 | + | Fs(morf, l) -> load_phrase mode (Fs(morf, l)) | |
356 | 356 | | _ -> failwith "load_phrase_id" |
357 | 357 | |
358 | 358 | |
... | ... | @@ -363,7 +363,9 @@ and load_control arg = function |
363 | 363 | and load_position_info ent sch phrases arg = function |
364 | 364 | | F("function",Symbol value) -> {arg with gf = parse_gf value} |
365 | 365 | | Fset("phrases",phrases_set) -> |
366 | - {arg with morfs = List.rev (Xlist.rev_map phrases_set (load_phrase_id ent sch arg.psn_id phrases))} | |
366 | + let mode = ref [] in | |
367 | + let morfs = List.rev (Xlist.rev_map phrases_set (load_phrase_id ent sch arg.psn_id phrases mode)) in | |
368 | + {arg with morfs = morfs; mode = StringSet.to_list (StringSet.of_list (!mode))} | |
367 | 369 | | Fset("control",control_set) -> Xlist.fold control_set arg load_control |
368 | 370 | | Id{hash=false; numbers=[ent_id;sch_id;id]; suffix="psn"} -> |
369 | 371 | if ent_id = ent && sch_id = sch then {arg with psn_id = id} |
... | ... | @@ -568,7 +570,7 @@ let correct_expansion = function |
568 | 570 | | _ -> failwith "correct_expansion" |
569 | 571 | |
570 | 572 | let load_expansion = function |
571 | - Fs("expansion",[F("opinion",Symbol opinion);Fset("phrases",[p])]) -> [load_phrase p] | |
573 | + Fs("expansion",[F("opinion",Symbol opinion);Fset("phrases",[p])]) -> [load_phrase (ref []) p] | |
572 | 574 | | Fs("expansion",[F("opinion",Symbol opinion);Fset("positions",set)]) -> correct_expansion (List.rev (Xlist.rev_map set (load_position (-1) (-1) (ref IntMap.empty)))) |
573 | 575 | | tei -> failwith ("load_expansion: \n" ^ tei_to_string tei) |
574 | 576 | |
... | ... | @@ -577,7 +579,7 @@ let load_rentry = function |
577 | 579 | let id = match parse_id id with |
578 | 580 | {hash=false; numbers=[id]; suffix="exp"} -> id |
579 | 581 | | _ -> failwith "process_meanings" in |
580 | - let morf = load_phrase (parse_tei phrase) in | |
582 | + let morf = load_phrase (ref []) (parse_tei phrase) in | |
581 | 583 | let expansions = match parse_tei exp with |
582 | 584 | | Fs("phrase_type_expansions", [Fset("expansions",expansions)]) -> List.flatten (List.rev (Xlist.rev_map expansions load_expansion)) |
583 | 585 | | Fs("phrase_type_expansions", [F("expansions",expansion)]) -> load_expansion expansion |
... | ... | @@ -597,23 +599,23 @@ let load_expands filename = |
597 | 599 | |
598 | 600 | |
599 | 601 | (*let walenty = load_walenty Paths.walenty_filename *) |
600 | -let walenty,phrases = load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170304.xml" | |
602 | +let walenty,phrases = load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml" | |
601 | 603 | |
602 | 604 | (* let _ = Printf.printf "|phrases|=%d\n" (IntMap.size phrases) *) |
603 | 605 | |
604 | 606 | let expands_supplement = [ |
605 | 607 | (-2), PhraseAbbr(Nonch,[]), [ |
606 | - LexRPhrase([SUBST(NumberUndef,Str),Lexeme "co"],(Natr,[])); | |
607 | - LexRPhrase([SUBST(NumberUndef,Str),Lexeme "coś"],(Natr,[])); | |
608 | - LexRPhrase([SUBST(NumberUndef,Str),Lexeme "nic"],(Natr,[])); | |
609 | - LexRPhrase([SUBST(NumberUndef,Str),Lexeme "to"],(Natr,[])); | |
608 | + LexPhrase([SUBST(NumberUndef,Str),Lexeme "co"],(Natr,[])); | |
609 | + LexPhrase([SUBST(NumberUndef,Str),Lexeme "coś"],(Natr,[])); | |
610 | + LexPhrase([SUBST(NumberUndef,Str),Lexeme "nic"],(Natr,[])); | |
611 | + LexPhrase([SUBST(NumberUndef,Str),Lexeme "to"],(Natr,[])); | |
610 | 612 | ]; |
611 | 613 | (-3), PhraseAbbr(Advp "pron",[]), [ |
612 | - LexRPhrase([ADV (Grad "pos"),Lexeme "tak"],(Natr,[])); | |
613 | - LexRPhrase([ADV (Grad "pos"),Lexeme "jak"],(Natr,[])) | |
614 | + LexPhrase([ADV (Grad "pos"),Lexeme "tak"],(Natr,[])); | |
615 | + LexPhrase([ADV (Grad "pos"),Lexeme "jak"],(Natr,[])) | |
614 | 616 | ]] |
615 | 617 | |
616 | -let expands = expands_supplement @ load_expands "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170304.xml" | |
618 | +let expands = expands_supplement @ load_expands "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170311.xml" | |
617 | 619 | |
618 | 620 | let subtypes = [ |
619 | 621 | "int",[ |
... | ... |
walenty/ENIAMwalTypes.ml
... | ... | @@ -64,8 +64,7 @@ type phrase = |
64 | 64 | | NumP of case |
65 | 65 | | PrepNumP of string * case |
66 | 66 | | ComprepNP of string |
67 | - | ComparNP of string * case | |
68 | - | ComparPP of string | |
67 | + | ComparP of string (** case*) | |
69 | 68 | | CP of comp_type * comp |
70 | 69 | | NCP of case * comp_type * comp |
71 | 70 | | PrepNCP of string * case * comp_type * comp |
... | ... | @@ -89,7 +88,6 @@ type phrase = |
89 | 88 | type phrase_abbr = |
90 | 89 | Xp of string |
91 | 90 | | Advp of string |
92 | - | ComparP of string | |
93 | 91 | | Nonch |
94 | 92 | | Distrp |
95 | 93 | | Possp |
... | ... | @@ -126,16 +124,16 @@ type sel_prefs = |
126 | 124 | | RelationRole of string * string * string (* relacji * rola * atrybut roli *) |
127 | 125 | |
128 | 126 | type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: sel_prefs list; |
129 | - cr: string list; ce: string list; morfs: morf list} | |
127 | + mode: string list; cr: string list; ce: string list; morfs: morf list} | |
130 | 128 | |
131 | 129 | and morf = |
132 | 130 | Phrase of phrase |
133 | - | PhraseMode of string * phrase | |
131 | + (* | PhraseMode of string * phrase *) | |
134 | 132 | | E of phrase |
135 | 133 | | LexPhrase of (pos * lex) list * (restr * position list) |
136 | - | LexRPhrase of (pos * lex) list * (restr * position list) | |
137 | - | LexPhraseMode of string * (pos * lex) list * (restr * position list) | |
138 | - | LexRPhraseMode of string * (pos * lex) list * (restr * position list) | |
134 | + (* | LexRPhrase of (pos * lex) list * (restr * position list) *) | |
135 | + (* | LexPhraseMode of string * (pos * lex) list * (restr * position list) *) | |
136 | + (* | LexRPhraseMode of string * (pos * lex) list * (restr * position list) *) | |
139 | 137 | | PhraseAbbr of phrase_abbr * morf list |
140 | 138 | | PhraseComp of phrase_comp * (comp_type * comp list) |
141 | 139 | | LexPhraseId of string * pos * lex |
... | ... | @@ -146,13 +144,14 @@ and morf = |
146 | 144 | | Multi of phrase list*) |
147 | 145 | |
148 | 146 | let empty_position = |
149 | - {psn_id=(-1); gf=ARG; role=""; role_attr="";sel_prefs=[]; cr=[]; ce=[]; morfs=[]} | |
147 | + {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; morfs=[]} | |
150 | 148 | |
151 | 149 | type lex_record = { |
152 | 150 | lex_argument: morf; |
153 | 151 | lex_arguments: morf list; |
154 | 152 | lex_lemma: lex; |
155 | 153 | lex_numeral_lemma: lex; |
154 | + lex_mode: string list; | |
156 | 155 | lex_negation: negation; |
157 | 156 | lex_degree: grad; |
158 | 157 | lex_number: number; |
... | ... | @@ -162,7 +161,7 @@ type lex_record = { |
162 | 161 | } |
163 | 162 | |
164 | 163 | let empty_lex = {lex_argument=Phrase Null; lex_arguments=[]; lex_lemma=Lexeme ""; |
165 | - lex_numeral_lemma=Lexeme ""; lex_negation=NegationUndef; | |
164 | + lex_numeral_lemma=Lexeme ""; lex_mode=[]; lex_negation=NegationUndef; | |
166 | 165 | lex_degree=GradUndef; lex_number=NumberUndef; lex_reflex=ReflUndef; |
167 | 166 | lex_gender=GenderUndef; lex_modification = Natr,[]} |
168 | 167 | |
... | ... |
walenty/makefile
... | ... | @@ -28,8 +28,8 @@ eniam-walenty.cmxa: $(SOURCES) |
28 | 28 | test: test.ml |
29 | 29 | $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml |
30 | 30 | |
31 | -loader: ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml | |
32 | - $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml | |
31 | +loader: ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml #ENIAMwalLex.ml | |
32 | + $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml #ENIAMwalLex.ml | |
33 | 33 | |
34 | 34 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
35 | 35 | |
... | ... |