diff --git a/LCGlexicon/ENIAMcategoriesPL.ml b/LCGlexicon/ENIAMcategoriesPL.ml index ea5675b..d785617 100644 --- a/LCGlexicon/ENIAMcategoriesPL.ml +++ b/LCGlexicon/ENIAMcategoriesPL.ml @@ -159,7 +159,9 @@ let num_nsem lemma = let part_set = StringSet.of_list ["się"; "nie"; "by"; "niech"; "niechaj"; "niechże"; "niechajże"; "czy"; "gdyby"] -let clarify_categories proper cat coerced snode = function +let snode = SelectorMap.find selector_values SNode + +let clarify_categories proper cat coerced (*snode*) = function lemma,"subst",[numbers;cases;genders] -> let numbers = expand_numbers numbers in let cases = expand_cases cases in diff --git a/LCGlexicon/resources/lexicon-pl.dic b/LCGlexicon/resources/lexicon-pl.dic index ebda1a6..3a44284 100644 --- a/LCGlexicon/resources/lexicon-pl.dic +++ b/LCGlexicon/resources/lexicon-pl.dic @@ -1,6 +1,6 @@ @PHRASE_NAMES lex infp np prepnp adjp ip cp ncp advp padvp - adja prepadjp comprepnp compar measure num aglt aux-fut + adja prepadjp comprepnp comparp measure num aglt aux-fut aux-past aux-imp qub interj hyphen int rparen rparen2 rquot rquot2 rquot3 inclusion day-interval day-lex day-month-interval date-interval @@ -153,8 +153,8 @@ lemma=w,pos=prep,case=loc: prepnp*lemma*case{\(1+advp*T),/(day-month+day+ye # komparatywy # FIXME: trzeba poprawić comparnp i comparpp w walencji -pos=compar: QUANT[case=nom&gen&dat&acc&inst] compar*lemma*case{\(1+advp*T),/(np*T*case*T*T+adjp*T*case*T)}{\(1+qub),/(1+inclusion)}; -pos=compar: QUANT[case=postp] compar*lemma*case{\(1+advp*T),/(prepnp*T*T+prepadjp*T*T)}{\(1+qub),/(1+inclusion)}; +pos=compar: QUANT[case=nom&gen&dat&acc&inst] comparp*lemma*case{\(1+advp*T),/(np*T*case*T*T+adjp*T*case*T)}{\(1+qub),/(1+inclusion)}; +pos=compar: QUANT[case=postp] comparp*lemma*case{\(1+advp*T),/(prepnp*T*T+prepadjp*T*T)}{\(1+qub),/(1+inclusion)}; # frazy przymiotnikowe # FIXME: let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in diff --git a/exec/ENIAMexec.ml b/exec/ENIAMexec.ml index f533b24..41c1560 100644 --- a/exec/ENIAMexec.ml +++ b/exec/ENIAMexec.ml @@ -64,16 +64,16 @@ let rec translate_text = function | ENIAMsubsyntaxTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) -> translate_mode mode, translate_text text)) -let clarify_categories cats snode token = +let clarify_categories cats (*snode*) token = match token.ENIAMtokenizerTypes.token with ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> (* Printf.printf "lemma=%s pos=%s cat=%s coerced=%s\n%!" lemma pos cat (String.concat "," coerced); *) - ENIAMcategoriesPL.clarify_categories false cat coerced snode (lemma,pos,interp))))) + ENIAMcategoriesPL.clarify_categories false cat coerced (*snode*) (lemma,pos,interp))))) | ENIAMtokenizerTypes.Proper(lemma,pos,interp,senses2) -> - List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories true cat coerced snode (lemma,pos,interp))))) + List.flatten (Xlist.map interp (fun interp -> List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories true cat coerced (*snode*) (lemma,pos,interp))))) | ENIAMtokenizerTypes.Interp lemma -> - List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories false cat coerced snode (lemma,"interp",[]))) + List.flatten (Xlist.map cats (fun (cat,coerced) -> ENIAMcategoriesPL.clarify_categories false cat coerced (*snode*) (lemma,"interp",[]))) | _ -> [] let create_chart rules tokens lex_sems paths last = @@ -85,8 +85,8 @@ let create_chart rules tokens lex_sems paths last = ENIAM_LCGrenderer.reset_variable_names (); ENIAM_LCGrenderer.add_variable_numbers (); (* if s.ENIAMlexSemanticsTypes.schemata = [] then failwith ("create_chart: no schema for token=" ^ t.ENIAMtokenizerTypes.orth ^ " lemma=" ^ ENIAMtokens.get_lemma t.ENIAMtokenizerTypes.token) else *) - Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,snode,schema) -> - let cats = clarify_categories cats snode t in + Xlist.fold s.ENIAMlexSemanticsTypes.schemata chart (fun chart (selectors,cats,(*snode,*)schema) -> + let cats = clarify_categories cats (*snode*) t in (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *) let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats [selectors,schema] s.ENIAMlexSemanticsTypes.lex_entries in ENIAM_LCGchart.add_inc_list chart lnode rnode l 0)) in @@ -115,8 +115,8 @@ let create_dep_chart dep_rules tokens lex_sems paths = let s = ExtArray.get lex_sems id in ENIAM_LCGrenderer.reset_variable_names (); ENIAM_LCGrenderer.add_variable_numbers (); - Xlist.fold s.ENIAMlexSemanticsTypes.schemata nodes (fun nodes (selectors,cats,snode,schema) -> - let cats = clarify_categories ["X",["X"]] snode t in + Xlist.fold s.ENIAMlexSemanticsTypes.schemata nodes (fun nodes (selectors,cats,(*snode,*)schema) -> + let cats = clarify_categories ["X",["X"]] (*snode*) t in (* let chart = ENIAM_LCGchart.add_inc_list chart lnode rnode s.ENIAMlexSemanticsTypes.lex_entries 0 in *) let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats [selectors,schema] s.ENIAMlexSemanticsTypes.lex_entries in IntMap.add_inc nodes i l (fun l2 -> l @ l2))) in diff --git a/exec/ENIAMvisualization.ml b/exec/ENIAMvisualization.ml index a59290c..3359293 100644 --- a/exec/ENIAMvisualization.ml +++ b/exec/ENIAMvisualization.ml @@ -797,9 +797,21 @@ let rec extract_pos_cat_internal vars = function let rec extract_pos_cat vars = function | Tensor [] -> failwith "extract_pos_cat: ni" | Tensor [pos] -> extract_pos_cat_internal vars pos + | Tensor [pos;_] -> extract_pos_cat_internal vars pos + | Tensor [pos;_;_] -> extract_pos_cat_internal vars pos | Tensor (Atom "num" :: _) -> "Number" + | Tensor (Atom "aglt" :: _) -> "Aglt" | Tensor (Atom "prepnp" :: _) -> "Prep" - | Tensor (pos :: cat :: _) -> (*extract_pos_cat_internal vars pos ^ "*" ^*) extract_pos_cat_internal vars cat + | Tensor (Atom "comparp" :: _) -> "Compar" + | Tensor (Atom "cp" :: _) -> "Comp" + | Tensor [_;cat;_;_] -> extract_pos_cat_internal vars cat + | Tensor [_;_;cat;_;_] -> extract_pos_cat_internal vars cat + | Tensor [_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat + | Tensor [_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat + | Tensor [_;_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat + | Tensor [_;_;_;_;_;_;cat;_;_] -> extract_pos_cat_internal vars cat + (* | Tensor (pos :: cat :: _) -> (*extract_pos_cat_internal vars pos ^ "*" ^*) extract_pos_cat_internal vars cat *) + | Tensor _ as t -> print_endline ("Unknown symbol " ^ ENIAM_LCGstringOf.grammar_symbol 0 t); "Unknown" | Plus l -> failwith "extract_pos_cat: ni" | Imp(s,d,t2) -> extract_pos_cat vars s | One -> failwith "extract_pos_cat: ni" @@ -822,7 +834,8 @@ let omited = StringSet.of_list ["<subst>";"<depr>";"<ppron12>";"<ppron3>";"<sieb "<email>";"<obj-id>";"<adj>";"<apron>";"<adjc>";"<adjp>";"<adja>";"<adv>";"<ger>";"<pact>"; "<ppas>";"<fin>";"<bedzie>";"<praet>";"<winien>";"<impt>";"<imps>";"<pred>";"<aglt>";"<inf>"; "<pcon>";"<pant>";"<qub>";"<comp>";"<compar>";"<conj>";"<interj>";"<sinterj>";"<burk>"; - "<interp>";"<part>";"<unk>";"<building-number>";"<html-tag>";"<list-item>";"<numcomp>";"<phone-number>";"<postal-code>"] + "<interp>";"<part>";"<unk>";"<building-number>";"<html-tag>";"<list-item>";"<numcomp>"; + "<phone-number>";"<postal-code>";"<sentence>";"<paragraph>"] let cat_tokens_sequence text_fragments g = let _,_,l = ENIAM_LCGchart.fold g (0,0,[]) (fun (m,n,l) (symbol,node1,node2,sem,layer) -> diff --git a/lexSemantics/ENIAMadjuncts.ml b/lexSemantics/ENIAMadjuncts.ml index 89b2ef5..f571255 100644 --- a/lexSemantics/ENIAMadjuncts.ml +++ b/lexSemantics/ENIAMadjuncts.ml @@ -253,19 +253,19 @@ let simplify_schemata lexemes pos pos2 lemma schemata = "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}")))); *) schemata -let add_adjuncts preps compreps compars pos2 (selectors,cat,has_context,schema) = +let add_adjuncts preps compreps compars pos2 (selectors,cat,(*has_context,*)schema) = let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_comprep in let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepnp prep cases) in let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_prepadjp prep cases) in let compars = Xlist.rev_map compars ENIAMwalRenderer.render_compar in match pos2 with - "verb" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] + "verb" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.verb_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars] | "noun" -> [ - [Nsyn,Eq,["proper"]] @ selectors,cat,has_context,ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars; - [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,has_context,ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars; - [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,has_context,ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars] - | "adj" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars] - | "adv" -> [selectors,cat,has_context,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] + [Nsyn,Eq,["proper"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.proper_noun_adjuncts_simp @ prepnps @ compreps @ compars; + [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.measure_noun_adjuncts_simp @ prepnps @ compreps @ compars; + [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] @ selectors,cat,(*has_context,*)ENIAMwalRenderer.common_noun_adjuncts_simp @ prepnps @ compreps @ compars] + | "adj" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.adj_adjuncts_simp @ compars] + | "adv" -> [selectors,cat,(*has_context,*)schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars] | _ -> [] open ENIAMlexSemanticsTypes diff --git a/lexSemantics/ENIAMlexSemantics.ml b/lexSemantics/ENIAMlexSemantics.ml index 198b3e5..08ad2df 100644 --- a/lexSemantics/ENIAMlexSemantics.ml +++ b/lexSemantics/ENIAMlexSemantics.ml @@ -23,7 +23,7 @@ open ENIAMlexSemanticsTypes open ENIAMwalTypes open Xstd -let snode_values = ENIAM_LCGlexiconTypes.SelectorMap.find ENIAMcategoriesPL.selector_values ENIAM_LCGlexiconTypes.SNode +(*let snode_values = ENIAM_LCGlexiconTypes.SelectorMap.find ENIAMcategoriesPL.selector_values ENIAM_LCGlexiconTypes.SNode*) let find_sense m = try @@ -305,9 +305,9 @@ let assign_valence tokens lex_sems group = let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata1 in (* Printf.printf "C %s |schemata|=%d\n" lemma (Xlist.size schemata); *) let schemata = Xlist.rev_map schemata (fun (selectors,schema) -> - selectors,["X",["X"]],snode_values,ENIAMwalRenderer.render_simple_schema schema) in + selectors,["X",["X"]],(*snode_values,*)ENIAMwalRenderer.render_simple_schema schema) in let schemata = List.flatten (Xlist.rev_map schemata (ENIAMadjuncts.add_adjuncts preps compreps compars pos2)) in - let schemata = if schemata = [] then [[],["X",["X"]],snode_values,[]] else schemata in + let schemata = if schemata = [] then [[],["X",["X"]],(*snode_values,*)[]] else schemata in (* Printf.printf "D %s |schemata|=%d\n" lemma (Xlist.size schemata); *) let entries = List.flatten (Xlist.rev_map entries (ENIAMvalence.transform_lex_entry pos lemma)) in let entries = Xlist.map entries (fun (selectors,entry) -> diff --git a/lexSemantics/ENIAMlexSemanticsHTMLof.ml b/lexSemantics/ENIAMlexSemanticsHTMLof.ml index f2c63bd..e71cf34 100644 --- a/lexSemantics/ENIAMlexSemanticsHTMLof.ml +++ b/lexSemantics/ENIAMlexSemanticsHTMLof.ml @@ -60,10 +60,10 @@ let html_of_lex_sems tokens lex_sems = let core = Printf.sprintf "%3d %s %s" id orth lemma in let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) -> "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in - let schemata = Xlist.map t.schemata (fun (selectors,cat,snode,l) -> + let schemata = Xlist.map t.schemata (fun (selectors,cat,(*snode,*)l) -> "  [" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^ String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^ - String.concat "|" snode ^ + (*String.concat "|" snode ^*) " {" ^ String.concat ", " (Xlist.map l (fun (d,s) -> ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in (* let frames = Xlist.map t.frames (fun (selectors,senses,schema) -> FIXME diff --git a/lexSemantics/ENIAMlexSemanticsStringOf.ml b/lexSemantics/ENIAMlexSemanticsStringOf.ml index a026d66..d1c5de6 100644 --- a/lexSemantics/ENIAMlexSemanticsStringOf.ml +++ b/lexSemantics/ENIAMlexSemanticsStringOf.ml @@ -40,10 +40,10 @@ let string_of_lex_sems tokens lex_sems = let core = Printf.sprintf "%3d %s %s" id orth lemma in let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) -> "&[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in - let schemata = Xlist.map t.schemata (fun (selectors,cat,snode,l) -> + let schemata = Xlist.map t.schemata (fun (selectors,cat,(*snode,*)l) -> "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "]" ^ String.concat "," (Xlist.map cat (fun (m,l) -> m ^ "[" ^ String.concat "," l ^ "]")) ^ - String.concat "|" snode ^ + (*String.concat "|" snode ^*) " {" ^ String.concat "," (Xlist.map l (fun (d,s) -> ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in let frames = Xlist.map t.frames (fun f -> diff --git a/lexSemantics/ENIAMlexSemanticsTypes.ml b/lexSemantics/ENIAMlexSemanticsTypes.ml index 27b9580..f0569f5 100644 --- a/lexSemantics/ENIAMlexSemanticsTypes.ml +++ b/lexSemantics/ENIAMlexSemanticsTypes.ml @@ -31,18 +31,18 @@ type frame = { agf: string; sem_args: string list; rev_hipero: bool; - snode: string list; + (*snode: string list;*) sopinion: ENIAMwalTypes.opinion; fopinion: ENIAMwalTypes.opinion; } -let empty_frame = {selectors=[]; senses=[]; cats=["X",["X"]]; positions=[]; arole=""; arole_attr=""; arev=false; agf=""; sem_args=[]; rev_hipero=false; snode=[]; +let empty_frame = {selectors=[]; senses=[]; cats=["X",["X"]]; positions=[]; arole=""; arole_attr=""; arev=false; agf=""; sem_args=[]; rev_hipero=false; (*snode=[];*) sopinion=ENIAMwalTypes.Nieokreslony; fopinion=ENIAMwalTypes.Nieokreslony} type lex_sem = { schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * (string * string list) list * (* sensy *) - string list * (* has_context *) + (*string list **) (* has_context *) (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list; lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; diff --git a/lexSemantics/ENIAMvalence.ml b/lexSemantics/ENIAMvalence.ml index d48e759..986ace1 100644 --- a/lexSemantics/ENIAMvalence.ml +++ b/lexSemantics/ENIAMvalence.ml @@ -465,7 +465,8 @@ let transform_ger_schema lemma negation schema = (* FIXME: zakładam, że ger ze | phrase -> transform_pers_phrase lemma negation "gerundial" phrase))}) let transform_ppas_schema lemma negation mood schema = - if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then raise Not_found else + if not (Xlist.fold schema false (fun b p -> if p.gf = OBJ then true else b)) then + (*failwith ("transform_ppas_schema: attempt to make ppas schema for lemma " ^ lemma ^ "without OBJ arg")*)raise Not_found else Xlist.map schema (fun s -> let morfs = List.flatten (Xlist.map s.morfs (transform_comps negation mood)) in let morfs = List.flatten (Xlist.map morfs transform_preps) in diff --git a/lexSemantics/ENIAMwalRenderer.ml b/lexSemantics/ENIAMwalRenderer.ml index f5cff06..17c49d9 100644 --- a/lexSemantics/ENIAMwalRenderer.ml +++ b/lexSemantics/ENIAMwalRenderer.ml @@ -53,7 +53,7 @@ let render_pos_entry = function | "ppas" -> [Atom "ppas"; AVar "number"; AVar "case"; AVar "gender"; AVar "negation"] | "inf" -> [Atom "inf"; AVar "aspect"; AVar "negation"] | "qub" -> [Atom "qub"] - | "compar" -> [Atom "compar"; AVar "case"] + | "compar" -> [Atom "comparp"; AVar "case"] | "comp" -> [Atom "comp"; AVar "ctype"] | "fin" -> [Atom "pers"; AVar "negation"] | "praet" -> [Atom "pers"; AVar "negation"] @@ -130,7 +130,7 @@ let render_phrase = function | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) (* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top]*) | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep] - | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case] + | ComparP(prep,Case case) -> Tensor[Atom "comparp"; Atom prep; Atom case] (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep] *) (* | IP -> Tensor[Atom "ip";Top;Top;Top] *) | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp] @@ -209,7 +209,7 @@ let render_phrase_cat cat role node = function | PrepNumP(_,prep,Case case) -> Tensor[Atom "prepnump"; Atom prep; Atom case] *) (* | ComprepNP("") -> Tensor[Atom "comprepnp"; Top; Atom cat; Atom role; Atom node]*) | ComprepNP(prep) -> Tensor[Atom "comprepnp"; Atom prep; Atom cat; Atom role; Atom node] - | ComparP(prep,Case case) -> Tensor[Atom "compar"; Atom prep; Atom case; Atom cat; Atom role; Atom node] + | ComparP(prep,Case case) -> Tensor[Atom "comparp"; Atom prep; Atom case; Atom cat; Atom role; Atom node] (* | ComparPP(_,prep) -> Tensor[Atom "comparpp"; Atom prep; Atom cat; Atom role; Atom node] *) (* | IP -> Tensor[Atom "ip";Top;Top;Top; Atom cat; Atom role; Atom node] *) | CP (ctype,Comp comp) -> Tensor[Atom "cp"; arg_of_ctype ctype; Atom comp; Atom cat; Atom role; Atom node] @@ -367,9 +367,9 @@ let render_connected_prepadjp prep cases = adjunct (postp @ (Xlist.map cases (fun case -> Tensor[Atom "prepadjp"; Atom prep; Atom case]))) -let render_compar prep = Both,Plus[One;Tensor[Atom "compar"; Atom prep; Top]] +let render_compar prep = Both,Plus[One;Tensor[Atom "comparp"; Atom prep; Top]] -let render_connected_compar prep = adjunct [Tensor[Atom "compar"; Atom prep; Top]] +let render_connected_compar prep = adjunct [Tensor[Atom "comparp"; Atom prep; Top]] let verb_adjuncts_simp = [ Both, Plus[One;Tensor[Atom "advp"; Atom "pron"]];