(* walParser.ml *)

(*
 *  ENIAM: Categorial Syntactic-Semantic Parser for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open WalTypes

open Xstd

(* Turns a flat token list into a nested one: every opening bracket token
   together with its matching closing token is replaced by a single
   Paren / Bracet / SqBra node holding the tokens found in between.
   Closing tokens met at the top level are left in place untouched.
   Raises Failure with the text find_rbracket when a bracket opened here
   is never closed or is closed by a token of the wrong kind. *)
let rec find_brackets = function
  | [] -> []
  | LParen :: rest ->
      let inner, remaining = find_rbracket RParen [] rest in
      Paren inner :: find_brackets remaining
  | LBracet :: rest ->
      let inner, remaining = find_rbracket RBracet [] rest in
      Bracet inner :: find_brackets remaining
  | LSqBra :: rest ->
      let inner, remaining = find_rbracket RSqBra [] rest in
      SqBra inner :: find_brackets remaining
  | token :: rest -> token :: find_brackets rest

(* Collects tokens up to the closing token [bracket]; [rev] accumulates the
   tokens seen so far in reverse order. Returns the collected tokens (in
   source order) paired with the tokens that follow the closing bracket. *)
and find_rbracket bracket rev = function
  | [] -> failwith "find_rbracket"
  | LParen :: rest ->
      let inner, remaining = find_rbracket RParen [] rest in
      find_rbracket bracket (Paren inner :: rev) remaining
  | LBracet :: rest ->
      let inner, remaining = find_rbracket RBracet [] rest in
      find_rbracket bracket (Bracet inner :: rev) remaining
  | LSqBra :: rest ->
      let inner, remaining = find_rbracket RSqBra [] rest in
      find_rbracket bracket (SqBra inner :: rev) remaining
  | (RParen | RBracet | RSqBra as closing) :: rest ->
      (* Any closing token ends the group, but only the expected kind is legal. *)
      if bracket = closing then List.rev rev, rest
      else failwith "find_rbracket"
  | token :: rest -> find_rbracket bracket (token :: rev) rest

(* Splits a list on every occurrence of [symb], returning the segments in
   order. [rev] is the reversed prefix of the segment being built (pass []
   on the initial call). An empty input yields one empty segment.
   Raises Failure with the text split_symbol when the separator is the very
   last element of the input. *)
let rec split_symbol symb rev = function
  | [] -> [List.rev rev]
  | item :: rest when item <> symb -> split_symbol symb (item :: rev) rest
  | [_] -> failwith "split_symbol"  (* trailing separator *)
  | _ :: rest -> List.rev rev :: split_symbol symb [] rest

(* Maps an opinion label to its constructor. Every opinion is accepted both
   under its full Polish name and under a three-letter abbreviation.
   Raises Failure for any other label. *)
let parse_opinion label =
  match label with
  | "pewny" | "cer" -> Pewny
  | "potoczny" | "col" -> Potoczny
  | "wątpliwy" | "unc" -> Watpliwy
  | "archaiczny" | "dat" -> Archaiczny
  | "zły" | "bad" -> Zly
  | "wulgarny" | "vul" -> Wulgarny
  | _ -> failwith ("parse_opinion: " ^ label)

(* Interprets the role labels attached to a schema position.
   Returns a triple: the grammatical function (ARG when no subj/obj label is
   present), the controller indices and the controllee indices. Index lists
   are built by prepending, so they come out in reverse order of appearance.
   Empty labels are ignored. Raises Failure on an unknown label or when more
   than one grammatical function label is given. *)
let parse_roles l =
  let step (funcs, controllers, controllees) label =
    match label with
    | "subj" -> SUBJ :: funcs, controllers, controllees
    | "obj" -> OBJ :: funcs, controllers, controllees
    | "controller" -> funcs, "1" :: controllers, controllees
    | "controllee" -> funcs, controllers, "1" :: controllees
    | "controller2" -> funcs, "2" :: controllers, controllees
    | "controllee2" -> funcs, controllers, "2" :: controllees
    | "" -> funcs, controllers, controllees
    | other -> failwith ("parse_roles: " ^ other) in
  let funcs, cr, ce = Xlist.fold l ([], [], []) step in
  let gf =
    match funcs with
    | [] -> ARG
    | [single] -> single
    | _ -> failwith "parse_roles" in
  gf, cr, ce

(* Decodes the negation attribute from a one-token list.
   Raises Failure (with the offending tokens printed) for anything that is
   not a single recognised text token. *)
let parse_negation tokens =
  let unknown () = failwith ("parse_negation: " ^ WalStringOf.token_list tokens) in
  match tokens with
  | [Text value] ->
      (match value with
       | "_" -> NegationUndef
       | "neg" -> Negation
       | "aff" -> Aff
       | "" -> NegationNA
       | _ -> unknown ())
  | _ -> unknown ()

(* Decodes the predicativity attribute from a one-token list. The empty
   string and false both mean PredNA; pred and true both mean Pred.
   Raises Failure for any other token list. *)
let parse_pred tokens =
  match tokens with
  | [Text ("" | "false")] -> PredNA
  | [Text ("pred" | "true")] -> Pred
  | _ -> failwith ("parse_pred: " ^ WalStringOf.token_list tokens)

(* Decodes the aspect attribute from a one-token list: a concrete aspect
   name, an underscore (undefined) or the empty string (not applicable).
   Raises Failure for any other token list. *)
let parse_aspect tokens =
  match tokens with
  | [Text ("perf" | "imperf" as aspect)] -> Aspect aspect
  | [Text "_"] -> AspectUndef
  | [Text ""] -> AspectNA
  | _ -> failwith ("parse_aspect: " ^ WalStringOf.token_list tokens)

(* Decodes a case attribute from a one-token list. Ordinary case names are
   wrapped in Case unchanged; str, part and agr have dedicated constructors.
   Raises Failure for any other token list. *)
let parse_case tokens =
  match tokens with
  | [Text ("nom" | "gen" | "dat" | "acc" | "inst" | "loc" | "pred" | "postp" as name)] ->
      Case name
  | [Text "str"] -> Str
  | [Text "part"] -> Part
  | [Text "agr"] -> CaseAgr
  | _ -> failwith ("parse_case: " ^ WalStringOf.token_list tokens)

(* Extracts the preposition carried by a single text token and returns it
   unchanged.
   This function used to list well over a hundred simple and complex
   prepositions explicitly (e.g. niż, w czasie, ze względu na, z pomocą),
   each mapped to itself, followed by a catch-all arm returning any text
   token as is. The explicit arms were therefore redundant and have been
   folded into the catch-all; the accepted inputs and results are unchanged.
   Raises Failure (with the offending tokens printed) when the argument is
   not exactly one text token. *)
let parse_prep = function
  | [Text s] -> s
  | l -> failwith ("parse_prep: " ^ WalStringOf.token_list l)

(* Decodes a complementizer specification into a pair of the complementizer
   type and the list of concrete complementizers.
   - int / rel alone give the type with an empty list;
   - int[...] / rel[...] give the type with the complementizers listed inside
     the square brackets (semicolon separated, each parsed recursively);
   - a bare complementizer gives CompTypeUndef with a one-element list.
   The comments next to the literals keep the part-of-speech notes of the
   original table. Raises Failure for any other token list. *)
let rec parse_comp tokens =
  let listed inner =
    List.flatten
      (Xlist.map (split_symbol Semic [] inner) (fun item -> snd (parse_comp item))) in
  match tokens with
  | [Text "int"] -> Int, []
  | [Text "rel"] -> Rel, []
  | [Text "int"; SqBra inner] -> Int, listed inner
  | [Text "rel"; SqBra inner] -> Rel, listed inner
  | [Text "gdy"] -> CompTypeUndef, [Gdy] (* adv; gdyby: qub comp *)
  | [Text "żeby2"] -> CompTypeUndef, [Zeby]
  | [Text ( "co"        (* subst qub prep comp *)
          | "kto"       (* subst *)
          | "ile"       (* num adv *)
          | "jaki"      (* adj *)
          | "który"     (* adj *)
          | "czyj"      (* adj *)
          | "jak"       (* prep conj adv *)
          | "kiedy"     (* comp adv *)
          | "gdzie"     (* qub adv *)
          | "odkąd"     (* adv *)
          | "skąd"      (* adv *)
          | "dokąd"     (* adv *)
          | "którędy"   (* adv *)
          | "dlaczego"  (* adv *)
          | "czemu"     (* adv *)
          | "czy"       (* qub conj *)
          | "jakby"     (* qub comp *)
          | "jakoby"    (* qub comp *)
          | "dopóki"    (* comp *)
          | "zanim"     (* comp *)
          | "jeśli"     (* comp *)
          | "żeby"      (* qub comp *)
          | "że"        (* qub comp *)
          | "aż"        (* qub comp *)
          | "bo"        (* qub comp *)
          as comp)] -> CompTypeUndef, [Comp comp]
  | _ -> failwith ("parse_comp: " ^ WalStringOf.token_list tokens)

(* Decodes a grammatical number attribute from a one-token list: sg or pl,
   agr (agreement) or an underscore (undefined).
   Raises Failure for any other token list. *)
let parse_number tokens =
  match tokens with
  | [Text ("sg" | "pl" as number)] -> Number number
  | [Text "agr"] -> NumberAgr
  | [Text "_"] -> NumberUndef
  | _ -> failwith ("parse_number: " ^ WalStringOf.token_list tokens)

(* Decodes a gender attribute from a one-token list. The label n expands to
   both neuter genders and m1.n to m1 plus both neuter genders.
   Raises Failure for any other token list. *)
let parse_gender tokens =
  match tokens with
  | [Text ("m1" | "m3" | "f" as gender)] -> Gender gender
  | [Text "n"] -> Genders ["n1"; "n2"]
  | [Text "m1.n"] -> Genders ["m1"; "n1"; "n2"]
  | [Text "_"] -> GenderUndef
  | [Text "agr"] -> GenderAgr
  | _ -> failwith ("parse_gender: " ^ WalStringOf.token_list tokens)

(* Decodes a degree (grade) attribute from a one-token list: pos, com, sup
   or an underscore (undefined).
   Raises Failure for any other token list. *)
let parse_grad tokens =
  match tokens with
  | [Text ("pos" | "com" | "sup" as grade)] -> Grad grade
  | [Text "_"] -> GradUndef
  | _ -> failwith ("parse_grad: " ^ WalStringOf.token_list tokens)

(* Decodes the reflexive marker. An empty token list, an empty text token
   and false all mean ReflEmpty; się and true mean ReflSie.
   Raises Failure for any other token list. *)
let parse_refl tokens =
  match tokens with
  | [] | [Text ("" | "false")] -> ReflEmpty
  | [Text ("się" | "true")] -> ReflSie
  | _ -> failwith ("parse_refl: " ^ WalStringOf.token_list tokens)

(* Parses a lexical specification:
   - a quoted E(gender) gives Elexeme;
   - a quoted word, or two quoted words joined by a comma, gives Lexeme;
   - OR(...) gives ORcoord when its arguments are separated by semicolons,
     ORconcat when they are separated only by commas, and fails when there
     is a single argument;
   - XOR(...) gives XOR over all arguments, split on semicolons and commas.
   Raises Failure for any other token list. *)
let rec parse_lex tokens =
  match tokens with
  | [Quot; Text "E"; Paren [Text gender]; Quot] ->
      Elexeme (parse_gender [Text gender])
  | [Quot; Text word; Quot] -> Lexeme word
  | [Quot; Text first; Comma; Text second; Quot] -> Lexeme (first ^ "," ^ second)
  | [Text "OR"; Paren args] ->
      let by_semic = split_symbol Semic [] args in
      (match by_semic with
       | [only] ->
           let by_comma = split_symbol Comma [] only in
           (match by_comma with
            | [_] -> failwith "parse_lex OR"
            | _ -> ORconcat (Xlist.map by_comma parse_lex))
       | _ -> ORcoord (Xlist.map by_semic parse_lex))
  | [Text "XOR"; Paren args] ->
      let by_semic = split_symbol Semic [] args in
      let by_comma = Xlist.map by_semic (split_symbol Comma []) in
      XOR (Xlist.map (List.flatten by_comma) parse_lex)
  | _ -> failwith ("parse_lex: " ^ WalStringOf.token_list tokens)

(* Returns the string held by a plain Lexeme; raises Failure for every
   other kind of lexical specification. *)
let get_lexeme lex =
  match lex with
  | Lexeme word -> word
  | _ -> failwith "get_lexeme"

(* Builds a schema position from a grammatical function [r], controller and
   controllee index lists and a list of realisations. The semantic role
   fields start out empty, there are no selectional preferences, and the
   direction is Both. *)
let new_schema r cr ce morfs =
  { gf = r;
    cr;
    ce;
    morfs;
    role = "";
    role_attr = "";
    sel_prefs = [];
    dir = Both }

(* Parses a modification restriction: one of natr, atr, ratr, atr1, ratr1,
   optionally followed (for all but natr) by a parenthesised schema of
   allowed modifiers. Returns the restriction constructor paired with the
   parsed schema (empty when none is given).
   Raises Failure for any other token list. *)
let rec parse_restr l =
  let unknown () = failwith ("parse_restr: " ^ WalStringOf.token_list l) in
  (* Restrictions that may carry a parenthesised schema. *)
  let with_schema = function
    | "atr" -> Atr
    | "ratr" -> Ratr
    | "atr1" -> Atr1
    | "ratr1" -> Ratr1
    | _ -> unknown () in
  match l with
  | [Text "natr"] -> Natr, []
  | [Text name] -> with_schema name, []
  | [Text name; Paren schema] ->
      let restr = with_schema name in
      restr, parse_schema_simple schema
  | _ -> unknown ()

(* Parses a schema nested inside a restriction: positions are separated by
   Plus tokens, and each position is a braced list of realisations with at
   most one role label in front of it.
   Raises Failure with the text parse_schema_simple for any other shape. *)
and parse_schema_simple schema =
  let positions = split_symbol Plus [] schema in
  Xlist.map positions (fun position ->
    let role_names, body =
      match position with
      | [Bracet b] -> [], b
      | [Text s1; Bracet b] -> [s1], b
      | _ -> failwith "parse_schema_simple" in
    let r, cr, ce = parse_roles role_names in
    new_schema r cr ce (parse_morfs body))

(* Parses the mode argument of advp / xp phrases. Returns the mode name and
   the realisations listed in square brackets after it (empty when there are
   no brackets). pron and misc are accepted only without brackets.
   Raises Failure for any other token list. *)
and parse_mode tokens =
  match tokens with
  | [Text ( "abl" | "adl" | "caus" | "dest" | "dur" | "instr" | "locat"
          | "perl" | "temp" | "mod" | "pron" | "misc" as mode)] ->
      mode, []
  | [Text ( "abl" | "adl" | "caus" | "dest" | "dur" | "instr" | "locat"
          | "perl" | "temp" | "mod" as mode); SqBra realizations] ->
      mode, parse_morfs realizations
  | _ -> failwith ("parse_mode: " ^ WalStringOf.token_list tokens)

(* Parses a single phrase type. The argument is a pair: the phrase type name
   and the list of its arguments, each argument being a token list (as
   produced by parse_morfs splitting the parenthesised part on commas).
   Arity is checked by the list patterns, so a known name with the wrong
   number of arguments falls through to the final failing arm.
   The lex arms require the first argument to be the embedded phrase type
   (optionally with its own parenthesised arguments) and delegate the rest
   to parse_lex_morf.
   Raises Failure (printing the name and arguments) for anything else. *)
and parse_morf = function
    "np",[case] -> Phrase(NP(parse_case case))
  | "prepnp",[prep; case] -> Phrase(PrepNP(Sem,parse_prep prep,parse_case case))
  | "adjp",[case] -> Phrase(AdjP(parse_case case))
  | "prepadjp",[prep; case] -> Phrase(PrepAdjP(Sem,parse_prep prep,parse_case case))
  | "nump",[case] -> Phrase(NumP(parse_case case))
  | "prepnump",[prep; case] -> Phrase(PrepNumP(Sem,parse_prep prep,parse_case case))
  | "comprepnp",[prep] -> Phrase(ComprepNP(Sem,parse_prep prep))
  | "compar",[prep] -> PhraseAbbr(ComparP(parse_prep prep),[])
  | "cp",[comp] -> PhraseComp(Cp,parse_comp comp)
  | "ncp",[case; comp] -> PhraseComp(Ncp(parse_case case),parse_comp comp)
  | "prepncp",[prep; case; comp] -> PhraseComp(Prepncp(parse_prep prep,parse_case case),parse_comp comp)
  | "infp",[aspect] -> Phrase(InfP(parse_aspect aspect(*,ReqUndef*)))
  (* For fixed phrases the first argument (morf) is currently ignored; only
     the lexeme is kept. The three-argument form rejoins a lexeme that was
     split because it contained a comma. *)
  | "fixed",[morf;lex] -> Phrase(FixedP((*parse_morf_single morf,*) get_lexeme (parse_lex lex)))
  | "fixed",[morf;lex;lex2] -> Phrase(FixedP((*parse_morf_single morf,*) get_lexeme (parse_lex (lex @ [Comma] @ lex2))))
  | "or",[] -> Phrase Or
  (* refl and recip are both represented as the lexical item się. *)
  | "refl",[] -> Phrase (*Refl*)(Lex "się")
  | "recip",[] -> Phrase (*Recip*)(Lex "się") (* FIXME *)
  | "E",[] -> E Null
  | "advp",[mode] -> let mode,morfs = parse_mode mode in PhraseAbbr(Advp mode,morfs)
  | "xp",[mode] -> let mode,morfs = parse_mode mode in PhraseAbbr(Xp mode,morfs)
  | "nonch",[] -> PhraseAbbr(Nonch,[])
  | "distrp",[] -> PhraseAbbr(Distrp,[])
  | "possp",[] -> PhraseAbbr(Possp,[])
  | "null",[] -> Phrase Null
  | "lex",[Text a; Paren p] :: ll -> parse_lex_morf (a, split_symbol Comma [] p, ll)
  | "lex",[Text a] :: ll -> parse_lex_morf (a, [], ll)
  | s,ll -> failwith ("parse_morf: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll WalStringOf.token_list) ^ ")")

(* Parses a lexicalised phrase (the lex(...) construct). The argument is a
   triple: the name of the embedded phrase type, the arguments of that
   phrase type, and the remaining lex arguments (number, gender, lexeme,
   restriction, ... depending on the phrase type). Each of the two lists
   holds token lists.
   Most arms build a LexPhrase whose first component lists (part of speech,
   lexeme) pairs and whose second component is the parsed restriction;
   prepositional variants prepend a PREP entry carrying the preposition.
   advp and xp arms build LexPhraseMode, which additionally carries the mode.
   Raises Failure when no arm matches the name and argument counts. *)
and parse_lex_morf = function
  | "np",[case],[num;lex;restr] -> LexPhrase([SUBST(parse_number num,parse_case case),parse_lex lex], parse_restr restr)
  | "prepnp",[prep; case],[num;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);SUBST(parse_number num,parse_case case),parse_lex lex], parse_restr restr)
  | "adjp",[case],[num;gender;grad;lex;restr] -> LexPhrase([ADJ(parse_number num,parse_case case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr)
  | "prepadjp",[prep; case],[num;gender;grad;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);ADJ(parse_number num,parse_case case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr)
  | "ppasp",[case],[num;gender;negation;lex;restr] -> LexPhrase([PPAS(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation),parse_lex lex], parse_restr restr)
  | "prepppasp",[prep;case],[num;gender;negation;lex;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);PPAS(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation),parse_lex lex], parse_restr restr)
  | "pactp",[case],[num;gender;negation;lex;refl;restr] -> LexPhrase([PACT(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
  | "preppactp",[prep;case],[num;gender;negation;lex;refl;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);PACT(parse_number num,parse_case case,parse_gender gender,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
  | "gerp",[case],[num;negation;lex;refl;restr] -> LexPhrase([GER(parse_number num,parse_case case,GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
  | "prepgerp",[prep;case],[num;negation;lex;refl;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);GER(parse_number num,parse_case case,GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
  (* Numeral phrases take two lexemes: the numeral and the counted noun. *)
  | "nump",[case],[lex1;lex2;restr] -> LexPhrase([NUM(parse_case case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr)
  | "prepnump",[prep;case],[lex1;lex2;restr] -> LexPhrase([PREP(parse_case case),Lexeme(parse_prep prep);NUM(parse_case case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr)
  (* compar has no restriction argument: its single lex argument is a
     Plus-separated list of phrases, each becoming a required (Ratrs)
     ARG position. *)
  | "compar",[prep],[morfs] -> LexPhrase([COMPAR,Lexeme(parse_prep prep)],(Ratrs,Xlist.map (split_symbol Plus [] morfs) (fun morf -> new_schema ARG [] [] [parse_morf_single morf])))
  | "infp",[aspect],[negation;lex;refl;restr] -> LexPhrase([INF(parse_aspect aspect,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
  | "qub",[],[lex;restr] -> LexPhrase([QUB,parse_lex lex], parse_restr restr)
  (* A lexicalised advp must not list realisations in its mode. *)
  | "advp",[mode],[grad;lex;restr] ->
       (match parse_mode mode with
          mode, [] -> LexPhraseMode(mode,[ADV(parse_grad grad),parse_lex lex], parse_restr restr)
        | _ -> failwith "parse_lex_morf")
  (* xp realised as prepgerp is matched directly on the tokens, before the
     generic xp arm below, which goes through parse_mode. *)
  | "xp",[[Text mode;SqBra [Text "prepgerp"; Paren [prep;Comma;case]]]],[num;negation;lex;refl;restr] ->
       LexPhraseMode(mode,[PREP(parse_case [case]),Lexeme(parse_prep [prep]);GER(parse_number num,parse_case [case],GenderUndef,AspectUndef,parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
  (* Generic xp: the mode must list exactly one realisation, and the shape
     of the remaining arguments must fit that realisation. *)
  | "xp",[mode],ll ->
       (match parse_mode mode,ll with
          (mode,[Phrase(NP case)]),[num;lex;restr] -> LexPhraseMode(mode,[SUBST(parse_number num,case),parse_lex lex], parse_restr restr)
        | (mode,[Phrase(PrepNP(_,prep,case))]),[num;lex;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;SUBST(parse_number num,case),parse_lex lex], parse_restr restr)
        | (mode,[Phrase(PrepAdjP(_,prep,case))]),[num;gender;grad;lex;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;ADJ(parse_number num,case,parse_gender gender,parse_grad grad),parse_lex lex], parse_restr restr)
        | (mode,[Phrase(NumP case)]),[lex1;lex2;restr] -> LexPhraseMode(mode,[NUM(case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr)
        | (mode,[Phrase(PrepNumP(_,prep,case))]),[lex1;lex2;restr] -> LexPhraseMode(mode,[PREP case,Lexeme prep;NUM(case,GenderUndef,AcmUndef),parse_lex lex1;SUBST(NumberUndef,CaseUndef),parse_lex lex2], parse_restr restr)
        | (mode,[PhraseAbbr(Advp _,[])]),[grad;lex;restr] -> LexPhraseMode(mode,[ADV(parse_grad grad),parse_lex lex], parse_restr restr)
        | (mode,[PhraseAbbr(ComparP prep,[])]),[morfs] -> LexPhraseMode(mode,[COMPAR,Lexeme prep],(Ratrs,Xlist.map (split_symbol Plus [] morfs) (fun morf -> new_schema ARG [] [] [parse_morf_single morf])))
        (* NOTE(review): unlike the sibling arms this one returns LexPhrase,
           so the parsed mode is not carried into the result. Confirm that
           this is intended. *)
        | (mode,[PhraseComp(Cp,(ctype,[Comp comp]))]),[negation;lex;refl;restr] -> LexPhrase([COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
        | _ -> failwith ("parse_lex_morf: xp(" ^ WalStringOf.token_list mode ^ ")," ^ String.concat "," (Xlist.map ll WalStringOf.token_list) ^ ")"))
  (* Lexicalised clauses: one complementizer, or two alternatives wrapped
     in XOR. Other complementizer lists are rejected. *)
  | "cp",[comp],[negation;lex;refl;restr] ->
       (match parse_comp comp with
          ctype,[Comp comp] -> LexPhrase([COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
        | ctype,[Comp comp1;Comp comp2] -> LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
        | _,comp -> failwith ("parse_lex_morf comp: " (*^ WalStringOf.comp comp*)))
  (* ncp is a clause introduced by the singular pronoun to in the given case. *)
  | "ncp",[case;comp],[negation;lex;refl;restr] ->
       (match parse_comp comp with
          ctype,[Comp comp] -> LexPhrase([SUBST(Number "sg",parse_case case),Lexeme "to";COMP ctype,Lexeme comp;PERS(parse_negation negation,parse_refl refl),parse_lex lex], parse_restr restr)
        | _,comp -> failwith ("parse_lex_morf comp: " (*^ WalStringOf.comp comp*)))
  | s,ll,ll2 -> failwith ("parse_lex_morf: " ^ s ^ "(" ^ String.concat "," (Xlist.map ll WalStringOf.token_list) ^ ")," ^ String.concat "," (Xlist.map ll2 WalStringOf.token_list) ^ ")")

(* Parses a semicolon-separated list of phrase types. Each item is a name,
   optionally followed by parenthesised, comma-separated arguments.
   Equivalents of prepositions and complementizers are not expanded here.
   Raises Failure for an item of any other shape. *)
and parse_morfs l =
  let parse_item item =
    match item with
    | [Text name; Paren args] -> parse_morf (name, split_symbol Comma [] args)
    | [Text name] -> parse_morf (name, [])
    | _ -> failwith ("parse_morfs: " ^ WalStringOf.token_list item) in
  Xlist.map (split_symbol Semic [] l) parse_item

(* Parses a token list that must describe exactly one phrase type; raises
   Failure (printing what was parsed) otherwise. *)
and parse_morf_single l = (* FIXME: problem with equivs *)
  let parsed = parse_morfs l in
  match parsed with
  | [single] -> single
  | _ -> failwith ("parse_morf_single: " ^ String.concat ";" (Xlist.map parsed WalStringOf.morf))

(* Tokenises a schema string: brackets, plus, comma, semicolon and the
   single-quote character become dedicated tokens, every other stretch of
   text becomes a Text token, and bracketed groups are then nested by
   find_brackets. *)
let split_schema schema =
  let token_of_piece = function
    | Str.Text text -> Text text
    | Str.Delim delim ->
        (match delim with
         | "(" -> LParen
         | ")" -> RParen
         | "{" -> LBracet
         | "}" -> RBracet
         | "[" -> LSqBra
         | "]" -> RSqBra
         | ";" -> Semic
         | "+" -> Plus
         | "," -> Comma
         | "'" -> Quot
         | _ -> failwith "parse_schema") in
  let delimiters = Str.regexp "\\]\\|\\+\\|{\\|}\\|(\\|)\\|,\\|;\\|'\\|\\[" in
  let pieces = Str.full_split delimiters schema in
  find_brackets (Xlist.map pieces token_of_piece)

(* Parses a whole schema string into a list of positions. The empty string
   gives the empty schema. Positions are separated by plus signs; each is a
   braced list of realisations preceded by zero to three comma-separated
   role labels.
   Raises Failure (quoting the whole schema) for a position of any other
   shape. *)
let parse_schema schema =
  match schema with
  | "" -> []
  | _ ->
      let positions = split_symbol Plus [] (split_schema schema) in
      Xlist.map positions (fun position ->
        let role_names, body =
          match position with
          | [Bracet b] -> [], b
          | [Text s1; Bracet b] -> [s1], b
          | [Text s1; Comma; Text s2; Bracet b] -> [s1; s2], b
          | [Text s1; Comma; Text s2; Comma; Text s3; Bracet b] -> [s1; s2; s3], b
          | _ -> failwith ("parse_schema: " ^ schema) in
        let r, cr, ce = parse_roles role_names in
        new_schema r cr ce (parse_morfs body))

(* Splits a dictionary headword into the lemma and its reflexive marker.
   A single word gives an empty marker; a word followed by się gives się.
   Raises Failure for any other shape (including the empty string). *)
let parse_lexeme s =
  let words = Str.split (Str.regexp " ") s in
  match words with
  | [lemma] -> lemma, ""
  | [lemma; "się"] -> lemma, "się"
  | _ -> failwith ("parse_lexeme: " ^ s)

(* Loads a frames file into a map from lemma to the list of its raw entries
   (reflexive marker, opinion, negation, predicativity, aspect, schema), all
   still unparsed strings.
   The first line of the file is discarded. Lines starting with a percent
   sign are skipped, and so are empty lines: previously an empty line made
   String.sub raise Invalid_argument before the line could be examined.
   Every other line must consist of exactly six fields separated by a colon
   followed by a space.
   Raises Failure when the file contains no lines at all, when a line does
   not have six fields, or when the headword cannot be parsed by
   parse_lexeme. *)
let load_frames filename =
  let lines = Str.split (Str.regexp "\n") (File.load_file filename) in
  let entries =
    match lines with
    | [] -> failwith ("load_frames: empty file " ^ filename)
    | _first_line :: rest -> rest in
  Xlist.fold entries StringMap.empty (fun schemas line ->
    if line = "" || line.[0] = '%' then schemas else
    match Str.split_delim (Str.regexp ": ") line with
      [lexeme;opinion;negation;pred;aspect;schema] ->
         let lexeme,refl = parse_lexeme lexeme in
         let entry = refl,opinion,negation,pred,aspect,schema in
         StringMap.add_inc schemas lexeme [entry] (fun l -> entry :: l)
    | _ -> failwith ("load_frames: " ^ line))


(* Fills in the realisations of abbreviated phrase types.
   - advp(misc) and advp(mod) without realisations stay as they are;
   - a comparative phrase without realisations gets its two fixed ones;
   - any other abbreviation without realisations is looked up in [expands]
     (Failure when it is missing);
   - an abbreviation that already lists realisations has each of them
     expanded recursively;
   - lexicalised phrases have their embedded schema expanded;
   - everything else is returned unchanged. *)
let rec expand_schema_morf expands morf =
  match morf with
  | PhraseAbbr (Advp ("misc" | "mod"), []) -> morf
  | PhraseAbbr (ComparP prep, []) ->
      PhraseAbbr (ComparP prep,
                  [Phrase (ComparNP (Sem, prep, Str)); Phrase (ComparPP (Sem, prep))])
  | PhraseAbbr (abbr, []) ->
      let realizations =
        try AbbrMap.find expands abbr
        with Not_found -> failwith "expand_schema_morf" in
      PhraseAbbr (abbr, realizations)
  | PhraseAbbr (abbr, listed) ->
      PhraseAbbr (abbr, Xlist.map listed (expand_schema_morf expands))
  | LexPhrase (pos_lex, (restr, schema)) ->
      LexPhrase (pos_lex, (restr, expand_schema expands schema))
  | LexPhraseMode (mode, pos_lex, (restr, schema)) ->
      LexPhraseMode (mode, pos_lex, (restr, expand_schema expands schema))
  | _ -> morf

(* Applies expand_schema_morf to every realisation of every position. *)
and expand_schema expands schema =
  Xlist.map schema (fun position ->
    let expanded = Xlist.map position.morfs (expand_schema_morf expands) in
    { position with morfs = expanded })

(* Replaces a realisation by the list of its concrete variants.
   - A clause-like phrase (cp / ncp / prepncp) yields one phrase per
     complementizer; when none is listed, the complementizers registered in
     [subtypes] for its type are used (Failure when the type is missing).
   - Lexicalised phrases and abbreviations are rewritten recursively and
     returned as singletons.
   - E Null becomes its four fixed alternatives.
   - Anything else is returned as a singleton. *)
let rec expand_subtypes_morf subtypes morf =
  match morf with
  | PhraseComp (comp_morf, (ctype, listed)) ->
      let comps =
        match listed with
        | [] ->
            (try CompMap.find subtypes ctype
             with Not_found -> failwith "expand_subtypes_schema")
        | _ -> listed in
      let realize comp =
        match comp_morf with
        | Cp -> Phrase (CP (ctype, comp))
        | Ncp case -> Phrase (NCP (case, ctype, comp))
        | Prepncp (prep, case) -> Phrase (PrepNCP (Sem, prep, case, ctype, comp)) in
      Xlist.map comps realize
  | LexPhrase (pos_lex, (restr, schema)) ->
      [LexPhrase (pos_lex, (restr, expand_subtypes subtypes schema))]
  | LexPhraseMode (mode, pos_lex, (restr, schema)) ->
      [LexPhraseMode (mode, pos_lex, (restr, expand_subtypes subtypes schema))]
  | PhraseAbbr (abbr, realizations) ->
      let expanded = Xlist.map realizations (expand_subtypes_morf subtypes) in
      [PhraseAbbr (abbr, List.flatten expanded)]
  | E Null ->
      [E (NP Str); E (NCP (Str, CompTypeUndef, CompUndef)); E (CP (CompTypeUndef, CompUndef)); E Or]
  | _ -> [morf]

(* Applies expand_subtypes_morf to every realisation of every position and
   concatenates the resulting variants. *)
and expand_subtypes subtypes schema =
  Xlist.map schema (fun position ->
    let expanded = Xlist.map position.morfs (expand_subtypes_morf subtypes) in
    { position with morfs = List.flatten expanded })

(* Multiplies a phrase by the equivalents of its preposition and/or
   complementizer: one copy of the phrase is produced for every equivalent
   registered in [equivs]; a word with no entry stands for itself only.
   Phrases with neither a preposition nor a Comp complementizer are
   returned as a singleton. *)
let expand_equivs_phrase equivs phrase =
  (* All equivalents of [word], or just [word] when it has no entry. *)
  let variants word =
    try StringMap.find equivs word with Not_found -> [word] in
  match phrase with
  | PrepNP (sem, prep, case) ->
      Xlist.map (variants prep) (fun p -> PrepNP (sem, p, case))
  | PrepAdjP (sem, prep, case) ->
      Xlist.map (variants prep) (fun p -> PrepAdjP (sem, p, case))
  | PrepNumP (sem, prep, case) ->
      Xlist.map (variants prep) (fun p -> PrepNumP (sem, p, case))
  | ComprepNP (sem, prep) ->
      Xlist.map (variants prep) (fun p -> ComprepNP (sem, p))
  | ComparNP (sem, prep, case) ->
      Xlist.map (variants prep) (fun p -> ComparNP (sem, p, case))
  | ComparPP (sem, prep) ->
      Xlist.map (variants prep) (fun p -> ComparPP (sem, p))
  | CP (ctype, Comp comp) ->
      Xlist.map (variants comp) (fun c -> CP (ctype, Comp c))
  | NCP (case, ctype, Comp comp) ->
      Xlist.map (variants comp) (fun c -> NCP (case, ctype, Comp c))
  | PrepNCP (sem, prep, case, ctype, Comp comp) ->
      (* Every complementizer variant is combined with every preposition variant. *)
      let per_comp =
        Xlist.map (variants comp) (fun c ->
          Xlist.map (variants prep) (fun p ->
            PrepNCP (sem, p, case, ctype, Comp c))) in
      List.flatten per_comp
  | _ -> [phrase]

(* Replaces every plain lexeme that has an entry in [equivs] by an XOR of
   the registered equivalents; lexemes without an entry and Elexeme nodes
   are left alone. Composite nodes are rewritten recursively. *)
let rec expand_equivs_lex equivs lex =
  let recurse items = Xlist.map items (expand_equivs_lex equivs) in
  match lex with
  | Lexeme word ->
      let registered =
        try Some (StringMap.find equivs word) with Not_found -> None in
      (match registered with
       | Some words -> XOR (Xlist.map words (fun w -> Lexeme w))
       | None -> Lexeme word)
  | ORconcat items -> ORconcat (recurse items)
  | ORcoord items -> ORcoord (recurse items)
  | XOR items -> XOR (recurse items)
  | Elexeme gender -> Elexeme gender

(* Expands equivalents inside a realisation and returns the resulting list
   of realisations. Plain and E phrases are multiplied by
   expand_equivs_phrase; lexicalised phrases get their lexemes and embedded
   schema rewritten; abbreviations get their realisations rewritten.
   Raises Failure for any other kind of realisation. *)
let rec expand_equivs_morf equivs morf =
  let expand_pos_lex pos_lex =
    Xlist.map pos_lex (fun (pos, lex) -> pos, expand_equivs_lex equivs lex) in
  match morf with
  | Phrase phrase ->
      Xlist.map (expand_equivs_phrase equivs phrase) (fun p -> Phrase p)
  | E phrase ->
      Xlist.map (expand_equivs_phrase equivs phrase) (fun p -> E p)
  | LexPhrase (pos_lex, (restr, schema)) ->
      [LexPhrase (expand_pos_lex pos_lex,
                  (restr, expand_equivs_schema equivs schema))]
  | LexPhraseMode (mode, pos_lex, (restr, schema)) ->
      [LexPhraseMode (mode, expand_pos_lex pos_lex,
                      (restr, expand_equivs_schema equivs schema))]
  | PhraseAbbr (abbr, realizations) ->
      let expanded = Xlist.map realizations (expand_equivs_morf equivs) in
      [PhraseAbbr (abbr, List.flatten expanded)]
  | _ -> failwith ("expand_equivs_morf: " ^ WalStringOf.morf morf)

(* Applies expand_equivs_morf to every realisation of every position and
   concatenates the results. *)
and expand_equivs_schema equivs schema =
  Xlist.map schema (fun position ->
    let expanded = Xlist.map position.morfs (expand_equivs_morf equivs) in
    { position with morfs = List.flatten expanded })


(* Groups the pre-split lines of the realizations file into three sections.
   Arguments:
   - (expands,subtypes,equivs): the sections completed so far;
   - found: the (key, entries) groups collected for the section currently
     being read;
   - rev: the (text, text) entries collected for the group currently being
     read;
   - the list of lines, each already split by Str.full_split into
     Str.Text / Str.Delim pieces.
   A line of the form key followed by the arrow delimiter closes a group: it
   is pushed onto found together with the entries gathered in rev, and rev
   is reset. A section header line closes a section: found is stored in the
   matching slot of the triple and reset. Both kinds of line therefore have
   to come after the lines they close.
   NOTE(review): the caller passes the lines through Xlist.rev_map,
   presumably so that they arrive here in reverse file order, which is what
   makes headers follow their content; confirm against Xlist.
   Raises Failure on an unrecognised line, or when entries or groups are
   left over at the end of input without a closing header. *)
let rec load_realizations_rec (expands,subtypes,equivs) found rev = function
    [] -> if rev <> [] || found <> [] then failwith "load_realizations_rec" else expands,subtypes,equivs
  (* Group header: attach the gathered entries to key s. *)
  | [Str.Text s; Str.Delim "-->"] :: l -> load_realizations_rec (expands,subtypes,equivs) ((s,rev) :: found) [] l
  (* Entry line with two tab-separated fields. *)
  | [Str.Delim "    "; Str.Text s; Str.Delim "\t"; Str.Text t] :: l ->
       load_realizations_rec (expands,subtypes,equivs) found ((s,t) :: rev) l
  (* Entry line with a single field; the second field defaults to empty. *)
  | [Str.Delim "    "; Str.Text s] :: l ->
       load_realizations_rec (expands,subtypes,equivs) found ((s,"") :: rev) l
  (* Section headers: store the gathered groups in the matching slot. *)
  | [Str.Delim "% "; Str.Text "Phrase types expand:"] :: l -> load_realizations_rec (found,subtypes,equivs) [] rev l
  | [Str.Delim "% "; Str.Text "Attributes subtypes:"] :: l -> load_realizations_rec (expands,found,equivs) [] rev l
  | [Str.Delim "% "; Str.Text "Attributes equivalents:"] :: l -> load_realizations_rec (expands,subtypes,found) [] rev l
(*   | [Str.Delim "% "; Str.Text s] :: l -> print_endline s; load_realizations_rec found rev l *)
  (* Lines that split into no pieces (empty lines) are ignored. *)
  | [] :: l -> load_realizations_rec (expands,subtypes,equivs) found rev l
  | _ -> failwith "load_realizations_rec"

(* Collects every plain lexeme mentioned in a lexical specification, in
   order of appearance. Raises Failure when an Elexeme is encountered. *)
let rec get_lexemes lex =
  match lex with
  | Lexeme word -> [word]
  | ORconcat items | ORcoord items | XOR items ->
      List.flatten (Xlist.map items get_lexemes)
  | Elexeme _ -> failwith "get_lexemes"

(* For every complex preposition (a name paired with its lexicalised
   realisations) computes the list of plain lexemes occurring directly in
   its realisations, and returns a map from the name to that list. Lexical
   specifications other than a plain Lexeme contribute nothing.
   Raises Failure when a realisation is not a lexicalised phrase or when a
   preposition has no realisations at all. *)
let find_comprep_reqs compreps =
  let direct_lexemes pos_lex =
    Xlist.fold pos_lex StringSet.empty (fun set -> function
        _, Lexeme word -> StringSet.add set word
      | _ -> set) in
  Xlist.fold compreps StringMap.empty (fun comprep_reqs (name, realizations) ->
    let sets = Xlist.map realizations (function
        LexPhrase (pos_lex, _) | LexPhraseMode (_, pos_lex, _) ->
          direct_lexemes pos_lex
      | morf -> failwith ("find_compreps_reqs: " ^ WalStringOf.morf morf)) in
    match sets with
    | [] -> failwith "find_compreps_reqs"
    | first :: rest ->
        let all = Xlist.fold rest first StringSet.union in
        StringMap.add comprep_reqs name (StringSet.to_list all))

(* Builds a dictionary from lexemes to the compound-preposition entries that
   mention them.  compreps is a list of (name, morfs) pairs; the result is a
   StringMap whose values are lists of (tag, (name, morf)).

   Supported morf shapes:
   - LexPhrase [PREP; SUBST] and LexPhrase [PREP; NUM; SUBST]: indexed under
     the lexemes of the SUBST element, with tag "subst";
   - LexPhraseMode with mode "misc" and a single ADV: rewritten to a plain
     LexPhrase and indexed under the lexemes of the ADV, with tag "adv".
   Any other morf raises Failure. *)
let create_comprep_dict compreps =
  (* Adds (tag, (name, morf)) under every lexeme of lex; newer entries are
     placed in front of the ones already stored for that lexeme. *)
  let register dict tag name morf lex =
    let entry = tag, (name, morf) in
    Xlist.fold (get_lexemes lex) dict (fun dict lexeme ->
      StringMap.add_inc dict lexeme [entry] (fun entries -> entry :: entries)) in
  Xlist.fold compreps StringMap.empty (fun dict (name, morfs) ->
    Xlist.fold morfs dict (fun dict -> function
      | LexPhrase([PREP _,_;SUBST _,lex],_) as morf ->
          register dict "subst" name morf lex
      | LexPhraseMode("misc",[ADV grad,lex],restr) ->
          register dict "adv" name (LexPhrase([ADV grad,lex],restr)) lex
      | LexPhrase([PREP _,_;NUM _,_;SUBST _,lex],_) as morf ->
          register dict "subst" name morf lex
      | morf -> failwith ("create_comprep_dict: " ^ WalStringOf.morf morf)))

(* Loads and interprets the realizations file named by
   Paths.realizations_filename.

   Returns the tuple (expands, compreps, comprep_reqs, subtypes, equivs):
   - expands: AbbrMap from phrase abbreviations (Advp m, Nonch, Xp m, Distrp,
     Possp) to the list of morfs given as their realizations;
   - compreps: the dictionary built by create_comprep_dict from the ComprepNP
     entries, after schema, subtype and equivalent expansion;
   - comprep_reqs: the map built by find_comprep_reqs from the same entries;
   - subtypes: CompMap with the keys Int and Rel;
   - equivs: StringMap binding every key to the list made of the key itself
     followed by its listed values.

   Raises Failure "load_realizations 1" for a subtype section key other than
   int or rel, and Failure "load_realizations 2" for an expansion header of
   an unsupported shape.  Exceptions of the called parsers propagate. *)
let load_realizations () =
  (* Both regexps are compiled once here; in particular the delimiter regexp
     is no longer rebuilt for every line of the file. *)
  let newline = Str.regexp "\n" in
  let delimiters = Str.regexp "% \\|-->\\|    \\|\t" in
  let lines = Str.split newline (File.load_file Paths.realizations_filename) in
  (* NOTE(review): rev_map presumably hands the lines over in reverse order,
     which load_realizations_rec appears to rely on - confirm before changing. *)
  let lines = Xlist.rev_map lines (fun line -> Str.full_split delimiters line) in
  let expands,subtypes,equivs = load_realizations_rec ([],[],[]) [] [] lines in
  (* Parses the first component of every pair as a comp and keeps the
     second component of each parse_comp result. *)
  let parse_comps l =
    List.flatten (Xlist.map l (fun (v,_) -> snd (parse_comp [Text v]))) in
  (* Parses the first component of every pair as a single morf. *)
  let parse_singles l =
    Xlist.map l (fun (v,_) -> parse_morf_single (split_schema v)) in
  let subtypes = Xlist.fold subtypes CompMap.empty (fun subtypes -> function
      "int",l -> CompMap.add subtypes Int (parse_comps l)
    | "rel",l -> CompMap.add subtypes Rel (parse_comps l)
    | _ -> failwith "load_realizations 1") in
  let equivs = Xlist.fold equivs StringMap.empty (fun equivs (k,l) ->
    StringMap.add equivs k (k :: Xlist.map l fst)) in
  let expands,compreps = Xlist.fold expands (AbbrMap.empty,[]) (fun (expands, compreps) (k,l) ->
    match parse_morf_single (split_schema k) with
        PhraseAbbr(Advp m,[]) ->
          let realizations = Xlist.map l (fun (v,_) ->
            LexPhraseMode(m,[ADV GradUndef,Lexeme v],(Natr,[]))) in
          AbbrMap.add expands (Advp m) realizations, compreps
      | PhraseAbbr(Nonch,[]) ->
          let realizations = Xlist.map l (fun (v,_) ->
            LexPhrase([SUBST(NumberUndef,Str),Lexeme v],(Natr,[]))) in
          AbbrMap.add expands Nonch realizations, compreps
      | PhraseAbbr(Xp m,[]) ->
          (* A realization that is exactly one advp abbreviation is replaced
             by the advp expansion collected so far, when there is one. *)
          let realizations = List.flatten (Xlist.map l (fun (v,_) ->
            match parse_morfs (split_schema v) with
              [PhraseAbbr(Advp mode,[])] ->
                (* FIXME (translated from Polish): I assume that advp will not multiply *)
                (try AbbrMap.find expands (Advp mode)
                 with Not_found -> [PhraseAbbr(Advp mode,[])])
            | morfs -> morfs)) in
          AbbrMap.add expands (Xp m) realizations, compreps
      | Phrase(ComprepNP(_,s)) -> expands, (s, parse_singles l) :: compreps
      | PhraseAbbr(Distrp,[]) -> AbbrMap.add expands Distrp (parse_singles l), compreps
      | PhraseAbbr(Possp,[]) -> AbbrMap.add expands Possp (parse_singles l), compreps
      | _ -> failwith "load_realizations 2") in
  (* Expand every compound-preposition morf: schema abbreviations first,
     then subtypes, then equivalents; the nested results are flattened. *)
  let compreps = Xlist.map compreps (fun (s,morfs) ->
    s, List.flatten (List.flatten (Xlist.map morfs (fun morf ->
      Xlist.map (expand_subtypes_morf subtypes (expand_schema_morf expands morf)) (expand_equivs_morf equivs))))) in
  let comprep_reqs = find_comprep_reqs compreps in
  let compreps = create_comprep_dict compreps in
  expands,compreps,comprep_reqs,subtypes,equivs

(* Adds to the string set fixed the argument of every FixedP phrase found in
   schema.  The morfs field of each schema element is scanned, and the
   schemata nested inside LexPhrase and LexPhraseMode restrictions are
   searched recursively.  Other morfs leave the set unchanged. *)
let rec extract_fixed_schema fixed schema =
  let collect acc = function
    | Phrase (FixedP text) -> StringSet.add acc text
    | LexPhrase (_, (_, nested)) | LexPhraseMode (_, _, (_, nested)) ->
        extract_fixed_schema acc nested
    | _ -> acc in
  Xlist.fold schema fixed (fun acc position ->
    Xlist.fold position.morfs acc collect)

(*let extract_fixed fixed_filename = (* FIXME: does not detect fixed phrases in lexicalization arguments *)
(*   let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in *)
  let fixed = Xlist.fold Paths.walenty_filenames StringSet.empty (fun fixed filename ->
(*     print_endline filename; *)
    let frames = load_frames (Paths.walenty_path ^ filename) in
    StringMap.fold frames fixed (fun fixed _ l ->
      Xlist.fold l fixed (fun fixed (refl,opinion,negation,pred,aspect,schema) ->
        extract_fixed_schema fixed (parse_schema schema)))) in
  let entries = StringSet.fold fixed StringSet.empty (fun entries s ->
    let tokens = List.rev (Xlist.fold (Str.full_split (Str.regexp " \\|,") s) [] (fun l -> function
        Str.Text t -> t :: l
      | Str.Delim " " -> l
      | Str.Delim t -> t :: l)) in
    let tokens_string = String.concat " " tokens in
    Xlist.fold tokens entries (fun entries token ->
      StringSet.add entries (Printf.sprintf "%s\t%s\t%s:fixed\tfixed" token s tokens_string))) in
  File.file_out fixed_filename (fun file ->
    StringSet.iter entries (fun entry ->
      Printf.fprintf file "%s\n" entry))

(* generating fixed.tab *)
(* let _ = extract_fixed "data/fixed.tab" *)

let print_subjs () =
(*   let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in *)
  let subjs = Xlist.fold Paths.walenty_filenames StringQMap.empty (fun subjs filename ->
(*     print_endline filename; *)
    let frames = load_frames (Paths.walenty_path ^ filename) in
    StringMap.fold frames subjs (fun subjs _ l ->
      Xlist.fold l subjs (fun subjs (refl,opinion,negation,pred,aspect,schema) ->
        Xlist.fold (parse_schema schema) subjs (fun subjs s ->
          if s.gf = SUBJ then StringQMap.add subjs (WalStringOf.schema [s]) else subjs)))) in
  StringQMap.iter subjs (fun s v ->
      Printf.printf "%5d %s\n" v s)

(* let _ = print_subjs () *)

let print_ctrls () =
(*   let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in *)
  let ctrls = Xlist.fold Paths.walenty_filenames StringQMap.empty (fun ctrls filename ->
(*     print_endline filename; *)
    let frames = load_frames (Paths.walenty_path ^ filename) in
    StringMap.fold frames ctrls (fun ctrls _ l ->
      Xlist.fold l ctrls (fun ctrls (refl,opinion,negation,pred,aspect,schema) ->
        let schema = List.rev (Xlist.fold (parse_schema schema) [] (fun l s ->
          if s.cr = [] && s.ce = [] then l else s :: l)) in
        StringQMap.add ctrls (WalStringOf.schema schema)))) in
  StringQMap.iter ctrls (fun s v ->
      Printf.printf "%5d %s\n" v s)

(* let _ = print_ctrls () *)

(* Parsing test *)
(*let _ =
  let expands,compreps,comprep_reqs,subtypes,equivs = load_realizations () in
  Xlist.iter Paths.walenty_filenames (fun filename ->
    print_endline filename;
    let frames = load_frames (Paths.walenty_path ^ filename) in
    StringMap.iter frames (fun _ l ->
      Xlist.iter l (fun (refl,opinion,negation,pred,aspect,schema) ->
(*       print_endline schema; *)
        ignore (parse_opinion opinion);
        ignore (parse_negation [Text negation]);
        ignore (parse_pred [Text pred]);
        ignore (parse_aspect [Text aspect]);
        ignore (expand_equivs_schema equivs (expand_subtypes subtypes (expand_schema expands (parse_schema schema)))))))*)*)