Commit c5eea7f4e53bfc8e6793dc3305b9ff764943577b

Authored by Wojciech Jaworski
1 parent dea24466

wczytywanie Walentego w TEI

subsyntax/TODO
... ... @@ -4,5 +4,3 @@
4 4  
5 5 - jak przetwarzać num:comp
6 6 - przetwarzanie liczebników złożonych np dwadzieścia jeden, jedna druga
7   -
8   -- grafy ścieżek
... ...
walenty/.gitignore 0 → 100644
  1 +loader
... ...
walenty/ENIAMwalStringOf.ml
... ... @@ -46,6 +46,8 @@ let opinion = function
46 46 | Zly -> "zły"
47 47 | Wulgarny -> "wulgarny"
48 48 | Domyslny -> "domyślny"
  49 + | Dobry -> "dobry"
  50 + | OpinionUndef -> "_"
49 51  
50 52 let negation = function
51 53 Negation -> "neg"
... ... @@ -55,7 +57,9 @@ let negation = function
55 57  
56 58 let pred = function
57 59 PredNA -> ""
58   - | Pred -> "pred"
  60 + | PredTrue -> "pred"
  61 + | PredFalse -> "nopred"
  62 + | PredUndef -> "_"
59 63  
60 64 let aspect = function
61 65 Aspect s -> s
... ... @@ -83,10 +87,10 @@ let rec comp = function
83 87 let rec comp_type = function
84 88 Int -> "int"
85 89 | Rel -> "rel"
86   - | Sub -> "sub"
87   - | Coord -> "coord"
  90 + (* | Sub -> "sub"
  91 + | Coord -> "coord" *)
88 92 | CompTypeUndef -> "_"
89   - | CompTypeAgr -> "agr"
  93 + (* | CompTypeAgr -> "agr" *)
90 94  
91 95 let number = function
92 96 Number s -> s
... ... @@ -105,7 +109,9 @@ let grad = function
105 109  
106 110 let refl = function
107 111 ReflEmpty -> ""
108   - | ReflSie -> "się"
  112 + | ReflTrue -> "się"
  113 + | ReflFalse -> "nosię"
  114 + | ReflUndef -> "_"
109 115  
110 116 let acm = function
111 117 Acm s -> s
... ... @@ -124,13 +130,13 @@ let gf = function
124 130 SUBJ -> "subj"
125 131 | OBJ -> "obj"
126 132 | ARG -> "arg"(*""*)
127   - | CORE -> "core"
  133 + (* | CORE -> "core"
128 134 | NOSEM -> "nosem"
129 135 | NOGF -> "nogf"
130 136 | ADJUNCT -> "adjunct"
131 137 | RAISED -> "raised"
132 138 | CLAUSE -> "clause"
133   - | SENTENCE -> "sentence"
  139 + | SENTENCE -> "sentence" *)
134 140  
135 141 let pos = function
136 142 SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")"
... ... @@ -149,41 +155,34 @@ let pos = function
149 155  
150 156 let phrase = function
151 157 NP c -> "np(" ^ case c ^ ")"
152   - | PrepNP(s,prep,c) -> "prepnp(" ^ sem s ^ "," ^ prep ^ "," ^ case c ^ ")"
  158 + | PrepNP(prep,c) -> "prepnp(" ^ prep ^ "," ^ case c ^ ")"
153 159 | AdjP c -> "adjp(" ^ case c ^ ")"
154   - | PrepAdjP(s,prep,c) -> "prepadjp(" ^ sem s ^ "," ^ prep ^ "," ^ case c ^ ")"
  160 + | PrepAdjP(prep,c) -> "prepadjp(" ^ prep ^ "," ^ case c ^ ")"
155 161 | NumP(c) -> "nump(" ^ case c ^ ")"
156   - | PrepNumP(s,prep,c) -> "prepnump(" ^ sem s ^ "," ^ prep ^ "," ^ case c ^ ")"
157   - | ComprepNP(s,prep) -> "comprepnp(" ^ sem s ^ "," ^ prep ^ ")"
158   - | ComparNP(s,prep,c) -> "comparnp(" ^ sem s ^ "," ^ prep ^ "," ^ case c ^ ")"
159   - | ComparPP(s,prep) -> "comparpp(" ^ sem s ^ "," ^ prep ^ ")"
160   - | IP -> "ip"
  162 + | PrepNumP(prep,c) -> "prepnump(" ^ prep ^ "," ^ case c ^ ")"
  163 + | ComprepNP(prep) -> "comprepnp(" ^ prep ^ ")"
  164 + | ComparNP(prep,c) -> "comparnp(" ^ prep ^ "," ^ case c ^ ")"
  165 + | ComparPP(prep) -> "comparpp(" ^ prep ^ ")"
161 166 | CP(ct,co) -> "cp(" ^ comp_type ct ^ "," ^ comp co ^ ")"
162 167 | NCP(c,ct,co) -> "ncp(" ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
163   - | PrepNCP(s,prep,c,ct,co) -> "prepncp(" ^ sem s ^ "," ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
164   - | InfP(a(*,r*)) -> "infp(" ^ aspect a (*^ req r*) ^ ")"
165   - | PadvP -> "padvp"
  168 + | PrepNCP(prep,c,ct,co) -> "prepncp(" ^ prep ^ "," ^ case c ^ "," ^ comp_type ct ^ "," ^ comp co ^ ")"
  169 + | InfP(a) -> "infp(" ^ aspect a (*^ req r*) ^ ")"
166 170 | AdvP -> "advp"
167 171 | FixedP s -> "fixed(" ^ s ^ ")"
168   - | PrepP -> "prepp"
169   - | Prep(prep,c) -> "prep(" ^ prep ^ "," ^ case c ^ ")"
170 172 | Num(c,a) -> "num(" ^ case c ^ "," ^ acm a ^ ")"
171   - | Measure(c) -> "measure(" ^ case c ^ ")"
172 173 | Or -> "or"
173   -(* | Refl -> "refl" *)
174   -(* | Recip -> "recip" *)
  174 + | Refl -> "refl"
  175 + | Recip -> "recip"
175 176 | Qub -> "qub"
176   - | Inclusion -> "inclusion"
177   - | Adja -> "adja"
178   - | AuxPast -> "aux-past"
179   - | AuxFut -> "aux-fut"
180   - | AuxImp -> "aux-imp"
181   - | Aglt -> "aglt"
182 177 | Pro -> "pro"
183 178 | ProNG -> "prong"
184 179 | Null -> "null"
185   - | X -> "x"
186   - | Lex s -> "lex(" ^ s ^ ")"
  180 + | GerP c -> "gerp(" ^ case c ^ ")"
  181 + | PrepGerP(prep,c) -> "prepgerp(" ^ prep ^ "," ^ case c ^ ")"
  182 + | PpasP c -> "ppasp(" ^ case c ^ ")"
  183 + | PrepPpasP(prep,c) -> "prepppasp(" ^ prep ^ "," ^ case c ^ ")"
  184 + | PactP c -> "pactp(" ^ case c ^ ")"
  185 +
187 186  
188 187 let phrase_abbr = function
189 188 Xp(m) -> "xp(" ^ m ^ ")"
... ... @@ -268,7 +267,7 @@ let rec schema schema =
268 267 (if s.gf = ARG then [] else [gf s.gf])@
269 268 (if s.role = "" then [] else [s.role])@
270 269 (if s.role_attr = "" then [] else [s.role_attr])@
271   - s.sel_prefs@(controllers s.cr)@(controllees s.ce)) ^ direction s.dir ^ "{" ^ String.concat ";" (Xlist.map s.morfs morf) ^ "}"))
  270 + s.sel_prefs@(controllers s.cr)@(controllees s.ce)) ^ direction s.dir ^ "{" ^ String.concat ";" (Xlist.map s.morfs (fun (_,m) -> morf m)) ^ "}"))
272 271  
273 272 (*and schema_role schema =
274 273 String.concat "+" (Xlist.map schema (fun (r,role,cr,ce,morfs) ->
... ...
walenty/ENIAMwalTEI.ml
... ... @@ -17,6 +17,598 @@
17 17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 18 *)
19 19  
  20 +open ENIAMwalTypes
  21 +
  22 +let parse_id s = (* Parses an identifier string into an id record; the empty string yields empty_id and malformed input raises Failure. *)
  23 + if String.length s = 0 then empty_id else
  24 + if String.length s < 6 then failwith "za krótkie id" else
  25 + let hash,s = if (String.get s 0) = '#' then true, String.sub s 1 (String.length s - 1) else false, s in (* remember and strip an optional leading hash sign *)
  26 + if String.sub s 0 4 <> "wal_" then failwith "id nie ma wal" else (* the remaining text must begin with the wal_ prefix *)
  27 + let s,suf = match Str.split (Str.regexp "-") s with (* exactly two dash-separated parts are accepted: body and suffix *)
  28 + [s;suf] -> s,suf
  29 + | _ -> failwith "zła ilość '-'" in
  30 + let id = {hash = hash; suffix = suf; numbers = Xstring.split "\\." s} in (* numbers is Xstring.split applied to the body; presumably a split on dots - TODO confirm Xstring.split semantics *)
  31 + id
  32 +
  33 +(*let parse_id s =
  34 + if String.length s = 0 then empty_id else
  35 + if String.length s < 6 then failwith "za krótkie id" else
  36 + let hash,s = if (String.get s 0) = '#' then true, String.sub s 1 (String.length s - 1) else false, s in
  37 + if String.sub s 0 4 <> "wal_" then failwith "id nie ma wal" else
  38 + let s,suf = match Str.split (Str.regexp "-") s with
  39 + [s;suf] -> s,suf
  40 + | _ -> failwith "zła ilość '-'" in
  41 + let id = {hash = hash; suffix = suf; numbers = (Str.split (Str.regexp "\\.") s)} in
  42 + {id with numbers = [last id.numbers]}*)
  43 +
  44 +type tei = (* Intermediate tree produced by parse_tei from Xml values and consumed by the load_ functions below. *)
  45 + Symbol of string (* value attribute of a symbol element *)
  46 + | TEIstring of string (* text content of a string element, empty when the element has no children *)
  47 + | Binary of bool (* value attribute of a binary element *)
  48 + | Numeric of int (* value attribute of a numeric element *)
  49 + | F of string * tei (* f element with exactly one child: name and parsed child *)
  50 + | Fset of string * tei list (* f element holding a vColl set, no children, or several children *)
  51 + | Fs of string * tei list (* fs element: type attribute and parsed children *)
  52 + | Id of id (* parsed xml:id, placed by parse_tei at the head of the Fs child list *)
  53 + | SameAs of id * string (* fs element with a sameAs reference: target id and type, the type being empty when absent *)
  54 +
  55 +let rec tei_to_string = function (* Renders a tei value as a one-line debug string; used in the Failure messages of the loaders. *)
  56 + Symbol s -> Printf.sprintf "Symbol %s" s
  57 + | TEIstring s -> Printf.sprintf "String %s" s
  58 + | Binary b -> Printf.sprintf "Binary %s" (string_of_bool b)
  59 + | Numeric n -> Printf.sprintf "Numeric %d" n
  60 + | F(s,t) -> Printf.sprintf "F(%s,%s)" s (tei_to_string t)
  61 + | Fset(s,l) -> Printf.sprintf "Fset(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))
  62 + | Fs(s,l) -> Printf.sprintf "Fs(%s,[%s])" s (String.concat ";" (Xlist.map l tei_to_string))
  63 + | Id id -> Printf.sprintf "Id" (* the id contents are not printed *)
  64 + | SameAs(id,s) -> Printf.sprintf "F(Id,%s)" s (* NOTE(review): the id is dropped and the label reads F(Id,...) rather than SameAs - looks like a copy-paste leftover, confirm *)
  65 +
  66 +let rec parse_tei = function
  67 + Xml.Element("f",["name",name],[Xml.Element("vColl",["org","set"],set)]) ->
  68 + Fset(name,List.rev (Xlist.map set parse_tei))
  69 + | Xml.Element("f", ["name",name],[]) -> Fset(name,[])
  70 + | Xml.Element("f", ["name",name],[tei]) -> F(name,parse_tei tei)
  71 + | Xml.Element("f", ["name",name],set) -> Fset(name,List.rev (Xlist.map set parse_tei))
  72 + | Xml.Element("fs", ["type",name], l) -> Fs(name,List.rev (Xlist.rev_map l parse_tei))
  73 + | Xml.Element("fs", ["xml:id",id;"type",name], l) -> Fs(name,Id(parse_id id) :: List.rev (Xlist.rev_map l parse_tei))
  74 + | Xml.Element("symbol",["value",value],[]) -> Symbol value
  75 + | Xml.Element("string",[], [Xml.PCData s]) -> TEIstring s
  76 + | Xml.Element("string",[], []) -> TEIstring ""
  77 + | Xml.Element("binary",["value",value],[]) -> Binary(try bool_of_string value with _ -> failwith "parse_tei")
  78 + | Xml.Element("numeric",["value",value],[]) -> Numeric(try int_of_string value with _ -> failwith "parse_tei")
  79 + | Xml.Element("fs", ["sameAs", same_as; "type",name], []) -> SameAs(parse_id same_as,name)
  80 + | Xml.Element("fs", ["sameAs", same_as], []) -> SameAs(parse_id same_as,"")
  81 + | xml -> failwith ("parse_tei: " ^ Xml.to_string_fmt xml)
  82 +
  83 +let parse_gf = function (* Maps the function symbols subj and obj to SUBJ and OBJ; any other string raises Failure. *)
  84 + "subj" -> SUBJ
  85 + | "obj" -> OBJ
  86 + | s -> failwith ("parse_gf: " ^ s)
  87 +
  88 +let parse_control arg = function
  89 + "controller" -> {arg with cr="1" :: arg.cr}
  90 + | "controllee" -> {arg with ce="1" :: arg.cr}
  91 + | "controller2" -> {arg with cr="2" :: arg.cr}
  92 + | "controllee2" -> {arg with ce="2" :: arg.cr}
  93 + | s -> failwith ("parse_control: " ^ s)
  94 +
  95 +let parse_case = function (* Maps a case symbol to a case value; str, part and agr have dedicated constructors, the rest are wrapped in Case. Unknown strings raise Failure. *)
  96 + "nom" -> Case "nom"
  97 + | "gen" -> Case "gen"
  98 + | "dat" -> Case "dat"
  99 + | "acc" -> Case "acc"
  100 + | "inst" -> Case "inst"
  101 + | "loc" -> Case "loc"
  102 + | "str" -> Str
  103 + | "pred" -> Case "pred"
  104 + | "part" -> Part
  105 + | "postp" -> Case "postp"
  106 + | "agr" -> CaseAgr
  107 + | s -> failwith ("parse_case: " ^ s)
  108 +
  109 +let parse_aspect = function (* Maps an aspect symbol to an aspect value: underscore gives AspectUndef, the empty string gives AspectNA. Unknown strings raise Failure. *)
  110 + "perf" -> Aspect "perf"
  111 + | "imperf" -> Aspect "imperf"
  112 + | "_" -> AspectUndef
  113 + | "" -> AspectNA
  114 + | s -> failwith ("parse_aspect: " ^ s)
  115 +
  116 +let parse_negation = function (* Maps a negation symbol to a negation value: underscore gives NegationUndef, the empty string gives NegationNA. Unknown strings raise Failure. *)
  117 + "_" -> NegationUndef
  118 + | "neg" -> Negation
  119 + | "aff" -> Aff
  120 + | "" -> NegationNA
  121 + | s -> failwith ("parse_negation: " ^ s)
  122 +
  123 +let parse_number = function (* Maps a number symbol (sg, pl, agr or underscore) to a number value. Unknown strings raise Failure. *)
  124 + "sg" -> Number "sg"
  125 + | "pl" -> Number "pl"
  126 + | "agr" -> NumberAgr
  127 + | "_" -> NumberUndef
  128 + | s -> failwith ("parse_number: " ^ s)
  129 +
  130 +let parse_gender = function (* Maps a gender symbol to a gender value. Unknown strings raise Failure. *)
  131 + "m1" -> Gender "m1"
  132 + | "m3" -> Gender "m3"
  133 + | "n" -> Genders["n1";"n2"] (* plain n is expanded to both n1 and n2 *)
  134 + | "f" -> Gender "f"
  135 + | "m1.n" -> Genders["m1";"n1";"n2"] (* the combined symbol expands to m1 plus both neuter values *)
  136 + | "_" -> GenderUndef
  137 + | "agr" -> GenderAgr
  138 + | s -> failwith ("parse_gender: " ^ s)
  139 +
  140 +let parse_grad = function (* Maps a degree symbol (pos, com, sup or underscore) to a grad value. Unknown strings raise Failure. *)
  141 + "pos" -> Grad "pos"
  142 + | "com" -> Grad "com"
  143 + | "sup" -> Grad "sup"
  144 + | "_" -> GradUndef
  145 + | s -> failwith ("parse_grad: " ^ s)
  146 +
  147 +let rec parse_restr = function (* Maps a modification type symbol to its restriction constructor. Unknown strings raise Failure. The function does not call itself, so the rec keyword is not needed. *)
  148 + "natr" -> Natr
  149 + | "atr" -> Atr
  150 + | "ratr" -> Ratr
  151 + | "atr1" -> Atr1
  152 + | "ratr1" -> Ratr1
  153 + | s -> failwith ("parse_restr: " ^ s)
  154 +
  155 +
  156 +let parse_comp = function (* Maps a conjunction symbol to a pair of comp type and comp list. int and rel give that type with an empty list; every other known word gives CompTypeUndef with a one-element list. Unknown strings raise Failure. The existing trailing comments list part-of-speech abbreviations. *)
  157 + "int" -> Int,[]
  158 + | "rel" -> Rel,[]
  159 + | "co" -> CompTypeUndef,[Comp "co"] (* subst qub prep comp *)
  160 + | "kto" -> CompTypeUndef,[Comp "kto"] (* subst *)
  161 + | "ile" -> CompTypeUndef,[Comp "ile"] (* num adv *)
  162 + | "jaki" -> CompTypeUndef,[Comp "jaki"] (* adj *)
  163 + | "który" -> CompTypeUndef,[Comp "który"] (* adj *)
  164 + | "czyj" -> CompTypeUndef,[Comp "czyj"] (* adj *)
  165 + | "jak" -> CompTypeUndef,[Comp "jak"] (* prep conj adv *)
  166 + | "kiedy" -> CompTypeUndef,[Comp "kiedy"] (* comp adv *)
  167 + | "gdzie" -> CompTypeUndef,[Comp "gdzie"] (* qub adv *)
  168 + | "odkąd" -> CompTypeUndef,[Comp "odkąd"] (* adv *)
  169 + | "skąd" -> CompTypeUndef,[Comp "skąd"] (* adv *)
  170 + | "dokąd" -> CompTypeUndef,[Comp "dokąd"] (* adv *)
  171 + | "którędy" -> CompTypeUndef,[Comp "którędy"] (* adv *)
  172 + | "dlaczego" -> CompTypeUndef,[Comp "dlaczego"] (* adv *)
  173 + | "czemu" -> CompTypeUndef,[Comp "czemu"] (* adv *)
  174 + | "czy" -> CompTypeUndef,[Comp "czy"] (* qub conj *)
  175 + | "jakby" -> CompTypeUndef,[Comp "jakby"] (* qub comp *)
  176 + | "jakoby" -> CompTypeUndef,[Comp "jakoby"] (* qub comp *)
  177 + | "gdy" -> CompTypeUndef,[Gdy] (* adv; gdyby: qub comp *)
  178 + | "dopóki" -> CompTypeUndef,[Comp "dopóki"] (* comp *)
  179 + | "zanim" -> CompTypeUndef,[Comp "zanim"] (* comp *)
  180 + | "jeśli" -> CompTypeUndef,[Comp "jeśli"] (* comp *)
  181 + | "żeby2" -> CompTypeUndef,[Zeby]
  182 + | "żeby" -> CompTypeUndef,[Comp "żeby"] (* qub comp *)
  183 + | "że" -> CompTypeUndef,[Comp "że"] (* qub comp *)
  184 + | "aż" -> CompTypeUndef,[Comp "aż"] (* qub comp *)
  185 + | "bo" -> CompTypeUndef,[Comp "bo"] (* qub comp *)
  186 + | s -> failwith ("parse_comp: " ^ s)
  187 +
  188 +let load_type_constrains = function (* Converts one constraint Symbol into a comp value via parse_comp; raises Failure for int, rel or any non-Symbol input. *)
  189 + | Symbol value ->
  190 + (match parse_comp value with
  191 + CompTypeUndef,[c] -> c (* only words that parse_comp maps to an undefined type with a single comp are accepted *)
  192 + | _ -> failwith "load_type_constrains")
  193 + | xml -> failwith ("load_type_constrains:\n " ^ tei_to_string xml)
  194 +
  195 +let load_ctype = function (* Reads a type feature holding a type_def structure and returns a pair of comp type and comp list. Raises Failure on any other shape. *)
  196 + | F("type",Fs("type_def", x)) ->
  197 + (match x with
  198 + | [F("conjunction",Symbol value)] -> parse_comp value
  199 + | [F("conjunction",Symbol value);Fset("constraints",set)] -> (* constraints are only allowed together with int or rel *)
  200 + (match parse_comp value with
  201 + CompTypeUndef, _ -> failwith "load_ctype"
  202 + | ctype,[] -> ctype, List.rev (Xlist.rev_map set load_type_constrains)
  203 + | _ -> failwith "load_ctype")
  204 + | l -> failwith ("load_ctype 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string)))
  205 + | xml -> failwith ("load_ctype:\n " ^ tei_to_string xml)
  206 +(*Printf.printf "%s\n" (tei_to_string xml)*)
  207 +
  208 +let load_lemmas_set = function (* Extracts the text of a TEIstring lemma entry; any other tei value raises Failure. *)
  209 + | TEIstring mstring -> mstring
  210 + | xml -> failwith ("load_lemmas_set:\n " ^ tei_to_string xml)
  211 +
  212 +let check_lemma s = (* Turns a lemma string into a lex value: text without parentheses becomes Lexeme, and E followed by a parenthesised gender becomes Elexeme. Anything else raises Failure. *)
  213 + match Str.full_split (Str.regexp "(\\|)") s with (* the pattern matches either an opening or a closing parenthesis *)
  214 + [Str.Text s] -> Lexeme s
  215 + | [Str.Text "E"; Str.Delim "("; Str.Text g; Str.Delim ")"] -> Elexeme(parse_gender g)
  216 + | _ -> failwith "check_lemma"
  217 +
  218 +let make_lemma = function (* Builds a lex value from a triple of selection mode, cooccurrence and lemma list. Unsupported combinations raise Failure. *)
  219 + | _,_,[lemma] -> check_lemma lemma (* a single lemma ignores the mode and cooccurrence *)
  220 + | "XOR","concat",lemmas -> XOR(Xlist.map lemmas check_lemma)
  221 + | "OR","coord",lemmas -> ORcoord(Xlist.map lemmas check_lemma)
  222 + | "OR","concat",lemmas -> ORconcat(Xlist.map lemmas check_lemma)
  223 + | _ -> failwith "make_lemma"
  224 +
  225 +let process_lex_phrase lemma = function (* Maps a tuple of phrase, number, gender, degree, negation and reflex to a list of part-of-speech and lemma pairs. Prepositional variants emit a PREP entry for the preposition first. Unsupported combinations are printed to stdout and yield the empty list. *)
  226 + NP(case),number,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [SUBST(number,case),lemma]
  227 + | PrepNP(prep,case),number,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;SUBST(number,case),lemma]
  228 + | AdjP(case),number,gender,grad,NegationUndef,ReflUndef -> [ADJ(number,case,gender,grad),lemma]
  229 + | PrepAdjP(prep,case),number,gender,grad,NegationUndef,ReflUndef -> [PREP case,Lexeme prep;ADJ(number,case,gender,grad),lemma]
  230 + | InfP(aspect),NumberUndef,GenderUndef,GradUndef,negation,refl -> [INF(aspect,negation,refl),lemma]
  231 + | PpasP(case),number,gender,GradUndef,negation,ReflUndef -> [PPAS(number,case,gender,AspectUndef,negation),lemma]
  232 + | PrepPpasP(prep,case),number,gender,GradUndef,negation,ReflUndef -> [PREP case,Lexeme prep;PPAS(number,case,gender,AspectUndef,negation),lemma]
  233 + | PactP(case),number,gender,GradUndef,negation,refl -> [PACT(number,case,gender,AspectUndef,negation,refl),lemma]
  234 + | PrepGerP(prep,case),number,GenderUndef,GradUndef,negation,refl -> [PREP case,Lexeme prep;GER(number,case,GenderUndef,AspectUndef,negation,refl),lemma]
  235 + | Qub,NumberUndef,GenderUndef,GradUndef,NegationUndef,ReflUndef -> [QUB,lemma]
  236 + | AdvP,NumberUndef,GenderUndef,grad,NegationUndef,ReflUndef -> [ADV grad,lemma]
  237 + | phrase,number,gender,grad,negation,reflex -> (* fallback: report the unsupported combination instead of failing *)
  238 + Printf.printf "%s %s %s %s %s %s\n" (ENIAMwalStringOf.phrase phrase) (ENIAMwalStringOf.number number)
  239 + (ENIAMwalStringOf.gender gender) (ENIAMwalStringOf.grad grad) (ENIAMwalStringOf.negation negation) (ENIAMwalStringOf.refl reflex); []
  240 +
  241 +let new_schema r cr ce morfs = (* Builds a position record with grammatical function r, controllers cr, controllees ce and phrases morfs; all other fields get empty defaults and dir is Both. *)
  242 + {psn_id=empty_id; gf=r; role=""; role_attr="";sel_prefs=[]; cr=cr; ce=ce; dir=Both; morfs=morfs}
  243 +
  244 +let rec process_lex lex = function (* Converts a collected lex record, viewed as a tuple of argument, arguments, lemma and numeral lemma, into a LexPhrase or LexPhraseMode. Unsupported shapes raise Failure with a rendering of the tuple. *)
  245 + | PhraseAbbr(ComparP prep,[]),arguments,Lexeme "",Lexeme "" -> (* comparative: each argument becomes its own ARG position *)
  246 + LexPhrase([COMPAR,Lexeme prep],(Ratrs,Xlist.map arguments (fun morf -> new_schema ARG [] [] [empty_id,morf])))
  247 + | PhraseAbbr(Xp mode,[argument]),_,_,_ -> (* xp wrapper: process the inner argument recursively, then attach the mode *)
  248 + let lex = {lex with lex_argument=argument} in
  249 + (match process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma) with
  250 + LexPhrase(poss,mods) -> LexPhraseMode(mode,poss,mods)
  251 + | LexPhraseMode(mode2,poss,mods) ->
  252 + if mode <> mode2 then failwith "process_lex: multiple modes" else LexPhraseMode(mode,poss,mods)
  253 + | _ -> failwith "process_lex")
  254 + | PhraseAbbr(Advp mode,[]),[],lemma,Lexeme "" ->
  255 + let poss = process_lex_phrase lemma (AdvP,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in
  256 + LexPhraseMode(mode,poss,lex.lex_modification)
  257 + | Phrase (NumP(case)),[],lemma,num_lemma -> LexPhrase([NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification)
  258 + | Phrase (PrepNumP(prep,case)),[],lemma,num_lemma -> LexPhrase([PREP case,Lexeme prep;NUM(case,GenderUndef,AcmUndef),num_lemma;SUBST(NumberUndef,CaseUndef),lemma],lex.lex_modification)
  259 + | PhraseComp(Cp,(ctype,[Comp comp])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,Lexeme comp;PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification)
  260 + | PhraseComp(Cp,(ctype,[Comp comp1;Comp comp2])),[],lemma,Lexeme "" -> LexPhrase([COMP ctype,XOR[Lexeme comp1;Lexeme comp2];PERS(lex.lex_negation,lex.lex_reflex),lemma],lex.lex_modification)
  261 + | Phrase phrase,[],lemma,Lexeme "" -> (* any other plain phrase is delegated to process_lex_phrase *)
  262 + let poss = process_lex_phrase lemma (phrase,lex.lex_number,lex.lex_gender,lex.lex_degree,lex.lex_negation,lex.lex_reflex) in
  263 + LexPhrase(poss,lex.lex_modification)
  264 + | (argument,arguments,lemma,numeral_lemma) ->
  265 + let s = Printf.sprintf "%s [%s] %s %s\n" (ENIAMwalStringOf.morf argument)
  266 + (String.concat ";" (Xlist.map arguments ENIAMwalStringOf.morf))
  267 + (ENIAMwalStringOf.lex lemma) (ENIAMwalStringOf.lex numeral_lemma) in
  268 + failwith ("process_lex: " ^ s)
  269 +
  270 +let rec load_category = function (* Reads a category feature holding a category_def structure and returns its name with the list of constraint phrases, empty when no constraints are given. Raises Failure on any other shape. *)
  271 + | F("category",Fs("category_def",x)) ->
  272 + (match x with
  273 + | [F("name",Symbol value)] -> value, []
  274 + | [F("name",Symbol value);Fset("constraints",set)] ->
  275 + value, List.rev (Xlist.rev_map set (fun s -> snd (load_phrase s))) (* keep only the morf part of each loaded phrase, dropping its id *)
  276 + | l -> failwith ("load_category 2:\n " ^ String.concat "\n" (Xlist.map l tei_to_string)))
  277 + | xml -> failwith ("load_category:\n " ^ tei_to_string xml)
  278 +
  279 +and load_modification_def = function (*pomocnicza do load_lex *)
  280 + | [F("type",Symbol value)] -> parse_restr value, []
  281 + | [F("type",Symbol value); Fset("positions",set)] ->
  282 + parse_restr value, List.rev (Xlist.rev_map set load_position)
  283 + | x -> Printf.printf "%s\n" (tei_to_string (List.hd x));
  284 + failwith "load_modification_def:\n"
  285 +
  286 +and load_lex arg xml = match xml with (* mutually recursive with load_phrase. Fold step: updates the lex record arg from one child element of a lex structure. Unrecognised elements are printed to stdout and raise Failure. *)
  287 + | F("argument",set) -> {arg with lex_argument = snd (load_phrase set)}
  288 + | Fset("arguments",set) ->
  289 + {arg with lex_arguments=List.rev (Xlist.fold set [] (fun l s -> (snd (load_phrase s)) :: l))}
  290 + | F("modification",Fs("modification_def",x)) -> {arg with lex_modification = load_modification_def x}
  291 + | F("lemma",Fs("lemma_def",[F("selection_mode",Symbol value1);
  292 + F("cooccurrence",Symbol value2);
  293 + Fset("lemmas",lemmas)])) ->
  294 + {arg with lex_lemma = make_lemma (value1, value2, List.rev (Xlist.rev_map lemmas load_lemmas_set))}
  295 + | F("numeral_lemma",Fs("numeral_lemma_def",[F("selection_mode",Symbol value1);
  296 + F("cooccurrence",Symbol value2);
  297 + Fset("lemmas",lemmas)])) ->
  298 + {arg with lex_numeral_lemma = make_lemma (value1, value2, List.rev (Xlist.rev_map lemmas load_lemmas_set))}
  299 + | F("negation",Symbol value) -> {arg with lex_negation = parse_negation value}
  300 + | F("degree",Symbol value) -> {arg with lex_degree = parse_grad value}
  301 + | F("number",Symbol value) -> {arg with lex_number = parse_number value}
  302 + | F("reflex",Binary true) -> {arg with lex_reflex = ReflTrue}
  303 + | F("reflex",Binary false) -> {arg with lex_reflex = ReflFalse}
  304 + | Fset("reflex",[]) -> {arg with lex_reflex = ReflEmpty} (* an empty reflex feature is recorded as ReflEmpty *)
  305 + | Fset("gender",[Symbol value]) -> {arg with lex_gender = parse_gender value}
  306 + | xml ->
  307 + Printf.printf "%s\n" (tei_to_string xml);
  308 + failwith "load_lex:\n "
  309 +
  310 +and load_phrase xml:id * morf = (* Converts one Fs phrase structure into a pair of its id and a morf value, dispatching on the structure type and its children. The id is empty_id when the structure has no leading Id child. Unrecognised shapes raise Failure. *)
  311 + let id, idtype, x =
  312 + match xml with
  313 + | Fs(_idtype,Id _id :: _x) -> (_id, _idtype, _x) (* split off the leading Id child when present *)
  314 + | Fs(_idtype, _x) -> (empty_id, _idtype, _x)
  315 + | _ -> failwith "load_phrase let id,idtype...\n" in
  316 + match idtype, x with
  317 + | "np",[F("case",Symbol a)] -> id, Phrase (NP(parse_case a));
  318 + | "prepnp", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepNP(a, parse_case b))
  319 + | "adjp", [F("case",Symbol a)] -> id, Phrase (AdjP(parse_case a))
  320 + | "prepadjp", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepAdjP(a, parse_case b))
  321 + | "comprepnp", [e;F("complex_preposition",TEIstring a)] -> id, Phrase (ComprepNP(a)) (* the extra leading child e is ignored *)
  322 + | "comprepnp", [F("complex_preposition",TEIstring a)] -> id, Phrase (ComprepNP(a))
  323 + | "cp", [a] -> id, PhraseComp(Cp,load_ctype a)
  324 + | "ncp", [F("case",Symbol a);b] -> id, PhraseComp(Ncp(parse_case a),load_ctype b)
  325 + | "prepncp", [F("preposition",Symbol a);F("case",Symbol b);c] -> id, PhraseComp(Prepncp(a, parse_case b),load_ctype c)
  326 + | "infp", [F("aspect",Symbol a)] -> id, Phrase (InfP(parse_aspect a))
  327 + | "xp", [a] -> let x,y = load_category a in id, PhraseAbbr(Xp x,y)
  328 + | "xp", [e;a] -> let x,y = load_category a in id, PhraseAbbr(Xp x,y) (* the extra leading child e is ignored *)
  329 + | "advp", [F("category",Symbol a)] -> id, PhraseAbbr(Advp(a),[])
  330 + | "advp", [e;F("category",Symbol a)] -> id, PhraseAbbr(Advp(a),[]) (* the extra leading child e is ignored *)
  331 + | "nonch", [] -> id, PhraseAbbr(Nonch,[])
  332 + | "or", [] -> id, Phrase Or
  333 + | "refl", [] -> id, Phrase Refl
  334 + | "E", [] -> id, E Null
  335 + | "lex", x -> (* lexicalised phrase: fold all children into a lex record, then interpret it *)
  336 + let lex = Xlist.fold x empty_lex load_lex in
  337 + id, process_lex lex (lex.lex_argument,lex.lex_arguments,lex.lex_lemma,lex.lex_numeral_lemma)
  338 + | "fixed", [F("argument",a);F("string",TEIstring b)] -> id, Phrase (FixedP((*snd (load_phrase a),*)b)) (* the argument child a is currently not used *)
  339 + | "possp", [e] -> id, PhraseAbbr(Possp,[])
  340 + | "possp", [] -> id, PhraseAbbr(Possp,[])
  341 + | "recip", [] -> id, Phrase Recip
  342 + | "distrp", [e] -> id, PhraseAbbr(Distrp,[])
  343 + | "distrp", [] -> id, PhraseAbbr(Distrp,[])
  344 + | "compar", [F("compar_category",Symbol value)] -> id, PhraseAbbr(ComparP value,[])
  345 + | "gerp", [F("case",Symbol a)] -> id, Phrase (GerP(parse_case a))
  346 + | "prepgerp", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepGerP(a, parse_case b))
  347 + | "nump", [F("case",Symbol a)] -> id, Phrase (NumP(parse_case a))
  348 + | "prepnump", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepNumP(a, parse_case b))
  349 + | "ppasp", [F("case",Symbol a)] -> id, Phrase (PpasP(parse_case a))
  350 + | "prepppasp", [F("preposition",Symbol a);F("case",Symbol b)] -> id, Phrase (PrepPpasP(a, parse_case b))
  351 + | "qub", [] -> id, Phrase Qub
  352 + | "pactp", [F("case",Symbol a)] -> id, Phrase (PactP(parse_case a))
  353 + | "adverb",[F("adverb",Symbol s)] -> id, LexPhrase([ADV (Grad "pos"),Lexeme s],(Natr,[]))
  354 + | _ -> failwith ("load_phrase match:\n " ^ tei_to_string xml)
  355 +
  356 +
  357 +and load_control arg = function (* Fold step: applies one control Symbol to the position record arg via parse_control; any other tei value raises Failure. *)
  358 + | Symbol value -> parse_control arg value
  359 + | xml -> failwith ("load_control:\n " ^ tei_to_string xml)
  360 +
  361 +and load_position_info arg = function (* Fold step: updates the position record arg from one child of a position structure: function, phrases, control or Id. Any other element raises Failure. *)
  362 + | F("function",Symbol value) -> {arg with gf = parse_gf value}
  363 + | Fset("phrases",phrases_set) ->
  364 + {arg with morfs = List.rev (Xlist.rev_map phrases_set load_phrase)}
  365 + | Fset("control",control_set) -> Xlist.fold control_set arg load_control
  366 + | Id id -> {arg with psn_id=id}
  367 + | xml -> failwith ("load_position_info:\n " ^ tei_to_string xml)
  368 +
  369 +and load_position = function (* Builds a position record from a position structure by folding load_position_info over its children, starting from empty_position. Any other tei value raises Failure. *)
  370 + | Fs("position", listt) ->
  371 + Xlist.fold listt empty_position load_position_info
  372 + | xml -> failwith ("load_position:\n " ^ tei_to_string xml)
  373 +
  374 +let parse_opinion = function (* Maps an opinion symbol to an opinion value. Most values are accepted both as a Polish word and as a three-letter code. Unknown strings raise Failure. *)
  375 + "pewny" -> Pewny
  376 + | "cer" -> Pewny
  377 + | "potoczny" -> Potoczny
  378 + | "col" -> Potoczny
  379 + | "wątpliwy" -> Watpliwy
  380 + | "unc" -> Watpliwy
  381 + | "archaiczny" -> Archaiczny
  382 + | "dat" -> Archaiczny
  383 + | "zły" -> Zly
  384 + | "bad" -> Zly
  385 + | "wulgarny" -> Wulgarny
  386 + | "vul" -> Wulgarny
  387 + | "dobry" -> Dobry (* no three-letter code is accepted for this value *)
  388 + | x -> failwith ("parse_opinion: " ^ x)
  389 +
  390 +let load_schema_info (arg:schema) = function (* Fold step: updates the schema record arg from one child of a schema structure. Any unrecognised element raises Failure. *)
  391 + | F("opinion",Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}
  392 + | F("inherent_sie",Binary true) -> {arg with reflexiveMark = ReflTrue}
  393 + | F("inherent_sie",Binary false) -> {arg with reflexiveMark = ReflFalse}
  394 + | F("aspect",Symbol aspect_value) -> {arg with aspect = parse_aspect aspect_value}
  395 + | Fset("aspect", []) -> arg (* an empty aspect feature leaves the record unchanged *)
  396 + | F("negativity",Symbol negativity_value) -> {arg with negativity = parse_negation negativity_value}
  397 + | Fset("negativity",[]) -> arg (* an empty negativity feature leaves the record unchanged *)
  398 + | F("predicativity",Binary true) -> {arg with predicativity = PredTrue}
  399 + | F("predicativity",Binary false) -> {arg with predicativity = PredFalse}
  400 + | Fset("positions", positions) ->
  401 + {arg with positions = List.rev (Xlist.rev_map positions load_position)}
  402 + | F("text_rep",TEIstring text_rep) -> {arg with text_rep = text_rep}
  403 + | Id id -> {arg with sch_id = id}
  404 + | xml -> failwith ("load_schema_info\n " ^ tei_to_string xml)
  405 +
  406 +let load_schema = function (* Builds a schema record from a schema structure by folding load_schema_info over its children, starting from a record with undefined or empty fields. Any other tei value raises Failure. *)
  407 + Fs("schema", schema) ->
  408 + let result = {sch_id = empty_id; opinion = OpinionUndef; reflexiveMark = ReflUndef; aspect = AspectUndef;
  409 + negativity = NegationUndef; predicativity = PredUndef; positions = []; text_rep=""} in
  410 + let result = Xlist.fold schema result load_schema_info in
  411 + result
  412 + | xml -> failwith ("load_schema:\n " ^ tei_to_string xml)
  413 +
  414 +let load_syntax = function (* Loads every member of a schemata set with load_schema; any other tei value raises Failure. *)
  415 + Fset("schemata",schemata_set) ->
  416 + List.rev (Xlist.rev_map schemata_set load_schema)
  417 + | xml -> failwith ("load_syntax:\n " ^ tei_to_string xml)
  418 +
  419 +let load_phrases_set = function (* Reads a phrase reference of an example and returns its id with the first element of numbers dropped. Any other tei value raises Failure. *)
  420 + | SameAs(same_as,"phrase") -> {same_as with numbers = List.tl same_as.numbers} (* NOTE(review): List.tl raises Failure when numbers is empty, which presumably can happen for an empty sameAs value - confirm *)
  421 + | xml -> failwith ("load_phrases_set :\n " ^ tei_to_string xml)
  422 +
  423 +let load_example_info arg = function (* Fold step: updates the example record arg from one child of an example structure. Any unrecognised element raises Failure. *)
  424 + | F("meaning",SameAs(same_as,"lexical_unit")) -> {arg with meaning = same_as}
  425 + | Fset("phrases",phrases_set) ->
  426 + {arg with phrases = List.rev (Xlist.rev_map phrases_set load_phrases_set)}
  427 + | F("sentence",TEIstring sentence_string) -> {arg with sentence = sentence_string}
  428 + | F("source",Symbol source_value) -> {arg with source = source_value}
  429 + | F("opinion",Symbol opinion_value) -> {arg with opinion = parse_opinion opinion_value}
  430 + | F("note",TEIstring note_string) -> {arg with note = note_string}
  431 + | Id id -> {arg with exm_id = id}
  432 + | xml -> failwith ("load_example_info :\n " ^ tei_to_string xml)
  433 +
  434 +let load_example = function (* Builds an example record from an example structure by folding load_example_info over its children, starting from empty fields. Any other tei value raises Failure. *)
  435 + | Fs("example",example_elements) ->
  436 + let result = {exm_id = empty_id; meaning = empty_id; phrases = []; sentence = "";
  437 + source = ""; opinion = OpinionUndef; note = "";} in
  438 + let result = Xlist.fold example_elements result load_example_info in
  439 + result
  440 + | xml -> failwith ("load_example :\n " ^ tei_to_string xml)
  441 +
  442 +let load_examples = function (* Loads every member of an examples set with load_example; any other tei value raises Failure. *)
  443 + | Fset("examples",examples_set) ->
  444 + List.rev (Xlist.rev_map examples_set load_example)
  445 + | xml -> failwith ("load_examples:\n " ^ tei_to_string xml)
  446 +
  447 +let load_self_prefs_sets = function (* Converts one selectional preference entry: a number becomes NumericP, a symbol becomes SymbolP, and a relation structure pointing at an argument becomes RelationP. Anything else raises Failure. *)
  448 + | Numeric value -> NumericP(value)
  449 + | Symbol value -> SymbolP(value)
  450 + | Fs("relation",[F("type",Symbol value);F("to",SameAs(same_as, "argument"))]) ->
  451 + RelationP(value,same_as)
  452 + | xml -> failwith ("load_self_prefs_sets :\n " ^ tei_to_string xml)
  453 +
  454 +let load_argument_self_prefs = function (* Loads every member of a preference set with load_self_prefs_sets; the set name is not checked. Any non-Fset value raises Failure. *)
  455 + | Fset(name,self_prefs_set) ->
  456 + List.rev (Xlist.rev_map self_prefs_set load_self_prefs_sets)
  457 + | xml -> failwith ("load_argument_self_prefs :\n " ^ tei_to_string xml)
  458 +
  459 +let load_argument_info arg = function (* Fold step: updates the argument record arg from one child of an argument structure: role, role attribute, selectional preferences or Id. Any other element raises Failure. *)
  460 + | F("role",Symbol value) -> {arg with role = value}
  461 + | F("role_attribute",Symbol value) -> {arg with role_attribute = value}
  462 + | F("sel_prefs",Fs("sel_prefs_groups", self_prefs)) ->
  463 + {arg with sel_prefs = List.rev (Xlist.rev_map self_prefs load_argument_self_prefs)} (* one inner list per preference group *)
  464 + | Id id -> {arg with arg_id = id}
  465 + | xml -> failwith ("load_argument_info :\n " ^ tei_to_string xml)
  466 +
  467 +let load_arguments_set = function (* Builds an argument record from an argument structure by folding load_argument_info over its children, starting from empty fields. Any other tei value raises Failure. *)
  468 + | Fs("argument", info) ->
  469 + let result = {arg_id = empty_id; role = ""; role_attribute = ""; sel_prefs = []} in
  470 + let result = Xlist.fold info result load_argument_info in
  471 + result
  472 + | xml -> failwith ("load_arguments_set :\n " ^ tei_to_string xml)
  473 +
  474 +let load_meanings_set = function (* Extracts the target id from a reference to a lexical unit; any other tei value raises Failure. *)
  475 + | SameAs(same_as,"lexical_unit") -> same_as
  476 + | xml -> failwith ("load_meanings_set :\n " ^ tei_to_string xml)
  477 +
  478 +let load_frame = function (* Builds a frame record from a frame structure whose children are exactly, in order: Id, opinion, meanings and arguments. Any other shape raises Failure. *)
  479 + | Fs("frame",[
  480 + Id id;
  481 + F("opinion",Symbol opinion);
  482 + Fset("meanings",meanings_set);
  483 + Fset("arguments",arguments_set)]) ->
  484 + {frm_id = id;
  485 + opinion = opinion; (* NOTE(review): the opinion is stored as the raw symbol string here, whereas load_schema_info and load_example_info pass it through parse_opinion - confirm the frame record expects a string *)
  486 + meanings = List.rev (Xlist.rev_map meanings_set load_meanings_set);
  487 + arguments = List.rev (Xlist.rev_map arguments_set load_arguments_set)}
  488 + | xml -> failwith ("load_frame :\n " ^ tei_to_string xml)
  489 +
  490 +let load_semantic = function (* Loads every member of a frames set with load_frame; any other tei value raises Failure. *)
  491 + | Fset("frames",frame_set) ->
  492 + List.rev (Xlist.rev_map frame_set load_frame)
  493 + | xml -> failwith ("load_semantic:\n " ^ tei_to_string xml)
  494 +
  495 +let load_meaning_info arg = function (* Fold step: updates the meaning record arg from one child of a lexical unit structure: name, variant, plwnluid, gloss or Id. Any other element raises Failure. *)
  496 + | F("name",TEIstring name_string) -> {arg with name = name_string}
  497 + | F("variant",TEIstring variant_string) -> {arg with variant = variant_string}
  498 + | F("plwnluid",Numeric value) -> {arg with plwnluid = value}
  499 + | F("gloss",TEIstring gloss_string) -> {arg with gloss = gloss_string}
  500 + | Id id -> {arg with mng_id = id}
  501 + | xml -> failwith ("load_meaning_info:\n " ^ tei_to_string xml)
  502 +
  503 +
  504 +let load_meaning = function (* Builds a meaning record from a lexical unit structure by folding load_meaning_info over its children, starting from empty_meaning. Any other tei value raises Failure. *)
  505 + | Fs("lexical_unit", meaning_info) ->
  506 + Xlist.fold meaning_info empty_meaning load_meaning_info
  507 + | xml -> failwith ("load_meaning:\n " ^ tei_to_string xml)
  508 +
  509 +let load_phrases_connections = function (* Extracts the target id from a reference to a phrase, unchanged; any other tei value raises Failure. *)
  510 + | SameAs(same_as,"phrase") -> same_as
  511 + | xml -> failwith ("load_phrases_connections: \n " ^ tei_to_string xml)
  512 +
  513 +let load_alter_connection = function (* Builds a connection record from a connection structure: the referenced argument id plus the ids of the phrases linked to it. Any other shape raises Failure. *)
  514 + | Fs("connection", [
  515 + F("argument",SameAs(same_as,"argument"));
  516 + Fset("phrases",phrases)]) ->
  517 + {argument = same_as; phrases = List.rev (Xlist.rev_map phrases load_phrases_connections)}
  518 + | xml -> failwith ("load_alter_connections: \n " ^ tei_to_string xml)
  519 +
  520 +let load_alternations = function
  521 + | Fs("alternation",[Fset("connections",connections_set)]) ->
  522 + {connections = List.rev (Xlist.rev_map connections_set load_alter_connection)}
  523 + | xml -> failwith ("load_alternations: \n " ^ tei_to_string xml)
  524 +
  525 +let load_connections = function
  526 + | Fset("alternations",alternations) ->
  527 + List.rev (Xlist.rev_map alternations load_alternations)
  528 + | xml -> failwith ("load_connections: \n " ^ tei_to_string xml)
  529 +
  530 +let load_entry = function
  531 + | Xml.Element("entry",["xml:id",id], l) ->
  532 + let entry = {empty_entry with ent_id = parse_id id} in
  533 + Xlist.fold l entry (fun e -> function
  534 + Xml.Element("form", [], [
  535 + Xml.Element("orth",[],[Xml.PCData orth]);
  536 + Xml.Element("pos",[],[Xml.PCData pos])]) -> {e with form_orth=orth; form_pos=pos}
  537 + | xml -> (match parse_tei xml with
  538 + | Fs("syntactic_layer", [syntax]) -> {e with schemata = load_syntax syntax}
  539 + | Fs("examples_layer", [examples]) -> {e with examples = load_examples examples}
  540 + | Fs("semantic_layer", [semantic]) -> {e with frames = load_semantic semantic}
  541 + | Fs("meanings_layer", [Fset("meanings",meanings_set)]) ->
  542 + {e with meanings = List.rev (Xlist.rev_map meanings_set load_meaning)}
  543 + | Fs("connections_layer",[connections]) -> {e with alternations = load_connections connections}
  544 + | Fs("general_info",[F("status",TEIstring status)]) -> {e with status=status}
  545 + | xml -> failwith ("load_entry: \n" ^ tei_to_string xml)))
  546 + | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
  547 +
  548 +let load_walenty filename:entry list =
  549 + begin
  550 + match Xml.parse_file filename with
  551 + Xml.Element("TEI", _,
  552 + [Xml.Element("teiHeader",_,_) ;
  553 + Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
  554 + List.rev (Xlist.rev_map entries load_entry)
  555 + | _ -> failwith "load_walenty"
  556 + end
  557 +
  558 +type expansion = Phrases of morf list | Positions of position list
  559 +
  560 +let load_expansion = function
  561 + Fs("expansion",[F("opinion",Symbol opinion);Fset("phrases",set)]) -> Phrases(List.rev (Xlist.map set (fun p -> snd (load_phrase p))))
  562 + | Fs("expansion",[F("opinion",Symbol opinion);Fset("positions",set)]) -> Positions(List.rev (Xlist.map set load_position))
  563 + | tei -> failwith ("load_expansion: \n" ^ tei_to_string tei)
  564 +
  565 +let load_rentry = function
  566 + | Xml.Element("entry",["xml:id",id], [phrase;exp]) ->
  567 + let id = parse_id id in
  568 + let morf = snd (load_phrase (parse_tei phrase)) in
  569 + let expansions = match parse_tei exp with
  570 + | Fs("phrase_type_expansions", [Fset("expansions",expansions)]) -> List.rev (Xlist.map expansions load_expansion)
  571 + | Fs("phrase_type_expansions", [F("expansions",expansion)]) -> [load_expansion expansion]
  572 + | tei -> failwith ("load_entry: \n" ^ tei_to_string tei) in
  573 + id,morf,expansions
  574 + | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
  575 +
  576 +let load_expands filename =
  577 + begin
  578 + match Xml.parse_file filename with
  579 + Xml.Element("TEI", _,
  580 + [Xml.Element("teiHeader",_,_) ;
  581 + Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
  582 + List.rev (Xlist.rev_map entries load_rentry)
  583 + | _ -> failwith "load_walenty"
  584 + end
  585 +
  586 +
  587 + (* !!! Wczytywanie walentego *)
  588 + (*let walenty = load_walenty Paths.walenty_filename *)
  589 +let walenty = load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170304.xml"
  590 +
  591 +let expands = load_expands "/home/yacheu/Dokumenty/NLP resources/Walenty/phrase_types_expand_20170304.xml"
  592 +
  593 +let subtypes = ["int",[
  594 + "co"; "czemu"; "czy"; "czyj"; "dlaczego"; "dokąd"; "gdzie"; "ile"; "jak";
  595 + "jaki"; "kiedy"; "kto"; "którędy"; "który"; "odkąd"; "skąd"; "jakoby"];
  596 + "rel",[
  597 + "co"; "dokąd"; "gdzie"; "jak"; "jakby"; "jaki"; "jakoby"; "kiedy"; "kto";
  598 + "którędy"; "który"; "odkąd"; "skąd"]]
  599 +
  600 +let equivalents = ["jak","niczym"; "przeciw";"przeciwko"]
  601 +
  602 + nonch-->
  603 + co [pewna]
  604 + coś [pewna]
  605 + nic [pewna]
  606 +to [pewna]
  607 +
  608 + advp(pron)-->
  609 + tak [pewna]
  610 + jak [pewna]
  611 +
20 612 (*
21 613 Autor: Maciej Hołubowicz
22 614 *)
... ... @@ -41,10 +633,6 @@ let przejdz_zapisz funkcja zmienna poczym =
41 633 List.fold_left (fun zmienna nazwa -> funkcja zmienna nazwa) zmienna poczym
42 634  
43 635  
44   -type id = {hash: bool; suffix: string; numbers: string list}
45   -
46   -let empty_id = {hash = false; suffix = ""; numbers = []}
47   -
48 636 let rec last l =
49 637 match l with
50 638 | [a] -> a
... ... @@ -76,7 +664,7 @@ let parse_id s =
76 664  
77 665  
78 666 (* Początek kodu do wczytywania syntaticLayer *)
79   -
  667 +(*
80 668 type preposition = string
81 669 type case = string
82 670 type comp = string * (string list)(* oznaczony w xmlu jako "type" *)
... ... @@ -130,560 +718,16 @@ and lex = {
130 718 gender: string;
131 719 modification: string * position list;
132 720 }
133   -and position = {psn_id: id; gf: string; phrases: (id * phrase) list; control: string list}
134   -
135   -let empty_lex = {phrases_list=[]; lemma="","",[]; numeral_lemma="","",[]; negation="";
136   - degree=""; number=""; reflex=""; gender=""; modification = "",[]}
137   -
138   -let load_case = function
139   - | Xml.Element("f", ["name","case"], [Xml.Element("symbol", ["value",value], [])]) ->
140   - value
141   - | xml -> failwith ("load_case:\n " ^ Xml.to_string_fmt xml)
  721 +and*) (*type position = {psn_id: id; gf: string; phrases: (id * morf) list; control: string list}*)
142 722  
143   -let load_preposition = function
144   - | Xml.Element("f", ["name","preposition"], [Xml.Element("symbol", ["value",value], [])]) ->
145   - value
146   - | xml -> failwith ("load_preposition:\n " ^ Xml.to_string_fmt xml)
147 723  
148   -let load_complex_preposition = function
149   - | Xml.Element("f", ["name","complex_preposition"], [Xml.Element("symbol", ["value",value], [])]) ->
150   - value
151   - | xml -> failwith ("load_complex_preposition:\n " ^ Xml.to_string_fmt xml)
152 724  
153   -let load_aspect = function
154   - | Xml.Element("f", ["name","aspect"], [Xml.Element("symbol", ["value",value], [])]) ->
155   - value
156   - | xml -> failwith ("load_aspect:\n " ^ Xml.to_string_fmt xml)
157   -
158   -let load_advp = function
159   - | Xml.Element("f", ["name","category"], [Xml.Element("symbol", ["value",value], [])]) ->
160   - value
161   - | xml -> failwith ("load_advp:\n " ^ Xml.to_string_fmt xml)
162   -
163   -let load_type_constrains = function
164   - | Xml.Element("symbol", ["value",value], []) ->
165   - value
166   - | xml -> failwith ("load_type_constrains:\n " ^ Xml.to_string_fmt xml)
167   -
168   -
169   - (*type może mieć dodatkowo "constraints", czego chyba nie ma w dokumentacji,
170   - jest to lista elementów w stylu: Xml.Element("symbol", ["value",value], []) *)
171   -let load_type = function
172   - | Xml.Element("f", ["name","type"],[Xml.Element("fs", ["type","type_def"], x)]) ->
173   - begin
174   - match x with
175   - | [Xml.Element("f",["name","conjunction"],
176   - [Xml.Element("symbol",["value",value],[])])] ->
177   - value, []
178   - | [Xml.Element("f",["name","conjunction"],
179   - [Xml.Element("symbol",["value",value],[])]);
180   - Xml.Element("f",["name","constraints"],
181   - [Xml.Element("vColl",["org","set"],set)])] ->
182   - value, przejdz_lista load_type_constrains set
183   - | _ -> failwith "load_type match"
184   - end
185   - | xml -> failwith ("load_type:\n " ^ Xml.to_string_fmt xml)
186   - (*Printf.printf "%s\n" (Xml.to_string_fmt xml)*)
187   -
188   - (*używam w load_lex*)
189   -let load_lemmas_set = function
190   - | Xml.Element("string",[], [Xml.PCData mstring]) ->
191   - mstring
192   - | xml -> failwith ("load_lemmas_set:\n " ^ Xml.to_string_fmt xml)
193   -
194   - (* category ma dodakowo "constrains", czego chyba nie ma w dokumentacji
195   - co więcej constrains zawiera w sobie zbiór typu phrases, więc jest rekurencyjne*)
196   -let rec load_category = function
197   - | Xml.Element("f", ["name","category"], [Xml.Element("fs", ["type","category_def"], x)]) ->
198   - begin
199   - match x with
200   - | [Xml.Element("f",["name","name"],
201   - [Xml.Element("symbol",["value",value],[])])] ->
202   - value, []
203   - | [Xml.Element("f",["name","name"],
204   - [Xml.Element("symbol",["value",value],[])]);
205   - Xml.Element("f",["name","constraints"],
206   - [Xml.Element("vColl",["org","set"],set)])] ->
207   - value, przejdz_lista_second load_phrase set
208   - | _ -> failwith "load_category match"
209   - end;
210   - | xml -> failwith ("load_category:\n " ^ Xml.to_string_fmt xml)
211   -
212   -and load_fixed = function (* również wzajemnie rekurencyjne z load_phrase*)
213   - | [Xml.Element("f", ["name","argument"],set);
214   - Xml.Element("f", ["name","string"],[Xml.Element("string",[],[Xml.PCData stringg])])] ->
215   - FixedP(przejdz_lista_second load_phrase set, stringg)
216   - | _ -> failwith "load_fixed:\n "
217   -
218   -
219   -and load_modification_def = function (*pomocnicza do load_lex *)
220   - | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])])] ->
221   - value, []
222   - | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])]);
223   - Xml.Element("f",["name","positions"],[Xml.Element("vColl",["org","set"],set)])] ->
224   - value, przejdz_lista load_position set
225   - | x -> Printf.printf "%s\n" (Xml.to_string_fmt (List.hd x));
226   - failwith "load_modification_def:\n"
227   -
228   -and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*)
229   - | Xml.Element("f", ["name","argument"],[set]) ->
230   - {arg with phrases_list = [snd (load_phrase set)]}
231   - (* to samo co wyżej, tylko lista*)
232   - | Xml.Element("f", ["name","arguments"],[Xml.Element("vColl",["org","set"],set)]) ->
233   - {arg with phrases_list = przejdz_lista_second load_phrase set}
234   - | Xml.Element("f", ["name","modification"],[Xml.Element("fs", ["type","modification_def"],x)]) ->
235   - {arg with modification = load_modification_def x}
236   -
237   - | Xml.Element("f", ["name","lemma"],[Xml.Element("fs", ["type","lemma_def"],
238   - [Xml.Element("f",["name","selection_mode"],[Xml.Element("symbol", ["value",value1],[])]);
239   - Xml.Element("f",["name","cooccurrence"],[Xml.Element("symbol", ["value",value2],[])]);
240   - Xml.Element("f",["name","lemmas"],[Xml.Element("vColl", ["org","set"],lemmas)])])]) ->
241   - {arg with lemma = value1, value2, przejdz_lista load_lemmas_set lemmas}
242   - | Xml.Element("f", ["name","numeral_lemma"],[Xml.Element("fs", ["type","numeral_lemma_def"],
243   - [Xml.Element("f",["name","selection_mode"],[Xml.Element("symbol", ["value",value1],[])]);
244   - Xml.Element("f",["name","cooccurrence"],[Xml.Element("symbol", ["value",value2],[])]);
245   - Xml.Element("f",["name","lemmas"],[Xml.Element("vColl", ["org","set"],lemmas)])])]) ->
246   - {arg with numeral_lemma = value1, value2, przejdz_lista load_lemmas_set lemmas}
247   -
248   - | Xml.Element("f", ["name","negation"],[Xml.Element("symbol",["value",value],[])]) ->
249   - {arg with negation = value}
250   - | Xml.Element("f", ["name","degree"],[Xml.Element("symbol",["value",value],[])]) ->
251   - {arg with degree = value}
252   - | Xml.Element("f", ["name","number"],[Xml.Element("symbol",["value",value],[])]) ->
253   - {arg with number = value}
254   - | Xml.Element("f", ["name","reflex"],[Xml.Element("symbol",["value",value],[])]) ->
255   - {arg with reflex = value}
256   - | Xml.Element("f", ["name","reflex"],[]) ->
257   - {arg with reflex = ""}
258   - (*niby set, ale zawsze jest jeden element*)
259   - | Xml.Element("f", ["name","gender"],
260   - [Xml.Element("vColl", ["org","set"],[Xml.Element("symbol",["value",value],[])])]) ->
261   - {arg with gender = value}
262   - | xml ->
263   - Printf.printf "%s\n" (Xml.to_string_fmt xml);
264   - failwith "load_lex:\n "
265   -
266   -and load_phrase xml:id * phrase =
267   - let id, idtype, x =
268   - begin
269   - match xml with
270   - | Xml.Element("fs", ["xml:id", _id; "type", _idtype], _x) -> (_id, _idtype, _x)
271   - | Xml.Element("fs", ["type", _idtype], _x) -> ("", _idtype, _x)
272   - | _ -> failwith "load_phrase let id,idtype...\n"
273   - end;
274   - in
275   - let id = parse_id id in
276   - match idtype, x with
277   - | "np", [a] ->
278   - id, NP(load_case a);
279   - | "prepnp", [a;b] ->
280   - id, PrepNP(load_preposition a, load_case b)
281   - | "adjp", [a] ->
282   - id, AdjP(load_case a)
283   - | "prepadjp", [a;b] ->
284   - id, PrepAdjP(load_preposition a, load_case b)
285   - | "comprepnp", [a] ->
286   - id, ComprepNP(load_complex_preposition a)
287   - | "cp", [a] ->
288   - id, CP(load_type a)
289   - | "ncp", [a;b] ->
290   - id, NCP(load_case a, load_type b)
291   - | "prepncp", [a;b;c] ->
292   - id, PrepNCP(load_preposition a, load_case b, load_type c)
293   - | "infp", [a] ->
294   - id, InfP(load_aspect a)
295   - | "xp", [a] ->
296   - id, XP(load_category a)
297   - | "advp", [a] ->
298   - id, AdvP(load_advp a)
299   -
300   - | "nonch", [] -> id, Nonch
301   - | "or", [] -> id, Or
302   - | "refl", [] -> id, Refl
303   - | "E", [] -> id, E
304   -
305   - | "lex", x ->
306   - id, Lex(przejdz_zapisz load_lex empty_lex x)
307   - (*
308   - Printf.printf "%d\n" (List.length x);
309   - Printf.printf "%s\n" (Xml.to_string_fmt xml);
310   - *)
311   - | "fixed", x ->
312   - id, load_fixed x
313   -
314   - (*dodatkowe, nie ma ich w dokmentacji a są na poziomie 0 load_phrase*)
315   - | "possp", [] -> id, PossP
316   - | "recip", [] -> id, Recip
317   - | "distrp", [] -> id, DistrP
318   - | "compar", [Xml.Element("f",["name","compar_category"],
319   - [Xml.Element("symbol",["value",value],[])])] -> id, ComparP(value)
320   -
321   - (* dodatkowe: (gerp i prepgerp) są w dokumentacji,
322   - i pojawiają się po rekurencyjnym wywołaniu z funkcji load_lex
323   - podobne kolejno do: np, prepnp*)
324   - | "gerp", [a] ->
325   - id, GerP(load_case a)
326   - | "prepgerp", [a;b] ->
327   - id, PrepGerP(load_preposition a, load_case b)
328   - (*inne dodatkowe które też są powywołaniu z load_lex*)
329   - | "nump", [a] ->
330   - id, NumP(load_case a)
331   - | "prepnump", [a;b] ->
332   - id, PrepNumP(load_preposition a, load_case b)
333   - | "ppasp", [a] ->
334   - id, PpasP(load_case a)
335   - | "prepppasp", [a;b] ->
336   - id, PrepPpasP(load_preposition a, load_case b)
337   - | "qub", [] ->
338   - id, Qub
339   -
340   - (*dodatkowe, po wywołaniu z load_position *)
341   - | "pactp", [a] ->
342   - id, PactP(load_case a)
343   -
344   -
345   - | _ -> failwith ("load_phrase match:\n " ^ Xml.to_string_fmt xml)
346   -
347   -
348   -and load_control = function
349   - | Xml.Element("symbol", ["value", value], []) ->
350   - value
351   - | xml -> failwith ("load_control:\n " ^ Xml.to_string_fmt xml)
352   -
353   -and load_position_info arg = function
354   - | Xml.Element("f",["name", "function"], [Xml.Element("symbol",["value", value],[])]) ->
355   - {arg with gf = value}
356   - | Xml.Element("f",["name", "phrases"], [Xml.Element("vColl",["org", "set"], phrases_set)]) ->
357   - {arg with phrases = przejdz_lista load_phrase phrases_set}
358   - | Xml.Element("f",["name", "control"], [Xml.Element("vColl",["org", "set"], control_set)]) ->
359   - {arg with control = (przejdz_lista load_control control_set)}
360   - | xml -> failwith ("load_position_info:\n " ^ Xml.to_string_fmt xml)
361   -
362   -and load_position = function
363   - | Xml.Element("fs", ["xml:id", id; "type","position"], listt) ->
364   - let id = parse_id id in
365   - let result = {psn_id = id; gf = ""; phrases = []; control = []} in
366   - let result = przejdz_zapisz load_position_info result listt in
367   - result
368   - | Xml.Element("fs", ["type","position"], listt) ->
369   - let result = {psn_id = parse_id ""; gf = ""; phrases = []; control = []} in
370   - let result = przejdz_zapisz load_position_info result listt in
371   - result
372   - | xml -> failwith ("load_position:\n " ^ Xml.to_string_fmt xml)
373   -
374   -
375   -
376   -type schema = {sch_id: id; opinion: string; reflexiveMark: string; aspect: string;
377   - negativity: string; predicativity: string; positions: position list}
378   -
379   -
380   -let load_schema_info arg = function
381   - | Xml.Element("f", ["name","opinion"], [Xml.Element("symbol", ["value",opinion_value],[])]) ->
382   - {arg with opinion = opinion_value}
383   - | Xml.Element("f", ["name","reflexive_mark"], [Xml.Element("binary", ["value",mark_value],[])]) ->
384   - {arg with reflexiveMark = mark_value}
385   - | Xml.Element("f", ["name","aspect"], [Xml.Element("symbol", ["value", aspect_value],[])]) ->
386   - {arg with aspect = aspect_value}
387   - | Xml.Element("f", ["name","aspect"], []) ->
388   - arg
389   - | Xml.Element("f", ["name","negativity"], [Xml.Element("symbol", ["value", negativity_value],[])]) ->
390   - {arg with negativity = negativity_value}
391   - | Xml.Element("f", ["name","negativity"], []) ->
392   - arg
393   - | Xml.Element("f", ["name","negativity"], [Xml.Element("binary", ["value", binary_value], [])]) ->
394   - {arg with negativity = binary_value}
395   - | Xml.Element("f", ["name","predicativity"], [Xml.Element("binary", ["value", binary_value],[])]) ->
396   - {arg with predicativity = binary_value}
397   - | Xml.Element("f", ["name","positions"], [Xml.Element("vColl",["org","set"], positions)]) ->
398   - {arg with positions = przejdz_lista load_position positions}
399   - | xml -> failwith ("load_schema_info\n " ^ Xml.to_string_fmt xml)
400   -
401   -let load_schema = function
402   - Xml.Element("fs", ["xml:id",id; "type","schema"], schema) ->
403   - let id = parse_id id in
404   - let result = {sch_id = id; opinion = ""; reflexiveMark = ""; aspect = "";
405   - negativity = ""; predicativity = ""; positions = []} in
406   - let result = przejdz_zapisz load_schema_info result schema in
407   - result
408   - | xml -> failwith ("load_schema:\n " ^ Xml.to_string_fmt xml)
409   -
410   -let load_syntactic = function
411   - Xml.Element("f", ["name", "schemata"],
412   - [Xml.Element("vColl", ["org","set"], schemata_set)]) ->
413   - przejdz_lista load_schema schemata_set
414   - | xml -> failwith ("load_syntactic:\n " ^ Xml.to_string_fmt xml)
415   -(* Koniec kodu do wczytywania syntaticLayer *)
416   -
417   -
418   -(* Początek kodu do wczytywania examplesLayer *)
419   -
420   -type example = {exm_id: id;
421   - meaning: string; (*id*)
422   - phrases: id list; (*zbiór id!!!*)
423   - sentence: string;
424   - source: string;
425   - opinion: string;
426   - note: string}
427   -
428   -
429   -let load_phrases_set = function
430   - | Xml.Element("fs", ["sameAs", same_as; "type","phrase"], []) ->
431   - let p = parse_full_id same_as in
432   - {p with numbers = List.tl p.numbers}
433   - | xml -> failwith ("load_phrases_set :\n " ^ Xml.to_string_fmt xml)
434   -
435   -let load_example_info arg = function
436   - | Xml.Element("f", ["name", "meaning"], [Xml.Element("fs", ["sameAs",same_as; "type", "lexical_unit"],[])]) ->
437   - {arg with meaning = same_as}
438   - | Xml.Element("f", ["name", "phrases"], [Xml.Element("vColl", ["org","set"], phrases_set)]) ->
439   - {arg with phrases = przejdz_lista load_phrases_set phrases_set}
440   - | Xml.Element("f", ["name", "sentence"], [Xml.Element("string",[], [Xml.PCData sentence_string])]) ->
441   - {arg with sentence = sentence_string}
442   - | Xml.Element("f", ["name", "source"], [Xml.Element("symbol", ["value",source_value], [])]) ->
443   - {arg with source = source_value}
444   - | Xml.Element("f", ["name", "opinion"], [Xml.Element("symbol", ["value",opinion_value], [])]) ->
445   - {arg with opinion = opinion_value}
446   - | Xml.Element("f", ["name", "note"], [Xml.Element("string",[], [Xml.PCData note_string])]) ->
447   - {arg with note = note_string}
448   - | Xml.Element("f", ["name", "note"], [Xml.Element("string",[], [])]) ->
449   - arg
450   - | xml -> failwith ("load_example_info :\n " ^ Xml.to_string_fmt xml)
451   -
452   -let load_example = function
453   - | Xml.Element("fs", ["xml:id", id; "type", "example"], example_elements) ->
454   - let id = parse_id id in
455   - let result = {exm_id = id; meaning = ""; phrases = []; sentence = "";
456   - source = ""; opinion = ""; note = "";} in
457   - let result = przejdz_zapisz load_example_info result example_elements in
458   - result
459   - | xml -> failwith ("load_example :\n " ^ Xml.to_string_fmt xml)
460   -
461   -let load_examples = function
462   - | Xml.Element("f", ["name","examples"], [Xml.Element("vColl", ["org", "set"], examples_set)]) ->
463   - przejdz_lista load_example examples_set
464   - | xml -> failwith ("load_examples:\n " ^ Xml.to_string_fmt xml)
465   -
466   -(* Koniec kodu do wczytywania examplesLayer *)
467   -
468   -
469   -
470   -(* Początek kodu do wczytywania semanticLayer *)
471   -
472   -type sel_prefs =
473   - Numeric of string
474   - | Symbol of string
475   - | Relation of string * string
476   -
477   -type argument = {arg_id: id;
478   - role: string;
479   - role_attribute: string;
480   - sel_prefs: sel_prefs list list}
481   -
482   -type frame = {frm_id: id;
483   - opinion: string;
484   - meanings: string list;
485   - arguments: argument list}
486   -
487   -
488   -let load_self_prefs_sets = function
489   - | Xml.Element("numeric", ["value",value],[]) ->
490   - Numeric(value)
491   - | Xml.Element("symbol", ["value",value],[]) ->
492   - Symbol(value)
493   - | Xml.Element("fs", ["type", "relation"], [
494   - Xml.Element("f", ["name", "type"],[Xml.Element("symbol", ["value", value], [])]);
495   - Xml.Element("f", ["name", "to"],[Xml.Element("fs", ["sameAs", same_as; "type", "argument"], [])])]) ->
496   - Relation(value,same_as)
497   - | xml -> failwith ("load_self_prefs_sets :\n " ^ Xml.to_string_fmt xml)
498   -
499   -let load_argument_self_prefs = function
500   - | Xml.Element("f", ["name", name], [Xml.Element("vColl", ["org","set"], self_prefs_set)]) ->
501   - przejdz_lista load_self_prefs_sets self_prefs_set
502   - | xml -> failwith ("load_argument_self_prefs :\n " ^ Xml.to_string_fmt xml)
503   -
504   -let load_argument_info arg = function
505   - | Xml.Element("f", ["name","role"], [Xml.Element("symbol", ["value",value],[])]) ->
506   - {arg with role = value}
507   - | Xml.Element("f", ["name","role_attribute"], [Xml.Element("symbol", ["value",value],[])]) ->
508   - {arg with role_attribute = value}
509   - | Xml.Element("f", ["name","sel_prefs"], [Xml.Element("fs", ["type","sel_prefs_groups"], self_prefs)]) ->
510   - {arg with sel_prefs = przejdz_lista load_argument_self_prefs self_prefs}
511   - | xml -> failwith ("load_argument_info :\n " ^ Xml.to_string_fmt xml)
512   -
513   -let load_arguments_set = function
514   - | Xml.Element("fs", ["xml:id", id; "type", "argument"], info) ->
515   - let id = parse_id id in
516   - let result = {arg_id = id; role = ""; role_attribute = ""; sel_prefs = []} in
517   - let result = przejdz_zapisz load_argument_info result info in
518   - result
519   - | xml -> failwith ("load_arguments_set :\n " ^ Xml.to_string_fmt xml)
520   -
521   -let load_meanings_set = function
522   - | Xml.Element("fs", ["sameAs", same_As; "type", "lexical_unit"], []) ->
523   - same_As
524   - | xml -> failwith ("load_meanings_set :\n " ^ Xml.to_string_fmt xml)
525   -
526   -let load_frame = function
527   - | Xml.Element("fs", ["xml:id", id; "type", "frame"], [
528   - Xml.Element("f", ["name", "opinion"], [Xml.Element("symbol", ["value", opinion],[])]);
529   - Xml.Element("f", ["name", "meanings"], [Xml.Element("vColl", ["org", "set"], meanings_set)]);
530   - Xml.Element("f", ["name", "arguments"], [Xml.Element("vColl", ["org", "set"], arguments_set)])]) ->
531   - let id = parse_id id in
532   - {frm_id = id;
533   - opinion = opinion;
534   - meanings = przejdz_lista load_meanings_set meanings_set;
535   - arguments = przejdz_lista load_arguments_set arguments_set}
536   - | xml -> failwith ("load_frame :\n " ^ Xml.to_string_fmt xml)
537   -
538   -let load_semantic = function
539   - | Xml.Element("f", ["name","frames"], [Xml.Element("vColl", ["org", "set"], frame_set)]) ->
540   - przejdz_lista load_frame frame_set
541   - | xml -> failwith ("load_semantic:\n " ^ Xml.to_string_fmt xml)
542   -(* Koniec kodu do wczytywania semanticLayer *)
543   -
544   -
545   -(* Początek do wczytywania meaningsLayer *)
546   -
547   -type meaning = {mng_id: id;
548   - name: string;
549   - variant: string;
550   - plwnluid: string;
551   - gloss: string}
552   -
553   -let empty_meaning = {mng_id = empty_id;
554   - name = "";
555   - variant = "";
556   - plwnluid = "";
557   - gloss = ""}
558   -
559   -
560   -
561   -let load_meaning_info arg = function
562   - | Xml.Element("f", ["name", "name"], [Xml.Element("string", [], [Xml.PCData name_string])]) ->
563   - {arg with name = name_string}
564   - | Xml.Element("f", ["name", "variant"], [Xml.Element("string", [], [Xml.PCData variant_string])]) ->
565   - {arg with variant = variant_string}
566   - | Xml.Element("f", ["name", "plwnluid"], [Xml.Element("numeric", ["value",value],[])]) ->
567   - {arg with plwnluid = value}
568   - | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [Xml.PCData gloss_string])]) ->
569   - {arg with gloss = gloss_string}
570   - | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [])]) ->
571   - arg
572   - | xml -> failwith ("load_meaning_info:\n " ^ Xml.to_string_fmt xml)
573   -
574   -
575   -let load_meaning = function
576   - | Xml.Element("fs", ["xml:id", id; "type", "lexical_unit"], meaning_info) ->
577   - let id = parse_id id in
578   - let result = empty_meaning in
579   - let result = {result with mng_id = id} in
580   - let result = przejdz_zapisz load_meaning_info result meaning_info in
581   - result
582   - | xml -> failwith ("load_meaning:\n " ^ Xml.to_string_fmt xml)
583   -
584   -(* Koniec kodu do wczytywania meaningsLayer *)
585   -
586   -(* Początek kodu do wczytywania connectionsLayer *)
587   -
588   -type connection = {argument: string;
589   - phrases: string list}
590   -
591   -type alternation = {connections: connection list}
592   -
593   -
594   -let load_phrases_connections = function
595   - | Xml.Element("fs", ["sameAs",sameAs; "type", "phrase"], []) ->
596   - sameAs
597   - | xml -> failwith ("load_phrases_connections: \n " ^ Xml.to_string_fmt xml)
598   -
599   -let load_alter_connection = function
600   - | Xml.Element("fs", ["type","connection"], [
601   - Xml.Element("f", ["name", "argument"], [Xml.Element("fs", ["sameAs",sameAs; "type","argument"],[])]);
602   - Xml.Element("f", ["name", "phrases"], [Xml.Element("vColl", ["org","set";], phrases)])]) ->
603   - {argument = sameAs; phrases = (przejdz_lista load_phrases_connections phrases)}
604   - | xml -> failwith ("load_alter_connections: \n " ^ Xml.to_string_fmt xml)
605   -
606   -let load_alternations = function
607   - | Xml.Element("fs", ["type","alternation"],
608   - [Xml.Element("f", ["name", "connections"], [Xml.Element("vColl", ["org", "set"], connections_set)])]) ->
609   - {connections = przejdz_lista load_alter_connection connections_set}
610   - | xml -> failwith ("load_alternations: \n " ^ Xml.to_string_fmt xml)
611   -
612   -
613   -let load_connections = function
614   - | Xml.Element("f", ["name","alternations"], [Xml.Element("vColl", ["org", "set"], alternations)]) ->
615   - przejdz_lista load_alternations alternations
616   - | xml -> failwith ("load_connections: \n " ^ Xml.to_string_fmt xml)
617   -
618   -
619   -(* Koniec kodu do wczytywania connectionsLayer *)
620   -
621   -type entry = {ent_id: id;
622   - form_orth: string;
623   - form_pos: string;
624   - schemata: schema list;
625   - examples: example list;
626   - frames: frame list;
627   - meanings: meaning list;
628   - alternations: alternation list}
629   -
630   -let load_entry = function
631   - | Xml.Element("entry",["xml:id",id], [
632   - Xml.Element("form", [], [
633   - Xml.Element("orth",[],[Xml.PCData orth]);
634   - Xml.Element("pos",[],[Xml.PCData pos])]);
635   - Xml.Element("fs", ["type","syntactic_layer"], [syntactics]);
636   - Xml.Element("fs", ["type","examples_layer"], [examples]);
637   - Xml.Element("fs", ["type","semantic_layer"], [semantic]);
638   - Xml.Element("fs", ["type","meanings_layer"],
639   - [Xml.Element("f", ["name","meanings"],
640   - [Xml.Element("vColl", ["org", "set"], meanings_set)])]);
641   - Xml.Element("fs", ["type","connections_layer"],[connections])]) ->
642   - let id = parse_id id in
643   - {ent_id = id;
644   - form_orth = orth;
645   - form_pos = pos;
646   - schemata = load_syntactic syntactics; (*ok ok2*)
647   - examples = load_examples examples; (*ok ok2*)
648   - frames = load_semantic semantic; (*ok ok2*)
649   - meanings = przejdz_lista load_meaning meanings_set; (*ok ok2*)
650   - alternations = load_connections connections} (*ok ok2*)
651   - | Xml.Element("entry",["xml:id",id], [ (*skopiowane*)
652   - Xml.Element("form", [], [
653   - Xml.Element("orth",[],[Xml.PCData orth]);
654   - Xml.Element("pos",[],[Xml.PCData pos])]);
655   - Xml.Element("fs", ["type","syntactic_layer"], [syntactics]);
656   - Xml.Element("fs", ["type","examples_layer"], [examples])]) ->
657   - let id = parse_id id in
658   - {ent_id = id;
659   - form_orth = orth;
660   - form_pos = pos;
661   - schemata = load_syntactic syntactics;
662   - examples = load_examples examples;
663   - frames = [];
664   - meanings = [];
665   - alternations = []}
666   - | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml)
667   -
668   -
669   -let load_walenty filename:entry list =
670   - begin
671   - match Xml.parse_file filename with
672   - Xml.Element("TEI", _,
673   - [Xml.Element("teiHeader",_,_) ;
674   - Xml.Element("text",[],[Xml.Element("body",[],entries)])]) ->
675   - przejdz_lista load_entry entries
676   - | _ -> failwith "load_walenty"
677   - end
678   -
679   -(* !!! Wczytywanie walentego *)
680   -(* let walenty = load_walenty Paths.walenty_filename *)
681 725  
682 726 (* let _ = Printf.printf "loading: OK\n" *)
683 727  
684 728 (* ******************************************* *)
685 729  
686   -
  730 +(****
687 731  
688 732  
689 733 (*
... ... @@ -1218,3 +1262,4 @@ let print_full_entry filename lex =
1218 1262 let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in
1219 1263 Xlist.iter frames_sem (fun frame ->
1220 1264 print_endline (WalStringOf.frame "bębnić" frame))*)
  1265 + ****)
... ...
walenty/ENIAMwalTypes.ml
... ... @@ -27,17 +27,17 @@ type token =
27 27 | LParen | RParen | LBracet | RBracet | LSqBra | RSqBra
28 28 | Semic | Plus | Comma | Quot
29 29  
30   -type opinion = Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Domyslny
  30 +type opinion = Dobry | Pewny | Potoczny | Watpliwy | Archaiczny | Zly | Wulgarny | Domyslny | OpinionUndef
31 31 type negation = Negation | Aff | NegationUndef | NegationNA
32   -type pred = Pred | PredNA
  32 +type pred = PredTrue | PredFalse | PredUndef | PredNA
33 33 type aspect = Aspect of string | AspectUndef | AspectNA
34 34 type case = Case of string | Str | Part | CaseAgr | NomAgr | GenAgr | AllAgr | CaseUndef | AllUAgr | CaseUAgr
35 35 type comp = Comp of string | Zeby | Gdy | CompUndef
36   -type comp_type = Int | Rel | Sub | Coord | CompTypeUndef | CompTypeAgr
  36 +type comp_type = Int | Rel | CompTypeUndef (*| CompTypeAgr*)
37 37 type number = Number of string | NumberUndef | NumberAgr
38 38 type gender = Gender of string | GenderUndef | GenderAgr | Genders of string list
39 39 type grad = Grad of string | GradUndef
40   -type refl = ReflEmpty | ReflSie
  40 +type refl = ReflEmpty | ReflTrue | ReflFalse | ReflUndef
41 41 type acm = Acm of string | AcmUndef
42 42 type sem = Sem | NoSem
43 43 (* type req = Req | NReq | ReqUndef *)
... ... @@ -48,7 +48,7 @@ type aux = NoAux | PastAux | FutAux | ImpAux
48 48  
49 49 type nsem = Common of string | Time
50 50  
51   -type gf = SUBJ | OBJ | ARG | CORE | NOSEM | ADJUNCT | RAISED | NOGF | CLAUSE | SENTENCE
  51 +type gf = SUBJ | OBJ | ARG
52 52  
53 53 type pos =
54 54 SUBST of number * case
... ... @@ -67,42 +67,33 @@ type pos =
67 67  
68 68 type phrase =
69 69 NP of case
70   - | PrepNP of sem * string * case
  70 + | PrepNP of string * case
71 71 | AdjP of case
72   - | PrepAdjP of sem * string * case
  72 + | PrepAdjP of string * case
73 73 | NumP of case
74   - | PrepNumP of sem * string * case
75   - | ComprepNP of sem * string
76   - | ComparNP of sem * string * case
77   - | ComparPP of sem * string
78   - | IP
  74 + | PrepNumP of string * case
  75 + | ComprepNP of string
  76 + | ComparNP of string * case
  77 + | ComparPP of string
79 78 | CP of comp_type * comp
80 79 | NCP of case * comp_type * comp
81   - | PrepNCP of sem * string * case * comp_type * comp
82   - | InfP of aspect (** req*)
83   - | PadvP
  80 + | PrepNCP of string * case * comp_type * comp
  81 + | InfP of aspect
84 82 | AdvP
85 83 | FixedP of string
86   - | PrepP
87   - | Prep of string * case
88 84 | Num of case * acm
89   - | Measure of case
90 85 | Or
91   -(* | Refl *)
92   -(* | Recip *)
  86 + | Refl
  87 + | Recip
93 88 | Qub
94   - | Adja
95   -(* | Nie *)
96   - | AuxPast
97   - | AuxFut
98   - | AuxImp
99   - | Aglt
100   - | Inclusion
101 89 | Pro
102 90 | ProNG
103 91 | Null
104   - | X
105   - | Lex of string
  92 + | GerP of case
  93 + | PrepGerP of string * case
  94 + | PpasP of case
  95 + | PrepPpasP of string * case
  96 + | PactP of case
106 97  
107 98 type phrase_abbr =
108 99 Xp of string
... ... @@ -139,15 +130,18 @@ type direction = Forward | Backward | Both
139 130 | NumSpecs of gender
140 131 | EmptySpecs *)
141 132  
142   -(* type schema = ((string * string * string list) * string list * string list * morf list) list *)
143   -type schema_field = {gf: gf; role: string; role_attr: string; sel_prefs: string list;
144   - cr: string list; ce: string list; dir: direction; morfs: morf list}
  133 +type id = {hash: bool; suffix: string; numbers: string list} (* Identifier record used as the id field of position, schema, example and the other records in this file: a boolean hash flag, a string suffix and a list of numbers kept as strings. NOTE(review): what hash, suffix and numbers encode is not visible here; confirm against the TEI loader. *)
  134 +
  135 +let empty_id = {hash = false; suffix = ""; numbers = []} (* Default id value: hash flag off, empty suffix, no numbers. *)
  136 +
  137 +type position = {psn_id: id; gf: gf; role: string; role_attr: string; sel_prefs: string list;
  138 + cr: string list; ce: string list; dir: direction; morfs: (id * morf) list}
145 139  
146 140 and morf =
147 141 Phrase of phrase
148 142 | E of phrase
149   - | LexPhrase of (pos * lex) list * (restr * schema_field list)
150   - | LexPhraseMode of string * (pos * lex) list * (restr * schema_field list)
  143 + | LexPhrase of (pos * lex) list * (restr * position list)
  144 + | LexPhraseMode of string * (pos * lex) list * (restr * position list)
151 145 | PhraseAbbr of phrase_abbr * morf list
152 146 | PhraseComp of phrase_comp * (comp_type * comp list)
153 147 | LexPhraseId of string * pos * lex
... ... @@ -156,6 +150,18 @@ and morf =
156 150 | Raised of string list * direction * string list
157 151 | Multi of phrase list
158 152  
  153 +and lex_record = {
  154 + lex_argument: morf;
  155 + lex_arguments: morf list;
  156 + lex_lemma: lex;
  157 + lex_numeral_lemma: lex;
  158 + lex_negation: negation;
  159 + lex_degree: grad;
  160 + lex_number: number;
  161 + lex_reflex: refl;
  162 + lex_gender: gender;
  163 + lex_modification: restr * position list;
  164 +}
159 165 (* and mode = Mode of string * morf list | Pron of morf list | Misc *)
160 166  
161 167 type frame_atrs =
... ... @@ -169,9 +175,9 @@ type frame_atrs =
169 175 | NonPersAtrs of string list * string * string * string * negation * aspect
170 176  
171 177 type frame =
172   - Frame of frame_atrs * schema_field list
173   - | LexFrame of string * pos * restr * schema_field list
174   - | ComprepFrame of string * pos * restr * schema_field list
  178 + Frame of frame_atrs * position list
  179 + | LexFrame of string * pos * restr * position list
  180 + | ComprepFrame of string * pos * restr * position list
175 181 (* | FrameR of frame_atrs * (string * string * string list * string list * morf list) list
176 182 | LexFrameR of string * pos * restr * (string * string * string list * string list * morf list) list
177 183 | ComprepFrameR of string * pos * restr * (string * string * string list * string list * morf list) list *)
... ... @@ -207,3 +213,67 @@ let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncoun
207 213 let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat"
208 214 let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat"
209 215 let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat"
  216 +
  217 +let empty_position = (* Default position record: empty id, grammatical function ARG, blank role and role attribute, no selectional preferences, empty cr and ce lists, direction Both and no morfs. *)
  218 + {psn_id=empty_id; gf=ARG; role=""; role_attr="";sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]}
  219 +
  220 +let empty_lex = {lex_argument=Phrase Null; lex_arguments=[]; lex_lemma=Lexeme ""; (* Default lex_record: the Null phrase as argument, no further arguments, empty lemma. *)
  221 + lex_numeral_lemma=Lexeme ""; lex_negation=NegationUndef; (* empty numeral lemma; negation left undefined *)
  222 + lex_degree=GradUndef; lex_number=NumberUndef; lex_reflex=ReflUndef; (* degree, number and reflexivity left undefined *)
  223 + lex_gender=GenderUndef; lex_modification = Natr,[]} (* gender left undefined; modification is the pair of Natr and an empty position list *)
  224 +
  225 +type schema = {sch_id: id; opinion: opinion; reflexiveMark: refl; aspect: aspect; (* Schema record: identifier, opinion, reflexive mark and aspect, followed by the fields on the next line. *)
  226 + negativity: negation; predicativity: pred; positions: position list; text_rep: string} (* negation, predicativity, the list of positions and a string text_rep. NOTE(review): text_rep is presumably the textual form of the schema; confirm against the loader. *)
  227 +
  228 +type example = {exm_id: id; (* Example record; exm_id is its identifier. *)
  229 + meaning: id; (* an id; presumably the mng_id of the meaning the example belongs to, to be confirmed *)
  230 + phrases: id list; (* a list of ids; presumably the phrases the example illustrates, to be confirmed *)
  231 + sentence: string; (* the sentence, as a string *)
  232 + source: string; (* the source, kept as a plain string *)
  233 + opinion: opinion; (* uses the opinion variant type, like the opinion field of schema *)
  234 + note: string} (* a note, as a string *)
  235 +
  236 +type sel_prefs = (* One preference value in one of three forms; presumably a selectional preference, judging by the name. *)
  237 + NumericP of int (* carries an int. NOTE(review): presumably a numeric identifier; confirm against the loader *)
  238 + | SymbolP of string (* carries a string *)
  239 + | RelationP of string * id (* carries a string together with an id; presumably a relation name and its target, to be confirmed *)
  240 +
  241 +type argument = {arg_id: id; (* Argument record; arg_id is its identifier. NOTE(review): the labels role and sel_prefs are also fields of position, so an unannotated use of either label after this point resolves to argument unless the expected type says otherwise. *)
  242 + role: string; (* role, stored as a string *)
  243 + role_attribute: string; (* role attribute, stored as a string *)
  244 + sel_prefs: sel_prefs list list} (* a list of lists of sel_prefs values. NOTE(review): how the two nesting levels combine is not visible here *)
  245 +
  246 +type frame2 = {frm_id: id; (* Frame record built from ids and arguments; distinct from the variant type frame defined earlier in this file. *)
  247 + opinion: string; (* NOTE(review): a plain string here, whereas schema and example use the opinion variant type *)
  248 + meanings: id list; (* a list of ids; presumably mng_id values of meaning records, to be confirmed *)
  249 + arguments: argument list} (* the argument records of the frame *)
  250 +
  251 +type meaning = {mng_id: id; (* Meaning record; mng_id is its identifier. *)
  252 + name: string;
  253 + variant: string;
  254 + plwnluid: int; (* NOTE(review): presumably a plWordnet lexical unit id, to be confirmed; empty_meaning stores -1 here *)
  255 + gloss: string}
  256 +
  257 +let empty_meaning = {mng_id = empty_id; (* Default meaning: empty id, empty strings and plwnluid set to -1. *)
  258 + name = "";
  259 + variant = "";
  260 + plwnluid = (-1); (* -1 appears to act as the no-value marker; confirm that consumers treat it that way *)
  261 + gloss = ""}
  262 +
  263 +type connection = {argument: id; (* Connection record: one id in the argument field; presumably the arg_id of an argument record, to be confirmed. *)
  264 + phrases: id list} (* paired with a list of ids in the phrases field *)
  265 +
  266 +type alternation = {connections: connection list} (* Alternation record: a list of connection records. *)
  267 +
  268 +type entry = {ent_id: id; (* Entry record gathering schemata, examples, frames, meanings and alternations under one id. *)
  269 + status: string;
  270 + form_orth: string; (* presumably the orthographic form of the entry; to be confirmed *)
  271 + form_pos: string; (* presumably the part of speech of the entry, kept as a string; to be confirmed *)
  272 + schemata: schema list;
  273 + examples: example list;
  274 + frames: frame2 list; (* frame2 records, not the frame variant type *)
  275 + meanings: meaning list;
  276 + alternations: alternation list}
  277 +
  278 +let empty_entry = {ent_id=empty_id; status=""; form_orth=""; form_pos=""; schemata=[]; examples=[]; (* Default entry: empty id, empty strings and an empty list in every list field. *)
  279 + frames=[]; meanings=[]; alternations=[]}
... ...
walenty/makefile
... ... @@ -3,7 +3,8 @@ OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4 4 INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
5 5 OCAMLFLAGS=$(INCLUDES) -g
6   -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-plWordnet.cmxa eniam-walenty.cmxa
  6 +#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-plWordnet.cmxa eniam-walenty.cmxa
  7 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa
7 8 INSTALLDIR=`ocamlc -where`/eniam
8 9  
9 10 SOURCES= ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalTEI.ml ENIAMwalFrames.ml ENIAMwalenty.ml
... ... @@ -27,6 +28,9 @@ eniam-walenty.cmxa: $(SOURCES)
27 28 test: test.ml
28 29 $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml
29 30  
  31 +loader: ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml
  32 + $(OCAMLOPT) -o loader $(OCAMLOPTFLAGS) ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml
  33 +
30 34 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
31 35  
32 36 .mll.ml:
... ...