(* * ENIAM: Categorial Syntactic-Semantic Parser for Polish * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. *) (* Autor: Maciej Hołubowicz *) let przejdz funkcja poczym = let _ = List.rev (List.fold_left (fun l nazwa -> funkcja nazwa :: l) [] poczym) in () (*zwraca liste zwróconych wartosci przez funkcje*) let przejdz_lista funkcja poczym = List.rev (List.fold_left (fun l nazwa -> funkcja nazwa :: l) [] poczym) let przejdz_lista_second funkcja poczym = List.rev (List.fold_left (fun l nazwa -> (snd (funkcja nazwa)) :: l) [] poczym) (*łączy listy zwróconych wartości przez funkcje*) let przejdz_scal funkcja poczym = List.rev (List.fold_left (fun l nazwa -> funkcja nazwa @ l) [] poczym) (*zapisuje wynik wywołania do zmiennej i wywołuje ze zmienną*) let przejdz_zapisz funkcja zmienna poczym = List.fold_left (fun zmienna nazwa -> funkcja zmienna nazwa) zmienna poczym type id = {hash: bool; suffix: string; numbers: string list} let empty_id = {hash = false; suffix = ""; numbers = []} let rec last l = match l with | [a] -> a | a::b -> last b | _ -> failwith "pusta lista" let parse_full_id s = if String.length s = 0 then empty_id else if String.length s < 6 then failwith "za krótkie id" else let hash,s = if (String.get s 0) = '#' then true, String.sub s 1 (String.length s - 1) else false, s in if String.sub s 0 4 <> "wal_" then failwith "id nie ma wal" else let s,suf = match Str.split (Str.regexp "-") s with [s;suf] -> s,suf | _ -> failwith "zła ilość '-'" in let id = {hash = hash; suffix = suf; numbers = (Str.split (Str.regexp "\\.") s)} in id let parse_id s = if String.length s = 0 then empty_id else if String.length s < 6 then failwith "za krótkie id" else let hash,s = if (String.get s 0) = '#' then true, String.sub s 1 (String.length s - 1) else false, s in if String.sub s 0 4 <> "wal_" then failwith "id nie ma wal" else let s,suf = match Str.split (Str.regexp "-") s with [s;suf] -> s,suf | _ -> failwith "zła ilość '-'" in let id = {hash = hash; suffix = suf; numbers = (Str.split (Str.regexp "\\.") s)} in {id with numbers = [last id.numbers]} (* Początek kodu do wczytywania syntaticLayer *) type preposition = string type case = string type comp = string * (string list)(* oznaczony w xmlu jako "type" *) type aspect = string type lemmas = string list type category = string * phrase list and phrase = NP of case | PrepNP of preposition * case | AdjP of case | PrepAdjP of preposition * case | ComprepNP of preposition | CP of comp | NCP of case * comp | PrepNCP of preposition * case * comp | InfP of aspect | XP of category | AdvP of string | GerP of case | PrepGerP of preposition * case | NumP of case | PrepNumP of preposition * case | PpasP of case | PrepPpasP of preposition * case | PactP of case | PrepPactP of preposition * case | Qub | ComparP of string | Nonch | Or | Refl | Recip | E | DistrP | PossP | FixedP of phrase list * string | Lex of lex | Null and lex = { phrases_list: phrase list; lemma: string * string * lemmas; numeral_lemma: string * string * lemmas; negation: string; degree: string; number: string; reflex: string; gender: string; modification: string * position list; } and position = {psn_id: id; gf: string; phrases: (id * phrase) list; control: string list} let empty_lex = {phrases_list=[]; lemma="","",[]; numeral_lemma="","",[]; negation=""; degree=""; number=""; reflex=""; gender=""; modification = "",[]} let load_case = function | Xml.Element("f", ["name","case"], [Xml.Element("symbol", ["value",value], [])]) -> value | xml -> failwith ("load_case:\n " ^ Xml.to_string_fmt xml) let load_preposition = function | Xml.Element("f", ["name","preposition"], [Xml.Element("symbol", ["value",value], [])]) -> value | xml -> failwith ("load_preposition:\n " ^ Xml.to_string_fmt xml) let load_complex_preposition = function | Xml.Element("f", ["name","complex_preposition"], [Xml.Element("symbol", ["value",value], [])]) -> value | xml -> failwith ("load_complex_preposition:\n " ^ Xml.to_string_fmt xml) let load_aspect = function | Xml.Element("f", ["name","aspect"], [Xml.Element("symbol", ["value",value], [])]) -> value | xml -> failwith ("load_aspect:\n " ^ Xml.to_string_fmt xml) let load_advp = function | Xml.Element("f", ["name","category"], [Xml.Element("symbol", ["value",value], [])]) -> value | xml -> failwith ("load_advp:\n " ^ Xml.to_string_fmt xml) let load_type_constrains = function | Xml.Element("symbol", ["value",value], []) -> value | xml -> failwith ("load_type_constrains:\n " ^ Xml.to_string_fmt xml) (*type może mieć dodatkowo "constraints", czego chyba nie ma w dokumentacji, jest to lista elementów w stylu: Xml.Element("symbol", ["value",value], []) *) let load_type = function | Xml.Element("f", ["name","type"],[Xml.Element("fs", ["type","type_def"], x)]) -> begin match x with | [Xml.Element("f",["name","conjunction"], [Xml.Element("symbol",["value",value],[])])] -> value, [] | [Xml.Element("f",["name","conjunction"], [Xml.Element("symbol",["value",value],[])]); Xml.Element("f",["name","constraints"], [Xml.Element("vColl",["org","set"],set)])] -> value, przejdz_lista load_type_constrains set | _ -> failwith "load_type match" end | xml -> failwith ("load_type:\n " ^ Xml.to_string_fmt xml) (*Printf.printf "%s\n" (Xml.to_string_fmt xml)*) (*używam w load_lex*) let load_lemmas_set = function | Xml.Element("string",[], [Xml.PCData mstring]) -> mstring | xml -> failwith ("load_lemmas_set:\n " ^ Xml.to_string_fmt xml) (* category ma dodakowo "constrains", czego chyba nie ma w dokumentacji co więcej constrains zawiera w sobie zbiór typu phrases, więc jest rekurencyjne*) let rec load_category = function | Xml.Element("f", ["name","category"], [Xml.Element("fs", ["type","category_def"], x)]) -> begin match x with | [Xml.Element("f",["name","name"], [Xml.Element("symbol",["value",value],[])])] -> value, [] | [Xml.Element("f",["name","name"], [Xml.Element("symbol",["value",value],[])]); Xml.Element("f",["name","constraints"], [Xml.Element("vColl",["org","set"],set)])] -> value, przejdz_lista_second load_phrase set | _ -> failwith "load_category match" end; | xml -> failwith ("load_category:\n " ^ Xml.to_string_fmt xml) and load_fixed = function (* również wzajemnie rekurencyjne z load_phrase*) | [Xml.Element("f", ["name","argument"],set); Xml.Element("f", ["name","string"],[Xml.Element("string",[],[Xml.PCData stringg])])] -> FixedP(przejdz_lista_second load_phrase set, stringg) | _ -> failwith "load_fixed:\n " and load_modification_def = function (*pomocnicza do load_lex *) | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])])] -> value, [] | [Xml.Element("f",["name","type"],[Xml.Element("symbol",["value",value],[])]); Xml.Element("f",["name","positions"],[Xml.Element("vColl",["org","set"],set)])] -> value, przejdz_lista load_position set | x -> Printf.printf "%s\n" (Xml.to_string_fmt (List.hd x)); failwith "load_modification_def:\n" and load_lex arg xml = match xml with (* wzajemnie rekurencyjne z load_phrase*) | Xml.Element("f", ["name","argument"],[set]) -> {arg with phrases_list = [snd (load_phrase set)]} (* to samo co wyżej, tylko lista*) | Xml.Element("f", ["name","arguments"],[Xml.Element("vColl",["org","set"],set)]) -> {arg with phrases_list = przejdz_lista_second load_phrase set} | Xml.Element("f", ["name","modification"],[Xml.Element("fs", ["type","modification_def"],x)]) -> {arg with modification = load_modification_def x} | Xml.Element("f", ["name","lemma"],[Xml.Element("fs", ["type","lemma_def"], [Xml.Element("f",["name","selection_mode"],[Xml.Element("symbol", ["value",value1],[])]); Xml.Element("f",["name","cooccurrence"],[Xml.Element("symbol", ["value",value2],[])]); Xml.Element("f",["name","lemmas"],[Xml.Element("vColl", ["org","set"],lemmas)])])]) -> {arg with lemma = value1, value2, przejdz_lista load_lemmas_set lemmas} | Xml.Element("f", ["name","numeral_lemma"],[Xml.Element("fs", ["type","numeral_lemma_def"], [Xml.Element("f",["name","selection_mode"],[Xml.Element("symbol", ["value",value1],[])]); Xml.Element("f",["name","cooccurrence"],[Xml.Element("symbol", ["value",value2],[])]); Xml.Element("f",["name","lemmas"],[Xml.Element("vColl", ["org","set"],lemmas)])])]) -> {arg with numeral_lemma = value1, value2, przejdz_lista load_lemmas_set lemmas} | Xml.Element("f", ["name","negation"],[Xml.Element("symbol",["value",value],[])]) -> {arg with negation = value} | Xml.Element("f", ["name","degree"],[Xml.Element("symbol",["value",value],[])]) -> {arg with degree = value} | Xml.Element("f", ["name","number"],[Xml.Element("symbol",["value",value],[])]) -> {arg with number = value} | Xml.Element("f", ["name","reflex"],[Xml.Element("symbol",["value",value],[])]) -> {arg with reflex = value} | Xml.Element("f", ["name","reflex"],[]) -> {arg with reflex = ""} (*niby set, ale zawsze jest jeden element*) | Xml.Element("f", ["name","gender"], [Xml.Element("vColl", ["org","set"],[Xml.Element("symbol",["value",value],[])])]) -> {arg with gender = value} | xml -> Printf.printf "%s\n" (Xml.to_string_fmt xml); failwith "load_lex:\n " and load_phrase xml:id * phrase = let id, idtype, x = begin match xml with | Xml.Element("fs", ["xml:id", _id; "type", _idtype], _x) -> (_id, _idtype, _x) | Xml.Element("fs", ["type", _idtype], _x) -> ("", _idtype, _x) | _ -> failwith "load_phrase let id,idtype...\n" end; in let id = parse_id id in match idtype, x with | "np", [a] -> id, NP(load_case a); | "prepnp", [a;b] -> id, PrepNP(load_preposition a, load_case b) | "adjp", [a] -> id, AdjP(load_case a) | "prepadjp", [a;b] -> id, PrepAdjP(load_preposition a, load_case b) | "comprepnp", [a] -> id, ComprepNP(load_complex_preposition a) | "cp", [a] -> id, CP(load_type a) | "ncp", [a;b] -> id, NCP(load_case a, load_type b) | "prepncp", [a;b;c] -> id, PrepNCP(load_preposition a, load_case b, load_type c) | "infp", [a] -> id, InfP(load_aspect a) | "xp", [a] -> id, XP(load_category a) | "advp", [a] -> id, AdvP(load_advp a) | "nonch", [] -> id, Nonch | "or", [] -> id, Or | "refl", [] -> id, Refl | "E", [] -> id, E | "lex", x -> id, Lex(przejdz_zapisz load_lex empty_lex x) (* Printf.printf "%d\n" (List.length x); Printf.printf "%s\n" (Xml.to_string_fmt xml); *) | "fixed", x -> id, load_fixed x (*dodatkowe, nie ma ich w dokmentacji a są na poziomie 0 load_phrase*) | "possp", [] -> id, PossP | "recip", [] -> id, Recip | "distrp", [] -> id, DistrP | "compar", [Xml.Element("f",["name","compar_category"], [Xml.Element("symbol",["value",value],[])])] -> id, ComparP(value) (* dodatkowe: (gerp i prepgerp) są w dokumentacji, i pojawiają się po rekurencyjnym wywołaniu z funkcji load_lex podobne kolejno do: np, prepnp*) | "gerp", [a] -> id, GerP(load_case a) | "prepgerp", [a;b] -> id, PrepGerP(load_preposition a, load_case b) (*inne dodatkowe które też są powywołaniu z load_lex*) | "nump", [a] -> id, NumP(load_case a) | "prepnump", [a;b] -> id, PrepNumP(load_preposition a, load_case b) | "ppasp", [a] -> id, PpasP(load_case a) | "prepppasp", [a;b] -> id, PrepPpasP(load_preposition a, load_case b) | "qub", [] -> id, Qub (*dodatkowe, po wywołaniu z load_position *) | "pactp", [a] -> id, PactP(load_case a) | _ -> failwith ("load_phrase match:\n " ^ Xml.to_string_fmt xml) and load_control = function | Xml.Element("symbol", ["value", value], []) -> value | xml -> failwith ("load_control:\n " ^ Xml.to_string_fmt xml) and load_position_info arg = function | Xml.Element("f",["name", "function"], [Xml.Element("symbol",["value", value],[])]) -> {arg with gf = value} | Xml.Element("f",["name", "phrases"], [Xml.Element("vColl",["org", "set"], phrases_set)]) -> {arg with phrases = przejdz_lista load_phrase phrases_set} | Xml.Element("f",["name", "control"], [Xml.Element("vColl",["org", "set"], control_set)]) -> {arg with control = (przejdz_lista load_control control_set)} | xml -> failwith ("load_position_info:\n " ^ Xml.to_string_fmt xml) and load_position = function | Xml.Element("fs", ["xml:id", id; "type","position"], listt) -> let id = parse_id id in let result = {psn_id = id; gf = ""; phrases = []; control = []} in let result = przejdz_zapisz load_position_info result listt in result | Xml.Element("fs", ["type","position"], listt) -> let result = {psn_id = parse_id ""; gf = ""; phrases = []; control = []} in let result = przejdz_zapisz load_position_info result listt in result | xml -> failwith ("load_position:\n " ^ Xml.to_string_fmt xml) type schema = {sch_id: id; opinion: string; reflexiveMark: string; aspect: string; negativity: string; predicativity: string; positions: position list} let load_schema_info arg = function | Xml.Element("f", ["name","opinion"], [Xml.Element("symbol", ["value",opinion_value],[])]) -> {arg with opinion = opinion_value} | Xml.Element("f", ["name","reflexive_mark"], [Xml.Element("binary", ["value",mark_value],[])]) -> {arg with reflexiveMark = mark_value} | Xml.Element("f", ["name","aspect"], [Xml.Element("symbol", ["value", aspect_value],[])]) -> {arg with aspect = aspect_value} | Xml.Element("f", ["name","aspect"], []) -> arg | Xml.Element("f", ["name","negativity"], [Xml.Element("symbol", ["value", negativity_value],[])]) -> {arg with negativity = negativity_value} | Xml.Element("f", ["name","negativity"], []) -> arg | Xml.Element("f", ["name","negativity"], [Xml.Element("binary", ["value", binary_value], [])]) -> {arg with negativity = binary_value} | Xml.Element("f", ["name","predicativity"], [Xml.Element("binary", ["value", binary_value],[])]) -> {arg with predicativity = binary_value} | Xml.Element("f", ["name","positions"], [Xml.Element("vColl",["org","set"], positions)]) -> {arg with positions = przejdz_lista load_position positions} | xml -> failwith ("load_schema_info\n " ^ Xml.to_string_fmt xml) let load_schema = function Xml.Element("fs", ["xml:id",id; "type","schema"], schema) -> let id = parse_id id in let result = {sch_id = id; opinion = ""; reflexiveMark = ""; aspect = ""; negativity = ""; predicativity = ""; positions = []} in let result = przejdz_zapisz load_schema_info result schema in result | xml -> failwith ("load_schema:\n " ^ Xml.to_string_fmt xml) let load_syntactic = function Xml.Element("f", ["name", "schemata"], [Xml.Element("vColl", ["org","set"], schemata_set)]) -> przejdz_lista load_schema schemata_set | xml -> failwith ("load_syntactic:\n " ^ Xml.to_string_fmt xml) (* Koniec kodu do wczytywania syntaticLayer *) (* Początek kodu do wczytywania examplesLayer *) type example = {exm_id: id; meaning: string; (*id*) phrases: id list; (*zbiór id!!!*) sentence: string; source: string; opinion: string; note: string} let load_phrases_set = function | Xml.Element("fs", ["sameAs", same_as; "type","phrase"], []) -> let p = parse_full_id same_as in {p with numbers = List.tl p.numbers} | xml -> failwith ("load_phrases_set :\n " ^ Xml.to_string_fmt xml) let load_example_info arg = function | Xml.Element("f", ["name", "meaning"], [Xml.Element("fs", ["sameAs",same_as; "type", "lexical_unit"],[])]) -> {arg with meaning = same_as} | Xml.Element("f", ["name", "phrases"], [Xml.Element("vColl", ["org","set"], phrases_set)]) -> {arg with phrases = przejdz_lista load_phrases_set phrases_set} | Xml.Element("f", ["name", "sentence"], [Xml.Element("string",[], [Xml.PCData sentence_string])]) -> {arg with sentence = sentence_string} | Xml.Element("f", ["name", "source"], [Xml.Element("symbol", ["value",source_value], [])]) -> {arg with source = source_value} | Xml.Element("f", ["name", "opinion"], [Xml.Element("symbol", ["value",opinion_value], [])]) -> {arg with opinion = opinion_value} | Xml.Element("f", ["name", "note"], [Xml.Element("string",[], [Xml.PCData note_string])]) -> {arg with note = note_string} | Xml.Element("f", ["name", "note"], [Xml.Element("string",[], [])]) -> arg | xml -> failwith ("load_example_info :\n " ^ Xml.to_string_fmt xml) let load_example = function | Xml.Element("fs", ["xml:id", id; "type", "example"], example_elements) -> let id = parse_id id in let result = {exm_id = id; meaning = ""; phrases = []; sentence = ""; source = ""; opinion = ""; note = "";} in let result = przejdz_zapisz load_example_info result example_elements in result | xml -> failwith ("load_example :\n " ^ Xml.to_string_fmt xml) let load_examples = function | Xml.Element("f", ["name","examples"], [Xml.Element("vColl", ["org", "set"], examples_set)]) -> przejdz_lista load_example examples_set | xml -> failwith ("load_examples:\n " ^ Xml.to_string_fmt xml) (* Koniec kodu do wczytywania examplesLayer *) (* Początek kodu do wczytywania semanticLayer *) type sel_prefs = Numeric of string | Symbol of string | Relation of string * string type argument = {arg_id: id; role: string; role_attribute: string; sel_prefs: sel_prefs list list} type frame = {frm_id: id; opinion: string; meanings: string list; arguments: argument list} let load_self_prefs_sets = function | Xml.Element("numeric", ["value",value],[]) -> Numeric(value) | Xml.Element("symbol", ["value",value],[]) -> Symbol(value) | Xml.Element("fs", ["type", "relation"], [ Xml.Element("f", ["name", "type"],[Xml.Element("symbol", ["value", value], [])]); Xml.Element("f", ["name", "to"],[Xml.Element("fs", ["sameAs", same_as; "type", "argument"], [])])]) -> Relation(value,same_as) | xml -> failwith ("load_self_prefs_sets :\n " ^ Xml.to_string_fmt xml) let load_argument_self_prefs = function | Xml.Element("f", ["name", name], [Xml.Element("vColl", ["org","set"], self_prefs_set)]) -> przejdz_lista load_self_prefs_sets self_prefs_set | xml -> failwith ("load_argument_self_prefs :\n " ^ Xml.to_string_fmt xml) let load_argument_info arg = function | Xml.Element("f", ["name","role"], [Xml.Element("symbol", ["value",value],[])]) -> {arg with role = value} | Xml.Element("f", ["name","role_attribute"], [Xml.Element("symbol", ["value",value],[])]) -> {arg with role_attribute = value} | Xml.Element("f", ["name","sel_prefs"], [Xml.Element("fs", ["type","sel_prefs_groups"], self_prefs)]) -> {arg with sel_prefs = przejdz_lista load_argument_self_prefs self_prefs} | xml -> failwith ("load_argument_info :\n " ^ Xml.to_string_fmt xml) let load_arguments_set = function | Xml.Element("fs", ["xml:id", id; "type", "argument"], info) -> let id = parse_id id in let result = {arg_id = id; role = ""; role_attribute = ""; sel_prefs = []} in let result = przejdz_zapisz load_argument_info result info in result | xml -> failwith ("load_arguments_set :\n " ^ Xml.to_string_fmt xml) let load_meanings_set = function | Xml.Element("fs", ["sameAs", same_As; "type", "lexical_unit"], []) -> same_As | xml -> failwith ("load_meanings_set :\n " ^ Xml.to_string_fmt xml) let load_frame = function | Xml.Element("fs", ["xml:id", id; "type", "frame"], [ Xml.Element("f", ["name", "opinion"], [Xml.Element("symbol", ["value", opinion],[])]); Xml.Element("f", ["name", "meanings"], [Xml.Element("vColl", ["org", "set"], meanings_set)]); Xml.Element("f", ["name", "arguments"], [Xml.Element("vColl", ["org", "set"], arguments_set)])]) -> let id = parse_id id in {frm_id = id; opinion = opinion; meanings = przejdz_lista load_meanings_set meanings_set; arguments = przejdz_lista load_arguments_set arguments_set} | xml -> failwith ("load_frame :\n " ^ Xml.to_string_fmt xml) let load_semantic = function | Xml.Element("f", ["name","frames"], [Xml.Element("vColl", ["org", "set"], frame_set)]) -> przejdz_lista load_frame frame_set | xml -> failwith ("load_semantic:\n " ^ Xml.to_string_fmt xml) (* Koniec kodu do wczytywania semanticLayer *) (* Początek do wczytywania meaningsLayer *) type meaning = {mng_id: id; name: string; variant: string; plwnluid: string; gloss: string} let empty_meaning = {mng_id = empty_id; name = ""; variant = ""; plwnluid = ""; gloss = ""} let load_meaning_info arg = function | Xml.Element("f", ["name", "name"], [Xml.Element("string", [], [Xml.PCData name_string])]) -> {arg with name = name_string} | Xml.Element("f", ["name", "variant"], [Xml.Element("string", [], [Xml.PCData variant_string])]) -> {arg with variant = variant_string} | Xml.Element("f", ["name", "plwnluid"], [Xml.Element("numeric", ["value",value],[])]) -> {arg with plwnluid = value} | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [Xml.PCData gloss_string])]) -> {arg with gloss = gloss_string} | Xml.Element("f", ["name", "gloss"], [Xml.Element("string", [], [])]) -> arg | xml -> failwith ("load_meaning_info:\n " ^ Xml.to_string_fmt xml) let load_meaning = function | Xml.Element("fs", ["xml:id", id; "type", "lexical_unit"], meaning_info) -> let id = parse_id id in let result = empty_meaning in let result = {result with mng_id = id} in let result = przejdz_zapisz load_meaning_info result meaning_info in result | xml -> failwith ("load_meaning:\n " ^ Xml.to_string_fmt xml) (* Koniec kodu do wczytywania meaningsLayer *) (* Początek kodu do wczytywania connectionsLayer *) type connection = {argument: string; phrases: string list} type alternation = {connections: connection list} let load_phrases_connections = function | Xml.Element("fs", ["sameAs",sameAs; "type", "phrase"], []) -> sameAs | xml -> failwith ("load_phrases_connections: \n " ^ Xml.to_string_fmt xml) let load_alter_connection = function | Xml.Element("fs", ["type","connection"], [ Xml.Element("f", ["name", "argument"], [Xml.Element("fs", ["sameAs",sameAs; "type","argument"],[])]); Xml.Element("f", ["name", "phrases"], [Xml.Element("vColl", ["org","set";], phrases)])]) -> {argument = sameAs; phrases = (przejdz_lista load_phrases_connections phrases)} | xml -> failwith ("load_alter_connections: \n " ^ Xml.to_string_fmt xml) let load_alternations = function | Xml.Element("fs", ["type","alternation"], [Xml.Element("f", ["name", "connections"], [Xml.Element("vColl", ["org", "set"], connections_set)])]) -> {connections = przejdz_lista load_alter_connection connections_set} | xml -> failwith ("load_alternations: \n " ^ Xml.to_string_fmt xml) let load_connections = function | Xml.Element("f", ["name","alternations"], [Xml.Element("vColl", ["org", "set"], alternations)]) -> przejdz_lista load_alternations alternations | xml -> failwith ("load_connections: \n " ^ Xml.to_string_fmt xml) (* Koniec kodu do wczytywania connectionsLayer *) type entry = {ent_id: id; form_orth: string; form_pos: string; schemata: schema list; examples: example list; frames: frame list; meanings: meaning list; alternations: alternation list} let load_entry = function | Xml.Element("entry",["xml:id",id], [ Xml.Element("form", [], [ Xml.Element("orth",[],[Xml.PCData orth]); Xml.Element("pos",[],[Xml.PCData pos])]); Xml.Element("fs", ["type","syntactic_layer"], [syntactics]); Xml.Element("fs", ["type","examples_layer"], [examples]); Xml.Element("fs", ["type","semantic_layer"], [semantic]); Xml.Element("fs", ["type","meanings_layer"], [Xml.Element("f", ["name","meanings"], [Xml.Element("vColl", ["org", "set"], meanings_set)])]); Xml.Element("fs", ["type","connections_layer"],[connections])]) -> let id = parse_id id in {ent_id = id; form_orth = orth; form_pos = pos; schemata = load_syntactic syntactics; (*ok ok2*) examples = load_examples examples; (*ok ok2*) frames = load_semantic semantic; (*ok ok2*) meanings = przejdz_lista load_meaning meanings_set; (*ok ok2*) alternations = load_connections connections} (*ok ok2*) | Xml.Element("entry",["xml:id",id], [ (*skopiowane*) Xml.Element("form", [], [ Xml.Element("orth",[],[Xml.PCData orth]); Xml.Element("pos",[],[Xml.PCData pos])]); Xml.Element("fs", ["type","syntactic_layer"], [syntactics]); Xml.Element("fs", ["type","examples_layer"], [examples])]) -> let id = parse_id id in {ent_id = id; form_orth = orth; form_pos = pos; schemata = load_syntactic syntactics; examples = load_examples examples; frames = []; meanings = []; alternations = []} | xml -> failwith ("load_entry: \n" ^ Xml.to_string_fmt xml) let load_walenty filename:entry list = begin match Xml.parse_file filename with Xml.Element("TEI", _, [Xml.Element("teiHeader",_,_) ; Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> przejdz_lista load_entry entries | _ -> failwith "load_walenty" end (* !!! Wczytywanie walentego *) (* let walenty = load_walenty Paths.walenty_filename *) (* let _ = Printf.printf "loading: OK\n" *) (* ******************************************* *) (* (*sprawdzanie czy id jednoznacznie definiuje zawartość typu*) (*meaningsLayer*) module StringMap = Map.Make(String) let cnt = ref 0;; let add_new map meaning = let num_id = match meaning.mng_id with {hash=false; numbers=[num_id]; suffix="mng"} -> num_id | _ -> failwith "zła składnia id" in if StringMap.mem num_id map then (Printf.printf "okkk\n"; let meaning2 = StringMap.find num_id map in if meaning = meaning2 then map else failwith "różne1111") else (cnt:=!cnt+1;StringMap.add num_id meaning map) let check_entry_menaings mapa entry = przejdz_zapisz add_new mapa entry.meanings let check_meanings walenty = przejdz_zapisz check_entry_menaings StringMap.empty walenty (* let _ = check_meanings walenty let _ = Printf.printf "meaning map.size: %d\n" !cnt *) (*semanticLayer*) let cnt = ref 0;; (*arg_id*) let add_new map argument = let arg_id = match argument.arg_id with {hash=false; numbers=[num_id]; suffix="arg"} -> num_id | _ -> failwith "zła składnia id" in if StringMap.mem arg_id map then (Printf.printf "okkk\n"; let val2 = StringMap.find arg_id map in let val1 = argument in if val1 = val2 then map else failwith "różne1111") else (cnt:=!cnt+1; StringMap.add arg_id argument map) let check_frame mapa frame = przejdz_zapisz add_new mapa frame.arguments let check_entry_frames mapa entry = przejdz_zapisz check_frame mapa entry.frames let check_meanings walenty = przejdz_zapisz check_entry_frames StringMap.empty walenty (* let _ = check_meanings walenty let _ = Printf.printf "entry.frame.argument map.size: %d\n" !cnt *) (*arg_id done*) let cnt = ref 0;; (*frm_id*) let add_new map frame = let id = match frame.frm_id with {hash=false; numbers=[num_id]; suffix="frm"} -> num_id | _ -> failwith "zła składnia id" in if StringMap.mem id map then (Printf.printf "okkk\n"; let val2 = StringMap.find id map in let val1 = frame in if val1 = val2 then map else failwith "różne1111") else (cnt:=!cnt+1; StringMap.add id frame map) let check_entry_frames mapa entry = przejdz_zapisz add_new mapa entry.frames let check_meanings walenty = przejdz_zapisz check_entry_frames StringMap.empty walenty (* let _ = check_meanings walenty let _ = Printf.printf "entry.frame map.size: %d\n" !cnt *) (*frm.id done*) (*examplesLayer*) (*na razie zeruje phrases!!!!*) let print_example example = Printf.printf "meaning: %s\n phrases: " example.meaning; (* print_endline (String.concat "; " example.phrases);*) Printf.printf "sentence: %s\n" example.sentence; Printf.printf "source: %s\n" example.source; Printf.printf "opinion: %s\n" example.opinion; Printf.printf "note: %s\n\n" example.note let cnt = ref 0;; let takiesame = ref 0;; let add_new map example = let id = match example.exm_id with {hash=false; numbers=[num_id]; suffix="exm"} -> num_id | _ -> failwith "zła składnia id" in (* let example = {example with phrases = []} in (*uwaga!!!! zeruje phrases!!!*)*) let example = {example with meaning = ""} in (*uwaga!!!! zeruje meaning!!!*) if StringMap.mem id map then (takiesame:=!takiesame+1; (* Printf.printf "okkk\n";*) let val2 = StringMap.find id map in let val1 = example in if val1 = val2 then map else let _ = print_example val1 in let _ = print_example val2 in failwith "różne1111") else (cnt:=!cnt+1; StringMap.add id example map) let check_entry_example mapa entry = przejdz_zapisz add_new mapa entry.examples let check_meanings walenty = przejdz_zapisz check_entry_example StringMap.empty walenty (* let _ = check_meanings walenty let _ = Printf.printf "examples map.size: %d takich samych: %d\n" !cnt !takiesame *) (*syntatcticLayer position*) let cnt = ref 0;; let takiesame = ref 0;; let add_new map position = let id = match position.psn_id with {hash=false; numbers=[num_id]; suffix="psn"} -> num_id | _ -> failwith "zła składnia id" in (* let position = {position with phrases = przejdz_lista (fun (x,y) -> (parse_id "",y)) position.phrases} in*) (*uwaga!!!!*) if StringMap.mem id map then (takiesame:=!takiesame+1; (* Printf.printf "okkk\n";*) let val2 = StringMap.find id map in let val1 = position in if val1 = val2 then map else failwith "różne1111") else (cnt:=!cnt+1; StringMap.add id position map) let check_schema mapa schema = przejdz_zapisz add_new mapa schema.positions let check_entry mapa entry = przejdz_zapisz check_schema mapa entry.schemata let check walenty = przejdz_zapisz check_entry StringMap.empty walenty (* let _ = check walenty let _ = Printf.printf "syntactic...position map.size: %d takich samych: %d\n" !cnt !takiesame *) (* schema *) let cnt = ref 0;; let takiesame = ref 0;; (*let clear_id (position:position) = let position = {position with phrases = [](*przejdz_lista (fun (x,y) -> (empty_id,y)) position.phrases*)} in (*uwaga!!!!*) let position = {position with psn_id = empty_id} in position*) let print_schema (schema:schema) = Printf.printf "schema.opinion= %s\n" schema.opinion; Printf.printf "schema.reflexiveMark= %s\n" schema.reflexiveMark; Printf.printf "schema.aspect= %s\n" schema.aspect; Printf.printf "schema.negativity= %s\n" schema.negativity; Printf.printf "schema.predicativity= %s\n___________________\n" schema.predicativity let add_new map schema = let id = match schema.sch_id with {hash=false; numbers=[num_id]; suffix="sch"} -> num_id | _ -> failwith "zła składnia id" in let schema = {schema with opinion = ""} in (*uwaga, zeruje opinie!!!*) if StringMap.mem id map then (takiesame:=!takiesame+1; (* Printf.printf "okkk\n";*) let val2 = StringMap.find id map in let val1 = schema in if val1 = val2 then map else let _ = print_schema val1 in let _ = print_schema val2 in failwith "różne1111") else (cnt:=!cnt+1; StringMap.add id schema map) let check_schema mapa schema = add_new mapa schema let check_entry mapa entry = przejdz_zapisz check_schema mapa entry.schemata let check walenty = przejdz_zapisz check_entry StringMap.empty walenty (* let _ = check walenty let _ = Printf.printf "syntactic...schema map.size: %d takich samych: %d\n" !cnt !takiesame *) (*phrases*) let cnt = ref 0;; let takiesame = ref 0;; let add_new map (id, phrase) = let id = match id with {hash=false; numbers=[num_id]; suffix="phr"} -> num_id | _ -> failwith "zła składnia id" in if StringMap.mem id map then (takiesame:=!takiesame+1; (* Printf.printf "okkk\n";*) let val2 = StringMap.find id map in let val1 = phrase in if val1 = val2 then map else failwith "różne1111") else (cnt:=!cnt+1; StringMap.add id phrase map) let check_pos mapa (position:position) = przejdz_zapisz add_new mapa position.phrases let check_schema mapa schema = przejdz_zapisz check_pos mapa schema.positions let check_entry mapa entry = przejdz_zapisz check_schema mapa entry.schemata let check walenty = przejdz_zapisz check_entry StringMap.empty walenty *) (* let _ = check walenty let _ = Printf.printf "syntactic...phrases map.size: %d takich samych: %d\n" !cnt !takiesame *) (* loading: OK meaning map.size: 32962 entry.frame.argument map.size: 10475 entry.frame map.size: 3463 examples map.size: 146536 takich samych: 64 syntactic...position map.size: 7021 takich samych: 195288 syntactic...schema map.size: 21247 takich samych: 51241 *) (* author: Wojciech Jaworski *) open Xstd let rec parse_comp = function "int",[] -> WalTypes.Int,[] | "rel",[] -> WalTypes.Rel,[] | "int",l -> WalTypes.Int, Xlist.map l (fun s -> WalTypes.Comp s) | "rel",l -> WalTypes.Rel, Xlist.map l (fun s -> WalTypes.Comp s) | s,[] -> WalTypes.CompTypeUndef,[WalTypes.Comp s] | _ -> failwith "parse_comp" let rec morf_of_phrase = function NP c -> WalTypes.Phrase (WalTypes.NP(WalParser.parse_case [WalTypes.Text c])) | PrepNP(prep,c) -> WalTypes.Phrase (WalTypes.PrepNP(WalTypes.Sem,WalParser.parse_prep [WalTypes.Text prep],WalParser.parse_case [WalTypes.Text c])) | AdjP c -> WalTypes.Phrase (WalTypes.AdjP(WalParser.parse_case [WalTypes.Text c])) | PrepAdjP(prep,c) -> WalTypes.Phrase (WalTypes.PrepAdjP(WalTypes.Sem,WalParser.parse_prep [WalTypes.Text prep],WalParser.parse_case [WalTypes.Text c])) | ComprepNP prep -> WalTypes.Phrase (WalTypes.ComprepNP(WalTypes.Sem,WalParser.parse_prep [WalTypes.Text prep])) | CP(co) -> WalTypes.PhraseComp(WalTypes.Cp,parse_comp co) | NCP(c,co) -> WalTypes.PhraseComp(WalTypes.Ncp(WalParser.parse_case [WalTypes.Text c]),parse_comp co) | PrepNCP(prep,c,co) -> WalTypes.PhraseComp(WalTypes.Prepncp(WalParser.parse_prep [WalTypes.Text prep],WalParser.parse_case [WalTypes.Text c]),parse_comp co) | InfP(a) -> WalTypes.Phrase (WalTypes.InfP(WalParser.parse_aspect [WalTypes.Text a])) | XP(mode,phrases) -> WalTypes.PhraseAbbr(WalTypes.Xp(fst (WalParser.parse_mode [WalTypes.Text mode])), Xlist.map phrases morf_of_phrase) | AdvP mode -> WalTypes.PhraseAbbr(WalTypes.Advp(fst (WalParser.parse_mode [WalTypes.Text mode])), []) | NumP(c) -> WalTypes.Phrase (WalTypes.NumP(WalParser.parse_case [WalTypes.Text c])) | PrepNumP(prep,c) -> WalTypes.Phrase (WalTypes.PrepNumP(WalTypes.Sem,WalParser.parse_prep [WalTypes.Text prep],WalParser.parse_case [WalTypes.Text c])) | ComparP prep -> WalTypes.PhraseAbbr (WalTypes.ComparP(WalParser.parse_prep [WalTypes.Text prep]),[]) | Nonch -> WalTypes.PhraseAbbr (WalTypes.Nonch,[]) | Or -> WalTypes.Phrase WalTypes.Or | Refl -> WalTypes.Phrase (WalTypes.Lex "się") | Recip -> WalTypes.Phrase (WalTypes.Lex "się") | E -> WalTypes.E WalTypes.Null | DistrP -> WalTypes.PhraseAbbr (WalTypes.Distrp,[]) | PossP -> WalTypes.PhraseAbbr (WalTypes.Possp,[]) | FixedP(_,s) -> WalTypes.Phrase (WalTypes.FixedP s) | Lex lex -> (*print_endline "lex";*) WalTypes.Phrase (WalTypes.Null) (* FIXME: ni *) | Null -> WalTypes.Phrase (WalTypes.Null) | _ -> failwith "morf_of_phrase" (* | GerP(c) -> WalTypes.Phrase (WalTypes. | PrepGerP(prep,c) -> WalTypes.Phrase (WalTypes. | PpasP(c) -> WalTypes.Phrase (WalTypes. | PrepPpasP(prep,c) -> WalTypes.Phrase (WalTypes. | PPact(c) -> WalTypes.Phrase (WalTypes. | PrepPactP(prep,c) -> WalTypes.Phrase (WalTypes. | Qub -> WalTypes.Phrase (WalTypes.*) open WalTypes let process_phrases phrases = Xlist.fold phrases StringMap.empty (fun phrases (id,phrase) -> let id = match id with {hash=false; numbers=[(*_;_;_;*)id]; suffix="phr"} -> id | _ -> failwith "process_phrases" in StringMap.add phrases id phrase) let process_positions positions = Xlist.fold positions StringMap.empty (fun positions position -> let id = match position.psn_id with {hash=false; numbers=[(*_;_;*)id]; suffix="psn"} -> id | _ -> failwith "process_positions" in let r,cr,ce = WalParser.parse_roles (position.gf :: position.control) in let phrases = process_phrases position.phrases in StringMap.add positions id (r,cr,ce,phrases)) let process_schemata schemata = Xlist.fold schemata StringMap.empty (fun schemata schema -> let id = match schema.sch_id with {hash=false; numbers=[(*_;*)id]; suffix="sch"} -> id | _ -> failwith "process_schemata" in let schema_atrs = DefaultAtrs([], WalParser.parse_refl [Text schema.reflexiveMark], WalParser.parse_opinion schema.opinion, WalParser.parse_negation [Text schema.negativity], WalParser.parse_pred [Text schema.predicativity], WalParser.parse_aspect [Text schema.aspect]) in let positions = process_positions schema.positions in StringMap.add schemata id (schema_atrs,positions)) let add_meanings meanings = function DefaultAtrs(_,r,o,n,p,a) -> DefaultAtrs(meanings,r,o,n,p,a) | _ -> failwith "add_meanings" let process_arguments arguments = Xlist.fold arguments StringMap.empty (fun arguments argument -> let id = match argument.arg_id with {hash=false; numbers=[(*_;_;*)id]; suffix="arg"} -> id | _ -> failwith "process_arguments" in StringMap.add arguments id (argument.role,argument.role_attribute,argument.sel_prefs)) let get_meaning_id meaning = match parse_full_id meaning with {hash=true; numbers=[_;id]; suffix="mng"} -> id | _ -> failwith "get_meaning_id" let get_schema_id alt = try match parse_full_id (List.hd ((List.hd alt.connections).phrases)) with {hash=true; numbers=[_;id;_;_]; suffix="phr"} -> id | _ -> failwith "get_schema_id 1" with _ -> failwith "get_schema_id 2" let get_frame_id alt = try match parse_full_id ((List.hd alt.connections).argument) with {hash=true; numbers=[_;id;_]; suffix="arg"} -> id | _ -> failwith "get_frame_id" with _ -> failwith "get_frame_id" let get_argument_id arg = match parse_full_id arg with {hash=true; numbers=[_;_;id]; suffix="arg"} -> id | _ -> failwith "get_argument_id" let get_position_id phrases = try match parse_full_id (List.hd phrases) with {hash=true; numbers=[_;_;id;_]; suffix="phr"} -> id | _ -> failwith "get_position_id" with _ -> failwith "get_position_id" let get_phrase_id arg = match parse_full_id arg with {hash=true; numbers=[_;_;_;id]; suffix="phr"} -> id | _ -> failwith "get_phrase_id" let process_frames frames = Xlist.fold frames StringMap.empty (fun frames frame -> let id = match frame.frm_id with {hash=false; numbers=[(*_;*)id]; suffix="frm"} -> id | _ -> failwith "process_frames" in let arguments = process_arguments frame.arguments in let meaning_ids = Xlist.map frame.meanings get_meaning_id in StringMap.add frames id (meaning_ids,arguments)) let process_meanings meanings = Xlist.fold meanings StringMap.empty (fun meanings meaning -> let id = match meaning.mng_id with {hash=false; numbers=[(*_;*)id]; suffix="mng"} -> id | _ -> failwith "process_meanings" in StringMap.add meanings id (meaning.name ^ " " ^ meaning.variant)) let process_sel_pref = function Numeric s -> (try PreWordnet.synset_name s with Not_found -> "unknown") | Symbol s -> s | Relation(s,t) -> "REL" (* FIXME *) let connect entry = let schemata = process_schemata entry.schemata in let frames = process_frames entry.frames in let meanings = process_meanings entry.meanings in Xlist.fold entry.alternations [] (fun found alt -> if alt.connections = [] then found else let schema_id = get_schema_id alt in let frame_id = get_frame_id alt in let schema_atrs,positions = StringMap.find schemata schema_id in let meaning_ids,arguments = StringMap.find frames frame_id in let positions = Xlist.fold alt.connections [] (fun positions2 conn -> let argument_id = get_argument_id conn.argument in let position_id = get_position_id conn.phrases in let r,cr,ce,phrases = StringMap.find positions position_id in let phrases = Xlist.fold conn.phrases [] (fun phrases2 id -> let phrase_id = get_phrase_id id in try StringMap.find phrases phrase_id :: phrases2 with Not_found -> (*Printf.printf "%s\n%!" entry.form_orth;*)phrases2) in let role,role_attribute,sel_prefs = StringMap.find arguments argument_id in let sel_prefs = Xlist.map (List.flatten sel_prefs) process_sel_pref in {gf=r; role=role; role_attr=role_attribute; sel_prefs=sel_prefs; cr=cr; ce=ce; dir=Both; morfs=Xlist.map phrases morf_of_phrase} :: positions2) in let meanings = List.rev (Xlist.fold meaning_ids [] (fun l id -> (StringMap.find meanings id) :: l)) in let schema_atrs = add_meanings meanings schema_atrs in (entry.form_orth,entry.form_pos,Frame(schema_atrs,positions)) :: found) let connect2 entry = let schemata = process_schemata entry.schemata in StringMap.fold schemata [] (fun found _ (schema_atrs,positions) -> let positions = StringMap.fold positions [] (fun positions2 _ (r,cr,ce,phrases) -> let phrases = StringMap.fold phrases [] (fun phrases2 _ phrase -> phrase :: phrases2) in {gf=r; role=""; role_attr=""; sel_prefs=[]; cr=cr; ce=ce; dir=Both; morfs=Xlist.map phrases morf_of_phrase} :: positions2) in (entry.form_orth,entry.form_pos,Frame(schema_atrs,positions)) :: found) let load_walenty2 () = let walenty = load_walenty Paths.walenty_filename in Xlist.fold walenty StringMap.empty (fun walenty entry -> if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) -> let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in StringMap.add walenty pos map) else Xlist.fold (connect entry) walenty (fun walenty (lemma,pos,frame) -> let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in StringMap.add walenty pos map)) let print_stringqmap filename qmap = let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in File.file_out filename (fun file -> Xlist.iter (Xlist.sort l compare) (fun (v,k) -> Printf.fprintf file "%5d %s\n" v k)) let sel_prefs_quantities walenty = Xlist.fold walenty StringQMap.empty (fun quant e -> Xlist.fold e.frames quant (fun quant f -> Xlist.fold f.arguments quant (fun quant a -> Xlist.fold a.sel_prefs quant (fun quant l -> Xlist.fold l quant (fun quant -> function Numeric s -> let name = try PreWordnet.synset_name s with Not_found -> "unknown" in StringQMap.add quant ("N " ^ s ^ " " ^ name) | Symbol s -> StringQMap.add quant ("S " ^ s) | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t)))))) (*let _ = let walenty = load_walenty walenty_filename in let quant = sel_prefs_quantities walenty in print_stringqmap "results/quant_sel_prefs.txt" quant*) let print_entry filename lex = match Xml.parse_file filename with Xml.Element("TEI", _, [Xml.Element("teiHeader",_,_) ; Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> Xlist.iter entries (function Xml.Element("entry",_,Xml.Element("form", [], [Xml.Element("orth",[],[Xml.PCData orth]);_]) :: xml :: _) -> if orth = lex then print_endline (Xml.to_string_fmt xml) | _ -> failwith "print_entry") | _ -> failwith "print_entry" (*let _ = print_entry walenty_filename "bębnić"*) let print_full_entry filename lex = match Xml.parse_file filename with Xml.Element("TEI", _, [Xml.Element("teiHeader",_,_) ; Xml.Element("text",[],[Xml.Element("body",[],entries)])]) -> Xlist.iter entries (function Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: _ :: l) -> let xml = Xml.Element("entry",e,Xml.Element("form",[],[Xml.Element("orth",[],[Xml.PCData orth]);p]) :: syn :: l) in if orth = lex then print_endline (Xml.to_string_fmt xml) | _ -> failwith "print_full_entry") | _ -> failwith "print_full_entry" (*let _ = print_full_entry walenty_filename "bębnić"*) (*let _ = let walenty = load_walenty2 () in let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in Xlist.iter frames_sem (fun frame -> print_endline (WalStringOf.frame "bębnić" frame))*)