ENIAMplWordnetTypes.ml 5.36 KB
(*
 *  ENIAMplWordnet, a converter for Polish Wordnet "Słowosieć".
 *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd

(* Record with the lu_* attributes of a single unit (presumably a wordnet
   "lexical unit" - TODO confirm against the loader).
   Only lu_id and lu_syn are integers; every other attribute is kept as a
   raw string. *)
type lu = {
  lu_id: int;
  lu_name: string;
  lu_pos: string;
  lu_tagcount: string;
  lu_domain: string;
  lu_desc: string;
  lu_workstate: string;
  lu_source: string;
  lu_variant: string;
  lu_syn: int;
}

(* Record with the syn_* attributes of one entry (presumably a wordnet
   synset - TODO confirm against the loader).
   syn_units pairs an integer with a full lu record; the meaning of that
   integer is not visible in this file. *)
type syn = {
  syn_workstate: string;
  syn_split: string;
  syn_owner: string;
  syn_definition: string;
  syn_desc: string;
  syn_abstract: bool;
  syn_units: (int * lu) list;
  syn_pos: string;
  syn_no_hipo: int;
  syn_domain: string;
}

(* One relation instance: integer ids for parent, child and relation,
   plus the r_valid and r_owner attributes kept as raw strings. *)
type rels = {
  r_parent: int;
  r_child: int;
  r_relation: int;
  r_valid: string;
  r_owner: string;
}

(* Record with the rt_* attributes of one entry (presumably a relation
   type - TODO confirm against the loader).
   rt_reverse is an integer; rt_tests is a list of string pairs; all other
   attributes are kept as raw strings. *)
type rt = {
  rt_type: string;
  rt_reverse: int;
  rt_name: string;
  rt_description: string;
  rt_posstr: string;
  rt_display: string;
  rt_shortcut: string;
  rt_autoreverse: string;
  rt_pwn: string;
  rt_tests: (string * string) list;
}

(* Default lu value: both integer fields are -1 and every string field is
   the empty string. *)
let empty_lu = {
  lu_id = (-1);
  lu_name = "";
  lu_pos = "";
  lu_tagcount = "";
  lu_domain = "";
  lu_desc = "";
  lu_workstate = "";
  lu_source = "";
  lu_variant = "";
  lu_syn = (-1);
}

(* The data below were obtained with the procedure
   ENIAMplWordnet.count_pwn_relation. *)
(* NOTE(review): the name suggests ids of relations linking two Polish
   entries - confirm against the users of this set. *)
let pl_pl_relations =
  IntSet.of_list
    [ 10; 11; 13; 19; 20; 21; 22; 23; 24; 25;
      26; 27; 28; 29; 34; 35; 36; 37; 38; 39;
      40; 41; 42; 43; 44; 45; 46; 47; 48; 49;
      50; 51; 52; 53; 55; 56; 57; 58; 59; 60;
      62; 63; 64; 65; 74; 75; 89; 90; 92; 93;
      101; 104; 106; 107; 108; 110; 111; 113; 114; 116;
      117; 118; 119; 120; 121; 122; 124; 125; 126; 127;
      129; 130; 131; 134; 136; 137; 138; 140; 141; 142;
      145; 146; 147; 148; 149; 151; 152; 154; 155; 156;
      157; 158; 160; 161; 163; 164; 165; 166; 168; 169;
      230; 242; 244 ]

(* Set of relation ids 170..195 (contiguous).
   NOTE(review): the name suggests relations linking two English entries -
   confirm against the users of this set. *)
let en_en_relations =
  IntSet.of_list
    [ 170; 171; 172; 173; 174; 175; 176; 177; 178; 179;
      180; 181; 182; 183; 184; 185; 186; 187; 188; 189;
      190; 191; 192; 193; 194; 195 ]

(* Set of relation ids.
   NOTE(review): the name suggests relations going from a Polish entry to an
   English one - confirm against the users of this set. *)
let pl_en_relations =
  IntSet.of_list
    [ 201; 202; 203; 205; 206; 207; 208; 210; 212;
      222; 228; 235; 238; 239;
      3000; 3002; 3005 ]

(* Set of relation ids.
   NOTE(review): the name suggests relations going from an English entry to a
   Polish one - confirm against the users of this set. *)
let en_pl_relations =
  IntSet.of_list
    [ 209; 211; 213; 214; 215; 216; 217; 218; 219;
      223; 229;
      3006 ]

(* Relations expressed through word formation (derivationally). *)
let morf_relations =
  IntSet.of_list
    [ 34; 35; 36; 37; 38; 39; 40; 41; 42; 43;
      44; 45; 46; 47; 48; 49; 50; 51; 52; 53;
      55; 56; 57; 58; 59; 62; 63; 74; 75; 89;
      110; 111; 124;
      131; 134; 136; 141; 142; 148; 149; 151; 152;
      154; 155; 156; 157; 158; 160; 161;
      163; 164; 165; 166; 168; 169;
      242; 244 ]

(* Relations partially expressed through word formation. *)
let semimorf_relations =
  IntSet.of_list
    [ 19; 64; 65; 90; 93;
      101; 113; 118; 119;
      120; 121; 122; 125; 126; 127; 129;
      130; 140; 147 ]

(* Relations not expressed through word formation. *)
let nomorf_relations =
  IntSet.of_list
    [ 10; 11; 13;
      20; 21; 22; 23; 24; 25; 26; 27; 28; 29;
      60; 92;
      104; 106; 107; 108; 114; 116; 117;
      137; 138; 145; 146;
      230 ]

(* Instance of a concept. *)
let instance_relations =
  IntSet.of_list
    [ 64; 65;
      106; 107;
      145 ]

(* Generalized synonymy.
   Ids 19 and 108 are left commented out with a question mark by the
   original author, i.e. they are NOT members of the set. *)
let synonymy_relations =
  IntSet.of_list
    [ (*19?;*)
      53; 55; 56; 57; 60; 62; 74; 75;
      (*108?;*)
      110; 111; 129; 130; 131; 134; 136;
      140; 141; 142; 147; 151; 152; 168;
      244 ]
(* Author's working note, kept verbatim:
   51,52;53; 55; 56; 57; 60; 62,141; 63,142; 108 *)

(* State, feature, role. *)
let attr_relations =
  IntSet.of_list
    [ 51; 52; 63; 89; 92;
      118; 124; 126;
      146; 148; 149;
      154; 155; 156; 157; 158;
      160; 161; 163; 164; 165; 166; 169;
      242 ]

(* X has Y *)
let has_relations =
  IntSet.of_list
    [ 13;
      (* part, whole *)
      25; 20; 26; 21; 27; 22; 28; 23; 29; 24;
      (* participant, event *)
      41; 34; 42; 35; 43; 36; 44; 37; 45; 38; 46; 39; 47; 40;
      (* participant, phenomenon *)
      49; 48; 50;
      58; 59 (* ? - marked as uncertain by the original author *); 90; 93;
      116; 113;
      114; 117;
      119 ]

(*
34 - agent
48 - agent
49 - place
50 - product|result
58 - inhabitant
*)

(* Antonymy. *)
let antonymy_relations =
  IntSet.of_list [ 101; 104 ]

(* Verbs: causation, presupposition. *)
let caus_relations =
  IntSet.of_list
    [ 120; 121; 122; 125; 127;
      137; 138 ]

(* Plain list: ids 11, 65, 107 followed by every member of
   synonymy_relations (in the order IntSet.to_list yields them). *)
let ex_hipo_rels =
  let synonymy = IntSet.to_list synonymy_relations in
  [11; 65; 107] @ synonymy

(* Plain list: ids 10, 64, 106, 145 followed by every member of
   synonymy_relations (in the order IntSet.to_list yields them). *)
let ex_hipero_rels =
  let synonymy = IntSet.to_list synonymy_relations in
  [10; 64; 106; 145] @ synonymy

(* Child (the value) is more general than Parent (the key). *)

(* Two-valued tag: Parent or Child. *)
type dir =
  | Parent
  | Child

(* Two-valued tag: Straight or Reverse. *)
type dir2 =
  | Straight
  | Reverse

(* Table of (number, direction, relation ids) triples.
   The entries numbered 1 append all members of synonymy_relations to their
   explicit ids. The entries are listed in the order 0, 1, 4, 3.
   NOTE(review): the meaning of the leading number (level? cost?) is not
   visible in this file - confirm against the code that consumes the table. *)
let hipo_relations =
  let synonymy = IntSet.to_list synonymy_relations in
  [ (0, Straight, [10]);
    (0, Reverse, [11]);
    (1, Straight, [64; 106; 145] @ synonymy);
    (1, Reverse, [65; 107] @ synonymy);
    (4, Straight, [19; 108]);
    (4, Reverse, [19; 108]);
    (3, Straight, [20; 21; 22; 23; 24]);
    (3, Reverse, [25; 26; 27; 28; 29]) ]

(* Table of (number, name, string, dir, relation ids) tuples.
   Both rows use the name "cecha" (Polish for "feature") and the string "1".
   Together the two id lists contain exactly the ids of attr_relations,
   split between the Parent row and the Child row.
   NOTE(review): the meaning of the leading number and of the "1" string is
   not visible in this file - confirm against the consumer. *)
let hipo_extensions =
  [ ( 1, "cecha", "1", Parent,
      [ 52; 146; 148; 149;
        154; 155; 156; 157; 158;
        160; 161; 163; 164; 165; 166; 169;
        242 ] );
    ( 1, "cecha", "1", Child,
      [ 51; 63; 89; 92; 118; 124; 126 ] ) ]

(* Single-row table: (number, name, string, list of strings).
   The row uses the name "cecha" (Polish for "feature") and lists
   "przymiotnik" (adjective) and "przysłówek" (adverb).
   NOTE(review): how the number 4 and the "1" string are interpreted is not
   visible in this file - confirm against the consumer. *)
let hipo_extensions2 =
  [ (4, "cecha", "1", ["przymiotnik"; "przysłówek"]) ]