From 2667fa91bfa4c7761e59650b82eceea9f2eb209c Mon Sep 17 00:00:00 2001
From: Wojciech Jaworski <wjaworski@mimuw.edu.pl>
Date: Wed, 10 May 2017 23:42:20 +0200
Subject: [PATCH] rozpoczęcie poprawiania interfejsu do Słowosieci w lexSemantics

---
Reviewer notes (ignored by git am/apply):
 * Line breaks and leading whitespace were restored from a whitespace-collapsed
   copy of this patch; makefile context lines assume tab-indented recipes, so
   `git apply --ignore-whitespace` may be needed if the tree differs.
 * ENIAMplWordnet.ml was amended in review (documentation, uniform error
   messages for malformed records); the blob id on its index line predates
   these edits.

 lexSemantics/ENIAMlexSemanticsTypes.ml | 12 ++++++++++++
 lexSemantics/ENIAMplWordnet.ml         | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lexSemantics/makefile                  | 14 +++++++-------
 3 files changed, 95 insertions(+), 7 deletions(-)
 create mode 100644 lexSemantics/ENIAMplWordnet.ml

diff --git a/lexSemantics/ENIAMlexSemanticsTypes.ml b/lexSemantics/ENIAMlexSemanticsTypes.ml
index 59994d5..9d44369 100644
--- a/lexSemantics/ENIAMlexSemanticsTypes.ml
+++ b/lexSemantics/ENIAMlexSemanticsTypes.ml
@@ -77,3 +77,15 @@ let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncoun
 let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat"
 let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat"
 let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" *)
+
+(*let rzeczownik_filename = resource_path ^ "/plWordnet/rzeczownik.tab"
+let czasownik_filename = resource_path ^ "/plWordnet/czasownik.tab"
+let przymiotnik_filename = resource_path ^ "/plWordnet/przymiotnik.tab"
+let synsets_filename = resource_path ^ "/plWordnet/synsets.tab"
+let hipero_filename = resource_path ^ "/plWordnet/hipero.tab"
+let predef_filename = resource_path ^ "/plWordnet/predef_prefs.tab"
+let proper_classes_filename = resource_path ^ "/plWordnet/proper_classes.tab"*)
+
+let lu_filename = resource_path ^ "/plWordnet/lu.tab"
+let ex_hipo_filename = resource_path ^ "/plWordnet/ex_hipo.tab"
+let syn_filename = resource_path ^ "/plWordnet/syn.tab"
diff --git a/lexSemantics/ENIAMplWordnet.ml b/lexSemantics/ENIAMplWordnet.ml
new file mode 100644
index 0000000..65404f6
--- /dev/null
+++ b/lexSemantics/ENIAMplWordnet.ml
@@ -0,0 +1,76 @@
+(*
+ * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
+ * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
+ *
+ * This library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *)
+
+open Xstd
+open ENIAMlexSemanticsTypes
+
+(* [parse_int loader fields s] converts [s] to an int. A malformed number is
+   reported the same way as a malformed record: the loader name followed by
+   the whole tab-joined record, rather than a bare [Failure "int_of_string"]. *)
+let parse_int loader fields s =
+  try int_of_string s
+  with Failure _ -> failwith (loader ^ ": " ^ String.concat "\t" fields)
+
+(** [load_lu filename] builds a map from lexical-unit id to
+    [(lemma, variant, synset id)]. Every record must have exactly four
+    fields: lu_id, lemma, variant, syn_id ([File.fold_tab] is assumed to
+    yield one list of tab-separated fields per line).
+    @raise Failure on any other field count or a non-integer id. *)
+let load_lu filename =
+  File.fold_tab filename IntMap.empty (fun lumap -> function
+      [lu_id; lemma; variant; syn_id] as l ->
+        let int_of = parse_int "load_lu" l in
+        IntMap.add lumap (int_of lu_id) (lemma,variant,int_of syn_id)
+    | l -> failwith ("load_lu: " ^ String.concat "\t" l))
+
+(** [load_syn filename] builds a map from synset id to [(pos, lu_ids)]: the
+    second field is kept as a string and all remaining fields (possibly
+    none) are read as lexical-unit ids.
+    @raise Failure on fewer than two fields or a non-integer id. *)
+let load_syn filename =
+  File.fold_tab filename IntMap.empty (fun synmap -> function
+      syn_id :: pos :: lu_ids as l ->
+        let int_of = parse_int "load_syn" l in
+        let lu_ids = Xlist.map lu_ids int_of in
+        IntMap.add synmap (int_of syn_id) (pos,lu_ids)
+    | l -> failwith ("load_syn: " ^ String.concat "\t" l))
+
+(** [load_ex_hipo filename] builds a two-level map parent id -> child id ->
+    cost from records of exactly three integer fields: parent, child, cost.
+    @raise Failure on a malformed record or a repeated (parent, child) pair. *)
+let load_ex_hipo filename =
+  File.fold_tab filename IntMap.empty (fun ex_hipo -> function
+      [parent; child; cost] as l ->
+        let int_of = parse_int "load_ex_hipo" l in
+        let parent = int_of parent in
+        let child = int_of child in
+        let cost = int_of cost in
+        let children = try IntMap.find ex_hipo parent with Not_found -> IntMap.empty in
+        (* [add_inc] is expected to call the function only when [child] is
+           already bound (a repeated pair); report the offending record. *)
+        let children = IntMap.add_inc children child cost (fun _ ->
+            failwith ("load_ex_hipo: duplicate relation: " ^ String.concat "\t" l)) in
+        IntMap.add ex_hipo parent children
+    | l -> failwith ("load_ex_hipo: " ^ String.concat "\t" l))
+
+(* The tables are loaded when this module is initialised, so a failure raised
+   by any of the loaders surfaces at program start-up. *)
+let lumap = load_lu lu_filename
+let synmap = load_syn syn_filename
+let ex_hipo = load_ex_hipo ex_hipo_filename
diff --git a/lexSemantics/makefile b/lexSemantics/makefile
index f7d14ed..cc3904d 100644
--- a/lexSemantics/makefile
+++ b/lexSemantics/makefile
@@ -3,20 +3,20 @@ OCAMLOPT=ocamlopt
 OCAMLDEP=ocamldep
 INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
 OCAMLFLAGS=$(INCLUDES) -g
-#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa
-OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa
+OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa
+#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa
 INSTALLDIR=`ocamlc -where`/eniam
 
 SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMadjuncts.ml \
-	ENIAMlexSemanticsTypes.ml ENIAMlexSemanticsStringOf.ml ENIAMlexSemanticsHTMLof.ml ENIAMlexSemanticsXMLof.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml
+	ENIAMlexSemanticsTypes.ml ENIAMlexSemanticsStringOf.ml ENIAMlexSemanticsHTMLof.ml ENIAMlexSemanticsXMLof.ml ENIAMplWordnet.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml
 
 all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa
 
 install: all
 	mkdir -p $(INSTALLDIR)
 	cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR)
-	cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
-	cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
+	cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
+	cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
 	mkdir -p /usr/share/eniam/lexSemantics
 #	cp resources/* /usr/share/eniam/lexSemantics
 	ln -s /usr/share/eniam/lexSemantics/proper_names_20160104.tab /usr/share/eniam/lexSemantics/proper_names.tab
@@ -25,8 +25,8 @@ install: all
 install-local: all
 	mkdir -p $(INSTALLDIR)
 	cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR)
-	cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
-	cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
+	cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
+	cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
 	mkdir -p /usr/local/share/eniam/lexSemantics
 #	cp resources/* /usr/local/share/eniam/lexSemantics
 	ln -s /usr/local/share/eniam/lexSemantics/proper_names_20160104.tab /usr/local/share/eniam/lexSemantics/proper_names.tab
-- 
libgit2 0.22.2