diff --git a/LCGlexicon/interface.ml b/LCGlexicon/interface.ml
new file mode 100644
index 0000000..63e8e6e
--- /dev/null
+++ b/LCGlexicon/interface.ml
@@ -0,0 +1,205 @@
+(*
+ * ENIAM_LCGlexicon is a library that provides LCG lexicon for Polish
+ * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
+ *
+ * This library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *)
+
+open ENIAM_LCGlexiconTypes
+open ENIAM_LCGtypes
+open ENIAMsubsyntaxTypes
+
+let rules = ENIAM_LCGlexicon.make_rules ENIAM_LCGlexiconTypes.rules_filename
+
+let examples = [
+  (* "Szpak","Szpak śpiewa.";*)
+  (* "miał","Miałem miał."; *)
+(* "Ala","Ala ma kota.";
+   "Ale","Ale mają kota:"; *)
+  (* "zima","Szpak frunie zimą.";*)
+  (* "październik","Kot miauczy w październiku."; *)
+(* "Szpak-Kot","Szpak frunie. Kot miauczy.";
+   "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*)
+  (* "teraz","Teraz frunie jakiś szpak.";
+     "chłopcy","Chłopcy mają ulicę kwiatami."; *)
+  (* "arabia","Arabia Saudyjska biegnie.";*)
+(* "Tom","Tom idzie."; *)
+  "liceum","W 1984-89 uczęszczał do VII Liceum Ogólnokształcącego im. K.K. Baczyńskiego w Szczecinie.";
+  "studia","Następnie studiował architekturę na Politechnice Szczecińskiej, dyplom uzyskał w 1994.";
+]
+
+let clarify_categories senses token =
+  match token.ENIAMtokenizerTypes.token with
+    ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories false senses (lemma,pos,interp)))
+  | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories true senses (lemma,pos,interp)))
+  | ENIAMtokenizerTypes.Interp lemma -> ENIAMcategoriesPL.clarify_categories false senses (lemma,"interp",[])
+  | _ -> []
+
+let create_chart tokens lex_sems paths last =
+  ENIAM_LCGrenderer.reset_variable_numbers ();
+  let chart = ENIAM_LCGchart.make last in
+  let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) ->
+    let t = ExtArray.get tokens id in
+    let s = ExtArray.get lex_sems id in
+    ENIAM_LCGrenderer.reset_variable_names ();
+    ENIAM_LCGrenderer.add_variable_numbers ();
+    let cats = clarify_categories ["X"] t in
+    let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in
+    ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
+  chart
+
+let test_example name tokens lex_sems paths last =
+  ENIAM_LCGreductions.reset_variant_label ();
+  let chart = create_chart tokens lex_sems paths last in
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a1" chart;
+  let chart,references = ENIAM_LCGchart.lazify chart in
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" chart;
+  ENIAM_LCGlatexOf.print_references "results/" (name^"2_references") "a4" references;
+  let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* note: implicit imperative modification of references *)
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" chart;
+  ENIAM_LCGlatexOf.print_references "results/" (name^"3_references") "a4" references;
+  if ENIAM_LCGchart.is_parsed chart then (
+    let term = ENIAM_LCGchart.get_parsed_term chart in
+    Xlatex.latex_file_out "results/" (name^"4_term") "a4" false (fun file ->
+      Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
+    Xlatex.latex_compile_and_clean "results/" (name^"4_term");
+    let dependency_tree = ENIAM_LCGreductions.reduce term references in
+    ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"4_dependency_tree") "a0" dependency_tree;
+    if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then (
+      ENIAM_LCGreductions.assign_labels dependency_tree; (* note: implicit imperative modification of dependency_tree *)
+      ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"5_dependency_tree") "a4" dependency_tree;
+      ENIAM_LCGreductions.remove_cuts dependency_tree; (* note: implicit imperative modification of dependency_tree *)
+      ENIAM_LCGlatexOf.print_dependency_tree "results/" (name^"6_dependency_tree") "a4" dependency_tree;
+      ENIAM_LCGgraphOf.print_dependency_tree "results/" (name^"6_dependency_tree") dependency_tree;
+      ENIAM_LCGgraphOf.print_simplified_dependency_tree "results/" (name^"6_simple_dependency_tree") dependency_tree;
+      ())
+    else print_endline "not reduced")
+  else print_endline "not parsed"
+
+let rec parse_sentence name id tokens lex_sems = function
+    RawSentence s -> id
+  | StructSentence(paths,last) ->
+      test_example (name ^ string_of_int id ^ "_") tokens lex_sems paths last;
+      id + 1
+  | DepSentence(paths) -> id
+  | QuotedSentences sentences ->
+      Xlist.fold sentences id (fun id p ->
+        parse_sentence name id tokens lex_sems p.sentence)
+  | AltSentence l ->
+      Xlist.fold l id (fun id (mode,sentence) ->
+        parse_sentence name id tokens lex_sems sentence)
+
+let rec parse_paragraph name id tokens lex_sems = function
+    RawParagraph s -> id
+  | StructParagraph sentences ->
+      Xlist.fold sentences id (fun id p ->
+        parse_sentence name id tokens lex_sems p.sentence)
+  | AltParagraph l ->
+      Xlist.fold l id (fun id (mode,paragraph) ->
+        parse_paragraph name id tokens lex_sems paragraph)
+
+let rec parse_text name id tokens lex_sems = function
+    RawText s -> id
+  | StructText paragraphs ->
+      Xlist.fold paragraphs id (fun id paragraph ->
+        parse_paragraph name id tokens lex_sems paragraph)
+  | AltText l ->
+      Xlist.fold l id (fun id (mode,text) ->
+        parse_text name id tokens lex_sems text)
+
+let _ =
+  Xlist.iter examples (fun (name,example) ->
+    let text,tokens = ENIAMsubsyntax.parse_text example in
+    let lex_sems = ENIAMlexSemantics.assign tokens text in
+    ignore(parse_text name 1 tokens lex_sems text))
+
+(*
+type output = Text | Xml | Html | Marsh | Graphviz
+
+let output = ref Text
+let comm_stdio = ref true
+let sentence_split = ref true
+let port = ref 0
+
+let spec_list = [
+  "-s", Arg.Unit (fun () -> sentence_split:=true), "Split input into sentences (default)";
+  "-n", Arg.Unit (fun () -> sentence_split:=false), "Do not split input into sentences";
+  "-i", Arg.Unit (fun () -> comm_stdio:=true), "Communication using stdio (default)";
+  "-p", Arg.Int (fun p -> comm_stdio:=false; port:=p), "<port> Communication using sockets on given port number";
+  "-t", Arg.Unit (fun () -> output:=Text), "Output as plain text (default)";
+  "-x", Arg.Unit (fun () -> output:=Xml), "Output as XML";
+  "-m", Arg.Unit (fun () -> output:=Marsh), "Output as marshalled OCaml data structure";
+  "-h", Arg.Unit (fun () -> output:=Html), "Output as HTML";
+  "-g", Arg.Unit (fun () -> output:=Graphviz; sentence_split:=false), "Output as graphviz dot file; turns sentence split off";
+  (* "-r", Arg.String (fun p ->
+      ENIAMtokenizerTypes.set_resource_path p;
+      ENIAMmorphologyTypes.set_resource_path p;
+      ENIAMsubsyntaxTypes.set_resource_path p), "<path> Set resource path"; *)
+  ]
+
+let usage_msg =
+  "Usage: subsyntax <options>\nInput is a sequence of lines. An empty line ends the sequence and invokes parsing. A double empty line shuts down the parser.\nOptions are:"
+
+let message = "ENIAMsubsyntax: MWE, abbreviation and sentence detection for Polish\n\
+Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\
+Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences"
+
+let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s))
+
+let input_text channel =
+  let s = ref (try input_line channel with End_of_file -> "") in
+  let lines = ref [] in
+  while !s <> "" do
+    lines := !s :: !lines;
+    s := try input_line channel with End_of_file -> ""
+  done;
+  String.concat "\n" (List.rev !lines)
+
+let rec main_loop in_chan out_chan =
+  let text = input_text in_chan in
+  if text = "" then () else (
+    (* print_endline "input text begin";
+       print_endline text;
+       print_endline "input text end"; *)
+    (if !sentence_split then
+      let text,tokens = ENIAMsubsyntax.parse_text text in
+      (match !output with
+        Text -> output_string out_chan (ENIAMsubsyntaxStringOf.text "" tokens text ^ "\n" ^ ENIAMsubsyntaxStringOf.token_extarray tokens ^ "\n\n")
+      | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.text_and_tokens text tokens) ^ "\n\n")
+      | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.text_and_tokens text tokens ^ "\n\n")
+      | Marsh -> Marshal.to_channel out_chan (text,tokens) []
+      | Graphviz -> failwith "main_loop: ni")
+    else
+      let tokens = ENIAMsubsyntax.parse text in
+      (match !output with
+        Text -> output_string out_chan (ENIAMsubsyntaxStringOf.token_list tokens ^ "\n\n")
+      | Xml -> output_string out_chan (Xml.to_string (ENIAMsubsyntaxXMLof.token_list tokens) ^ "\n\n")
+      | Html -> output_string out_chan (ENIAMsubsyntaxHTMLof.token_list tokens ^ "\n\n")
+      | Marsh -> Marshal.to_channel out_chan tokens []
+      | Graphviz -> output_string out_chan (ENIAMsubsyntaxGraphOf.token_list tokens ^ "\n\n")));
+    flush out_chan;
+    main_loop in_chan out_chan)
+
+let _ =
+  prerr_endline message;
+  Arg.parse spec_list anon_fun usage_msg;
+  Gc.compact ();
+  prerr_endline "Ready!";
+  if !comm_stdio then main_loop stdin stdout
+  else
+    let sockaddr = Unix.ADDR_INET(Unix.inet_addr_any,!port) in
+    Unix.establish_server main_loop sockaddr
+*)
diff --git a/LCGlexicon/makefile b/LCGlexicon/makefile
index b4bb33a..37e64ec 100755
--- a/LCGlexicon/makefile
+++ b/LCGlexicon/makefile
@@ -42,6 +42,9 @@ test2: test2.ml
 	mkdir -p results
 	$(OCAMLOPT) -o test2 $(OCAMLOPTFLAGS2) test2.ml
 
+interface: interface.ml
+	$(OCAMLOPT) -o parser $(OCAMLOPTFLAGS2) interface.ml
+
 print_lexicon: ENIAM_LCGlexiconLatexOf.ml
 	mkdir -p results
 	$(OCAMLOPT) -o print_lexicon $(OCAMLOPTFLAGS) ENIAM_LCGlexiconLatexOf.ml
@@ -67,4 +70,4 @@ print_lexicon: ENIAM_LCGlexiconLatexOf.ml
 	$(OCAMLOPT) $(OCAMLOPTFLAGS) -c $<
 
 clean:
-	rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test test2 print_lexicon
+	rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a test test2 parser print_lexicon
diff --git a/NKJP2/ENIAM_NKJP.ml b/NKJP2/ENIAM_NKJP.ml
index 5390230..e58d53c 100644
--- a/NKJP2/ENIAM_NKJP.ml
+++ b/NKJP2/ENIAM_NKJP.ml
@@ -158,14 +158,72 @@ let load_morphosyntax path name =
     List.rev (Xlist.rev_map entries load_morph_entry)
   | _ -> failwith "load_morphosyntax"
 
-let rec merge_entries rev = function
+let parse_seg_corresp corresp =
+  if not (Xstring.check_prefix "text.xml#string-range(" corresp) then failwith "parse_seg_corresp" else
+  if not (Xstring.check_sufix ")" corresp) then failwith "parse_seg_corresp" else
+  let corresp = Xstring.cut_prefix "text.xml#string-range(" corresp in
+  let corresp = Xstring.cut_sufix ")" corresp in
+  let id,beg,len = match Xstring.split "," corresp with
+      [id;beg;len] -> parse_id id, int_of_string beg, int_of_string len
+    | _ -> failwith "parse_seg_corresp" in
+  let id_div,id_ab = match id with
+      {corref=""; prefix="txt"; numbers=[id_div;id_ab]; suffix="ab"} -> id_div,id_ab
+    | _ -> failwith "parse_seg_corresp" in
+  id_div,id_ab,beg,len
+
+let pos_set = StringSet.of_list
+  ["subst";"depr";"ppron12";"ppron3";"siebie";"prep";"adj";"adjc";"adjp";"adja";"num";
+   "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt";
+   "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"comp";"conj";"interj";"burk";"interp";
+   "brev";"xxx";"numcol"]
+
+let parse_disamb disamb =
+  if disamb = "::interp" then ":","interp",[] else
+  if disamb = ":-):interp" then ":-)","interp",[] else
+  (* if Xstring.check_sufix ":interp" disamb then Xstring.cut_sufix ":interp" disamb, "interp", [] else *)
+  match Xstring.split_delim ":" disamb with
+    lemma1 :: lemma2 :: "subst" :: interp -> lemma1 ^ ":" ^ lemma2,"subst",interp
+  | lemma1 :: lemma2 :: lemma3 :: "subst" :: interp -> lemma1 ^ ":" ^ lemma2 ^ ":" ^ lemma3,"subst",interp
+  | lemma :: pos :: interp ->
+      if StringSet.mem pos_set pos then lemma,pos,interp
+      else failwith ("parse_disamb: " ^ disamb)
+  | _ -> failwith "parse_disamb"
+
+let rec merge_tokens name id_p rev = function
+    (corresp,nps,{corref=""; prefix="segm"; numbers=[id_segm_p;id_segm_s]; suffix="seg"}) :: segmentation,
+    ({corref="ann_segmentation.xml"; prefix="segm"; numbers=[c_segm_p;c_segm_s]; suffix="seg"},
+     {corref=""; prefix="morph"; numbers=[id_morph_p;id_morph_s]; suffix="seg"},orth,disamb) :: morphosyntax ->
+      (* if id_p <> id_segm_p then Printf.printf "merge_tokens inconsistent numbering: %s segm_%d-p segm_%d.%d-s\n" name id_p id_segm_p id_segm_s; *)
+      if id_segm_p <> c_segm_p || id_segm_p <> id_morph_p then failwith "merge_tokens 2" else
+      if id_segm_s <> c_segm_s || c_segm_s <> id_morph_s then failwith "merge_tokens 3" else
+      let id_div,id_ab,beg,len = parse_seg_corresp corresp in(
+      (* if id_div <> id_p then (*failwith*)print_endline (Printf.sprintf "merge_tokens 4: %s %d %s" name id_p corresp); (*else*) *)
+      let lemma,cat,interp = parse_disamb disamb in
+      merge_tokens name id_p ((id_div,id_ab,beg,nps,len,orth,lemma,cat,interp) :: rev) (segmentation,morphosyntax))
+  | [],[] -> List.rev rev
+  | _ -> failwith "merge_tokens 1"
+
+let rec merge_sentences name id_p rev = function
+    ({corref=""; prefix="segm"; numbers=[id_segm_p;id_segm_s]; suffix="s"},segm_tokens) :: segmentation,
+    ({corref="ann_segmentation.xml"; prefix="segm"; numbers=[c_segm_p;c_segm_s]; suffix="s"},
+     {corref=""; prefix="morph"; numbers=[id_morph_p;id_morph_s]; suffix="s"},morph_tokens) :: morphosyntax ->
+      (* if id_p <> id_segm_p then Printf.printf "merge_sentences inconsistent numbering: %s segm_%d-p segm_%d.%d-s\n" name id_p id_segm_p id_segm_s; *)
+      if id_segm_p <> c_segm_p || id_segm_p <> id_morph_p then failwith "merge_sentences 2" else
+      if id_segm_s <> c_segm_s || c_segm_s <> id_morph_s then failwith "merge_sentences 3" else
+      let tokens = merge_tokens name id_p [] (segm_tokens,morph_tokens) in
+      merge_sentences name id_p ((id_segm_p,id_segm_s,tokens) :: rev) (segmentation,morphosyntax)
+  | [],[] -> List.rev rev
+  | _ -> failwith "merge_sentences"
+
+let rec merge_entries name rev = function
     ({corref=""; prefix="txt"; numbers=[id_div]; suffix="div"},paragraphs) :: text,
     ({corref="text.xml"; prefix="txt"; numbers=[c_div]; suffix="div"},
      {corref=""; prefix="segm"; numbers=[id_segm_p]; suffix="p"},segm_sentences) :: segmentation,
     ({corref="ann_segmentation.xml"; prefix="segm"; numbers=[c_segm_p]; suffix="p"},
      {corref=""; prefix="morph"; numbers=[id_morph_p]; suffix="p"},morph_sentences) :: morphosyntax ->
       if id_div <> c_div || c_div <> id_segm_p || id_segm_p <> c_segm_p || c_segm_p <> id_morph_p then failwith "merge_entries 2" else
-      merge_entries ((id_div,paragraphs,segm_sentences,morph_sentences) :: rev) (text,segmentation,morphosyntax)
+      let sentences = merge_sentences name id_div [] (segm_sentences,morph_sentences) in
+      merge_entries name ((id_div,paragraphs,sentences) :: rev) (text,segmentation,morphosyntax)
   | [],[],[] -> List.rev rev
   | _ -> failwith "merge_entries"
 
@@ -174,7 +232,7 @@ let nkjp_path = "../../NLP resources/NKJP-PodkorpusMilionowy-1.2/"
 let _ =
   let names = get_folders nkjp_path in
   Xlist.iter names (fun name ->
-    print_endline name;
+    (* print_endline name; *)
    let typ,channel = load_header nkjp_path name in
    (* print_endline typ; *)
    (* print_endline channel; *)
@@ -182,7 +240,7 @@ let _ =
    let text = load_text nkjp_path name in
    let segmentation = load_segmentation nkjp_path name in
    let morphosyntax = load_morphosyntax nkjp_path name in
-    let entries = merge_entries [] (text,segmentation,morphosyntax) in
+    let entries = merge_entries name [] (text,segmentation,morphosyntax) in
    ())
 
 (*
diff --git a/subsyntax/ENIAM_MWE.ml b/subsyntax/ENIAM_MWE.ml
index a13eddf..e111b15 100644
--- a/subsyntax/ENIAM_MWE.ml
+++ b/subsyntax/ENIAM_MWE.ml
@@ -30,7 +30,7 @@ let load_dict dict filename =
 
 let mwe_dict =
   let dict = load_dict StringMap.empty brev_filename in
-  let dict = load_dict dict fixed_filename in
+  let dict = try load_dict dict fixed_filename with _ -> (prerr_endline ("ENIAMsubsyntax file " ^ fixed_filename ^ " not found"); dict) in
  (* let dict = load_dict dict complete_entries_filename in*)
   let dict = load_dict dict mwe_filename in
   dict
diff --git a/subsyntax/ENIAMsubsyntax.ml b/subsyntax/ENIAMsubsyntax.ml
index adb3e28..e0eef8c 100644
--- a/subsyntax/ENIAMsubsyntax.ml
+++ b/subsyntax/ENIAMsubsyntax.ml
@@ -200,7 +200,7 @@ let select_tokens paths =
 (*      | Dig(value,cat) -> t :: paths *)
       | Other orth -> t :: paths
       | Lemma(lemma,pos,interp) -> if pos = "brev" then paths else t :: paths
-      | Proper(lemma,pos,interp,cat) -> t :: paths
+      | Proper(lemma,pos,interp,cat) -> if pos = "brev" then paths else t :: paths
 (*      | Compound _ -> t :: paths *)
       | _ -> paths))
 
@@ -213,6 +213,7 @@ let load_proper_name proper = function
 
 let proper_names =
   let proper = File.fold_tab proper_names_filename StringMap.empty load_proper_name in
   let proper = File.fold_tab proper_names_filename2 proper load_proper_name in
+  let proper = File.fold_tab proper_names_filename3 proper load_proper_name in
   proper
 
diff --git a/subsyntax/ENIAMsubsyntaxTypes.ml b/subsyntax/ENIAMsubsyntaxTypes.ml
index c1c4cb3..e92dace 100644
--- a/subsyntax/ENIAMsubsyntaxTypes.ml
+++ b/subsyntax/ENIAMsubsyntaxTypes.ml
@@ -55,6 +55,7 @@ let lemma_frequencies_filename = resource_path ^ "/subsyntax/NKJP1M-lemma-freq.t
 let proper_names_filename2 = resource_path ^ "/subsyntax/proper_names.tab" *)
 let proper_names_filename = resource_path ^ "/subsyntax/proper_names_sgjp_polimorf_20151020.tab"
 let proper_names_filename2 = resource_path ^ "/subsyntax/proper_names_20160104.tab"
+let proper_names_filename3 = resource_path ^ "/subsyntax/ne.tab"
 
 let int_of_mode = function
     Raw -> 0
diff --git a/subsyntax/resources/ne.tab b/subsyntax/resources/ne.tab
new file mode 100644
index 0000000..1f7410b
--- /dev/null
+++ b/subsyntax/resources/ne.tab
@@ -0,0 +1,34 @@
+Akademia Sztuki ORGANIZACJA
+Atelier Bizio + Ligierko ORGANIZACJA
+Instytut Architektury i Planowania Przestrzennego ORGANIZACJA
+Katedra Architektury Współczesnej Teorii i Metodologii Projektowania ORGANIZACJA
+VII Liceum Ogólnokształcące im. K.K. Baczyńskiego ORGANIZACJA
+IV Liceum Ogólnokształcące im. L. Szenwalda ORGANIZACJA
+Muzeum Narodowe ORGANIZACJA
+Nagroda Artystyczna m. Szczecina WYRÓŻNIENIE
+Zachodniopomorski Nobel WYRÓŻNIENIE
+Politechnika Krakowska ORGANIZACJA
+Politechnika Szczecińska ORGANIZACJA
+Pracownia Podstaw Projektowania ORGANIZACJA
+Przegląd Teatrów Małych Form „Kontrapunkt” ORGANIZACJA
+Mistrzowska Szkoła Reżyserii Filmowej Andrzeja Wajdy ORGANIZACJA
+Uniwersytet im. M. Kopernika ORGANIZACJA
+Zachodniopomorski Uniwersytet Technologiczny ORGANIZACJA
+Wydział Budownictwa i Architektury ORGANIZACJA
+Wydział Stuk Wizualnych ORGANIZACJA
+Zakład Teorii Architektury, Historii i Konserwacji Zabytków ORGANIZACJA
+Festiwal Polskich Sztuk Współczesnych R@Port WYDARZENIE
+Sosnowiec MIASTO
+Stefan IMIĘ
+Józefa IMIĘ
+Szczecin MIASTO
+Waldemar IMIĘ
+Marzęcki NAZWISKO
+Austria KRAJ
+Czechy KRAJ
+Niemcy KRAJ
+Francja KRAJ
+Litwa KRAJ
+USA KRAJ
+Rosja KRAJ
+
diff --git a/tokenizer/ENIAMacronyms.ml b/tokenizer/ENIAMacronyms.ml
index 36b7646..1c113fe 100644
--- a/tokenizer/ENIAMacronyms.ml
+++ b/tokenizer/ENIAMacronyms.ml
@@ -21,7 +21,7 @@ open ENIAMtokenizerTypes
 
 let mte_patterns =
   let lines = try File.load_lines mte_filename
-    with _ -> (print_endline ("ENIAMtokenizer mte file " ^ mte_filename ^ " not found"); []) in
+    with _ -> (prerr_endline ("ENIAMtokenizer mte file " ^ mte_filename ^ " not found"); []) in
   let l = List.rev (Xlist.rev_map lines (fun line ->
    match Str.split (Str.regexp "\t") line with
      [orths; lemma; interp] -> Str.split (Str.regexp " ") orths, lemma, interp
diff --git a/tokenizer/ENIAMtokenizerTypes.ml b/tokenizer/ENIAMtokenizerTypes.ml
index 4c4d1cf..72c45cc 100644
--- a/tokenizer/ENIAMtokenizerTypes.ml
+++ b/tokenizer/ENIAMtokenizerTypes.ml
@@ -72,4 +72,5 @@ let resource_path =
   if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam" else
   failwith "resource directory does not exists"
 
-let mte_filename = resource_path ^ "/tokenizer/mte.tab"
+(* let mte_filename = resource_path ^ "/tokenizer/mte.tab" *)
+let mte_filename = resource_path ^ "/tokenizer/mte_20151215.tab"
diff --git a/tokenizer/makefile b/tokenizer/makefile
index 2d23090..cabf196 100755
--- a/tokenizer/makefile
+++ b/tokenizer/makefile
@@ -18,7 +18,7 @@ install: all
 	mkdir -p /usr/share/eniam/tokenizer
 	cp resources/mte_20151215.tab /usr/share/eniam/tokenizer/mte_20151215.tab
 	cp resources/README /usr/share/eniam/tokenizer/README
-	ln -s /usr/share/eniam/tokenizer/mte_20151215.tab /usr/share/eniam/tokenizer/mte.tab
+#	ln -s /usr/share/eniam/tokenizer/mte_20151215.tab /usr/share/eniam/tokenizer/mte.tab
 
 install-local: all
 	mkdir -p $(INSTALLDIR)
@@ -28,7 +28,7 @@ install-local: all
 	mkdir -p /usr/local/share/eniam/tokenizer
 	cp resources/mte_20151215.tab /usr/local/share/eniam/tokenizer/mte_20151215.tab
 	cp resources/README /usr/local/share/eniam/tokenizer/README
-	ln -s /usr/local/share/eniam/tokenizer/mte_20151215.tab /usr/local/share/eniam/tokenizer/mte.tab
+#	ln -s /usr/local/share/eniam/tokenizer/mte_20151215.tab /usr/local/share/eniam/tokenizer/mte.tab
 
 eniam-tokenizer.cma: $(SOURCES)
 	ocamlc -linkall -a -o eniam-tokenizer.cma $(OCAMLFLAGS) $^
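A note on the two main additions above, for readers who do not know the code base. The new `interface` target in LCGlexicon/makefile builds the example driver LCGlexicon/interface.ml into a binary named `parser`, which parses the hard-coded `examples` list and writes the resulting charts and dependency trees (via ENIAM_LCGlatexOf and ENIAM_LCGgraphOf) under results/. In NKJP2/ENIAM_NKJP.ml, the new `parse_disamb` decodes NKJP disamb strings which, judging from the patterns it handles, have the form lemma:pos:tag1:...:tagN, where the lemma itself may contain ":" when the part of speech is subst. The standalone sketch below re-implements only that splitting step with the plain OCaml standard library; it keeps a reduced tagset and drops the "::interp" and ":-):interp" special cases, so it illustrates the convention rather than reproducing the library code, which relies on the Xstring and StringSet helpers.

(* Standalone illustration of the disamb-splitting convention implemented by
   parse_disamb in NKJP2/ENIAM_NKJP.ml; a simplified sketch, not the library code. *)
module StrSet = Set.Make (String)

(* A small subset of the NKJP tagset; the patch lists the full set in pos_set. *)
let pos_set = StrSet.of_list ["subst"; "adj"; "fin"; "praet"; "interp"]

(* Split "lemma:pos:tag1:..." into (lemma, pos, tags).  A lemma containing ':'
   is only recognised here when it is followed by "subst", as in the patch. *)
let parse_disamb disamb =
  match String.split_on_char ':' disamb with
  | lemma1 :: lemma2 :: "subst" :: interp -> (lemma1 ^ ":" ^ lemma2, "subst", interp)
  | lemma :: pos :: interp when StrSet.mem pos pos_set -> (lemma, pos, interp)
  | _ -> failwith ("parse_disamb: " ^ disamb)

let () =
  let lemma, pos, interp = parse_disamb "kot:subst:sg:nom:m2" in
  Printf.printf "%s %s %s\n" lemma pos (String.concat ":" interp)
  (* prints: kot subst sg:nom:m2 *)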