Commit 56fafa1e57cf001c594cc954b9f1d5169975d34c

Authored by Wojciech Jaworski
1 parent 0271fb5e

przygotowanie do przetwarzania drzew zależnościowych z mową niezależną

@@ -27,13 +27,13 @@ LCG_NO_NODES=10000000 @@ -27,13 +27,13 @@ LCG_NO_NODES=10000000
27 NO_PROCESSES=4 27 NO_PROCESSES=4
28 28
29 # Is Concraft enabled 29 # Is Concraft enabled
30 -CONCRAFT_ENABLED=true 30 +CONCRAFT_ENABLED=false
31 31
32 # Path to Concraft 32 # Path to Concraft
33 CONCRAFT_PATH=../concraft/ 33 CONCRAFT_PATH=../concraft/
34 34
35 # Is MateParser enabled 35 # Is MateParser enabled
36 -MATE_PARSER_ENABLED=true 36 +MATE_PARSER_ENABLED=false
37 37
38 # Path to MateParser 38 # Path to MateParser
39 MATE_PARSER_PATH=../dependencyParser/basic/mate-tools/ 39 MATE_PARSER_PATH=../dependencyParser/basic/mate-tools/
@@ -45,4 +45,4 @@ SWIGRA_ENABLED=false @@ -45,4 +45,4 @@ SWIGRA_ENABLED=false
45 SWIGRA_PATH=../swigra/parser/ 45 SWIGRA_PATH=../swigra/parser/
46 46
47 # Is sentence selection enabled 47 # Is sentence selection enabled
48 -SENTENCE_SELECTION_ENABLED=true 48 +SENTENCE_SELECTION_ENABLED=false
parser/makefile
@@ -16,9 +16,9 @@ SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml @@ -16,9 +16,9 @@ SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
16 EXEC= execTypes.ml visualization.ml ../diagnostics/treeChange.ml exec.ml ../diagnostics/LCGfields.ml #../diagnostics/compTrees.ml 16 EXEC= execTypes.ml visualization.ml ../diagnostics/treeChange.ml exec.ml ../diagnostics/LCGfields.ml #../diagnostics/compTrees.ml
17 17
18 all: 18 all:
19 -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml  
20 - $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml  
21 - $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml 19 + $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
  20 +# $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml
  21 +# $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml
22 # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml 22 # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml
23 # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml 23 # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml
24 # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml 24 # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml
parser/pipe.ml
@@ -129,7 +129,7 @@ let process_id s = @@ -129,7 +129,7 @@ let process_id s =
129 let process_conll_corpus filename = 129 let process_conll_corpus filename =
130 let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in 130 let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
131 print_endline "process_conll_corpus"; 131 print_endline "process_conll_corpus";
132 - let corpus = [List.hd corpus] in 132 + (* let corpus = [List.hd corpus] in *)
133 let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in 133 let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in
134 Xlist.iter corpus (fun query -> 134 Xlist.iter corpus (fun query ->
135 let id = process_id (get_query_id query) in 135 let id = process_id (get_query_id query) in
@@ -157,7 +157,8 @@ let process_conll_corpus filename = @@ -157,7 +157,8 @@ let process_conll_corpus filename =
157 let _ = 157 let _ =
158 (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *) 158 (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *)
159 (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *) 159 (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *)
160 - process_conll_corpus "../testy/skladnica-test1.conll"; 160 + (* process_conll_corpus "../testy/skladnica-test1.conll"; *)
  161 + process_conll_corpus "../testy/skladnica-test2.conll";
161 () 162 ()
162 163
163 (* TO DO: 164 (* TO DO:
testy/skladnica-test2.conll 0 → 100644
  1 +1 Ponownie ponownie adv adv pos 2 adjunct _ _
  2 +2 musiał musieć praet praet sg|m1|imperf 0 pred _ _
  3 +3 użyć użyć inf inf perf 2 comp_inf _ _
  4 +4 ręcznika ręcznik subst subst sg|gen|m3 3 obj _ _
  5 +5 . . interp interp _ 2 punct _ _
  6 +
  7 +1 - - interp interp _ 4 punct _ _
  8 +2 Obcokrajowiec obcokrajowiec subst subst sg|nom|m1 4 subj _ _
  9 +3 też też qub qub _ 4 adjunct _ _
  10 +4 ma mieć fin fin sg|ter|imperf 0 pred _ _
  11 +5 szanse szansa subst subst pl|acc|f 4 obj_th _ _
  12 +6 ? ? interp interp _ 4 punct _ _
  13 +
  14 +# trees/NKJP_1M_1202900095/morph_3-p/morph_3.46-s.xml.tree
  15 +1 - - interp interp 0 _ _ _
  16 +2 Słoń słoń subst subst sg|nom|m2 4 _ _ _
  17 +3 - - interp interp 0 _ _ _
  18 +4 powiedział powiedzieć praet praet sg|m1|perf 0 _ _ _
  19 +5 Pinio Pinio subst subst sg|nom|m1 4 _ _ _
  20 +6 . . interp interp 0 _ _ _
  21 +
  22 +# trees/NKJP_1M_2002000114/morph_2-p/morph_2.72-s.xml.tree
  23 +1 - - interp interp 0 _ _ _
  24 +2 Nie nie qub qub 3 _ _ _
  25 +3 mogę móc fin fin sg|pri|imperf 7 _ _ _
  26 +4 ci ty ppron12 ppron12 sg|dat|m1|sec|nakc 5 _ _ _
  27 +5 powiedzieć powiedzieć inf inf perf 3 _ _ _
  28 +6 - - interp interp 0 _ _ _
  29 +7 zachrypiał zachrypieć praet praet sg|m1|perf 0 _ _ _
  30 +8 . . interp interp 0 _ _ _
  31 +
  32 +# trees/NKJP_1M_2002000028/morph_5-p/morph_5.40-s.xml.tree
  33 +1 - - interp interp 0 _ _ _
  34 +2 Właśnie właśnie qub qub 4 _ _ _
  35 +3 to to subst subst sg|acc|n 4 _ _ _
  36 +4 robię robić fin fin sg|pri|imperf 6 _ _ _
  37 +5 - - interp interp 0 _ _ _
  38 +6 odpowiedział odpowiedzieć praet praet sg|m1|perf 0 _ _ _
  39 +7 twardo twardo adv adv pos 6 _ _ _
  40 +8 . . interp interp 0 _ _ _
  41 +
  42 +# trees/NKJP_1M_1202000001/morph_3-p/morph_3.9-s.xml.tree
  43 +1 CKM CKM subst subst sg|nom|n 0 _ _ _
  44 +2 : interp 0 _ _ _
  45 +3 Jak jak adv adv pos 5 _ _ _
  46 +4 mężczyzna mężczyzna subst subst sg|nom|m1 5 _ _ _
  47 +5 powinien powinien winien winien sg|m1|imperf 1 _ _ _
  48 +6 na na prep prep acc 8 _ _ _
  49 +7 ciebie ty ppron12 ppron12 sg|acc|f|sec|akc 6 _ _ _
  50 +8 patrzeć patrzeć inf inf imperf 5 _ _ _
  51 +9 ? ? interp interp 0 _ _ _
  52 +
  53 +# trees/NKJP_1M_2001000023/morph_1-p/morph_1.61-s.xml.tree
  54 +1 Pochylił pochylić praet praet sg|m1|perf 0 _ _ _
  55 +2 em być aglt aglt sg|pri|imperf|wok 1 _ _ _
  56 +3 się się qub qub 1 _ _ _
  57 +4 nad nad prep prep inst|nwok 1 _ _ _
  58 +5 nim on ppron3 ppron3 sg|inst|m1|ter|akc|praep 4 _ _ _
  59 +6 : interp 0 _ _ _