Commit ec7e858820fc252a3c7ecc9ffe687dbb4ae0509a

Authored by Wojciech Jaworski
2 parents 7989117c b4c599c2

resolve conflicts from re-merging dep_trees

.gitignore
... ... @@ -4,7 +4,6 @@
4 4 *.a
5 5 *.cmxa
6 6 .DS_Store
7   -pre
8 7 *.aux
9 8 *.log
10 9 *.tex.backup
... ...
... ... @@ -45,4 +45,4 @@ SWIGRA_ENABLED=false
45 45 SWIGRA_PATH=../swigra/parser/
46 46  
47 47 # Is sentence selection enabled
48   -SENTENCE_SELECTION_ENABLED=true
  48 +SENTENCE_SELECTION_ENABLED=false
... ...
corpora/CONLL.ml
1 1 open Xstd
2 2 open PreTypes
3 3  
4   -let alternative_string f mode alts = if List.exists (fun (m,_) -> mode = m) alts
  4 +let alternative_string f mode alts = if List.exists (fun (m,_) -> mode = m) alts
5 5 then f (snd @@ List.find (fun (m,_) -> m = mode) alts)
6 6 else f (snd @@ List.find (fun (m,_) -> m = PreTypes.Struct) alts)
7 7  
... ... @@ -54,7 +54,7 @@ let rec string_of_text mode tokens = function
54 54  
55 55  
56 56 (******************)
57   -
  57 +(***
58 58 let establish_next tokens paths =
59 59 let n = ExtArray.size tokens in
60 60 Int.iter 1 (n - 2) (fun i ->
... ... @@ -128,7 +128,7 @@ let match_sentence (p_record,tokens) =
128 128 with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)]
129 129  
130 130 let match_corpus corpus =
131   - Xlist.map corpus match_sentence
  131 + Xlist.map corpus match_sentence***)
132 132  
133 133 (******************)
134 134  
... ...
diagnostics/LCGfields.ml
... ... @@ -90,14 +90,14 @@ let field_of_conll_sentence fields tokens (result : conll_parse_result) =
90 90  
91 91 let rec field_of_sentence fields tokens = function
92 92 RawSentence s -> s
93   - | StructSentence(_,paths,last) -> "StructSentence"
94   - | DepSentence(_,paths) -> "DepSentence"
  93 + | StructSentence _ -> "StructSentence"
  94 + | DepSentence _ -> "DepSentence"
95 95 | ENIAMSentence result -> field_of_eniam_sentence fields tokens result
96 96 | CONLLSentence result -> field_of_conll_sentence fields tokens result
97 97 | QuotedSentences sentences -> "QuotedSentences"
98 98 | AltSentence l -> String.concat "\n\t" (Xlist.map l (fun (m, s) ->
99 99 Visualization.string_of_mode m ^ "\t" ^ (field_of_sentence fields tokens s)))
100   - (* | _ -> failwith "field_of_sentence: ni" *)
  100 + | _ -> failwith "field_of_sentence: ni"
101 101  
102 102 let rec field_of_paragraph fields tokens = function
103 103 RawParagraph s -> print_endline "no fields detected: only raw paragraph"; s
... ...
diagnostics/treeChange.ml
1 1 open Xstd
2 2 open PreTypes
3 3  
4   -let remove_interps paths tokens =
  4 +let if_lemma lemmas = function
  5 + Lemma(l,_,_) -> List.exists (fun x -> x = l) lemmas
  6 + | _ -> false
  7 +
  8 +let if_cat cats = function
  9 + Lemma(_,cat,_) -> List.exists (fun x -> x = cat) cats
  10 + | _ -> false
  11 +
  12 +let if_interps interps token =
  13 + let interp = match token with
  14 + Lemma(_,_,i) -> i
  15 + | _ -> [[[]]] in
  16 + let if_interp nr value =
  17 + List.exists (fun x ->
  18 + List.exists (fun y ->
  19 + y = value) (List.nth x nr)) interp in
  20 + Xlist.fold interps true (fun acc (nr,value) -> acc && (if_interp nr value))
  21 +
  22 +let correct_coordination paths tokens =
  23 + let paths_ls = List.mapi (fun i (id,super,label) ->
  24 + (i,id,super,label)) (Array.to_list paths) in
  25 +
  26 + let ps a sons =
  27 + print_endline a;
  28 + List.iter (fun (i,_,_,_) -> print_endline (ExtArray.get tokens i).orth) sons;
  29 + print_endline "" in
  30 +
  31 + let rec correct_rec (i,id,super,label) sons =
  32 + let left_s, right_s = List.partition (fun (a,b,c,d) -> a < i) sons in
  33 + (* ps "left:" (List.rev left_s);
  34 + ps "right:" right_s; *)
  35 + find_father i (List.rev left_s);
  36 + find_father i right_s
  37 +
  38 + and find_father i0 = function
  39 + [(i,id,super,label)] -> paths.(i) <- (id,i0,label)
  40 + | (a,b,c,d) :: (i,id,super,label) :: t ->
  41 + paths.(i) <- (id,i0,label);
  42 + if not (if_cat ["conj"; "interp"] (ExtArray.get tokens i).token)
  43 + then (prerr_endline "find_father"; failwith "find_father");
  44 + correct_rec (i,id,super,label) (if a < i
  45 + then (a,b,c,d) :: t
  46 + else List.rev @@ (a,b,c,d) :: t)
  47 + | _ -> prerr_endline "find_father"; failwith "find_father" in
  48 +
  49 + Array.iteri (fun i (id,super,label) ->
  50 + if if_cat ["conj"; "interp"] (ExtArray.get tokens i).token
  51 + then (let sons = List.filter (fun (_,_,super,_) -> super = i) paths_ls in
  52 + if (List.length sons > 2)
  53 + then correct_rec (i,id,super,label) sons)) paths;
  54 + paths
  55 +
  56 +let replace_tokens paths tokens =
  57 +(* for i = 0 to ExtArray.size tokens - 1 do
  58 + print_endline (string_of_int i ^ ": "^ (ExtArray.get tokens i).orth)
  59 +done; *)
  60 + let find_token orth = Int.fold 0 (ExtArray.size tokens - 1) 0 (fun acc i ->
  61 + if (ExtArray.get tokens i).orth = orth then i else acc) in
  62 +
  63 + let multidot i id0 super0 label0 =
  64 + let id1, super1, label1 = paths.(super0) in
  65 + let id2, super2, label2 = paths.(super1) in
  66 + if (ExtArray.get tokens id1).orth = "." &&
  67 + (ExtArray.get tokens id2).orth = "."
  68 + then
  69 + (paths.(super1) <- (find_token "..." ,super2, label2);
  70 + paths.(super0) <- (0,-1,"");
  71 + paths.(i) <- (0,-1,"")) in
  72 +
  73 + let brev i id super label =
  74 + let if_the_last_dot () =
  75 + let (id_dot, s_dot, l_dot) = List.find (fun (i2,s,l) ->
  76 + s = i && ((ExtArray.get tokens i2).orth = "." || (ExtArray.get tokens i2).orth = "...")) (Array.to_list paths) in
  77 + Array.fold_left (fun acc (i2,s,l) ->
  78 + acc && (ExtArray.get tokens i2).beg <= (ExtArray.get tokens id_dot).beg) true paths in
  79 +
  80 + let dot = if if_interps [0,"npun"] (ExtArray.get tokens id).token || if_the_last_dot ()
  81 + then ""
  82 + else "." in
  83 +
  84 + let n_orth = (ExtArray.get tokens id).orth ^ dot in
  85 + paths.(i) <- (find_token n_orth,super,label) in
  86 +
  87 + Array.iteri (fun i (id,super,label) ->
  88 + if (ExtArray.get tokens id).orth = "."
  89 + then multidot i id super label;
  90 + if if_cat ["brev"] (ExtArray.get tokens id).token
  91 + then brev i id super label)
  92 + paths;
  93 + paths
  94 +
  95 +let remove_interps interp paths tokens =
5 96 let paths_ls = Array.to_list paths in
6   - Array.iter (fun (id,super,label) ->
7   - if ((ExtArray.get tokens id).orth = "," ||
8   - (ExtArray.get tokens id).orth = "." ||
9   - (ExtArray.get tokens id).orth = "-") &&
10   - not (List.exists (fun (_,super,_) -> super = id) paths_ls)
11   - then paths.(id) <- (0,-1,"")) paths; paths
12   -
13   -let move_comp paths tokens =
14   - let correct_dep (id,super,label) =
15   - let is_comp = function
16   - Lemma(_,"comp",_) -> true
17   - | _ -> false in
18   - if ((ExtArray.get tokens id).orth = "by" || (ExtArray.get tokens id).orth = "że")
19   - && is_comp (ExtArray.get tokens id).token
20   - then (let id_S, super_S, label_S = paths.(super) in
21   - paths.(id) <- (id,super_S,label);
22   - paths.(super) <- (id_S, id, label_S)) in
23   - Array.iter correct_dep paths; paths
  97 + Array.iteri (fun i (id,super,label) ->
  98 + if (ExtArray.get tokens id).orth = interp &&
  99 + not (List.exists (fun (_,super,_) -> super = i) paths_ls)
  100 + then paths.(i) <- (0,-1,"")) paths;
  101 + paths
  102 +
  103 +let swap_dep paths tokens =
  104 + let change_dep i (id,super,label) =
  105 + let id_S, super_S, label_S = paths.(super) in
  106 + paths.(i) <- (id,super_S,label);
  107 + paths.(super) <- (id_S, id, label_S) in
  108 + let rec correct_dep i (id,super,label) =
  109 + let adv_relators = ["kto";"co";"ile";"czyj";"jaki";"który";
  110 + "jak";"skąd";"dokąd";"gdzie";"którędy";"kiedy";"odkąd";"dlaczego";"czemu";"gdy"] in
  111 + if (if_cat ["comp"] (ExtArray.get tokens id).token &&
  112 + if_cat ["fin"; "praet"; "winien"; "pred"; "imps"] (ExtArray.get tokens super).token) ||
  113 + (if_cat ["conj"] (ExtArray.get tokens id).token &&
  114 + if_cat ["fin"; "praet"; "winien"; "pred"; "imps"] (ExtArray.get tokens super).token &&
  115 + not (List.exists (fun (_,super,_) -> super = i) (Array.to_list paths))) ||
  116 + (if_cat ["ppron3"] (ExtArray.get tokens id).token &&
  117 + if_interps [5,"praep"] (ExtArray.get tokens id).token) ||
  118 + (if_lemma adv_relators (ExtArray.get tokens id).token &&
  119 + if_cat ["fin"; "praet"; "winien"; "imps"; "subst"; "pred"] (ExtArray.get tokens super).token)
  120 + then
  121 + change_dep i (id,super,label);
  122 + if (if_lemma adv_relators (ExtArray.get tokens id).token &&
  123 + if_cat ["subst"; "pred"] (ExtArray.get tokens super).token)
  124 + then correct_dep i paths.(i) in
  125 + Array.iteri correct_dep paths; paths
... ...
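
As a reading aid: a minimal sketch of how the new if_lemma / if_cat / if_interps helpers classify tokens, assuming the Lemma(lemma, cat, interp) constructor and the token record from PreTypes exactly as used in the code above; the names is_verbal and is_praep_ppron3 below are illustrative only and do not appear in the commit.

(* Illustrative sketch. is_verbal mirrors the category test used by swap_dep:
   true for finite, praet, winien, pred and imps forms. *)
let is_verbal tokens id =
  if_cat ["fin"; "praet"; "winien"; "pred"; "imps"] (ExtArray.get tokens id).token

(* if_interps [5,"praep"] holds when some interpretation of the lemma has the
   value "praep" at tag position 5, as in the ppron3 test inside swap_dep. *)
let is_praep_ppron3 tokens id =
  let tok = (ExtArray.get tokens id).token in
  if_cat ["ppron3"] tok && if_interps [5,"praep"] tok
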
parser/LCGlexicon.ml
... ... @@ -596,8 +596,8 @@ let make_adjp numbers cases genders grads d lemma cat = (* FIXME: usunąć niekt
596 596 with Not_found -> l)
597 597 | fnum,frame -> failwith ("make_adjp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in
598 598  
599   -let adv_relators = Xlist.fold [
600   - "jak","Attr",[Int;Rel];
  599 +let adv_relators = Xlist.fold [ (* when changing the keys here, also update accordingly the contents of *)
  600 +	"jak","Attr",[Int;Rel]; (* the adv_relators list in the move_comp procedure in ../diagnostics/treeChange.ml *)
601 601 "skąd","abl",[Int;Rel];
602 602 "dokąd","adl",[Int;Rel];
603 603 "gdzie","locat",[Int;Rel];
... ... @@ -1169,6 +1169,7 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa
1169 1169 [LCGrenderer.make_frame_simple quant t d ( batrs)]
1170 1170 | _,"xxx",[] -> [] (* FIXME *)
1171 1171 	| ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] d (make_node "." "interp" d.weight 0 [])] (* FIXME: this is needed for CONLL *)
  1172 +	| "…","interp",[] -> [LCGrenderer.make_frame_simple [] ["multidot"] d (make_node "…" "interp" d.weight 0 [])] (* FIXME: this is needed for CONLL *)
1172 1173 	| lemma,"brev",_ -> [LCGrenderer.make_frame_simple [] ["brev"] d (make_node lemma "brev" d.weight 0 [])] (* FIXME: this is needed for CONLL *)
1173 1174 | "<conll_root>","interp",[] ->
1174 1175 let batrs = (make_node "<conll_root>" "interp" d.weight 0 []) in
... ...
parser/LCGvalence.ml
... ... @@ -276,7 +276,7 @@ let get_nodes = function
276 276 Node t ->
277 277 let attrs,b = extract_nosem [] t.attrs in
278 278 (* let t = if t.pred = "<query1>" || t.pred = "<query2>" || t.pred = "<query3>" || t.pred = "<query4>" || t.pred = "<query5>" || t.pred = "<query6>" then {t with agf=CORE} else t in *)
279   -	let t = if t.pred = "<sentence>" || t.pred = "pro-komunikować" then {t with agf=CORE} else t in (* FIXME: test this on direct speech *)
  279 +	let t = if t.pred = "<conll_root>" || t.pred = "<sentence>" || t.pred = "pro-komunikować" then {t with agf=CORE} else t in (* FIXME: test this on direct speech *)
280 280 if t.agf = NOGF then failwith ("get_nodes agf=NOGF: " ^ t.pred) else
281 281 if b then {t with amorf=mark_nosem_morf t.amorf; attrs=attrs} else t
282 282 | _ -> failwith "get_nodes"
... ...
parser/exec.ml
... ... @@ -200,8 +200,12 @@ let conll_parse_sentence timeout test_only_flag paths tokens =
200 200 let result = empty_conll_parse_result in
201 201 let time2 = time_fun () in
202 202 try
203   - let paths = TreeChange.remove_interps paths tokens in
204   - let paths = TreeChange.move_comp paths tokens in
  203 + let paths = TreeChange.replace_tokens paths tokens in
  204 + let paths = TreeChange.remove_interps "." paths tokens in
  205 + let paths = TreeChange.correct_coordination paths tokens in
  206 + let paths = TreeChange.remove_interps "," paths tokens in
  207 + let paths = TreeChange.remove_interps "-" paths tokens in
  208 + let paths = TreeChange.swap_dep paths tokens in
205 209 let dep_chart = LCGlexicon.dep_create paths tokens in
206 210 let dep_chart,references = LCGchart.dep_lazify dep_chart in
207 211 let result = if test_only_flag then result else {result with dep_chart=dep_chart} in
... ...
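
For reference, the new normalization steps in conll_parse_sentence can be read as a single pipeline over the dependency paths; a minimal sketch assuming only the TreeChange functions added above (the wrapper name normalize_dep_tree is hypothetical, exec.ml inlines these calls):

(* Hypothetical convenience wrapper reproducing the transformation order
   used in conll_parse_sentence above. *)
let normalize_dep_tree paths tokens =
  let paths = TreeChange.replace_tokens paths tokens in
  let paths = TreeChange.remove_interps "." paths tokens in
  let paths = TreeChange.correct_coordination paths tokens in
  let paths = TreeChange.remove_interps "," paths tokens in
  let paths = TreeChange.remove_interps "-" paths tokens in
  TreeChange.swap_dep paths tokens
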
parser/makefile
... ... @@ -13,7 +13,7 @@ LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexO
13 13 DISAMB= disambSelPref.ml disambLemma.ml
14 14 SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml
15 15 #SEM= semGraph.ml semTypes.ml semStringOf.ml semMmlOf.ml semMrl.ml
16   -EXEC= execTypes.ml visualization.ml ../diagnostics/treeChange.ml exec.ml ../diagnostics/LCGfields.ml ../diagnostics/compTrees.ml
  16 +EXEC= execTypes.ml visualization.ml ../diagnostics/treeChange.ml exec.ml ../diagnostics/LCGfields.ml #../diagnostics/compTrees.ml
17 17  
18 18 all:
19 19 $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml
... ...
parser/pipe.ml
... ... @@ -129,7 +129,7 @@ let process_id s =
129 129 let process_conll_corpus filename =
130 130 let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in
131 131 print_endline "process_conll_corpus";
132   - (* let corpus = [List.hd corpus] in *)
  132 + let corpus = [List.hd corpus] in
133 133 let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in
134 134 Xlist.iter corpus (fun query ->
135 135 let id = process_id (get_query_id query) in
... ...
parser/visualization.ml
... ... @@ -799,12 +799,13 @@ let html_of_eniam_sentence path tokens (result : eniam_parse_result) =
799 799 (* | NotTranslated -> "not_translated: \n" *)
800 800 | Parsed ->
801 801 (* print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; *)
802   - (* print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; *)
  802 + print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
803 803 (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *)
804   - sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size (*^ *)
  804 + sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^
805 805 (* sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ *)
806   - (* sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ *)
  806 + sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^
807 807 (* sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
  808 + ""
808 809 | _ -> failwith "html_of_eniam_sentence"
809 810  
810 811 let html_of_conll_sentence path tokens (result : conll_parse_result) =
... ... @@ -841,12 +842,13 @@ let html_of_conll_sentence path tokens (result : conll_parse_result) =
841 842 (* | NotTranslated -> "not_translated: \n" *)
842 843 | Parsed ->
843 844 (* print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree; *)
844   - (* print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree; *)
  845 + print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
845 846 (* LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree; *)
846   - sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size (* ^ *)
  847 + sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size ^
847 848 (* sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^ *)
848   - (* sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^ *)
  849 + sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^
849 850 (* sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix *)
  851 + ""
850 852 | _ -> failwith "html_of_conll_sentence"
851 853  
852 854 let html_of_sem_sentence path tokens (result : semantic_processing_result) =
... ...