Commit 1e9e955b5e16d923924ddd77a24eab68db47017f

Authored by Wojciech Jaworski
2 parents a731666f 08caf502

Merge branch 'corpora' of ssh://git.nlp.ipipan.waw.pl:8888/wojciech.jaworski/ENIAM into corpora

rozjechało się
corpora/CONLL.ml
... ... @@ -52,6 +52,15 @@ let rec string_of_text mode = function
52 52  
53 53 (******************)
54 54  
  55 +let establish_next rev_tokens =
  56 + let rec pom res = function
  57 + h :: t -> let next = if res = []
  58 + then h.beg+h.len
  59 + else (List.hd res).beg in
  60 + pom ({h with next = next} :: res) t
  61 + | [] -> res in
  62 + pom [] rev_tokens
  63 +
55 64 let rec establish_for_token i res text = function
56 65 h :: t -> if Xstring.check_prefix " " text
57 66 then establish_for_token (i+100) res (Xstring.cut_prefix " " text) (h :: t)
... ... @@ -66,7 +75,8 @@ let rec establish_for_token i res text = function
66 75 let rec establish_lengths text = function
67 76 RawSentence text -> failwith ("establish_lengths: " ^ text)
68 77 | StructSentence (tokens, n) -> let pbeg, plen, rev_tokens = establish_for_token 100 [] text tokens in
69   - pbeg, plen, StructSentence (List.rev rev_tokens, n)
  78 + let tokens = establish_next rev_tokens in
  79 + pbeg, plen-100, StructSentence (tokens, plen)
70 80 | ORSentence (_,_,_,_) -> failwith ("establish_lengths: ORSentence")
71 81 | AltSentence alts -> if List.exists (fun (mode, s) -> mode = CONLL) alts
72 82 then establish_lengths text (snd (List.find (fun (mode, s) -> mode = CONLL) alts))
... ... @@ -141,9 +151,10 @@ let load_token in_channel =
141 151 | [id; orth; lemma; lemma2; cat; cat2; interp; interp2; "-1"; super; "_"; label; "_"; "_"] ->
142 152 (if (cat, lemma, interp) <> (cat2, lemma2, interp2) then fail line;
143 153 n_token id orth lemma cat interp super label)
144   - | id :: orth :: lemma :: cat :: cat2 :: interp :: e ->
145   - (fail line;
146   - n_token id orth lemma cat interp "" "") (* FIXME: "" "" trzeba na coś zmienic *)
  154 + | [id; orth; lemma; cat; cat2; interp; super; label_err; "_"] ->
  155 + (if cat <> cat2 && Xstring.check_sufix "_" label_err then fail line;
  156 + let label = Xstring.cut_sufix "_" label_err in
  157 + n_token id orth lemma cat interp super label)
147 158 | _ -> failwith ("load_token: " ^ line)
148 159 (* {c_id = List.nth pom 1;
149 160 c_lemma = List.nth pom 2;
... ...
corpora/XmlPrinter.ml
... ... @@ -47,13 +47,18 @@ let rec lt_of_xml = function
47 47 | Xml.Element("ref",["id",i],[]) -> Ref(int_of_string i)
48 48 | xml -> print_endline (Xml.to_string_fmt xml); failwith "lt_of_xml"
49 49  
50   -let graph_of_xml = function
  50 +let graph_of_xml xml =
  51 + let establish_indexs graph =
  52 + let max = Xlist.fold graph 0 (fun acc (n, _) -> if n > acc then n else acc) in
  53 + let table = Array.make (max+1) Dot in
  54 + Xlist.iter graph (fun (n,x) -> table.(n) <- x); table in
  55 + match xml with
51 56 Xml.Element("graph",[],l) ->
52   - List.map (function Xml.Element("graph_node",["id",i],[xml]) -> (*int_of_string i,*) lt_of_xml xml | _ -> failwith "graph_of_xml") l
  57 + establish_indexs @@ List.map (function Xml.Element("graph_node",["id",i],[xml]) -> int_of_string i, lt_of_xml xml | _ -> failwith "graph_of_xml") l
53 58 | _ -> failwith "graph_of_xml"
54 59  
55 60 let print_xml path name xml =
56   - let graph = Array.of_list @@ graph_of_xml xml in
  61 + let graph = graph_of_xml xml in
57 62 Visualization.print_graph path name graph
58 63  
59 64 let load_and_print_xml path name filename =
... ...
dependencyParser/basic/mate-tools/README.txt
1 1 To build the project execute
  2 +mkdir dist
  3 +mkdir include
2 4 ant build
3 5 in the main directory (mate-tools).
4 6 This requires ant which can be installed by executing
... ...
parser/exec.ml
... ... @@ -203,7 +203,7 @@ let rec parse_sentence timeout test_only_flag mode next_id = function
203 203 let dep_graph,references,next_reference = LCGchart.dep_parse timeout dep_graph references next_reference time_fun in
204 204 (* FIXME: dodać dalsze przetwarzanie dep_graph *)
205 205 let xml = DepTree.conll_to_xml paths in
206   - let graph = Array.of_list (XmlPrinter.graph_of_xml xml) in (* FIXME: do poprawy *)
  206 + let graph = XmlPrinter.graph_of_xml xml in (* FIXME: do poprawy *)
207 207 Visualization.print_graph "results/" "term_conll" graph;
208 208 let result = {empty_eniam_parse_result with status=Parsed; term=graph} in
209 209 ENIAMSentence result, next_id
... ...