wosedon_test.ml
1.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
(* A mysterious bug currently prevents this from working correctly! *)
(** [test ()] returns the fixed XML request used by the driver in this file.

    The document declares the [cesAna] DOCTYPE and holds a single
    [chunkList] with one chunk and one sentence of four tokens ([Ala], [ma],
    [kota] and [.]), each carrying one or more [lex] entries. Lines are
    joined with ["\n"]; there is no newline after the closing
    [</chunkList>] tag. *)
let test () =
  String.concat "\n"
    [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
      "<!DOCTYPE cesAna SYSTEM \"xcesAnaIPI.dtd\">";
      "<chunkList xmlns:xlink=\"http://www.w3.org/1999/xlink\">";
      " <chunk id=\"ch1\" type=\"p\">";
      " <sentence>";
      (* token: Ala *)
      " <tok>";
      " <orth>Ala</orth>";
      " <lex><base>al</base><ctag>subst:sg:gen:m1</ctag></lex>";
      " <lex><base>al</base><ctag>subst:sg:acc:m1</ctag></lex>";
      " <lex><base>ala</base><ctag>subst:sg:nom:f</ctag></lex>";
      " </tok>";
      (* token: ma *)
      " <tok>";
      " <orth>ma</orth>";
      " <lex><base>mieć</base><ctag>fin:sg:ter:imperf</ctag></lex>";
      " <lex><base>mój</base><ctag>adj:sg:nom:f:pos</ctag></lex>";
      " </tok>";
      (* token: kota *)
      " <tok>";
      " <orth>kota</orth>";
      " <lex><base>kot</base><ctag>subst:sg:gen:m2</ctag></lex>";
      " <lex><base>kot</base><ctag>subst:sg:acc:m2</ctag></lex>";
      " <lex><base>kota</base><ctag>subst:sg:nom:f</ctag></lex>";
      " </tok>";
      (* no-space marker followed by the final punctuation token *)
      " <ns/>";
      " <tok>";
      " <orth>.</orth>";
      " <lex><base>.</base><ctag>interp</ctag></lex>";
      " </tok>";
      " </sentence>";
      " </chunk>";
      "</chunkList>" ]
(** [start_wosedon ()] launches [wosedon -c cfg/wosedon.ini -i] as a child
    process, passing it the current process environment.

    @return the channel triple produced by [Unix.open_process_full]: a
    channel reading the child's standard output, a channel writing to its
    standard input, and a channel reading its standard error. *)
let start_wosedon () =
  let command = "wosedon -c cfg/wosedon.ini -i" in
  let env = Unix.environment () in
  Unix.open_process_full command env
(** [stop_wosedon channels] closes the three channels obtained from
    {!start_wosedon} with [Unix.close_process_full] and discards the
    process status it reports. *)
let stop_wosedon channels =
  let _status : Unix.process_status = Unix.close_process_full channels in
  ()
(** [analyze ic oc ec s] sends [s] followed by two newlines on [oc], flushes
    it, then reads lines from [ic] until one is exactly ["</chunkList>"].

    @param ic channel the reply is read from
    @param oc channel the request is written to
    @param ec not used by this function
    @param s request text to send
    @return every line read, including the closing ["</chunkList>"] line,
    joined with ["\n"]
    @raise End_of_file if [ic] ends before a ["</chunkList>"] line is seen *)
let analyze ic oc ec s =
  (* [seen] holds the lines read so far, most recent first. *)
  let rec collect seen =
    match input_line ic with
    | "</chunkList>" as closing ->
      String.concat "\n" (List.rev (closing :: seen))
    | line -> collect (line :: seen)
  in
  output_string oc (s ^ "\n\n");
  (* Flush before reading so the request is actually delivered. *)
  flush oc;
  collect []
(* Entry point: start wosedon, send it the fixture from [test ()], print the
   reply on standard output, and shut the process down.

   The process is stopped even when [analyze] raises (for instance
   [End_of_file] if wosedon exits before emitting the closing tag), so the
   pipes are closed and the child is reaped before the exception
   propagates. *)
let () =
  let channels = start_wosedon () in
  let wosedon_in, wosedon_out, wosedon_err = channels in
  let reply =
    try analyze wosedon_in wosedon_out wosedon_err (test ())
    with e ->
      (* Best-effort cleanup: a failure while closing must not mask the
         original error, which is re-raised unchanged below. *)
      (try stop_wosedon channels with Unix.Unix_error _ | Sys_error _ -> ());
      raise e
  in
  print_endline reply;
  stop_wosedon channels