Commit 9f53f85cc3623461c1cbd40e331f2bd4745cb911
1 parent
56fafa1e
dodana statystyka znaków interpunkcyjnych w krzakach.conll, rozszerzenie info_se…
…ntences (do nowego pliku) info_sentences2.txt, zdebugowanie funkcji replace_tokens w tree_change.ml
Showing
9 changed files
with
79245 additions
and
28 deletions
Too many changes to show.
To preserve performance only 3 of 9 files are displayed.
corpora/CONLL.ml
... | ... | @@ -173,7 +173,7 @@ let rec establish_lengths text paths tokens = |
173 | 173 | |
174 | 174 | exception ErrorInfoFile of string |
175 | 175 | |
176 | -let info_file = "../corpora/info_sentences.txt" | |
176 | +let info_file = "../corpora/info_sentences2.txt" | |
177 | 177 | |
178 | 178 | let info = Xstring.split "\n\n" @@ File.load_file_gen info_file |
179 | 179 | |
... | ... | @@ -183,7 +183,7 @@ let add_to_map map info_str = |
183 | 183 | | _ -> raise (ErrorInfoFile info_str) |
184 | 184 | |
185 | 185 | let info_map = |
186 | - Xlist.fold info StringMap.empty add_to_map | |
186 | + Xlist.fold (List.tl info) StringMap.empty add_to_map | |
187 | 187 | |
188 | 188 | let match_sentence (p_record,tokens) = |
189 | 189 | let rec info_token s = match s with |
... | ... |
corpora/conllParser.ml
1 | 1 | open Xstd |
2 | 2 | open Types |
3 | 3 | |
4 | -let skladnica_zaleznosciowa_filename = "resources/skladnica_zaleznosciowa.conll" | |
4 | +let skladnica_zaleznosciowa_filename = "../../NLP resources/skladnica_zaleznosciowa.conll" | |
5 | 5 | |
6 | 6 | let oc = open_out "../corpora/info_sentences.txt" |
7 | 7 | |
... | ... | @@ -79,7 +79,7 @@ let any_difference string1 string2 = if string1 = string2 |
79 | 79 | let find_info tokens = |
80 | 80 | let text_generated = get_sentence tokens in |
81 | 81 | try |
82 | - let sentence = Resources.InfoMap.find (Xlist.map tokens (fun token -> token.c_orth)) Resources.conll_info in | |
82 | + let sentence = Resources.InfoMap.find (Xlist.map tokens (fun token -> token.c_orth)) (Resources.conll_info ()) in | |
83 | 83 | let id, text = sentence.s_id, sentence.s_text in |
84 | 84 | (*if any_difference text text_generated && text <> "not_found" |
85 | 85 | then print_endline (text ^ "\n" ^ text_generated ^ "\n\n");*) |
... | ... |
corpora/generate.ml
1 | -(* Generowanie pliku info_sentences.txt *) | |
2 | -let _ = | |
1 | +(* Generowanie pliku info_sentences.txt na podstawie skladnicy walencyjnej*) | |
2 | +let _ = | |
3 | 3 | (*ConllParser.processSkladnica () *) |
4 | 4 | () |
5 | + | |
6 | +(* Generowanie pliku info_sentences2.txt na podstawie krzaków *) | |
7 | +let _ = | |
8 | + (* Resources.info_file () *) | |
9 | + () | |
10 | + | |
11 | +(* Generowanie pliku ../../NLP resources/krzaki_interp_statistics.txt na podstawie krzaków *) | |
12 | +let _ = | |
13 | + InterpsInCorpus.print_diagnose () | |
... | ... |