Commit 0dfb2dfb2c7d5b0308aa37015da22c0bd77c2767
0 parents
initial commit
Showing
14 changed files
with
30572 additions
and
0 deletions
Too many changes to show.
To preserve performance only 4 of 14 files are displayed.
BeNePar/DataPreparation.ipynb
0 → 100644
1 | +++ a/BeNePar/DataPreparation.ipynb | ||
1 | +{ | ||
2 | + "cells": [ | ||
3 | + { | ||
4 | + "cell_type": "code", | ||
5 | + "execution_count": 1, | ||
6 | + "id": "5cd26f6f", | ||
7 | + "metadata": {}, | ||
8 | + "outputs": [], | ||
9 | + "source": [ | ||
10 | + "import os\n", | ||
11 | + "\n", | ||
12 | + "from datasets import load_dataset\n", | ||
13 | + "\n", | ||
14 | + "from IPython.display import display\n", | ||
15 | + "\n", | ||
16 | + "import sys\n", | ||
17 | + "sys.path.append('../')\n", | ||
18 | + "from neural_parser import hybrid_tree_utils" | ||
19 | + ] | ||
20 | + }, | ||
21 | + { | ||
22 | + "cell_type": "code", | ||
23 | + "execution_count": 2, | ||
24 | + "id": "fecef4af", | ||
25 | + "metadata": {}, | ||
26 | + "outputs": [ | ||
27 | + { | ||
28 | + "name": "stderr", | ||
29 | + "output_type": "stream", | ||
30 | + "text": [ | ||
31 | + "Found cached dataset pdb_c_beta (/home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1)\n" | ||
32 | + ] | ||
33 | + }, | ||
34 | + { | ||
35 | + "data": { | ||
36 | + "application/vnd.jupyter.widget-view+json": { | ||
37 | + "model_id": "d6fc0deda216433982f304d7451158b2", | ||
38 | + "version_major": 2, | ||
39 | + "version_minor": 0 | ||
40 | + }, | ||
41 | + "text/plain": [ | ||
42 | + " 0%| | 0/3 [00:00<?, ?it/s]" | ||
43 | + ] | ||
44 | + }, | ||
45 | + "metadata": {}, | ||
46 | + "output_type": "display_data" | ||
47 | + } | ||
48 | + ], | ||
49 | + "source": [ | ||
50 | + "pdbc_dataset = load_dataset('../pdb_c_beta/')" | ||
51 | + ] | ||
52 | + }, | ||
53 | + { | ||
54 | + "cell_type": "code", | ||
55 | + "execution_count": 3, | ||
56 | + "id": "23da801f", | ||
57 | + "metadata": {}, | ||
58 | + "outputs": [], | ||
59 | + "source": [ | ||
60 | + "BRACKETS_DIR = 'brackets'\n", | ||
61 | + "! rm -r {BRACKETS_DIR}\n", | ||
62 | + "! mkdir {BRACKETS_DIR}" | ||
63 | + ] | ||
64 | + }, | ||
65 | + { | ||
66 | + "cell_type": "code", | ||
67 | + "execution_count": 5, | ||
68 | + "id": "c105feff", | ||
69 | + "metadata": {}, | ||
70 | + "outputs": [ | ||
71 | + { | ||
72 | + "name": "stdout", | ||
73 | + "output_type": "stream", | ||
74 | + "text": [ | ||
75 | + "train\n", | ||
76 | + " brackets/pdbc-cont-train.dat\n", | ||
77 | + " 15903\n", | ||
78 | + "validation\n", | ||
79 | + " brackets/pdbc-cont-validation.dat\n", | ||
80 | + " 1980\n", | ||
81 | + "test\n", | ||
82 | + " brackets/pdbc-cont-test.dat\n", | ||
83 | + " 1990\n" | ||
84 | + ] | ||
85 | + } | ||
86 | + ], | ||
87 | + "source": [ | ||
88 | + "features = pdbc_dataset['train'].features\n", | ||
89 | + "\n", | ||
90 | + "for part, dataset in pdbc_dataset.items():\n", | ||
91 | + " print(part)\n", | ||
92 | + " b_cont = []\n", | ||
93 | + " for sentence in dataset:\n", | ||
94 | + " tree = hybrid_tree_utils.tree_from_dataset_instance(sentence, features)\n", | ||
95 | + " if tree.is_continuous():\n", | ||
96 | + " b_cont.append(f'(TOP {tree.to_brackets(morph_tags=True)})')\n", | ||
97 | + " filepath = os.path.join(BRACKETS_DIR, f'pdbc-cont-{part}.dat')\n", | ||
98 | + " with open(filepath, 'w') as f:\n", | ||
99 | + " print(' ', filepath)\n", | ||
100 | + " print(' ', len(b_cont))\n", | ||
101 | + " for row in b_cont:\n", | ||
102 | + " print(row, file=f)" | ||
103 | + ] | ||
104 | + }, | ||
105 | + { | ||
106 | + "cell_type": "code", | ||
107 | + "execution_count": 6, | ||
108 | + "id": "c849233c", | ||
109 | + "metadata": {}, | ||
110 | + "outputs": [ | ||
111 | + { | ||
112 | + "name": "stdout", | ||
113 | + "output_type": "stream", | ||
114 | + "text": [ | ||
115 | + " 1990 121784 1024525 brackets/pdbc-cont-test.dat\n", | ||
116 | + " 15903 1022627 8620535 brackets/pdbc-cont-train.dat\n", | ||
117 | + " 1980 126288 1065593 brackets/pdbc-cont-validation.dat\n", | ||
118 | + " 19873 1270699 10710653 total\n" | ||
119 | + ] | ||
120 | + } | ||
121 | + ], | ||
122 | + "source": [ | ||
123 | + "! wc {BRACKETS_DIR}/*.dat" | ||
124 | + ] | ||
125 | + }, | ||
126 | + { | ||
127 | + "cell_type": "code", | ||
128 | + "execution_count": 8, | ||
129 | + "id": "679b9f10", | ||
130 | + "metadata": {}, | ||
131 | + "outputs": [ | ||
132 | + { | ||
133 | + "name": "stdout", | ||
134 | + "output_type": "stream", | ||
135 | + "text": [ | ||
136 | + "(TOP (ROOT (*S (S (NP (AdjP (*Adj (adj:sg:nom:f:pos Skośnooka))) (*NP (*N (subst:sg:nom:f dziewczynka)))) (*VP (*V (fin:sg:ter:imperf trzyma))) (PrepNP (*Prep (prep:loc:nwok w)) (NP (*N (subst:pl:loc:f rękach)))) (NP (AdjP (*Adj (adj:pl:nom:f:pos drewniane))) (*NP (*N (subst:pl:nom:f pałeczki))))) (Punct (interp ,)) (*Conj (conj a)) (S (PrepNP (*Prep (prep:inst:nwok przed)) (NP (*N (ppron3:sg:inst:f:ter:akc:praep nią)))) (*VP (*V (fin:pl:ter:imperf znajdują))) (Part (part się)) (NP (*NP (*N (subst:pl:nom:n:ncol naczynia))) (AdjP (*Adj (adj:pl:nom:n:pos kuchenne)))))) (Punct (interp .))))\r\n", | ||
137 | + "(TOP (ROOT (*S (NP (*NP (*N (subst:sg:nom:n:col Dziecko))) (PrepNP (*Prep (prep:loc:nwok w)) (NP (AdjP (*Adj (adj:sg:loc:f:pos różowej))) (*NP (*N (subst:sg:loc:f opasce)))))) (*VP (*V (fin:sg:ter:imperf unosi))) (PrepNP (*Prep (prep:loc:nwok w)) (NP (*N (subst:pl:loc:f rękach)))) (NP (AdjP (*Adj (adj:pl:acc:m3:pos drewniane))) (*NP (*N (subst:pl:acc:m3 patyczki)))) (PrepNP (*Prep (prep:inst:nwok nad)) (NP (AdjP (*AdjP (*Adj (ppas:pl:inst:n:perf:aff postawionymi))) (NP (PrepNP (*Prep (prep:gen do)) (NP (*N (subst:sg:gen:f góry)))) (*NP (*N (subst:sg:inst:n:ncol dnem))))) (*NP (NP (*N (subst:sg:inst:f miską))) (*Conj (conj i)) (NP (*N (subst:sg:inst:m3 garnkiem))))))) (Punct (interp .))))\r\n", | ||
138 | + "(TOP (ROOT (*S (NP (*NP (*N (subst:pl:nom:m1 Zawodnicy))) (PrepNP (*Prep (prep:loc:nwok w)) (NP (*NP (*N (subst:sg:loc:n:ncol pobliżu))) (NP (*N (subst:sg:gen:f piłki)))))) (*VP (*V (fin:pl:ter:imperf przepychają))) (Part (part się)) (PrepNP (*Prep (prep:inst między)) (NP (*N (siebie:inst sobą)))) (PrepNP (*Prep (prep:loc na)) (NP (*N (subst:sg:loc:n:ncol boisku))))) (Punct (interp .))))\r\n", | ||
139 | + "(TOP (ROOT (*S (S (NP (*NP (*N (subst:sg:nom:f Dziewczynka))) (PrepNP (*Prep (prep:loc:nwok w)) (NP (*N (subst:sg:loc:f sukience))))) (*VP (*V (fin:sg:ter:imperf puszcza))) (NP (*NP (*N (subst:pl:acc:f bańki))) (AdjP (*Adj (adj:pl:acc:f:pos mydlane)))) (PrepNP (*Prep (prep:loc na)) (NP (*N (subst:sg:loc:f trawie))))) (Punct (interp ,)) (*Conj (conj a)) (S (PrepNP (*Prep (prep:inst za)) (NP (*N (ppron3:sg:inst:f:ter:akc:praep nią)))) (*VP (*V (fin:sg:ter:imperf stoi))) (NP (AdjP (*Adj (adj:sg:nom:f:pos druga))) (*NP (*N (subst:sg:nom:f dziewczynka)))))) (Punct (interp .))))\r\n", | ||
140 | + "(TOP (ROOT (*S (NP (*NP (*N (subst:pl:nom:f Dziewczynki))) (PrepNP (*Prep (prep:loc:nwok w)) (NP (AdjP (*Adj (adj:pl:loc:f:pos kolorowych))) (*NP (*N (subst:pl:loc:f sukienkach)))))) (*VP (*V (fin:pl:ter:imperf stoją))) (PrepNP (*Prep (prep:loc na)) (NP (*N (subst:sg:loc:f trawie)))) (VP (Punct (interp ,)) (*VP (*V (pcon:imperf puszczając))) (NP (*NP (*N (subst:pl:acc:f bańki))) (AdjP (*Adj (adj:pl:acc:f:pos mydlane)))))) (Punct (interp .))))\r\n", | ||
141 | + "(TOP (ROOT (*S (NP (*NP (*N (subst:sg:nom:f Grupa))) (NP (*N (subst:pl:gen:n:col dzieci)))) (*VP (*V (fin:sg:ter:imperf moczy))) (Part (part się)) (PrepNP (*Prep (prep:loc:nwok w)) (NP (*NP (*N (subst:sg:loc:f wodzie))) (PrepNP (*Prep (prep:gen:nwok z)) (NP (*N (subst:sg:gen:f fontanny))))))) (Punct (interp .))))\r\n", | ||
142 | + "(TOP (ROOT (*S (NP (*NumP (*Num (num:pl:nom:m1:rec:ncol Kilku))) (NP (*N (subst:pl:gen:m1 chłopców)))) (*VP (*V (fin:sg:ter:imperf kąpie))) (Part (part się)) (PrepNP (*Prep (prep:loc:nwok w)) (NP (*NP (*N (subst:sg:loc:f fontannie))) (PrepNP (*Prep (prep:gen obok)) (NP (*NP (*N (subst:pl:gen:m3 stolików))) (CP (Punct (interp ,)) (*S (PrepAdjP (*Prep (prep:loc przy)) (AdjP (*Adj (adj:pl:loc:m3:pos których)))) (*VP (*V (fin:pl:ter:imperf siedzą))) (NP (*N (subst:pl:nom:m1 ludzie)))))))))) (Punct (interp .))))\r\n", | ||
143 | + "(TOP (ROOT (*S (NP (*NP (*N (subst:sg:nom:f Dwójka))) (NP (AdjP (*Adj (adj:pl:gen:n:pos nagich))) (*NP (*N (subst:pl:gen:n:col dzieci))) (AdjP (*AdjP (*Adj (ppas:pl:gen:n:perf:aff ubrudzonych))) (NP (*N (subst:pl:inst:f farbkami)))))) (*VP (*V (fin:sg:ter:imperf siedzi))) (PrepNP (*Prep (prep:loc na)) (NP (*N (subst:sg:loc:f podłodze)))) (PrepNP (*Prep (prep:gen obok)) (NP (AdjP (*Adj (adj:pl:gen:f:pos porozrzucanych))) (*NP (*N (subst:pl:gen:f kartek)))))) (Punct (interp .))))\r\n", | ||
144 | + "(TOP (ROOT (*S (S (NP (*NumP (*Num (num:pl:nom:n:rec:col Dwoje))) (NP (AdjP (AdjP (*Adj (adj:pl:gen:n:pos nagich))) (*Conj (interp ,)) (AdjP (*Adj (adj:pl:gen:n:pos małych)))) (*NP (*N (subst:pl:gen:n:col dzieci))))) (*VP (*V (fin:sg:ter:imperf siedzi))) (PrepNP (*Prep (prep:gen naprzeciwko)) (NP (*N (siebie:gen siebie))))) (*Conj (conj i)) (S (NP (AdjP (*Adj (adj:sg:nom:n:com większe))) (*NP (*N (subst:sg:nom:n:col dziecko)))) (*VP (*V (fin:sg:ter:imperf smaruje))) (NP (*N (subst:sg:inst:f farbą))) (NP (AdjP (*Adj (adj:sg:acc:n:com mniejsze))) (*NP (*N (subst:sg:acc:n:col dziecko)))))) (Punct (interp .))))\r\n", | ||
145 | + "(TOP (ROOT (*S (NP (*NP (*N (subst:sg:nom:f Dziewczynka))) (PrepNP (*Prep (prep:loc o)) (NP (AdjP (*Adj (adj:pl:loc:n:pos ciemnych))) (*NP (*N (subst:pl:loc:n:col oczach)))))) (*VP (*V (fin:sg:ter:imperf patrzy))) (PrepNP (*Prep (prep:acc na)) (NP (AdjP (*Adj (adj:sg:acc:m3:pos czarny))) (*NP (*N (subst:sg:acc:m3 przedmiot))) (CP (Punct (interp ,)) (*S (AdjP (*Adj (adj:sg:acc:m3:pos który))) (*VP (*V (fin:sg:ter:imperf trzyma))) (PrepNP (*Prep (prep:loc:nwok w)) (NP (*N (subst:sg:loc:f ręce))))))))) (Punct (interp .))))\r\n" | ||
146 | + ] | ||
147 | + } | ||
148 | + ], | ||
149 | + "source": [ | ||
150 | + "! head {BRACKETS_DIR}/pdbc-cont-train.dat" | ||
151 | + ] | ||
152 | + } | ||
153 | + ], | ||
154 | + "metadata": { | ||
155 | + "kernelspec": { | ||
156 | + "display_name": "TF_zajecia", | ||
157 | + "language": "python", | ||
158 | + "name": "tf_zajecia" | ||
159 | + }, | ||
160 | + "language_info": { | ||
161 | + "codemirror_mode": { | ||
162 | + "name": "ipython", | ||
163 | + "version": 3 | ||
164 | + }, | ||
165 | + "file_extension": ".py", | ||
166 | + "mimetype": "text/x-python", | ||
167 | + "name": "python", | ||
168 | + "nbconvert_exporter": "python", | ||
169 | + "pygments_lexer": "ipython3", | ||
170 | + "version": "3.10.6" | ||
171 | + } | ||
172 | + }, | ||
173 | + "nbformat": 4, | ||
174 | + "nbformat_minor": 5 | ||
175 | +} |
BeNePar/TrainAndParse.ipynb
0 → 100644
1 | +++ a/BeNePar/TrainAndParse.ipynb | ||
1 | +{ | ||
2 | + "cells": [ | ||
3 | + { | ||
4 | + "cell_type": "code", | ||
5 | + "execution_count": 5, | ||
6 | + "id": "d8404675", | ||
7 | + "metadata": {}, | ||
8 | + "outputs": [], | ||
9 | + "source": [ | ||
10 | + "#BENEPAR = '/home/kkrasnowska/benepar_experiments/self-attentive-parser/src/main.py'" | ||
11 | + ] | ||
12 | + }, | ||
13 | + { | ||
14 | + "cell_type": "code", | ||
15 | + "execution_count": 6, | ||
16 | + "id": "88603098", | ||
17 | + "metadata": {}, | ||
18 | + "outputs": [], | ||
19 | + "source": [ | ||
20 | + "#! mkdir models" | ||
21 | + ] | ||
22 | + }, | ||
23 | + { | ||
24 | + "cell_type": "code", | ||
25 | + "execution_count": 7, | ||
26 | + "id": "d5aedb53", | ||
27 | + "metadata": {}, | ||
28 | + "outputs": [], | ||
29 | + "source": [ | ||
30 | + "#! python {BENEPAR} train \\\n", | ||
31 | + "# --train-path brackets/pdbc-cont-train.dat \\\n", | ||
32 | + "# --dev-path brackets/pdbc-cont-validation.dat \\\n", | ||
33 | + "# --evalb-dir /home/kkrasnowska/benepar_experiments/self-attentive-parser/EVALB_SPMRL \\\n", | ||
34 | + "# --use-pretrained --pretrained-model \"allegro/herbert-large-cased\" \\\n", | ||
35 | + "# --use-encoder --num-layers 2 \\\n", | ||
36 | + "# --predict-tags \\\n", | ||
37 | + "# --model-path-base models" | ||
38 | + ] | ||
39 | + }, | ||
40 | + { | ||
41 | + "cell_type": "code", | ||
42 | + "execution_count": 8, | ||
43 | + "id": "3f6aaf27", | ||
44 | + "metadata": {}, | ||
45 | + "outputs": [], | ||
46 | + "source": [ | ||
47 | + "from IPython.display import display, HTML" | ||
48 | + ] | ||
49 | + }, | ||
50 | + { | ||
51 | + "cell_type": "code", | ||
52 | + "execution_count": 9, | ||
53 | + "id": "8d9d5103", | ||
54 | + "metadata": {}, | ||
55 | + "outputs": [], | ||
56 | + "source": [ | ||
57 | + "import benepar\n", | ||
58 | + "import nltk\n", | ||
59 | + "import spacy" | ||
60 | + ] | ||
61 | + }, | ||
62 | + { | ||
63 | + "cell_type": "code", | ||
64 | + "execution_count": 10, | ||
65 | + "id": "c56eda57", | ||
66 | + "metadata": {}, | ||
67 | + "outputs": [ | ||
68 | + { | ||
69 | + "name": "stderr", | ||
70 | + "output_type": "stream", | ||
71 | + "text": [ | ||
72 | + "Some weights of the model checkpoint at allegro/herbert-large-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.sso.sso_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.sso.sso_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias']\n", | ||
73 | + "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", | ||
74 | + "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" | ||
75 | + ] | ||
76 | + } | ||
77 | + ], | ||
78 | + "source": [ | ||
79 | + "MODEL = 'models_dev=97.36.pt'\n", | ||
80 | + "parser = benepar.Parser(MODEL)" | ||
81 | + ] | ||
82 | + }, | ||
83 | + { | ||
84 | + "cell_type": "code", | ||
85 | + "execution_count": null, | ||
86 | + "id": "35ffd9af", | ||
87 | + "metadata": {}, | ||
88 | + "outputs": [], | ||
89 | + "source": [] | ||
90 | + }, | ||
91 | + { | ||
92 | + "cell_type": "code", | ||
93 | + "execution_count": 11, | ||
94 | + "id": "06ae821c", | ||
95 | + "metadata": {}, | ||
96 | + "outputs": [], | ||
97 | + "source": [ | ||
98 | + "def postprocess(tree):\n", | ||
99 | + " for node in tree.subtrees():\n", | ||
100 | + " l = node.label()\n", | ||
101 | + " node.set_label(l.replace('LPAR', '(').replace('RPAR', ')'))\n", | ||
102 | + " for i, child in enumerate(node):\n", | ||
103 | + " if type(child) == str:\n", | ||
104 | + " node[i] = child.replace('-LSB-', '[').replace('-RSB-', ']')\n", | ||
105 | + " return tree\n", | ||
106 | + "\n", | ||
107 | + "def parse_tokenized_sentences(sentences, parser):\n", | ||
108 | + " [benepar.InputSentence(words=tokens) for tokens in sentences]\n", | ||
109 | + " return list(map(postprocess, parser.parse_sents(\n", | ||
110 | + " [benepar.InputSentence(words=tokens) for tokens in sentences]\n", | ||
111 | + " )))\n", | ||
112 | + "\n", | ||
113 | + "def parse_sentence(sentence, parser):\n", | ||
114 | + " return parse_tokenized_sentences([sentence.split()], parser)[0]" | ||
115 | + ] | ||
116 | + }, | ||
117 | + { | ||
118 | + "cell_type": "code", | ||
119 | + "execution_count": 12, | ||
120 | + "id": "c96dc9d9", | ||
121 | + "metadata": {}, | ||
122 | + "outputs": [ | ||
123 | + { | ||
124 | + "name": "stderr", | ||
125 | + "output_type": "stream", | ||
126 | + "text": [ | ||
127 | + "You're using a HerbertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n", | ||
128 | + "/home/kkrasnowska/venvs/torch_benepar/lib/python3.10/site-packages/torch/distributions/distribution.py:44: UserWarning: <class 'torch_struct.distributions.TreeCRF'> does not define `arg_constraints`. Please set `arg_constraints = {}` or initialize the distribution with `validate_args=False` to turn off validation.\n", | ||
129 | + " warnings.warn(f'{self.__class__} does not define `arg_constraints`. ' +\n" | ||
130 | + ] | ||
131 | + }, | ||
132 | + { | ||
133 | + "data": { | ||
134 | + "image/svg+xml": [ | ||
135 | + "<svg baseProfile=\"full\" height=\"312px\" preserveAspectRatio=\"xMidYMid meet\" style=\"font-family: times, serif; font-weight:normal; font-style: normal; font-size: 16px;\" version=\"1.1\" viewBox=\"0,0,512.0,312.0\" width=\"512px\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">TOP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ROOT</text></svg><svg width=\"12.5%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">[</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"6.25%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"75%\" x=\"12.5%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*S</text></svg><svg width=\"35.4167%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:pl:nom:m2</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Koty</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"17.7083%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"12.5%\" x=\"35.4167%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*VP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*V</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">pred</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">to</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"41.6667%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"35.4167%\" x=\"47.9167%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:pl:nom:m1</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">złodzieje</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"65.625%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"16.6667%\" x=\"83.3333%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">.</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"91.6667%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"12.5%\" x=\"87.5%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">]</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"93.75%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg>" | ||
136 | + ], | ||
137 | + "text/plain": [ | ||
138 | + "Tree('TOP', [Tree('ROOT', [Tree('Punct', [Tree('interp', ['['])]), Tree('*S', [Tree('NP', [Tree('*N', [Tree('subst:pl:nom:m2', ['Koty'])])]), Tree('*VP', [Tree('*V', [Tree('pred', ['to'])])]), Tree('NP', [Tree('*N', [Tree('subst:pl:nom:m1', ['złodzieje'])])]), Tree('Punct', [Tree('interp', ['.'])])]), Tree('Punct', [Tree('interp', [']'])])])])" | ||
139 | + ] | ||
140 | + }, | ||
141 | + "execution_count": 12, | ||
142 | + "metadata": {}, | ||
143 | + "output_type": "execute_result" | ||
144 | + } | ||
145 | + ], | ||
146 | + "source": [ | ||
147 | + "parse_sentence('[ Koty to złodzieje . ]', parser)" | ||
148 | + ] | ||
149 | + }, | ||
150 | + { | ||
151 | + "cell_type": "code", | ||
152 | + "execution_count": 13, | ||
153 | + "id": "d62d1e31", | ||
154 | + "metadata": {}, | ||
155 | + "outputs": [ | ||
156 | + { | ||
157 | + "data": { | ||
158 | + "image/svg+xml": [ | ||
159 | + "<svg baseProfile=\"full\" height=\"312px\" preserveAspectRatio=\"xMidYMid meet\" style=\"font-family: times, serif; font-weight:normal; font-style: normal; font-size: 16px;\" version=\"1.1\" viewBox=\"0,0,560.0,312.0\" width=\"560px\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">TOP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ROOT</text></svg><svg width=\"88.5714%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*S</text></svg><svg width=\"72.5806%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*VP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*V</text></svg><svg width=\"44.4444%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">praet:sg:m1:imperf</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Widział</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"22.2222%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"55.5556%\" x=\"44.4444%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">aglt:sg:pri:imperf:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">am</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"72.2222%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"36.2903%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"27.4194%\" x=\"72.5806%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:acc:m2</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">kotka</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"86.2903%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"44.2857%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"11.4286%\" x=\"88.5714%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">.</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"94.2857%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg>" | ||
160 | + ], | ||
161 | + "text/plain": [ | ||
162 | + "Tree('TOP', [Tree('ROOT', [Tree('*S', [Tree('*VP', [Tree('*V', [Tree('praet:sg:m1:imperf', ['Widział']), Tree('aglt:sg:pri:imperf:nwok', ['am'])])]), Tree('NP', [Tree('*N', [Tree('subst:sg:acc:m2', ['kotka'])])])]), Tree('Punct', [Tree('interp', ['.'])])])])" | ||
163 | + ] | ||
164 | + }, | ||
165 | + "execution_count": 13, | ||
166 | + "metadata": {}, | ||
167 | + "output_type": "execute_result" | ||
168 | + } | ||
169 | + ], | ||
170 | + "source": [ | ||
171 | + "parse_sentence('Widział am kotka .', parser)" | ||
172 | + ] | ||
173 | + }, | ||
174 | + { | ||
175 | + "cell_type": "code", | ||
176 | + "execution_count": 14, | ||
177 | + "id": "418db531", | ||
178 | + "metadata": {}, | ||
179 | + "outputs": [], | ||
180 | + "source": [ | ||
181 | + "with open('brackets/pdbc-cont-validation.dat') as f:\n", | ||
182 | + " val_trees = [postprocess(nltk.Tree.fromstring(l.strip('\\n'))) for l in f.readlines()]\n", | ||
183 | + "with open('brackets/pdbc-cont-test.dat') as f:\n", | ||
184 | + " test_trees = [postprocess(nltk.Tree.fromstring(l.strip('\\n'))) for l in f.readlines()]" | ||
185 | + ] | ||
186 | + }, | ||
187 | + { | ||
188 | + "cell_type": "code", | ||
189 | + "execution_count": 15, | ||
190 | + "id": "2e5f4739", | ||
191 | + "metadata": {}, | ||
192 | + "outputs": [], | ||
193 | + "source": [ | ||
194 | + "val_sentences = [tree.leaves() for tree in val_trees]\n", | ||
195 | + "test_sentences = [tree.leaves() for tree in test_trees]" | ||
196 | + ] | ||
197 | + }, | ||
198 | + { | ||
199 | + "cell_type": "code", | ||
200 | + "execution_count": 16, | ||
201 | + "id": "6c52ef3f", | ||
202 | + "metadata": {}, | ||
203 | + "outputs": [ | ||
204 | + { | ||
205 | + "name": "stderr", | ||
206 | + "output_type": "stream", | ||
207 | + "text": [ | ||
208 | + "/home/kkrasnowska/venvs/torch_benepar/lib/python3.10/site-packages/torch/distributions/distribution.py:44: UserWarning: <class 'torch_struct.distributions.TreeCRF'> does not define `arg_constraints`. Please set `arg_constraints = {}` or initialize the distribution with `validate_args=False` to turn off validation.\n", | ||
209 | + " warnings.warn(f'{self.__class__} does not define `arg_constraints`. ' +\n" | ||
210 | + ] | ||
211 | + } | ||
212 | + ], | ||
213 | + "source": [ | ||
214 | + "val_pred_trees = parse_tokenized_sentences(val_sentences, parser)\n", | ||
215 | + "test_pred_trees = parse_tokenized_sentences(test_sentences, parser)" | ||
216 | + ] | ||
217 | + }, | ||
218 | + { | ||
219 | + "cell_type": "code", | ||
220 | + "execution_count": 17, | ||
221 | + "id": "d6d45ba8", | ||
222 | + "metadata": {}, | ||
223 | + "outputs": [], | ||
224 | + "source": [ | ||
225 | + "assert(len(val_trees) == len(val_pred_trees))\n", | ||
226 | + "assert(len(test_trees) == len(test_pred_trees))" | ||
227 | + ] | ||
228 | + }, | ||
229 | + { | ||
230 | + "cell_type": "code", | ||
231 | + "execution_count": 18, | ||
232 | + "id": "399c3f08", | ||
233 | + "metadata": {}, | ||
234 | + "outputs": [], | ||
235 | + "source": [ | ||
236 | + "# drop the TOP\n", | ||
237 | + "val_trees = [t[0] for t in val_trees]\n", | ||
238 | + "test_trees = [t[0] for t in test_trees]\n", | ||
239 | + "val_pred_trees = [t[0] for t in val_pred_trees]\n", | ||
240 | + "test_pred_trees = [t[0] for t in test_pred_trees]" | ||
241 | + ] | ||
242 | + }, | ||
243 | + { | ||
244 | + "cell_type": "code", | ||
245 | + "execution_count": 19, | ||
246 | + "id": "827be810", | ||
247 | + "metadata": {}, | ||
248 | + "outputs": [ | ||
249 | + { | ||
250 | + "data": { | ||
251 | + "image/svg+xml": [ | ||
252 | + "<svg baseProfile=\"full\" height=\"504px\" preserveAspectRatio=\"xMidYMid meet\" style=\"font-family: times, serif; font-weight:normal; font-style: normal; font-size: 16px;\" version=\"1.1\" viewBox=\"0,0,2808.0,504.0\" width=\"2808px\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ROOT</text></svg><svg width=\"97.7208%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*S</text></svg><svg width=\"41.691%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">S</text></svg><svg width=\"22.3776%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"46.875%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:loc:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">W</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"23.4375%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"53.125%\" x=\"46.875%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:loc:m3</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">samolocie</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"73.4375%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"11.1888%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"13.986%\" x=\"22.3776%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*VP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*V</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">praet:sg:m1:imperf</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">czytał</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"29.3706%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"63.6364%\" x=\"36.3636%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"18.6813%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:pl:acc:m3</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">wycinki</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"9.34066%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"81.3187%\" x=\"18.6813%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"20.2703%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:gen:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">z</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"10.1351%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"79.7297%\" x=\"20.2703%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"27.1186%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:gen:f</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prasy</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"13.5593%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"72.8814%\" x=\"27.1186%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"41.8605%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Adj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">adj:sg:gen:f:pos</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">polskiej</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"20.9302%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"16.2791%\" x=\"41.8605%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Conj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">conj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">i</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"41.8605%\" x=\"58.1395%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Adj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">adj:sg:gen:f:pos</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">polonijnej</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"79.0698%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"63.5593%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"60.1351%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"59.3407%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"68.1818%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"20.8455%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"2.33236%\" x=\"41.691%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Conj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">-</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"42.8571%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"55.9767%\" x=\"44.0233%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">S</text></svg><svg width=\"17.7083%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepAdjP</text></svg><svg width=\"44.1176%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:loc:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">w</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"22.0588%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"55.8824%\" x=\"44.1176%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Adj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">adj:sg:loc:m3:pos</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">każdym</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"72.0588%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"8.85417%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"6.77083%\" x=\"17.7083%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*VP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*V</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">imps:imperf</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">piętnowano</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"21.0938%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"17.7083%\" x=\"24.4792%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ppron3:sg:acc:m1:ter:nakc:npraep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">go</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"33.3333%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"57.8125%\" x=\"42.1875%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Compar</text></svg><svg width=\"6.30631%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Comp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">comp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">jako</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"3.15315%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"93.6937%\" x=\"6.30631%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"7.69231%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">\"</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"3.84615%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"30.7692%\" x=\"7.69231%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"46.875%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:gen:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">bez</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"23.4375%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"53.125%\" x=\"46.875%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:gen:m3</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">umiaru</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"73.4375%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"23.0769%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"24.0385%\" x=\"38.4615%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*AdjP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Adj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ppas:sg:acc:m1:perf:aff</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">zapatrzonego</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50.4808%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"29.8077%\" x=\"62.5%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"48.3871%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:acc:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">w</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"24.1935%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"51.6129%\" x=\"48.3871%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:acc:f</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Moskwę</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"74.1935%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"77.4038%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"7.69231%\" x=\"92.3077%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">\"</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"96.1538%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"53.1532%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"71.0938%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"72.0117%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"48.8604%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"2.2792%\" x=\"97.7208%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">.</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"98.8604%\" y1=\"1.2em\" y2=\"3em\" /></svg>" | ||
253 | + ], | ||
254 | + "text/plain": [ | ||
255 | + "Tree('ROOT', [Tree('*S', [Tree('S', [Tree('PrepNP', [Tree('*Prep', [Tree('prep:loc:nwok', ['W'])]), Tree('NP', [Tree('*N', [Tree('subst:sg:loc:m3', ['samolocie'])])])]), Tree('*VP', [Tree('*V', [Tree('praet:sg:m1:imperf', ['czytał'])])]), Tree('NP', [Tree('*NP', [Tree('*N', [Tree('subst:pl:acc:m3', ['wycinki'])])]), Tree('PrepNP', [Tree('*Prep', [Tree('prep:gen:nwok', ['z'])]), Tree('NP', [Tree('*NP', [Tree('*N', [Tree('subst:sg:gen:f', ['prasy'])])]), Tree('AdjP', [Tree('AdjP', [Tree('*Adj', [Tree('adj:sg:gen:f:pos', ['polskiej'])])]), Tree('*Conj', [Tree('conj', ['i'])]), Tree('AdjP', [Tree('*Adj', [Tree('adj:sg:gen:f:pos', ['polonijnej'])])])])])])])]), Tree('*Conj', [Tree('interp', ['-'])]), Tree('S', [Tree('PrepAdjP', [Tree('*Prep', [Tree('prep:loc:nwok', ['w'])]), Tree('AdjP', [Tree('*Adj', [Tree('adj:sg:loc:m3:pos', ['każdym'])])])]), Tree('*VP', [Tree('*V', [Tree('imps:imperf', ['piętnowano'])])]), Tree('NP', [Tree('*N', [Tree('ppron3:sg:acc:m1:ter:nakc:npraep', ['go'])])]), Tree('Compar', [Tree('*Comp', [Tree('comp', ['jako'])]), Tree('AdjP', [Tree('Punct', [Tree('interp', ['\"'])]), Tree('PrepNP', [Tree('*Prep', [Tree('prep:gen:nwok', ['bez'])]), Tree('NP', [Tree('*N', [Tree('subst:sg:gen:m3', ['umiaru'])])])]), Tree('*AdjP', [Tree('*Adj', [Tree('ppas:sg:acc:m1:perf:aff', ['zapatrzonego'])])]), Tree('PrepNP', [Tree('*Prep', [Tree('prep:acc:nwok', ['w'])]), Tree('NP', [Tree('*N', [Tree('subst:sg:acc:f', ['Moskwę'])])])]), Tree('Punct', [Tree('interp', ['\"'])])])])])]), Tree('Punct', [Tree('interp', ['.'])])])" | ||
256 | + ] | ||
257 | + }, | ||
258 | + "execution_count": 19, | ||
259 | + "metadata": {}, | ||
260 | + "output_type": "execute_result" | ||
261 | + } | ||
262 | + ], | ||
263 | + "source": [ | ||
264 | + "val_trees[504]" | ||
265 | + ] | ||
266 | + }, | ||
267 | + { | ||
268 | + "cell_type": "code", | ||
269 | + "execution_count": 20, | ||
270 | + "id": "1059e782", | ||
271 | + "metadata": {}, | ||
272 | + "outputs": [ | ||
273 | + { | ||
274 | + "data": { | ||
275 | + "image/svg+xml": [ | ||
276 | + "<svg baseProfile=\"full\" height=\"504px\" preserveAspectRatio=\"xMidYMid meet\" style=\"font-family: times, serif; font-weight:normal; font-style: normal; font-size: 16px;\" version=\"1.1\" viewBox=\"0,0,2808.0,504.0\" width=\"2808px\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ROOT</text></svg><svg width=\"97.7208%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*S</text></svg><svg width=\"41.691%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">S</text></svg><svg width=\"22.3776%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"46.875%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:loc:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">W</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"23.4375%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"53.125%\" x=\"46.875%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:loc:m3</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">samolocie</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"73.4375%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"11.1888%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"13.986%\" x=\"22.3776%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*VP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*V</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">praet:sg:m1:imperf</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">czytał</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"29.3706%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"63.6364%\" x=\"36.3636%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"18.6813%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:pl:acc:m3</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">wycinki</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"9.34066%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"81.3187%\" x=\"18.6813%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"20.2703%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:gen:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">z</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"10.1351%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"79.7297%\" x=\"20.2703%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"27.1186%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:gen:f</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prasy</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"13.5593%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"72.8814%\" x=\"27.1186%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"41.8605%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Adj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">adj:sg:gen:f:pos</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">polskiej</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"20.9302%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"16.2791%\" x=\"41.8605%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Conj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">conj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">i</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"41.8605%\" x=\"58.1395%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Adj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">adj:sg:gen:f:pos</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">polonijnej</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"79.0698%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"63.5593%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"60.1351%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"59.3407%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"68.1818%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"20.8455%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"2.33236%\" x=\"41.691%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Conj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">-</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"42.8571%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"55.9767%\" x=\"44.0233%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">S</text></svg><svg width=\"17.7083%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepAdjP</text></svg><svg width=\"44.1176%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:loc:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">w</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"22.0588%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"55.8824%\" x=\"44.1176%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Adj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">adj:sg:loc:m3:pos</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">każdym</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"72.0588%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"8.85417%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"6.77083%\" x=\"17.7083%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*VP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*V</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">imps:imperf</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">piętnowano</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"21.0938%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"17.7083%\" x=\"24.4792%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ppron3:sg:acc:m1:ter:nakc:npraep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">go</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"33.3333%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"57.8125%\" x=\"42.1875%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Compar</text></svg><svg width=\"6.30631%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Comp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">comp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">jako</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"3.15315%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"93.6937%\" x=\"6.30631%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">AdjP</text></svg><svg width=\"7.69231%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">\"</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"3.84615%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"30.7692%\" x=\"7.69231%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"46.875%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:gen:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">bez</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"23.4375%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"53.125%\" x=\"46.875%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:gen:m3</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">umiaru</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"73.4375%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"23.0769%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"24.0385%\" x=\"38.4615%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*AdjP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Adj</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ppas:sg:acc:m1:perf:aff</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">zapatrzonego</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50.4808%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"29.8077%\" x=\"62.5%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"48.3871%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">prep:acc:nwok</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">w</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"24.1935%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"51.6129%\" x=\"48.3871%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">subst:sg:acc:f</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Moskwę</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"74.1935%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"77.4038%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"7.69231%\" x=\"92.3077%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">\"</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"96.1538%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"53.1532%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"71.0938%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"72.0117%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"48.8604%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"2.2792%\" x=\"97.7208%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">interp</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">.</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"98.8604%\" y1=\"1.2em\" y2=\"3em\" /></svg>" | ||
277 | + ], | ||
278 | + "text/plain": [ | ||
279 | + "Tree('ROOT', [Tree('*S', [Tree('S', [Tree('PrepNP', [Tree('*Prep', [Tree('prep:loc:nwok', ['W'])]), Tree('NP', [Tree('*N', [Tree('subst:sg:loc:m3', ['samolocie'])])])]), Tree('*VP', [Tree('*V', [Tree('praet:sg:m1:imperf', ['czytał'])])]), Tree('NP', [Tree('*NP', [Tree('*N', [Tree('subst:pl:acc:m3', ['wycinki'])])]), Tree('PrepNP', [Tree('*Prep', [Tree('prep:gen:nwok', ['z'])]), Tree('NP', [Tree('*NP', [Tree('*N', [Tree('subst:sg:gen:f', ['prasy'])])]), Tree('AdjP', [Tree('AdjP', [Tree('*Adj', [Tree('adj:sg:gen:f:pos', ['polskiej'])])]), Tree('*Conj', [Tree('conj', ['i'])]), Tree('AdjP', [Tree('*Adj', [Tree('adj:sg:gen:f:pos', ['polonijnej'])])])])])])])]), Tree('*Conj', [Tree('interp', ['-'])]), Tree('S', [Tree('PrepAdjP', [Tree('*Prep', [Tree('prep:loc:nwok', ['w'])]), Tree('AdjP', [Tree('*Adj', [Tree('adj:sg:loc:m3:pos', ['każdym'])])])]), Tree('*VP', [Tree('*V', [Tree('imps:imperf', ['piętnowano'])])]), Tree('NP', [Tree('*N', [Tree('ppron3:sg:acc:m1:ter:nakc:npraep', ['go'])])]), Tree('Compar', [Tree('*Comp', [Tree('comp', ['jako'])]), Tree('AdjP', [Tree('Punct', [Tree('interp', ['\"'])]), Tree('PrepNP', [Tree('*Prep', [Tree('prep:gen:nwok', ['bez'])]), Tree('NP', [Tree('*N', [Tree('subst:sg:gen:m3', ['umiaru'])])])]), Tree('*AdjP', [Tree('*Adj', [Tree('ppas:sg:acc:m1:perf:aff', ['zapatrzonego'])])]), Tree('PrepNP', [Tree('*Prep', [Tree('prep:acc:nwok', ['w'])]), Tree('NP', [Tree('*N', [Tree('subst:sg:acc:f', ['Moskwę'])])])]), Tree('Punct', [Tree('interp', ['\"'])])])])])]), Tree('Punct', [Tree('interp', ['.'])])])" | ||
280 | + ] | ||
281 | + }, | ||
282 | + "execution_count": 20, | ||
283 | + "metadata": {}, | ||
284 | + "output_type": "execute_result" | ||
285 | + } | ||
286 | + ], | ||
287 | + "source": [ | ||
288 | + "val_pred_trees[504]" | ||
289 | + ] | ||
290 | + }, | ||
291 | + { | ||
292 | + "cell_type": "code", | ||
293 | + "execution_count": 25, | ||
294 | + "id": "4d6c7096", | ||
295 | + "metadata": {}, | ||
296 | + "outputs": [], | ||
297 | + "source": [ | ||
298 | + "'''\n", | ||
299 | + "def undummy(_tree):\n", | ||
300 | + " tree = _tree.copy(deep=True)\n", | ||
301 | + " for node in tree.subtrees():\n", | ||
302 | + " for i, child in enumerate(node):\n", | ||
303 | + " if type(child) != str and child.label() == 'DUMMY_PRE':\n", | ||
304 | + " node[i] = child[0]\n", | ||
305 | + " return tree\n", | ||
306 | + "\n", | ||
307 | + "'''\n", | ||
308 | + "def untag(_tree):\n", | ||
309 | + " tree = _tree.copy(deep=True)\n", | ||
310 | + " for node in tree.subtrees():\n", | ||
311 | + " for i, child in enumerate(node):\n", | ||
312 | + " if type(child) != str and set(map(type, child)) == {str}:\n", | ||
313 | + " assert(len(child) == 1)\n", | ||
314 | + " node[i] = child[0]\n", | ||
315 | + " #if set(map(type, node)) == {str}:\n", | ||
316 | + " # assert(len(node) == 1)\n", | ||
317 | + " # node.set_label('xxx')\n", | ||
318 | + " return tree\n", | ||
319 | + "\n", | ||
320 | + "'''\n", | ||
321 | + "\n", | ||
322 | + "ZDANIE_HEADS = {'*ff', '*spójnik', '*przec', '*zdanie', '*formawykrz'}\n", | ||
323 | + "ZDANIE_HEADS2 = set(l.strip('*') for l in ZDANIE_HEADS)\n", | ||
324 | + "ZDANIE_HEAD_HIERARCHY = ('ff', 'spójnik', 'przec', 'zdanie')\n", | ||
325 | + "\n", | ||
326 | + "\n", | ||
327 | + "def correct(_tree):\n", | ||
328 | + " tree = _tree.copy(deep=True)\n", | ||
329 | + " for node in tree.subtrees():\n", | ||
330 | + " if len(node) == 1 and type(node[0]) != str and node.label() == node[0].label():\n", | ||
331 | + " new_children = [child for child in node[0]]\n", | ||
332 | + " node.pop()\n", | ||
333 | + " node += new_children\n", | ||
334 | + " for node in tree.subtrees():\n", | ||
335 | + " if 'zdanie' in node.label():\n", | ||
336 | + " heads = []\n", | ||
337 | + " non_heads = []\n", | ||
338 | + " for child in node:\n", | ||
339 | + " if child.label().startswith('*') and child.label() not in ZDANIE_HEADS:\n", | ||
340 | + " child.set_label(child.label()[1:])\n", | ||
341 | + " (heads if child.label().startswith('*') else non_heads).append(child)\n", | ||
342 | + " \n", | ||
343 | + " return tree\n", | ||
344 | + "''';" | ||
345 | + ] | ||
346 | + }, | ||
347 | + { | ||
348 | + "cell_type": "code", | ||
349 | + "execution_count": 35, | ||
350 | + "id": "4a26b2e1", | ||
351 | + "metadata": { | ||
352 | + "scrolled": false | ||
353 | + }, | ||
354 | + "outputs": [], | ||
355 | + "source": [ | ||
356 | + "from collections import Counter, defaultdict\n", | ||
357 | + "\n", | ||
358 | + "c = Counter()\n", | ||
359 | + "\n", | ||
360 | + "def tree2spans(_tree):\n", | ||
361 | + " # make tokens unique\n", | ||
362 | + " tree = _tree.copy(deep=True)\n", | ||
363 | + " idx = 0\n", | ||
364 | + " for node in tree.subtrees():\n", | ||
365 | + " for i, child in enumerate(node):\n", | ||
366 | + " if type(child) == str:\n", | ||
367 | + " node[i] = f'{idx}##{child}'\n", | ||
368 | + " idx += 1\n", | ||
369 | + " spans = []\n", | ||
370 | + " for node in tree.subtrees():\n", | ||
371 | + " spans.append((\n", | ||
372 | + " node.label(),\n", | ||
373 | + " tuple(child if type(child) == str else child.label() for child in node),\n", | ||
374 | + " ' '.join(node.leaves())\n", | ||
375 | + " ))\n", | ||
376 | + " assert (len(set(spans)) == len(spans))\n", | ||
377 | + " return set(spans)\n", | ||
378 | + "\n", | ||
379 | + "def spans2dict(spans):\n", | ||
380 | + " s = defaultdict(set)\n", | ||
381 | + " for node, children, text in spans:\n", | ||
382 | + " if node in s[text]:\n", | ||
383 | + " print('!!!!!!!!!!!!!!!', node, text)\n", | ||
384 | + " display(spans)\n", | ||
385 | + " s[text].add(node)\n", | ||
386 | + " return s\n", | ||
387 | + "\n", | ||
388 | + "def spans2errors(spans_gold, spans_pred):\n", | ||
389 | + " sg = spans2dict(spans_gold)\n", | ||
390 | + " sp = spans2dict(spans_pred)\n", | ||
391 | + " errors = []\n", | ||
392 | + " tp, fp, fn = 0, 0, 0\n", | ||
393 | + " for text in set(sg.keys()).union(sp.keys()):\n", | ||
394 | + " txt = ' '.join('X' for _ in text.split())\n", | ||
395 | + " errs = []\n", | ||
396 | + " for span in sg[text].union(sp[text]):\n", | ||
397 | + " if span in sg[text] and span not in sp[text]:\n", | ||
398 | + " errs.append(f'-{span}')\n", | ||
399 | + " fn += 1\n", | ||
400 | + " elif span not in sg[text] and span in sp[text]:\n", | ||
401 | + " errs.append(f'+{span}')\n", | ||
402 | + " fp += 1\n", | ||
403 | + " else:\n", | ||
404 | + " tp += 1\n", | ||
405 | + " if errs:\n", | ||
406 | + " errors.append((tuple(sorted(errs)), text))\n", | ||
407 | + " #display(errors)\n", | ||
408 | + " #print('tp:', tp, 'fp:', fp, 'fn:', fn)\n", | ||
409 | + " #p, r = tp / (tp + fp), tp / (tp + fn)\n", | ||
410 | + " #f1 = 2 * tp / (2 * tp + fp + fn)\n", | ||
411 | + " #print('precision: ', p)\n", | ||
412 | + " #print('recall: ', r)\n", | ||
413 | + " #print('f1: ', f1)\n", | ||
414 | + " return (tp, fp, fn), errors" | ||
415 | + ] | ||
416 | + }, | ||
417 | + { | ||
418 | + "cell_type": "code", | ||
419 | + "execution_count": 54, | ||
420 | + "id": "397e3750", | ||
421 | + "metadata": {}, | ||
422 | + "outputs": [], | ||
423 | + "source": [ | ||
424 | + "def eval_trees(trees_gold, trees_pred):\n", | ||
425 | + " evaluation, errors = [], []\n", | ||
426 | + " for _tree_gold, _tree_pred in list(zip(trees_gold, trees_pred)):\n", | ||
427 | + " try:\n", | ||
428 | + " assert(''.join(_tree_gold.leaves()) == ''.join(_tree_pred.leaves()))\n", | ||
429 | + " except:\n", | ||
430 | + " print(_tree_gold.leaves())\n", | ||
431 | + " print(_tree_pred.leaves())\n", | ||
432 | + " raise\n", | ||
433 | + " tree_gold = untag(_tree_gold)\n", | ||
434 | + " tree_pred = untag(_tree_pred)\n", | ||
435 | + " spans_gold = tree2spans(tree_gold)\n", | ||
436 | + " spans_pred = tree2spans(tree_pred)\n", | ||
437 | + " if tree_gold.leaves() == ['Poszedł', 'em', 'do', 'adwokata', '.']:\n", | ||
438 | + " display(tree_gold)\n", | ||
439 | + " display(tree_pred)\n", | ||
440 | + " print(spans_gold)\n", | ||
441 | + " print(spans_pred)\n", | ||
442 | + " try:\n", | ||
443 | + " evl, errs = spans2errors(spans_gold, spans_pred)\n", | ||
444 | + " evaluation.append(evl)\n", | ||
445 | + " errors += [(err, _tree_gold, _tree_pred) for err in errs]\n", | ||
446 | + " except:\n", | ||
447 | + " display(tree_pred)\n", | ||
448 | + " display2(_tree_pred)\n", | ||
449 | + " print(i)\n", | ||
450 | + " raise\n", | ||
451 | + " return evaluation, errors" | ||
452 | + ] | ||
453 | + }, | ||
454 | + { | ||
455 | + "cell_type": "code", | ||
456 | + "execution_count": 55, | ||
457 | + "id": "5dcd68fd", | ||
458 | + "metadata": {}, | ||
459 | + "outputs": [ | ||
460 | + { | ||
461 | + "data": { | ||
462 | + "image/svg+xml": [ | ||
463 | + "<svg baseProfile=\"full\" height=\"264px\" preserveAspectRatio=\"xMidYMid meet\" style=\"font-family: times, serif; font-weight:normal; font-style: normal; font-size: 16px;\" version=\"1.1\" viewBox=\"0,0,296.0,264.0\" width=\"296px\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ROOT</text></svg><svg width=\"81.0811%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*S</text></svg><svg width=\"43.3333%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*VP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*V</text></svg><svg width=\"69.2308%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Poszedł</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"34.6154%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"30.7692%\" x=\"69.2308%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">em</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"84.6154%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"21.6667%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"56.6667%\" x=\"43.3333%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"41.1765%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">do</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"20.5882%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"58.8235%\" x=\"41.1765%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">adwokata</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"70.5882%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"71.6667%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"40.5405%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"18.9189%\" x=\"81.0811%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">.</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"90.5405%\" y1=\"1.2em\" y2=\"3em\" /></svg>" | ||
464 | + ], | ||
465 | + "text/plain": [ | ||
466 | + "Tree('ROOT', [Tree('*S', [Tree('*VP', [Tree('*V', ['Poszedł', 'em'])]), Tree('PrepNP', [Tree('*Prep', ['do']), Tree('NP', [Tree('*N', ['adwokata'])])])]), Tree('Punct', ['.'])])" | ||
467 | + ] | ||
468 | + }, | ||
469 | + "metadata": {}, | ||
470 | + "output_type": "display_data" | ||
471 | + }, | ||
472 | + { | ||
473 | + "data": { | ||
474 | + "image/svg+xml": [ | ||
475 | + "<svg baseProfile=\"full\" height=\"264px\" preserveAspectRatio=\"xMidYMid meet\" style=\"font-family: times, serif; font-weight:normal; font-style: normal; font-size: 16px;\" version=\"1.1\" viewBox=\"0,0,296.0,264.0\" width=\"296px\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:ev=\"http://www.w3.org/2001/xml-events\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">ROOT</text></svg><svg width=\"81.0811%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*S</text></svg><svg width=\"43.3333%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*VP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*V</text></svg><svg width=\"69.2308%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Poszedł</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"34.6154%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"30.7692%\" x=\"69.2308%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">em</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"84.6154%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"21.6667%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"56.6667%\" x=\"43.3333%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">PrepNP</text></svg><svg width=\"41.1765%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*Prep</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">do</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"20.5882%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"58.8235%\" x=\"41.1765%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">NP</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">*N</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">adwokata</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"70.5882%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"71.6667%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"40.5405%\" y1=\"1.2em\" y2=\"3em\" /><svg width=\"18.9189%\" x=\"81.0811%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">Punct</text></svg><svg width=\"100%\" x=\"0%\" y=\"3em\"><defs /><svg width=\"100%\" x=\"0\" y=\"0em\"><defs /><text text-anchor=\"middle\" x=\"50%\" y=\"1em\">.</text></svg></svg><line stroke=\"black\" x1=\"50%\" x2=\"50%\" y1=\"1.2em\" y2=\"3em\" /></svg><line stroke=\"black\" x1=\"50%\" x2=\"90.5405%\" y1=\"1.2em\" y2=\"3em\" /></svg>" | ||
476 | + ], | ||
477 | + "text/plain": [ | ||
478 | + "Tree('ROOT', [Tree('*S', [Tree('*VP', [Tree('*V', ['Poszedł', 'em'])]), Tree('PrepNP', [Tree('*Prep', ['do']), Tree('NP', [Tree('*N', ['adwokata'])])])]), Tree('Punct', ['.'])])" | ||
479 | + ] | ||
480 | + }, | ||
481 | + "metadata": {}, | ||
482 | + "output_type": "display_data" | ||
483 | + }, | ||
484 | + { | ||
485 | + "name": "stdout", | ||
486 | + "output_type": "stream", | ||
487 | + "text": [ | ||
488 | + "{('Punct', ('4##.',), '4##.'), ('*Prep', ('2##do',), '2##do'), ('ROOT', ('*S', 'Punct'), '0##Poszedł 1##em 2##do 3##adwokata 4##.'), ('NP', ('*N',), '3##adwokata'), ('*V', ('0##Poszedł', '1##em'), '0##Poszedł 1##em'), ('PrepNP', ('*Prep', 'NP'), '2##do 3##adwokata'), ('*S', ('*VP', 'PrepNP'), '0##Poszedł 1##em 2##do 3##adwokata'), ('*N', ('3##adwokata',), '3##adwokata'), ('*VP', ('*V',), '0##Poszedł 1##em')}\n", | ||
489 | + "{('Punct', ('4##.',), '4##.'), ('*Prep', ('2##do',), '2##do'), ('ROOT', ('*S', 'Punct'), '0##Poszedł 1##em 2##do 3##adwokata 4##.'), ('NP', ('*N',), '3##adwokata'), ('*V', ('0##Poszedł', '1##em'), '0##Poszedł 1##em'), ('PrepNP', ('*Prep', 'NP'), '2##do 3##adwokata'), ('*S', ('*VP', 'PrepNP'), '0##Poszedł 1##em 2##do 3##adwokata'), ('*N', ('3##adwokata',), '3##adwokata'), ('*VP', ('*V',), '0##Poszedł 1##em')}\n", | ||
490 | + "!!!!!!!!!!!!!!! *AdvP 0##Trudno\n" | ||
491 | + ] | ||
492 | + }, | ||
493 | + { | ||
494 | + "data": { | ||
495 | + "text/plain": [ | ||
496 | + "{('*Adv', ('0##Trudno',), '0##Trudno'),\n", | ||
497 | + " ('*AdvP', ('*Adv',), '0##Trudno'),\n", | ||
498 | + " ('*AdvP', ('*AdvP',), '0##Trudno'),\n", | ||
499 | + " ('Punct', ('1##.',), '1##.'),\n", | ||
500 | + " ('ROOT', ('*AdvP', 'Punct'), '0##Trudno 1##.')}" | ||
501 | + ] | ||
502 | + }, | ||
503 | + "metadata": {}, | ||
504 | + "output_type": "display_data" | ||
505 | + }, | ||
506 | + { | ||
507 | + "name": "stdout", | ||
508 | + "output_type": "stream", | ||
509 | + "text": [ | ||
510 | + "!!!!!!!!!!!!!!! *AdvP 0##Trudno\n" | ||
511 | + ] | ||
512 | + }, | ||
513 | + { | ||
514 | + "data": { | ||
515 | + "text/plain": [ | ||
516 | + "{('*Adv', ('0##Trudno',), '0##Trudno'),\n", | ||
517 | + " ('*AdvP', ('*Adv',), '0##Trudno'),\n", | ||
518 | + " ('*AdvP', ('*AdvP',), '0##Trudno'),\n", | ||
519 | + " ('Punct', ('1##.',), '1##.'),\n", | ||
520 | + " ('ROOT', ('*AdvP', 'Punct'), '0##Trudno 1##.')}" | ||
521 | + ] | ||
522 | + }, | ||
523 | + "metadata": {}, | ||
524 | + "output_type": "display_data" | ||
525 | + }, | ||
526 | + { | ||
527 | + "name": "stdout", | ||
528 | + "output_type": "stream", | ||
529 | + "text": [ | ||
530 | + "!!!!!!!!!!!!!!! *PrepNP 0##O 1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic\n" | ||
531 | + ] | ||
532 | + }, | ||
533 | + { | ||
534 | + "data": { | ||
535 | + "text/plain": [ | ||
536 | + "{('*Comp', ('3##iż',), '3##iż'),\n", | ||
537 | + " ('*Comp', ('7##iż',), '7##iż'),\n", | ||
538 | + " ('*Conj', ('6##,',), '6##,'),\n", | ||
539 | + " ('*N', ('1##tym',), '1##tym'),\n", | ||
540 | + " ('*N', ('11##nic',), '11##nic'),\n", | ||
541 | + " ('*N', ('5##nikim',), '5##nikim'),\n", | ||
542 | + " ('*N', ('9##cię',), '9##cię'),\n", | ||
543 | + " ('*NP', ('*N',), '1##tym'),\n", | ||
544 | + " ('*Prep', ('0##O',), '0##O'),\n", | ||
545 | + " ('*Prep', ('10##za',), '10##za'),\n", | ||
546 | + " ('*PrepNP',\n", | ||
547 | + " ('*Prep', 'NP'),\n", | ||
548 | + " '0##O 1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
549 | + " ('*PrepNP',\n", | ||
550 | + " ('*PrepNP',),\n", | ||
551 | + " '0##O 1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
552 | + " ('*V', ('4##jesteś',), '4##jesteś'),\n", | ||
553 | + " ('*V', ('8##mają',), '8##mają'),\n", | ||
554 | + " ('*VP', ('*V',), '4##jesteś'),\n", | ||
555 | + " ('*VP', ('*V',), '8##mają'),\n", | ||
556 | + " ('CP', ('*Comp', 'S'), '7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
557 | + " ('CP',\n", | ||
558 | + " ('CP', '*Conj', 'CP'),\n", | ||
559 | + " '2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
560 | + " ('CP', ('Punct', '*Comp', 'S'), '2##, 3##iż 4##jesteś 5##nikim'),\n", | ||
561 | + " ('NP', ('*N',), '11##nic'),\n", | ||
562 | + " ('NP', ('*N',), '5##nikim'),\n", | ||
563 | + " ('NP', ('*N',), '9##cię'),\n", | ||
564 | + " ('NP',\n", | ||
565 | + " ('*NP', 'CP'),\n", | ||
566 | + " '1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
567 | + " ('PrepNP', ('*Prep', 'NP'), '10##za 11##nic'),\n", | ||
568 | + " ('Punct', ('12##!',), '12##!'),\n", | ||
569 | + " ('Punct', ('2##,',), '2##,'),\n", | ||
570 | + " ('ROOT',\n", | ||
571 | + " ('*PrepNP', 'Punct'),\n", | ||
572 | + " '0##O 1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic 12##!'),\n", | ||
573 | + " ('S', ('*VP', 'NP'), '4##jesteś 5##nikim'),\n", | ||
574 | + " ('S', ('*VP', 'NP', 'PrepNP'), '8##mają 9##cię 10##za 11##nic')}" | ||
575 | + ] | ||
576 | + }, | ||
577 | + "metadata": {}, | ||
578 | + "output_type": "display_data" | ||
579 | + }, | ||
580 | + { | ||
581 | + "name": "stdout", | ||
582 | + "output_type": "stream", | ||
583 | + "text": [ | ||
584 | + "!!!!!!!!!!!!!!! *PrepNP 0##O 1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic\n" | ||
585 | + ] | ||
586 | + }, | ||
587 | + { | ||
588 | + "data": { | ||
589 | + "text/plain": [ | ||
590 | + "{('*Comp', ('3##iż',), '3##iż'),\n", | ||
591 | + " ('*Comp', ('7##iż',), '7##iż'),\n", | ||
592 | + " ('*Conj', ('6##,',), '6##,'),\n", | ||
593 | + " ('*N', ('1##tym',), '1##tym'),\n", | ||
594 | + " ('*N', ('11##nic',), '11##nic'),\n", | ||
595 | + " ('*N', ('5##nikim',), '5##nikim'),\n", | ||
596 | + " ('*N', ('9##cię',), '9##cię'),\n", | ||
597 | + " ('*NP', ('*N',), '1##tym'),\n", | ||
598 | + " ('*Prep', ('0##O',), '0##O'),\n", | ||
599 | + " ('*Prep', ('10##za',), '10##za'),\n", | ||
600 | + " ('*PrepNP',\n", | ||
601 | + " ('*Prep', 'NP'),\n", | ||
602 | + " '0##O 1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
603 | + " ('*PrepNP',\n", | ||
604 | + " ('*PrepNP',),\n", | ||
605 | + " '0##O 1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
606 | + " ('*V', ('4##jesteś',), '4##jesteś'),\n", | ||
607 | + " ('*V', ('8##mają',), '8##mają'),\n", | ||
608 | + " ('*VP', ('*V',), '4##jesteś'),\n", | ||
609 | + " ('*VP', ('*V',), '8##mają'),\n", | ||
610 | + " ('CP', ('*Comp', 'S'), '7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
611 | + " ('CP',\n", | ||
612 | + " ('CP', '*Conj', 'CP'),\n", | ||
613 | + " '2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
614 | + " ('CP', ('Punct', '*Comp', 'S'), '2##, 3##iż 4##jesteś 5##nikim'),\n", | ||
615 | + " ('NP', ('*N',), '11##nic'),\n", | ||
616 | + " ('NP', ('*N',), '5##nikim'),\n", | ||
617 | + " ('NP', ('*N',), '9##cię'),\n", | ||
618 | + " ('NP',\n", | ||
619 | + " ('*NP', 'CP'),\n", | ||
620 | + " '1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic'),\n", | ||
621 | + " ('PrepNP', ('*Prep', 'NP'), '10##za 11##nic'),\n", | ||
622 | + " ('Punct', ('12##!',), '12##!'),\n", | ||
623 | + " ('Punct', ('2##,',), '2##,'),\n", | ||
624 | + " ('ROOT',\n", | ||
625 | + " ('*PrepNP', 'Punct'),\n", | ||
626 | + " '0##O 1##tym 2##, 3##iż 4##jesteś 5##nikim 6##, 7##iż 8##mają 9##cię 10##za 11##nic 12##!'),\n", | ||
627 | + " ('S', ('*VP', 'NP'), '4##jesteś 5##nikim'),\n", | ||
628 | + " ('S', ('*VP', 'NP', 'PrepNP'), '8##mają 9##cię 10##za 11##nic')}" | ||
629 | + ] | ||
630 | + }, | ||
631 | + "metadata": {}, | ||
632 | + "output_type": "display_data" | ||
633 | + }, | ||
634 | + { | ||
635 | + "name": "stdout", | ||
636 | + "output_type": "stream", | ||
637 | + "text": [ | ||
638 | + "!!!!!!!!!!!!!!! *NP 0##Cztery 1##małe 2##groszki 3##w 4##strąku 5##w 6##tunelu 7##miłości\n" | ||
639 | + ] | ||
640 | + }, | ||
641 | + { | ||
642 | + "data": { | ||
643 | + "text/plain": [ | ||
644 | + "{('*Adj', ('1##małe',), '1##małe'),\n", | ||
645 | + " ('*N', ('2##groszki',), '2##groszki'),\n", | ||
646 | + " ('*N', ('4##strąku',), '4##strąku'),\n", | ||
647 | + " ('*N', ('6##tunelu',), '6##tunelu'),\n", | ||
648 | + " ('*N', ('7##miłości',), '7##miłości'),\n", | ||
649 | + " ('*NP', ('*N',), '2##groszki'),\n", | ||
650 | + " ('*NP', ('*N',), '6##tunelu'),\n", | ||
651 | + " ('*NP',\n", | ||
652 | + " ('*NP',),\n", | ||
653 | + " '0##Cztery 1##małe 2##groszki 3##w 4##strąku 5##w 6##tunelu 7##miłości'),\n", | ||
654 | + " ('*NP',\n", | ||
655 | + " ('*NumP', 'NP'),\n", | ||
656 | + " '0##Cztery 1##małe 2##groszki 3##w 4##strąku 5##w 6##tunelu 7##miłości'),\n", | ||
657 | + " ('*Num', ('0##Cztery',), '0##Cztery'),\n", | ||
658 | + " ('*NumP', ('*Num',), '0##Cztery'),\n", | ||
659 | + " ('*Prep', ('3##w',), '3##w'),\n", | ||
660 | + " ('*Prep', ('5##w',), '5##w'),\n", | ||
661 | + " ('AdjP', ('*Adj',), '1##małe'),\n", | ||
662 | + " ('NP', ('*N',), '4##strąku'),\n", | ||
663 | + " ('NP', ('*N',), '7##miłości'),\n", | ||
664 | + " ('NP', ('*NP', 'NP'), '6##tunelu 7##miłości'),\n", | ||
665 | + " ('NP',\n", | ||
666 | + " ('AdjP', '*NP', 'PrepNP', 'PrepNP'),\n", | ||
667 | + " '1##małe 2##groszki 3##w 4##strąku 5##w 6##tunelu 7##miłości'),\n", | ||
668 | + " ('PrepNP', ('*Prep', 'NP'), '3##w 4##strąku'),\n", | ||
669 | + " ('PrepNP', ('*Prep', 'NP'), '5##w 6##tunelu 7##miłości'),\n", | ||
670 | + " ('Punct', ('8##.',), '8##.'),\n", | ||
671 | + " ('ROOT',\n", | ||
672 | + " ('*NP', 'Punct'),\n", | ||
673 | + " '0##Cztery 1##małe 2##groszki 3##w 4##strąku 5##w 6##tunelu 7##miłości 8##.')}" | ||
674 | + ] | ||
675 | + }, | ||
676 | + "metadata": {}, | ||
677 | + "output_type": "display_data" | ||
678 | + }, | ||
679 | + { | ||
680 | + "name": "stdout", | ||
681 | + "output_type": "stream", | ||
682 | + "text": [ | ||
683 | + "!!!!!!!!!!!!!!! *PrepNP 0##W 1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala\n" | ||
684 | + ] | ||
685 | + }, | ||
686 | + { | ||
687 | + "data": { | ||
688 | + "text/plain": [ | ||
689 | + "{('*Adv', ('3##gdy',), '3##gdy'),\n", | ||
690 | + " ('*N', ('1##chwili',), '1##chwili'),\n", | ||
691 | + " ('*N', ('7##Alpy',), '7##Alpy'),\n", | ||
692 | + " ('*N', ('8##słonie',), '8##słonie'),\n", | ||
693 | + " ('*N', ('9##Hannibala',), '9##Hannibala'),\n", | ||
694 | + " ('*NP', ('*N',), '1##chwili'),\n", | ||
695 | + " ('*NP', ('*N',), '8##słonie'),\n", | ||
696 | + " ('*Prep', ('0##W',), '0##W'),\n", | ||
697 | + " ('*Prep', ('6##przez',), '6##przez'),\n", | ||
698 | + " ('*PrepNP',\n", | ||
699 | + " ('*Prep', 'NP'),\n", | ||
700 | + " '0##W 1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
701 | + " ('*PrepNP',\n", | ||
702 | + " ('*PrepNP',),\n", | ||
703 | + " '0##W 1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
704 | + " ('*S',\n", | ||
705 | + " ('AdvP', '*VP', 'PrepNP', 'NP'),\n", | ||
706 | + " '3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
707 | + " ('*V', ('4##przeprowadzał', '5##em'), '4##przeprowadzał 5##em'),\n", | ||
708 | + " ('*VP', ('*V',), '4##przeprowadzał 5##em'),\n", | ||
709 | + " ('AdvP', ('*Adv',), '3##gdy'),\n", | ||
710 | + " ('CP',\n", | ||
711 | + " ('Punct', '*S'),\n", | ||
712 | + " '2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
713 | + " ('NP', ('*N',), '7##Alpy'),\n", | ||
714 | + " ('NP', ('*N',), '9##Hannibala'),\n", | ||
715 | + " ('NP',\n", | ||
716 | + " ('*NP', 'CP'),\n", | ||
717 | + " '1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
718 | + " ('NP', ('*NP', 'NP'), '8##słonie 9##Hannibala'),\n", | ||
719 | + " ('PrepNP', ('*Prep', 'NP'), '6##przez 7##Alpy'),\n", | ||
720 | + " ('Punct', ('10##.',), '10##.'),\n", | ||
721 | + " ('Punct', ('2##,',), '2##,'),\n", | ||
722 | + " ('ROOT',\n", | ||
723 | + " ('*PrepNP', 'Punct'),\n", | ||
724 | + " '0##W 1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala 10##.')}" | ||
725 | + ] | ||
726 | + }, | ||
727 | + "metadata": {}, | ||
728 | + "output_type": "display_data" | ||
729 | + }, | ||
730 | + { | ||
731 | + "name": "stdout", | ||
732 | + "output_type": "stream", | ||
733 | + "text": [ | ||
734 | + "!!!!!!!!!!!!!!! *PrepNP 0##W 1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala\n" | ||
735 | + ] | ||
736 | + }, | ||
737 | + { | ||
738 | + "data": { | ||
739 | + "text/plain": [ | ||
740 | + "{('*Adv', ('3##gdy',), '3##gdy'),\n", | ||
741 | + " ('*N', ('1##chwili',), '1##chwili'),\n", | ||
742 | + " ('*N', ('7##Alpy',), '7##Alpy'),\n", | ||
743 | + " ('*N', ('8##słonie',), '8##słonie'),\n", | ||
744 | + " ('*N', ('9##Hannibala',), '9##Hannibala'),\n", | ||
745 | + " ('*NP', ('*N',), '1##chwili'),\n", | ||
746 | + " ('*NP', ('*N',), '8##słonie'),\n", | ||
747 | + " ('*Prep', ('0##W',), '0##W'),\n", | ||
748 | + " ('*Prep', ('6##przez',), '6##przez'),\n", | ||
749 | + " ('*PrepNP',\n", | ||
750 | + " ('*Prep', 'NP'),\n", | ||
751 | + " '0##W 1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
752 | + " ('*PrepNP',\n", | ||
753 | + " ('*PrepNP',),\n", | ||
754 | + " '0##W 1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
755 | + " ('*S',\n", | ||
756 | + " ('AdvP', '*VP', 'PrepNP', 'NP'),\n", | ||
757 | + " '3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
758 | + " ('*V', ('4##przeprowadzał', '5##em'), '4##przeprowadzał 5##em'),\n", | ||
759 | + " ('*VP', ('*V',), '4##przeprowadzał 5##em'),\n", | ||
760 | + " ('AdvP', ('*Adv',), '3##gdy'),\n", | ||
761 | + " ('CP',\n", | ||
762 | + " ('Punct', '*S'),\n", | ||
763 | + " '2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
764 | + " ('NP', ('*N',), '7##Alpy'),\n", | ||
765 | + " ('NP', ('*N',), '9##Hannibala'),\n", | ||
766 | + " ('NP',\n", | ||
767 | + " ('*NP', 'CP'),\n", | ||
768 | + " '1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala'),\n", | ||
769 | + " ('NP', ('*NP', 'NP'), '8##słonie 9##Hannibala'),\n", | ||
770 | + " ('PrepNP', ('*Prep', 'NP'), '6##przez 7##Alpy'),\n", | ||
771 | + " ('Punct', ('10##.',), '10##.'),\n", | ||
772 | + " ('Punct', ('2##,',), '2##,'),\n", | ||
773 | + " ('ROOT',\n", | ||
774 | + " ('*PrepNP', 'Punct'),\n", | ||
775 | + " '0##W 1##chwili 2##, 3##gdy 4##przeprowadzał 5##em 6##przez 7##Alpy 8##słonie 9##Hannibala 10##.')}" | ||
776 | + ] | ||
777 | + }, | ||
778 | + "metadata": {}, | ||
779 | + "output_type": "display_data" | ||
780 | + }, | ||
781 | + { | ||
782 | + "name": "stdout", | ||
783 | + "output_type": "stream", | ||
784 | + "text": [ | ||
785 | + "!!!!!!!!!!!!!!! *PrepNP 1##na 2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna\n" | ||
786 | + ] | ||
787 | + }, | ||
788 | + { | ||
789 | + "data": { | ||
790 | + "text/plain": [ | ||
791 | + "{('*Adj', ('2##roczną',), '2##roczną'),\n", | ||
792 | + " ('*Adj', ('7##czternasty',), '7##czternasty'),\n", | ||
793 | + " ('*N', ('3##misję',), '3##misję'),\n", | ||
794 | + " ('*N', ('5##Tytana',), '5##Tytana'),\n", | ||
795 | + " ('*N', ('8##księżyc',), '8##księżyc'),\n", | ||
796 | + " ('*N', ('9##Saturna',), '9##Saturna'),\n", | ||
797 | + " ('*NP', ('*N',), '3##misję'),\n", | ||
798 | + " ('*NP', ('*N',), '5##Tytana'),\n", | ||
799 | + " ('*NP', ('*N',), '8##księżyc'),\n", | ||
800 | + " ('*Prep', ('1##na',), '1##na'),\n", | ||
801 | + " ('*Prep', ('4##na',), '4##na'),\n", | ||
802 | + " ('*PrepNP',\n", | ||
803 | + " ('*Prep', 'NP'),\n", | ||
804 | + " '1##na 2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
805 | + " ('*PrepNP',\n", | ||
806 | + " ('*PrepNP',),\n", | ||
807 | + " '1##na 2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
808 | + " ('AdjP', ('*Adj',), '2##roczną'),\n", | ||
809 | + " ('AdjP', ('*Adj',), '7##czternasty'),\n", | ||
810 | + " ('NP', ('*N',), '9##Saturna'),\n", | ||
811 | + " ('NP', ('*NP', 'NP'), '5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
812 | + " ('NP',\n", | ||
813 | + " ('AdjP', '*NP', 'PrepNP'),\n", | ||
814 | + " '2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
815 | + " ('NP',\n", | ||
816 | + " ('Punct', 'AdjP', '*NP', 'NP'),\n", | ||
817 | + " '6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
818 | + " ('PrepNP',\n", | ||
819 | + " ('*Prep', 'NP'),\n", | ||
820 | + " '4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
821 | + " ('Punct', ('0##-',), '0##-'),\n", | ||
822 | + " ('Punct', ('10##.',), '10##.'),\n", | ||
823 | + " ('Punct', ('6##,',), '6##,'),\n", | ||
824 | + " ('ROOT',\n", | ||
825 | + " ('Punct', '*PrepNP', 'Punct'),\n", | ||
826 | + " '0##- 1##na 2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna 10##.')}" | ||
827 | + ] | ||
828 | + }, | ||
829 | + "metadata": {}, | ||
830 | + "output_type": "display_data" | ||
831 | + }, | ||
832 | + { | ||
833 | + "name": "stdout", | ||
834 | + "output_type": "stream", | ||
835 | + "text": [ | ||
836 | + "!!!!!!!!!!!!!!! *PrepNP 1##na 2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna\n" | ||
837 | + ] | ||
838 | + }, | ||
839 | + { | ||
840 | + "data": { | ||
841 | + "text/plain": [ | ||
842 | + "{('*Adj', ('2##roczną',), '2##roczną'),\n", | ||
843 | + " ('*Adj', ('7##czternasty',), '7##czternasty'),\n", | ||
844 | + " ('*N', ('3##misję',), '3##misję'),\n", | ||
845 | + " ('*N', ('5##Tytana',), '5##Tytana'),\n", | ||
846 | + " ('*N', ('8##księżyc',), '8##księżyc'),\n", | ||
847 | + " ('*N', ('9##Saturna',), '9##Saturna'),\n", | ||
848 | + " ('*NP', ('*N',), '3##misję'),\n", | ||
849 | + " ('*NP', ('*N',), '5##Tytana'),\n", | ||
850 | + " ('*NP', ('*N',), '8##księżyc'),\n", | ||
851 | + " ('*Prep', ('1##na',), '1##na'),\n", | ||
852 | + " ('*Prep', ('4##na',), '4##na'),\n", | ||
853 | + " ('*PrepNP',\n", | ||
854 | + " ('*Prep', 'NP'),\n", | ||
855 | + " '1##na 2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
856 | + " ('*PrepNP',\n", | ||
857 | + " ('*PrepNP',),\n", | ||
858 | + " '1##na 2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
859 | + " ('AdjP', ('*Adj',), '2##roczną'),\n", | ||
860 | + " ('AdjP', ('*Adj',), '7##czternasty'),\n", | ||
861 | + " ('NP', ('*N',), '9##Saturna'),\n", | ||
862 | + " ('NP', ('*NP', 'NP'), '5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
863 | + " ('NP',\n", | ||
864 | + " ('AdjP', '*NP', 'PrepNP'),\n", | ||
865 | + " '2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
866 | + " ('NP',\n", | ||
867 | + " ('Punct', 'AdjP', '*NP', 'NP'),\n", | ||
868 | + " '6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
869 | + " ('PrepNP',\n", | ||
870 | + " ('*Prep', 'NP'),\n", | ||
871 | + " '4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna'),\n", | ||
872 | + " ('Punct', ('0##-',), '0##-'),\n", | ||
873 | + " ('Punct', ('10##.',), '10##.'),\n", | ||
874 | + " ('Punct', ('6##,',), '6##,'),\n", | ||
875 | + " ('ROOT',\n", | ||
876 | + " ('Punct', '*PrepNP', 'Punct'),\n", | ||
877 | + " '0##- 1##na 2##roczną 3##misję 4##na 5##Tytana 6##, 7##czternasty 8##księżyc 9##Saturna 10##.')}" | ||
878 | + ] | ||
879 | + }, | ||
880 | + "metadata": {}, | ||
881 | + "output_type": "display_data" | ||
882 | + } | ||
883 | + ], | ||
884 | + "source": [ | ||
885 | + "evaluation_val, errors_val = eval_trees(val_trees, val_pred_trees)\n", | ||
886 | + "evaluation_test, errors_test = eval_trees(test_trees, test_pred_trees)" | ||
887 | + ] | ||
888 | + }, | ||
889 | + { | ||
890 | + "cell_type": "code", | ||
891 | + "execution_count": 56, | ||
892 | + "id": "65af3522", | ||
893 | + "metadata": {}, | ||
894 | + "outputs": [ | ||
895 | + { | ||
896 | + "name": "stdout", | ||
897 | + "output_type": "stream", | ||
898 | + "text": [ | ||
899 | + "DEV:\n", | ||
900 | + "precision: 0.974400637684714\n", | ||
901 | + "recall: 0.9742960930674555\n", | ||
902 | + "f1: 0.9743483625717548\n" | ||
903 | + ] | ||
904 | + } | ||
905 | + ], | ||
906 | + "source": [ | ||
907 | + "tp, fp, fn = list(map(sum, zip(*evaluation_val)))\n", | ||
908 | + "p, r = tp / (tp + fp), tp / (tp + fn)\n", | ||
909 | + "f1 = 2 * tp / (2 * tp + fp + fn)\n", | ||
910 | + "print('DEV:')\n", | ||
911 | + "print('precision: ', p)\n", | ||
912 | + "print('recall: ', r)\n", | ||
913 | + "print('f1: ', f1)" | ||
914 | + ] | ||
915 | + }, | ||
916 | + { | ||
917 | + "cell_type": "code", | ||
918 | + "execution_count": 57, | ||
919 | + "id": "8e0f3f93", | ||
920 | + "metadata": {}, | ||
921 | + "outputs": [ | ||
922 | + { | ||
923 | + "name": "stdout", | ||
924 | + "output_type": "stream", | ||
925 | + "text": [ | ||
926 | + "TEST:\n", | ||
927 | + "precision: 0.9774147274466051\n", | ||
928 | + "recall: 0.9775082092645137\n", | ||
929 | + "f1: 0.9774614661204711\n" | ||
930 | + ] | ||
931 | + } | ||
932 | + ], | ||
933 | + "source": [ | ||
934 | + "tp, fp, fn = list(map(sum, zip(*evaluation_test)))\n", | ||
935 | + "p, r = tp / (tp + fp), tp / (tp + fn)\n", | ||
936 | + "f1 = 2 * tp / (2 * tp + fp + fn)\n", | ||
937 | + "print('TEST:')\n", | ||
938 | + "print('precision: ', p)\n", | ||
939 | + "print('recall: ', r)\n", | ||
940 | + "print('f1: ', f1)" | ||
941 | + ] | ||
942 | + }, | ||
943 | + { | ||
944 | + "cell_type": "code", | ||
945 | + "execution_count": null, | ||
946 | + "id": "302b2333", | ||
947 | + "metadata": {}, | ||
948 | + "outputs": [], | ||
949 | + "source": [] | ||
950 | + } | ||
951 | + ], | ||
952 | + "metadata": { | ||
953 | + "kernelspec": { | ||
954 | + "display_name": "torch_benepar", | ||
955 | + "language": "python", | ||
956 | + "name": "torch_benepar" | ||
957 | + }, | ||
958 | + "language_info": { | ||
959 | + "codemirror_mode": { | ||
960 | + "name": "ipython", | ||
961 | + "version": 3 | ||
962 | + }, | ||
963 | + "file_extension": ".py", | ||
964 | + "mimetype": "text/x-python", | ||
965 | + "name": "python", | ||
966 | + "nbconvert_exporter": "python", | ||
967 | + "pygments_lexer": "ipython3", | ||
968 | + "version": "3.10.6" | ||
969 | + } | ||
970 | + }, | ||
971 | + "nbformat": 4, | ||
972 | + "nbformat_minor": 5 | ||
973 | +} |
COMBO/DataPreparation.ipynb
0 → 100644
1 | +++ a/COMBO/DataPreparation.ipynb | ||
1 | +{ | ||
2 | + "cells": [ | ||
3 | + { | ||
4 | + "cell_type": "code", | ||
5 | + "execution_count": 46, | ||
6 | + "id": "5cd26f6f", | ||
7 | + "metadata": {}, | ||
8 | + "outputs": [], | ||
9 | + "source": [ | ||
10 | + "import os\n", | ||
11 | + "\n", | ||
12 | + "from datasets import load_dataset\n", | ||
13 | + "\n", | ||
14 | + "from IPython.display import display" | ||
15 | + ] | ||
16 | + }, | ||
17 | + { | ||
18 | + "cell_type": "code", | ||
19 | + "execution_count": 47, | ||
20 | + "id": "fecef4af", | ||
21 | + "metadata": {}, | ||
22 | + "outputs": [ | ||
23 | + { | ||
24 | + "name": "stderr", | ||
25 | + "output_type": "stream", | ||
26 | + "text": [ | ||
27 | + "Found cached dataset pdb_c_beta (/home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1)\n" | ||
28 | + ] | ||
29 | + }, | ||
30 | + { | ||
31 | + "data": { | ||
32 | + "application/vnd.jupyter.widget-view+json": { | ||
33 | + "model_id": "1c89c7103bba4347a3fa7d23cac42cfe", | ||
34 | + "version_major": 2, | ||
35 | + "version_minor": 0 | ||
36 | + }, | ||
37 | + "text/plain": [ | ||
38 | + " 0%| | 0/3 [00:00<?, ?it/s]" | ||
39 | + ] | ||
40 | + }, | ||
41 | + "metadata": {}, | ||
42 | + "output_type": "display_data" | ||
43 | + } | ||
44 | + ], | ||
45 | + "source": [ | ||
46 | + "pdbc_dataset = load_dataset('../pdb_c_beta')" | ||
47 | + ] | ||
48 | + }, | ||
49 | + { | ||
50 | + "cell_type": "code", | ||
51 | + "execution_count": 48, | ||
52 | + "id": "23da801f", | ||
53 | + "metadata": {}, | ||
54 | + "outputs": [], | ||
55 | + "source": [ | ||
56 | + "CONLLU_DIR = 'connlu'\n", | ||
57 | + "! rm -r {CONLLU_DIR}\n", | ||
58 | + "! mkdir {CONLLU_DIR}" | ||
59 | + ] | ||
60 | + }, | ||
61 | + { | ||
62 | + "cell_type": "code", | ||
63 | + "execution_count": 50, | ||
64 | + "id": "91fb3bf3", | ||
65 | + "metadata": {}, | ||
66 | + "outputs": [], | ||
67 | + "source": [ | ||
68 | + "import sys\n", | ||
69 | + "sys.path.append('../')\n", | ||
70 | + "from neural_parser.hybrid_tree_utils import tree_from_dataset_instance" | ||
71 | + ] | ||
72 | + }, | ||
73 | + { | ||
74 | + "cell_type": "code", | ||
75 | + "execution_count": 60, | ||
76 | + "id": "c105feff", | ||
77 | + "metadata": {}, | ||
78 | + "outputs": [ | ||
79 | + { | ||
80 | + "name": "stdout", | ||
81 | + "output_type": "stream", | ||
82 | + "text": [ | ||
83 | + "train\n", | ||
84 | + " connlu/pdbc-train.conllu\n", | ||
85 | + " 17659\n", | ||
86 | + " connlu/pdbc-cont-train.conllu\n", | ||
87 | + " 15903\n", | ||
88 | + "validation\n", | ||
89 | + " connlu/pdbc-validation.conllu\n", | ||
90 | + " 2211\n", | ||
91 | + " connlu/pdbc-cont-validation.conllu\n", | ||
92 | + " 1980\n", | ||
93 | + "test\n", | ||
94 | + " connlu/pdbc-test.conllu\n", | ||
95 | + " 2205\n", | ||
96 | + " connlu/pdbc-cont-test.conllu\n", | ||
97 | + " 1990\n" | ||
98 | + ] | ||
99 | + } | ||
100 | + ], | ||
101 | + "source": [ | ||
102 | + "features = pdbc_dataset['train'].features\n", | ||
103 | + "\n", | ||
104 | + "for part, dataset in pdbc_dataset.items():\n", | ||
105 | + " print(part)\n", | ||
106 | + " s_cont, s_all = [], [] \n", | ||
107 | + " for sentence in dataset:\n", | ||
108 | + " # TODO! check if discont\n", | ||
109 | + " tokens = sentence['tokens']\n", | ||
110 | + " lemmas = sentence['lemmas']\n", | ||
111 | + " heads = sentence['heads']\n", | ||
112 | + " heads = [h + 1 if h is not None else 0 for i, h in enumerate(heads)]\n", | ||
113 | + " deprels = [features['deprels'].feature.int2str(d) for d in sentence['deprels']]\n", | ||
114 | + " deprels = ['root' if deprel == 'ROOT' else deprel for deprel in deprels]\n", | ||
115 | + " rows = [f'# text = {\" \".join(tokens)}'] + [\n", | ||
116 | + " f'{i + 1}\\t{t}\\t{l}\\t_\\t_\\t_\\t{h}\\t{d}\\t{h}:{d}\\t_'\n", | ||
117 | + " for i, (t, l, h, d) in enumerate(zip(tokens, lemmas, heads, deprels))\n", | ||
118 | + " ]\n", | ||
119 | + " s_all.append(rows)\n", | ||
120 | + " if tree_from_dataset_instance(sentence, features).is_continuous():\n", | ||
121 | + " s_cont.append(rows)\n", | ||
122 | + " f_all = os.path.join(CONLLU_DIR, f'pdbc-{part}.conllu')\n", | ||
123 | + " f_cont = os.path.join(CONLLU_DIR, f'pdbc-cont-{part}.conllu')\n", | ||
124 | + " with open(f_all, 'w') as f:\n", | ||
125 | + " print(' ', f_all)\n", | ||
126 | + " print(' ', len(s_all))\n", | ||
127 | + " for rows in s_all:\n", | ||
128 | + " print('\\n'.join(rows), end='\\n\\n', file=f)\n", | ||
129 | + " with open(f_cont, 'w') as f:\n", | ||
130 | + " print(' ', f_cont)\n", | ||
131 | + " print(' ', len(s_cont))\n", | ||
132 | + " for rows in s_cont:\n", | ||
133 | + " print('\\n'.join(rows), end='\\n\\n', file=f)" | ||
134 | + ] | ||
135 | + }, | ||
136 | + { | ||
137 | + "cell_type": "code", | ||
138 | + "execution_count": 61, | ||
139 | + "id": "c849233c", | ||
140 | + "metadata": {}, | ||
141 | + "outputs": [ | ||
142 | + { | ||
143 | + "name": "stdout", | ||
144 | + "output_type": "stream", | ||
145 | + "text": [ | ||
146 | + " 32509 319813 1398303 connlu/pdbc-cont-test.conllu\n", | ||
147 | + " 32509 319813 1198902 connlu/pdbc-cont-test-pred.conllu\n", | ||
148 | + " 271337 2682725 11781617 connlu/pdbc-cont-train.conllu\n", | ||
149 | + " 33491 330792 1452373 connlu/pdbc-cont-validation.conllu\n", | ||
150 | + " 33491 330792 1244192 connlu/pdbc-cont-validation-pred.conllu\n", | ||
151 | + " 37754 373431 1639937 connlu/pdbc-test.conllu\n", | ||
152 | + " 37754 373431 1406776 connlu/pdbc-test-pred.conllu\n", | ||
153 | + " 315364 3133712 13808053 connlu/pdbc-train.conllu\n", | ||
154 | + " 38987 386865 1704685 connlu/pdbc-validation.conllu\n", | ||
155 | + " 38987 386865 1461922 connlu/pdbc-validation-pred.conllu\n", | ||
156 | + " 872183 8638239 37096760 total\n" | ||
157 | + ] | ||
158 | + } | ||
159 | + ], | ||
160 | + "source": [ | ||
161 | + "! wc {CONLLU_DIR}/*.conllu" | ||
162 | + ] | ||
163 | + }, | ||
164 | + { | ||
165 | + "cell_type": "code", | ||
166 | + "execution_count": 62, | ||
167 | + "id": "6b571716", | ||
168 | + "metadata": {}, | ||
169 | + "outputs": [ | ||
170 | + { | ||
171 | + "name": "stdout", | ||
172 | + "output_type": "stream", | ||
173 | + "text": [ | ||
174 | + "# text = Skośnooka dziewczynka trzyma w rękach drewniane pałeczki , a przed nią znajdują się naczynia kuchenne .\r\n", | ||
175 | + "1\tSkośnooka\tskośnooki\t_\t_\t_\t2\tadjunct\t2:adjunct\t_\r\n", | ||
176 | + "2\tdziewczynka\tdziewczynka\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
177 | + "3\ttrzyma\ttrzymać\t_\t_\t_\t9\tconjunct\t9:conjunct\t_\r\n", | ||
178 | + "4\tw\tw\t_\t_\t_\t3\tadjunct_locat\t3:adjunct_locat\t_\r\n", | ||
179 | + "5\trękach\tręka\t_\t_\t_\t4\tcomp\t4:comp\t_\r\n", | ||
180 | + "6\tdrewniane\tdrewniany\t_\t_\t_\t7\tadjunct\t7:adjunct\t_\r\n", | ||
181 | + "7\tpałeczki\tpałeczka\t_\t_\t_\t3\tobj\t3:obj\t_\r\n", | ||
182 | + "8\t,\t,\t_\t_\t_\t9\tpunct\t9:punct\t_\r\n", | ||
183 | + "9\ta\ta\t_\t_\t_\t0\troot\t0:root\t_\r\n" | ||
184 | + ] | ||
185 | + } | ||
186 | + ], | ||
187 | + "source": [ | ||
188 | + "! head {CONLLU_DIR}/pdbc-train.conllu" | ||
189 | + ] | ||
190 | + } | ||
191 | + ], | ||
192 | + "metadata": { | ||
193 | + "kernelspec": { | ||
194 | + "display_name": "TF_zajecia", | ||
195 | + "language": "python", | ||
196 | + "name": "tf_zajecia" | ||
197 | + }, | ||
198 | + "language_info": { | ||
199 | + "codemirror_mode": { | ||
200 | + "name": "ipython", | ||
201 | + "version": 3 | ||
202 | + }, | ||
203 | + "file_extension": ".py", | ||
204 | + "mimetype": "text/x-python", | ||
205 | + "name": "python", | ||
206 | + "nbconvert_exporter": "python", | ||
207 | + "pygments_lexer": "ipython3", | ||
208 | + "version": "3.10.6" | ||
209 | + } | ||
210 | + }, | ||
211 | + "nbformat": 4, | ||
212 | + "nbformat_minor": 5 | ||
213 | +} |
COMBO/ParseValAndTrain.ipynb
0 → 100644
1 | +++ a/COMBO/ParseValAndTrain.ipynb | ||
1 | +{ | ||
2 | + "cells": [ | ||
3 | + { | ||
4 | + "cell_type": "code", | ||
5 | + "execution_count": 1, | ||
6 | + "id": "aabfb24b", | ||
7 | + "metadata": {}, | ||
8 | + "outputs": [], | ||
9 | + "source": [ | ||
10 | + "COMBO = '/home/kkrasnowska/anaconda3/envs/combo_p39/bin/combo'" | ||
11 | + ] | ||
12 | + }, | ||
13 | + { | ||
14 | + "cell_type": "markdown", | ||
15 | + "id": "787fff78", | ||
16 | + "metadata": {}, | ||
17 | + "source": [ | ||
18 | + "Main model" | ||
19 | + ] | ||
20 | + }, | ||
21 | + { | ||
22 | + "cell_type": "code", | ||
23 | + "execution_count": 2, | ||
24 | + "id": "1d9daaa9", | ||
25 | + "metadata": {}, | ||
26 | + "outputs": [ | ||
27 | + { | ||
28 | + "name": "stdout", | ||
29 | + "output_type": "stream", | ||
30 | + "text": [ | ||
31 | + "I0407 10:49:31.448594 140072765682752 archival.py:184] loading archive file model-pdbc/model.tar.gz\n", | ||
32 | + "I0407 10:49:31.449148 140072765682752 archival.py:263] extracting archive file model-pdbc/model.tar.gz to temp dir /tmp/tmp_htckuhc\n", | ||
33 | + "I0407 10:49:48.075045 140072765682752 params.py:248] dataset_reader.type = conllu\n", | ||
34 | + "I0407 10:49:48.075561 140072765682752 params.py:248] dataset_reader.lazy = False\n", | ||
35 | + "I0407 10:49:48.075693 140072765682752 params.py:248] dataset_reader.cache_directory = None\n", | ||
36 | + "I0407 10:49:48.075764 140072765682752 params.py:248] dataset_reader.max_instances = None\n", | ||
37 | + "I0407 10:49:48.075832 140072765682752 params.py:248] dataset_reader.manual_distributed_sharding = False\n", | ||
38 | + "I0407 10:49:48.075901 140072765682752 params.py:248] dataset_reader.manual_multi_process_sharding = False\n", | ||
39 | + "I0407 10:49:48.076193 140072765682752 params.py:248] dataset_reader.token_indexers.char.type = characters_const_padding\n", | ||
40 | + "I0407 10:49:48.076388 140072765682752 params.py:248] dataset_reader.token_indexers.char.namespace = token_characters\n", | ||
41 | + "I0407 10:49:48.076621 140072765682752 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
42 | + "I0407 10:49:48.076697 140072765682752 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
43 | + "I0407 10:49:48.076790 140072765682752 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
44 | + "I0407 10:49:48.076939 140072765682752 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
45 | + "I0407 10:49:48.077063 140072765682752 params.py:248] dataset_reader.token_indexers.char.start_tokens = None\n", | ||
46 | + "I0407 10:49:48.077118 140072765682752 params.py:248] dataset_reader.token_indexers.char.end_tokens = None\n", | ||
47 | + "I0407 10:49:48.077185 140072765682752 params.py:248] dataset_reader.token_indexers.char.min_padding_length = 32\n", | ||
48 | + "I0407 10:49:48.077238 140072765682752 params.py:248] dataset_reader.token_indexers.char.token_min_padding_length = 0\n", | ||
49 | + "I0407 10:49:48.077383 140072765682752 params.py:248] dataset_reader.token_indexers.feats.type = feats_indexer\n", | ||
50 | + "I0407 10:49:48.077555 140072765682752 params.py:248] dataset_reader.token_indexers.feats.namespace = feats\n", | ||
51 | + "I0407 10:49:48.077628 140072765682752 params.py:248] dataset_reader.token_indexers.feats.feature_name = feats_\n", | ||
52 | + "I0407 10:49:48.077702 140072765682752 params.py:248] dataset_reader.token_indexers.feats.token_min_padding_length = 0\n", | ||
53 | + "I0407 10:49:48.077838 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.type = characters_const_padding\n", | ||
54 | + "I0407 10:49:48.078031 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.namespace = token_characters\n", | ||
55 | + "I0407 10:49:48.078231 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.byte_encoding = None\n", | ||
56 | + "I0407 10:49:48.078300 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.lowercase_characters = False\n", | ||
57 | + "I0407 10:49:48.078378 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.start_tokens = ['__START__']\n", | ||
58 | + "I0407 10:49:48.078666 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.end_tokens = ['__END__']\n", | ||
59 | + "I0407 10:49:48.078786 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.start_tokens = None\n", | ||
60 | + "I0407 10:49:48.078862 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.end_tokens = None\n", | ||
61 | + "I0407 10:49:48.078916 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.min_padding_length = 32\n", | ||
62 | + "I0407 10:49:48.078969 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.token_min_padding_length = 0\n", | ||
63 | + "I0407 10:49:48.079103 140072765682752 params.py:248] dataset_reader.token_indexers.token.type = pretrained_transformer_mismatched_fixed\n", | ||
64 | + "I0407 10:49:48.079328 140072765682752 params.py:248] dataset_reader.token_indexers.token.token_min_padding_length = 0\n", | ||
65 | + "I0407 10:49:48.079406 140072765682752 params.py:248] dataset_reader.token_indexers.token.model_name = allegro/herbert-large-cased\n", | ||
66 | + "I0407 10:49:48.079461 140072765682752 params.py:248] dataset_reader.token_indexers.token.namespace = tags\n", | ||
67 | + "I0407 10:49:48.079525 140072765682752 params.py:248] dataset_reader.token_indexers.token.max_length = None\n", | ||
68 | + "I0407 10:49:48.079628 140072765682752 params.py:384] dataset_reader.token_indexers.token.tokenizer_kwargs.use_fast = False\n", | ||
69 | + "I0407 10:49:51.185825 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.type = single_id\n", | ||
70 | + "I0407 10:49:51.186234 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.namespace = upostag\n", | ||
71 | + "I0407 10:49:51.186336 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.lowercase_tokens = False\n", | ||
72 | + "I0407 10:49:51.186398 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.start_tokens = None\n", | ||
73 | + "I0407 10:49:51.186465 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.end_tokens = None\n", | ||
74 | + "I0407 10:49:51.186517 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.feature_name = pos_\n", | ||
75 | + "I0407 10:49:51.186579 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
76 | + "I0407 10:49:51.186631 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.token_min_padding_length = 0\n", | ||
77 | + "I0407 10:49:51.186791 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.type = single_id\n", | ||
78 | + "I0407 10:49:51.186975 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.namespace = xpostag\n", | ||
79 | + "I0407 10:49:51.187041 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.lowercase_tokens = False\n", | ||
80 | + "I0407 10:49:51.187107 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.start_tokens = None\n", | ||
81 | + "I0407 10:49:51.187170 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.end_tokens = None\n", | ||
82 | + "I0407 10:49:51.187220 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.feature_name = tag_\n", | ||
83 | + "I0407 10:49:51.187275 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
84 | + "I0407 10:49:51.187334 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.token_min_padding_length = 0\n", | ||
85 | + "I0407 10:49:51.187556 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.type = characters_const_padding\n", | ||
86 | + "I0407 10:49:51.187731 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.namespace = lemma_characters\n", | ||
87 | + "I0407 10:49:51.187935 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
88 | + "I0407 10:49:51.187995 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
89 | + "I0407 10:49:51.188073 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
90 | + "I0407 10:49:51.188217 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
91 | + "I0407 10:49:51.188334 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.start_tokens = None\n", | ||
92 | + "I0407 10:49:51.188398 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.end_tokens = None\n", | ||
93 | + "I0407 10:49:51.188460 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.min_padding_length = 32\n", | ||
94 | + "I0407 10:49:51.188522 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.token_min_padding_length = 0\n", | ||
95 | + "I0407 10:49:51.188614 140072765682752 params.py:248] dataset_reader.features = ['token', 'char']\n", | ||
96 | + "I0407 10:49:51.188712 140072765682752 params.py:248] dataset_reader.targets = ['head', 'deprel']\n", | ||
97 | + "I0407 10:49:51.188802 140072765682752 params.py:248] dataset_reader.use_sem = False\n", | ||
98 | + "I0407 10:49:51.188952 140072765682752 params.py:248] dataset_reader.type = conllu\n", | ||
99 | + "I0407 10:49:51.189191 140072765682752 params.py:248] dataset_reader.lazy = False\n", | ||
100 | + "I0407 10:49:51.189266 140072765682752 params.py:248] dataset_reader.cache_directory = None\n", | ||
101 | + "I0407 10:49:51.189324 140072765682752 params.py:248] dataset_reader.max_instances = None\n", | ||
102 | + "I0407 10:49:51.189382 140072765682752 params.py:248] dataset_reader.manual_distributed_sharding = False\n", | ||
103 | + "I0407 10:49:51.189436 140072765682752 params.py:248] dataset_reader.manual_multi_process_sharding = False\n", | ||
104 | + "I0407 10:49:51.189675 140072765682752 params.py:248] dataset_reader.token_indexers.char.type = characters_const_padding\n", | ||
105 | + "I0407 10:49:51.189843 140072765682752 params.py:248] dataset_reader.token_indexers.char.namespace = token_characters\n", | ||
106 | + "I0407 10:49:51.190060 140072765682752 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
107 | + "I0407 10:49:51.190128 140072765682752 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
108 | + "I0407 10:49:51.190197 140072765682752 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
109 | + "I0407 10:49:51.190324 140072765682752 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
110 | + "I0407 10:49:51.190443 140072765682752 params.py:248] dataset_reader.token_indexers.char.start_tokens = None\n", | ||
111 | + "I0407 10:49:51.190508 140072765682752 params.py:248] dataset_reader.token_indexers.char.end_tokens = None\n", | ||
112 | + "I0407 10:49:51.190564 140072765682752 params.py:248] dataset_reader.token_indexers.char.min_padding_length = 32\n", | ||
113 | + "I0407 10:49:51.190627 140072765682752 params.py:248] dataset_reader.token_indexers.char.token_min_padding_length = 0\n", | ||
114 | + "I0407 10:49:51.190772 140072765682752 params.py:248] dataset_reader.token_indexers.feats.type = feats_indexer\n", | ||
115 | + "I0407 10:49:51.190932 140072765682752 params.py:248] dataset_reader.token_indexers.feats.namespace = feats\n", | ||
116 | + "I0407 10:49:51.191003 140072765682752 params.py:248] dataset_reader.token_indexers.feats.feature_name = feats_\n", | ||
117 | + "I0407 10:49:51.191065 140072765682752 params.py:248] dataset_reader.token_indexers.feats.token_min_padding_length = 0\n", | ||
118 | + "I0407 10:49:51.191206 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.type = characters_const_padding\n", | ||
119 | + "I0407 10:49:51.191369 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.namespace = token_characters\n", | ||
120 | + "I0407 10:49:51.191561 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.byte_encoding = None\n", | ||
121 | + "I0407 10:49:51.191629 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.lowercase_characters = False\n", | ||
122 | + "I0407 10:49:51.191706 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.start_tokens = ['__START__']\n", | ||
123 | + "I0407 10:49:51.191827 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.end_tokens = ['__END__']\n", | ||
124 | + "I0407 10:49:51.191938 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.start_tokens = None\n", | ||
125 | + "I0407 10:49:51.191999 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.end_tokens = None\n", | ||
126 | + "I0407 10:49:51.192067 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.min_padding_length = 32\n", | ||
127 | + "I0407 10:49:51.192142 140072765682752 params.py:248] dataset_reader.token_indexers.lemma.token_min_padding_length = 0\n", | ||
128 | + "I0407 10:49:51.192281 140072765682752 params.py:248] dataset_reader.token_indexers.token.type = pretrained_transformer_mismatched_fixed\n", | ||
129 | + "I0407 10:49:51.192501 140072765682752 params.py:248] dataset_reader.token_indexers.token.token_min_padding_length = 0\n", | ||
130 | + "I0407 10:49:51.192575 140072765682752 params.py:248] dataset_reader.token_indexers.token.model_name = allegro/herbert-large-cased\n", | ||
131 | + "I0407 10:49:51.192638 140072765682752 params.py:248] dataset_reader.token_indexers.token.namespace = tags\n", | ||
132 | + "I0407 10:49:51.192698 140072765682752 params.py:248] dataset_reader.token_indexers.token.max_length = None\n", | ||
133 | + "I0407 10:49:51.192795 140072765682752 params.py:384] dataset_reader.token_indexers.token.tokenizer_kwargs.use_fast = False\n", | ||
134 | + "I0407 10:49:51.194080 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.type = single_id\n" | ||
135 | + ] | ||
136 | + }, | ||
137 | + { | ||
138 | + "name": "stdout", | ||
139 | + "output_type": "stream", | ||
140 | + "text": [ | ||
141 | + "I0407 10:49:51.194318 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.namespace = upostag\n", | ||
142 | + "I0407 10:49:51.194404 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.lowercase_tokens = False\n", | ||
143 | + "I0407 10:49:51.194471 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.start_tokens = None\n", | ||
144 | + "I0407 10:49:51.194532 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.end_tokens = None\n", | ||
145 | + "I0407 10:49:51.194586 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.feature_name = pos_\n", | ||
146 | + "I0407 10:49:51.194648 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
147 | + "I0407 10:49:51.194708 140072765682752 params.py:248] dataset_reader.token_indexers.upostag.token_min_padding_length = 0\n", | ||
148 | + "I0407 10:49:51.194854 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.type = single_id\n", | ||
149 | + "I0407 10:49:51.195033 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.namespace = xpostag\n", | ||
150 | + "I0407 10:49:51.195105 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.lowercase_tokens = False\n", | ||
151 | + "I0407 10:49:51.195167 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.start_tokens = None\n", | ||
152 | + "I0407 10:49:51.195222 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.end_tokens = None\n", | ||
153 | + "I0407 10:49:51.195280 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.feature_name = tag_\n", | ||
154 | + "I0407 10:49:51.195338 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
155 | + "I0407 10:49:51.195398 140072765682752 params.py:248] dataset_reader.token_indexers.xpostag.token_min_padding_length = 0\n", | ||
156 | + "I0407 10:49:51.195601 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.type = characters_const_padding\n", | ||
157 | + "I0407 10:49:51.195774 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.namespace = lemma_characters\n", | ||
158 | + "I0407 10:49:51.195971 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
159 | + "I0407 10:49:51.196039 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
160 | + "I0407 10:49:51.196113 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
161 | + "I0407 10:49:51.196244 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
162 | + "I0407 10:49:51.196364 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.start_tokens = None\n", | ||
163 | + "I0407 10:49:51.196430 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.end_tokens = None\n", | ||
164 | + "I0407 10:49:51.196492 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.min_padding_length = 32\n", | ||
165 | + "I0407 10:49:51.196552 140072765682752 params.py:248] dataset_reader.lemma_indexers.char.token_min_padding_length = 0\n", | ||
166 | + "I0407 10:49:51.196640 140072765682752 params.py:248] dataset_reader.features = ['token', 'char']\n", | ||
167 | + "I0407 10:49:51.196732 140072765682752 params.py:248] dataset_reader.targets = ['head', 'deprel']\n", | ||
168 | + "I0407 10:49:51.196815 140072765682752 params.py:248] dataset_reader.use_sem = False\n", | ||
169 | + "I0407 10:49:51.197346 140072765682752 params.py:248] vocabulary.type = from_instances_extended\n", | ||
170 | + "I0407 10:49:51.197421 140072765682752 vocabulary.py:323] Loading token dictionary from /tmp/tmp_htckuhc/vocabulary.\n", | ||
171 | + "I0407 10:49:51.197736 140072765682752 filelock.py:254] Lock 140069359832176 acquired on /tmp/tmp_htckuhc/vocabulary/.lock\n", | ||
172 | + "I0407 10:49:51.198361 140072765682752 filelock.py:317] Lock 140069359832176 released on /tmp/tmp_htckuhc/vocabulary/.lock\n", | ||
173 | + "I0407 10:49:51.198865 140072765682752 params.py:248] model.type = semantic_multitask\n", | ||
174 | + "I0407 10:49:51.199399 140072765682752 params.py:248] model.text_field_embedder.type = basic\n", | ||
175 | + "I0407 10:49:51.199762 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.type = char_embeddings_from_config\n", | ||
176 | + "I0407 10:49:51.199955 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.embedding_dim = 64\n", | ||
177 | + "I0407 10:49:51.200206 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.input_dim = 64\n", | ||
178 | + "I0407 10:49:51.200286 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.filters = [512, 256, 64]\n", | ||
179 | + "I0407 10:49:51.200380 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.kernel_size = [3, 3, 3]\n", | ||
180 | + "I0407 10:49:51.200467 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.stride = [1, 1, 1]\n", | ||
181 | + "I0407 10:49:51.200556 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.padding = [1, 2, 4]\n", | ||
182 | + "I0407 10:49:51.200649 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.dilation = [1, 2, 4]\n", | ||
183 | + "I0407 10:49:51.200745 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.activations = ['relu', 'relu', 'linear']\n", | ||
184 | + "I0407 10:49:51.200886 140072765682752 params.py:248] type = relu\n", | ||
185 | + "I0407 10:49:51.201073 140072765682752 params.py:248] type = relu\n", | ||
186 | + "I0407 10:49:51.201222 140072765682752 params.py:248] type = linear\n", | ||
187 | + "I0407 10:49:51.208180 140072765682752 params.py:248] model.text_field_embedder.token_embedders.char.vocab_namespace = token_characters\n", | ||
188 | + "I0407 10:49:51.208718 140072765682752 params.py:248] model.text_field_embedder.token_embedders.token.type = transformers_word_embeddings\n", | ||
189 | + "I0407 10:49:51.208946 140072765682752 params.py:248] model.text_field_embedder.token_embedders.token.model_name = allegro/herbert-large-cased\n", | ||
190 | + "I0407 10:49:51.209028 140072765682752 params.py:248] model.text_field_embedder.token_embedders.token.projection_dim = 100\n", | ||
191 | + "I0407 10:49:51.209110 140072765682752 params.py:248] model.text_field_embedder.token_embedders.token.projection_activation = <function TransformersWordEmbedder.<lambda> at 0x7f646dd85280>\n", | ||
192 | + "I0407 10:49:51.209182 140072765682752 params.py:248] model.text_field_embedder.token_embedders.token.projection_dropout_rate = 0.0\n", | ||
193 | + "I0407 10:49:51.209239 140072765682752 params.py:248] model.text_field_embedder.token_embedders.token.freeze_transformer = True\n", | ||
194 | + "I0407 10:49:51.209295 140072765682752 params.py:248] model.text_field_embedder.token_embedders.token.last_layer_only = True\n", | ||
195 | + "I0407 10:49:51.209401 140072765682752 params.py:384] model.text_field_embedder.token_embedders.token.tokenizer_kwargs.use_fast = False\n", | ||
196 | + "I0407 10:49:51.209471 140072765682752 params.py:248] model.text_field_embedder.token_embedders.token.transformer_kwargs = None\n", | ||
197 | + "I0407 10:49:58.747374 140072765682752 params.py:248] model.seq_encoder.type = combo_encoder\n", | ||
198 | + "I0407 10:49:58.747746 140072765682752 params.py:248] model.seq_encoder.stacked_bilstm.input_size = 164\n", | ||
199 | + "I0407 10:49:58.747819 140072765682752 params.py:248] model.seq_encoder.stacked_bilstm.hidden_size = 512\n", | ||
200 | + "I0407 10:49:58.747869 140072765682752 params.py:248] model.seq_encoder.stacked_bilstm.num_layers = 2\n", | ||
201 | + "I0407 10:49:58.747919 140072765682752 params.py:248] model.seq_encoder.stacked_bilstm.recurrent_dropout_probability = 0.33\n", | ||
202 | + "I0407 10:49:58.747966 140072765682752 params.py:248] model.seq_encoder.stacked_bilstm.layer_dropout_probability = 0.33\n", | ||
203 | + "I0407 10:49:58.748013 140072765682752 params.py:248] model.seq_encoder.stacked_bilstm.use_highway = False\n", | ||
204 | + "I0407 10:49:59.084017 140072765682752 params.py:248] model.seq_encoder.layer_dropout_probability = 0.33\n", | ||
205 | + "I0407 10:49:59.084280 140072765682752 params.py:248] model.use_sample_weight = True\n", | ||
206 | + "I0407 10:49:59.084377 140072765682752 params.py:248] model.lemmatizer = None\n", | ||
207 | + "I0407 10:49:59.084436 140072765682752 params.py:248] model.upos_tagger = None\n", | ||
208 | + "I0407 10:49:59.084487 140072765682752 params.py:248] model.xpos_tagger = None\n", | ||
209 | + "I0407 10:49:59.084537 140072765682752 params.py:248] model.semantic_relation = None\n", | ||
210 | + "I0407 10:49:59.084585 140072765682752 params.py:248] model.morphological_feat = None\n", | ||
211 | + "I0407 10:49:59.084832 140072765682752 params.py:248] model.dependency_relation.type = combo_dependency_parsing_from_vocab\n", | ||
212 | + "I0407 10:49:59.085025 140072765682752 params.py:248] model.dependency_relation.vocab_namespace = deprel_labels\n", | ||
213 | + "I0407 10:49:59.085301 140072765682752 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.in_features = 1024\n", | ||
214 | + "I0407 10:49:59.085365 140072765682752 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.out_features = 512\n", | ||
215 | + "I0407 10:49:59.085421 140072765682752 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.activation = tanh\n", | ||
216 | + "I0407 10:49:59.085520 140072765682752 params.py:248] type = tanh\n", | ||
217 | + "I0407 10:49:59.085608 140072765682752 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.dropout_rate = 0.0\n", | ||
218 | + "I0407 10:49:59.089095 140072765682752 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.in_features = 1024\n", | ||
219 | + "I0407 10:49:59.089183 140072765682752 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.out_features = 512\n", | ||
220 | + "I0407 10:49:59.089244 140072765682752 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.activation = tanh\n", | ||
221 | + "I0407 10:49:59.089346 140072765682752 params.py:248] type = tanh\n", | ||
222 | + "I0407 10:49:59.089423 140072765682752 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.dropout_rate = 0.0\n", | ||
223 | + "I0407 10:49:59.092701 140072765682752 params.py:248] model.dependency_relation.head_predictor.cycle_loss_n = 0\n", | ||
224 | + "I0407 10:49:59.092917 140072765682752 params.py:248] model.dependency_relation.head_projection_layer.in_features = 1024\n", | ||
225 | + "I0407 10:49:59.092972 140072765682752 params.py:248] model.dependency_relation.head_projection_layer.out_features = 128\n", | ||
226 | + "I0407 10:49:59.093022 140072765682752 params.py:248] model.dependency_relation.head_projection_layer.activation = tanh\n", | ||
227 | + "I0407 10:49:59.093108 140072765682752 params.py:248] type = tanh\n", | ||
228 | + "I0407 10:49:59.093183 140072765682752 params.py:248] model.dependency_relation.head_projection_layer.dropout_rate = 0.25\n", | ||
229 | + "I0407 10:49:59.094336 140072765682752 params.py:248] model.dependency_relation.dependency_projection_layer.in_features = 1024\n", | ||
230 | + "I0407 10:49:59.094411 140072765682752 params.py:248] model.dependency_relation.dependency_projection_layer.out_features = 128\n", | ||
231 | + "I0407 10:49:59.094463 140072765682752 params.py:248] model.dependency_relation.dependency_projection_layer.activation = tanh\n", | ||
232 | + "I0407 10:49:59.094551 140072765682752 params.py:248] type = tanh\n", | ||
233 | + "I0407 10:49:59.094618 140072765682752 params.py:248] model.dependency_relation.dependency_projection_layer.dropout_rate = 0.25\n", | ||
234 | + "I0407 10:49:59.095806 140072765682752 params.py:248] model.enhanced_dependency_relation = None\n", | ||
235 | + "I0407 10:49:59.096206 140072765682752 params.py:248] model.regularizer.regexes.0.1.type = l2\n", | ||
236 | + "I0407 10:49:59.096345 140072765682752 params.py:248] model.regularizer.regexes.0.1.alpha = 1e-06\n", | ||
237 | + "I0407 10:49:59.096471 140072765682752 params.py:248] model.regularizer.regexes.1.1.type = l2\n", | ||
238 | + "I0407 10:49:59.096584 140072765682752 params.py:248] model.regularizer.regexes.1.1.alpha = 1e-06\n", | ||
239 | + "I0407 10:49:59.096696 140072765682752 params.py:248] model.regularizer.regexes.2.1.type = l2\n", | ||
240 | + "I0407 10:49:59.096809 140072765682752 params.py:248] model.regularizer.regexes.2.1.alpha = 1e-06\n", | ||
241 | + "I0407 10:49:59.096917 140072765682752 params.py:248] model.regularizer.regexes.3.1.type = l2\n", | ||
242 | + "I0407 10:49:59.097025 140072765682752 params.py:248] model.regularizer.regexes.3.1.alpha = 1e-05\n" | ||
243 | + ] | ||
244 | + }, | ||
245 | + { | ||
246 | + "name": "stdout", | ||
247 | + "output_type": "stream", | ||
248 | + "text": [ | ||
249 | + "I0407 10:50:01.854557 140072765682752 archival.py:211] removing temporary unarchived model dir at /tmp/tmp_htckuhc\n", | ||
250 | + "reading instances: 2211it [01:52, 19.69it/s]\n" | ||
251 | + ] | ||
252 | + } | ||
253 | + ], | ||
254 | + "source": [ | ||
255 | + "! {COMBO} --mode predict \\\n", | ||
256 | + " --cuda_device 0 \\\n", | ||
257 | + " --model_path model-pdbc/model.tar.gz \\\n", | ||
258 | + " --input_file connlu/pdbc-validation.conllu \\\n", | ||
259 | + " --output_file connlu/pdbc-validation-pred.conllu" | ||
260 | + ] | ||
261 | + }, | ||
262 | + { | ||
263 | + "cell_type": "code", | ||
264 | + "execution_count": 3, | ||
265 | + "id": "11f1b7b1", | ||
266 | + "metadata": {}, | ||
267 | + "outputs": [ | ||
268 | + { | ||
269 | + "name": "stdout", | ||
270 | + "output_type": "stream", | ||
271 | + "text": [ | ||
272 | + "# text = Dwie dziewczynki opierają się o dach kapliczki , chłopiec wspina się na niego , a trzecia dziewczynka stoi obok .\r\n", | ||
273 | + "1\tDwie\tdwa\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
274 | + "2\tdziewczynki\tdziewczynka\t_\t_\t_\t1\tcomp\t1:comp\t_\r\n", | ||
275 | + "3\topierają\topierać\t_\t_\t_\t15\tconjunct\t15:conjunct\t_\r\n", | ||
276 | + "4\tsię\tsię\t_\t_\t_\t3\trefl\t3:refl\t_\r\n", | ||
277 | + "5\to\to\t_\t_\t_\t3\tcomp\t3:comp\t_\r\n", | ||
278 | + "6\tdach\tdach\t_\t_\t_\t5\tcomp\t5:comp\t_\r\n", | ||
279 | + "7\tkapliczki\tkapliczka\t_\t_\t_\t6\tadjunct\t6:adjunct\t_\r\n", | ||
280 | + "8\t,\t,\t_\t_\t_\t15\tpunct\t15:punct\t_\r\n", | ||
281 | + "9\tchłopiec\tchłopiec\t_\t_\t_\t10\tsubj\t10:subj\t_\r\n" | ||
282 | + ] | ||
283 | + } | ||
284 | + ], | ||
285 | + "source": [ | ||
286 | + "! head connlu/pdbc-validation.conllu" | ||
287 | + ] | ||
288 | + }, | ||
289 | + { | ||
290 | + "cell_type": "code", | ||
291 | + "execution_count": 4, | ||
292 | + "id": "8fa72124", | ||
293 | + "metadata": {}, | ||
294 | + "outputs": [ | ||
295 | + { | ||
296 | + "name": "stdout", | ||
297 | + "output_type": "stream", | ||
298 | + "text": [ | ||
299 | + "# text = Dwie dziewczynki opierają się o dach kapliczki , chłopiec wspina się na niego , a trzecia dziewczynka stoi obok .\r\n", | ||
300 | + "1\tDwie\tdwa\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
301 | + "2\tdziewczynki\tdziewczynka\t_\t_\t_\t1\tcomp\t1:comp\t_\r\n", | ||
302 | + "3\topierają\topierać\t_\t_\t_\t15\tconjunct\t15:conjunct\t_\r\n", | ||
303 | + "4\tsię\tsię\t_\t_\t_\t3\trefl\t3:refl\t_\r\n", | ||
304 | + "5\to\to\t_\t_\t_\t3\tcomp\t3:comp\t_\r\n", | ||
305 | + "6\tdach\tdach\t_\t_\t_\t5\tcomp\t5:comp\t_\r\n", | ||
306 | + "7\tkapliczki\tkapliczka\t_\t_\t_\t6\tadjunct\t6:adjunct\t_\r\n", | ||
307 | + "8\t,\t,\t_\t_\t_\t15\tpunct\t15:punct\t_\r\n", | ||
308 | + "9\tchłopiec\tchłopiec\t_\t_\t_\t10\tsubj\t10:subj\t_\r\n" | ||
309 | + ] | ||
310 | + } | ||
311 | + ], | ||
312 | + "source": [ | ||
313 | + "! head connlu/pdbc-validation-pred.conllu" | ||
314 | + ] | ||
315 | + }, | ||
316 | + { | ||
317 | + "cell_type": "code", | ||
318 | + "execution_count": 5, | ||
319 | + "id": "dde6dd31", | ||
320 | + "metadata": {}, | ||
321 | + "outputs": [ | ||
322 | + { | ||
323 | + "name": "stdout", | ||
324 | + "output_type": "stream", | ||
325 | + "text": [ | ||
326 | + "I0407 10:52:00.220404 139754138821696 archival.py:184] loading archive file model-pdbc/model.tar.gz\n", | ||
327 | + "I0407 10:52:00.221079 139754138821696 archival.py:263] extracting archive file model-pdbc/model.tar.gz to temp dir /tmp/tmp2jhqu3i6\n", | ||
328 | + "I0407 10:52:16.996590 139754138821696 params.py:248] dataset_reader.type = conllu\n", | ||
329 | + "I0407 10:52:16.997079 139754138821696 params.py:248] dataset_reader.lazy = False\n", | ||
330 | + "I0407 10:52:16.997236 139754138821696 params.py:248] dataset_reader.cache_directory = None\n", | ||
331 | + "I0407 10:52:16.997326 139754138821696 params.py:248] dataset_reader.max_instances = None\n", | ||
332 | + "I0407 10:52:16.997391 139754138821696 params.py:248] dataset_reader.manual_distributed_sharding = False\n", | ||
333 | + "I0407 10:52:16.997456 139754138821696 params.py:248] dataset_reader.manual_multi_process_sharding = False\n", | ||
334 | + "I0407 10:52:16.997756 139754138821696 params.py:248] dataset_reader.token_indexers.char.type = characters_const_padding\n", | ||
335 | + "I0407 10:52:16.997950 139754138821696 params.py:248] dataset_reader.token_indexers.char.namespace = token_characters\n", | ||
336 | + "I0407 10:52:16.998211 139754138821696 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
337 | + "I0407 10:52:16.998285 139754138821696 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
338 | + "I0407 10:52:16.998367 139754138821696 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
339 | + "I0407 10:52:16.998522 139754138821696 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
340 | + "I0407 10:52:16.998643 139754138821696 params.py:248] dataset_reader.token_indexers.char.start_tokens = None\n", | ||
341 | + "I0407 10:52:16.998707 139754138821696 params.py:248] dataset_reader.token_indexers.char.end_tokens = None\n", | ||
342 | + "I0407 10:52:16.998770 139754138821696 params.py:248] dataset_reader.token_indexers.char.min_padding_length = 32\n", | ||
343 | + "I0407 10:52:16.998831 139754138821696 params.py:248] dataset_reader.token_indexers.char.token_min_padding_length = 0\n", | ||
344 | + "I0407 10:52:16.998980 139754138821696 params.py:248] dataset_reader.token_indexers.feats.type = feats_indexer\n", | ||
345 | + "I0407 10:52:16.999143 139754138821696 params.py:248] dataset_reader.token_indexers.feats.namespace = feats\n", | ||
346 | + "I0407 10:52:16.999213 139754138821696 params.py:248] dataset_reader.token_indexers.feats.feature_name = feats_\n", | ||
347 | + "I0407 10:52:16.999269 139754138821696 params.py:248] dataset_reader.token_indexers.feats.token_min_padding_length = 0\n", | ||
348 | + "I0407 10:52:16.999412 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.type = characters_const_padding\n", | ||
349 | + "I0407 10:52:16.999578 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.namespace = token_characters\n", | ||
350 | + "I0407 10:52:16.999774 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.byte_encoding = None\n", | ||
351 | + "I0407 10:52:16.999842 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.lowercase_characters = False\n", | ||
352 | + "I0407 10:52:16.999923 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.start_tokens = ['__START__']\n", | ||
353 | + "I0407 10:52:17.000045 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.end_tokens = ['__END__']\n", | ||
354 | + "I0407 10:52:17.000156 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.start_tokens = None\n", | ||
355 | + "I0407 10:52:17.000220 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.end_tokens = None\n", | ||
356 | + "I0407 10:52:17.000282 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.min_padding_length = 32\n", | ||
357 | + "I0407 10:52:17.000344 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.token_min_padding_length = 0\n", | ||
358 | + "I0407 10:52:17.000521 139754138821696 params.py:248] dataset_reader.token_indexers.token.type = pretrained_transformer_mismatched_fixed\n", | ||
359 | + "I0407 10:52:17.000770 139754138821696 params.py:248] dataset_reader.token_indexers.token.token_min_padding_length = 0\n", | ||
360 | + "I0407 10:52:17.000865 139754138821696 params.py:248] dataset_reader.token_indexers.token.model_name = allegro/herbert-large-cased\n", | ||
361 | + "I0407 10:52:17.000947 139754138821696 params.py:248] dataset_reader.token_indexers.token.namespace = tags\n", | ||
362 | + "I0407 10:52:17.001028 139754138821696 params.py:248] dataset_reader.token_indexers.token.max_length = None\n", | ||
363 | + "I0407 10:52:17.001172 139754138821696 params.py:384] dataset_reader.token_indexers.token.tokenizer_kwargs.use_fast = False\n", | ||
364 | + "I0407 10:52:20.459573 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.type = single_id\n", | ||
365 | + "I0407 10:52:20.459947 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.namespace = upostag\n", | ||
366 | + "I0407 10:52:20.460046 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.lowercase_tokens = False\n", | ||
367 | + "I0407 10:52:20.460119 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.start_tokens = None\n", | ||
368 | + "I0407 10:52:20.460172 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.end_tokens = None\n", | ||
369 | + "I0407 10:52:20.460235 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.feature_name = pos_\n", | ||
370 | + "I0407 10:52:20.460288 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
371 | + "I0407 10:52:20.460351 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.token_min_padding_length = 0\n", | ||
372 | + "I0407 10:52:20.460508 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.type = single_id\n", | ||
373 | + "I0407 10:52:20.460695 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.namespace = xpostag\n", | ||
374 | + "I0407 10:52:20.460773 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.lowercase_tokens = False\n", | ||
375 | + "I0407 10:52:20.460840 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.start_tokens = None\n", | ||
376 | + "I0407 10:52:20.460901 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.end_tokens = None\n", | ||
377 | + "I0407 10:52:20.460962 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.feature_name = tag_\n", | ||
378 | + "I0407 10:52:20.461021 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
379 | + "I0407 10:52:20.461083 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.token_min_padding_length = 0\n", | ||
380 | + "I0407 10:52:20.461313 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.type = characters_const_padding\n", | ||
381 | + "I0407 10:52:20.461496 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.namespace = lemma_characters\n", | ||
382 | + "I0407 10:52:20.461706 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
383 | + "I0407 10:52:20.461774 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
384 | + "I0407 10:52:20.461853 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
385 | + "I0407 10:52:20.462028 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
386 | + "I0407 10:52:20.462157 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.start_tokens = None\n", | ||
387 | + "I0407 10:52:20.462226 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.end_tokens = None\n", | ||
388 | + "I0407 10:52:20.462283 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.min_padding_length = 32\n", | ||
389 | + "I0407 10:52:20.462336 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.token_min_padding_length = 0\n", | ||
390 | + "I0407 10:52:20.462417 139754138821696 params.py:248] dataset_reader.features = ['token', 'char']\n", | ||
391 | + "I0407 10:52:20.462514 139754138821696 params.py:248] dataset_reader.targets = ['head', 'deprel']\n", | ||
392 | + "I0407 10:52:20.462607 139754138821696 params.py:248] dataset_reader.use_sem = False\n", | ||
393 | + "I0407 10:52:20.462767 139754138821696 params.py:248] dataset_reader.type = conllu\n", | ||
394 | + "I0407 10:52:20.463083 139754138821696 params.py:248] dataset_reader.lazy = False\n", | ||
395 | + "I0407 10:52:20.463172 139754138821696 params.py:248] dataset_reader.cache_directory = None\n", | ||
396 | + "I0407 10:52:20.463237 139754138821696 params.py:248] dataset_reader.max_instances = None\n", | ||
397 | + "I0407 10:52:20.463301 139754138821696 params.py:248] dataset_reader.manual_distributed_sharding = False\n", | ||
398 | + "I0407 10:52:20.463361 139754138821696 params.py:248] dataset_reader.manual_multi_process_sharding = False\n", | ||
399 | + "I0407 10:52:20.463605 139754138821696 params.py:248] dataset_reader.token_indexers.char.type = characters_const_padding\n", | ||
400 | + "I0407 10:52:20.463779 139754138821696 params.py:248] dataset_reader.token_indexers.char.namespace = token_characters\n", | ||
401 | + "I0407 10:52:20.463980 139754138821696 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
402 | + "I0407 10:52:20.464051 139754138821696 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
403 | + "I0407 10:52:20.464129 139754138821696 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
404 | + "I0407 10:52:20.464254 139754138821696 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
405 | + "I0407 10:52:20.464366 139754138821696 params.py:248] dataset_reader.token_indexers.char.start_tokens = None\n", | ||
406 | + "I0407 10:52:20.464429 139754138821696 params.py:248] dataset_reader.token_indexers.char.end_tokens = None\n", | ||
407 | + "I0407 10:52:20.464490 139754138821696 params.py:248] dataset_reader.token_indexers.char.min_padding_length = 32\n", | ||
408 | + "I0407 10:52:20.464552 139754138821696 params.py:248] dataset_reader.token_indexers.char.token_min_padding_length = 0\n", | ||
409 | + "I0407 10:52:20.464691 139754138821696 params.py:248] dataset_reader.token_indexers.feats.type = feats_indexer\n", | ||
410 | + "I0407 10:52:20.464847 139754138821696 params.py:248] dataset_reader.token_indexers.feats.namespace = feats\n", | ||
411 | + "I0407 10:52:20.464918 139754138821696 params.py:248] dataset_reader.token_indexers.feats.feature_name = feats_\n", | ||
412 | + "I0407 10:52:20.464980 139754138821696 params.py:248] dataset_reader.token_indexers.feats.token_min_padding_length = 0\n", | ||
413 | + "I0407 10:52:20.465120 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.type = characters_const_padding\n", | ||
414 | + "I0407 10:52:20.465285 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.namespace = token_characters\n", | ||
415 | + "I0407 10:52:20.465479 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.byte_encoding = None\n", | ||
416 | + "I0407 10:52:20.465544 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.lowercase_characters = False\n", | ||
417 | + "I0407 10:52:20.465618 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.start_tokens = ['__START__']\n", | ||
418 | + "I0407 10:52:20.465741 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.end_tokens = ['__END__']\n", | ||
419 | + "I0407 10:52:20.465851 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.start_tokens = None\n", | ||
420 | + "I0407 10:52:20.465914 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.end_tokens = None\n", | ||
421 | + "I0407 10:52:20.466024 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.min_padding_length = 32\n", | ||
422 | + "I0407 10:52:20.466112 139754138821696 params.py:248] dataset_reader.token_indexers.lemma.token_min_padding_length = 0\n", | ||
423 | + "I0407 10:52:20.466268 139754138821696 params.py:248] dataset_reader.token_indexers.token.type = pretrained_transformer_mismatched_fixed\n", | ||
424 | + "I0407 10:52:20.466485 139754138821696 params.py:248] dataset_reader.token_indexers.token.token_min_padding_length = 0\n", | ||
425 | + "I0407 10:52:20.466559 139754138821696 params.py:248] dataset_reader.token_indexers.token.model_name = allegro/herbert-large-cased\n", | ||
426 | + "I0407 10:52:20.466621 139754138821696 params.py:248] dataset_reader.token_indexers.token.namespace = tags\n", | ||
427 | + "I0407 10:52:20.466682 139754138821696 params.py:248] dataset_reader.token_indexers.token.max_length = None\n", | ||
428 | + "I0407 10:52:20.466777 139754138821696 params.py:384] dataset_reader.token_indexers.token.tokenizer_kwargs.use_fast = False\n", | ||
429 | + "I0407 10:52:20.468071 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.type = single_id\n", | ||
430 | + "I0407 10:52:20.468319 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.namespace = upostag\n", | ||
431 | + "I0407 10:52:20.468404 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.lowercase_tokens = False\n", | ||
432 | + "I0407 10:52:20.468464 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.start_tokens = None\n", | ||
433 | + "I0407 10:52:20.468523 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.end_tokens = None\n", | ||
434 | + "I0407 10:52:20.468573 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.feature_name = pos_\n", | ||
435 | + "I0407 10:52:20.468636 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
436 | + "I0407 10:52:20.468697 139754138821696 params.py:248] dataset_reader.token_indexers.upostag.token_min_padding_length = 0\n", | ||
437 | + "I0407 10:52:20.468832 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.type = single_id\n", | ||
438 | + "I0407 10:52:20.469012 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.namespace = xpostag\n", | ||
439 | + "I0407 10:52:20.469086 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.lowercase_tokens = False\n", | ||
440 | + "I0407 10:52:20.469144 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.start_tokens = None\n", | ||
441 | + "I0407 10:52:20.469196 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.end_tokens = None\n", | ||
442 | + "I0407 10:52:20.469256 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.feature_name = tag_\n", | ||
443 | + "I0407 10:52:20.469320 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
444 | + "I0407 10:52:20.469382 139754138821696 params.py:248] dataset_reader.token_indexers.xpostag.token_min_padding_length = 0\n", | ||
445 | + "I0407 10:52:20.469586 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.type = characters_const_padding\n", | ||
446 | + "I0407 10:52:20.469758 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.namespace = lemma_characters\n", | ||
447 | + "I0407 10:52:20.469957 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
448 | + "I0407 10:52:20.470050 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
449 | + "I0407 10:52:20.470128 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
450 | + "I0407 10:52:20.470261 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
451 | + "I0407 10:52:20.470381 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.start_tokens = None\n", | ||
452 | + "I0407 10:52:20.470448 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.end_tokens = None\n", | ||
453 | + "I0407 10:52:20.470509 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.min_padding_length = 32\n", | ||
454 | + "I0407 10:52:20.470579 139754138821696 params.py:248] dataset_reader.lemma_indexers.char.token_min_padding_length = 0\n", | ||
455 | + "I0407 10:52:20.470668 139754138821696 params.py:248] dataset_reader.features = ['token', 'char']\n", | ||
456 | + "I0407 10:52:20.470764 139754138821696 params.py:248] dataset_reader.targets = ['head', 'deprel']\n", | ||
457 | + "I0407 10:52:20.470849 139754138821696 params.py:248] dataset_reader.use_sem = False\n", | ||
458 | + "I0407 10:52:20.471387 139754138821696 params.py:248] vocabulary.type = from_instances_extended\n", | ||
459 | + "I0407 10:52:20.471461 139754138821696 vocabulary.py:323] Loading token dictionary from /tmp/tmp2jhqu3i6/vocabulary.\n", | ||
460 | + "I0407 10:52:20.471798 139754138821696 filelock.py:254] Lock 139750732975216 acquired on /tmp/tmp2jhqu3i6/vocabulary/.lock\n", | ||
461 | + "I0407 10:52:20.472387 139754138821696 filelock.py:317] Lock 139750732975216 released on /tmp/tmp2jhqu3i6/vocabulary/.lock\n", | ||
462 | + "I0407 10:52:20.472922 139754138821696 params.py:248] model.type = semantic_multitask\n", | ||
463 | + "I0407 10:52:20.473455 139754138821696 params.py:248] model.text_field_embedder.type = basic\n", | ||
464 | + "I0407 10:52:20.473808 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.type = char_embeddings_from_config\n", | ||
465 | + "I0407 10:52:20.474030 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.embedding_dim = 64\n", | ||
466 | + "I0407 10:52:20.474286 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.input_dim = 64\n", | ||
467 | + "I0407 10:52:20.474377 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.filters = [512, 256, 64]\n", | ||
468 | + "I0407 10:52:20.474480 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.kernel_size = [3, 3, 3]\n", | ||
469 | + "I0407 10:52:20.474578 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.stride = [1, 1, 1]\n", | ||
470 | + "I0407 10:52:20.474673 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.padding = [1, 2, 4]\n", | ||
471 | + "I0407 10:52:20.474768 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.dilation = [1, 2, 4]\n", | ||
472 | + "I0407 10:52:20.474864 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.activations = ['relu', 'relu', 'linear']\n", | ||
473 | + "I0407 10:52:20.475005 139754138821696 params.py:248] type = relu\n", | ||
474 | + "I0407 10:52:20.475197 139754138821696 params.py:248] type = relu\n", | ||
475 | + "I0407 10:52:20.475347 139754138821696 params.py:248] type = linear\n", | ||
476 | + "I0407 10:52:20.481609 139754138821696 params.py:248] model.text_field_embedder.token_embedders.char.vocab_namespace = token_characters\n", | ||
477 | + "I0407 10:52:20.482178 139754138821696 params.py:248] model.text_field_embedder.token_embedders.token.type = transformers_word_embeddings\n", | ||
478 | + "I0407 10:52:20.482446 139754138821696 params.py:248] model.text_field_embedder.token_embedders.token.model_name = allegro/herbert-large-cased\n", | ||
479 | + "I0407 10:52:20.482533 139754138821696 params.py:248] model.text_field_embedder.token_embedders.token.projection_dim = 100\n", | ||
480 | + "I0407 10:52:20.482632 139754138821696 params.py:248] model.text_field_embedder.token_embedders.token.projection_activation = <function TransformersWordEmbedder.<lambda> at 0x7f1a3e346280>\n", | ||
481 | + "I0407 10:52:20.482703 139754138821696 params.py:248] model.text_field_embedder.token_embedders.token.projection_dropout_rate = 0.0\n", | ||
482 | + "I0407 10:52:20.482769 139754138821696 params.py:248] model.text_field_embedder.token_embedders.token.freeze_transformer = True\n", | ||
483 | + "I0407 10:52:20.482831 139754138821696 params.py:248] model.text_field_embedder.token_embedders.token.last_layer_only = True\n", | ||
484 | + "I0407 10:52:20.482933 139754138821696 params.py:384] model.text_field_embedder.token_embedders.token.tokenizer_kwargs.use_fast = False\n", | ||
485 | + "I0407 10:52:20.483003 139754138821696 params.py:248] model.text_field_embedder.token_embedders.token.transformer_kwargs = None\n" | ||
486 | + ] | ||
487 | + }, | ||
488 | + { | ||
489 | + "name": "stdout", | ||
490 | + "output_type": "stream", | ||
491 | + "text": [ | ||
492 | + "I0407 10:52:28.699278 139754138821696 params.py:248] model.seq_encoder.type = combo_encoder\n", | ||
493 | + "I0407 10:52:28.699747 139754138821696 params.py:248] model.seq_encoder.stacked_bilstm.input_size = 164\n", | ||
494 | + "I0407 10:52:28.699841 139754138821696 params.py:248] model.seq_encoder.stacked_bilstm.hidden_size = 512\n", | ||
495 | + "I0407 10:52:28.699910 139754138821696 params.py:248] model.seq_encoder.stacked_bilstm.num_layers = 2\n", | ||
496 | + "I0407 10:52:28.699976 139754138821696 params.py:248] model.seq_encoder.stacked_bilstm.recurrent_dropout_probability = 0.33\n", | ||
497 | + "I0407 10:52:28.700042 139754138821696 params.py:248] model.seq_encoder.stacked_bilstm.layer_dropout_probability = 0.33\n", | ||
498 | + "I0407 10:52:28.700106 139754138821696 params.py:248] model.seq_encoder.stacked_bilstm.use_highway = False\n", | ||
499 | + "I0407 10:52:29.089101 139754138821696 params.py:248] model.seq_encoder.layer_dropout_probability = 0.33\n", | ||
500 | + "I0407 10:52:29.089426 139754138821696 params.py:248] model.use_sample_weight = True\n", | ||
501 | + "I0407 10:52:29.089556 139754138821696 params.py:248] model.lemmatizer = None\n", | ||
502 | + "I0407 10:52:29.089638 139754138821696 params.py:248] model.upos_tagger = None\n", | ||
503 | + "I0407 10:52:29.089704 139754138821696 params.py:248] model.xpos_tagger = None\n", | ||
504 | + "I0407 10:52:29.089766 139754138821696 params.py:248] model.semantic_relation = None\n", | ||
505 | + "I0407 10:52:29.089827 139754138821696 params.py:248] model.morphological_feat = None\n", | ||
506 | + "I0407 10:52:29.090160 139754138821696 params.py:248] model.dependency_relation.type = combo_dependency_parsing_from_vocab\n", | ||
507 | + "I0407 10:52:29.090409 139754138821696 params.py:248] model.dependency_relation.vocab_namespace = deprel_labels\n", | ||
508 | + "I0407 10:52:29.090762 139754138821696 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.in_features = 1024\n", | ||
509 | + "I0407 10:52:29.090843 139754138821696 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.out_features = 512\n", | ||
510 | + "I0407 10:52:29.090915 139754138821696 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.activation = tanh\n", | ||
511 | + "I0407 10:52:29.091041 139754138821696 params.py:248] type = tanh\n", | ||
512 | + "I0407 10:52:29.091149 139754138821696 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.dropout_rate = 0.0\n", | ||
513 | + "I0407 10:52:29.096003 139754138821696 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.in_features = 1024\n", | ||
514 | + "I0407 10:52:29.096106 139754138821696 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.out_features = 512\n", | ||
515 | + "I0407 10:52:29.096185 139754138821696 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.activation = tanh\n", | ||
516 | + "I0407 10:52:29.096311 139754138821696 params.py:248] type = tanh\n", | ||
517 | + "I0407 10:52:29.096407 139754138821696 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.dropout_rate = 0.0\n", | ||
518 | + "I0407 10:52:29.101276 139754138821696 params.py:248] model.dependency_relation.head_predictor.cycle_loss_n = 0\n", | ||
519 | + "I0407 10:52:29.101581 139754138821696 params.py:248] model.dependency_relation.head_projection_layer.in_features = 1024\n", | ||
520 | + "I0407 10:52:29.101692 139754138821696 params.py:248] model.dependency_relation.head_projection_layer.out_features = 128\n", | ||
521 | + "I0407 10:52:29.101771 139754138821696 params.py:248] model.dependency_relation.head_projection_layer.activation = tanh\n", | ||
522 | + "I0407 10:52:29.101904 139754138821696 params.py:248] type = tanh\n", | ||
523 | + "I0407 10:52:29.102032 139754138821696 params.py:248] model.dependency_relation.head_projection_layer.dropout_rate = 0.25\n", | ||
524 | + "I0407 10:52:29.103649 139754138821696 params.py:248] model.dependency_relation.dependency_projection_layer.in_features = 1024\n", | ||
525 | + "I0407 10:52:29.103747 139754138821696 params.py:248] model.dependency_relation.dependency_projection_layer.out_features = 128\n", | ||
526 | + "I0407 10:52:29.103819 139754138821696 params.py:248] model.dependency_relation.dependency_projection_layer.activation = tanh\n", | ||
527 | + "I0407 10:52:29.103948 139754138821696 params.py:248] type = tanh\n", | ||
528 | + "I0407 10:52:29.104044 139754138821696 params.py:248] model.dependency_relation.dependency_projection_layer.dropout_rate = 0.25\n", | ||
529 | + "I0407 10:52:29.105780 139754138821696 params.py:248] model.enhanced_dependency_relation = None\n", | ||
530 | + "I0407 10:52:29.106371 139754138821696 params.py:248] model.regularizer.regexes.0.1.type = l2\n", | ||
531 | + "I0407 10:52:29.106555 139754138821696 params.py:248] model.regularizer.regexes.0.1.alpha = 1e-06\n", | ||
532 | + "I0407 10:52:29.106724 139754138821696 params.py:248] model.regularizer.regexes.1.1.type = l2\n", | ||
533 | + "I0407 10:52:29.106879 139754138821696 params.py:248] model.regularizer.regexes.1.1.alpha = 1e-06\n", | ||
534 | + "I0407 10:52:29.107035 139754138821696 params.py:248] model.regularizer.regexes.2.1.type = l2\n", | ||
535 | + "I0407 10:52:29.107207 139754138821696 params.py:248] model.regularizer.regexes.2.1.alpha = 1e-06\n", | ||
536 | + "I0407 10:52:29.107368 139754138821696 params.py:248] model.regularizer.regexes.3.1.type = l2\n", | ||
537 | + "I0407 10:52:29.107544 139754138821696 params.py:248] model.regularizer.regexes.3.1.alpha = 1e-05\n", | ||
538 | + "I0407 10:52:32.063793 139754138821696 archival.py:211] removing temporary unarchived model dir at /tmp/tmp2jhqu3i6\n", | ||
539 | + "reading instances: 2205it [01:49, 20.15it/s]\n" | ||
540 | + ] | ||
541 | + } | ||
542 | + ], | ||
543 | + "source": [ | ||
544 | + "! {COMBO} --mode predict \\\n", | ||
545 | + " --cuda_device 0 \\\n", | ||
546 | + " --model_path model-pdbc/model.tar.gz \\\n", | ||
547 | + " --input_file connlu/pdbc-test.conllu \\\n", | ||
548 | + " --output_file connlu/pdbc-test-pred.conllu" | ||
549 | + ] | ||
550 | + }, | ||
551 | + { | ||
552 | + "cell_type": "code", | ||
553 | + "execution_count": 6, | ||
554 | + "id": "13748ca1", | ||
555 | + "metadata": {}, | ||
556 | + "outputs": [ | ||
557 | + { | ||
558 | + "name": "stdout", | ||
559 | + "output_type": "stream", | ||
560 | + "text": [ | ||
561 | + "# text = Mały chłopiec patrzy w bok po ściągnięciu okularów .\r\n", | ||
562 | + "1\tMały\tmały\t_\t_\t_\t2\tadjunct\t2:adjunct\t_\r\n", | ||
563 | + "2\tchłopiec\tchłopiec\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
564 | + "3\tpatrzy\tpatrzeć\t_\t_\t_\t0\troot\t0:root\t_\r\n", | ||
565 | + "4\tw\tw\t_\t_\t_\t3\tadjunct_adl\t3:adjunct_adl\t_\r\n", | ||
566 | + "5\tbok\tbok\t_\t_\t_\t4\tcomp\t4:comp\t_\r\n", | ||
567 | + "6\tpo\tpo\t_\t_\t_\t3\tadjunct_temp\t3:adjunct_temp\t_\r\n", | ||
568 | + "7\tściągnięciu\tściągnąć\t_\t_\t_\t6\tcomp\t6:comp\t_\r\n", | ||
569 | + "8\tokularów\tokulary\t_\t_\t_\t7\tobj\t7:obj\t_\r\n", | ||
570 | + "9\t.\t.\t_\t_\t_\t3\tpunct\t3:punct\t_\r\n" | ||
571 | + ] | ||
572 | + } | ||
573 | + ], | ||
574 | + "source": [ | ||
575 | + "! head connlu/pdbc-test.conllu" | ||
576 | + ] | ||
577 | + }, | ||
578 | + { | ||
579 | + "cell_type": "code", | ||
580 | + "execution_count": 7, | ||
581 | + "id": "30021124", | ||
582 | + "metadata": {}, | ||
583 | + "outputs": [ | ||
584 | + { | ||
585 | + "name": "stdout", | ||
586 | + "output_type": "stream", | ||
587 | + "text": [ | ||
588 | + "# text = Mały chłopiec patrzy w bok po ściągnięciu okularów .\r\n", | ||
589 | + "1\tMały\tmały\t_\t_\t_\t2\tadjunct\t2:adjunct\t_\r\n", | ||
590 | + "2\tchłopiec\tchłopiec\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
591 | + "3\tpatrzy\tpatrzeć\t_\t_\t_\t0\troot\t0:root\t_\r\n", | ||
592 | + "4\tw\tw\t_\t_\t_\t3\tcomp\t3:adjunct_adl\t_\r\n", | ||
593 | + "5\tbok\tbok\t_\t_\t_\t4\tcomp\t4:comp\t_\r\n", | ||
594 | + "6\tpo\tpo\t_\t_\t_\t3\tadjunct_temp\t3:adjunct_temp\t_\r\n", | ||
595 | + "7\tściągnięciu\tściągnąć\t_\t_\t_\t6\tcomp\t6:comp\t_\r\n", | ||
596 | + "8\tokularów\tokulary\t_\t_\t_\t7\tobj\t7:obj\t_\r\n", | ||
597 | + "9\t.\t.\t_\t_\t_\t3\tpunct\t3:punct\t_\r\n" | ||
598 | + ] | ||
599 | + } | ||
600 | + ], | ||
601 | + "source": [ | ||
602 | + "! head connlu/pdbc-test-pred.conllu" | ||
603 | + ] | ||
604 | + }, | ||
605 | + { | ||
606 | + "cell_type": "markdown", | ||
607 | + "id": "99359d8c", | ||
608 | + "metadata": {}, | ||
609 | + "source": [ | ||
610 | + "Continuous-only model" | ||
611 | + ] | ||
612 | + }, | ||
613 | + { | ||
614 | + "cell_type": "code", | ||
615 | + "execution_count": 8, | ||
616 | + "id": "30a66da6", | ||
617 | + "metadata": {}, | ||
618 | + "outputs": [ | ||
619 | + { | ||
620 | + "name": "stdout", | ||
621 | + "output_type": "stream", | ||
622 | + "text": [ | ||
623 | + "I0407 10:54:27.401382 140321380496448 archival.py:184] loading archive file model-pdbc-cont/model.tar.gz\n", | ||
624 | + "I0407 10:54:27.402150 140321380496448 archival.py:263] extracting archive file model-pdbc-cont/model.tar.gz to temp dir /tmp/tmpuvesoi4q\n", | ||
625 | + "I0407 10:54:43.091615 140321380496448 params.py:248] dataset_reader.type = conllu\n", | ||
626 | + "I0407 10:54:43.092000 140321380496448 params.py:248] dataset_reader.lazy = False\n", | ||
627 | + "I0407 10:54:43.092082 140321380496448 params.py:248] dataset_reader.cache_directory = None\n", | ||
628 | + "I0407 10:54:43.092129 140321380496448 params.py:248] dataset_reader.max_instances = None\n", | ||
629 | + "I0407 10:54:43.092173 140321380496448 params.py:248] dataset_reader.manual_distributed_sharding = False\n", | ||
630 | + "I0407 10:54:43.092208 140321380496448 params.py:248] dataset_reader.manual_multi_process_sharding = False\n", | ||
631 | + "I0407 10:54:43.092409 140321380496448 params.py:248] dataset_reader.token_indexers.char.type = characters_const_padding\n", | ||
632 | + "I0407 10:54:43.092535 140321380496448 params.py:248] dataset_reader.token_indexers.char.namespace = token_characters\n", | ||
633 | + "I0407 10:54:43.092682 140321380496448 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
634 | + "I0407 10:54:43.092730 140321380496448 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
635 | + "I0407 10:54:43.092786 140321380496448 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
636 | + "I0407 10:54:43.092888 140321380496448 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
637 | + "I0407 10:54:43.092970 140321380496448 params.py:248] dataset_reader.token_indexers.char.start_tokens = None\n", | ||
638 | + "I0407 10:54:43.093014 140321380496448 params.py:248] dataset_reader.token_indexers.char.end_tokens = None\n", | ||
639 | + "I0407 10:54:43.093051 140321380496448 params.py:248] dataset_reader.token_indexers.char.min_padding_length = 32\n", | ||
640 | + "I0407 10:54:43.093093 140321380496448 params.py:248] dataset_reader.token_indexers.char.token_min_padding_length = 0\n", | ||
641 | + "I0407 10:54:43.093198 140321380496448 params.py:248] dataset_reader.token_indexers.feats.type = feats_indexer\n", | ||
642 | + "I0407 10:54:43.093306 140321380496448 params.py:248] dataset_reader.token_indexers.feats.namespace = feats\n", | ||
643 | + "I0407 10:54:43.093353 140321380496448 params.py:248] dataset_reader.token_indexers.feats.feature_name = feats_\n", | ||
644 | + "I0407 10:54:43.093388 140321380496448 params.py:248] dataset_reader.token_indexers.feats.token_min_padding_length = 0\n", | ||
645 | + "I0407 10:54:43.093482 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.type = characters_const_padding\n", | ||
646 | + "I0407 10:54:43.093593 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.namespace = token_characters\n", | ||
647 | + "I0407 10:54:43.093723 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.byte_encoding = None\n", | ||
648 | + "I0407 10:54:43.093769 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.lowercase_characters = False\n", | ||
649 | + "I0407 10:54:43.093816 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.start_tokens = ['__START__']\n", | ||
650 | + "I0407 10:54:43.093899 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.end_tokens = ['__END__']\n", | ||
651 | + "I0407 10:54:43.093993 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.start_tokens = None\n", | ||
652 | + "I0407 10:54:43.094043 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.end_tokens = None\n", | ||
653 | + "I0407 10:54:43.094079 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.min_padding_length = 32\n", | ||
654 | + "I0407 10:54:43.094121 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.token_min_padding_length = 0\n", | ||
655 | + "I0407 10:54:43.094226 140321380496448 params.py:248] dataset_reader.token_indexers.token.type = pretrained_transformer_mismatched_fixed\n", | ||
656 | + "I0407 10:54:43.094377 140321380496448 params.py:248] dataset_reader.token_indexers.token.token_min_padding_length = 0\n", | ||
657 | + "I0407 10:54:43.094430 140321380496448 params.py:248] dataset_reader.token_indexers.token.model_name = allegro/herbert-large-cased\n", | ||
658 | + "I0407 10:54:43.094474 140321380496448 params.py:248] dataset_reader.token_indexers.token.namespace = tags\n", | ||
659 | + "I0407 10:54:43.094522 140321380496448 params.py:248] dataset_reader.token_indexers.token.max_length = None\n", | ||
660 | + "I0407 10:54:43.094592 140321380496448 params.py:384] dataset_reader.token_indexers.token.tokenizer_kwargs.use_fast = False\n", | ||
661 | + "I0407 10:54:45.858621 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.type = single_id\n", | ||
662 | + "I0407 10:54:45.858990 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.namespace = upostag\n", | ||
663 | + "I0407 10:54:45.859087 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.lowercase_tokens = False\n", | ||
664 | + "I0407 10:54:45.859157 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.start_tokens = None\n", | ||
665 | + "I0407 10:54:45.859210 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.end_tokens = None\n", | ||
666 | + "I0407 10:54:45.859268 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.feature_name = pos_\n", | ||
667 | + "I0407 10:54:45.859321 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
668 | + "I0407 10:54:45.859382 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.token_min_padding_length = 0\n", | ||
669 | + "I0407 10:54:45.859541 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.type = single_id\n", | ||
670 | + "I0407 10:54:45.859729 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.namespace = xpostag\n", | ||
671 | + "I0407 10:54:45.859802 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.lowercase_tokens = False\n", | ||
672 | + "I0407 10:54:45.859875 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.start_tokens = None\n", | ||
673 | + "I0407 10:54:45.859931 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.end_tokens = None\n", | ||
674 | + "I0407 10:54:45.859991 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.feature_name = tag_\n", | ||
675 | + "I0407 10:54:45.860045 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
676 | + "I0407 10:54:45.860103 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.token_min_padding_length = 0\n", | ||
677 | + "I0407 10:54:45.860332 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.type = characters_const_padding\n", | ||
678 | + "I0407 10:54:45.860523 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.namespace = lemma_characters\n", | ||
679 | + "I0407 10:54:45.860739 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
680 | + "I0407 10:54:45.860809 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
681 | + "I0407 10:54:45.860888 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
682 | + "I0407 10:54:45.861032 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
683 | + "I0407 10:54:45.861149 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.start_tokens = None\n", | ||
684 | + "I0407 10:54:45.861213 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.end_tokens = None\n", | ||
685 | + "I0407 10:54:45.861277 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.min_padding_length = 32\n", | ||
686 | + "I0407 10:54:45.861337 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.token_min_padding_length = 0\n", | ||
687 | + "I0407 10:54:45.861427 140321380496448 params.py:248] dataset_reader.features = ['token', 'char']\n", | ||
688 | + "I0407 10:54:45.861522 140321380496448 params.py:248] dataset_reader.targets = ['head', 'deprel']\n", | ||
689 | + "I0407 10:54:45.861611 140321380496448 params.py:248] dataset_reader.use_sem = False\n", | ||
690 | + "I0407 10:54:45.861762 140321380496448 params.py:248] dataset_reader.type = conllu\n", | ||
691 | + "I0407 10:54:45.862029 140321380496448 params.py:248] dataset_reader.lazy = False\n", | ||
692 | + "I0407 10:54:45.862116 140321380496448 params.py:248] dataset_reader.cache_directory = None\n", | ||
693 | + "I0407 10:54:45.862177 140321380496448 params.py:248] dataset_reader.max_instances = None\n", | ||
694 | + "I0407 10:54:45.862234 140321380496448 params.py:248] dataset_reader.manual_distributed_sharding = False\n", | ||
695 | + "I0407 10:54:45.862295 140321380496448 params.py:248] dataset_reader.manual_multi_process_sharding = False\n", | ||
696 | + "I0407 10:54:45.862535 140321380496448 params.py:248] dataset_reader.token_indexers.char.type = characters_const_padding\n", | ||
697 | + "I0407 10:54:45.862701 140321380496448 params.py:248] dataset_reader.token_indexers.char.namespace = token_characters\n", | ||
698 | + "I0407 10:54:45.862900 140321380496448 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
699 | + "I0407 10:54:45.862966 140321380496448 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
700 | + "I0407 10:54:45.863043 140321380496448 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
701 | + "I0407 10:54:45.863168 140321380496448 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
702 | + "I0407 10:54:45.863281 140321380496448 params.py:248] dataset_reader.token_indexers.char.start_tokens = None\n", | ||
703 | + "I0407 10:54:45.863344 140321380496448 params.py:248] dataset_reader.token_indexers.char.end_tokens = None\n", | ||
704 | + "I0407 10:54:45.863406 140321380496448 params.py:248] dataset_reader.token_indexers.char.min_padding_length = 32\n", | ||
705 | + "I0407 10:54:45.863469 140321380496448 params.py:248] dataset_reader.token_indexers.char.token_min_padding_length = 0\n", | ||
706 | + "I0407 10:54:45.863596 140321380496448 params.py:248] dataset_reader.token_indexers.feats.type = feats_indexer\n", | ||
707 | + "I0407 10:54:45.863752 140321380496448 params.py:248] dataset_reader.token_indexers.feats.namespace = feats\n", | ||
708 | + "I0407 10:54:45.863821 140321380496448 params.py:248] dataset_reader.token_indexers.feats.feature_name = feats_\n", | ||
709 | + "I0407 10:54:45.863883 140321380496448 params.py:248] dataset_reader.token_indexers.feats.token_min_padding_length = 0\n", | ||
710 | + "I0407 10:54:45.864030 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.type = characters_const_padding\n", | ||
711 | + "I0407 10:54:45.864196 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.namespace = token_characters\n", | ||
712 | + "I0407 10:54:45.864392 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.byte_encoding = None\n", | ||
713 | + "I0407 10:54:45.864460 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.lowercase_characters = False\n", | ||
714 | + "I0407 10:54:45.864540 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.start_tokens = ['__START__']\n", | ||
715 | + "I0407 10:54:45.864660 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.end_tokens = ['__END__']\n", | ||
716 | + "I0407 10:54:45.864772 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.start_tokens = None\n", | ||
717 | + "I0407 10:54:45.864835 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.end_tokens = None\n", | ||
718 | + "I0407 10:54:45.864896 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.min_padding_length = 32\n", | ||
719 | + "I0407 10:54:45.864965 140321380496448 params.py:248] dataset_reader.token_indexers.lemma.token_min_padding_length = 0\n", | ||
720 | + "I0407 10:54:45.865104 140321380496448 params.py:248] dataset_reader.token_indexers.token.type = pretrained_transformer_mismatched_fixed\n", | ||
721 | + "I0407 10:54:45.865323 140321380496448 params.py:248] dataset_reader.token_indexers.token.token_min_padding_length = 0\n", | ||
722 | + "I0407 10:54:45.865396 140321380496448 params.py:248] dataset_reader.token_indexers.token.model_name = allegro/herbert-large-cased\n", | ||
723 | + "I0407 10:54:45.865460 140321380496448 params.py:248] dataset_reader.token_indexers.token.namespace = tags\n", | ||
724 | + "I0407 10:54:45.865518 140321380496448 params.py:248] dataset_reader.token_indexers.token.max_length = None\n", | ||
725 | + "I0407 10:54:45.865614 140321380496448 params.py:384] dataset_reader.token_indexers.token.tokenizer_kwargs.use_fast = False\n", | ||
726 | + "I0407 10:54:45.866884 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.type = single_id\n", | ||
727 | + "I0407 10:54:45.867116 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.namespace = upostag\n", | ||
728 | + "I0407 10:54:45.867190 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.lowercase_tokens = False\n", | ||
729 | + "I0407 10:54:45.867258 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.start_tokens = None\n", | ||
730 | + "I0407 10:54:45.867316 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.end_tokens = None\n", | ||
731 | + "I0407 10:54:45.867376 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.feature_name = pos_\n", | ||
732 | + "I0407 10:54:45.867437 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
733 | + "I0407 10:54:45.867497 140321380496448 params.py:248] dataset_reader.token_indexers.upostag.token_min_padding_length = 0\n", | ||
734 | + "I0407 10:54:45.867640 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.type = single_id\n", | ||
735 | + "I0407 10:54:45.867815 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.namespace = xpostag\n", | ||
736 | + "I0407 10:54:45.867887 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.lowercase_tokens = False\n", | ||
737 | + "I0407 10:54:45.867951 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.start_tokens = None\n", | ||
738 | + "I0407 10:54:45.868006 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.end_tokens = None\n", | ||
739 | + "I0407 10:54:45.868063 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.feature_name = tag_\n", | ||
740 | + "I0407 10:54:45.868122 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
741 | + "I0407 10:54:45.868181 140321380496448 params.py:248] dataset_reader.token_indexers.xpostag.token_min_padding_length = 0\n", | ||
742 | + "I0407 10:54:45.868388 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.type = characters_const_padding\n", | ||
743 | + "I0407 10:54:45.868559 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.namespace = lemma_characters\n", | ||
744 | + "I0407 10:54:45.868757 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
745 | + "I0407 10:54:45.868824 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
746 | + "I0407 10:54:45.868897 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
747 | + "I0407 10:54:45.869028 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
748 | + "I0407 10:54:45.869139 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.start_tokens = None\n", | ||
749 | + "I0407 10:54:45.869202 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.end_tokens = None\n", | ||
750 | + "I0407 10:54:45.869256 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.min_padding_length = 32\n", | ||
751 | + "I0407 10:54:45.869315 140321380496448 params.py:248] dataset_reader.lemma_indexers.char.token_min_padding_length = 0\n", | ||
752 | + "I0407 10:54:45.869398 140321380496448 params.py:248] dataset_reader.features = ['token', 'char']\n", | ||
753 | + "I0407 10:54:45.869489 140321380496448 params.py:248] dataset_reader.targets = ['head', 'deprel']\n", | ||
754 | + "I0407 10:54:45.869572 140321380496448 params.py:248] dataset_reader.use_sem = False\n", | ||
755 | + "I0407 10:54:45.870136 140321380496448 params.py:248] vocabulary.type = from_instances_extended\n", | ||
756 | + "I0407 10:54:45.870218 140321380496448 vocabulary.py:323] Loading token dictionary from /tmp/tmpuvesoi4q/vocabulary.\n", | ||
757 | + "I0407 10:54:45.870543 140321380496448 filelock.py:254] Lock 140317974842768 acquired on /tmp/tmpuvesoi4q/vocabulary/.lock\n", | ||
758 | + "I0407 10:54:45.871132 140321380496448 filelock.py:317] Lock 140317974842768 released on /tmp/tmpuvesoi4q/vocabulary/.lock\n", | ||
759 | + "I0407 10:54:45.871641 140321380496448 params.py:248] model.type = semantic_multitask\n", | ||
760 | + "I0407 10:54:45.872183 140321380496448 params.py:248] model.text_field_embedder.type = basic\n", | ||
761 | + "I0407 10:54:45.872548 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.type = char_embeddings_from_config\n", | ||
762 | + "I0407 10:54:45.872749 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.embedding_dim = 64\n", | ||
763 | + "I0407 10:54:45.873004 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.input_dim = 64\n", | ||
764 | + "I0407 10:54:45.873091 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.filters = [512, 256, 64]\n", | ||
765 | + "I0407 10:54:45.873195 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.kernel_size = [3, 3, 3]\n", | ||
766 | + "I0407 10:54:45.873291 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.stride = [1, 1, 1]\n", | ||
767 | + "I0407 10:54:45.873384 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.padding = [1, 2, 4]\n", | ||
768 | + "I0407 10:54:45.873478 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.dilation = [1, 2, 4]\n", | ||
769 | + "I0407 10:54:45.873572 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.activations = ['relu', 'relu', 'linear']\n", | ||
770 | + "I0407 10:54:45.873714 140321380496448 params.py:248] type = relu\n", | ||
771 | + "I0407 10:54:45.873904 140321380496448 params.py:248] type = relu\n", | ||
772 | + "I0407 10:54:45.874098 140321380496448 params.py:248] type = linear\n", | ||
773 | + "I0407 10:54:45.880232 140321380496448 params.py:248] model.text_field_embedder.token_embedders.char.vocab_namespace = token_characters\n", | ||
774 | + "I0407 10:54:45.880783 140321380496448 params.py:248] model.text_field_embedder.token_embedders.token.type = transformers_word_embeddings\n", | ||
775 | + "I0407 10:54:45.881011 140321380496448 params.py:248] model.text_field_embedder.token_embedders.token.model_name = allegro/herbert-large-cased\n", | ||
776 | + "I0407 10:54:45.881093 140321380496448 params.py:248] model.text_field_embedder.token_embedders.token.projection_dim = 100\n", | ||
777 | + "I0407 10:54:45.881184 140321380496448 params.py:248] model.text_field_embedder.token_embedders.token.projection_activation = <function TransformersWordEmbedder.<lambda> at 0x7f9e50745280>\n", | ||
778 | + "I0407 10:54:45.881261 140321380496448 params.py:248] model.text_field_embedder.token_embedders.token.projection_dropout_rate = 0.0\n", | ||
779 | + "I0407 10:54:45.881328 140321380496448 params.py:248] model.text_field_embedder.token_embedders.token.freeze_transformer = True\n", | ||
780 | + "I0407 10:54:45.881389 140321380496448 params.py:248] model.text_field_embedder.token_embedders.token.last_layer_only = True\n", | ||
781 | + "I0407 10:54:45.881492 140321380496448 params.py:384] model.text_field_embedder.token_embedders.token.tokenizer_kwargs.use_fast = False\n", | ||
782 | + "I0407 10:54:45.881562 140321380496448 params.py:248] model.text_field_embedder.token_embedders.token.transformer_kwargs = None\n" | ||
783 | + ] | ||
784 | + }, | ||
785 | + { | ||
786 | + "name": "stdout", | ||
787 | + "output_type": "stream", | ||
788 | + "text": [ | ||
789 | + "I0407 10:54:52.911276 140321380496448 params.py:248] model.seq_encoder.type = combo_encoder\n", | ||
790 | + "I0407 10:54:52.911743 140321380496448 params.py:248] model.seq_encoder.stacked_bilstm.input_size = 164\n", | ||
791 | + "I0407 10:54:52.911836 140321380496448 params.py:248] model.seq_encoder.stacked_bilstm.hidden_size = 512\n", | ||
792 | + "I0407 10:54:52.911902 140321380496448 params.py:248] model.seq_encoder.stacked_bilstm.num_layers = 2\n", | ||
793 | + "I0407 10:54:52.911965 140321380496448 params.py:248] model.seq_encoder.stacked_bilstm.recurrent_dropout_probability = 0.33\n", | ||
794 | + "I0407 10:54:52.912029 140321380496448 params.py:248] model.seq_encoder.stacked_bilstm.layer_dropout_probability = 0.33\n", | ||
795 | + "I0407 10:54:52.912090 140321380496448 params.py:248] model.seq_encoder.stacked_bilstm.use_highway = False\n", | ||
796 | + "I0407 10:54:53.279199 140321380496448 params.py:248] model.seq_encoder.layer_dropout_probability = 0.33\n", | ||
797 | + "I0407 10:54:53.279505 140321380496448 params.py:248] model.use_sample_weight = True\n", | ||
798 | + "I0407 10:54:53.279624 140321380496448 params.py:248] model.lemmatizer = None\n", | ||
799 | + "I0407 10:54:53.279695 140321380496448 params.py:248] model.upos_tagger = None\n", | ||
800 | + "I0407 10:54:53.279757 140321380496448 params.py:248] model.xpos_tagger = None\n", | ||
801 | + "I0407 10:54:53.279815 140321380496448 params.py:248] model.semantic_relation = None\n", | ||
802 | + "I0407 10:54:53.279873 140321380496448 params.py:248] model.morphological_feat = None\n", | ||
803 | + "I0407 10:54:53.280155 140321380496448 params.py:248] model.dependency_relation.type = combo_dependency_parsing_from_vocab\n", | ||
804 | + "I0407 10:54:53.280393 140321380496448 params.py:248] model.dependency_relation.vocab_namespace = deprel_labels\n", | ||
805 | + "I0407 10:54:53.280741 140321380496448 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.in_features = 1024\n", | ||
806 | + "I0407 10:54:53.280819 140321380496448 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.out_features = 512\n", | ||
807 | + "I0407 10:54:53.280887 140321380496448 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.activation = tanh\n", | ||
808 | + "I0407 10:54:53.281012 140321380496448 params.py:248] type = tanh\n", | ||
809 | + "I0407 10:54:53.281121 140321380496448 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.dropout_rate = 0.0\n", | ||
810 | + "I0407 10:54:53.285843 140321380496448 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.in_features = 1024\n", | ||
811 | + "I0407 10:54:53.286010 140321380496448 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.out_features = 512\n", | ||
812 | + "I0407 10:54:53.286088 140321380496448 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.activation = tanh\n", | ||
813 | + "I0407 10:54:53.286234 140321380496448 params.py:248] type = tanh\n", | ||
814 | + "I0407 10:54:53.286334 140321380496448 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.dropout_rate = 0.0\n", | ||
815 | + "I0407 10:54:53.290788 140321380496448 params.py:248] model.dependency_relation.head_predictor.cycle_loss_n = 0\n", | ||
816 | + "I0407 10:54:53.291093 140321380496448 params.py:248] model.dependency_relation.head_projection_layer.in_features = 1024\n", | ||
817 | + "I0407 10:54:53.291184 140321380496448 params.py:248] model.dependency_relation.head_projection_layer.out_features = 128\n", | ||
818 | + "I0407 10:54:53.291281 140321380496448 params.py:248] model.dependency_relation.head_projection_layer.activation = tanh\n", | ||
819 | + "I0407 10:54:53.291444 140321380496448 params.py:248] type = tanh\n", | ||
820 | + "I0407 10:54:53.291567 140321380496448 params.py:248] model.dependency_relation.head_projection_layer.dropout_rate = 0.25\n", | ||
821 | + "I0407 10:54:53.293048 140321380496448 params.py:248] model.dependency_relation.dependency_projection_layer.in_features = 1024\n", | ||
822 | + "I0407 10:54:53.293147 140321380496448 params.py:248] model.dependency_relation.dependency_projection_layer.out_features = 128\n", | ||
823 | + "I0407 10:54:53.293218 140321380496448 params.py:248] model.dependency_relation.dependency_projection_layer.activation = tanh\n", | ||
824 | + "I0407 10:54:53.293342 140321380496448 params.py:248] type = tanh\n", | ||
825 | + "I0407 10:54:53.293437 140321380496448 params.py:248] model.dependency_relation.dependency_projection_layer.dropout_rate = 0.25\n", | ||
826 | + "I0407 10:54:53.295091 140321380496448 params.py:248] model.enhanced_dependency_relation = None\n", | ||
827 | + "I0407 10:54:53.295609 140321380496448 params.py:248] model.regularizer.regexes.0.1.type = l2\n", | ||
828 | + "I0407 10:54:53.295784 140321380496448 params.py:248] model.regularizer.regexes.0.1.alpha = 1e-06\n", | ||
829 | + "I0407 10:54:53.295953 140321380496448 params.py:248] model.regularizer.regexes.1.1.type = l2\n", | ||
830 | + "I0407 10:54:53.296107 140321380496448 params.py:248] model.regularizer.regexes.1.1.alpha = 1e-06\n", | ||
831 | + "I0407 10:54:53.296261 140321380496448 params.py:248] model.regularizer.regexes.2.1.type = l2\n", | ||
832 | + "I0407 10:54:53.296412 140321380496448 params.py:248] model.regularizer.regexes.2.1.alpha = 1e-06\n", | ||
833 | + "I0407 10:54:53.296564 140321380496448 params.py:248] model.regularizer.regexes.3.1.type = l2\n", | ||
834 | + "I0407 10:54:53.296715 140321380496448 params.py:248] model.regularizer.regexes.3.1.alpha = 1e-05\n", | ||
835 | + "I0407 10:54:56.194218 140321380496448 archival.py:211] removing temporary unarchived model dir at /tmp/tmpuvesoi4q\n", | ||
836 | + "reading instances: 1980it [01:33, 21.15it/s]\n" | ||
837 | + ] | ||
838 | + } | ||
839 | + ], | ||
840 | + "source": [ | ||
841 | + "! {COMBO} --mode predict \\\n", | ||
842 | + " --cuda_device 0 \\\n", | ||
843 | + " --model_path model-pdbc-cont/model.tar.gz \\\n", | ||
844 | + " --input_file connlu/pdbc-cont-validation.conllu \\\n", | ||
845 | + " --output_file connlu/pdbc-cont-validation-pred.conllu" | ||
846 | + ] | ||
847 | + }, | ||
848 | + { | ||
849 | + "cell_type": "code", | ||
850 | + "execution_count": 9, | ||
851 | + "id": "cfe7a3c4", | ||
852 | + "metadata": {}, | ||
853 | + "outputs": [ | ||
854 | + { | ||
855 | + "name": "stdout", | ||
856 | + "output_type": "stream", | ||
857 | + "text": [ | ||
858 | + "# text = Dwie dziewczynki opierają się o dach kapliczki , chłopiec wspina się na niego , a trzecia dziewczynka stoi obok .\r\n", | ||
859 | + "1\tDwie\tdwa\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
860 | + "2\tdziewczynki\tdziewczynka\t_\t_\t_\t1\tcomp\t1:comp\t_\r\n", | ||
861 | + "3\topierają\topierać\t_\t_\t_\t15\tconjunct\t15:conjunct\t_\r\n", | ||
862 | + "4\tsię\tsię\t_\t_\t_\t3\trefl\t3:refl\t_\r\n", | ||
863 | + "5\to\to\t_\t_\t_\t3\tcomp\t3:comp\t_\r\n", | ||
864 | + "6\tdach\tdach\t_\t_\t_\t5\tcomp\t5:comp\t_\r\n", | ||
865 | + "7\tkapliczki\tkapliczka\t_\t_\t_\t6\tadjunct\t6:adjunct\t_\r\n", | ||
866 | + "8\t,\t,\t_\t_\t_\t15\tpunct\t15:punct\t_\r\n", | ||
867 | + "9\tchłopiec\tchłopiec\t_\t_\t_\t10\tsubj\t10:subj\t_\r\n" | ||
868 | + ] | ||
869 | + } | ||
870 | + ], | ||
871 | + "source": [ | ||
872 | + "! head connlu/pdbc-cont-validation.conllu" | ||
873 | + ] | ||
874 | + }, | ||
875 | + { | ||
876 | + "cell_type": "code", | ||
877 | + "execution_count": 10, | ||
878 | + "id": "7dba9571", | ||
879 | + "metadata": {}, | ||
880 | + "outputs": [ | ||
881 | + { | ||
882 | + "name": "stdout", | ||
883 | + "output_type": "stream", | ||
884 | + "text": [ | ||
885 | + "# text = Dwie dziewczynki opierają się o dach kapliczki , chłopiec wspina się na niego , a trzecia dziewczynka stoi obok .\r\n", | ||
886 | + "1\tDwie\tdwa\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
887 | + "2\tdziewczynki\tdziewczynka\t_\t_\t_\t1\tcomp\t1:comp\t_\r\n", | ||
888 | + "3\topierają\topierać\t_\t_\t_\t15\tconjunct\t15:conjunct\t_\r\n", | ||
889 | + "4\tsię\tsię\t_\t_\t_\t3\trefl\t3:refl\t_\r\n", | ||
890 | + "5\to\to\t_\t_\t_\t3\tcomp\t3:comp\t_\r\n", | ||
891 | + "6\tdach\tdach\t_\t_\t_\t5\tcomp\t5:comp\t_\r\n", | ||
892 | + "7\tkapliczki\tkapliczka\t_\t_\t_\t6\tadjunct\t6:adjunct\t_\r\n", | ||
893 | + "8\t,\t,\t_\t_\t_\t15\tpunct\t15:punct\t_\r\n", | ||
894 | + "9\tchłopiec\tchłopiec\t_\t_\t_\t10\tsubj\t10:subj\t_\r\n" | ||
895 | + ] | ||
896 | + } | ||
897 | + ], | ||
898 | + "source": [ | ||
899 | + "! head connlu/pdbc-cont-validation-pred.conllu" | ||
900 | + ] | ||
901 | + }, | ||
902 | + { | ||
903 | + "cell_type": "code", | ||
904 | + "execution_count": 11, | ||
905 | + "id": "679601c2", | ||
906 | + "metadata": {}, | ||
907 | + "outputs": [ | ||
908 | + { | ||
909 | + "name": "stdout", | ||
910 | + "output_type": "stream", | ||
911 | + "text": [ | ||
912 | + "I0407 10:56:35.295660 140254825452608 archival.py:184] loading archive file model-pdbc-cont/model.tar.gz\n", | ||
913 | + "I0407 10:56:35.296370 140254825452608 archival.py:263] extracting archive file model-pdbc-cont/model.tar.gz to temp dir /tmp/tmpdhtf4et1\n", | ||
914 | + "I0407 10:56:52.876630 140254825452608 params.py:248] dataset_reader.type = conllu\n", | ||
915 | + "I0407 10:56:52.877122 140254825452608 params.py:248] dataset_reader.lazy = False\n", | ||
916 | + "I0407 10:56:52.877243 140254825452608 params.py:248] dataset_reader.cache_directory = None\n", | ||
917 | + "I0407 10:56:52.877313 140254825452608 params.py:248] dataset_reader.max_instances = None\n", | ||
918 | + "I0407 10:56:52.877380 140254825452608 params.py:248] dataset_reader.manual_distributed_sharding = False\n", | ||
919 | + "I0407 10:56:52.877446 140254825452608 params.py:248] dataset_reader.manual_multi_process_sharding = False\n", | ||
920 | + "I0407 10:56:52.877737 140254825452608 params.py:248] dataset_reader.token_indexers.char.type = characters_const_padding\n", | ||
921 | + "I0407 10:56:52.877938 140254825452608 params.py:248] dataset_reader.token_indexers.char.namespace = token_characters\n", | ||
922 | + "I0407 10:56:52.878201 140254825452608 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
923 | + "I0407 10:56:52.878276 140254825452608 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
924 | + "I0407 10:56:52.878360 140254825452608 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
925 | + "I0407 10:56:52.878507 140254825452608 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
926 | + "I0407 10:56:52.878633 140254825452608 params.py:248] dataset_reader.token_indexers.char.start_tokens = None\n", | ||
927 | + "I0407 10:56:52.878702 140254825452608 params.py:248] dataset_reader.token_indexers.char.end_tokens = None\n", | ||
928 | + "I0407 10:56:52.878761 140254825452608 params.py:248] dataset_reader.token_indexers.char.min_padding_length = 32\n", | ||
929 | + "I0407 10:56:52.878825 140254825452608 params.py:248] dataset_reader.token_indexers.char.token_min_padding_length = 0\n", | ||
930 | + "I0407 10:56:52.878969 140254825452608 params.py:248] dataset_reader.token_indexers.feats.type = feats_indexer\n", | ||
931 | + "I0407 10:56:52.879144 140254825452608 params.py:248] dataset_reader.token_indexers.feats.namespace = feats\n", | ||
932 | + "I0407 10:56:52.879218 140254825452608 params.py:248] dataset_reader.token_indexers.feats.feature_name = feats_\n", | ||
933 | + "I0407 10:56:52.879282 140254825452608 params.py:248] dataset_reader.token_indexers.feats.token_min_padding_length = 0\n", | ||
934 | + "I0407 10:56:52.879426 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.type = characters_const_padding\n", | ||
935 | + "I0407 10:56:52.879594 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.namespace = token_characters\n", | ||
936 | + "I0407 10:56:52.879792 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.byte_encoding = None\n", | ||
937 | + "I0407 10:56:52.879862 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.lowercase_characters = False\n", | ||
938 | + "I0407 10:56:52.879944 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.start_tokens = ['__START__']\n", | ||
939 | + "I0407 10:56:52.880068 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.end_tokens = ['__END__']\n", | ||
940 | + "I0407 10:56:52.880184 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.start_tokens = None\n", | ||
941 | + "I0407 10:56:52.880254 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.end_tokens = None\n", | ||
942 | + "I0407 10:56:52.880316 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.min_padding_length = 32\n", | ||
943 | + "I0407 10:56:52.880378 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.token_min_padding_length = 0\n", | ||
944 | + "I0407 10:56:52.880523 140254825452608 params.py:248] dataset_reader.token_indexers.token.type = pretrained_transformer_mismatched_fixed\n", | ||
945 | + "I0407 10:56:52.880748 140254825452608 params.py:248] dataset_reader.token_indexers.token.token_min_padding_length = 0\n", | ||
946 | + "I0407 10:56:52.880829 140254825452608 params.py:248] dataset_reader.token_indexers.token.model_name = allegro/herbert-large-cased\n", | ||
947 | + "I0407 10:56:52.880893 140254825452608 params.py:248] dataset_reader.token_indexers.token.namespace = tags\n", | ||
948 | + "I0407 10:56:52.880957 140254825452608 params.py:248] dataset_reader.token_indexers.token.max_length = None\n", | ||
949 | + "I0407 10:56:52.881069 140254825452608 params.py:384] dataset_reader.token_indexers.token.tokenizer_kwargs.use_fast = False\n", | ||
950 | + "I0407 10:56:55.893562 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.type = single_id\n", | ||
951 | + "I0407 10:56:55.894115 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.namespace = upostag\n", | ||
952 | + "I0407 10:56:55.894256 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.lowercase_tokens = False\n", | ||
953 | + "I0407 10:56:55.894343 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.start_tokens = None\n", | ||
954 | + "I0407 10:56:55.894395 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.end_tokens = None\n", | ||
955 | + "I0407 10:56:55.894465 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.feature_name = pos_\n", | ||
956 | + "I0407 10:56:55.894520 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
957 | + "I0407 10:56:55.894590 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.token_min_padding_length = 0\n", | ||
958 | + "I0407 10:56:55.894762 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.type = single_id\n", | ||
959 | + "I0407 10:56:55.894958 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.namespace = xpostag\n", | ||
960 | + "I0407 10:56:55.895048 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.lowercase_tokens = False\n", | ||
961 | + "I0407 10:56:55.895111 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.start_tokens = None\n", | ||
962 | + "I0407 10:56:55.895176 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.end_tokens = None\n", | ||
963 | + "I0407 10:56:55.895228 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.feature_name = tag_\n", | ||
964 | + "I0407 10:56:55.895297 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
965 | + "I0407 10:56:55.895349 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.token_min_padding_length = 0\n", | ||
966 | + "I0407 10:56:55.895593 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.type = characters_const_padding\n", | ||
967 | + "I0407 10:56:55.895786 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.namespace = lemma_characters\n", | ||
968 | + "I0407 10:56:55.896016 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
969 | + "I0407 10:56:55.896095 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
970 | + "I0407 10:56:55.896188 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
971 | + "I0407 10:56:55.896353 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
972 | + "I0407 10:56:55.896480 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.start_tokens = None\n", | ||
973 | + "I0407 10:56:55.896552 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.end_tokens = None\n", | ||
974 | + "I0407 10:56:55.896607 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.min_padding_length = 32\n", | ||
975 | + "I0407 10:56:55.896675 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.token_min_padding_length = 0\n", | ||
976 | + "I0407 10:56:55.896760 140254825452608 params.py:248] dataset_reader.features = ['token', 'char']\n", | ||
977 | + "I0407 10:56:55.896864 140254825452608 params.py:248] dataset_reader.targets = ['head', 'deprel']\n", | ||
978 | + "I0407 10:56:55.896962 140254825452608 params.py:248] dataset_reader.use_sem = False\n", | ||
979 | + "I0407 10:56:55.897153 140254825452608 params.py:248] dataset_reader.type = conllu\n", | ||
980 | + "I0407 10:56:55.897414 140254825452608 params.py:248] dataset_reader.lazy = False\n", | ||
981 | + "I0407 10:56:55.897499 140254825452608 params.py:248] dataset_reader.cache_directory = None\n", | ||
982 | + "I0407 10:56:55.897570 140254825452608 params.py:248] dataset_reader.max_instances = None\n", | ||
983 | + "I0407 10:56:55.897637 140254825452608 params.py:248] dataset_reader.manual_distributed_sharding = False\n", | ||
984 | + "I0407 10:56:55.897707 140254825452608 params.py:248] dataset_reader.manual_multi_process_sharding = False\n", | ||
985 | + "I0407 10:56:55.897995 140254825452608 params.py:248] dataset_reader.token_indexers.char.type = characters_const_padding\n", | ||
986 | + "I0407 10:56:55.898183 140254825452608 params.py:248] dataset_reader.token_indexers.char.namespace = token_characters\n", | ||
987 | + "I0407 10:56:55.898398 140254825452608 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
988 | + "I0407 10:56:55.898473 140254825452608 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
989 | + "I0407 10:56:55.898542 140254825452608 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
990 | + "I0407 10:56:55.898677 140254825452608 params.py:248] dataset_reader.token_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
991 | + "I0407 10:56:55.898799 140254825452608 params.py:248] dataset_reader.token_indexers.char.start_tokens = None\n", | ||
992 | + "I0407 10:56:55.898869 140254825452608 params.py:248] dataset_reader.token_indexers.char.end_tokens = None\n", | ||
993 | + "I0407 10:56:55.898936 140254825452608 params.py:248] dataset_reader.token_indexers.char.min_padding_length = 32\n", | ||
994 | + "I0407 10:56:55.898998 140254825452608 params.py:248] dataset_reader.token_indexers.char.token_min_padding_length = 0\n", | ||
995 | + "I0407 10:56:55.899158 140254825452608 params.py:248] dataset_reader.token_indexers.feats.type = feats_indexer\n", | ||
996 | + "I0407 10:56:55.899337 140254825452608 params.py:248] dataset_reader.token_indexers.feats.namespace = feats\n", | ||
997 | + "I0407 10:56:55.899414 140254825452608 params.py:248] dataset_reader.token_indexers.feats.feature_name = feats_\n", | ||
998 | + "I0407 10:56:55.899485 140254825452608 params.py:248] dataset_reader.token_indexers.feats.token_min_padding_length = 0\n", | ||
999 | + "I0407 10:56:55.899629 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.type = characters_const_padding\n", | ||
1000 | + "I0407 10:56:55.899797 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.namespace = token_characters\n", | ||
1001 | + "I0407 10:56:55.899995 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.byte_encoding = None\n", | ||
1002 | + "I0407 10:56:55.900055 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.lowercase_characters = False\n", | ||
1003 | + "I0407 10:56:55.900130 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.start_tokens = ['__START__']\n", | ||
1004 | + "I0407 10:56:55.900250 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.character_tokenizer.end_tokens = ['__END__']\n", | ||
1005 | + "I0407 10:56:55.900363 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.start_tokens = None\n", | ||
1006 | + "I0407 10:56:55.900426 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.end_tokens = None\n", | ||
1007 | + "I0407 10:56:55.900486 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.min_padding_length = 32\n", | ||
1008 | + "I0407 10:56:55.900547 140254825452608 params.py:248] dataset_reader.token_indexers.lemma.token_min_padding_length = 0\n", | ||
1009 | + "I0407 10:56:55.900689 140254825452608 params.py:248] dataset_reader.token_indexers.token.type = pretrained_transformer_mismatched_fixed\n", | ||
1010 | + "I0407 10:56:55.900916 140254825452608 params.py:248] dataset_reader.token_indexers.token.token_min_padding_length = 0\n", | ||
1011 | + "I0407 10:56:55.900995 140254825452608 params.py:248] dataset_reader.token_indexers.token.model_name = allegro/herbert-large-cased\n", | ||
1012 | + "I0407 10:56:55.901061 140254825452608 params.py:248] dataset_reader.token_indexers.token.namespace = tags\n", | ||
1013 | + "I0407 10:56:55.901125 140254825452608 params.py:248] dataset_reader.token_indexers.token.max_length = None\n", | ||
1014 | + "I0407 10:56:55.901226 140254825452608 params.py:384] dataset_reader.token_indexers.token.tokenizer_kwargs.use_fast = False\n", | ||
1015 | + "I0407 10:56:55.902561 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.type = single_id\n", | ||
1016 | + "I0407 10:56:55.902824 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.namespace = upostag\n", | ||
1017 | + "I0407 10:56:55.902909 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.lowercase_tokens = False\n", | ||
1018 | + "I0407 10:56:55.902969 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.start_tokens = None\n", | ||
1019 | + "I0407 10:56:55.903034 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.end_tokens = None\n", | ||
1020 | + "I0407 10:56:55.903095 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.feature_name = pos_\n", | ||
1021 | + "I0407 10:56:55.903159 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
1022 | + "I0407 10:56:55.903219 140254825452608 params.py:248] dataset_reader.token_indexers.upostag.token_min_padding_length = 0\n", | ||
1023 | + "I0407 10:56:55.903364 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.type = single_id\n", | ||
1024 | + "I0407 10:56:55.903547 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.namespace = xpostag\n", | ||
1025 | + "I0407 10:56:55.903621 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.lowercase_tokens = False\n", | ||
1026 | + "I0407 10:56:55.903687 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.start_tokens = None\n", | ||
1027 | + "I0407 10:56:55.903748 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.end_tokens = None\n", | ||
1028 | + "I0407 10:56:55.903811 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.feature_name = tag_\n", | ||
1029 | + "I0407 10:56:55.903868 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.default_value = THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING\n", | ||
1030 | + "I0407 10:56:55.903931 140254825452608 params.py:248] dataset_reader.token_indexers.xpostag.token_min_padding_length = 0\n", | ||
1031 | + "I0407 10:56:55.904146 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.type = characters_const_padding\n", | ||
1032 | + "I0407 10:56:55.904325 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.namespace = lemma_characters\n", | ||
1033 | + "I0407 10:56:55.904539 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.byte_encoding = None\n", | ||
1034 | + "I0407 10:56:55.904611 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.lowercase_characters = False\n", | ||
1035 | + "I0407 10:56:55.904691 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.start_tokens = ['__START__']\n", | ||
1036 | + "I0407 10:56:55.904827 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.character_tokenizer.end_tokens = ['__END__']\n", | ||
1037 | + "I0407 10:56:55.904946 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.start_tokens = None\n", | ||
1038 | + "I0407 10:56:55.905013 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.end_tokens = None\n", | ||
1039 | + "I0407 10:56:55.905084 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.min_padding_length = 32\n", | ||
1040 | + "I0407 10:56:55.905149 140254825452608 params.py:248] dataset_reader.lemma_indexers.char.token_min_padding_length = 0\n", | ||
1041 | + "I0407 10:56:55.905237 140254825452608 params.py:248] dataset_reader.features = ['token', 'char']\n", | ||
1042 | + "I0407 10:56:55.905334 140254825452608 params.py:248] dataset_reader.targets = ['head', 'deprel']\n", | ||
1043 | + "I0407 10:56:55.905422 140254825452608 params.py:248] dataset_reader.use_sem = False\n", | ||
1044 | + "I0407 10:56:55.906047 140254825452608 params.py:248] vocabulary.type = from_instances_extended\n", | ||
1045 | + "I0407 10:56:55.906157 140254825452608 vocabulary.py:323] Loading token dictionary from /tmp/tmpdhtf4et1/vocabulary.\n", | ||
1046 | + "I0407 10:56:55.906635 140254825452608 filelock.py:254] Lock 140251419626896 acquired on /tmp/tmpdhtf4et1/vocabulary/.lock\n", | ||
1047 | + "I0407 10:56:55.907354 140254825452608 filelock.py:317] Lock 140251419626896 released on /tmp/tmpdhtf4et1/vocabulary/.lock\n", | ||
1048 | + "I0407 10:56:55.907914 140254825452608 params.py:248] model.type = semantic_multitask\n", | ||
1049 | + "I0407 10:56:55.908506 140254825452608 params.py:248] model.text_field_embedder.type = basic\n", | ||
1050 | + "I0407 10:56:55.908878 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.type = char_embeddings_from_config\n", | ||
1051 | + "I0407 10:56:55.909080 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.embedding_dim = 64\n", | ||
1052 | + "I0407 10:56:55.909353 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.input_dim = 64\n", | ||
1053 | + "I0407 10:56:55.909446 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.filters = [512, 256, 64]\n", | ||
1054 | + "I0407 10:56:55.909554 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.kernel_size = [3, 3, 3]\n" | ||
1055 | + ] | ||
1056 | + }, | ||
1057 | + { | ||
1058 | + "name": "stdout", | ||
1059 | + "output_type": "stream", | ||
1060 | + "text": [ | ||
1061 | + "I0407 10:56:55.909654 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.stride = [1, 1, 1]\n", | ||
1062 | + "I0407 10:56:55.909750 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.padding = [1, 2, 4]\n", | ||
1063 | + "I0407 10:56:55.909847 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.dilation = [1, 2, 4]\n", | ||
1064 | + "I0407 10:56:55.909946 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.dilated_cnn_encoder.activations = ['relu', 'relu', 'linear']\n", | ||
1065 | + "I0407 10:56:55.910176 140254825452608 params.py:248] type = relu\n", | ||
1066 | + "I0407 10:56:55.910410 140254825452608 params.py:248] type = relu\n", | ||
1067 | + "I0407 10:56:55.910567 140254825452608 params.py:248] type = linear\n", | ||
1068 | + "I0407 10:56:55.917278 140254825452608 params.py:248] model.text_field_embedder.token_embedders.char.vocab_namespace = token_characters\n", | ||
1069 | + "I0407 10:56:55.917941 140254825452608 params.py:248] model.text_field_embedder.token_embedders.token.type = transformers_word_embeddings\n", | ||
1070 | + "I0407 10:56:55.918267 140254825452608 params.py:248] model.text_field_embedder.token_embedders.token.model_name = allegro/herbert-large-cased\n", | ||
1071 | + "I0407 10:56:55.918358 140254825452608 params.py:248] model.text_field_embedder.token_embedders.token.projection_dim = 100\n", | ||
1072 | + "I0407 10:56:55.918458 140254825452608 params.py:248] model.text_field_embedder.token_embedders.token.projection_activation = <function TransformersWordEmbedder.<lambda> at 0x7f8ed1745280>\n", | ||
1073 | + "I0407 10:56:55.918541 140254825452608 params.py:248] model.text_field_embedder.token_embedders.token.projection_dropout_rate = 0.0\n", | ||
1074 | + "I0407 10:56:55.918609 140254825452608 params.py:248] model.text_field_embedder.token_embedders.token.freeze_transformer = True\n", | ||
1075 | + "I0407 10:56:55.918674 140254825452608 params.py:248] model.text_field_embedder.token_embedders.token.last_layer_only = True\n", | ||
1076 | + "I0407 10:56:55.918785 140254825452608 params.py:384] model.text_field_embedder.token_embedders.token.tokenizer_kwargs.use_fast = False\n", | ||
1077 | + "I0407 10:56:55.918858 140254825452608 params.py:248] model.text_field_embedder.token_embedders.token.transformer_kwargs = None\n", | ||
1078 | + "I0407 10:57:03.624983 140254825452608 params.py:248] model.seq_encoder.type = combo_encoder\n", | ||
1079 | + "I0407 10:57:03.625626 140254825452608 params.py:248] model.seq_encoder.stacked_bilstm.input_size = 164\n", | ||
1080 | + "I0407 10:57:03.625742 140254825452608 params.py:248] model.seq_encoder.stacked_bilstm.hidden_size = 512\n", | ||
1081 | + "I0407 10:57:03.625796 140254825452608 params.py:248] model.seq_encoder.stacked_bilstm.num_layers = 2\n", | ||
1082 | + "I0407 10:57:03.625844 140254825452608 params.py:248] model.seq_encoder.stacked_bilstm.recurrent_dropout_probability = 0.33\n", | ||
1083 | + "I0407 10:57:03.625942 140254825452608 params.py:248] model.seq_encoder.stacked_bilstm.layer_dropout_probability = 0.33\n", | ||
1084 | + "I0407 10:57:03.626068 140254825452608 params.py:248] model.seq_encoder.stacked_bilstm.use_highway = False\n", | ||
1085 | + "I0407 10:57:03.933019 140254825452608 params.py:248] model.seq_encoder.layer_dropout_probability = 0.33\n", | ||
1086 | + "I0407 10:57:03.933302 140254825452608 params.py:248] model.use_sample_weight = True\n", | ||
1087 | + "I0407 10:57:03.933391 140254825452608 params.py:248] model.lemmatizer = None\n", | ||
1088 | + "I0407 10:57:03.933440 140254825452608 params.py:248] model.upos_tagger = None\n", | ||
1089 | + "I0407 10:57:03.933486 140254825452608 params.py:248] model.xpos_tagger = None\n", | ||
1090 | + "I0407 10:57:03.933528 140254825452608 params.py:248] model.semantic_relation = None\n", | ||
1091 | + "I0407 10:57:03.933570 140254825452608 params.py:248] model.morphological_feat = None\n", | ||
1092 | + "I0407 10:57:03.933835 140254825452608 params.py:248] model.dependency_relation.type = combo_dependency_parsing_from_vocab\n", | ||
1093 | + "I0407 10:57:03.934096 140254825452608 params.py:248] model.dependency_relation.vocab_namespace = deprel_labels\n", | ||
1094 | + "I0407 10:57:03.934389 140254825452608 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.in_features = 1024\n", | ||
1095 | + "I0407 10:57:03.934459 140254825452608 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.out_features = 512\n", | ||
1096 | + "I0407 10:57:03.934515 140254825452608 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.activation = tanh\n", | ||
1097 | + "I0407 10:57:03.934614 140254825452608 params.py:248] type = tanh\n", | ||
1098 | + "I0407 10:57:03.934703 140254825452608 params.py:248] model.dependency_relation.head_predictor.head_projection_layer.dropout_rate = 0.0\n", | ||
1099 | + "I0407 10:57:03.938141 140254825452608 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.in_features = 1024\n", | ||
1100 | + "I0407 10:57:03.938247 140254825452608 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.out_features = 512\n", | ||
1101 | + "I0407 10:57:03.938306 140254825452608 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.activation = tanh\n", | ||
1102 | + "I0407 10:57:03.938404 140254825452608 params.py:248] type = tanh\n", | ||
1103 | + "I0407 10:57:03.938489 140254825452608 params.py:248] model.dependency_relation.head_predictor.dependency_projection_layer.dropout_rate = 0.0\n", | ||
1104 | + "I0407 10:57:03.941669 140254825452608 params.py:248] model.dependency_relation.head_predictor.cycle_loss_n = 0\n", | ||
1105 | + "I0407 10:57:03.941908 140254825452608 params.py:248] model.dependency_relation.head_projection_layer.in_features = 1024\n", | ||
1106 | + "I0407 10:57:03.941985 140254825452608 params.py:248] model.dependency_relation.head_projection_layer.out_features = 128\n", | ||
1107 | + "I0407 10:57:03.942037 140254825452608 params.py:248] model.dependency_relation.head_projection_layer.activation = tanh\n", | ||
1108 | + "I0407 10:57:03.942123 140254825452608 params.py:248] type = tanh\n", | ||
1109 | + "I0407 10:57:03.942194 140254825452608 params.py:248] model.dependency_relation.head_projection_layer.dropout_rate = 0.25\n", | ||
1110 | + "I0407 10:57:03.943288 140254825452608 params.py:248] model.dependency_relation.dependency_projection_layer.in_features = 1024\n", | ||
1111 | + "I0407 10:57:03.943376 140254825452608 params.py:248] model.dependency_relation.dependency_projection_layer.out_features = 128\n", | ||
1112 | + "I0407 10:57:03.943423 140254825452608 params.py:248] model.dependency_relation.dependency_projection_layer.activation = tanh\n", | ||
1113 | + "I0407 10:57:03.943510 140254825452608 params.py:248] type = tanh\n", | ||
1114 | + "I0407 10:57:03.943577 140254825452608 params.py:248] model.dependency_relation.dependency_projection_layer.dropout_rate = 0.25\n", | ||
1115 | + "I0407 10:57:03.944838 140254825452608 params.py:248] model.enhanced_dependency_relation = None\n", | ||
1116 | + "I0407 10:57:03.945286 140254825452608 params.py:248] model.regularizer.regexes.0.1.type = l2\n", | ||
1117 | + "I0407 10:57:03.945443 140254825452608 params.py:248] model.regularizer.regexes.0.1.alpha = 1e-06\n", | ||
1118 | + "I0407 10:57:03.945568 140254825452608 params.py:248] model.regularizer.regexes.1.1.type = l2\n", | ||
1119 | + "I0407 10:57:03.945679 140254825452608 params.py:248] model.regularizer.regexes.1.1.alpha = 1e-06\n", | ||
1120 | + "I0407 10:57:03.945787 140254825452608 params.py:248] model.regularizer.regexes.2.1.type = l2\n", | ||
1121 | + "I0407 10:57:03.945892 140254825452608 params.py:248] model.regularizer.regexes.2.1.alpha = 1e-06\n", | ||
1122 | + "I0407 10:57:03.946047 140254825452608 params.py:248] model.regularizer.regexes.3.1.type = l2\n", | ||
1123 | + "I0407 10:57:03.946158 140254825452608 params.py:248] model.regularizer.regexes.3.1.alpha = 1e-05\n", | ||
1124 | + "I0407 10:57:06.549506 140254825452608 archival.py:211] removing temporary unarchived model dir at /tmp/tmpdhtf4et1\n", | ||
1125 | + "reading instances: 1990it [01:39, 20.00it/s]\n" | ||
1126 | + ] | ||
1127 | + } | ||
1128 | + ], | ||
1129 | + "source": [ | ||
1130 | + "! {COMBO} --mode predict \\\n", | ||
1131 | + " --cuda_device 0 \\\n", | ||
1132 | + " --model_path model-pdbc-cont/model.tar.gz \\\n", | ||
1133 | + " --input_file connlu/pdbc-cont-test.conllu \\\n", | ||
1134 | + " --output_file connlu/pdbc-cont-test-pred.conllu" | ||
1135 | + ] | ||
1136 | + }, | ||
1137 | + { | ||
1138 | + "cell_type": "code", | ||
1139 | + "execution_count": 12, | ||
1140 | + "id": "ddc3986b", | ||
1141 | + "metadata": {}, | ||
1142 | + "outputs": [ | ||
1143 | + { | ||
1144 | + "name": "stdout", | ||
1145 | + "output_type": "stream", | ||
1146 | + "text": [ | ||
1147 | + "# text = Mały chłopiec patrzy w bok po ściągnięciu okularów .\r\n", | ||
1148 | + "1\tMały\tmały\t_\t_\t_\t2\tadjunct\t2:adjunct\t_\r\n", | ||
1149 | + "2\tchłopiec\tchłopiec\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
1150 | + "3\tpatrzy\tpatrzeć\t_\t_\t_\t0\troot\t0:root\t_\r\n", | ||
1151 | + "4\tw\tw\t_\t_\t_\t3\tadjunct_adl\t3:adjunct_adl\t_\r\n", | ||
1152 | + "5\tbok\tbok\t_\t_\t_\t4\tcomp\t4:comp\t_\r\n", | ||
1153 | + "6\tpo\tpo\t_\t_\t_\t3\tadjunct_temp\t3:adjunct_temp\t_\r\n", | ||
1154 | + "7\tściągnięciu\tściągnąć\t_\t_\t_\t6\tcomp\t6:comp\t_\r\n", | ||
1155 | + "8\tokularów\tokulary\t_\t_\t_\t7\tobj\t7:obj\t_\r\n", | ||
1156 | + "9\t.\t.\t_\t_\t_\t3\tpunct\t3:punct\t_\r\n" | ||
1157 | + ] | ||
1158 | + } | ||
1159 | + ], | ||
1160 | + "source": [ | ||
1161 | + "! head connlu/pdbc-cont-test.conllu" | ||
1162 | + ] | ||
1163 | + }, | ||
1164 | + { | ||
1165 | + "cell_type": "code", | ||
1166 | + "execution_count": 13, | ||
1167 | + "id": "34aa16d9", | ||
1168 | + "metadata": {}, | ||
1169 | + "outputs": [ | ||
1170 | + { | ||
1171 | + "name": "stdout", | ||
1172 | + "output_type": "stream", | ||
1173 | + "text": [ | ||
1174 | + "# text = Mały chłopiec patrzy w bok po ściągnięciu okularów .\r\n", | ||
1175 | + "1\tMały\tmały\t_\t_\t_\t2\tadjunct\t2:adjunct\t_\r\n", | ||
1176 | + "2\tchłopiec\tchłopiec\t_\t_\t_\t3\tsubj\t3:subj\t_\r\n", | ||
1177 | + "3\tpatrzy\tpatrzeć\t_\t_\t_\t0\troot\t0:root\t_\r\n", | ||
1178 | + "4\tw\tw\t_\t_\t_\t3\tcomp\t3:adjunct_adl\t_\r\n", | ||
1179 | + "5\tbok\tbok\t_\t_\t_\t4\tcomp\t4:comp\t_\r\n", | ||
1180 | + "6\tpo\tpo\t_\t_\t_\t3\tadjunct_temp\t3:adjunct_temp\t_\r\n", | ||
1181 | + "7\tściągnięciu\tściągnąć\t_\t_\t_\t6\tcomp\t6:comp\t_\r\n", | ||
1182 | + "8\tokularów\tokulary\t_\t_\t_\t7\tobj\t7:obj\t_\r\n", | ||
1183 | + "9\t.\t.\t_\t_\t_\t3\tpunct\t3:punct\t_\r\n" | ||
1184 | + ] | ||
1185 | + } | ||
1186 | + ], | ||
1187 | + "source": [ | ||
1188 | + "! head connlu/pdbc-cont-test-pred.conllu" | ||
1189 | + ] | ||
1190 | + } | ||
1191 | + ], | ||
1192 | + "metadata": { | ||
1193 | + "kernelspec": { | ||
1194 | + "display_name": "combo_python39", | ||
1195 | + "language": "python", | ||
1196 | + "name": "combo_python39" | ||
1197 | + }, | ||
1198 | + "language_info": { | ||
1199 | + "codemirror_mode": { | ||
1200 | + "name": "ipython", | ||
1201 | + "version": 3 | ||
1202 | + }, | ||
1203 | + "file_extension": ".py", | ||
1204 | + "mimetype": "text/x-python", | ||
1205 | + "name": "python", | ||
1206 | + "nbconvert_exporter": "python", | ||
1207 | + "pygments_lexer": "ipython3", | ||
1208 | + "version": "3.8.16" | ||
1209 | + } | ||
1210 | + }, | ||
1211 | + "nbformat": 4, | ||
1212 | + "nbformat_minor": 5 | ||
1213 | +} |