code cleanup

Katarzyna Krasnowska
1 parent 13ac1d32
Showing 1 changed file with 54 additions and 835 deletions
TrainingAndEval.ipynb
@@ -2,23 +2,10 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "97d0c9ab",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-04-11 11:17:29.095631: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2023-04-11 11:17:29.331444: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
-      "2023-04-11 11:17:30.167497: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n",
-      "2023-04-11 11:17:30.167593: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n",
-      "2023-04-11 11:17:30.167603: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import importlib\n",
     "\n",
@@ -40,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "c41d6630",
    "metadata": {},
    "outputs": [],
@@ -51,36 +38,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "f30d7b7c",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1 Physical GPUs, 1 Logical GPUs\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-04-11 11:17:31.717262: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:31.762533: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:31.763529: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:31.765670: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2023-04-11 11:17:31.769196: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:31.770058: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:31.770816: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.722287: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.723281: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.724062: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.724846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20480 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:00:05.0, compute capability: 8.0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# https://www.tensorflow.org/guide/gpu\n",
     "gpus = tf.config.list_physical_devices('GPU')\n",
@@ -98,33 +59,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "89afdb1e",
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/device:GPU:0\n",
-      "2.10.0\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-04-11 11:17:32.739308: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.740224: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.740975: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.741809: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.742586: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
-      "2023-04-11 11:17:32.743322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /device:GPU:0 with 20480 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:00:05.0, compute capability: 8.0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(tf.test.gpu_device_name())\n",
     "print(tf.__version__)"
@@ -132,63 +72,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "2b0ab576",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Found cached dataset pdb_c_beta (/home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1)\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "55f181333dc44c7a811c515cc55c4988",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/3 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "pdbc_dataset = load_dataset('pdb_c_beta')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "2f4c317a",
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-ff2490f308f7f25b.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-cbb40b0e978ab6ee.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-3facbd810991cd6c.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-e54a8628e59de21f.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-9692de6b8224e758.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-4042ffa1dc5d9323.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-fb250709424f85ec.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-1f6ce0a488a89d56.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-2ae4daf5101c7aa2.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-a1686820d15bcf04.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-fe2c12481861f4bd.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-da5a875c385c3570.arrow\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import importlib\n",
     "\n",
@@ -203,20 +102,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "de1966ed",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-1dfcf507d62f6da8.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-264c0111246b25c1.arrow\n",
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-6a40675124a412f0.arrow\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "features = pdbc_dataset_spines['train'].features\n",
     "pdbc_dataset_spines_cont = pdbc_dataset_spines.filter(\n",
@@ -226,41 +115,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "33ff295b",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "DatasetDict({\n",
-       "    train: Dataset({\n",
-       "        features: ['corp_id', 'sent_id', 'tokens', 'lemmas', 'cposes', 'poses', 'tags', 'heads', 'deprels', 'nonterminals', 'spines', 'anchors', 'anchor_hs'],\n",
-       "        num_rows: 15903\n",
-       "    })\n",
-       "    validation: Dataset({\n",
-       "        features: ['corp_id', 'sent_id', 'tokens', 'lemmas', 'cposes', 'poses', 'tags', 'heads', 'deprels', 'nonterminals', 'spines', 'anchors', 'anchor_hs'],\n",
-       "        num_rows: 1980\n",
-       "    })\n",
-       "    test: Dataset({\n",
-       "        features: ['corp_id', 'sent_id', 'tokens', 'lemmas', 'cposes', 'poses', 'tags', 'heads', 'deprels', 'nonterminals', 'spines', 'anchors', 'anchor_hs'],\n",
-       "        num_rows: 1990\n",
-       "    })\n",
-       "})"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "pdbc_dataset_spines_cont"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "a8ddbc1f",
    "metadata": {},
    "outputs": [],
@@ -270,7 +135,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "8029594b",
    "metadata": {},
    "outputs": [],
@@ -288,30 +153,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": null,
    "id": "be8e93fa",
    "metadata": {},
    "outputs": [],
    "source": [
-    "def crop(dataset, n):\n",
-    "    return dataset.filter(lambda example: len(example['tokens']) <= n)\n",
-    "\n",
     "spines_pdbc = ClassificationTask(\n",
     "    'spines_pdbc',\n",
     "    pdbc_dataset_spines,\n",
-    "    #crop(pdbc_dataset, 6),\n",
     ")\n",
     "\n",
     "spines_pdbc_cont = ClassificationTask(\n",
     "    'spines_pdbc_cont',\n",
     "    pdbc_dataset_spines_cont,\n",
-    "    #crop(pdbc_dataset, 6),\n",
     ")\n",
     "\n",
     "spines_pdbc_compressed = ClassificationTask(\n",
     "    'spines_pdbc_compressed',\n",
     "    pdbc_dataset_spines_compressed,\n",
-    "    #crop(pdbc_dataset, 6),\n",
     ")\n",
     "\n",
     "TASK = spines_pdbc_compressed\n",
@@ -320,7 +179,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": null,
    "id": "7824fcee",
    "metadata": {},
    "outputs": [],
@@ -330,56 +189,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": null,
    "id": "1eb5f41a",
    "metadata": {
     "scrolled": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loading BERT tokenizer...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-49fe5b05228c3588.arrow\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Preprocessing the dataset for BERT...\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5f108b00fcab4db8a610f24ae03b7308",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/2211 [00:00<?, ?ex/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-b8e2900fbd9615fd.arrow\n",
-      "You're using a HerbertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "trainer = training.Trainer(\n",
     "    MODEL,\n",
@@ -398,21 +213,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": null,
    "id": "276708cc",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "('keras_fit_logs_spines_pdbc_compressed', 'models_spines_pdbc_compressed')"
-      ]
-     },
-     "execution_count": 39,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "log_dir = f'keras_fit_logs_{TASK.name}'\n",
     "model_dir = f'models_{TASK.name}'\n",
@@ -422,51 +226,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": null,
    "id": "e8ccde06",
    "metadata": {
     "scrolled": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The tensorboard extension is already loaded. To reload it, use:\n",
-      "  %reload_ext tensorboard\n",
-      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
-      "To disable this warning, you can either:\n",
-      "\t- Avoid using `tokenizers` before the fork if possible\n",
-      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "      <iframe id=\"tensorboard-frame-83a6a03964d4187a\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
-       "      </iframe>\n",
-       "      <script>\n",
-       "        (function() {\n",
-       "          const frame = document.getElementById(\"tensorboard-frame-83a6a03964d4187a\");\n",
-       "          const url = new URL(\"/\", window.location);\n",
-       "          const port = 6004;\n",
-       "          if (port) {\n",
-       "            url.port = port;\n",
-       "          }\n",
-       "          frame.src = url;\n",
-       "        })();\n",
-       "      </script>\n",
-       "    "
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "%load_ext tensorboard\n",
     "! killall tensorboard\n",
@@ -476,21 +241,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": null,
    "id": "a5b0da64",
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 6 µs, sys: 1 µs, total: 7 µs\n",
-      "Wall time: 15.7 µs\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%time\n",
     "\n",
@@ -505,45 +261,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
-   "id": "e42b2bd4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#import importlib\n",
-    "#from neural_parser import hybrid_tree_utils\n",
-    "#importlib.reload(hybrid_tree_utils)\n",
-    "#from neural_parser import data_utils\n",
-    "#importlib.reload(data_utils)\n",
-    "#from neural_parser import constituency_parser\n",
-    "#importlib.reload(constituency_parser)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": null,
    "id": "2f65dead",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "created 3 classifier(s)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Some layers from the model checkpoint at models_spines_pdbc_compressed/model were not used when initializing TFBertForMultiTargetTokenClassification: ['dropout_73']\n",
-      "- This IS expected if you are initializing TFBertForMultiTargetTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-      "- This IS NOT expected if you are initializing TFBertForMultiTargetTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-      "All the layers of TFBertForMultiTargetTokenClassification were initialized from the model checkpoint at models_spines_pdbc_compressed/model.\n",
-      "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForMultiTargetTokenClassification for predictions without further training.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "if not TRAIN:\n",
     "    from neural_parser import constituency_parser\n",
@@ -552,7 +273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": null,
    "id": "24edee79",
    "metadata": {},
    "outputs": [],
@@ -565,41 +286,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": null,
    "id": "4a7cd10b",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1/1 [==============================] - 10s 10s/step\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "[(['Miał', 'em', 'kotka', '.'],\n",
-       "  {'spines': ['ROOT_S_VP_V', '<EMPTY>', 'NP_N', 'Punct'],\n",
-       "   'anchors': ['<ROOT>', 'V', 'S', 'ROOT'],\n",
-       "   'anchor_hs': ['<ROOT>', '1', '1', '1']}),\n",
-       " (['Wlazł', 'kotek', 'na', 'płotek', 'i', 'mruga', '.'],\n",
-       "  {'spines': ['VP_V',\n",
-       "    'NP_N',\n",
-       "    'PrepNP_Prep',\n",
-       "    'NP_N',\n",
-       "    'ROOT_S_VP_Conj',\n",
-       "    'VP_V',\n",
-       "    'Punct'],\n",
-       "   'anchors': ['VP', 'S', 'VP', 'PrepNP', '<ROOT>', 'VP', 'ROOT'],\n",
-       "   'anchor_hs': ['1', '1', '2', '1', '<ROOT>', '1', '1']})]"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "parser.parse(sentences)"
    ]
@@ -616,21 +306,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": null,
    "id": "4ac4b9df",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<module 'neural_parser.constants' from '/home/kkrasnowska/neural-parsing/ICCS/neural_parser/constants.py'>"
-      ]
-     },
-     "execution_count": 46,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from neural_parser import hybrid_tree_utils\n",
     "importlib.reload(hybrid_tree_utils)\n",
@@ -640,86 +319,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
-   "id": "d1b28792",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from spacy import displacy\n",
-    "\n",
-    "def to_deps(tokens, deprels, heads):\n",
-    "    deps = {'words' : [], 'arcs' : []}\n",
-    "    for i, (token, deprel, head) in enumerate(zip(tokens, deprels, heads)):\n",
-    "        deps['words'].append({'text' : token, 'tag' : 'X'})\n",
-    "        if head >= 0:\n",
-    "            d = 'left' if head > i else 'right'\n",
-    "            start, end = sorted((i, head))\n",
-    "            deps['arcs'].append({'start' : start, 'end' : end, 'label' : deprel, 'dir' : d})\n",
-    "    return deps\n",
-    "\n",
-    "def display_deps(tokens, deprels, heads):\n",
-    "    displacy.render(to_deps(tokens, deprels, heads), manual=True, options={'distance' : 80})\n",
-    "    \n",
-    "import urllib.parse\n",
-    "import json\n",
-    "\n",
-    "def show_tree(tree):\n",
-    "    tree_json = json.dumps(hybrid_tree_utils.tree2dict(tree)['tree'])\n",
-    "    src = f'http://127.0.0.1:8010/?tree={urllib.parse.quote(tree_json)}'\n",
-    "    display(IFrame(src, 950, 550))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": null,
    "id": "9f443569",
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2211\n",
-      "2205\n",
-      "['Całuję', '.']\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'heads': [None, 0],\n",
-       " 'deprels': ['ROOT', 'punct'],\n",
-       " 'spines': ['ROOT_S_VP_V', 'Punct'],\n",
-       " 'anchors': ['<ROOT>', 'ROOT'],\n",
-       " 'anchor_hs': ['<ROOT>', '1']}"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Drzemał', '.']\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'heads': [None, 0],\n",
-       " 'deprels': ['ROOT', 'punct'],\n",
-       " 'spines': ['ROOT_S_VP_V', 'Punct'],\n",
-       " 'anchors': ['<ROOT>', 'ROOT'],\n",
-       " 'anchor_hs': ['<ROOT>', '1']}"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "HDR = [\n",
     "    'heads', 'deprels',\n",
@@ -753,49 +358,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": null,
    "id": "3f53c039",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "70/70 [==============================] - 17s 152ms/step\n",
-      "69/69 [==============================] - 12s 168ms/step\n",
-      "['Całuję', '.']\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'spines': ['ROOT_S_VP_V', 'Punct'],\n",
-       " 'anchors': ['<ROOT>', 'ROOT'],\n",
-       " 'anchor_hs': ['<ROOT>', '1']}"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Drzemał', '.']\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'spines': ['ROOT_S_VP_V', 'Punct'],\n",
-       " 'anchors': ['<ROOT>', 'ROOT'],\n",
-       " 'anchor_hs': ['<ROOT>', '1']}"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "def get_predicted_data(TOKENS_TRUE):\n",
     "    PARSED = parser.parse([' '.join(toks) for toks in TOKENS_TRUE])\n",
@@ -821,45 +387,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": null,
    "id": "17c1d9cb",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2211\n",
-      "2205\n",
-      "['Całuję', '.']\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'heads': [None, 0], 'deprels': ['root', 'punct']}"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Drzemał', '.']\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'heads': [None, 0], 'deprels': ['root', 'punct']}"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import conllu\n",
     "\n",
@@ -894,7 +425,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": null,
    "id": "004918c6",
    "metadata": {},
    "outputs": [],
@@ -913,42 +444,22 @@
     "def tree2spans(tree, labeled=True, headed=False):\n",
     "    spans = []\n",
     "    _tree2spans(tree, spans, labeled=labeled, headed=headed)\n",
-    "    # TODO\n",
-    "    #try:\n",
-    "    #    assert(len(spans) == len(set(spans)))\n",
-    "    #except:\n",
-    "    #    show_tree(tree)\n",
-    "    #    (display(spans))\n",
-    "    #    1/0\n",
     "    return set(spans)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": null,
    "id": "65d493ca",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<module 'neural_parser.hybrid_tree_utils' from '/home/kkrasnowska/neural-parsing/ICCS/neural_parser/hybrid_tree_utils.py'>"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score\n",
-    "\n",
-    "importlib.reload(hybrid_tree_utils)"
+    "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": null,
    "id": "e5f88e76",
    "metadata": {
     "scrolled": false
@@ -964,18 +475,11 @@
     "        key : {'true' : [], 'pred' : []} for key in ('heads', ('heads', 'deprels'))\n",
     "    }\n",
     "\n",
-    "    k = 0\n",
     "    i = 0\n",
     "    PROBLEM_TREES = []\n",
     "\n",
     "    for toks, true, pred, combo in zip(tokens, tags_true, tags_pred, tags_combo):\n",
     "        \n",
-    "        #sent = ' '.join(toks)\n",
-    "        #cats = HDR\n",
-    "        #true = dict(zip(cats, zip(*true)))\n",
-    "        #pred = dict(zip(cats, zip(*pred)))\n",
-    "        #print('----------------------------')\n",
-    "        #print(sent)\n",
     "        dummy = {'lemmas' : ['_' for _ in toks], 'tags' : ['_' for _ in toks]}\n",
     "        true.update(dummy)\n",
     "        pred.update(dummy)\n",
@@ -994,12 +498,6 @@
     "            print('=============================')\n",
     "            raise\n",
     "            tree_pred, problems = None, None\n",
-    "        #if 'reattach' in problems:\n",
-    "        #    show_tree(tree_pred)\n",
-    "        \n",
-    "        #if pred['lemmas_corr'] != pred['lemmas']:\n",
-    "        #    print(pred['lemmas_corr'])\n",
-    "        #    print(pred['lemmas'])\n",
     "        \n",
     "        for key, v in accuracies.items():\n",
     "            if type(key) == str:\n",
@@ -1011,31 +509,11 @@
     "        \n",
     "        spans_true = tree2spans(tree_true, labeled=labeled, headed=headed)\n",
     "        spans_pred = tree2spans(tree_pred, labeled=labeled, headed=headed) if tree_pred else set()\n",
-    "        if 'adwokata' in toks:\n",
-    "            print(spans_true)\n",
-    "            print(spans_pred)\n",
     "        tp = len(spans_true.intersection(spans_pred))\n",
     "        P[0] += tp\n",
     "        R[0] += tp\n",
     "        P[1] += len(spans_pred)\n",
     "        R[1] += len(spans_true)\n",
-    "        leafs = tree_true.get_yield()\n",
-    "        discont = [leaf.from_index for leaf in leafs] != list(range(len(leafs)))\n",
-    "        #if k < 5 and len(toks) > 9 and [leaf.features['index'] for leaf in leafs] != list(range(len(leafs))):\n",
-    "        #if k < 5 and spans_combo != spans_true:\n",
-    "        #if k < 5 and not OK:\n",
-    "        #if discont and len(toks) > 12 and k < 0 and spans_pred == spans_true:\n",
-    "        if len(toks) == 8 and k < 0:\n",
-    "            print('GOLD TREE:')\n",
-    "            show_tree(tree_true)\n",
-    "            display(true)\n",
-    "            #display(_tree2dict(tree_true))\n",
-    "            print('PREDICTED TREE:')\n",
-    "            show_tree(tree_pred)\n",
-    "            display(pred)\n",
-    "            print('FP:', spans_pred - spans_true)\n",
-    "            print('FN:', spans_true - spans_pred)\n",
-    "            k += 1\n",
     "        i += 1\n",
     "    \n",
     "    p, r = P[0]/P[1], R[0]/R[1]\n",
@@ -1060,25 +538,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": null,
    "id": "8f8a771a",
    "metadata": {
     "scrolled": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "unlabeled{((3,), 'SPAN', False), ((2, 3), 'SPAN', False), ((4,), 'SPAN', False), ((0, 1, 2, 3, 4), 'SPAN', False), ((0, 1, 2, 3), 'SPAN', False), ((2,), 'SPAN', False), ((0, 1), 'SPAN', False)}\n",
-      "{((3,), 'SPAN', False), ((2, 3), 'SPAN', False), ((4,), 'SPAN', False), ((0, 1, 2, 3, 4), 'SPAN', False), ((0, 1, 2, 3), 'SPAN', False), ((2,), 'SPAN', False), ((0, 1), 'SPAN', False)}\n",
-      "non-headed{((2,), 'Prep', False), ((4,), 'Punct', False), ((2, 3), 'PrepNP', False), ((3,), 'N', False), ((0, 1, 2, 3), 'S', False), ((0, 1), 'VP', False), ((0, 1), 'V', False), ((3,), 'NP', False), ((0, 1, 2, 3, 4), 'ROOT', False)}\n",
-      "{((2,), 'Prep', False), ((4,), 'Punct', False), ((2, 3), 'PrepNP', False), ((3,), 'N', False), ((0, 1, 2, 3), 'S', False), ((0, 1), 'VP', False), ((0, 1), 'V', False), ((3,), 'NP', False), ((0, 1, 2, 3, 4), 'ROOT', False)}\n",
-      "headed{((0, 1, 2, 3), 'S', True), ((4,), 'Punct', False), ((0, 1), 'VP', True), ((2, 3), 'PrepNP', False), ((0, 1), 'V', True), ((3,), 'NP', False), ((0, 1, 2, 3, 4), 'ROOT', False), ((2,), 'Prep', True), ((3,), 'N', True)}\n",
-      "{((0, 1, 2, 3), 'S', True), ((4,), 'Punct', False), ((0, 1), 'VP', True), ((2, 3), 'PrepNP', False), ((0, 1), 'V', True), ((3,), 'NP', False), ((0, 1, 2, 3, 4), 'ROOT', False), ((2,), 'Prep', True), ((3,), 'N', True)}\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "EVAL_DATA = {\n",
     "    '1val' : (TOKENS_VAL, TAGS_VAL, TAGS_P_VAL, TAGS_C_VAL),\n",
@@ -1113,7 +578,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": null,
    "id": "63192852",
    "metadata": {},
    "outputs": [],
@@ -1123,7 +588,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": null,
    "id": "78250b1b",
    "metadata": {},
    "outputs": [],
@@ -1133,7 +598,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": null,
    "id": "bba6ed15",
    "metadata": {},
    "outputs": [],
@@ -1143,260 +608,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": null,
    "id": "543377f8",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th>dataset</th>\n",
-       "      <th>measure_type</th>\n",
-       "      <th>measure</th>\n",
-       "      <th>value</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>dataset</th>\n",
-       "      <th>measure</th>\n",
-       "      <th>measure_type</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"9\" valign=\"top\">test</th>\n",
-       "      <th rowspan=\"3\" valign=\"top\">F1</th>\n",
-       "      <th>headed</th>\n",
-       "      <th>7</th>\n",
-       "      <td>test</td>\n",
-       "      <td>headed</td>\n",
-       "      <td>F1</td>\n",
-       "      <td>0.959192</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>non-headed</th>\n",
-       "      <th>8</th>\n",
-       "      <td>test</td>\n",
-       "      <td>non-headed</td>\n",
-       "      <td>F1</td>\n",
-       "      <td>0.965236</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>unlabeled</th>\n",
-       "      <th>15</th>\n",
-       "      <td>test</td>\n",
-       "      <td>unlabeled</td>\n",
-       "      <td>F1</td>\n",
-       "      <td>0.964436</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"3\" valign=\"top\">P</th>\n",
-       "      <th>headed</th>\n",
-       "      <th>9</th>\n",
-       "      <td>test</td>\n",
-       "      <td>headed</td>\n",
-       "      <td>P</td>\n",
-       "      <td>0.959611</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>non-headed</th>\n",
-       "      <th>6</th>\n",
-       "      <td>test</td>\n",
-       "      <td>non-headed</td>\n",
-       "      <td>P</td>\n",
-       "      <td>0.965658</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>unlabeled</th>\n",
-       "      <th>13</th>\n",
-       "      <td>test</td>\n",
-       "      <td>unlabeled</td>\n",
-       "      <td>P</td>\n",
-       "      <td>0.964118</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"3\" valign=\"top\">R</th>\n",
-       "      <th>headed</th>\n",
-       "      <th>2</th>\n",
-       "      <td>test</td>\n",
-       "      <td>headed</td>\n",
-       "      <td>R</td>\n",
-       "      <td>0.958773</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>non-headed</th>\n",
-       "      <th>5</th>\n",
-       "      <td>test</td>\n",
-       "      <td>non-headed</td>\n",
-       "      <td>R</td>\n",
-       "      <td>0.964815</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>unlabeled</th>\n",
-       "      <th>0</th>\n",
-       "      <td>test</td>\n",
-       "      <td>unlabeled</td>\n",
-       "      <td>R</td>\n",
-       "      <td>0.964754</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"9\" valign=\"top\">val</th>\n",
-       "      <th rowspan=\"3\" valign=\"top\">F1</th>\n",
-       "      <th>headed</th>\n",
-       "      <th>14</th>\n",
-       "      <td>val</td>\n",
-       "      <td>headed</td>\n",
-       "      <td>F1</td>\n",
-       "      <td>0.957423</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>non-headed</th>\n",
-       "      <th>4</th>\n",
-       "      <td>val</td>\n",
-       "      <td>non-headed</td>\n",
-       "      <td>F1</td>\n",
-       "      <td>0.963231</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>unlabeled</th>\n",
-       "      <th>1</th>\n",
-       "      <td>val</td>\n",
-       "      <td>unlabeled</td>\n",
-       "      <td>F1</td>\n",
-       "      <td>0.962553</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"3\" valign=\"top\">P</th>\n",
-       "      <th>headed</th>\n",
-       "      <th>10</th>\n",
-       "      <td>val</td>\n",
-       "      <td>headed</td>\n",
-       "      <td>P</td>\n",
-       "      <td>0.958145</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>non-headed</th>\n",
-       "      <th>16</th>\n",
-       "      <td>val</td>\n",
-       "      <td>non-headed</td>\n",
-       "      <td>P</td>\n",
-       "      <td>0.963958</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>unlabeled</th>\n",
-       "      <th>11</th>\n",
-       "      <td>val</td>\n",
-       "      <td>unlabeled</td>\n",
-       "      <td>P</td>\n",
-       "      <td>0.962762</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"3\" valign=\"top\">R</th>\n",
-       "      <th>headed</th>\n",
-       "      <th>17</th>\n",
-       "      <td>val</td>\n",
-       "      <td>headed</td>\n",
-       "      <td>R</td>\n",
-       "      <td>0.956702</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>non-headed</th>\n",
-       "      <th>12</th>\n",
-       "      <td>val</td>\n",
-       "      <td>non-headed</td>\n",
-       "      <td>R</td>\n",
-       "      <td>0.962505</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>unlabeled</th>\n",
-       "      <th>3</th>\n",
-       "      <td>val</td>\n",
-       "      <td>unlabeled</td>\n",
-       "      <td>R</td>\n",
-       "      <td>0.962343</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                dataset measure_type measure     value\n",
-       "dataset measure measure_type                                          \n",
-       "test    F1      headed       7     test       headed      F1  0.959192\n",
-       "                non-headed   8     test   non-headed      F1  0.965236\n",
-       "                unlabeled    15    test    unlabeled      F1  0.964436\n",
-       "        P       headed       9     test       headed       P  0.959611\n",
-       "                non-headed   6     test   non-headed       P  0.965658\n",
-       "                unlabeled    13    test    unlabeled       P  0.964118\n",
-       "        R       headed       2     test       headed       R  0.958773\n",
-       "                non-headed   5     test   non-headed       R  0.964815\n",
-       "                unlabeled    0     test    unlabeled       R  0.964754\n",
-       "val     F1      headed       14     val       headed      F1  0.957423\n",
-       "                non-headed   4      val   non-headed      F1  0.963231\n",
-       "                unlabeled    1      val    unlabeled      F1  0.962553\n",
-       "        P       headed       10     val       headed       P  0.958145\n",
-       "                non-headed   16     val   non-headed       P  0.963958\n",
-       "                unlabeled    11     val    unlabeled       P  0.962762\n",
-       "        R       headed       17     val       headed       R  0.956702\n",
-       "                non-headed   12     val   non-headed       R  0.962505\n",
-       "                unlabeled    3      val    unlabeled       R  0.962343"
-      ]
-     },
-     "execution_count": 58,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "results.groupby(['dataset', 'measure', 'measure_type'], group_keys=True).apply(lambda x: x)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": null,
    "id": "0b5d3fe4",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\\toprule\n",
-      "& \\multicolumn{3}{c}{validation} & \\multicolumn{3}{c}{test} \\\\\n",
-      "& precision & recall & F1 & precision & recall & F1 \\\\\n",
-      "\\midrule\n",
-      "1unlabeled & 96.28\\% & 96.23\\% & 96.26\\% & 96.41\\% & 96.48\\% & 96.44\\% \\\\\n",
-      "2non-headed & 96.40\\% & 96.25\\% & 96.32\\% & 96.57\\% & 96.48\\% & 96.52\\% \\\\\n",
-      "3headed & 95.81\\% & 95.67\\% & 95.74\\% & 95.96\\% & 95.88\\% & 95.92\\% \\\\\n",
-      "\\bottomrule\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "for t in tex:\n",
     "    print(t, end='')"
@@ -1444,10 +669,6 @@
     "    precisions = precision_score(TRUE, PRED, average=None)\n",
     "    recalls = recall_score(TRUE, PRED, average=None)\n",
     "    f1s = f1_score(TRUE, PRED, average=None)\n",
-    "    #for v, p, r, f in sorted(zip(values, precisions, recalls, f1s), key=lambda x: -x[3]):\n",
-    "    #    if v.endswith('formarzecz') or v.endswith('formaczas'):\n",
-    "    #        spine = ' $\\\\rightarrow$ '.join(f'\\\\nt{{{n}}}' for n in v.split('_'))\n",
-    "    #        print(f'{spine} & {100 * p:.2f}\\\\% & {100 * r:.2f}\\\\% & {100 * f:.2f}\\\\% \\\\\\\\')\n",
     "    \n",
     "    ct_pre, cp_pre = Counter(), Counter()\n",
     "    for val in values:\n",
@@ -1458,7 +679,6 @@
     "    rows = []\n",
     "    \n",
     "    for pre in ct_pre.keys():\n",
-    "        # TODO\n",
     "        if pre == 'ign':\n",
     "            continue\n",
     "        if not cp_pre[pre] * ct_pre[pre]:\n",
@@ -1472,7 +692,6 @@
     "                    spine = ' $\\\\rightarrow$ '.join(f'\\\\nt{{{n}}}' for n in v.split('_'))\n",
     "                    rws.append(f'{spine} & {100 * p:.2f}\\\\% & {100 * r:.2f}\\\\% & {100 * f:.2f}\\\\% & {ct[v]} \\\\\\\\')\n",
     "                wp, wr = cp[v] / cp_pre[pre], ct[v] / ct_pre[pre]\n",
-    "                #print(f'    {v:36s} {100 * p:6.2f} {wp:7.3f} {100 * r:6.2f} {wr:7.3f}')\n",
     "                P += p * wp\n",
     "                R += r * wr\n",
     "        F = 2 * P * R / (P + R)\n",