From 3634f1f2dfd8df37bc3f0fcd6e094c9eddfa81b2 Mon Sep 17 00:00:00 2001
From: Katarzyna Krasnowska <kasia.krasnowska@gmail.com>
Date: Mon, 19 Jun 2023 12:59:58 +0200
Subject: [PATCH] code cleanup
---
TrainingAndEval.ipynb | 889 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 file changed, 54 insertions(+), 835 deletions(-)
diff --git a/TrainingAndEval.ipynb b/TrainingAndEval.ipynb
index 753718e..1c6067e 100644
--- a/TrainingAndEval.ipynb
+++ b/TrainingAndEval.ipynb
@@ -2,23 +2,10 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "97d0c9ab",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2023-04-11 11:17:29.095631: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
- "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
- "2023-04-11 11:17:29.331444: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
- "2023-04-11 11:17:30.167497: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n",
- "2023-04-11 11:17:30.167593: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n",
- "2023-04-11 11:17:30.167603: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"import importlib\n",
"\n",
@@ -40,7 +27,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "c41d6630",
"metadata": {},
"outputs": [],
@@ -51,36 +38,10 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "f30d7b7c",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1 Physical GPUs, 1 Logical GPUs\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2023-04-11 11:17:31.717262: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:31.762533: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:31.763529: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:31.765670: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
- "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
- "2023-04-11 11:17:31.769196: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:31.770058: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:31.770816: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.722287: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.723281: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.724062: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.724846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20480 MB memory: -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:00:05.0, compute capability: 8.0\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# https://www.tensorflow.org/guide/gpu\n",
"gpus = tf.config.list_physical_devices('GPU')\n",
@@ -98,33 +59,12 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "89afdb1e",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "/device:GPU:0\n",
- "2.10.0\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2023-04-11 11:17:32.739308: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.740224: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.740975: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.741809: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.742586: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
- "2023-04-11 11:17:32.743322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /device:GPU:0 with 20480 MB memory: -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:00:05.0, compute capability: 8.0\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"print(tf.test.gpu_device_name())\n",
"print(tf.__version__)"
@@ -132,63 +72,22 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "2b0ab576",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Found cached dataset pdb_c_beta (/home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1)\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "55f181333dc44c7a811c515cc55c4988",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- " 0%| | 0/3 [00:00<?, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"pdbc_dataset = load_dataset('pdb_c_beta')"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "2f4c317a",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-ff2490f308f7f25b.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-cbb40b0e978ab6ee.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-3facbd810991cd6c.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-e54a8628e59de21f.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-9692de6b8224e758.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-4042ffa1dc5d9323.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-fb250709424f85ec.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-1f6ce0a488a89d56.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-2ae4daf5101c7aa2.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-a1686820d15bcf04.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-fe2c12481861f4bd.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-da5a875c385c3570.arrow\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"import importlib\n",
"\n",
@@ -203,20 +102,10 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "de1966ed",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-1dfcf507d62f6da8.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-264c0111246b25c1.arrow\n",
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-6a40675124a412f0.arrow\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"features = pdbc_dataset_spines['train'].features\n",
"pdbc_dataset_spines_cont = pdbc_dataset_spines.filter(\n",
@@ -226,41 +115,17 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "33ff295b",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "DatasetDict({\n",
- " train: Dataset({\n",
- " features: ['corp_id', 'sent_id', 'tokens', 'lemmas', 'cposes', 'poses', 'tags', 'heads', 'deprels', 'nonterminals', 'spines', 'anchors', 'anchor_hs'],\n",
- " num_rows: 15903\n",
- " })\n",
- " validation: Dataset({\n",
- " features: ['corp_id', 'sent_id', 'tokens', 'lemmas', 'cposes', 'poses', 'tags', 'heads', 'deprels', 'nonterminals', 'spines', 'anchors', 'anchor_hs'],\n",
- " num_rows: 1980\n",
- " })\n",
- " test: Dataset({\n",
- " features: ['corp_id', 'sent_id', 'tokens', 'lemmas', 'cposes', 'poses', 'tags', 'heads', 'deprels', 'nonterminals', 'spines', 'anchors', 'anchor_hs'],\n",
- " num_rows: 1990\n",
- " })\n",
- "})"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"pdbc_dataset_spines_cont"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "a8ddbc1f",
"metadata": {},
"outputs": [],
@@ -270,7 +135,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "8029594b",
"metadata": {},
"outputs": [],
@@ -288,30 +153,24 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": null,
"id": "be8e93fa",
"metadata": {},
"outputs": [],
"source": [
- "def crop(dataset, n):\n",
- " return dataset.filter(lambda example: len(example['tokens']) <= n)\n",
- "\n",
"spines_pdbc = ClassificationTask(\n",
" 'spines_pdbc',\n",
" pdbc_dataset_spines,\n",
- " #crop(pdbc_dataset, 6),\n",
")\n",
"\n",
"spines_pdbc_cont = ClassificationTask(\n",
" 'spines_pdbc_cont',\n",
" pdbc_dataset_spines_cont,\n",
- " #crop(pdbc_dataset, 6),\n",
")\n",
"\n",
"spines_pdbc_compressed = ClassificationTask(\n",
" 'spines_pdbc_compressed',\n",
" pdbc_dataset_spines_compressed,\n",
- " #crop(pdbc_dataset, 6),\n",
")\n",
"\n",
"TASK = spines_pdbc_compressed\n",
@@ -320,7 +179,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": null,
"id": "7824fcee",
"metadata": {},
"outputs": [],
@@ -330,56 +189,12 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": null,
"id": "1eb5f41a",
"metadata": {
"scrolled": false
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loading BERT tokenizer...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-49fe5b05228c3588.arrow\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Preprocessing the dataset for BERT...\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "5f108b00fcab4db8a610f24ae03b7308",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- " 0%| | 0/2211 [00:00<?, ?ex/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading cached processed dataset at /home/kkrasnowska/.cache/huggingface/datasets/pdb_c_beta/pdb_c_beta/0.2.0/d9c6dc764ae2a3483fa112c6159db4a0342dba8083bdb3b5981c45435b0692e1/cache-b8e2900fbd9615fd.arrow\n",
- "You're using a HerbertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"trainer = training.Trainer(\n",
" MODEL,\n",
@@ -398,21 +213,10 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": null,
"id": "276708cc",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "('keras_fit_logs_spines_pdbc_compressed', 'models_spines_pdbc_compressed')"
- ]
- },
- "execution_count": 39,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"log_dir = f'keras_fit_logs_{TASK.name}'\n",
"model_dir = f'models_{TASK.name}'\n",
@@ -422,51 +226,12 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": null,
"id": "e8ccde06",
"metadata": {
"scrolled": false
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "The tensorboard extension is already loaded. To reload it, use:\n",
- " %reload_ext tensorboard\n",
- "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
- "To disable this warning, you can either:\n",
- "\t- Avoid using `tokenizers` before the fork if possible\n",
- "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " <iframe id=\"tensorboard-frame-83a6a03964d4187a\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
- " </iframe>\n",
- " <script>\n",
- " (function() {\n",
- " const frame = document.getElementById(\"tensorboard-frame-83a6a03964d4187a\");\n",
- " const url = new URL(\"/\", window.location);\n",
- " const port = 6004;\n",
- " if (port) {\n",
- " url.port = port;\n",
- " }\n",
- " frame.src = url;\n",
- " })();\n",
- " </script>\n",
- " "
- ],
- "text/plain": [
- "<IPython.core.display.HTML object>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"%load_ext tensorboard\n",
"! killall tensorboard\n",
@@ -476,21 +241,12 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": null,
"id": "a5b0da64",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 6 µs, sys: 1 µs, total: 7 µs\n",
- "Wall time: 15.7 µs\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%time\n",
"\n",
@@ -505,45 +261,10 @@
},
{
"cell_type": "code",
- "execution_count": 42,
- "id": "e42b2bd4",
- "metadata": {},
- "outputs": [],
- "source": [
- "#import importlib\n",
- "#from neural_parser import hybrid_tree_utils\n",
- "#importlib.reload(hybrid_tree_utils)\n",
- "#from neural_parser import data_utils\n",
- "#importlib.reload(data_utils)\n",
- "#from neural_parser import constituency_parser\n",
- "#importlib.reload(constituency_parser)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 43,
+ "execution_count": null,
"id": "2f65dead",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "created 3 classifier(s)\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Some layers from the model checkpoint at models_spines_pdbc_compressed/model were not used when initializing TFBertForMultiTargetTokenClassification: ['dropout_73']\n",
- "- This IS expected if you are initializing TFBertForMultiTargetTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "- This IS NOT expected if you are initializing TFBertForMultiTargetTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "All the layers of TFBertForMultiTargetTokenClassification were initialized from the model checkpoint at models_spines_pdbc_compressed/model.\n",
- "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForMultiTargetTokenClassification for predictions without further training.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"if not TRAIN:\n",
" from neural_parser import constituency_parser\n",
@@ -552,7 +273,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": null,
"id": "24edee79",
"metadata": {},
"outputs": [],
@@ -565,41 +286,10 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": null,
"id": "4a7cd10b",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1/1 [==============================] - 10s 10s/step\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[(['Miał', 'em', 'kotka', '.'],\n",
- " {'spines': ['ROOT_S_VP_V', '<EMPTY>', 'NP_N', 'Punct'],\n",
- " 'anchors': ['<ROOT>', 'V', 'S', 'ROOT'],\n",
- " 'anchor_hs': ['<ROOT>', '1', '1', '1']}),\n",
- " (['Wlazł', 'kotek', 'na', 'płotek', 'i', 'mruga', '.'],\n",
- " {'spines': ['VP_V',\n",
- " 'NP_N',\n",
- " 'PrepNP_Prep',\n",
- " 'NP_N',\n",
- " 'ROOT_S_VP_Conj',\n",
- " 'VP_V',\n",
- " 'Punct'],\n",
- " 'anchors': ['VP', 'S', 'VP', 'PrepNP', '<ROOT>', 'VP', 'ROOT'],\n",
- " 'anchor_hs': ['1', '1', '2', '1', '<ROOT>', '1', '1']})]"
- ]
- },
- "execution_count": 45,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"parser.parse(sentences)"
]
@@ -616,21 +306,10 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": null,
"id": "4ac4b9df",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<module 'neural_parser.constants' from '/home/kkrasnowska/neural-parsing/ICCS/neural_parser/constants.py'>"
- ]
- },
- "execution_count": 46,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from neural_parser import hybrid_tree_utils\n",
"importlib.reload(hybrid_tree_utils)\n",
@@ -640,86 +319,12 @@
},
{
"cell_type": "code",
- "execution_count": 47,
- "id": "d1b28792",
- "metadata": {},
- "outputs": [],
- "source": [
- "from spacy import displacy\n",
- "\n",
- "def to_deps(tokens, deprels, heads):\n",
- " deps = {'words' : [], 'arcs' : []}\n",
- " for i, (token, deprel, head) in enumerate(zip(tokens, deprels, heads)):\n",
- " deps['words'].append({'text' : token, 'tag' : 'X'})\n",
- " if head >= 0:\n",
- " d = 'left' if head > i else 'right'\n",
- " start, end = sorted((i, head))\n",
- " deps['arcs'].append({'start' : start, 'end' : end, 'label' : deprel, 'dir' : d})\n",
- " return deps\n",
- "\n",
- "def display_deps(tokens, deprels, heads):\n",
- " displacy.render(to_deps(tokens, deprels, heads), manual=True, options={'distance' : 80})\n",
- " \n",
- "import urllib.parse\n",
- "import json\n",
- "\n",
- "def show_tree(tree):\n",
- " tree_json = json.dumps(hybrid_tree_utils.tree2dict(tree)['tree'])\n",
- " src = f'http://127.0.0.1:8010/?tree={urllib.parse.quote(tree_json)}'\n",
- " display(IFrame(src, 950, 550))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
+ "execution_count": null,
"id": "9f443569",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2211\n",
- "2205\n",
- "['Całuję', '.']\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "{'heads': [None, 0],\n",
- " 'deprels': ['ROOT', 'punct'],\n",
- " 'spines': ['ROOT_S_VP_V', 'Punct'],\n",
- " 'anchors': ['<ROOT>', 'ROOT'],\n",
- " 'anchor_hs': ['<ROOT>', '1']}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['Drzemał', '.']\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "{'heads': [None, 0],\n",
- " 'deprels': ['ROOT', 'punct'],\n",
- " 'spines': ['ROOT_S_VP_V', 'Punct'],\n",
- " 'anchors': ['<ROOT>', 'ROOT'],\n",
- " 'anchor_hs': ['<ROOT>', '1']}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"HDR = [\n",
" 'heads', 'deprels',\n",
@@ -753,49 +358,10 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": null,
"id": "3f53c039",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "70/70 [==============================] - 17s 152ms/step\n",
- "69/69 [==============================] - 12s 168ms/step\n",
- "['Całuję', '.']\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "{'spines': ['ROOT_S_VP_V', 'Punct'],\n",
- " 'anchors': ['<ROOT>', 'ROOT'],\n",
- " 'anchor_hs': ['<ROOT>', '1']}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['Drzemał', '.']\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "{'spines': ['ROOT_S_VP_V', 'Punct'],\n",
- " 'anchors': ['<ROOT>', 'ROOT'],\n",
- " 'anchor_hs': ['<ROOT>', '1']}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"def get_predicted_data(TOKENS_TRUE):\n",
" PARSED = parser.parse([' '.join(toks) for toks in TOKENS_TRUE])\n",
@@ -821,45 +387,10 @@
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": null,
"id": "17c1d9cb",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2211\n",
- "2205\n",
- "['Całuję', '.']\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "{'heads': [None, 0], 'deprels': ['root', 'punct']}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['Drzemał', '.']\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "{'heads': [None, 0], 'deprels': ['root', 'punct']}"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"import conllu\n",
"\n",
@@ -894,7 +425,7 @@
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": null,
"id": "004918c6",
"metadata": {},
"outputs": [],
@@ -913,42 +444,22 @@
"def tree2spans(tree, labeled=True, headed=False):\n",
" spans = []\n",
" _tree2spans(tree, spans, labeled=labeled, headed=headed)\n",
- " # TODO\n",
- " #try:\n",
- " # assert(len(spans) == len(set(spans)))\n",
- " #except:\n",
- " # show_tree(tree)\n",
- " # (display(spans))\n",
- " # 1/0\n",
" return set(spans)"
]
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": null,
"id": "65d493ca",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<module 'neural_parser.hybrid_tree_utils' from '/home/kkrasnowska/neural-parsing/ICCS/neural_parser/hybrid_tree_utils.py'>"
- ]
- },
- "execution_count": 52,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score\n",
- "\n",
- "importlib.reload(hybrid_tree_utils)"
+ "from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score"
]
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": null,
"id": "e5f88e76",
"metadata": {
"scrolled": false
@@ -964,18 +475,11 @@
" key : {'true' : [], 'pred' : []} for key in ('heads', ('heads', 'deprels'))\n",
" }\n",
"\n",
- " k = 0\n",
" i = 0\n",
" PROBLEM_TREES = []\n",
"\n",
" for toks, true, pred, combo in zip(tokens, tags_true, tags_pred, tags_combo):\n",
" \n",
- " #sent = ' '.join(toks)\n",
- " #cats = HDR\n",
- " #true = dict(zip(cats, zip(*true)))\n",
- " #pred = dict(zip(cats, zip(*pred)))\n",
- " #print('----------------------------')\n",
- " #print(sent)\n",
" dummy = {'lemmas' : ['_' for _ in toks], 'tags' : ['_' for _ in toks]}\n",
" true.update(dummy)\n",
" pred.update(dummy)\n",
@@ -994,12 +498,6 @@
" print('=============================')\n",
" raise\n",
" tree_pred, problems = None, None\n",
- " #if 'reattach' in problems:\n",
- " # show_tree(tree_pred)\n",
- " \n",
- " #if pred['lemmas_corr'] != pred['lemmas']:\n",
- " # print(pred['lemmas_corr'])\n",
- " # print(pred['lemmas'])\n",
" \n",
" for key, v in accuracies.items():\n",
" if type(key) == str:\n",
@@ -1011,31 +509,11 @@
" \n",
" spans_true = tree2spans(tree_true, labeled=labeled, headed=headed)\n",
" spans_pred = tree2spans(tree_pred, labeled=labeled, headed=headed) if tree_pred else set()\n",
- " if 'adwokata' in toks:\n",
- " print(spans_true)\n",
- " print(spans_pred)\n",
" tp = len(spans_true.intersection(spans_pred))\n",
" P[0] += tp\n",
" R[0] += tp\n",
" P[1] += len(spans_pred)\n",
" R[1] += len(spans_true)\n",
- " leafs = tree_true.get_yield()\n",
- " discont = [leaf.from_index for leaf in leafs] != list(range(len(leafs)))\n",
- " #if k < 5 and len(toks) > 9 and [leaf.features['index'] for leaf in leafs] != list(range(len(leafs))):\n",
- " #if k < 5 and spans_combo != spans_true:\n",
- " #if k < 5 and not OK:\n",
- " #if discont and len(toks) > 12 and k < 0 and spans_pred == spans_true:\n",
- " if len(toks) == 8 and k < 0:\n",
- " print('GOLD TREE:')\n",
- " show_tree(tree_true)\n",
- " display(true)\n",
- " #display(_tree2dict(tree_true))\n",
- " print('PREDICTED TREE:')\n",
- " show_tree(tree_pred)\n",
- " display(pred)\n",
- " print('FP:', spans_pred - spans_true)\n",
- " print('FN:', spans_true - spans_pred)\n",
- " k += 1\n",
" i += 1\n",
" \n",
" p, r = P[0]/P[1], R[0]/R[1]\n",
@@ -1060,25 +538,12 @@
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": null,
"id": "8f8a771a",
"metadata": {
"scrolled": false
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "unlabeled{((3,), 'SPAN', False), ((2, 3), 'SPAN', False), ((4,), 'SPAN', False), ((0, 1, 2, 3, 4), 'SPAN', False), ((0, 1, 2, 3), 'SPAN', False), ((2,), 'SPAN', False), ((0, 1), 'SPAN', False)}\n",
- "{((3,), 'SPAN', False), ((2, 3), 'SPAN', False), ((4,), 'SPAN', False), ((0, 1, 2, 3, 4), 'SPAN', False), ((0, 1, 2, 3), 'SPAN', False), ((2,), 'SPAN', False), ((0, 1), 'SPAN', False)}\n",
- "non-headed{((2,), 'Prep', False), ((4,), 'Punct', False), ((2, 3), 'PrepNP', False), ((3,), 'N', False), ((0, 1, 2, 3), 'S', False), ((0, 1), 'VP', False), ((0, 1), 'V', False), ((3,), 'NP', False), ((0, 1, 2, 3, 4), 'ROOT', False)}\n",
- "{((2,), 'Prep', False), ((4,), 'Punct', False), ((2, 3), 'PrepNP', False), ((3,), 'N', False), ((0, 1, 2, 3), 'S', False), ((0, 1), 'VP', False), ((0, 1), 'V', False), ((3,), 'NP', False), ((0, 1, 2, 3, 4), 'ROOT', False)}\n",
- "headed{((0, 1, 2, 3), 'S', True), ((4,), 'Punct', False), ((0, 1), 'VP', True), ((2, 3), 'PrepNP', False), ((0, 1), 'V', True), ((3,), 'NP', False), ((0, 1, 2, 3, 4), 'ROOT', False), ((2,), 'Prep', True), ((3,), 'N', True)}\n",
- "{((0, 1, 2, 3), 'S', True), ((4,), 'Punct', False), ((0, 1), 'VP', True), ((2, 3), 'PrepNP', False), ((0, 1), 'V', True), ((3,), 'NP', False), ((0, 1, 2, 3, 4), 'ROOT', False), ((2,), 'Prep', True), ((3,), 'N', True)}\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"EVAL_DATA = {\n",
" '1val' : (TOKENS_VAL, TAGS_VAL, TAGS_P_VAL, TAGS_C_VAL),\n",
@@ -1113,7 +578,7 @@
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": null,
"id": "63192852",
"metadata": {},
"outputs": [],
@@ -1123,7 +588,7 @@
},
{
"cell_type": "code",
- "execution_count": 56,
+ "execution_count": null,
"id": "78250b1b",
"metadata": {},
"outputs": [],
@@ -1133,7 +598,7 @@
},
{
"cell_type": "code",
- "execution_count": 57,
+ "execution_count": null,
"id": "bba6ed15",
"metadata": {},
"outputs": [],
@@ -1143,260 +608,20 @@
},
{
"cell_type": "code",
- "execution_count": 58,
+ "execution_count": null,
"id": "543377f8",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th>dataset</th>\n",
- " <th>measure_type</th>\n",
- " <th>measure</th>\n",
- " <th>value</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>dataset</th>\n",
- " <th>measure</th>\n",
- " <th>measure_type</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th rowspan=\"9\" valign=\"top\">test</th>\n",
- " <th rowspan=\"3\" valign=\"top\">F1</th>\n",
- " <th>headed</th>\n",
- " <th>7</th>\n",
- " <td>test</td>\n",
- " <td>headed</td>\n",
- " <td>F1</td>\n",
- " <td>0.959192</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>non-headed</th>\n",
- " <th>8</th>\n",
- " <td>test</td>\n",
- " <td>non-headed</td>\n",
- " <td>F1</td>\n",
- " <td>0.965236</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>unlabeled</th>\n",
- " <th>15</th>\n",
- " <td>test</td>\n",
- " <td>unlabeled</td>\n",
- " <td>F1</td>\n",
- " <td>0.964436</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th rowspan=\"3\" valign=\"top\">P</th>\n",
- " <th>headed</th>\n",
- " <th>9</th>\n",
- " <td>test</td>\n",
- " <td>headed</td>\n",
- " <td>P</td>\n",
- " <td>0.959611</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>non-headed</th>\n",
- " <th>6</th>\n",
- " <td>test</td>\n",
- " <td>non-headed</td>\n",
- " <td>P</td>\n",
- " <td>0.965658</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>unlabeled</th>\n",
- " <th>13</th>\n",
- " <td>test</td>\n",
- " <td>unlabeled</td>\n",
- " <td>P</td>\n",
- " <td>0.964118</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th rowspan=\"3\" valign=\"top\">R</th>\n",
- " <th>headed</th>\n",
- " <th>2</th>\n",
- " <td>test</td>\n",
- " <td>headed</td>\n",
- " <td>R</td>\n",
- " <td>0.958773</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>non-headed</th>\n",
- " <th>5</th>\n",
- " <td>test</td>\n",
- " <td>non-headed</td>\n",
- " <td>R</td>\n",
- " <td>0.964815</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>unlabeled</th>\n",
- " <th>0</th>\n",
- " <td>test</td>\n",
- " <td>unlabeled</td>\n",
- " <td>R</td>\n",
- " <td>0.964754</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th rowspan=\"9\" valign=\"top\">val</th>\n",
- " <th rowspan=\"3\" valign=\"top\">F1</th>\n",
- " <th>headed</th>\n",
- " <th>14</th>\n",
- " <td>val</td>\n",
- " <td>headed</td>\n",
- " <td>F1</td>\n",
- " <td>0.957423</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>non-headed</th>\n",
- " <th>4</th>\n",
- " <td>val</td>\n",
- " <td>non-headed</td>\n",
- " <td>F1</td>\n",
- " <td>0.963231</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>unlabeled</th>\n",
- " <th>1</th>\n",
- " <td>val</td>\n",
- " <td>unlabeled</td>\n",
- " <td>F1</td>\n",
- " <td>0.962553</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th rowspan=\"3\" valign=\"top\">P</th>\n",
- " <th>headed</th>\n",
- " <th>10</th>\n",
- " <td>val</td>\n",
- " <td>headed</td>\n",
- " <td>P</td>\n",
- " <td>0.958145</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>non-headed</th>\n",
- " <th>16</th>\n",
- " <td>val</td>\n",
- " <td>non-headed</td>\n",
- " <td>P</td>\n",
- " <td>0.963958</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>unlabeled</th>\n",
- " <th>11</th>\n",
- " <td>val</td>\n",
- " <td>unlabeled</td>\n",
- " <td>P</td>\n",
- " <td>0.962762</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th rowspan=\"3\" valign=\"top\">R</th>\n",
- " <th>headed</th>\n",
- " <th>17</th>\n",
- " <td>val</td>\n",
- " <td>headed</td>\n",
- " <td>R</td>\n",
- " <td>0.956702</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>non-headed</th>\n",
- " <th>12</th>\n",
- " <td>val</td>\n",
- " <td>non-headed</td>\n",
- " <td>R</td>\n",
- " <td>0.962505</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>unlabeled</th>\n",
- " <th>3</th>\n",
- " <td>val</td>\n",
- " <td>unlabeled</td>\n",
- " <td>R</td>\n",
- " <td>0.962343</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " dataset measure_type measure value\n",
- "dataset measure measure_type \n",
- "test F1 headed 7 test headed F1 0.959192\n",
- " non-headed 8 test non-headed F1 0.965236\n",
- " unlabeled 15 test unlabeled F1 0.964436\n",
- " P headed 9 test headed P 0.959611\n",
- " non-headed 6 test non-headed P 0.965658\n",
- " unlabeled 13 test unlabeled P 0.964118\n",
- " R headed 2 test headed R 0.958773\n",
- " non-headed 5 test non-headed R 0.964815\n",
- " unlabeled 0 test unlabeled R 0.964754\n",
- "val F1 headed 14 val headed F1 0.957423\n",
- " non-headed 4 val non-headed F1 0.963231\n",
- " unlabeled 1 val unlabeled F1 0.962553\n",
- " P headed 10 val headed P 0.958145\n",
- " non-headed 16 val non-headed P 0.963958\n",
- " unlabeled 11 val unlabeled P 0.962762\n",
- " R headed 17 val headed R 0.956702\n",
- " non-headed 12 val non-headed R 0.962505\n",
- " unlabeled 3 val unlabeled R 0.962343"
- ]
- },
- "execution_count": 58,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"results.groupby(['dataset', 'measure', 'measure_type'], group_keys=True).apply(lambda x: x)"
]
},
{
"cell_type": "code",
- "execution_count": 59,
+ "execution_count": null,
"id": "0b5d3fe4",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\\toprule\n",
- "& \\multicolumn{3}{c}{validation} & \\multicolumn{3}{c}{test} \\\\\n",
- "& precision & recall & F1 & precision & recall & F1 \\\\\n",
- "\\midrule\n",
- "1unlabeled & 96.28\\% & 96.23\\% & 96.26\\% & 96.41\\% & 96.48\\% & 96.44\\% \\\\\n",
- "2non-headed & 96.40\\% & 96.25\\% & 96.32\\% & 96.57\\% & 96.48\\% & 96.52\\% \\\\\n",
- "3headed & 95.81\\% & 95.67\\% & 95.74\\% & 95.96\\% & 95.88\\% & 95.92\\% \\\\\n",
- "\\bottomrule\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"for t in tex:\n",
" print(t, end='')"
@@ -1444,10 +669,6 @@
" precisions = precision_score(TRUE, PRED, average=None)\n",
" recalls = recall_score(TRUE, PRED, average=None)\n",
" f1s = f1_score(TRUE, PRED, average=None)\n",
- " #for v, p, r, f in sorted(zip(values, precisions, recalls, f1s), key=lambda x: -x[3]):\n",
- " # if v.endswith('formarzecz') or v.endswith('formaczas'):\n",
- " # spine = ' $\\\\rightarrow$ '.join(f'\\\\nt{{{n}}}' for n in v.split('_'))\n",
- " # print(f'{spine} & {100 * p:.2f}\\\\% & {100 * r:.2f}\\\\% & {100 * f:.2f}\\\\% \\\\\\\\')\n",
" \n",
" ct_pre, cp_pre = Counter(), Counter()\n",
" for val in values:\n",
@@ -1458,7 +679,6 @@
" rows = []\n",
" \n",
" for pre in ct_pre.keys():\n",
- " # TODO\n",
" if pre == 'ign':\n",
" continue\n",
" if not cp_pre[pre] * ct_pre[pre]:\n",
@@ -1472,7 +692,6 @@
" spine = ' $\\\\rightarrow$ '.join(f'\\\\nt{{{n}}}' for n in v.split('_'))\n",
" rws.append(f'{spine} & {100 * p:.2f}\\\\% & {100 * r:.2f}\\\\% & {100 * f:.2f}\\\\% & {ct[v]} \\\\\\\\')\n",
" wp, wr = cp[v] / cp_pre[pre], ct[v] / ct_pre[pre]\n",
- " #print(f' {v:36s} {100 * p:6.2f} {wp:7.3f} {100 * r:6.2f} {wr:7.3f}')\n",
" P += p * wp\n",
" R += r * wr\n",
" F = 2 * P * R / (P + R)\n",
--
libgit2 0.22.2