From 1ccb3142046f2295874cfe9983db47e601f57f89 Mon Sep 17 00:00:00 2001
From: bniton <bartek.niton@gmail.com>
Date: Wed, 17 Feb 2021 10:27:00 +0100
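Subject: [PATCH] Fix minor docker issues.

Dockerfile.marcell:
- Download and unpack marcell-init.db.tar.gz into $APP_HOME/resources in
  its own build step (before the chown to the collector user) instead of
  inside the postgres RUN step.
- Load the dump from $APP_HOME/resources/marcell-init.db and remove it
  afterwards as the collector user.
- Set WORKDIR to $APP_HOME before the ENTRYPOINT.

collector/loaders/marcell_rest.py:
- _create_document: only attach keywords when the metadata contains a
  'keywords' entry.
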
---
 Dockerfile.marcell                | 19 +++++++++++++------
 collector/loaders/marcell_rest.py |  7 ++++---
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/Dockerfile.marcell b/Dockerfile.marcell
index a75a83b..b7d5d72 100644
--- a/Dockerfile.marcell
+++ b/Dockerfile.marcell
@@ -84,6 +84,12 @@ WORKDIR $APP_HOME
 # copy django settings
 COPY ./collector/collector/docker-settings.py $APP_HOME/collector/collector/settings.py
 
+# download and unpack db init file
+WORKDIR $APP_HOME/resources
+RUN wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/marcell-init.db.tar.gz && \
+    tar -xvf ./marcell-init.db.tar.gz && \
+    rm ./marcell-init.db.tar.gz
+
 # chown all the files to the collector user
 RUN chown -R collector:collector $HOME
 
@@ -92,14 +98,15 @@ USER postgres
 RUN /etc/init.d/postgresql start && \
     psql --command "CREATE USER collector WITH SUPERUSER PASSWORD 'collector';" && \
     createdb -O collector collector && \
-    wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/marcell-init.db.tar.gz && \
-    tar -xvf ./marcell-init.db.tar.gz && \
-    rm ./marcell-init.db.tar.gz && \
-    psql collector < ./marcell-init.db && \
-    rm ./marcell-init.db
+    psql collector < $APP_HOME/resources/marcell-init.db
+
+# remove db init file
+USER collector
+RUN rm $APP_HOME/resources/marcell-init.db
 
 # change to the root user
 USER 0
 
 # run entrypoint-marcell.sh
-ENTRYPOINT ["/home/collector/app/entrypoint-marcell.sh"]
+WORKDIR $APP_HOME
+ENTRYPOINT ["/home/collector/app/entrypoint-marcell.sh"]
\ No newline at end of file
diff --git a/collector/loaders/marcell_rest.py b/collector/loaders/marcell_rest.py
index 15b811a..0969c9b 100644
--- a/collector/loaders/marcell_rest.py
+++ b/collector/loaders/marcell_rest.py
@@ -83,9 +83,10 @@ def _create_document(pipeline, doc_id, metadata, text_file):
                                     sequence=metadata_sequence)
             metadata_sequence += 1
 
-    for label in metadata['keywords']:
-        keyword_obj, _ = Keyword.objects.get_or_create(label=label)
-        document.keywords.add(keyword_obj)
+    if 'keywords' in metadata:
+        for label in metadata['keywords']:
+            keyword_obj, _ = Keyword.objects.get_or_create(label=label)
+            document.keywords.add(keyword_obj)
 
     return document
 
--
libgit2 0.22.2