diff --git a/Dockerfile.marcell b/Dockerfile.marcell index a75a83b..b7d5d72 100644 --- a/Dockerfile.marcell +++ b/Dockerfile.marcell @@ -84,6 +84,12 @@ WORKDIR $APP_HOME # copy django settings COPY ./collector/collector/docker-settings.py $APP_HOME/collector/collector/settings.py +# download and unpack db init file +WORKDIR $APP_HOME/resources +RUN wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/marcell-init.db.tar.gz && \ + tar -xvf ./marcell-init.db.tar.gz && \ + rm ./marcell-init.db.tar.gz + # chown all the files to the collector user RUN chown -R collector:collector $HOME @@ -92,14 +98,15 @@ USER postgres RUN /etc/init.d/postgresql start && \ psql --command "CREATE USER collector WITH SUPERUSER PASSWORD 'collector';" && \ createdb -O collector collector && \ - wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/marcell-init.db.tar.gz && \ - tar -xvf ./marcell-init.db.tar.gz && \ - rm ./marcell-init.db.tar.gz && \ - psql collector < ./marcell-init.db && \ - rm ./marcell-init.db + psql collector < $APP_HOME/resources/marcell-init.db + +# remove db init file +USER collector +RUN rm $APP_HOME/resources/marcell-init.db # change to the root user USER 0 # run entrypoint-marcell.sh -ENTRYPOINT ["/home/collector/app/entrypoint-marcell.sh"] +WORKDIR $APP_HOME +ENTRYPOINT ["/home/collector/app/entrypoint-marcell.sh"] \ No newline at end of file diff --git a/collector/loaders/marcell_rest.py b/collector/loaders/marcell_rest.py index 15b811a..0969c9b 100644 --- a/collector/loaders/marcell_rest.py +++ b/collector/loaders/marcell_rest.py @@ -83,9 +83,10 @@ def _create_document(pipeline, doc_id, metadata, text_file): sequence=metadata_sequence) metadata_sequence += 1 - for label in metadata['keywords']: - keyword_obj, _ = Keyword.objects.get_or_create(label=label) - document.keywords.add(keyword_obj) + if 'keywords' in metadata: + for label in metadata['keywords']: + keyword_obj, _ = Keyword.objects.get_or_create(label=label) + document.keywords.add(keyword_obj) return document