Commit 1ccb3142046f2295874cfe9983db47e601f57f89

Authored by Bartłomiej Nitoń
1 parent 87f27441

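Fix minor Docker issues: download and unpack the DB init file in a separate build step, and skip keywords when they are missing from the document metadata.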

Dockerfile.marcell
... ... @@ -84,6 +84,12 @@ WORKDIR $APP_HOME
84 84 # copy django settings
85 85 COPY ./collector/collector/docker-settings.py $APP_HOME/collector/collector/settings.py
86 86  
  87 +# download and unpack db init file
  88 +WORKDIR $APP_HOME/resources
  89 +RUN wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/marcell-init.db.tar.gz && \
  90 + tar -xvf ./marcell-init.db.tar.gz && \
  91 + rm ./marcell-init.db.tar.gz
  92 +
87 93 # chown all the files to the collector user
88 94 RUN chown -R collector:collector $HOME
89 95  
... ... @@ -92,14 +98,15 @@ USER postgres
92 98 RUN /etc/init.d/postgresql start && \
93 99 psql --command "CREATE USER collector WITH SUPERUSER PASSWORD 'collector';" && \
94 100 createdb -O collector collector && \
95   - wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/marcell-init.db.tar.gz && \
96   - tar -xvf ./marcell-init.db.tar.gz && \
97   - rm ./marcell-init.db.tar.gz && \
98   - psql collector < ./marcell-init.db && \
99   - rm ./marcell-init.db
  101 + psql collector < $APP_HOME/resources/marcell-init.db
  102 +
  103 +# remove db init file
  104 +USER collector
  105 +RUN rm $APP_HOME/resources/marcell-init.db
100 106  
101 107 # change to the root user
102 108 USER 0
103 109  
104 110 # run entrypoint-marcell.sh
105   -ENTRYPOINT ["/home/collector/app/entrypoint-marcell.sh"]
  111 +WORKDIR $APP_HOME
  112 +ENTRYPOINT ["/home/collector/app/entrypoint-marcell.sh"]
106 113 \ No newline at end of file
... ...
collector/loaders/marcell_rest.py
... ... @@ -83,9 +83,10 @@ def _create_document(pipeline, doc_id, metadata, text_file):
83 83 sequence=metadata_sequence)
84 84 metadata_sequence += 1
85 85  
86   - for label in metadata['keywords']:
87   - keyword_obj, _ = Keyword.objects.get_or_create(label=label)
88   - document.keywords.add(keyword_obj)
  86 + if 'keywords' in metadata:
  87 + for label in metadata['keywords']:
  88 + keyword_obj, _ = Keyword.objects.get_or_create(label=label)
  89 + document.keywords.add(keyword_obj)
89 90  
90 91 return document
91 92  
... ...