Commit 87f2744166643eda4b73deb53b565916bd07c301

Authored by Bartłomiej Nitoń
1 parent badd76e0

Update docker configuration.

Too many changes to show.

To preserve performance only 9 of 10 files are displayed.

Dockerfile
@@ -14,7 +14,8 @@ ENV PYTHONUNBUFFERED 1 @@ -14,7 +14,8 @@ ENV PYTHONUNBUFFERED 1
14 14
15 # install dependencies 15 # install dependencies
16 RUN apt-get update && \ 16 RUN apt-get update && \
17 - apt-get install -y python3-pip 17 + apt-get install -y python3-pip && \
  18 + pip3 install --upgrade pip
18 COPY ./requirements.txt /usr/src/collector/requirements.txt 19 COPY ./requirements.txt /usr/src/collector/requirements.txt
19 RUN pip3 wheel --no-cache-dir --no-deps --wheel-dir /usr/src/collector/wheels -r requirements.txt 20 RUN pip3 wheel --no-cache-dir --no-deps --wheel-dir /usr/src/collector/wheels -r requirements.txt
20 21
@@ -30,33 +31,35 @@ FROM ubuntu:18.04 @@ -30,33 +31,35 @@ FROM ubuntu:18.04
30 RUN apt-get update && \ 31 RUN apt-get update && \
31 apt-get install -y locales && \ 32 apt-get install -y locales && \
32 locale-gen pl_PL.UTF-8 33 locale-gen pl_PL.UTF-8
  34 +
  35 +# set envs
33 ENV LANG pl_PL.UTF-8 36 ENV LANG pl_PL.UTF-8
34 ENV LC_ALL pl_PL.UTF-8 37 ENV LC_ALL pl_PL.UTF-8
  38 +ENV HOME=/home/collector
  39 +ENV APP_HOME=/home/collector/app
35 40
36 -# create directory for the collector user  
37 -RUN mkdir -p /home/collector  
38 -  
39 -# create the collector user  
40 -RUN addgroup --group collector && adduser collector --ingroup collector 41 +# create directory for the collector user and user itself
  42 +RUN mkdir -p $HOME && \
  43 + addgroup --group collector && \
  44 + adduser collector --ingroup collector
41 45
42 # create the appropriate directories 46 # create the appropriate directories
43 -ENV HOME=/home/collector  
44 -ENV APP_HOME=/home/collector/app  
45 RUN mkdir $APP_HOME 47 RUN mkdir $APP_HOME
46 WORKDIR $APP_HOME 48 WORKDIR $APP_HOME
47 49
48 # install dependencies 50 # install dependencies
49 COPY --from=builder /usr/src/collector/wheels /wheels 51 COPY --from=builder /usr/src/collector/wheels /wheels
50 COPY --from=builder /usr/src/collector/requirements.txt . 52 COPY --from=builder /usr/src/collector/requirements.txt .
51 -RUN apt-get install -y netcat openjdk-8-jre python3-pip software-properties-common wget && \ 53 +RUN apt-get update && apt-get install -y netcat openjdk-8-jre python3-pip software-properties-common wget && \
52 wget -O - http://download.sgjp.pl/apt/sgjp.gpg.key|apt-key add - && \ 54 wget -O - http://download.sgjp.pl/apt/sgjp.gpg.key|apt-key add - && \
53 apt-add-repository http://download.sgjp.pl/apt/ubuntu && \ 55 apt-add-repository http://download.sgjp.pl/apt/ubuntu && \
54 wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - && \ 56 wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - && \
55 echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" |tee /etc/apt/sources.list.d/pgdg.list && \ 57 echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" |tee /etc/apt/sources.list.d/pgdg.list && \
56 apt-get update && \ 58 apt-get update && \
57 - apt-get install -y morfeusz2 python3-morfeusz2 && \  
58 - DEBIAN_FRONTEND="noninteractive" apt-get -y install postgresql-12  
59 -RUN pip3 install --no-cache /wheels/* 59 + apt-get install -y morfeusz2 python3-morfeusz2 libopenblas-dev libomp-dev && \
  60 + DEBIAN_FRONTEND="noninteractive" apt-get -y install postgresql-12 && \
  61 + pip3 install --upgrade pip && \
  62 + pip3 install --no-cache /wheels/*
60 63
61 # copy project 64 # copy project
62 COPY . $APP_HOME 65 COPY . $APP_HOME
@@ -65,17 +68,29 @@ COPY . $APP_HOME @@ -65,17 +68,29 @@ COPY . $APP_HOME
65 WORKDIR ./tools/liner2/g419-external-dependencies 68 WORKDIR ./tools/liner2/g419-external-dependencies
66 RUN tar -xvf ./CRF++-0.57.tar.gz 69 RUN tar -xvf ./CRF++-0.57.tar.gz
67 WORKDIR ./CRF++-0.57 70 WORKDIR ./CRF++-0.57
68 -RUN ./configure  
69 -RUN make  
70 -RUN make install  
71 -RUN ldconfig 71 +RUN ./configure && \
  72 + make && \
  73 + make install && \
  74 + make clean && \
  75 + ldconfig
  76 +
  77 +# install eurobert
  78 +WORKDIR $APP_HOME/tools
  79 +RUN wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/eurobert-model.tar.gz && \
  80 + tar -xvf ./eurobert-model.tar.gz && \
  81 + rm ./eurobert-model.tar.gz
  82 +
  83 +# install labse
  84 +RUN wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/labse-model.tar.gz && \
  85 + tar -xvf ./labse-model.tar.gz && \
  86 + rm ./labse-model.tar.gz
72 WORKDIR $APP_HOME 87 WORKDIR $APP_HOME
73 88
74 # copy django settings 89 # copy django settings
75 COPY ./collector/collector/docker-settings.py $APP_HOME/collector/collector/settings.py 90 COPY ./collector/collector/docker-settings.py $APP_HOME/collector/collector/settings.py
76 91
77 # chown all the files to the collector user 92 # chown all the files to the collector user
78 -RUN chown -R collector:collector $APP_HOME 93 +RUN chown -R collector:collector $HOME
79 94
80 # change to the collector user 95 # change to the collector user
81 USER collector 96 USER collector
Dockerfile.marcell
@@ -14,7 +14,8 @@ ENV PYTHONUNBUFFERED 1 @@ -14,7 +14,8 @@ ENV PYTHONUNBUFFERED 1
14 14
15 # install dependencies 15 # install dependencies
16 RUN apt-get update && \ 16 RUN apt-get update && \
17 - apt-get install -y python3-pip 17 + apt-get install -y python3-pip && \
  18 + pip3 install --upgrade pip
18 COPY ./requirements.txt /usr/src/collector/requirements.txt 19 COPY ./requirements.txt /usr/src/collector/requirements.txt
19 RUN pip3 wheel --no-cache-dir --no-deps --wheel-dir /usr/src/collector/wheels -r requirements.txt 20 RUN pip3 wheel --no-cache-dir --no-deps --wheel-dir /usr/src/collector/wheels -r requirements.txt
20 21
@@ -38,8 +39,8 @@ ENV HOME=/home/collector @@ -38,8 +39,8 @@ ENV HOME=/home/collector
38 ENV APP_HOME=/home/collector/app 39 ENV APP_HOME=/home/collector/app
39 40
40 # create directory for the collector user and user itself 41 # create directory for the collector user and user itself
41 -RUN mkdir -p $HOME && \  
42 - addgroup --group collector && \ 42 +RUN mkdir -p $HOME && \
  43 + addgroup --group collector && \
43 adduser collector --ingroup collector 44 adduser collector --ingroup collector
44 45
45 # create the appropriate directories 46 # create the appropriate directories
@@ -55,8 +56,9 @@ RUN apt-get update && apt-get install -y openjdk-8-jre python3-pip software-prop @@ -55,8 +56,9 @@ RUN apt-get update && apt-get install -y openjdk-8-jre python3-pip software-prop
55 wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - && \ 56 wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - && \
56 echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" |tee /etc/apt/sources.list.d/pgdg.list && \ 57 echo "deb http://apt.postgresql.org/pub/repos/apt/ `lsb_release -cs`-pgdg main" |tee /etc/apt/sources.list.d/pgdg.list && \
57 apt-get update && \ 58 apt-get update && \
58 - apt-get install -y morfeusz2 python3-morfeusz2 && \ 59 + apt-get install -y morfeusz2 python3-morfeusz2 libopenblas-dev libomp-dev && \
59 DEBIAN_FRONTEND="noninteractive" apt-get -y install postgresql-12 && \ 60 DEBIAN_FRONTEND="noninteractive" apt-get -y install postgresql-12 && \
  61 + pip3 install --upgrade pip && \
60 pip3 install --no-cache /wheels/* 62 pip3 install --no-cache /wheels/*
61 63
62 # copy project 64 # copy project
@@ -66,25 +68,35 @@ COPY . $APP_HOME @@ -66,25 +68,35 @@ COPY . $APP_HOME
66 WORKDIR ./tools/liner2/g419-external-dependencies 68 WORKDIR ./tools/liner2/g419-external-dependencies
67 RUN tar -xvf ./CRF++-0.57.tar.gz 69 RUN tar -xvf ./CRF++-0.57.tar.gz
68 WORKDIR ./CRF++-0.57 70 WORKDIR ./CRF++-0.57
69 -RUN ./configure && \  
70 - make && \  
71 - make install && \  
72 - make clean && \ 71 +RUN ./configure && \
  72 + make && \
  73 + make install && \
  74 + make clean && \
73 ldconfig 75 ldconfig
  76 +
  77 +# install eurobert
  78 +WORKDIR $APP_HOME/tools
  79 +RUN wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/eurobert-model.tar.gz && \
  80 + tar -xvf ./eurobert-model.tar.gz && \
  81 + rm ./eurobert-model.tar.gz
74 WORKDIR $APP_HOME 82 WORKDIR $APP_HOME
75 83
76 # copy django settings 84 # copy django settings
77 COPY ./collector/collector/docker-settings.py $APP_HOME/collector/collector/settings.py 85 COPY ./collector/collector/docker-settings.py $APP_HOME/collector/collector/settings.py
78 86
79 # chown all the files to the collector user 87 # chown all the files to the collector user
80 -RUN chown -R collector:collector $APP_HOME 88 +RUN chown -R collector:collector $HOME
81 89
82 # configure and init database 90 # configure and init database
83 USER postgres 91 USER postgres
84 RUN /etc/init.d/postgresql start && \ 92 RUN /etc/init.d/postgresql start && \
85 psql --command "CREATE USER collector WITH SUPERUSER PASSWORD 'collector';" && \ 93 psql --command "CREATE USER collector WITH SUPERUSER PASSWORD 'collector';" && \
86 createdb -O collector collector && \ 94 createdb -O collector collector && \
87 - psql collector < $APP_HOME/resources/db/marcell-init.db 95 + wget https://manage.legis.nlp.ipipan.waw.pl/download/marcell/marcell-init.db.tar.gz && \
  96 + tar -xvf ./marcell-init.db.tar.gz && \
  97 + rm ./marcell-init.db.tar.gz && \
  98 + psql collector < ./marcell-init.db && \
  99 + rm ./marcell-init.db
88 100
89 # change to the root user 101 # change to the root user
90 USER 0 102 USER 0
README.md
@@ -2,13 +2,22 @@ @@ -2,13 +2,22 @@
2 2
3 ## Running MARCELL annotate API using Docker 3 ## Running MARCELL annotate API using Docker
4 4
  5 +#### Using docker-compose
  6 +
5 ```sh 7 ```sh
6 docker-compose up -d 8 docker-compose up -d
7 ``` 9 ```
8 10
9 -### Exemplary usage in Python 11 +#### Using docker
10 12
11 ```sh 13 ```sh
  14 +docker build -t "marcell-pl" -f Dockerfile.marcell .
  15 +docker run --name "marcell-pl-running" -p 8006:8000 -d marcell-pl
  16 +```
  17 +
  18 +### Exemplary usage in Python
  19 +
  20 +```
12 import requests 21 import requests
13 22
14 url = 'http://<container_url>:<exposed_port>/annotate' 23 url = 'http://<container_url>:<exposed_port>/annotate'
@@ -20,4 +29,3 @@ with open('/text/file/path.[txt/html/pdf]', 'rb') as text_file, open('/meta/fil @@ -20,4 +29,3 @@ with open('/text/file/path.[txt/html/pdf]', 'rb') as text_file, open('/meta/fil
20 ## Conda based installation 29 ## Conda based installation
21 30
22 See INSTALL.md for installation instructions. 31 See INSTALL.md for installation instructions.
23 -  
collector/collector/docker-settings.py
@@ -68,6 +68,9 @@ SOLR_URL = 'http://localhost:8983/solr/' @@ -68,6 +68,9 @@ SOLR_URL = 'http://localhost:8983/solr/'
68 # Language-agnostic BERT sentence embedding model path 68 # Language-agnostic BERT sentence embedding model path
69 LABSE_MODEL_PATH = os.path.join(TOOLS_DIR, 'labse', 'labse_bert_model') 69 LABSE_MODEL_PATH = os.path.join(TOOLS_DIR, 'labse', 'labse_bert_model')
70 70
  71 +# expose REST API
  72 +EXPOSE_API = True
  73 +
71 # Quick-start development settings - unsuitable for production 74 # Quick-start development settings - unsuitable for production
72 # See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/ 75 # See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/
73 76
collector/pipeline/urls.py
1 from django.urls import path 1 from django.urls import path
2 2
3 from . import views 3 from . import views
  4 +from collector import settings
4 5
5 -urlpatterns = [  
6 - path('annotate', views.AnnotateView.as_view(), name='annotate'),  
7 -] 6 +
  7 +urlpatterns = []
  8 +if settings.EXPOSE_API:
  9 + urlpatterns = [
  10 + path('annotate', views.AnnotateView.as_view(), name='annotate'),
  11 + ]
collector/projects/marcell/utils.py
@@ -100,6 +100,6 @@ def vectorize_eurovocs(): @@ -100,6 +100,6 @@ def vectorize_eurovocs():
100 labse = Labse(settings.LABSE_MODEL_PATH) 100 labse = Labse(settings.LABSE_MODEL_PATH)
101 for eurovoc_label in EuroVocLabel.objects.filter(vector__isnull=True): 101 for eurovoc_label in EuroVocLabel.objects.filter(vector__isnull=True):
102 print('Creating EuroVoc vector representation for "{}".'.format(eurovoc_label.text)) 102 print('Creating EuroVoc vector representation for "{}".'.format(eurovoc_label.text))
103 - laser_vector = [float(x) for x in list(labse.embed(eurovoc_label.text))]  
104 - eurovoc_label.vector = laser_vector 103 + labse_vector = [float(x) for x in list(labse.embed(eurovoc_label.text))]
  104 + eurovoc_label.vector = labse_vector
105 eurovoc_label.save() 105 eurovoc_label.save()
collector/terminology/eurovoc.py
@@ -20,10 +20,13 @@ def annotate(documents): @@ -20,10 +20,13 @@ def annotate(documents):
20 20
21 def _add_document_level_domains(document, title_based_tld_preditor, min_score, k): 21 def _add_document_level_domains(document, title_based_tld_preditor, min_score, k):
22 print('Adding EuroVoc domains to: {}.'.format(document.id)) 22 print('Adding EuroVoc domains to: {}.'.format(document.id))
  23 + eurovoc_domains = []
  24 + tld_score_value = 'sim'
  25 +
23 if document.keywords.exists(): 26 if document.keywords.exists():
24 - tld_score_value = 'sim'  
25 eurovoc_domains = _get_keyword_based_domains(document.keywords.all(), min_score, k) 27 eurovoc_domains = _get_keyword_based_domains(document.keywords.all(), min_score, k)
26 - else: 28 +
  29 + if not eurovoc_domains:
27 tld_score_value = 'prob' 30 tld_score_value = 'prob'
28 eurovoc_domains = _get_title_based_domains(document.title, title_based_tld_preditor, min_score, k) 31 eurovoc_domains = _get_title_based_domains(document.title, title_based_tld_preditor, min_score, k)
29 32
entrypoint.sh
@@ -15,6 +15,8 @@ python3 collector/manage.py makemigrations --noinput @@ -15,6 +15,8 @@ python3 collector/manage.py makemigrations --noinput
15 python3 collector/manage.py migrate 15 python3 collector/manage.py migrate
16 python3 collector/manage.py configure_marcell_pipelines 16 python3 collector/manage.py configure_marcell_pipelines
17 python3 collector/manage.py load_eurovoc_terms -i resources/eurovoc -l pl 17 python3 collector/manage.py load_eurovoc_terms -i resources/eurovoc -l pl
  18 +python3 collector/manage.py load_eurovoc_terms -i resources/eurovoc -l en
18 python3 collector/manage.py load_iate_terms -i resources/iate/iate.tbx 19 python3 collector/manage.py load_iate_terms -i resources/iate/iate.tbx
  20 +python3 collector/manage.py map_eurovoc_terms
19 21
20 exec "$@" 22 exec "$@"
requirements.txt
@@ -6,15 +6,14 @@ Django==2.2.5 @@ -6,15 +6,14 @@ Django==2.2.5
6 faiss 6 faiss
7 gunicorn==20.0.4 7 gunicorn==20.0.4
8 Keras==2.2.5 8 Keras==2.2.5
9 -laserembeddings[en,pl]  
10 natsort 9 natsort
11 psycopg2-binary 10 psycopg2-binary
12 pysolr 11 pysolr
13 python-dateutil 12 python-dateutil
14 -pytorch  
15 scikit-learn==0.22.1 13 scikit-learn==0.22.1
16 scrapy 14 scrapy
17 -tensorflow==1.14 15 +tensorflow>=1.15,<2
18 tika==1.19 16 tika==1.19
19 -xmldiff  
20 - 17 +torch
  18 +transformers>=3.0.2
  19 +xmldiff
21 \ No newline at end of file 20 \ No newline at end of file