Commit 1850891a742580ba3307cc657e93fb2172243ecb

Authored by Bartłomiej Nitoń
1 parent efc36fb7

Move Labse class to new file.

collector/projects/marcell/utils.py
... ... @@ -3,25 +3,14 @@ import requests
3 3  
4 4 import faiss
5 5 import numpy as np
6   -from sentence_transformers import SentenceTransformer
7 6 from sklearn.metrics.pairwise import cosine_similarity
8   -from sklearn.preprocessing import normalize
9 7  
10 8 from downloader import settings as scrapy_settings
  9 +from terminology.labse import Labse
11 10 from terminology.models import EuroVocLabel, Keyword2EuroVoc
12 11 from storage.models import Keyword
13 12  
14 13  
15   -class Labse:
16   -
17   - def __init__(self):
18   - self.model = SentenceTransformer('sentence-transformers/LaBSE')
19   -
20   - def embed(self, text):
21   - embeddings = self.model.encode([text])
22   - return normalize(embeddings)[0]
23   -
24   -
25 14 def map_kws_to_eurovoc(dim=768):
26 15 Keyword2EuroVoc.objects.all().delete()
27 16  
... ...
collector/terminology/labse.py 0 → 100644
  1 +from sentence_transformers import SentenceTransformer
  2 +from sklearn.preprocessing import normalize
  3 +
  4 +
class Labse:
    """Wrapper around a SentenceTransformer model that returns normalized embeddings.

    The model identifier defaults to LaBSE but may be overridden when
    constructing the wrapper.
    """

    # Model identifier used when the caller does not supply one.
    DEFAULT_MODEL_NAME = 'sentence-transformers/LaBSE'

    def __init__(self, model_name=DEFAULT_MODEL_NAME):
        """Load the underlying sentence-transformer model.

        Args:
            model_name: Identifier handed to ``SentenceTransformer``.
                Defaults to ``'sentence-transformers/LaBSE'``.
        """
        # NOTE: a model is constructed on every instantiation, so callers
        # that embed many texts should reuse a single Labse instance.
        self.model = SentenceTransformer(model_name)

    def embed(self, text):
        """Return the normalized embedding of a single text.

        Args:
            text: The text to encode.

        Returns:
            The first (and only) row of the normalized embedding matrix.
        """
        # Encode as a one-element batch; normalize() is called with its
        # defaults (sklearn documents these as row-wise L2 normalization),
        # then the single resulting row is extracted.
        embeddings = self.model.encode([text])
        return normalize(embeddings)[0]
... ...