Commit 1850891a742580ba3307cc657e93fb2172243ecb
1 parent
efc36fb7
Move Labse class to new file.
Showing 2 changed files with 13 additions and 12 deletions
collector/projects/marcell/utils.py
... | ... | @@ -3,25 +3,14 @@ import requests |
3 | 3 | |
4 | 4 | import faiss |
5 | 5 | import numpy as np |
6 | -from sentence_transformers import SentenceTransformer | |
7 | 6 | from sklearn.metrics.pairwise import cosine_similarity |
8 | -from sklearn.preprocessing import normalize | |
9 | 7 | |
10 | 8 | from downloader import settings as scrapy_settings |
9 | +from terminology.labse import Labse | |
11 | 10 | from terminology.models import EuroVocLabel, Keyword2EuroVoc |
12 | 11 | from storage.models import Keyword |
13 | 12 | |
14 | 13 | |
15 | -class Labse: | |
16 | - | |
17 | - def __init__(self): | |
18 | - self.model = SentenceTransformer('sentence-transformers/LaBSE') | |
19 | - | |
20 | - def embed(self, text): | |
21 | - embeddings = self.model.encode([text]) | |
22 | - return normalize(embeddings)[0] | |
23 | - | |
24 | - | |
25 | 14 | def map_kws_to_eurovoc(dim=768): |
26 | 15 | Keyword2EuroVoc.objects.all().delete() |
27 | 16 | |
... | ... |
collector/terminology/labse.py
0 → 100644
1 | +from sentence_transformers import SentenceTransformer | |
2 | +from sklearn.preprocessing import normalize | |
3 | + | |
4 | + | |
5 | +class Labse: | |
6 | + | |
7 | + def __init__(self): | |
8 | + self.model = SentenceTransformer('sentence-transformers/LaBSE') | |
9 | + | |
10 | + def embed(self, text): | |
11 | + embeddings = self.model.encode([text]) | |
12 | + return normalize(embeddings)[0] | |
... | ... |