models.py
3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from django.contrib.postgres.fields import ArrayField, JSONField
from django.db import models
from storage.models import Keyword
class EuroVocTerm(models.Model):
    """A EuroVoc entry identified by the pair (``tid``, ``type``).

    The hierarchy helpers below distinguish three ``type`` values:
    ``'domain'``, ``'thesaurus'`` and ``'descriptor'``. Terms are linked
    to each other through ``subterms``; the reverse side of that relation
    is available as ``superterms``.
    """

    tid = models.CharField(max_length=10)
    type = models.CharField(max_length=10)
    subterms = models.ManyToManyField(
        'EuroVocTerm',
        related_name='superterms',
    )

    class Meta:
        db_table = 'eurovoc_term'
        ordering = ['tid']
        unique_together = ['tid', 'type']

    def __str__(self):
        """Return the string forms of all related labels joined by ' | '."""
        return ' | '.join(map(str, self.labels.all()))

    def get_domains(self):
        """Return the distinct ``'domain'`` terms this term belongs to.

        A domain term yields itself. A thesaurus or descriptor term yields
        the domains of all its superterms, collected recursively, without
        duplicates and in first-seen order. Any other type yields an empty
        list.
        """
        if self.type == 'domain':
            return [self]
        if self.type not in ('thesaurus', 'descriptor'):
            return []

        found = []
        for parent in self.superterms.all():
            for candidate in parent.get_domains():
                if candidate not in found:
                    found.append(candidate)
        return found

    def get_subdomains(self):
        """Return the distinct ``'thesaurus'`` terms this term belongs to.

        A thesaurus term yields itself. A descriptor term yields the
        subdomains of all its superterms, collected recursively, without
        duplicates and in first-seen order. A domain term, or a term of
        any other type, yields an empty list.
        """
        if self.type == 'thesaurus':
            return [self]
        if self.type != 'descriptor':
            return []

        found = []
        for parent in self.superterms.all():
            for candidate in parent.get_subdomains():
                if candidate not in found:
                    found.append(candidate)
        return found
class EuroVocLabel(models.Model):
    """A text label in a single language attached to a ``EuroVocTerm``.

    Labels are reachable from their term through the ``labels`` reverse
    accessor and are deleted together with it.
    """

    lang = models.CharField(max_length=5)
    text = models.CharField(max_length=150)
    term = models.ForeignKey(
        EuroVocTerm,
        related_name='labels',
        on_delete=models.CASCADE,
    )
    lemmatization_graph = JSONField(blank=True, null=True)
    # NOTE(review): presumably flags non-preferred ("used for") labels;
    # IATETerm.eurovoc_terms only matches labels where this is False.
    used_for = models.BooleanField(default=False)
    # NOTE(review): presumably an embedding of ``text`` - confirm with the
    # code that populates it.
    vector = ArrayField(models.FloatField(), null=True)

    class Meta:
        db_table = 'eurovoc_label'
        ordering = ['term__tid', 'lang']

    def __str__(self):
        """Return the label rendered as ``<lang>: <text>``."""
        pattern = '{}: {}'
        return pattern.format(self.lang, self.text)
class IATETerm(models.Model):
    """An IATE entry keyed by ``tid``.

    ``subject_field`` holds a ``;``-separated list of subject names which
    ``eurovoc_terms`` matches against English EuroVoc labels.
    """

    tid = models.CharField(max_length=10, primary_key=True)
    subject_field = models.TextField(blank=True)

    class Meta:
        db_table = 'iate_term'
        ordering = ['tid']

    def eurovoc_terms(self):
        """Return the EuroVoc domain/thesaurus terms named in ``subject_field``.

        Each ``;``-separated subject is stripped of surrounding whitespace
        and matched exactly against the text of English ``EuroVocLabel``
        rows with ``used_for=False``. The terms owning those labels are
        returned, restricted to types ``'domain'`` and ``'thesaurus'``.

        Returns:
            A lazy ``EuroVocTerm`` queryset; the label lookup runs as a
            nested subquery when the queryset is evaluated.
        """
        subjects = [subject.strip() for subject in self.subject_field.split(';')]
        # One set-based lookup instead of a query per subject plus a query
        # per matching label to load its term. The default Meta ordering is
        # cleared because it is meaningless inside an IN (...) subquery.
        matching_tids = (
            EuroVocLabel.objects
            .filter(lang='en', text__in=subjects, used_for=False)
            .order_by()
            .values_list('term__tid', flat=True)
        )
        return EuroVocTerm.objects.filter(
            tid__in=matching_tids,
            type__in=['domain', 'thesaurus'],
        )

    def __str__(self):
        """Return the string forms of all related labels joined by ' | '."""
        return ' | '.join([str(label) for label in self.labels.all()])
class IATELabel(models.Model):
    """A text label in a single language attached to an ``IATETerm``.

    Labels are reachable from their term through the ``labels`` reverse
    accessor and are deleted together with it.
    """

    lang = models.CharField(max_length=5)
    text = models.CharField(max_length=1000)
    term = models.ForeignKey(
        IATETerm,
        related_name='labels',
        on_delete=models.CASCADE,
    )
    type = models.CharField(max_length=12)
    administrative_status = models.CharField(max_length=24, blank=True)
    reliability_code = models.PositiveIntegerField()
    lemmatization_graph = JSONField(blank=True, null=True)

    class Meta:
        db_table = 'iate_label'
        ordering = ['term__tid', 'lang']

    def __str__(self):
        """Return the label rendered as ``<lang>: <text>``."""
        pattern = '{}: {}'
        return pattern.format(self.lang, self.text)
class Keyword2EuroVoc(models.Model):
    """Link between a ``Keyword`` and a ``EuroVocTerm`` carrying two scores.

    Rows are ordered by ``score_pl`` and then ``score_en``, both descending.
    Both sides expose the link rows through a ``similarities`` reverse
    accessor.
    """

    keyword = models.ForeignKey(
        Keyword,
        related_name='similarities',
        on_delete=models.CASCADE,
    )
    eurovoc = models.ForeignKey(
        EuroVocTerm,
        related_name='similarities',
        on_delete=models.CASCADE,
    )
    # NOTE(review): the _pl / _en suffixes presumably denote Polish and
    # English scores - confirm with the code that fills them in.
    score_pl = models.FloatField(default=0.0)
    score_en = models.FloatField(default=0.0)

    class Meta:
        db_table = 'keyword2eurovoc'
        ordering = ['-score_pl', '-score_en']

    def __str__(self):
        """Return ``<keyword> <eurovoc> (<score_pl>, <score_en>)``."""
        keyword_text = str(self.keyword)
        eurovoc_text = str(self.eurovoc)
        return '{} {} ({}, {})'.format(
            keyword_text, eurovoc_text, self.score_pl, self.score_en
        )