get_examples.py
6.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#-*- coding:utf-8 -*-
import codecs
import datetime
import os
from django.core.management.base import BaseCommand
from dictionary.models import Lemma, NKJP_Example, get_ready_statuses, get_checked_statuses
from semantics.models import LexicalUnitExamples
from settings import PROJECT_PATH
# Directory, below the project root, that receives every file written by
# this command.
BASE_PATH = os.path.join(PROJECT_PATH, 'data')

# Header row of the tab-separated file produced by write_detailed_examples();
# the order matches the order of the values written for each example row.
# NOTE(review): u'zródło przykładu' looks like a misspelling of 'źródło';
# it is kept verbatim because the text is emitted at runtime.
LABELS = (
    u'hasło',
    u'status hasła',
    u'identyfikator schematu',
    u'schemat',
    u'opinia o schemacie',
    u'przykład',
    u'opinia o przykładzie',
    u'zródło przykładu',
    u'wybór typów fraz',
)
class Command(BaseCommand):
    """Management command that runs the example export (see get_examples)."""

    help = 'Get pinned examples from Slowal.'

    def handle(self, *args, **options):
        """Run the export.

        Positional arguments and options are accepted so that the signature
        matches the ``BaseCommand.handle(*args, **options)`` contract (a
        stray positional argument no longer raises TypeError); none of them
        is used.
        """
        get_examples()
def get_examples():
    """Write the three example-sentence exports.

    Two files are produced for lemmas with the '(S) sprawdzone' status (one
    in semantic mode, one in non-semantic mode) and a third, non-semantic
    file for lemmas whose status is any of the checked statuses.
    """
    s_checked_exports = (
        ('semantyczne-S_sprawdzone-20170811.txt', True),
        ('wszystkie-S_sprawdzone-20170811.txt', False),
    )
    for export_name, semantic_only in s_checked_exports:
        write_example_sentences(export_name, ['(S) sprawdzone'], semantic_only)

    # The checked statuses are looked up only after the first two exports.
    checked_names = []
    for checked_status in get_checked_statuses():
        checked_names.append(checked_status.status)
    write_example_sentences('wszystkie-sprawdzone-20170811.txt',
                            checked_names, False)

    # Exports based on the "ready" statuses are currently disabled:
    # ready_statuses = get_ready_statuses()
    # write_detailed_examples(ready_statuses)
    # write_examples(ready_statuses)
def write_detailed_examples(statuses):
    """Dump the examples of the selected lemmas to a tab-separated file.

    The file ``detailed_examples_<YYYYMMDD>.csv`` is created in BASE_PATH.
    It starts with the LABELS header row and then holds one row per
    (lemma, frame, example) combination, ordered by entry name and by the
    frame's ``text_rep``.  Phraseologic frames are skipped for lemmas whose
    ``phraseology_ready()`` is false.

    :param statuses: values matched against ``Lemma.status`` (``status__in``);
        only lemmas with ``old=False`` are considered.
    :raises IndexError: if a lemma has no opinion stored for one of its
        exported frames.
    """
    lemmas = Lemma.objects.filter(old=False).filter(status__in=statuses)
    now = datetime.datetime.now().strftime('%Y%m%d')
    path = os.path.join(BASE_PATH, 'detailed_examples_%s.csv' % now)

    # The context manager closes the file on every exit path.  The previous
    # try/finally referenced ``examples_file`` in ``finally`` even when the
    # failure happened before the file was opened, which replaced the real
    # error with an unbound-variable error.
    with codecs.open(path, 'wt', 'utf-8') as examples_file:
        examples_file.write(u'%s\n' % u'\t'.join(LABELS))
        for lemma in lemmas.order_by('entry_obj__name'):
            print(lemma)
            lemma_entry = lemma.entry_obj.name
            lemma_status = lemma.status.status
            for frame in lemma.frames.order_by('text_rep').all():
                if not lemma.phraseology_ready() and frame.phraseologic:
                    continue
                frame_opinion = lemma.frame_opinions.filter(frame=frame).all()[0].value
                for example in lemma.nkjp_examples.filter(frame=frame):
                    # Keep every example on a single row: line breaks and
                    # tabs inside the sentence would corrupt the TSV layout.
                    sentence = (example.sentence
                                .replace('\n', ' ')
                                .replace('\r', '')
                                .replace('\t', ' '))
                    arguments_selection = u' + '.join(
                        [u'%s' % selection.__unicode__()
                         for selection in example.arguments.all()])
                    row = (lemma_entry,
                           lemma_status,
                           frame.id,
                           frame.get_position_spaced_text_rep(),
                           frame_opinion,
                           sentence,
                           example.opinion.opinion,
                           example.source.source,
                           arguments_selection)
                    examples_file.write(
                        u'%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n' % row)
def write_examples(statuses):
    """Write a plain-text listing of lemmas, their frames and examples.

    The output goes to ``examples_gotowe_plus.txt`` in BASE_PATH.  For each
    selected lemma the entry name is written on its own line, followed by
    every frame whose opinion value is not u'zła' (one tab of indentation)
    and that frame's example sentences (two tabs and an arrow marker).  Two
    newlines are written after each lemma.

    :param statuses: values matched against ``Lemma.status`` (``status__in``);
        only lemmas with ``old=False`` are considered.
    """
    path = os.path.join(BASE_PATH, 'examples_gotowe_plus.txt')
    lemmas = (Lemma.objects
              .filter(old=False)
              .filter(status__in=statuses)
              .order_by('entry_obj__name')
              .all())

    # ``with`` guarantees the file is closed; the previous try/finally raised
    # an unbound-variable error from ``finally`` (masking the real error)
    # whenever codecs.open() itself failed.
    with codecs.open(path, 'wt', 'utf-8') as examples_file:
        for lemma in lemmas:
            print(lemma)
            examples_file.write(lemma.entry_obj.name+'\n')
            for frame in lemma.frames.order_by('text_rep').all():
                if lemma.frame_opinions.get(frame=frame).value.value != u'zła':
                    examples_file.write('\t%s\n' % frame.text_rep)
                    for example in lemma.nkjp_examples.filter(frame=frame):
                        examples_file.write('\t\t--> %s\n' % example.sentence)
            examples_file.write('\n\n')
def write_example_sentences(filename, statuses, semantic):
    """Write the selected NKJP example sentences, one per line.

    Only verb lemmas with ``old=False`` whose status name is in *statuses*
    are visited, ordered by entry name.  For each lemma, examples whose
    source ``sym_name`` is 'NKJP300M' or 'NKJP1800M' are considered and an
    example is skipped when:

    * the lemma's opinion about the example's frame is u'archaiczny' or
      u'zły', or
    * the example's own opinion is u'zły', or
    * in semantic mode, it is not returned by
      ``get_wanted_semantic_examples``; in non-semantic mode, it is returned
      by ``get_not_needed_semantic_examples``.

    :param filename: name of the output file, created inside BASE_PATH.
    :param statuses: status names matched with ``status__status__in``.
    :param semantic: true to export only examples attached to wanted
        semantic frames; false to export everything except examples attached
        to unwanted semantic frames.
    """
    path = os.path.join(BASE_PATH, filename)
    lemmas = (Lemma.objects
              .filter(old=False, entry_obj__pos__tag='verb')
              .filter(status__status__in=statuses)
              .order_by('entry_obj__name'))

    # ``with`` guarantees the file is closed; the previous try/finally raised
    # an unbound-variable error from ``finally`` (masking the real error)
    # whenever codecs.open() itself failed.
    with codecs.open(path, 'wt', 'utf-8') as examples_file:
        for lemma in lemmas:
            print(lemma)
            wrong_examples = lemma.nkjp_examples.filter(opinion__opinion=u'zły')
            # Each mode consults exactly one of the two semantic sets, so
            # only that one is built.
            if semantic:
                semantic_examples = get_wanted_semantic_examples(lemma)
            else:
                semantic_examples = get_not_needed_semantic_examples(lemma)

            corpus_examples = lemma.nkjp_examples.filter(
                source__sym_name__in=['NKJP300M', 'NKJP1800M'])
            for example in corpus_examples:
                frame_rejected = lemma.frame_opinions.filter(
                    frame=example.frame,
                    value__value__in=[u'archaiczny', u'zły']).exists()
                if frame_rejected:
                    continue

                listed = semantic_examples.filter(pk=example.pk).exists()
                # Semantic mode keeps members of the wanted set; the other
                # mode keeps everything outside the unwanted set.
                selected = listed if semantic else not listed
                if selected and not wrong_examples.filter(pk=example.pk).exists():
                    examples_file.write(u'%s\n' % example.sentence)
def get_not_needed_semantic_examples(lemma):
    """Return the examples attached to the lemma's rejected semantic frames.

    A frame counts as rejected when it is among the entry's
    ``actual_frames()`` and its opinion value is u'archaiczna' or u'zła'.
    Example ids are collected through the LexicalUnitExamples rows of every
    lexical unit of those frames.

    :param lemma: lemma whose ``entry_obj`` supplies the frames.
    :returns: NKJP_Example queryset restricted to the collected ids.
    """
    rejected_frames = lemma.entry_obj.actual_frames().filter(
        opinion__value__in=[u'archaiczna', u'zła'])
    example_ids = [
        link.example.id
        for frame in rejected_frames
        for unit in frame.lexical_units.all()
        for link in LexicalUnitExamples.objects.filter(lexical_unit=unit)
    ]
    return NKJP_Example.objects.filter(id__in=example_ids)
def get_wanted_semantic_examples(lemma):
    """Return the examples attached to the lemma's accepted semantic frames.

    A frame counts as accepted when it is among the entry's
    ``actual_frames()`` and its opinion value is neither u'archaiczna' nor
    u'zła'.  Example ids are collected through the LexicalUnitExamples rows
    of every lexical unit of those frames.

    :param lemma: lemma whose ``entry_obj`` supplies the frames.
    :returns: NKJP_Example queryset restricted to the collected ids.
    """
    accepted_frames = lemma.entry_obj.actual_frames().exclude(
        opinion__value__in=[u'archaiczna', u'zła'])
    example_ids = [
        link.example.id
        for frame in accepted_frames
        for unit in frame.lexical_units.all()
        for link in LexicalUnitExamples.objects.filter(lexical_unit=unit)
    ]
    return NKJP_Example.objects.filter(id__in=example_ids)