Commit 19e22b7d7cd8316ce41c40a7f15bf71a81dffded

Authored by janek37
1 parent 72bb98ec

import przymiotników i przysłówków od Witka

dictionary/export.py
... ... @@ -289,9 +289,9 @@ where ls.slownik in (%(vocabs)s) and %(antivocabs)s %(x_qual)s %(table_clause)s
289 289 % ', '.join(str(id) for id in self.refls),
290 290 'attr_clauses': ' and '.join(attr_clauses)
291 291 }
292   - if tt.name == 'czasowniki':
293   - print >>sys.stderr, query
294   - print >>sys.stderr, [tt.id] + params + list(tt_c + cell_c + cell_c)
  292 + # if tt.name == 'czasowniki':
  293 + # print >>sys.stderr, query
  294 + # print >>sys.stderr, [tt.id] + params + list(tt_c + cell_c + cell_c)
295 295 cursor.execute(
296 296 query, [tt.id] + params + list(tt_c + cell_c + cell_c))
297 297 for row in cursor:
... ...
dictionary/management/commands/import_witek.py
1 1 # -*- coding: utf-8 -*-
  2 +import sys
2 3 from django.core.management.base import BaseCommand
3 4 from django.db import transaction
4 5 from django.db.models import Max
5 6 from common.util import uniopen, no_history
6 7 from dictionary.models import Lexeme, Vocabulary, LexemeInflectionPattern, \
7   - Pattern, Qualifier, ClassificationValue, LexemeCV, Gender
  8 + Pattern, Qualifier, ClassificationValue, LexemeCV, Gender, \
  9 + LexemeAttributeValue, CrossReferenceType, CrossReference
8 10  
9 11  
10 12 class Command(BaseCommand):
... ... @@ -25,10 +27,31 @@ def import_lexemes(lines, comment):
25 27 elements = line.strip().split(';')
26 28 if elements[1] == 'subst':
27 29 import_subst(elements, comment)
  30 + elif elements[1] == 'adv':
  31 + import_adv(elements, comment)
  32 + elif elements[1] == 'adj':
  33 + import_adj(elements, comment)
28 34 transaction.commit()
29 35 transaction.leave_transaction_management()
30 36  
31 37  
# Last lexeme id handed out by new_lexeme(); None until its first call.
next_id = None


def new_lexeme(entry, pos, comment):
    """Create, save and return a new Lexeme registered in WSJP.

    Ids are assigned by hand.  The first call reads the current maximum
    Lexeme id from the database; later calls only increment the cached
    module-level counter, so the aggregate query runs once per process.

    Arguments:
    entry -- value stored in the lexeme's ``entry`` field
    pos -- value stored as the lexeme's ``part_of_speech_id``
    comment -- value stored in the lexeme's ``comment`` field

    Returns the saved Lexeme instance.
    """
    global next_id
    if next_id is None:
        # aggregate() reports None for an empty table; fall back to 0 so
        # the very first lexeme gets id 1 instead of raising TypeError.
        max_id = Lexeme.all_objects.aggregate(Max('id'))['id__max']
        next_id = (max_id or 0) + 1
    else:
        next_id += 1
    # NOTE(review): the inline code this helper replaced in import_subst
    # called l.fix_homonym_number() before save() -- confirm that dropping
    # the call here is intended.
    l = Lexeme(
        id=next_id, entry=entry, part_of_speech_id=pos,
        status=Lexeme.STATUS_DESCRIBED, owner_vocabulary_id=WSJP,
        comment=comment)
    l.save()
    WSJP.add_lexeme(l)
    return l
  53 +
  54 +
32 55 def import_subst(elements, comment):
33 56 try:
34 57 entry, pos, gender, pattern_data, commonness = elements
... ... @@ -38,14 +61,7 @@ def import_subst(elements, comment):
38 61 assert pos == 'subst'
39 62 gender = Gender.objects.get(symbol=gender)
40 63 lip_data = [p.rsplit(' ', 1) for p in pattern_data.split('/')]
41   - next_id = Lexeme.all_objects.aggregate(Max('id'))['id__max'] + 1
42   - l = Lexeme(
43   - id=next_id, entry=entry, part_of_speech_id='subst',
44   - status=Lexeme.STATUS_DESCRIBED, owner_vocabulary_id=WSJP,
45   - comment=comment)
46   - l.fix_homonym_number()
47   - l.save()
48   - WSJP.add_lexeme(l)
  64 + l = new_lexeme(entry, 'subst', comment)
49 65 comm_value = ClassificationValue.objects.get(
50 66 classification__name=u'pospolitość', label=commonness)
51 67 LexemeCV.objects.create(lexeme=l, classification_value=comm_value)
... ... @@ -62,4 +78,61 @@ def import_subst(elements, comment):
62 78 raise ValueError(u"%s: can't find root" % repr(entry))
63 79 lip.save()
64 80 if qualifier:
65   - lip.qualifiers.add(Qualifier.objects.get(label=qualifier))
66 81 \ No newline at end of file
  82 + lip.qualifiers.add(Qualifier.objects.get(label=qualifier))
  83 +
  84 +
# Looked up once at import time and shared by every import_adv() call.
NDM = Pattern.objects.get(name='ndm')
ADVADJ = CrossReferenceType.objects.get(symbol='advadj')


def import_adv(elements, comment):
    """Import a single adverb record and link it to its adjective.

    ``elements`` must unpack into exactly four fields: the adverb entry,
    the part of speech (must be 'adv'), the pattern name (must be 'ndm')
    and the entry of the related adjective.  A record with a different
    number of fields is printed and the ValueError is re-raised.

    The adverb is created with the NDM pattern.  An ADVADJ cross-reference
    is added only when exactly one lexeme has the adjective's entry;
    otherwise a diagnostic is written to stderr and the adverb is kept
    without a cross-reference.
    """
    try:
        entry, pos, pattern_name, adj_entry = elements
    except ValueError:
        # Show the offending record before propagating the error.
        print(elements)
        raise
    assert pos == 'adv'
    assert pattern_name == 'ndm'

    adverb = new_lexeme(entry, 'adv', comment)
    inflection = LexemeInflectionPattern(lexeme=adverb, index=1, pattern=NDM)
    inflection.root = inflection.get_root()
    inflection.save()

    # NOTE(review): the lookup matches on entry only, not on part of
    # speech -- confirm that homonyms of other classes cannot appear here.
    candidates = Lexeme.objects.filter(entry=adj_entry)
    found = len(candidates)
    if found == 1:
        CrossReference.objects.create(
            from_lexeme=adverb, to_lexeme=candidates.get(), type=ADVADJ)
    elif found == 0:
        print >>sys.stderr, 'Brak przymiotnika: %s (%s)' % (adj_entry, entry)
    else:
        print >>sys.stderr, 'Niejednoznaczny przymiotnik: %s (%s)' % (
            adj_entry, entry)
  109 +
  110 +
# Attribute values for the two adjective attributes set by import_adj();
# fetched once at import time.
POPRZ = LexemeAttributeValue.objects.get(
    value=u'obecna', attribute__name=u'forma poprz.')
NO_POPRZ = LexemeAttributeValue.objects.get(
    value=u'nieobecna', attribute__name=u'forma poprz.')
ZLOZ = LexemeAttributeValue.objects.get(
    value=u'obecna', attribute__name=u'forma złoż.')
NO_ZLOZ = LexemeAttributeValue.objects.get(
    value=u'nieobecna', attribute__name=u'forma złoż.')


def import_adj(elements, comment):
    """Import a single adjective record.

    ``elements`` must unpack into exactly five fields: the entry, the
    part of speech (must be 'adj'), the inflection pattern name and two
    flag fields, ``zloz`` and ``poprz``.  A flag whose first character is
    '+' selects the u'obecna' attribute value; anything else selects
    u'nieobecna'.  A record with a different number of fields is printed
    and the ValueError is re-raised.
    """
    try:
        entry, pos, pattern_name, zloz, poprz = elements
    except ValueError:
        # Show the offending record before propagating the error.
        print(elements)
        raise
    assert pos == 'adj'

    adjective = new_lexeme(entry, 'adj', comment)

    # Only the first character of each flag field is inspected.
    zloz_value = ZLOZ if zloz[0] == '+' else NO_ZLOZ
    zloz_value.add_lexeme(adjective)
    poprz_value = POPRZ if poprz[0] == '+' else NO_POPRZ
    poprz_value.add_lexeme(adjective)

    inflection = LexemeInflectionPattern(
        lexeme=adjective, index=1,
        pattern=Pattern.objects.get(name=pattern_name))
    inflection.root = inflection.get_root()
    inflection.save()
67 140 \ No newline at end of file
... ...