Commit 19e22b7d7cd8316ce41c40a7f15bf71a81dffded
1 parent 72bb98ec
import of adjectives and adverbs from Witek
Showing 2 changed files with 86 additions and 13 deletions
dictionary/export.py
@@ -289,9 +289,9 @@ where ls.slownik in (%(vocabs)s) and %(antivocabs)s %(x_qual)s %(table_clause)s
                 % ', '.join(str(id) for id in self.refls),
             'attr_clauses': ' and '.join(attr_clauses)
         }
-        if tt.name == 'czasowniki':
-            print >>sys.stderr, query
-            print >>sys.stderr, [tt.id] + params + list(tt_c + cell_c + cell_c)
+        # if tt.name == 'czasowniki':
+        #     print >>sys.stderr, query
+        #     print >>sys.stderr, [tt.id] + params + list(tt_c + cell_c + cell_c)
         cursor.execute(
             query, [tt.id] + params + list(tt_c + cell_c + cell_c))
         for row in cursor:
dictionary/management/commands/import_witek.py
@@ -1,10 +1,12 @@
 # -*- coding: utf-8 -*-
+import sys
 from django.core.management.base import BaseCommand
 from django.db import transaction
 from django.db.models import Max
 from common.util import uniopen, no_history
 from dictionary.models import Lexeme, Vocabulary, LexemeInflectionPattern, \
-    Pattern, Qualifier, ClassificationValue, LexemeCV, Gender
+    Pattern, Qualifier, ClassificationValue, LexemeCV, Gender, \
+    LexemeAttributeValue, CrossReferenceType, CrossReference
 
 
 class Command(BaseCommand):
@@ -25,10 +27,31 @@ def import_lexemes(lines, comment):
         elements = line.strip().split(';')
         if elements[1] == 'subst':
             import_subst(elements, comment)
+        elif elements[1] == 'adv':
+            import_adv(elements, comment)
+        elif elements[1] == 'adj':
+            import_adj(elements, comment)
     transaction.commit()
     transaction.leave_transaction_management()
 
 
+next_id = None
+
+def new_lexeme(entry, pos, comment):
+    global next_id
+    if next_id:
+        next_id += 1
+    else:
+        next_id = Lexeme.all_objects.aggregate(Max('id'))['id__max'] + 1
+    l = Lexeme(
+        id=next_id, entry=entry, part_of_speech_id=pos,
+        status=Lexeme.STATUS_DESCRIBED, owner_vocabulary_id=WSJP,
+        comment=comment)
+    l.save()
+    WSJP.add_lexeme(l)
+    return l
+
+
 def import_subst(elements, comment):
     try:
         entry, pos, gender, pattern_data, commonness = elements
@@ -38,14 +61,7 @@ def import_subst(elements, comment):
     assert pos == 'subst'
     gender = Gender.objects.get(symbol=gender)
     lip_data = [p.rsplit(' ', 1) for p in pattern_data.split('/')]
-    next_id = Lexeme.all_objects.aggregate(Max('id'))['id__max'] + 1
-    l = Lexeme(
-        id=next_id, entry=entry, part_of_speech_id='subst',
-        status=Lexeme.STATUS_DESCRIBED, owner_vocabulary_id=WSJP,
-        comment=comment)
-    l.fix_homonym_number()
-    l.save()
-    WSJP.add_lexeme(l)
+    l = new_lexeme(entry, 'subst', comment)
     comm_value = ClassificationValue.objects.get(
         classification__name=u'pospolitość', label=commonness)
     LexemeCV.objects.create(lexeme=l, classification_value=comm_value)
@@ -62,4 +78,61 @@ def import_subst(elements, comment):
             raise ValueError(u"%s: can't find root" % repr(entry))
         lip.save()
         if qualifier:
-            lip.qualifiers.add(Qualifier.objects.get(label=qualifier))
\ No newline at end of file
+            lip.qualifiers.add(Qualifier.objects.get(label=qualifier))
+
+
+NDM = Pattern.objects.get(name='ndm')
+ADVADJ = CrossReferenceType.objects.get(symbol='advadj')
+
+def import_adv(elements, comment):
+    try:
+        entry, pos, pattern_name, adj_entry = elements
+    except ValueError:
+        print elements
+        raise
+    assert pos == 'adv' and pattern_name == 'ndm'
+    l = new_lexeme(entry, 'adv', comment)
+    lip = LexemeInflectionPattern(lexeme=l, index=1, pattern=NDM)
+    lip.root = lip.get_root()
+    lip.save()
+    adjs = Lexeme.objects.filter(entry=adj_entry)
+    if len(adjs) == 1:
+        adj = adjs.get()
+        CrossReference.objects.create(from_lexeme=l, to_lexeme=adj, type=ADVADJ)
+    else:
+        if len(adjs) == 0:
+            print >>sys.stderr, 'Brak przymiotnika: %s (%s)' % (adj_entry, entry)
+        else:
+            print >>sys.stderr, 'Niejednoznaczny przymiotnik: %s (%s)' % (
+                adj_entry, entry)
+
+
+POPRZ = LexemeAttributeValue.objects.get(
+    value=u'obecna', attribute__name=u'forma poprz.')
+NO_POPRZ = LexemeAttributeValue.objects.get(
+    value=u'nieobecna', attribute__name=u'forma poprz.')
+ZLOZ = LexemeAttributeValue.objects.get(
+    value=u'obecna', attribute__name=u'forma złoż.')
+NO_ZLOZ = LexemeAttributeValue.objects.get(
+    value=u'nieobecna', attribute__name=u'forma złoż.')
+
+def import_adj(elements, comment):
+    try:
+        entry, pos, pattern_name, zloz, poprz = elements
+    except ValueError:
+        print elements
+        raise
+    assert pos == 'adj'
+    l = new_lexeme(entry, 'adj', comment)
+    if zloz[0] == '+':
+        ZLOZ.add_lexeme(l)
+    else:
+        NO_ZLOZ.add_lexeme(l)
+    if poprz[0] == '+':
+        POPRZ.add_lexeme(l)
+    else:
+        NO_POPRZ.add_lexeme(l)
+    pattern = Pattern.objects.get(name=pattern_name)
+    lip = LexemeInflectionPattern(lexeme=l, index=1, pattern=pattern)
+    lip.root = lip.get_root()
+    lip.save()
\ No newline at end of file
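For orientation, a minimal sketch of the semicolon-separated lines the new adv/adj branches of import_lexemes expect, inferred from the tuple unpacking and checks in the diff above. The entry words, the pattern name 'P1', and the text after the '+'/'-' flags are illustrative assumptions, not values from the imported data (import_adj only looks at the leading '+'/'-' character).

# Illustrative sample lines, not taken from Witek's file.
sample_lines = [
    u'szybko;adv;ndm;szybki',       # entry;pos;pattern;base adjective for the advadj cross-reference
    u'szybki;adj;P1;+zloz;-poprz',  # entry;pos;pattern;forma zloz. flag;forma poprz. flag
]

for line in sample_lines:
    elements = line.strip().split(';')
    if elements[1] == 'adv':
        # import_adv would create an 'ndm' lexeme and link it to elements[3]
        print 'adv: %s (adjective %s)' % (elements[0], elements[3])
    elif elements[1] == 'adj':
        # import_adj would set the zloz./poprz. attributes from the flags
        print 'adj: %s, pattern %s, zloz %s, poprz %s' % tuple([elements[0]] + elements[2:])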