Commit a475a2a92f9a1cc4a0118cb9fc4c0f307719b255

Authored by Tomasz Bartosiak
2 parents b82f1028 248425eb

Merge branch 'dev' of http://git.nlp.ipipan.waw.pl/walenty/Slowal into dev

1   -Copyright (c) 2015, Bartłomiej Nitoń
  1 +Copyright (c) 2016 by Institute of Computer Science, Polish Academy of Sciences
2 2 All rights reserved.
3 3  
4 4 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
... ...
dictionary/management/commands/create_TEI_walenty.py
... ... @@ -2,11 +2,12 @@
2 2  
3 3 import datetime
4 4 import os
  5 +import tarfile
5 6  
6 7 from django.core.management.base import BaseCommand
7 8  
8 9 from dictionary.models import Lemma, Frame_Opinion_Value, \
9   - get_checked_statuses
  10 + get_ready_statuses
10 11 from dictionary.teixml import createteixml
11 12 from settings import WALENTY_PATH
12 13  
... ... @@ -14,12 +15,19 @@ class Command(BaseCommand):
14 15 args = 'none'
15 16  
def handle(self, *args, **options):
    """Export ready lemmas to TEI XML and pack the file into a tarball.

    The XML is written to ``<WALENTY_PATH>/walenty_<YYYYMMDD>.xml`` by
    ``createteixml`` and then stored, under its base name, in
    ``<WALENTY_PATH>/walenty_<YYYYMMDD>-TEI.tar.gz``. The intermediate
    XML file is deleted afterwards, whether or not the export succeeded.

    ``args`` and ``options`` are accepted for the management-command
    interface and are not used.
    """
    now = datetime.datetime.now().strftime('%Y%m%d')
    filename_base = '%s_%s' % ('walenty', now)
    base_path = os.path.join(WALENTY_PATH, filename_base)
    outpath = base_path + '.xml'
    try:
        ready_statuses = get_ready_statuses()
        lemmas = Lemma.objects.filter(old=False).order_by('entry_obj__name')
        ready_lemmas = lemmas.filter(status__in=ready_statuses)
        frame_opinion_values = Frame_Opinion_Value.objects.all()
        createteixml(outpath, ready_lemmas, frame_opinion_values)
        archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz')
        try:
            # arcname keeps the member path relative without having to
            # change the process-wide working directory.
            archive.add(outpath, arcname=os.path.basename(outpath))
        finally:
            # The archive is closed only once it has actually been opened,
            # so an earlier failure is not masked by a NameError here.
            archive.close()
    finally:
        # The XML may not exist if the export failed before writing it.
        if os.path.exists(outpath):
            os.remove(outpath)
... ...
dictionary/management/commands/create_tex_walenty.py 0 → 100644
  1 +#-*- coding:utf-8 -*-
  2 +
  3 +import codecs
  4 +import datetime
  5 +import HTMLParser
  6 +import os
  7 +import tarfile
  8 +
  9 +from django.core.management.base import BaseCommand
  10 +from django.template.loader import render_to_string
  11 +from django.utils.encoding import smart_str
  12 +
  13 +from dictionary.models import Lemma, get_ready_statuses
  14 +from settings import WALENTY_PATH
  15 +
  16 +class Command(BaseCommand):
  17 + args = 'none'
  18 + help = 'Script for creating Walenty vocabulary in tex format.'
  19 +
  20 + def handle(self, *args, **options):
  21 + try:
  22 + now = datetime.datetime.now().strftime('%Y%m%d')
  23 + filename_base = '%s_%s' % ('walenty', now)
  24 + base_path = os.path.join(WALENTY_PATH, filename_base)
  25 + outpath = base_path + '.tex'
  26 + ready_statuses = get_ready_statuses()
  27 + lemmas = Lemma.objects.filter(old=False)
  28 + ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name')
  29 + write_tex_walenty(outpath, ready_lemmas)
  30 + archive = tarfile.open(base_path + '-tex.tar.gz', 'w:gz')
  31 + os.chdir(WALENTY_PATH)
  32 + archive.add(os.path.basename(outpath))
  33 + finally:
  34 + archive.close()
  35 + os.remove(outpath)
  36 +
  37 +
  38 +def write_tex_walenty(outpath, lemmas):
  39 + try:
  40 + outfile = codecs.open(outpath, 'w')
  41 + h = HTMLParser.HTMLParser()
  42 + outfile.write(smart_str(h.unescape(render_to_string('tex/slowal.tex', {'lemmas': lemmas,
  43 + 'q_frame_opinions': [],
  44 + 'download_dict' : {'frame_opinions': []}}))))
  45 + finally:
  46 + outfile.close()
... ...
dictionary/management/commands/create_text_walenty.py
1 1 #-*- coding:utf-8 -*-
2 2  
3   -#Copyright (c) 2015, Bartłomiej Nitoń
4   -#All rights reserved.
5   -
6   -#Redistribution and use in source and binary forms, with or without modification, are permitted provided
7   -#that the following conditions are met:
8   -
9   -# Redistributions of source code must retain the above copyright notice, this list of conditions and
10   -# the following disclaimer.
11   -# Redistributions in binary form must reproduce the above copyright notice, this list of conditions
12   -# and the following disclaimer in the documentation and/or other materials provided with the distribution.
13   -
14   -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
15   -# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
16   -# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
17   -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
18   -# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
19   -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20   -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21   -# POSSIBILITY OF SUCH DAMAGE.
22   -
23 3 import datetime
24 4 import os
25 5 import tarfile
... ... @@ -52,7 +32,7 @@ class Command(BaseCommand):
52 32 all_stats = Counter({})
53 33 verified_stats = Counter({})
54 34 base_path = os.path.join(WALENTY_PATH, filename_base)
55   - archive = tarfile.open(base_path + '.tar.gz', 'w:gz')
  35 + archive = tarfile.open(base_path + '-text.tar.gz', 'w:gz')
56 36 os.chdir(WALENTY_PATH)
57 37 for pos in POS.objects.exclude(tag=u'unk').order_by('priority'):
58 38 pos_stats = create_pos_archive_and_get_stats(archive, pos, filename_base)
... ... @@ -65,7 +45,6 @@ class Command(BaseCommand):
65 45 archive.add(os.path.basename(checked_stats_path))
66 46 write_stats(ready_stats_path, all_stats)
67 47 archive.add(os.path.basename(ready_stats_path))
68   -
69 48 update_walenty_stats(all_stats)
70 49 finally:
71 50 archive.close()
... ... @@ -79,7 +58,7 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base):
79 58 try:
80 59 checked_statuses = get_checked_statuses()
81 60 ready_statuses = get_ready_statuses()
82   - lemmas = Lemma.objects.filter(old=False, entry_obj__pos=pos).order_by('entry')
  61 + lemmas = Lemma.objects.filter(old=False, entry_obj__pos=pos).order_by('entry_obj__name')
83 62 checked_lemmas = lemmas.filter(status__in=checked_statuses)
84 63 ready_lemmas = lemmas.filter(status__in=ready_statuses)
85 64  
... ...
dictionary/templates/tex/frame.tex
1   -{% comment %}
2   -% Copyright (c) 2012, Bartłomiej Nitoń
3   -% All rights reserved.
4   -%
5   -% Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6   -%
7   -% Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8   -% Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9   -%
10   -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11   -{% endcomment %}
12 1  
  2 +\begin{minipage}[t]{\textwidth}
13 3 \def\arraystretch{1.25}
14   -\begin{tabular}{p{5pt}ll}
15   - & ({{ fr_loop_counter }}) \textbf{ {{ lemma.entry }} {{ reflex }} ({{ negativity }},{{ predicativity }},{{ aspect }})}
16   - --- \textit{ {{ frame_opinion }} [{{ frame.id }}]} \\
17   -
18   -\end{tabular}
19   -
20   -\def\arraystretch{1.25}
21   -\begin{tabular}{p{10pt} {{ colsAlignmentTex }} }
22   -
23   -&
24   -{% for pos_dict in pos_dict_ls %}
25   - & \cellcolor{light}
26   - {% if pos_dict.pos_cats_str %}
27   - \textbf{({{ pos_dict.idx }}) {{ pos_dict.pos_cats_str }}}
28   - {% else %}
29   - \textbf{({{ pos_dict.idx }})}
30   - {% endif %}
31   -{% endfor %}\\
32   -
33   -{% for i in max_args %}
34   - {% for pos_dict in pos_dict_ls %}
35   - {% if forloop.first %}
36   - &&
37   - {% else %}
38   - &
39   - {% endif %}
40   - {% for argument in pos_dict.arguments %}
41   - {% if forloop.counter0 == forloop.parentloop.parentloop.counter0 %}
42   - {\footnotesize \framebox{ {{ argument.idx }} }} {{ argument.argument.text_rep }}
43   - {% endif %}
44   - {% endfor %}
45   - {% if forloop.last %}
46   - \\
47   - {% endif %}
48   - {% endfor %}
49   -{% endfor %}
50   -
51   -\end{tabular}
  4 +\begin{tabular}{lll}
  5 +({{ fr_loop_counter }}) \textbf{ {{ lemma.entry }} {{ reflex }} ({{ negativity }},{{ predicativity }},{{ aspect }})} --- \textit{ {{ frame_opinion }} [{{ frame.id }}]} \\
  6 +\end{tabular}\\
  7 +%{% for pos_dict in pos_dict_ls %}
  8 +% {% if pos_dict.pos_cats_str %}
  9 + \position{({{ pos_dict.idx }}) {{ pos_dict.pos_cats_str }}{% for argument in pos_dict.arguments %}\\\phrasetype{ {{ argument.idx }} }{ {{ argument.argument.text_rep }} }{% endfor %} }
  10 +% {% else %}
  11 + \position{({{ pos_dict.idx }}){% for argument in pos_dict.arguments %}\\\phrasetype{ {{ argument.idx }} }{ {{ argument.argument.text_rep }} }{% endfor %} }
  12 +% {% endif %}
  13 +%{% endfor %}
  14 +\end{minipage}
52 15  
53 16 {% if example_dict_ls %}
54 17 \vspace{5pt}
... ...
dictionary/templates/tex/slowal.tex
1   -{% comment %}
2   -% Copyright (c) 2012, Bartłomiej Nitoń
3   -% All rights reserved.
4   -%
5   -% Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6   -%
7   -% Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8   -% Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9   -%
10   -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11   -{% endcomment %}
12   -
13 1 {% load tex_tags %}
14 2  
15 3 \documentclass[12pt,a4paper,fleqn,landscape]{article}
... ... @@ -18,11 +6,8 @@
18 6 \usepackage[utf8x]{inputenc}
19 7 \usepackage[T1]{fontenc}
20 8 \usepackage{textcomp}
21   -%\usepackage{array}
22   -%\usepackage{times}
23   -%\usepackage{lmodern}
  9 +\usepackage{lmodern}
24 10 \usepackage{tgheros}
25   -%\usepackage{longtable}
26 11 \usepackage[top=50pt, bottom=75pt, left=50pt, right=50pt]{geometry}
27 12 \usepackage{color, colortbl}
28 13 \definecolor{dark}{rgb}{.7,.7,.7}
... ... @@ -40,13 +25,70 @@
40 25 \setlength{\parindent}{0cm}
41 26 \setlength{\parskip}{1ex}
42 27  
  28 +\usepackage{graphicx}
  29 +\newcommand*{\position}[1]{%
  30 + \def\arraystretch{1.25}
  31 + \begin{tabular}[t]{l}
  32 + \cellcolor{light}\bfseries#1
  33 + \end{tabular}%
  34 + \hfil\penalty500 \hfilneg\ignorespaces
  35 +}
  36 +\newsavebox{\argbox}
  37 +\newlength{\maxposlen}
  38 +\maxposlen=\dimexpr\columnwidth-2cm
  39 +\maxposlen=26cm
  40 +\newcommand*{\phrasetype}[2]{%
  41 + \setbox\argbox\hbox{ {\footnotesize \framebox{ #1 } } #2}%
  42 + \ifdim\wd\argbox<\maxposlen
  43 + \box\argbox
  44 + \else
  45 + \resizebox{\maxposlen}{!}{\box\argbox}
  46 + \fi
  47 +}
  48 +
43 49  
44   -\title{Walenty}
45   -\author{ {{request.user.username}} }
46   -\date{ {% now "jS F Y H:i" %} }
  50 +\title{\textbf{The Polish Valence Dictionary (Walenty)}}
  51 +\author{Institute of Computer Science, Polish Academy of Sciences}
  52 +\date{ {% now "jS F Y" %} }
47 53  
48 54 \begin{document}
49 55  
  56 +\maketitle
  57 +
  58 +The Polish Valence Dictionary (Walenty) is an adaptation of
  59 +the Syntactic Dictionary of Polish Verbs by Marek Świdziński
  60 +in its electronic version provided by Łukasz Dębowski and
  61 +Elżbieta Hajnicz and further expanded by Witold Kieraś to
  62 +include the most frequent verbs in the 1 million sample of
  63 +NKJP (National Corpus of Polish).
  64 +
  65 +The presented resource results from an automatic conversion
  66 +of the aforementioned dictionary, manually reviewed by Filip
  67 +Skwarski to include correct information about a number of new
  68 +features, including sentential subjects, passivisation, and
  69 +control relations.
  70 +
  71 +The format of the new dictionary has been established by Filip
  72 +Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski,
  73 +Marek Świdziński, and Marcin Woliński.
  74 +
  75 +The dictionary has been edited and compiled using a tool
  76 +created by Bartłomiej Nitoń.
  77 +
  78 +The original Syntactic Dictionary of Polish Verbs derives from:
  79 +
  80 +Marek Świdziński
  81 +Institute of Polish
  82 +Warsaw University
  83 +Warsaw, Poland
  84 +
  85 +© Copyright 1998,2012 by Marek Świdziński
  86 +
  87 +This work is distributed under a CC BY-SA license:
  88 +\textit{http://creativecommons.org/licenses/by-sa/2.0/}
  89 +
  90 +\newpage
  91 +
50 92 {% for lemma in lemmas %}
51 93 {% lemma_tex lemma forloop.counter0 q_frame_opinions download_dict %}
52 94 {% endfor %}
... ...
dictionary/views.py
... ... @@ -423,14 +423,14 @@ def manage_arg_realizations(request):
423 423 }
424 424 return to_return
425 425  
426   -def download_walenty(request):
  426 +def download_dict(request, format):
427 427 generation_date = datetime.datetime.now() - datetime.timedelta(days=1)
428 428 walenty_file_name = '%s_%s.tar.gz' % ('walenty', generation_date.strftime('%Y%m%d'))
429 429 walenty_path = os.path.join(settings.WALENTY_PATH, walenty_file_name)
430 430  
431 431 while not os.path.isfile(walenty_path):
432 432 generation_date = generation_date - datetime.timedelta(days=1)
433   - walenty_file_name = '%s_%s.tar.gz' % ('walenty', generation_date.strftime('%Y%m%d'))
  433 + walenty_file_name = '%s_%s-%s.tar.gz' % ('walenty', generation_date.strftime('%Y%m%d'), format)
434 434 walenty_path = os.path.join(settings.WALENTY_PATH, walenty_file_name)
435 435  
436 436 response = HttpResponse(file(walenty_path), content_type='application/x-gzip')
... ...
semantics/management/commands/find_hanging_examples.py
... ... @@ -18,7 +18,7 @@ def find_hanging_examples():
18 18 print_hanging_examples(lemma)
19 19  
20 20 def print_hanging_examples(lemma):
21   - lex_units = lemma.entry_obj.lexical_units().all()
  21 + lex_units = lemma.entry_obj.meanings.all()
22 22 for lu in lex_units:
23 23 lu_examples = LexicalUnitExamples.objects.filter(lexical_unit=lu)
24 24 for lu_ex in lu_examples:
... ...
semantics/utils.py
... ... @@ -61,10 +61,15 @@ def roles_match(roles1, roles2):
61 61 return False
62 62  
def preference_match(sel_preference1, sel_preference2):
    """Tell whether two selectional preferences match.

    Two missing (``None``) preferences match each other; a missing
    preference never matches a present one. Two present preferences match
    only when the general, synset, relation and synset-relation checks all
    succeed; the checks run in that order and stop at the first failure.

    Returns ``True`` or ``False``.
    """
    first_missing = sel_preference1 is None
    second_missing = sel_preference2 is None
    if first_missing or second_missing:
        # Both absent is a match; exactly one absent is not.
        return first_missing and second_missing

    if not general_sel_prefs_match(sel_preference1, sel_preference2):
        return False
    if not synset_sel_prefs_match(sel_preference1, sel_preference2):
        return False
    if not relation_sel_prefs_match(sel_preference1, sel_preference2):
        return False
    if not synset_rel_sel_prefs_match(sel_preference1, sel_preference2):
        return False
    return True
70 75  
... ...
templates/main_menu.html
1 1 <!--
2   -Copyright (c) 2012, Bartłomiej Nitoń
  2 +Copyright (c) 2016, Bartłomiej Nitoń
3 3 All rights reserved.
4 4  
5 5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
... ... @@ -36,5 +36,14 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
36 36 <li id='menu_option'><a href="{% url 'user_statistics' %}">Statystyki</a></li>
37 37 {% endif %}
38 38 <li id='menu_option'><a href="{% url 'manage_arg_realizations' %}">Rozwinięcia typów fraz</a></li>
39   - <li id='menu_option'><a href="{% url 'download_walenty' %}">Pobierz słownik</a></li>
  39 + <li><a href="#">Pobierz słownik</a>
  40 + <ul id="download">
  41 + <li id='menu_option'><a href="{% url 'download_dict' format='text' %}">Format tekstowy</a></li>
  42 + {% if user.is_authenticated %}
  43 + <li id='menu_option'><a href="{% url 'download_dict' format='tex' %}">Format TeX</a></li>
  44 + {% endif %}
  45 + <li id='menu_option'><a href="{% url 'download_dict' format='pdf' %}">PDF</a></li>
  46 + <li id='menu_option'><a href="{% url 'download_dict' format='TEI' %}">Format TEI</a></li>
  47 + </ul>
  48 + </li>
40 49 </ul>
... ...
... ... @@ -213,7 +213,7 @@ urlpatterns += patterns('dictionary.views',
213 213 url(r'^wszystkie_statystyki/$', 'all_statistics'),
214 214 url(r'^zarzadzanie_kontami/$', 'manage_users'),
215 215 url(r'^rozwiniecia_typow_fraz/$', 'manage_arg_realizations'),
216   - url(r'^pobieranie_slownika/$', 'download_walenty'),
  216 + url(r'^pobieranie_slownika/(?P<format>.+)/$', 'download_dict'),
217 217 )
218 218  
219 219 # notatki
... ...