diff --git b/.gitignore a/.gitignore new file mode 100644 index 0000000..6cb1223 --- /dev/null +++ a/.gitignore @@ -0,0 +1,4 @@ +/target/ +.project +.settings +.classpath diff --git b/doc/llncs.cls a/doc/llncs.cls new file mode 100755 index 0000000..1d49f3d --- /dev/null +++ a/doc/llncs.cls @@ -0,0 +1,1207 @@ +% LLNCS DOCUMENT CLASS -- version 2.17 (12-Jul-2010) +% Springer Verlag LaTeX2e support for Lecture Notes in Computer Science +% +%% +%% \CharacterTable +%% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z +%% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z +%% Digits \0\1\2\3\4\5\6\7\8\9 +%% Exclamation \! Double quote \" Hash (number) \# +%% Dollar \$ Percent \% Ampersand \& +%% Acute accent \' Left paren \( Right paren \) +%% Asterisk \* Plus \+ Comma \, +%% Minus \- Point \. Solidus \/ +%% Colon \: Semicolon \; Less than \< +%% Equals \= Greater than \> Question mark \? +%% Commercial at \@ Left bracket \[ Backslash \\ +%% Right bracket \] Circumflex \^ Underscore \_ +%% Grave accent \` Left brace \{ Vertical bar \| +%% Right brace \} Tilde \~} +%% +\NeedsTeXFormat{LaTeX2e}[1995/12/01] +\ProvidesClass{llncs}[2010/07/12 v2.17 +^^J LaTeX document class for Lecture Notes in Computer Science] +% Options +\let\if@envcntreset\iffalse +\DeclareOption{envcountreset}{\let\if@envcntreset\iftrue} +\DeclareOption{citeauthoryear}{\let\citeauthoryear=Y} +\DeclareOption{oribibl}{\let\oribibl=Y} +\let\if@custvec\iftrue +\DeclareOption{orivec}{\let\if@custvec\iffalse} +\let\if@envcntsame\iffalse +\DeclareOption{envcountsame}{\let\if@envcntsame\iftrue} +\let\if@envcntsect\iffalse +\DeclareOption{envcountsect}{\let\if@envcntsect\iftrue} +\let\if@runhead\iffalse +\DeclareOption{runningheads}{\let\if@runhead\iftrue} + +\let\if@openright\iftrue +\let\if@openbib\iffalse +\DeclareOption{openbib}{\let\if@openbib\iftrue} + +% languages +\let\switcht@@therlang\relax +\def\ds@deutsch{\def\switcht@@therlang{\switcht@deutsch}} 
+\def\ds@francais{\def\switcht@@therlang{\switcht@francais}} + +\DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}} + +\ProcessOptions + +\LoadClass[twoside]{article} +\RequirePackage{multicol} % needed for the list of participants, index +\RequirePackage{aliascnt} + +\setlength{\textwidth}{12.2cm} +\setlength{\textheight}{19.3cm} +\renewcommand\@pnumwidth{2em} +\renewcommand\@tocrmarg{3.5em} +% +\def\@dottedtocline#1#2#3#4#5{% + \ifnum #1>\c@tocdepth \else + \vskip \z@ \@plus.2\p@ + {\leftskip #2\relax \rightskip \@tocrmarg \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \parindent #2\relax\@afterindenttrue + \interlinepenalty\@M + \leavevmode + \@tempdima #3\relax + \advance\leftskip \@tempdima \null\nobreak\hskip -\leftskip + {#4}\nobreak + \leaders\hbox{$\m@th + \mkern \@dotsep mu\hbox{.}\mkern \@dotsep + mu$}\hfill + \nobreak + \hb@xt@\@pnumwidth{\hfil\normalfont \normalcolor #5}% + \par}% + \fi} +% +\def\switcht@albion{% +\def\abstractname{Abstract.} +\def\ackname{Acknowledgement.} +\def\andname{and} +\def\lastandname{\unskip, and} +\def\appendixname{Appendix} +\def\chaptername{Chapter} +\def\claimname{Claim} +\def\conjecturename{Conjecture} +\def\contentsname{Table of Contents} +\def\corollaryname{Corollary} +\def\definitionname{Definition} +\def\examplename{Example} +\def\exercisename{Exercise} +\def\figurename{Fig.} +\def\keywordname{{\bf Keywords:}} +\def\indexname{Index} +\def\lemmaname{Lemma} +\def\contriblistname{List of Contributors} +\def\listfigurename{List of Figures} +\def\listtablename{List of Tables} +\def\mailname{{\it Correspondence to\/}:} +\def\noteaddname{Note added in proof} +\def\notename{Note} +\def\partname{Part} +\def\problemname{Problem} +\def\proofname{Proof} +\def\propertyname{Property} +\def\propositionname{Proposition} +\def\questionname{Question} +\def\remarkname{Remark} +\def\seename{see} +\def\solutionname{Solution} +\def\subclassname{{\it Subject Classifications\/}:} 
+\def\tablename{Table} +\def\theoremname{Theorem}} +\switcht@albion +% Names of theorem like environments are already defined +% but must be translated if another language is chosen +% +% French section +\def\switcht@francais{%\typeout{On parle francais.}% + \def\abstractname{R\'esum\'e.}% + \def\ackname{Remerciements.}% + \def\andname{et}% + \def\lastandname{ et}% + \def\appendixname{Appendice} + \def\chaptername{Chapitre}% + \def\claimname{Pr\'etention}% + \def\conjecturename{Hypoth\`ese}% + \def\contentsname{Table des mati\`eres}% + \def\corollaryname{Corollaire}% + \def\definitionname{D\'efinition}% + \def\examplename{Exemple}% + \def\exercisename{Exercice}% + \def\figurename{Fig.}% + \def\keywordname{{\bf Mots-cl\'e:}} + \def\indexname{Index} + \def\lemmaname{Lemme}% + \def\contriblistname{Liste des contributeurs} + \def\listfigurename{Liste des figures}% + \def\listtablename{Liste des tables}% + \def\mailname{{\it Correspondence to\/}:} + \def\noteaddname{Note ajout\'ee \`a l'\'epreuve}% + \def\notename{Remarque}% + \def\partname{Partie}% + \def\problemname{Probl\`eme}% + \def\proofname{Preuve}% + \def\propertyname{Caract\'eristique}% +%\def\propositionname{Proposition}% + \def\questionname{Question}% + \def\remarkname{Remarque}% + \def\seename{voir} + \def\solutionname{Solution}% + \def\subclassname{{\it Subject Classifications\/}:} + \def\tablename{Tableau}% + \def\theoremname{Th\'eor\`eme}% +} +% +% German section +\def\switcht@deutsch{%\typeout{Man spricht deutsch.}% + \def\abstractname{Zusammenfassung.}% + \def\ackname{Danksagung.}% + \def\andname{und}% + \def\lastandname{ und}% + \def\appendixname{Anhang}% + \def\chaptername{Kapitel}% + \def\claimname{Behauptung}% + \def\conjecturename{Hypothese}% + \def\contentsname{Inhaltsverzeichnis}% + \def\corollaryname{Korollar}% +%\def\definitionname{Definition}% + \def\examplename{Beispiel}% + \def\exercisename{\"Ubung}% + \def\figurename{Abb.}% + \def\keywordname{{\bf Schl\"usselw\"orter:}} + 
\def\indexname{Index} +%\def\lemmaname{Lemma}% + \def\contriblistname{Mitarbeiter} + \def\listfigurename{Abbildungsverzeichnis}% + \def\listtablename{Tabellenverzeichnis}% + \def\mailname{{\it Correspondence to\/}:} + \def\noteaddname{Nachtrag}% + \def\notename{Anmerkung}% + \def\partname{Teil}% +%\def\problemname{Problem}% + \def\proofname{Beweis}% + \def\propertyname{Eigenschaft}% +%\def\propositionname{Proposition}% + \def\questionname{Frage}% + \def\remarkname{Anmerkung}% + \def\seename{siehe} + \def\solutionname{L\"osung}% + \def\subclassname{{\it Subject Classifications\/}:} + \def\tablename{Tabelle}% +%\def\theoremname{Theorem}% +} + +% Ragged bottom for the actual page +\def\thisbottomragged{\def\@textbottom{\vskip\z@ plus.0001fil +\global\let\@textbottom\relax}} + +\renewcommand\small{% + \@setfontsize\small\@ixpt{11}% + \abovedisplayskip 8.5\p@ \@plus3\p@ \@minus4\p@ + \abovedisplayshortskip \z@ \@plus2\p@ + \belowdisplayshortskip 4\p@ \@plus2\p@ \@minus2\p@ + \def\@listi{\leftmargin\leftmargini + \parsep 0\p@ \@plus1\p@ \@minus\p@ + \topsep 8\p@ \@plus2\p@ \@minus4\p@ + \itemsep0\p@}% + \belowdisplayskip \abovedisplayskip +} + +\frenchspacing +\widowpenalty=10000 +\clubpenalty=10000 + +\setlength\oddsidemargin {63\p@} +\setlength\evensidemargin {63\p@} +\setlength\marginparwidth {90\p@} + +\setlength\headsep {16\p@} + +\setlength\footnotesep{7.7\p@} +\setlength\textfloatsep{8mm\@plus 2\p@ \@minus 4\p@} +\setlength\intextsep {8mm\@plus 2\p@ \@minus 2\p@} + +\setcounter{secnumdepth}{2} + +\newcounter {chapter} +\renewcommand\thechapter {\@arabic\c@chapter} + +\newif\if@mainmatter \@mainmattertrue +\newcommand\frontmatter{\cleardoublepage + \@mainmatterfalse\pagenumbering{Roman}} +\newcommand\mainmatter{\cleardoublepage + \@mainmattertrue\pagenumbering{arabic}} +\newcommand\backmatter{\if@openright\cleardoublepage\else\clearpage\fi + \@mainmatterfalse} + +\renewcommand\part{\cleardoublepage + \thispagestyle{empty}% + \if@twocolumn + \onecolumn + 
\@tempswatrue + \else + \@tempswafalse + \fi + \null\vfil + \secdef\@part\@spart} + +\def\@part[#1]#2{% + \ifnum \c@secnumdepth >-2\relax + \refstepcounter{part}% + \addcontentsline{toc}{part}{\thepart\hspace{1em}#1}% + \else + \addcontentsline{toc}{part}{#1}% + \fi + \markboth{}{}% + {\centering + \interlinepenalty \@M + \normalfont + \ifnum \c@secnumdepth >-2\relax + \huge\bfseries \partname~\thepart + \par + \vskip 20\p@ + \fi + \Huge \bfseries #2\par}% + \@endpart} +\def\@spart#1{% + {\centering + \interlinepenalty \@M + \normalfont + \Huge \bfseries #1\par}% + \@endpart} +\def\@endpart{\vfil\newpage + \if@twoside + \null + \thispagestyle{empty}% + \newpage + \fi + \if@tempswa + \twocolumn + \fi} + +\newcommand\chapter{\clearpage + \thispagestyle{empty}% + \global\@topnum\z@ + \@afterindentfalse + \secdef\@chapter\@schapter} +\def\@chapter[#1]#2{\ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \refstepcounter{chapter}% + \typeout{\@chapapp\space\thechapter.}% + \addcontentsline{toc}{chapter}% + {\protect\numberline{\thechapter}#1}% + \else + \addcontentsline{toc}{chapter}{#1}% + \fi + \else + \addcontentsline{toc}{chapter}{#1}% + \fi + \chaptermark{#1}% + \addtocontents{lof}{\protect\addvspace{10\p@}}% + \addtocontents{lot}{\protect\addvspace{10\p@}}% + \if@twocolumn + \@topnewpage[\@makechapterhead{#2}]% + \else + \@makechapterhead{#2}% + \@afterheading + \fi} +\def\@makechapterhead#1{% +% \vspace*{50\p@}% + {\centering + \ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \large\bfseries \@chapapp{} \thechapter + \par\nobreak + \vskip 20\p@ + \fi + \fi + \interlinepenalty\@M + \Large \bfseries #1\par\nobreak + \vskip 40\p@ + }} +\def\@schapter#1{\if@twocolumn + \@topnewpage[\@makeschapterhead{#1}]% + \else + \@makeschapterhead{#1}% + \@afterheading + \fi} +\def\@makeschapterhead#1{% +% \vspace*{50\p@}% + {\centering + \normalfont + \interlinepenalty\@M + \Large \bfseries #1\par\nobreak + \vskip 40\p@ + }} + +\renewcommand\section{\@startsection{section}{1}{\z@}% + 
{-18\p@ \@plus -4\p@ \@minus -4\p@}% + {12\p@ \@plus 4\p@ \@minus 4\p@}% + {\normalfont\large\bfseries\boldmath + \rightskip=\z@ \@plus 8em\pretolerance=10000 }} +\renewcommand\subsection{\@startsection{subsection}{2}{\z@}% + {-18\p@ \@plus -4\p@ \@minus -4\p@}% + {8\p@ \@plus 4\p@ \@minus 4\p@}% + {\normalfont\normalsize\bfseries\boldmath + \rightskip=\z@ \@plus 8em\pretolerance=10000 }} +\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}% + {-18\p@ \@plus -4\p@ \@minus -4\p@}% + {-0.5em \@plus -0.22em \@minus -0.1em}% + {\normalfont\normalsize\bfseries\boldmath}} +\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}% + {-12\p@ \@plus -4\p@ \@minus -4\p@}% + {-0.5em \@plus -0.22em \@minus -0.1em}% + {\normalfont\normalsize\itshape}} +\renewcommand\subparagraph[1]{\typeout{LLNCS warning: You should not use + \string\subparagraph\space with this class}\vskip0.5cm +You should not use \verb|\subparagraph| with this class.\vskip0.5cm} + +\DeclareMathSymbol{\Gamma}{\mathalpha}{letters}{"00} +\DeclareMathSymbol{\Delta}{\mathalpha}{letters}{"01} +\DeclareMathSymbol{\Theta}{\mathalpha}{letters}{"02} +\DeclareMathSymbol{\Lambda}{\mathalpha}{letters}{"03} +\DeclareMathSymbol{\Xi}{\mathalpha}{letters}{"04} +\DeclareMathSymbol{\Pi}{\mathalpha}{letters}{"05} +\DeclareMathSymbol{\Sigma}{\mathalpha}{letters}{"06} +\DeclareMathSymbol{\Upsilon}{\mathalpha}{letters}{"07} +\DeclareMathSymbol{\Phi}{\mathalpha}{letters}{"08} +\DeclareMathSymbol{\Psi}{\mathalpha}{letters}{"09} +\DeclareMathSymbol{\Omega}{\mathalpha}{letters}{"0A} + +\let\footnotesize\small + +\if@custvec +\def\vec#1{\mathchoice{\mbox{\boldmath$\displaystyle#1$}} +{\mbox{\boldmath$\textstyle#1$}} +{\mbox{\boldmath$\scriptstyle#1$}} +{\mbox{\boldmath$\scriptscriptstyle#1$}}} +\fi + +\def\squareforqed{\hbox{\rlap{$\sqcap$}$\sqcup$}} +\def\qed{\ifmmode\squareforqed\else{\unskip\nobreak\hfil +\penalty50\hskip1em\null\nobreak\hfil\squareforqed +\parfillskip=0pt\finalhyphendemerits=0\endgraf}\fi} + 
+\def\getsto{\mathrel{\mathchoice {\vcenter{\offinterlineskip +\halign{\hfil +$\displaystyle##$\hfil\cr\gets\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr\gets +\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr\gets +\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +\gets\cr\to\cr}}}}} +\def\lid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil +$\displaystyle##$\hfil\cr<\cr\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr<\cr +\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr<\cr +\noalign{\vskip1pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +<\cr +\noalign{\vskip0.9pt}=\cr}}}}} +\def\gid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil +$\displaystyle##$\hfil\cr>\cr\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr>\cr +\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr>\cr +\noalign{\vskip1pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +>\cr +\noalign{\vskip0.9pt}=\cr}}}}} +\def\grole{\mathrel{\mathchoice {\vcenter{\offinterlineskip +\halign{\hfil +$\displaystyle##$\hfil\cr>\cr\noalign{\vskip-1pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr +>\cr\noalign{\vskip-1pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr +>\cr\noalign{\vskip-0.8pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +>\cr\noalign{\vskip-0.3pt}<\cr}}}}} +\def\bbbr{{\rm I\!R}} %reelle Zahlen +\def\bbbm{{\rm I\!M}} +\def\bbbn{{\rm I\!N}} %natuerliche Zahlen +\def\bbbf{{\rm I\!F}} +\def\bbbh{{\rm I\!H}} +\def\bbbk{{\rm I\!K}} +\def\bbbp{{\rm I\!P}} +\def\bbbone{{\mathchoice {\rm 1\mskip-4mu l} {\rm 1\mskip-4mu l} +{\rm 1\mskip-4.5mu l} {\rm 1\mskip-5mu 
l}}} +\def\bbbc{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}}}} +\def\bbbq{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm +Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}}}} +\def\bbbt{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm +T$}\hbox{\hbox to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}}}} +\def\bbbs{{\mathchoice +{\setbox0=\hbox{$\displaystyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox +to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox +to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox +to0pt{\kern0.5\wd0\vrule height0.45\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm S$}\hbox{\raise0.5\ht0\hbox 
+to0pt{\kern0.4\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox +to0pt{\kern0.55\wd0\vrule height0.45\ht0\hss}\box0}}}} +\def\bbbz{{\mathchoice {\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} +{\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} +{\hbox{$\mathsf\scriptstyle Z\kern-0.3em Z$}} +{\hbox{$\mathsf\scriptscriptstyle Z\kern-0.2em Z$}}}} + +\let\ts\, + +\setlength\leftmargini {17\p@} +\setlength\leftmargin {\leftmargini} +\setlength\leftmarginii {\leftmargini} +\setlength\leftmarginiii {\leftmargini} +\setlength\leftmarginiv {\leftmargini} +\setlength \labelsep {.5em} +\setlength \labelwidth{\leftmargini} +\addtolength\labelwidth{-\labelsep} + +\def\@listI{\leftmargin\leftmargini + \parsep 0\p@ \@plus1\p@ \@minus\p@ + \topsep 8\p@ \@plus2\p@ \@minus4\p@ + \itemsep0\p@} +\let\@listi\@listI +\@listi +\def\@listii {\leftmargin\leftmarginii + \labelwidth\leftmarginii + \advance\labelwidth-\labelsep + \topsep 0\p@ \@plus2\p@ \@minus\p@} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii + \advance\labelwidth-\labelsep + \topsep 0\p@ \@plus\p@\@minus\p@ + \parsep \z@ + \partopsep \p@ \@plus\z@ \@minus\p@} + +\renewcommand\labelitemi{\normalfont\bfseries --} +\renewcommand\labelitemii{$\m@th\bullet$} + +\setlength\arraycolsep{1.4\p@} +\setlength\tabcolsep{1.4\p@} + +\def\tableofcontents{\chapter*{\contentsname\@mkboth{{\contentsname}}% + {{\contentsname}}} + \def\authcount##1{\setcounter{auco}{##1}\setcounter{@auth}{1}} + \def\lastand{\ifnum\value{auco}=2\relax + \unskip{} \andname\ + \else + \unskip \lastandname\ + \fi}% + \def\and{\stepcounter{@auth}\relax + \ifnum\value{@auth}=\value{auco}% + \lastand + \else + \unskip, + \fi}% + \@starttoc{toc}\if@restonecol\twocolumn\fi} + +\def\l@part#1#2{\addpenalty{\@secpenalty}% + \addvspace{2em plus\p@}% % space above part line + \begingroup + \parindent \z@ + \rightskip \z@ plus 5em + \hrule\vskip5pt + \large % same size as for a contribution heading + \bfseries\boldmath % set line in boldface + \leavevmode % TeX 
command to enter horizontal mode. + #1\par + \vskip5pt + \hrule + \vskip1pt + \nobreak % Never break after part entry + \endgroup} + +\def\@dotsep{2} + +\let\phantomsection=\relax + +\def\hyperhrefextend{\ifx\hyper@anchor\@undefined\else +{}\fi} + +\def\addnumcontentsmark#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{\protect\numberline + {\thechapter}#3}{\thepage}\hyperhrefextend}}% +\def\addcontentsmark#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{#3}{\thepage}\hyperhrefextend}}% +\def\addcontentsmarkwop#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{#3}{0}\hyperhrefextend}}% + +\def\@adcmk[#1]{\ifcase #1 \or +\def\@gtempa{\addnumcontentsmark}% + \or \def\@gtempa{\addcontentsmark}% + \or \def\@gtempa{\addcontentsmarkwop}% + \fi\@gtempa{toc}{chapter}% +} +\def\addtocmark{% +\phantomsection +\@ifnextchar[{\@adcmk}{\@adcmk[3]}% +} + +\def\l@chapter#1#2{\addpenalty{-\@highpenalty} + \vskip 1.0em plus 1pt \@tempdima 1.5em \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip + {\large\bfseries\boldmath#1}\ifx0#2\hfil\null + \else + \nobreak + \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern + \@dotsep mu$}\hfill + \nobreak\hbox to\@pnumwidth{\hss #2}% + \fi\par + \penalty\@highpenalty \endgroup} + +\def\l@title#1#2{\addpenalty{-\@highpenalty} + \addvspace{8pt plus 1pt} + \@tempdima \z@ + \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip + #1\nobreak + \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern + \@dotsep mu$}\hfill + \nobreak\hbox to\@pnumwidth{\hss #2}\par + \penalty\@highpenalty \endgroup} + +\def\l@author#1#2{\addpenalty{\@highpenalty} + \@tempdima=15\p@ %\z@ + \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + 
\pretolerance=10000 + \leavevmode \advance\leftskip\@tempdima %\hskip -\leftskip + \textit{#1}\par + \penalty\@highpenalty \endgroup} + +\setcounter{tocdepth}{0} +\newdimen\tocchpnum +\newdimen\tocsecnum +\newdimen\tocsectotal +\newdimen\tocsubsecnum +\newdimen\tocsubsectotal +\newdimen\tocsubsubsecnum +\newdimen\tocsubsubsectotal +\newdimen\tocparanum +\newdimen\tocparatotal +\newdimen\tocsubparanum +\tocchpnum=\z@ % no chapter numbers +\tocsecnum=15\p@ % section 88. plus 2.222pt +\tocsubsecnum=23\p@ % subsection 88.8 plus 2.222pt +\tocsubsubsecnum=27\p@ % subsubsection 88.8.8 plus 1.444pt +\tocparanum=35\p@ % paragraph 88.8.8.8 plus 1.666pt +\tocsubparanum=43\p@ % subparagraph 88.8.8.8.8 plus 1.888pt +\def\calctocindent{% +\tocsectotal=\tocchpnum +\advance\tocsectotal by\tocsecnum +\tocsubsectotal=\tocsectotal +\advance\tocsubsectotal by\tocsubsecnum +\tocsubsubsectotal=\tocsubsectotal +\advance\tocsubsubsectotal by\tocsubsubsecnum +\tocparatotal=\tocsubsubsectotal +\advance\tocparatotal by\tocparanum} +\calctocindent + +\def\l@section{\@dottedtocline{1}{\tocchpnum}{\tocsecnum}} +\def\l@subsection{\@dottedtocline{2}{\tocsectotal}{\tocsubsecnum}} +\def\l@subsubsection{\@dottedtocline{3}{\tocsubsectotal}{\tocsubsubsecnum}} +\def\l@paragraph{\@dottedtocline{4}{\tocsubsubsectotal}{\tocparanum}} +\def\l@subparagraph{\@dottedtocline{5}{\tocparatotal}{\tocsubparanum}} + +\def\listoffigures{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn + \fi\section*{\listfigurename\@mkboth{{\listfigurename}}{{\listfigurename}}} + \@starttoc{lof}\if@restonecol\twocolumn\fi} +\def\l@figure{\@dottedtocline{1}{0em}{1.5em}} + +\def\listoftables{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn + \fi\section*{\listtablename\@mkboth{{\listtablename}}{{\listtablename}}} + \@starttoc{lot}\if@restonecol\twocolumn\fi} +\let\l@table\l@figure + +\renewcommand\listoffigures{% + \section*{\listfigurename + \@mkboth{\listfigurename}{\listfigurename}}% + \@starttoc{lof}% + } + 
+\renewcommand\listoftables{% + \section*{\listtablename + \@mkboth{\listtablename}{\listtablename}}% + \@starttoc{lot}% + } + +\ifx\oribibl\undefined +\ifx\citeauthoryear\undefined +\renewenvironment{thebibliography}[1] + {\section*{\refname} + \def\@biblabel##1{##1.} + \small + \list{\@biblabel{\@arabic\c@enumiv}}% + {\settowidth\labelwidth{\@biblabel{#1}}% + \leftmargin\labelwidth + \advance\leftmargin\labelsep + \if@openbib + \advance\leftmargin\bibindent + \itemindent -\bibindent + \listparindent \itemindent + \parsep \z@ + \fi + \usecounter{enumiv}% + \let\p@enumiv\@empty + \renewcommand\theenumiv{\@arabic\c@enumiv}}% + \if@openbib + \renewcommand\newblock{\par}% + \else + \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% + \fi + \sloppy\clubpenalty4000\widowpenalty4000% + \sfcode`\.=\@m} + {\def\@noitemerr + {\@latex@warning{Empty `thebibliography' environment}}% + \endlist} +\def\@lbibitem[#1]#2{\item[{[#1]}\hfill]\if@filesw + {\let\protect\noexpand\immediate + \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} +\newcount\@tempcntc +\def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi + \@tempcnta\z@\@tempcntb\m@ne\def\@citea{}\@cite{\@for\@citeb:=#2\do + {\@ifundefined + {b@\@citeb}{\@citeo\@tempcntb\m@ne\@citea\def\@citea{,}{\bfseries + ?}\@warning + {Citation `\@citeb' on page \thepage \space undefined}}% + {\setbox\z@\hbox{\global\@tempcntc0\csname b@\@citeb\endcsname\relax}% + \ifnum\@tempcntc=\z@ \@citeo\@tempcntb\m@ne + \@citea\def\@citea{,}\hbox{\csname b@\@citeb\endcsname}% + \else + \advance\@tempcntb\@ne + \ifnum\@tempcntb=\@tempcntc + \else\advance\@tempcntb\m@ne\@citeo + \@tempcnta\@tempcntc\@tempcntb\@tempcntc\fi\fi}}\@citeo}{#1}} +\def\@citeo{\ifnum\@tempcnta>\@tempcntb\else + \@citea\def\@citea{,\,\hskip\z@skip}% + \ifnum\@tempcnta=\@tempcntb\the\@tempcnta\else + {\advance\@tempcnta\@ne\ifnum\@tempcnta=\@tempcntb \else + \def\@citea{--}\fi + 
\advance\@tempcnta\m@ne\the\@tempcnta\@citea\the\@tempcntb}\fi\fi} +\else +\renewenvironment{thebibliography}[1] + {\section*{\refname} + \small + \list{}% + {\settowidth\labelwidth{}% + \leftmargin\parindent + \itemindent=-\parindent + \labelsep=\z@ + \if@openbib + \advance\leftmargin\bibindent + \itemindent -\bibindent + \listparindent \itemindent + \parsep \z@ + \fi + \usecounter{enumiv}% + \let\p@enumiv\@empty + \renewcommand\theenumiv{}}% + \if@openbib + \renewcommand\newblock{\par}% + \else + \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% + \fi + \sloppy\clubpenalty4000\widowpenalty4000% + \sfcode`\.=\@m} + {\def\@noitemerr + {\@latex@warning{Empty `thebibliography' environment}}% + \endlist} + \def\@cite#1{#1}% + \def\@lbibitem[#1]#2{\item[]\if@filesw + {\def\protect##1{\string ##1\space}\immediate + \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} + \fi +\else +\@cons\@openbib@code{\noexpand\small} +\fi + +\def\idxquad{\hskip 10\p@}% space that divides entry from number + +\def\@idxitem{\par\hangindent 10\p@} + +\def\subitem{\par\setbox0=\hbox{--\enspace}% second order + \noindent\hangindent\wd0\box0}% index entry + +\def\subsubitem{\par\setbox0=\hbox{--\,--\enspace}% third + \noindent\hangindent\wd0\box0}% order index entry + +\def\indexspace{\par \vskip 10\p@ plus5\p@ minus3\p@\relax} + +\renewenvironment{theindex} + {\@mkboth{\indexname}{\indexname}% + \thispagestyle{empty}\parindent\z@ + \parskip\z@ \@plus .3\p@\relax + \let\item\par + \def\,{\relax\ifmmode\mskip\thinmuskip + \else\hskip0.2em\ignorespaces\fi}% + \normalfont\small + \begin{multicols}{2}[\@makeschapterhead{\indexname}]% + } + {\end{multicols}} + +\renewcommand\footnoterule{% + \kern-3\p@ + \hrule\@width 2truecm + \kern2.6\p@} + \newdimen\fnindent + \fnindent1em +\long\def\@makefntext#1{% + \parindent \fnindent% + \leftskip \fnindent% + \noindent + \llap{\hb@xt@1em{\hss\@makefnmark\ }}\ignorespaces#1} + +\long\def\@makecaption#1#2{% + \small + 
 \vskip\abovecaptionskip + \sbox\@tempboxa{{\bfseries #1.} #2}% + \ifdim \wd\@tempboxa >\hsize + {\bfseries #1.} #2\par + \else + \global \@minipagefalse + \hb@xt@\hsize{\hfil\box\@tempboxa\hfil}% + \fi + \vskip\belowcaptionskip} + +\def\fps@figure{htbp} +\def\fnum@figure{\figurename\thinspace\thefigure} +\def \@floatboxreset {% + \reset@font + \small + \@setnobreak + \@setminipage +} +\def\fps@table{htbp} +\def\fnum@table{\tablename~\thetable} +\renewenvironment{table} + {\setlength\abovecaptionskip{0\p@}% + \setlength\belowcaptionskip{10\p@}% + \@float{table}} + {\end@float} +\renewenvironment{table*} + {\setlength\abovecaptionskip{0\p@}% + \setlength\belowcaptionskip{10\p@}% + \@dblfloat{table}} + {\end@dblfloat} + +\long\def\@caption#1[#2]#3{\par\addcontentsline{\csname + ext@#1\endcsname}{#1}{\protect\numberline{\csname + the#1\endcsname}{\ignorespaces #2}}\begingroup + \@parboxrestore + \@makecaption{\csname fnum@#1\endcsname}{\ignorespaces #3}\par + \endgroup} + +% LaTeX does not provide a command to enter the authors' institute +% addresses. The \institute command is defined here. 
+ +\newcounter{@inst} +\newcounter{@auth} +\newcounter{auco} +\newdimen\instindent +\newbox\authrun +\newtoks\authorrunning +\newtoks\tocauthor +\newbox\titrun +\newtoks\titlerunning +\newtoks\toctitle + +\def\clearheadinfo{\gdef\@author{No Author Given}% + \gdef\@title{No Title Given}% + \gdef\@subtitle{}% + \gdef\@institute{No Institute Given}% + \gdef\@thanks{}% + \global\titlerunning={}\global\authorrunning={}% + \global\toctitle={}\global\tocauthor={}} + +\def\institute#1{\gdef\@institute{#1}} + +\def\institutename{\par + \begingroup + \parskip=\z@ + \parindent=\z@ + \setcounter{@inst}{1}% + \def\and{\par\stepcounter{@inst}% + \noindent$^{\the@inst}$\enspace\ignorespaces}% + \setbox0=\vbox{\def\thanks##1{}\@institute}% + \ifnum\c@@inst=1\relax + \gdef\fnnstart{0}% + \else + \xdef\fnnstart{\c@@inst}% + \setcounter{@inst}{1}% + \noindent$^{\the@inst}$\enspace + \fi + \ignorespaces + \@institute\par + \endgroup} + +\def\@fnsymbol#1{\ensuremath{\ifcase#1\or\star\or{\star\star}\or + {\star\star\star}\or \dagger\or \ddagger\or + \mathchar "278\or \mathchar "27B\or \|\or **\or \dagger\dagger + \or \ddagger\ddagger \else\@ctrerr\fi}} + +\def\inst#1{\unskip$^{#1}$} +\def\fnmsep{\unskip$^,$} +\def\email#1{{\tt#1}} +\AtBeginDocument{\@ifundefined{url}{\def\url#1{#1}}{}% +\@ifpackageloaded{babel}{% +\@ifundefined{extrasenglish}{}{\addto\extrasenglish{\switcht@albion}}% +\@ifundefined{extrasfrenchb}{}{\addto\extrasfrenchb{\switcht@francais}}% +\@ifundefined{extrasgerman}{}{\addto\extrasgerman{\switcht@deutsch}}% +}{\switcht@@therlang}% +\providecommand{\keywords}[1]{\par\addvspace\baselineskip +\noindent\keywordname\enspace\ignorespaces#1}% +} +\def\homedir{\~{ }} + +\def\subtitle#1{\gdef\@subtitle{#1}} +\clearheadinfo +% +%%% to avoid hyperref warnings +\providecommand*{\toclevel@author}{999} +%%% to make title-entry parent of section-entries +\providecommand*{\toclevel@title}{0} +% +\renewcommand\maketitle{\newpage +\phantomsection + \refstepcounter{chapter}% + 
\stepcounter{section}% + \setcounter{section}{0}% + \setcounter{subsection}{0}% + \setcounter{figure}{0} + \setcounter{table}{0} + \setcounter{equation}{0} + \setcounter{footnote}{0}% + \begingroup + \parindent=\z@ + \renewcommand\thefootnote{\@fnsymbol\c@footnote}% + \if@twocolumn + \ifnum \col@number=\@ne + \@maketitle + \else + \twocolumn[\@maketitle]% + \fi + \else + \newpage + \global\@topnum\z@ % Prevents figures from going at top of page. + \@maketitle + \fi + \thispagestyle{empty}\@thanks +% + \def\\{\unskip\ \ignorespaces}\def\inst##1{\unskip{}}% + \def\thanks##1{\unskip{}}\def\fnmsep{\unskip}% + \instindent=\hsize + \advance\instindent by-\headlineindent + \if!\the\toctitle!\addcontentsline{toc}{title}{\@title}\else + \addcontentsline{toc}{title}{\the\toctitle}\fi + \if@runhead + \if!\the\titlerunning!\else + \edef\@title{\the\titlerunning}% + \fi + \global\setbox\titrun=\hbox{\small\rm\unboldmath\ignorespaces\@title}% + \ifdim\wd\titrun>\instindent + \typeout{Title too long for running head. Please supply}% + \typeout{a shorter form with \string\titlerunning\space prior to + \string\maketitle}% + \global\setbox\titrun=\hbox{\small\rm + Title Suppressed Due to Excessive Length}% + \fi + \xdef\@title{\copy\titrun}% + \fi +% + \if!\the\tocauthor!\relax + {\def\and{\noexpand\protect\noexpand\and}% + \protected@xdef\toc@uthor{\@author}}% + \else + \def\\{\noexpand\protect\noexpand\newline}% + \protected@xdef\scratch{\the\tocauthor}% + \protected@xdef\toc@uthor{\scratch}% + \fi + \addtocontents{toc}{\noexpand\protect\noexpand\authcount{\the\c@auco}}% + \addcontentsline{toc}{author}{\toc@uthor}% + \if@runhead + \if!\the\authorrunning! + \value{@inst}=\value{@auth}% + \setcounter{@auth}{1}% + \else + \edef\@author{\the\authorrunning}% + \fi + \global\setbox\authrun=\hbox{\small\unboldmath\@author\unskip}% + \ifdim\wd\authrun>\instindent + \typeout{Names of authors too long for running head. 
 Please supply}% + \typeout{a shorter form with \string\authorrunning\space prior to + \string\maketitle}% + \global\setbox\authrun=\hbox{\small\rm + Authors Suppressed Due to Excessive Length}% + \fi + \xdef\@author{\copy\authrun}% + \markboth{\@author}{\@title}% + \fi + \endgroup + \setcounter{footnote}{\fnnstart}% + \clearheadinfo} +% +\def\@maketitle{\newpage + \markboth{}{}% + \def\lastand{\ifnum\value{@inst}=2\relax + \unskip{} \andname\ + \else + \unskip \lastandname\ + \fi}% + \def\and{\stepcounter{@auth}\relax + \ifnum\value{@auth}=\value{@inst}% + \lastand + \else + \unskip, + \fi}% + \begin{center}% + \let\newline\\ + {\Large \bfseries\boldmath + \pretolerance=10000 + \@title \par}\vskip .8cm +\if!\@subtitle!\else {\large \bfseries\boldmath + \vskip -.65cm + \pretolerance=10000 + \@subtitle \par}\vskip .8cm\fi + \setbox0=\vbox{\setcounter{@auth}{1}\def\and{\stepcounter{@auth}}% + \def\thanks##1{}\@author}% + \global\value{@inst}=\value{@auth}% + \global\value{auco}=\value{@auth}% + \setcounter{@auth}{1}% +{\lineskip .5em +\noindent\ignorespaces +\@author\vskip.35cm} + {\small\institutename} + \end{center}% + } + +% definition of the "\spnewtheorem" command. +% +% Usage: +% +% \spnewtheorem{env_nam}{caption}[within]{cap_font}{body_font} +% or \spnewtheorem{env_nam}[numbered_like]{caption}{cap_font}{body_font} +% or \spnewtheorem*{env_nam}{caption}{cap_font}{body_font} +% +% New are "cap_font" and "body_font". They give the +% font definitions of the caption and of the text itself. +% +% "\spnewtheorem*" gives a theorem without number. +% +% A defined spnewtheorem environment is used as described +% by Lamport. 
+% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\def\@thmcountersep{} +\def\@thmcounterend{.} + +\def\spnewtheorem{\@ifstar{\@sthm}{\@Sthm}} + +% definition of \spnewtheorem with number + +\def\@spnthm#1#2{% + \@ifnextchar[{\@spxnthm{#1}{#2}}{\@spynthm{#1}{#2}}} +\def\@Sthm#1{\@ifnextchar[{\@spothm{#1}}{\@spnthm{#1}}} + +\def\@spxnthm#1#2[#3]#4#5{\expandafter\@ifdefinable\csname #1\endcsname + {\@definecounter{#1}\@addtoreset{#1}{#3}% + \expandafter\xdef\csname the#1\endcsname{\expandafter\noexpand + \csname the#3\endcsname \noexpand\@thmcountersep \@thmcounter{#1}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#4}{#5}}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@spynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname + {\@definecounter{#1}% + \expandafter\xdef\csname the#1\endcsname{\@thmcounter{#1}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#3}{#4}}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@spothm#1[#2]#3#4#5{% + \@ifundefined{c@#2}{\@latexerr{No theorem environment `#2' defined}\@eha}% + {\expandafter\@ifdefinable\csname #1\endcsname + {\newaliascnt{#1}{#2}% + \expandafter\xdef\csname #1name\endcsname{#3}% + \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#4}{#5}}% + \global\@namedef{end#1}{\@endtheorem}}}} + +\def\@spthm#1#2#3#4{\topsep 7\p@ \@plus2\p@ \@minus4\p@ +\refstepcounter{#1}% +\@ifnextchar[{\@spythm{#1}{#2}{#3}{#4}}{\@spxthm{#1}{#2}{#3}{#4}}} + +\def\@spxthm#1#2#3#4{\@spbegintheorem{#2}{\csname the#1\endcsname}{#3}{#4}% + \ignorespaces} + +\def\@spythm#1#2#3#4[#5]{\@spopargbegintheorem{#2}{\csname + the#1\endcsname}{#5}{#3}{#4}\ignorespaces} + +\def\@spbegintheorem#1#2#3#4{\trivlist + \item[\hskip\labelsep{#3#1\ #2\@thmcounterend}]#4} + +\def\@spopargbegintheorem#1#2#3#4#5{\trivlist + \item[\hskip\labelsep{#4#1\ #2}]{#4(#3)\@thmcounterend\ }#5} + +% definition of 
\spnewtheorem* without number + +\def\@sthm#1#2{\@Ynthm{#1}{#2}} + +\def\@Ynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname + {\global\@namedef{#1}{\@Thm{\csname #1name\endcsname}{#3}{#4}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@Thm#1#2#3{\topsep 7\p@ \@plus2\p@ \@minus4\p@ +\@ifnextchar[{\@Ythm{#1}{#2}{#3}}{\@Xthm{#1}{#2}{#3}}} + +\def\@Xthm#1#2#3{\@Begintheorem{#1}{#2}{#3}\ignorespaces} + +\def\@Ythm#1#2#3[#4]{\@Opargbegintheorem{#1} + {#4}{#2}{#3}\ignorespaces} + +\def\@Begintheorem#1#2#3{#3\trivlist + \item[\hskip\labelsep{#2#1\@thmcounterend}]} + +\def\@Opargbegintheorem#1#2#3#4{#4\trivlist + \item[\hskip\labelsep{#3#1}]{#3(#2)\@thmcounterend\ }} + +\if@envcntsect + \def\@thmcountersep{.} + \spnewtheorem{theorem}{Theorem}[section]{\bfseries}{\itshape} +\else + \spnewtheorem{theorem}{Theorem}{\bfseries}{\itshape} + \if@envcntreset + \@addtoreset{theorem}{section} + \else + \@addtoreset{theorem}{chapter} + \fi +\fi + +%definition of divers theorem environments +\spnewtheorem*{claim}{Claim}{\itshape}{\rmfamily} +\spnewtheorem*{proof}{Proof}{\itshape}{\rmfamily} +\if@envcntsame % alle Umgebungen wie Theorem. 
+ \def\spn@wtheorem#1#2#3#4{\@spothm{#1}[theorem]{#2}{#3}{#4}} +\else % alle Umgebungen mit eigenem Zaehler + \if@envcntsect % mit section numeriert + \def\spn@wtheorem#1#2#3#4{\@spxnthm{#1}{#2}[section]{#3}{#4}} + \else % nicht mit section numeriert + \if@envcntreset + \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} + \@addtoreset{#1}{section}} + \else + \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} + \@addtoreset{#1}{chapter}}% + \fi + \fi +\fi +\spn@wtheorem{case}{Case}{\itshape}{\rmfamily} +\spn@wtheorem{conjecture}{Conjecture}{\itshape}{\rmfamily} +\spn@wtheorem{corollary}{Corollary}{\bfseries}{\itshape} +\spn@wtheorem{definition}{Definition}{\bfseries}{\itshape} +\spn@wtheorem{example}{Example}{\itshape}{\rmfamily} +\spn@wtheorem{exercise}{Exercise}{\itshape}{\rmfamily} +\spn@wtheorem{lemma}{Lemma}{\bfseries}{\itshape} +\spn@wtheorem{note}{Note}{\itshape}{\rmfamily} +\spn@wtheorem{problem}{Problem}{\itshape}{\rmfamily} +\spn@wtheorem{property}{Property}{\itshape}{\rmfamily} +\spn@wtheorem{proposition}{Proposition}{\bfseries}{\itshape} +\spn@wtheorem{question}{Question}{\itshape}{\rmfamily} +\spn@wtheorem{solution}{Solution}{\itshape}{\rmfamily} +\spn@wtheorem{remark}{Remark}{\itshape}{\rmfamily} + +\def\@takefromreset#1#2{% + \def\@tempa{#1}% + \let\@tempd\@elt + \def\@elt##1{% + \def\@tempb{##1}% + \ifx\@tempa\@tempb\else + \@addtoreset{##1}{#2}% + \fi}% + \expandafter\expandafter\let\expandafter\@tempc\csname cl@#2\endcsname + \expandafter\def\csname cl@#2\endcsname{}% + \@tempc + \let\@elt\@tempd} + +\def\theopargself{\def\@spopargbegintheorem##1##2##3##4##5{\trivlist + \item[\hskip\labelsep{##4##1\ ##2}]{##4##3\@thmcounterend\ }##5} + \def\@Opargbegintheorem##1##2##3##4{##4\trivlist + \item[\hskip\labelsep{##3##1}]{##3##2\@thmcounterend\ }} + } + +\renewenvironment{abstract}{% + \list{}{\advance\topsep by0.35cm\relax\small + \leftmargin=1cm + \labelwidth=\z@ + \listparindent=\z@ + \itemindent\listparindent + 
\rightmargin\leftmargin}\item[\hskip\labelsep + \bfseries\abstractname]} + {\endlist} + +\newdimen\headlineindent % dimension for space between +\headlineindent=1.166cm % number and text of headings. + +\def\ps@headings{\let\@mkboth\@gobbletwo + \let\@oddfoot\@empty\let\@evenfoot\@empty + \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% + \leftmark\hfil} + \def\@oddhead{\normalfont\small\hfil\rightmark\hspace{\headlineindent}% + \llap{\thepage}} + \def\chaptermark##1{}% + \def\sectionmark##1{}% + \def\subsectionmark##1{}} + +\def\ps@titlepage{\let\@mkboth\@gobbletwo + \let\@oddfoot\@empty\let\@evenfoot\@empty + \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% + \hfil} + \def\@oddhead{\normalfont\small\hfil\hspace{\headlineindent}% + \llap{\thepage}} + \def\chaptermark##1{}% + \def\sectionmark##1{}% + \def\subsectionmark##1{}} + +\if@runhead\ps@headings\else +\ps@empty\fi + +\setlength\arraycolsep{1.4\p@} +\setlength\tabcolsep{1.4\p@} + +\endinput +%end of file llncs.cls diff --git b/doc/manual.pdf a/doc/manual.pdf new file mode 100644 index 0000000..2e04e62 --- /dev/null +++ a/doc/manual.pdf diff --git b/doc/manual.tex a/doc/manual.tex new file mode 100644 index 0000000..f58c861 --- /dev/null +++ a/doc/manual.tex @@ -0,0 +1,111 @@ +\documentclass[runningheads,a4paper]{llncs} + +\setcounter{tocdepth}{3} +\usepackage[OT4]{fontenc} +\usepackage{graphicx} +\usepackage[utf8]{inputenc} +%\usepackage[polish]{babel} + +\usepackage{url} + +\newcommand{\comment}[2]{\noindent{\textbf{\sffamily(\marginpar{\sffamily\footnotesize #1}#2)}}} +\newcommand{\kg}[1]{\comment{KG}{#1}} + + +\setlength{\parindent}{0pt} +\setlength{\parskip}{1ex plus 0.5ex minus 0.2ex} + +\begin{document} + +\mainmatter + +\title{Scoreference Manual} +\subtitle{\today} + +\author{Mateusz Kopeć} + +\institute{Institute of Computer Science, Polish Academy of Sciences \\ \url{m.kopec@ipipan.waw.pl}} + +\maketitle + + 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section*{About} + +The current version of the program facilitates the automatic evaluation of mention detection and coreference resolution, given automatically and manually annotated versions of the same corpus. Mention detection is evaluated by precision, recall and F-measure. Coreference resolution is scored by 5 metrics: MUC, $B^3$, CEAFE, CEAFM, BLANC. Details of the score calculation are given in Section~\ref{details}. + +\textbf{Homepage:} \url{http://zil.ipipan.waw.pl/MentionDetector} \\ +\textbf{Contact person:} Mateusz Kopeć [mateusz.kopec@ipipan.waw.pl] \\ +\textbf{Author:} Mateusz Kopeć \\ +\textbf{License:} CC BY v.3 + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\section{Requirements} +Java Runtime Environment (JRE) 1.7 or newer. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\section{Input data format} + +Gold and system texts must be stored in the same format, either TEI or MMAX. Details about the TEI and MMAX formats used can be found in the Polish Coreference Corpus description\footnote{Available at \url{http://zil.ipipan.waw.pl/PolishCoreferenceCorpus}}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\section{Output data format} + +Results are printed to standard output. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\section{Usage} + +The standalone jar does not need any installation.
To run it, simply execute:\\ + +\texttt{java -jar scoreference-1.0-SNAPSHOT.one-jar.jar <dir with gold texts> <dir with system texts> <type>}\\ + +\texttt{<dir with system texts>} is the directory with the corpus annotated automatically with coreference, \texttt{<dir with gold texts>} is the gold standard version of the same data. \texttt{<type>} should be either ``mmax'' or ``tei'', which indicates in what format the corpora are stored. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\section{Score details}\label{details} + +Scoring performed by \emph{Scoreference} is mostly in line with the SemEval \cite{Marquez2012} approach: we have chosen to evaluate mention detection and coreference resolution separately, but also allow for end-to-end system evaluation. + +\subsection{Mention detection measures} +We evaluate mention detection using precision, recall and F-measure. +Unlike in SemEval, we have decided not to reward partial matches, but instead to present two alternative mention detection scores: +\begin{itemize} + \item score of exact boundary matches (there is a match when the automatic and manual mentions have exactly the same boundaries) (EXACT), + \item score of head matches (we reduce system and manual mentions to their single head tokens and compare them) (HEAD). +\end{itemize} + +\subsection{Coreference resolution measures} +As there is still no consensus about the single best coreference resolution measure, our evaluation tool provides results for 5 widely known measures: MUC~\cite{muc}, $B^3$~\cite{b3}, mention- and entity-based CEAF~\cite{ceaf} (called CEAFM and CEAFE, respectively) and BLANC~\cite{blanc}. + +As these measures assume that system and gold mentions are the same, we implemented two alternatives to make that happen for systems not using gold mentions: +\begin{itemize} + \item consider only correct system mentions (i.e.
the intersection between gold and system mentions) (INTERSECT), + \item transform system and gold mentions as in \cite{Marquez2012}, following the procedure described below (TRANSFORM). +\end{itemize} +The TRANSFORM procedure for dealing with so-called ``twinless'' mentions (not in the intersection of system and gold mention sets) was presented in \cite{Marquez2012} and uses the following steps: +\begin{enumerate} + \item insert twinless true mentions into the response partition as singletons, + \item remove twinless system mentions that are resolved as singletons, + \item insert twinless system mentions that are resolved as coreferent into the key partition (as singletons). +\end{enumerate} +This approach was also used in the CoNLL-2011 shared task \cite{Pradhan2011}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\bibliographystyle{plain} +\bibliography{references} + +\end{document} + diff --git b/doc/references.bib a/doc/references.bib new file mode 100755 index 0000000..c2ce66a --- /dev/null +++ a/doc/references.bib @@ -0,0 +1,128 @@ +@book{ + prz:etal:11:ed, + editor = "Przepiórkowski, Adam and Bańko, Mirosław and Górski, Rafał L.
and Lewandowska-Tomaszczyk, Barbara", + publisher = "Wydawnictwo Naukowe PWN", + title = "Narodowy Korpus Języka Polskiego", + booktitle = "Narodowy Korpus Języka Polskiego", + year = "2011", + address = "Warsaw" +} + +@InProceedings{ban:prz:10, + author = "Piotr Bański and Adam Przepiórkowski", + title = "The {TEI} and the {NCP}: the model and its + application", + crossref = "lrec:lrslm:10"} + +@Proceedings{lrec:lrslm:10, + key = "LREC:LRSLM", + title = "{LREC}\,2010 Workshop on Language Resources: From + Storyboard to Sustainability and {LR} Lifecycle + Management", + booktitle = "{LREC}\,2010 Workshop on Language Resources: From + Storyboard to Sustainability and {LR} Lifecycle + Management", + address = "Valletta, Malta", + organization = "ELRA", + year = 2010 + } + + @inproceedings{Pradhan2011, + author = {Pradhan, Sameer and Ramshaw, Lance and Marcus, Mitchell and Palmer, Martha and Weischedel, Ralph and Xue, Nianwen}, + title = {CoNLL-2011 Shared Task: Modeling Unrestricted Coreference in OntoNotes}, + booktitle = {Proceedings of the Fifteenth Conference on Computational Natural Language Learning: Shared Task}, + series = {CONLL Shared Task '11}, + year = {2011}, + isbn = {9781937284084}, + location = {Portland, Oregon}, + pages = {1--27}, + numpages = {27}, + url = {http://dl.acm.org/citation.cfm?id=2132936.2132937}, + acmid = {2132937}, + publisher = {Association for Computational Linguistics}, + address = {Stroudsburg, PA, USA}, +} + + +@inproceedings{muc, + author = {Vilain, Marc and Burger, John and Aberdeen, John and Connolly, Dennis and Hirschman, Lynette}, + title = {A model-theoretic coreference scoring scheme}, + booktitle = {Proceedings of the 6th conference on Message understanding}, + series = {MUC6 '95}, + year = {1995}, + isbn = {1-55860-402-2}, + location = {Columbia, Maryland}, + pages = {45--52}, + numpages = {8}, + url = {http://dx.doi.org/10.3115/1072399.1072405}, + doi = {http://dx.doi.org/10.3115/1072399.1072405}, + acmid = {1072405}, 
+ publisher = {Association for Computational Linguistics}, + address = {Stroudsburg, PA, USA}, +} + +@ARTICLE{Marquez2012, + author = {M\`{a}rquez, Llu\'{\i}s and Recasens, Marta and Sapena, Emili}, + title = {Coreference resolution: an empirical study based on {SemEval}-2010 + shared Task 1}, + journal = {Language Resources and Evaluation}, + year = {2012}, + pages = {1--34}, + month = jul, + abstract = {This paper presents an empirical evaluation of coreference resolution + that covers several interrelated dimensions. The main goal is to + complete the comparative analysis from the {SemEval}-2010 task on + Coreference Resolution in Multiple Languages . To do so, the study + restricts the number of languages and systems involved, but extends + and deepens the analysis of the system outputs, including a more + qualitative discussion. The paper compares three automatic coreference + resolution systems for three languages (English, Catalan and Spanish) + in four evaluation settings, and using four evaluation measures. + Given that our main goal is not to provide a comparison between resolution + algorithms, these are merely used as tools to shed light on the different + conditions under which coreference resolution is evaluated. 
Although + the dimensions are strongly interdependent, making it very difficult + to extract general principles, the study reveals a series of interesting + issues in relation to coreference resolution: the portability of + systems across languages, the influence of the type and quality of + input annotations, and the behavior of the scoring measures.}, + citeulike-article-id = {10887108}, + citeulike-linkout-0 = {http://dx.doi.org/10.1007/s10579-012-9194-z}, + citeulike-linkout-1 = {http://www.springerlink.com/content/006123u1h7812455}, + day = {13}, + doi = {10.1007/s10579-012-9194-z}, + issn = {1574-020X}, + owner = {me2}, + posted-at = {2012-07-16 11:35:31}, + publisher = {Springer Netherlands}, + timestamp = {2013.11.18}, + url = {http://dx.doi.org/10.1007/s10579-012-9194-z} +} + + + +@inproceedings{b3, + author = {Amit Bagga and Breck Baldwin}, + title = {Algorithms for Scoring Coreference Chains}, + booktitle = {In The First International Conference on Language Resources and Evaluation Workshop on Linguistics Coreference}, + year = {1998}, + pages = {563--566} +} + +@Article{ceaf, + author = "Xiaoqiang Luo", + title = "{On Coreference Resolution Performance Metrics}", + booktitle = {Proceedings of HLT-EMNLP}, + year = {2005}, + pages = {25--32}, + address = "Vancouver, Canada", +} + +@article{blanc, + author = {Marta Recasens and E. 
Hovy}, + title = {BLANC: Implementing the Rand index for coreference evaluation}, + booktitle = {Natural Language Engineering, 17}, + pages = {485--510}, + doi = {10.1017/S135132491000029X}, + year = 2011, +} \ No newline at end of file diff --git b/pom.xml a/pom.xml new file mode 100755 index 0000000..f3fc106 --- /dev/null +++ a/pom.xml @@ -0,0 +1,81 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <groupId>pl.waw.ipipan.zil.core</groupId> + <artifactId>scoreference</artifactId> + <version>1.0-SNAPSHOT</version> + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + </properties> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.0</version> + <configuration> + <source>1.7</source> + <target>1.7</target> + </configuration> + </plugin> + <plugin> + <groupId>org.dstovall</groupId> + <artifactId>onejar-maven-plugin</artifactId> + <version>1.4.4</version> + <executions> + <execution> + <configuration> + <mainClass>pl.waw.ipipan.zil.core.scoreference.main.Main</mainClass> + </configuration> + <goals> + <goal>one-jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.9</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + <version>1.2.16</version> + </dependency> + <dependency> + <groupId>javax.xml.stream</groupId> + <artifactId>stax-api</artifactId> + <version>1.0-2</version> + </dependency> + <dependency> + <groupId>ipipan</groupId> + <artifactId>teiapi</artifactId> + <version>1.0-SNAPSHOT</version> + </dependency> + <dependency> + 
<groupId>org.apache.commons</groupId> + <artifactId>commons-io</artifactId> + <version>1.3.2</version> + </dependency> + </dependencies> + + <repositories> + <repository> + <id>zil-maven-repo</id> + <name>ZIL maven repository</name> + <url>http://maven.nlp.ipipan.waw.pl/content/repositories/snapshots/</url> + </repository> + </repositories> + + <pluginRepositories> + <pluginRepository> + <id>onejar-maven-plugin.googlecode.com</id> + <url>http://onejar-maven-plugin.googlecode.com/svn/mavenrepo</url> + </pluginRepository> + </pluginRepositories> +</project> \ No newline at end of file diff --git b/src/main/java/log4j.properties a/src/main/java/log4j.properties new file mode 100755 index 0000000..381fc38 --- /dev/null +++ a/src/main/java/log4j.properties @@ -0,0 +1,8 @@ +# konfiguracja root logger (logger) +log4j.appender.stderr=org.apache.log4j.ConsoleAppender +log4j.appender.stderr.layout=org.apache.log4j.PatternLayout +#log4j.appender.stderr.layout.ConversionPattern=[%t] %-5p %c - %m%n +log4j.appender.stderr.layout.ConversionPattern=[%p] [%C{1}] %m%n + +log4j.logger.pl.waw.ipipan.zil.core.scoreference=INFO, stderr +log4j.logger.ipipan.clarin=ERROR, stderr \ No newline at end of file diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/AnnotationPair.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/AnnotationPair.java new file mode 100755 index 0000000..ea864f4 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/AnnotationPair.java @@ -0,0 +1,17 @@ +package pl.waw.ipipan.zil.core.scoreference.basic; + +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public interface AnnotationPair { + + void addPair(SingleTextAnnotation goldText, SingleTextAnnotation sysText, boolean transform); + + int getTextCount(); + + SingleTextAnnotation getGold(int i); + + SingleTextAnnotation getSys(int i); + + Result getMentionDetectionResult(boolean zeroOnly); + +} diff --git 
b/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/AnnotationPairImpl.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/AnnotationPairImpl.java new file mode 100755 index 0000000..9a9a4d2 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/AnnotationPairImpl.java @@ -0,0 +1,117 @@ +package pl.waw.ipipan.zil.core.scoreference.basic; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; + +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class AnnotationPairImpl implements AnnotationPair { + + private final List<SingleTextAnnotation> golds = new ArrayList<>(); + private final List<SingleTextAnnotation> syss = new ArrayList<>(); + private int totalTruePositives; + private int totalSysMentions; + private int totalGoldMentions; + + private int zeroTruePositives; + private int zeroSysMentions; + private int zeroGoldMentions; + + @Override + public void addPair(SingleTextAnnotation goldText, SingleTextAnnotation sysText, boolean transform) { + + evaluateMD(goldText, sysText); + + // ensure that annotations have the same mentions + if (transform) { + // 1. insert twinless true mentions into the response partition as + // singletons + for (Mention m : goldText.getMentions()) { + if (!sysText.getMentions().contains(m)) { + sysText.addSingletons(m.getId()); + } + } + + for (Mention m : sysText.getMentions()) { + if (!goldText.getMentions().contains(m)) { + if (m.getMentionGroup().getMentions().size() == 1) { + // 2. remove twinless system mentions that are resolved + // as singletons + sysText.removeMentions(m.getId()); + } else { + // 3. 
insert twinless system mentions that are resolved + // as coreferent into the key partition (as singletons) + goldText.addSingletons(m.getId()); + } + } + } + + } else { + // simple intersection + Collection<Mention> goldMentions = goldText.getMentions(); + goldMentions.retainAll(sysText.getMentions()); + goldText.retainMentions(goldMentions); + sysText.retainMentions(goldMentions); + } + + this.golds.add(goldText); + this.syss.add(sysText); + } + + private void evaluateMD(SingleTextAnnotation goldText, SingleTextAnnotation sysText) { + Collection<Mention> goldMentions = goldText.getMentions(); + Collection<Mention> sysMentions = sysText.getMentions(); + totalGoldMentions += goldMentions.size(); + totalSysMentions += sysMentions.size(); + Collection<Mention> goldMentionsCopy = new HashSet<>(goldMentions); + goldMentionsCopy.retainAll(sysMentions); + totalTruePositives += goldMentionsCopy.size(); + + Collection<Mention> goldZeroMentions = goldText.getZeroMentions(); + Collection<Mention> sysZeroMentions = sysText.getZeroMentions(); + zeroGoldMentions += goldZeroMentions.size(); + zeroSysMentions += sysZeroMentions.size(); + Collection<Mention> goldZeroMentionsCopy = new HashSet<>(goldZeroMentions); + goldZeroMentionsCopy.retainAll(sysZeroMentions); + zeroTruePositives += goldZeroMentionsCopy.size(); + } + + @Override + public int getTextCount() { + return golds.size(); + } + + @Override + public Result getMentionDetectionResult(boolean zeroOnly) { + if (zeroOnly) { + System.out.println("Total gold mentions (zero subject only): " + zeroGoldMentions); + System.out.println("Total sys mentions (zero subject only): " + zeroSysMentions); + System.out.println("Total common mentions (zero subject only): " + zeroTruePositives); + return new Result(zeroTruePositives, zeroSysMentions, zeroTruePositives, zeroGoldMentions); + } else { + System.out.println("Total gold mentions: " + totalGoldMentions); + System.out.println("Total sys mentions: " + totalSysMentions); + 
System.out.println("Total common mentions: " + totalTruePositives); + return new Result(totalTruePositives, totalSysMentions, totalTruePositives, totalGoldMentions); + } + } + + @Override + public SingleTextAnnotation getGold(int i) { + if (i < golds.size()) + return golds.get(i); + else + return null; + } + + @Override + public SingleTextAnnotation getSys(int i) { + if (i < syss.size()) + return syss.get(i); + else + return null; + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/Mention.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/Mention.java new file mode 100755 index 0000000..e4f4c8a --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/Mention.java @@ -0,0 +1,53 @@ +package pl.waw.ipipan.zil.core.scoreference.basic; + +public class Mention { + + private MentionGroup mentionGroup = null; + private String id; + + public Mention(String i) { + id = i; + } + + public MentionGroup getMentionGroup() { + return mentionGroup; + } + + public void setMentionGroup(MentionGroup mentionGroup) { + this.mentionGroup = mentionGroup; + } + + public String getId() { + return id; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((id == null) ? 
0 : id.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Mention other = (Mention) obj; + if (id == null) { + if (other.id != null) + return false; + } else if (!id.equals(other.id)) + return false; + return true; + } + + public String toString() { + return id; + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/MentionGroup.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/MentionGroup.java new file mode 100755 index 0000000..c89da87 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/MentionGroup.java @@ -0,0 +1,32 @@ +package pl.waw.ipipan.zil.core.scoreference.basic; + +import java.util.HashSet; +import java.util.Set; + +public class MentionGroup implements Comparable<MentionGroup> { + + private Set<Mention> mentions = new HashSet<Mention>(); + + public Set<Mention> getMentions() { + return mentions; + } + + public void addMention(Mention m) { + mentions.add(m); + m.setMentionGroup(this); + } + + public String toString() { + return mentions.toString(); + } + + public void removeMention(Mention m) { + this.mentions.remove(m); + m.setMentionGroup(null); + } + + public int compareTo(MentionGroup o) { + return this.toString().compareTo(o.toString()); + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/SingleTextAnnotation.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/SingleTextAnnotation.java new file mode 100755 index 0000000..71f45fb --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/SingleTextAnnotation.java @@ -0,0 +1,22 @@ +package pl.waw.ipipan.zil.core.scoreference.basic; + +import java.util.Collection; + +public interface SingleTextAnnotation { + + public Collection<Mention> getMentions(); + + public Mention getMention(Mention m); + + public Collection<MentionGroup> 
getMentionGroups(); + + public void retainMentions(Collection<Mention> mentions); + + public String getFilename(); + + public void addSingletons(String... ids); + + public void removeMentions(String... id); + + public Collection<Mention> getZeroMentions(); +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/SingleTextAnnotationImpl.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/SingleTextAnnotationImpl.java new file mode 100755 index 0000000..fae1695 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/basic/SingleTextAnnotationImpl.java @@ -0,0 +1,134 @@ +package pl.waw.ipipan.zil.core.scoreference.basic; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +public class SingleTextAnnotationImpl implements SingleTextAnnotation { + + Set<MentionGroup> mentionGroups = new HashSet<>(); + Map<String, Mention> mentions = new HashMap<>(); + private String filename = null; + private Set<String> zeroSubjectIds = new HashSet<>(); + + public SingleTextAnnotationImpl() { + } + + public SingleTextAnnotationImpl(String filename) { + this.filename = filename; + } + + public String toString() { + ArrayList<MentionGroup> mgs = new ArrayList<MentionGroup>(mentionGroups); + Iterator<MentionGroup> it = mgs.iterator(); + while (it.hasNext()) { + if (it.next().getMentions().size() == 1) + it.remove(); + } + + Collections.sort(mgs); + return mgs.toString(); + } + + public Collection<Mention> getMentions() { + return new HashSet<Mention>(mentions.values()); + } + + public Mention getMention(Mention m) { + return mentions.get(m.getId()); + } + + public void addMentionGroup(String... 
ids) { + MentionGroup mg = new MentionGroup(); + for (String i : ids) { + Mention m = new Mention(i); + mg.addMention(m); + + mentions.put(i, m); + } + mentionGroups.add(mg); + } + + public void addSingletons(String... ids) { + for (String i : ids) { + MentionGroup mg = new MentionGroup(); + Mention m = new Mention(i); + mg.addMention(m); + + mentions.put(i, m); + mentionGroups.add(mg); + } + } + + private String[] intArrayToStringArray(int[] intArray) { + String[] stringArray = new String[intArray.length]; + for (int i = 0; i < intArray.length; i++) { + stringArray[i] = Integer.toString(intArray[i]); + } + return stringArray; + } + + public void addMentionGroup(int... ids) { + addMentionGroup(intArrayToStringArray(ids)); + } + + public void addSingletons(int... ids) { + addSingletons(intArrayToStringArray(ids)); + } + + public Collection<MentionGroup> getMentionGroups() { + return new HashSet<MentionGroup>(mentionGroups); + } + + public void retainMentions(Collection<Mention> mentions) { + Iterator<Entry<String, Mention>> iterator = this.mentions.entrySet().iterator(); + while (iterator.hasNext()) { + Entry<String, Mention> next = iterator.next(); + Mention m = next.getValue(); + if (!mentions.contains(m)) { + iterator.remove(); + MentionGroup mg = m.getMentionGroup(); + mg.removeMention(m); + if (mg.getMentions().size() == 0) + mentionGroups.remove(mg); + } + } + } + + public String getFilename() { + return filename; + } + + @Override + public void removeMentions(String... 
ids) { + for (String id : ids) { + Mention mention = this.mentions.get(id); + + MentionGroup mg = mention.getMentionGroup(); + mg.removeMention(mention); + if (mg.getMentions().size() == 0) + this.mentionGroups.remove(mg); + + this.mentions.remove(id); + } + } + + public void setZeroSubjectIds(Set<String> zeroSubjectIds) { + this.zeroSubjectIds = zeroSubjectIds; + } + + @Override + public Collection<Mention> getZeroMentions() { + Set<Mention> result = new HashSet<>(); + for (Mention m : mentions.values()) + if (zeroSubjectIds.contains(m.getId())) + result.add(m); + return result; + } +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/EvaluateSeveralSystemsLatex.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/EvaluateSeveralSystemsLatex.java new file mode 100755 index 0000000..e7d5eb0 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/EvaluateSeveralSystemsLatex.java @@ -0,0 +1,147 @@ +package pl.waw.ipipan.zil.core.scoreference.main; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.log4j.Logger; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPairImpl; +import pl.waw.ipipan.zil.core.scoreference.readers.Mmax; +import pl.waw.ipipan.zil.core.scoreference.readers.Reader; +import pl.waw.ipipan.zil.core.scoreference.readers.Tei; +import pl.waw.ipipan.zil.core.scoreference.scorers.Scorer; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerB3; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerBlanc; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerCeafe; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerCeafm; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerMuc; +import 
pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class EvaluateSeveralSystemsLatex { + + private static final Logger logger = Logger.getLogger(EvaluateSeveralSystemsLatex.class); + + /** + * @param args + */ + public static void main(String[] args) { + + if (args.length != 3) { + logger.error("Wrong number of arguments! Try: " + EvaluateSeveralSystemsLatex.class.getSimpleName() + + " goldDir sysDirs format"); + return; + } + + File gold = new File(args[0]); + File sys = new File(args[1]); + String format = args[2]; + + if (!gold.isDirectory()) { + logger.error("Gold data directory (" + gold + ") not found"); + return; + } + if (!sys.isDirectory()) { + logger.error("Sys data directory (" + sys + ") not found"); + return; + } + if (!format.equalsIgnoreCase("mmax") && !format.equalsIgnoreCase("tei")) { + logger.error("Wrong format! Try \"mmax\" or \"tei\"."); + return; + } + + Reader reader = format.equalsIgnoreCase("mmax") ? new Mmax() : new Tei(); + + printStats(false, false, gold, sys, reader); + printStats(true, false, gold, sys, reader); + printStats(false, true, gold, sys, reader); + printStats(true, true, gold, sys, reader); + } + + private static void printStats(boolean headsOnly, boolean transform, File gold, File sys, Reader reader) { + + List<File> systemDirs = new ArrayList<>(Arrays.asList(sys.listFiles())); + + List<Scorer> scorers = new ArrayList<>(); + scorers.add(new ScorerMuc()); + scorers.add(new ScorerB3()); + scorers.add(new ScorerCeafm()); + scorers.add(new ScorerCeafe()); + scorers.add(new ScorerBlanc()); + + Map<String, String> scorerClass2name = new LinkedHashMap<>(); + scorerClass2name.put("ScorerMuc", "MUC"); + scorerClass2name.put("ScorerB3", "$B^3$"); + scorerClass2name.put("ScorerCeafm", "CEAF-M"); + scorerClass2name.put("ScorerCeafe", "CEAF-E"); + scorerClass2name.put("ScorerBlanc", "BLANC"); + + Map<String, Map<String, Map<String, Double>>> system2scorer2results = new HashMap<>(); + for (File dir : systemDirs) { + String 
systemName = dir.getName(); + system2scorer2results.put(systemName, new HashMap<String, Map<String, Double>>()); + + AnnotationPair ann = new AnnotationPairImpl(); + reader.loadAnnotationsFromDirs(gold, dir, ann, headsOnly, transform); + + logger.info(ann.getTextCount() + " text pairs loaded."); + for (Scorer scorer : scorers) { + Result result = scorer.compare(ann); + Map<String, Double> resultMap = new LinkedHashMap<>(); + resultMap.put("Precision", result.getPrecision()); + resultMap.put("Recall", result.getRecall()); + resultMap.put("$F_1$\t", result.getF1()); + system2scorer2results.get(systemName).put(scorerClass2name.get(scorer.getName()), resultMap); + } + } + + System.out.println("Mention matching: " + (headsOnly ? "HEAD" : "EXACT")); + System.out.println("Mention mapping: " + (transform ? "TRANSFORM" : "INTERSECT")); + System.out.println(); + + List<String> measureNames = new ArrayList<>(system2scorer2results.values().iterator().next().values() + .iterator().next().keySet()); + List<String> systemNames = new ArrayList<>(system2scorer2results.keySet()); + Collections.sort(systemNames); + + for (String scorerName : scorerClass2name.values()) { + System.out.println("\\midrule"); + + boolean first = true; + for (String measure : measureNames) { + if (first) { + System.out.print("\\multirow{3}{*}{" + scorerName + "}\t& "); + first = false; + } else { + System.out.print("\t\t\t\t\t\t& "); + } + System.out.print(measure + "\t"); + + double max = 0; + for (String systemName : systemNames) { + Double score = system2scorer2results.get(systemName).get(scorerName).get(measure); + if (score > max) + max = score; + } + for (String systemName : systemNames) { + System.out.print("& "); + Double score = system2scorer2results.get(systemName).get(scorerName).get(measure); + String result = String.format("%.2f", score * 100) + "\\%"; + if (score == max) + result = "\\textbf{" + result + "}\t"; + else + result = result + "\t\t\t"; + System.out.print(result); + } + 
System.out.println("\\\\"); + } + System.out.println(); + } + } +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/KAlpha.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/KAlpha.java new file mode 100755 index 0000000..3047334 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/KAlpha.java @@ -0,0 +1,183 @@ +package pl.waw.ipipan.zil.core.scoreference.main; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.log4j.Logger; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPairImpl; +import pl.waw.ipipan.zil.core.scoreference.basic.Mention; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.readers.Mmax; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class KAlpha { + + private static final Logger logger = Logger.getLogger(KAlpha.class); + + /** + * @param args + */ + public static void main(String[] args) { + + if (args.length < 2) { + logger.error("Wrong number of arguments! Try: " + KAlpha.class.getSimpleName() + " goldDir sysDir [map]"); + return; + } + + File gold = new File(args[0]); + File sys = new File(args[1]); + File mapFile = args.length > 2 ? new File(args[2]) : null; + AnnotationPair ann = new AnnotationPairImpl(); + + (new Mmax()).loadAnnotationsFromDirs(gold, sys, ann, false, false); + logger.info(ann.getTextCount() + " pairs of texts found"); + + Result md = ann.getMentionDetectionResult(false); + System.out.println("\nMention detection result:"); + System.out.println(md); + + System.out.println("\nK Alpha results:"); + Map<String, String> map = mapFile == null ? 
null : loadMap(mapFile); + Map<String, Double> kalphaBlanc = KAlpha.computeKappaBlanc(ann, map); + List<Entry<String, Double>> entries = new ArrayList<>(kalphaBlanc.entrySet()); + Collections.sort(entries, new Comparator<Entry<String, Double>>() { + + @Override + public int compare(Entry<String, Double> o1, Entry<String, Double> o2) { + return o1.getValue().compareTo(o2.getValue()); + } + }); + for (Entry<String, Double> e : entries) + System.out.println(String.format("%6.3f", e.getValue()) + "\t" + e.getKey().replaceAll("\"", "")); + + } + + private static Map<String, String> loadMap(File mapFile) { + if (!mapFile.exists()) + return null; + + Map<String, String> result = new HashMap<>(); + + try (BufferedReader br = new BufferedReader(new FileReader(mapFile))) { + String line = null; + while ((line = br.readLine()) != null) { + String[] spl = line.split(" ", 2); + result.put(spl[0], spl[1]); + } + + } catch (IOException e) { + logger.error("Error loading map file: " + e); + } + + return result; + } + + public static Map<String, Double> computeKappaBlanc(AnnotationPair goldAndSys, Map<String, String> map) { + + Map<String, Double> kappaSums = new HashMap<>(); + Map<String, Double> textCounts = new HashMap<>(); + + for (int i = 0; i < goldAndSys.getTextCount(); i++) { + + int[] table = computeTable(goldAndSys.getGold(i), goldAndSys.getSys(i)); + + double kappa = computeAlphaBlanc4Text(table); + + String filename = goldAndSys.getGold(i).getFilename(); + String idx = filename.substring(filename.lastIndexOf("/") + 1, filename.lastIndexOf("_")); + String type = "only type"; + if (map != null) + type = map.get(idx); + + if (kappaSums.containsKey(type)) + kappaSums.put(type, kappaSums.get(type) + kappa); + else + kappaSums.put(type, kappa); + + if (textCounts.containsKey(type)) + textCounts.put(type, textCounts.get(type) + 1); + else + textCounts.put(type, 1.0); + } + + for (String type : kappaSums.keySet()) { + double sum = kappaSums.get(type); + double count = 
textCounts.get(type); + sum /= count; + + kappaSums.put(type, sum); + } + + return kappaSums; + } + + private static double computeAlphaBlanc4Text(int[] table) { + double totalRc = table[0]; + double totalWc = table[1]; + double totalRn = table[2]; + double totalWn = table[3]; + + double sum = (totalRc + totalRn + totalWc + totalWn); + + double agrObs = (totalRc + totalRn) / sum; + double agrExp = (totalRc + totalWc) / sum * (totalRc + totalWn) / sum + (totalRn + totalWc) / sum + * (totalRn + totalWn) / sum; + double kappa = (agrObs - agrExp) / (1 - agrExp); + + return kappa; + } + + public static int[] computeTable(SingleTextAnnotation gold, SingleTextAnnotation sys) { + + int rc = 0; + int wc = 0; + int rn = 0; + int wn = 0; + + List<Mention> mentions = new ArrayList<Mention>(gold.getMentions()); + int mentionsCount = mentions.size(); + + for (int i = 0; i < mentionsCount - 1; i++) { + Mention m1 = mentions.get(i); + for (int j = i + 1; j < mentionsCount; j++) { + Mention m2 = mentions.get(j); + + boolean goldCoreferent = m1.getMentionGroup().getMentions().contains(m2); + boolean sysCoreferent = sys.getMention(m1).getMentionGroup().getMentions().contains(sys.getMention(m2)); + + if (goldCoreferent) { + if (sysCoreferent) { + rc++; // right coreference link + } else { + wn++; // wrong non-coreference link + } + } else { + if (sysCoreferent) { + wc++; // wrong coreference link + } else { + rn++; // right non-coreference link + } + } + } + } + + int[] result = new int[4]; + result[0] = rc; + result[1] = wc; + result[2] = rn; + result[3] = wn; + + return result; + } +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/Main.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/Main.java new file mode 100755 index 0000000..3794767 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/main/Main.java @@ -0,0 +1,99 @@ +package pl.waw.ipipan.zil.core.scoreference.main; + +import java.io.File; +import java.util.ArrayList; +import 
java.util.List; + +import org.apache.log4j.Logger; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPairImpl; +import pl.waw.ipipan.zil.core.scoreference.readers.Mmax; +import pl.waw.ipipan.zil.core.scoreference.readers.Reader; +import pl.waw.ipipan.zil.core.scoreference.readers.Tei; +import pl.waw.ipipan.zil.core.scoreference.scorers.Scorer; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerB3; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerBlanc; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerCeafe; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerCeafm; +import pl.waw.ipipan.zil.core.scoreference.scorers.ScorerMuc; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class Main { + + private static final Logger logger = Logger.getLogger(Main.class); + + /** + * @param args + */ + public static void main(String[] args) { + + if (args.length != 3) { + logger.error("Wrong number of arguments! Try: " + Main.class.getSimpleName() + " goldDir sysDir format"); + return; + } + + File gold = new File(args[0]); + File sys = new File(args[1]); + String format = args[2]; + + if (!gold.isDirectory()) { + logger.error("Gold data directory (" + gold + ") not found"); + return; + } + if (!sys.isDirectory()) { + logger.error("Sys data directory (" + sys + ") not found"); + return; + } + if (!format.equalsIgnoreCase("mmax") && !format.equalsIgnoreCase("tei")) { + logger.error("Wrong format! Try \"mmax\" or \"tei\"."); + return; + } + + Reader reader = format.equalsIgnoreCase("mmax") ? 
new Mmax() : new Tei(); + + printStats(false, false, gold, sys, reader); + printStats(true, false, gold, sys, reader); + printStats(false, true, gold, sys, reader); + printStats(true, true, gold, sys, reader); + } + + private static void printStats(boolean headsOnly, boolean transform, File gold, File sys, Reader reader) { + AnnotationPair ann = new AnnotationPairImpl(); + reader.loadAnnotationsFromDirs(gold, sys, ann, headsOnly, transform); + + String sepLine = "###########################################"; + + System.out.println(sepLine); + System.out.println(sepLine); + System.out.println("Mention matching: " + (headsOnly ? "HEAD" : "EXACT")); + System.out.println("Mention mapping: " + (transform ? "TRANSFORM" : "INTERSECT")); + System.out.println(sepLine); + + System.out.println(ann.getTextCount() + " pairs of texts found"); + + System.out.println(sepLine); + Result md = ann.getMentionDetectionResult(false); + System.out.println("Mention detection result:"); + System.out.println(md); + System.out.println(sepLine); + + Result mdz = ann.getMentionDetectionResult(true); + System.out.println("Mention detection result zero only:"); + System.out.println(mdz); + System.out.println(sepLine); + + List<Scorer> scorers = new ArrayList<>(); + scorers.add(new ScorerMuc()); + scorers.add(new ScorerB3()); + scorers.add(new ScorerCeafm()); + scorers.add(new ScorerCeafe()); + scorers.add(new ScorerBlanc()); + + for (Scorer scorer : scorers) { + Result result = scorer.compare(ann); + System.out.println(scorer.getName() + ":"); + System.out.println(result); + } + } +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Mmax.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Mmax.java new file mode 100755 index 0000000..f0522bf --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Mmax.java @@ -0,0 +1,193 @@ +package pl.waw.ipipan.zil.core.scoreference.readers; + +import java.io.File; +import java.io.FileInputStream; +import 
java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.xml.namespace.QName; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamReader; + +import org.apache.log4j.Logger; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class Mmax implements Reader { + + private static final XMLInputFactory factory = XMLInputFactory.newInstance(); + private static final Logger logger = Logger.getLogger(Mmax.class); + + public void loadAnnotationsFromDirs(File goldDir, File sysDir, AnnotationPair ann, boolean headsOnly, + boolean transform) { + + List<File> children = new ArrayList<File>(Arrays.asList(goldDir.listFiles())); + Collections.sort(children); + + for (File goldFile : children) { + File sysFile = new File(sysDir.getPath() + File.separator + goldFile.getName()); + if (sysFile.exists()) { + if (goldFile.isDirectory()) + loadAnnotationsFromDirs(goldFile, sysFile, ann, headsOnly, transform); + else if (goldFile.getName().matches("\\d+_mentions\\.xml")) + loadAnnotationsFrom2Files(goldFile, sysFile, ann, headsOnly, transform); + } else { + logger.warn("No pair text for file:" + goldFile); + } + } + } + + public void loadAnnotationsFrom2Files(File goldFile, File sysFile, AnnotationPair ann, boolean headsOnly, + boolean transform) { + + File wordsFile = new File(goldFile.getParentFile() + File.separator + + goldFile.getName().replace("_mentions", "_words")); + + Map<String, Integer> id2start = new HashMap<>(); + Map<String, Integer> id2end = new HashMap<>(); + Map<String, String> id2orth = new HashMap<>(); + List<String> ids = new ArrayList<>(); + extractMappings(id2start, id2end, 
id2orth, ids, wordsFile); + + SingleTextAnnotation goldAnno = getAnnotation(goldFile, id2start, id2end, ids, id2orth, headsOnly); + SingleTextAnnotation sysAnno = getAnnotation(sysFile, id2start, id2end, ids, id2orth, headsOnly); + + ann.addPair(goldAnno, sysAnno, transform); + } + + private void extractMappings(Map<String, Integer> id2start, Map<String, Integer> id2end, + Map<String, String> id2orth, List<String> ids, File wordsFile) { + XMLStreamReader sr = null; + try (InputStreamReader isr = new InputStreamReader(new FileInputStream(wordsFile), "UTF-8");) { + sr = factory.createXMLStreamReader(isr); + + int currpos = 0; + String text = null; + String id = null; + while (sr.hasNext()) { + sr.next(); + if (sr.isStartElement()) { + if (sr.getName().equals(new QName("word"))) { + id = sr.getAttributeValue(null, "id"); + } + } else if (sr.isCharacters()) { + text = sr.getText(); + } else if (sr.isEndElement()) { + if (sr.getName().equals(new QName("word"))) { + id2start.put(id, currpos); + id2end.put(id, currpos + text.length() - 1); + id2orth.put(id, text); + currpos += text.length(); + ids.add(id); + } + } + } + } catch (Exception e) { + logger.error("Error reading words file:" + e); + } finally { + try { + if (sr != null) + sr.close(); + } catch (Exception e) { + logger.error("Error closing words file:" + e); + } + } + } + + private static SingleTextAnnotation getAnnotation(File file, Map<String, Integer> id2start, + Map<String, Integer> id2end, List<String> ids, Map<String, String> id2orth, boolean headsOnly) { + + Map<String, Set<String>> groups = new HashMap<String, Set<String>>(); + Set<String> singletons = new HashSet<String>(); + + XMLStreamReader sr = null; + try (InputStreamReader isr = new InputStreamReader(new FileInputStream(file), "UTF-8")) { + + sr = factory.createXMLStreamReader(isr); + + while (sr.hasNext()) { + sr.next(); + if (sr.isStartElement()) { + if (sr.getName().equals(new QName("www.eml.org/NameSpaces/mention", "markable"))) { + String 
span = sr.getAttributeValue(null, "span"); + String head = sr.getAttributeValue(null, "mention_head"); + String mg = sr.getAttributeValue(null, "mention_group"); + + String id = ""; + + List<String> childIds = new ArrayList<>(); + for (String s : span.split(",")) { + String[] spl = s.split("\\.\\."); + String first = spl[0]; + String last = spl[spl.length - 1]; + + if (headsOnly) { + int start = ids.indexOf(first); + int end = ids.indexOf(last); + childIds.addAll(ids.subList(start, end + 1)); + } else { + id += id2start.get(first) + ".." + id2end.get(last) + "#"; + } + } + if (headsOnly) { + List<String> childOrths = new ArrayList<>(); + for (String chId : childIds) { + String orth = id2orth.get(chId); + childOrths.add(orth); + if (orth.equalsIgnoreCase(head)) { + id = chId; + break; + } + } + if (!childIds.contains(id)) { + logger.warn("Ignorning mention without head with id = " + + sr.getAttributeValue(null, "id") + " in file: " + file); + continue; + } + + } + + if (mg.equals("empty")) + singletons.add(id); + else { + if (!groups.containsKey(mg)) + groups.put(mg, new HashSet<String>()); + groups.get(mg).add(id); + } + } + } + } + } catch (Exception e) { + logger.error("Error reading mentions file:" + e); + } finally { + try { + if (sr != null) + sr.close(); + } catch (Exception e) { + logger.error("Error closing mentions file:" + e); + } + } + + // get rid of any mention added as singleton AND as in group + for (Set<String> mentionGroup : groups.values()) { + singletons.removeAll(mentionGroup); + } + + SingleTextAnnotationImpl anno = new SingleTextAnnotationImpl(file.getPath()); + for (String sing : singletons) + anno.addSingletons(sing); + for (Set<String> set : groups.values()) + anno.addMentionGroup(set.toArray(new String[0])); + + return anno; + } +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Reader.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Reader.java new file mode 100755 index 0000000..1360e4f --- 
/dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Reader.java @@ -0,0 +1,12 @@ +package pl.waw.ipipan.zil.core.scoreference.readers; + +import java.io.File; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; + +public interface Reader { + + public void loadAnnotationsFromDirs(File goldDir, File sysDir, AnnotationPair ann, boolean headsOnly, + boolean transform); + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Tei.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Tei.java new file mode 100755 index 0000000..1bce93e --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/readers/Tei.java @@ -0,0 +1,163 @@ +package pl.waw.ipipan.zil.core.scoreference.readers; + +import ipipan.clarin.tei.api.entities.TEICoreference; +import ipipan.clarin.tei.api.entities.TEICorpusText; +import ipipan.clarin.tei.api.entities.TEIMention; +import ipipan.clarin.tei.api.entities.TEIMorph; +import ipipan.clarin.tei.api.entities.TEIParagraph; +import ipipan.clarin.tei.api.entities.TEISegment; +import ipipan.clarin.tei.api.entities.TEISentence; +import ipipan.clarin.tei.api.exceptions.TEIException; +import ipipan.clarin.tei.api.io.TEI_IO; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.log4j.Logger; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class Tei implements Reader { + + private static final Logger logger = Logger.getLogger(Tei.class); + + public void loadAnnotationsFromDirs(File goldDir, File sysDir, AnnotationPair ann, boolean headsOnly, + boolean transform) { + + List<File> children = new 
ArrayList<File>(Arrays.asList(goldDir.listFiles())); + Collections.sort(children); + + for (File goldFile : children) { + File sysFile = new File(sysDir.getPath() + File.separator + goldFile.getName()); + if (sysFile.exists()) { + if (goldFile.isDirectory()) + loadAnnotationsFromDirs(goldFile, sysFile, ann, headsOnly, transform); + else if (goldFile.getName().matches("ann_coreference.xml(\\.gz)?")) + loadAnnotationsFrom2Files(goldFile.getParentFile(), sysFile.getParentFile(), ann, headsOnly, + transform); + } else if (goldFile.getName().matches("ann_coreference.xml(\\.gz)?")) { + logger.warn("No sys anno found for gold anno: " + goldFile); + } + } + } + + private static void loadAnnotationsFrom2Files(File goldDir, File sysDir, AnnotationPair ann, boolean headsOnly, + boolean transform) { + try { + SingleTextAnnotation goldAnno = getAnnotation(goldDir, headsOnly); + SingleTextAnnotation sysAnno = getAnnotation(sysDir, headsOnly); + ann.addPair(goldAnno, sysAnno, transform); + } catch (TEIException e) { + logger.error(e.getLocalizedMessage()); + } + + } + + private static SingleTextAnnotation getAnnotation(File dir, boolean headsOnly) throws TEIException { + + TEI_IO tei = TEI_IO.getInstance(); + TEICorpusText text = tei.readFromNKJPDirectory(dir); + + SingleTextAnnotationImpl anno = new SingleTextAnnotationImpl(dir.getPath()); + Set<String> zeroSubjectIds = new HashSet<>(); + + Map<String, Integer> parOffsets = getParOffsets(text); + + // distinct spans check + int skipped = 0; + Set<String> distinctSpans = new HashSet<>(); + for (TEISentence s : text.getAllSentences()) + for (TEIMention m : s.getAllMentions()) { + if (headsOnly) { + m.getMorphs().clear(); + m.getMorphs().addAll(m.getHeadMorphs()); + } + String span = getMentionId(m, parOffsets); + if (distinctSpans.contains(span)) { + logger.debug("Duplicate mention " + m.getId() + " in text " + dir.getName() + + ". A mention with exact same borders exists! 
Will be skipped for evaluation."); + skipped++; + } else { + distinctSpans.add(span); + } + } + + Set<String> alreadyAddedSpans = new HashSet<>(); + + // nonsingletons + int nonSingletons = 0; + for (TEICoreference cor : text.getAllCoreferences()) { + if (!cor.getType().equals("ident")) + continue; + + Set<String> ids = new HashSet<>(); + for (TEIMention m : cor.getMentions()) { + String span = getMentionId(m, parOffsets); + if (!alreadyAddedSpans.contains(span)) { + alreadyAddedSpans.add(span); + ids.add(span); + if (m.isZeroSubject()) + zeroSubjectIds.add(span); + nonSingletons++; + } + } + if (ids.size() > 0) + anno.addMentionGroup(ids.toArray(new String[0])); + } + + // singletons + int singletons = 0; + for (TEISentence s : text.getAllSentences()) + for (TEIMention m : s.getAllMentions()) { + String span = getMentionId(m, parOffsets); + if (!alreadyAddedSpans.contains(span)) { + alreadyAddedSpans.add(span); + anno.addSingletons(span); + singletons++; + if (m.isZeroSubject()) + zeroSubjectIds.add(span); + } + } + + anno.setZeroSubjectIds(zeroSubjectIds); + + logger.debug(nonSingletons + " non-singletons, " + singletons + " singletons, " + skipped + + " skipped duplicates in text " + dir.getName()); + + return anno; + } + + private static Map<String, Integer> getParOffsets(TEICorpusText text) { + Map<String, Integer> parOffsets = new HashMap<>(); + int parOffset = 0; + for (TEIParagraph par : text.getParagraphs()) { + parOffsets.put(par.getId(), parOffset); + for (TEIMorph m : par.getMorphs()) { + parOffset += m.getCorrespSegment().getLength() + 1; + if (m.hasNps()) + parOffset--; + } + } + return parOffsets; + } + + private static String getMentionId(TEIMention m, Map<String, Integer> parOffsets) { + StringBuffer id = new StringBuffer(); + for (TEIMorph morph : m.getMorphs()) { + TEISegment seg = morph.getCorrespSegment(); + int curparOffset = parOffsets.get(seg.getParagraph().getId()); + String morphId = (seg.getOffset() + curparOffset) + ":" + 
seg.getLength(); + id.append(morphId + "#"); + } + return id.toString(); + } +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/Scorer.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/Scorer.java new file mode 100755 index 0000000..4ea1b7b --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/Scorer.java @@ -0,0 +1,33 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public abstract class Scorer { + + public Result compare(AnnotationPair goldAndSys) { + Result totalResult = new Result(); + + for (int i = 0; i < goldAndSys.getTextCount(); i++) { + try { + + Result textResult = compare(goldAndSys.getGold(i), goldAndSys.getSys(i)); + + totalResult.add(textResult); + + } catch (Exception ex) { + System.out.println(this.getClass().getSimpleName() + " Error scoring text: " + + goldAndSys.getGold(i).getFilename()); + } + } + + return totalResult; + } + + public abstract Result compare(SingleTextAnnotation gold, SingleTextAnnotation sys); + + public String getName() { + return this.getClass().getSimpleName(); + } +} \ No newline at end of file diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerB3.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerB3.java new file mode 100755 index 0000000..6485f4d --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerB3.java @@ -0,0 +1,33 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import java.util.HashSet; +import java.util.Set; + +import pl.waw.ipipan.zil.core.scoreference.basic.Mention; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class ScorerB3 extends Scorer { + + public Result 
compare(SingleTextAnnotation gold, SingleTextAnnotation sys) { + + int mentionCount = 0; + double recallNominator = 0; + double precisionNominator = 0; + + for (Mention m : gold.getMentions()) { + mentionCount++; + Set<Mention> coreferentsGold = m.getMentionGroup().getMentions(); + Set<Mention> coreferentsSys = sys.getMention(m).getMentionGroup().getMentions(); + + Set<Mention> intersection = new HashSet<Mention>(coreferentsGold); + intersection.retainAll(coreferentsSys); + + recallNominator += 1.0 * intersection.size() / coreferentsGold.size(); + precisionNominator += 1.0 * intersection.size() / coreferentsSys.size(); + } + + return new Result(precisionNominator, mentionCount, recallNominator, mentionCount); + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerBlanc.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerBlanc.java new file mode 100755 index 0000000..26920fd --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerBlanc.java @@ -0,0 +1,150 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import java.util.ArrayList; +import java.util.List; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.Mention; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class ScorerBlanc extends Scorer { + + public Result compare(AnnotationPair goldAndSys) { + Result totalCoref = new Result(); + Result totalNonCoref = new Result(); + + for (int i = 0; i < goldAndSys.getTextCount(); i++) { + int[] table = computeTable(goldAndSys.getGold(i), goldAndSys.getSys(i)); + int rc = table[0]; + int wc = table[1]; + int rn = table[2]; + int wn = table[3]; + + Result cr = new Result(rc, rc + wc, rc, rc + wn); + Result nr = new Result(rn, rn + wn, rn, rn + wc); + + totalCoref.add(cr); + totalNonCoref.add(nr); + } + + return 
computeResultFromTwo(totalCoref, totalNonCoref); + } + + private Result computeResultFromTwo(Result totalCoref, Result totalNonCoref) { + + double avgP = (totalCoref.getPrecision() + totalNonCoref.getPrecision()) / 2; + double avgR = (totalCoref.getRecall() + totalNonCoref.getRecall()) / 2; + double avgF1 = (totalCoref.getF1() + totalNonCoref.getF1()) / 2; + + return new Result(avgP, avgR, avgF1); + } + + public int[] computeTable(SingleTextAnnotation gold, SingleTextAnnotation sys) { + + int rc = 0; + int wc = 0; + int rn = 0; + int wn = 0; + + List<Mention> mentions = new ArrayList<Mention>(gold.getMentions()); + int mentionsCount = mentions.size(); + + for (int i = 0; i < mentionsCount - 1; i++) { + Mention m1 = mentions.get(i); + for (int j = i + 1; j < mentionsCount; j++) { + Mention m2 = mentions.get(j); + + boolean goldCoreferent = m1.getMentionGroup().getMentions().contains(m2); + boolean sysCoreferent = sys.getMention(m1).getMentionGroup().getMentions().contains(sys.getMention(m2)); + + if (goldCoreferent) { + if (sysCoreferent) { + rc++; // right coreference link + } else { + wn++; // wrong non-coreference link + } + } else { + if (sysCoreferent) { + wc++; // wrong coreference link + } else { + rn++; // right non-coreference link + } + } + } + } + + int[] result = new int[4]; + result[0] = rc; + result[1] = wc; + result[2] = rn; + result[3] = wn; + + return result; + } + + public Result compare(SingleTextAnnotation gold, SingleTextAnnotation sys) { + + int rc = 0; + int wc = 0; + int rn = 0; + int wn = 0; + + List<Mention> mentions = new ArrayList<Mention>(gold.getMentions()); + int mentionsCount = mentions.size(); + + for (int i = 0; i < mentionsCount - 1; i++) { + Mention m1 = mentions.get(i); + for (int j = i + 1; j < mentionsCount; j++) { + Mention m2 = mentions.get(j); + + boolean goldCoreferent = m1.getMentionGroup().getMentions().contains(m2); + boolean sysCoreferent = sys.getMention(m1).getMentionGroup().getMentions().contains(sys.getMention(m2)); 
+ + if (goldCoreferent) { + if (sysCoreferent) { + rc++; // right coreference link + } else { + wn++; // wrong non-coreference link + } + } else { + if (sysCoreferent) { + wc++; // wrong coreference link + } else { + rn++; // right non-coreference link + } + } + } + } + + Result cr = new Result(rc, rc + wc, rc, rc + wn); + Result nr = new Result(rn, rn + wn, rn, rn + wc); + + // handle border cases + + if (rc + wc == 0 || rc + wn == 0) + cr = new Result(0.0, 0.0, 0.0); + + if (rn + wn == 0 || rn + wc == 0) + nr = new Result(0.0, 0.0, 0.0); + + // perfect annotation + if (wc + wn == 0) + return new Result(1.0, 1.0, 1.0); + + // anti-perfect annotation + if (rc + rn == 0) + return new Result(0.0, 0.0, 0.0); + + // no coreference in gold + if (rc + wn == 0) + return nr; + + // only coreference in gold + if (rn + wc == 0) + return cr; + + return computeResultFromTwo(cr, nr); + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeaf.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeaf.java new file mode 100755 index 0000000..d227892 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeaf.java @@ -0,0 +1,73 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import pl.waw.ipipan.zil.core.scoreference.basic.MentionGroup; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.HungarianAlgorithm; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public abstract class ScorerCeaf extends Scorer { + + public Result compare(SingleTextAnnotation gold, SingleTextAnnotation sys) { + + Map<MentionGroup, MentionGroup> mapping = getOptimalMapping(gold, sys); + + double recallDenominator = 0; + for (MentionGroup mg : gold.getMentionGroups()) + recallDenominator += getSimilarity(mg, mg); + + double 
precisionDenominator = 0; + for (MentionGroup mg : sys.getMentionGroups()) + precisionDenominator += getSimilarity(mg, mg); + + double nominator = 0; + for (MentionGroup mg : gold.getMentionGroups()) + if (mapping.containsKey(mg)) + nominator += getSimilarity(mg, mapping.get(mg)); + + return new Result(nominator, precisionDenominator, nominator, recallDenominator); + } + + private Map<MentionGroup, MentionGroup> getOptimalMapping(SingleTextAnnotation gold, SingleTextAnnotation sys) { + + List<MentionGroup> mgsGold = new ArrayList<MentionGroup>(gold.getMentionGroups()); + List<MentionGroup> mgsSys = new ArrayList<MentionGroup>(sys.getMentionGroups()); + + boolean transposed = mgsGold.size() > mgsSys.size(); + double[][] array = new double[mgsGold.size()][mgsSys.size()]; + int i = 0; + for (MentionGroup mgGold : mgsGold) { + int j = 0; + for (MentionGroup mgSys : mgsSys) { + + array[i][j] = getSimilarity(mgGold, mgSys); + j++; + } + i++; + } + if (transposed) { + array = HungarianAlgorithm.transpose(array); + } + + int[][] assignment = new int[array.length][2]; + assignment = HungarianAlgorithm.hgAlgorithm(array, "max"); + + // decode the assignment + Map<MentionGroup, MentionGroup> mapping = new HashMap<MentionGroup, MentionGroup>(); + for (int n = 0; n < assignment.length; n++) { + if (transposed) + mapping.put(mgsGold.get(assignment[n][1]), mgsSys.get(assignment[n][0])); + else + mapping.put(mgsGold.get(assignment[n][0]), mgsSys.get(assignment[n][1])); + } + + return mapping; + } + + protected abstract double getSimilarity(MentionGroup mg, MentionGroup mg2); + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafe.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafe.java new file mode 100755 index 0000000..436285f --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafe.java @@ -0,0 +1,17 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import 
pl.waw.ipipan.zil.core.scoreference.basic.Mention; +import pl.waw.ipipan.zil.core.scoreference.basic.MentionGroup; + +public class ScorerCeafe extends ScorerCeaf { + + protected double getSimilarity(MentionGroup mg, MentionGroup mg2) { + int nominator = 0; + for (Mention m : mg.getMentions()) { + if (mg2.getMentions().contains(m)) + nominator++; + } + return 2.0 * nominator / (mg.getMentions().size() + mg2.getMentions().size()); + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafm.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafm.java new file mode 100755 index 0000000..344e25e --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafm.java @@ -0,0 +1,17 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import pl.waw.ipipan.zil.core.scoreference.basic.Mention; +import pl.waw.ipipan.zil.core.scoreference.basic.MentionGroup; + +public class ScorerCeafm extends ScorerCeaf { + + protected double getSimilarity(MentionGroup mg, MentionGroup mg2) { + int nominator = 0; + for (Mention m : mg.getMentions()) { + if (mg2.getMentions().contains(m)) + nominator++; + } + return nominator; + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerF1.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerF1.java new file mode 100755 index 0000000..76ae498 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerF1.java @@ -0,0 +1,14 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class ScorerF1 extends Scorer { + + @Override + public Result compare(SingleTextAnnotation gold, SingleTextAnnotation sys) { + // TODO Auto-generated method stub + return null; + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerH.java 
a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerH.java new file mode 100755 index 0000000..940778c --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerH.java @@ -0,0 +1,14 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class ScorerH extends Scorer { + + @Override + public Result compare(SingleTextAnnotation gold, SingleTextAnnotation sys) { + // TODO Auto-generated method stub + return null; + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerMuc.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerMuc.java new file mode 100755 index 0000000..0f779ca --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerMuc.java @@ -0,0 +1,75 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import java.util.HashSet; +import java.util.Set; + +import pl.waw.ipipan.zil.core.scoreference.basic.Mention; +import pl.waw.ipipan.zil.core.scoreference.basic.MentionGroup; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class ScorerMuc extends Scorer { + + public Result compare(SingleTextAnnotation gold, SingleTextAnnotation sys) { + + Pair recall = calculatePorR(gold, sys); + Pair precision = calculatePorR(sys, gold); + + return new Result(precision.getNominator(), precision.getDenominator(), recall.getNominator(), + recall.getDenominator()); + } + + private Pair calculatePorR(SingleTextAnnotation gold, SingleTextAnnotation sys) { + + int nominator = 0; + int denominator = 0; + + for (MentionGroup mg : gold.getMentionGroups()) { + int mgSize = mg.getMentions().size(); + nominator += mgSize - getNumberOfGroupsForMentions(mg.getMentions(), sys); + denominator += mgSize - 1; + } + + return new Pair(nominator, 
denominator); + } + + /** + * Calculates the number of different mention groups, to which given + * mentions belong regarding given annotation. + * + * @param mentions + * @param annotation + * @return + */ + private int getNumberOfGroupsForMentions(Set<Mention> mentions, SingleTextAnnotation annotation) { + Set<MentionGroup> groups = new HashSet<MentionGroup>(); + for (Mention m : mentions) { + Mention correspMention = annotation.getMention(m); + if (correspMention == null) + System.out.println("Problem with mention " + m.getId()); + + groups.add(correspMention.getMentionGroup()); + } + + return groups.size(); + } + + private class Pair { + + private double denominator; + private double nominator; + + public Pair(double nominator, double denominator) { + this.nominator = nominator; + this.denominator = denominator; + } + + public double getDenominator() { + return denominator; + } + + public double getNominator() { + return nominator; + } + } +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerRand.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerRand.java new file mode 100755 index 0000000..54fd79e --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerRand.java @@ -0,0 +1,14 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +public class ScorerRand extends Scorer { + + @Override + public Result compare(SingleTextAnnotation gold, SingleTextAnnotation sys) { + // TODO Auto-generated method stub + return null; + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/utils/HungarianAlgorithm.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/utils/HungarianAlgorithm.java new file mode 100755 index 0000000..01a7e6d --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/utils/HungarianAlgorithm.java @@ -0,0 +1,584 @@ 
+package pl.waw.ipipan.zil.core.scoreference.utils; + +/* + * Created on Apr 25, 2005 + * + * Munkres-Kuhn (Hungarian) Algorithm Clean Version: 0.11 + * + * Konstantinos A. Nedas + * Department of Spatial Information Science & Engineering + * University of Maine, Orono, ME 04469-5711, USA + * kostas@spatial.maine.edu + * http://www.spatial.maine.edu/~kostas + * + * This Java class implements the Hungarian algorithm [a.k.a Munkres' algorithm, + * a.k.a. Kuhn algorithm, a.k.a. Assignment problem, a.k.a. Marriage problem, + * a.k.a. Maximum Weighted Maximum Cardinality Bipartite Matching]. + * + * [It can be used as a method call from within any main (or other function).] + * It takes 2 arguments: + * a. A 2-D array (could be rectangular or square). + * b. A string ("min" or "max") specifying whether you want the min or max assignment. + * [It returns an assignment matrix[array.length][2] that contains the row and col of + * the elements (in the original inputted array) that make up the optimum assignment.] + * + * [This version contains only scarce comments. If you want to understand the + * inner workings of the algorithm, get the tutorial version of the algorithm + * from the same website you got this one (http://www.spatial.maine.edu/~kostas/dev/soft/munkres.htm)] + * + * Any comments, corrections, or additions would be much appreciated. + * Credit due to professor Bob Pilgrim for providing an online copy of the + * pseudocode for this algorithm (http://216.249.163.93/bob.pilgrim/445/munkres.html) + * + * Feel free to redistribute this source code, as long as this header--with + * the exception of sections in brackets--remains as part of the file. + * + * Requirements: JDK 1.5.0_01 or better. + * [Created in Eclipse 3.1M6 (www.eclipse.org).] 
public class HungarianAlgorithm {

    // ********************************//
    // METHODS FOR CONSOLE INPUT-OUTPUT//
    // ********************************//

    /**
     * Shared reader over System.in. It is deliberately never closed: closing
     * a Scanner closes System.in too, which made every call after the first
     * one fail.
     */
    private static Scanner stdin;

    /**
     * Prints the prompt and reads one int from standard input.
     *
     * @param prompt text shown before reading
     * @return the integer that was read
     */
    public static int readInput(String prompt) {
        if (stdin == null) {
            stdin = new Scanner(System.in);
        }
        System.out.print(prompt);
        return stdin.nextInt();
    }

    /**
     * Prints an elapsed time as [d:][h:][m:]s.
     *
     * @param time elapsed time in seconds
     */
    public static void printTime(double time) {
        // Round once, then split, so the seconds field can never show "60s".
        long total = Math.round(time);
        long days = total / (24 * 3600);
        long hours = (total % (24 * 3600)) / 3600;
        long minutes = (total % 3600) / 60;
        long seconds = total % 60;

        String timeElapsed = "";
        if (days > 0)
            timeElapsed = days + "d:";
        if (hours > 0)
            timeElapsed = timeElapsed + hours + "h:";
        if (minutes > 0)
            timeElapsed = timeElapsed + minutes + "m:";

        timeElapsed = timeElapsed + seconds + "s";
        System.out.print("\nTotal time required: " + timeElapsed + "\n\n");
    }

    // *******************************************//
    // METHODS THAT PERFORM ARRAY-PROCESSING TASKS//
    // *******************************************//

    /**
     * Fills the array in place with random values in [0, 1].
     *
     * @param array        array to fill
     * @param randomMethod "random" for uniform values, "gaussian" for
     *                     clipped and shifted Gaussian values; any other
     *                     value leaves the array untouched
     */
    public static void generateRandomArray(double[][] array, String randomMethod) {
        Random generator = new Random();
        for (int i = 0; i < array.length; i++) {
            for (int j = 0; j < array[i].length; j++) {
                if (randomMethod.equals("random")) {
                    array[i][j] = generator.nextDouble();
                }
                if (randomMethod.equals("gaussian")) {
                    array[i][j] = generator.nextGaussian() / 4; // range length to 1.
                    if (array[i][j] > 0.5) {
                        array[i][j] = 0.5; // eliminate outliers.
                    }
                    if (array[i][j] < -0.5) {
                        array[i][j] = -0.5; // eliminate outliers.
                    }
                    array[i][j] = array[i][j] + 0.5; // make elements positive.
                }
            }
        }
    }

    /**
     * Finds the largest element of the array. Works for arrays where all
     * values are >= 0; returns 0 when no element is positive.
     */
    public static double findLargest(double[][] array) {
        double largest = 0;
        for (int i = 0; i < array.length; i++) {
            for (int j = 0; j < array[i].length; j++) {
                if (array[i][j] > largest) {
                    largest = array[i][j];
                }
            }
        }

        return largest;
    }

    /**
     * Transposes a rectangular double[][] array. An array with no rows
     * yields an empty array.
     */
    public static double[][] transpose(double[][] array) {
        if (array.length == 0) {
            return new double[0][0];
        }
        double[][] transposedArray = new double[array[0].length][array.length];
        for (int i = 0; i < transposedArray.length; i++) {
            for (int j = 0; j < transposedArray[i].length; j++) {
                transposedArray[i][j] = array[j][i];
            }
        }
        return transposedArray;
    }

    /**
     * Deep-copies the array row by row (copying the outer array alone would
     * only copy row references). Empty arrays are supported.
     */
    public static double[][] copyOf(double[][] original) {
        double[][] copy = new double[original.length][];
        for (int i = 0; i < original.length; i++) {
            copy[i] = original[i].clone();
        }

        return copy;
    }

    // **********************************//
    // METHODS OF THE HUNGARIAN ALGORITHM//
    // **********************************//

    /**
     * Solves the assignment problem for the given matrix.
     *
     * @param array   weight/cost matrix with rows <= columns; values are
     *                expected to be >= 0 (see {@link #findLargest})
     * @param sumType "max" (case-insensitive) for a maximum-sum assignment;
     *                any other value gives a minimum-sum assignment
     * @return assignment[array.length][2] holding {row, column} of each
     *         chosen element, indexed into the given array
     * @throws IllegalArgumentException if the matrix has more rows than
     *                                  columns (the main loop would never
     *                                  terminate); transpose it first
     */
    public static int[][] hgAlgorithm(double[][] array, String sumType) {
        if (array.length == 0) {
            return new int[0][2];
        }
        if (array.length > array[0].length) {
            throw new IllegalArgumentException("Matrix must not have more rows (" + array.length
                    + ") than columns (" + array[0].length + "); transpose it first.");
        }

        double[][] cost = copyOf(array); // Create the cost matrix

        if (sumType.equalsIgnoreCase("max")) {
            // Then array is a weight array. Must change to cost by
            // subtracting every weight from the largest one.
            double maxWeight = findLargest(cost);
            for (int i = 0; i < cost.length; i++) {
                for (int j = 0; j < cost[i].length; j++) {
                    cost[i][j] = (maxWeight - cost[i][j]);
                }
            }
        }
        double maxCost = findLargest(cost); // Largest cost (needed for step 6).

        int[][] mask = new int[cost.length][cost[0].length]; // 1 = starred, 2 = primed.
        int[] rowCover = new int[cost.length]; // The row covering vector.
        int[] colCover = new int[cost[0].length]; // The column covering vector.
        int[] zero_RC = new int[2]; // Position of last zero from Step 4.
        int step = 1;
        boolean done = false;
        while (!done) { // main execution loop
            switch (step) {
            case 1:
                step = hg_step1(step, cost);
                break;
            case 2:
                step = hg_step2(step, cost, mask, rowCover, colCover);
                break;
            case 3:
                step = hg_step3(step, mask, colCover);
                break;
            case 4:
                step = hg_step4(step, cost, mask, rowCover, colCover, zero_RC);
                break;
            case 5:
                step = hg_step5(step, mask, rowCover, colCover, zero_RC);
                break;
            case 6:
                step = hg_step6(step, cost, rowCover, colCover, maxCost);
                break;
            case 7:
                done = true;
                break;
            }
        }

        int[][] assignment = new int[array.length][2]; // The returned array.
        for (int i = 0; i < mask.length; i++) {
            for (int j = 0; j < mask[i].length; j++) {
                if (mask[i][j] == 1) {
                    assignment[i][0] = i;
                    assignment[i][1] = j;
                }
            }
        }

        // To return the min or max sum instead of the assignment, sum
        // array[assignment[i][0]][assignment[i][1]] over all i and change the
        // return type of this method to double.

        return assignment;
    }

    /**
     * STEP 1: for each row of the cost matrix, find the smallest element and
     * subtract it from every element in its row. Continues with step 2.
     */
    public static int hg_step1(int step, double[][] cost) {
        double minval;

        for (int i = 0; i < cost.length; i++) {
            minval = cost[i][0];
            for (int j = 0; j < cost[i].length; j++) { // find min val in row.
                if (minval > cost[i][j]) {
                    minval = cost[i][j];
                }
            }
            for (int j = 0; j < cost[i].length; j++) { // subtract it.
                cost[i][j] = cost[i][j] - minval;
            }
        }

        step = 2;
        return step;
    }

    /**
     * STEP 2: star each zero that has no starred zero in its row or column
     * (tracked with temporary covers, which are cleared afterwards).
     * Continues with step 3.
     */
    public static int hg_step2(int step, double[][] cost, int[][] mask, int[] rowCover, int[] colCover) {
        for (int i = 0; i < cost.length; i++) {
            for (int j = 0; j < cost[i].length; j++) {
                if ((cost[i][j] == 0) && (colCover[j] == 0) && (rowCover[i] == 0)) {
                    mask[i][j] = 1;
                    colCover[j] = 1;
                    rowCover[i] = 1;
                }
            }
        }

        clearCovers(rowCover, colCover); // Reset cover vectors.

        step = 3;
        return step;
    }

    /**
     * STEP 3: cover the columns of starred zeros. If as many columns are
     * covered as there are rows, finish (step 7); otherwise go to step 4.
     */
    public static int hg_step3(int step, int[][] mask, int[] colCover) {
        for (int i = 0; i < mask.length; i++) { // Cover columns of starred zeros.
            for (int j = 0; j < mask[i].length; j++) {
                if (mask[i][j] == 1) {
                    colCover[j] = 1;
                }
            }
        }

        int count = 0;
        for (int j = 0; j < colCover.length; j++) { // Count covered columns.
            count = count + colCover[j];
        }

        // mask has the same dimensions as cost, so mask.length is the row count.
        if (count >= mask.length) {
            step = 7;
        } else {
            step = 4;
        }

        return step;
    }

    /**
     * STEP 4: find an uncovered zero in cost and prime it (if there is none,
     * go to step 6). If the primed zero's row holds a star, cover the row,
     * uncover the star's column and repeat. Otherwise save the location of
     * the primed zero in zero_RC and go to step 5.
     */
    public static int hg_step4(int step, double[][] cost, int[][] mask, int[] rowCover, int[] colCover, int[] zero_RC) {
        int[] row_col = new int[2]; // Holds row and col of uncovered zero.
        boolean done = false;
        while (!done) {
            row_col = findUncoveredZero(row_col, cost, rowCover, colCover);
            if (row_col[0] == -1) {
                done = true;
                step = 6;
            } else {
                mask[row_col[0]][row_col[1]] = 2; // Prime the found uncovered zero.

                boolean starInRow = false;
                for (int j = 0; j < mask[row_col[0]].length; j++) {
                    if (mask[row_col[0]][j] == 1) { // A star in the same row...
                        starInRow = true;
                        row_col[1] = j; // remember its column.
                    }
                }

                if (starInRow) {
                    rowCover[row_col[0]] = 1; // Cover the star's row.
                    colCover[row_col[1]] = 0; // Uncover its column.
                } else {
                    zero_RC[0] = row_col[0]; // Save row of primed zero.
                    zero_RC[1] = row_col[1]; // Save column of primed zero.
                    done = true;
                    step = 5;
                }
            }
        }

        return step;
    }

    /**
     * Aux 1 for hg_step4: searches row by row for an uncovered zero. The
     * scan stops after the first row that contains one, and within that row
     * the last such zero wins. row_col[0] is -1 when none exists.
     */
    public static int[] findUncoveredZero(int[] row_col, double[][] cost, int[] rowCover, int[] colCover) {
        row_col[0] = -1; // Just a check value. Not a real index.
        row_col[1] = 0;

        int i = 0;
        boolean done = false;
        while (!done) {
            for (int j = 0; j < cost[i].length; j++) {
                if (cost[i][j] == 0 && rowCover[i] == 0 && colCover[j] == 0) {
                    row_col[0] = i;
                    row_col[1] = j;
                    done = true;
                }
            }
            i = i + 1;
            if (i >= cost.length) {
                done = true;
            }
        }

        return row_col;
    }

    /**
     * STEP 5: construct a series of alternating primes and stars, starting
     * with the prime from step 4. Take the star in the same column, then the
     * prime in the same row as that star, and finish at a prime with no star
     * in its column. Unstar all stars and star the primes of the series.
     * Erase any other primes, reset covers and go to step 3.
     */
    public static int hg_step5(int step, int[][] mask, int[] rowCover, int[] colCover, int[] zero_RC) {
        int count = 0; // Index of the last entry of the path matrix.
        int[][] path = new int[(mask[0].length * mask.length)][2]; // Stores row and col.
        path[count][0] = zero_RC[0]; // Row of last prime.
        path[count][1] = zero_RC[1]; // Column of last prime.

        boolean done = false;
        while (!done) {
            int r = findStarInCol(mask, path[count][1]);
            if (r >= 0) {
                count = count + 1;
                path[count][0] = r; // Row of starred zero.
                path[count][1] = path[count - 1][1]; // Column of starred zero.
            } else {
                done = true;
            }

            if (!done) {
                int c = findPrimeInRow(mask, path[count][0]);
                count = count + 1;
                path[count][0] = path[count - 1][0]; // Row of primed zero.
                path[count][1] = c; // Col of primed zero.
            }
        }

        convertPath(mask, path, count);
        clearCovers(rowCover, colCover);
        erasePrimes(mask);

        step = 3;
        return step;
    }

    /**
     * Aux 1 for hg_step5: row of the last starred zero in the column, or -1.
     */
    public static int findStarInCol(int[][] mask, int col) {
        int r = -1; // Again this is a check value.
        for (int i = 0; i < mask.length; i++) {
            if (mask[i][col] == 1) {
                r = i;
            }
        }

        return r;
    }

    /**
     * Aux 2 for hg_step5: column of the last primed zero in the row, or -1.
     */
    public static int findPrimeInRow(int[][] mask, int row) {
        int c = -1;
        for (int j = 0; j < mask[row].length; j++) {
            if (mask[row][j] == 2) {
                c = j;
            }
        }

        return c;
    }

    /**
     * Aux 3 for hg_step5: along path[0..count], unstar the starred entries
     * and star every other entry.
     */
    public static void convertPath(int[][] mask, int[][] path, int count) {
        for (int i = 0; i <= count; i++) {
            if (mask[(path[i][0])][(path[i][1])] == 1) {
                mask[(path[i][0])][(path[i][1])] = 0;
            } else {
                mask[(path[i][0])][(path[i][1])] = 1;
            }
        }
    }

    /** Aux 4 for hg_step5: resets every primed entry of the mask to 0. */
    public static void erasePrimes(int[][] mask) {
        for (int i = 0; i < mask.length; i++) {
            for (int j = 0; j < mask[i].length; j++) {
                if (mask[i][j] == 2) {
                    mask[i][j] = 0;
                }
            }
        }
    }

    /** Aux 5 for hg_step5 (and step 2): uncovers all rows and columns. */
    public static void clearCovers(int[] rowCover, int[] colCover) {
        for (int i = 0; i < rowCover.length; i++) {
            rowCover[i] = 0;
        }
        for (int j = 0; j < colCover.length; j++) {
            colCover[j] = 0;
        }
    }

    /**
     * STEP 6: find the smallest uncovered value in cost, add it to every
     * element of covered rows and subtract it from every element of
     * uncovered columns. Go to step 4.
     */
    public static int hg_step6(int step, double[][] cost, int[] rowCover, int[] colCover, double maxCost) {
        double minval = findSmallest(cost, rowCover, colCover, maxCost);

        for (int i = 0; i < rowCover.length; i++) {
            for (int j = 0; j < colCover.length; j++) {
                if (rowCover[i] == 1) {
                    cost[i][j] = cost[i][j] + minval;
                }
                if (colCover[j] == 0) {
                    cost[i][j] = cost[i][j] - minval;
                }
            }
        }

        step = 4;
        return step;
    }

    /**
     * Aux 1 for hg_step6: smallest value that is neither in a covered row
     * nor in a covered column; the search starts from maxCost, which is
     * returned when nothing smaller is uncovered.
     */
    public static double findSmallest(double[][] cost, int[] rowCover, int[] colCover, double maxCost) {
        double minval = maxCost;
        for (int i = 0; i < cost.length; i++) {
            for (int j = 0; j < cost[i].length; j++) {
                if (rowCover[i] == 0 && colCover[j] == 0 && (minval > cost[i][j])) {
                    minval = cost[i][j];
                }
            }
        }

        return minval;
    }

    // ***********//
    // MAIN METHOD//
    // ***********//

    /**
     * Console demo: asks for matrix dimensions, fills a random matrix, runs
     * the algorithm and prints the assignment, its sum and the time taken.
     */
    public static void main(String[] args) {
        // Below enter "max" or "min" to find maximum sum or minimum sum
        // assignment.
        String sumType = "max";

        // Hard-coded example.
        // double[][] array =
        // {
        // {1, 2, 3},
        // {2, 4, 6},
        // {3, 6, 9}
        // };

        // <UNCOMMENT> BELOW AND COMMENT BLOCK ABOVE TO USE A RANDOMLY GENERATED
        // MATRIX
        int numOfRows = readInput("How many rows for the matrix? ");
        int numOfCols = readInput("How many columns for the matrix? ");
        if (numOfRows < 1 || numOfCols < 1) {
            System.out.println("Both dimensions must be at least 1.");
            return;
        }
        double[][] array = new double[numOfRows][numOfCols];
        generateRandomArray(array, "random"); // All elements within [0,1].
        // </UNCOMMENT>

        if (array.length > array[0].length) {
            // Cols must be >= Rows.
            System.out.println("Array transposed (because rows>columns).\n");
            array = transpose(array);
        }

        // <COMMENT> TO AVOID PRINTING THE MATRIX FOR WHICH THE ASSIGNMENT IS
        // CALCULATED
        System.out.println("\n(Printing out only 2 decimals)\n");
        System.out.println("The matrix is:");
        for (int i = 0; i < array.length; i++) {
            for (int j = 0; j < array[i].length; j++) {
                System.out.printf("%.2f\t", array[i][j]);
            }
            System.out.println();
        }
        System.out.println();
        // </COMMENT>

        double startTime = System.nanoTime();
        int[][] assignment = hgAlgorithm(array, sumType); // Call Hungarian algorithm.
        double endTime = System.nanoTime();

        System.out.println("The winning assignment (" + sumType + " sum) is:\n");
        double sum = 0;
        for (int i = 0; i < assignment.length; i++) {
            // <COMMENT> to avoid printing the elements that make up the
            // assignment
            System.out.printf("array(%d,%d) = %.2f\n", (assignment[i][0] + 1), (assignment[i][1] + 1),
                    array[assignment[i][0]][assignment[i][1]]);
            sum = sum + array[assignment[i][0]][assignment[i][1]];
            // </COMMENT>
        }

        System.out.printf("\nThe %s is: %.2f\n", sumType, sum);
        printTime((endTime - startTime) / 1000000000.0);

    }
}
/**
 * Precision / recall / F1 holder.
 *
 * A result is either ratio-based (numerators and denominators, which can be
 * accumulated with {@link #add(Result)}) or value-based (precision, recall
 * and F1 given directly through the three-argument constructor, which then
 * override the ratios).
 */
public class Result {

    private double precisionNominator = 0.0;
    private double precisionDenominator = 0.0;
    private double recallNominator = 0.0;
    private double recallDenominator = 0.0;

    // Explicit overrides; null means "derive from the ratios".
    private Double precision = null;
    private Double recall = null;
    private Double f1 = null;

    /** Creates a ratio-based result from both numerators and denominators. */
    public Result(double precisionNominator, double precisionDenominator, double recallNominator,
            double recallDenominator) {
        this.precisionNominator = precisionNominator;
        this.precisionDenominator = precisionDenominator;
        this.recallNominator = recallNominator;
        this.recallDenominator = recallDenominator;
    }

    /** Creates an empty ratio-based result (all counts zero). */
    public Result() {
    }

    /** Creates a ratio-based result p/1 and r/1. */
    public Result(double p, double r) {
        this.precisionNominator = p;
        this.recallNominator = r;
        this.precisionDenominator = 1.0;
        this.recallDenominator = 1.0;
    }

    /** Creates a value-based result with fixed precision, recall and F1. */
    public Result(double avgP, double avgR, double avgF1) {
        this.precision = avgP;
        this.recall = avgR;
        this.f1 = avgF1;
    }

    /**
     * Divides, treating a zero denominator as "no evidence" and returning 0
     * instead of NaN or Infinity.
     */
    private static double ratio(double nominator, double denominator) {
        return denominator == 0 ? 0.0 : nominator / denominator;
    }

    /**
     * @return the explicit precision if set, otherwise numerator/denominator
     *         (0 when the denominator is 0)
     */
    public double getPrecision() {
        if (precision != null)
            return precision;
        return ratio(precisionNominator, precisionDenominator);
    }

    /**
     * @return the explicit recall if set, otherwise numerator/denominator
     *         (0 when the denominator is 0)
     */
    public double getRecall() {
        if (recall != null)
            return recall;
        return ratio(recallNominator, recallDenominator);
    }

    /** @return precision as "numerator/denominator" */
    public String getPrecisionString() {
        return precisionNominator + "/" + precisionDenominator;
    }

    /** @return recall as "numerator/denominator" */
    public String getRecallString() {
        return recallNominator + "/" + recallDenominator;
    }

    /**
     * @return the explicit F1 if set, otherwise the harmonic mean of
     *         precision and recall (0 when both are 0)
     */
    public double getF1() {
        if (f1 != null)
            return f1;
        double recall = getRecall();
        double precision = getPrecision();
        return (recall + precision) == 0 ? 0 : 2 * precision * recall / (recall + precision);
    }

    @Override
    public String toString() {
        String precLine = "Precision: " + getPrecision() + "\n";
        String recLine = "Recall: " + getRecall() + "\n";
        return precLine + recLine + "F1: " + getF1();
    }

    /**
     * Adds the numerators and denominators of another result to this one.
     * Explicit precision/recall/F1 overrides of either result are not
     * combined.
     */
    public void add(Result textResult) {
        this.precisionDenominator += textResult.precisionDenominator;
        this.precisionNominator += textResult.precisionNominator;
        this.recallDenominator += textResult.recallDenominator;
        this.recallNominator += textResult.recallNominator;
    }

}
0 : 2 * precision * recall / (recall + precision); + } + + @Override + public String toString() { + String precLine = "Precision: " + getPrecision() + "\n"; + String recLine = "Recall: " + getRecall() + "\n"; + return precLine + recLine + "F1: " + getF1(); + } + + public void add(Result textResult) { + this.precisionDenominator += textResult.precisionDenominator; + this.precisionNominator += textResult.precisionNominator; + this.recallDenominator += textResult.recallDenominator; + this.recallNominator += textResult.recallNominator; + } + +} diff --git b/src/main/java/pl/waw/ipipan/zil/core/scoreference/utils/Splitter.java a/src/main/java/pl/waw/ipipan/zil/core/scoreference/utils/Splitter.java new file mode 100755 index 0000000..aca2859 --- /dev/null +++ a/src/main/java/pl/waw/ipipan/zil/core/scoreference/utils/Splitter.java @@ -0,0 +1,105 @@ +package pl.waw.ipipan.zil.core.scoreference.utils; + +import ipipan.clarin.tei.api.io.IOUtils; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.TreeSet; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Logger; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class Splitter { + + private static final Logger logger = Logger.getLogger(Splitter.class); + + public static void main(String[] args) { + if (args.length != 3) { + logger.error("Wrong number of arguments! 
Try: " + Splitter.class.getSimpleName() + + " corpusDir targetDir percent"); + return; + } + + File corpusDir = new File(args[0]); + File targetDir = new File(args[1]); + Integer percent = Integer.parseInt(args[2]); + + Map<String, TreeSet<File>> type2texts = new HashMap<>(); + for (File textDir : IOUtils.getNKJPDirs(corpusDir)) { + try { + String textType = getTextType(textDir); + if (!type2texts.containsKey(textType)) + type2texts.put(textType, new TreeSet<File>()); + type2texts.get(textType).add(textDir); + + } catch (ParserConfigurationException | SAXException | IOException e) { + logger.error("Error processing text:" + textDir + ":" + e); + } + } + + File trainDir = new File(targetDir, "train"); + File testDir = new File(targetDir, "test"); + trainDir.mkdir(); + testDir.mkdir(); + + Random r = new Random(1); + for (String type : type2texts.keySet()) { + TreeSet<File> files = type2texts.get(type); + int size = files.size(); + int test = Math.max(1, size * percent / 100); + int train = size - test; + + List<File> shuffledFiles = new ArrayList<>(files); + Collections.shuffle(shuffledFiles, r); + List<File> trainFiles = shuffledFiles.subList(0, train); + List<File> testFiles = shuffledFiles.subList(train, shuffledFiles.size()); + + logger.info("Text type:" + type); + logger.info("\t" + size + " both"); + logger.info("\t" + train + " train"); + logger.info("\t" + test + " test"); + + for (File f : trainFiles) + try { + FileUtils.copyDirectoryToDirectory(f, trainDir); + } catch (IOException e) { + e.printStackTrace(); + } + + for (File f : testFiles) + try { + FileUtils.copyDirectoryToDirectory(f, testDir); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + + private static String getTextType(File textDir) throws ParserConfigurationException, SAXException, IOException { + Document doc = loadDocument(new File(textDir, "header.xml")); + NodeList refs = doc.getElementsByTagName("catRef"); + return 
refs.item(0).getAttributes().getNamedItem("target").getTextContent(); + } + + public static Document loadDocument(File xmlFile) throws ParserConfigurationException, SAXException, IOException { + DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); + dbFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); + Document doc = dBuilder.parse(xmlFile); + doc.normalize(); + return doc; + } +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationA.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationA.java new file mode 100755 index 0000000..10f73ea --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationA.java @@ -0,0 +1,12 @@ +package pl.waw.ipipan.zil.core.scoreference.annotations; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class AnnotationA extends SingleTextAnnotationImpl { + + { + addMentionGroup(1, 2, 3, 4, 5); + addMentionGroup(6, 7, 8, 9, 10, 11, 12); + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationB.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationB.java new file mode 100755 index 0000000..e62986a --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationB.java @@ -0,0 +1,12 @@ +package pl.waw.ipipan.zil.core.scoreference.annotations; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class AnnotationB extends SingleTextAnnotationImpl { + + { + addMentionGroup(1, 2, 3, 4, 5, 8, 9, 10, 11, 12); + addMentionGroup(6, 7); + } + +} \ No newline at end of file diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationC.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationC.java new file mode 100755 index 
0000000..191d35a --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationC.java @@ -0,0 +1,11 @@ +package pl.waw.ipipan.zil.core.scoreference.annotations; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class AnnotationC extends SingleTextAnnotationImpl { + + { + addMentionGroup(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationD.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationD.java new file mode 100755 index 0000000..ff6d19a --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationD.java @@ -0,0 +1,11 @@ +package pl.waw.ipipan.zil.core.scoreference.annotations; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class AnnotationD extends SingleTextAnnotationImpl { + + { + addSingletons(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationGold.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationGold.java new file mode 100755 index 0000000..ac2a8b9 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/annotations/AnnotationGold.java @@ -0,0 +1,13 @@ +package pl.waw.ipipan.zil.core.scoreference.annotations; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class AnnotationGold extends SingleTextAnnotationImpl { + + { + addMentionGroup(1, 2, 3, 4, 5); + addMentionGroup(6, 7); + addMentionGroup(8, 9, 10, 11, 12); + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/main/KAlphaTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/main/KAlphaTest.java new file mode 100755 index 0000000..ecbc499 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/main/KAlphaTest.java @@ -0,0 +1,35 @@ +package 
pl.waw.ipipan.zil.core.scoreference.main; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.util.Map; + +import org.junit.Test; + +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPairImpl; +import pl.waw.ipipan.zil.core.scoreference.readers.Mmax; + +public class KAlphaTest { + @Test + public final void testMmax() { + + String goldPath = new File(KAlphaTest.class.getResource( + "/teksty_mmax/teksty_gold").getFile()).getAbsolutePath(); + String sysPath = new File(KAlphaTest.class.getResource( + "/teksty_mmax/teksty_sys").getFile()).getAbsolutePath(); + + AnnotationPair ann = new AnnotationPairImpl(); + (new Mmax()).loadAnnotationsFrom2Files(new File(goldPath, + "0_mentions.xml"), new File(sysPath, "0_mentions.xml"), ann, + false, false); + + ann.getMentionDetectionResult(false); + + Map<String, Double> kalphaBlanc = KAlpha.computeKappaBlanc(ann, null); + + assertEquals(0.9865, kalphaBlanc.values().iterator().next(), 0.0001); + + } +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/main/MainTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/main/MainTest.java new file mode 100755 index 0000000..ed41e34 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/main/MainTest.java @@ -0,0 +1,29 @@ +package pl.waw.ipipan.zil.core.scoreference.main; + +import java.io.File; + +import org.junit.Test; + +public class MainTest { + @Test + public final void testMmax() { + + String goldPath = new File(MainTest.class.getResource( + "/teksty_mmax/teksty_gold").getFile()).getAbsolutePath(); + String sysPath = new File(MainTest.class.getResource( + "/teksty_mmax/teksty_sys").getFile()).getAbsolutePath(); + + Main.main(new String[] { goldPath, sysPath, "mmax" }); + } + + @Test + public final void testTei() { + + String goldPath = new File(MainTest.class.getResource( + "/teksty_tei/teksty_gold").getFile()).getAbsolutePath(); + String 
sysPath = new File(MainTest.class.getResource( + "/teksty_tei/teksty_sys").getFile()).getAbsolutePath(); + + Main.main(new String[] { goldPath, sysPath, "tei" }); + } +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerB3Test.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerB3Test.java new file mode 100755 index 0000000..a12da7c --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerB3Test.java @@ -0,0 +1,14 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +public class ScorerB3Test extends ScorerTest { + + { + scorer = new ScorerB3(); + perfectAnnotationScore = 1.0; + annotationAScore = 0.865; + annotationBScore = 0.737; + annotationCScore = 0.545; + annotationDScore = 0.400; + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerBlancTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerBlancTest.java new file mode 100755 index 0000000..7101c2a --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerBlancTest.java @@ -0,0 +1,40 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import org.junit.Test; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class ScorerBlancTest extends ScorerTest { + + { + scorer = new ScorerBlanc(); + perfectAnnotationScore = 1.0; + annotationAScore = 0.841; + annotationBScore = 0.621; + annotationCScore = 0.241; // 0.318; + annotationDScore = 0.405; // 0.682; + } + + @Test + public final void testAnnotationBlanc() { + testAnnotation(new AnnotationBlancGold(), new AnnotationBlancSys(), + 0.7078); + } + + private class AnnotationBlancGold extends SingleTextAnnotationImpl { + { + addSingletons(1, 2, 3, 4, 6, 8, 10, 11, 13); + addMentionGroup(5, 12, 14); + addMentionGroup(7, 9); + } + } + + private class AnnotationBlancSys extends SingleTextAnnotationImpl { + { + addSingletons(1, 2, 3, 8, 10, 11, 13); + addMentionGroup(4, 6); 
+ addMentionGroup(5, 12); + addMentionGroup(7, 9, 14); + } + } +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafeTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafeTest.java new file mode 100755 index 0000000..4cd4643 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafeTest.java @@ -0,0 +1,14 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +public class ScorerCeafeTest extends ScorerTest { + + { + scorer = new ScorerCeafe(); + perfectAnnotationScore = 1.0; + annotationAScore = 0.733; + annotationBScore = 0.667; + annotationCScore = 0.294; + annotationDScore = 0.178; + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafmTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafmTest.java new file mode 100755 index 0000000..df5a230 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerCeafmTest.java @@ -0,0 +1,14 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +public class ScorerCeafmTest extends ScorerTest { + + { + scorer = new ScorerCeafm(); + perfectAnnotationScore = 1.0; + annotationAScore = 0.833; + annotationBScore = 0.583; + annotationCScore = 0.417; + annotationDScore = 0.250; + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerF1Test.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerF1Test.java new file mode 100755 index 0000000..eb87c16 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerF1Test.java @@ -0,0 +1,17 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import org.junit.Ignore; + +@Ignore +public class ScorerF1Test extends ScorerTest { + + { + scorer = new ScorerF1(); + perfectAnnotationScore = 1.0; + annotationAScore = 0.808; + annotationBScore = 0.636; + annotationCScore = 0.483; + annotationDScore = Double.NaN; + } + +} diff --git 
b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerHTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerHTest.java new file mode 100755 index 0000000..4384b44 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerHTest.java @@ -0,0 +1,17 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import org.junit.Ignore; + +@Ignore +public class ScorerHTest extends ScorerTest { + + { + scorer = new ScorerH(); + perfectAnnotationScore = 1.0; + annotationAScore = 0.778; + annotationBScore = 0.571; + annotationCScore = 0.0; + annotationDScore = 0.487; + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerMucTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerMucTest.java new file mode 100755 index 0000000..59deee9 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerMucTest.java @@ -0,0 +1,35 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import org.junit.Test; + +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotationImpl; + +public class ScorerMucTest extends ScorerTest { + + { + scorer = new ScorerMuc(); + perfectAnnotationScore = 1.0; + annotationAScore = 0.947; + annotationBScore = 0.947; + annotationCScore = 0.9; + annotationDScore = Double.NaN; + } + + @Test + public final void testAnnotationMUC() { + testAnnotation(new AnnotationMUCGold(), new AnnotationMUCSys(), 0.666); + } + + private class AnnotationMUCGold extends SingleTextAnnotationImpl { + { + addMentionGroup(1, 2, 3); + } + } + + private class AnnotationMUCSys extends SingleTextAnnotationImpl { + { + addMentionGroup(1, 3); + addMentionGroup(2); + } + } +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerRandTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerRandTest.java new file mode 100755 index 0000000..4bc3b87 --- /dev/null +++ 
a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerRandTest.java @@ -0,0 +1,17 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import org.junit.Ignore; + +@Ignore +public class ScorerRandTest extends ScorerTest { + + { + scorer = new ScorerRand(); + perfectAnnotationScore = 1.0; + annotationAScore = 0.848; + annotationBScore = 0.621; + annotationCScore = 0.318; + annotationDScore = 0.682; + } + +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerTest.java new file mode 100755 index 0000000..159a1c9 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/scorers/ScorerTest.java @@ -0,0 +1,81 @@ +package pl.waw.ipipan.zil.core.scoreference.scorers; + +import static org.junit.Assert.assertEquals; + +import org.apache.log4j.Logger; +import org.junit.Ignore; +import org.junit.Test; + +import pl.waw.ipipan.zil.core.scoreference.annotations.AnnotationA; +import pl.waw.ipipan.zil.core.scoreference.annotations.AnnotationB; +import pl.waw.ipipan.zil.core.scoreference.annotations.AnnotationC; +import pl.waw.ipipan.zil.core.scoreference.annotations.AnnotationD; +import pl.waw.ipipan.zil.core.scoreference.annotations.AnnotationGold; +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPair; +import pl.waw.ipipan.zil.core.scoreference.basic.AnnotationPairImpl; +import pl.waw.ipipan.zil.core.scoreference.basic.SingleTextAnnotation; +import pl.waw.ipipan.zil.core.scoreference.utils.Result; + +@Ignore +public class ScorerTest { + + protected Logger logger = Logger.getLogger(ScorerTest.class); + + protected Scorer scorer = null; + + protected Double perfectAnnotationScore; + protected Double annotationAScore; + protected Double annotationBScore; + protected Double annotationCScore; + protected Double annotationDScore; + + protected void testAnnotation(SingleTextAnnotation gold, + SingleTextAnnotation sys, Double expectedF1) { + + 
logger.debug("\n################## " + + scorer.getClass().getCanonicalName() + " #############"); + logger.debug("Calculating annotation score for \n" + "\tGOLD: " + + gold.toString() + "\n" + "\tSYS: " + sys.toString()); + + AnnotationPair annos = new AnnotationPairImpl(); + annos.addPair(gold, sys, false); + + Result result = scorer.compare(annos.getGold(0), annos.getSys(0)); + + logger.debug(result.toString()); + + Double delta = 0.001; + + assertEquals(expectedF1, result.getF1(), delta); + } + + @Test + public final void testPerfectAnnotation() { + testAnnotation(new AnnotationGold(), new AnnotationGold(), + perfectAnnotationScore); + } + + @Test + public final void testAnnotationA() { + testAnnotation(new AnnotationGold(), new AnnotationA(), + annotationAScore); + } + + @Test + public final void testAnnotationB() { + testAnnotation(new AnnotationGold(), new AnnotationB(), + annotationBScore); + } + + @Test + public final void testAnnotationC() { + testAnnotation(new AnnotationGold(), new AnnotationC(), + annotationCScore); + } + + @Test + public final void testAnnotationD() { + testAnnotation(new AnnotationGold(), new AnnotationD(), + annotationDScore); + } +} diff --git b/src/test/java/pl/waw/ipipan/zil/core/scoreference/utils/ResultTest.java a/src/test/java/pl/waw/ipipan/zil/core/scoreference/utils/ResultTest.java new file mode 100755 index 0000000..885fac8 --- /dev/null +++ a/src/test/java/pl/waw/ipipan/zil/core/scoreference/utils/ResultTest.java @@ -0,0 +1,27 @@ +package pl.waw.ipipan.zil.core.scoreference.utils; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class ResultTest { + + @Test + public final void testF1Calculation() { + Result r = new Result(1.0, 1.0); + assertEquals(r.getF1(), 1.0, 0.000001); + + r = new Result(0.42, 0.91); + assertEquals(0.5747, r.getF1(), 0.0001); + + r = new Result(1.0, 0.9); + assertEquals(0.947, r.getF1(), 0.001); + + r = new Result(Double.NaN, 0.9); + assertEquals(Double.NaN, 
r.getF1(), 0.001); + + r = new Result(Double.NaN, Double.NaN); + assertEquals(Double.NaN, r.getF1(), 0.001); + } + +} diff --git b/src/test/resources/teksty_mmax/teksty_gold/0.mmax a/src/test/resources/teksty_mmax/teksty_gold/0.mmax new file mode 100755 index 0000000..c9d65de --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/0.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>0_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_gold/0_mentions.xml a/src/test/resources/teksty_mmax/teksty_gold/0_mentions.xml new file mode 100755 index 0000000..47bf5bd --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/0_mentions.xml @@ -0,0 +1,85 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_2" mmax_level="mention" mention_head="spotkaniu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_4" mmax_level="mention" mention_head="udział" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_3" span="word_8" mmax_level="mention" mention_head="braci" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_4" span="word_7..word_8" mmax_level="mention" mention_head="tysięcy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_10..word_11" mmax_level="mention" mention_head="Europy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_17" mmax_level="mention" mention_head="nich" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_7" span="word_22..word_25" mmax_level="mention" mention_head="tytuł" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_8" span="word_29" mmax_level="mention" mention_head="udział" mention_group="set_0" near_identity="empty"></markable> 
+ <markable id="markable_9" span="word_31" mmax_level="mention" mention_head="strzelaniu" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_10" span="word_34..word_35" mmax_level="mention" mention_head="szanse" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_40" mmax_level="mention" mention_head="to" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_12" span="word_46" mmax_level="mention" mention_head="gospodarza" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_47" mmax_level="mention" mention_head="spotkań" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_14" span="word_44..word_47" mmax_level="mention" mention_head="obowiązków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_50..word_52" mmax_level="mention" mention_head="nawale" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_54" mmax_level="mention" mention_head="mi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_59" mmax_level="mention" mention_head="strzelaniu" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_18" span="word_63" mmax_level="mention" mention_head="Maj" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_19" span="word_62..word_63" mmax_level="mention" mention_head="Zdzisław" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_20" span="word_66..word_68" mmax_level="mention" mention_head="Bractwa" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_21" span="word_65..word_68" mmax_level="mention" mention_head="prezes" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_71..word_72" mmax_level="mention" 
mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_70..word_72" mmax_level="mention" mention_head="Król" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_24" span="word_77..word_79" mmax_level="mention" mention_head="Króla" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_25" span="word_76..word_79" mmax_level="mention" mention_head="tytuł" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_26" span="word_84..word_85" mmax_level="mention" mention_head="etapach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_27" span="word_88" mmax_level="mention" mention_head="finału" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_96" mmax_level="mention" mention_head="nich" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_29" span="word_99..word_101" mmax_level="mention" mention_head="Króla" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_30" span="word_98..word_101" mmax_level="mention" mention_head="tytuł" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_31" span="word_103" mmax_level="mention" mention_head="go" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_32" span="word_106..word_107" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_110" mmax_level="mention" mention_head="Król" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_34" span="word_113..word_115" mmax_level="mention" mention_head="nagród" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_35" span="word_118..word_119" mmax_level="mention" mention_head="tytuł" mention_group="set_11" 
near_identity="empty"></markable> + <markable id="markable_36" span="word_121..word_122" mmax_level="mention" mention_head="zaszczytem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_124" mmax_level="mention" mention_head="król" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_38" span="word_131..word_132" mmax_level="mention" mention_head="Parlamentu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_130..word_132" mmax_level="mention" mention_head="posiedzenia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_40" span="word_136" mmax_level="mention" mention_head="Maj" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_41" span="word_135..word_136" mmax_level="mention" mention_head="Zdzisław" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_42" span="word_144..word_145" mmax_level="mention" mention_head="Bractw" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_43" span="word_142..word_145" mmax_level="mention" mention_head="Spotkań" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_44" span="word_147..word_148" mmax_level="mention" mention_head="parada" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_156..word_157" mmax_level="mention" mention_head="godz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_160..word_161" mmax_level="mention" mention_head="braci" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_47" span="word_163..word_164" mmax_level="mention" mention_head="strojach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_167" mmax_level="mention" mention_head="Błoń" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_49" span="word_169" mmax_level="mention" mention_head="Rynek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_172" mmax_level="mention" mention_head="Piłsudskiego" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_174" mmax_level="mention" mention_head="Straszewskiego" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_52" span="word_181" mmax_level="mention" mention_head="istnienia" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_53" span="word_182..word_183" mmax_level="mention" mention_head="Bractwa" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_54" span="word_185" mmax_level="mention" mention_head="Krakowie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_182..word_185" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_187..word_188" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_191" mmax_level="mention" mention_head="ono" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_58" span="word_192..word_193" mmax_level="mention" mention_head="obywateli" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_197..word_198" mmax_level="mention" mention_head="rzemieślników" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_195..word_198" mmax_level="mention" mention_head="kupców" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_61" span="word_200..word_201" mmax_level="mention" mention_head="obronność" mention_group="empty" near_identity="empty"></markable> + 
<markable id="markable_62" span="word_203..word_205" mmax_level="mention" mention_head="świętem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_63" span="word_207" mmax_level="mention" mention_head="turniej" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_213" mmax_level="mention" mention_head="strzelnicy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_220..word_221" mmax_level="mention" mention_head="dni" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_229..word_230" mmax_level="mention" mention_head="żerdzi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_232" mmax_level="mention" mention_head="Brat" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_68" span="word_236..word_237" mmax_level="mention" mention_head="strzałem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_69" span="word_240" mmax_level="mention" mention_head="jego" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_70" span="word_239..word_241" mmax_level="mention" mention_head="fragment" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_71" span="word_243..word_245" mmax_level="mention" mention_head="miano" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_72" span="word_248..word_249" mmax_level="mention" mention_head="tytułem" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_73" span="word_258" mmax_level="mention" mention_head="przywileje" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_74" span="word_252..word_258" mmax_level="mention" mention_head="honory" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_75" 
span="word_260..word_261" mmax_level="mention" mention_head="Rada" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_76" span="word_263" mmax_level="mention" mention_head="jego" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_77" span="word_263..word_264" mmax_level="mention" mention_head="posiadacza" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_78" span="word_272" mmax_level="mention" mention_head="podatków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_79" span="word_270..word_272" mmax_level="mention" mention_head="obowiązku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_80" span="word_274..word_275" mmax_level="mention" mention_head="zwyczaj" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_81" span="word_279" mmax_level="mention" mention_head="dziś" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_gold/0_words.xml a/src/test/resources/teksty_mmax/teksty_gold/0_words.xml new file mode 100755 index 0000000..9b46167 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/0_words.xml @@ -0,0 +1,285 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">W</word> + <word id="word_2" tei_id="morph_1.1.2-seg">spotkaniu</word> + <word id="word_3" tei_id="morph_1.1.3-seg">weźmie</word> + <word id="word_4" tei_id="morph_1.1.4-seg">udział</word> + <word id="word_5" tei_id="morph_1.1.5-seg">blisko</word> + <word id="word_6" tei_id="morph_1.1.6-seg">7</word> + <word id="word_7" tei_id="morph_1.1.7-seg">tysięcy</word> + <word id="word_8" tei_id="morph_1.1.8-seg">braci</word> + <word id="word_9" tei_id="morph_1.1.9-seg">z</word> + <word id="word_10" tei_id="morph_1.1.10-seg">całej</word> + <word id="word_11" 
tei_id="morph_1.1.11-seg">Europy</word> + <word id="word_12" tei_id="morph_1.1.12-seg">,</word> + <word id="word_13" tei_id="morph_1.1.13-seg">ale</word> + <word id="word_14" tei_id="morph_1.1.14-seg">tylko</word> + <word id="word_15" tei_id="morph_1.1.15-seg">206</word> + <word id="word_16" tei_id="morph_1.1.16-seg">z</word> + <word id="word_17" tei_id="morph_1.1.17-seg">nich</word> + <word id="word_18" tei_id="morph_1.1.18-seg">będzie</word> + <word id="word_19" tei_id="morph_1.1.19-seg">ubiegało</word> + <word id="word_20" tei_id="morph_1.1.20-seg">się</word> + <word id="word_21" tei_id="morph_1.1.21-seg">o</word> + <word id="word_22" tei_id="morph_1.1.22-seg">tytuł</word> + <word id="word_23" tei_id="morph_1.1.23-seg">Europejskiego</word> + <word id="word_24" tei_id="morph_1.1.24-seg">Króla</word> + <word id="word_25" tei_id="morph_1.1.25-seg">Kurkowego</word> + <word id="word_26" tei_id="morph_1.1.26-seg">.</word> + <word id="word_27" tei_id="morph_1.1.27-seg">-</word> + <word id="word_28" tei_id="morph_1.1.28-seg">Wezmę</word> + <word id="word_29" tei_id="morph_1.1.29-seg">udział</word> + <word id="word_30" tei_id="morph_1.1.30-seg">w</word> + <word id="word_31" tei_id="morph_1.1.31-seg">strzelaniu</word> + <word id="word_32" tei_id="morph_1.1.32-seg">,</word> + <word id="word_33" tei_id="morph_1.1.33-seg">choć</word> + <word id="word_34" tei_id="morph_1.1.34-seg">moje</word> + <word id="word_35" tei_id="morph_1.1.35-seg">szanse</word> + <word id="word_36" tei_id="morph_1.1.36-seg">są</word> + <word id="word_37" tei_id="morph_1.1.37-seg">marne</word> + <word id="word_38" tei_id="morph_1.1.38-seg">.</word> + <word id="word_39" tei_id="morph_1.2.1-seg">Wynika</word> + <word id="word_40" tei_id="morph_1.2.2-seg">to</word> + <word id="word_41" tei_id="morph_1.2.3-seg">przede</word> + <word id="word_42" tei_id="morph_1.2.4-seg">wszystkim</word> + <word id="word_43" tei_id="morph_1.2.5-seg">z</word> + <word id="word_44" tei_id="morph_1.2.6-seg">moich</word> + <word 
id="word_45" tei_id="morph_1.2.7-seg">obowiązków</word> + <word id="word_46" tei_id="morph_1.2.8-seg">gospodarza</word> + <word id="word_47" tei_id="morph_1.2.9-seg">spotkań</word> + <word id="word_48" tei_id="morph_1.2.10-seg">;</word> + <word id="word_49" tei_id="morph_1.2.11-seg">w</word> + <word id="word_50" tei_id="morph_1.2.12-seg">tym</word> + <word id="word_51" tei_id="morph_1.2.13-seg">nawale</word> + <word id="word_52" tei_id="morph_1.2.14-seg">pracy</word> + <word id="word_53" tei_id="morph_1.2.15-seg">ciężko</word> + <word id="word_54" tei_id="morph_1.2.16-seg">mi</word> + <word id="word_55" tei_id="morph_1.2.17-seg">będzie</word> + <word id="word_56" tei_id="morph_1.2.18-seg">się</word> + <word id="word_57" tei_id="morph_1.2.19-seg">skupić</word> + <word id="word_58" tei_id="morph_1.2.20-seg">na</word> + <word id="word_59" tei_id="morph_1.2.21-seg">strzelaniu</word> + <word id="word_60" tei_id="morph_1.2.22-seg">-</word> + <word id="word_61" tei_id="morph_1.2.23-seg">przewiduje</word> + <word id="word_62" tei_id="morph_1.2.24-seg">Zdzisław</word> + <word id="word_63" tei_id="morph_1.2.25-seg">Maj</word> + <word id="word_64" tei_id="morph_1.2.26-seg">,</word> + <word id="word_65" tei_id="morph_1.2.27-seg">prezes</word> + <word id="word_66" tei_id="morph_1.2.28-seg">krakowskiego</word> + <word id="word_67" tei_id="morph_1.2.29-seg">Bractwa</word> + <word id="word_68" tei_id="morph_1.2.30-seg">Kurkowego</word> + <word id="word_69" tei_id="morph_1.2.31-seg">,</word> + <word id="word_70" tei_id="morph_1.2.32-seg">panujący</word> + <word id="word_71" tei_id="morph_1.2.33-seg">Król</word> + <word id="word_72" tei_id="morph_1.2.34-seg">Kurkowy</word> + <word id="word_73" tei_id="morph_1.2.35-seg" lastinpar="true">.</word> + <word id="word_74" tei_id="morph_2.3.1-seg">Strzelanie</word> + <word id="word_75" tei_id="morph_2.3.2-seg">o</word> + <word id="word_76" tei_id="morph_2.3.3-seg">tytuł</word> + <word id="word_77" 
tei_id="morph_2.3.4-seg">Europejskiego</word> + <word id="word_78" tei_id="morph_2.3.5-seg">Króla</word> + <word id="word_79" tei_id="morph_2.3.6-seg">Kurkowego</word> + <word id="word_80" tei_id="morph_2.3.7-seg">będzie</word> + <word id="word_81" tei_id="morph_2.3.8-seg">się</word> + <word id="word_82" tei_id="morph_2.3.9-seg">odbywało</word> + <word id="word_83" tei_id="morph_2.3.10-seg">w</word> + <word id="word_84" tei_id="morph_2.3.11-seg">kilku</word> + <word id="word_85" tei_id="morph_2.3.12-seg">etapach</word> + <word id="word_86" tei_id="morph_2.3.13-seg">.</word> + <word id="word_87" tei_id="morph_2.4.1-seg">Do</word> + <word id="word_88" tei_id="morph_2.4.2-seg">finału</word> + <word id="word_89" tei_id="morph_2.4.3-seg">zostanie</word> + <word id="word_90" tei_id="morph_2.4.4-seg">dopuszczonych</word> + <word id="word_91" tei_id="morph_2.4.5-seg">27</word> + <word id="word_92" tei_id="morph_2.4.6-seg">braci</word> + <word id="word_93" tei_id="morph_2.4.7-seg">-</word> + <word id="word_94" tei_id="morph_2.4.8-seg">jeden</word> + <word id="word_95" tei_id="morph_2.4.9-seg">z</word> + <word id="word_96" tei_id="morph_2.4.10-seg">nich</word> + <word id="word_97" tei_id="morph_2.4.11-seg">otrzyma</word> + <word id="word_98" tei_id="morph_2.4.12-seg">tytuł</word> + <word id="word_99" tei_id="morph_2.4.13-seg">Europejskiego</word> + <word id="word_100" tei_id="morph_2.4.14-seg">Króla</word> + <word id="word_101" tei_id="morph_2.4.15-seg">Kurkowego</word> + <word id="word_102" tei_id="morph_2.4.16-seg">odbierając</word> + <word id="word_103" tei_id="morph_2.4.17-seg">go</word> + <word id="word_104" tei_id="morph_2.4.18-seg">obecnie</word> + <word id="word_105" tei_id="morph_2.4.19-seg">panującemu</word> + <word id="word_106" tei_id="morph_2.4.20-seg">Wilfriedowi</word> + <word id="word_107" tei_id="morph_2.4.21-seg">Stammermannowi</word> + <word id="word_108" tei_id="morph_2.4.22-seg">.</word> + <word id="word_109" tei_id="morph_2.4.23-seg">-</word> + <word 
id="word_110" tei_id="morph_2.4.24-seg">Król</word> + <word id="word_111" tei_id="morph_2.4.25-seg">nie</word> + <word id="word_112" tei_id="morph_2.4.26-seg">otrzymuje</word> + <word id="word_113" tei_id="morph_2.4.27-seg">żadnych</word> + <word id="word_114" tei_id="morph_2.4.28-seg">nagród</word> + <word id="word_115" tei_id="morph_2.4.29-seg">finansowych</word> + <word id="word_116" tei_id="morph_2.4.30-seg">,</word> + <word id="word_117" tei_id="morph_2.4.31-seg">ale</word> + <word id="word_118" tei_id="morph_2.4.32-seg">taki</word> + <word id="word_119" tei_id="morph_2.4.33-seg">tytuł</word> + <word id="word_120" tei_id="morph_2.4.34-seg">jest</word> + <word id="word_121" tei_id="morph_2.4.35-seg">ogromnym</word> + <word id="word_122" tei_id="morph_2.4.36-seg">zaszczytem</word> + <word id="word_123" tei_id="morph_2.4.37-seg">;</word> + <word id="word_124" tei_id="morph_2.4.38-seg">król</word> + <word id="word_125" tei_id="morph_2.4.39-seg">jest</word> + <word id="word_126" tei_id="morph_2.4.40-seg">np</word> + <word id="word_127" tei_id="morph_2.4.41-seg">.</word> + <word id="word_128" tei_id="morph_2.4.42-seg">zapraszany</word> + <word id="word_129" tei_id="morph_2.4.43-seg">na</word> + <word id="word_130" tei_id="morph_2.4.44-seg">posiedzenia</word> + <word id="word_131" tei_id="morph_2.4.45-seg">Parlamentu</word> + <word id="word_132" tei_id="morph_2.4.46-seg">Europejskiego</word> + <word id="word_133" tei_id="morph_2.4.47-seg">-</word> + <word id="word_134" tei_id="morph_2.4.48-seg">mówi</word> + <word id="word_135" tei_id="morph_2.4.49-seg">Zdzisław</word> + <word id="word_136" tei_id="morph_2.4.50-seg">Maj</word> + <word id="word_137" tei_id="morph_2.4.51-seg" lastinpar="true">.</word> + <word id="word_138" tei_id="morph_3.5.1-seg">Największą</word> + <word id="word_139" tei_id="morph_3.5.2-seg">atrakcją</word> + <word id="word_140" tei_id="morph_3.5.3-seg">12</word> + <word id="word_141" tei_id="morph_3.5.4-seg">.</word> + <word id="word_142" 
tei_id="morph_3.6.1-seg">Europejskich</word> + <word id="word_143" tei_id="morph_3.6.2-seg">Spotkań</word> + <word id="word_144" tei_id="morph_3.6.3-seg">Bractw</word> + <word id="word_145" tei_id="morph_3.6.4-seg">Strzeleckich</word> + <word id="word_146" tei_id="morph_3.6.5-seg">będzie</word> + <word id="word_147" tei_id="morph_3.6.6-seg">wielka</word> + <word id="word_148" tei_id="morph_3.6.7-seg">parada</word> + <word id="word_149" tei_id="morph_3.6.8-seg">,</word> + <word id="word_150" tei_id="morph_3.6.9-seg">która</word> + <word id="word_151" tei_id="morph_3.6.10-seg">rozpocznie</word> + <word id="word_152" tei_id="morph_3.6.11-seg">się</word> + <word id="word_153" tei_id="morph_3.6.12-seg">w</word> + <word id="word_154" tei_id="morph_3.6.13-seg">niedzielę</word> + <word id="word_155" tei_id="morph_3.6.14-seg">o</word> + <word id="word_156" tei_id="morph_3.6.15-seg">godz</word> + <word id="word_157" tei_id="morph_3.6.16-seg">.</word> + <word id="word_158" tei_id="morph_3.6.17-seg">13</word> + <word id="word_159" tei_id="morph_3.6.18-seg">.</word> + <word id="word_160" tei_id="morph_3.7.1-seg">Kilkuset</word> + <word id="word_161" tei_id="morph_3.7.2-seg">braci</word> + <word id="word_162" tei_id="morph_3.7.3-seg">w</word> + <word id="word_163" tei_id="morph_3.7.4-seg">historycznych</word> + <word id="word_164" tei_id="morph_3.7.5-seg">strojach</word> + <word id="word_165" tei_id="morph_3.7.6-seg">przejdzie</word> + <word id="word_166" tei_id="morph_3.7.7-seg">z</word> + <word id="word_167" tei_id="morph_3.7.8-seg">Błoń</word> + <word id="word_168" tei_id="morph_3.7.9-seg">na</word> + <word id="word_169" tei_id="morph_3.7.10-seg">Rynek</word> + <word id="word_170" tei_id="morph_3.7.11-seg">ulicami</word> + <word id="word_171" tei_id="morph_3.7.12-seg">:</word> + <word id="word_172" tei_id="morph_3.7.13-seg">Piłsudskiego</word> + <word id="word_173" tei_id="morph_3.7.14-seg">,</word> + <word id="word_174" tei_id="morph_3.7.15-seg">Straszewskiego</word> + <word 
id="word_175" tei_id="morph_3.7.16-seg">,</word> + <word id="word_176" tei_id="morph_3.7.17-seg">Franciszkańską</word> + <word id="word_177" tei_id="morph_3.7.18-seg">i</word> + <word id="word_178" tei_id="morph_3.7.19-seg">Grodzką</word> + <word id="word_179" tei_id="morph_3.7.20-seg" lastinpar="true">.</word> + <word id="word_180" tei_id="morph_4.8.1-seg">Początki</word> + <word id="word_181" tei_id="morph_4.8.2-seg">istnienia</word> + <word id="word_182" tei_id="morph_4.8.3-seg">Bractwa</word> + <word id="word_183" tei_id="morph_4.8.4-seg">Kurkowego</word> + <word id="word_184" tei_id="morph_4.8.5-seg">w</word> + <word id="word_185" tei_id="morph_4.8.6-seg">Krakowie</word> + <word id="word_186" tei_id="morph_4.8.7-seg">sięgają</word> + <word id="word_187" tei_id="morph_4.8.8-seg">XIII</word> + <word id="word_188" tei_id="morph_4.8.9-seg">wieku</word> + <word id="word_189" tei_id="morph_4.8.10-seg">.</word> + <word id="word_190" tei_id="morph_4.9.1-seg">Skupiało</word> + <word id="word_191" tei_id="morph_4.9.2-seg">ono</word> + <word id="word_192" tei_id="morph_4.9.3-seg">znamienitych</word> + <word id="word_193" tei_id="morph_4.9.4-seg">obywateli</word> + <word id="word_194" tei_id="morph_4.9.5-seg">,</word> + <word id="word_195" tei_id="morph_4.9.6-seg">kupców</word> + <word id="word_196" tei_id="morph_4.9.7-seg">i</word> + <word id="word_197" tei_id="morph_4.9.8-seg">rzemieślników</word> + <word id="word_198" tei_id="morph_4.9.9-seg">pragnących</word> + <word id="word_199" tei_id="morph_4.9.10-seg">wspomóc</word> + <word id="word_200" tei_id="morph_4.9.11-seg">obronność</word> + <word id="word_201" tei_id="morph_4.9.12-seg">miasta</word> + <word id="word_202" tei_id="morph_4.9.13-seg">.</word> + <word id="word_203" tei_id="morph_4.10.1-seg">Wielkim</word> + <word id="word_204" tei_id="morph_4.10.2-seg">świętem</word> + <word id="word_205" tei_id="morph_4.10.3-seg">bractwa</word> + <word id="word_206" tei_id="morph_4.10.4-seg">był</word> + <word id="word_207" 
tei_id="morph_4.10.5-seg">turniej</word> + <word id="word_208" tei_id="morph_4.10.6-seg">,</word> + <word id="word_209" tei_id="morph_4.10.7-seg">który</word> + <word id="word_210" tei_id="morph_4.10.8-seg">odbywał</word> + <word id="word_211" tei_id="morph_4.10.9-seg">się</word> + <word id="word_212" tei_id="morph_4.10.10-seg">na</word> + <word id="word_213" tei_id="morph_4.10.11-seg">strzelnicy</word> + <word id="word_214" tei_id="morph_4.10.12-seg">zwanej</word> + <word id="word_215" tei_id="morph_4.10.13-seg">Celestatem</word> + <word id="word_216" tei_id="morph_4.10.14-seg">.</word> + <word id="word_217" tei_id="morph_4.11.1-seg">Zawody</word> + <word id="word_218" tei_id="morph_4.11.2-seg">trwały</word> + <word id="word_219" tei_id="morph_4.11.3-seg">zwykle</word> + <word id="word_220" tei_id="morph_4.11.4-seg">trzy</word> + <word id="word_221" tei_id="morph_4.11.5-seg">dni</word> + <word id="word_222" tei_id="morph_4.11.6-seg">.</word> + <word id="word_223" tei_id="morph_4.12.1-seg">Strzelano</word> + <word id="word_224" tei_id="morph_4.12.2-seg">do</word> + <word id="word_225" tei_id="morph_4.12.3-seg">drewnianego</word> + <word id="word_226" tei_id="morph_4.12.4-seg">kura</word> + <word id="word_227" tei_id="morph_4.12.5-seg">umocowanego</word> + <word id="word_228" tei_id="morph_4.12.6-seg">na</word> + <word id="word_229" tei_id="morph_4.12.7-seg">wysokiej</word> + <word id="word_230" tei_id="morph_4.12.8-seg">żerdzi</word> + <word id="word_231" tei_id="morph_4.12.9-seg">.</word> + <word id="word_232" tei_id="morph_4.13.1-seg">Brat</word> + <word id="word_233" tei_id="morph_4.13.2-seg">,</word> + <word id="word_234" tei_id="morph_4.13.3-seg">który</word> + <word id="word_235" tei_id="morph_4.13.4-seg">zdołał</word> + <word id="word_236" tei_id="morph_4.13.5-seg">celnym</word> + <word id="word_237" tei_id="morph_4.13.6-seg">strzałem</word> + <word id="word_238" tei_id="morph_4.13.7-seg">strącić</word> + <word id="word_239" 
tei_id="morph_4.13.8-seg">ostatni</word> + <word id="word_240" tei_id="morph_4.13.9-seg">jego</word> + <word id="word_241" tei_id="morph_4.13.10-seg">fragment</word> + <word id="word_242" tei_id="morph_4.13.11-seg">zdobywał</word> + <word id="word_243" tei_id="morph_4.13.12-seg">miano</word> + <word id="word_244" tei_id="morph_4.13.13-seg">Króla</word> + <word id="word_245" tei_id="morph_4.13.14-seg">Kurkowego</word> + <word id="word_246" tei_id="morph_4.13.15-seg">.</word> + <word id="word_247" tei_id="morph_4.14.1-seg">Z</word> + <word id="word_248" tei_id="morph_4.14.2-seg">tym</word> + <word id="word_249" tei_id="morph_4.14.3-seg">tytułem</word> + <word id="word_250" tei_id="morph_4.14.4-seg">wiązały</word> + <word id="word_251" tei_id="morph_4.14.5-seg">się</word> + <word id="word_252" tei_id="morph_4.14.6-seg">nie</word> + <word id="word_253" tei_id="morph_4.14.7-seg">tylko</word> + <word id="word_254" tei_id="morph_4.14.8-seg">honory</word> + <word id="word_255" tei_id="morph_4.14.9-seg">,</word> + <word id="word_256" tei_id="morph_4.14.10-seg">ale</word> + <word id="word_257" tei_id="morph_4.14.11-seg">także</word> + <word id="word_258" tei_id="morph_4.14.12-seg">przywileje</word> + <word id="word_259" tei_id="morph_4.14.13-seg">:</word> + <word id="word_260" tei_id="morph_4.14.14-seg">Rada</word> + <word id="word_261" tei_id="morph_4.14.15-seg">Miejska</word> + <word id="word_262" tei_id="morph_4.14.16-seg">zwalniała</word> + <word id="word_263" tei_id="morph_4.14.17-seg">jego</word> + <word id="word_264" tei_id="morph_4.14.18-seg">posiadacza</word> + <word id="word_265" tei_id="morph_4.14.19-seg">m</word> + <word id="word_266" tei_id="morph_4.14.20-seg">.</word> + <word id="word_267" tei_id="morph_4.14.21-seg">in</word> + <word id="word_268" tei_id="morph_4.14.22-seg">.</word> + <word id="word_269" tei_id="morph_4.14.23-seg">z</word> + <word id="word_270" tei_id="morph_4.14.24-seg">obowiązku</word> + <word id="word_271" 
tei_id="morph_4.14.25-seg">płacenia</word> + <word id="word_272" tei_id="morph_4.14.26-seg">podatków</word> + <word id="word_273" tei_id="morph_4.14.27-seg">(</word> + <word id="word_274" tei_id="morph_4.14.28-seg">ten</word> + <word id="word_275" tei_id="morph_4.14.29-seg">zwyczaj</word> + <word id="word_276" tei_id="morph_4.14.30-seg">utrzymał</word> + <word id="word_277" tei_id="morph_4.14.31-seg">się</word> + <word id="word_278" tei_id="morph_4.14.32-seg">do</word> + <word id="word_279" tei_id="morph_4.14.33-seg">dziś</word> + <word id="word_280" tei_id="morph_4.14.34-seg">)</word> + <word id="word_281" tei_id="morph_4.14.35-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_gold/1.mmax a/src/test/resources/teksty_mmax/teksty_gold/1.mmax new file mode 100755 index 0000000..5e74016 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/1.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>1_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_gold/1_mentions.xml a/src/test/resources/teksty_mmax/teksty_gold/1_mentions.xml new file mode 100755 index 0000000..319dae7 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/1_mentions.xml @@ -0,0 +1,79 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_4" mmax_level="mention" mention_head="Wrocławiu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_6" mmax_level="mention" mention_head="kontrolerów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_5..word_6" mmax_level="mention" mention_head="płace" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_12..word_13" mmax_level="mention" mention_head="wezwań" mention_group="empty" near_identity="empty"></markable> + 
<markable id="markable_5" span="word_15" mmax_level="mention" mention_head="zapłaty" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_26..word_27" mmax_level="mention" mention_head="pracowników" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_29..word_30" mmax_level="mention" mention_head="razy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_8" span="word_35" mmax_level="mention" mention_head="premie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_41" mmax_level="mention" mention_head="skuteczność" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_45" mmax_level="mention" mention_head="pasażerów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_43..word_45" mmax_level="mention" mention_head="skargi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_40..word_45" mmax_level="mention" mention_head="uwagę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_48..word_49" mmax_level="mention" mention_head="Monika" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_14" span="word_52" mmax_level="mention" mention_head="Wydziału" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_52..word_53" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_56" mmax_level="mention" mention_head="Miasta" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_17" span="word_55..word_56" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_18" span="word_51..word_56" mmax_level="mention" 
mention_head="kierownik" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_19" span="word_58..word_59" mmax_level="mention" mention_head="kontrolerzy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_67" mmax_level="mention" mention_head="złotych" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_21" span="word_73" mmax_level="mention" mention_head="Miasto" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_22" span="word_78..word_79" mmax_level="mention" mention_head="spółki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_86..word_87" mmax_level="mention" mention_head="projekt" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_24" span="word_92" mmax_level="mention" mention_head="udziałów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_91..word_92" mmax_level="mention" mention_head="wniesienie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_95..word_96" mmax_level="mention" mention_head="Budownictwa" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_27" span="word_97..word_99" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_94..word_99" mmax_level="mention" mention_head="Towarzystwie" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_29" span="word_102..word_103" mmax_level="mention" mention_head="Budownictwa" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_30" span="word_101..word_103" mmax_level="mention" mention_head="Towarzystwie" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_31" span="word_104..word_107" mmax_level="mention" 
mention_head="Dom" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_32" span="word_101..word_107" mmax_level="mention" mention_head="" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_33" span="word_111..word_112" mmax_level="mention" mention_head="Budownictwa" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_34" span="word_109..word_112" mmax_level="mention" mention_head="Towarzystwa" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_35" span="word_115" mmax_level="mention" mention_head="piątek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_117..word_118" mmax_level="mention" mention_head="propozycję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_121..word_122" mmax_level="mention" mention_head="Gospodarki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_120..word_122" mmax_level="mention" mention_head="Komisja" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_39" span="word_124..word_125" mmax_level="mention" mention_head="Polityki" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_40" span="word_120..word_125" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_129" mmax_level="mention" mention_head="wtorek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_132" mmax_level="mention" mention_head="nią" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_43" span="word_133..word_134" mmax_level="mention" mention_head="Rada" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_44" span="word_138" mmax_level="mention" mention_head="połączenia" 
mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_137..word_138" mmax_level="mention" mention_head="Pomysł" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_46" span="word_142" mmax_level="mention" mention_head="wątpliwości" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_47" span="word_147" mmax_level="mention" mention_head="kosztów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_151" mmax_level="mention" mention_head="Lewandowski" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_150..word_151" mmax_level="mention" mention_head="Tomasz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_154" mmax_level="mention" mention_head="LiD" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_157" mmax_level="mention" mention_head="komisji" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_52" span="word_156..word_157" mmax_level="mention" mention_head="członek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_153..word_157" mmax_level="mention" mention_head="radny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_163" mmax_level="mention" mention_head="dyskusja" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_55" span="word_166" mmax_level="mention" mention_head="towarzystw" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_56" span="word_165..word_166" mmax_level="mention" mention_head="przyszłości" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_169" mmax_level="mention" mention_head="rząd" mention_group="empty" near_identity="empty"></markable> + 
<markable id="markable_58" span="word_173" mmax_level="mention" mention_head="ustawy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_172..word_173" mmax_level="mention" mention_head="zmianą" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_178" mmax_level="mention" mention_head="mieszkań" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_61" span="word_181..word_182" mmax_level="mention" mention_head="budownictwa" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_62" span="word_180..word_182" mmax_level="mention" mention_head="towarzystwach" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_63" span="word_186..word_188" mmax_level="mention" mention_head="sytuację" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_197..word_198" mmax_level="mention" mention_head="kroków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_196..word_198" mmax_level="mention" mention_head="podjęcie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_200" mmax_level="mention" mention_head="miasto" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_67" span="word_202..word_203" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_68" span="word_206" mmax_level="mention" mention_head="klubu" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_69" span="word_205..word_206" mmax_level="mention" mention_head="szef" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_70" span="word_208" mmax_level="mention" mention_head="PiS" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_71" 
span="word_214..word_215" mmax_level="mention" mention_head="względów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_72" span="word_217..word_218" mmax_level="mention" mention_head="spółki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_73" span="word_216..word_218" mmax_level="mention" mention_head="utworzenie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_74" span="word_241..word_242" mmax_level="mention" mention_head="klub" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_75" span="word_248..word_250" mmax_level="mention" mention_head="uchwały" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_gold/1_words.xml a/src/test/resources/teksty_mmax/teksty_gold/1_words.xml new file mode 100755 index 0000000..739e577 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/1_words.xml @@ -0,0 +1,255 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Z</word> + <word id="word_2" tei_id="morph_1.1.2-seg">kolei</word> + <word id="word_3" tei_id="morph_1.1.3-seg">we</word> + <word id="word_4" tei_id="morph_1.1.4-seg">Wrocławiu</word> + <word id="word_5" tei_id="morph_1.1.5-seg">płace</word> + <word id="word_6" tei_id="morph_1.1.6-seg">kontrolerów</word> + <word id="word_7" tei_id="morph_1.1.7-seg">zostały</word> + <word id="word_8" tei_id="morph_1.1.8-seg">zupełnie</word> + <word id="word_9" tei_id="morph_1.1.9-seg">uniezależnione</word> + <word id="word_10" tei_id="morph_1.1.10-seg">od</word> + <word id="word_11" tei_id="morph_1.1.11-seg">liczby</word> + <word id="word_12" tei_id="morph_1.1.12-seg">wystawionych</word> + <word id="word_13" tei_id="morph_1.1.13-seg">wezwań</word> + <word id="word_14" tei_id="morph_1.1.14-seg">do</word> + <word id="word_15" 
tei_id="morph_1.1.15-seg">zapłaty</word> + <word id="word_16" tei_id="morph_1.1.16-seg" lastinpar="true">.</word> + <word id="word_17" tei_id="morph_2.2.1-seg">-</word> + <word id="word_18" tei_id="morph_2.2.2-seg">Nie</word> + <word id="word_19" tei_id="morph_2.2.3-seg">oznacza</word> + <word id="word_20" tei_id="morph_2.2.4-seg">to</word> + <word id="word_21" tei_id="morph_2.2.5-seg">jednak</word> + <word id="word_22" tei_id="morph_2.2.6-seg">,</word> + <word id="word_23" tei_id="morph_2.2.7-seg">że</word> + <word id="word_24" tei_id="morph_2.2.8-seg">nie</word> + <word id="word_25" tei_id="morph_2.2.9-seg">nagradzamy</word> + <word id="word_26" tei_id="morph_2.2.10-seg">najskuteczniejszych</word> + <word id="word_27" tei_id="morph_2.2.11-seg">pracowników</word> + <word id="word_28" tei_id="morph_2.2.12-seg">.</word> + <word id="word_29" tei_id="morph_2.3.1-seg">Kilka</word> + <word id="word_30" tei_id="morph_2.3.2-seg">razy</word> + <word id="word_31" tei_id="morph_2.3.3-seg">w</word> + <word id="word_32" tei_id="morph_2.3.4-seg">roku</word> + <word id="word_33" tei_id="morph_2.3.5-seg">przyznawane</word> + <word id="word_34" tei_id="morph_2.3.6-seg">są</word> + <word id="word_35" tei_id="morph_2.3.7-seg">premie</word> + <word id="word_36" tei_id="morph_2.3.8-seg">.</word> + <word id="word_37" tei_id="morph_2.4.1-seg">Bierzemy</word> + <word id="word_38" tei_id="morph_2.4.2-seg">wtedy</word> + <word id="word_39" tei_id="morph_2.4.3-seg">pod</word> + <word id="word_40" tei_id="morph_2.4.4-seg">uwagę</word> + <word id="word_41" tei_id="morph_2.4.5-seg">skuteczność</word> + <word id="word_42" tei_id="morph_2.4.6-seg">i</word> + <word id="word_43" tei_id="morph_2.4.7-seg">ewentualne</word> + <word id="word_44" tei_id="morph_2.4.8-seg">skargi</word> + <word id="word_45" tei_id="morph_2.4.9-seg">pasażerów</word> + <word id="word_46" tei_id="morph_2.4.10-seg">-</word> + <word id="word_47" tei_id="morph_2.4.11-seg">wyjaśnia</word> + <word id="word_48" 
tei_id="morph_2.4.12-seg">Monika</word> + <word id="word_49" tei_id="morph_2.4.13-seg">Poważna</word> + <word id="word_50" tei_id="morph_2.4.14-seg">,</word> + <word id="word_51" tei_id="morph_2.4.15-seg">kierownik</word> + <word id="word_52" tei_id="morph_2.4.16-seg">Wydziału</word> + <word id="word_53" tei_id="morph_2.4.17-seg">Transportu</word> + <word id="word_54" tei_id="morph_2.4.18-seg">wrocławskiego</word> + <word id="word_55" tei_id="morph_2.4.19-seg">Urzędu</word> + <word id="word_56" tei_id="morph_2.4.20-seg">Miasta</word> + <word id="word_57" tei_id="morph_2.4.21-seg" lastinpar="true">.</word> + <word id="word_58" tei_id="morph_3.5.1-seg">Tamtejsi</word> + <word id="word_59" tei_id="morph_3.5.2-seg">kontrolerzy</word> + <word id="word_60" tei_id="morph_3.5.3-seg">zarabiają</word> + <word id="word_61" tei_id="morph_3.5.4-seg">(</word> + <word id="word_62" tei_id="morph_3.5.5-seg">bez</word> + <word id="word_63" tei_id="morph_3.5.6-seg">premii</word> + <word id="word_64" tei_id="morph_3.5.7-seg">)</word> + <word id="word_65" tei_id="morph_3.5.8-seg">około</word> + <word id="word_66" tei_id="morph_3.5.9-seg">1200</word> + <word id="word_67" tei_id="morph_3.5.10-seg">złotych</word> + <word id="word_68" tei_id="morph_3.5.11-seg">miesięcznie</word> + <word id="word_69" tei_id="morph_3.5.12-seg">(</word> + <word id="word_70" tei_id="morph_3.5.13-seg">netto</word> + <word id="word_71" tei_id="morph_3.5.14-seg">)</word> + <word id="word_72" tei_id="morph_3.5.15-seg" lastinpar="true">.</word> + <word id="word_73" tei_id="morph_4.6.1-seg">Miasto</word> + <word id="word_74" tei_id="morph_4.6.2-seg">postanowiło</word> + <word id="word_75" tei_id="morph_4.6.3-seg">za</word> + <word id="word_76" tei_id="morph_4.6.4-seg">jednym</word> + <word id="word_77" tei_id="morph_4.6.5-seg">zamachem</word> + <word id="word_78" tei_id="morph_4.6.6-seg">trzy</word> + <word id="word_79" tei_id="morph_4.6.7-seg">spółki</word> + <word id="word_80" 
tei_id="morph_4.6.8-seg">połączyć</word> + <word id="word_81" tei_id="morph_4.6.9-seg">w</word> + <word id="word_82" tei_id="morph_4.6.10-seg">jedną</word> + <word id="word_83" tei_id="morph_4.6.11-seg">.</word> + <word id="word_84" tei_id="morph_4.7.1-seg">Przygotowany</word> + <word id="word_85" tei_id="morph_4.7.2-seg">jest</word> + <word id="word_86" tei_id="morph_4.7.3-seg">projekt</word> + <word id="word_87" tei_id="morph_4.7.4-seg">uchwały</word> + <word id="word_88" tei_id="morph_4.7.5-seg">,</word> + <word id="word_89" tei_id="morph_4.7.6-seg">który</word> + <word id="word_90" tei_id="morph_4.7.7-seg">przewiduje</word> + <word id="word_91" tei_id="morph_4.7.8-seg">wniesienie</word> + <word id="word_92" tei_id="morph_4.7.9-seg">udziałów</word> + <word id="word_93" tei_id="morph_4.7.10-seg">w</word> + <word id="word_94" tei_id="morph_4.7.11-seg">Towarzystwie</word> + <word id="word_95" tei_id="morph_4.7.12-seg">Budownictwa</word> + <word id="word_96" tei_id="morph_4.7.13-seg">Społecznego</word> + <word id="word_97" tei_id="morph_4.7.14-seg">"</word> + <word id="word_98" tei_id="morph_4.7.15-seg">Wielkopolska</word> + <word id="word_99" tei_id="morph_4.7.16-seg">"</word> + <word id="word_100" tei_id="morph_4.7.17-seg">oraz</word> + <word id="word_101" tei_id="morph_4.7.18-seg">Towarzystwie</word> + <word id="word_102" tei_id="morph_4.7.19-seg">Budownictwa</word> + <word id="word_103" tei_id="morph_4.7.20-seg">Społecznego</word> + <word id="word_104" tei_id="morph_4.7.21-seg">"</word> + <word id="word_105" tei_id="morph_4.7.22-seg">Nasz</word> + <word id="word_106" tei_id="morph_4.7.23-seg">Dom</word> + <word id="word_107" tei_id="morph_4.7.24-seg">"</word> + <word id="word_108" tei_id="morph_4.7.25-seg">do</word> + <word id="word_109" tei_id="morph_4.7.26-seg">Poznańskiego</word> + <word id="word_110" tei_id="morph_4.7.27-seg">Towarzystwa</word> + <word id="word_111" tei_id="morph_4.7.28-seg">Budownictwa</word> + <word id="word_112" 
tei_id="morph_4.7.29-seg">Społecznego</word> + <word id="word_113" tei_id="morph_4.7.30-seg">.</word> + <word id="word_114" tei_id="morph_4.8.1-seg">W</word> + <word id="word_115" tei_id="morph_4.8.2-seg">piątek</word> + <word id="word_116" tei_id="morph_4.8.3-seg">opiniować</word> + <word id="word_117" tei_id="morph_4.8.4-seg">tę</word> + <word id="word_118" tei_id="morph_4.8.5-seg">propozycję</word> + <word id="word_119" tei_id="morph_4.8.6-seg">będzie</word> + <word id="word_120" tei_id="morph_4.8.7-seg">Komisja</word> + <word id="word_121" tei_id="morph_4.8.8-seg">Gospodarki</word> + <word id="word_122" tei_id="morph_4.8.9-seg">Komunalnej</word> + <word id="word_123" tei_id="morph_4.8.10-seg">i</word> + <word id="word_124" tei_id="morph_4.8.11-seg">Polityki</word> + <word id="word_125" tei_id="morph_4.8.12-seg">Mieszkaniowej</word> + <word id="word_126" tei_id="morph_4.8.13-seg">,</word> + <word id="word_127" tei_id="morph_4.8.14-seg">a</word> + <word id="word_128" tei_id="morph_4.8.15-seg">we</word> + <word id="word_129" tei_id="morph_4.8.16-seg">wtorek</word> + <word id="word_130" tei_id="morph_4.8.17-seg">zajmie</word> + <word id="word_131" tei_id="morph_4.8.18-seg">się</word> + <word id="word_132" tei_id="morph_4.8.19-seg">nią</word> + <word id="word_133" tei_id="morph_4.8.20-seg">Rada</word> + <word id="word_134" tei_id="morph_4.8.21-seg">Miasta</word> + <word id="word_135" tei_id="morph_4.8.22-seg" lastinpar="true">.</word> + <word id="word_136" tei_id="morph_5.9.1-seg">-</word> + <word id="word_137" tei_id="morph_5.9.2-seg">Pomysł</word> + <word id="word_138" tei_id="morph_5.9.3-seg">połączenia</word> + <word id="word_139" tei_id="morph_5.9.4-seg">TBS-ów</word> + <word id="word_140" tei_id="morph_5.9.5-seg">nie</word> + <word id="word_141" tei_id="morph_5.9.6-seg">budzi</word> + <word id="word_142" tei_id="morph_5.9.7-seg">wątpliwości</word> + <word id="word_143" tei_id="morph_5.9.8-seg">z</word> + <word id="word_144" 
tei_id="morph_5.9.9-seg">punktu</word> + <word id="word_145" tei_id="morph_5.9.10-seg">widzenia</word> + <word id="word_146" tei_id="morph_5.9.11-seg">racjonalizacji</word> + <word id="word_147" tei_id="morph_5.9.12-seg">kosztów</word> + <word id="word_148" tei_id="morph_5.9.13-seg">-</word> + <word id="word_149" tei_id="morph_5.9.14-seg">twierdzi</word> + <word id="word_150" tei_id="morph_5.9.15-seg">Tomasz</word> + <word id="word_151" tei_id="morph_5.9.16-seg">Lewandowski</word> + <word id="word_152" tei_id="morph_5.9.17-seg">,</word> + <word id="word_153" tei_id="morph_5.9.18-seg">radny</word> + <word id="word_154" tei_id="morph_5.9.19-seg">LiD</word> + <word id="word_155" tei_id="morph_5.9.20-seg">i</word> + <word id="word_156" tei_id="morph_5.9.21-seg">członek</word> + <word id="word_157" tei_id="morph_5.9.22-seg">komisji</word> + <word id="word_158" tei_id="morph_5.9.23-seg">.</word> + <word id="word_159" tei_id="morph_5.9.24-seg">-</word> + <word id="word_160" tei_id="morph_5.9.25-seg">Potrzebna</word> + <word id="word_161" tei_id="morph_5.9.26-seg">jest</word> + <word id="word_162" tei_id="morph_5.9.27-seg">jednak</word> + <word id="word_163" tei_id="morph_5.9.28-seg">dyskusja</word> + <word id="word_164" tei_id="morph_5.9.29-seg">o</word> + <word id="word_165" tei_id="morph_5.9.30-seg">przyszłości</word> + <word id="word_166" tei_id="morph_5.9.31-seg">towarzystw</word> + <word id="word_167" tei_id="morph_5.9.32-seg">.</word> + <word id="word_168" tei_id="morph_5.10.1-seg">Obecnie</word> + <word id="word_169" tei_id="morph_5.10.2-seg">rząd</word> + <word id="word_170" tei_id="morph_5.10.3-seg">pracuje</word> + <word id="word_171" tei_id="morph_5.10.4-seg">nad</word> + <word id="word_172" tei_id="morph_5.10.5-seg">zmianą</word> + <word id="word_173" tei_id="morph_5.10.6-seg">ustawy</word> + <word id="word_174" tei_id="morph_5.10.7-seg">,</word> + <word id="word_175" tei_id="morph_5.10.8-seg">która</word> + <word id="word_176" 
tei_id="morph_5.10.9-seg">przewiduje</word> + <word id="word_177" tei_id="morph_5.10.10-seg">wykup</word> + <word id="word_178" tei_id="morph_5.10.11-seg">mieszkań</word> + <word id="word_179" tei_id="morph_5.10.12-seg">w</word> + <word id="word_180" tei_id="morph_5.10.13-seg">towarzystwach</word> + <word id="word_181" tei_id="morph_5.10.14-seg">budownictwa</word> + <word id="word_182" tei_id="morph_5.10.15-seg">społecznego</word> + <word id="word_183" tei_id="morph_5.10.16-seg">.</word> + <word id="word_184" tei_id="morph_5.11.1-seg">To</word> + <word id="word_185" tei_id="morph_5.11.2-seg">stworzy</word> + <word id="word_186" tei_id="morph_5.11.3-seg">zupełnie</word> + <word id="word_187" tei_id="morph_5.11.4-seg">nową</word> + <word id="word_188" tei_id="morph_5.11.5-seg">sytuację</word> + <word id="word_189" tei_id="morph_5.11.6-seg">.</word> + <word id="word_190" tei_id="morph_5.12.1-seg">W</word> + <word id="word_191" tei_id="morph_5.12.2-seg">związku</word> + <word id="word_192" tei_id="morph_5.12.3-seg">z</word> + <word id="word_193" tei_id="morph_5.12.4-seg">tym</word> + <word id="word_194" tei_id="morph_5.12.5-seg">konieczne</word> + <word id="word_195" tei_id="morph_5.12.6-seg">będzie</word> + <word id="word_196" tei_id="morph_5.12.7-seg">podjęcie</word> + <word id="word_197" tei_id="morph_5.12.8-seg">odpowiednich</word> + <word id="word_198" tei_id="morph_5.12.9-seg">kroków</word> + <word id="word_199" tei_id="morph_5.12.10-seg">przez</word> + <word id="word_200" tei_id="morph_5.12.11-seg">miasto</word> + <word id="word_201" tei_id="morph_5.12.12-seg" lastinpar="true">.</word> + <word id="word_202" tei_id="morph_6.13.1-seg">Norbert</word> + <word id="word_203" tei_id="morph_6.13.2-seg">Napieraj</word> + <word id="word_204" tei_id="morph_6.13.3-seg">,</word> + <word id="word_205" tei_id="morph_6.13.4-seg">szef</word> + <word id="word_206" tei_id="morph_6.13.5-seg">klubu</word> + <word id="word_207" tei_id="morph_6.13.6-seg">radnych</word> + <word 
id="word_208" tei_id="morph_6.13.7-seg">PiS</word> + <word id="word_209" tei_id="morph_6.13.8-seg">również</word> + <word id="word_210" tei_id="morph_6.13.9-seg">uważa</word> + <word id="word_211" tei_id="morph_6.13.10-seg">,</word> + <word id="word_212" tei_id="morph_6.13.11-seg">że</word> + <word id="word_213" tei_id="morph_6.13.12-seg">ze</word> + <word id="word_214" tei_id="morph_6.13.13-seg">względów</word> + <word id="word_215" tei_id="morph_6.13.14-seg">ekonomicznych</word> + <word id="word_216" tei_id="morph_6.13.15-seg">utworzenie</word> + <word id="word_217" tei_id="morph_6.13.16-seg">jednej</word> + <word id="word_218" tei_id="morph_6.13.17-seg">spółki</word> + <word id="word_219" tei_id="morph_6.13.18-seg">jest</word> + <word id="word_220" tei_id="morph_6.13.19-seg">zasadne</word> + <word id="word_221" tei_id="morph_6.13.20-seg" lastinpar="true">.</word> + <word id="word_222" tei_id="morph_7.14.1-seg">-</word> + <word id="word_223" tei_id="morph_7.14.2-seg">Na</word> + <word id="word_224" tei_id="morph_7.14.3-seg">razie</word> + <word id="word_225" tei_id="morph_7.14.4-seg">jest</word> + <word id="word_226" tei_id="morph_7.14.5-seg">to</word> + <word id="word_227" tei_id="morph_7.14.6-seg">jednak</word> + <word id="word_228" tei_id="morph_7.14.7-seg">luźny</word> + <word id="word_229" tei_id="morph_7.14.8-seg">pomysł</word> + <word id="word_230" tei_id="morph_7.14.9-seg">.</word> + <word id="word_231" tei_id="morph_7.15.1-seg">Nie</word> + <word id="word_232" tei_id="morph_7.15.2-seg">ma</word> + <word id="word_233" tei_id="morph_7.15.3-seg">konkretów</word> + <word id="word_234" tei_id="morph_7.15.4-seg">-</word> + <word id="word_235" tei_id="morph_7.15.5-seg">dodaje</word> + <word id="word_236" tei_id="morph_7.15.6-seg">N</word> + <word id="word_237" tei_id="morph_7.15.7-seg">.</word> + <word id="word_238" tei_id="morph_7.16.1-seg">Napieraj</word> + <word id="word_239" tei_id="morph_7.16.2-seg">.</word> + <word id="word_240" 
tei_id="morph_7.16.3-seg">-</word> + <word id="word_241" tei_id="morph_7.16.4-seg">Nasz</word> + <word id="word_242" tei_id="morph_7.16.5-seg">klub</word> + <word id="word_243" tei_id="morph_7.16.6-seg">jeszcze</word> + <word id="word_244" tei_id="morph_7.16.7-seg">nie</word> + <word id="word_245" tei_id="morph_7.16.8-seg">wypracował</word> + <word id="word_246" tei_id="morph_7.16.9-seg">w</word> + <word id="word_247" tei_id="morph_7.16.10-seg">sprawie</word> + <word id="word_248" tei_id="morph_7.16.11-seg">tej</word> + <word id="word_249" tei_id="morph_7.16.12-seg">uchwały</word> + <word id="word_250" tei_id="morph_7.16.13-seg">stanowiska</word> + <word id="word_251" tei_id="morph_7.16.14-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_gold/2.mmax a/src/test/resources/teksty_mmax/teksty_gold/2.mmax new file mode 100755 index 0000000..babfe74 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/2.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>2_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_gold/2_mentions.xml a/src/test/resources/teksty_mmax/teksty_gold/2_mentions.xml new file mode 100755 index 0000000..6df3c30 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/2_mentions.xml @@ -0,0 +1,88 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_3" mmax_level="mention" mention_head="zatrudnienie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_5" mmax_level="mention" mention_head="Fundacji" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_3" span="word_6..word_8" mmax_level="mention" mention_head="Spotkania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_10" mmax_level="mention" mention_head="Kultury" 
mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_5..word_10" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_17" mmax_level="mention" mention_head="spółki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_16..word_17" mmax_level="mention" mention_head="fundację" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_8" span="word_18" mmax_level="mention" mention_head="Zamek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_23" mmax_level="mention" mention_head="radnym" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_27..word_28" mmax_level="mention" mention_head="kadencji" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_30" mmax_level="mention" mention_head="Gminy" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_12" span="word_29..word_30" mmax_level="mention" mention_head="Rada" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_13" span="word_29..word_31" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_14" span="word_35" mmax_level="mention" mention_head="wójta" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_15" span="word_38..word_40" mmax_level="mention" mention_head="r" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_44" mmax_level="mention" mention_head="wyborach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_50" mmax_level="mention" mention_head="mnie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_18" span="word_52" 
mmax_level="mention" mention_head="zostania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_19" span="word_53" mmax_level="mention" mention_head="samorządowcem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_58" mmax_level="mention" mention_head="życia" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_21" span="word_59" mmax_level="mention" mention_head="mężczyzny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_56..word_59" mmax_level="mention" mention_head="momencie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_70..word_71" mmax_level="mention" mention_head="życiu" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_24" span="word_74..word_75" mmax_level="mention" mention_head="kandydat" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_78" mmax_level="mention" mention_head="Wójta" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_26" span="word_79" mmax_level="mention" mention_head="Pomorza" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_27" span="word_77..word_79" mmax_level="mention" mention_head="tytułu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_86..word_87" mmax_level="mention" mention_head="sprawy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_29" span="word_96" mmax_level="mention" mention_head="Co" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_30" span="word_98" mmax_level="mention" mention_head="gminy" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_31" span="word_98..word_99" mmax_level="mention" mention_head="" mention_group="set_4" 
near_identity="empty"></markable> + <markable id="markable_32" span="word_102..word_107" mmax_level="mention" mention_head="sukces" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_112" mmax_level="mention" mention_head="to" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_34" span="word_120..word_121" mmax_level="mention" mention_head="zagospodarowania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_35" span="word_119..word_121" mmax_level="mention" mention_head="plany" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_123" mmax_level="mention" mention_head="Gotowe" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_130" mmax_level="mention" mention_head="Dębek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_135..word_136" mmax_level="mention" mention_head="lat" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_143" mmax_level="mention" mention_head="wójt" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_40" span="word_148" mmax_level="mention" mention_head="porażkę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_151" mmax_level="mention" mention_head="Rady" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_42" span="word_152" mmax_level="mention" mention_head="gminy" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_43" span="word_150..word_152" mmax_level="mention" mention_head="decyzję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_44" span="word_161" mmax_level="mention" mention_head="Gmin" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" 
span="word_160..word_161" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_159..word_161" mmax_level="mention" mention_head="Związku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_47" span="word_164" mmax_level="mention" mention_head="uporządkowania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_165..word_166" mmax_level="mention" mention_head="gospodarki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_163..word_166" mmax_level="mention" mention_head="programu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_171..word_173" mmax_level="mention" mention_head="milionów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_192" mmax_level="mention" mention_head="Henryka" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_52" span="word_192..word_193" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_195" mmax_level="mention" mention_head="narty" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_198" mmax_level="mention" mention_head="urlop" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_207..word_209" mmax_level="mention" mention_head="stoki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_212..word_213" mmax_level="mention" mention_head="zimy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_221" mmax_level="mention" mention_head="wójt" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_58" span="word_226" mmax_level="mention" 
mention_head="wolnego" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_225..word_226" mmax_level="mention" mention_head="Czasu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_239" mmax_level="mention" mention_head="go" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_61" span="word_243" mmax_level="mention" mention_head="bliskimi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_62" span="word_247..word_250" mmax_level="mention" mention_head="Dziennik" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_63" span="word_254" mmax_level="mention" mention_head="konkursu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_252..word_254" mmax_level="mention" mention_head="edycję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_255..word_256" mmax_level="mention" mention_head="Wójt" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_66" span="word_258..word_259" mmax_level="mention" mention_head="powiat" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_261..word_264" mmax_level="mention" mention_head="włodarze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_68" span="word_267" mmax_level="mention" mention_head="Henryk" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_69" span="word_273" mmax_level="mention" mention_head="Tadeusz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_70" span="word_273..word_274" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_71" span="word_277" mmax_level="mention" mention_head="Puck" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_72" span="word_276..word_277" mmax_level="mention" mention_head="gmina" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_73" span="word_280" mmax_level="mention" mention_head="Jerzy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_74" span="word_280..word_281" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_75" span="word_283" mmax_level="mention" mention_head="Kosakowo" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_76" span="word_289" mmax_level="mention" mention_head="kolegów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_77" span="word_287..word_289" mmax_level="mention" mention_head="gronie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_78" span="word_291" mmax_level="mention" mention_head="fachu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_79" span="word_296..word_297" mmax_level="mention" mention_head="wójta" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_80" span="word_295..word_298" mmax_level="mention" mention_head="miano" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_81" span="word_301" mmax_level="mention" mention_head="tym" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_82" span="word_303" mmax_level="mention" mention_head="kto" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_83" span="word_307..word_308" mmax_level="mention" mention_head="głosami" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_84" span="word_309..word_312" mmax_level="mention" mention_head="Czytelnicy" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at 
end of file diff --git b/src/test/resources/teksty_mmax/teksty_gold/2_words.xml a/src/test/resources/teksty_mmax/teksty_gold/2_words.xml new file mode 100755 index 0000000..71829a1 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/2_words.xml @@ -0,0 +1,317 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Potem</word> + <word id="word_2" tei_id="morph_1.1.2-seg">znalazł</word> + <word id="word_3" tei_id="morph_1.1.3-seg">zatrudnienie</word> + <word id="word_4" tei_id="morph_1.1.4-seg">w</word> + <word id="word_5" tei_id="morph_1.1.5-seg">Fundacji</word> + <word id="word_6" tei_id="morph_1.1.6-seg">Europejskie</word> + <word id="word_7" tei_id="morph_1.1.7-seg">Spotkania</word> + <word id="word_8" tei_id="morph_1.1.8-seg">Kaszubskie</word> + <word id="word_9" tei_id="morph_1.1.9-seg">Centrum</word> + <word id="word_10" tei_id="morph_1.1.10-seg">Kultury</word> + <word id="word_11" tei_id="morph_1.1.11-seg">.</word> + <word id="word_12" tei_id="morph_1.2.1-seg">Był</word> + <word id="word_13" tei_id="morph_1.2.2-seg">prezesem</word> + <word id="word_14" tei_id="morph_1.2.3-seg">utworzonej</word> + <word id="word_15" tei_id="morph_1.2.4-seg">przez</word> + <word id="word_16" tei_id="morph_1.2.5-seg">fundację</word> + <word id="word_17" tei_id="morph_1.2.6-seg">spółki</word> + <word id="word_18" tei_id="morph_1.2.7-seg">Zamek</word> + <word id="word_19" tei_id="morph_1.2.8-seg" lastinpar="true">.</word> + <word id="word_20" tei_id="morph_2.3.1-seg">W</word> + <word id="word_21" tei_id="morph_2.3.2-seg">międzyczasie</word> + <word id="word_22" tei_id="morph_2.3.3-seg">został</word> + <word id="word_23" tei_id="morph_2.3.4-seg">radnym</word> + <word id="word_24" tei_id="morph_2.3.5-seg">.</word> + <word id="word_25" tei_id="morph_2.4.1-seg">Pod</word> + <word id="word_26" tei_id="morph_2.4.2-seg">koniec</word> + <word id="word_27" tei_id="morph_2.4.3-seg">ubiegłej</word> + <word id="word_28" 
tei_id="morph_2.4.4-seg">kadencji</word> + <word id="word_29" tei_id="morph_2.4.5-seg">Rada</word> + <word id="word_30" tei_id="morph_2.4.6-seg">Gminy</word> + <word id="word_31" tei_id="morph_2.4.7-seg">Krokowa</word> + <word id="word_32" tei_id="morph_2.4.8-seg">wybrała</word> + <word id="word_33" tei_id="morph_2.4.9-seg">go</word> + <word id="word_34" tei_id="morph_2.4.10-seg">na</word> + <word id="word_35" tei_id="morph_2.4.11-seg">wójta</word> + <word id="word_36" tei_id="morph_2.4.12-seg">.</word> + <word id="word_37" tei_id="morph_2.5.1-seg">Jesienią</word> + <word id="word_38" tei_id="morph_2.5.2-seg">2002</word> + <word id="word_39" tei_id="morph_2.5.3-seg">r</word> + <word id="word_40" tei_id="morph_2.5.4-seg">.</word> + <word id="word_41" tei_id="morph_2.5.5-seg">został</word> + <word id="word_42" tei_id="morph_2.5.6-seg">wójtem</word> + <word id="word_43" tei_id="morph_2.5.7-seg">w</word> + <word id="word_44" tei_id="morph_2.5.8-seg">wyborach</word> + <word id="word_45" tei_id="morph_2.5.9-seg">powszechnych</word> + <word id="word_46" tei_id="morph_2.5.10-seg" lastinpar="true">.</word> + <word id="word_47" tei_id="morph_3.6.1-seg">-</word> + <word id="word_48" tei_id="morph_3.6.2-seg">Co</word> + <word id="word_49" tei_id="morph_3.6.3-seg">skłoniło</word> + <word id="word_50" tei_id="morph_3.6.4-seg">mnie</word> + <word id="word_51" tei_id="morph_3.6.5-seg">do</word> + <word id="word_52" tei_id="morph_3.6.6-seg">zostania</word> + <word id="word_53" tei_id="morph_3.6.7-seg">samorządowcem</word> + <word id="word_54" tei_id="morph_3.6.8-seg">?</word> + <word id="word_55" tei_id="morph_3.7.1-seg">W</word> + <word id="word_56" tei_id="morph_3.7.2-seg">pewnym</word> + <word id="word_57" tei_id="morph_3.7.3-seg">momencie</word> + <word id="word_58" tei_id="morph_3.7.4-seg">życia</word> + <word id="word_59" tei_id="morph_3.7.5-seg">mężczyzny</word> + <word id="word_60" tei_id="morph_3.7.6-seg">przychodzi</word> + <word id="word_61" 
tei_id="morph_3.7.7-seg">taka</word> + <word id="word_62" tei_id="morph_3.7.8-seg">potrzeba</word> + <word id="word_63" tei_id="morph_3.7.9-seg">,</word> + <word id="word_64" tei_id="morph_3.7.10-seg">aby</word> + <word id="word_65" tei_id="morph_3.7.11-seg">sprawdzić</word> + <word id="word_66" tei_id="morph_3.7.12-seg">się</word> + <word id="word_67" tei_id="morph_3.7.13-seg">np</word> + <word id="word_68" tei_id="morph_3.7.14-seg">.</word> + <word id="word_69" tei_id="morph_3.7.15-seg">w</word> + <word id="word_70" tei_id="morph_3.7.16-seg">życiu</word> + <word id="word_71" tei_id="morph_3.7.17-seg">publicznym</word> + <word id="word_72" tei_id="morph_3.7.18-seg">-</word> + <word id="word_73" tei_id="morph_3.7.19-seg">twierdzi</word> + <word id="word_74" tei_id="morph_3.7.20-seg">krokowski</word> + <word id="word_75" tei_id="morph_3.7.21-seg">kandydat</word> + <word id="word_76" tei_id="morph_3.7.22-seg">do</word> + <word id="word_77" tei_id="morph_3.7.23-seg">tytułu</word> + <word id="word_78" tei_id="morph_3.7.24-seg">Wójta</word> + <word id="word_79" tei_id="morph_3.7.25-seg">Pomorza</word> + <word id="word_80" tei_id="morph_3.7.26-seg">.</word> + <word id="word_81" tei_id="morph_3.7.27-seg">-</word> + <word id="word_82" tei_id="morph_3.7.28-seg">Poza</word> + <word id="word_83" tei_id="morph_3.7.29-seg">tym</word> + <word id="word_84" tei_id="morph_3.7.30-seg">interesowały</word> + <word id="word_85" tei_id="morph_3.7.31-seg">mnie</word> + <word id="word_86" tei_id="morph_3.7.32-seg">sprawy</word> + <word id="word_87" tei_id="morph_3.7.33-seg">komunalne</word> + <word id="word_88" tei_id="morph_3.7.34-seg">.</word> + <word id="word_89" tei_id="morph_3.8.1-seg">Chciał</word> + <word id="word_90" tei_id="morph_3.8.2-seg">em</word> + <word id="word_91" tei_id="morph_3.8.3-seg">się</word> + <word id="word_92" tei_id="morph_3.8.4-seg">nimi</word> + <word id="word_93" tei_id="morph_3.8.5-seg">bliżej</word> + <word id="word_94" tei_id="morph_3.8.6-seg">zająć</word> 
+ <word id="word_95" tei_id="morph_3.8.7-seg" lastinpar="true">.</word> + <word id="word_96" tei_id="morph_4.9.1-seg">Co</word> + <word id="word_97" tei_id="morph_4.9.2-seg">wójt</word> + <word id="word_98" tei_id="morph_4.9.3-seg">gminy</word> + <word id="word_99" tei_id="morph_4.9.4-seg">Krokowa</word> + <word id="word_100" tei_id="morph_4.9.5-seg">uważa</word> + <word id="word_101" tei_id="morph_4.9.6-seg">za</word> + <word id="word_102" tei_id="morph_4.9.7-seg">swój</word> + <word id="word_103" tei_id="morph_4.9.8-seg">największy</word> + <word id="word_104" tei_id="morph_4.9.9-seg">sukces</word> + <word id="word_105" tei_id="morph_4.9.10-seg">i</word> + <word id="word_106" tei_id="morph_4.9.11-seg">największą</word> + <word id="word_107" tei_id="morph_4.9.12-seg">porażkę</word> + <word id="word_108" tei_id="morph_4.9.13-seg" lastinpar="true">?</word> + <word id="word_109" tei_id="morph_5.10.1-seg">-</word> + <word id="word_110" tei_id="morph_5.10.2-seg">Sukcesem</word> + <word id="word_111" tei_id="morph_5.10.3-seg">jest</word> + <word id="word_112" tei_id="morph_5.10.4-seg">to</word> + <word id="word_113" tei_id="morph_5.10.5-seg">,</word> + <word id="word_114" tei_id="morph_5.10.6-seg">że</word> + <word id="word_115" tei_id="morph_5.10.7-seg">udaje</word> + <word id="word_116" tei_id="morph_5.10.8-seg">się</word> + <word id="word_117" tei_id="morph_5.10.9-seg">wreszcie</word> + <word id="word_118" tei_id="morph_5.10.10-seg">opracowywać</word> + <word id="word_119" tei_id="morph_5.10.11-seg">plany</word> + <word id="word_120" tei_id="morph_5.10.12-seg">zagospodarowania</word> + <word id="word_121" tei_id="morph_5.10.13-seg">przestrzennego</word> + <word id="word_122" tei_id="morph_5.10.14-seg">.</word> + <word id="word_123" tei_id="morph_5.11.1-seg">Gotowe</word> + <word id="word_124" tei_id="morph_5.11.2-seg">są</word> + <word id="word_125" tei_id="morph_5.11.3-seg">już</word> + <word id="word_126" tei_id="morph_5.11.4-seg">dla</word> + <word id="word_127" 
tei_id="morph_5.11.5-seg">Białogóry</word> + <word id="word_128" tei_id="morph_5.11.6-seg">i</word> + <word id="word_129" tei_id="morph_5.11.7-seg">części</word> + <word id="word_130" tei_id="morph_5.11.8-seg">Dębek</word> + <word id="word_131" tei_id="morph_5.11.9-seg">.</word> + <word id="word_132" tei_id="morph_5.12.1-seg">Tych</word> + <word id="word_133" tei_id="morph_5.12.2-seg">ostatnich</word> + <word id="word_134" tei_id="morph_5.12.3-seg">przez</word> + <word id="word_135" tei_id="morph_5.12.4-seg">wiele</word> + <word id="word_136" tei_id="morph_5.12.5-seg">lat</word> + <word id="word_137" tei_id="morph_5.12.6-seg">nie</word> + <word id="word_138" tei_id="morph_5.12.7-seg">można</word> + <word id="word_139" tei_id="morph_5.12.8-seg">było</word> + <word id="word_140" tei_id="morph_5.12.9-seg">uchwalić</word> + <word id="word_141" tei_id="morph_5.12.10-seg">-</word> + <word id="word_142" tei_id="morph_5.12.11-seg">uważa</word> + <word id="word_143" tei_id="morph_5.12.12-seg">wójt</word> + <word id="word_144" tei_id="morph_5.12.13-seg">.</word> + <word id="word_145" tei_id="morph_5.12.14-seg">-</word> + <word id="word_146" tei_id="morph_5.12.15-seg">Natomiast</word> + <word id="word_147" tei_id="morph_5.12.16-seg">za</word> + <word id="word_148" tei_id="morph_5.12.17-seg">porażkę</word> + <word id="word_149" tei_id="morph_5.12.18-seg">uważam</word> + <word id="word_150" tei_id="morph_5.12.19-seg">decyzję</word> + <word id="word_151" tei_id="morph_5.12.20-seg">Rady</word> + <word id="word_152" tei_id="morph_5.12.21-seg">gminy</word> + <word id="word_153" tei_id="morph_5.12.22-seg">,</word> + <word id="word_154" tei_id="morph_5.12.23-seg">aby</word> + <word id="word_155" tei_id="morph_5.12.24-seg">nie</word> + <word id="word_156" tei_id="morph_5.12.25-seg">przystępować</word> + <word id="word_157" tei_id="morph_5.12.26-seg">w</word> + <word id="word_158" tei_id="morph_5.12.27-seg">ramach</word> + <word id="word_159" 
tei_id="morph_5.12.28-seg">Komunalnego</word> + <word id="word_160" tei_id="morph_5.12.29-seg">Związku</word> + <word id="word_161" tei_id="morph_5.12.30-seg">Gmin</word> + <word id="word_162" tei_id="morph_5.12.31-seg">do</word> + <word id="word_163" tei_id="morph_5.12.32-seg">programu</word> + <word id="word_164" tei_id="morph_5.12.33-seg">uporządkowania</word> + <word id="word_165" tei_id="morph_5.12.34-seg">gospodarki</word> + <word id="word_166" tei_id="morph_5.12.35-seg">ściekowej</word> + <word id="word_167" tei_id="morph_5.12.36-seg">.</word> + <word id="word_168" tei_id="morph_5.13.1-seg">Mogli</word> + <word id="word_169" tei_id="morph_5.13.2-seg">śmy</word> + <word id="word_170" tei_id="morph_5.13.3-seg">uzyskać</word> + <word id="word_171" tei_id="morph_5.13.4-seg">wiele</word> + <word id="word_172" tei_id="morph_5.13.5-seg">milionów</word> + <word id="word_173" tei_id="morph_5.13.6-seg">euro</word> + <word id="word_174" tei_id="morph_5.13.7-seg">.</word> + <word id="word_175" tei_id="morph_5.14.1-seg">Boję</word> + <word id="word_176" tei_id="morph_5.14.2-seg">się</word> + <word id="word_177" tei_id="morph_5.14.3-seg">,</word> + <word id="word_178" tei_id="morph_5.14.4-seg">że</word> + <word id="word_179" tei_id="morph_5.14.5-seg">to</word> + <word id="word_180" tei_id="morph_5.14.6-seg">nie</word> + <word id="word_181" tei_id="morph_5.14.7-seg">tylko</word> + <word id="word_182" tei_id="morph_5.14.8-seg">moja</word> + <word id="word_183" tei_id="morph_5.14.9-seg">porażka</word> + <word id="word_184" tei_id="morph_5.14.10-seg">.</word> + <word id="word_185" tei_id="morph_5.14.11-seg">.</word> + <word id="word_186" tei_id="morph_5.14.12-seg" lastinpar="true">.</word> + <word id="word_187" tei_id="morph_6.15.1-seg">Od</word> + <word id="word_188" tei_id="morph_6.15.2-seg">5</word> + <word id="word_189" tei_id="morph_6.15.3-seg">lat</word> + <word id="word_190" tei_id="morph_6.15.4-seg">ulubionym</word> + <word id="word_191" 
tei_id="morph_6.15.5-seg">hobby</word> + <word id="word_192" tei_id="morph_6.15.6-seg">Henryka</word> + <word id="word_193" tei_id="morph_6.15.7-seg">Doeringa</word> + <word id="word_194" tei_id="morph_6.15.8-seg">są</word> + <word id="word_195" tei_id="morph_6.15.9-seg">narty</word> + <word id="word_196" tei_id="morph_6.15.10-seg">.</word> + <word id="word_197" tei_id="morph_6.16.1-seg">Dlatego</word> + <word id="word_198" tei_id="morph_6.16.2-seg">urlop</word> + <word id="word_199" tei_id="morph_6.16.3-seg">najchętniej</word> + <word id="word_200" tei_id="morph_6.16.4-seg">bierze</word> + <word id="word_201" tei_id="morph_6.16.5-seg">zimą</word> + <word id="word_202" tei_id="morph_6.16.6-seg">,</word> + <word id="word_203" tei_id="morph_6.16.7-seg">aby</word> + <word id="word_204" tei_id="morph_6.16.8-seg">udać</word> + <word id="word_205" tei_id="morph_6.16.9-seg">się</word> + <word id="word_206" tei_id="morph_6.16.10-seg">na</word> + <word id="word_207" tei_id="morph_6.16.11-seg">stoki</word> + <word id="word_208" tei_id="morph_6.16.12-seg">Szklarskiej</word> + <word id="word_209" tei_id="morph_6.16.13-seg">Poręby</word> + <word id="word_210" tei_id="morph_6.16.14-seg" lastinpar="true">.</word> + <word id="word_211" tei_id="morph_7.17.1-seg">-</word> + <word id="word_212" tei_id="morph_7.17.2-seg">Tej</word> + <word id="word_213" tei_id="morph_7.17.3-seg">zimy</word> + <word id="word_214" tei_id="morph_7.17.4-seg">niestety</word> + <word id="word_215" tei_id="morph_7.17.5-seg">nie</word> + <word id="word_216" tei_id="morph_7.17.6-seg">mogł</word> + <word id="word_217" tei_id="morph_7.17.7-seg">em</word> + <word id="word_218" tei_id="morph_7.17.8-seg">wyjechać</word> + <word id="word_219" tei_id="morph_7.17.9-seg">-</word> + <word id="word_220" tei_id="morph_7.17.10-seg">przyznaje</word> + <word id="word_221" tei_id="morph_7.17.11-seg">wójt</word> + <word id="word_222" tei_id="morph_7.17.12-seg">Krokowej</word> + <word id="word_223" 
tei_id="morph_7.17.13-seg">.</word> + <word id="word_224" tei_id="morph_7.17.14-seg">-</word> + <word id="word_225" tei_id="morph_7.17.15-seg">Czasu</word> + <word id="word_226" tei_id="morph_7.17.16-seg">wolnego</word> + <word id="word_227" tei_id="morph_7.17.17-seg">mam</word> + <word id="word_228" tei_id="morph_7.17.18-seg">bardzo</word> + <word id="word_229" tei_id="morph_7.17.19-seg">mało</word> + <word id="word_230" tei_id="morph_7.17.20-seg">,</word> + <word id="word_231" tei_id="morph_7.17.21-seg">jeśli</word> + <word id="word_232" tei_id="morph_7.17.22-seg">się</word> + <word id="word_233" tei_id="morph_7.17.23-seg">taki</word> + <word id="word_234" tei_id="morph_7.17.24-seg">pojawia</word> + <word id="word_235" tei_id="morph_7.17.25-seg">,</word> + <word id="word_236" tei_id="morph_7.17.26-seg">to</word> + <word id="word_237" tei_id="morph_7.17.27-seg">staram</word> + <word id="word_238" tei_id="morph_7.17.28-seg">się</word> + <word id="word_239" tei_id="morph_7.17.29-seg">go</word> + <word id="word_240" tei_id="morph_7.17.30-seg">spędzać</word> + <word id="word_241" tei_id="morph_7.17.31-seg">razem</word> + <word id="word_242" tei_id="morph_7.17.32-seg">z</word> + <word id="word_243" tei_id="morph_7.17.33-seg">bliskimi</word> + <word id="word_244" tei_id="morph_7.17.34-seg" lastinpar="true">.</word> + <word id="word_245" tei_id="morph_8.18.1-seg">Nasz</word> + <word id="word_246" tei_id="morph_8.18.2-seg" lastinpar="true">plebiscyt</word> + <word id="word_247" tei_id="morph_9.19.1-seg">„</word> + <word id="word_248" tei_id="morph_9.19.2-seg">Dziennik</word> + <word id="word_249" tei_id="morph_9.19.3-seg">Bałtycki</word> + <word id="word_250" tei_id="morph_9.19.4-seg">”</word> + <word id="word_251" tei_id="morph_9.19.5-seg">rozpoczął</word> + <word id="word_252" tei_id="morph_9.19.6-seg">kolejną</word> + <word id="word_253" tei_id="morph_9.19.7-seg">edycję</word> + <word id="word_254" tei_id="morph_9.19.8-seg">konkursu</word> + <word id="word_255" 
tei_id="morph_9.19.9-seg">Wójt</word> + <word id="word_256" tei_id="morph_9.19.10-seg">Pomorza</word> + <word id="word_257" tei_id="morph_9.19.11-seg">.</word> + <word id="word_258" tei_id="morph_9.20.1-seg">Nasz</word> + <word id="word_259" tei_id="morph_9.20.2-seg">powiat</word> + <word id="word_260" tei_id="morph_9.20.3-seg">reprezentują</word> + <word id="word_261" tei_id="morph_9.20.4-seg">trzej</word> + <word id="word_262" tei_id="morph_9.20.5-seg">włodarze</word> + <word id="word_263" tei_id="morph_9.20.6-seg">gmin</word> + <word id="word_264" tei_id="morph_9.20.7-seg">wiejskich</word> + <word id="word_265" tei_id="morph_9.20.8-seg">.</word> + <word id="word_266" tei_id="morph_9.21.1-seg">To</word> + <word id="word_267" tei_id="morph_9.21.2-seg">Henryk</word> + <word id="word_268" tei_id="morph_9.21.3-seg">Doering</word> + <word id="word_269" tei_id="morph_9.21.4-seg">(</word> + <word id="word_270" tei_id="morph_9.21.5-seg">Krokowa</word> + <word id="word_271" tei_id="morph_9.21.6-seg">)</word> + <word id="word_272" tei_id="morph_9.21.7-seg">,</word> + <word id="word_273" tei_id="morph_9.21.8-seg">Tadeusz</word> + <word id="word_274" tei_id="morph_9.21.9-seg">Puszkarczuk</word> + <word id="word_275" tei_id="morph_9.21.10-seg">(</word> + <word id="word_276" tei_id="morph_9.21.11-seg">gmina</word> + <word id="word_277" tei_id="morph_9.21.12-seg">Puck</word> + <word id="word_278" tei_id="morph_9.21.13-seg">)</word> + <word id="word_279" tei_id="morph_9.21.14-seg">i</word> + <word id="word_280" tei_id="morph_9.21.15-seg">Jerzy</word> + <word id="word_281" tei_id="morph_9.21.16-seg">Włudzik</word> + <word id="word_282" tei_id="morph_9.21.17-seg">(</word> + <word id="word_283" tei_id="morph_9.21.18-seg">Kosakowo</word> + <word id="word_284" tei_id="morph_9.21.19-seg">)</word> + <word id="word_285" tei_id="morph_9.21.20-seg">.</word> + <word id="word_286" tei_id="morph_9.22.1-seg">W</word> + <word id="word_287" tei_id="morph_9.22.2-seg">gronie</word> + <word 
id="word_288" tei_id="morph_9.22.3-seg">kilkudziesięciu</word> + <word id="word_289" tei_id="morph_9.22.4-seg">kolegów</word> + <word id="word_290" tei_id="morph_9.22.5-seg">po</word> + <word id="word_291" tei_id="morph_9.22.6-seg">fachu</word> + <word id="word_292" tei_id="morph_9.22.7-seg">walczyć</word> + <word id="word_293" tei_id="morph_9.22.8-seg">będą</word> + <word id="word_294" tei_id="morph_9.22.9-seg">o</word> + <word id="word_295" tei_id="morph_9.22.10-seg">miano</word> + <word id="word_296" tei_id="morph_9.22.11-seg">najpopularniejszego</word> + <word id="word_297" tei_id="morph_9.22.12-seg">wójta</word> + <word id="word_298" tei_id="morph_9.22.13-seg">województwa</word> + <word id="word_299" tei_id="morph_9.22.14-seg">.</word> + <word id="word_300" tei_id="morph_9.23.1-seg">O</word> + <word id="word_301" tei_id="morph_9.23.2-seg">tym</word> + <word id="word_302" tei_id="morph_9.23.3-seg">,</word> + <word id="word_303" tei_id="morph_9.23.4-seg">kto</word> + <word id="word_304" tei_id="morph_9.23.5-seg">wygra</word> + <word id="word_305" tei_id="morph_9.23.6-seg">,</word> + <word id="word_306" tei_id="morph_9.23.7-seg">zadecydują</word> + <word id="word_307" tei_id="morph_9.23.8-seg">swoimi</word> + <word id="word_308" tei_id="morph_9.23.9-seg">głosami</word> + <word id="word_309" tei_id="morph_9.23.10-seg">Czytelnicy</word> + <word id="word_310" tei_id="morph_9.23.11-seg">„</word> + <word id="word_311" tei_id="morph_9.23.12-seg">Dziennika</word> + <word id="word_312" tei_id="morph_9.23.13-seg">”</word> + <word id="word_313" tei_id="morph_9.23.14-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_gold/3.mmax a/src/test/resources/teksty_mmax/teksty_gold/3.mmax new file mode 100755 index 0000000..6dae981 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/3.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>3_words.xml</words> +</mmax_project> diff --git 
b/src/test/resources/teksty_mmax/teksty_gold/3_mentions.xml a/src/test/resources/teksty_mmax/teksty_gold/3_mentions.xml new file mode 100755 index 0000000..d47df62 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/3_mentions.xml @@ -0,0 +1,60 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_1..word_2" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_5" mmax_level="mention" mention_head="Frankfurt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_4..word_5" mmax_level="mention" mention_head="Eintracht" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_9" mmax_level="mention" mention_head="Ukraińcy" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_5" span="word_12..word_13" mmax_level="mention" mention_head="wymagania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_15" mmax_level="mention" mention_head="Remis" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_21..word_22" mmax_level="mention" mention_head="odrobinie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_8" span="word_30" mmax_level="mention" mention_head="pretensję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_34..word_35" mmax_level="mention" mention_head="koncentracji" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_39" mmax_level="mention" mention_head="bramkę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_42..word_43" mmax_level="mention" mention_head="" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_12" span="word_47" mmax_level="mention" mention_head="FC" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_62" mmax_level="mention" mention_head="eliminacje" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_14" span="word_65..word_66" mmax_level="mention" mention_head="strony" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_68" mmax_level="mention" mention_head="wszystko" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_74" mmax_level="mention" mention_head="kadry" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_83..word_84" mmax_level="mention" mention_head="klubie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_18" span="word_94" mmax_level="mention" mention_head="zespołem" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_19" span="word_101..word_102" mmax_level="mention" mention_head="sposób" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_105" mmax_level="mention" mention_head="punktów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_21" span="word_111" mmax_level="mention" mention_head="Mecze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_113" mmax_level="mention" mention_head="Ukrainą" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_115" mmax_level="mention" mention_head="klamrą" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_24" span="word_116" mmax_level="mention" mention_head="eliminacje" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_25" span="word_122..word_123" 
mmax_level="mention" mention_head="końca" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_128" mmax_level="mention" mention_head="kibice" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_27" span="word_133..word_135" mmax_level="mention" mention_head="zespołów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_139" mmax_level="mention" mention_head="Ukraińcy" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_29" span="word_137..word_139" mmax_level="mention" mention_head="zdaniem" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_30" span="word_142..word_143" mmax_level="mention" mention_head="miejsce" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_31" span="word_155" mmax_level="mention" mention_head="mundialu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_32" span="word_168..word_169" mmax_level="mention" mention_head="Zbigniew" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_171..word_172" mmax_level="mention" mention_head="wiceprezes" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_34" span="word_179" mmax_level="mention" mention_head="eliminacje" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_35" span="word_182" mmax_level="mention" mention_head="sukcesem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_193" mmax_level="mention" mention_head="dziennikarze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_196" mmax_level="mention" mention_head="to" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_198" mmax_level="mention" mention_head="co" 
mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_202" mmax_level="mention" mention_head="boisku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_40" span="word_207" mmax_level="mention" mention_head="uwagę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_210" mmax_level="mention" mention_head="sukcesów" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_42" span="word_211" mmax_level="mention" mention_head="piłkarzy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_43" span="word_209..word_211" mmax_level="mention" mention_head="kulisy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_44" span="word_215" mmax_level="mention" mention_head="one" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_45" span="word_216..word_218" mmax_level="mention" mention_head="wysiłek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_222" mmax_level="mention" mention_head="rzeszy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_47" span="word_223" mmax_level="mention" mention_head="ludzi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_220..word_223" mmax_level="mention" mention_head="pracy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_229..word_230" mmax_level="mention" mention_head="szeregu" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_50" span="word_237..word_238" mmax_level="mention" mention_head="zadania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_240..word_241" mmax_level="mention" mention_head="zdaniem" mention_group="set_3" near_identity="empty"></markable> + 
<markable id="markable_52" span="word_247..word_248" mmax_level="mention" mention_head="sprawy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_252..word_253" mmax_level="mention" mention_head="poziomie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_255" mmax_level="mention" mention_head="wszyscy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_264" mmax_level="mention" mention_head="świata" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_56" span="word_263..word_264" mmax_level="mention" mention_head="mistrzostw" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_gold/3_words.xml a/src/test/resources/teksty_mmax/teksty_gold/3_words.xml new file mode 100755 index 0000000..f23ab57 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/3_words.xml @@ -0,0 +1,269 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Paweł</word> + <word id="word_2" tei_id="morph_1.1.2-seg" lastinpar="true">Kryszałowicz</word> + <word id="word_3" tei_id="morph_2.2.1-seg">(</word> + <word id="word_4" tei_id="morph_2.2.2-seg">Eintracht</word> + <word id="word_5" tei_id="morph_2.2.3-seg">Frankfurt</word> + <word id="word_6" tei_id="morph_2.2.4-seg">)</word> + <word id="word_7" tei_id="morph_2.2.5-seg">:</word> + <word id="word_8" tei_id="morph_2.2.6-seg">-</word> + <word id="word_9" tei_id="morph_2.2.7-seg">Ukraińcy</word> + <word id="word_10" tei_id="morph_2.2.8-seg">postawili</word> + <word id="word_11" tei_id="morph_2.2.9-seg">nam</word> + <word id="word_12" tei_id="morph_2.2.10-seg">wysokie</word> + <word id="word_13" tei_id="morph_2.2.11-seg">wymagania</word> + <word id="word_14" tei_id="morph_2.2.12-seg">.</word> + <word id="word_15" 
tei_id="morph_2.3.1-seg">Remis</word> + <word id="word_16" tei_id="morph_2.3.2-seg">jest</word> + <word id="word_17" tei_id="morph_2.3.3-seg">sprawiedliwy</word> + <word id="word_18" tei_id="morph_2.3.4-seg">,</word> + <word id="word_19" tei_id="morph_2.3.5-seg">choć</word> + <word id="word_20" tei_id="morph_2.3.6-seg">przy</word> + <word id="word_21" tei_id="morph_2.3.7-seg">odrobinie</word> + <word id="word_22" tei_id="morph_2.3.8-seg">szczęścia</word> + <word id="word_23" tei_id="morph_2.3.9-seg">mogli</word> + <word id="word_24" tei_id="morph_2.3.10-seg">śmy</word> + <word id="word_25" tei_id="morph_2.3.11-seg">wygrać</word> + <word id="word_26" tei_id="morph_2.3.12-seg">.</word> + <word id="word_27" tei_id="morph_2.4.1-seg">Mam</word> + <word id="word_28" tei_id="morph_2.4.2-seg">do</word> + <word id="word_29" tei_id="morph_2.4.3-seg">siebie</word> + <word id="word_30" tei_id="morph_2.4.4-seg">pretensję</word> + <word id="word_31" tei_id="morph_2.4.5-seg">,</word> + <word id="word_32" tei_id="morph_2.4.6-seg">bo</word> + <word id="word_33" tei_id="morph_2.4.7-seg">przy</word> + <word id="word_34" tei_id="morph_2.4.8-seg">lepszej</word> + <word id="word_35" tei_id="morph_2.4.9-seg">koncentracji</word> + <word id="word_36" tei_id="morph_2.4.10-seg">mogł</word> + <word id="word_37" tei_id="morph_2.4.11-seg">em</word> + <word id="word_38" tei_id="morph_2.4.12-seg">zdobyć</word> + <word id="word_39" tei_id="morph_2.4.13-seg">bramkę</word> + <word id="word_40" tei_id="morph_2.4.14-seg" lastinpar="true">.</word> + <word id="word_41" tei_id="morph_3.5.1-seg" lastinpar="true">ś</word> + <word id="word_42" tei_id="morph_4.6.1-seg">Jacek</word> + <word id="word_43" tei_id="morph_4.6.2-seg" lastinpar="true">Krzynówek</word> + <word id="word_44" tei_id="morph_5.7.1-seg">(</word> + <word id="word_45" tei_id="morph_5.7.2-seg">1</word> + <word id="word_46" tei_id="morph_5.7.3-seg">.</word> + <word id="word_47" tei_id="morph_5.8.1-seg">FC</word> + <word id="word_48" 
tei_id="morph_5.8.2-seg">Nuernberg</word> + <word id="word_49" tei_id="morph_5.8.3-seg">)</word> + <word id="word_50" tei_id="morph_5.8.4-seg">:</word> + <word id="word_51" tei_id="morph_5.8.5-seg">-</word> + <word id="word_52" tei_id="morph_5.8.6-seg">Cieszę</word> + <word id="word_53" tei_id="morph_5.8.7-seg">się</word> + <word id="word_54" tei_id="morph_5.8.8-seg">,</word> + <word id="word_55" tei_id="morph_5.8.9-seg">że</word> + <word id="word_56" tei_id="morph_5.8.10-seg">mogł</word> + <word id="word_57" tei_id="morph_5.8.11-seg">em</word> + <word id="word_58" tei_id="morph_5.8.12-seg">wystąpić</word> + <word id="word_59" tei_id="morph_5.8.13-seg">w</word> + <word id="word_60" tei_id="morph_5.8.14-seg">meczu</word> + <word id="word_61" tei_id="morph_5.8.15-seg">kończącym</word> + <word id="word_62" tei_id="morph_5.8.16-seg">eliminacje</word> + <word id="word_63" tei_id="morph_5.8.17-seg">.</word> + <word id="word_64" tei_id="morph_5.9.1-seg">Ze</word> + <word id="word_65" tei_id="morph_5.9.2-seg">swej</word> + <word id="word_66" tei_id="morph_5.9.3-seg">strony</word> + <word id="word_67" tei_id="morph_5.9.4-seg">zrobię</word> + <word id="word_68" tei_id="morph_5.9.5-seg">wszystko</word> + <word id="word_69" tei_id="morph_5.9.6-seg">,</word> + <word id="word_70" tei_id="morph_5.9.7-seg">aby</word> + <word id="word_71" tei_id="morph_5.9.8-seg">nie</word> + <word id="word_72" tei_id="morph_5.9.9-seg">wypaść</word> + <word id="word_73" tei_id="morph_5.9.10-seg">z</word> + <word id="word_74" tei_id="morph_5.9.11-seg">kadry</word> + <word id="word_75" tei_id="morph_5.9.12-seg">,</word> + <word id="word_76" tei_id="morph_5.9.13-seg">mimo</word> + <word id="word_77" tei_id="morph_5.9.14-seg">że</word> + <word id="word_78" tei_id="morph_5.9.15-seg">nie</word> + <word id="word_79" tei_id="morph_5.9.16-seg">mam</word> + <word id="word_80" tei_id="morph_5.9.17-seg">ugruntowanej</word> + <word id="word_81" tei_id="morph_5.9.18-seg">pozycji</word> + <word id="word_82" 
tei_id="morph_5.9.19-seg">w</word> + <word id="word_83" tei_id="morph_5.9.20-seg">swoim</word> + <word id="word_84" tei_id="morph_5.9.21-seg">klubie</word> + <word id="word_85" tei_id="morph_5.9.22-seg">.</word> + <word id="word_86" tei_id="morph_5.10.1-seg">Zasłużyli</word> + <word id="word_87" tei_id="morph_5.10.2-seg">śmy</word> + <word id="word_88" tei_id="morph_5.10.3-seg">na</word> + <word id="word_89" tei_id="morph_5.10.4-seg">awans</word> + <word id="word_90" tei_id="morph_5.10.5-seg">,</word> + <word id="word_91" tei_id="morph_5.10.6-seg">bo</word> + <word id="word_92" tei_id="morph_5.10.7-seg">byli</word> + <word id="word_93" tei_id="morph_5.10.8-seg">śmy</word> + <word id="word_94" tei_id="morph_5.10.9-seg">zespołem</word> + <word id="word_95" tei_id="morph_5.10.10-seg">,</word> + <word id="word_96" tei_id="morph_5.10.11-seg">który</word> + <word id="word_97" tei_id="morph_5.10.12-seg">grał</word> + <word id="word_98" tei_id="morph_5.10.13-seg">najrówniej</word> + <word id="word_99" tei_id="morph_5.10.14-seg">i</word> + <word id="word_100" tei_id="morph_5.10.15-seg">w</word> + <word id="word_101" tei_id="morph_5.10.16-seg">głupi</word> + <word id="word_102" tei_id="morph_5.10.17-seg">sposób</word> + <word id="word_103" tei_id="morph_5.10.18-seg">nie</word> + <word id="word_104" tei_id="morph_5.10.19-seg">tracił</word> + <word id="word_105" tei_id="morph_5.10.20-seg">punktów</word> + <word id="word_106" tei_id="morph_5.10.21-seg" lastinpar="true">.</word> + <word id="word_107" tei_id="morph_6.11.1-seg" lastinpar="true">ś</word> + <word id="word_108" tei_id="morph_7.12.1-seg">Marek</word> + <word id="word_109" tei_id="morph_7.12.2-seg">Koźmiński</word> + <word id="word_110" tei_id="morph_7.12.3-seg" lastinpar="true">:</word> + <word id="word_111" tei_id="morph_8.13.1-seg">Mecze</word> + <word id="word_112" tei_id="morph_8.13.2-seg">z</word> + <word id="word_113" tei_id="morph_8.13.3-seg">Ukrainą</word> + <word id="word_114" 
tei_id="morph_8.13.4-seg">spinają</word> + <word id="word_115" tei_id="morph_8.13.5-seg">klamrą</word> + <word id="word_116" tei_id="morph_8.13.6-seg">eliminacje</word> + <word id="word_117" tei_id="morph_8.13.7-seg">,</word> + <word id="word_118" tei_id="morph_8.13.8-seg">które</word> + <word id="word_119" tei_id="morph_8.13.9-seg">będziemy</word> + <word id="word_120" tei_id="morph_8.13.10-seg">pamiętać</word> + <word id="word_121" tei_id="morph_8.13.11-seg">do</word> + <word id="word_122" tei_id="morph_8.13.12-seg">końca</word> + <word id="word_123" tei_id="morph_8.13.13-seg">życia</word> + <word id="word_124" tei_id="morph_8.13.14-seg">.</word> + <word id="word_125" tei_id="morph_8.14.1-seg">Uważam</word> + <word id="word_126" tei_id="morph_8.14.2-seg">,</word> + <word id="word_127" tei_id="morph_8.14.3-seg">że</word> + <word id="word_128" tei_id="morph_8.14.4-seg">kibice</word> + <word id="word_129" tei_id="morph_8.14.5-seg">oglądali</word> + <word id="word_130" tei_id="morph_8.14.6-seg">dzisiaj</word> + <word id="word_131" tei_id="morph_8.14.7-seg">dobry</word> + <word id="word_132" tei_id="morph_8.14.8-seg">mecz</word> + <word id="word_133" tei_id="morph_8.14.9-seg">dwóch</word> + <word id="word_134" tei_id="morph_8.14.10-seg">równych</word> + <word id="word_135" tei_id="morph_8.14.11-seg">zespołów</word> + <word id="word_136" tei_id="morph_8.14.12-seg">.</word> + <word id="word_137" tei_id="morph_8.15.1-seg">Moim</word> + <word id="word_138" tei_id="morph_8.15.2-seg">zdaniem</word> + <word id="word_139" tei_id="morph_8.15.3-seg">Ukraińcy</word> + <word id="word_140" tei_id="morph_8.15.4-seg">zasłużenie</word> + <word id="word_141" tei_id="morph_8.15.5-seg">zajęli</word> + <word id="word_142" tei_id="morph_8.15.6-seg">drugie</word> + <word id="word_143" tei_id="morph_8.15.7-seg">miejsce</word> + <word id="word_144" tei_id="morph_8.15.8-seg">,</word> + <word id="word_145" tei_id="morph_8.15.9-seg">jednak</word> + <word id="word_146" 
tei_id="morph_8.15.10-seg">nie</word> + <word id="word_147" tei_id="morph_8.15.11-seg">chciał</word> + <word id="word_148" tei_id="morph_8.15.12-seg">by</word> + <word id="word_149" tei_id="morph_8.15.13-seg">m</word> + <word id="word_150" tei_id="morph_8.15.14-seg">spotkać</word> + <word id="word_151" tei_id="morph_8.15.15-seg">się</word> + <word id="word_152" tei_id="morph_8.15.16-seg">z</word> + <word id="word_153" tei_id="morph_8.15.17-seg">nimi</word> + <word id="word_154" tei_id="morph_8.15.18-seg">na</word> + <word id="word_155" tei_id="morph_8.15.19-seg">mundialu</word> + <word id="word_156" tei_id="morph_8.15.20-seg">.</word> + <word id="word_157" tei_id="morph_8.16.1-seg">To</word> + <word id="word_158" tei_id="morph_8.16.2-seg">bardzo</word> + <word id="word_159" tei_id="morph_8.16.3-seg">niewygodny</word> + <word id="word_160" tei_id="morph_8.16.4-seg">przeciwnik</word> + <word id="word_161" tei_id="morph_8.16.5-seg">.</word> + <word id="word_162" tei_id="morph_8.17.1-seg">Może</word> + <word id="word_163" tei_id="morph_8.17.2-seg">wygrać</word> + <word id="word_164" tei_id="morph_8.17.3-seg">z</word> + <word id="word_165" tei_id="morph_8.17.4-seg">każdym</word> + <word id="word_166" tei_id="morph_8.17.5-seg" lastinpar="true">.</word> + <word id="word_167" tei_id="morph_9.18.1-seg" lastinpar="true">ś</word> + <word id="word_168" tei_id="morph_10.19.1-seg">Zbigniew</word> + <word id="word_169" tei_id="morph_10.19.2-seg" lastinpar="true">Boniek</word> + <word id="word_170" tei_id="morph_11.20.1-seg">(</word> + <word id="word_171" tei_id="morph_11.20.2-seg">wiceprezes</word> + <word id="word_172" tei_id="morph_11.20.3-seg">PZPN</word> + <word id="word_173" tei_id="morph_11.20.4-seg">)</word> + <word id="word_174" tei_id="morph_11.20.5-seg">:</word> + <word id="word_175" tei_id="morph_11.20.6-seg">-</word> + <word id="word_176" tei_id="morph_11.20.7-seg">Najważniejsze</word> + <word id="word_177" tei_id="morph_11.20.8-seg">,</word> + <word id="word_178" 
tei_id="morph_11.20.9-seg">że</word> + <word id="word_179" tei_id="morph_11.20.10-seg">eliminacje</word> + <word id="word_180" tei_id="morph_11.20.11-seg">zakończyły</word> + <word id="word_181" tei_id="morph_11.20.12-seg">się</word> + <word id="word_182" tei_id="morph_11.20.13-seg">sukcesem</word> + <word id="word_183" tei_id="morph_11.20.14-seg">.</word> + <word id="word_184" tei_id="morph_11.21.1-seg">Jestem</word> + <word id="word_185" tei_id="morph_11.21.2-seg">usatysfakcjonowany</word> + <word id="word_186" tei_id="morph_11.21.3-seg">.</word> + <word id="word_187" tei_id="morph_11.22.1-seg">Chcę</word> + <word id="word_188" tei_id="morph_11.22.2-seg">podkreślić</word> + <word id="word_189" tei_id="morph_11.22.3-seg">,</word> + <word id="word_190" tei_id="morph_11.22.4-seg">że</word> + <word id="word_191" tei_id="morph_11.22.5-seg">kibice</word> + <word id="word_192" tei_id="morph_11.22.6-seg">i</word> + <word id="word_193" tei_id="morph_11.22.7-seg">dziennikarze</word> + <word id="word_194" tei_id="morph_11.22.8-seg">dostrzegają</word> + <word id="word_195" tei_id="morph_11.22.9-seg">tylko</word> + <word id="word_196" tei_id="morph_11.22.10-seg">to</word> + <word id="word_197" tei_id="morph_11.22.11-seg">,</word> + <word id="word_198" tei_id="morph_11.22.12-seg">co</word> + <word id="word_199" tei_id="morph_11.22.13-seg">dzieje</word> + <word id="word_200" tei_id="morph_11.22.14-seg">się</word> + <word id="word_201" tei_id="morph_11.22.15-seg">na</word> + <word id="word_202" tei_id="morph_11.22.16-seg">boisku</word> + <word id="word_203" tei_id="morph_11.22.17-seg">i</word> + <word id="word_204" tei_id="morph_11.22.18-seg">rzadko</word> + <word id="word_205" tei_id="morph_11.22.19-seg">kiedy</word> + <word id="word_206" tei_id="morph_11.22.20-seg">zwracają</word> + <word id="word_207" tei_id="morph_11.22.21-seg">uwagę</word> + <word id="word_208" tei_id="morph_11.22.22-seg">na</word> + <word id="word_209" tei_id="morph_11.22.23-seg">kulisy</word> + <word 
id="word_210" tei_id="morph_11.22.24-seg">sukcesów</word> + <word id="word_211" tei_id="morph_11.22.25-seg">piłkarzy</word> + <word id="word_212" tei_id="morph_11.22.26-seg">,</word> + <word id="word_213" tei_id="morph_11.22.27-seg">a</word> + <word id="word_214" tei_id="morph_11.22.28-seg">oznaczają</word> + <word id="word_215" tei_id="morph_11.22.29-seg">one</word> + <word id="word_216" tei_id="morph_11.22.30-seg">ogromny</word> + <word id="word_217" tei_id="morph_11.22.31-seg">wysiłek</word> + <word id="word_218" tei_id="morph_11.22.32-seg">organizacyjny</word> + <word id="word_219" tei_id="morph_11.22.33-seg">,</word> + <word id="word_220" tei_id="morph_11.22.34-seg">wiele</word> + <word id="word_221" tei_id="morph_11.22.35-seg">pracy</word> + <word id="word_222" tei_id="morph_11.22.36-seg">rzeszy</word> + <word id="word_223" tei_id="morph_11.22.37-seg">ludzi</word> + <word id="word_224" tei_id="morph_11.22.38-seg">,</word> + <word id="word_225" tei_id="morph_11.22.39-seg">którzy</word> + <word id="word_226" tei_id="morph_11.22.40-seg">nie</word> + <word id="word_227" tei_id="morph_11.22.41-seg">stoją</word> + <word id="word_228" tei_id="morph_11.22.42-seg">w</word> + <word id="word_229" tei_id="morph_11.22.43-seg">pierwszym</word> + <word id="word_230" tei_id="morph_11.22.44-seg">szeregu</word> + <word id="word_231" tei_id="morph_11.22.45-seg">,</word> + <word id="word_232" tei_id="morph_11.22.46-seg">ale</word> + <word id="word_233" tei_id="morph_11.22.47-seg">wykonują</word> + <word id="word_234" tei_id="morph_11.22.48-seg">nieraz</word> + <word id="word_235" tei_id="morph_11.22.49-seg">ciężkie</word> + <word id="word_236" tei_id="morph_11.22.50-seg">i</word> + <word id="word_237" tei_id="morph_11.22.51-seg">niewdzięczne</word> + <word id="word_238" tei_id="morph_11.22.52-seg">zadania</word> + <word id="word_239" tei_id="morph_11.22.53-seg">.</word> + <word id="word_240" tei_id="morph_11.23.1-seg">Moim</word> + <word id="word_241" 
tei_id="morph_11.23.2-seg">zdaniem</word> + <word id="word_242" tei_id="morph_11.23.3-seg">między</word> + <word id="word_243" tei_id="morph_11.23.4-seg">innymi</word> + <word id="word_244" tei_id="morph_11.23.5-seg">dlatego</word> + <word id="word_245" tei_id="morph_11.23.6-seg">,</word> + <word id="word_246" tei_id="morph_11.23.7-seg">że</word> + <word id="word_247" tei_id="morph_11.23.8-seg">sprawy</word> + <word id="word_248" tei_id="morph_11.23.9-seg">organizacyjne</word> + <word id="word_249" tei_id="morph_11.23.10-seg">zostały</word> + <word id="word_250" tei_id="morph_11.23.11-seg">ułożone</word> + <word id="word_251" tei_id="morph_11.23.12-seg">na</word> + <word id="word_252" tei_id="morph_11.23.13-seg">odpowiednim</word> + <word id="word_253" tei_id="morph_11.23.14-seg">poziomie</word> + <word id="word_254" tei_id="morph_11.23.15-seg">,</word> + <word id="word_255" tei_id="morph_11.23.16-seg">wszyscy</word> + <word id="word_256" tei_id="morph_11.23.17-seg">możemy</word> + <word id="word_257" tei_id="morph_11.23.18-seg">się</word> + <word id="word_258" tei_id="morph_11.23.19-seg">dzisiaj</word> + <word id="word_259" tei_id="morph_11.23.20-seg">cieszyć</word> + <word id="word_260" tei_id="morph_11.23.21-seg">z</word> + <word id="word_261" tei_id="morph_11.23.22-seg">awansu</word> + <word id="word_262" tei_id="morph_11.23.23-seg">do</word> + <word id="word_263" tei_id="morph_11.23.24-seg">mistrzostw</word> + <word id="word_264" tei_id="morph_11.23.25-seg">świata</word> + <word id="word_265" tei_id="morph_11.23.26-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_gold/4.mmax a/src/test/resources/teksty_mmax/teksty_gold/4.mmax new file mode 100755 index 0000000..731abf6 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/4.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>4_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_gold/4_mentions.xml 
a/src/test/resources/teksty_mmax/teksty_gold/4_mentions.xml new file mode 100755 index 0000000..35bcfe6 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/4_mentions.xml @@ -0,0 +1,64 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_1" mmax_level="mention" mention_head="Ernest" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_2" span="word_12..word_13" mmax_level="mention" mention_head="rodzinę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_28" mmax_level="mention" mention_head="dziewczętami" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_32..word_33" mmax_level="mention" mention_head="Ernest" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_5" span="word_36" mmax_level="mention" mention_head="Wigilię" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_38" mmax_level="mention" mention_head="jego" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_7" span="word_38..word_39" mmax_level="mention" mention_head="obowiązków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_8" span="word_43" mmax_level="mention" mention_head="choinki" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_9" span="word_48" mmax_level="mention" mention_head="naczyń" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_47..word_48" mmax_level="mention" mention_head="zmywanie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_50" mmax_level="mention" mention_head="Agnieszka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_52" mmax_level="mention" 
mention_head="pierogi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_55" mmax_level="mention" mention_head="barszcz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_14" span="word_60" mmax_level="mention" mention_head="karpia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_64" mmax_level="mention" mention_head="ciasta" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_70" mmax_level="mention" mention_head="prezenty" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_87" mmax_level="mention" mention_head="Leśniczy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_18" span="word_92" mmax_level="mention" mention_head="Ernest" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_19" span="word_90..word_92" mmax_level="mention" mention_head="pan" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_20" span="word_94" mmax_level="mention" mention_head="choinkę" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_21" span="word_101" mmax_level="mention" mention_head="Kwietniów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_100..word_101" mmax_level="mention" mention_head="domu" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_23" span="word_103" mmax_level="mention" mention_head="On" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_24" span="word_107" mmax_level="mention" mention_head="sobie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_108" mmax_level="mention" mention_head="świąt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" 
span="word_110..word_111" mmax_level="mention" mention_head="świerku" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_27" span="word_121..word_122" mmax_level="mention" mention_head="Gabriel" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_28" span="word_124..word_125" mmax_level="mention" mention_head="nadleśniczy" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_29" span="word_127" mmax_level="mention" mention_head="To" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_30" span="word_133" mmax_level="mention" mention_head="nim" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_31" span="word_135" mmax_level="mention" mention_head="ozdoby" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_32" span="word_138" mmax_level="mention" mention_head="dzieci" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_145..word_147" mmax_level="mention" mention_head="okazję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_34" span="word_149..word_150" mmax_level="mention" mention_head="Pan" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_35" span="word_155..word_156" mmax_level="mention" mention_head="córki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_152..word_156" mmax_level="mention" mention_head="synów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_159" mmax_level="mention" mention_head="domu" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_38" span="word_169" mmax_level="mention" mention_head="wszyscy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_173" mmax_level="mention" mention_head="choinkę" 
mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_40" span="word_178..word_179" mmax_level="mention" mention_head="włosy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_181" mmax_level="mention" mention_head="ja" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_183" mmax_level="mention" mention_head="lampki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_43" span="word_187" mmax_level="mention" mention_head="nadleśniczego" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_44" span="word_186..word_187" mmax_level="mention" mention_head="domu" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_45" span="word_189..word_190" mmax_level="mention" mention_head="ról" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_188..word_190" mmax_level="mention" mention_head="podział" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_47" span="word_195..word_196" mmax_level="mention" mention_head="lasach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_199" mmax_level="mention" mention_head="świerków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_202" mmax_level="mention" mention_head="plantacjach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_209..word_210" mmax_level="mention" mention_head="igłami" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_235" mmax_level="mention" mention_head="nadleśniczy" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_52" span="word_239" mmax_level="mention" mention_head="choinkę" mention_group="set_5" near_identity="empty"></markable> 
+ <markable id="markable_53" span="word_241" mmax_level="mention" mention_head="sobie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_246" mmax_level="mention" mention_head="rodziny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_251" mmax_level="mention" mention_head="roku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_255" mmax_level="mention" mention_head="synem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_260" mmax_level="mention" mention_head="drzewko" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_58" span="word_264" mmax_level="mention" mention_head="tradycję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_268" mmax_level="mention" mention_head="Gabriel" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_60" span="word_267..word_268" mmax_level="mention" mention_head="pan" mention_group="set_7" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_gold/4_words.xml a/src/test/resources/teksty_mmax/teksty_gold/4_words.xml new file mode 100755 index 0000000..17caaeb --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/4_words.xml @@ -0,0 +1,273 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Ernest</word> + <word id="word_2" tei_id="morph_1.1.2-seg">i</word> + <word id="word_3" tei_id="morph_1.1.3-seg">Agnieszka</word> + <word id="word_4" tei_id="morph_1.1.4-seg">nie</word> + <word id="word_5" tei_id="morph_1.1.5-seg">planowali</word> + <word id="word_6" tei_id="morph_1.1.6-seg">,</word> + <word id="word_7" tei_id="morph_1.1.7-seg">że</word> + <word id="word_8" tei_id="morph_1.1.8-seg">będą</word> + 
<word id="word_9" tei_id="morph_1.1.9-seg">mieli</word> + <word id="word_10" tei_id="morph_1.1.10-seg">wielką</word> + <word id="word_11" tei_id="morph_1.1.11-seg">,</word> + <word id="word_12" tei_id="morph_1.1.12-seg">babską</word> + <word id="word_13" tei_id="morph_1.1.13-seg">rodzinę</word> + <word id="word_14" tei_id="morph_1.1.14-seg">.</word> + <word id="word_15" tei_id="morph_1.2.1-seg">Ale</word> + <word id="word_16" tei_id="morph_1.2.2-seg">tak</word> + <word id="word_17" tei_id="morph_1.2.3-seg">wyszło</word> + <word id="word_18" tei_id="morph_1.2.4-seg">.</word> + <word id="word_19" tei_id="morph_1.2.5-seg">–</word> + <word id="word_20" tei_id="morph_1.2.6-seg">I</word> + <word id="word_21" tei_id="morph_1.2.7-seg">całe</word> + <word id="word_22" tei_id="morph_1.2.8-seg">szczęście</word> + <word id="word_23" tei_id="morph_1.2.9-seg">.</word> + <word id="word_24" tei_id="morph_1.3.1-seg">Lepiej</word> + <word id="word_25" tei_id="morph_1.3.2-seg">się</word> + <word id="word_26" tei_id="morph_1.3.3-seg">dogaduję</word> + <word id="word_27" tei_id="morph_1.3.4-seg">z</word> + <word id="word_28" tei_id="morph_1.3.5-seg">dziewczętami</word> + <word id="word_29" tei_id="morph_1.3.6-seg">–</word> + <word id="word_30" tei_id="morph_1.3.7-seg">cieszy</word> + <word id="word_31" tei_id="morph_1.3.8-seg">się</word> + <word id="word_32" tei_id="morph_1.3.9-seg">Ernest</word> + <word id="word_33" tei_id="morph_1.3.10-seg">Kwiecień</word> + <word id="word_34" tei_id="morph_1.3.11-seg" lastinpar="true">.</word> + <word id="word_35" tei_id="morph_2.4.1-seg">W</word> + <word id="word_36" tei_id="morph_2.4.2-seg">Wigilię</word> + <word id="word_37" tei_id="morph_2.4.3-seg">do</word> + <word id="word_38" tei_id="morph_2.4.4-seg">jego</word> + <word id="word_39" tei_id="morph_2.4.5-seg">obowiązków</word> + <word id="word_40" tei_id="morph_2.4.6-seg">,</word> + <word id="word_41" tei_id="morph_2.4.7-seg">poza</word> + <word id="word_42" 
tei_id="morph_2.4.8-seg">dostarczeniem</word> + <word id="word_43" tei_id="morph_2.4.9-seg">choinki</word> + <word id="word_44" tei_id="morph_2.4.10-seg">,</word> + <word id="word_45" tei_id="morph_2.4.11-seg">należeć</word> + <word id="word_46" tei_id="morph_2.4.12-seg">będzie</word> + <word id="word_47" tei_id="morph_2.4.13-seg">zmywanie</word> + <word id="word_48" tei_id="morph_2.4.14-seg">naczyń</word> + <word id="word_49" tei_id="morph_2.4.15-seg">.</word> + <word id="word_50" tei_id="morph_2.5.1-seg">Agnieszka</word> + <word id="word_51" tei_id="morph_2.5.2-seg">zrobi</word> + <word id="word_52" tei_id="morph_2.5.3-seg">pierogi</word> + <word id="word_53" tei_id="morph_2.5.4-seg">,</word> + <word id="word_54" tei_id="morph_2.5.5-seg">ugotuje</word> + <word id="word_55" tei_id="morph_2.5.6-seg">barszcz</word> + <word id="word_56" tei_id="morph_2.5.7-seg">z</word> + <word id="word_57" tei_id="morph_2.5.8-seg">uszkami</word> + <word id="word_58" tei_id="morph_2.5.9-seg">,</word> + <word id="word_59" tei_id="morph_2.5.10-seg">usmaży</word> + <word id="word_60" tei_id="morph_2.5.11-seg">karpia</word> + <word id="word_61" tei_id="morph_2.5.12-seg">.</word> + <word id="word_62" tei_id="morph_2.6.1-seg">Córki</word> + <word id="word_63" tei_id="morph_2.6.2-seg">upieką</word> + <word id="word_64" tei_id="morph_2.6.3-seg">ciasta</word> + <word id="word_65" tei_id="morph_2.6.4-seg">.</word> + <word id="word_66" tei_id="morph_2.7.1-seg">Potem</word> + <word id="word_67" tei_id="morph_2.7.2-seg">przyjdzie</word> + <word id="word_68" tei_id="morph_2.7.3-seg">czas</word> + <word id="word_69" tei_id="morph_2.7.4-seg">na</word> + <word id="word_70" tei_id="morph_2.7.5-seg">prezenty</word> + <word id="word_71" tei_id="morph_2.7.6-seg">.</word> + <word id="word_72" tei_id="morph_2.8.1-seg">Może</word> + <word id="word_73" tei_id="morph_2.8.2-seg">to</word> + <word id="word_74" tei_id="morph_2.8.3-seg">nawet</word> + <word id="word_75" tei_id="morph_2.8.4-seg">będą</word> + 
<word id="word_76" tei_id="morph_2.8.5-seg">empetrójki</word> + <word id="word_77" tei_id="morph_2.8.6-seg">,</word> + <word id="word_78" tei_id="morph_2.8.7-seg">o</word> + <word id="word_79" tei_id="morph_2.8.8-seg">których</word> + <word id="word_80" tei_id="morph_2.8.9-seg">marzą</word> + <word id="word_81" tei_id="morph_2.8.10-seg">starsze</word> + <word id="word_82" tei_id="morph_2.8.11-seg">dziewczyny</word> + <word id="word_83" tei_id="morph_2.8.12-seg" lastinpar="true">.</word> + <word id="word_84" tei_id="morph_3.9.1-seg">Jodełek</word> + <word id="word_85" tei_id="morph_3.9.2-seg">sadzimy</word> + <word id="word_86" tei_id="morph_3.9.3-seg" lastinpar="true">mniej</word> + <word id="word_87" tei_id="morph_4.10.1-seg">Leśniczy</word> + <word id="word_88" tei_id="morph_4.10.2-seg">,</word> + <word id="word_89" tei_id="morph_4.10.3-seg">od</word> + <word id="word_90" tei_id="morph_4.10.4-seg">którego</word> + <word id="word_91" tei_id="morph_4.10.5-seg">pan</word> + <word id="word_92" tei_id="morph_4.10.6-seg">Ernest</word> + <word id="word_93" tei_id="morph_4.10.7-seg">przywozi</word> + <word id="word_94" tei_id="morph_4.10.8-seg">choinkę</word> + <word id="word_95" tei_id="morph_4.10.9-seg">,</word> + <word id="word_96" tei_id="morph_4.10.10-seg">mieszka</word> + <word id="word_97" tei_id="morph_4.10.11-seg">kilka</word> + <word id="word_98" tei_id="morph_4.10.12-seg">kilometrów</word> + <word id="word_99" tei_id="morph_4.10.13-seg">od</word> + <word id="word_100" tei_id="morph_4.10.14-seg">domu</word> + <word id="word_101" tei_id="morph_4.10.15-seg">Kwietniów</word> + <word id="word_102" tei_id="morph_4.10.16-seg">.</word> + <word id="word_103" tei_id="morph_4.11.1-seg">On</word> + <word id="word_104" tei_id="morph_4.11.2-seg">także</word> + <word id="word_105" tei_id="morph_4.11.3-seg">nie</word> + <word id="word_106" tei_id="morph_4.11.4-seg">wyobraża</word> + <word id="word_107" tei_id="morph_4.11.5-seg">sobie</word> + <word id="word_108" 
tei_id="morph_4.11.6-seg">świąt</word> + <word id="word_109" tei_id="morph_4.11.7-seg">bez</word> + <word id="word_110" tei_id="morph_4.11.8-seg">prawdziwego</word> + <word id="word_111" tei_id="morph_4.11.9-seg">świerku</word> + <word id="word_112" tei_id="morph_4.11.10-seg">.</word> + <word id="word_113" tei_id="morph_4.11.11-seg">–</word> + <word id="word_114" tei_id="morph_4.11.12-seg">I</word> + <word id="word_115" tei_id="morph_4.11.13-seg">musi</word> + <word id="word_116" tei_id="morph_4.11.14-seg">być</word> + <word id="word_117" tei_id="morph_4.11.15-seg">kiczowaty</word> + <word id="word_118" tei_id="morph_4.11.16-seg">–</word> + <word id="word_119" tei_id="morph_4.11.17-seg">uśmiecha</word> + <word id="word_120" tei_id="morph_4.11.18-seg">się</word> + <word id="word_121" tei_id="morph_4.11.19-seg">Gabriel</word> + <word id="word_122" tei_id="morph_4.11.20-seg">Grobelny</word> + <word id="word_123" tei_id="morph_4.11.21-seg">,</word> + <word id="word_124" tei_id="morph_4.11.22-seg">nadleśniczy</word> + <word id="word_125" tei_id="morph_4.11.23-seg">wałbrzyski</word> + <word id="word_126" tei_id="morph_4.11.24-seg" lastinpar="true">.</word> + <word id="word_127" tei_id="morph_5.12.1-seg">To</word> + <word id="word_128" tei_id="morph_5.12.2-seg">znaczy</word> + <word id="word_129" tei_id="morph_5.12.3-seg">,</word> + <word id="word_130" tei_id="morph_5.12.4-seg">że</word> + <word id="word_131" tei_id="morph_5.12.5-seg">powinny</word> + <word id="word_132" tei_id="morph_5.12.6-seg">na</word> + <word id="word_133" tei_id="morph_5.12.7-seg">nim</word> + <word id="word_134" tei_id="morph_5.12.8-seg">wisieć</word> + <word id="word_135" tei_id="morph_5.12.9-seg">ozdoby</word> + <word id="word_136" tei_id="morph_5.12.10-seg">zrobione</word> + <word id="word_137" tei_id="morph_5.12.11-seg">przez</word> + <word id="word_138" tei_id="morph_5.12.12-seg">dzieci</word> + <word id="word_139" tei_id="morph_5.12.13-seg">,</word> + <word id="word_140" 
tei_id="morph_5.12.14-seg">przechowywane</word> + <word id="word_141" tei_id="morph_5.12.15-seg">latami</word> + <word id="word_142" tei_id="morph_5.12.16-seg">,</word> + <word id="word_143" tei_id="morph_5.12.17-seg">wyciągane</word> + <word id="word_144" tei_id="morph_5.12.18-seg">na</word> + <word id="word_145" tei_id="morph_5.12.19-seg">tę</word> + <word id="word_146" tei_id="morph_5.12.20-seg">jedyną</word> + <word id="word_147" tei_id="morph_5.12.21-seg">okazję</word> + <word id="word_148" tei_id="morph_5.12.22-seg" lastinpar="true">.</word> + <word id="word_149" tei_id="morph_6.13.1-seg">Pan</word> + <word id="word_150" tei_id="morph_6.13.2-seg">Gabriel</word> + <word id="word_151" tei_id="morph_6.13.3-seg">ma</word> + <word id="word_152" tei_id="morph_6.13.4-seg">dwóch</word> + <word id="word_153" tei_id="morph_6.13.5-seg">synów</word> + <word id="word_154" tei_id="morph_6.13.6-seg">i</word> + <word id="word_155" tei_id="morph_6.13.7-seg">trzy</word> + <word id="word_156" tei_id="morph_6.13.8-seg">córki</word> + <word id="word_157" tei_id="morph_6.13.9-seg">.</word> + <word id="word_158" tei_id="morph_6.14.1-seg">W</word> + <word id="word_159" tei_id="morph_6.14.2-seg">domu</word> + <word id="word_160" tei_id="morph_6.14.3-seg">została</word> + <word id="word_161" tei_id="morph_6.14.4-seg">najmłodsza</word> + <word id="word_162" tei_id="morph_6.14.5-seg">,</word> + <word id="word_163" tei_id="morph_6.14.6-seg">12-letnia</word> + <word id="word_164" tei_id="morph_6.14.7-seg">,</word> + <word id="word_165" tei_id="morph_6.14.8-seg">ale</word> + <word id="word_166" tei_id="morph_6.14.9-seg">na</word> + <word id="word_167" tei_id="morph_6.14.10-seg">święta</word> + <word id="word_168" tei_id="morph_6.14.11-seg">zjadą</word> + <word id="word_169" tei_id="morph_6.14.12-seg">wszyscy</word> + <word id="word_170" tei_id="morph_6.14.13-seg">.</word> + <word id="word_171" tei_id="morph_6.15.1-seg">I</word> + <word id="word_172" tei_id="morph_6.15.2-seg">ubiorą</word> 
+ <word id="word_173" tei_id="morph_6.15.3-seg">choinkę</word> + <word id="word_174" tei_id="morph_6.15.4-seg">.</word> + <word id="word_175" tei_id="morph_6.15.5-seg">–</word> + <word id="word_176" tei_id="morph_6.15.6-seg">Żona</word> + <word id="word_177" tei_id="morph_6.15.7-seg">rozwiesi</word> + <word id="word_178" tei_id="morph_6.15.8-seg">anielskie</word> + <word id="word_179" tei_id="morph_6.15.9-seg">włosy</word> + <word id="word_180" tei_id="morph_6.15.10-seg">,</word> + <word id="word_181" tei_id="morph_6.15.11-seg">ja</word> + <word id="word_182" tei_id="morph_6.15.12-seg">podłączę</word> + <word id="word_183" tei_id="morph_6.15.13-seg">lampki</word> + <word id="word_184" tei_id="morph_6.15.14-seg">–</word> + <word id="word_185" tei_id="morph_6.15.15-seg">w</word> + <word id="word_186" tei_id="morph_6.15.16-seg">domu</word> + <word id="word_187" tei_id="morph_6.15.17-seg">nadleśniczego</word> + <word id="word_188" tei_id="morph_6.15.18-seg">podział</word> + <word id="word_189" tei_id="morph_6.15.19-seg">świątecznych</word> + <word id="word_190" tei_id="morph_6.15.20-seg">ról</word> + <word id="word_191" tei_id="morph_6.15.21-seg">jest</word> + <word id="word_192" tei_id="morph_6.15.22-seg">określony</word> + <word id="word_193" tei_id="morph_6.15.23-seg" lastinpar="true">.</word> + <word id="word_194" tei_id="morph_7.16.1-seg">W</word> + <word id="word_195" tei_id="morph_7.16.2-seg">dolnośląskich</word> + <word id="word_196" tei_id="morph_7.16.3-seg">lasach</word> + <word id="word_197" tei_id="morph_7.16.4-seg">najwięcej</word> + <word id="word_198" tei_id="morph_7.16.5-seg">jest</word> + <word id="word_199" tei_id="morph_7.16.6-seg">świerków</word> + <word id="word_200" tei_id="morph_7.16.7-seg">.</word> + <word id="word_201" tei_id="morph_7.17.1-seg">Na</word> + <word id="word_202" tei_id="morph_7.17.2-seg">plantacjach</word> + <word id="word_203" tei_id="morph_7.17.3-seg">sadzą</word> + <word id="word_204" tei_id="morph_7.17.4-seg">także</word> + 
<word id="word_205" tei_id="morph_7.17.5-seg">coraz</word> + <word id="word_206" tei_id="morph_7.17.6-seg">popularniejsze</word> + <word id="word_207" tei_id="morph_7.17.7-seg">jodły</word> + <word id="word_208" tei_id="morph_7.17.8-seg">z</word> + <word id="word_209" tei_id="morph_7.17.9-seg">miękkimi</word> + <word id="word_210" tei_id="morph_7.17.10-seg">igłami</word> + <word id="word_211" tei_id="morph_7.17.11-seg" lastinpar="true">.</word> + <word id="word_212" tei_id="morph_8.18.1-seg">–</word> + <word id="word_213" tei_id="morph_8.18.2-seg">Ale</word> + <word id="word_214" tei_id="morph_8.18.3-seg">i</word> + <word id="word_215" tei_id="morph_8.18.4-seg">tych</word> + <word id="word_216" tei_id="morph_8.18.5-seg">jodełek</word> + <word id="word_217" tei_id="morph_8.18.6-seg">sadzimy</word> + <word id="word_218" tei_id="morph_8.18.7-seg">już</word> + <word id="word_219" tei_id="morph_8.18.8-seg">mniej</word> + <word id="word_220" tei_id="morph_8.18.9-seg">.</word> + <word id="word_221" tei_id="morph_8.19.1-seg">To</word> + <word id="word_222" tei_id="morph_8.19.2-seg">nie</word> + <word id="word_223" tei_id="morph_8.19.3-seg">lata</word> + <word id="word_224" tei_id="morph_8.19.4-seg">dziewięćdziesiąte</word> + <word id="word_225" tei_id="morph_8.19.5-seg">,</word> + <word id="word_226" tei_id="morph_8.19.6-seg">gdy</word> + <word id="word_227" tei_id="morph_8.19.7-seg">sprzedawali</word> + <word id="word_228" tei_id="morph_8.19.8-seg">śmy</word> + <word id="word_229" tei_id="morph_8.19.9-seg">prawie</word> + <word id="word_230" tei_id="morph_8.19.10-seg">wszystkie</word> + <word id="word_231" tei_id="morph_8.19.11-seg">wyhodowane</word> + <word id="word_232" tei_id="morph_8.19.12-seg">drzewka</word> + <word id="word_233" tei_id="morph_8.19.13-seg">–</word> + <word id="word_234" tei_id="morph_8.19.14-seg">wspomina</word> + <word id="word_235" tei_id="morph_8.19.15-seg">nadleśniczy</word> + <word id="word_236" tei_id="morph_8.19.16-seg" 
lastinpar="true">.</word> + <word id="word_237" tei_id="morph_9.20.1-seg">U</word> + <word id="word_238" tei_id="morph_9.20.2-seg">Grobelnego</word> + <word id="word_239" tei_id="morph_9.20.3-seg">choinkę</word> + <word id="word_240" tei_id="morph_9.20.4-seg">można</word> + <word id="word_241" tei_id="morph_9.20.5-seg">sobie</word> + <word id="word_242" tei_id="morph_9.20.6-seg">wybrać</word> + <word id="word_243" tei_id="morph_9.20.7-seg">.</word> + <word id="word_244" tei_id="morph_9.20.8-seg">–</word> + <word id="word_245" tei_id="morph_9.20.9-seg">Mamy</word> + <word id="word_246" tei_id="morph_9.20.10-seg">rodziny</word> + <word id="word_247" tei_id="morph_9.20.11-seg">,</word> + <word id="word_248" tei_id="morph_9.20.12-seg">w</word> + <word id="word_249" tei_id="morph_9.20.13-seg">których</word> + <word id="word_250" tei_id="morph_9.20.14-seg">co</word> + <word id="word_251" tei_id="morph_9.20.15-seg">roku</word> + <word id="word_252" tei_id="morph_9.20.16-seg">ojciec</word> + <word id="word_253" tei_id="morph_9.20.17-seg">przyjeżdża</word> + <word id="word_254" tei_id="morph_9.20.18-seg">z</word> + <word id="word_255" tei_id="morph_9.20.19-seg">synem</word> + <word id="word_256" tei_id="morph_9.20.20-seg">,</word> + <word id="word_257" tei_id="morph_9.20.21-seg">by</word> + <word id="word_258" tei_id="morph_9.20.22-seg">samemu</word> + <word id="word_259" tei_id="morph_9.20.23-seg">ściąć</word> + <word id="word_260" tei_id="morph_9.20.24-seg">drzewko</word> + <word id="word_261" tei_id="morph_9.20.25-seg">.</word> + <word id="word_262" tei_id="morph_9.21.1-seg">Taką</word> + <word id="word_263" tei_id="morph_9.21.2-seg">mają</word> + <word id="word_264" tei_id="morph_9.21.3-seg">tradycję</word> + <word id="word_265" tei_id="morph_9.21.4-seg">–</word> + <word id="word_266" tei_id="morph_9.21.5-seg">dodaje</word> + <word id="word_267" tei_id="morph_9.21.6-seg">pan</word> + <word id="word_268" tei_id="morph_9.21.7-seg">Gabriel</word> + <word id="word_269" 
tei_id="morph_9.21.8-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_gold/5.mmax a/src/test/resources/teksty_mmax/teksty_gold/5.mmax new file mode 100755 index 0000000..26737f7 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/5.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>5_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_gold/5_mentions.xml a/src/test/resources/teksty_mmax/teksty_gold/5_mentions.xml new file mode 100755 index 0000000..d7eeb0d --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/5_mentions.xml @@ -0,0 +1,61 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_1..word_2" mmax_level="mention" mention_head="Cena" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_7..word_8" mmax_level="mention" mention_head="Żydów" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_3" span="word_12" mmax_level="mention" mention_head="nich" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_13" mmax_level="mention" mention_head="rodzina" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_5" span="word_20" mmax_level="mention" mention_head="to" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_6" span="word_23" mmax_level="mention" mention_head="miasteczka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_24..word_25" mmax_level="mention" mention_head="rodzina" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_8" span="word_38" mmax_level="mention" mention_head="kogo" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_44..word_45" 
mmax_level="mention" mention_head="Żydzi" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_10" span="word_48..word_49" mmax_level="mention" mention_head="okupacji" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_11" span="word_56..word_57" mmax_level="mention" mention_head="okupację" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_12" span="word_60..word_61" mmax_level="mention" mention_head="Faktem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_64" mmax_level="mention" mention_head="to" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_14" span="word_67..word_69" mmax_level="mention" mention_head="rodziny" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_15" span="word_79" mmax_level="mention" mention_head="Żydów" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_16" span="word_82" mmax_level="mention" mention_head="kilometrów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_90" mmax_level="mention" mention_head="folwarku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_18" span="word_95" mmax_level="mention" mention_head="AK" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_19" span="word_96" mmax_level="mention" mention_head="Kazimierz" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_20" span="word_94..word_96" mmax_level="mention" mention_head="żołnierz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_21" span="word_100" mmax_level="mention" mention_head="wojny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_102..word_103" mmax_level="mention" mention_head="domu" mention_group="set_2" 
near_identity="empty"></markable> + <markable id="markable_23" span="word_106" mmax_level="mention" mention_head="Żydów" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_24" span="word_109" mmax_level="mention" mention_head="studni" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_112" mmax_level="mention" mention_head="domu" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_26" span="word_111..word_112" mmax_level="mention" mention_head="piwnic" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_27" span="word_114..word_115" mmax_level="mention" mention_head="tunel" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_120" mmax_level="mention" mention_head="wodę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_29" span="word_127" mmax_level="mention" mention_head="Kazimierz" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_30" span="word_125..word_127" mmax_level="mention" mention_head="siatki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_31" span="word_132" mmax_level="mention" mention_head="Żydów" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_32" span="word_138" mmax_level="mention" mention_head="Żydów" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_33" span="word_143..word_144" mmax_level="mention" mention_head="rodziny" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_34" span="word_153" mmax_level="mention" mention_head="to" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_35" span="word_156" mmax_level="mention" mention_head="Żydzi" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_36" span="word_158..word_159" 
mmax_level="mention" mention_head="przechowanie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_170" mmax_level="mention" mention_head="ryzyko" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_173" mmax_level="mention" mention_head="tym" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_39" span="word_185" mmax_level="mention" mention_head="tym" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_40" span="word_190" mmax_level="mention" mention_head="pieniądze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_195..word_196" mmax_level="mention" mention_head="odruchu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_203" mmax_level="mention" mention_head="Żydzi" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_43" span="word_221..word_223" mmax_level="mention" mention_head="wsi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_44" span="word_224..word_225" mmax_level="mention" mention_head="gospodarz" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_45" span="word_227" mmax_level="mention" mention_head="Polak" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_46" span="word_233..word_234" mmax_level="mention" mention_head="Żydówkę" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_47" span="word_237" mmax_level="mention" mention_head="mu" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_48" span="word_243" mmax_level="mention" mention_head="dramat" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_249" mmax_level="mention" mention_head="policję" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_50" span="word_255" mmax_level="mention" mention_head="gospodarzem" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_51" span="word_257..word_258" mmax_level="mention" mention_head="kobieta" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_52" span="word_261" mmax_level="mention" mention_head="męża" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_53" span="word_268" mmax_level="mention" mention_head="łapówkę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_282" mmax_level="mention" mention_head="gospodarza" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_55" span="word_276..word_282" mmax_level="mention" mention_head="Żydówkę" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_56" span="word_286..word_287" mmax_level="mention" mention_head="straceńca" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_290" mmax_level="mention" mention_head="antysemitką" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_gold/5_words.xml a/src/test/resources/teksty_mmax/teksty_gold/5_words.xml new file mode 100755 index 0000000..4d24a36 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/5_words.xml @@ -0,0 +1,295 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Cena</word> + <word id="word_2" tei_id="morph_1.1.2-seg" lastinpar="true">życia</word> + <word id="word_3" tei_id="morph_2.2.1-seg">Z</word> + <word id="word_4" tei_id="morph_2.2.2-seg">tego</word> + <word id="word_5" tei_id="morph_2.2.3-seg">pogromu</word> + <word id="word_6" tei_id="morph_2.2.4-seg">ocalało</word> + <word id="word_7" 
tei_id="morph_2.2.5-seg">kilkudziesięciu</word> + <word id="word_8" tei_id="morph_2.2.6-seg">Żydów</word> + <word id="word_9" tei_id="morph_2.2.7-seg">,</word> + <word id="word_10" tei_id="morph_2.2.8-seg">a</word> + <word id="word_11" tei_id="morph_2.2.9-seg">wśród</word> + <word id="word_12" tei_id="morph_2.2.10-seg">nich</word> + <word id="word_13" tei_id="morph_2.2.11-seg">rodzina</word> + <word id="word_14" tei_id="morph_2.2.12-seg">Mosze</word> + <word id="word_15" tei_id="morph_2.2.13-seg">Sonensona</word> + <word id="word_16" tei_id="morph_2.2.14-seg">.</word> + <word id="word_17" tei_id="morph_2.3.1-seg">Przed</word> + <word id="word_18" tei_id="morph_2.3.2-seg">wojną</word> + <word id="word_19" tei_id="morph_2.3.3-seg">była</word> + <word id="word_20" tei_id="morph_2.3.4-seg">to</word> + <word id="word_21" tei_id="morph_2.3.5-seg">w</word> + <word id="word_22" tei_id="morph_2.3.6-seg">skali</word> + <word id="word_23" tei_id="morph_2.3.7-seg">miasteczka</word> + <word id="word_24" tei_id="morph_2.3.8-seg">rodzina</word> + <word id="word_25" tei_id="morph_2.3.9-seg">bogata</word> + <word id="word_26" tei_id="morph_2.3.10-seg">.</word> + <word id="word_27" tei_id="morph_2.4.1-seg">Sonensonowie</word> + <word id="word_28" tei_id="morph_2.4.2-seg">mieli</word> + <word id="word_29" tei_id="morph_2.4.3-seg">garbarnię</word> + <word id="word_30" tei_id="morph_2.4.4-seg">.</word> + <word id="word_31" tei_id="morph_2.5.1-seg">Nie</word> + <word id="word_32" tei_id="morph_2.5.2-seg">udało</word> + <word id="word_33" tei_id="morph_2.5.3-seg">mi</word> + <word id="word_34" tei_id="morph_2.5.4-seg">się</word> + <word id="word_35" tei_id="morph_2.5.5-seg">dociec</word> + <word id="word_36" tei_id="morph_2.5.6-seg">,</word> + <word id="word_37" tei_id="morph_2.5.7-seg">u</word> + <word id="word_38" tei_id="morph_2.5.8-seg">kogo</word> + <word id="word_39" tei_id="morph_2.5.9-seg">mianowicie</word> + <word id="word_40" tei_id="morph_2.5.10-seg">przechowywali</word> + 
<word id="word_41" tei_id="morph_2.5.11-seg">się</word> + <word id="word_42" tei_id="morph_2.5.12-seg">Sonensonowie</word> + <word id="word_43" tei_id="morph_2.5.13-seg">oraz</word> + <word id="word_44" tei_id="morph_2.5.14-seg">pozostali</word> + <word id="word_45" tei_id="morph_2.5.15-seg">Żydzi</word> + <word id="word_46" tei_id="morph_2.5.16-seg">w</word> + <word id="word_47" tei_id="morph_2.5.17-seg">czasie</word> + <word id="word_48" tei_id="morph_2.5.18-seg">okupacji</word> + <word id="word_49" tei_id="morph_2.5.19-seg">niemieckiej</word> + <word id="word_50" tei_id="morph_2.5.20-seg">.</word> + <word id="word_51" tei_id="morph_2.6.1-seg">Faktem</word> + <word id="word_52" tei_id="morph_2.6.2-seg">pozostaje</word> + <word id="word_53" tei_id="morph_2.6.3-seg">natomiast</word> + <word id="word_54" tei_id="morph_2.6.4-seg">,</word> + <word id="word_55" tei_id="morph_2.6.5-seg">że</word> + <word id="word_56" tei_id="morph_2.6.6-seg">okupację</word> + <word id="word_57" tei_id="morph_2.6.7-seg">tę</word> + <word id="word_58" tei_id="morph_2.6.8-seg">przeżyli</word> + <word id="word_59" tei_id="morph_2.6.9-seg">.</word> + <word id="word_60" tei_id="morph_2.7.1-seg">Faktem</word> + <word id="word_61" tei_id="morph_2.7.2-seg">oczywistym</word> + <word id="word_62" tei_id="morph_2.7.3-seg">pozostaje</word> + <word id="word_63" tei_id="morph_2.7.4-seg">i</word> + <word id="word_64" tei_id="morph_2.7.5-seg">to</word> + <word id="word_65" tei_id="morph_2.7.6-seg">,</word> + <word id="word_66" tei_id="morph_2.7.7-seg">że</word> + <word id="word_67" tei_id="morph_2.7.8-seg">liczne</word> + <word id="word_68" tei_id="morph_2.7.9-seg">rodziny</word> + <word id="word_69" tei_id="morph_2.7.10-seg">polskie</word> + <word id="word_70" tei_id="morph_2.7.11-seg">-</word> + <word id="word_71" tei_id="morph_2.7.12-seg">w</word> + <word id="word_72" tei_id="morph_2.7.13-seg">Ejszyszkach</word> + <word id="word_73" tei_id="morph_2.7.14-seg">i</word> + <word id="word_74" 
tei_id="morph_2.7.15-seg">w</word> + <word id="word_75" tei_id="morph_2.7.16-seg">pobliskich</word> + <word id="word_76" tei_id="morph_2.7.17-seg">okolicach</word> + <word id="word_77" tei_id="morph_2.7.18-seg">-</word> + <word id="word_78" tei_id="morph_2.7.19-seg">przechowywały</word> + <word id="word_79" tei_id="morph_2.7.20-seg">Żydów</word> + <word id="word_80" tei_id="morph_2.7.21-seg">.</word> + <word id="word_81" tei_id="morph_2.8.1-seg">Parę</word> + <word id="word_82" tei_id="morph_2.8.2-seg">kilometrów</word> + <word id="word_83" tei_id="morph_2.8.3-seg">od</word> + <word id="word_84" tei_id="morph_2.8.4-seg">Ejszyszek</word> + <word id="word_85" tei_id="morph_2.8.5-seg">,</word> + <word id="word_86" tei_id="morph_2.8.6-seg">w</word> + <word id="word_87" tei_id="morph_2.8.7-seg">Korkucianach</word> + <word id="word_88" tei_id="morph_2.8.8-seg">(</word> + <word id="word_89" tei_id="morph_2.8.9-seg">w</word> + <word id="word_90" tei_id="morph_2.8.10-seg">folwarku</word> + <word id="word_91" tei_id="morph_2.8.11-seg">Lebiedniki</word> + <word id="word_92" tei_id="morph_2.8.12-seg">)</word> + <word id="word_93" tei_id="morph_2.8.13-seg">,</word> + <word id="word_94" tei_id="morph_2.8.14-seg">żołnierz</word> + <word id="word_95" tei_id="morph_2.8.15-seg">AK</word> + <word id="word_96" tei_id="morph_2.8.16-seg">Kazimierz</word> + <word id="word_97" tei_id="morph_2.8.17-seg">Korkuć</word> + <word id="word_98" tei_id="morph_2.8.18-seg">w</word> + <word id="word_99" tei_id="morph_2.8.19-seg">czasie</word> + <word id="word_100" tei_id="morph_2.8.20-seg">wojny</word> + <word id="word_101" tei_id="morph_2.8.21-seg">w</word> + <word id="word_102" tei_id="morph_2.8.22-seg">swoim</word> + <word id="word_103" tei_id="morph_2.8.23-seg">domu</word> + <word id="word_104" tei_id="morph_2.8.24-seg">przechowywał</word> + <word id="word_105" tei_id="morph_2.8.25-seg">28</word> + <word id="word_106" tei_id="morph_2.8.26-seg">Żydów</word> + <word id="word_107" 
tei_id="morph_2.8.27-seg">.</word> + <word id="word_108" tei_id="morph_2.9.1-seg">Od</word> + <word id="word_109" tei_id="morph_2.9.2-seg">studni</word> + <word id="word_110" tei_id="morph_2.9.3-seg">do</word> + <word id="word_111" tei_id="morph_2.9.4-seg">piwnic</word> + <word id="word_112" tei_id="morph_2.9.5-seg">domu</word> + <word id="word_113" tei_id="morph_2.9.6-seg">był</word> + <word id="word_114" tei_id="morph_2.9.7-seg">przekopany</word> + <word id="word_115" tei_id="morph_2.9.8-seg">tunel</word> + <word id="word_116" tei_id="morph_2.9.9-seg">,</word> + <word id="word_117" tei_id="morph_2.9.10-seg">dzięki</word> + <word id="word_118" tei_id="morph_2.9.11-seg">czemu</word> + <word id="word_119" tei_id="morph_2.9.12-seg">mieli</word> + <word id="word_120" tei_id="morph_2.9.13-seg">wodę</word> + <word id="word_121" tei_id="morph_2.9.14-seg">.</word> + <word id="word_122" tei_id="morph_2.10.1-seg">Natomiast</word> + <word id="word_123" tei_id="morph_2.10.2-seg">w</word> + <word id="word_124" tei_id="morph_2.10.3-seg">skali</word> + <word id="word_125" tei_id="morph_2.10.4-seg">siatki</word> + <word id="word_126" tei_id="morph_2.10.5-seg">AK</word> + <word id="word_127" tei_id="morph_2.10.6-seg">Kazimierz</word> + <word id="word_128" tei_id="morph_2.10.7-seg">Korkuć</word> + <word id="word_129" tei_id="morph_2.10.8-seg">przechowywał</word> + <word id="word_130" tei_id="morph_2.10.9-seg">około</word> + <word id="word_131" tei_id="morph_2.10.10-seg">70</word> + <word id="word_132" tei_id="morph_2.10.11-seg">Żydów</word> + <word id="word_133" tei_id="morph_2.10.12-seg">.</word> + <word id="word_134" tei_id="morph_2.11.1-seg">Rodzina</word> + <word id="word_135" tei_id="morph_2.11.2-seg">Świeczków</word> + <word id="word_136" tei_id="morph_2.11.3-seg">również</word> + <word id="word_137" tei_id="morph_2.11.4-seg">przechowywała</word> + <word id="word_138" tei_id="morph_2.11.5-seg">Żydów</word> + <word id="word_139" tei_id="morph_2.11.6-seg">.</word> + <word 
id="word_140" tei_id="morph_2.12.1-seg">W</word> + <word id="word_141" tei_id="morph_2.12.2-seg">tamtych</word> + <word id="word_142" tei_id="morph_2.12.3-seg">stronach</word> + <word id="word_143" tei_id="morph_2.12.4-seg">liczne</word> + <word id="word_144" tei_id="morph_2.12.5-seg">rodziny</word> + <word id="word_145" tei_id="morph_2.12.6-seg">polskie</word> + <word id="word_146" tei_id="morph_2.12.7-seg">postępowały</word> + <word id="word_147" tei_id="morph_2.12.8-seg">podobnie</word> + <word id="word_148" tei_id="morph_2.12.9-seg" lastinpar="true">.</word> + <word id="word_149" tei_id="morph_3.13.1-seg">Prawdą</word> + <word id="word_150" tei_id="morph_3.13.2-seg">jest</word> + <word id="word_151" tei_id="morph_3.13.3-seg">również</word> + <word id="word_152" tei_id="morph_3.13.4-seg">i</word> + <word id="word_153" tei_id="morph_3.13.5-seg">to</word> + <word id="word_154" tei_id="morph_3.13.6-seg">,</word> + <word id="word_155" tei_id="morph_3.13.7-seg">że</word> + <word id="word_156" tei_id="morph_3.13.8-seg">Żydzi</word> + <word id="word_157" tei_id="morph_3.13.9-seg">za</word> + <word id="word_158" tei_id="morph_3.13.10-seg">swe</word> + <word id="word_159" tei_id="morph_3.13.11-seg">przechowanie</word> + <word id="word_160" tei_id="morph_3.13.12-seg">płacili</word> + <word id="word_161" tei_id="morph_3.13.13-seg">.</word> + <word id="word_162" tei_id="morph_3.14.1-seg">Płacili</word> + <word id="word_163" tei_id="morph_3.14.2-seg">za</word> + <word id="word_164" tei_id="morph_3.14.3-seg">utrzymanie</word> + <word id="word_165" tei_id="morph_3.14.4-seg">i</word> + <word id="word_166" tei_id="morph_3.14.5-seg">chyba</word> + <word id="word_167" tei_id="morph_3.14.6-seg">jeszcze</word> + <word id="word_168" tei_id="morph_3.14.7-seg">-</word> + <word id="word_169" tei_id="morph_3.14.8-seg">za</word> + <word id="word_170" tei_id="morph_3.14.9-seg">ryzyko</word> + <word id="word_171" tei_id="morph_3.14.10-seg">.</word> + <word id="word_172" 
tei_id="morph_3.15.1-seg">O</word> + <word id="word_173" tei_id="morph_3.15.2-seg">tym</word> + <word id="word_174" tei_id="morph_3.15.3-seg">dzisiaj</word> + <word id="word_175" tei_id="morph_3.15.4-seg">raczej</word> + <word id="word_176" tei_id="morph_3.15.5-seg">tu</word> + <word id="word_177" tei_id="morph_3.15.6-seg">się</word> + <word id="word_178" tei_id="morph_3.15.7-seg">nie</word> + <word id="word_179" tei_id="morph_3.15.8-seg">mówi</word> + <word id="word_180" tei_id="morph_3.15.9-seg">,</word> + <word id="word_181" tei_id="morph_3.15.10-seg">ale</word> + <word id="word_182" tei_id="morph_3.15.11-seg">prawdopodobnie</word> + <word id="word_183" tei_id="morph_3.15.12-seg">różnie</word> + <word id="word_184" tei_id="morph_3.15.13-seg">z</word> + <word id="word_185" tei_id="morph_3.15.14-seg">tym</word> + <word id="word_186" tei_id="morph_3.15.15-seg">było</word> + <word id="word_187" tei_id="morph_3.15.16-seg">:</word> + <word id="word_188" tei_id="morph_3.15.17-seg">jedni</word> + <word id="word_189" tei_id="morph_3.15.18-seg">za</word> + <word id="word_190" tei_id="morph_3.15.19-seg">pieniądze</word> + <word id="word_191" tei_id="morph_3.15.20-seg">,</word> + <word id="word_192" tei_id="morph_3.15.21-seg">inni</word> + <word id="word_193" tei_id="morph_3.15.22-seg">-</word> + <word id="word_194" tei_id="morph_3.15.23-seg">z</word> + <word id="word_195" tei_id="morph_3.15.24-seg">odruchu</word> + <word id="word_196" tei_id="morph_3.15.25-seg">serca</word> + <word id="word_197" tei_id="morph_3.15.26-seg">.</word> + <word id="word_198" tei_id="morph_3.16.1-seg">Ryzykowali</word> + <word id="word_199" tei_id="morph_3.16.2-seg">i</word> + <word id="word_200" tei_id="morph_3.16.3-seg">Polacy</word> + <word id="word_201" tei_id="morph_3.16.4-seg">,</word> + <word id="word_202" tei_id="morph_3.16.5-seg">i</word> + <word id="word_203" tei_id="morph_3.16.6-seg">Żydzi</word> + <word id="word_204" tei_id="morph_3.16.7-seg">.</word> + <word id="word_205" 
tei_id="morph_3.17.1-seg">Te</word> + <word id="word_206" tei_id="morph_3.17.2-seg">rachunki</word> + <word id="word_207" tei_id="morph_3.17.3-seg">mogły</word> + <word id="word_208" tei_id="morph_3.17.4-seg">wyglądać</word> + <word id="word_209" tei_id="morph_3.17.5-seg">bardzo</word> + <word id="word_210" tei_id="morph_3.17.6-seg">różnie</word> + <word id="word_211" tei_id="morph_3.17.7-seg" lastinpar="true">.</word> + <word id="word_212" tei_id="morph_4.18.1-seg">Mieszkam</word> + <word id="word_213" tei_id="morph_4.18.2-seg">w</word> + <word id="word_214" tei_id="morph_4.18.3-seg">jednej</word> + <word id="word_215" tei_id="morph_4.18.4-seg">z</word> + <word id="word_216" tei_id="morph_4.18.5-seg">podwileńskich</word> + <word id="word_217" tei_id="morph_4.18.6-seg">wsi</word> + <word id="word_218" tei_id="morph_4.18.7-seg">.</word> + <word id="word_219" tei_id="morph_4.19.1-seg">Otóż</word> + <word id="word_220" tei_id="morph_4.19.2-seg">w</word> + <word id="word_221" tei_id="morph_4.19.3-seg">tej</word> + <word id="word_222" tei_id="morph_4.19.4-seg">mojej</word> + <word id="word_223" tei_id="morph_4.19.5-seg">wsi</word> + <word id="word_224" tei_id="morph_4.19.6-seg">pewien</word> + <word id="word_225" tei_id="morph_4.19.7-seg">gospodarz</word> + <word id="word_226" tei_id="morph_4.19.8-seg">-</word> + <word id="word_227" tei_id="morph_4.19.9-seg">Polak</word> + <word id="word_228" tei_id="morph_4.19.10-seg">-</word> + <word id="word_229" tei_id="morph_4.19.11-seg">przechowywał</word> + <word id="word_230" tei_id="morph_4.19.12-seg">w</word> + <word id="word_231" tei_id="morph_4.19.13-seg">czasie</word> + <word id="word_232" tei_id="morph_4.19.14-seg">wojny</word> + <word id="word_233" tei_id="morph_4.19.15-seg">młodą</word> + <word id="word_234" tei_id="morph_4.19.16-seg">Żydówkę</word> + <word id="word_235" tei_id="morph_4.19.17-seg">.</word> + <word id="word_236" tei_id="morph_4.20.1-seg">Spodobała</word> + <word id="word_237" 
tei_id="morph_4.20.2-seg">mu</word> + <word id="word_238" tei_id="morph_4.20.3-seg">się</word> + <word id="word_239" tei_id="morph_4.20.4-seg">,</word> + <word id="word_240" tei_id="morph_4.20.5-seg">z</word> + <word id="word_241" tei_id="morph_4.20.6-seg">czego</word> + <word id="word_242" tei_id="morph_4.20.7-seg">wynikł</word> + <word id="word_243" tei_id="morph_4.20.8-seg">dramat</word> + <word id="word_244" tei_id="morph_4.20.9-seg">.</word> + <word id="word_245" tei_id="morph_4.21.1-seg">Zdenerwowana</word> + <word id="word_246" tei_id="morph_4.21.2-seg">żona</word> + <word id="word_247" tei_id="morph_4.21.3-seg">doniosła</word> + <word id="word_248" tei_id="morph_4.21.4-seg">na</word> + <word id="word_249" tei_id="morph_4.21.5-seg">policję</word> + <word id="word_250" tei_id="morph_4.21.6-seg">.</word> + <word id="word_251" tei_id="morph_4.22.1-seg">Aresztowano</word> + <word id="word_252" tei_id="morph_4.22.2-seg">Żydówkę</word> + <word id="word_253" tei_id="morph_4.22.3-seg">razem</word> + <word id="word_254" tei_id="morph_4.22.4-seg">z</word> + <word id="word_255" tei_id="morph_4.22.5-seg">gospodarzem</word> + <word id="word_256" tei_id="morph_4.22.6-seg">,</word> + <word id="word_257" tei_id="morph_4.22.7-seg">przerażona</word> + <word id="word_258" tei_id="morph_4.22.8-seg">kobieta</word> + <word id="word_259" tei_id="morph_4.22.9-seg">próbowała</word> + <word id="word_260" tei_id="morph_4.22.10-seg">ocalić</word> + <word id="word_261" tei_id="morph_4.22.11-seg">męża</word> + <word id="word_262" tei_id="morph_4.22.12-seg">.</word> + <word id="word_263" tei_id="morph_4.23.1-seg">Zanim</word> + <word id="word_264" tei_id="morph_4.23.2-seg">uzbierała</word> + <word id="word_265" tei_id="morph_4.23.3-seg">potrzebną</word> + <word id="word_266" tei_id="morph_4.23.4-seg">sumę</word> + <word id="word_267" tei_id="morph_4.23.5-seg">na</word> + <word id="word_268" tei_id="morph_4.23.6-seg">łapówkę</word> + <word id="word_269" tei_id="morph_4.23.7-seg">,</word> + 
<word id="word_270" tei_id="morph_4.23.8-seg">było</word> + <word id="word_271" tei_id="morph_4.23.9-seg">już</word> + <word id="word_272" tei_id="morph_4.23.10-seg">za</word> + <word id="word_273" tei_id="morph_4.23.11-seg">późno</word> + <word id="word_274" tei_id="morph_4.23.12-seg">-</word> + <word id="word_275" tei_id="morph_4.23.13-seg">rozstrzelano</word> + <word id="word_276" tei_id="morph_4.23.14-seg">nie</word> + <word id="word_277" tei_id="morph_4.23.15-seg">tylko</word> + <word id="word_278" tei_id="morph_4.23.16-seg">Żydówkę</word> + <word id="word_279" tei_id="morph_4.23.17-seg">,</word> + <word id="word_280" tei_id="morph_4.23.18-seg">ale</word> + <word id="word_281" tei_id="morph_4.23.19-seg">i</word> + <word id="word_282" tei_id="morph_4.23.20-seg">gospodarza</word> + <word id="word_283" tei_id="morph_4.23.21-seg">.</word> + <word id="word_284" tei_id="morph_4.24.1-seg">Czy</word> + <word id="word_285" tei_id="morph_4.24.2-seg">żonę</word> + <word id="word_286" tei_id="morph_4.24.3-seg">tego</word> + <word id="word_287" tei_id="morph_4.24.4-seg">straceńca</word> + <word id="word_288" tei_id="morph_4.24.5-seg">można</word> + <word id="word_289" tei_id="morph_4.24.6-seg">nazwać</word> + <word id="word_290" tei_id="morph_4.24.7-seg">antysemitką</word> + <word id="word_291" tei_id="morph_4.24.8-seg" lastinpar="true">?</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_gold/8.mmax a/src/test/resources/teksty_mmax/teksty_gold/8.mmax new file mode 100755 index 0000000..849f740 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/8.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>8_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_gold/8_mentions.xml a/src/test/resources/teksty_mmax/teksty_gold/8_mentions.xml new file mode 100755 index 0000000..e318462 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/8_mentions.xml @@ -0,0 +1,88 @@ +<?xml version="1.0" ?> 
+<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_3..word_4" mmax_level="mention" mention_head="g" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_5..word_6" mmax_level="mention" mention_head="Joanna" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_28" mmax_level="mention" mention_head="lekarzy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_44..word_45" mmax_level="mention" mention_head="pierś" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_48" mmax_level="mention" mention_head="folii" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_6" span="word_50" mmax_level="mention" mention_head="sałatka" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_7" span="word_52..word_53" mmax_level="mention" mention_head="kapusty" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_8" span="word_58" mmax_level="mention" mention_head="oliwą" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_55..word_58" mmax_level="mention" mention_head="octem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_60..word_61" mmax_level="mention" mention_head="jabłko" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_11" span="word_63" mmax_level="mention" mention_head="kolacja" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_12" span="word_66..word_67" mmax_level="mention" mention_head="jajka" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_13" span="word_69" mmax_level="mention" mention_head="pomidor" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_14" span="word_72" mmax_level="mention" mention_head="serka" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_15" span="word_75" mmax_level="mention" mention_head="gruszka" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_16" span="word_79" mmax_level="mention" mention_head="obiad" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_17" span="word_81" mmax_level="mention" mention_head="królik" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_18" span="word_83" mmax_level="mention" mention_head="potrawce" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_19" span="word_85" mmax_level="mention" mention_head="surówka" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_20" span="word_87..word_89" mmax_level="mention" mention_head="marchewki" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_21" span="word_91" mmax_level="mention" mention_head="brzoskwinia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_93" mmax_level="mention" mention_head="kolacja" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_23" span="word_95" mmax_level="mention" mention_head="befsztyk" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_24" span="word_97" mmax_level="mention" mention_head="polędwicy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_100" mmax_level="mention" mention_head="tłuszczu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_105" mmax_level="mention" mention_head="sosem" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_27" span="word_109" mmax_level="mention" 
mention_head="mandarynki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_114" mmax_level="mention" mention_head="kura" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_29" span="word_116" mmax_level="mention" mention_head="rosołu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_30" span="word_118..word_119" mmax_level="mention" mention_head="kapusta" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_31" span="word_123..word_124" mmax_level="mention" mention_head="grejpfrut" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_32" span="word_126" mmax_level="mention" mention_head="kolacja" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_33" span="word_128" mmax_level="mention" mention_head="ryba" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_34" span="word_130" mmax_level="mention" mention_head="warzywach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_35" span="word_132" mmax_level="mention" mention_head="surówka" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_36" span="word_136" mmax_level="mention" mention_head="pomarańcza" mention_group="set_15" near_identity="empty"></markable> + <markable id="markable_37" span="word_144" mmax_level="mention" mention_head="brokuły" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_147" mmax_level="mention" mention_head="parze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_149" mmax_level="mention" mention_head="jabłko" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_40" span="word_154..word_155" mmax_level="mention" mention_head="ryżu" mention_group="empty" near_identity="empty"></markable> + 
<markable id="markable_41" span="word_153..word_155" mmax_level="mention" mention_head="szklanka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_157..word_158" mmax_level="mention" mention_head="mlekiem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_43" span="word_161..word_163" mmax_level="mention" mention_head="sera" mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_44" span="word_160..word_163" mmax_level="mention" mention_head="plaster" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_166" mmax_level="mention" mention_head="winogron" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_165..word_166" mmax_level="mention" mention_head="kiść" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_47" span="word_170" mmax_level="mention" mention_head="obiad" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_48" span="word_173" mmax_level="mention" mention_head="mięsa" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_172..word_173" mmax_level="mention" mention_head="sztuka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_178..word_180" mmax_level="mention" mention_head="śliwek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_182" mmax_level="mention" mention_head="kolacja" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_52" span="word_185" mmax_level="mention" mention_head="jajka" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_53" span="word_190" mmax_level="mention" mention_head="brie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_192" 
mmax_level="mention" mention_head="banan" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_198" mmax_level="mention" mention_head="ryba" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_56" span="word_201" mmax_level="mention" mention_head="folii" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_57" span="word_203" mmax_level="mention" mention_head="surówka" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_58" span="word_208" mmax_level="mention" mention_head="marchewki" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_59" span="word_205..word_208" mmax_level="mention" mention_head="kapusty" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_60" span="word_210" mmax_level="mention" mention_head="gruszka" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_61" span="word_212..word_213" mmax_level="mention" mention_head="sosie" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_62" span="word_215" mmax_level="mention" mention_head="kolacja" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_63" span="word_219" mmax_level="mention" mention_head="królika" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_64" span="word_221" mmax_level="mention" mention_head="sałatka" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_65" span="word_225" mmax_level="mention" mention_head="papryki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_223..word_225" mmax_level="mention" mention_head="pomidorów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_228" mmax_level="mention" mention_head="serka" mention_group="set_12" 
near_identity="empty"></markable> + <markable id="markable_68" span="word_227..word_228" mmax_level="mention" mention_head="trójkąt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_69" span="word_231" mmax_level="mention" mention_head="kiwi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_70" span="word_237" mmax_level="mention" mention_head="cielęcina" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_71" span="word_241" mmax_level="mention" mention_head="ziół" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_72" span="word_240..word_241" mmax_level="mention" mention_head="dodatkiem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_73" span="word_243..word_244" mmax_level="mention" mention_head="fasolka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_74" span="word_247" mmax_level="mention" mention_head="masła" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_75" span="word_246..word_247" mmax_level="mention" mention_head="odrobiną" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_76" span="word_249" mmax_level="mention" mention_head="jogurt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_77" span="word_251" mmax_level="mention" mention_head="kolacja" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_78" span="word_255" mmax_level="mention" mention_head="ziemniaków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_79" span="word_257" mmax_level="mention" mention_head="odrobiny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_80" span="word_262" mmax_level="mention" mention_head="szynki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_81" 
span="word_259..word_262" mmax_level="mention" mention_head="sera" mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_82" span="word_264..word_265" mmax_level="mention" mention_head="sałata" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_83" span="word_267" mmax_level="mention" mention_head="rzodkiewkami" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_84" span="word_269" mmax_level="mention" mention_head="pomarańcza" mention_group="set_15" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_gold/8_words.xml a/src/test/resources/teksty_mmax/teksty_gold/8_words.xml new file mode 100755 index 0000000..db237a8 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/8_words.xml @@ -0,0 +1,273 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">-</word> + <word id="word_2" tei_id="morph_1.1.2-seg">150</word> + <word id="word_3" tei_id="morph_1.1.3-seg">g</word> + <word id="word_4" tei_id="morph_1.1.4-seg" lastinpar="true">owoców</word> + <word id="word_5" tei_id="morph_2.2.1-seg">Joanna</word> + <word id="word_6" tei_id="morph_2.2.2-seg" lastinpar="true">Kuc</word> + <word id="word_7" tei_id="morph_3.3.1-seg">(</word> + <word id="word_8" tei_id="morph_3.3.2-seg">PAI</word> + <word id="word_9" tei_id="morph_3.3.3-seg" lastinpar="true">)</word> + <word id="word_10" tei_id="morph_4.4.1-seg">Diety</word> + <word id="word_11" tei_id="morph_4.4.2-seg">są</word> + <word id="word_12" tei_id="morph_4.4.3-seg">różne</word> + <word id="word_13" tei_id="morph_4.4.4-seg">,</word> + <word id="word_14" tei_id="morph_4.4.5-seg">ścisłe</word> + <word id="word_15" tei_id="morph_4.4.6-seg">i</word> + <word id="word_16" tei_id="morph_4.4.7-seg">urozmaicone</word> + <word id="word_17" tei_id="morph_4.4.8-seg">,</word> + <word id="word_18" 
tei_id="morph_4.4.9-seg">eliminacyjne</word> + <word id="word_19" tei_id="morph_4.4.10-seg">,</word> + <word id="word_20" tei_id="morph_4.4.11-seg">dziwne</word> + <word id="word_21" tei_id="morph_4.4.12-seg">,</word> + <word id="word_22" tei_id="morph_4.4.13-seg">racjonalne</word> + <word id="word_23" tei_id="morph_4.4.14-seg">i</word> + <word id="word_24" tei_id="morph_4.4.15-seg">wreszcie</word> + <word id="word_25" tei_id="morph_4.4.16-seg">te</word> + <word id="word_26" tei_id="morph_4.4.17-seg">zalecane</word> + <word id="word_27" tei_id="morph_4.4.18-seg">przez</word> + <word id="word_28" tei_id="morph_4.4.19-seg">lekarzy</word> + <word id="word_29" tei_id="morph_4.4.20-seg">.</word> + <word id="word_30" tei_id="morph_4.5.1-seg">Dziś</word> + <word id="word_31" tei_id="morph_4.5.2-seg">dieta</word> + <word id="word_32" tei_id="morph_4.5.3-seg">burgundzka</word> + <word id="word_33" tei_id="morph_4.5.4-seg" lastinpar="true">.</word> + <word id="word_34" tei_id="morph_5.6.1-seg">Jak</word> + <word id="word_35" tei_id="morph_5.6.2-seg">wygląda</word> + <word id="word_36" tei_id="morph_5.6.3-seg">przykładowe</word> + <word id="word_37" tei_id="morph_5.6.4-seg">tygodniowe</word> + <word id="word_38" tei_id="morph_5.6.5-seg">menu</word> + <word id="word_39" tei_id="morph_5.6.6-seg" lastinpar="true">?</word> + <word id="word_40" tei_id="morph_6.7.1-seg">1</word> + <word id="word_41" tei_id="morph_6.7.2-seg" lastinpar="true">dzień</word> + <word id="word_42" tei_id="morph_7.8.1-seg">-obiad</word> + <word id="word_43" tei_id="morph_7.8.2-seg">:</word> + <word id="word_44" tei_id="morph_7.8.3-seg">pierś</word> + <word id="word_45" tei_id="morph_7.8.4-seg">kurczaka</word> + <word id="word_46" tei_id="morph_7.8.5-seg">pieczona</word> + <word id="word_47" tei_id="morph_7.8.6-seg">w</word> + <word id="word_48" tei_id="morph_7.8.7-seg">folii</word> + <word id="word_49" tei_id="morph_7.8.8-seg">,</word> + <word id="word_50" tei_id="morph_7.8.9-seg">sałatka</word> + <word 
id="word_51" tei_id="morph_7.8.10-seg">z</word> + <word id="word_52" tei_id="morph_7.8.11-seg">czerwonej</word> + <word id="word_53" tei_id="morph_7.8.12-seg">kapusty</word> + <word id="word_54" tei_id="morph_7.8.13-seg">doprawiona</word> + <word id="word_55" tei_id="morph_7.8.14-seg">octem</word> + <word id="word_56" tei_id="morph_7.8.15-seg">winnym</word> + <word id="word_57" tei_id="morph_7.8.16-seg">i</word> + <word id="word_58" tei_id="morph_7.8.17-seg">oliwą</word> + <word id="word_59" tei_id="morph_7.8.18-seg">,</word> + <word id="word_60" tei_id="morph_7.8.19-seg">pieczone</word> + <word id="word_61" tei_id="morph_7.8.20-seg" lastinpar="true">jabłko</word> + <word id="word_62" tei_id="morph_8.9.1-seg">-</word> + <word id="word_63" tei_id="morph_8.9.2-seg">kolacja</word> + <word id="word_64" tei_id="morph_8.9.3-seg">:</word> + <word id="word_65" tei_id="morph_8.9.4-seg">2</word> + <word id="word_66" tei_id="morph_8.9.5-seg">sadzone</word> + <word id="word_67" tei_id="morph_8.9.6-seg">jajka</word> + <word id="word_68" tei_id="morph_8.9.7-seg">,</word> + <word id="word_69" tei_id="morph_8.9.8-seg">pomidor</word> + <word id="word_70" tei_id="morph_8.9.9-seg">,</word> + <word id="word_71" tei_id="morph_8.9.10-seg">trójkąt</word> + <word id="word_72" tei_id="morph_8.9.11-seg">serka</word> + <word id="word_73" tei_id="morph_8.9.12-seg">topionego</word> + <word id="word_74" tei_id="morph_8.9.13-seg">,</word> + <word id="word_75" tei_id="morph_8.9.14-seg" lastinpar="true">gruszka</word> + <word id="word_76" tei_id="morph_9.10.1-seg">2</word> + <word id="word_77" tei_id="morph_9.10.2-seg" lastinpar="true">dzień</word> + <word id="word_78" tei_id="morph_10.11.1-seg">-</word> + <word id="word_79" tei_id="morph_10.11.2-seg">obiad</word> + <word id="word_80" tei_id="morph_10.11.3-seg">:</word> + <word id="word_81" tei_id="morph_10.11.4-seg">królik</word> + <word id="word_82" tei_id="morph_10.11.5-seg">w</word> + <word id="word_83" 
tei_id="morph_10.11.6-seg">potrawce</word> + <word id="word_84" tei_id="morph_10.11.7-seg">,</word> + <word id="word_85" tei_id="morph_10.11.8-seg">surówka</word> + <word id="word_86" tei_id="morph_10.11.9-seg">z</word> + <word id="word_87" tei_id="morph_10.11.10-seg">marchewki</word> + <word id="word_88" tei_id="morph_10.11.11-seg">i</word> + <word id="word_89" tei_id="morph_10.11.12-seg">chrzanu</word> + <word id="word_90" tei_id="morph_10.11.13-seg">,</word> + <word id="word_91" tei_id="morph_10.11.14-seg" lastinpar="true">brzoskwinia</word> + <word id="word_92" tei_id="morph_11.12.1-seg">-</word> + <word id="word_93" tei_id="morph_11.12.2-seg">kolacja</word> + <word id="word_94" tei_id="morph_11.12.3-seg">:</word> + <word id="word_95" tei_id="morph_11.12.4-seg">befsztyk</word> + <word id="word_96" tei_id="morph_11.12.5-seg">z</word> + <word id="word_97" tei_id="morph_11.12.6-seg">polędwicy</word> + <word id="word_98" tei_id="morph_11.12.7-seg">usmażony</word> + <word id="word_99" tei_id="morph_11.12.8-seg">bez</word> + <word id="word_100" tei_id="morph_11.12.9-seg">tłuszczu</word> + <word id="word_101" tei_id="morph_11.12.10-seg">,</word> + <word id="word_102" tei_id="morph_11.12.11-seg">zielona</word> + <word id="word_103" tei_id="morph_11.12.12-seg">sałata</word> + <word id="word_104" tei_id="morph_11.12.13-seg">z</word> + <word id="word_105" tei_id="morph_11.12.14-seg">sosem</word> + <word id="word_106" tei_id="morph_11.12.15-seg">vinegrette</word> + <word id="word_107" tei_id="morph_11.12.16-seg">,</word> + <word id="word_108" tei_id="morph_11.12.17-seg">2</word> + <word id="word_109" tei_id="morph_11.12.18-seg" lastinpar="true">mandarynki</word> + <word id="word_110" tei_id="morph_12.13.1-seg">3</word> + <word id="word_111" tei_id="morph_12.13.2-seg" lastinpar="true">dzień</word> + <word id="word_112" tei_id="morph_13.14.1-seg">-obiad</word> + <word id="word_113" tei_id="morph_13.14.2-seg">:</word> + <word id="word_114" 
tei_id="morph_13.14.3-seg">kura</word> + <word id="word_115" tei_id="morph_13.14.4-seg">z</word> + <word id="word_116" tei_id="morph_13.14.5-seg">rosołu</word> + <word id="word_117" tei_id="morph_13.14.6-seg">,</word> + <word id="word_118" tei_id="morph_13.14.7-seg">gotowana</word> + <word id="word_119" tei_id="morph_13.14.8-seg">kapusta</word> + <word id="word_120" tei_id="morph_13.14.9-seg">bez</word> + <word id="word_121" tei_id="morph_13.14.10-seg">zasmażki</word> + <word id="word_122" tei_id="morph_13.14.11-seg">,</word> + <word id="word_123" tei_id="morph_13.14.12-seg">mały</word> + <word id="word_124" tei_id="morph_13.14.13-seg" lastinpar="true">grejpfrut</word> + <word id="word_125" tei_id="morph_14.15.1-seg">-</word> + <word id="word_126" tei_id="morph_14.15.2-seg">kolacja</word> + <word id="word_127" tei_id="morph_14.15.3-seg">:</word> + <word id="word_128" tei_id="morph_14.15.4-seg">ryba</word> + <word id="word_129" tei_id="morph_14.15.5-seg">w</word> + <word id="word_130" tei_id="morph_14.15.6-seg">warzywach</word> + <word id="word_131" tei_id="morph_14.15.7-seg">,</word> + <word id="word_132" tei_id="morph_14.15.8-seg">surówka</word> + <word id="word_133" tei_id="morph_14.15.9-seg">z</word> + <word id="word_134" tei_id="morph_14.15.10-seg">buraczków</word> + <word id="word_135" tei_id="morph_14.15.11-seg">,</word> + <word id="word_136" tei_id="morph_14.15.12-seg" lastinpar="true">pomarańcza</word> + <word id="word_137" tei_id="morph_15.16.1-seg">4</word> + <word id="word_138" tei_id="morph_15.16.2-seg" lastinpar="true">dzień</word> + <word id="word_139" tei_id="morph_16.17.1-seg">-obiad</word> + <word id="word_140" tei_id="morph_16.17.2-seg">:</word> + <word id="word_141" tei_id="morph_16.17.3-seg">pieczona</word> + <word id="word_142" tei_id="morph_16.17.4-seg">wieprzowina</word> + <word id="word_143" tei_id="morph_16.17.5-seg">,</word> + <word id="word_144" tei_id="morph_16.17.6-seg">brokuły</word> + <word id="word_145" 
tei_id="morph_16.17.7-seg">ugotowane</word> + <word id="word_146" tei_id="morph_16.17.8-seg">na</word> + <word id="word_147" tei_id="morph_16.17.9-seg">parze</word> + <word id="word_148" tei_id="morph_16.17.10-seg">,</word> + <word id="word_149" tei_id="morph_16.17.11-seg" lastinpar="true">jabłko</word> + <word id="word_150" tei_id="morph_17.18.1-seg">-</word> + <word id="word_151" tei_id="morph_17.18.2-seg">kolacja</word> + <word id="word_152" tei_id="morph_17.18.3-seg">:</word> + <word id="word_153" tei_id="morph_17.18.4-seg">szklanka</word> + <word id="word_154" tei_id="morph_17.18.5-seg">ugotowanego</word> + <word id="word_155" tei_id="morph_17.18.6-seg">ryżu</word> + <word id="word_156" tei_id="morph_17.18.7-seg">zalana</word> + <word id="word_157" tei_id="morph_17.18.8-seg">chudym</word> + <word id="word_158" tei_id="morph_17.18.9-seg">mlekiem</word> + <word id="word_159" tei_id="morph_17.18.10-seg">,</word> + <word id="word_160" tei_id="morph_17.18.11-seg">plaster</word> + <word id="word_161" tei_id="morph_17.18.12-seg">białego</word> + <word id="word_162" tei_id="morph_17.18.13-seg">chudego</word> + <word id="word_163" tei_id="morph_17.18.14-seg">sera</word> + <word id="word_164" tei_id="morph_17.18.15-seg">,</word> + <word id="word_165" tei_id="morph_17.18.16-seg">kiść</word> + <word id="word_166" tei_id="morph_17.18.17-seg" lastinpar="true">winogron</word> + <word id="word_167" tei_id="morph_18.19.1-seg">5</word> + <word id="word_168" tei_id="morph_18.19.2-seg" lastinpar="true">dzień</word> + <word id="word_169" tei_id="morph_19.20.1-seg">-</word> + <word id="word_170" tei_id="morph_19.20.2-seg">obiad</word> + <word id="word_171" tei_id="morph_19.20.3-seg">–</word> + <word id="word_172" tei_id="morph_19.20.4-seg">sztuka</word> + <word id="word_173" tei_id="morph_19.20.5-seg">mięsa</word> + <word id="word_174" tei_id="morph_19.20.6-seg">,</word> + <word id="word_175" tei_id="morph_19.20.7-seg">gotowane</word> + <word id="word_176" 
tei_id="morph_19.20.8-seg">buraczki</word> + <word id="word_177" tei_id="morph_19.20.9-seg">,</word> + <word id="word_178" tei_id="morph_19.20.10-seg">kilka</word> + <word id="word_179" tei_id="morph_19.20.11-seg">suszonych</word> + <word id="word_180" tei_id="morph_19.20.12-seg" lastinpar="true">śliwek</word> + <word id="word_181" tei_id="morph_20.21.1-seg">-</word> + <word id="word_182" tei_id="morph_20.21.2-seg">kolacja</word> + <word id="word_183" tei_id="morph_20.21.3-seg">-</word> + <word id="word_184" tei_id="morph_20.21.4-seg">2</word> + <word id="word_185" tei_id="morph_20.21.5-seg">jajka</word> + <word id="word_186" tei_id="morph_20.21.6-seg">na</word> + <word id="word_187" tei_id="morph_20.21.7-seg">miękko</word> + <word id="word_188" tei_id="morph_20.21.8-seg">,</word> + <word id="word_189" tei_id="morph_20.21.9-seg">serka</word> + <word id="word_190" tei_id="morph_20.21.10-seg">brie</word> + <word id="word_191" tei_id="morph_20.21.11-seg">,</word> + <word id="word_192" tei_id="morph_20.21.12-seg" lastinpar="true">banan</word> + <word id="word_193" tei_id="morph_21.22.1-seg">6</word> + <word id="word_194" tei_id="morph_21.22.2-seg" lastinpar="true">dzień</word> + <word id="word_195" tei_id="morph_22.23.1-seg">-</word> + <word id="word_196" tei_id="morph_22.23.2-seg">obiad</word> + <word id="word_197" tei_id="morph_22.23.3-seg">:</word> + <word id="word_198" tei_id="morph_22.23.4-seg">ryba</word> + <word id="word_199" tei_id="morph_22.23.5-seg">pieczona</word> + <word id="word_200" tei_id="morph_22.23.6-seg">w</word> + <word id="word_201" tei_id="morph_22.23.7-seg">folii</word> + <word id="word_202" tei_id="morph_22.23.8-seg">,</word> + <word id="word_203" tei_id="morph_22.23.9-seg">surówka</word> + <word id="word_204" tei_id="morph_22.23.10-seg">z</word> + <word id="word_205" tei_id="morph_22.23.11-seg">białej</word> + <word id="word_206" tei_id="morph_22.23.12-seg">kapusty</word> + <word id="word_207" tei_id="morph_22.23.13-seg">i</word> + <word 
id="word_208" tei_id="morph_22.23.14-seg">marchewki</word> + <word id="word_209" tei_id="morph_22.23.15-seg">,</word> + <word id="word_210" tei_id="morph_22.23.16-seg">gruszka</word> + <word id="word_211" tei_id="morph_22.23.17-seg">w</word> + <word id="word_212" tei_id="morph_22.23.18-seg">sosie</word> + <word id="word_213" tei_id="morph_22.23.19-seg" lastinpar="true">waniliowym</word> + <word id="word_214" tei_id="morph_23.24.1-seg">-</word> + <word id="word_215" tei_id="morph_23.24.2-seg">kolacja</word> + <word id="word_216" tei_id="morph_23.24.3-seg">:</word> + <word id="word_217" tei_id="morph_23.24.4-seg">pieczeń</word> + <word id="word_218" tei_id="morph_23.24.5-seg">z</word> + <word id="word_219" tei_id="morph_23.24.6-seg">królika</word> + <word id="word_220" tei_id="morph_23.24.7-seg">,</word> + <word id="word_221" tei_id="morph_23.24.8-seg">sałatka</word> + <word id="word_222" tei_id="morph_23.24.9-seg">z</word> + <word id="word_223" tei_id="morph_23.24.10-seg">pomidorów</word> + <word id="word_224" tei_id="morph_23.24.11-seg">i</word> + <word id="word_225" tei_id="morph_23.24.12-seg">papryki</word> + <word id="word_226" tei_id="morph_23.24.13-seg">,</word> + <word id="word_227" tei_id="morph_23.24.14-seg">trójkąt</word> + <word id="word_228" tei_id="morph_23.24.15-seg">serka</word> + <word id="word_229" tei_id="morph_23.24.16-seg">topionego</word> + <word id="word_230" tei_id="morph_23.24.17-seg">,</word> + <word id="word_231" tei_id="morph_23.24.18-seg" lastinpar="true">kiwi</word> + <word id="word_232" tei_id="morph_24.25.1-seg">7</word> + <word id="word_233" tei_id="morph_24.25.2-seg" lastinpar="true">dzień</word> + <word id="word_234" tei_id="morph_25.26.1-seg">-</word> + <word id="word_235" tei_id="morph_25.26.2-seg">obiad</word> + <word id="word_236" tei_id="morph_25.26.3-seg">:</word> + <word id="word_237" tei_id="morph_25.26.4-seg">cielęcina</word> + <word id="word_238" tei_id="morph_25.26.5-seg">pieczona</word> + <word id="word_239" 
tei_id="morph_25.26.6-seg">z</word> + <word id="word_240" tei_id="morph_25.26.7-seg">dodatkiem</word> + <word id="word_241" tei_id="morph_25.26.8-seg">ziół</word> + <word id="word_242" tei_id="morph_25.26.9-seg">,</word> + <word id="word_243" tei_id="morph_25.26.10-seg">fasolka</word> + <word id="word_244" tei_id="morph_25.26.11-seg">szparagowa</word> + <word id="word_245" tei_id="morph_25.26.12-seg">z</word> + <word id="word_246" tei_id="morph_25.26.13-seg">odrobiną</word> + <word id="word_247" tei_id="morph_25.26.14-seg">masła</word> + <word id="word_248" tei_id="morph_25.26.15-seg">,</word> + <word id="word_249" tei_id="morph_25.26.16-seg" lastinpar="true">jogurt</word> + <word id="word_250" tei_id="morph_26.27.1-seg">-</word> + <word id="word_251" tei_id="morph_26.27.2-seg">kolacja</word> + <word id="word_252" tei_id="morph_26.27.3-seg">:</word> + <word id="word_253" tei_id="morph_26.27.4-seg">zapiekanka</word> + <word id="word_254" tei_id="morph_26.27.5-seg">z</word> + <word id="word_255" tei_id="morph_26.27.6-seg">ziemniaków</word> + <word id="word_256" tei_id="morph_26.27.7-seg">,</word> + <word id="word_257" tei_id="morph_26.27.8-seg">odrobiny</word> + <word id="word_258" tei_id="morph_26.27.9-seg">startego</word> + <word id="word_259" tei_id="morph_26.27.10-seg">żółtego</word> + <word id="word_260" tei_id="morph_26.27.11-seg">sera</word> + <word id="word_261" tei_id="morph_26.27.12-seg">i</word> + <word id="word_262" tei_id="morph_26.27.13-seg">szynki</word> + <word id="word_263" tei_id="morph_26.27.14-seg">,</word> + <word id="word_264" tei_id="morph_26.27.15-seg">sałata</word> + <word id="word_265" tei_id="morph_26.27.16-seg">zielona</word> + <word id="word_266" tei_id="morph_26.27.17-seg">z</word> + <word id="word_267" tei_id="morph_26.27.18-seg">rzodkiewkami</word> + <word id="word_268" tei_id="morph_26.27.19-seg">,</word> + <word id="word_269" tei_id="morph_26.27.20-seg" lastinpar="true">pomarańcza</word> +</words> diff --git 
b/src/test/resources/teksty_mmax/teksty_gold/9.mmax a/src/test/resources/teksty_mmax/teksty_gold/9.mmax new file mode 100755 index 0000000..92026d2 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/9.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>9_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_gold/9_mentions.xml a/src/test/resources/teksty_mmax/teksty_gold/9_mentions.xml new file mode 100755 index 0000000..bd3b733 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/9_mentions.xml @@ -0,0 +1,79 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_6" mmax_level="mention" mention_head="balkonie" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_2" span="word_7" mmax_level="mention" mention_head="Pani" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_3" span="word_8..word_9" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_13" mmax_level="mention" mention_head="Piłsudskiego" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_15" mmax_level="mention" mention_head="powierzchni" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_16" mmax_level="mention" mention_head="niewiele" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_18..word_19" mmax_level="mention" mention_head="metra" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_8" span="word_28" mmax_level="mention" mention_head="skrzynek" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_9" span="word_30" mmax_level="mention" mention_head="kwiatami" mention_group="set_5" 
near_identity="empty"></markable> + <markable id="markable_10" span="word_37..word_38" mmax_level="mention" mention_head="balkonu" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_11" span="word_36..word_38" mmax_level="mention" mention_head="Utrzymanie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_43" mmax_level="mention" mention_head="serca" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_40..word_43" mmax_level="mention" mention_head="pracy" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_14" span="word_54" mmax_level="mention" mention_head="córka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_57" mmax_level="mention" mention_head="Stańczyk" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_56..word_57" mmax_level="mention" mention_head="Grażyna" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_67" mmax_level="mention" mention_head="kwiaty" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_18" span="word_74" mmax_level="mention" mention_head="balkonowi" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_19" span="word_76" mmax_level="mention" mention_head="Nasiona" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_82..word_84" mmax_level="mention" mention_head="lutego" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_21" span="word_90" mmax_level="mention" mention_head="roślinki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_94" mmax_level="mention" mention_head="skrzynek" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_23" span="word_97" 
mmax_level="mention" mention_head="Skrzynki" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_24" span="word_100" mmax_level="mention" mention_head="balkon" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_25" span="word_103" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_105" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_27" span="word_103..word_105" mmax_level="mention" mention_head="maju" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_116..word_117" mmax_level="mention" mention_head="sadzonek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_29" span="word_119" mmax_level="mention" mention_head="sklepu" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_30" span="word_122" mmax_level="mention" mention_head="nic" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_31" span="word_125..word_126" mmax_level="mention" mention_head="przyjemności" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_32" span="word_129" mmax_level="mention" mention_head="kwiatka" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_33" span="word_128..word_129" mmax_level="mention" mention_head="wyhodowanie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_34" span="word_131" mmax_level="mention" mention_head="nasionka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_35" span="word_136..word_137" mmax_level="mention" mention_head="roku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_138" mmax_level="mention" mention_head="pracy" mention_group="set_1" 
near_identity="empty"></markable> + <markable id="markable_37" span="word_142" mmax_level="mention" mention_head="roślin" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_38" span="word_144" mmax_level="mention" mention_head="zasilaniu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_141..word_144" mmax_level="mention" mention_head="podlewaniu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_40" span="word_145" mmax_level="mention" mention_head="ich" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_145..word_147" mmax_level="mention" mention_head="nawozami" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_42" span="word_151" mmax_level="mention" mention_head="pani" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_43" span="word_152" mmax_level="mention" mention_head="Stanisławy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_44" span="word_150..word_152" mmax_level="mention" mention_head="balkonie" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_45" span="word_155" mmax_level="mention" mention_head="surfinie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_157" mmax_level="mention" mention_head="petunie" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_47" span="word_159" mmax_level="mention" mention_head="gardenie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_161" mmax_level="mention" mention_head="aksamitki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_163" mmax_level="mention" mention_head="przypołudniki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" 
span="word_172" mmax_level="mention" mention_head="werbeny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_176..word_177" mmax_level="mention" mention_head="kwiatów" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_52" span="word_188..word_189" mmax_level="mention" mention_head="czasie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_193" mmax_level="mention" mention_head="Ogród" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_195" mmax_level="mention" mention_head="balkonie" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_55" span="word_200" mmax_level="mention" mention_head="przechodniów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_198..word_200" mmax_level="mention" mention_head="właścicielki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_203..word_204" mmax_level="mention" mention_head="jesieni" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_58" span="word_207" mmax_level="mention" mention_head="balkonów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_206..word_207" mmax_level="mention" mention_head="Ozdabianie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_208" mmax_level="mention" mention_head="kwiatami" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_61" span="word_212" mmax_level="mention" mention_head="Brzezinach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_62" span="word_224..word_225" mmax_level="mention" mention_head="rośliny" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_63" span="word_229" mmax_level="mention" 
mention_head="Kolasa" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_231..word_232" mmax_level="mention" mention_head="sklepu" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_65" span="word_239..word_240" mmax_level="mention" mention_head="kwiaty" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_66" span="word_242" mmax_level="mention" mention_head="wystawienia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_244" mmax_level="mention" mention_head="balkon" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_68" span="word_248" mmax_level="mention" mention_head="petunie" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_69" span="word_255..word_258" mmax_level="mention" mention_head="pelargonie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_70" span="word_264..word_266" mmax_level="mention" mention_head="datura" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_71" span="word_268" mmax_level="mention" mention_head="Klientom" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_72" span="word_275..word_276" mmax_level="mention" mention_head="kwiat" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_73" span="word_280" mmax_level="mention" mention_head="fot" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_74" span="word_278..word_280" mmax_level="mention" mention_head="tekst" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_75" span="word_282..word_283" mmax_level="mention" mention_head="grzegorz" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_gold/9_words.xml 
a/src/test/resources/teksty_mmax/teksty_gold/9_words.xml new file mode 100755 index 0000000..dd37a43 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_gold/9_words.xml @@ -0,0 +1,287 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Ogród</word> + <word id="word_2" tei_id="morph_1.1.2-seg">na</word> + <word id="word_3" tei_id="morph_1.1.3-seg">.</word> + <word id="word_4" tei_id="morph_1.1.4-seg">.</word> + <word id="word_5" tei_id="morph_1.1.5-seg">.</word> + <word id="word_6" tei_id="morph_1.1.6-seg">balkonie</word> + <word id="word_7" tei_id="morph_1.1.7-seg">Pani</word> + <word id="word_8" tei_id="morph_1.1.8-seg">Stanisławie</word> + <word id="word_9" tei_id="morph_1.1.9-seg">Budkiewicz</word> + <word id="word_10" tei_id="morph_1.1.10-seg">z</word> + <word id="word_11" tei_id="morph_1.1.11-seg">ul</word> + <word id="word_12" tei_id="morph_1.1.12-seg">.</word> + <word id="word_13" tei_id="morph_1.2.1-seg">Piłsudskiego</word> + <word id="word_14" tei_id="morph_1.2.2-seg">na</word> + <word id="word_15" tei_id="morph_1.2.3-seg">powierzchni</word> + <word id="word_16" tei_id="morph_1.2.4-seg">niewiele</word> + <word id="word_17" tei_id="morph_1.2.5-seg">przekraczającej</word> + <word id="word_18" tei_id="morph_1.2.6-seg">półtora</word> + <word id="word_19" tei_id="morph_1.2.7-seg">metra</word> + <word id="word_20" tei_id="morph_1.2.8-seg">kwadratowego</word> + <word id="word_21" tei_id="morph_1.2.9-seg">udało</word> + <word id="word_22" tei_id="morph_1.2.10-seg">się</word> + <word id="word_23" tei_id="morph_1.2.11-seg">"</word> + <word id="word_24" tei_id="morph_1.2.12-seg">upchnąć</word> + <word id="word_25" tei_id="morph_1.2.13-seg">"</word> + <word id="word_26" tei_id="morph_1.2.14-seg">aż</word> + <word id="word_27" tei_id="morph_1.2.15-seg">15</word> + <word id="word_28" tei_id="morph_1.2.16-seg">skrzynek</word> + <word id="word_29" tei_id="morph_1.2.17-seg">z</word> + <word 
id="word_30" tei_id="morph_1.2.18-seg">kwiatami</word> + <word id="word_31" tei_id="morph_1.2.19-seg">i</word> + <word id="word_32" tei_id="morph_1.2.20-seg">kilka</word> + <word id="word_33" tei_id="morph_1.2.21-seg">doniczek</word> + <word id="word_34" tei_id="morph_1.2.22-seg">.</word> + <word id="word_35" tei_id="morph_1.2.23-seg">-</word> + <word id="word_36" tei_id="morph_1.2.24-seg">Utrzymanie</word> + <word id="word_37" tei_id="morph_1.2.25-seg">takiego</word> + <word id="word_38" tei_id="morph_1.2.26-seg">balkonu</word> + <word id="word_39" tei_id="morph_1.2.27-seg">wymaga</word> + <word id="word_40" tei_id="morph_1.2.28-seg">wiele</word> + <word id="word_41" tei_id="morph_1.2.29-seg">pracy</word> + <word id="word_42" tei_id="morph_1.2.30-seg">i</word> + <word id="word_43" tei_id="morph_1.2.31-seg">serca</word> + <word id="word_44" tei_id="morph_1.2.32-seg">-</word> + <word id="word_45" tei_id="morph_1.2.33-seg">przyznaje</word> + <word id="word_46" tei_id="morph_1.2.34-seg">S</word> + <word id="word_47" tei_id="morph_1.2.35-seg">.</word> + <word id="word_48" tei_id="morph_1.3.1-seg">Budkiewicz</word> + <word id="word_49" tei_id="morph_1.3.2-seg">,</word> + <word id="word_50" tei_id="morph_1.3.3-seg">której</word> + <word id="word_51" tei_id="morph_1.3.4-seg">przy</word> + <word id="word_52" tei_id="morph_1.3.5-seg">kwiatach</word> + <word id="word_53" tei_id="morph_1.3.6-seg">pomaga</word> + <word id="word_54" tei_id="morph_1.3.7-seg">córka</word> + <word id="word_55" tei_id="morph_1.3.8-seg">-</word> + <word id="word_56" tei_id="morph_1.3.9-seg">Grażyna</word> + <word id="word_57" tei_id="morph_1.3.10-seg" lastinpar="true">Stańczyk</word> + <word id="word_58" tei_id="morph_2.4.1-seg">-</word> + <word id="word_59" tei_id="morph_2.4.2-seg">Nie</word> + <word id="word_60" tei_id="morph_2.4.3-seg">mamy</word> + <word id="word_61" tei_id="morph_2.4.4-seg">własnego</word> + <word id="word_62" tei_id="morph_2.4.5-seg">ogródka</word> + <word id="word_63" 
tei_id="morph_2.4.6-seg">,</word> + <word id="word_64" tei_id="morph_2.4.7-seg">a</word> + <word id="word_65" tei_id="morph_2.4.8-seg">bardzo</word> + <word id="word_66" tei_id="morph_2.4.9-seg">kochamy</word> + <word id="word_67" tei_id="morph_2.4.10-seg">kwiaty</word> + <word id="word_68" tei_id="morph_2.4.11-seg">.</word> + <word id="word_69" tei_id="morph_2.5.1-seg">Dlatego</word> + <word id="word_70" tei_id="morph_2.5.2-seg">każdą</word> + <word id="word_71" tei_id="morph_2.5.3-seg">wolną</word> + <word id="word_72" tei_id="morph_2.5.4-seg">chwilę</word> + <word id="word_73" tei_id="morph_2.5.5-seg">poświęcamy</word> + <word id="word_74" tei_id="morph_2.5.6-seg">balkonowi</word> + <word id="word_75" tei_id="morph_2.5.7-seg">.</word> + <word id="word_76" tei_id="morph_2.6.1-seg">Nasiona</word> + <word id="word_77" tei_id="morph_2.6.2-seg">wysiewane</word> + <word id="word_78" tei_id="morph_2.6.3-seg">są</word> + <word id="word_79" tei_id="morph_2.6.4-seg">już</word> + <word id="word_80" tei_id="morph_2.6.5-seg">na</word> + <word id="word_81" tei_id="morph_2.6.6-seg">przełomie</word> + <word id="word_82" tei_id="morph_2.6.7-seg">lutego</word> + <word id="word_83" tei_id="morph_2.6.8-seg">i</word> + <word id="word_84" tei_id="morph_2.6.9-seg">marca</word> + <word id="word_85" tei_id="morph_2.6.10-seg">.</word> + <word id="word_86" tei_id="morph_2.7.1-seg">Później</word> + <word id="word_87" tei_id="morph_2.7.2-seg">wyrastające</word> + <word id="word_88" tei_id="morph_2.7.3-seg">z</word> + <word id="word_89" tei_id="morph_2.7.4-seg">nich</word> + <word id="word_90" tei_id="morph_2.7.5-seg">roślinki</word> + <word id="word_91" tei_id="morph_2.7.6-seg">pikuje</word> + <word id="word_92" tei_id="morph_2.7.7-seg">się</word> + <word id="word_93" tei_id="morph_2.7.8-seg">do</word> + <word id="word_94" tei_id="morph_2.7.9-seg">skrzynek</word> + <word id="word_95" tei_id="morph_2.7.10-seg">.</word> + <word id="word_96" tei_id="morph_2.7.11-seg">-</word> + <word 
id="word_97" tei_id="morph_2.7.12-seg">Skrzynki</word> + <word id="word_98" tei_id="morph_2.7.13-seg">wystawiamy</word> + <word id="word_99" tei_id="morph_2.7.14-seg">na</word> + <word id="word_100" tei_id="morph_2.7.15-seg">balkon</word> + <word id="word_101" tei_id="morph_2.7.16-seg">dopiero</word> + <word id="word_102" tei_id="morph_2.7.17-seg">w</word> + <word id="word_103" tei_id="morph_2.7.18-seg">maju</word> + <word id="word_104" tei_id="morph_2.7.19-seg">-</word> + <word id="word_105" tei_id="morph_2.7.20-seg">czerwcu</word> + <word id="word_106" tei_id="morph_2.7.21-seg">-</word> + <word id="word_107" tei_id="morph_2.7.22-seg">wyjaśnia</word> + <word id="word_108" tei_id="morph_2.7.23-seg">G</word> + <word id="word_109" tei_id="morph_2.7.24-seg">.</word> + <word id="word_110" tei_id="morph_2.8.1-seg">Stańczyk</word> + <word id="word_111" tei_id="morph_2.8.2-seg">.</word> + <word id="word_112" tei_id="morph_2.8.3-seg">-</word> + <word id="word_113" tei_id="morph_2.8.4-seg">Bardzo</word> + <word id="word_114" tei_id="morph_2.8.5-seg">rzadko</word> + <word id="word_115" tei_id="morph_2.8.6-seg">używamy</word> + <word id="word_116" tei_id="morph_2.8.7-seg">gotowych</word> + <word id="word_117" tei_id="morph_2.8.8-seg">sadzonek</word> + <word id="word_118" tei_id="morph_2.8.9-seg">ze</word> + <word id="word_119" tei_id="morph_2.8.10-seg">sklepu</word> + <word id="word_120" tei_id="morph_2.8.11-seg">,</word> + <word id="word_121" tei_id="morph_2.8.12-seg">bo</word> + <word id="word_122" tei_id="morph_2.8.13-seg">nic</word> + <word id="word_123" tei_id="morph_2.8.14-seg">nie</word> + <word id="word_124" tei_id="morph_2.8.15-seg">sprawia</word> + <word id="word_125" tei_id="morph_2.8.16-seg">takiej</word> + <word id="word_126" tei_id="morph_2.8.17-seg">przyjemności</word> + <word id="word_127" tei_id="morph_2.8.18-seg">jak</word> + <word id="word_128" tei_id="morph_2.8.19-seg">wyhodowanie</word> + <word id="word_129" tei_id="morph_2.8.20-seg">kwiatka</word> + 
<word id="word_130" tei_id="morph_2.8.21-seg">od</word> + <word id="word_131" tei_id="morph_2.8.22-seg">nasionka</word> + <word id="word_132" tei_id="morph_2.8.23-seg">.</word> + <word id="word_133" tei_id="morph_2.9.1-seg">O</word> + <word id="word_134" tei_id="morph_2.9.2-seg">tej</word> + <word id="word_135" tei_id="morph_2.9.3-seg">porze</word> + <word id="word_136" tei_id="morph_2.9.4-seg">roku</word> + <word id="word_137" tei_id="morph_2.9.5-seg">najwięcej</word> + <word id="word_138" tei_id="morph_2.9.6-seg">pracy</word> + <word id="word_139" tei_id="morph_2.9.7-seg">jest</word> + <word id="word_140" tei_id="morph_2.9.8-seg">przy</word> + <word id="word_141" tei_id="morph_2.9.9-seg">podlewaniu</word> + <word id="word_142" tei_id="morph_2.9.10-seg">roślin</word> + <word id="word_143" tei_id="morph_2.9.11-seg">i</word> + <word id="word_144" tei_id="morph_2.9.12-seg">zasilaniu</word> + <word id="word_145" tei_id="morph_2.9.13-seg">ich</word> + <word id="word_146" tei_id="morph_2.9.14-seg">odpowiednimi</word> + <word id="word_147" tei_id="morph_2.9.15-seg">nawozami</word> + <word id="word_148" tei_id="morph_2.9.16-seg">.</word> + <word id="word_149" tei_id="morph_2.10.1-seg">Na</word> + <word id="word_150" tei_id="morph_2.10.2-seg">balkonie</word> + <word id="word_151" tei_id="morph_2.10.3-seg">pani</word> + <word id="word_152" tei_id="morph_2.10.4-seg">Stanisławy</word> + <word id="word_153" tei_id="morph_2.10.5-seg">rosną</word> + <word id="word_154" tei_id="morph_2.10.6-seg">:</word> + <word id="word_155" tei_id="morph_2.10.7-seg">surfinie</word> + <word id="word_156" tei_id="morph_2.10.8-seg">,</word> + <word id="word_157" tei_id="morph_2.10.9-seg">petunie</word> + <word id="word_158" tei_id="morph_2.10.10-seg">,</word> + <word id="word_159" tei_id="morph_2.10.11-seg">gardenie</word> + <word id="word_160" tei_id="morph_2.10.12-seg">,</word> + <word id="word_161" tei_id="morph_2.10.13-seg">aksamitki</word> + <word id="word_162" 
tei_id="morph_2.10.14-seg">,</word> + <word id="word_163" tei_id="morph_2.10.15-seg">przypołudniki</word> + <word id="word_164" tei_id="morph_2.10.16-seg">,</word> + <word id="word_165" tei_id="morph_2.10.17-seg">groszek</word> + <word id="word_166" tei_id="morph_2.10.18-seg">pachnący</word> + <word id="word_167" tei_id="morph_2.10.19-seg">,</word> + <word id="word_168" tei_id="morph_2.10.20-seg">kabea</word> + <word id="word_169" tei_id="morph_2.10.21-seg">,</word> + <word id="word_170" tei_id="morph_2.10.22-seg">nemezje</word> + <word id="word_171" tei_id="morph_2.10.23-seg">i</word> + <word id="word_172" tei_id="morph_2.10.24-seg">werbeny</word> + <word id="word_173" tei_id="morph_2.10.25-seg">.</word> + <word id="word_174" tei_id="morph_2.11.1-seg">W</word> + <word id="word_175" tei_id="morph_2.11.2-seg">sumie</word> + <word id="word_176" tei_id="morph_2.11.3-seg">kilkadziesiąt</word> + <word id="word_177" tei_id="morph_2.11.4-seg">kwiatów</word> + <word id="word_178" tei_id="morph_2.11.5-seg">,</word> + <word id="word_179" tei_id="morph_2.11.6-seg">z</word> + <word id="word_180" tei_id="morph_2.11.7-seg">których</word> + <word id="word_181" tei_id="morph_2.11.8-seg">każdy</word> + <word id="word_182" tei_id="morph_2.11.9-seg">kwitnie</word> + <word id="word_183" tei_id="morph_2.11.10-seg">w</word> + <word id="word_184" tei_id="morph_2.11.11-seg">innym</word> + <word id="word_185" tei_id="morph_2.11.12-seg">kolorze</word> + <word id="word_186" tei_id="morph_2.11.13-seg">i</word> + <word id="word_187" tei_id="morph_2.11.14-seg">w</word> + <word id="word_188" tei_id="morph_2.11.15-seg">różnym</word> + <word id="word_189" tei_id="morph_2.11.16-seg">czasie</word> + <word id="word_190" tei_id="morph_2.11.17-seg">.</word> + <word id="word_191" tei_id="morph_2.12.1-seg">Efekt</word> + <word id="word_192" tei_id="morph_2.12.2-seg">?</word> + <word id="word_193" tei_id="morph_2.13.1-seg">Ogród</word> + <word id="word_194" tei_id="morph_2.13.2-seg">na</word> + <word 
id="word_195" tei_id="morph_2.13.3-seg">balkonie</word> + <word id="word_196" tei_id="morph_2.13.4-seg">cieszy</word> + <word id="word_197" tei_id="morph_2.13.5-seg">oczy</word> + <word id="word_198" tei_id="morph_2.13.6-seg">właścicielki</word> + <word id="word_199" tei_id="morph_2.13.7-seg">i</word> + <word id="word_200" tei_id="morph_2.13.8-seg">przechodniów</word> + <word id="word_201" tei_id="morph_2.13.9-seg">aż</word> + <word id="word_202" tei_id="morph_2.13.10-seg">do</word> + <word id="word_203" tei_id="morph_2.13.11-seg">późnej</word> + <word id="word_204" tei_id="morph_2.13.12-seg">jesieni</word> + <word id="word_205" tei_id="morph_2.13.13-seg">.</word> + <word id="word_206" tei_id="morph_2.14.1-seg">Ozdabianie</word> + <word id="word_207" tei_id="morph_2.14.2-seg">balkonów</word> + <word id="word_208" tei_id="morph_2.14.3-seg">kwiatami</word> + <word id="word_209" tei_id="morph_2.14.4-seg">staje</word> + <word id="word_210" tei_id="morph_2.14.5-seg">się</word> + <word id="word_211" tei_id="morph_2.14.6-seg">w</word> + <word id="word_212" tei_id="morph_2.14.7-seg">Brzezinach</word> + <word id="word_213" tei_id="morph_2.14.8-seg">coraz</word> + <word id="word_214" tei_id="morph_2.14.9-seg">popularniejsze</word> + <word id="word_215" tei_id="morph_2.14.10-seg">-</word> + <word id="word_216" tei_id="morph_2.14.11-seg">Teraz</word> + <word id="word_217" tei_id="morph_2.14.12-seg">jest</word> + <word id="word_218" tei_id="morph_2.14.13-seg">za</word> + <word id="word_219" tei_id="morph_2.14.14-seg">późno</word> + <word id="word_220" tei_id="morph_2.14.15-seg">,</word> + <word id="word_221" tei_id="morph_2.14.16-seg">by</word> + <word id="word_222" tei_id="morph_2.14.17-seg">samemu</word> + <word id="word_223" tei_id="morph_2.14.18-seg">uprawiać</word> + <word id="word_224" tei_id="morph_2.14.19-seg">rośliny</word> + <word id="word_225" tei_id="morph_2.14.20-seg">balkonowe</word> + <word id="word_226" tei_id="morph_2.14.21-seg">-</word> + <word id="word_227" 
tei_id="morph_2.14.22-seg">mówi</word> + <word id="word_228" tei_id="morph_2.14.23-seg">Bożenna</word> + <word id="word_229" tei_id="morph_2.14.24-seg">Kolasa</word> + <word id="word_230" tei_id="morph_2.14.25-seg">ze</word> + <word id="word_231" tei_id="morph_2.14.26-seg">sklepu</word> + <word id="word_232" tei_id="morph_2.14.27-seg">ogrodniczego</word> + <word id="word_233" tei_id="morph_2.14.28-seg">.</word> + <word id="word_234" tei_id="morph_2.14.29-seg">-</word> + <word id="word_235" tei_id="morph_2.14.30-seg">Jednak</word> + <word id="word_236" tei_id="morph_2.14.31-seg">wciąż</word> + <word id="word_237" tei_id="morph_2.14.32-seg">można</word> + <word id="word_238" tei_id="morph_2.14.33-seg">kupić</word> + <word id="word_239" tei_id="morph_2.14.34-seg">kwiaty</word> + <word id="word_240" tei_id="morph_2.14.35-seg">gotowe</word> + <word id="word_241" tei_id="morph_2.14.36-seg">do</word> + <word id="word_242" tei_id="morph_2.14.37-seg">wystawienia</word> + <word id="word_243" tei_id="morph_2.14.38-seg">na</word> + <word id="word_244" tei_id="morph_2.14.39-seg">balkon</word> + <word id="word_245" tei_id="morph_2.14.40-seg">.</word> + <word id="word_246" tei_id="morph_2.15.1-seg">Najpopularniejsze</word> + <word id="word_247" tei_id="morph_2.15.2-seg">są</word> + <word id="word_248" tei_id="morph_2.15.3-seg">petunie</word> + <word id="word_249" tei_id="morph_2.15.4-seg">,</word> + <word id="word_250" tei_id="morph_2.15.5-seg">surfinie</word> + <word id="word_251" tei_id="morph_2.15.6-seg">,</word> + <word id="word_252" tei_id="morph_2.15.7-seg">oraz</word> + <word id="word_253" tei_id="morph_2.15.8-seg">czerwone</word> + <word id="word_254" tei_id="morph_2.15.9-seg">,</word> + <word id="word_255" tei_id="morph_2.15.10-seg">białe</word> + <word id="word_256" tei_id="morph_2.15.11-seg">i</word> + <word id="word_257" tei_id="morph_2.15.12-seg">różowe</word> + <word id="word_258" tei_id="morph_2.15.13-seg">pelargonie</word> + <word id="word_259" 
tei_id="morph_2.15.14-seg">.</word> + <word id="word_260" tei_id="morph_2.16.1-seg">Modnym</word> + <word id="word_261" tei_id="morph_2.16.2-seg">kwiatem</word> + <word id="word_262" tei_id="morph_2.16.3-seg">jest</word> + <word id="word_263" tei_id="morph_2.16.4-seg">też</word> + <word id="word_264" tei_id="morph_2.16.5-seg">datura</word> + <word id="word_265" tei_id="morph_2.16.6-seg">-</word> + <word id="word_266" tei_id="morph_2.16.7-seg">bieluń</word> + <word id="word_267" tei_id="morph_2.16.8-seg">.</word> + <word id="word_268" tei_id="morph_2.17.1-seg">Klientom</word> + <word id="word_269" tei_id="morph_2.17.2-seg">nie</word> + <word id="word_270" tei_id="morph_2.17.3-seg">przeszkadza</word> + <word id="word_271" tei_id="morph_2.17.4-seg">,</word> + <word id="word_272" tei_id="morph_2.17.5-seg">że</word> + <word id="word_273" tei_id="morph_2.17.6-seg">jest</word> + <word id="word_274" tei_id="morph_2.17.7-seg">to</word> + <word id="word_275" tei_id="morph_2.17.8-seg">kwiat</word> + <word id="word_276" tei_id="morph_2.17.9-seg">trujący</word> + <word id="word_277" tei_id="morph_2.17.10-seg">.</word> + <word id="word_278" tei_id="morph_2.17.11-seg">tekst</word> + <word id="word_279" tei_id="morph_2.17.12-seg">i</word> + <word id="word_280" tei_id="morph_2.17.13-seg">fot</word> + <word id="word_281" tei_id="morph_2.17.14-seg">.</word> + <word id="word_282" tei_id="morph_2.17.15-seg">grzegorz</word> + <word id="word_283" tei_id="morph_2.17.16-seg" lastinpar="true">kozieł</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_sys/0.mmax a/src/test/resources/teksty_mmax/teksty_sys/0.mmax new file mode 100755 index 0000000..c9d65de --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/0.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>0_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_sys/0_mentions.xml a/src/test/resources/teksty_mmax/teksty_sys/0_mentions.xml new file mode 100755 index 
0000000..e9fea36 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/0_mentions.xml @@ -0,0 +1,89 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_2" mmax_level="mention" mention_head="spotkaniu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_4" mmax_level="mention" mention_head="udział" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_3" span="word_7..word_8" mmax_level="mention" mention_head="tysięcy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_10..word_11" mmax_level="mention" mention_head="Europy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_17" mmax_level="mention" mention_head="nich" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_23..word_25" mmax_level="mention" mention_head="Króla" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_7" span="word_22..word_25" mmax_level="mention" mention_head="tytuł" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_8" span="word_29" mmax_level="mention" mention_head="udział" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_9" span="word_31" mmax_level="mention" mention_head="strzelaniu" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_10" span="word_34..word_35" mmax_level="mention" mention_head="szanse" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_40" mmax_level="mention" mention_head="to" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_46" mmax_level="mention" mention_head="gospodarza" mention_group="empty" near_identity="empty"></markable> + <markable 
id="markable_13" span="word_47" mmax_level="mention" mention_head="spotkań" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_14" span="word_44..word_47" mmax_level="mention" mention_head="obowiązków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_52" mmax_level="mention" mention_head="pracy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_54" mmax_level="mention" mention_head="mi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_59" mmax_level="mention" mention_head="strzelaniu" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_18" span="word_63" mmax_level="mention" mention_head="Maj" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_19" span="word_62..word_63" mmax_level="mention" mention_head="Zdzisław" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_20" span="word_66..word_68" mmax_level="mention" mention_head="Bractwa" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_21" span="word_65..word_68" mmax_level="mention" mention_head="prezes" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_71..word_72" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_70..word_72" mmax_level="mention" mention_head="Król" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_24" span="word_74" mmax_level="mention" mention_head="Strzelanie" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_25" span="word_77..word_79" mmax_level="mention" mention_head="Króla" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_26" span="word_76..word_79" mmax_level="mention" 
mention_head="tytuł" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_27" span="word_88" mmax_level="mention" mention_head="finału" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_92" mmax_level="mention" mention_head="braci" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_29" span="word_99..word_101" mmax_level="mention" mention_head="Króla" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_30" span="word_98..word_101" mmax_level="mention" mention_head="tytuł" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_31" span="word_103" mmax_level="mention" mention_head="go" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_32" span="word_106..word_107" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_110" mmax_level="mention" mention_head="Król" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_34" span="word_113..word_115" mmax_level="mention" mention_head="nagród" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_35" span="word_118..word_119" mmax_level="mention" mention_head="tytuł" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_36" span="word_121..word_122" mmax_level="mention" mention_head="zaszczytem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_124" mmax_level="mention" mention_head="król" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_38" span="word_131..word_132" mmax_level="mention" mention_head="Parlamentu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_130..word_132" mmax_level="mention" mention_head="posiedzenia" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_40" span="word_136" mmax_level="mention" mention_head="Maj" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_41" span="word_135..word_136" mmax_level="mention" mention_head="Zdzisław" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_42" span="word_144..word_145" mmax_level="mention" mention_head="Bractw" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_43" span="word_142..word_145" mmax_level="mention" mention_head="Spotkań" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_44" span="word_147..word_148" mmax_level="mention" mention_head="parada" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_156..word_157" mmax_level="mention" mention_head="godz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_160..word_161" mmax_level="mention" mention_head="braci" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_47" span="word_163..word_164" mmax_level="mention" mention_head="strojach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_167" mmax_level="mention" mention_head="Błoń" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_169" mmax_level="mention" mention_head="Rynek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_170" mmax_level="mention" mention_head="ulicami" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_172" mmax_level="mention" mention_head="Piłsudskiego" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_52" span="word_174" mmax_level="mention" mention_head="Straszewskiego" mention_group="empty" near_identity="empty"></markable> + <markable 
id="markable_53" span="word_181" mmax_level="mention" mention_head="istnienia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_182..word_183" mmax_level="mention" mention_head="Bractwa" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_55" span="word_185" mmax_level="mention" mention_head="Krakowie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_188" mmax_level="mention" mention_head="wieku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_187..word_188" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_58" span="word_191" mmax_level="mention" mention_head="ono" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_59" span="word_192..word_193" mmax_level="mention" mention_head="obywateli" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_197..word_198" mmax_level="mention" mention_head="rzemieślników" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_61" span="word_195..word_198" mmax_level="mention" mention_head="kupców" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_62" span="word_201" mmax_level="mention" mention_head="miasta" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_63" span="word_200..word_201" mmax_level="mention" mention_head="obronność" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_205" mmax_level="mention" mention_head="bractwa" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_65" span="word_203..word_205" mmax_level="mention" mention_head="świętem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_207" 
mmax_level="mention" mention_head="turniej" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_213" mmax_level="mention" mention_head="strzelnicy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_68" span="word_220..word_221" mmax_level="mention" mention_head="dni" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_69" span="word_229..word_230" mmax_level="mention" mention_head="żerdzi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_70" span="word_232" mmax_level="mention" mention_head="Brat" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_71" span="word_236..word_237" mmax_level="mention" mention_head="strzałem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_72" span="word_240" mmax_level="mention" mention_head="jego" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_73" span="word_239..word_241" mmax_level="mention" mention_head="fragment" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_74" span="word_244..word_245" mmax_level="mention" mention_head="Króla" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_75" span="word_243..word_245" mmax_level="mention" mention_head="miano" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_76" span="word_248..word_249" mmax_level="mention" mention_head="tytułem" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_77" span="word_252..word_258" mmax_level="mention" mention_head="honory" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_78" span="word_260..word_261" mmax_level="mention" mention_head="Rada" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_79" span="word_263" mmax_level="mention" 
mention_head="jego" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_80" span="word_263..word_264" mmax_level="mention" mention_head="posiadacza" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_81" span="word_271" mmax_level="mention" mention_head="płacenia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_82" span="word_272" mmax_level="mention" mention_head="podatków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_83" span="word_270..word_272" mmax_level="mention" mention_head="obowiązku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_84" span="word_274..word_275" mmax_level="mention" mention_head="zwyczaj" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_85" span="word_279" mmax_level="mention" mention_head="dziś" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_sys/0_words.xml a/src/test/resources/teksty_mmax/teksty_sys/0_words.xml new file mode 100755 index 0000000..9b46167 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/0_words.xml @@ -0,0 +1,285 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">W</word> + <word id="word_2" tei_id="morph_1.1.2-seg">spotkaniu</word> + <word id="word_3" tei_id="morph_1.1.3-seg">weźmie</word> + <word id="word_4" tei_id="morph_1.1.4-seg">udział</word> + <word id="word_5" tei_id="morph_1.1.5-seg">blisko</word> + <word id="word_6" tei_id="morph_1.1.6-seg">7</word> + <word id="word_7" tei_id="morph_1.1.7-seg">tysięcy</word> + <word id="word_8" tei_id="morph_1.1.8-seg">braci</word> + <word id="word_9" tei_id="morph_1.1.9-seg">z</word> + <word id="word_10" tei_id="morph_1.1.10-seg">całej</word> + <word id="word_11" tei_id="morph_1.1.11-seg">Europy</word> + 
<word id="word_12" tei_id="morph_1.1.12-seg">,</word> + <word id="word_13" tei_id="morph_1.1.13-seg">ale</word> + <word id="word_14" tei_id="morph_1.1.14-seg">tylko</word> + <word id="word_15" tei_id="morph_1.1.15-seg">206</word> + <word id="word_16" tei_id="morph_1.1.16-seg">z</word> + <word id="word_17" tei_id="morph_1.1.17-seg">nich</word> + <word id="word_18" tei_id="morph_1.1.18-seg">będzie</word> + <word id="word_19" tei_id="morph_1.1.19-seg">ubiegało</word> + <word id="word_20" tei_id="morph_1.1.20-seg">się</word> + <word id="word_21" tei_id="morph_1.1.21-seg">o</word> + <word id="word_22" tei_id="morph_1.1.22-seg">tytuł</word> + <word id="word_23" tei_id="morph_1.1.23-seg">Europejskiego</word> + <word id="word_24" tei_id="morph_1.1.24-seg">Króla</word> + <word id="word_25" tei_id="morph_1.1.25-seg">Kurkowego</word> + <word id="word_26" tei_id="morph_1.1.26-seg">.</word> + <word id="word_27" tei_id="morph_1.1.27-seg">-</word> + <word id="word_28" tei_id="morph_1.1.28-seg">Wezmę</word> + <word id="word_29" tei_id="morph_1.1.29-seg">udział</word> + <word id="word_30" tei_id="morph_1.1.30-seg">w</word> + <word id="word_31" tei_id="morph_1.1.31-seg">strzelaniu</word> + <word id="word_32" tei_id="morph_1.1.32-seg">,</word> + <word id="word_33" tei_id="morph_1.1.33-seg">choć</word> + <word id="word_34" tei_id="morph_1.1.34-seg">moje</word> + <word id="word_35" tei_id="morph_1.1.35-seg">szanse</word> + <word id="word_36" tei_id="morph_1.1.36-seg">są</word> + <word id="word_37" tei_id="morph_1.1.37-seg">marne</word> + <word id="word_38" tei_id="morph_1.1.38-seg">.</word> + <word id="word_39" tei_id="morph_1.2.1-seg">Wynika</word> + <word id="word_40" tei_id="morph_1.2.2-seg">to</word> + <word id="word_41" tei_id="morph_1.2.3-seg">przede</word> + <word id="word_42" tei_id="morph_1.2.4-seg">wszystkim</word> + <word id="word_43" tei_id="morph_1.2.5-seg">z</word> + <word id="word_44" tei_id="morph_1.2.6-seg">moich</word> + <word id="word_45" 
tei_id="morph_1.2.7-seg">obowiązków</word> + <word id="word_46" tei_id="morph_1.2.8-seg">gospodarza</word> + <word id="word_47" tei_id="morph_1.2.9-seg">spotkań</word> + <word id="word_48" tei_id="morph_1.2.10-seg">;</word> + <word id="word_49" tei_id="morph_1.2.11-seg">w</word> + <word id="word_50" tei_id="morph_1.2.12-seg">tym</word> + <word id="word_51" tei_id="morph_1.2.13-seg">nawale</word> + <word id="word_52" tei_id="morph_1.2.14-seg">pracy</word> + <word id="word_53" tei_id="morph_1.2.15-seg">ciężko</word> + <word id="word_54" tei_id="morph_1.2.16-seg">mi</word> + <word id="word_55" tei_id="morph_1.2.17-seg">będzie</word> + <word id="word_56" tei_id="morph_1.2.18-seg">się</word> + <word id="word_57" tei_id="morph_1.2.19-seg">skupić</word> + <word id="word_58" tei_id="morph_1.2.20-seg">na</word> + <word id="word_59" tei_id="morph_1.2.21-seg">strzelaniu</word> + <word id="word_60" tei_id="morph_1.2.22-seg">-</word> + <word id="word_61" tei_id="morph_1.2.23-seg">przewiduje</word> + <word id="word_62" tei_id="morph_1.2.24-seg">Zdzisław</word> + <word id="word_63" tei_id="morph_1.2.25-seg">Maj</word> + <word id="word_64" tei_id="morph_1.2.26-seg">,</word> + <word id="word_65" tei_id="morph_1.2.27-seg">prezes</word> + <word id="word_66" tei_id="morph_1.2.28-seg">krakowskiego</word> + <word id="word_67" tei_id="morph_1.2.29-seg">Bractwa</word> + <word id="word_68" tei_id="morph_1.2.30-seg">Kurkowego</word> + <word id="word_69" tei_id="morph_1.2.31-seg">,</word> + <word id="word_70" tei_id="morph_1.2.32-seg">panujący</word> + <word id="word_71" tei_id="morph_1.2.33-seg">Król</word> + <word id="word_72" tei_id="morph_1.2.34-seg">Kurkowy</word> + <word id="word_73" tei_id="morph_1.2.35-seg" lastinpar="true">.</word> + <word id="word_74" tei_id="morph_2.3.1-seg">Strzelanie</word> + <word id="word_75" tei_id="morph_2.3.2-seg">o</word> + <word id="word_76" tei_id="morph_2.3.3-seg">tytuł</word> + <word id="word_77" tei_id="morph_2.3.4-seg">Europejskiego</word> + <word 
id="word_78" tei_id="morph_2.3.5-seg">Króla</word> + <word id="word_79" tei_id="morph_2.3.6-seg">Kurkowego</word> + <word id="word_80" tei_id="morph_2.3.7-seg">będzie</word> + <word id="word_81" tei_id="morph_2.3.8-seg">się</word> + <word id="word_82" tei_id="morph_2.3.9-seg">odbywało</word> + <word id="word_83" tei_id="morph_2.3.10-seg">w</word> + <word id="word_84" tei_id="morph_2.3.11-seg">kilku</word> + <word id="word_85" tei_id="morph_2.3.12-seg">etapach</word> + <word id="word_86" tei_id="morph_2.3.13-seg">.</word> + <word id="word_87" tei_id="morph_2.4.1-seg">Do</word> + <word id="word_88" tei_id="morph_2.4.2-seg">finału</word> + <word id="word_89" tei_id="morph_2.4.3-seg">zostanie</word> + <word id="word_90" tei_id="morph_2.4.4-seg">dopuszczonych</word> + <word id="word_91" tei_id="morph_2.4.5-seg">27</word> + <word id="word_92" tei_id="morph_2.4.6-seg">braci</word> + <word id="word_93" tei_id="morph_2.4.7-seg">-</word> + <word id="word_94" tei_id="morph_2.4.8-seg">jeden</word> + <word id="word_95" tei_id="morph_2.4.9-seg">z</word> + <word id="word_96" tei_id="morph_2.4.10-seg">nich</word> + <word id="word_97" tei_id="morph_2.4.11-seg">otrzyma</word> + <word id="word_98" tei_id="morph_2.4.12-seg">tytuł</word> + <word id="word_99" tei_id="morph_2.4.13-seg">Europejskiego</word> + <word id="word_100" tei_id="morph_2.4.14-seg">Króla</word> + <word id="word_101" tei_id="morph_2.4.15-seg">Kurkowego</word> + <word id="word_102" tei_id="morph_2.4.16-seg">odbierając</word> + <word id="word_103" tei_id="morph_2.4.17-seg">go</word> + <word id="word_104" tei_id="morph_2.4.18-seg">obecnie</word> + <word id="word_105" tei_id="morph_2.4.19-seg">panującemu</word> + <word id="word_106" tei_id="morph_2.4.20-seg">Wilfriedowi</word> + <word id="word_107" tei_id="morph_2.4.21-seg">Stammermannowi</word> + <word id="word_108" tei_id="morph_2.4.22-seg">.</word> + <word id="word_109" tei_id="morph_2.4.23-seg">-</word> + <word id="word_110" tei_id="morph_2.4.24-seg">Król</word> + 
<word id="word_111" tei_id="morph_2.4.25-seg">nie</word> + <word id="word_112" tei_id="morph_2.4.26-seg">otrzymuje</word> + <word id="word_113" tei_id="morph_2.4.27-seg">żadnych</word> + <word id="word_114" tei_id="morph_2.4.28-seg">nagród</word> + <word id="word_115" tei_id="morph_2.4.29-seg">finansowych</word> + <word id="word_116" tei_id="morph_2.4.30-seg">,</word> + <word id="word_117" tei_id="morph_2.4.31-seg">ale</word> + <word id="word_118" tei_id="morph_2.4.32-seg">taki</word> + <word id="word_119" tei_id="morph_2.4.33-seg">tytuł</word> + <word id="word_120" tei_id="morph_2.4.34-seg">jest</word> + <word id="word_121" tei_id="morph_2.4.35-seg">ogromnym</word> + <word id="word_122" tei_id="morph_2.4.36-seg">zaszczytem</word> + <word id="word_123" tei_id="morph_2.4.37-seg">;</word> + <word id="word_124" tei_id="morph_2.4.38-seg">król</word> + <word id="word_125" tei_id="morph_2.4.39-seg">jest</word> + <word id="word_126" tei_id="morph_2.4.40-seg">np</word> + <word id="word_127" tei_id="morph_2.4.41-seg">.</word> + <word id="word_128" tei_id="morph_2.4.42-seg">zapraszany</word> + <word id="word_129" tei_id="morph_2.4.43-seg">na</word> + <word id="word_130" tei_id="morph_2.4.44-seg">posiedzenia</word> + <word id="word_131" tei_id="morph_2.4.45-seg">Parlamentu</word> + <word id="word_132" tei_id="morph_2.4.46-seg">Europejskiego</word> + <word id="word_133" tei_id="morph_2.4.47-seg">-</word> + <word id="word_134" tei_id="morph_2.4.48-seg">mówi</word> + <word id="word_135" tei_id="morph_2.4.49-seg">Zdzisław</word> + <word id="word_136" tei_id="morph_2.4.50-seg">Maj</word> + <word id="word_137" tei_id="morph_2.4.51-seg" lastinpar="true">.</word> + <word id="word_138" tei_id="morph_3.5.1-seg">Największą</word> + <word id="word_139" tei_id="morph_3.5.2-seg">atrakcją</word> + <word id="word_140" tei_id="morph_3.5.3-seg">12</word> + <word id="word_141" tei_id="morph_3.5.4-seg">.</word> + <word id="word_142" tei_id="morph_3.6.1-seg">Europejskich</word> + <word 
id="word_143" tei_id="morph_3.6.2-seg">Spotkań</word> + <word id="word_144" tei_id="morph_3.6.3-seg">Bractw</word> + <word id="word_145" tei_id="morph_3.6.4-seg">Strzeleckich</word> + <word id="word_146" tei_id="morph_3.6.5-seg">będzie</word> + <word id="word_147" tei_id="morph_3.6.6-seg">wielka</word> + <word id="word_148" tei_id="morph_3.6.7-seg">parada</word> + <word id="word_149" tei_id="morph_3.6.8-seg">,</word> + <word id="word_150" tei_id="morph_3.6.9-seg">która</word> + <word id="word_151" tei_id="morph_3.6.10-seg">rozpocznie</word> + <word id="word_152" tei_id="morph_3.6.11-seg">się</word> + <word id="word_153" tei_id="morph_3.6.12-seg">w</word> + <word id="word_154" tei_id="morph_3.6.13-seg">niedzielę</word> + <word id="word_155" tei_id="morph_3.6.14-seg">o</word> + <word id="word_156" tei_id="morph_3.6.15-seg">godz</word> + <word id="word_157" tei_id="morph_3.6.16-seg">.</word> + <word id="word_158" tei_id="morph_3.6.17-seg">13</word> + <word id="word_159" tei_id="morph_3.6.18-seg">.</word> + <word id="word_160" tei_id="morph_3.7.1-seg">Kilkuset</word> + <word id="word_161" tei_id="morph_3.7.2-seg">braci</word> + <word id="word_162" tei_id="morph_3.7.3-seg">w</word> + <word id="word_163" tei_id="morph_3.7.4-seg">historycznych</word> + <word id="word_164" tei_id="morph_3.7.5-seg">strojach</word> + <word id="word_165" tei_id="morph_3.7.6-seg">przejdzie</word> + <word id="word_166" tei_id="morph_3.7.7-seg">z</word> + <word id="word_167" tei_id="morph_3.7.8-seg">Błoń</word> + <word id="word_168" tei_id="morph_3.7.9-seg">na</word> + <word id="word_169" tei_id="morph_3.7.10-seg">Rynek</word> + <word id="word_170" tei_id="morph_3.7.11-seg">ulicami</word> + <word id="word_171" tei_id="morph_3.7.12-seg">:</word> + <word id="word_172" tei_id="morph_3.7.13-seg">Piłsudskiego</word> + <word id="word_173" tei_id="morph_3.7.14-seg">,</word> + <word id="word_174" tei_id="morph_3.7.15-seg">Straszewskiego</word> + <word id="word_175" tei_id="morph_3.7.16-seg">,</word> + 
<word id="word_176" tei_id="morph_3.7.17-seg">Franciszkańską</word> + <word id="word_177" tei_id="morph_3.7.18-seg">i</word> + <word id="word_178" tei_id="morph_3.7.19-seg">Grodzką</word> + <word id="word_179" tei_id="morph_3.7.20-seg" lastinpar="true">.</word> + <word id="word_180" tei_id="morph_4.8.1-seg">Początki</word> + <word id="word_181" tei_id="morph_4.8.2-seg">istnienia</word> + <word id="word_182" tei_id="morph_4.8.3-seg">Bractwa</word> + <word id="word_183" tei_id="morph_4.8.4-seg">Kurkowego</word> + <word id="word_184" tei_id="morph_4.8.5-seg">w</word> + <word id="word_185" tei_id="morph_4.8.6-seg">Krakowie</word> + <word id="word_186" tei_id="morph_4.8.7-seg">sięgają</word> + <word id="word_187" tei_id="morph_4.8.8-seg">XIII</word> + <word id="word_188" tei_id="morph_4.8.9-seg">wieku</word> + <word id="word_189" tei_id="morph_4.8.10-seg">.</word> + <word id="word_190" tei_id="morph_4.9.1-seg">Skupiało</word> + <word id="word_191" tei_id="morph_4.9.2-seg">ono</word> + <word id="word_192" tei_id="morph_4.9.3-seg">znamienitych</word> + <word id="word_193" tei_id="morph_4.9.4-seg">obywateli</word> + <word id="word_194" tei_id="morph_4.9.5-seg">,</word> + <word id="word_195" tei_id="morph_4.9.6-seg">kupców</word> + <word id="word_196" tei_id="morph_4.9.7-seg">i</word> + <word id="word_197" tei_id="morph_4.9.8-seg">rzemieślników</word> + <word id="word_198" tei_id="morph_4.9.9-seg">pragnących</word> + <word id="word_199" tei_id="morph_4.9.10-seg">wspomóc</word> + <word id="word_200" tei_id="morph_4.9.11-seg">obronność</word> + <word id="word_201" tei_id="morph_4.9.12-seg">miasta</word> + <word id="word_202" tei_id="morph_4.9.13-seg">.</word> + <word id="word_203" tei_id="morph_4.10.1-seg">Wielkim</word> + <word id="word_204" tei_id="morph_4.10.2-seg">świętem</word> + <word id="word_205" tei_id="morph_4.10.3-seg">bractwa</word> + <word id="word_206" tei_id="morph_4.10.4-seg">był</word> + <word id="word_207" tei_id="morph_4.10.5-seg">turniej</word> + <word 
id="word_208" tei_id="morph_4.10.6-seg">,</word> + <word id="word_209" tei_id="morph_4.10.7-seg">który</word> + <word id="word_210" tei_id="morph_4.10.8-seg">odbywał</word> + <word id="word_211" tei_id="morph_4.10.9-seg">się</word> + <word id="word_212" tei_id="morph_4.10.10-seg">na</word> + <word id="word_213" tei_id="morph_4.10.11-seg">strzelnicy</word> + <word id="word_214" tei_id="morph_4.10.12-seg">zwanej</word> + <word id="word_215" tei_id="morph_4.10.13-seg">Celestatem</word> + <word id="word_216" tei_id="morph_4.10.14-seg">.</word> + <word id="word_217" tei_id="morph_4.11.1-seg">Zawody</word> + <word id="word_218" tei_id="morph_4.11.2-seg">trwały</word> + <word id="word_219" tei_id="morph_4.11.3-seg">zwykle</word> + <word id="word_220" tei_id="morph_4.11.4-seg">trzy</word> + <word id="word_221" tei_id="morph_4.11.5-seg">dni</word> + <word id="word_222" tei_id="morph_4.11.6-seg">.</word> + <word id="word_223" tei_id="morph_4.12.1-seg">Strzelano</word> + <word id="word_224" tei_id="morph_4.12.2-seg">do</word> + <word id="word_225" tei_id="morph_4.12.3-seg">drewnianego</word> + <word id="word_226" tei_id="morph_4.12.4-seg">kura</word> + <word id="word_227" tei_id="morph_4.12.5-seg">umocowanego</word> + <word id="word_228" tei_id="morph_4.12.6-seg">na</word> + <word id="word_229" tei_id="morph_4.12.7-seg">wysokiej</word> + <word id="word_230" tei_id="morph_4.12.8-seg">żerdzi</word> + <word id="word_231" tei_id="morph_4.12.9-seg">.</word> + <word id="word_232" tei_id="morph_4.13.1-seg">Brat</word> + <word id="word_233" tei_id="morph_4.13.2-seg">,</word> + <word id="word_234" tei_id="morph_4.13.3-seg">który</word> + <word id="word_235" tei_id="morph_4.13.4-seg">zdołał</word> + <word id="word_236" tei_id="morph_4.13.5-seg">celnym</word> + <word id="word_237" tei_id="morph_4.13.6-seg">strzałem</word> + <word id="word_238" tei_id="morph_4.13.7-seg">strącić</word> + <word id="word_239" tei_id="morph_4.13.8-seg">ostatni</word> + <word id="word_240" 
tei_id="morph_4.13.9-seg">jego</word> + <word id="word_241" tei_id="morph_4.13.10-seg">fragment</word> + <word id="word_242" tei_id="morph_4.13.11-seg">zdobywał</word> + <word id="word_243" tei_id="morph_4.13.12-seg">miano</word> + <word id="word_244" tei_id="morph_4.13.13-seg">Króla</word> + <word id="word_245" tei_id="morph_4.13.14-seg">Kurkowego</word> + <word id="word_246" tei_id="morph_4.13.15-seg">.</word> + <word id="word_247" tei_id="morph_4.14.1-seg">Z</word> + <word id="word_248" tei_id="morph_4.14.2-seg">tym</word> + <word id="word_249" tei_id="morph_4.14.3-seg">tytułem</word> + <word id="word_250" tei_id="morph_4.14.4-seg">wiązały</word> + <word id="word_251" tei_id="morph_4.14.5-seg">się</word> + <word id="word_252" tei_id="morph_4.14.6-seg">nie</word> + <word id="word_253" tei_id="morph_4.14.7-seg">tylko</word> + <word id="word_254" tei_id="morph_4.14.8-seg">honory</word> + <word id="word_255" tei_id="morph_4.14.9-seg">,</word> + <word id="word_256" tei_id="morph_4.14.10-seg">ale</word> + <word id="word_257" tei_id="morph_4.14.11-seg">także</word> + <word id="word_258" tei_id="morph_4.14.12-seg">przywileje</word> + <word id="word_259" tei_id="morph_4.14.13-seg">:</word> + <word id="word_260" tei_id="morph_4.14.14-seg">Rada</word> + <word id="word_261" tei_id="morph_4.14.15-seg">Miejska</word> + <word id="word_262" tei_id="morph_4.14.16-seg">zwalniała</word> + <word id="word_263" tei_id="morph_4.14.17-seg">jego</word> + <word id="word_264" tei_id="morph_4.14.18-seg">posiadacza</word> + <word id="word_265" tei_id="morph_4.14.19-seg">m</word> + <word id="word_266" tei_id="morph_4.14.20-seg">.</word> + <word id="word_267" tei_id="morph_4.14.21-seg">in</word> + <word id="word_268" tei_id="morph_4.14.22-seg">.</word> + <word id="word_269" tei_id="morph_4.14.23-seg">z</word> + <word id="word_270" tei_id="morph_4.14.24-seg">obowiązku</word> + <word id="word_271" tei_id="morph_4.14.25-seg">płacenia</word> + <word id="word_272" 
tei_id="morph_4.14.26-seg">podatków</word> + <word id="word_273" tei_id="morph_4.14.27-seg">(</word> + <word id="word_274" tei_id="morph_4.14.28-seg">ten</word> + <word id="word_275" tei_id="morph_4.14.29-seg">zwyczaj</word> + <word id="word_276" tei_id="morph_4.14.30-seg">utrzymał</word> + <word id="word_277" tei_id="morph_4.14.31-seg">się</word> + <word id="word_278" tei_id="morph_4.14.32-seg">do</word> + <word id="word_279" tei_id="morph_4.14.33-seg">dziś</word> + <word id="word_280" tei_id="morph_4.14.34-seg">)</word> + <word id="word_281" tei_id="morph_4.14.35-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_sys/1.mmax a/src/test/resources/teksty_mmax/teksty_sys/1.mmax new file mode 100755 index 0000000..5e74016 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/1.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>1_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_sys/1_mentions.xml a/src/test/resources/teksty_mmax/teksty_sys/1_mentions.xml new file mode 100755 index 0000000..1a671e0 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/1_mentions.xml @@ -0,0 +1,92 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_4" mmax_level="mention" mention_head="Wrocławiu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_6" mmax_level="mention" mention_head="kontrolerów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_5..word_6" mmax_level="mention" mention_head="płace" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_12..word_13" mmax_level="mention" mention_head="wezwań" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_11..word_13" mmax_level="mention" 
mention_head="liczby" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_15" mmax_level="mention" mention_head="zapłaty" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_20" mmax_level="mention" mention_head="to" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_8" span="word_26..word_27" mmax_level="mention" mention_head="pracowników" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_29..word_30" mmax_level="mention" mention_head="razy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_32" mmax_level="mention" mention_head="roku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_35" mmax_level="mention" mention_head="premie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_41" mmax_level="mention" mention_head="skuteczność" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_45" mmax_level="mention" mention_head="pasażerów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_14" span="word_43..word_45" mmax_level="mention" mention_head="skargi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_40..word_45" mmax_level="mention" mention_head="uwagę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_48..word_49" mmax_level="mention" mention_head="Monika" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_52" mmax_level="mention" mention_head="Wydziału" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_18" span="word_53" mmax_level="mention" mention_head="Transportu" mention_group="empty" near_identity="empty"></markable> + 
<markable id="markable_19" span="word_52..word_53" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_56" mmax_level="mention" mention_head="Miasta" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_21" span="word_55..word_56" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_51..word_56" mmax_level="mention" mention_head="kierownik" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_58..word_59" mmax_level="mention" mention_head="kontrolerzy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_24" span="word_63" mmax_level="mention" mention_head="premii" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_67" mmax_level="mention" mention_head="złotych" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_73" mmax_level="mention" mention_head="Miasto" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_27" span="word_76..word_77" mmax_level="mention" mention_head="zamachem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_87" mmax_level="mention" mention_head="uchwały" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_29" span="word_86..word_87" mmax_level="mention" mention_head="projekt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_30" span="word_92" mmax_level="mention" mention_head="udziałów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_31" span="word_91..word_92" mmax_level="mention" mention_head="wniesienie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_32" span="word_95..word_96" 
mmax_level="mention" mention_head="Budownictwa" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_33" span="word_97..word_99" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_34" span="word_94..word_99" mmax_level="mention" mention_head="Towarzystwie" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_35" span="word_102..word_103" mmax_level="mention" mention_head="Budownictwa" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_36" span="word_101..word_103" mmax_level="mention" mention_head="Towarzystwie" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_37" span="word_104..word_107" mmax_level="mention" mention_head="Dom" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_101..word_107" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_111..word_112" mmax_level="mention" mention_head="Budownictwa" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_40" span="word_109..word_112" mmax_level="mention" mention_head="Towarzystwa" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_41" span="word_115" mmax_level="mention" mention_head="piątek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_117..word_118" mmax_level="mention" mention_head="propozycję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_43" span="word_121..word_122" mmax_level="mention" mention_head="Gospodarki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_44" span="word_120..word_122" mmax_level="mention" mention_head="Komisja" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_45" 
span="word_124..word_125" mmax_level="mention" mention_head="Polityki" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_46" span="word_120..word_125" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_47" span="word_129" mmax_level="mention" mention_head="wtorek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_132" mmax_level="mention" mention_head="nią" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_49" span="word_134" mmax_level="mention" mention_head="Miasta" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_50" span="word_133..word_134" mmax_level="mention" mention_head="Rada" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_138" mmax_level="mention" mention_head="połączenia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_52" span="word_137..word_138" mmax_level="mention" mention_head="Pomysł" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_142" mmax_level="mention" mention_head="wątpliwości" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_147" mmax_level="mention" mention_head="kosztów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_146..word_147" mmax_level="mention" mention_head="racjonalizacji" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_151" mmax_level="mention" mention_head="Lewandowski" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_150..word_151" mmax_level="mention" mention_head="Tomasz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_58" span="word_157" mmax_level="mention" 
mention_head="komisji" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_59" span="word_156..word_157" mmax_level="mention" mention_head="członek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_153..word_157" mmax_level="mention" mention_head="radny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_61" span="word_163" mmax_level="mention" mention_head="dyskusja" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_62" span="word_166" mmax_level="mention" mention_head="towarzystw" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_63" span="word_165..word_166" mmax_level="mention" mention_head="przyszłości" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_169" mmax_level="mention" mention_head="rząd" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_173" mmax_level="mention" mention_head="ustawy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_172..word_173" mmax_level="mention" mention_head="zmianą" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_178" mmax_level="mention" mention_head="mieszkań" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_68" span="word_177..word_178" mmax_level="mention" mention_head="wykup" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_69" span="word_181..word_182" mmax_level="mention" mention_head="budownictwa" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_70" span="word_180..word_182" mmax_level="mention" mention_head="towarzystwach" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_71" span="word_184" mmax_level="mention" mention_head="To" 
mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_72" span="word_186..word_188" mmax_level="mention" mention_head="sytuację" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_73" span="word_193" mmax_level="mention" mention_head="tym" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_74" span="word_197..word_198" mmax_level="mention" mention_head="kroków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_75" span="word_196..word_198" mmax_level="mention" mention_head="podjęcie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_76" span="word_200" mmax_level="mention" mention_head="miasto" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_77" span="word_202" mmax_level="mention" mention_head="Norbert" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_78" span="word_202..word_203" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_79" span="word_206" mmax_level="mention" mention_head="klubu" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_80" span="word_205..word_206" mmax_level="mention" mention_head="szef" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_81" span="word_208" mmax_level="mention" mention_head="PiS" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_82" span="word_214..word_215" mmax_level="mention" mention_head="względów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_83" span="word_217..word_218" mmax_level="mention" mention_head="spółki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_84" span="word_216..word_218" mmax_level="mention" mention_head="utworzenie" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_85" span="word_233" mmax_level="mention" mention_head="konkretów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_86" span="word_241..word_242" mmax_level="mention" mention_head="klub" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_87" span="word_250" mmax_level="mention" mention_head="stanowiska" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_88" span="word_248..word_250" mmax_level="mention" mention_head="uchwały" mention_group="set_8" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_sys/1_words.xml a/src/test/resources/teksty_mmax/teksty_sys/1_words.xml new file mode 100755 index 0000000..739e577 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/1_words.xml @@ -0,0 +1,255 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Z</word> + <word id="word_2" tei_id="morph_1.1.2-seg">kolei</word> + <word id="word_3" tei_id="morph_1.1.3-seg">we</word> + <word id="word_4" tei_id="morph_1.1.4-seg">Wrocławiu</word> + <word id="word_5" tei_id="morph_1.1.5-seg">płace</word> + <word id="word_6" tei_id="morph_1.1.6-seg">kontrolerów</word> + <word id="word_7" tei_id="morph_1.1.7-seg">zostały</word> + <word id="word_8" tei_id="morph_1.1.8-seg">zupełnie</word> + <word id="word_9" tei_id="morph_1.1.9-seg">uniezależnione</word> + <word id="word_10" tei_id="morph_1.1.10-seg">od</word> + <word id="word_11" tei_id="morph_1.1.11-seg">liczby</word> + <word id="word_12" tei_id="morph_1.1.12-seg">wystawionych</word> + <word id="word_13" tei_id="morph_1.1.13-seg">wezwań</word> + <word id="word_14" tei_id="morph_1.1.14-seg">do</word> + <word id="word_15" tei_id="morph_1.1.15-seg">zapłaty</word> + <word id="word_16" tei_id="morph_1.1.16-seg" lastinpar="true">.</word> + <word 
id="word_17" tei_id="morph_2.2.1-seg">-</word> + <word id="word_18" tei_id="morph_2.2.2-seg">Nie</word> + <word id="word_19" tei_id="morph_2.2.3-seg">oznacza</word> + <word id="word_20" tei_id="morph_2.2.4-seg">to</word> + <word id="word_21" tei_id="morph_2.2.5-seg">jednak</word> + <word id="word_22" tei_id="morph_2.2.6-seg">,</word> + <word id="word_23" tei_id="morph_2.2.7-seg">że</word> + <word id="word_24" tei_id="morph_2.2.8-seg">nie</word> + <word id="word_25" tei_id="morph_2.2.9-seg">nagradzamy</word> + <word id="word_26" tei_id="morph_2.2.10-seg">najskuteczniejszych</word> + <word id="word_27" tei_id="morph_2.2.11-seg">pracowników</word> + <word id="word_28" tei_id="morph_2.2.12-seg">.</word> + <word id="word_29" tei_id="morph_2.3.1-seg">Kilka</word> + <word id="word_30" tei_id="morph_2.3.2-seg">razy</word> + <word id="word_31" tei_id="morph_2.3.3-seg">w</word> + <word id="word_32" tei_id="morph_2.3.4-seg">roku</word> + <word id="word_33" tei_id="morph_2.3.5-seg">przyznawane</word> + <word id="word_34" tei_id="morph_2.3.6-seg">są</word> + <word id="word_35" tei_id="morph_2.3.7-seg">premie</word> + <word id="word_36" tei_id="morph_2.3.8-seg">.</word> + <word id="word_37" tei_id="morph_2.4.1-seg">Bierzemy</word> + <word id="word_38" tei_id="morph_2.4.2-seg">wtedy</word> + <word id="word_39" tei_id="morph_2.4.3-seg">pod</word> + <word id="word_40" tei_id="morph_2.4.4-seg">uwagę</word> + <word id="word_41" tei_id="morph_2.4.5-seg">skuteczność</word> + <word id="word_42" tei_id="morph_2.4.6-seg">i</word> + <word id="word_43" tei_id="morph_2.4.7-seg">ewentualne</word> + <word id="word_44" tei_id="morph_2.4.8-seg">skargi</word> + <word id="word_45" tei_id="morph_2.4.9-seg">pasażerów</word> + <word id="word_46" tei_id="morph_2.4.10-seg">-</word> + <word id="word_47" tei_id="morph_2.4.11-seg">wyjaśnia</word> + <word id="word_48" tei_id="morph_2.4.12-seg">Monika</word> + <word id="word_49" tei_id="morph_2.4.13-seg">Poważna</word> + <word id="word_50" 
tei_id="morph_2.4.14-seg">,</word> + <word id="word_51" tei_id="morph_2.4.15-seg">kierownik</word> + <word id="word_52" tei_id="morph_2.4.16-seg">Wydziału</word> + <word id="word_53" tei_id="morph_2.4.17-seg">Transportu</word> + <word id="word_54" tei_id="morph_2.4.18-seg">wrocławskiego</word> + <word id="word_55" tei_id="morph_2.4.19-seg">Urzędu</word> + <word id="word_56" tei_id="morph_2.4.20-seg">Miasta</word> + <word id="word_57" tei_id="morph_2.4.21-seg" lastinpar="true">.</word> + <word id="word_58" tei_id="morph_3.5.1-seg">Tamtejsi</word> + <word id="word_59" tei_id="morph_3.5.2-seg">kontrolerzy</word> + <word id="word_60" tei_id="morph_3.5.3-seg">zarabiają</word> + <word id="word_61" tei_id="morph_3.5.4-seg">(</word> + <word id="word_62" tei_id="morph_3.5.5-seg">bez</word> + <word id="word_63" tei_id="morph_3.5.6-seg">premii</word> + <word id="word_64" tei_id="morph_3.5.7-seg">)</word> + <word id="word_65" tei_id="morph_3.5.8-seg">około</word> + <word id="word_66" tei_id="morph_3.5.9-seg">1200</word> + <word id="word_67" tei_id="morph_3.5.10-seg">złotych</word> + <word id="word_68" tei_id="morph_3.5.11-seg">miesięcznie</word> + <word id="word_69" tei_id="morph_3.5.12-seg">(</word> + <word id="word_70" tei_id="morph_3.5.13-seg">netto</word> + <word id="word_71" tei_id="morph_3.5.14-seg">)</word> + <word id="word_72" tei_id="morph_3.5.15-seg" lastinpar="true">.</word> + <word id="word_73" tei_id="morph_4.6.1-seg">Miasto</word> + <word id="word_74" tei_id="morph_4.6.2-seg">postanowiło</word> + <word id="word_75" tei_id="morph_4.6.3-seg">za</word> + <word id="word_76" tei_id="morph_4.6.4-seg">jednym</word> + <word id="word_77" tei_id="morph_4.6.5-seg">zamachem</word> + <word id="word_78" tei_id="morph_4.6.6-seg">trzy</word> + <word id="word_79" tei_id="morph_4.6.7-seg">spółki</word> + <word id="word_80" tei_id="morph_4.6.8-seg">połączyć</word> + <word id="word_81" tei_id="morph_4.6.9-seg">w</word> + <word id="word_82" tei_id="morph_4.6.10-seg">jedną</word> + 
<word id="word_83" tei_id="morph_4.6.11-seg">.</word> + <word id="word_84" tei_id="morph_4.7.1-seg">Przygotowany</word> + <word id="word_85" tei_id="morph_4.7.2-seg">jest</word> + <word id="word_86" tei_id="morph_4.7.3-seg">projekt</word> + <word id="word_87" tei_id="morph_4.7.4-seg">uchwały</word> + <word id="word_88" tei_id="morph_4.7.5-seg">,</word> + <word id="word_89" tei_id="morph_4.7.6-seg">który</word> + <word id="word_90" tei_id="morph_4.7.7-seg">przewiduje</word> + <word id="word_91" tei_id="morph_4.7.8-seg">wniesienie</word> + <word id="word_92" tei_id="morph_4.7.9-seg">udziałów</word> + <word id="word_93" tei_id="morph_4.7.10-seg">w</word> + <word id="word_94" tei_id="morph_4.7.11-seg">Towarzystwie</word> + <word id="word_95" tei_id="morph_4.7.12-seg">Budownictwa</word> + <word id="word_96" tei_id="morph_4.7.13-seg">Społecznego</word> + <word id="word_97" tei_id="morph_4.7.14-seg">"</word> + <word id="word_98" tei_id="morph_4.7.15-seg">Wielkopolska</word> + <word id="word_99" tei_id="morph_4.7.16-seg">"</word> + <word id="word_100" tei_id="morph_4.7.17-seg">oraz</word> + <word id="word_101" tei_id="morph_4.7.18-seg">Towarzystwie</word> + <word id="word_102" tei_id="morph_4.7.19-seg">Budownictwa</word> + <word id="word_103" tei_id="morph_4.7.20-seg">Społecznego</word> + <word id="word_104" tei_id="morph_4.7.21-seg">"</word> + <word id="word_105" tei_id="morph_4.7.22-seg">Nasz</word> + <word id="word_106" tei_id="morph_4.7.23-seg">Dom</word> + <word id="word_107" tei_id="morph_4.7.24-seg">"</word> + <word id="word_108" tei_id="morph_4.7.25-seg">do</word> + <word id="word_109" tei_id="morph_4.7.26-seg">Poznańskiego</word> + <word id="word_110" tei_id="morph_4.7.27-seg">Towarzystwa</word> + <word id="word_111" tei_id="morph_4.7.28-seg">Budownictwa</word> + <word id="word_112" tei_id="morph_4.7.29-seg">Społecznego</word> + <word id="word_113" tei_id="morph_4.7.30-seg">.</word> + <word id="word_114" tei_id="morph_4.8.1-seg">W</word> + <word id="word_115" 
tei_id="morph_4.8.2-seg">piątek</word> + <word id="word_116" tei_id="morph_4.8.3-seg">opiniować</word> + <word id="word_117" tei_id="morph_4.8.4-seg">tę</word> + <word id="word_118" tei_id="morph_4.8.5-seg">propozycję</word> + <word id="word_119" tei_id="morph_4.8.6-seg">będzie</word> + <word id="word_120" tei_id="morph_4.8.7-seg">Komisja</word> + <word id="word_121" tei_id="morph_4.8.8-seg">Gospodarki</word> + <word id="word_122" tei_id="morph_4.8.9-seg">Komunalnej</word> + <word id="word_123" tei_id="morph_4.8.10-seg">i</word> + <word id="word_124" tei_id="morph_4.8.11-seg">Polityki</word> + <word id="word_125" tei_id="morph_4.8.12-seg">Mieszkaniowej</word> + <word id="word_126" tei_id="morph_4.8.13-seg">,</word> + <word id="word_127" tei_id="morph_4.8.14-seg">a</word> + <word id="word_128" tei_id="morph_4.8.15-seg">we</word> + <word id="word_129" tei_id="morph_4.8.16-seg">wtorek</word> + <word id="word_130" tei_id="morph_4.8.17-seg">zajmie</word> + <word id="word_131" tei_id="morph_4.8.18-seg">się</word> + <word id="word_132" tei_id="morph_4.8.19-seg">nią</word> + <word id="word_133" tei_id="morph_4.8.20-seg">Rada</word> + <word id="word_134" tei_id="morph_4.8.21-seg">Miasta</word> + <word id="word_135" tei_id="morph_4.8.22-seg" lastinpar="true">.</word> + <word id="word_136" tei_id="morph_5.9.1-seg">-</word> + <word id="word_137" tei_id="morph_5.9.2-seg">Pomysł</word> + <word id="word_138" tei_id="morph_5.9.3-seg">połączenia</word> + <word id="word_139" tei_id="morph_5.9.4-seg">TBS-ów</word> + <word id="word_140" tei_id="morph_5.9.5-seg">nie</word> + <word id="word_141" tei_id="morph_5.9.6-seg">budzi</word> + <word id="word_142" tei_id="morph_5.9.7-seg">wątpliwości</word> + <word id="word_143" tei_id="morph_5.9.8-seg">z</word> + <word id="word_144" tei_id="morph_5.9.9-seg">punktu</word> + <word id="word_145" tei_id="morph_5.9.10-seg">widzenia</word> + <word id="word_146" tei_id="morph_5.9.11-seg">racjonalizacji</word> + <word id="word_147" 
tei_id="morph_5.9.12-seg">kosztów</word> + <word id="word_148" tei_id="morph_5.9.13-seg">-</word> + <word id="word_149" tei_id="morph_5.9.14-seg">twierdzi</word> + <word id="word_150" tei_id="morph_5.9.15-seg">Tomasz</word> + <word id="word_151" tei_id="morph_5.9.16-seg">Lewandowski</word> + <word id="word_152" tei_id="morph_5.9.17-seg">,</word> + <word id="word_153" tei_id="morph_5.9.18-seg">radny</word> + <word id="word_154" tei_id="morph_5.9.19-seg">LiD</word> + <word id="word_155" tei_id="morph_5.9.20-seg">i</word> + <word id="word_156" tei_id="morph_5.9.21-seg">członek</word> + <word id="word_157" tei_id="morph_5.9.22-seg">komisji</word> + <word id="word_158" tei_id="morph_5.9.23-seg">.</word> + <word id="word_159" tei_id="morph_5.9.24-seg">-</word> + <word id="word_160" tei_id="morph_5.9.25-seg">Potrzebna</word> + <word id="word_161" tei_id="morph_5.9.26-seg">jest</word> + <word id="word_162" tei_id="morph_5.9.27-seg">jednak</word> + <word id="word_163" tei_id="morph_5.9.28-seg">dyskusja</word> + <word id="word_164" tei_id="morph_5.9.29-seg">o</word> + <word id="word_165" tei_id="morph_5.9.30-seg">przyszłości</word> + <word id="word_166" tei_id="morph_5.9.31-seg">towarzystw</word> + <word id="word_167" tei_id="morph_5.9.32-seg">.</word> + <word id="word_168" tei_id="morph_5.10.1-seg">Obecnie</word> + <word id="word_169" tei_id="morph_5.10.2-seg">rząd</word> + <word id="word_170" tei_id="morph_5.10.3-seg">pracuje</word> + <word id="word_171" tei_id="morph_5.10.4-seg">nad</word> + <word id="word_172" tei_id="morph_5.10.5-seg">zmianą</word> + <word id="word_173" tei_id="morph_5.10.6-seg">ustawy</word> + <word id="word_174" tei_id="morph_5.10.7-seg">,</word> + <word id="word_175" tei_id="morph_5.10.8-seg">która</word> + <word id="word_176" tei_id="morph_5.10.9-seg">przewiduje</word> + <word id="word_177" tei_id="morph_5.10.10-seg">wykup</word> + <word id="word_178" tei_id="morph_5.10.11-seg">mieszkań</word> + <word id="word_179" 
tei_id="morph_5.10.12-seg">w</word> + <word id="word_180" tei_id="morph_5.10.13-seg">towarzystwach</word> + <word id="word_181" tei_id="morph_5.10.14-seg">budownictwa</word> + <word id="word_182" tei_id="morph_5.10.15-seg">społecznego</word> + <word id="word_183" tei_id="morph_5.10.16-seg">.</word> + <word id="word_184" tei_id="morph_5.11.1-seg">To</word> + <word id="word_185" tei_id="morph_5.11.2-seg">stworzy</word> + <word id="word_186" tei_id="morph_5.11.3-seg">zupełnie</word> + <word id="word_187" tei_id="morph_5.11.4-seg">nową</word> + <word id="word_188" tei_id="morph_5.11.5-seg">sytuację</word> + <word id="word_189" tei_id="morph_5.11.6-seg">.</word> + <word id="word_190" tei_id="morph_5.12.1-seg">W</word> + <word id="word_191" tei_id="morph_5.12.2-seg">związku</word> + <word id="word_192" tei_id="morph_5.12.3-seg">z</word> + <word id="word_193" tei_id="morph_5.12.4-seg">tym</word> + <word id="word_194" tei_id="morph_5.12.5-seg">konieczne</word> + <word id="word_195" tei_id="morph_5.12.6-seg">będzie</word> + <word id="word_196" tei_id="morph_5.12.7-seg">podjęcie</word> + <word id="word_197" tei_id="morph_5.12.8-seg">odpowiednich</word> + <word id="word_198" tei_id="morph_5.12.9-seg">kroków</word> + <word id="word_199" tei_id="morph_5.12.10-seg">przez</word> + <word id="word_200" tei_id="morph_5.12.11-seg">miasto</word> + <word id="word_201" tei_id="morph_5.12.12-seg" lastinpar="true">.</word> + <word id="word_202" tei_id="morph_6.13.1-seg">Norbert</word> + <word id="word_203" tei_id="morph_6.13.2-seg">Napieraj</word> + <word id="word_204" tei_id="morph_6.13.3-seg">,</word> + <word id="word_205" tei_id="morph_6.13.4-seg">szef</word> + <word id="word_206" tei_id="morph_6.13.5-seg">klubu</word> + <word id="word_207" tei_id="morph_6.13.6-seg">radnych</word> + <word id="word_208" tei_id="morph_6.13.7-seg">PiS</word> + <word id="word_209" tei_id="morph_6.13.8-seg">również</word> + <word id="word_210" tei_id="morph_6.13.9-seg">uważa</word> + <word id="word_211" 
tei_id="morph_6.13.10-seg">,</word> + <word id="word_212" tei_id="morph_6.13.11-seg">że</word> + <word id="word_213" tei_id="morph_6.13.12-seg">ze</word> + <word id="word_214" tei_id="morph_6.13.13-seg">względów</word> + <word id="word_215" tei_id="morph_6.13.14-seg">ekonomicznych</word> + <word id="word_216" tei_id="morph_6.13.15-seg">utworzenie</word> + <word id="word_217" tei_id="morph_6.13.16-seg">jednej</word> + <word id="word_218" tei_id="morph_6.13.17-seg">spółki</word> + <word id="word_219" tei_id="morph_6.13.18-seg">jest</word> + <word id="word_220" tei_id="morph_6.13.19-seg">zasadne</word> + <word id="word_221" tei_id="morph_6.13.20-seg" lastinpar="true">.</word> + <word id="word_222" tei_id="morph_7.14.1-seg">-</word> + <word id="word_223" tei_id="morph_7.14.2-seg">Na</word> + <word id="word_224" tei_id="morph_7.14.3-seg">razie</word> + <word id="word_225" tei_id="morph_7.14.4-seg">jest</word> + <word id="word_226" tei_id="morph_7.14.5-seg">to</word> + <word id="word_227" tei_id="morph_7.14.6-seg">jednak</word> + <word id="word_228" tei_id="morph_7.14.7-seg">luźny</word> + <word id="word_229" tei_id="morph_7.14.8-seg">pomysł</word> + <word id="word_230" tei_id="morph_7.14.9-seg">.</word> + <word id="word_231" tei_id="morph_7.15.1-seg">Nie</word> + <word id="word_232" tei_id="morph_7.15.2-seg">ma</word> + <word id="word_233" tei_id="morph_7.15.3-seg">konkretów</word> + <word id="word_234" tei_id="morph_7.15.4-seg">-</word> + <word id="word_235" tei_id="morph_7.15.5-seg">dodaje</word> + <word id="word_236" tei_id="morph_7.15.6-seg">N</word> + <word id="word_237" tei_id="morph_7.15.7-seg">.</word> + <word id="word_238" tei_id="morph_7.16.1-seg">Napieraj</word> + <word id="word_239" tei_id="morph_7.16.2-seg">.</word> + <word id="word_240" tei_id="morph_7.16.3-seg">-</word> + <word id="word_241" tei_id="morph_7.16.4-seg">Nasz</word> + <word id="word_242" tei_id="morph_7.16.5-seg">klub</word> + <word id="word_243" tei_id="morph_7.16.6-seg">jeszcze</word> + 
<word id="word_244" tei_id="morph_7.16.7-seg">nie</word> + <word id="word_245" tei_id="morph_7.16.8-seg">wypracował</word> + <word id="word_246" tei_id="morph_7.16.9-seg">w</word> + <word id="word_247" tei_id="morph_7.16.10-seg">sprawie</word> + <word id="word_248" tei_id="morph_7.16.11-seg">tej</word> + <word id="word_249" tei_id="morph_7.16.12-seg">uchwały</word> + <word id="word_250" tei_id="morph_7.16.13-seg">stanowiska</word> + <word id="word_251" tei_id="morph_7.16.14-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_sys/2.mmax a/src/test/resources/teksty_mmax/teksty_sys/2.mmax new file mode 100755 index 0000000..babfe74 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/2.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>2_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_sys/2_mentions.xml a/src/test/resources/teksty_mmax/teksty_sys/2_mentions.xml new file mode 100755 index 0000000..97b716a --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/2_mentions.xml @@ -0,0 +1,98 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_3" mmax_level="mention" mention_head="zatrudnienie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_5" mmax_level="mention" mention_head="Fundacji" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_6..word_8" mmax_level="mention" mention_head="Spotkania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_10" mmax_level="mention" mention_head="Kultury" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_9..word_10" mmax_level="mention" mention_head="Centrum" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" 
span="word_5..word_10" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_13" mmax_level="mention" mention_head="prezesem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_8" span="word_17" mmax_level="mention" mention_head="spółki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_18" mmax_level="mention" mention_head="Zamek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_21" mmax_level="mention" mention_head="międzyczasie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_23" mmax_level="mention" mention_head="radnym" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_12" span="word_27..word_28" mmax_level="mention" mention_head="kadencji" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_30" mmax_level="mention" mention_head="Gminy" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_14" span="word_29..word_30" mmax_level="mention" mention_head="Rada" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_15" span="word_33" mmax_level="mention" mention_head="go" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_16" span="word_35" mmax_level="mention" mention_head="wójta" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_17" span="word_42" mmax_level="mention" mention_head="wójtem" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_18" span="word_44" mmax_level="mention" mention_head="wyborach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_19" span="word_48" mmax_level="mention" mention_head="Co" mention_group="set_3" near_identity="empty"></markable> + 
<markable id="markable_20" span="word_50" mmax_level="mention" mention_head="mnie" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_21" span="word_52" mmax_level="mention" mention_head="zostania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_53" mmax_level="mention" mention_head="samorządowcem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_58" mmax_level="mention" mention_head="życia" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_24" span="word_59" mmax_level="mention" mention_head="mężczyzny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_56..word_59" mmax_level="mention" mention_head="momencie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_70..word_71" mmax_level="mention" mention_head="życiu" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_27" span="word_74..word_75" mmax_level="mention" mention_head="kandydat" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_78" mmax_level="mention" mention_head="Wójta" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_29" span="word_79" mmax_level="mention" mention_head="Pomorza" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_30" span="word_77..word_79" mmax_level="mention" mention_head="tytułu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_31" span="word_85" mmax_level="mention" mention_head="mnie" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_32" span="word_86..word_87" mmax_level="mention" mention_head="sprawy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_96" mmax_level="mention" mention_head="Co" 
mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_34" span="word_98" mmax_level="mention" mention_head="gminy" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_35" span="word_98..word_99" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_106..word_107" mmax_level="mention" mention_head="porażkę" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_37" span="word_102..word_107" mmax_level="mention" mention_head="sukces" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_38" span="word_110" mmax_level="mention" mention_head="Sukcesem" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_39" span="word_112" mmax_level="mention" mention_head="to" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_40" span="word_120..word_121" mmax_level="mention" mention_head="zagospodarowania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_119..word_121" mmax_level="mention" mention_head="plany" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_123" mmax_level="mention" mention_head="Gotowe" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_43" span="word_129" mmax_level="mention" mention_head="części" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_44" span="word_132..word_133" mmax_level="mention" mention_head="Tych" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_135..word_136" mmax_level="mention" mention_head="lat" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_46" span="word_143" mmax_level="mention" mention_head="wójt" mention_group="set_10" near_identity="empty"></markable> + 
<markable id="markable_47" span="word_148" mmax_level="mention" mention_head="porażkę" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_48" span="word_151" mmax_level="mention" mention_head="Rady" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_49" span="word_150..word_152" mmax_level="mention" mention_head="decyzję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_161" mmax_level="mention" mention_head="Gmin" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_160..word_161" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_52" span="word_159..word_161" mmax_level="mention" mention_head="Związku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_164" mmax_level="mention" mention_head="uporządkowania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_165..word_166" mmax_level="mention" mention_head="gospodarki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_163..word_166" mmax_level="mention" mention_head="programu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_173" mmax_level="mention" mention_head="euro" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_189" mmax_level="mention" mention_head="lat" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_58" span="word_192" mmax_level="mention" mention_head="Henryka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_195" mmax_level="mention" mention_head="narty" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_198" mmax_level="mention" 
mention_head="urlop" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_61" span="word_208..word_209" mmax_level="mention" mention_head="Poręby" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_62" span="word_207..word_209" mmax_level="mention" mention_head="stoki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_63" span="word_212..word_213" mmax_level="mention" mention_head="zimy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_221" mmax_level="mention" mention_head="wójt" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_65" span="word_226" mmax_level="mention" mention_head="wolnego" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_225..word_226" mmax_level="mention" mention_head="Czasu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_229" mmax_level="mention" mention_head="mało" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_68" span="word_239" mmax_level="mention" mention_head="go" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_69" span="word_243" mmax_level="mention" mention_head="bliskimi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_70" span="word_245..word_246" mmax_level="mention" mention_head="plebiscyt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_71" span="word_247..word_250" mmax_level="mention" mention_head="Dziennik" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_72" span="word_252..word_254" mmax_level="mention" mention_head="edycję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_73" span="word_256" mmax_level="mention" mention_head="Pomorza" mention_group="set_8" 
near_identity="empty"></markable> + <markable id="markable_74" span="word_255..word_256" mmax_level="mention" mention_head="Wójt" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_75" span="word_258..word_259" mmax_level="mention" mention_head="powiat" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_76" span="word_261..word_264" mmax_level="mention" mention_head="włodarze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_77" span="word_267..word_268" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_78" span="word_273" mmax_level="mention" mention_head="Tadeusz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_79" span="word_273..word_274" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_80" span="word_277" mmax_level="mention" mention_head="Puck" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_81" span="word_276..word_277" mmax_level="mention" mention_head="gmina" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_82" span="word_280" mmax_level="mention" mention_head="Jerzy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_83" span="word_280..word_281" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_84" span="word_283" mmax_level="mention" mention_head="Kosakowo" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_85" span="word_289" mmax_level="mention" mention_head="kolegów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_86" span="word_287..word_289" mmax_level="mention" mention_head="gronie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_87" 
span="word_291" mmax_level="mention" mention_head="fachu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_88" span="word_296..word_297" mmax_level="mention" mention_head="wójta" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_89" span="word_298" mmax_level="mention" mention_head="województwa" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_90" span="word_295..word_298" mmax_level="mention" mention_head="miano" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_91" span="word_301" mmax_level="mention" mention_head="tym" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_92" span="word_303" mmax_level="mention" mention_head="kto" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_93" span="word_307..word_308" mmax_level="mention" mention_head="głosami" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_94" span="word_309..word_312" mmax_level="mention" mention_head="Czytelnicy" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_sys/2_words.xml a/src/test/resources/teksty_mmax/teksty_sys/2_words.xml new file mode 100755 index 0000000..71829a1 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/2_words.xml @@ -0,0 +1,317 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Potem</word> + <word id="word_2" tei_id="morph_1.1.2-seg">znalazł</word> + <word id="word_3" tei_id="morph_1.1.3-seg">zatrudnienie</word> + <word id="word_4" tei_id="morph_1.1.4-seg">w</word> + <word id="word_5" tei_id="morph_1.1.5-seg">Fundacji</word> + <word id="word_6" tei_id="morph_1.1.6-seg">Europejskie</word> + <word id="word_7" tei_id="morph_1.1.7-seg">Spotkania</word> + <word id="word_8" 
tei_id="morph_1.1.8-seg">Kaszubskie</word> + <word id="word_9" tei_id="morph_1.1.9-seg">Centrum</word> + <word id="word_10" tei_id="morph_1.1.10-seg">Kultury</word> + <word id="word_11" tei_id="morph_1.1.11-seg">.</word> + <word id="word_12" tei_id="morph_1.2.1-seg">Był</word> + <word id="word_13" tei_id="morph_1.2.2-seg">prezesem</word> + <word id="word_14" tei_id="morph_1.2.3-seg">utworzonej</word> + <word id="word_15" tei_id="morph_1.2.4-seg">przez</word> + <word id="word_16" tei_id="morph_1.2.5-seg">fundację</word> + <word id="word_17" tei_id="morph_1.2.6-seg">spółki</word> + <word id="word_18" tei_id="morph_1.2.7-seg">Zamek</word> + <word id="word_19" tei_id="morph_1.2.8-seg" lastinpar="true">.</word> + <word id="word_20" tei_id="morph_2.3.1-seg">W</word> + <word id="word_21" tei_id="morph_2.3.2-seg">międzyczasie</word> + <word id="word_22" tei_id="morph_2.3.3-seg">został</word> + <word id="word_23" tei_id="morph_2.3.4-seg">radnym</word> + <word id="word_24" tei_id="morph_2.3.5-seg">.</word> + <word id="word_25" tei_id="morph_2.4.1-seg">Pod</word> + <word id="word_26" tei_id="morph_2.4.2-seg">koniec</word> + <word id="word_27" tei_id="morph_2.4.3-seg">ubiegłej</word> + <word id="word_28" tei_id="morph_2.4.4-seg">kadencji</word> + <word id="word_29" tei_id="morph_2.4.5-seg">Rada</word> + <word id="word_30" tei_id="morph_2.4.6-seg">Gminy</word> + <word id="word_31" tei_id="morph_2.4.7-seg">Krokowa</word> + <word id="word_32" tei_id="morph_2.4.8-seg">wybrała</word> + <word id="word_33" tei_id="morph_2.4.9-seg">go</word> + <word id="word_34" tei_id="morph_2.4.10-seg">na</word> + <word id="word_35" tei_id="morph_2.4.11-seg">wójta</word> + <word id="word_36" tei_id="morph_2.4.12-seg">.</word> + <word id="word_37" tei_id="morph_2.5.1-seg">Jesienią</word> + <word id="word_38" tei_id="morph_2.5.2-seg">2002</word> + <word id="word_39" tei_id="morph_2.5.3-seg">r</word> + <word id="word_40" tei_id="morph_2.5.4-seg">.</word> + <word id="word_41" 
tei_id="morph_2.5.5-seg">został</word> + <word id="word_42" tei_id="morph_2.5.6-seg">wójtem</word> + <word id="word_43" tei_id="morph_2.5.7-seg">w</word> + <word id="word_44" tei_id="morph_2.5.8-seg">wyborach</word> + <word id="word_45" tei_id="morph_2.5.9-seg">powszechnych</word> + <word id="word_46" tei_id="morph_2.5.10-seg" lastinpar="true">.</word> + <word id="word_47" tei_id="morph_3.6.1-seg">-</word> + <word id="word_48" tei_id="morph_3.6.2-seg">Co</word> + <word id="word_49" tei_id="morph_3.6.3-seg">skłoniło</word> + <word id="word_50" tei_id="morph_3.6.4-seg">mnie</word> + <word id="word_51" tei_id="morph_3.6.5-seg">do</word> + <word id="word_52" tei_id="morph_3.6.6-seg">zostania</word> + <word id="word_53" tei_id="morph_3.6.7-seg">samorządowcem</word> + <word id="word_54" tei_id="morph_3.6.8-seg">?</word> + <word id="word_55" tei_id="morph_3.7.1-seg">W</word> + <word id="word_56" tei_id="morph_3.7.2-seg">pewnym</word> + <word id="word_57" tei_id="morph_3.7.3-seg">momencie</word> + <word id="word_58" tei_id="morph_3.7.4-seg">życia</word> + <word id="word_59" tei_id="morph_3.7.5-seg">mężczyzny</word> + <word id="word_60" tei_id="morph_3.7.6-seg">przychodzi</word> + <word id="word_61" tei_id="morph_3.7.7-seg">taka</word> + <word id="word_62" tei_id="morph_3.7.8-seg">potrzeba</word> + <word id="word_63" tei_id="morph_3.7.9-seg">,</word> + <word id="word_64" tei_id="morph_3.7.10-seg">aby</word> + <word id="word_65" tei_id="morph_3.7.11-seg">sprawdzić</word> + <word id="word_66" tei_id="morph_3.7.12-seg">się</word> + <word id="word_67" tei_id="morph_3.7.13-seg">np</word> + <word id="word_68" tei_id="morph_3.7.14-seg">.</word> + <word id="word_69" tei_id="morph_3.7.15-seg">w</word> + <word id="word_70" tei_id="morph_3.7.16-seg">życiu</word> + <word id="word_71" tei_id="morph_3.7.17-seg">publicznym</word> + <word id="word_72" tei_id="morph_3.7.18-seg">-</word> + <word id="word_73" tei_id="morph_3.7.19-seg">twierdzi</word> + <word id="word_74" 
tei_id="morph_3.7.20-seg">krokowski</word> + <word id="word_75" tei_id="morph_3.7.21-seg">kandydat</word> + <word id="word_76" tei_id="morph_3.7.22-seg">do</word> + <word id="word_77" tei_id="morph_3.7.23-seg">tytułu</word> + <word id="word_78" tei_id="morph_3.7.24-seg">Wójta</word> + <word id="word_79" tei_id="morph_3.7.25-seg">Pomorza</word> + <word id="word_80" tei_id="morph_3.7.26-seg">.</word> + <word id="word_81" tei_id="morph_3.7.27-seg">-</word> + <word id="word_82" tei_id="morph_3.7.28-seg">Poza</word> + <word id="word_83" tei_id="morph_3.7.29-seg">tym</word> + <word id="word_84" tei_id="morph_3.7.30-seg">interesowały</word> + <word id="word_85" tei_id="morph_3.7.31-seg">mnie</word> + <word id="word_86" tei_id="morph_3.7.32-seg">sprawy</word> + <word id="word_87" tei_id="morph_3.7.33-seg">komunalne</word> + <word id="word_88" tei_id="morph_3.7.34-seg">.</word> + <word id="word_89" tei_id="morph_3.8.1-seg">Chciał</word> + <word id="word_90" tei_id="morph_3.8.2-seg">em</word> + <word id="word_91" tei_id="morph_3.8.3-seg">się</word> + <word id="word_92" tei_id="morph_3.8.4-seg">nimi</word> + <word id="word_93" tei_id="morph_3.8.5-seg">bliżej</word> + <word id="word_94" tei_id="morph_3.8.6-seg">zająć</word> + <word id="word_95" tei_id="morph_3.8.7-seg" lastinpar="true">.</word> + <word id="word_96" tei_id="morph_4.9.1-seg">Co</word> + <word id="word_97" tei_id="morph_4.9.2-seg">wójt</word> + <word id="word_98" tei_id="morph_4.9.3-seg">gminy</word> + <word id="word_99" tei_id="morph_4.9.4-seg">Krokowa</word> + <word id="word_100" tei_id="morph_4.9.5-seg">uważa</word> + <word id="word_101" tei_id="morph_4.9.6-seg">za</word> + <word id="word_102" tei_id="morph_4.9.7-seg">swój</word> + <word id="word_103" tei_id="morph_4.9.8-seg">największy</word> + <word id="word_104" tei_id="morph_4.9.9-seg">sukces</word> + <word id="word_105" tei_id="morph_4.9.10-seg">i</word> + <word id="word_106" tei_id="morph_4.9.11-seg">największą</word> + <word id="word_107" 
tei_id="morph_4.9.12-seg">porażkę</word> + <word id="word_108" tei_id="morph_4.9.13-seg" lastinpar="true">?</word> + <word id="word_109" tei_id="morph_5.10.1-seg">-</word> + <word id="word_110" tei_id="morph_5.10.2-seg">Sukcesem</word> + <word id="word_111" tei_id="morph_5.10.3-seg">jest</word> + <word id="word_112" tei_id="morph_5.10.4-seg">to</word> + <word id="word_113" tei_id="morph_5.10.5-seg">,</word> + <word id="word_114" tei_id="morph_5.10.6-seg">że</word> + <word id="word_115" tei_id="morph_5.10.7-seg">udaje</word> + <word id="word_116" tei_id="morph_5.10.8-seg">się</word> + <word id="word_117" tei_id="morph_5.10.9-seg">wreszcie</word> + <word id="word_118" tei_id="morph_5.10.10-seg">opracowywać</word> + <word id="word_119" tei_id="morph_5.10.11-seg">plany</word> + <word id="word_120" tei_id="morph_5.10.12-seg">zagospodarowania</word> + <word id="word_121" tei_id="morph_5.10.13-seg">przestrzennego</word> + <word id="word_122" tei_id="morph_5.10.14-seg">.</word> + <word id="word_123" tei_id="morph_5.11.1-seg">Gotowe</word> + <word id="word_124" tei_id="morph_5.11.2-seg">są</word> + <word id="word_125" tei_id="morph_5.11.3-seg">już</word> + <word id="word_126" tei_id="morph_5.11.4-seg">dla</word> + <word id="word_127" tei_id="morph_5.11.5-seg">Białogóry</word> + <word id="word_128" tei_id="morph_5.11.6-seg">i</word> + <word id="word_129" tei_id="morph_5.11.7-seg">części</word> + <word id="word_130" tei_id="morph_5.11.8-seg">Dębek</word> + <word id="word_131" tei_id="morph_5.11.9-seg">.</word> + <word id="word_132" tei_id="morph_5.12.1-seg">Tych</word> + <word id="word_133" tei_id="morph_5.12.2-seg">ostatnich</word> + <word id="word_134" tei_id="morph_5.12.3-seg">przez</word> + <word id="word_135" tei_id="morph_5.12.4-seg">wiele</word> + <word id="word_136" tei_id="morph_5.12.5-seg">lat</word> + <word id="word_137" tei_id="morph_5.12.6-seg">nie</word> + <word id="word_138" tei_id="morph_5.12.7-seg">można</word> + <word id="word_139" 
tei_id="morph_5.12.8-seg">było</word> + <word id="word_140" tei_id="morph_5.12.9-seg">uchwalić</word> + <word id="word_141" tei_id="morph_5.12.10-seg">-</word> + <word id="word_142" tei_id="morph_5.12.11-seg">uważa</word> + <word id="word_143" tei_id="morph_5.12.12-seg">wójt</word> + <word id="word_144" tei_id="morph_5.12.13-seg">.</word> + <word id="word_145" tei_id="morph_5.12.14-seg">-</word> + <word id="word_146" tei_id="morph_5.12.15-seg">Natomiast</word> + <word id="word_147" tei_id="morph_5.12.16-seg">za</word> + <word id="word_148" tei_id="morph_5.12.17-seg">porażkę</word> + <word id="word_149" tei_id="morph_5.12.18-seg">uważam</word> + <word id="word_150" tei_id="morph_5.12.19-seg">decyzję</word> + <word id="word_151" tei_id="morph_5.12.20-seg">Rady</word> + <word id="word_152" tei_id="morph_5.12.21-seg">gminy</word> + <word id="word_153" tei_id="morph_5.12.22-seg">,</word> + <word id="word_154" tei_id="morph_5.12.23-seg">aby</word> + <word id="word_155" tei_id="morph_5.12.24-seg">nie</word> + <word id="word_156" tei_id="morph_5.12.25-seg">przystępować</word> + <word id="word_157" tei_id="morph_5.12.26-seg">w</word> + <word id="word_158" tei_id="morph_5.12.27-seg">ramach</word> + <word id="word_159" tei_id="morph_5.12.28-seg">Komunalnego</word> + <word id="word_160" tei_id="morph_5.12.29-seg">Związku</word> + <word id="word_161" tei_id="morph_5.12.30-seg">Gmin</word> + <word id="word_162" tei_id="morph_5.12.31-seg">do</word> + <word id="word_163" tei_id="morph_5.12.32-seg">programu</word> + <word id="word_164" tei_id="morph_5.12.33-seg">uporządkowania</word> + <word id="word_165" tei_id="morph_5.12.34-seg">gospodarki</word> + <word id="word_166" tei_id="morph_5.12.35-seg">ściekowej</word> + <word id="word_167" tei_id="morph_5.12.36-seg">.</word> + <word id="word_168" tei_id="morph_5.13.1-seg">Mogli</word> + <word id="word_169" tei_id="morph_5.13.2-seg">śmy</word> + <word id="word_170" tei_id="morph_5.13.3-seg">uzyskać</word> + <word id="word_171" 
tei_id="morph_5.13.4-seg">wiele</word> + <word id="word_172" tei_id="morph_5.13.5-seg">milionów</word> + <word id="word_173" tei_id="morph_5.13.6-seg">euro</word> + <word id="word_174" tei_id="morph_5.13.7-seg">.</word> + <word id="word_175" tei_id="morph_5.14.1-seg">Boję</word> + <word id="word_176" tei_id="morph_5.14.2-seg">się</word> + <word id="word_177" tei_id="morph_5.14.3-seg">,</word> + <word id="word_178" tei_id="morph_5.14.4-seg">że</word> + <word id="word_179" tei_id="morph_5.14.5-seg">to</word> + <word id="word_180" tei_id="morph_5.14.6-seg">nie</word> + <word id="word_181" tei_id="morph_5.14.7-seg">tylko</word> + <word id="word_182" tei_id="morph_5.14.8-seg">moja</word> + <word id="word_183" tei_id="morph_5.14.9-seg">porażka</word> + <word id="word_184" tei_id="morph_5.14.10-seg">.</word> + <word id="word_185" tei_id="morph_5.14.11-seg">.</word> + <word id="word_186" tei_id="morph_5.14.12-seg" lastinpar="true">.</word> + <word id="word_187" tei_id="morph_6.15.1-seg">Od</word> + <word id="word_188" tei_id="morph_6.15.2-seg">5</word> + <word id="word_189" tei_id="morph_6.15.3-seg">lat</word> + <word id="word_190" tei_id="morph_6.15.4-seg">ulubionym</word> + <word id="word_191" tei_id="morph_6.15.5-seg">hobby</word> + <word id="word_192" tei_id="morph_6.15.6-seg">Henryka</word> + <word id="word_193" tei_id="morph_6.15.7-seg">Doeringa</word> + <word id="word_194" tei_id="morph_6.15.8-seg">są</word> + <word id="word_195" tei_id="morph_6.15.9-seg">narty</word> + <word id="word_196" tei_id="morph_6.15.10-seg">.</word> + <word id="word_197" tei_id="morph_6.16.1-seg">Dlatego</word> + <word id="word_198" tei_id="morph_6.16.2-seg">urlop</word> + <word id="word_199" tei_id="morph_6.16.3-seg">najchętniej</word> + <word id="word_200" tei_id="morph_6.16.4-seg">bierze</word> + <word id="word_201" tei_id="morph_6.16.5-seg">zimą</word> + <word id="word_202" tei_id="morph_6.16.6-seg">,</word> + <word id="word_203" tei_id="morph_6.16.7-seg">aby</word> + <word 
id="word_204" tei_id="morph_6.16.8-seg">udać</word> + <word id="word_205" tei_id="morph_6.16.9-seg">się</word> + <word id="word_206" tei_id="morph_6.16.10-seg">na</word> + <word id="word_207" tei_id="morph_6.16.11-seg">stoki</word> + <word id="word_208" tei_id="morph_6.16.12-seg">Szklarskiej</word> + <word id="word_209" tei_id="morph_6.16.13-seg">Poręby</word> + <word id="word_210" tei_id="morph_6.16.14-seg" lastinpar="true">.</word> + <word id="word_211" tei_id="morph_7.17.1-seg">-</word> + <word id="word_212" tei_id="morph_7.17.2-seg">Tej</word> + <word id="word_213" tei_id="morph_7.17.3-seg">zimy</word> + <word id="word_214" tei_id="morph_7.17.4-seg">niestety</word> + <word id="word_215" tei_id="morph_7.17.5-seg">nie</word> + <word id="word_216" tei_id="morph_7.17.6-seg">mogł</word> + <word id="word_217" tei_id="morph_7.17.7-seg">em</word> + <word id="word_218" tei_id="morph_7.17.8-seg">wyjechać</word> + <word id="word_219" tei_id="morph_7.17.9-seg">-</word> + <word id="word_220" tei_id="morph_7.17.10-seg">przyznaje</word> + <word id="word_221" tei_id="morph_7.17.11-seg">wójt</word> + <word id="word_222" tei_id="morph_7.17.12-seg">Krokowej</word> + <word id="word_223" tei_id="morph_7.17.13-seg">.</word> + <word id="word_224" tei_id="morph_7.17.14-seg">-</word> + <word id="word_225" tei_id="morph_7.17.15-seg">Czasu</word> + <word id="word_226" tei_id="morph_7.17.16-seg">wolnego</word> + <word id="word_227" tei_id="morph_7.17.17-seg">mam</word> + <word id="word_228" tei_id="morph_7.17.18-seg">bardzo</word> + <word id="word_229" tei_id="morph_7.17.19-seg">mało</word> + <word id="word_230" tei_id="morph_7.17.20-seg">,</word> + <word id="word_231" tei_id="morph_7.17.21-seg">jeśli</word> + <word id="word_232" tei_id="morph_7.17.22-seg">się</word> + <word id="word_233" tei_id="morph_7.17.23-seg">taki</word> + <word id="word_234" tei_id="morph_7.17.24-seg">pojawia</word> + <word id="word_235" tei_id="morph_7.17.25-seg">,</word> + <word id="word_236" 
tei_id="morph_7.17.26-seg">to</word> + <word id="word_237" tei_id="morph_7.17.27-seg">staram</word> + <word id="word_238" tei_id="morph_7.17.28-seg">się</word> + <word id="word_239" tei_id="morph_7.17.29-seg">go</word> + <word id="word_240" tei_id="morph_7.17.30-seg">spędzać</word> + <word id="word_241" tei_id="morph_7.17.31-seg">razem</word> + <word id="word_242" tei_id="morph_7.17.32-seg">z</word> + <word id="word_243" tei_id="morph_7.17.33-seg">bliskimi</word> + <word id="word_244" tei_id="morph_7.17.34-seg" lastinpar="true">.</word> + <word id="word_245" tei_id="morph_8.18.1-seg">Nasz</word> + <word id="word_246" tei_id="morph_8.18.2-seg" lastinpar="true">plebiscyt</word> + <word id="word_247" tei_id="morph_9.19.1-seg">„</word> + <word id="word_248" tei_id="morph_9.19.2-seg">Dziennik</word> + <word id="word_249" tei_id="morph_9.19.3-seg">Bałtycki</word> + <word id="word_250" tei_id="morph_9.19.4-seg">”</word> + <word id="word_251" tei_id="morph_9.19.5-seg">rozpoczął</word> + <word id="word_252" tei_id="morph_9.19.6-seg">kolejną</word> + <word id="word_253" tei_id="morph_9.19.7-seg">edycję</word> + <word id="word_254" tei_id="morph_9.19.8-seg">konkursu</word> + <word id="word_255" tei_id="morph_9.19.9-seg">Wójt</word> + <word id="word_256" tei_id="morph_9.19.10-seg">Pomorza</word> + <word id="word_257" tei_id="morph_9.19.11-seg">.</word> + <word id="word_258" tei_id="morph_9.20.1-seg">Nasz</word> + <word id="word_259" tei_id="morph_9.20.2-seg">powiat</word> + <word id="word_260" tei_id="morph_9.20.3-seg">reprezentują</word> + <word id="word_261" tei_id="morph_9.20.4-seg">trzej</word> + <word id="word_262" tei_id="morph_9.20.5-seg">włodarze</word> + <word id="word_263" tei_id="morph_9.20.6-seg">gmin</word> + <word id="word_264" tei_id="morph_9.20.7-seg">wiejskich</word> + <word id="word_265" tei_id="morph_9.20.8-seg">.</word> + <word id="word_266" tei_id="morph_9.21.1-seg">To</word> + <word id="word_267" tei_id="morph_9.21.2-seg">Henryk</word> + <word 
id="word_268" tei_id="morph_9.21.3-seg">Doering</word> + <word id="word_269" tei_id="morph_9.21.4-seg">(</word> + <word id="word_270" tei_id="morph_9.21.5-seg">Krokowa</word> + <word id="word_271" tei_id="morph_9.21.6-seg">)</word> + <word id="word_272" tei_id="morph_9.21.7-seg">,</word> + <word id="word_273" tei_id="morph_9.21.8-seg">Tadeusz</word> + <word id="word_274" tei_id="morph_9.21.9-seg">Puszkarczuk</word> + <word id="word_275" tei_id="morph_9.21.10-seg">(</word> + <word id="word_276" tei_id="morph_9.21.11-seg">gmina</word> + <word id="word_277" tei_id="morph_9.21.12-seg">Puck</word> + <word id="word_278" tei_id="morph_9.21.13-seg">)</word> + <word id="word_279" tei_id="morph_9.21.14-seg">i</word> + <word id="word_280" tei_id="morph_9.21.15-seg">Jerzy</word> + <word id="word_281" tei_id="morph_9.21.16-seg">Włudzik</word> + <word id="word_282" tei_id="morph_9.21.17-seg">(</word> + <word id="word_283" tei_id="morph_9.21.18-seg">Kosakowo</word> + <word id="word_284" tei_id="morph_9.21.19-seg">)</word> + <word id="word_285" tei_id="morph_9.21.20-seg">.</word> + <word id="word_286" tei_id="morph_9.22.1-seg">W</word> + <word id="word_287" tei_id="morph_9.22.2-seg">gronie</word> + <word id="word_288" tei_id="morph_9.22.3-seg">kilkudziesięciu</word> + <word id="word_289" tei_id="morph_9.22.4-seg">kolegów</word> + <word id="word_290" tei_id="morph_9.22.5-seg">po</word> + <word id="word_291" tei_id="morph_9.22.6-seg">fachu</word> + <word id="word_292" tei_id="morph_9.22.7-seg">walczyć</word> + <word id="word_293" tei_id="morph_9.22.8-seg">będą</word> + <word id="word_294" tei_id="morph_9.22.9-seg">o</word> + <word id="word_295" tei_id="morph_9.22.10-seg">miano</word> + <word id="word_296" tei_id="morph_9.22.11-seg">najpopularniejszego</word> + <word id="word_297" tei_id="morph_9.22.12-seg">wójta</word> + <word id="word_298" tei_id="morph_9.22.13-seg">województwa</word> + <word id="word_299" tei_id="morph_9.22.14-seg">.</word> + <word id="word_300" 
tei_id="morph_9.23.1-seg">O</word> + <word id="word_301" tei_id="morph_9.23.2-seg">tym</word> + <word id="word_302" tei_id="morph_9.23.3-seg">,</word> + <word id="word_303" tei_id="morph_9.23.4-seg">kto</word> + <word id="word_304" tei_id="morph_9.23.5-seg">wygra</word> + <word id="word_305" tei_id="morph_9.23.6-seg">,</word> + <word id="word_306" tei_id="morph_9.23.7-seg">zadecydują</word> + <word id="word_307" tei_id="morph_9.23.8-seg">swoimi</word> + <word id="word_308" tei_id="morph_9.23.9-seg">głosami</word> + <word id="word_309" tei_id="morph_9.23.10-seg">Czytelnicy</word> + <word id="word_310" tei_id="morph_9.23.11-seg">„</word> + <word id="word_311" tei_id="morph_9.23.12-seg">Dziennika</word> + <word id="word_312" tei_id="morph_9.23.13-seg">”</word> + <word id="word_313" tei_id="morph_9.23.14-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_sys/3.mmax a/src/test/resources/teksty_mmax/teksty_sys/3.mmax new file mode 100755 index 0000000..6dae981 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/3.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>3_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_sys/3_mentions.xml a/src/test/resources/teksty_mmax/teksty_sys/3_mentions.xml new file mode 100755 index 0000000..3267f08 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/3_mentions.xml @@ -0,0 +1,71 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_1" mmax_level="mention" mention_head="Paweł" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_5" mmax_level="mention" mention_head="Frankfurt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_4..word_5" mmax_level="mention" mention_head="Eintracht" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_4" span="word_9" mmax_level="mention" mention_head="Ukraińcy" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_5" span="word_11" mmax_level="mention" mention_head="nam" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_12..word_13" mmax_level="mention" mention_head="wymagania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_15" mmax_level="mention" mention_head="Remis" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_8" span="word_22" mmax_level="mention" mention_head="szczęścia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_21..word_22" mmax_level="mention" mention_head="odrobinie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_29" mmax_level="mention" mention_head="siebie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_30" mmax_level="mention" mention_head="pretensję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_34..word_35" mmax_level="mention" mention_head="koncentracji" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_39" mmax_level="mention" mention_head="bramkę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_14" span="word_42" mmax_level="mention" mention_head="Jacek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_47" mmax_level="mention" mention_head="FC" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_62" mmax_level="mention" mention_head="eliminacje" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_17" span="word_65..word_66" 
mmax_level="mention" mention_head="strony" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_18" span="word_68" mmax_level="mention" mention_head="wszystko" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_19" span="word_74" mmax_level="mention" mention_head="kadry" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_80..word_81" mmax_level="mention" mention_head="pozycji" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_21" span="word_83..word_84" mmax_level="mention" mention_head="klubie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_89" mmax_level="mention" mention_head="awans" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_23" span="word_94" mmax_level="mention" mention_head="zespołem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_24" span="word_101..word_102" mmax_level="mention" mention_head="sposób" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_105" mmax_level="mention" mention_head="punktów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_111" mmax_level="mention" mention_head="Mecze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_27" span="word_113" mmax_level="mention" mention_head="Ukrainą" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_115" mmax_level="mention" mention_head="klamrą" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_29" span="word_116" mmax_level="mention" mention_head="eliminacje" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_30" span="word_122..word_123" mmax_level="mention" mention_head="końca" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_31" span="word_128" mmax_level="mention" mention_head="kibice" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_32" span="word_131..word_132" mmax_level="mention" mention_head="mecz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_133..word_135" mmax_level="mention" mention_head="zespołów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_34" span="word_139" mmax_level="mention" mention_head="Ukraińcy" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_35" span="word_137..word_139" mmax_level="mention" mention_head="zdaniem" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_36" span="word_142..word_143" mmax_level="mention" mention_head="miejsce" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_153" mmax_level="mention" mention_head="nimi" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_38" span="word_155" mmax_level="mention" mention_head="mundialu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_158..word_160" mmax_level="mention" mention_head="przeciwnik" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_40" span="word_169" mmax_level="mention" mention_head="Boniek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_168..word_169" mmax_level="mention" mention_head="Zbigniew" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_172" mmax_level="mention" mention_head="PZPN" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_43" span="word_171..word_172" mmax_level="mention" mention_head="wiceprezes" mention_group="empty" near_identity="empty"></markable> + <markable 
id="markable_44" span="word_182" mmax_level="mention" mention_head="sukcesem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_193" mmax_level="mention" mention_head="dziennikarze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_191..word_193" mmax_level="mention" mention_head="kibice" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_47" span="word_196" mmax_level="mention" mention_head="to" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_198" mmax_level="mention" mention_head="co" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_202" mmax_level="mention" mention_head="boisku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_207" mmax_level="mention" mention_head="uwagę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_210" mmax_level="mention" mention_head="sukcesów" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_52" span="word_211" mmax_level="mention" mention_head="piłkarzy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_209..word_211" mmax_level="mention" mention_head="kulisy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_215" mmax_level="mention" mention_head="one" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_55" span="word_216..word_218" mmax_level="mention" mention_head="wysiłek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_222" mmax_level="mention" mention_head="rzeszy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_223" mmax_level="mention" mention_head="ludzi" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_58" span="word_220..word_223" mmax_level="mention" mention_head="pracy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_229..word_230" mmax_level="mention" mention_head="szeregu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_237..word_238" mmax_level="mention" mention_head="zadania" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_61" span="word_240..word_241" mmax_level="mention" mention_head="zdaniem" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_62" span="word_247..word_248" mmax_level="mention" mention_head="sprawy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_63" span="word_252..word_253" mmax_level="mention" mention_head="poziomie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_255" mmax_level="mention" mention_head="wszyscy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_261" mmax_level="mention" mention_head="awansu" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_66" span="word_264" mmax_level="mention" mention_head="świata" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_263..word_264" mmax_level="mention" mention_head="mistrzostw" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_sys/3_words.xml a/src/test/resources/teksty_mmax/teksty_sys/3_words.xml new file mode 100755 index 0000000..f23ab57 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/3_words.xml @@ -0,0 +1,269 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Paweł</word> + <word 
id="word_2" tei_id="morph_1.1.2-seg" lastinpar="true">Kryszałowicz</word> + <word id="word_3" tei_id="morph_2.2.1-seg">(</word> + <word id="word_4" tei_id="morph_2.2.2-seg">Eintracht</word> + <word id="word_5" tei_id="morph_2.2.3-seg">Frankfurt</word> + <word id="word_6" tei_id="morph_2.2.4-seg">)</word> + <word id="word_7" tei_id="morph_2.2.5-seg">:</word> + <word id="word_8" tei_id="morph_2.2.6-seg">-</word> + <word id="word_9" tei_id="morph_2.2.7-seg">Ukraińcy</word> + <word id="word_10" tei_id="morph_2.2.8-seg">postawili</word> + <word id="word_11" tei_id="morph_2.2.9-seg">nam</word> + <word id="word_12" tei_id="morph_2.2.10-seg">wysokie</word> + <word id="word_13" tei_id="morph_2.2.11-seg">wymagania</word> + <word id="word_14" tei_id="morph_2.2.12-seg">.</word> + <word id="word_15" tei_id="morph_2.3.1-seg">Remis</word> + <word id="word_16" tei_id="morph_2.3.2-seg">jest</word> + <word id="word_17" tei_id="morph_2.3.3-seg">sprawiedliwy</word> + <word id="word_18" tei_id="morph_2.3.4-seg">,</word> + <word id="word_19" tei_id="morph_2.3.5-seg">choć</word> + <word id="word_20" tei_id="morph_2.3.6-seg">przy</word> + <word id="word_21" tei_id="morph_2.3.7-seg">odrobinie</word> + <word id="word_22" tei_id="morph_2.3.8-seg">szczęścia</word> + <word id="word_23" tei_id="morph_2.3.9-seg">mogli</word> + <word id="word_24" tei_id="morph_2.3.10-seg">śmy</word> + <word id="word_25" tei_id="morph_2.3.11-seg">wygrać</word> + <word id="word_26" tei_id="morph_2.3.12-seg">.</word> + <word id="word_27" tei_id="morph_2.4.1-seg">Mam</word> + <word id="word_28" tei_id="morph_2.4.2-seg">do</word> + <word id="word_29" tei_id="morph_2.4.3-seg">siebie</word> + <word id="word_30" tei_id="morph_2.4.4-seg">pretensję</word> + <word id="word_31" tei_id="morph_2.4.5-seg">,</word> + <word id="word_32" tei_id="morph_2.4.6-seg">bo</word> + <word id="word_33" tei_id="morph_2.4.7-seg">przy</word> + <word id="word_34" tei_id="morph_2.4.8-seg">lepszej</word> + <word id="word_35" 
tei_id="morph_2.4.9-seg">koncentracji</word> + <word id="word_36" tei_id="morph_2.4.10-seg">mogł</word> + <word id="word_37" tei_id="morph_2.4.11-seg">em</word> + <word id="word_38" tei_id="morph_2.4.12-seg">zdobyć</word> + <word id="word_39" tei_id="morph_2.4.13-seg">bramkę</word> + <word id="word_40" tei_id="morph_2.4.14-seg" lastinpar="true">.</word> + <word id="word_41" tei_id="morph_3.5.1-seg" lastinpar="true">ś</word> + <word id="word_42" tei_id="morph_4.6.1-seg">Jacek</word> + <word id="word_43" tei_id="morph_4.6.2-seg" lastinpar="true">Krzynówek</word> + <word id="word_44" tei_id="morph_5.7.1-seg">(</word> + <word id="word_45" tei_id="morph_5.7.2-seg">1</word> + <word id="word_46" tei_id="morph_5.7.3-seg">.</word> + <word id="word_47" tei_id="morph_5.8.1-seg">FC</word> + <word id="word_48" tei_id="morph_5.8.2-seg">Nuernberg</word> + <word id="word_49" tei_id="morph_5.8.3-seg">)</word> + <word id="word_50" tei_id="morph_5.8.4-seg">:</word> + <word id="word_51" tei_id="morph_5.8.5-seg">-</word> + <word id="word_52" tei_id="morph_5.8.6-seg">Cieszę</word> + <word id="word_53" tei_id="morph_5.8.7-seg">się</word> + <word id="word_54" tei_id="morph_5.8.8-seg">,</word> + <word id="word_55" tei_id="morph_5.8.9-seg">że</word> + <word id="word_56" tei_id="morph_5.8.10-seg">mogł</word> + <word id="word_57" tei_id="morph_5.8.11-seg">em</word> + <word id="word_58" tei_id="morph_5.8.12-seg">wystąpić</word> + <word id="word_59" tei_id="morph_5.8.13-seg">w</word> + <word id="word_60" tei_id="morph_5.8.14-seg">meczu</word> + <word id="word_61" tei_id="morph_5.8.15-seg">kończącym</word> + <word id="word_62" tei_id="morph_5.8.16-seg">eliminacje</word> + <word id="word_63" tei_id="morph_5.8.17-seg">.</word> + <word id="word_64" tei_id="morph_5.9.1-seg">Ze</word> + <word id="word_65" tei_id="morph_5.9.2-seg">swej</word> + <word id="word_66" tei_id="morph_5.9.3-seg">strony</word> + <word id="word_67" tei_id="morph_5.9.4-seg">zrobię</word> + <word id="word_68" 
tei_id="morph_5.9.5-seg">wszystko</word> + <word id="word_69" tei_id="morph_5.9.6-seg">,</word> + <word id="word_70" tei_id="morph_5.9.7-seg">aby</word> + <word id="word_71" tei_id="morph_5.9.8-seg">nie</word> + <word id="word_72" tei_id="morph_5.9.9-seg">wypaść</word> + <word id="word_73" tei_id="morph_5.9.10-seg">z</word> + <word id="word_74" tei_id="morph_5.9.11-seg">kadry</word> + <word id="word_75" tei_id="morph_5.9.12-seg">,</word> + <word id="word_76" tei_id="morph_5.9.13-seg">mimo</word> + <word id="word_77" tei_id="morph_5.9.14-seg">że</word> + <word id="word_78" tei_id="morph_5.9.15-seg">nie</word> + <word id="word_79" tei_id="morph_5.9.16-seg">mam</word> + <word id="word_80" tei_id="morph_5.9.17-seg">ugruntowanej</word> + <word id="word_81" tei_id="morph_5.9.18-seg">pozycji</word> + <word id="word_82" tei_id="morph_5.9.19-seg">w</word> + <word id="word_83" tei_id="morph_5.9.20-seg">swoim</word> + <word id="word_84" tei_id="morph_5.9.21-seg">klubie</word> + <word id="word_85" tei_id="morph_5.9.22-seg">.</word> + <word id="word_86" tei_id="morph_5.10.1-seg">Zasłużyli</word> + <word id="word_87" tei_id="morph_5.10.2-seg">śmy</word> + <word id="word_88" tei_id="morph_5.10.3-seg">na</word> + <word id="word_89" tei_id="morph_5.10.4-seg">awans</word> + <word id="word_90" tei_id="morph_5.10.5-seg">,</word> + <word id="word_91" tei_id="morph_5.10.6-seg">bo</word> + <word id="word_92" tei_id="morph_5.10.7-seg">byli</word> + <word id="word_93" tei_id="morph_5.10.8-seg">śmy</word> + <word id="word_94" tei_id="morph_5.10.9-seg">zespołem</word> + <word id="word_95" tei_id="morph_5.10.10-seg">,</word> + <word id="word_96" tei_id="morph_5.10.11-seg">który</word> + <word id="word_97" tei_id="morph_5.10.12-seg">grał</word> + <word id="word_98" tei_id="morph_5.10.13-seg">najrówniej</word> + <word id="word_99" tei_id="morph_5.10.14-seg">i</word> + <word id="word_100" tei_id="morph_5.10.15-seg">w</word> + <word id="word_101" tei_id="morph_5.10.16-seg">głupi</word> + <word 
id="word_102" tei_id="morph_5.10.17-seg">sposób</word> + <word id="word_103" tei_id="morph_5.10.18-seg">nie</word> + <word id="word_104" tei_id="morph_5.10.19-seg">tracił</word> + <word id="word_105" tei_id="morph_5.10.20-seg">punktów</word> + <word id="word_106" tei_id="morph_5.10.21-seg" lastinpar="true">.</word> + <word id="word_107" tei_id="morph_6.11.1-seg" lastinpar="true">ś</word> + <word id="word_108" tei_id="morph_7.12.1-seg">Marek</word> + <word id="word_109" tei_id="morph_7.12.2-seg">Koźmiński</word> + <word id="word_110" tei_id="morph_7.12.3-seg" lastinpar="true">:</word> + <word id="word_111" tei_id="morph_8.13.1-seg">Mecze</word> + <word id="word_112" tei_id="morph_8.13.2-seg">z</word> + <word id="word_113" tei_id="morph_8.13.3-seg">Ukrainą</word> + <word id="word_114" tei_id="morph_8.13.4-seg">spinają</word> + <word id="word_115" tei_id="morph_8.13.5-seg">klamrą</word> + <word id="word_116" tei_id="morph_8.13.6-seg">eliminacje</word> + <word id="word_117" tei_id="morph_8.13.7-seg">,</word> + <word id="word_118" tei_id="morph_8.13.8-seg">które</word> + <word id="word_119" tei_id="morph_8.13.9-seg">będziemy</word> + <word id="word_120" tei_id="morph_8.13.10-seg">pamiętać</word> + <word id="word_121" tei_id="morph_8.13.11-seg">do</word> + <word id="word_122" tei_id="morph_8.13.12-seg">końca</word> + <word id="word_123" tei_id="morph_8.13.13-seg">życia</word> + <word id="word_124" tei_id="morph_8.13.14-seg">.</word> + <word id="word_125" tei_id="morph_8.14.1-seg">Uważam</word> + <word id="word_126" tei_id="morph_8.14.2-seg">,</word> + <word id="word_127" tei_id="morph_8.14.3-seg">że</word> + <word id="word_128" tei_id="morph_8.14.4-seg">kibice</word> + <word id="word_129" tei_id="morph_8.14.5-seg">oglądali</word> + <word id="word_130" tei_id="morph_8.14.6-seg">dzisiaj</word> + <word id="word_131" tei_id="morph_8.14.7-seg">dobry</word> + <word id="word_132" tei_id="morph_8.14.8-seg">mecz</word> + <word id="word_133" tei_id="morph_8.14.9-seg">dwóch</word> 
+ <word id="word_134" tei_id="morph_8.14.10-seg">równych</word> + <word id="word_135" tei_id="morph_8.14.11-seg">zespołów</word> + <word id="word_136" tei_id="morph_8.14.12-seg">.</word> + <word id="word_137" tei_id="morph_8.15.1-seg">Moim</word> + <word id="word_138" tei_id="morph_8.15.2-seg">zdaniem</word> + <word id="word_139" tei_id="morph_8.15.3-seg">Ukraińcy</word> + <word id="word_140" tei_id="morph_8.15.4-seg">zasłużenie</word> + <word id="word_141" tei_id="morph_8.15.5-seg">zajęli</word> + <word id="word_142" tei_id="morph_8.15.6-seg">drugie</word> + <word id="word_143" tei_id="morph_8.15.7-seg">miejsce</word> + <word id="word_144" tei_id="morph_8.15.8-seg">,</word> + <word id="word_145" tei_id="morph_8.15.9-seg">jednak</word> + <word id="word_146" tei_id="morph_8.15.10-seg">nie</word> + <word id="word_147" tei_id="morph_8.15.11-seg">chciał</word> + <word id="word_148" tei_id="morph_8.15.12-seg">by</word> + <word id="word_149" tei_id="morph_8.15.13-seg">m</word> + <word id="word_150" tei_id="morph_8.15.14-seg">spotkać</word> + <word id="word_151" tei_id="morph_8.15.15-seg">się</word> + <word id="word_152" tei_id="morph_8.15.16-seg">z</word> + <word id="word_153" tei_id="morph_8.15.17-seg">nimi</word> + <word id="word_154" tei_id="morph_8.15.18-seg">na</word> + <word id="word_155" tei_id="morph_8.15.19-seg">mundialu</word> + <word id="word_156" tei_id="morph_8.15.20-seg">.</word> + <word id="word_157" tei_id="morph_8.16.1-seg">To</word> + <word id="word_158" tei_id="morph_8.16.2-seg">bardzo</word> + <word id="word_159" tei_id="morph_8.16.3-seg">niewygodny</word> + <word id="word_160" tei_id="morph_8.16.4-seg">przeciwnik</word> + <word id="word_161" tei_id="morph_8.16.5-seg">.</word> + <word id="word_162" tei_id="morph_8.17.1-seg">Może</word> + <word id="word_163" tei_id="morph_8.17.2-seg">wygrać</word> + <word id="word_164" tei_id="morph_8.17.3-seg">z</word> + <word id="word_165" tei_id="morph_8.17.4-seg">każdym</word> + <word id="word_166" 
tei_id="morph_8.17.5-seg" lastinpar="true">.</word> + <word id="word_167" tei_id="morph_9.18.1-seg" lastinpar="true">ś</word> + <word id="word_168" tei_id="morph_10.19.1-seg">Zbigniew</word> + <word id="word_169" tei_id="morph_10.19.2-seg" lastinpar="true">Boniek</word> + <word id="word_170" tei_id="morph_11.20.1-seg">(</word> + <word id="word_171" tei_id="morph_11.20.2-seg">wiceprezes</word> + <word id="word_172" tei_id="morph_11.20.3-seg">PZPN</word> + <word id="word_173" tei_id="morph_11.20.4-seg">)</word> + <word id="word_174" tei_id="morph_11.20.5-seg">:</word> + <word id="word_175" tei_id="morph_11.20.6-seg">-</word> + <word id="word_176" tei_id="morph_11.20.7-seg">Najważniejsze</word> + <word id="word_177" tei_id="morph_11.20.8-seg">,</word> + <word id="word_178" tei_id="morph_11.20.9-seg">że</word> + <word id="word_179" tei_id="morph_11.20.10-seg">eliminacje</word> + <word id="word_180" tei_id="morph_11.20.11-seg">zakończyły</word> + <word id="word_181" tei_id="morph_11.20.12-seg">się</word> + <word id="word_182" tei_id="morph_11.20.13-seg">sukcesem</word> + <word id="word_183" tei_id="morph_11.20.14-seg">.</word> + <word id="word_184" tei_id="morph_11.21.1-seg">Jestem</word> + <word id="word_185" tei_id="morph_11.21.2-seg">usatysfakcjonowany</word> + <word id="word_186" tei_id="morph_11.21.3-seg">.</word> + <word id="word_187" tei_id="morph_11.22.1-seg">Chcę</word> + <word id="word_188" tei_id="morph_11.22.2-seg">podkreślić</word> + <word id="word_189" tei_id="morph_11.22.3-seg">,</word> + <word id="word_190" tei_id="morph_11.22.4-seg">że</word> + <word id="word_191" tei_id="morph_11.22.5-seg">kibice</word> + <word id="word_192" tei_id="morph_11.22.6-seg">i</word> + <word id="word_193" tei_id="morph_11.22.7-seg">dziennikarze</word> + <word id="word_194" tei_id="morph_11.22.8-seg">dostrzegają</word> + <word id="word_195" tei_id="morph_11.22.9-seg">tylko</word> + <word id="word_196" tei_id="morph_11.22.10-seg">to</word> + <word id="word_197" 
tei_id="morph_11.22.11-seg">,</word> + <word id="word_198" tei_id="morph_11.22.12-seg">co</word> + <word id="word_199" tei_id="morph_11.22.13-seg">dzieje</word> + <word id="word_200" tei_id="morph_11.22.14-seg">się</word> + <word id="word_201" tei_id="morph_11.22.15-seg">na</word> + <word id="word_202" tei_id="morph_11.22.16-seg">boisku</word> + <word id="word_203" tei_id="morph_11.22.17-seg">i</word> + <word id="word_204" tei_id="morph_11.22.18-seg">rzadko</word> + <word id="word_205" tei_id="morph_11.22.19-seg">kiedy</word> + <word id="word_206" tei_id="morph_11.22.20-seg">zwracają</word> + <word id="word_207" tei_id="morph_11.22.21-seg">uwagę</word> + <word id="word_208" tei_id="morph_11.22.22-seg">na</word> + <word id="word_209" tei_id="morph_11.22.23-seg">kulisy</word> + <word id="word_210" tei_id="morph_11.22.24-seg">sukcesów</word> + <word id="word_211" tei_id="morph_11.22.25-seg">piłkarzy</word> + <word id="word_212" tei_id="morph_11.22.26-seg">,</word> + <word id="word_213" tei_id="morph_11.22.27-seg">a</word> + <word id="word_214" tei_id="morph_11.22.28-seg">oznaczają</word> + <word id="word_215" tei_id="morph_11.22.29-seg">one</word> + <word id="word_216" tei_id="morph_11.22.30-seg">ogromny</word> + <word id="word_217" tei_id="morph_11.22.31-seg">wysiłek</word> + <word id="word_218" tei_id="morph_11.22.32-seg">organizacyjny</word> + <word id="word_219" tei_id="morph_11.22.33-seg">,</word> + <word id="word_220" tei_id="morph_11.22.34-seg">wiele</word> + <word id="word_221" tei_id="morph_11.22.35-seg">pracy</word> + <word id="word_222" tei_id="morph_11.22.36-seg">rzeszy</word> + <word id="word_223" tei_id="morph_11.22.37-seg">ludzi</word> + <word id="word_224" tei_id="morph_11.22.38-seg">,</word> + <word id="word_225" tei_id="morph_11.22.39-seg">którzy</word> + <word id="word_226" tei_id="morph_11.22.40-seg">nie</word> + <word id="word_227" tei_id="morph_11.22.41-seg">stoją</word> + <word id="word_228" tei_id="morph_11.22.42-seg">w</word> + <word 
id="word_229" tei_id="morph_11.22.43-seg">pierwszym</word> + <word id="word_230" tei_id="morph_11.22.44-seg">szeregu</word> + <word id="word_231" tei_id="morph_11.22.45-seg">,</word> + <word id="word_232" tei_id="morph_11.22.46-seg">ale</word> + <word id="word_233" tei_id="morph_11.22.47-seg">wykonują</word> + <word id="word_234" tei_id="morph_11.22.48-seg">nieraz</word> + <word id="word_235" tei_id="morph_11.22.49-seg">ciężkie</word> + <word id="word_236" tei_id="morph_11.22.50-seg">i</word> + <word id="word_237" tei_id="morph_11.22.51-seg">niewdzięczne</word> + <word id="word_238" tei_id="morph_11.22.52-seg">zadania</word> + <word id="word_239" tei_id="morph_11.22.53-seg">.</word> + <word id="word_240" tei_id="morph_11.23.1-seg">Moim</word> + <word id="word_241" tei_id="morph_11.23.2-seg">zdaniem</word> + <word id="word_242" tei_id="morph_11.23.3-seg">między</word> + <word id="word_243" tei_id="morph_11.23.4-seg">innymi</word> + <word id="word_244" tei_id="morph_11.23.5-seg">dlatego</word> + <word id="word_245" tei_id="morph_11.23.6-seg">,</word> + <word id="word_246" tei_id="morph_11.23.7-seg">że</word> + <word id="word_247" tei_id="morph_11.23.8-seg">sprawy</word> + <word id="word_248" tei_id="morph_11.23.9-seg">organizacyjne</word> + <word id="word_249" tei_id="morph_11.23.10-seg">zostały</word> + <word id="word_250" tei_id="morph_11.23.11-seg">ułożone</word> + <word id="word_251" tei_id="morph_11.23.12-seg">na</word> + <word id="word_252" tei_id="morph_11.23.13-seg">odpowiednim</word> + <word id="word_253" tei_id="morph_11.23.14-seg">poziomie</word> + <word id="word_254" tei_id="morph_11.23.15-seg">,</word> + <word id="word_255" tei_id="morph_11.23.16-seg">wszyscy</word> + <word id="word_256" tei_id="morph_11.23.17-seg">możemy</word> + <word id="word_257" tei_id="morph_11.23.18-seg">się</word> + <word id="word_258" tei_id="morph_11.23.19-seg">dzisiaj</word> + <word id="word_259" tei_id="morph_11.23.20-seg">cieszyć</word> + <word id="word_260" 
tei_id="morph_11.23.21-seg">z</word> + <word id="word_261" tei_id="morph_11.23.22-seg">awansu</word> + <word id="word_262" tei_id="morph_11.23.23-seg">do</word> + <word id="word_263" tei_id="morph_11.23.24-seg">mistrzostw</word> + <word id="word_264" tei_id="morph_11.23.25-seg">świata</word> + <word id="word_265" tei_id="morph_11.23.26-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_sys/4.mmax a/src/test/resources/teksty_mmax/teksty_sys/4.mmax new file mode 100755 index 0000000..731abf6 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/4.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>4_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_sys/4_mentions.xml a/src/test/resources/teksty_mmax/teksty_sys/4_mentions.xml new file mode 100755 index 0000000..6e92792 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/4_mentions.xml @@ -0,0 +1,74 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_1" mmax_level="mention" mention_head="Ernest" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_2" span="word_3" mmax_level="mention" mention_head="Agnieszka" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_3" span="word_12..word_13" mmax_level="mention" mention_head="rodzinę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_21..word_22" mmax_level="mention" mention_head="szczęście" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_33" mmax_level="mention" mention_head="Kwiecień" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_32..word_33" mmax_level="mention" mention_head="Ernest" mention_group="set_0" near_identity="empty"></markable> + <markable 
id="markable_7" span="word_36" mmax_level="mention" mention_head="Wigilię" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_8" span="word_38" mmax_level="mention" mention_head="jego" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_9" span="word_38..word_39" mmax_level="mention" mention_head="obowiązków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_43" mmax_level="mention" mention_head="choinki" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_11" span="word_42..word_43" mmax_level="mention" mention_head="dostarczeniem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_48" mmax_level="mention" mention_head="naczyń" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_50" mmax_level="mention" mention_head="Agnieszka" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_14" span="word_52" mmax_level="mention" mention_head="pierogi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_15" span="word_55" mmax_level="mention" mention_head="barszcz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_60" mmax_level="mention" mention_head="karpia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_62" mmax_level="mention" mention_head="Córki" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_18" span="word_68" mmax_level="mention" mention_head="czas" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_19" span="word_70" mmax_level="mention" mention_head="prezenty" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_84" mmax_level="mention" mention_head="Jodełek" mention_group="set_6" 
near_identity="empty"></markable> + <markable id="markable_21" span="word_87" mmax_level="mention" mention_head="Leśniczy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_92" mmax_level="mention" mention_head="Ernest" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_23" span="word_90..word_92" mmax_level="mention" mention_head="pan" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_24" span="word_94" mmax_level="mention" mention_head="choinkę" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_25" span="word_97..word_98" mmax_level="mention" mention_head="kilometrów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_101" mmax_level="mention" mention_head="Kwietniów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_27" span="word_100..word_101" mmax_level="mention" mention_head="domu" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_28" span="word_103" mmax_level="mention" mention_head="On" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_29" span="word_107" mmax_level="mention" mention_head="sobie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_30" span="word_108" mmax_level="mention" mention_head="świąt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_31" span="word_110..word_111" mmax_level="mention" mention_head="świerku" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_32" span="word_122" mmax_level="mention" mention_head="Grobelny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_121..word_122" mmax_level="mention" mention_head="Gabriel" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_34" 
span="word_124..word_125" mmax_level="mention" mention_head="nadleśniczy" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_35" span="word_127" mmax_level="mention" mention_head="To" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_133" mmax_level="mention" mention_head="nim" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_37" span="word_135" mmax_level="mention" mention_head="ozdoby" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_138" mmax_level="mention" mention_head="dzieci" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_141" mmax_level="mention" mention_head="latami" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_40" span="word_150" mmax_level="mention" mention_head="Gabriel" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_41" span="word_149..word_150" mmax_level="mention" mention_head="Pan" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_42" span="word_155..word_156" mmax_level="mention" mention_head="córki" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_43" span="word_152..word_156" mmax_level="mention" mention_head="synów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_44" span="word_159" mmax_level="mention" mention_head="domu" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_45" span="word_169" mmax_level="mention" mention_head="wszyscy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_173" mmax_level="mention" mention_head="choinkę" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_47" span="word_176" mmax_level="mention" mention_head="Żona" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_48" span="word_178..word_179" mmax_level="mention" mention_head="włosy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_181" mmax_level="mention" mention_head="ja" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_183" mmax_level="mention" mention_head="lampki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_187" mmax_level="mention" mention_head="nadleśniczego" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_52" span="word_186..word_187" mmax_level="mention" mention_head="domu" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_53" span="word_189..word_190" mmax_level="mention" mention_head="ról" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_188..word_190" mmax_level="mention" mention_head="podział" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_195..word_196" mmax_level="mention" mention_head="lasach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_197" mmax_level="mention" mention_head="najwięcej" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_57" span="word_205..word_207" mmax_level="mention" mention_head="jodły" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_58" span="word_209..word_210" mmax_level="mention" mention_head="igłami" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_215..word_216" mmax_level="mention" mention_head="jodełek" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_60" span="word_235" mmax_level="mention" mention_head="nadleśniczy" mention_group="set_7" near_identity="empty"></markable> + 
<markable id="markable_61" span="word_239" mmax_level="mention" mention_head="choinkę" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_62" span="word_241" mmax_level="mention" mention_head="sobie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_63" span="word_246" mmax_level="mention" mention_head="rodziny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_251" mmax_level="mention" mention_head="roku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_255" mmax_level="mention" mention_head="synem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_260" mmax_level="mention" mention_head="drzewko" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_263" mmax_level="mention" mention_head="mają" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_68" span="word_264" mmax_level="mention" mention_head="tradycję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_69" span="word_268" mmax_level="mention" mention_head="Gabriel" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_70" span="word_267..word_268" mmax_level="mention" mention_head="pan" mention_group="set_10" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_sys/4_words.xml a/src/test/resources/teksty_mmax/teksty_sys/4_words.xml new file mode 100755 index 0000000..17caaeb --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/4_words.xml @@ -0,0 +1,273 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Ernest</word> + <word id="word_2" tei_id="morph_1.1.2-seg">i</word> + <word id="word_3" tei_id="morph_1.1.3-seg">Agnieszka</word> + <word 
id="word_4" tei_id="morph_1.1.4-seg">nie</word> + <word id="word_5" tei_id="morph_1.1.5-seg">planowali</word> + <word id="word_6" tei_id="morph_1.1.6-seg">,</word> + <word id="word_7" tei_id="morph_1.1.7-seg">że</word> + <word id="word_8" tei_id="morph_1.1.8-seg">będą</word> + <word id="word_9" tei_id="morph_1.1.9-seg">mieli</word> + <word id="word_10" tei_id="morph_1.1.10-seg">wielką</word> + <word id="word_11" tei_id="morph_1.1.11-seg">,</word> + <word id="word_12" tei_id="morph_1.1.12-seg">babską</word> + <word id="word_13" tei_id="morph_1.1.13-seg">rodzinę</word> + <word id="word_14" tei_id="morph_1.1.14-seg">.</word> + <word id="word_15" tei_id="morph_1.2.1-seg">Ale</word> + <word id="word_16" tei_id="morph_1.2.2-seg">tak</word> + <word id="word_17" tei_id="morph_1.2.3-seg">wyszło</word> + <word id="word_18" tei_id="morph_1.2.4-seg">.</word> + <word id="word_19" tei_id="morph_1.2.5-seg">–</word> + <word id="word_20" tei_id="morph_1.2.6-seg">I</word> + <word id="word_21" tei_id="morph_1.2.7-seg">całe</word> + <word id="word_22" tei_id="morph_1.2.8-seg">szczęście</word> + <word id="word_23" tei_id="morph_1.2.9-seg">.</word> + <word id="word_24" tei_id="morph_1.3.1-seg">Lepiej</word> + <word id="word_25" tei_id="morph_1.3.2-seg">się</word> + <word id="word_26" tei_id="morph_1.3.3-seg">dogaduję</word> + <word id="word_27" tei_id="morph_1.3.4-seg">z</word> + <word id="word_28" tei_id="morph_1.3.5-seg">dziewczętami</word> + <word id="word_29" tei_id="morph_1.3.6-seg">–</word> + <word id="word_30" tei_id="morph_1.3.7-seg">cieszy</word> + <word id="word_31" tei_id="morph_1.3.8-seg">się</word> + <word id="word_32" tei_id="morph_1.3.9-seg">Ernest</word> + <word id="word_33" tei_id="morph_1.3.10-seg">Kwiecień</word> + <word id="word_34" tei_id="morph_1.3.11-seg" lastinpar="true">.</word> + <word id="word_35" tei_id="morph_2.4.1-seg">W</word> + <word id="word_36" tei_id="morph_2.4.2-seg">Wigilię</word> + <word id="word_37" tei_id="morph_2.4.3-seg">do</word> + <word 
id="word_38" tei_id="morph_2.4.4-seg">jego</word> + <word id="word_39" tei_id="morph_2.4.5-seg">obowiązków</word> + <word id="word_40" tei_id="morph_2.4.6-seg">,</word> + <word id="word_41" tei_id="morph_2.4.7-seg">poza</word> + <word id="word_42" tei_id="morph_2.4.8-seg">dostarczeniem</word> + <word id="word_43" tei_id="morph_2.4.9-seg">choinki</word> + <word id="word_44" tei_id="morph_2.4.10-seg">,</word> + <word id="word_45" tei_id="morph_2.4.11-seg">należeć</word> + <word id="word_46" tei_id="morph_2.4.12-seg">będzie</word> + <word id="word_47" tei_id="morph_2.4.13-seg">zmywanie</word> + <word id="word_48" tei_id="morph_2.4.14-seg">naczyń</word> + <word id="word_49" tei_id="morph_2.4.15-seg">.</word> + <word id="word_50" tei_id="morph_2.5.1-seg">Agnieszka</word> + <word id="word_51" tei_id="morph_2.5.2-seg">zrobi</word> + <word id="word_52" tei_id="morph_2.5.3-seg">pierogi</word> + <word id="word_53" tei_id="morph_2.5.4-seg">,</word> + <word id="word_54" tei_id="morph_2.5.5-seg">ugotuje</word> + <word id="word_55" tei_id="morph_2.5.6-seg">barszcz</word> + <word id="word_56" tei_id="morph_2.5.7-seg">z</word> + <word id="word_57" tei_id="morph_2.5.8-seg">uszkami</word> + <word id="word_58" tei_id="morph_2.5.9-seg">,</word> + <word id="word_59" tei_id="morph_2.5.10-seg">usmaży</word> + <word id="word_60" tei_id="morph_2.5.11-seg">karpia</word> + <word id="word_61" tei_id="morph_2.5.12-seg">.</word> + <word id="word_62" tei_id="morph_2.6.1-seg">Córki</word> + <word id="word_63" tei_id="morph_2.6.2-seg">upieką</word> + <word id="word_64" tei_id="morph_2.6.3-seg">ciasta</word> + <word id="word_65" tei_id="morph_2.6.4-seg">.</word> + <word id="word_66" tei_id="morph_2.7.1-seg">Potem</word> + <word id="word_67" tei_id="morph_2.7.2-seg">przyjdzie</word> + <word id="word_68" tei_id="morph_2.7.3-seg">czas</word> + <word id="word_69" tei_id="morph_2.7.4-seg">na</word> + <word id="word_70" tei_id="morph_2.7.5-seg">prezenty</word> + <word id="word_71" 
tei_id="morph_2.7.6-seg">.</word> + <word id="word_72" tei_id="morph_2.8.1-seg">Może</word> + <word id="word_73" tei_id="morph_2.8.2-seg">to</word> + <word id="word_74" tei_id="morph_2.8.3-seg">nawet</word> + <word id="word_75" tei_id="morph_2.8.4-seg">będą</word> + <word id="word_76" tei_id="morph_2.8.5-seg">empetrójki</word> + <word id="word_77" tei_id="morph_2.8.6-seg">,</word> + <word id="word_78" tei_id="morph_2.8.7-seg">o</word> + <word id="word_79" tei_id="morph_2.8.8-seg">których</word> + <word id="word_80" tei_id="morph_2.8.9-seg">marzą</word> + <word id="word_81" tei_id="morph_2.8.10-seg">starsze</word> + <word id="word_82" tei_id="morph_2.8.11-seg">dziewczyny</word> + <word id="word_83" tei_id="morph_2.8.12-seg" lastinpar="true">.</word> + <word id="word_84" tei_id="morph_3.9.1-seg">Jodełek</word> + <word id="word_85" tei_id="morph_3.9.2-seg">sadzimy</word> + <word id="word_86" tei_id="morph_3.9.3-seg" lastinpar="true">mniej</word> + <word id="word_87" tei_id="morph_4.10.1-seg">Leśniczy</word> + <word id="word_88" tei_id="morph_4.10.2-seg">,</word> + <word id="word_89" tei_id="morph_4.10.3-seg">od</word> + <word id="word_90" tei_id="morph_4.10.4-seg">którego</word> + <word id="word_91" tei_id="morph_4.10.5-seg">pan</word> + <word id="word_92" tei_id="morph_4.10.6-seg">Ernest</word> + <word id="word_93" tei_id="morph_4.10.7-seg">przywozi</word> + <word id="word_94" tei_id="morph_4.10.8-seg">choinkę</word> + <word id="word_95" tei_id="morph_4.10.9-seg">,</word> + <word id="word_96" tei_id="morph_4.10.10-seg">mieszka</word> + <word id="word_97" tei_id="morph_4.10.11-seg">kilka</word> + <word id="word_98" tei_id="morph_4.10.12-seg">kilometrów</word> + <word id="word_99" tei_id="morph_4.10.13-seg">od</word> + <word id="word_100" tei_id="morph_4.10.14-seg">domu</word> + <word id="word_101" tei_id="morph_4.10.15-seg">Kwietniów</word> + <word id="word_102" tei_id="morph_4.10.16-seg">.</word> + <word id="word_103" tei_id="morph_4.11.1-seg">On</word> + <word 
id="word_104" tei_id="morph_4.11.2-seg">także</word> + <word id="word_105" tei_id="morph_4.11.3-seg">nie</word> + <word id="word_106" tei_id="morph_4.11.4-seg">wyobraża</word> + <word id="word_107" tei_id="morph_4.11.5-seg">sobie</word> + <word id="word_108" tei_id="morph_4.11.6-seg">świąt</word> + <word id="word_109" tei_id="morph_4.11.7-seg">bez</word> + <word id="word_110" tei_id="morph_4.11.8-seg">prawdziwego</word> + <word id="word_111" tei_id="morph_4.11.9-seg">świerku</word> + <word id="word_112" tei_id="morph_4.11.10-seg">.</word> + <word id="word_113" tei_id="morph_4.11.11-seg">–</word> + <word id="word_114" tei_id="morph_4.11.12-seg">I</word> + <word id="word_115" tei_id="morph_4.11.13-seg">musi</word> + <word id="word_116" tei_id="morph_4.11.14-seg">być</word> + <word id="word_117" tei_id="morph_4.11.15-seg">kiczowaty</word> + <word id="word_118" tei_id="morph_4.11.16-seg">–</word> + <word id="word_119" tei_id="morph_4.11.17-seg">uśmiecha</word> + <word id="word_120" tei_id="morph_4.11.18-seg">się</word> + <word id="word_121" tei_id="morph_4.11.19-seg">Gabriel</word> + <word id="word_122" tei_id="morph_4.11.20-seg">Grobelny</word> + <word id="word_123" tei_id="morph_4.11.21-seg">,</word> + <word id="word_124" tei_id="morph_4.11.22-seg">nadleśniczy</word> + <word id="word_125" tei_id="morph_4.11.23-seg">wałbrzyski</word> + <word id="word_126" tei_id="morph_4.11.24-seg" lastinpar="true">.</word> + <word id="word_127" tei_id="morph_5.12.1-seg">To</word> + <word id="word_128" tei_id="morph_5.12.2-seg">znaczy</word> + <word id="word_129" tei_id="morph_5.12.3-seg">,</word> + <word id="word_130" tei_id="morph_5.12.4-seg">że</word> + <word id="word_131" tei_id="morph_5.12.5-seg">powinny</word> + <word id="word_132" tei_id="morph_5.12.6-seg">na</word> + <word id="word_133" tei_id="morph_5.12.7-seg">nim</word> + <word id="word_134" tei_id="morph_5.12.8-seg">wisieć</word> + <word id="word_135" tei_id="morph_5.12.9-seg">ozdoby</word> + <word id="word_136" 
tei_id="morph_5.12.10-seg">zrobione</word> + <word id="word_137" tei_id="morph_5.12.11-seg">przez</word> + <word id="word_138" tei_id="morph_5.12.12-seg">dzieci</word> + <word id="word_139" tei_id="morph_5.12.13-seg">,</word> + <word id="word_140" tei_id="morph_5.12.14-seg">przechowywane</word> + <word id="word_141" tei_id="morph_5.12.15-seg">latami</word> + <word id="word_142" tei_id="morph_5.12.16-seg">,</word> + <word id="word_143" tei_id="morph_5.12.17-seg">wyciągane</word> + <word id="word_144" tei_id="morph_5.12.18-seg">na</word> + <word id="word_145" tei_id="morph_5.12.19-seg">tę</word> + <word id="word_146" tei_id="morph_5.12.20-seg">jedyną</word> + <word id="word_147" tei_id="morph_5.12.21-seg">okazję</word> + <word id="word_148" tei_id="morph_5.12.22-seg" lastinpar="true">.</word> + <word id="word_149" tei_id="morph_6.13.1-seg">Pan</word> + <word id="word_150" tei_id="morph_6.13.2-seg">Gabriel</word> + <word id="word_151" tei_id="morph_6.13.3-seg">ma</word> + <word id="word_152" tei_id="morph_6.13.4-seg">dwóch</word> + <word id="word_153" tei_id="morph_6.13.5-seg">synów</word> + <word id="word_154" tei_id="morph_6.13.6-seg">i</word> + <word id="word_155" tei_id="morph_6.13.7-seg">trzy</word> + <word id="word_156" tei_id="morph_6.13.8-seg">córki</word> + <word id="word_157" tei_id="morph_6.13.9-seg">.</word> + <word id="word_158" tei_id="morph_6.14.1-seg">W</word> + <word id="word_159" tei_id="morph_6.14.2-seg">domu</word> + <word id="word_160" tei_id="morph_6.14.3-seg">została</word> + <word id="word_161" tei_id="morph_6.14.4-seg">najmłodsza</word> + <word id="word_162" tei_id="morph_6.14.5-seg">,</word> + <word id="word_163" tei_id="morph_6.14.6-seg">12-letnia</word> + <word id="word_164" tei_id="morph_6.14.7-seg">,</word> + <word id="word_165" tei_id="morph_6.14.8-seg">ale</word> + <word id="word_166" tei_id="morph_6.14.9-seg">na</word> + <word id="word_167" tei_id="morph_6.14.10-seg">święta</word> + <word id="word_168" 
tei_id="morph_6.14.11-seg">zjadą</word> + <word id="word_169" tei_id="morph_6.14.12-seg">wszyscy</word> + <word id="word_170" tei_id="morph_6.14.13-seg">.</word> + <word id="word_171" tei_id="morph_6.15.1-seg">I</word> + <word id="word_172" tei_id="morph_6.15.2-seg">ubiorą</word> + <word id="word_173" tei_id="morph_6.15.3-seg">choinkę</word> + <word id="word_174" tei_id="morph_6.15.4-seg">.</word> + <word id="word_175" tei_id="morph_6.15.5-seg">–</word> + <word id="word_176" tei_id="morph_6.15.6-seg">Żona</word> + <word id="word_177" tei_id="morph_6.15.7-seg">rozwiesi</word> + <word id="word_178" tei_id="morph_6.15.8-seg">anielskie</word> + <word id="word_179" tei_id="morph_6.15.9-seg">włosy</word> + <word id="word_180" tei_id="morph_6.15.10-seg">,</word> + <word id="word_181" tei_id="morph_6.15.11-seg">ja</word> + <word id="word_182" tei_id="morph_6.15.12-seg">podłączę</word> + <word id="word_183" tei_id="morph_6.15.13-seg">lampki</word> + <word id="word_184" tei_id="morph_6.15.14-seg">–</word> + <word id="word_185" tei_id="morph_6.15.15-seg">w</word> + <word id="word_186" tei_id="morph_6.15.16-seg">domu</word> + <word id="word_187" tei_id="morph_6.15.17-seg">nadleśniczego</word> + <word id="word_188" tei_id="morph_6.15.18-seg">podział</word> + <word id="word_189" tei_id="morph_6.15.19-seg">świątecznych</word> + <word id="word_190" tei_id="morph_6.15.20-seg">ról</word> + <word id="word_191" tei_id="morph_6.15.21-seg">jest</word> + <word id="word_192" tei_id="morph_6.15.22-seg">określony</word> + <word id="word_193" tei_id="morph_6.15.23-seg" lastinpar="true">.</word> + <word id="word_194" tei_id="morph_7.16.1-seg">W</word> + <word id="word_195" tei_id="morph_7.16.2-seg">dolnośląskich</word> + <word id="word_196" tei_id="morph_7.16.3-seg">lasach</word> + <word id="word_197" tei_id="morph_7.16.4-seg">najwięcej</word> + <word id="word_198" tei_id="morph_7.16.5-seg">jest</word> + <word id="word_199" tei_id="morph_7.16.6-seg">świerków</word> + <word id="word_200" 
tei_id="morph_7.16.7-seg">.</word> + <word id="word_201" tei_id="morph_7.17.1-seg">Na</word> + <word id="word_202" tei_id="morph_7.17.2-seg">plantacjach</word> + <word id="word_203" tei_id="morph_7.17.3-seg">sadzą</word> + <word id="word_204" tei_id="morph_7.17.4-seg">także</word> + <word id="word_205" tei_id="morph_7.17.5-seg">coraz</word> + <word id="word_206" tei_id="morph_7.17.6-seg">popularniejsze</word> + <word id="word_207" tei_id="morph_7.17.7-seg">jodły</word> + <word id="word_208" tei_id="morph_7.17.8-seg">z</word> + <word id="word_209" tei_id="morph_7.17.9-seg">miękkimi</word> + <word id="word_210" tei_id="morph_7.17.10-seg">igłami</word> + <word id="word_211" tei_id="morph_7.17.11-seg" lastinpar="true">.</word> + <word id="word_212" tei_id="morph_8.18.1-seg">–</word> + <word id="word_213" tei_id="morph_8.18.2-seg">Ale</word> + <word id="word_214" tei_id="morph_8.18.3-seg">i</word> + <word id="word_215" tei_id="morph_8.18.4-seg">tych</word> + <word id="word_216" tei_id="morph_8.18.5-seg">jodełek</word> + <word id="word_217" tei_id="morph_8.18.6-seg">sadzimy</word> + <word id="word_218" tei_id="morph_8.18.7-seg">już</word> + <word id="word_219" tei_id="morph_8.18.8-seg">mniej</word> + <word id="word_220" tei_id="morph_8.18.9-seg">.</word> + <word id="word_221" tei_id="morph_8.19.1-seg">To</word> + <word id="word_222" tei_id="morph_8.19.2-seg">nie</word> + <word id="word_223" tei_id="morph_8.19.3-seg">lata</word> + <word id="word_224" tei_id="morph_8.19.4-seg">dziewięćdziesiąte</word> + <word id="word_225" tei_id="morph_8.19.5-seg">,</word> + <word id="word_226" tei_id="morph_8.19.6-seg">gdy</word> + <word id="word_227" tei_id="morph_8.19.7-seg">sprzedawali</word> + <word id="word_228" tei_id="morph_8.19.8-seg">śmy</word> + <word id="word_229" tei_id="morph_8.19.9-seg">prawie</word> + <word id="word_230" tei_id="morph_8.19.10-seg">wszystkie</word> + <word id="word_231" tei_id="morph_8.19.11-seg">wyhodowane</word> + <word id="word_232" 
tei_id="morph_8.19.12-seg">drzewka</word> + <word id="word_233" tei_id="morph_8.19.13-seg">–</word> + <word id="word_234" tei_id="morph_8.19.14-seg">wspomina</word> + <word id="word_235" tei_id="morph_8.19.15-seg">nadleśniczy</word> + <word id="word_236" tei_id="morph_8.19.16-seg" lastinpar="true">.</word> + <word id="word_237" tei_id="morph_9.20.1-seg">U</word> + <word id="word_238" tei_id="morph_9.20.2-seg">Grobelnego</word> + <word id="word_239" tei_id="morph_9.20.3-seg">choinkę</word> + <word id="word_240" tei_id="morph_9.20.4-seg">można</word> + <word id="word_241" tei_id="morph_9.20.5-seg">sobie</word> + <word id="word_242" tei_id="morph_9.20.6-seg">wybrać</word> + <word id="word_243" tei_id="morph_9.20.7-seg">.</word> + <word id="word_244" tei_id="morph_9.20.8-seg">–</word> + <word id="word_245" tei_id="morph_9.20.9-seg">Mamy</word> + <word id="word_246" tei_id="morph_9.20.10-seg">rodziny</word> + <word id="word_247" tei_id="morph_9.20.11-seg">,</word> + <word id="word_248" tei_id="morph_9.20.12-seg">w</word> + <word id="word_249" tei_id="morph_9.20.13-seg">których</word> + <word id="word_250" tei_id="morph_9.20.14-seg">co</word> + <word id="word_251" tei_id="morph_9.20.15-seg">roku</word> + <word id="word_252" tei_id="morph_9.20.16-seg">ojciec</word> + <word id="word_253" tei_id="morph_9.20.17-seg">przyjeżdża</word> + <word id="word_254" tei_id="morph_9.20.18-seg">z</word> + <word id="word_255" tei_id="morph_9.20.19-seg">synem</word> + <word id="word_256" tei_id="morph_9.20.20-seg">,</word> + <word id="word_257" tei_id="morph_9.20.21-seg">by</word> + <word id="word_258" tei_id="morph_9.20.22-seg">samemu</word> + <word id="word_259" tei_id="morph_9.20.23-seg">ściąć</word> + <word id="word_260" tei_id="morph_9.20.24-seg">drzewko</word> + <word id="word_261" tei_id="morph_9.20.25-seg">.</word> + <word id="word_262" tei_id="morph_9.21.1-seg">Taką</word> + <word id="word_263" tei_id="morph_9.21.2-seg">mają</word> + <word id="word_264" 
tei_id="morph_9.21.3-seg">tradycję</word> + <word id="word_265" tei_id="morph_9.21.4-seg">–</word> + <word id="word_266" tei_id="morph_9.21.5-seg">dodaje</word> + <word id="word_267" tei_id="morph_9.21.6-seg">pan</word> + <word id="word_268" tei_id="morph_9.21.7-seg">Gabriel</word> + <word id="word_269" tei_id="morph_9.21.8-seg" lastinpar="true">.</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_sys/5.mmax a/src/test/resources/teksty_mmax/teksty_sys/5.mmax new file mode 100755 index 0000000..26737f7 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/5.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>5_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_sys/5_mentions.xml a/src/test/resources/teksty_mmax/teksty_sys/5_mentions.xml new file mode 100755 index 0000000..80d7c76 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/5_mentions.xml @@ -0,0 +1,75 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_1..word_2" mmax_level="mention" mention_head="Cena" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_4..word_5" mmax_level="mention" mention_head="pogromu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_7..word_8" mmax_level="mention" mention_head="Żydów" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_4" span="word_12" mmax_level="mention" mention_head="nich" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_13" mmax_level="mention" mention_head="rodzina" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_6" span="word_18" mmax_level="mention" mention_head="wojną" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_7" span="word_20" 
mmax_level="mention" mention_head="to" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_8" span="word_23" mmax_level="mention" mention_head="miasteczka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_24..word_25" mmax_level="mention" mention_head="rodzina" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_10" span="word_29" mmax_level="mention" mention_head="garbarnię" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_11" span="word_33" mmax_level="mention" mention_head="mi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_44..word_45" mmax_level="mention" mention_head="Żydzi" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_13" span="word_48..word_49" mmax_level="mention" mention_head="okupacji" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_14" span="word_51" mmax_level="mention" mention_head="Faktem" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_15" span="word_56..word_57" mmax_level="mention" mention_head="okupację" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_16" span="word_60..word_61" mmax_level="mention" mention_head="Faktem" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_17" span="word_64" mmax_level="mention" mention_head="to" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_18" span="word_67..word_69" mmax_level="mention" mention_head="rodziny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_19" span="word_75..word_76" mmax_level="mention" mention_head="okolicach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_79" mmax_level="mention" mention_head="Żydów" mention_group="set_7" 
near_identity="empty"></markable> + <markable id="markable_21" span="word_82" mmax_level="mention" mention_head="kilometrów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_81..word_82" mmax_level="mention" mention_head="Parę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_90" mmax_level="mention" mention_head="folwarku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_24" span="word_95" mmax_level="mention" mention_head="AK" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_25" span="word_96" mmax_level="mention" mention_head="Kazimierz" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_26" span="word_94..word_96" mmax_level="mention" mention_head="żołnierz" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_27" span="word_100" mmax_level="mention" mention_head="wojny" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_28" span="word_102..word_103" mmax_level="mention" mention_head="domu" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_29" span="word_106" mmax_level="mention" mention_head="Żydów" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_30" span="word_112" mmax_level="mention" mention_head="domu" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_31" span="word_111..word_112" mmax_level="mention" mention_head="piwnic" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_32" span="word_114..word_115" mmax_level="mention" mention_head="tunel" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_118" mmax_level="mention" mention_head="czemu" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_34" span="word_120" 
mmax_level="mention" mention_head="wodę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_35" span="word_126" mmax_level="mention" mention_head="AK" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_36" span="word_127" mmax_level="mention" mention_head="Kazimierz" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_37" span="word_125..word_127" mmax_level="mention" mention_head="siatki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_132" mmax_level="mention" mention_head="Żydów" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_39" span="word_134" mmax_level="mention" mention_head="Rodzina" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_40" span="word_141..word_142" mmax_level="mention" mention_head="stronach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_149" mmax_level="mention" mention_head="Prawdą" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_153" mmax_level="mention" mention_head="to" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_43" span="word_156" mmax_level="mention" mention_head="Żydzi" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_44" span="word_158..word_159" mmax_level="mention" mention_head="przechowanie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_164" mmax_level="mention" mention_head="utrzymanie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_46" span="word_173" mmax_level="mention" mention_head="tym" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_47" span="word_185" mmax_level="mention" mention_head="tym" mention_group="set_6" near_identity="empty"></markable> 
+ <markable id="markable_48" span="word_190" mmax_level="mention" mention_head="pieniądze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_196" mmax_level="mention" mention_head="serca" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_50" span="word_195..word_196" mmax_level="mention" mention_head="odruchu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_200" mmax_level="mention" mention_head="Polacy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_52" span="word_203" mmax_level="mention" mention_head="Żydzi" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_53" span="word_205..word_206" mmax_level="mention" mention_head="rachunki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_216..word_217" mmax_level="mention" mention_head="wsi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_221..word_223" mmax_level="mention" mention_head="wsi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_224..word_225" mmax_level="mention" mention_head="gospodarz" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_57" span="word_227" mmax_level="mention" mention_head="Polak" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_58" span="word_232" mmax_level="mention" mention_head="wojny" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_59" span="word_233..word_234" mmax_level="mention" mention_head="Żydówkę" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_60" span="word_237" mmax_level="mention" mention_head="mu" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_61" span="word_241" mmax_level="mention" 
mention_head="czego" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_62" span="word_249" mmax_level="mention" mention_head="policję" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_63" span="word_255" mmax_level="mention" mention_head="gospodarzem" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_64" span="word_257..word_258" mmax_level="mention" mention_head="kobieta" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_261" mmax_level="mention" mention_head="męża" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_265..word_266" mmax_level="mention" mention_head="sumę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_282" mmax_level="mention" mention_head="gospodarza" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_68" span="word_276..word_282" mmax_level="mention" mention_head="Żydówkę" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_69" span="word_286..word_287" mmax_level="mention" mention_head="straceńca" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_70" span="word_285..word_287" mmax_level="mention" mention_head="żonę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_71" span="word_290" mmax_level="mention" mention_head="antysemitką" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_sys/5_words.xml a/src/test/resources/teksty_mmax/teksty_sys/5_words.xml new file mode 100755 index 0000000..4d24a36 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/5_words.xml @@ -0,0 +1,295 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" 
tei_id="morph_1.1.1-seg">Cena</word> + <word id="word_2" tei_id="morph_1.1.2-seg" lastinpar="true">życia</word> + <word id="word_3" tei_id="morph_2.2.1-seg">Z</word> + <word id="word_4" tei_id="morph_2.2.2-seg">tego</word> + <word id="word_5" tei_id="morph_2.2.3-seg">pogromu</word> + <word id="word_6" tei_id="morph_2.2.4-seg">ocalało</word> + <word id="word_7" tei_id="morph_2.2.5-seg">kilkudziesięciu</word> + <word id="word_8" tei_id="morph_2.2.6-seg">Żydów</word> + <word id="word_9" tei_id="morph_2.2.7-seg">,</word> + <word id="word_10" tei_id="morph_2.2.8-seg">a</word> + <word id="word_11" tei_id="morph_2.2.9-seg">wśród</word> + <word id="word_12" tei_id="morph_2.2.10-seg">nich</word> + <word id="word_13" tei_id="morph_2.2.11-seg">rodzina</word> + <word id="word_14" tei_id="morph_2.2.12-seg">Mosze</word> + <word id="word_15" tei_id="morph_2.2.13-seg">Sonensona</word> + <word id="word_16" tei_id="morph_2.2.14-seg">.</word> + <word id="word_17" tei_id="morph_2.3.1-seg">Przed</word> + <word id="word_18" tei_id="morph_2.3.2-seg">wojną</word> + <word id="word_19" tei_id="morph_2.3.3-seg">była</word> + <word id="word_20" tei_id="morph_2.3.4-seg">to</word> + <word id="word_21" tei_id="morph_2.3.5-seg">w</word> + <word id="word_22" tei_id="morph_2.3.6-seg">skali</word> + <word id="word_23" tei_id="morph_2.3.7-seg">miasteczka</word> + <word id="word_24" tei_id="morph_2.3.8-seg">rodzina</word> + <word id="word_25" tei_id="morph_2.3.9-seg">bogata</word> + <word id="word_26" tei_id="morph_2.3.10-seg">.</word> + <word id="word_27" tei_id="morph_2.4.1-seg">Sonensonowie</word> + <word id="word_28" tei_id="morph_2.4.2-seg">mieli</word> + <word id="word_29" tei_id="morph_2.4.3-seg">garbarnię</word> + <word id="word_30" tei_id="morph_2.4.4-seg">.</word> + <word id="word_31" tei_id="morph_2.5.1-seg">Nie</word> + <word id="word_32" tei_id="morph_2.5.2-seg">udało</word> + <word id="word_33" tei_id="morph_2.5.3-seg">mi</word> + <word id="word_34" tei_id="morph_2.5.4-seg">się</word> + 
<word id="word_35" tei_id="morph_2.5.5-seg">dociec</word> + <word id="word_36" tei_id="morph_2.5.6-seg">,</word> + <word id="word_37" tei_id="morph_2.5.7-seg">u</word> + <word id="word_38" tei_id="morph_2.5.8-seg">kogo</word> + <word id="word_39" tei_id="morph_2.5.9-seg">mianowicie</word> + <word id="word_40" tei_id="morph_2.5.10-seg">przechowywali</word> + <word id="word_41" tei_id="morph_2.5.11-seg">się</word> + <word id="word_42" tei_id="morph_2.5.12-seg">Sonensonowie</word> + <word id="word_43" tei_id="morph_2.5.13-seg">oraz</word> + <word id="word_44" tei_id="morph_2.5.14-seg">pozostali</word> + <word id="word_45" tei_id="morph_2.5.15-seg">Żydzi</word> + <word id="word_46" tei_id="morph_2.5.16-seg">w</word> + <word id="word_47" tei_id="morph_2.5.17-seg">czasie</word> + <word id="word_48" tei_id="morph_2.5.18-seg">okupacji</word> + <word id="word_49" tei_id="morph_2.5.19-seg">niemieckiej</word> + <word id="word_50" tei_id="morph_2.5.20-seg">.</word> + <word id="word_51" tei_id="morph_2.6.1-seg">Faktem</word> + <word id="word_52" tei_id="morph_2.6.2-seg">pozostaje</word> + <word id="word_53" tei_id="morph_2.6.3-seg">natomiast</word> + <word id="word_54" tei_id="morph_2.6.4-seg">,</word> + <word id="word_55" tei_id="morph_2.6.5-seg">że</word> + <word id="word_56" tei_id="morph_2.6.6-seg">okupację</word> + <word id="word_57" tei_id="morph_2.6.7-seg">tę</word> + <word id="word_58" tei_id="morph_2.6.8-seg">przeżyli</word> + <word id="word_59" tei_id="morph_2.6.9-seg">.</word> + <word id="word_60" tei_id="morph_2.7.1-seg">Faktem</word> + <word id="word_61" tei_id="morph_2.7.2-seg">oczywistym</word> + <word id="word_62" tei_id="morph_2.7.3-seg">pozostaje</word> + <word id="word_63" tei_id="morph_2.7.4-seg">i</word> + <word id="word_64" tei_id="morph_2.7.5-seg">to</word> + <word id="word_65" tei_id="morph_2.7.6-seg">,</word> + <word id="word_66" tei_id="morph_2.7.7-seg">że</word> + <word id="word_67" tei_id="morph_2.7.8-seg">liczne</word> + <word id="word_68" 
tei_id="morph_2.7.9-seg">rodziny</word> + <word id="word_69" tei_id="morph_2.7.10-seg">polskie</word> + <word id="word_70" tei_id="morph_2.7.11-seg">-</word> + <word id="word_71" tei_id="morph_2.7.12-seg">w</word> + <word id="word_72" tei_id="morph_2.7.13-seg">Ejszyszkach</word> + <word id="word_73" tei_id="morph_2.7.14-seg">i</word> + <word id="word_74" tei_id="morph_2.7.15-seg">w</word> + <word id="word_75" tei_id="morph_2.7.16-seg">pobliskich</word> + <word id="word_76" tei_id="morph_2.7.17-seg">okolicach</word> + <word id="word_77" tei_id="morph_2.7.18-seg">-</word> + <word id="word_78" tei_id="morph_2.7.19-seg">przechowywały</word> + <word id="word_79" tei_id="morph_2.7.20-seg">Żydów</word> + <word id="word_80" tei_id="morph_2.7.21-seg">.</word> + <word id="word_81" tei_id="morph_2.8.1-seg">Parę</word> + <word id="word_82" tei_id="morph_2.8.2-seg">kilometrów</word> + <word id="word_83" tei_id="morph_2.8.3-seg">od</word> + <word id="word_84" tei_id="morph_2.8.4-seg">Ejszyszek</word> + <word id="word_85" tei_id="morph_2.8.5-seg">,</word> + <word id="word_86" tei_id="morph_2.8.6-seg">w</word> + <word id="word_87" tei_id="morph_2.8.7-seg">Korkucianach</word> + <word id="word_88" tei_id="morph_2.8.8-seg">(</word> + <word id="word_89" tei_id="morph_2.8.9-seg">w</word> + <word id="word_90" tei_id="morph_2.8.10-seg">folwarku</word> + <word id="word_91" tei_id="morph_2.8.11-seg">Lebiedniki</word> + <word id="word_92" tei_id="morph_2.8.12-seg">)</word> + <word id="word_93" tei_id="morph_2.8.13-seg">,</word> + <word id="word_94" tei_id="morph_2.8.14-seg">żołnierz</word> + <word id="word_95" tei_id="morph_2.8.15-seg">AK</word> + <word id="word_96" tei_id="morph_2.8.16-seg">Kazimierz</word> + <word id="word_97" tei_id="morph_2.8.17-seg">Korkuć</word> + <word id="word_98" tei_id="morph_2.8.18-seg">w</word> + <word id="word_99" tei_id="morph_2.8.19-seg">czasie</word> + <word id="word_100" tei_id="morph_2.8.20-seg">wojny</word> + <word id="word_101" 
tei_id="morph_2.8.21-seg">w</word> + <word id="word_102" tei_id="morph_2.8.22-seg">swoim</word> + <word id="word_103" tei_id="morph_2.8.23-seg">domu</word> + <word id="word_104" tei_id="morph_2.8.24-seg">przechowywał</word> + <word id="word_105" tei_id="morph_2.8.25-seg">28</word> + <word id="word_106" tei_id="morph_2.8.26-seg">Żydów</word> + <word id="word_107" tei_id="morph_2.8.27-seg">.</word> + <word id="word_108" tei_id="morph_2.9.1-seg">Od</word> + <word id="word_109" tei_id="morph_2.9.2-seg">studni</word> + <word id="word_110" tei_id="morph_2.9.3-seg">do</word> + <word id="word_111" tei_id="morph_2.9.4-seg">piwnic</word> + <word id="word_112" tei_id="morph_2.9.5-seg">domu</word> + <word id="word_113" tei_id="morph_2.9.6-seg">był</word> + <word id="word_114" tei_id="morph_2.9.7-seg">przekopany</word> + <word id="word_115" tei_id="morph_2.9.8-seg">tunel</word> + <word id="word_116" tei_id="morph_2.9.9-seg">,</word> + <word id="word_117" tei_id="morph_2.9.10-seg">dzięki</word> + <word id="word_118" tei_id="morph_2.9.11-seg">czemu</word> + <word id="word_119" tei_id="morph_2.9.12-seg">mieli</word> + <word id="word_120" tei_id="morph_2.9.13-seg">wodę</word> + <word id="word_121" tei_id="morph_2.9.14-seg">.</word> + <word id="word_122" tei_id="morph_2.10.1-seg">Natomiast</word> + <word id="word_123" tei_id="morph_2.10.2-seg">w</word> + <word id="word_124" tei_id="morph_2.10.3-seg">skali</word> + <word id="word_125" tei_id="morph_2.10.4-seg">siatki</word> + <word id="word_126" tei_id="morph_2.10.5-seg">AK</word> + <word id="word_127" tei_id="morph_2.10.6-seg">Kazimierz</word> + <word id="word_128" tei_id="morph_2.10.7-seg">Korkuć</word> + <word id="word_129" tei_id="morph_2.10.8-seg">przechowywał</word> + <word id="word_130" tei_id="morph_2.10.9-seg">około</word> + <word id="word_131" tei_id="morph_2.10.10-seg">70</word> + <word id="word_132" tei_id="morph_2.10.11-seg">Żydów</word> + <word id="word_133" tei_id="morph_2.10.12-seg">.</word> + <word id="word_134" 
tei_id="morph_2.11.1-seg">Rodzina</word> + <word id="word_135" tei_id="morph_2.11.2-seg">Świeczków</word> + <word id="word_136" tei_id="morph_2.11.3-seg">również</word> + <word id="word_137" tei_id="morph_2.11.4-seg">przechowywała</word> + <word id="word_138" tei_id="morph_2.11.5-seg">Żydów</word> + <word id="word_139" tei_id="morph_2.11.6-seg">.</word> + <word id="word_140" tei_id="morph_2.12.1-seg">W</word> + <word id="word_141" tei_id="morph_2.12.2-seg">tamtych</word> + <word id="word_142" tei_id="morph_2.12.3-seg">stronach</word> + <word id="word_143" tei_id="morph_2.12.4-seg">liczne</word> + <word id="word_144" tei_id="morph_2.12.5-seg">rodziny</word> + <word id="word_145" tei_id="morph_2.12.6-seg">polskie</word> + <word id="word_146" tei_id="morph_2.12.7-seg">postępowały</word> + <word id="word_147" tei_id="morph_2.12.8-seg">podobnie</word> + <word id="word_148" tei_id="morph_2.12.9-seg" lastinpar="true">.</word> + <word id="word_149" tei_id="morph_3.13.1-seg">Prawdą</word> + <word id="word_150" tei_id="morph_3.13.2-seg">jest</word> + <word id="word_151" tei_id="morph_3.13.3-seg">również</word> + <word id="word_152" tei_id="morph_3.13.4-seg">i</word> + <word id="word_153" tei_id="morph_3.13.5-seg">to</word> + <word id="word_154" tei_id="morph_3.13.6-seg">,</word> + <word id="word_155" tei_id="morph_3.13.7-seg">że</word> + <word id="word_156" tei_id="morph_3.13.8-seg">Żydzi</word> + <word id="word_157" tei_id="morph_3.13.9-seg">za</word> + <word id="word_158" tei_id="morph_3.13.10-seg">swe</word> + <word id="word_159" tei_id="morph_3.13.11-seg">przechowanie</word> + <word id="word_160" tei_id="morph_3.13.12-seg">płacili</word> + <word id="word_161" tei_id="morph_3.13.13-seg">.</word> + <word id="word_162" tei_id="morph_3.14.1-seg">Płacili</word> + <word id="word_163" tei_id="morph_3.14.2-seg">za</word> + <word id="word_164" tei_id="morph_3.14.3-seg">utrzymanie</word> + <word id="word_165" tei_id="morph_3.14.4-seg">i</word> + <word id="word_166" 
tei_id="morph_3.14.5-seg">chyba</word> + <word id="word_167" tei_id="morph_3.14.6-seg">jeszcze</word> + <word id="word_168" tei_id="morph_3.14.7-seg">-</word> + <word id="word_169" tei_id="morph_3.14.8-seg">za</word> + <word id="word_170" tei_id="morph_3.14.9-seg">ryzyko</word> + <word id="word_171" tei_id="morph_3.14.10-seg">.</word> + <word id="word_172" tei_id="morph_3.15.1-seg">O</word> + <word id="word_173" tei_id="morph_3.15.2-seg">tym</word> + <word id="word_174" tei_id="morph_3.15.3-seg">dzisiaj</word> + <word id="word_175" tei_id="morph_3.15.4-seg">raczej</word> + <word id="word_176" tei_id="morph_3.15.5-seg">tu</word> + <word id="word_177" tei_id="morph_3.15.6-seg">się</word> + <word id="word_178" tei_id="morph_3.15.7-seg">nie</word> + <word id="word_179" tei_id="morph_3.15.8-seg">mówi</word> + <word id="word_180" tei_id="morph_3.15.9-seg">,</word> + <word id="word_181" tei_id="morph_3.15.10-seg">ale</word> + <word id="word_182" tei_id="morph_3.15.11-seg">prawdopodobnie</word> + <word id="word_183" tei_id="morph_3.15.12-seg">różnie</word> + <word id="word_184" tei_id="morph_3.15.13-seg">z</word> + <word id="word_185" tei_id="morph_3.15.14-seg">tym</word> + <word id="word_186" tei_id="morph_3.15.15-seg">było</word> + <word id="word_187" tei_id="morph_3.15.16-seg">:</word> + <word id="word_188" tei_id="morph_3.15.17-seg">jedni</word> + <word id="word_189" tei_id="morph_3.15.18-seg">za</word> + <word id="word_190" tei_id="morph_3.15.19-seg">pieniądze</word> + <word id="word_191" tei_id="morph_3.15.20-seg">,</word> + <word id="word_192" tei_id="morph_3.15.21-seg">inni</word> + <word id="word_193" tei_id="morph_3.15.22-seg">-</word> + <word id="word_194" tei_id="morph_3.15.23-seg">z</word> + <word id="word_195" tei_id="morph_3.15.24-seg">odruchu</word> + <word id="word_196" tei_id="morph_3.15.25-seg">serca</word> + <word id="word_197" tei_id="morph_3.15.26-seg">.</word> + <word id="word_198" tei_id="morph_3.16.1-seg">Ryzykowali</word> + <word id="word_199" 
tei_id="morph_3.16.2-seg">i</word> + <word id="word_200" tei_id="morph_3.16.3-seg">Polacy</word> + <word id="word_201" tei_id="morph_3.16.4-seg">,</word> + <word id="word_202" tei_id="morph_3.16.5-seg">i</word> + <word id="word_203" tei_id="morph_3.16.6-seg">Żydzi</word> + <word id="word_204" tei_id="morph_3.16.7-seg">.</word> + <word id="word_205" tei_id="morph_3.17.1-seg">Te</word> + <word id="word_206" tei_id="morph_3.17.2-seg">rachunki</word> + <word id="word_207" tei_id="morph_3.17.3-seg">mogły</word> + <word id="word_208" tei_id="morph_3.17.4-seg">wyglądać</word> + <word id="word_209" tei_id="morph_3.17.5-seg">bardzo</word> + <word id="word_210" tei_id="morph_3.17.6-seg">różnie</word> + <word id="word_211" tei_id="morph_3.17.7-seg" lastinpar="true">.</word> + <word id="word_212" tei_id="morph_4.18.1-seg">Mieszkam</word> + <word id="word_213" tei_id="morph_4.18.2-seg">w</word> + <word id="word_214" tei_id="morph_4.18.3-seg">jednej</word> + <word id="word_215" tei_id="morph_4.18.4-seg">z</word> + <word id="word_216" tei_id="morph_4.18.5-seg">podwileńskich</word> + <word id="word_217" tei_id="morph_4.18.6-seg">wsi</word> + <word id="word_218" tei_id="morph_4.18.7-seg">.</word> + <word id="word_219" tei_id="morph_4.19.1-seg">Otóż</word> + <word id="word_220" tei_id="morph_4.19.2-seg">w</word> + <word id="word_221" tei_id="morph_4.19.3-seg">tej</word> + <word id="word_222" tei_id="morph_4.19.4-seg">mojej</word> + <word id="word_223" tei_id="morph_4.19.5-seg">wsi</word> + <word id="word_224" tei_id="morph_4.19.6-seg">pewien</word> + <word id="word_225" tei_id="morph_4.19.7-seg">gospodarz</word> + <word id="word_226" tei_id="morph_4.19.8-seg">-</word> + <word id="word_227" tei_id="morph_4.19.9-seg">Polak</word> + <word id="word_228" tei_id="morph_4.19.10-seg">-</word> + <word id="word_229" tei_id="morph_4.19.11-seg">przechowywał</word> + <word id="word_230" tei_id="morph_4.19.12-seg">w</word> + <word id="word_231" tei_id="morph_4.19.13-seg">czasie</word> + <word 
id="word_232" tei_id="morph_4.19.14-seg">wojny</word> + <word id="word_233" tei_id="morph_4.19.15-seg">młodą</word> + <word id="word_234" tei_id="morph_4.19.16-seg">Żydówkę</word> + <word id="word_235" tei_id="morph_4.19.17-seg">.</word> + <word id="word_236" tei_id="morph_4.20.1-seg">Spodobała</word> + <word id="word_237" tei_id="morph_4.20.2-seg">mu</word> + <word id="word_238" tei_id="morph_4.20.3-seg">się</word> + <word id="word_239" tei_id="morph_4.20.4-seg">,</word> + <word id="word_240" tei_id="morph_4.20.5-seg">z</word> + <word id="word_241" tei_id="morph_4.20.6-seg">czego</word> + <word id="word_242" tei_id="morph_4.20.7-seg">wynikł</word> + <word id="word_243" tei_id="morph_4.20.8-seg">dramat</word> + <word id="word_244" tei_id="morph_4.20.9-seg">.</word> + <word id="word_245" tei_id="morph_4.21.1-seg">Zdenerwowana</word> + <word id="word_246" tei_id="morph_4.21.2-seg">żona</word> + <word id="word_247" tei_id="morph_4.21.3-seg">doniosła</word> + <word id="word_248" tei_id="morph_4.21.4-seg">na</word> + <word id="word_249" tei_id="morph_4.21.5-seg">policję</word> + <word id="word_250" tei_id="morph_4.21.6-seg">.</word> + <word id="word_251" tei_id="morph_4.22.1-seg">Aresztowano</word> + <word id="word_252" tei_id="morph_4.22.2-seg">Żydówkę</word> + <word id="word_253" tei_id="morph_4.22.3-seg">razem</word> + <word id="word_254" tei_id="morph_4.22.4-seg">z</word> + <word id="word_255" tei_id="morph_4.22.5-seg">gospodarzem</word> + <word id="word_256" tei_id="morph_4.22.6-seg">,</word> + <word id="word_257" tei_id="morph_4.22.7-seg">przerażona</word> + <word id="word_258" tei_id="morph_4.22.8-seg">kobieta</word> + <word id="word_259" tei_id="morph_4.22.9-seg">próbowała</word> + <word id="word_260" tei_id="morph_4.22.10-seg">ocalić</word> + <word id="word_261" tei_id="morph_4.22.11-seg">męża</word> + <word id="word_262" tei_id="morph_4.22.12-seg">.</word> + <word id="word_263" tei_id="morph_4.23.1-seg">Zanim</word> + <word id="word_264" 
tei_id="morph_4.23.2-seg">uzbierała</word> + <word id="word_265" tei_id="morph_4.23.3-seg">potrzebną</word> + <word id="word_266" tei_id="morph_4.23.4-seg">sumę</word> + <word id="word_267" tei_id="morph_4.23.5-seg">na</word> + <word id="word_268" tei_id="morph_4.23.6-seg">łapówkę</word> + <word id="word_269" tei_id="morph_4.23.7-seg">,</word> + <word id="word_270" tei_id="morph_4.23.8-seg">było</word> + <word id="word_271" tei_id="morph_4.23.9-seg">już</word> + <word id="word_272" tei_id="morph_4.23.10-seg">za</word> + <word id="word_273" tei_id="morph_4.23.11-seg">późno</word> + <word id="word_274" tei_id="morph_4.23.12-seg">-</word> + <word id="word_275" tei_id="morph_4.23.13-seg">rozstrzelano</word> + <word id="word_276" tei_id="morph_4.23.14-seg">nie</word> + <word id="word_277" tei_id="morph_4.23.15-seg">tylko</word> + <word id="word_278" tei_id="morph_4.23.16-seg">Żydówkę</word> + <word id="word_279" tei_id="morph_4.23.17-seg">,</word> + <word id="word_280" tei_id="morph_4.23.18-seg">ale</word> + <word id="word_281" tei_id="morph_4.23.19-seg">i</word> + <word id="word_282" tei_id="morph_4.23.20-seg">gospodarza</word> + <word id="word_283" tei_id="morph_4.23.21-seg">.</word> + <word id="word_284" tei_id="morph_4.24.1-seg">Czy</word> + <word id="word_285" tei_id="morph_4.24.2-seg">żonę</word> + <word id="word_286" tei_id="morph_4.24.3-seg">tego</word> + <word id="word_287" tei_id="morph_4.24.4-seg">straceńca</word> + <word id="word_288" tei_id="morph_4.24.5-seg">można</word> + <word id="word_289" tei_id="morph_4.24.6-seg">nazwać</word> + <word id="word_290" tei_id="morph_4.24.7-seg">antysemitką</word> + <word id="word_291" tei_id="morph_4.24.8-seg" lastinpar="true">?</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_sys/8.mmax a/src/test/resources/teksty_mmax/teksty_sys/8.mmax new file mode 100755 index 0000000..849f740 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/8.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> 
+ <words>8_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_sys/8_mentions.xml a/src/test/resources/teksty_mmax/teksty_sys/8_mentions.xml new file mode 100755 index 0000000..1fd4dee --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/8_mentions.xml @@ -0,0 +1,103 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_4" mmax_level="mention" mention_head="owoców" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_2" span="word_3..word_4" mmax_level="mention" mention_head="g" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_3" span="word_6" mmax_level="mention" mention_head="Kuc" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_28" mmax_level="mention" mention_head="lekarzy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_31..word_32" mmax_level="mention" mention_head="dieta" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_36..word_38" mmax_level="mention" mention_head="menu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_7" span="word_45" mmax_level="mention" mention_head="kurczaka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_8" span="word_44..word_45" mmax_level="mention" mention_head="pierś" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_9" span="word_48" mmax_level="mention" mention_head="folii" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_10" span="word_50" mmax_level="mention" mention_head="sałatka" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_11" span="word_58" mmax_level="mention" mention_head="oliwą" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_12" span="word_55..word_58" mmax_level="mention" mention_head="octem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_60..word_61" mmax_level="mention" mention_head="jabłko" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_14" span="word_63" mmax_level="mention" mention_head="kolacja" mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_15" span="word_66..word_67" mmax_level="mention" mention_head="jajka" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_16" span="word_69" mmax_level="mention" mention_head="pomidor" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_17" span="word_72" mmax_level="mention" mention_head="serka" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_18" span="word_71..word_72" mmax_level="mention" mention_head="trójkąt" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_19" span="word_75" mmax_level="mention" mention_head="gruszka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_77" mmax_level="mention" mention_head="dzień" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_21" span="word_79" mmax_level="mention" mention_head="obiad" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_22" span="word_81" mmax_level="mention" mention_head="królik" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_23" span="word_83" mmax_level="mention" mention_head="potrawce" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_24" span="word_85" mmax_level="mention" mention_head="surówka" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_25" span="word_89" 
mmax_level="mention" mention_head="chrzanu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_26" span="word_87..word_89" mmax_level="mention" mention_head="marchewki" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_27" span="word_91" mmax_level="mention" mention_head="brzoskwinia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_28" span="word_93" mmax_level="mention" mention_head="kolacja" mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_29" span="word_95" mmax_level="mention" mention_head="befsztyk" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_30" span="word_97" mmax_level="mention" mention_head="polędwicy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_31" span="word_100" mmax_level="mention" mention_head="tłuszczu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_32" span="word_105" mmax_level="mention" mention_head="sosem" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_33" span="word_109" mmax_level="mention" mention_head="mandarynki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_34" span="word_114" mmax_level="mention" mention_head="kura" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_35" span="word_116" mmax_level="mention" mention_head="rosołu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_118..word_119" mmax_level="mention" mention_head="kapusta" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_37" span="word_121" mmax_level="mention" mention_head="zasmażki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_123..word_124" mmax_level="mention" mention_head="grejpfrut" mention_group="empty" 
near_identity="empty"></markable> + <markable id="markable_39" span="word_126" mmax_level="mention" mention_head="kolacja" mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_40" span="word_128" mmax_level="mention" mention_head="ryba" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_41" span="word_130" mmax_level="mention" mention_head="warzywach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_42" span="word_132" mmax_level="mention" mention_head="surówka" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_43" span="word_134" mmax_level="mention" mention_head="buraczków" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_44" span="word_136" mmax_level="mention" mention_head="pomarańcza" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_138" mmax_level="mention" mention_head="dzień" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_46" span="word_141..word_142" mmax_level="mention" mention_head="wieprzowina" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_47" span="word_144" mmax_level="mention" mention_head="brokuły" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_147" mmax_level="mention" mention_head="parze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_49" span="word_151" mmax_level="mention" mention_head="kolacja" mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_50" span="word_154..word_155" mmax_level="mention" mention_head="ryżu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_51" span="word_153..word_155" mmax_level="mention" mention_head="szklanka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_52" 
span="word_157..word_158" mmax_level="mention" mention_head="mlekiem" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_161..word_163" mmax_level="mention" mention_head="sera" mention_group="set_15" near_identity="empty"></markable> + <markable id="markable_54" span="word_160..word_163" mmax_level="mention" mention_head="plaster" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_166" mmax_level="mention" mention_head="winogron" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_168" mmax_level="mention" mention_head="dzień" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_57" span="word_170" mmax_level="mention" mention_head="obiad" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_58" span="word_173" mmax_level="mention" mention_head="mięsa" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_172..word_173" mmax_level="mention" mention_head="sztuka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_175..word_176" mmax_level="mention" mention_head="buraczki" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_61" span="word_178..word_180" mmax_level="mention" mention_head="śliwek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_62" span="word_182" mmax_level="mention" mention_head="kolacja" mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_63" span="word_185" mmax_level="mention" mention_head="jajka" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_64" span="word_190" mmax_level="mention" mention_head="brie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_189..word_190" mmax_level="mention" 
mention_head="serka" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_66" span="word_192" mmax_level="mention" mention_head="banan" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_194" mmax_level="mention" mention_head="dzień" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_68" span="word_196" mmax_level="mention" mention_head="obiad" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_69" span="word_198" mmax_level="mention" mention_head="ryba" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_70" span="word_201" mmax_level="mention" mention_head="folii" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_71" span="word_203" mmax_level="mention" mention_head="surówka" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_72" span="word_208" mmax_level="mention" mention_head="marchewki" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_73" span="word_205..word_208" mmax_level="mention" mention_head="kapusty" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_74" span="word_212..word_213" mmax_level="mention" mention_head="sosie" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_75" span="word_215" mmax_level="mention" mention_head="kolacja" mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_76" span="word_217" mmax_level="mention" mention_head="pieczeń" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_77" span="word_219" mmax_level="mention" mention_head="królika" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_78" span="word_221" mmax_level="mention" mention_head="sałatka" mention_group="set_9" near_identity="empty"></markable> + <markable 
id="markable_79" span="word_225" mmax_level="mention" mention_head="papryki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_80" span="word_223..word_225" mmax_level="mention" mention_head="pomidorów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_81" span="word_228" mmax_level="mention" mention_head="serka" mention_group="set_10" near_identity="empty"></markable> + <markable id="markable_82" span="word_227..word_228" mmax_level="mention" mention_head="trójkąt" mention_group="set_11" near_identity="empty"></markable> + <markable id="markable_83" span="word_231" mmax_level="mention" mention_head="kiwi" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_84" span="word_233" mmax_level="mention" mention_head="dzień" mention_group="set_12" near_identity="empty"></markable> + <markable id="markable_85" span="word_235" mmax_level="mention" mention_head="obiad" mention_group="set_13" near_identity="empty"></markable> + <markable id="markable_86" span="word_237" mmax_level="mention" mention_head="cielęcina" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_87" span="word_241" mmax_level="mention" mention_head="ziół" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_88" span="word_243..word_244" mmax_level="mention" mention_head="fasolka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_89" span="word_247" mmax_level="mention" mention_head="masła" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_90" span="word_246..word_247" mmax_level="mention" mention_head="odrobiną" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_91" span="word_249" mmax_level="mention" mention_head="jogurt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_92" span="word_251" mmax_level="mention" mention_head="kolacja" 
mention_group="set_14" near_identity="empty"></markable> + <markable id="markable_93" span="word_253" mmax_level="mention" mention_head="zapiekanka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_94" span="word_255" mmax_level="mention" mention_head="ziemniaków" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_95" span="word_257" mmax_level="mention" mention_head="odrobiny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_96" span="word_262" mmax_level="mention" mention_head="szynki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_97" span="word_259..word_262" mmax_level="mention" mention_head="sera" mention_group="set_15" near_identity="empty"></markable> + <markable id="markable_98" span="word_264..word_265" mmax_level="mention" mention_head="sałata" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_99" span="word_267" mmax_level="mention" mention_head="rzodkiewkami" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_sys/8_words.xml a/src/test/resources/teksty_mmax/teksty_sys/8_words.xml new file mode 100755 index 0000000..db237a8 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/8_words.xml @@ -0,0 +1,273 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">-</word> + <word id="word_2" tei_id="morph_1.1.2-seg">150</word> + <word id="word_3" tei_id="morph_1.1.3-seg">g</word> + <word id="word_4" tei_id="morph_1.1.4-seg" lastinpar="true">owoców</word> + <word id="word_5" tei_id="morph_2.2.1-seg">Joanna</word> + <word id="word_6" tei_id="morph_2.2.2-seg" lastinpar="true">Kuc</word> + <word id="word_7" tei_id="morph_3.3.1-seg">(</word> + <word id="word_8" tei_id="morph_3.3.2-seg">PAI</word> + <word id="word_9" tei_id="morph_3.3.3-seg" 
lastinpar="true">)</word> + <word id="word_10" tei_id="morph_4.4.1-seg">Diety</word> + <word id="word_11" tei_id="morph_4.4.2-seg">są</word> + <word id="word_12" tei_id="morph_4.4.3-seg">różne</word> + <word id="word_13" tei_id="morph_4.4.4-seg">,</word> + <word id="word_14" tei_id="morph_4.4.5-seg">ścisłe</word> + <word id="word_15" tei_id="morph_4.4.6-seg">i</word> + <word id="word_16" tei_id="morph_4.4.7-seg">urozmaicone</word> + <word id="word_17" tei_id="morph_4.4.8-seg">,</word> + <word id="word_18" tei_id="morph_4.4.9-seg">eliminacyjne</word> + <word id="word_19" tei_id="morph_4.4.10-seg">,</word> + <word id="word_20" tei_id="morph_4.4.11-seg">dziwne</word> + <word id="word_21" tei_id="morph_4.4.12-seg">,</word> + <word id="word_22" tei_id="morph_4.4.13-seg">racjonalne</word> + <word id="word_23" tei_id="morph_4.4.14-seg">i</word> + <word id="word_24" tei_id="morph_4.4.15-seg">wreszcie</word> + <word id="word_25" tei_id="morph_4.4.16-seg">te</word> + <word id="word_26" tei_id="morph_4.4.17-seg">zalecane</word> + <word id="word_27" tei_id="morph_4.4.18-seg">przez</word> + <word id="word_28" tei_id="morph_4.4.19-seg">lekarzy</word> + <word id="word_29" tei_id="morph_4.4.20-seg">.</word> + <word id="word_30" tei_id="morph_4.5.1-seg">Dziś</word> + <word id="word_31" tei_id="morph_4.5.2-seg">dieta</word> + <word id="word_32" tei_id="morph_4.5.3-seg">burgundzka</word> + <word id="word_33" tei_id="morph_4.5.4-seg" lastinpar="true">.</word> + <word id="word_34" tei_id="morph_5.6.1-seg">Jak</word> + <word id="word_35" tei_id="morph_5.6.2-seg">wygląda</word> + <word id="word_36" tei_id="morph_5.6.3-seg">przykładowe</word> + <word id="word_37" tei_id="morph_5.6.4-seg">tygodniowe</word> + <word id="word_38" tei_id="morph_5.6.5-seg">menu</word> + <word id="word_39" tei_id="morph_5.6.6-seg" lastinpar="true">?</word> + <word id="word_40" tei_id="morph_6.7.1-seg">1</word> + <word id="word_41" tei_id="morph_6.7.2-seg" lastinpar="true">dzień</word> + <word id="word_42" 
tei_id="morph_7.8.1-seg">-obiad</word> + <word id="word_43" tei_id="morph_7.8.2-seg">:</word> + <word id="word_44" tei_id="morph_7.8.3-seg">pierś</word> + <word id="word_45" tei_id="morph_7.8.4-seg">kurczaka</word> + <word id="word_46" tei_id="morph_7.8.5-seg">pieczona</word> + <word id="word_47" tei_id="morph_7.8.6-seg">w</word> + <word id="word_48" tei_id="morph_7.8.7-seg">folii</word> + <word id="word_49" tei_id="morph_7.8.8-seg">,</word> + <word id="word_50" tei_id="morph_7.8.9-seg">sałatka</word> + <word id="word_51" tei_id="morph_7.8.10-seg">z</word> + <word id="word_52" tei_id="morph_7.8.11-seg">czerwonej</word> + <word id="word_53" tei_id="morph_7.8.12-seg">kapusty</word> + <word id="word_54" tei_id="morph_7.8.13-seg">doprawiona</word> + <word id="word_55" tei_id="morph_7.8.14-seg">octem</word> + <word id="word_56" tei_id="morph_7.8.15-seg">winnym</word> + <word id="word_57" tei_id="morph_7.8.16-seg">i</word> + <word id="word_58" tei_id="morph_7.8.17-seg">oliwą</word> + <word id="word_59" tei_id="morph_7.8.18-seg">,</word> + <word id="word_60" tei_id="morph_7.8.19-seg">pieczone</word> + <word id="word_61" tei_id="morph_7.8.20-seg" lastinpar="true">jabłko</word> + <word id="word_62" tei_id="morph_8.9.1-seg">-</word> + <word id="word_63" tei_id="morph_8.9.2-seg">kolacja</word> + <word id="word_64" tei_id="morph_8.9.3-seg">:</word> + <word id="word_65" tei_id="morph_8.9.4-seg">2</word> + <word id="word_66" tei_id="morph_8.9.5-seg">sadzone</word> + <word id="word_67" tei_id="morph_8.9.6-seg">jajka</word> + <word id="word_68" tei_id="morph_8.9.7-seg">,</word> + <word id="word_69" tei_id="morph_8.9.8-seg">pomidor</word> + <word id="word_70" tei_id="morph_8.9.9-seg">,</word> + <word id="word_71" tei_id="morph_8.9.10-seg">trójkąt</word> + <word id="word_72" tei_id="morph_8.9.11-seg">serka</word> + <word id="word_73" tei_id="morph_8.9.12-seg">topionego</word> + <word id="word_74" tei_id="morph_8.9.13-seg">,</word> + <word id="word_75" tei_id="morph_8.9.14-seg" 
lastinpar="true">gruszka</word> + <word id="word_76" tei_id="morph_9.10.1-seg">2</word> + <word id="word_77" tei_id="morph_9.10.2-seg" lastinpar="true">dzień</word> + <word id="word_78" tei_id="morph_10.11.1-seg">-</word> + <word id="word_79" tei_id="morph_10.11.2-seg">obiad</word> + <word id="word_80" tei_id="morph_10.11.3-seg">:</word> + <word id="word_81" tei_id="morph_10.11.4-seg">królik</word> + <word id="word_82" tei_id="morph_10.11.5-seg">w</word> + <word id="word_83" tei_id="morph_10.11.6-seg">potrawce</word> + <word id="word_84" tei_id="morph_10.11.7-seg">,</word> + <word id="word_85" tei_id="morph_10.11.8-seg">surówka</word> + <word id="word_86" tei_id="morph_10.11.9-seg">z</word> + <word id="word_87" tei_id="morph_10.11.10-seg">marchewki</word> + <word id="word_88" tei_id="morph_10.11.11-seg">i</word> + <word id="word_89" tei_id="morph_10.11.12-seg">chrzanu</word> + <word id="word_90" tei_id="morph_10.11.13-seg">,</word> + <word id="word_91" tei_id="morph_10.11.14-seg" lastinpar="true">brzoskwinia</word> + <word id="word_92" tei_id="morph_11.12.1-seg">-</word> + <word id="word_93" tei_id="morph_11.12.2-seg">kolacja</word> + <word id="word_94" tei_id="morph_11.12.3-seg">:</word> + <word id="word_95" tei_id="morph_11.12.4-seg">befsztyk</word> + <word id="word_96" tei_id="morph_11.12.5-seg">z</word> + <word id="word_97" tei_id="morph_11.12.6-seg">polędwicy</word> + <word id="word_98" tei_id="morph_11.12.7-seg">usmażony</word> + <word id="word_99" tei_id="morph_11.12.8-seg">bez</word> + <word id="word_100" tei_id="morph_11.12.9-seg">tłuszczu</word> + <word id="word_101" tei_id="morph_11.12.10-seg">,</word> + <word id="word_102" tei_id="morph_11.12.11-seg">zielona</word> + <word id="word_103" tei_id="morph_11.12.12-seg">sałata</word> + <word id="word_104" tei_id="morph_11.12.13-seg">z</word> + <word id="word_105" tei_id="morph_11.12.14-seg">sosem</word> + <word id="word_106" tei_id="morph_11.12.15-seg">vinegrette</word> + <word id="word_107" 
tei_id="morph_11.12.16-seg">,</word> + <word id="word_108" tei_id="morph_11.12.17-seg">2</word> + <word id="word_109" tei_id="morph_11.12.18-seg" lastinpar="true">mandarynki</word> + <word id="word_110" tei_id="morph_12.13.1-seg">3</word> + <word id="word_111" tei_id="morph_12.13.2-seg" lastinpar="true">dzień</word> + <word id="word_112" tei_id="morph_13.14.1-seg">-obiad</word> + <word id="word_113" tei_id="morph_13.14.2-seg">:</word> + <word id="word_114" tei_id="morph_13.14.3-seg">kura</word> + <word id="word_115" tei_id="morph_13.14.4-seg">z</word> + <word id="word_116" tei_id="morph_13.14.5-seg">rosołu</word> + <word id="word_117" tei_id="morph_13.14.6-seg">,</word> + <word id="word_118" tei_id="morph_13.14.7-seg">gotowana</word> + <word id="word_119" tei_id="morph_13.14.8-seg">kapusta</word> + <word id="word_120" tei_id="morph_13.14.9-seg">bez</word> + <word id="word_121" tei_id="morph_13.14.10-seg">zasmażki</word> + <word id="word_122" tei_id="morph_13.14.11-seg">,</word> + <word id="word_123" tei_id="morph_13.14.12-seg">mały</word> + <word id="word_124" tei_id="morph_13.14.13-seg" lastinpar="true">grejpfrut</word> + <word id="word_125" tei_id="morph_14.15.1-seg">-</word> + <word id="word_126" tei_id="morph_14.15.2-seg">kolacja</word> + <word id="word_127" tei_id="morph_14.15.3-seg">:</word> + <word id="word_128" tei_id="morph_14.15.4-seg">ryba</word> + <word id="word_129" tei_id="morph_14.15.5-seg">w</word> + <word id="word_130" tei_id="morph_14.15.6-seg">warzywach</word> + <word id="word_131" tei_id="morph_14.15.7-seg">,</word> + <word id="word_132" tei_id="morph_14.15.8-seg">surówka</word> + <word id="word_133" tei_id="morph_14.15.9-seg">z</word> + <word id="word_134" tei_id="morph_14.15.10-seg">buraczków</word> + <word id="word_135" tei_id="morph_14.15.11-seg">,</word> + <word id="word_136" tei_id="morph_14.15.12-seg" lastinpar="true">pomarańcza</word> + <word id="word_137" tei_id="morph_15.16.1-seg">4</word> + <word id="word_138" 
tei_id="morph_15.16.2-seg" lastinpar="true">dzień</word> + <word id="word_139" tei_id="morph_16.17.1-seg">-obiad</word> + <word id="word_140" tei_id="morph_16.17.2-seg">:</word> + <word id="word_141" tei_id="morph_16.17.3-seg">pieczona</word> + <word id="word_142" tei_id="morph_16.17.4-seg">wieprzowina</word> + <word id="word_143" tei_id="morph_16.17.5-seg">,</word> + <word id="word_144" tei_id="morph_16.17.6-seg">brokuły</word> + <word id="word_145" tei_id="morph_16.17.7-seg">ugotowane</word> + <word id="word_146" tei_id="morph_16.17.8-seg">na</word> + <word id="word_147" tei_id="morph_16.17.9-seg">parze</word> + <word id="word_148" tei_id="morph_16.17.10-seg">,</word> + <word id="word_149" tei_id="morph_16.17.11-seg" lastinpar="true">jabłko</word> + <word id="word_150" tei_id="morph_17.18.1-seg">-</word> + <word id="word_151" tei_id="morph_17.18.2-seg">kolacja</word> + <word id="word_152" tei_id="morph_17.18.3-seg">:</word> + <word id="word_153" tei_id="morph_17.18.4-seg">szklanka</word> + <word id="word_154" tei_id="morph_17.18.5-seg">ugotowanego</word> + <word id="word_155" tei_id="morph_17.18.6-seg">ryżu</word> + <word id="word_156" tei_id="morph_17.18.7-seg">zalana</word> + <word id="word_157" tei_id="morph_17.18.8-seg">chudym</word> + <word id="word_158" tei_id="morph_17.18.9-seg">mlekiem</word> + <word id="word_159" tei_id="morph_17.18.10-seg">,</word> + <word id="word_160" tei_id="morph_17.18.11-seg">plaster</word> + <word id="word_161" tei_id="morph_17.18.12-seg">białego</word> + <word id="word_162" tei_id="morph_17.18.13-seg">chudego</word> + <word id="word_163" tei_id="morph_17.18.14-seg">sera</word> + <word id="word_164" tei_id="morph_17.18.15-seg">,</word> + <word id="word_165" tei_id="morph_17.18.16-seg">kiść</word> + <word id="word_166" tei_id="morph_17.18.17-seg" lastinpar="true">winogron</word> + <word id="word_167" tei_id="morph_18.19.1-seg">5</word> + <word id="word_168" tei_id="morph_18.19.2-seg" lastinpar="true">dzień</word> + <word 
id="word_169" tei_id="morph_19.20.1-seg">-</word> + <word id="word_170" tei_id="morph_19.20.2-seg">obiad</word> + <word id="word_171" tei_id="morph_19.20.3-seg">–</word> + <word id="word_172" tei_id="morph_19.20.4-seg">sztuka</word> + <word id="word_173" tei_id="morph_19.20.5-seg">mięsa</word> + <word id="word_174" tei_id="morph_19.20.6-seg">,</word> + <word id="word_175" tei_id="morph_19.20.7-seg">gotowane</word> + <word id="word_176" tei_id="morph_19.20.8-seg">buraczki</word> + <word id="word_177" tei_id="morph_19.20.9-seg">,</word> + <word id="word_178" tei_id="morph_19.20.10-seg">kilka</word> + <word id="word_179" tei_id="morph_19.20.11-seg">suszonych</word> + <word id="word_180" tei_id="morph_19.20.12-seg" lastinpar="true">śliwek</word> + <word id="word_181" tei_id="morph_20.21.1-seg">-</word> + <word id="word_182" tei_id="morph_20.21.2-seg">kolacja</word> + <word id="word_183" tei_id="morph_20.21.3-seg">-</word> + <word id="word_184" tei_id="morph_20.21.4-seg">2</word> + <word id="word_185" tei_id="morph_20.21.5-seg">jajka</word> + <word id="word_186" tei_id="morph_20.21.6-seg">na</word> + <word id="word_187" tei_id="morph_20.21.7-seg">miękko</word> + <word id="word_188" tei_id="morph_20.21.8-seg">,</word> + <word id="word_189" tei_id="morph_20.21.9-seg">serka</word> + <word id="word_190" tei_id="morph_20.21.10-seg">brie</word> + <word id="word_191" tei_id="morph_20.21.11-seg">,</word> + <word id="word_192" tei_id="morph_20.21.12-seg" lastinpar="true">banan</word> + <word id="word_193" tei_id="morph_21.22.1-seg">6</word> + <word id="word_194" tei_id="morph_21.22.2-seg" lastinpar="true">dzień</word> + <word id="word_195" tei_id="morph_22.23.1-seg">-</word> + <word id="word_196" tei_id="morph_22.23.2-seg">obiad</word> + <word id="word_197" tei_id="morph_22.23.3-seg">:</word> + <word id="word_198" tei_id="morph_22.23.4-seg">ryba</word> + <word id="word_199" tei_id="morph_22.23.5-seg">pieczona</word> + <word id="word_200" tei_id="morph_22.23.6-seg">w</word> + 
<word id="word_201" tei_id="morph_22.23.7-seg">folii</word> + <word id="word_202" tei_id="morph_22.23.8-seg">,</word> + <word id="word_203" tei_id="morph_22.23.9-seg">surówka</word> + <word id="word_204" tei_id="morph_22.23.10-seg">z</word> + <word id="word_205" tei_id="morph_22.23.11-seg">białej</word> + <word id="word_206" tei_id="morph_22.23.12-seg">kapusty</word> + <word id="word_207" tei_id="morph_22.23.13-seg">i</word> + <word id="word_208" tei_id="morph_22.23.14-seg">marchewki</word> + <word id="word_209" tei_id="morph_22.23.15-seg">,</word> + <word id="word_210" tei_id="morph_22.23.16-seg">gruszka</word> + <word id="word_211" tei_id="morph_22.23.17-seg">w</word> + <word id="word_212" tei_id="morph_22.23.18-seg">sosie</word> + <word id="word_213" tei_id="morph_22.23.19-seg" lastinpar="true">waniliowym</word> + <word id="word_214" tei_id="morph_23.24.1-seg">-</word> + <word id="word_215" tei_id="morph_23.24.2-seg">kolacja</word> + <word id="word_216" tei_id="morph_23.24.3-seg">:</word> + <word id="word_217" tei_id="morph_23.24.4-seg">pieczeń</word> + <word id="word_218" tei_id="morph_23.24.5-seg">z</word> + <word id="word_219" tei_id="morph_23.24.6-seg">królika</word> + <word id="word_220" tei_id="morph_23.24.7-seg">,</word> + <word id="word_221" tei_id="morph_23.24.8-seg">sałatka</word> + <word id="word_222" tei_id="morph_23.24.9-seg">z</word> + <word id="word_223" tei_id="morph_23.24.10-seg">pomidorów</word> + <word id="word_224" tei_id="morph_23.24.11-seg">i</word> + <word id="word_225" tei_id="morph_23.24.12-seg">papryki</word> + <word id="word_226" tei_id="morph_23.24.13-seg">,</word> + <word id="word_227" tei_id="morph_23.24.14-seg">trójkąt</word> + <word id="word_228" tei_id="morph_23.24.15-seg">serka</word> + <word id="word_229" tei_id="morph_23.24.16-seg">topionego</word> + <word id="word_230" tei_id="morph_23.24.17-seg">,</word> + <word id="word_231" tei_id="morph_23.24.18-seg" lastinpar="true">kiwi</word> + <word id="word_232" 
tei_id="morph_24.25.1-seg">7</word> + <word id="word_233" tei_id="morph_24.25.2-seg" lastinpar="true">dzień</word> + <word id="word_234" tei_id="morph_25.26.1-seg">-</word> + <word id="word_235" tei_id="morph_25.26.2-seg">obiad</word> + <word id="word_236" tei_id="morph_25.26.3-seg">:</word> + <word id="word_237" tei_id="morph_25.26.4-seg">cielęcina</word> + <word id="word_238" tei_id="morph_25.26.5-seg">pieczona</word> + <word id="word_239" tei_id="morph_25.26.6-seg">z</word> + <word id="word_240" tei_id="morph_25.26.7-seg">dodatkiem</word> + <word id="word_241" tei_id="morph_25.26.8-seg">ziół</word> + <word id="word_242" tei_id="morph_25.26.9-seg">,</word> + <word id="word_243" tei_id="morph_25.26.10-seg">fasolka</word> + <word id="word_244" tei_id="morph_25.26.11-seg">szparagowa</word> + <word id="word_245" tei_id="morph_25.26.12-seg">z</word> + <word id="word_246" tei_id="morph_25.26.13-seg">odrobiną</word> + <word id="word_247" tei_id="morph_25.26.14-seg">masła</word> + <word id="word_248" tei_id="morph_25.26.15-seg">,</word> + <word id="word_249" tei_id="morph_25.26.16-seg" lastinpar="true">jogurt</word> + <word id="word_250" tei_id="morph_26.27.1-seg">-</word> + <word id="word_251" tei_id="morph_26.27.2-seg">kolacja</word> + <word id="word_252" tei_id="morph_26.27.3-seg">:</word> + <word id="word_253" tei_id="morph_26.27.4-seg">zapiekanka</word> + <word id="word_254" tei_id="morph_26.27.5-seg">z</word> + <word id="word_255" tei_id="morph_26.27.6-seg">ziemniaków</word> + <word id="word_256" tei_id="morph_26.27.7-seg">,</word> + <word id="word_257" tei_id="morph_26.27.8-seg">odrobiny</word> + <word id="word_258" tei_id="morph_26.27.9-seg">startego</word> + <word id="word_259" tei_id="morph_26.27.10-seg">żółtego</word> + <word id="word_260" tei_id="morph_26.27.11-seg">sera</word> + <word id="word_261" tei_id="morph_26.27.12-seg">i</word> + <word id="word_262" tei_id="morph_26.27.13-seg">szynki</word> + <word id="word_263" tei_id="morph_26.27.14-seg">,</word> + 
<word id="word_264" tei_id="morph_26.27.15-seg">sałata</word> + <word id="word_265" tei_id="morph_26.27.16-seg">zielona</word> + <word id="word_266" tei_id="morph_26.27.17-seg">z</word> + <word id="word_267" tei_id="morph_26.27.18-seg">rzodkiewkami</word> + <word id="word_268" tei_id="morph_26.27.19-seg">,</word> + <word id="word_269" tei_id="morph_26.27.20-seg" lastinpar="true">pomarańcza</word> +</words> diff --git b/src/test/resources/teksty_mmax/teksty_sys/9.mmax a/src/test/resources/teksty_mmax/teksty_sys/9.mmax new file mode 100755 index 0000000..92026d2 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/9.mmax @@ -0,0 +1,4 @@ +<?xml version="1.0" ?> +<mmax_project> + <words>9_words.xml</words> +</mmax_project> diff --git b/src/test/resources/teksty_mmax/teksty_sys/9_mentions.xml a/src/test/resources/teksty_mmax/teksty_sys/9_mentions.xml new file mode 100755 index 0000000..08a7965 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/9_mentions.xml @@ -0,0 +1,89 @@ +<?xml version="1.0" ?> +<!DOCTYPE markables SYSTEM "markables.dtd"> +<markables xmlns="www.eml.org/NameSpaces/mention"> + <markable id="markable_1" span="word_1" mmax_level="mention" mention_head="Ogród" mention_group="set_3" near_identity="empty"></markable> + <markable id="markable_2" span="word_6" mmax_level="mention" mention_head="balkonie" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_3" span="word_8..word_9" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_4" span="word_15" mmax_level="mention" mention_head="powierzchni" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_5" span="word_16" mmax_level="mention" mention_head="niewiele" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_6" span="word_18..word_19" mmax_level="mention" mention_head="metra" mention_group="empty" near_identity="empty"></markable> + 
<markable id="markable_7" span="word_28" mmax_level="mention" mention_head="skrzynek" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_8" span="word_30" mmax_level="mention" mention_head="kwiatami" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_9" span="word_32..word_33" mmax_level="mention" mention_head="doniczek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_10" span="word_37..word_38" mmax_level="mention" mention_head="balkonu" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_11" span="word_36..word_38" mmax_level="mention" mention_head="Utrzymanie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_12" span="word_43" mmax_level="mention" mention_head="serca" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_13" span="word_40..word_43" mmax_level="mention" mention_head="pracy" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_14" span="word_52" mmax_level="mention" mention_head="kwiatach" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_15" span="word_54" mmax_level="mention" mention_head="córka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_16" span="word_57" mmax_level="mention" mention_head="Stańczyk" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_17" span="word_56..word_57" mmax_level="mention" mention_head="Grażyna" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_18" span="word_61..word_62" mmax_level="mention" mention_head="ogródka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_19" span="word_70..word_72" mmax_level="mention" mention_head="chwilę" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_20" span="word_76" mmax_level="mention" 
mention_head="Nasiona" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_21" span="word_84" mmax_level="mention" mention_head="marca" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_22" span="word_82..word_84" mmax_level="mention" mention_head="lutego" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_23" span="word_89" mmax_level="mention" mention_head="nich" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_24" span="word_90" mmax_level="mention" mention_head="roślinki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_25" span="word_94" mmax_level="mention" mention_head="skrzynek" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_26" span="word_97" mmax_level="mention" mention_head="Skrzynki" mention_group="set_0" near_identity="empty"></markable> + <markable id="markable_27" span="word_100" mmax_level="mention" mention_head="balkon" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_28" span="word_103" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_29" span="word_105" mmax_level="mention" mention_head="" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_30" span="word_103..word_105" mmax_level="mention" mention_head="maju" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_31" span="word_110" mmax_level="mention" mention_head="Stańczyk" mention_group="set_1" near_identity="empty"></markable> + <markable id="markable_32" span="word_116..word_117" mmax_level="mention" mention_head="sadzonek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_33" span="word_119" mmax_level="mention" mention_head="sklepu" mention_group="set_5" near_identity="empty"></markable> + <markable id="markable_34" 
span="word_122" mmax_level="mention" mention_head="nic" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_35" span="word_125..word_126" mmax_level="mention" mention_head="przyjemności" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_36" span="word_129" mmax_level="mention" mention_head="kwiatka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_37" span="word_128..word_129" mmax_level="mention" mention_head="wyhodowanie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_38" span="word_131" mmax_level="mention" mention_head="nasionka" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_39" span="word_134..word_135" mmax_level="mention" mention_head="porze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_40" span="word_136..word_137" mmax_level="mention" mention_head="roku" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_41" span="word_138" mmax_level="mention" mention_head="pracy" mention_group="set_2" near_identity="empty"></markable> + <markable id="markable_42" span="word_142" mmax_level="mention" mention_head="roślin" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_43" span="word_144" mmax_level="mention" mention_head="zasilaniu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_44" span="word_141..word_144" mmax_level="mention" mention_head="podlewaniu" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_45" span="word_145..word_147" mmax_level="mention" mention_head="nawozami" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_46" span="word_151" mmax_level="mention" mention_head="pani" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_47" span="word_152" mmax_level="mention" 
mention_head="Stanisławy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_48" span="word_150..word_152" mmax_level="mention" mention_head="balkonie" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_49" span="word_155" mmax_level="mention" mention_head="surfinie" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_50" span="word_157" mmax_level="mention" mention_head="petunie" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_51" span="word_159" mmax_level="mention" mention_head="gardenie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_52" span="word_161" mmax_level="mention" mention_head="aksamitki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_53" span="word_163" mmax_level="mention" mention_head="przypołudniki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_54" span="word_165..word_166" mmax_level="mention" mention_head="groszek" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_55" span="word_172" mmax_level="mention" mention_head="werbeny" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_56" span="word_176..word_177" mmax_level="mention" mention_head="kwiatów" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_57" span="word_184..word_185" mmax_level="mention" mention_head="kolorze" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_58" span="word_188..word_189" mmax_level="mention" mention_head="czasie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_59" span="word_191" mmax_level="mention" mention_head="Efekt" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_60" span="word_193" mmax_level="mention" mention_head="Ogród" mention_group="set_3" 
near_identity="empty"></markable> + <markable id="markable_61" span="word_195" mmax_level="mention" mention_head="balkonie" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_62" span="word_200" mmax_level="mention" mention_head="przechodniów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_63" span="word_198..word_200" mmax_level="mention" mention_head="właścicielki" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_64" span="word_197..word_200" mmax_level="mention" mention_head="oczy" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_65" span="word_203..word_204" mmax_level="mention" mention_head="jesieni" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_66" span="word_207" mmax_level="mention" mention_head="balkonów" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_67" span="word_206..word_207" mmax_level="mention" mention_head="Ozdabianie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_68" span="word_208" mmax_level="mention" mention_head="kwiatami" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_69" span="word_212" mmax_level="mention" mention_head="Brzezinach" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_70" span="word_224..word_225" mmax_level="mention" mention_head="rośliny" mention_group="set_4" near_identity="empty"></markable> + <markable id="markable_71" span="word_229" mmax_level="mention" mention_head="Kolasa" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_72" span="word_228..word_229" mmax_level="mention" mention_head="Bożenna" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_73" span="word_231..word_232" mmax_level="mention" mention_head="sklepu" mention_group="set_5" 
near_identity="empty"></markable> + <markable id="markable_74" span="word_239..word_240" mmax_level="mention" mention_head="kwiaty" mention_group="set_6" near_identity="empty"></markable> + <markable id="markable_75" span="word_242" mmax_level="mention" mention_head="wystawienia" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_76" span="word_244" mmax_level="mention" mention_head="balkon" mention_group="set_7" near_identity="empty"></markable> + <markable id="markable_77" span="word_248" mmax_level="mention" mention_head="petunie" mention_group="set_8" near_identity="empty"></markable> + <markable id="markable_78" span="word_250" mmax_level="mention" mention_head="surfinie" mention_group="set_9" near_identity="empty"></markable> + <markable id="markable_79" span="word_255..word_258" mmax_level="mention" mention_head="pelargonie" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_80" span="word_264..word_266" mmax_level="mention" mention_head="datura" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_81" span="word_268" mmax_level="mention" mention_head="Klientom" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_82" span="word_275..word_276" mmax_level="mention" mention_head="kwiat" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_83" span="word_280" mmax_level="mention" mention_head="fot" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_84" span="word_283" mmax_level="mention" mention_head="kozieł" mention_group="empty" near_identity="empty"></markable> + <markable id="markable_85" span="word_282..word_283" mmax_level="mention" mention_head="grzegorz" mention_group="empty" near_identity="empty"></markable> +</markables> \ No newline at end of file diff --git b/src/test/resources/teksty_mmax/teksty_sys/9_words.xml a/src/test/resources/teksty_mmax/teksty_sys/9_words.xml new file 
mode 100755 index 0000000..dd37a43 --- /dev/null +++ a/src/test/resources/teksty_mmax/teksty_sys/9_words.xml @@ -0,0 +1,287 @@ +<?xml version="1.0" ?> +<!DOCTYPE words SYSTEM "words.dtd"> +<words> + <word id="word_1" tei_id="morph_1.1.1-seg">Ogród</word> + <word id="word_2" tei_id="morph_1.1.2-seg">na</word> + <word id="word_3" tei_id="morph_1.1.3-seg">.</word> + <word id="word_4" tei_id="morph_1.1.4-seg">.</word> + <word id="word_5" tei_id="morph_1.1.5-seg">.</word> + <word id="word_6" tei_id="morph_1.1.6-seg">balkonie</word> + <word id="word_7" tei_id="morph_1.1.7-seg">Pani</word> + <word id="word_8" tei_id="morph_1.1.8-seg">Stanisławie</word> + <word id="word_9" tei_id="morph_1.1.9-seg">Budkiewicz</word> + <word id="word_10" tei_id="morph_1.1.10-seg">z</word> + <word id="word_11" tei_id="morph_1.1.11-seg">ul</word> + <word id="word_12" tei_id="morph_1.1.12-seg">.</word> + <word id="word_13" tei_id="morph_1.2.1-seg">Piłsudskiego</word> + <word id="word_14" tei_id="morph_1.2.2-seg">na</word> + <word id="word_15" tei_id="morph_1.2.3-seg">powierzchni</word> + <word id="word_16" tei_id="morph_1.2.4-seg">niewiele</word> + <word id="word_17" tei_id="morph_1.2.5-seg">przekraczającej</word> + <word id="word_18" tei_id="morph_1.2.6-seg">półtora</word> + <word id="word_19" tei_id="morph_1.2.7-seg">metra</word> + <word id="word_20" tei_id="morph_1.2.8-seg">kwadratowego</word> + <word id="word_21" tei_id="morph_1.2.9-seg">udało</word> + <word id="word_22" tei_id="morph_1.2.10-seg">się</word> + <word id="word_23" tei_id="morph_1.2.11-seg">"</word> + <word id="word_24" tei_id="morph_1.2.12-seg">upchnąć</word> + <word id="word_25" tei_id="morph_1.2.13-seg">"</word> + <word id="word_26" tei_id="morph_1.2.14-seg">aż</word> + <word id="word_27" tei_id="morph_1.2.15-seg">15</word> + <word id="word_28" tei_id="morph_1.2.16-seg">skrzynek</word> + <word id="word_29" tei_id="morph_1.2.17-seg">z</word> + <word id="word_30" tei_id="morph_1.2.18-seg">kwiatami</word> + <word id="word_31" 
tei_id="morph_1.2.19-seg">i</word> + <word id="word_32" tei_id="morph_1.2.20-seg">kilka</word> + <word id="word_33" tei_id="morph_1.2.21-seg">doniczek</word> + <word id="word_34" tei_id="morph_1.2.22-seg">.</word> + <word id="word_35" tei_id="morph_1.2.23-seg">-</word> + <word id="word_36" tei_id="morph_1.2.24-seg">Utrzymanie</word> + <word id="word_37" tei_id="morph_1.2.25-seg">takiego</word> + <word id="word_38" tei_id="morph_1.2.26-seg">balkonu</word> + <word id="word_39" tei_id="morph_1.2.27-seg">wymaga</word> + <word id="word_40" tei_id="morph_1.2.28-seg">wiele</word> + <word id="word_41" tei_id="morph_1.2.29-seg">pracy</word> + <word id="word_42" tei_id="morph_1.2.30-seg">i</word> + <word id="word_43" tei_id="morph_1.2.31-seg">serca</word> + <word id="word_44" tei_id="morph_1.2.32-seg">-</word> + <word id="word_45" tei_id="morph_1.2.33-seg">przyznaje</word> + <word id="word_46" tei_id="morph_1.2.34-seg">S</word> + <word id="word_47" tei_id="morph_1.2.35-seg">.</word> + <word id="word_48" tei_id="morph_1.3.1-seg">Budkiewicz</word> + <word id="word_49" tei_id="morph_1.3.2-seg">,</word> + <word id="word_50" tei_id="morph_1.3.3-seg">której</word> + <word id="word_51" tei_id="morph_1.3.4-seg">przy</word> + <word id="word_52" tei_id="morph_1.3.5-seg">kwiatach</word> + <word id="word_53" tei_id="morph_1.3.6-seg">pomaga</word> + <word id="word_54" tei_id="morph_1.3.7-seg">córka</word> + <word id="word_55" tei_id="morph_1.3.8-seg">-</word> + <word id="word_56" tei_id="morph_1.3.9-seg">Grażyna</word> + <word id="word_57" tei_id="morph_1.3.10-seg" lastinpar="true">Stańczyk</word> + <word id="word_58" tei_id="morph_2.4.1-seg">-</word> + <word id="word_59" tei_id="morph_2.4.2-seg">Nie</word> + <word id="word_60" tei_id="morph_2.4.3-seg">mamy</word> + <word id="word_61" tei_id="morph_2.4.4-seg">własnego</word> + <word id="word_62" tei_id="morph_2.4.5-seg">ogródka</word> + <word id="word_63" tei_id="morph_2.4.6-seg">,</word> + <word id="word_64" 
tei_id="morph_2.4.7-seg">a</word> + <word id="word_65" tei_id="morph_2.4.8-seg">bardzo</word> + <word id="word_66" tei_id="morph_2.4.9-seg">kochamy</word> + <word id="word_67" tei_id="morph_2.4.10-seg">kwiaty</word> + <word id="word_68" tei_id="morph_2.4.11-seg">.</word> + <word id="word_69" tei_id="morph_2.5.1-seg">Dlatego</word> + <word id="word_70" tei_id="morph_2.5.2-seg">każdą</word> + <word id="word_71" tei_id="morph_2.5.3-seg">wolną</word> + <word id="word_72" tei_id="morph_2.5.4-seg">chwilę</word> + <word id="word_73" tei_id="morph_2.5.5-seg">poświęcamy</word> + <word id="word_74" tei_id="morph_2.5.6-seg">balkonowi</word> + <word id="word_75" tei_id="morph_2.5.7-seg">.</word> + <word id="word_76" tei_id="morph_2.6.1-seg">Nasiona</word> + <word id="word_77" tei_id="morph_2.6.2-seg">wysiewane</word> + <word id="word_78" tei_id="morph_2.6.3-seg">są</word> + <word id="word_79" tei_id="morph_2.6.4-seg">już</word> + <word id="word_80" tei_id="morph_2.6.5-seg">na</word> + <word id="word_81" tei_id="morph_2.6.6-seg">przełomie</word> + <word id="word_82" tei_id="morph_2.6.7-seg">lutego</word> + <word id="word_83" tei_id="morph_2.6.8-seg">i</word> + <word id="word_84" tei_id="morph_2.6.9-seg">marca</word> + <word id="word_85" tei_id="morph_2.6.10-seg">.</word> + <word id="word_86" tei_id="morph_2.7.1-seg">Później</word> + <word id="word_87" tei_id="morph_2.7.2-seg">wyrastające</word> + <word id="word_88" tei_id="morph_2.7.3-seg">z</word> + <word id="word_89" tei_id="morph_2.7.4-seg">nich</word> + <word id="word_90" tei_id="morph_2.7.5-seg">roślinki</word> + <word id="word_91" tei_id="morph_2.7.6-seg">pikuje</word> + <word id="word_92" tei_id="morph_2.7.7-seg">się</word> + <word id="word_93" tei_id="morph_2.7.8-seg">do</word> + <word id="word_94" tei_id="morph_2.7.9-seg">skrzynek</word> + <word id="word_95" tei_id="morph_2.7.10-seg">.</word> + <word id="word_96" tei_id="morph_2.7.11-seg">-</word> + <word id="word_97" tei_id="morph_2.7.12-seg">Skrzynki</word> + <word 
id="word_98" tei_id="morph_2.7.13-seg">wystawiamy</word> + <word id="word_99" tei_id="morph_2.7.14-seg">na</word> + <word id="word_100" tei_id="morph_2.7.15-seg">balkon</word> + <word id="word_101" tei_id="morph_2.7.16-seg">dopiero</word> + <word id="word_102" tei_id="morph_2.7.17-seg">w</word> + <word id="word_103" tei_id="morph_2.7.18-seg">maju</word> + <word id="word_104" tei_id="morph_2.7.19-seg">-</word> + <word id="word_105" tei_id="morph_2.7.20-seg">czerwcu</word> + <word id="word_106" tei_id="morph_2.7.21-seg">-</word> + <word id="word_107" tei_id="morph_2.7.22-seg">wyjaśnia</word> + <word id="word_108" tei_id="morph_2.7.23-seg">G</word> + <word id="word_109" tei_id="morph_2.7.24-seg">.</word> + <word id="word_110" tei_id="morph_2.8.1-seg">Stańczyk</word> + <word id="word_111" tei_id="morph_2.8.2-seg">.</word> + <word id="word_112" tei_id="morph_2.8.3-seg">-</word> + <word id="word_113" tei_id="morph_2.8.4-seg">Bardzo</word> + <word id="word_114" tei_id="morph_2.8.5-seg">rzadko</word> + <word id="word_115" tei_id="morph_2.8.6-seg">używamy</word> + <word id="word_116" tei_id="morph_2.8.7-seg">gotowych</word> + <word id="word_117" tei_id="morph_2.8.8-seg">sadzonek</word> + <word id="word_118" tei_id="morph_2.8.9-seg">ze</word> + <word id="word_119" tei_id="morph_2.8.10-seg">sklepu</word> + <word id="word_120" tei_id="morph_2.8.11-seg">,</word> + <word id="word_121" tei_id="morph_2.8.12-seg">bo</word> + <word id="word_122" tei_id="morph_2.8.13-seg">nic</word> + <word id="word_123" tei_id="morph_2.8.14-seg">nie</word> + <word id="word_124" tei_id="morph_2.8.15-seg">sprawia</word> + <word id="word_125" tei_id="morph_2.8.16-seg">takiej</word> + <word id="word_126" tei_id="morph_2.8.17-seg">przyjemności</word> + <word id="word_127" tei_id="morph_2.8.18-seg">jak</word> + <word id="word_128" tei_id="morph_2.8.19-seg">wyhodowanie</word> + <word id="word_129" tei_id="morph_2.8.20-seg">kwiatka</word> + <word id="word_130" tei_id="morph_2.8.21-seg">od</word> + <word 
id="word_131" tei_id="morph_2.8.22-seg">nasionka</word> + <word id="word_132" tei_id="morph_2.8.23-seg">.</word> + <word id="word_133" tei_id="morph_2.9.1-seg">O</word> + <word id="word_134" tei_id="morph_2.9.2-seg">tej</word> + <word id="word_135" tei_id="morph_2.9.3-seg">porze</word> + <word id="word_136" tei_id="morph_2.9.4-seg">roku</word> + <word id="word_137" tei_id="morph_2.9.5-seg">najwięcej</word> + <word id="word_138" tei_id="morph_2.9.6-seg">pracy</word> + <word id="word_139" tei_id="morph_2.9.7-seg">jest</word> + <word id="word_140" tei_id="morph_2.9.8-seg">przy</word> + <word id="word_141" tei_id="morph_2.9.9-seg">podlewaniu</word> + <word id="word_142" tei_id="morph_2.9.10-seg">roślin</word> + <word id="word_143" tei_id="morph_2.9.11-seg">i</word> + <word id="word_144" tei_id="morph_2.9.12-seg">zasilaniu</word> + <word id="word_145" tei_id="morph_2.9.13-seg">ich</word> + <word id="word_146" tei_id="morph_2.9.14-seg">odpowiednimi</word> + <word id="word_147" tei_id="morph_2.9.15-seg">nawozami</word> + <word id="word_148" tei_id="morph_2.9.16-seg">.</word> + <word id="word_149" tei_id="morph_2.10.1-seg">Na</word> + <word id="word_150" tei_id="morph_2.10.2-seg">balkonie</word> + <word id="word_151" tei_id="morph_2.10.3-seg">pani</word> + <word id="word_152" tei_id="morph_2.10.4-seg">Stanisławy</word> + <word id="word_153" tei_id="morph_2.10.5-seg">rosną</word> + <word id="word_154" tei_id="morph_2.10.6-seg">:</word> + <word id="word_155" tei_id="morph_2.10.7-seg">surfinie</word> + <word id="word_156" tei_id="morph_2.10.8-seg">,</word> + <word id="word_157" tei_id="morph_2.10.9-seg">petunie</word> + <word id="word_158" tei_id="morph_2.10.10-seg">,</word> + <word id="word_159" tei_id="morph_2.10.11-seg">gardenie</word> + <word id="word_160" tei_id="morph_2.10.12-seg">,</word> + <word id="word_161" tei_id="morph_2.10.13-seg">aksamitki</word> + <word id="word_162" tei_id="morph_2.10.14-seg">,</word> + <word id="word_163" 
tei_id="morph_2.10.15-seg">przypołudniki</word> + <word id="word_164" tei_id="morph_2.10.16-seg">,</word> + <word id="word_165" tei_id="morph_2.10.17-seg">groszek</word> + <word id="word_166" tei_id="morph_2.10.18-seg">pachnący</word> + <word id="word_167" tei_id="morph_2.10.19-seg">,</word> + <word id="word_168" tei_id="morph_2.10.20-seg">kabea</word> + <word id="word_169" tei_id="morph_2.10.21-seg">,</word> + <word id="word_170" tei_id="morph_2.10.22-seg">nemezje</word> + <word id="word_171" tei_id="morph_2.10.23-seg">i</word> + <word id="word_172" tei_id="morph_2.10.24-seg">werbeny</word> + <word id="word_173" tei_id="morph_2.10.25-seg">.</word> + <word id="word_174" tei_id="morph_2.11.1-seg">W</word> + <word id="word_175" tei_id="morph_2.11.2-seg">sumie</word> + <word id="word_176" tei_id="morph_2.11.3-seg">kilkadziesiąt</word> + <word id="word_177" tei_id="morph_2.11.4-seg">kwiatów</word> + <word id="word_178" tei_id="morph_2.11.5-seg">,</word> + <word id="word_179" tei_id="morph_2.11.6-seg">z</word> + <word id="word_180" tei_id="morph_2.11.7-seg">których</word> + <word id="word_181" tei_id="morph_2.11.8-seg">każdy</word> + <word id="word_182" tei_id="morph_2.11.9-seg">kwitnie</word> + <word id="word_183" tei_id="morph_2.11.10-seg">w</word> + <word id="word_184" tei_id="morph_2.11.11-seg">innym</word> + <word id="word_185" tei_id="morph_2.11.12-seg">kolorze</word> + <word id="word_186" tei_id="morph_2.11.13-seg">i</word> + <word id="word_187" tei_id="morph_2.11.14-seg">w</word> + <word id="word_188" tei_id="morph_2.11.15-seg">różnym</word> + <word id="word_189" tei_id="morph_2.11.16-seg">czasie</word> + <word id="word_190" tei_id="morph_2.11.17-seg">.</word> + <word id="word_191" tei_id="morph_2.12.1-seg">Efekt</word> + <word id="word_192" tei_id="morph_2.12.2-seg">?</word> + <word id="word_193" tei_id="morph_2.13.1-seg">Ogród</word> + <word id="word_194" tei_id="morph_2.13.2-seg">na</word> + <word id="word_195" tei_id="morph_2.13.3-seg">balkonie</word> + 
<word id="word_196" tei_id="morph_2.13.4-seg">cieszy</word> + <word id="word_197" tei_id="morph_2.13.5-seg">oczy</word> + <word id="word_198" tei_id="morph_2.13.6-seg">właścicielki</word> + <word id="word_199" tei_id="morph_2.13.7-seg">i</word> + <word id="word_200" tei_id="morph_2.13.8-seg">przechodniów</word> + <word id="word_201" tei_id="morph_2.13.9-seg">aż</word> + <word id="word_202" tei_id="morph_2.13.10-seg">do</word> + <word id="word_203" tei_id="morph_2.13.11-seg">późnej</word> + <word id="word_204" tei_id="morph_2.13.12-seg">jesieni</word> + <word id="word_205" tei_id="morph_2.13.13-seg">.</word> + <word id="word_206" tei_id="morph_2.14.1-seg">Ozdabianie</word> + <word id="word_207" tei_id="morph_2.14.2-seg">balkonów</word> + <word id="word_208" tei_id="morph_2.14.3-seg">kwiatami</word> + <word id="word_209" tei_id="morph_2.14.4-seg">staje</word> + <word id="word_210" tei_id="morph_2.14.5-seg">się</word> + <word id="word_211" tei_id="morph_2.14.6-seg">w</word> + <word id="word_212" tei_id="morph_2.14.7-seg">Brzezinach</word> + <word id="word_213" tei_id="morph_2.14.8-seg">coraz</word> + <word id="word_214" tei_id="morph_2.14.9-seg">popularniejsze</word> + <word id="word_215" tei_id="morph_2.14.10-seg">-</word> + <word id="word_216" tei_id="morph_2.14.11-seg">Teraz</word> + <word id="word_217" tei_id="morph_2.14.12-seg">jest</word> + <word id="word_218" tei_id="morph_2.14.13-seg">za</word> + <word id="word_219" tei_id="morph_2.14.14-seg">późno</word> + <word id="word_220" tei_id="morph_2.14.15-seg">,</word> + <word id="word_221" tei_id="morph_2.14.16-seg">by</word> + <word id="word_222" tei_id="morph_2.14.17-seg">samemu</word> + <word id="word_223" tei_id="morph_2.14.18-seg">uprawiać</word> + <word id="word_224" tei_id="morph_2.14.19-seg">rośliny</word> + <word id="word_225" tei_id="morph_2.14.20-seg">balkonowe</word> + <word id="word_226" tei_id="morph_2.14.21-seg">-</word> + <word id="word_227" tei_id="morph_2.14.22-seg">mówi</word> + <word 
id="word_228" tei_id="morph_2.14.23-seg">Bożenna</word> + <word id="word_229" tei_id="morph_2.14.24-seg">Kolasa</word> + <word id="word_230" tei_id="morph_2.14.25-seg">ze</word> + <word id="word_231" tei_id="morph_2.14.26-seg">sklepu</word> + <word id="word_232" tei_id="morph_2.14.27-seg">ogrodniczego</word> + <word id="word_233" tei_id="morph_2.14.28-seg">.</word> + <word id="word_234" tei_id="morph_2.14.29-seg">-</word> + <word id="word_235" tei_id="morph_2.14.30-seg">Jednak</word> + <word id="word_236" tei_id="morph_2.14.31-seg">wciąż</word> + <word id="word_237" tei_id="morph_2.14.32-seg">można</word> + <word id="word_238" tei_id="morph_2.14.33-seg">kupić</word> + <word id="word_239" tei_id="morph_2.14.34-seg">kwiaty</word> + <word id="word_240" tei_id="morph_2.14.35-seg">gotowe</word> + <word id="word_241" tei_id="morph_2.14.36-seg">do</word> + <word id="word_242" tei_id="morph_2.14.37-seg">wystawienia</word> + <word id="word_243" tei_id="morph_2.14.38-seg">na</word> + <word id="word_244" tei_id="morph_2.14.39-seg">balkon</word> + <word id="word_245" tei_id="morph_2.14.40-seg">.</word> + <word id="word_246" tei_id="morph_2.15.1-seg">Najpopularniejsze</word> + <word id="word_247" tei_id="morph_2.15.2-seg">są</word> + <word id="word_248" tei_id="morph_2.15.3-seg">petunie</word> + <word id="word_249" tei_id="morph_2.15.4-seg">,</word> + <word id="word_250" tei_id="morph_2.15.5-seg">surfinie</word> + <word id="word_251" tei_id="morph_2.15.6-seg">,</word> + <word id="word_252" tei_id="morph_2.15.7-seg">oraz</word> + <word id="word_253" tei_id="morph_2.15.8-seg">czerwone</word> + <word id="word_254" tei_id="morph_2.15.9-seg">,</word> + <word id="word_255" tei_id="morph_2.15.10-seg">białe</word> + <word id="word_256" tei_id="morph_2.15.11-seg">i</word> + <word id="word_257" tei_id="morph_2.15.12-seg">różowe</word> + <word id="word_258" tei_id="morph_2.15.13-seg">pelargonie</word> + <word id="word_259" tei_id="morph_2.15.14-seg">.</word> + <word id="word_260" 
tei_id="morph_2.16.1-seg">Modnym</word> + <word id="word_261" tei_id="morph_2.16.2-seg">kwiatem</word> + <word id="word_262" tei_id="morph_2.16.3-seg">jest</word> + <word id="word_263" tei_id="morph_2.16.4-seg">też</word> + <word id="word_264" tei_id="morph_2.16.5-seg">datura</word> + <word id="word_265" tei_id="morph_2.16.6-seg">-</word> + <word id="word_266" tei_id="morph_2.16.7-seg">bieluń</word> + <word id="word_267" tei_id="morph_2.16.8-seg">.</word> + <word id="word_268" tei_id="morph_2.17.1-seg">Klientom</word> + <word id="word_269" tei_id="morph_2.17.2-seg">nie</word> + <word id="word_270" tei_id="morph_2.17.3-seg">przeszkadza</word> + <word id="word_271" tei_id="morph_2.17.4-seg">,</word> + <word id="word_272" tei_id="morph_2.17.5-seg">że</word> + <word id="word_273" tei_id="morph_2.17.6-seg">jest</word> + <word id="word_274" tei_id="morph_2.17.7-seg">to</word> + <word id="word_275" tei_id="morph_2.17.8-seg">kwiat</word> + <word id="word_276" tei_id="morph_2.17.9-seg">trujący</word> + <word id="word_277" tei_id="morph_2.17.10-seg">.</word> + <word id="word_278" tei_id="morph_2.17.11-seg">tekst</word> + <word id="word_279" tei_id="morph_2.17.12-seg">i</word> + <word id="word_280" tei_id="morph_2.17.13-seg">fot</word> + <word id="word_281" tei_id="morph_2.17.14-seg">.</word> + <word id="word_282" tei_id="morph_2.17.15-seg">grzegorz</word> + <word id="word_283" tei_id="morph_2.17.16-seg" lastinpar="true">kozieł</word> +</words> diff --git b/src/test/resources/teksty_semeval/all_automatic.txt a/src/test/resources/teksty_semeval/all_automatic.txt new file mode 100755 index 0000000..2f63f06 --- /dev/null +++ a/src/test/resources/teksty_semeval/all_automatic.txt @@ -0,0 +1,2475 @@ +#begin document /0 +W w prep - - - - +spotkaniu spotkanie subst loc sg n - (0) +weźmie wziąć fin - - - - +udział udział subst acc sg m3 - (1) +blisko blisko adv - - - - +7 7 ign - - - - +tysięcy tysiąc subst gen pl m3 head:2 (2 +braci brat subst gen pl m1 - 2) +z z prep - - - - +całej 
cały adj - - - - (3 +Europy Europa subst gen sg f head:3 3) +, , interp - - - - +ale ale conj - - - - +tylko tylko qub - - - - +206 206 ign - - - - +z z prep - - - - +nich on ppron3 gen pl m2 - (4) +będzie być bedzie - - - - +ubiegało ubiegać praet - - - - +się się qub - - - - +o o prep - - - - +tytuł tytuł subst acc sg m3 head:5 (5 +Europejskiego europejski adj - - - - (6 +Króla król subst gen sg m1 head:6 +Kurkowego kurkowy adj - - - - 5)|6) +. . interp - - - - +- - interp - - - - +Wezmę wziąć fin - - - - +udział udział subst acc sg m3 - (1) +w w prep - - - - +strzelaniu strzelanie subst loc sg n - (7) +, , interp - - - - +choć choć comp - - - - +moje mój adj - - - - (8 +szanse szansa subst nom pl f head:8 8) +są być fin - - - - +marne marny adj - - - - +. . interp - - - - + +Wynika wynikać fin - - - - +to to subst nom sg n - (9) +przede przed prep - - - - +wszystkim wszystko subst inst sg n - +z z prep - - - - +moich mój adj - - - - (10 +obowiązków obowiązek subst gen pl m3 head:10 +gospodarza gospodarz subst gen sg m1 - (11) +spotkań spotkanie subst gen pl n - (12)|10) +; ; interp - - - - +w w prep - - - - +tym ten adj - - - - +nawale nawał subst loc sg m3 - +pracy praca subst gen sg f - (13) +ciężko ciężko adv - - - - +mi ja ppron12 dat sg m1 - (14) +będzie być bedzie - - - - +się się qub - - - - +skupić skupić inf - - - - +na na prep - - - - +strzelaniu strzelanie subst loc sg n - (7) +- - interp - - - - +przewiduje przewidywać fin - - - - +Zdzisław Zdzisław subst nom sg m1 head:15 (15 +Maj Maj subst nom sg m1 - (16)|15) +, , interp - - - - +prezes prezes subst nom sg m1 head:17 (17 +krakowskiego krakowski adj - - - - (18 +Bractwa bractwo subst gen sg n head:18 +Kurkowego kurkowy adj - - - - 17)|18) +, , interp - - - - +panujący panujący adj - - - - (6 +Król król subst nom sg m1 head:6 (19 +Kurkowy kurkowy adj - - - - 6)|19) +. . 
interp - - - - + + +Strzelanie strzelanie subst nom sg n - (7) +o o prep - - - - +tytuł tytuł subst acc sg m3 head:5 (5 +Europejskiego europejski adj - - - - (6 +Króla król subst gen sg m1 head:6 +Kurkowego kurkowy adj - - - - 5)|6) +będzie być bedzie - - - - +się się qub - - - - +odbywało odbywać praet - - - - +w w prep - - - - +kilku kilka num - - - - +etapach etap subst loc pl m3 - +. . interp - - - - + +Do do prep - - - - +finału finał subst gen sg m3 - (20) +zostanie zostać fin - - - - +dopuszczonych dopuścić ppas - - - - +27 27 ign - - - - +braci brat subst gen pl m1 - (21) +- - interp - - - - +jeden jeden adj - - - - +z z prep - - - - +nich on ppron3 gen pl m2 - +otrzyma otrzymać fin - - - - +tytuł tytuł subst acc sg m3 head:5 (5 +Europejskiego europejski adj - - - - (6 +Króla król subst gen sg m1 head:6 +Kurkowego kurkowy adj - - - - 5)|6) +odbierając odbierać pcon - - - - +go on ppron3 acc sg m1 - (6) +obecnie obecnie adv - - - - +panującemu panujący adj - - - - +Wilfriedowi Wilfried subst dat sg m1 - (22 +Stammermannowi Stammermannowi ign - - - - 22) +. . interp - - - - +- - interp - - - - +Król król subst nom sg m1 - (6) +nie nie qub - - - - +otrzymuje otrzymywać fin - - - - +żadnych żaden adj - - - - (23 +nagród nagroda subst gen pl f head:23 +finansowych finansowy adj - - - - 23) +, , interp - - - - +ale ale conj - - - - +taki taki adj - - - - (5 +tytuł tytuł subst nom sg m3 head:5 5) +jest być fin - - - - +ogromnym ogromny adj - - - - (24 +zaszczytem zaszczyt subst inst sg m3 head:24 24) +; ; interp - - - - +król król subst nom sg m1 - (6) +jest być fin - - - - +np na przykład brev - - - - +. . interp - - - - +zapraszany zapraszać ppas - - - - +na na prep - - - - +posiedzenia posiedzenie subst acc pl n head:25 (25 +Parlamentu parlament subst gen sg m3 head:26 (26 +Europejskiego europejski adj - - - - 25)|26) +- - interp - - - - +mówi mówić fin - - - - +Zdzisław Zdzisław subst nom sg m1 head:15 (15 +Maj Maj subst nom sg m1 - (16)|15) +. . 
interp - - - - + + +Największą wielki adj - - - - +atrakcją atrakcja subst inst sg f - +12 12 ign - - - - +. . interp - - - - + +Europejskich europejski adj - - - - (12 +Spotkań spotkanie subst gen pl n head:12 +Bractw bractwo subst gen pl n head:27 (27 +Strzeleckich strzelecki adj - - - - 12)|27) +będzie być bedzie - - - - +wielka wielki adj - - - - (28 +parada parada subst nom sg f head:28 28) +, , interp - - - - +która który adj - - - - +rozpocznie rozpocząć fin - - - - +się się qub - - - - +w w prep - - - - +niedzielę niedziela subst acc sg f - +o o prep - - - - +godz godzina brev - - - head:29 (29 +. . interp - - - head:29 29) +13 13 ign - - - - +. . interp - - - - + +Kilkuset kilkaset num - - - - (21 +braci brat subst gen pl m1 head:21 21) +w w prep - - - - +historycznych historyczny adj - - - - (30 +strojach strój subst loc pl m3 head:30 30) +przejdzie przejść fin - - - - +z z prep - - - - +Błoń błonie subst gen pl n - (31) +na na prep - - - - +Rynek rynek subst acc sg m3 - (32) +ulicami ulica subst inst pl f - (33) +: : interp - - - - +Piłsudskiego Piłsudski subst gen sg m1 - (34) +, , interp - - - - +Straszewskiego Straszewski subst gen sg m1 - (35) +, , interp - - - - +Franciszkańską franciszkański adj - - - - +i i conj - - - - +Grodzką grodzki adj - - - - +. . interp - - - - + + +Początki początek subst nom pl m3 - +istnienia istnieć ger gen sg n - (36) +Bractwa bractwo subst gen sg n head:18 (18 +Kurkowego kurkowy adj - - - - 18) +w w prep - - - - +Krakowie Kraków subst loc sg m3 - (37) +sięgają sięgać fin - - - - +XIII XIII ign - - - - (38 +wieku wiek subst loc sg m3 - (39)|38) +. . 
interp - - - - + +Skupiało skupiać praet - - - - +ono on ppron3 nom sg n - (18) +znamienitych znamienity adj - - - - (40 +obywateli obywatel subst gen pl m1 head:40 40) +, , interp - - - - +kupców kupiec subst gen pl m1 head:41 (41 +i i conj - - - - +rzemieślników rzemieślnik subst gen pl m1 head:42 (42 +pragnących pragnący adj - - - - 41)|42) +wspomóc wspomóc inf - - - - +obronność obronność subst acc sg f head:43 (43 +miasta miasto subst gen sg n - (44)|43) +. . interp - - - - + +Wielkim wielki adj - - - - (45 +świętem święto subst inst sg n head:45 +bractwa bractwo subst gen sg n - (18)|45) +był być praet - - - - +turniej turniej subst nom sg m3 - (46) +, , interp - - - - +który który adj - - - - +odbywał odbywać praet - - - - +się się qub - - - - +na na prep - - - - +strzelnicy strzelnica subst loc sg f - (47) +zwanej zwać ppas - - - - +Celestatem Celestatem ign - - - - +. . interp - - - - + +Zawody zawody subst nom pl n - +trwały trwać praet - - - - +zwykle zwykle adv - - - - +trzy trzy num - - - - (48 +dni dzień subst acc pl m3 head:48 48) +. . interp - - - - + +Strzelano strzelać imps - - - - +do do prep - - - - +drewnianego drewniany adj - - - - +kura kur subst gen sg m2 - +umocowanego umocowany adj - - - - +na na prep - - - - +wysokiej wysoki adj - - - - (49 +żerdzi żerdź subst loc sg f head:49 49) +. . interp - - - - + +Brat brat subst nom sg m1 - (50) +, , interp - - - - +który który adj - - - - +zdołał zdołać praet - - - - +celnym celny adj - - - - (51 +strzałem strzał subst inst sg m3 head:51 51) +strącić strącić inf - - - - +ostatni ostatni adj - - - - (52 +jego on ppron3 gen sg m1 - (50) +fragment fragment subst acc sg m3 head:52 52) +zdobywał zdobywać praet - - - - +miano miano subst acc sg n head:53 (53 +Króla król subst gen sg m1 head:6 (6 +Kurkowego kurkowy adj - - - - 53)|6) +. . 
interp - - - - + +Z z prep - - - - +tym ten adj - - - - (5 +tytułem tytuł subst inst sg m3 head:5 5) +wiązały wiązać praet - - - - +się się qub - - - - +nie nie conj - - - - (54 +tylko tylko conj - - - - +honory Honory subst nom sg m1 head:54 +, , interp - - - - +ale ale conj - - - - +także także qub - - - - +przywileje przywilej subst nom pl m3 - 54) +: : interp - - - - +Rada rada subst nom sg f head:55 (55 +Miejska miejski adj - - - - 55) +zwalniała zwalniać praet - - - - +jego on ppron3 gen sg m1 - (54|(54) +posiadacza posiadacz subst gen sg m1 head:54 54) +m męski brev - - - - +. . interp - - - - +in inny brev - - - - +. . interp - - - - +z z prep - - - - +obowiązku obowiązek subst gen sg m3 head:56 (56 +płacenia płacić ger gen sg n - (57) +podatków podatek subst gen pl m3 - (58)|56) +( ( interp - - - - +ten ten adj - - - - (59 +zwyczaj zwyczaj subst nom sg m3 head:59 59) +utrzymał utrzymać praet - - - - +się się qub - - - - +do do prep - - - - +dziś dziś subst gen sg n - (60) +) ) interp - - - - +. . interp - - - - + + +#end document /0 +#begin document /1 +Z z prep - - - - +kolei koleja subst gen sg f - +we w prep - - - - +Wrocławiu Wrocław subst loc sg m3 - (0) +płace płaca subst nom pl f head:1 (1 +kontrolerów kontroler subst gen pl m3 - (2)|1) +zostały zostać praet - - - - +zupełnie zupełnie adv - - - - +uniezależnione uniezależnić ppas - - - - +od od prep - - - - +liczby liczba subst gen sg f head:3 (3 +wystawionych wystawić ppas - - - - (4 +wezwań wezwanie subst gen pl n head:4 3)|4) +do do prep - - - - +zapłaty zapłata subst gen sg f - (5) +. . interp - - - - + + +- - interp - - - - +Nie nie qub - - - - +oznacza oznaczać fin - - - - +to to subst nom sg n - (6) +jednak jednak conj - - - - +, , interp - - - - +że że comp - - - - +nie nie qub - - - - +nagradzamy nagradzać fin - - - - +najskuteczniejszych skuteczny adj - - - - (7 +pracowników pracownik subst gen pl m1 head:7 7) +. . 
interp - - - - + +Kilka kilka num - - - - (8 +razy raz subst gen pl m3 head:8 8) +w w prep - - - - +roku rok subst loc sg m3 - (9) +przyznawane przyznawać ppas - - - - +są być fin - - - - +premie premia subst nom pl f - (10) +. . interp - - - - + +Bierzemy brać fin - - - - +wtedy wtedy adv - - - - +pod pod prep - - - - +uwagę uwaga subst acc sg f head:11 (11 +skuteczność skuteczność subst acc sg f - (12) +i i conj - - - - +ewentualne ewentualny adj - - - - (13 +skargi skarga subst acc pl f head:13 +pasażerów pasażer subst gen pl m1 - (14)|11)|13) +- - interp - - - - +wyjaśnia wyjaśniać fin - - - - +Monika Monika subst nom sg f head:15 (15 +Poważna poważny adj - - - - 15) +, , interp - - - - +kierownik kierownik subst nom sg m1 head:16 (16 +Wydziału wydział subst gen sg m3 - (17|(18) +Transportu transport subst gen sg m3 - (19)|17) +wrocławskiego wrocławski adj - - - - +Urzędu urząd subst gen sg m3 - (20 +Miasta miasto subst gen sg n - (21)|16)|20) +. . interp - - - - + + +Tamtejsi tamtejszy adj - - - - (22 +kontrolerzy kontroler subst nom pl m1 head:22 22) +zarabiają zarabiać fin - - - - +( ( interp - - - - +bez bez prep - - - - +premii premia subst gen sg f - (23) +) ) interp - - - - +około około qub - - - - +1200 1200 ign - - - - +złotych złoty subst gen pl m2 - (24) +miesięcznie miesięcznie adv - - - - +( ( interp - - - - +netto netto adj - - - - +) ) interp - - - - +. . interp - - - - + + +Miasto miasto subst nom sg n - (21) +postanowiło postanowić praet - - - - +za za prep - - - - +jednym jeden adj - - - - (25 +zamachem zamach subst inst sg m3 head:25 25) +trzy trzy num - - - - +spółki spółka subst acc pl f - +połączyć połączyć inf - - - - +w w prep - - - - +jedną jeden adj - - - - +. . 
interp - - - - + +Przygotowany przygotować ppas - - - - +jest być fin - - - - +projekt projekt subst nom sg m3 head:26 (26 +uchwały uchwała subst gen sg f - (27)|26) +, , interp - - - - +który który adj - - - - +przewiduje przewidywać fin - - - - +wniesienie wnieść ger acc sg n head:28 (28 +udziałów udział subst gen pl m3 - (29)|28) +w w prep - - - - +Towarzystwie towarzystwo subst loc sg n head:30 (30 +Budownictwa budownictwo subst gen sg n head:31 (31 +Społecznego społeczny adj - - - - 31) +" " interp - - - - (32 +Wielkopolska Wielkopolska subst nom sg f - +" " interp - - - - 30)|32) +oraz oraz conj - - - - +Towarzystwie towarzystwo subst loc sg n head:30 (33|(30 +Budownictwa budownictwo subst gen sg n head:31 (31 +Społecznego społeczny adj - - - - 30)|31) +" " interp - - - - (34 +Nasz nasz adj - - - - +Dom dom subst nom sg m3 head:34 +" " interp - - - - 33)|34) +do do prep - - - - +Poznańskiego poznański adj - - - - (30 +Towarzystwa towarzystwo subst gen sg n head:30 +Budownictwa budownictwo subst gen sg n head:31 (31 +Społecznego społeczny adj - - - - 30)|31) +. . interp - - - - + +W w prep - - - - +piątek piątek subst acc sg m3 - (35) +opiniować opiniować inf - - - - +tę ten adj - - - - (36 +propozycję propozycja subst acc sg f head:36 36) +będzie być bedzie - - - - +Komisja komisja subst nom sg f head:37 (38|(37 +Gospodarki gospodarka subst gen sg f head:39 (39 +Komunalnej komunalny adj - - - - 37)|39) +i i conj - - - - +Polityki polityka subst gen sg f head:40 (40 +Mieszkaniowej mieszkaniowy adj - - - - 38)|40) +, , interp - - - - +a a conj - - - - +we w prep - - - - +wtorek wtorek subst acc sg m3 - (41) +zajmie zająć fin - - - - +się się qub - - - - +nią on ppron3 inst sg f - (40) +Rada rada subst nom sg f head:42 (42 +Miasta miasto subst gen sg n - (21)|42) +. . 
interp - - - - + + +- - interp - - - - +Pomysł pomysł subst nom sg m3 head:43 (43 +połączenia połączenie subst gen sg n - (44)|43) +TBS-ów TBS-ów ign - - - - +nie nie qub - - - - +budzi budzić fin - - - - +wątpliwości wątpliwość subst gen pl f - (45) +z z prep - - - - +punktu punkt subst gen sg m3 - +widzenia widzenie subst gen sg n - +racjonalizacji racjonalizacja subst gen sg f head:46 (46 +kosztów koszt subst gen pl m3 - (47)|46) +- - interp - - - - +twierdzi twierdzić fin - - - - +Tomasz Tomasz subst nom sg m1 head:48 (48 +Lewandowski Lewandowski subst nom sg m1 - (49)|48) +, , interp - - - - +radny radny subst nom sg m1 head:50 (50 +LiD Lida subst gen pl f - +i i conj - - - - +członek członek subst nom sg m1 head:51 (51 +komisji komisja subst gen sg f - (37)|50)|51) +. . interp - - - - +- - interp - - - - +Potrzebna potrzebny adj - - - - +jest być fin - - - - +jednak jednak conj - - - - +dyskusja dyskusja subst nom sg f - (52) +o o prep - - - - +przyszłości przyszłość subst loc sg f head:53 (53 +towarzystw towarzystwo subst gen pl n - (54)|53) +. . interp - - - - + +Obecnie obecnie adv - - - - +rząd rząd subst nom sg m3 - (55) +pracuje pracować fin - - - - +nad nad prep - - - - +zmianą zmiana subst inst sg f head:56 (56 +ustawy ustawa subst gen sg f - (57)|56) +, , interp - - - - +która który adj - - - - +przewiduje przewidywać fin - - - - +wykup wykup subst acc sg m3 head:58 (58 +mieszkań mieszkanie subst gen pl n - (59)|58) +w w prep - - - - +towarzystwach towarzystwo subst loc pl n head:54 (54 +budownictwa budownictwo subst gen sg n head:31 (31 +społecznego społeczny adj - - - - 54)|31) +. . interp - - - - + +To to subst nom sg n - (6) +stworzy stworzyć fin - - - - +zupełnie zupełnie adv - - - - (60 +nową nowy adj - - - - +sytuację sytuacja subst acc sg f head:60 60) +. . 
interp - - - - + +W w prep - - - - +związku związek subst loc sg m3 - +z z prep - - - - +tym to subst inst sg n - (6) +konieczne konieczny adj - - - - +będzie być bedzie - - - - +podjęcie podjąć ger nom sg n head:61 (61 +odpowiednich odpowiedni adj - - - - (62 +kroków krok subst gen pl m3 head:62 61)|62) +przez przez prep - - - - +miasto miasto subst acc sg n - (21) +. . interp - - - - + + +Norbert Norbert subst nom sg m1 - (63|(64) +Napieraj napierać impt - - - - 63) +, , interp - - - - +szef szef subst nom sg m1 head:65 (65 +klubu klub subst gen sg m3 - (66)|65) +radnych radny adj - - - - +PiS PiS subst gen sg f - (67) +również również qub - - - - +uważa uważać fin - - - - +, , interp - - - - +że że comp - - - - +ze z prep - - - - +względów wzgląd subst gen pl m3 head:68 (68 +ekonomicznych ekonomiczny adj - - - - 68) +utworzenie utworzyć ger acc sg n head:69 (69 +jednej jeden adj - - - - (70 +spółki spółka subst gen sg f head:70 69)|70) +jest być fin - - - - +zasadne zasadny adj - - - - +. . interp - - - - + + +- - interp - - - - +Na na prep - - - - +razie raz subst loc sg m3 - +jest być fin - - - - +to to pred - - - - +jednak jednak conj - - - - +luźny luźny adj - - - - +pomysł pomysł subst nom sg m3 - +. . interp - - - - + +Nie nie qub - - - - +ma mieć fin - - - - +konkretów konkret subst gen pl m3 - (71) +- - interp - - - - +dodaje dodawać fin - - - - +N nowy brev - - - - +. . interp - - - - + +Napieraj napierać impt - - - - +. . interp - - - - +- - interp - - - - +Nasz nasz adj - - - - (66 +klub klub subst nom sg m3 head:66 66) +jeszcze jeszcze qub - - - - +nie nie qub - - - - +wypracował wypracować praet - - - - +w w prep - - - - +sprawie sprawa subst loc sg f - +tej ten adj - - - - (27 +uchwały uchwała subst gen sg f head:27 +stanowiska stanowisko subst gen sg n - (72)|27) +. . 
interp - - - - + + +#end document /1 +#begin document /2 +Potem potem adv - - - - +znalazł znaleźć praet - - - - +zatrudnienie zatrudnienie subst acc sg n - (0) +w w prep - - - - +Fundacji fundacja subst loc sg f - (1|(2) +Europejskie europejski adj - - - - (3 +Spotkania spotkanie subst nom pl n head:3 +Kaszubskie kaszubski adj - - - - 3) +Centrum centrum subst nom sg n head:4 (4 +Kultury kultura subst gen sg f - (5)|1)|4) +. . interp - - - - + +Był być praet - - - - +prezesem prezes subst inst sg m1 - (6) +utworzonej utworzyć ppas - - - - +przez przez prep - - - - +fundację fundacja subst acc sg f - +spółki spółka subst gen sg f - (7) +Zamek zamek subst nom sg m3 - (8) +. . interp - - - - + + +W w prep - - - - +międzyczasie międzyczas subst loc sg m3 - (9) +został zostać praet - - - - +radnym radny subst inst sg m1 - (10) +. . interp - - - - + +Pod pod prep - - - - +koniec koniec subst acc sg m3 - +ubiegłej ubiegły adj - - - - (11 +kadencji kadencja subst gen sg f head:11 11) +Rada rada subst nom sg f head:12 (12 +Gminy gmina subst gen sg f - (13)|12) +Krokowa krokowy adj - - - - +wybrała wybrać praet - - - - +go on ppron3 acc sg m1 - (10) +na na prep - - - - +wójta wójt subst acc sg m1 - (14) +. . interp - - - - + +Jesienią jesień subst inst sg f - +2002 2002 ign - - - - +r rok brev - - - - +. . interp - - - - +został zostać praet - - - - +wójtem wójt subst inst sg m1 - (14) +w w prep - - - - +wyborach wybory subst loc pl n - (15) +powszechnych powszechny adj - - - - +. . interp - - - - + + +- - interp - - - - +Co co subst nom sg n - (16) +skłoniło skłonić praet - - - - +mnie ja ppron12 acc sg m1 - (17) +do do prep - - - - +zostania zostać ger gen sg n - (18) +samorządowcem samorządowiec subst inst sg m1 - (19) +? ? 
interp - - - - + +W w prep - - - - +pewnym pewny adj - - - - (20 +momencie moment subst loc sg m3 head:20 +życia życie subst gen sg n - (21) +mężczyzny mężczyzna subst gen sg m1 - (22)|20) +przychodzi przychodzić fin - - - - +taka taki adj - - - - +potrzeba potrzeba pred - - - - +, , interp - - - - +aby aby comp - - - - +sprawdzić sprawdzić inf - - - - +się się qub - - - - +np na przykład brev - - - - +. . interp - - - - +w w prep - - - - +życiu życie subst loc sg n head:21 (21 +publicznym publiczny adj - - - - 21) +- - interp - - - - +twierdzi twierdzić fin - - - - +krokowski krokowski adj - - - - (23 +kandydat kandydat subst nom sg m1 head:23 23) +do do prep - - - - +tytułu tytuł subst gen sg m3 head:24 (24 +Wójta wójt subst gen sg m1 - (14) +Pomorza pomorze subst gen sg n - (25)|24) +. . interp - - - - +- - interp - - - - +Poza poza prep - - - - +tym to subst inst sg n - +interesowały interesować praet - - - - +mnie ja ppron12 acc sg m1 - (17) +sprawy sprawa subst nom pl f head:26 (26 +komunalne komunalny adj - - - - 26) +. . interp - - - - + +Chciał chcieć praet - - - - +em być aglt - - - - +się się qub - - - - +nimi on ppron3 inst pl f - +bliżej blisko adv - - - - +zająć zająć inf - - - - +. . interp - - - - + + +Co co subst acc sg n - (16) +wójt wójt subst nom sg m1 - +gminy gmina subst gen sg f - (27|(13) +Krokowa krokowy adj - - - - 27) +uważa uważać fin - - - - +za za prep - - - - +swój swój adj - - - - (28 +największy wielki adj - - - - +sukces sukces subst acc sg m3 head:28 +i i conj - - - - +największą wielki adj - - - - (29 +porażkę porażka subst acc sg f head:29 28)|29) +? ? 
interp - - - - + + +- - interp - - - - +Sukcesem sukces subst inst sg m3 - (28) +jest być fin - - - - +to to subst nom sg n - (30) +, , interp - - - - +że że comp - - - - +udaje udawać fin - - - - +się się qub - - - - +wreszcie wreszcie qub - - - - +opracowywać opracowywać inf - - - - +plany plan subst acc pl m3 head:31 (31 +zagospodarowania zagospodarować ger gen sg n head:32 (32 +przestrzennego przestrzenny adj - - - - 31)|32) +. . interp - - - - + +Gotowe gotowe subst nom sg n - (33) +są być fin - - - - +już już qub - - - - +dla dla prep - - - - +Białogóry Białogóry ign - - - - +i i conj - - - - +części część subst gen sg f - (34) +Dębek dębek subst nom sg m3 - +. . interp - - - - + +Tych Tychy subst gen pl n head:35 (35 +ostatnich ostatni adj - - - - 35) +przez przez prep - - - - +wiele wiele num - - - - (36 +lat rok subst gen pl m3 head:36 36) +nie nie qub - - - - +można można pred - - - - +było być praet - - - - +uchwalić uchwalić inf - - - - +- - interp - - - - +uważa uważać fin - - - - +wójt wójt subst nom sg m1 - (14) +. . interp - - - - +- - interp - - - - +Natomiast natomiast conj - - - - +za za prep - - - - +porażkę porażka subst acc sg f - (29) +uważam uważać fin - - - - +decyzję decyzja subst acc sg f head:37 (37 +Rady rada subst gen sg f - (12) +gminy gmina subst gen sg f - 37) +, , interp - - - - +aby aby comp - - - - +nie nie qub - - - - +przystępować przystępować inf - - - - +w w prep - - - - +ramach ramy subst loc pl n - +Komunalnego komunalny adj - - - - (38 +Związku związek subst gen sg m3 head:38 (39 +Gmin gmina subst gen pl f - (40)|38)|39) +do do prep - - - - +programu program subst gen sg m3 head:41 (41 +uporządkowania uporządkować ger gen sg n - (42) +gospodarki gospodarka subst gen sg f head:43 (43 +ściekowej ściekowy adj - - - - 41)|43) +. . interp - - - - + +Mogli móc praet - - - - +śmy być aglt - - - - +uzyskać uzyskać inf - - - - +wiele wiele num - - - - +milionów milion subst gen pl m3 - +euro euro subst gen pl n - (44) +. . 
interp - - - - + +Boję bać fin - - - - +się się qub - - - - +, , interp - - - - +że że comp - - - - +to to pred - - - - +nie nie conj - - - - +tylko tylko conj - - - - +moja mój adj - - - - +porażka porażka subst nom sg f - +. . interp - - - - +. . interp - - - - +. . interp - - - - + + +Od od prep - - - - +5 5 ign - - - - +lat rok subst gen pl m3 - (36) +ulubionym ulubiony adj - - - - +hobby hobby subst gen sg n - +Henryka Henryk subst gen sg m1 - (45) +Doeringa Doeringa ign - - - - +są być fin - - - - +narty narta subst nom pl f - (46) +. . interp - - - - + +Dlatego dlatego adv - - - - +urlop urlop subst nom sg m3 - (47) +najchętniej chętnie adv - - - - +bierze brać fin - - - - +zimą zima subst inst sg f - +, , interp - - - - +aby aby comp - - - - +udać udać inf - - - - +się się qub - - - - +na na prep - - - - +stoki stok subst acc pl m3 head:48 (48 +Szklarskiej szklarski adj - - - - (49 +Poręby poręba subst gen sg f head:49 48)|49) +. . interp - - - - + + +- - interp - - - - +Tej ten adj - - - - (50 +zimy zima subst gen sg f head:50 50) +niestety niestety qub - - - - +nie nie qub - - - - +mogł móc praet - - - - +em być aglt - - - - +wyjechać wyjechać inf - - - - +- - interp - - - - +przyznaje przyznawać fin - - - - +wójt wójt subst nom sg m1 - (14) +Krokowej krokowy adj - - - - +. . interp - - - - +- - interp - - - - +Czasu czas subst gen sg m3 head:51 (51 +wolnego wolne subst gen sg n - (52)|51) +mam mieć fin - - - - +bardzo bardzo adv - - - - +mało mało num - - - - (53) +, , interp - - - - +jeśli jeśli comp - - - - +się się qub - - - - +taki taki adj - - - - +pojawia pojawiać fin - - - - +, , interp - - - - +to to conj - - - - +staram starać fin - - - - +się się qub - - - - +go on ppron3 acc sg m1 - (14) +spędzać spędzać inf - - - - +razem razem adv - - - - +z z prep - - - - +bliskimi bliscy subst inst pl m1 - (54) +. . 
interp - - - - + + +Nasz nasz adj - - - - (55 +plebiscyt plebiscyt subst nom sg m3 head:55 55) + + +„ „ interp - - - - (56 +Dziennik dziennik subst nom sg m3 head:56 +Bałtycki bałtycki adj - - - - +” ” interp - - - - 56) +rozpoczął rozpocząć praet - - - - +kolejną kolejny adj - - - - (57 +edycję edycja subst acc sg f head:57 +konkursu konkurs subst gen sg m3 - 57) +Wójt wójt subst nom sg m1 head:14 (14 +Pomorza pomorze subst gen sg n - (25)|14) +. . interp - - - - + +Nasz nasz adj - - - - (58 +powiat powiat subst nom sg m3 head:58 58) +reprezentują reprezentować fin - - - - +trzej trzy num - - - - (59 +włodarze włodarz subst nom pl m1 head:59 +gmin gmina subst gen pl f - +wiejskich wiejski adj - - - - 59) +. . interp - - - - + +To to pred - - - - +Henryk Henryk subst nom sg m1 - (60 +Doering Doering ign - - - - 60) +( ( interp - - - - +Krokowa krokowy adj - - - - +) ) interp - - - - +, , interp - - - - +Tadeusz Tadeusz subst nom sg m1 - (61|(62) +Puszkarczuk Puszkarczuk ign - - - - 61) +( ( interp - - - - +gmina gmina subst nom sg f head:13 (13 +Puck Puck subst nom sg m3 - (63)|13) +) ) interp - - - - +i i conj - - - - +Jerzy Jerzy subst nom sg m1 - (64|(65) +Włudzik Włudzik ign - - - - 64) +( ( interp - - - - +Kosakowo Kosakowo subst nom sg n - (66) +) ) interp - - - - +. . interp - - - - + +W w prep - - - - +gronie grono subst loc sg n head:67 (67 +kilkudziesięciu kilkadziesiąt num - - - - +kolegów kolega subst gen pl m1 - (68)|67) +po po prep - - - - +fachu fach subst loc sg m3 - (69) +walczyć walczyć inf - - - - +będą być bedzie - - - - +o o prep - - - - +miano miano subst acc sg n head:70 (70 +najpopularniejszego popularny adj - - - - (14 +wójta wójt subst gen sg m1 head:14 14) +województwa województwo subst gen sg n - (71)|70) +. . 
interp - - - - + +O o prep - - - - +tym to subst loc sg n - (30) +, , interp - - - - +kto kto subst nom sg m1 - (72) +wygra wygrać fin - - - - +, , interp - - - - +zadecydują zadecydować fin - - - - +swoimi swój adj - - - - (73 +głosami głos subst inst pl m3 head:73 73) +Czytelnicy czytelnik subst nom pl m1 head:74 (74 +„ „ interp - - - - +Dziennika dziennik subst gen sg m3 - +” ” interp - - - - 74) +. . interp - - - - + + +#end document /2 +#begin document /3 +Paweł Paweł subst nom sg m1 - (0) +Kryszałowicz Kryszałowicz ign - - - - + + +( ( interp - - - - +Eintracht Eintracht subst nom sg m3 head:1 (1 +Frankfurt Frankfurt subst nom sg m3 - (2)|1) +) ) interp - - - - +: : interp - - - - +- - interp - - - - +Ukraińcy Ukrainiec subst nom pl m1 - (3) +postawili postawić praet - - - - +nam my ppron12 dat pl m1 - (4) +wysokie wysoki adj - - - - (5 +wymagania wymaganie subst acc pl n head:5 5) +. . interp - - - - + +Remis remis subst nom sg m3 - (6) +jest być fin - - - - +sprawiedliwy sprawiedliwy adj - - - - +, , interp - - - - +choć choć comp - - - - +przy przy prep - - - - +odrobinie odrobina subst loc sg f head:7 (7 +szczęścia szczęście subst gen sg n - (8)|7) +mogli móc praet - - - - +śmy być aglt - - - - +wygrać wygrać inf - - - - +. . interp - - - - + +Mam mieć fin - - - - +do do prep - - - - +siebie się siebie gen - - - (9) +pretensję pretensja subst acc sg f - (10) +, , interp - - - - +bo bo comp - - - - +przy przy prep - - - - +lepszej dobry adj - - - - (11 +koncentracji koncentracja subst loc sg f head:11 11) +mogł móc praet - - - - +em być aglt - - - - +zdobyć zdobyć inf - - - - +bramkę bramka subst acc sg f - (12) +. . interp - - - - + + +ś ś ign - - - - + + +Jacek Jacek subst nom sg m1 - (13) +Krzynówek Krzynówek ign - - - - + + +( ( interp - - - - +1 1 ign - - - - +. . 
interp - - - - + +FC FC subst nom sg m3 - (14) +Nuernberg Nuernberg ign - - - - +) ) interp - - - - +: : interp - - - - +- - interp - - - - +Cieszę cieszyć fin - - - - +się się qub - - - - +, , interp - - - - +że że comp - - - - +mogł móc praet - - - - +em być aglt - - - - +wystąpić wystąpić inf - - - - +w w prep - - - - +meczu mecz subst loc sg m3 - +kończącym kończyć pact - - - - +eliminacje eliminacja subst acc pl f - (15) +. . interp - - - - + +Ze z prep - - - - +swej swój adj - - - - (16 +strony strona subst gen sg f head:16 16) +zrobię zrobić fin - - - - +wszystko wszystko subst acc sg n - (17) +, , interp - - - - +aby aby comp - - - - +nie nie qub - - - - +wypaść wypaść inf - - - - +z z prep - - - - +kadry kadra subst gen sg f - (18) +, , interp - - - - +mimo mimo prep - - - - +że że comp - - - - +nie nie qub - - - - +mam mieć fin - - - - +ugruntowanej ugruntować ppas - - - - (19 +pozycji pozycja subst gen sg f head:19 19) +w w prep - - - - +swoim swój adj - - - - (20 +klubie klub subst loc sg m3 head:20 20) +. . interp - - - - + +Zasłużyli zasłużyć praet - - - - +śmy być aglt - - - - +na na prep - - - - +awans awans subst acc sg m3 - (21) +, , interp - - - - +bo bo comp - - - - +byli być praet - - - - +śmy być aglt - - - - +zespołem zespół subst inst sg m3 - (22) +, , interp - - - - +który który adj - - - - +grał grać praet - - - - +najrówniej równo adv - - - - +i i conj - - - - +w w prep - - - - +głupi głupi adj - - - - (23 +sposób sposób subst acc sg m3 head:23 23) +nie nie qub - - - - +tracił tracić praet - - - - +punktów punkt subst gen pl m3 - (24) +. . 
interp - - - - + + +ś ś ign - - - - + + +Marek marek subst nom sg m1 - +Koźmiński koźmiński adj - - - - +: : interp - - - - + + +Mecze mecz subst nom pl m3 - (25) +z z prep - - - - +Ukrainą ukraina subst inst sg f - (26) +spinają spinać fin - - - - +klamrą klamra subst inst sg f - (27) +eliminacje eliminacja subst nom pl f - (15) +, , interp - - - - +które który adj - - - - +będziemy być bedzie - - - - +pamiętać pamiętać inf - - - - +do do prep - - - - +końca koniec subst gen sg m3 head:28 (28 +życia życie subst gen sg n - 28) +. . interp - - - - + +Uważam uważać fin - - - - +, , interp - - - - +że że comp - - - - +kibice kibic subst nom pl m1 - (29) +oglądali oglądać praet - - - - +dzisiaj dzisiaj adv - - - - +dobry dobry adj - - - - (30 +mecz mecz subst acc sg m3 head:30 30) +dwóch dwa num - - - - (31 +równych równy adj - - - - +zespołów zespół subst gen pl m3 head:31 31) +. . interp - - - - + +Moim mój adj - - - - (32 +zdaniem zdanie subst inst sg n head:32 +Ukraińcy Ukrainiec subst nom pl m1 - (3)|32) +zasłużenie zasłużenie adv - - - - +zajęli zająć praet - - - - +drugie drugi adj - - - - (33 +miejsce miejsce subst acc sg n head:33 33) +, , interp - - - - +jednak jednak conj - - - - +nie nie qub - - - - +chciał chcieć praet - - - - +by by qub - - - - +m być aglt - - - - +spotkać spotkać inf - - - - +się się qub - - - - +z z prep - - - - +nimi on ppron3 inst pl m1 - (3) +na na prep - - - - +mundialu mundial subst loc sg m3 - (34) +. . interp - - - - + +To to pred - - - - +bardzo bardzo adv - - - - (35 +niewygodny niewygodny adj - - - - +przeciwnik przeciwnik subst nom sg m1 head:35 35) +. . interp - - - - + +Może móc fin - - - - +wygrać wygrać inf - - - - +z z prep - - - - +każdym każdy adj - - - - +. . 
interp - - - - + + +ś ś ign - - - - + + +Zbigniew Zbigniew subst nom sg m1 head:36 (36 +Boniek Boniek subst nom sg m1 - (37)|36) + + +( ( interp - - - - +wiceprezes wiceprezes subst nom sg m1 head:38 (38 +PZPN PZPN subst nom sg m3 - (39)|38) +) ) interp - - - - +: : interp - - - - +- - interp - - - - +Najważniejsze ważny adj - - - - +, , interp - - - - +że że comp - - - - +eliminacje eliminacja subst nom pl f - +zakończyły zakończyć praet - - - - +się się qub - - - - +sukcesem sukces subst inst sg m3 - (40) +. . interp - - - - + +Jestem być fin - - - - +usatysfakcjonowany usatysfakcjonować ppas - - - - +. . interp - - - - + +Chcę chcieć fin - - - - +podkreślić podkreślić inf - - - - +, , interp - - - - +że że comp - - - - +kibice kibic subst nom pl m1 head:29 (29 +i i conj - - - - +dziennikarze dziennikarz subst nom pl m1 - (41)|29) +dostrzegają dostrzegać fin - - - - +tylko tylko qub - - - - +to to subst nom sg n - (42) +, , interp - - - - +co co subst nom sg n - (43) +dzieje dziać fin - - - - +się się qub - - - - +na na prep - - - - +boisku boisko subst loc sg n - (44) +i i conj - - - - +rzadko rzadko adv - - - - +kiedy kiedy adv - - - - +zwracają zwracać fin - - - - +uwagę uwaga subst acc sg f - (45) +na na prep - - - - +kulisy kulisa subst acc pl f head:46 (46 +sukcesów sukces subst gen pl m3 - (47) +piłkarzy piłkarz subst gen pl m1 - (48)|46) +, , interp - - - - +a a conj - - - - +oznaczają oznaczać fin - - - - +one on ppron3 nom pl m3 - (47) +ogromny ogromny adj - - - - (49 +wysiłek wysiłek subst acc sg m3 head:49 +organizacyjny organizacyjny adj - - - - 49) +, , interp - - - - +wiele wiele num - - - - (50 +pracy praca subst gen sg f head:50 +rzeszy rzesza subst gen sg f - (51) +ludzi ludzie subst gen pl m1 - (52)|50) +, , interp - - - - +którzy który adj - - - - +nie nie qub - - - - +stoją stać fin - - - - +w w prep - - - - +pierwszym pierwszy adj - - - - (53 +szeregu szereg subst loc sg m3 head:53 53) +, , interp - - - - +ale ale conj - - - - +wykonują 
wykonywać fin - - - - +nieraz nieraz adv - - - - +ciężkie ciężki adj - - - - +i i conj - - - - +niewdzięczne niewdzięczny adj - - - - (54 +zadania zadanie subst acc pl n head:54 54) +. . interp - - - - + +Moim mój adj - - - - (32 +zdaniem zdanie subst inst sg n head:32 32) +między między prep - - - - +innymi inny adj - - - - +dlatego dlatego adv - - - - +, , interp - - - - +że że comp - - - - +sprawy sprawa subst nom pl f head:55 (55 +organizacyjne organizacyjny adj - - - - 55) +zostały zostać praet - - - - +ułożone ułożony adj - - - - +na na prep - - - - +odpowiednim odpowiedni adj - - - - (56 +poziomie poziom subst loc sg m3 head:56 56) +, , interp - - - - +wszyscy wszyscy subst nom pl m1 - (57) +możemy móc fin - - - - +się się qub - - - - +dzisiaj dzisiaj adv - - - - +cieszyć cieszyć inf - - - - +z z prep - - - - +awansu awans subst gen sg m3 - (21) +do do prep - - - - +mistrzostw mistrzostwo subst gen pl n head:58 (58 +świata świat subst gen sg m3 - (59)|58) +. . interp - - - - + + +#end document /3 +#begin document /4 +Ernest Ernesta subst gen pl f - (0) +i i conj - - - - +Agnieszka Agnieszka subst nom sg f - (1) +nie nie qub - - - - +planowali planować praet - - - - +, , interp - - - - +że że comp - - - - +będą być bedzie - - - - +mieli mieć praet - - - - +wielką wielki adj - - - - +, , interp - - - - +babską babski adj - - - - (2 +rodzinę rodzina subst acc sg f head:2 2) +. . interp - - - - + +Ale ale conj - - - - +tak tak adv - - - - +wyszło wyniść praet - - - - +. . interp - - - - +– – interp - - - - +I i conj - - - - +całe cały adj - - - - (3 +szczęście szczęście subst nom sg n head:3 3) +. . interp - - - - + +Lepiej dobrze adv - - - - +się się qub - - - - +dogaduję dogadywać fin - - - - +z z prep - - - - +dziewczętami dziewczę subst inst pl n - +– – interp - - - - +cieszy cieszyć fin - - - - +się się qub - - - - +Ernest Ernest subst nom sg m1 head:4 (4 +Kwiecień Kwiecień subst nom sg m1 - (5)|4) +. . 
interp - - - - + + +W w prep - - - - +Wigilię wigilia subst acc sg f - (6) +do do prep - - - - +jego on ppron3 gen sg m1 - (7|(4) +obowiązków obowiązek subst gen pl m3 head:7 7) +, , interp - - - - +poza poza prep - - - - +dostarczeniem dostarczyć ger inst sg n head:8 (8 +choinki choinka subst gen sg f - (9)|8) +, , interp - - - - +należeć należeć inf - - - - +będzie być bedzie - - - - +zmywanie zmywać ger nom sg n - +naczyń naczynie subst gen pl n - (10) +. . interp - - - - + +Agnieszka Agnieszka subst nom sg f - (1) +zrobi zrobić fin - - - - +pierogi pieróg subst acc pl m3 - (11) +, , interp - - - - +ugotuje ugotować fin - - - - +barszcz barszcz subst nom sg m3 - (12) +z z prep - - - - +uszkami uszko subst inst pl n - +, , interp - - - - +usmaży usmażyć fin - - - - +karpia Karp subst acc sg m1 - (13) +. . interp - - - - + +Córki córka subst nom pl f - (14) +upieką upiec fin - - - - +ciasta ciasto subst acc pl n - +. . interp - - - - + +Potem potem adv - - - - +przyjdzie przyjść fin - - - - +czas czas subst acc sg m3 - (15) +na na prep - - - - +prezenty prezent subst acc pl m3 - (16) +. . interp - - - - + +Może może qub - - - - +to to pred - - - - +nawet nawet qub - - - - +będą być bedzie - - - - +empetrójki empetrójki ign - - - - +, , interp - - - - +o o prep - - - - +których który adj - - - - +marzą marzyć fin - - - - +starsze stary adj - - - - +dziewczyny dziewczyna subst acc pl f - +. . 
interp - - - - + + +Jodełek jodełka subst gen pl f - (17) +sadzimy sadzić fin - - - - +mniej mało adv - - - - + + +Leśniczy leśniczy subst nom sg m1 - (18) +, , interp - - - - +od od prep - - - - +którego który adj - - - - (19 +pan pan subst nom sg m1 head:19 +Ernest Ernesta subst gen pl f - (0)|19) +przywozi przywozić fin - - - - +choinkę choinka subst acc sg f - (9) +, , interp - - - - +mieszka mieszkać fin - - - - +kilka kilka num - - - - (20 +kilometrów kilometr subst gen pl m3 head:20 20) +od od prep - - - - +domu dom subst gen sg m3 head:21 (21 +Kwietniów Kwiecień subst gen pl m1 - (22)|21) +. . interp - - - - + +On on ppron3 nom sg m1 - (19) +także także qub - - - - +nie nie qub - - - - +wyobraża wyobrażać fin - - - - +sobie się siebie dat - - - (23) +świąt święto subst gen pl n - (24) +bez bez prep - - - - +prawdziwego prawdziwy adj - - - - (25 +świerku świerk subst gen sg m3 head:25 25) +. . interp - - - - +– – interp - - - - +I i conj - - - - +musi musić fin - - - - +być być inf - - - - +kiczowaty kiczowaty adj - - - - +– – interp - - - - +uśmiecha uśmiechać fin - - - - +się się qub - - - - +Gabriel Gabriel subst nom sg m1 head:26 (26 +Grobelny Grobelny subst nom sg m1 - (27)|26) +, , interp - - - - +nadleśniczy nadleśniczy subst nom sg m1 head:28 (28 +wałbrzyski wałbrzyski adj - - - - 28) +. . interp - - - - + + +To to subst nom sg n - (29) +znaczy znaczyć fin - - - - +, , interp - - - - +że że comp - - - - +powinny powinien winien - - - - +na na prep - - - - +nim on ppron3 loc sg m3 - (25) +wisieć wisieć inf - - - - +ozdoby ozdoba subst acc pl f - (30) +zrobione zrobić ppas - - - - +przez przez prep - - - - +dzieci dziecko subst acc pl n - (31) +, , interp - - - - +przechowywane przechowywać ppas - - - - +latami rok subst inst pl m3 - (32) +, , interp - - - - +wyciągane wyciągać ppas - - - - +na na prep - - - - +tę ten adj - - - - +jedyną jedyny adj - - - - +okazję okazja subst acc sg f - +. . 
interp - - - - + + +Pan pan subst nom sg m1 head:19 (19 +Gabriel Gabriel subst nom sg m1 - (26)|19) +ma mieć fin - - - - +dwóch dwa num - - - - (33 +synów syn subst gen pl m1 head:33 +i i conj - - - - +trzy trzy num - - - - (14 +córki córka subst acc pl f head:14 33)|14) +. . interp - - - - + +W w prep - - - - +domu dom subst loc sg m3 - (21) +została zostać praet - - - - +najmłodsza młody adj - - - - +, , interp - - - - +12-letnia 12-letnia ign - - - - +, , interp - - - - +ale ale conj - - - - +na na prep - - - - +święta święto subst acc pl n - +zjadą zjechać fin - - - - +wszyscy wszyscy subst nom pl m1 - (34) +. . interp - - - - + +I i conj - - - - +ubiorą ubrać fin - - - - +choinkę choinka subst acc sg f - (9) +. . interp - - - - +– – interp - - - - +Żona żona subst nom sg f - (35) +rozwiesi rozwiesić fin - - - - +anielskie anielski adj - - - - (36 +włosy włos subst acc pl m3 head:36 36) +, , interp - - - - +ja ja ppron12 nom sg m1 - (37) +podłączę podłączyć fin - - - - +lampki lampka subst acc pl f - (38) +– – interp - - - - +w w prep - - - - +domu dom subst loc sg m3 head:21 (21 +nadleśniczego nadleśniczy subst gen sg m1 - (28)|21) +podział podział subst nom sg m3 head:39 (39 +świątecznych świąteczny adj - - - - (40 +ról rola subst gen pl f head:40 39)|40) +jest być fin - - - - +określony określony adj - - - - +. . interp - - - - + + +W w prep - - - - +dolnośląskich dolnośląski adj - - - - (41 +lasach las subst loc pl m3 head:41 41) +najwięcej najwięcej num - - - - (42) +jest być fin - - - - +świerków świerk subst gen pl m2 - +. . interp - - - - + +Na na prep - - - - +plantacjach plantacja subst loc pl f - +sadzą sadzić fin - - - - +także także qub - - - - +coraz coraz adv - - - - (43 +popularniejsze popularny adj - - - - +jodły jodła subst nom pl f head:43 43) +z z prep - - - - +miękkimi miękki adj - - - - (44 +igłami igła subst inst pl f head:44 44) +. . 
interp - - - - + + +– – interp - - - - +Ale ale conj - - - - +i i qub - - - - +tych ten adj - - - - (17 +jodełek jodełka subst gen pl f head:17 17) +sadzimy sadzić fin - - - - +już już qub - - - - +mniej mało adv - - - - +. . interp - - - - + +To to qub - - - - +nie nie qub - - - - +lata latać fin - - - - +dziewięćdziesiąte dziewięćdziesiąty adj - - - - +, , interp - - - - +gdy gdy adv - - - - +sprzedawali sprzedawać praet - - - - +śmy być aglt - - - - +prawie prawie qub - - - - +wszystkie wszystek adj - - - - +wyhodowane wyhodować ppas - - - - +drzewka drzewko subst acc pl n - +– – interp - - - - +wspomina wspominać fin - - - - +nadleśniczy nadleśniczy subst nom sg m1 - (28) +. . interp - - - - + + +U u prep - - - - +Grobelnego grobelny adj - - - - +choinkę choinka subst acc sg f - (9) +można można pred - - - - +sobie się siebie dat - - - (45) +wybrać wybrać inf - - - - +. . interp - - - - +– – interp - - - - +Mamy mieć fin - - - - +rodziny rodzina subst acc pl f - (46) +, , interp - - - - +w w prep - - - - +których który adj - - - - +co co comp - - - - +roku rok subst gen sg m3 - (47) +ojciec ojciec subst nom sg m1 - +przyjeżdża przyjeżdżać fin - - - - +z z prep - - - - +synem syn subst inst sg m1 - (48) +, , interp - - - - +by by comp - - - - +samemu sam adj - - - - +ściąć ściąć inf - - - - +drzewko drzewko subst acc sg n - (49) +. . interp - - - - + +Taką taki adj - - - - +mają Maja subst inst sg m1 - (50) +tradycję tradycja subst acc sg f - (51) +– – interp - - - - +dodaje dodawać fin - - - - +pan pan subst nom sg m1 head:19 (19 +Gabriel Gabriel subst nom sg m1 - (26)|19) +. . 
interp - - - - + + +#end document /4 +#begin document /5 +Cena cena subst nom sg f head:0 (0 +życia życie subst gen sg n - 0) + + +Z z prep - - - - +tego ten adj - - - - (1 +pogromu pogrom subst gen sg m3 head:1 1) +ocalało ocalać praet - - - - +kilkudziesięciu kilkadziesiąt num - - - - (2 +Żydów żyd subst gen pl m1 head:2 2) +, , interp - - - - +a a conj - - - - +wśród wśród prep - - - - +nich on ppron3 gen pl m2 - (3) +rodzina rodzina subst nom sg f - (4) +Mosze Mosze ign - - - - +Sonensona Sonensona ign - - - - +. . interp - - - - + +Przed przed prep - - - - +wojną wojna subst inst sg f - (5) +była być praet - - - - +to to subst acc sg n - (6) +w w prep - - - - +skali skala subst loc sg f - +miasteczka miasteczko subst gen sg n - (7) +rodzina rodzina subst nom sg f head:4 (4 +bogata bogaty adj - - - - 4) +. . interp - - - - + +Sonensonowie Sonensonowie ign - - - - +mieli mieć praet - - - - +garbarnię garbarnia subst acc sg f - (8) +. . interp - - - - + +Nie nie qub - - - - +udało udać praet - - - - +mi ja ppron12 dat sg m1 - (9) +się się qub - - - - +dociec dociec inf - - - - +, , interp - - - - +u u prep - - - - +kogo kto subst gen sg m1 - +mianowicie mianowicie conj - - - - +przechowywali przechowywać praet - - - - +się się qub - - - - +Sonensonowie Sonensonowie ign - - - - +oraz oraz conj - - - - +pozostali pozostały adj - - - - (2 +Żydzi żyd subst nom pl m1 head:2 2) +w w prep - - - - +czasie czas subst loc sg m3 - +okupacji okupacja subst gen sg f head:10 (10 +niemieckiej niemiecki adj - - - - 10) +. . interp - - - - + +Faktem fakt subst inst sg m3 - (11) +pozostaje pozostawać fin - - - - +natomiast natomiast conj - - - - +, , interp - - - - +że że comp - - - - +okupację okupacja subst acc sg f head:10 (10 +tę ten adj - - - - 10) +przeżyli przeżyć praet - - - - +. . 
interp - - - - + +Faktem fakt subst inst sg m3 head:11 (11 +oczywistym oczywisty adj - - - - 11) +pozostaje pozostawać fin - - - - +i i conj - - - - +to to subst nom sg n - (6) +, , interp - - - - +że że comp - - - - +liczne liczny adj - - - - (12 +rodziny rodzina subst nom pl f head:12 +polskie polski adj - - - - 12) +- - interp - - - - +w w prep - - - - +Ejszyszkach Ejszyszkach ign - - - - +i i conj - - - - +w w prep - - - - +pobliskich pobliski adj - - - - (13 +okolicach okolica subst loc pl f head:13 13) +- - interp - - - - +przechowywały przechowywać praet - - - - +Żydów żyd subst acc pl m1 - (2) +. . interp - - - - + +Parę para subst acc sg f head:14 (14 +kilometrów kilometr subst gen pl m3 - (15)|14) +od od prep - - - - +Ejszyszek Ejszyszek ign - - - - +, , interp - - - - +w w prep - - - - +Korkucianach Korkucianach ign - - - - +( ( interp - - - - +w w prep - - - - +folwarku folwark subst loc sg m3 - (16) +Lebiedniki Lebiedniki ign - - - - +) ) interp - - - - +, , interp - - - - +żołnierz żołnierz subst nom sg m1 head:17 (17 +AK AK subst gen sg f - (18) +Kazimierz Kazimierz subst nom sg m1 - (19)|17) +Korkuć Korkuć ign - - - - +w w prep - - - - +czasie czas subst loc sg m3 - +wojny wojna subst gen sg f - (5) +w w prep - - - - +swoim swój adj - - - - (20 +domu dom subst loc sg m3 head:20 20) +przechowywał przechowywać praet - - - - +28 28 ign - - - - +Żydów żyd subst gen pl m1 - (2) +. . interp - - - - + +Od od prep - - - - +studni studnia subst gen sg f - +do do prep - - - - +piwnic piwnica subst gen pl f head:21 (21 +domu dom subst gen sg m3 - (20)|21) +był być praet - - - - +przekopany przekopać ppas - - - - (22 +tunel tunel subst nom sg m3 head:22 22) +, , interp - - - - +dzięki dzięki prep - - - - +czemu co subst dat sg n - (23) +mieli mieć praet - - - - +wodę woda subst acc sg f - (24) +. . 
interp - - - - + +Natomiast natomiast conj - - - - +w w prep - - - - +skali skala subst loc sg f - +siatki siatka subst gen sg f head:25 (25 +AK AK subst gen sg f - (18) +Kazimierz Kazimierz subst nom sg m1 - (19)|25) +Korkuć Korkuć ign - - - - +przechowywał przechowywać praet - - - - +około około qub - - - - +70 70 ign - - - - +Żydów żyd subst gen pl m1 - (2) +. . interp - - - - + +Rodzina rodzina subst nom sg f - (4) +Świeczków Świeczków ign - - - - +również również qub - - - - +przechowywała przechowywać praet - - - - +Żydów żyd subst gen pl m1 - +. . interp - - - - + +W w prep - - - - +tamtych tamten adj - - - - (26 +stronach strona subst loc pl f head:26 26) +liczne liczny adj - - - - +rodziny rodzina subst nom pl f - +polskie polski adj - - - - +postępowały postępować praet - - - - +podobnie podobnie adv - - - - +. . interp - - - - + + +Prawdą prawda subst inst sg f - (27) +jest być fin - - - - +również również qub - - - - +i i conj - - - - +to to subst nom sg n - (6) +, , interp - - - - +że że comp - - - - +Żydzi żyd subst nom pl m1 - (2) +za za prep - - - - +swe swój adj - - - - (28 +przechowanie przechowanie subst acc sg n head:28 28) +płacili płacić praet - - - - +. . interp - - - - + +Płacili płacić praet - - - - +za za prep - - - - +utrzymanie utrzymanie subst acc sg n - (29) +i i conj - - - - +chyba chyba qub - - - - +jeszcze jeszcze qub - - - - +- - interp - - - - +za za prep - - - - +ryzyko ryzyko subst acc sg n - +. . 
interp - - - - + +O o prep - - - - +tym to subst loc sg n - (6) +dzisiaj dzisiaj adv - - - - +raczej raczej qub - - - - +tu tu adv - - - - +się się qub - - - - +nie nie qub - - - - +mówi mówić fin - - - - +, , interp - - - - +ale ale conj - - - - +prawdopodobnie prawdopodobnie adv - - - - +różnie różnie adv - - - - +z z prep - - - - +tym to subst inst sg n - (6) +było być praet - - - - +: : interp - - - - +jedni jeden adj - - - - +za za prep - - - - +pieniądze pieniądz subst acc pl m3 - (30) +, , interp - - - - +inni inny adj - - - - +- - interp - - - - +z z prep - - - - +odruchu odruch subst gen sg m3 head:31 (31 +serca serce subst gen sg n - (32)|31) +. . interp - - - - + +Ryzykowali ryzykować praet - - - - +i i conj - - - - +Polacy Polak subst nom pl m1 - (33) +, , interp - - - - +i i conj - - - - +Żydzi żyd subst nom pl m1 - (2) +. . interp - - - - + +Te ten adj - - - - (34 +rachunki rachunek subst nom pl m3 head:34 34) +mogły móc praet - - - - +wyglądać wyglądać inf - - - - +bardzo bardzo adv - - - - +różnie różnie adv - - - - +. . interp - - - - + + +Mieszkam mieszkać fin - - - - +w w prep - - - - +jednej jeden adj - - - - +z z prep - - - - +podwileńskich podwileński adj - - - - (35 +wsi wieś subst gen pl f head:35 35) +. . interp - - - - + +Otóż otóż qub - - - - +w w prep - - - - +tej ten adj - - - - (36 +mojej mój adj - - - - +wsi wieś subst loc sg f head:36 36) +pewien pewny adj - - - - (37 +gospodarz gospodarz subst nom sg m1 head:37 37) +- - interp - - - - +Polak Polak subst nom sg m1 - (38) +- - interp - - - - +przechowywał przechowywać praet - - - - +w w prep - - - - +czasie czas subst loc sg m3 - +wojny wojna subst gen sg f - (5) +młodą młody adj - - - - (39 +Żydówkę Żydówka subst acc sg f head:39 39) +. . interp - - - - + +Spodobała spodobać praet - - - - +mu on ppron3 dat sg m1 - (38) +się się qub - - - - +, , interp - - - - +z z prep - - - - +czego co subst gen sg n - (23) +wynikł wyniknąć praet - - - - +dramat dramat subst acc sg m3 - +. . 
interp - - - - + +Zdenerwowana zdenerwować ppas - - - - +żona żona subst nom sg f - +doniosła donieść praet - - - - +na na prep - - - - +policję policja subst acc sg f - (40) +. . interp - - - - + +Aresztowano aresztować imps - - - - +Żydówkę Żydówka subst acc sg f - +razem razem adv - - - - +z z prep - - - - +gospodarzem gospodarz subst inst sg m1 - (37) +, , interp - - - - +przerażona przerażony adj - - - - (41 +kobieta kobieta subst nom sg f head:41 41) +próbowała próbować praet - - - - +ocalić ocalić inf - - - - +męża mąż subst acc sg m1 - (42) +. . interp - - - - + +Zanim zanim comp - - - - +uzbierała uzbierać praet - - - - +potrzebną potrzebny adj - - - - (43 +sumę suma subst acc sg f head:43 43) +na na prep - - - - +łapówkę łapówka subst acc sg f - +, , interp - - - - +było być praet - - - - +już już qub - - - - +za za qub - - - - +późno późno adv - - - - +- - interp - - - - +rozstrzelano rozstrzelać imps - - - - +nie nie conj - - - - (39 +tylko tylko conj - - - - +Żydówkę Żydówka subst acc sg f head:39 +, , interp - - - - +ale ale conj - - - - +i i conj - - - - +gospodarza gospodarz subst acc sg m1 - (37)|39) +. . interp - - - - + +Czy czy qub - - - - +żonę żona subst acc sg f head:44 (44 +tego ten adj - - - - (45 +straceńca straceniec subst gen sg m1 head:45 44)|45) +można można pred - - - - +nazwać nazwać inf - - - - +antysemitką antysemitka subst inst sg f - (46) +? ? 
interp - - - - + + +#end document /5 +#begin document /8 +- - interp - - - - +150 150 ign - - - - +g gram brev - - - head:0 (0 +owoców owoc subst gen pl m3 - (1)|0) + + +Joanna Joanna subst nom sg f - +Kuc kuca subst gen pl f - (2) + + +( ( interp - - - - +PAI PAI subst nom sg f - +) ) interp - - - - + + +Diety dieta subst nom pl f - +są być fin - - - - +różne różny adj - - - - +, , interp - - - - +ścisłe ścisły adj - - - - +i i conj - - - - +urozmaicone urozmaicić ppas - - - - +, , interp - - - - +eliminacyjne eliminacyjny adj - - - - +, , interp - - - - +dziwne dziwny adj - - - - +, , interp - - - - +racjonalne racjonalny adj - - - - +i i conj - - - - +wreszcie wreszcie qub - - - - +te ten adj - - - - +zalecane zalecać ppas - - - - +przez przez prep - - - - +lekarzy lekarz subst acc pl m1 - (3) +. . interp - - - - + +Dziś dziś adv - - - - +dieta dieta subst nom sg f head:4 (4 +burgundzka burgundzki adj - - - - 4) +. . interp - - - - + + +Jak jak adv - - - - +wygląda wyglądać fin - - - - +przykładowe przykładowy adj - - - - (5 +tygodniowe tygodniowy adj - - - - +menu menu subst nom sg n head:5 5) +? ? 
interp - - - - + + +1 1 ign - - - - +dzień dzień subst acc sg m3 - + + +-obiad -obiad ign - - - - +: : interp - - - - +pierś pierś subst nom sg f head:6 (6 +kurczaka kurczak subst gen sg m2 - (7)|6) +pieczona piec ppas - - - - +w w prep - - - - +folii folia subst loc sg f - (8) +, , interp - - - - +sałatka sałatka subst nom sg f - (9) +z z prep - - - - +czerwonej czerwony adj - - - - +kapusty kapusta subst gen sg f - +doprawiona doprawić ppas - - - - +octem ocet subst inst sg m3 head:10 (10 +winnym winny adj - - - - +i i conj - - - - +oliwą oliwa subst inst sg f - (11)|10) +, , interp - - - - +pieczone piec ppas - - - - (12 +jabłko jabłko subst nom sg n head:12 12) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (13) +: : interp - - - - +2 2 ign - - - - +sadzone sadzić ppas - - - - (14 +jajka jajko subst nom pl n head:14 14) +, , interp - - - - +pomidor pomidor subst nom sg m2 - (15) +, , interp - - - - +trójkąt trójkąt subst nom sg m3 head:16 (16 +serka serek subst gen sg m3 - (17)|16) +topionego topić ppas - - - - +, , interp - - - - +gruszka Gruszka subst nom sg m1 - (18) + + +2 2 ign - - - - +dzień dzień subst acc sg m3 - (19) + + +- - interp - - - - +obiad obiad subst nom sg m3 - (20) +: : interp - - - - +królik królik subst nom sg m2 - (21) +w w prep - - - - +potrawce potrawka subst loc sg f - (22) +, , interp - - - - +surówka surówka subst nom sg f - (23) +z z prep - - - - +marchewki marchewka subst gen sg f head:24 (24 +i i conj - - - - +chrzanu chrzan subst gen sg m3 - (25)|24) +, , interp - - - - +brzoskwinia brzoskwinia subst nom sg f - (26) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (13) +: : interp - - - - +befsztyk befsztyk subst nom sg m3 - (27) +z z prep - - - - +polędwicy polędwica subst gen sg f - (28) +usmażony usmażyć ppas - - - - +bez bez prep - - - - +tłuszczu tłuszcz subst gen sg m3 - (29) +, , interp - - - - +zielona zielony adj - - - - +sałata sałata subst nom sg f - +z z prep - - - - +sosem sos subst inst sg m3 - 
(30) +vinegrette vinegrette ign - - - - +, , interp - - - - +2 2 ign - - - - +mandarynki mandarynka subst gen sg f - (31) + + +3 3 ign - - - - +dzień dzień subst acc sg m3 - + + +-obiad -obiad ign - - - - +: : interp - - - - +kura kura subst nom sg f - (32) +z z prep - - - - +rosołu rosół subst gen sg m3 - (33) +, , interp - - - - +gotowana gotowany adj - - - - (34 +kapusta kapusta subst nom sg f head:34 34) +bez bez prep - - - - +zasmażki zasmażka subst gen sg f - (35) +, , interp - - - - +mały mały adj - - - - (36 +grejpfrut grejpfrut subst nom sg m3 head:36 36) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (13) +: : interp - - - - +ryba ryba subst nom sg f - (37) +w w prep - - - - +warzywach warzywo subst loc pl n - (38) +, , interp - - - - +surówka surówka subst nom sg f - (23) +z z prep - - - - +buraczków buraczek subst gen pl m2 - (39) +, , interp - - - - +pomarańcza pomarańcza subst nom sg f - (40) + + +4 4 ign - - - - +dzień dzień subst acc sg m3 - (19) + + +-obiad -obiad ign - - - - +: : interp - - - - +pieczona piec ppas - - - - (41 +wieprzowina wieprzowina subst nom sg f head:41 41) +, , interp - - - - +brokuły brokuł subst nom pl m3 - (42) +ugotowane ugotować ppas - - - - +na na prep - - - - +parze par subst loc sg m3 - (43) +, , interp - - - - +jabłko jabłko subst nom sg n - + + +- - interp - - - - +kolacja kolacja subst nom sg f - (13) +: : interp - - - - +szklanka szklanka subst nom sg f head:44 (44 +ugotowanego ugotować ppas - - - - (45 +ryżu ryż subst gen sg m3 head:45 44)|45) +zalana zalać ppas - - - - +chudym chudy adj - - - - (46 +mlekiem mleko subst inst sg n head:46 46) +, , interp - - - - +plaster plaster subst nom sg m3 head:47 (47 +białego biały adj - - - - (48 +chudego chudy adj - - - - +sera ser subst gen sg m3 head:48 47)|48) +, , interp - - - - +kiść kiść subst nom sg f - +winogron winogrono subst gen pl n - (49) + + +5 5 ign - - - - +dzień dzień subst acc sg m3 - (19) + + +- - interp - - - - +obiad obiad subst nom sg m3 - 
(20) +– – interp - - - - +sztuka sztuka subst nom sg f head:50 (50 +mięsa mięso subst gen sg n - (51)|50) +, , interp - - - - +gotowane gotowany adj - - - - (39 +buraczki buraczek subst nom pl m2 head:39 39) +, , interp - - - - +kilka kilka num - - - - (52 +suszonych suszyć ppas - - - - +śliwek śliwka subst gen pl f head:52 52) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (13) +- - interp - - - - +2 2 ign - - - - +jajka jajko subst nom pl n - (14) +na na prep - - - - +miękko miękko adv - - - - +, , interp - - - - +serka serek subst gen sg m3 head:17 (17 +brie brie subst gen sg m3 - (53)|17) +, , interp - - - - +banan banan subst nom sg m3 - (54) + + +6 6 ign - - - - +dzień dzień subst acc sg m3 - (19) + + +- - interp - - - - +obiad obiad subst nom sg m3 - (20) +: : interp - - - - +ryba ryba subst nom sg f - (37) +pieczona piec ppas - - - - +w w prep - - - - +folii folia subst loc sg f - (8) +, , interp - - - - +surówka surówka subst nom sg f - (23) +z z prep - - - - +białej biały adj - - - - (34 +kapusty kapusta subst gen sg f head:34 +i i conj - - - - +marchewki marchewka subst gen sg f - (24)|34) +, , interp - - - - +gruszka Gruszka subst nom sg m1 - +w w prep - - - - +sosie sos subst loc sg m3 head:30 (30 +waniliowym waniliowy adj - - - - 30) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (13) +: : interp - - - - +pieczeń pieczeń subst nom sg f - (55) +z z prep - - - - +królika królik subst gen sg m2 - (21) +, , interp - - - - +sałatka sałatka subst nom sg f - (9) +z z prep - - - - +pomidorów pomidor subst gen pl m2 head:56 (56 +i i conj - - - - +papryki papryka subst gen sg f - (57)|56) +, , interp - - - - +trójkąt trójkąt subst nom sg m3 head:16 (16 +serka serek subst gen sg m3 - (17)|16) +topionego topić ppas - - - - +, , interp - - - - +kiwi kiwi subst nom sg n - (58) + + +7 7 ign - - - - +dzień dzień subst acc sg m3 - (19) + + +- - interp - - - - +obiad obiad subst nom sg m3 - (20) +: : interp - - - - +cielęcina cielęcina subst nom 
sg f - (59) +pieczona piec ppas - - - - +z z prep - - - - +dodatkiem dodatek subst inst sg m3 - +ziół zioło subst gen pl n - (60) +, , interp - - - - +fasolka fasolka subst nom sg f head:61 (61 +szparagowa szparagowy adj - - - - 61) +z z prep - - - - +odrobiną odrobina subst inst sg f head:62 (62 +masła masło subst gen sg n - (63)|62) +, , interp - - - - +jogurt jogurt subst nom sg m3 - (64) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (13) +: : interp - - - - +zapiekanka zapiekanka subst nom sg f - (65) +z z prep - - - - +ziemniaków ziemniak subst gen pl m2 - (66) +, , interp - - - - +odrobiny odrobina subst nom pl f - (67) +startego zetrzeć ppas - - - - +żółtego żółty adj - - - - (48 +sera ser subst gen sg m3 head:48 +i i conj - - - - +szynki szynka subst gen sg f - (68)|48) +, , interp - - - - +sałata sałata subst nom sg f head:69 (69 +zielona zielony adj - - - - 69) +z z prep - - - - +rzodkiewkami rzodkiewka subst inst pl f - (70) +, , interp - - - - +pomarańcza pomarańcza subst nom sg f - + + +#end document /8 +#begin document /9 +Ogród ogród subst nom sg m3 - (0) +na na prep - - - - +. . interp - - - - +. . interp - - - - +. . interp - - - - +balkonie balkon subst voc sg m3 - (1) +Pani pani subst gen sg f - +Stanisławie Stanisława subst loc sg f - (2 +Budkiewicz Budkiewicz ign - - - - 2) +z z prep - - - - +ul ulica brev - - - - +. . 
interp - - - - + +Piłsudskiego Piłsudski subst gen sg m1 - +na na prep - - - - +powierzchni powierzchnia subst loc sg f - (3) +niewiele niewiele num - - - - (4) +przekraczającej przekraczać pact - - - - +półtora półtora num - - - - (5 +metra metr subst gen sg m3 head:5 5) +kwadratowego kwadratowy adj - - - - +udało udać praet - - - - +się się qub - - - - +" " interp - - - - +upchnąć upchnąć inf - - - - +" " interp - - - - +aż aż qub - - - - +15 15 ign - - - - +skrzynek skrzynka subst gen pl f - (6) +z z prep - - - - +kwiatami kwiat subst inst pl m3 - (7) +i i conj - - - - +kilka kilka num - - - - (8 +doniczek doniczka subst gen pl f head:8 8) +. . interp - - - - +- - interp - - - - +Utrzymanie utrzymać ger nom sg n head:9 (9 +takiego taki adj - - - - (1 +balkonu balkon subst gen sg m3 head:1 9)|1) +wymaga wymagać fin - - - - +wiele wiele num - - - - (10 +pracy praca subst gen sg f head:10 +i i conj - - - - +serca serce subst gen sg n - (11)|10) +- - interp - - - - +przyznaje przyznawać fin - - - - +S syn brev - - - - +. . interp - - - - + +Budkiewicz Budkiewicz ign - - - - +, , interp - - - - +której który adj - - - - +przy przy prep - - - - +kwiatach kwiat subst loc pl m3 - (7) +pomaga pomagać fin - - - - +córka córka subst nom sg f - (12) +- - interp - - - - +Grażyna Grażyna subst nom sg f head:13 (13 +Stańczyk stańczyk subst nom sg m1 - (14)|13) + + +- - interp - - - - +Nie nie qub - - - - +mamy mieć fin - - - - +własnego własny adj - - - - (15 +ogródka ogródek subst gen sg m3 head:15 15) +, , interp - - - - +a a conj - - - - +bardzo bardzo adv - - - - +kochamy kochać fin - - - - +kwiaty kwiat subst acc pl m3 - +. . interp - - - - + +Dlatego dlatego adv - - - - +każdą każdy adj - - - - (16 +wolną wolny adj - - - - +chwilę chwila subst acc sg f head:16 16) +poświęcamy poświęcać fin - - - - +balkonowi balkon subst dat sg m3 - +. . 
interp - - - - + +Nasiona nasiono subst nom pl n - (17) +wysiewane wysiewać ppas - - - - +są być fin - - - - +już już qub - - - - +na na prep - - - - +przełomie przełom subst loc sg m3 - +lutego luty subst gen sg m3 head:18 (18 +i i conj - - - - +marca marzec subst gen sg m3 - (19)|18) +. . interp - - - - + +Później późno adv - - - - +wyrastające wyrastać pact - - - - +z z prep - - - - +nich on ppron3 gen pl m2 - (20) +roślinki roślinka subst gen sg f - (21) +pikuje pikować fin - - - - +się się qub - - - - +do do prep - - - - +skrzynek skrzynka subst gen pl f - (6) +. . interp - - - - +- - interp - - - - +Skrzynki skrzynka subst acc pl f - (6) +wystawiamy wystawiać fin - - - - +na na prep - - - - +balkon balkon subst acc sg m3 - (1) +dopiero dopiero qub - - - - +w w prep - - - - +maju maj subst loc sg m3 head:22 (22|(23) +- - interp - - - head:22 +czerwcu czerwiec subst loc sg m3 head:22 (24)|22) +- - interp - - - - +wyjaśnia wyjaśniać fin - - - - +G gram brev - - - - +. . interp - - - - + +Stańczyk stańczyk subst nom sg m1 - (14) +. . interp - - - - +- - interp - - - - +Bardzo bardzo adv - - - - +rzadko rzadko adv - - - - +używamy używać fin - - - - +gotowych gotowy adj - - - - (25 +sadzonek sadzonka subst gen pl f head:25 25) +ze z prep - - - - +sklepu sklep subst gen sg m3 - (26) +, , interp - - - - +bo bo comp - - - - +nic nic subst gen sg n - (27) +nie nie qub - - - - +sprawia sprawiać fin - - - - +takiej taki adj - - - - (28 +przyjemności przyjemność subst gen sg f head:28 28) +jak jak adv - - - - +wyhodowanie wyhodować ger nom sg n head:29 (29 +kwiatka kwiatek subst gen sg m3 - (30)|29) +od od prep - - - - +nasionka nasionko subst gen sg n - (31) +. . 
interp - - - - + +O o prep - - - - +tej ten adj - - - - (32 +porze pora subst loc sg f head:32 32) +roku rok subst gen sg m3 head:33 (33 +najwięcej najwięcej num - - - - 33) +pracy praca subst gen sg f - (10) +jest być fin - - - - +przy przy prep - - - - +podlewaniu podlewać ger loc sg n head:34 (34 +roślin roślina subst gen pl f - (35) +i i conj - - - - +zasilaniu zasilać ger loc sg n - (36)|34) +ich on ppron3 gen pl m2 - (7 +odpowiednimi odpowiedni adj - - - - +nawozami nawóz subst inst pl m3 head:7 7) +. . interp - - - - + +Na na prep - - - - +balkonie balkon subst loc sg m3 head:1 (1 +pani pani subst gen sg f - (37) +Stanisławy Stanisława subst gen sg f - (38)|1) +rosną rosnąć fin - - - - +: : interp - - - - +surfinie surfinia subst nom pl f - (39) +, , interp - - - - +petunie petunia subst nom pl f - (40) +, , interp - - - - +gardenie gardenia subst nom pl f - (41) +, , interp - - - - +aksamitki aksamitka subst nom pl f - (42) +, , interp - - - - +przypołudniki przypołudnik subst nom pl m3 - (43) +, , interp - - - - +groszek groszek subst nom sg m3 head:44 (44 +pachnący pachnący adj - - - - 44) +, , interp - - - - +kabea kabea ign - - - - +, , interp - - - - +nemezje nemezje ign - - - - +i i conj - - - - +werbeny werbena subst gen sg f - (45) +. . interp - - - - + +W w prep - - - - +sumie suma subst loc sg f - +kilkadziesiąt kilkadziesiąt num - - - - (7 +kwiatów kwiat subst gen pl m3 head:7 7) +, , interp - - - - +z z prep - - - - +których który adj - - - - +każdy każdy adj - - - - +kwitnie kwitnąć fin - - - - +w w prep - - - - +innym inny adj - - - - (46 +kolorze kolor subst loc sg m3 head:46 46) +i i conj - - - - +w w prep - - - - +różnym różny adj - - - - (47 +czasie czas subst loc sg m3 head:47 47) +. . interp - - - - + +Efekt efekt subst nom sg m3 - (48) +? ? 
interp - - - - + +Ogród ogród subst nom sg m3 - (0) +na na prep - - - - +balkonie balkon subst loc sg m3 - (1) +cieszy cieszyć fin - - - - +oczy oko subst acc pl n head:49 (49 +właścicielki właścicielka subst gen sg f head:50 (50 +i i conj - - - - +przechodniów przechodzień subst gen pl m1 - (51)|49)|50) +aż aż qub - - - - +do do prep - - - - +późnej późny adj - - - - (52 +jesieni jesień subst gen sg f head:52 52) +. . interp - - - - + +Ozdabianie ozdabiać ger nom sg n head:53 (53 +balkonów balkon subst gen pl m3 - (54)|53) +kwiatami kwiat subst inst pl m3 - (7) +staje stawać fin - - - - +się się qub - - - - +w w prep - - - - +Brzezinach Brzeziny subst loc pl n - (55) +coraz coraz adv - - - - +popularniejsze popularny adj - - - - +- - interp - - - - +Teraz teraz adv - - - - +jest być fin - - - - +za za qub - - - - +późno późno adv - - - - +, , interp - - - - +by by comp - - - - +samemu sam adj - - - - +uprawiać uprawiać inf - - - - +rośliny roślina subst acc pl f head:35 (35 +balkonowe balkonowy adj - - - - 35) +- - interp - - - - +mówi mówić fin - - - - +Bożenna Bożenna subst nom sg f head:56 (56 +Kolasa kolasa subst nom sg f - (57)|56) +ze z prep - - - - +sklepu sklep subst gen sg m3 head:26 (26 +ogrodniczego ogrodniczy adj - - - - 26) +. . interp - - - - +- - interp - - - - +Jednak jednak conj - - - - +wciąż wciąż adv - - - - +można można pred - - - - +kupić kupić inf - - - - +kwiaty kwiat subst acc pl m3 head:7 (7 +gotowe gotowy adj - - - - 7) +do do prep - - - - +wystawienia wystawić ger gen sg n - (58) +na na prep - - - - +balkon balkon subst acc sg m3 - (1) +. . interp - - - - + +Najpopularniejsze popularny adj - - - - +są być fin - - - - +petunie petunia subst nom pl f - (40) +, , interp - - - - +surfinie surfinia subst nom pl f - (39) +, , interp - - - - +oraz oraz conj - - - - +czerwone czerwony adj - - - - +, , interp - - - - +białe biały adj - - - - (59 +i i conj - - - - +różowe różowy adj - - - - +pelargonie pelargonia subst nom pl f head:59 59) +. . 
interp - - - - + +Modnym modny adj - - - - +kwiatem kwiat subst inst sg m3 - +jest być fin - - - - +też też qub - - - - +datura datura subst nom sg f head:60 (60 +- - interp - - - head:60 +bieluń bieluń subst nom sg m3 head:60 60) +. . interp - - - - + +Klientom klient subst dat pl m1 - (61) +nie nie qub - - - - +przeszkadza przeszkadzać fin - - - - +, , interp - - - - +że że comp - - - - +jest być fin - - - - +to to pred - - - - +kwiat kwiat subst nom sg m3 head:62 (62 +trujący trujący adj - - - - 62) +. . interp - - - - +tekst tekst subst nom sg m3 - +i i conj - - - - +fot fot subst nom sg m3 - (63) +. . interp - - - - +grzegorz Grzegorz subst nom sg m1 head:64 (64 +kozieł kozieł subst nom sg m2 - (65)|64) + + +#end document /9 diff --git b/src/test/resources/teksty_semeval/all_golden.txt a/src/test/resources/teksty_semeval/all_golden.txt new file mode 100755 index 0000000..411c4ad --- /dev/null +++ a/src/test/resources/teksty_semeval/all_golden.txt @@ -0,0 +1,2475 @@ +#begin document /0 +W w prep - - - - +spotkaniu spotkanie subst loc sg n - (0) +weźmie wziąć fin - - - - +udział udział subst acc sg m3 - (1) +blisko blisko adv - - - - +7 7 ign - - - - +tysięcy tysiąc subst gen pl m3 head:2 (2 +braci brat subst gen pl m1 - (3)|2) +z z prep - - - - +całej cały adj - - - - (4 +Europy Europa subst gen sg f head:4 4) +, , interp - - - - +ale ale conj - - - - +tylko tylko qub - - - - +206 206 ign - - - - +z z prep - - - - +nich on ppron3 gen pl m2 - (5) +będzie być bedzie - - - - +ubiegało ubiegać praet - - - - +się się qub - - - - +o o prep - - - - +tytuł tytuł subst acc sg m3 head:6 (6 +Europejskiego europejski adj - - - - +Króla król subst gen sg m1 - +Kurkowego kurkowy adj - - - - 6) +. . 
interp - - - - +- - interp - - - - +Wezmę wziąć fin - - - - +udział udział subst acc sg m3 - (1) +w w prep - - - - +strzelaniu strzelanie subst loc sg n - (7) +, , interp - - - - +choć choć comp - - - - +moje mój adj - - - - (8 +szanse szansa subst nom pl f head:8 8) +są być fin - - - - +marne marny adj - - - - +. . interp - - - - + +Wynika wynikać fin - - - - +to to subst nom sg n - (9) +przede przed prep - - - - +wszystkim wszystko subst inst sg n - +z z prep - - - - +moich mój adj - - - - (10 +obowiązków obowiązek subst gen pl m3 head:10 +gospodarza gospodarz subst gen sg m1 - (11) +spotkań spotkanie subst gen pl n - (12)|10) +; ; interp - - - - +w w prep - - - - +tym ten adj - - - - (13 +nawale nawał subst loc sg m3 head:13 +pracy praca subst gen sg f - 13) +ciężko ciężko adv - - - - +mi ja ppron12 dat sg m1 - (14) +będzie być bedzie - - - - +się się qub - - - - +skupić skupić inf - - - - +na na prep - - - - +strzelaniu strzelanie subst loc sg n - (7) +- - interp - - - - +przewiduje przewidywać fin - - - - +Zdzisław Zdzisław subst nom sg m1 head:15 (15 +Maj Maj subst nom sg m1 - (16)|15) +, , interp - - - - +prezes prezes subst nom sg m1 head:17 (17 +krakowskiego krakowski adj - - - - (18 +Bractwa bractwo subst gen sg n head:18 +Kurkowego kurkowy adj - - - - 17)|18) +, , interp - - - - +panujący panujący adj - - - - (19 +Król król subst nom sg m1 head:19 (20 +Kurkowy kurkowy adj - - - - 19)|20) +. . interp - - - - + + +Strzelanie strzelanie subst nom sg n - +o o prep - - - - +tytuł tytuł subst acc sg m3 head:6 (6 +Europejskiego europejski adj - - - - (19 +Króla król subst gen sg m1 head:19 +Kurkowego kurkowy adj - - - - 6)|19) +będzie być bedzie - - - - +się się qub - - - - +odbywało odbywać praet - - - - +w w prep - - - - +kilku kilka num - - - - (21 +etapach etap subst loc pl m3 head:21 21) +. . 
interp - - - - + +Do do prep - - - - +finału finał subst gen sg m3 - (22) +zostanie zostać fin - - - - +dopuszczonych dopuścić ppas - - - - +27 27 ign - - - - +braci brat subst gen pl m1 - +- - interp - - - - +jeden jeden adj - - - - +z z prep - - - - +nich on ppron3 gen pl m2 - (5) +otrzyma otrzymać fin - - - - +tytuł tytuł subst acc sg m3 head:6 (6 +Europejskiego europejski adj - - - - (19 +Króla król subst gen sg m1 head:19 +Kurkowego kurkowy adj - - - - 6)|19) +odbierając odbierać pcon - - - - +go on ppron3 acc sg m1 - (19) +obecnie obecnie adv - - - - +panującemu panujący adj - - - - +Wilfriedowi Wilfried subst dat sg m1 - (23 +Stammermannowi Stammermannowi ign - - - - 23) +. . interp - - - - +- - interp - - - - +Król król subst nom sg m1 - (19) +nie nie qub - - - - +otrzymuje otrzymywać fin - - - - +żadnych żaden adj - - - - (24 +nagród nagroda subst gen pl f head:24 +finansowych finansowy adj - - - - 24) +, , interp - - - - +ale ale conj - - - - +taki taki adj - - - - (6 +tytuł tytuł subst nom sg m3 head:6 6) +jest być fin - - - - +ogromnym ogromny adj - - - - (25 +zaszczytem zaszczyt subst inst sg m3 head:25 25) +; ; interp - - - - +król król subst nom sg m1 - (19) +jest być fin - - - - +np na przykład brev - - - - +. . interp - - - - +zapraszany zapraszać ppas - - - - +na na prep - - - - +posiedzenia posiedzenie subst acc pl n head:26 (26 +Parlamentu parlament subst gen sg m3 head:27 (27 +Europejskiego europejski adj - - - - 26)|27) +- - interp - - - - +mówi mówić fin - - - - +Zdzisław Zdzisław subst nom sg m1 head:15 (15 +Maj Maj subst nom sg m1 - (16)|15) +. . interp - - - - + + +Największą wielki adj - - - - +atrakcją atrakcja subst inst sg f - +12 12 ign - - - - +. . 
interp - - - - + +Europejskich europejski adj - - - - (12 +Spotkań spotkanie subst gen pl n head:12 +Bractw bractwo subst gen pl n head:28 (28 +Strzeleckich strzelecki adj - - - - 12)|28) +będzie być bedzie - - - - +wielka wielki adj - - - - (29 +parada parada subst nom sg f head:29 29) +, , interp - - - - +która który adj - - - - +rozpocznie rozpocząć fin - - - - +się się qub - - - - +w w prep - - - - +niedzielę niedziela subst acc sg f - +o o prep - - - - +godz godzina brev - - - head:30 (30 +. . interp - - - head:30 30) +13 13 ign - - - - +. . interp - - - - + +Kilkuset kilkaset num - - - - (3 +braci brat subst gen pl m1 head:3 3) +w w prep - - - - +historycznych historyczny adj - - - - (31 +strojach strój subst loc pl m3 head:31 31) +przejdzie przejść fin - - - - +z z prep - - - - +Błoń błonie subst gen pl n - (32) +na na prep - - - - +Rynek rynek subst acc sg m3 - (33) +ulicami ulica subst inst pl f - +: : interp - - - - +Piłsudskiego Piłsudski subst gen sg m1 - (34) +, , interp - - - - +Straszewskiego Straszewski subst gen sg m1 - (35) +, , interp - - - - +Franciszkańską franciszkański adj - - - - +i i conj - - - - +Grodzką grodzki adj - - - - +. . interp - - - - + + +Początki początek subst nom pl m3 - +istnienia istnieć ger gen sg n - (9) +Bractwa bractwo subst gen sg n head:18 (36|(18 +Kurkowego kurkowy adj - - - - 18) +w w prep - - - - +Krakowie Kraków subst loc sg m3 - (37)|36) +sięgają sięgać fin - - - - +XIII XIII ign - - - - (38 +wieku wiek subst loc sg m3 - 38) +. . interp - - - - + +Skupiało skupiać praet - - - - +ono on ppron3 nom sg n - (18) +znamienitych znamienity adj - - - - (39 +obywateli obywatel subst gen pl m1 head:39 39) +, , interp - - - - +kupców kupiec subst gen pl m1 head:40 (40 +i i conj - - - - +rzemieślników rzemieślnik subst gen pl m1 head:41 (41 +pragnących pragnący adj - - - - 40)|41) +wspomóc wspomóc inf - - - - +obronność obronność subst acc sg f head:42 (42 +miasta miasto subst gen sg n - 42) +. . 
interp - - - - + +Wielkim wielki adj - - - - (43 +świętem święto subst inst sg n head:43 +bractwa bractwo subst gen sg n - 43) +był być praet - - - - +turniej turniej subst nom sg m3 - (44) +, , interp - - - - +który który adj - - - - +odbywał odbywać praet - - - - +się się qub - - - - +na na prep - - - - +strzelnicy strzelnica subst loc sg f - (45) +zwanej zwać ppas - - - - +Celestatem Celestatem ign - - - - +. . interp - - - - + +Zawody zawody subst nom pl n - +trwały trwać praet - - - - +zwykle zwykle adv - - - - +trzy trzy num - - - - (46 +dni dzień subst acc pl m3 head:46 46) +. . interp - - - - + +Strzelano strzelać imps - - - - +do do prep - - - - +drewnianego drewniany adj - - - - +kura kur subst gen sg m2 - +umocowanego umocowany adj - - - - +na na prep - - - - +wysokiej wysoki adj - - - - (47 +żerdzi żerdź subst loc sg f head:47 47) +. . interp - - - - + +Brat brat subst nom sg m1 - (48) +, , interp - - - - +który który adj - - - - +zdołał zdołać praet - - - - +celnym celny adj - - - - (49 +strzałem strzał subst inst sg m3 head:49 49) +strącić strącić inf - - - - +ostatni ostatni adj - - - - (50 +jego on ppron3 gen sg m1 - (48) +fragment fragment subst acc sg m3 head:50 50) +zdobywał zdobywać praet - - - - +miano miano subst acc sg n head:51 (51 +Króla król subst gen sg m1 - +Kurkowego kurkowy adj - - - - 51) +. . interp - - - - + +Z z prep - - - - +tym ten adj - - - - (6 +tytułem tytuł subst inst sg m3 head:6 6) +wiązały wiązać praet - - - - +się się qub - - - - +nie nie conj - - - - (52 +tylko tylko conj - - - - +honory Honory subst nom sg m1 head:52 +, , interp - - - - +ale ale conj - - - - +także także qub - - - - +przywileje przywilej subst nom pl m3 - (53)|52) +: : interp - - - - +Rada rada subst nom sg f head:54 (54 +Miejska miejski adj - - - - 54) +zwalniała zwalniać praet - - - - +jego on ppron3 gen sg m1 - (52|(52) +posiadacza posiadacz subst gen sg m1 head:52 52) +m męski brev - - - - +. . interp - - - - +in inny brev - - - - +. . 
interp - - - - +z z prep - - - - +obowiązku obowiązek subst gen sg m3 head:55 (55 +płacenia płacić ger gen sg n - +podatków podatek subst gen pl m3 - (56)|55) +( ( interp - - - - +ten ten adj - - - - (57 +zwyczaj zwyczaj subst nom sg m3 head:57 57) +utrzymał utrzymać praet - - - - +się się qub - - - - +do do prep - - - - +dziś dziś subst gen sg n - (58) +) ) interp - - - - +. . interp - - - - + + +#end document /0 +#begin document /1 +Z z prep - - - - +kolei koleja subst gen sg f - +we w prep - - - - +Wrocławiu Wrocław subst loc sg m3 - (0) +płace płaca subst nom pl f head:1 (1 +kontrolerów kontroler subst gen pl m3 - (2)|1) +zostały zostać praet - - - - +zupełnie zupełnie adv - - - - +uniezależnione uniezależnić ppas - - - - +od od prep - - - - +liczby liczba subst gen sg f - +wystawionych wystawić ppas - - - - (3 +wezwań wezwanie subst gen pl n head:3 3) +do do prep - - - - +zapłaty zapłata subst gen sg f - (4) +. . interp - - - - + + +- - interp - - - - +Nie nie qub - - - - +oznacza oznaczać fin - - - - +to to subst nom sg n - +jednak jednak conj - - - - +, , interp - - - - +że że comp - - - - +nie nie qub - - - - +nagradzamy nagradzać fin - - - - +najskuteczniejszych skuteczny adj - - - - (5 +pracowników pracownik subst gen pl m1 head:5 5) +. . interp - - - - + +Kilka kilka num - - - - (6 +razy raz subst gen pl m3 head:6 6) +w w prep - - - - +roku rok subst loc sg m3 - +przyznawane przyznawać ppas - - - - +są być fin - - - - +premie premia subst nom pl f - (7) +. . 
interp - - - - + +Bierzemy brać fin - - - - +wtedy wtedy adv - - - - +pod pod prep - - - - +uwagę uwaga subst acc sg f head:8 (8 +skuteczność skuteczność subst acc sg f - (9) +i i conj - - - - +ewentualne ewentualny adj - - - - (10 +skargi skarga subst acc pl f head:10 +pasażerów pasażer subst gen pl m1 - (11)|8)|10) +- - interp - - - - +wyjaśnia wyjaśniać fin - - - - +Monika Monika subst nom sg f head:12 (12 +Poważna poważny adj - - - - 12) +, , interp - - - - +kierownik kierownik subst nom sg m1 head:13 (13 +Wydziału wydział subst gen sg m3 - (14|(15) +Transportu transport subst gen sg m3 - 14) +wrocławskiego wrocławski adj - - - - +Urzędu urząd subst gen sg m3 - (16 +Miasta miasto subst gen sg n - (17)|13)|16) +. . interp - - - - + + +Tamtejsi tamtejszy adj - - - - (18 +kontrolerzy kontroler subst nom pl m1 head:18 18) +zarabiają zarabiać fin - - - - +( ( interp - - - - +bez bez prep - - - - +premii premia subst gen sg f - +) ) interp - - - - +około około qub - - - - +1200 1200 ign - - - - +złotych złoty subst gen pl m2 - (19) +miesięcznie miesięcznie adv - - - - +( ( interp - - - - +netto netto adj - - - - +) ) interp - - - - +. . interp - - - - + + +Miasto miasto subst nom sg n - (17) +postanowiło postanowić praet - - - - +za za prep - - - - +jednym jeden adj - - - - +zamachem zamach subst inst sg m3 - +trzy trzy num - - - - (20 +spółki spółka subst acc pl f head:20 20) +połączyć połączyć inf - - - - +w w prep - - - - +jedną jeden adj - - - - +. . 
interp - - - - + +Przygotowany przygotować ppas - - - - +jest być fin - - - - +projekt projekt subst nom sg m3 head:21 (21 +uchwały uchwała subst gen sg f - 21) +, , interp - - - - +który który adj - - - - +przewiduje przewidywać fin - - - - +wniesienie wnieść ger acc sg n head:22 (22 +udziałów udział subst gen pl m3 - (23)|22) +w w prep - - - - +Towarzystwie towarzystwo subst loc sg n head:24 (24 +Budownictwa budownictwo subst gen sg n head:25 (25 +Społecznego społeczny adj - - - - 25) +" " interp - - - - (26 +Wielkopolska Wielkopolska subst nom sg f - +" " interp - - - - 24)|26) +oraz oraz conj - - - - +Towarzystwie towarzystwo subst loc sg n head:24 (24|(24 +Budownictwa budownictwo subst gen sg n head:25 (25 +Społecznego społeczny adj - - - - 24)|25) +" " interp - - - - (27 +Nasz nasz adj - - - - +Dom dom subst nom sg m3 head:27 +" " interp - - - - 24)|27) +do do prep - - - - +Poznańskiego poznański adj - - - - (24 +Towarzystwa towarzystwo subst gen sg n head:24 +Budownictwa budownictwo subst gen sg n head:25 (25 +Społecznego społeczny adj - - - - 24)|25) +. . interp - - - - + +W w prep - - - - +piątek piątek subst acc sg m3 - (28) +opiniować opiniować inf - - - - +tę ten adj - - - - (29 +propozycję propozycja subst acc sg f head:29 29) +będzie być bedzie - - - - +Komisja komisja subst nom sg f head:30 (31|(30 +Gospodarki gospodarka subst gen sg f head:32 (32 +Komunalnej komunalny adj - - - - 30)|32) +i i conj - - - - +Polityki polityka subst gen sg f head:33 (33 +Mieszkaniowej mieszkaniowy adj - - - - 31)|33) +, , interp - - - - +a a conj - - - - +we w prep - - - - +wtorek wtorek subst acc sg m3 - (34) +zajmie zająć fin - - - - +się się qub - - - - +nią on ppron3 inst sg f - (33) +Rada rada subst nom sg f head:35 (35 +Miasta miasto subst gen sg n - 35) +. . 
interp - - - - + + +- - interp - - - - +Pomysł pomysł subst nom sg m3 head:21 (21 +połączenia połączenie subst gen sg n - (36)|21) +TBS-ów TBS-ów ign - - - - +nie nie qub - - - - +budzi budzić fin - - - - +wątpliwości wątpliwość subst gen pl f - (37) +z z prep - - - - +punktu punkt subst gen sg m3 - +widzenia widzenie subst gen sg n - +racjonalizacji racjonalizacja subst gen sg f - +kosztów koszt subst gen pl m3 - (38) +- - interp - - - - +twierdzi twierdzić fin - - - - +Tomasz Tomasz subst nom sg m1 head:39 (39 +Lewandowski Lewandowski subst nom sg m1 - (40)|39) +, , interp - - - - +radny radny subst nom sg m1 head:41 (41 +LiD Lida subst gen pl f - (42) +i i conj - - - - +członek członek subst nom sg m1 head:43 (43 +komisji komisja subst gen sg f - (30)|41)|43) +. . interp - - - - +- - interp - - - - +Potrzebna potrzebny adj - - - - +jest być fin - - - - +jednak jednak conj - - - - +dyskusja dyskusja subst nom sg f - (35) +o o prep - - - - +przyszłości przyszłość subst loc sg f head:44 (44 +towarzystw towarzystwo subst gen pl n - (45)|44) +. . interp - - - - + +Obecnie obecnie adv - - - - +rząd rząd subst nom sg m3 - (46) +pracuje pracować fin - - - - +nad nad prep - - - - +zmianą zmiana subst inst sg f head:47 (47 +ustawy ustawa subst gen sg f - (48)|47) +, , interp - - - - +która który adj - - - - +przewiduje przewidywać fin - - - - +wykup wykup subst acc sg m3 - +mieszkań mieszkanie subst gen pl n - (49) +w w prep - - - - +towarzystwach towarzystwo subst loc pl n head:45 (45 +budownictwa budownictwo subst gen sg n head:25 (25 +społecznego społeczny adj - - - - 45)|25) +. . interp - - - - + +To to subst nom sg n - +stworzy stworzyć fin - - - - +zupełnie zupełnie adv - - - - (50 +nową nowy adj - - - - +sytuację sytuacja subst acc sg f head:50 50) +. . 
interp - - - - + +W w prep - - - - +związku związek subst loc sg m3 - +z z prep - - - - +tym to subst inst sg n - +konieczne konieczny adj - - - - +będzie być bedzie - - - - +podjęcie podjąć ger nom sg n head:51 (51 +odpowiednich odpowiedni adj - - - - (52 +kroków krok subst gen pl m3 head:52 51)|52) +przez przez prep - - - - +miasto miasto subst acc sg n - (17) +. . interp - - - - + + +Norbert Norbert subst nom sg m1 - (53 +Napieraj napierać impt - - - - 53) +, , interp - - - - +szef szef subst nom sg m1 head:13 (13 +klubu klub subst gen sg m3 - (54)|13) +radnych radny adj - - - - +PiS PiS subst gen sg f - (55) +również również qub - - - - +uważa uważać fin - - - - +, , interp - - - - +że że comp - - - - +ze z prep - - - - +względów wzgląd subst gen pl m3 head:56 (56 +ekonomicznych ekonomiczny adj - - - - 56) +utworzenie utworzyć ger acc sg n head:57 (57 +jednej jeden adj - - - - (58 +spółki spółka subst gen sg f head:58 57)|58) +jest być fin - - - - +zasadne zasadny adj - - - - +. . interp - - - - + + +- - interp - - - - +Na na prep - - - - +razie raz subst loc sg m3 - +jest być fin - - - - +to to pred - - - - +jednak jednak conj - - - - +luźny luźny adj - - - - +pomysł pomysł subst nom sg m3 - +. . interp - - - - + +Nie nie qub - - - - +ma mieć fin - - - - +konkretów konkret subst gen pl m3 - +- - interp - - - - +dodaje dodawać fin - - - - +N nowy brev - - - - +. . interp - - - - + +Napieraj napierać impt - - - - +. . interp - - - - +- - interp - - - - +Nasz nasz adj - - - - (54 +klub klub subst nom sg m3 head:54 54) +jeszcze jeszcze qub - - - - +nie nie qub - - - - +wypracował wypracować praet - - - - +w w prep - - - - +sprawie sprawa subst loc sg f - +tej ten adj - - - - (59 +uchwały uchwała subst gen sg f head:59 +stanowiska stanowisko subst gen sg n - 59) +. . 
interp - - - - + + +#end document /1 +#begin document /2 +Potem potem adv - - - - +znalazł znaleźć praet - - - - +zatrudnienie zatrudnienie subst acc sg n - (0) +w w prep - - - - +Fundacji fundacja subst loc sg f - (1|(2) +Europejskie europejski adj - - - - (3 +Spotkania spotkanie subst nom pl n head:3 +Kaszubskie kaszubski adj - - - - 3) +Centrum centrum subst nom sg n - +Kultury kultura subst gen sg f - (4)|1) +. . interp - - - - + +Był być praet - - - - +prezesem prezes subst inst sg m1 - +utworzonej utworzyć ppas - - - - +przez przez prep - - - - +fundację fundacja subst acc sg f head:2 (2 +spółki spółka subst gen sg f - (5)|2) +Zamek zamek subst nom sg m3 - (6) +. . interp - - - - + + +W w prep - - - - +międzyczasie międzyczas subst loc sg m3 - +został zostać praet - - - - +radnym radny subst inst sg m1 - (7) +. . interp - - - - + +Pod pod prep - - - - +koniec koniec subst acc sg m3 - +ubiegłej ubiegły adj - - - - (8 +kadencji kadencja subst gen sg f head:8 8) +Rada rada subst nom sg f head:9 (10|(9 +Gminy gmina subst gen sg f - (11)|9) +Krokowa krokowy adj - - - - 10) +wybrała wybrać praet - - - - +go on ppron3 acc sg m1 - +na na prep - - - - +wójta wójt subst acc sg m1 - (12) +. . interp - - - - + +Jesienią jesień subst inst sg f - +2002 2002 ign - - - - (13 +r rok brev - - - head:13 +. . interp - - - head:13 13) +został zostać praet - - - - +wójtem wójt subst inst sg m1 - +w w prep - - - - +wyborach wybory subst loc pl n - (14) +powszechnych powszechny adj - - - - +. . interp - - - - + + +- - interp - - - - +Co co subst nom sg n - +skłoniło skłonić praet - - - - +mnie ja ppron12 acc sg m1 - (15) +do do prep - - - - +zostania zostać ger gen sg n - (16) +samorządowcem samorządowiec subst inst sg m1 - (17) +? ? 
interp - - - - + +W w prep - - - - +pewnym pewny adj - - - - (18 +momencie moment subst loc sg m3 head:18 +życia życie subst gen sg n - (19) +mężczyzny mężczyzna subst gen sg m1 - (20)|18) +przychodzi przychodzić fin - - - - +taka taki adj - - - - +potrzeba potrzeba pred - - - - +, , interp - - - - +aby aby comp - - - - +sprawdzić sprawdzić inf - - - - +się się qub - - - - +np na przykład brev - - - - +. . interp - - - - +w w prep - - - - +życiu życie subst loc sg n head:19 (19 +publicznym publiczny adj - - - - 19) +- - interp - - - - +twierdzi twierdzić fin - - - - +krokowski krokowski adj - - - - (21 +kandydat kandydat subst nom sg m1 head:21 21) +do do prep - - - - +tytułu tytuł subst gen sg m3 head:22 (22 +Wójta wójt subst gen sg m1 - (12) +Pomorza pomorze subst gen sg n - (23)|22) +. . interp - - - - +- - interp - - - - +Poza poza prep - - - - +tym to subst inst sg n - +interesowały interesować praet - - - - +mnie ja ppron12 acc sg m1 - +sprawy sprawa subst nom pl f head:24 (24 +komunalne komunalny adj - - - - 24) +. . interp - - - - + +Chciał chcieć praet - - - - +em być aglt - - - - +się się qub - - - - +nimi on ppron3 inst pl f - +bliżej blisko adv - - - - +zająć zająć inf - - - - +. . interp - - - - + + +Co co subst acc sg n - (25) +wójt wójt subst nom sg m1 - +gminy gmina subst gen sg f - (11|(11) +Krokowa krokowy adj - - - - 11) +uważa uważać fin - - - - +za za prep - - - - +swój swój adj - - - - (26 +największy wielki adj - - - - +sukces sukces subst acc sg m3 head:26 +i i conj - - - - +największą wielki adj - - - - +porażkę porażka subst acc sg f - 26) +? ? 
interp - - - - + + +- - interp - - - - +Sukcesem sukces subst inst sg m3 - +jest być fin - - - - +to to subst nom sg n - (27) +, , interp - - - - +że że comp - - - - +udaje udawać fin - - - - +się się qub - - - - +wreszcie wreszcie qub - - - - +opracowywać opracowywać inf - - - - +plany plan subst acc pl m3 head:28 (28 +zagospodarowania zagospodarować ger gen sg n head:29 (29 +przestrzennego przestrzenny adj - - - - 28)|29) +. . interp - - - - + +Gotowe gotowe subst nom sg n - (30) +są być fin - - - - +już już qub - - - - +dla dla prep - - - - +Białogóry Białogóry ign - - - - +i i conj - - - - +części część subst gen sg f - +Dębek dębek subst nom sg m3 - (31) +. . interp - - - - + +Tych Tychy subst gen pl n - +ostatnich ostatni adj - - - - +przez przez prep - - - - +wiele wiele num - - - - (32 +lat rok subst gen pl m3 head:32 32) +nie nie qub - - - - +można można pred - - - - +było być praet - - - - +uchwalić uchwalić inf - - - - +- - interp - - - - +uważa uważać fin - - - - +wójt wójt subst nom sg m1 - (12) +. . interp - - - - +- - interp - - - - +Natomiast natomiast conj - - - - +za za prep - - - - +porażkę porażka subst acc sg f - (33) +uważam uważać fin - - - - +decyzję decyzja subst acc sg f head:34 (34 +Rady rada subst gen sg f - (9) +gminy gmina subst gen sg f - (11)|34) +, , interp - - - - +aby aby comp - - - - +nie nie qub - - - - +przystępować przystępować inf - - - - +w w prep - - - - +ramach ramy subst loc pl n - +Komunalnego komunalny adj - - - - (35 +Związku związek subst gen sg m3 head:35 (36 +Gmin gmina subst gen pl f - (37)|35)|36) +do do prep - - - - +programu program subst gen sg m3 head:38 (38 +uporządkowania uporządkować ger gen sg n - (39) +gospodarki gospodarka subst gen sg f head:40 (40 +ściekowej ściekowy adj - - - - 38)|40) +. . interp - - - - + +Mogli móc praet - - - - +śmy być aglt - - - - +uzyskać uzyskać inf - - - - +wiele wiele num - - - - (41 +milionów milion subst gen pl m3 head:41 +euro euro subst gen pl n - 41) +. . 
interp - - - - + +Boję bać fin - - - - +się się qub - - - - +, , interp - - - - +że że comp - - - - +to to pred - - - - +nie nie conj - - - - +tylko tylko conj - - - - +moja mój adj - - - - +porażka porażka subst nom sg f - +. . interp - - - - +. . interp - - - - +. . interp - - - - + + +Od od prep - - - - +5 5 ign - - - - +lat rok subst gen pl m3 - +ulubionym ulubiony adj - - - - +hobby hobby subst gen sg n - +Henryka Henryk subst gen sg m1 - (42|(43) +Doeringa Doeringa ign - - - - 42) +są być fin - - - - +narty narta subst nom pl f - (44) +. . interp - - - - + +Dlatego dlatego adv - - - - +urlop urlop subst nom sg m3 - (45) +najchętniej chętnie adv - - - - +bierze brać fin - - - - +zimą zima subst inst sg f - +, , interp - - - - +aby aby comp - - - - +udać udać inf - - - - +się się qub - - - - +na na prep - - - - +stoki stok subst acc pl m3 head:46 (46 +Szklarskiej szklarski adj - - - - +Poręby poręba subst gen sg f - 46) +. . interp - - - - + + +- - interp - - - - +Tej ten adj - - - - (47 +zimy zima subst gen sg f head:47 47) +niestety niestety qub - - - - +nie nie qub - - - - +mogł móc praet - - - - +em być aglt - - - - +wyjechać wyjechać inf - - - - +- - interp - - - - +przyznaje przyznawać fin - - - - +wójt wójt subst nom sg m1 - (12) +Krokowej krokowy adj - - - - +. . interp - - - - +- - interp - - - - +Czasu czas subst gen sg m3 head:48 (48 +wolnego wolne subst gen sg n - (49)|48) +mam mieć fin - - - - +bardzo bardzo adv - - - - +mało mało num - - - - +, , interp - - - - +jeśli jeśli comp - - - - +się się qub - - - - +taki taki adj - - - - +pojawia pojawiać fin - - - - +, , interp - - - - +to to conj - - - - +staram starać fin - - - - +się się qub - - - - +go on ppron3 acc sg m1 - (12) +spędzać spędzać inf - - - - +razem razem adv - - - - +z z prep - - - - +bliskimi bliscy subst inst pl m1 - (50) +. . 
interp - - - - + + +Nasz nasz adj - - - - +plebiscyt plebiscyt subst nom sg m3 - + + +„ „ interp - - - - (51 +Dziennik dziennik subst nom sg m3 head:51 +Bałtycki bałtycki adj - - - - +” ” interp - - - - 51) +rozpoczął rozpocząć praet - - - - +kolejną kolejny adj - - - - (52 +edycję edycja subst acc sg f head:52 +konkursu konkurs subst gen sg m3 - (53)|52) +Wójt wójt subst nom sg m1 head:12 (12 +Pomorza pomorze subst gen sg n - 12) +. . interp - - - - + +Nasz nasz adj - - - - (54 +powiat powiat subst nom sg m3 head:54 54) +reprezentują reprezentować fin - - - - +trzej trzy num - - - - (55 +włodarze włodarz subst nom pl m1 head:55 +gmin gmina subst gen pl f - +wiejskich wiejski adj - - - - 55) +. . interp - - - - + +To to pred - - - - +Henryk Henryk subst nom sg m1 - (43) +Doering Doering ign - - - - +( ( interp - - - - +Krokowa krokowy adj - - - - +) ) interp - - - - +, , interp - - - - +Tadeusz Tadeusz subst nom sg m1 - (56|(57) +Puszkarczuk Puszkarczuk ign - - - - 56) +( ( interp - - - - +gmina gmina subst nom sg f head:11 (11 +Puck Puck subst nom sg m3 - (58)|11) +) ) interp - - - - +i i conj - - - - +Jerzy Jerzy subst nom sg m1 - (59|(60) +Włudzik Włudzik ign - - - - 59) +( ( interp - - - - +Kosakowo Kosakowo subst nom sg n - (61) +) ) interp - - - - +. . interp - - - - + +W w prep - - - - +gronie grono subst loc sg n head:62 (62 +kilkudziesięciu kilkadziesiąt num - - - - +kolegów kolega subst gen pl m1 - (63)|62) +po po prep - - - - +fachu fach subst loc sg m3 - (64) +walczyć walczyć inf - - - - +będą być bedzie - - - - +o o prep - - - - +miano miano subst acc sg n head:65 (65 +najpopularniejszego popularny adj - - - - (12 +wójta wójt subst gen sg m1 head:12 12) +województwa województwo subst gen sg n - 65) +. . 
interp - - - - + +O o prep - - - - +tym to subst loc sg n - (27) +, , interp - - - - +kto kto subst nom sg m1 - (66) +wygra wygrać fin - - - - +, , interp - - - - +zadecydują zadecydować fin - - - - +swoimi swój adj - - - - (67 +głosami głos subst inst pl m3 head:67 67) +Czytelnicy czytelnik subst nom pl m1 head:68 (68 +„ „ interp - - - - +Dziennika dziennik subst gen sg m3 - +” ” interp - - - - 68) +. . interp - - - - + + +#end document /2 +#begin document /3 +Paweł Paweł subst nom sg m1 - (0 +Kryszałowicz Kryszałowicz ign - - - - 0) + + +( ( interp - - - - +Eintracht Eintracht subst nom sg m3 head:1 (1 +Frankfurt Frankfurt subst nom sg m3 - (2)|1) +) ) interp - - - - +: : interp - - - - +- - interp - - - - +Ukraińcy Ukrainiec subst nom pl m1 - (3) +postawili postawić praet - - - - +nam my ppron12 dat pl m1 - +wysokie wysoki adj - - - - (4 +wymagania wymaganie subst acc pl n head:4 4) +. . interp - - - - + +Remis remis subst nom sg m3 - (5) +jest być fin - - - - +sprawiedliwy sprawiedliwy adj - - - - +, , interp - - - - +choć choć comp - - - - +przy przy prep - - - - +odrobinie odrobina subst loc sg f head:6 (6 +szczęścia szczęście subst gen sg n - 6) +mogli móc praet - - - - +śmy być aglt - - - - +wygrać wygrać inf - - - - +. . interp - - - - + +Mam mieć fin - - - - +do do prep - - - - +siebie się siebie gen - - - +pretensję pretensja subst acc sg f - (7) +, , interp - - - - +bo bo comp - - - - +przy przy prep - - - - +lepszej dobry adj - - - - (8 +koncentracji koncentracja subst loc sg f head:8 8) +mogł móc praet - - - - +em być aglt - - - - +zdobyć zdobyć inf - - - - +bramkę bramka subst acc sg f - (9) +. . interp - - - - + + +ś ś ign - - - - + + +Jacek Jacek subst nom sg m1 - (10 +Krzynówek Krzynówek ign - - - - 10) + + +( ( interp - - - - +1 1 ign - - - - +. . 
interp - - - - + +FC FC subst nom sg m3 - (11) +Nuernberg Nuernberg ign - - - - +) ) interp - - - - +: : interp - - - - +- - interp - - - - +Cieszę cieszyć fin - - - - +się się qub - - - - +, , interp - - - - +że że comp - - - - +mogł móc praet - - - - +em być aglt - - - - +wystąpić wystąpić inf - - - - +w w prep - - - - +meczu mecz subst loc sg m3 - +kończącym kończyć pact - - - - +eliminacje eliminacja subst acc pl f - (12) +. . interp - - - - + +Ze z prep - - - - +swej swój adj - - - - (13 +strony strona subst gen sg f head:13 13) +zrobię zrobić fin - - - - +wszystko wszystko subst acc sg n - (14) +, , interp - - - - +aby aby comp - - - - +nie nie qub - - - - +wypaść wypaść inf - - - - +z z prep - - - - +kadry kadra subst gen sg f - (15) +, , interp - - - - +mimo mimo prep - - - - +że że comp - - - - +nie nie qub - - - - +mam mieć fin - - - - +ugruntowanej ugruntować ppas - - - - +pozycji pozycja subst gen sg f - +w w prep - - - - +swoim swój adj - - - - (16 +klubie klub subst loc sg m3 head:16 16) +. . interp - - - - + +Zasłużyli zasłużyć praet - - - - +śmy być aglt - - - - +na na prep - - - - +awans awans subst acc sg m3 - +, , interp - - - - +bo bo comp - - - - +byli być praet - - - - +śmy być aglt - - - - +zespołem zespół subst inst sg m3 - (17) +, , interp - - - - +który który adj - - - - +grał grać praet - - - - +najrówniej równo adv - - - - +i i conj - - - - +w w prep - - - - +głupi głupi adj - - - - (18 +sposób sposób subst acc sg m3 head:18 18) +nie nie qub - - - - +tracił tracić praet - - - - +punktów punkt subst gen pl m3 - (19) +. . 
interp - - - - + + +ś ś ign - - - - + + +Marek marek subst nom sg m1 - +Koźmiński koźmiński adj - - - - +: : interp - - - - + + +Mecze mecz subst nom pl m3 - (20) +z z prep - - - - +Ukrainą ukraina subst inst sg f - (21) +spinają spinać fin - - - - +klamrą klamra subst inst sg f - (22) +eliminacje eliminacja subst nom pl f - (12) +, , interp - - - - +które który adj - - - - +będziemy być bedzie - - - - +pamiętać pamiętać inf - - - - +do do prep - - - - +końca koniec subst gen sg m3 head:23 (23 +życia życie subst gen sg n - 23) +. . interp - - - - + +Uważam uważać fin - - - - +, , interp - - - - +że że comp - - - - +kibice kibic subst nom pl m1 - (24) +oglądali oglądać praet - - - - +dzisiaj dzisiaj adv - - - - +dobry dobry adj - - - - +mecz mecz subst acc sg m3 - +dwóch dwa num - - - - (25 +równych równy adj - - - - +zespołów zespół subst gen pl m3 head:25 25) +. . interp - - - - + +Moim mój adj - - - - (26 +zdaniem zdanie subst inst sg n head:26 +Ukraińcy Ukrainiec subst nom pl m1 - (3)|26) +zasłużenie zasłużenie adv - - - - +zajęli zająć praet - - - - +drugie drugi adj - - - - (27 +miejsce miejsce subst acc sg n head:27 27) +, , interp - - - - +jednak jednak conj - - - - +nie nie qub - - - - +chciał chcieć praet - - - - +by by qub - - - - +m być aglt - - - - +spotkać spotkać inf - - - - +się się qub - - - - +z z prep - - - - +nimi on ppron3 inst pl m1 - +na na prep - - - - +mundialu mundial subst loc sg m3 - (28) +. . interp - - - - + +To to pred - - - - +bardzo bardzo adv - - - - +niewygodny niewygodny adj - - - - +przeciwnik przeciwnik subst nom sg m1 - +. . interp - - - - + +Może móc fin - - - - +wygrać wygrać inf - - - - +z z prep - - - - +każdym każdy adj - - - - +. . 
interp - - - - + + +ś ś ign - - - - + + +Zbigniew Zbigniew subst nom sg m1 head:29 (29 +Boniek Boniek subst nom sg m1 - 29) + + +( ( interp - - - - +wiceprezes wiceprezes subst nom sg m1 head:30 (30 +PZPN PZPN subst nom sg m3 - 30) +) ) interp - - - - +: : interp - - - - +- - interp - - - - +Najważniejsze ważny adj - - - - +, , interp - - - - +że że comp - - - - +eliminacje eliminacja subst nom pl f - (12) +zakończyły zakończyć praet - - - - +się się qub - - - - +sukcesem sukces subst inst sg m3 - (31) +. . interp - - - - + +Jestem być fin - - - - +usatysfakcjonowany usatysfakcjonować ppas - - - - +. . interp - - - - + +Chcę chcieć fin - - - - +podkreślić podkreślić inf - - - - +, , interp - - - - +że że comp - - - - +kibice kibic subst nom pl m1 - +i i conj - - - - +dziennikarze dziennikarz subst nom pl m1 - (32) +dostrzegają dostrzegać fin - - - - +tylko tylko qub - - - - +to to subst nom sg n - (33) +, , interp - - - - +co co subst nom sg n - (34) +dzieje dziać fin - - - - +się się qub - - - - +na na prep - - - - +boisku boisko subst loc sg n - (35) +i i conj - - - - +rzadko rzadko adv - - - - +kiedy kiedy adv - - - - +zwracają zwracać fin - - - - +uwagę uwaga subst acc sg f - (36) +na na prep - - - - +kulisy kulisa subst acc pl f head:37 (37 +sukcesów sukces subst gen pl m3 - (38) +piłkarzy piłkarz subst gen pl m1 - (39)|37) +, , interp - - - - +a a conj - - - - +oznaczają oznaczać fin - - - - +one on ppron3 nom pl m3 - (38) +ogromny ogromny adj - - - - (40 +wysiłek wysiłek subst acc sg m3 head:40 +organizacyjny organizacyjny adj - - - - 40) +, , interp - - - - +wiele wiele num - - - - (41 +pracy praca subst gen sg f head:41 +rzeszy rzesza subst gen sg f - (42) +ludzi ludzie subst gen pl m1 - (43)|41) +, , interp - - - - +którzy który adj - - - - +nie nie qub - - - - +stoją stać fin - - - - +w w prep - - - - +pierwszym pierwszy adj - - - - (17 +szeregu szereg subst loc sg m3 head:17 17) +, , interp - - - - +ale ale conj - - - - +wykonują wykonywać fin - - - - 
+nieraz nieraz adv - - - - +ciężkie ciężki adj - - - - +i i conj - - - - +niewdzięczne niewdzięczny adj - - - - (44 +zadania zadanie subst acc pl n head:44 44) +. . interp - - - - + +Moim mój adj - - - - (26 +zdaniem zdanie subst inst sg n head:26 26) +między między prep - - - - +innymi inny adj - - - - +dlatego dlatego adv - - - - +, , interp - - - - +że że comp - - - - +sprawy sprawa subst nom pl f head:45 (45 +organizacyjne organizacyjny adj - - - - 45) +zostały zostać praet - - - - +ułożone ułożony adj - - - - +na na prep - - - - +odpowiednim odpowiedni adj - - - - (46 +poziomie poziom subst loc sg m3 head:46 46) +, , interp - - - - +wszyscy wszyscy subst nom pl m1 - (47) +możemy móc fin - - - - +się się qub - - - - +dzisiaj dzisiaj adv - - - - +cieszyć cieszyć inf - - - - +z z prep - - - - +awansu awans subst gen sg m3 - +do do prep - - - - +mistrzostw mistrzostwo subst gen pl n head:48 (48 +świata świat subst gen sg m3 - (17)|48) +. . interp - - - - + + +#end document /3 +#begin document /4 +Ernest Ernesta subst gen pl f - (0) +i i conj - - - - +Agnieszka Agnieszka subst nom sg f - +nie nie qub - - - - +planowali planować praet - - - - +, , interp - - - - +że że comp - - - - +będą być bedzie - - - - +mieli mieć praet - - - - +wielką wielki adj - - - - +, , interp - - - - +babską babski adj - - - - (1 +rodzinę rodzina subst acc sg f head:1 1) +. . interp - - - - + +Ale ale conj - - - - +tak tak adv - - - - +wyszło wyniść praet - - - - +. . interp - - - - +– – interp - - - - +I i conj - - - - +całe cały adj - - - - +szczęście szczęście subst nom sg n - +. . interp - - - - + +Lepiej dobrze adv - - - - +się się qub - - - - +dogaduję dogadywać fin - - - - +z z prep - - - - +dziewczętami dziewczę subst inst pl n - (2) +– – interp - - - - +cieszy cieszyć fin - - - - +się się qub - - - - +Ernest Ernest subst nom sg m1 head:3 (3 +Kwiecień Kwiecień subst nom sg m1 - 3) +. . 
interp - - - - + + +W w prep - - - - +Wigilię wigilia subst acc sg f - (4) +do do prep - - - - +jego on ppron3 gen sg m1 - (5|(3) +obowiązków obowiązek subst gen pl m3 head:5 5) +, , interp - - - - +poza poza prep - - - - +dostarczeniem dostarczyć ger inst sg n - +choinki choinka subst gen sg f - (6) +, , interp - - - - +należeć należeć inf - - - - +będzie być bedzie - - - - +zmywanie zmywać ger nom sg n head:7 (7 +naczyń naczynie subst gen pl n - (8)|7) +. . interp - - - - + +Agnieszka Agnieszka subst nom sg f - (9) +zrobi zrobić fin - - - - +pierogi pieróg subst acc pl m3 - (10) +, , interp - - - - +ugotuje ugotować fin - - - - +barszcz barszcz subst nom sg m3 - (11) +z z prep - - - - +uszkami uszko subst inst pl n - +, , interp - - - - +usmaży usmażyć fin - - - - +karpia Karp subst acc sg m1 - (12) +. . interp - - - - + +Córki córka subst nom pl f - +upieką upiec fin - - - - +ciasta ciasto subst acc pl n - (13) +. . interp - - - - + +Potem potem adv - - - - +przyjdzie przyjść fin - - - - +czas czas subst acc sg m3 - +na na prep - - - - +prezenty prezent subst acc pl m3 - (14) +. . interp - - - - + +Może może qub - - - - +to to pred - - - - +nawet nawet qub - - - - +będą być bedzie - - - - +empetrójki empetrójki ign - - - - +, , interp - - - - +o o prep - - - - +których który adj - - - - +marzą marzyć fin - - - - +starsze stary adj - - - - +dziewczyny dziewczyna subst acc pl f - +. . interp - - - - + + +Jodełek jodełka subst gen pl f - +sadzimy sadzić fin - - - - +mniej mało adv - - - - + + +Leśniczy leśniczy subst nom sg m1 - (15) +, , interp - - - - +od od prep - - - - +którego który adj - - - - (16 +pan pan subst nom sg m1 head:16 +Ernest Ernesta subst gen pl f - (0)|16) +przywozi przywozić fin - - - - +choinkę choinka subst acc sg f - (6) +, , interp - - - - +mieszka mieszkać fin - - - - +kilka kilka num - - - - +kilometrów kilometr subst gen pl m3 - +od od prep - - - - +domu dom subst gen sg m3 head:17 (17 +Kwietniów Kwiecień subst gen pl m1 - (18)|17) +. . 
interp - - - - + +On on ppron3 nom sg m1 - (16) +także także qub - - - - +nie nie qub - - - - +wyobraża wyobrażać fin - - - - +sobie się siebie dat - - - (19) +świąt święto subst gen pl n - (20) +bez bez prep - - - - +prawdziwego prawdziwy adj - - - - (21 +świerku świerk subst gen sg m3 head:21 21) +. . interp - - - - +– – interp - - - - +I i conj - - - - +musi musić fin - - - - +być być inf - - - - +kiczowaty kiczowaty adj - - - - +– – interp - - - - +uśmiecha uśmiechać fin - - - - +się się qub - - - - +Gabriel Gabriel subst nom sg m1 head:22 (22 +Grobelny Grobelny subst nom sg m1 - 22) +, , interp - - - - +nadleśniczy nadleśniczy subst nom sg m1 head:23 (23 +wałbrzyski wałbrzyski adj - - - - 23) +. . interp - - - - + + +To to subst nom sg n - (24) +znaczy znaczyć fin - - - - +, , interp - - - - +że że comp - - - - +powinny powinien winien - - - - +na na prep - - - - +nim on ppron3 loc sg m3 - (21) +wisieć wisieć inf - - - - +ozdoby ozdoba subst acc pl f - (25) +zrobione zrobić ppas - - - - +przez przez prep - - - - +dzieci dziecko subst acc pl n - (26) +, , interp - - - - +przechowywane przechowywać ppas - - - - +latami rok subst inst pl m3 - +, , interp - - - - +wyciągane wyciągać ppas - - - - +na na prep - - - - +tę ten adj - - - - (27 +jedyną jedyny adj - - - - +okazję okazja subst acc sg f head:27 27) +. . interp - - - - + + +Pan pan subst nom sg m1 head:16 (16 +Gabriel Gabriel subst nom sg m1 - 16) +ma mieć fin - - - - +dwóch dwa num - - - - (28 +synów syn subst gen pl m1 head:28 +i i conj - - - - +trzy trzy num - - - - (29 +córki córka subst acc pl f head:29 28)|29) +. . interp - - - - + +W w prep - - - - +domu dom subst loc sg m3 - (17) +została zostać praet - - - - +najmłodsza młody adj - - - - +, , interp - - - - +12-letnia 12-letnia ign - - - - +, , interp - - - - +ale ale conj - - - - +na na prep - - - - +święta święto subst acc pl n - +zjadą zjechać fin - - - - +wszyscy wszyscy subst nom pl m1 - (30) +. . 
interp - - - - + +I i conj - - - - +ubiorą ubrać fin - - - - +choinkę choinka subst acc sg f - (6) +. . interp - - - - +– – interp - - - - +Żona żona subst nom sg f - +rozwiesi rozwiesić fin - - - - +anielskie anielski adj - - - - (31 +włosy włos subst acc pl m3 head:31 31) +, , interp - - - - +ja ja ppron12 nom sg m1 - (32) +podłączę podłączyć fin - - - - +lampki lampka subst acc pl f - (33) +– – interp - - - - +w w prep - - - - +domu dom subst loc sg m3 head:17 (17 +nadleśniczego nadleśniczy subst gen sg m1 - (23)|17) +podział podział subst nom sg m3 head:34 (34 +świątecznych świąteczny adj - - - - (35 +ról rola subst gen pl f head:35 34)|35) +jest być fin - - - - +określony określony adj - - - - +. . interp - - - - + + +W w prep - - - - +dolnośląskich dolnośląski adj - - - - (36 +lasach las subst loc pl m3 head:36 36) +najwięcej najwięcej num - - - - +jest być fin - - - - +świerków świerk subst gen pl m2 - (37) +. . interp - - - - + +Na na prep - - - - +plantacjach plantacja subst loc pl f - (38) +sadzą sadzić fin - - - - +także także qub - - - - +coraz coraz adv - - - - +popularniejsze popularny adj - - - - +jodły jodła subst nom pl f - +z z prep - - - - +miękkimi miękki adj - - - - (39 +igłami igła subst inst pl f head:39 39) +. . interp - - - - + + +– – interp - - - - +Ale ale conj - - - - +i i qub - - - - +tych ten adj - - - - +jodełek jodełka subst gen pl f - +sadzimy sadzić fin - - - - +już już qub - - - - +mniej mało adv - - - - +. . interp - - - - + +To to qub - - - - +nie nie qub - - - - +lata latać fin - - - - +dziewięćdziesiąte dziewięćdziesiąty adj - - - - +, , interp - - - - +gdy gdy adv - - - - +sprzedawali sprzedawać praet - - - - +śmy być aglt - - - - +prawie prawie qub - - - - +wszystkie wszystek adj - - - - +wyhodowane wyhodować ppas - - - - +drzewka drzewko subst acc pl n - +– – interp - - - - +wspomina wspominać fin - - - - +nadleśniczy nadleśniczy subst nom sg m1 - (23) +. . 
interp - - - - + + +U u prep - - - - +Grobelnego grobelny adj - - - - +choinkę choinka subst acc sg f - (6) +można można pred - - - - +sobie się siebie dat - - - (40) +wybrać wybrać inf - - - - +. . interp - - - - +– – interp - - - - +Mamy mieć fin - - - - +rodziny rodzina subst acc pl f - (41) +, , interp - - - - +w w prep - - - - +których który adj - - - - +co co comp - - - - +roku rok subst gen sg m3 - (42) +ojciec ojciec subst nom sg m1 - +przyjeżdża przyjeżdżać fin - - - - +z z prep - - - - +synem syn subst inst sg m1 - (43) +, , interp - - - - +by by comp - - - - +samemu sam adj - - - - +ściąć ściąć inf - - - - +drzewko drzewko subst acc sg n - (44) +. . interp - - - - + +Taką taki adj - - - - +mają Maja subst inst sg m1 - +tradycję tradycja subst acc sg f - (45) +– – interp - - - - +dodaje dodawać fin - - - - +pan pan subst nom sg m1 head:16 (16 +Gabriel Gabriel subst nom sg m1 - (22)|16) +. . interp - - - - + + +#end document /4 +#begin document /5 +Cena cena subst nom sg f head:0 (0 +życia życie subst gen sg n - 0) + + +Z z prep - - - - +tego ten adj - - - - +pogromu pogrom subst gen sg m3 - +ocalało ocalać praet - - - - +kilkudziesięciu kilkadziesiąt num - - - - (1 +Żydów żyd subst gen pl m1 head:1 1) +, , interp - - - - +a a conj - - - - +wśród wśród prep - - - - +nich on ppron3 gen pl m2 - (2) +rodzina rodzina subst nom sg f - (3) +Mosze Mosze ign - - - - +Sonensona Sonensona ign - - - - +. . interp - - - - + +Przed przed prep - - - - +wojną wojna subst inst sg f - +była być praet - - - - +to to subst acc sg n - (4) +w w prep - - - - +skali skala subst loc sg f - +miasteczka miasteczko subst gen sg n - (5) +rodzina rodzina subst nom sg f head:3 (3 +bogata bogaty adj - - - - 3) +. . interp - - - - + +Sonensonowie Sonensonowie ign - - - - +mieli mieć praet - - - - +garbarnię garbarnia subst acc sg f - +. . 
interp - - - - + +Nie nie qub - - - - +udało udać praet - - - - +mi ja ppron12 dat sg m1 - +się się qub - - - - +dociec dociec inf - - - - +, , interp - - - - +u u prep - - - - +kogo kto subst gen sg m1 - (6) +mianowicie mianowicie conj - - - - +przechowywali przechowywać praet - - - - +się się qub - - - - +Sonensonowie Sonensonowie ign - - - - +oraz oraz conj - - - - +pozostali pozostały adj - - - - (1 +Żydzi żyd subst nom pl m1 head:1 1) +w w prep - - - - +czasie czas subst loc sg m3 - +okupacji okupacja subst gen sg f head:7 (7 +niemieckiej niemiecki adj - - - - 7) +. . interp - - - - + +Faktem fakt subst inst sg m3 - +pozostaje pozostawać fin - - - - +natomiast natomiast conj - - - - +, , interp - - - - +że że comp - - - - +okupację okupacja subst acc sg f head:7 (7 +tę ten adj - - - - 7) +przeżyli przeżyć praet - - - - +. . interp - - - - + +Faktem fakt subst inst sg m3 head:8 (8 +oczywistym oczywisty adj - - - - 8) +pozostaje pozostawać fin - - - - +i i conj - - - - +to to subst nom sg n - (4) +, , interp - - - - +że że comp - - - - +liczne liczny adj - - - - (9 +rodziny rodzina subst nom pl f head:9 +polskie polski adj - - - - 9) +- - interp - - - - +w w prep - - - - +Ejszyszkach Ejszyszkach ign - - - - +i i conj - - - - +w w prep - - - - +pobliskich pobliski adj - - - - +okolicach okolica subst loc pl f - +- - interp - - - - +przechowywały przechowywać praet - - - - +Żydów żyd subst acc pl m1 - (1) +. . 
interp - - - - + +Parę para subst acc sg f - +kilometrów kilometr subst gen pl m3 - (10) +od od prep - - - - +Ejszyszek Ejszyszek ign - - - - +, , interp - - - - +w w prep - - - - +Korkucianach Korkucianach ign - - - - +( ( interp - - - - +w w prep - - - - +folwarku folwark subst loc sg m3 - (11) +Lebiedniki Lebiedniki ign - - - - +) ) interp - - - - +, , interp - - - - +żołnierz żołnierz subst nom sg m1 head:12 (12 +AK AK subst gen sg f - (13) +Kazimierz Kazimierz subst nom sg m1 - (14)|12) +Korkuć Korkuć ign - - - - +w w prep - - - - +czasie czas subst loc sg m3 - +wojny wojna subst gen sg f - (15) +w w prep - - - - +swoim swój adj - - - - (16 +domu dom subst loc sg m3 head:16 16) +przechowywał przechowywać praet - - - - +28 28 ign - - - - +Żydów żyd subst gen pl m1 - (1) +. . interp - - - - + +Od od prep - - - - +studni studnia subst gen sg f - (17) +do do prep - - - - +piwnic piwnica subst gen pl f head:18 (18 +domu dom subst gen sg m3 - (16)|18) +był być praet - - - - +przekopany przekopać ppas - - - - (19 +tunel tunel subst nom sg m3 head:19 19) +, , interp - - - - +dzięki dzięki prep - - - - +czemu co subst dat sg n - +mieli mieć praet - - - - +wodę woda subst acc sg f - (20) +. . interp - - - - + +Natomiast natomiast conj - - - - +w w prep - - - - +skali skala subst loc sg f - +siatki siatka subst gen sg f head:21 (21 +AK AK subst gen sg f - +Kazimierz Kazimierz subst nom sg m1 - (14)|21) +Korkuć Korkuć ign - - - - +przechowywał przechowywać praet - - - - +około około qub - - - - +70 70 ign - - - - +Żydów żyd subst gen pl m1 - (1) +. . interp - - - - + +Rodzina rodzina subst nom sg f - +Świeczków Świeczków ign - - - - +również również qub - - - - +przechowywała przechowywać praet - - - - +Żydów żyd subst gen pl m1 - (1) +. . 
interp - - - - + +W w prep - - - - +tamtych tamten adj - - - - +stronach strona subst loc pl f - +liczne liczny adj - - - - (9 +rodziny rodzina subst nom pl f head:9 9) +polskie polski adj - - - - +postępowały postępować praet - - - - +podobnie podobnie adv - - - - +. . interp - - - - + + +Prawdą prawda subst inst sg f - +jest być fin - - - - +również również qub - - - - +i i conj - - - - +to to subst nom sg n - (4) +, , interp - - - - +że że comp - - - - +Żydzi żyd subst nom pl m1 - (1) +za za prep - - - - +swe swój adj - - - - (22 +przechowanie przechowanie subst acc sg n head:22 22) +płacili płacić praet - - - - +. . interp - - - - + +Płacili płacić praet - - - - +za za prep - - - - +utrzymanie utrzymanie subst acc sg n - +i i conj - - - - +chyba chyba qub - - - - +jeszcze jeszcze qub - - - - +- - interp - - - - +za za prep - - - - +ryzyko ryzyko subst acc sg n - (23) +. . interp - - - - + +O o prep - - - - +tym to subst loc sg n - (4) +dzisiaj dzisiaj adv - - - - +raczej raczej qub - - - - +tu tu adv - - - - +się się qub - - - - +nie nie qub - - - - +mówi mówić fin - - - - +, , interp - - - - +ale ale conj - - - - +prawdopodobnie prawdopodobnie adv - - - - +różnie różnie adv - - - - +z z prep - - - - +tym to subst inst sg n - (4) +było być praet - - - - +: : interp - - - - +jedni jeden adj - - - - +za za prep - - - - +pieniądze pieniądz subst acc pl m3 - (24) +, , interp - - - - +inni inny adj - - - - +- - interp - - - - +z z prep - - - - +odruchu odruch subst gen sg m3 head:25 (25 +serca serce subst gen sg n - 25) +. . interp - - - - + +Ryzykowali ryzykować praet - - - - +i i conj - - - - +Polacy Polak subst nom pl m1 - +, , interp - - - - +i i conj - - - - +Żydzi żyd subst nom pl m1 - (1) +. . interp - - - - + +Te ten adj - - - - +rachunki rachunek subst nom pl m3 - +mogły móc praet - - - - +wyglądać wyglądać inf - - - - +bardzo bardzo adv - - - - +różnie różnie adv - - - - +. . 
interp - - - - + + +Mieszkam mieszkać fin - - - - +w w prep - - - - +jednej jeden adj - - - - +z z prep - - - - +podwileńskich podwileński adj - - - - +wsi wieś subst gen pl f - +. . interp - - - - + +Otóż otóż qub - - - - +w w prep - - - - +tej ten adj - - - - (26 +mojej mój adj - - - - +wsi wieś subst loc sg f head:26 26) +pewien pewny adj - - - - (27 +gospodarz gospodarz subst nom sg m1 head:27 27) +- - interp - - - - +Polak Polak subst nom sg m1 - (28) +- - interp - - - - +przechowywał przechowywać praet - - - - +w w prep - - - - +czasie czas subst loc sg m3 - +wojny wojna subst gen sg f - +młodą młody adj - - - - (29 +Żydówkę Żydówka subst acc sg f head:29 29) +. . interp - - - - + +Spodobała spodobać praet - - - - +mu on ppron3 dat sg m1 - (28) +się się qub - - - - +, , interp - - - - +z z prep - - - - +czego co subst gen sg n - +wynikł wyniknąć praet - - - - +dramat dramat subst acc sg m3 - (30) +. . interp - - - - + +Zdenerwowana zdenerwować ppas - - - - +żona żona subst nom sg f - +doniosła donieść praet - - - - +na na prep - - - - +policję policja subst acc sg f - (31) +. . interp - - - - + +Aresztowano aresztować imps - - - - +Żydówkę Żydówka subst acc sg f - +razem razem adv - - - - +z z prep - - - - +gospodarzem gospodarz subst inst sg m1 - (27) +, , interp - - - - +przerażona przerażony adj - - - - (32 +kobieta kobieta subst nom sg f head:32 32) +próbowała próbować praet - - - - +ocalić ocalić inf - - - - +męża mąż subst acc sg m1 - (27) +. . 
interp - - - - + +Zanim zanim comp - - - - +uzbierała uzbierać praet - - - - +potrzebną potrzebny adj - - - - +sumę suma subst acc sg f - +na na prep - - - - +łapówkę łapówka subst acc sg f - (33) +, , interp - - - - +było być praet - - - - +już już qub - - - - +za za qub - - - - +późno późno adv - - - - +- - interp - - - - +rozstrzelano rozstrzelać imps - - - - +nie nie conj - - - - (29 +tylko tylko conj - - - - +Żydówkę Żydówka subst acc sg f head:29 +, , interp - - - - +ale ale conj - - - - +i i conj - - - - +gospodarza gospodarz subst acc sg m1 - (27)|29) +. . interp - - - - + +Czy czy qub - - - - +żonę żona subst acc sg f - +tego ten adj - - - - (34 +straceńca straceniec subst gen sg m1 head:34 34) +można można pred - - - - +nazwać nazwać inf - - - - +antysemitką antysemitka subst inst sg f - (35) +? ? interp - - - - + + +#end document /5 +#begin document /8 +- - interp - - - - +150 150 ign - - - - +g gram brev - - - head:0 (0 +owoców owoc subst gen pl m3 - 0) + + +Joanna Joanna subst nom sg f head:1 (1 +Kuc kuca subst gen pl f - 1) + + +( ( interp - - - - +PAI PAI subst nom sg f - +) ) interp - - - - + + +Diety dieta subst nom pl f - +są być fin - - - - +różne różny adj - - - - +, , interp - - - - +ścisłe ścisły adj - - - - +i i conj - - - - +urozmaicone urozmaicić ppas - - - - +, , interp - - - - +eliminacyjne eliminacyjny adj - - - - +, , interp - - - - +dziwne dziwny adj - - - - +, , interp - - - - +racjonalne racjonalny adj - - - - +i i conj - - - - +wreszcie wreszcie qub - - - - +te ten adj - - - - +zalecane zalecać ppas - - - - +przez przez prep - - - - +lekarzy lekarz subst acc pl m1 - (2) +. . interp - - - - + +Dziś dziś adv - - - - +dieta dieta subst nom sg f - +burgundzka burgundzki adj - - - - +. . interp - - - - + + +Jak jak adv - - - - +wygląda wyglądać fin - - - - +przykładowe przykładowy adj - - - - +tygodniowe tygodniowy adj - - - - +menu menu subst nom sg n - +? ? 
interp - - - - + + +1 1 ign - - - - +dzień dzień subst acc sg m3 - + + +-obiad -obiad ign - - - - +: : interp - - - - +pierś pierś subst nom sg f head:3 (3 +kurczaka kurczak subst gen sg m2 - 3) +pieczona piec ppas - - - - +w w prep - - - - +folii folia subst loc sg f - (4) +, , interp - - - - +sałatka sałatka subst nom sg f - (5) +z z prep - - - - +czerwonej czerwony adj - - - - (6 +kapusty kapusta subst gen sg f head:6 6) +doprawiona doprawić ppas - - - - +octem ocet subst inst sg m3 head:7 (7 +winnym winny adj - - - - +i i conj - - - - +oliwą oliwa subst inst sg f - (8)|7) +, , interp - - - - +pieczone piec ppas - - - - (9 +jabłko jabłko subst nom sg n head:9 9) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (10) +: : interp - - - - +2 2 ign - - - - +sadzone sadzić ppas - - - - (11 +jajka jajko subst nom pl n head:11 11) +, , interp - - - - +pomidor pomidor subst nom sg m2 - (12) +, , interp - - - - +trójkąt trójkąt subst nom sg m3 - +serka serek subst gen sg m3 - (13) +topionego topić ppas - - - - +, , interp - - - - +gruszka Gruszka subst nom sg m1 - (14) + + +2 2 ign - - - - +dzień dzień subst acc sg m3 - + + +- - interp - - - - +obiad obiad subst nom sg m3 - (15) +: : interp - - - - +królik królik subst nom sg m2 - (16) +w w prep - - - - +potrawce potrawka subst loc sg f - (17) +, , interp - - - - +surówka surówka subst nom sg f - (18) +z z prep - - - - +marchewki marchewka subst gen sg f head:19 (19 +i i conj - - - - +chrzanu chrzan subst gen sg m3 - 19) +, , interp - - - - +brzoskwinia brzoskwinia subst nom sg f - (20) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (10) +: : interp - - - - +befsztyk befsztyk subst nom sg m3 - (21) +z z prep - - - - +polędwicy polędwica subst gen sg f - (22) +usmażony usmażyć ppas - - - - +bez bez prep - - - - +tłuszczu tłuszcz subst gen sg m3 - (23) +, , interp - - - - +zielona zielony adj - - - - +sałata sałata subst nom sg f - +z z prep - - - - +sosem sos subst inst sg m3 - (24) +vinegrette 
vinegrette ign - - - - +, , interp - - - - +2 2 ign - - - - +mandarynki mandarynka subst gen sg f - (25) + + +3 3 ign - - - - +dzień dzień subst acc sg m3 - + + +-obiad -obiad ign - - - - +: : interp - - - - +kura kura subst nom sg f - (26) +z z prep - - - - +rosołu rosół subst gen sg m3 - (27) +, , interp - - - - +gotowana gotowany adj - - - - (6 +kapusta kapusta subst nom sg f head:6 6) +bez bez prep - - - - +zasmażki zasmażka subst gen sg f - +, , interp - - - - +mały mały adj - - - - (28 +grejpfrut grejpfrut subst nom sg m3 head:28 28) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (10) +: : interp - - - - +ryba ryba subst nom sg f - (29) +w w prep - - - - +warzywach warzywo subst loc pl n - (30) +, , interp - - - - +surówka surówka subst nom sg f - (18) +z z prep - - - - +buraczków buraczek subst gen pl m2 - +, , interp - - - - +pomarańcza pomarańcza subst nom sg f - (31) + + +4 4 ign - - - - +dzień dzień subst acc sg m3 - + + +-obiad -obiad ign - - - - +: : interp - - - - +pieczona piec ppas - - - - +wieprzowina wieprzowina subst nom sg f - +, , interp - - - - +brokuły brokuł subst nom pl m3 - (32) +ugotowane ugotować ppas - - - - +na na prep - - - - +parze par subst loc sg m3 - (33) +, , interp - - - - +jabłko jabłko subst nom sg n - (9) + + +- - interp - - - - +kolacja kolacja subst nom sg f - +: : interp - - - - +szklanka szklanka subst nom sg f head:34 (34 +ugotowanego ugotować ppas - - - - (35 +ryżu ryż subst gen sg m3 head:35 34)|35) +zalana zalać ppas - - - - +chudym chudy adj - - - - (36 +mlekiem mleko subst inst sg n head:36 36) +, , interp - - - - +plaster plaster subst nom sg m3 head:37 (37 +białego biały adj - - - - (38 +chudego chudy adj - - - - +sera ser subst gen sg m3 head:38 37)|38) +, , interp - - - - +kiść kiść subst nom sg f head:39 (39 +winogron winogrono subst gen pl n - (40)|39) + + +5 5 ign - - - - +dzień dzień subst acc sg m3 - + + +- - interp - - - - +obiad obiad subst nom sg m3 - (15) +– – interp - - - - +sztuka sztuka 
subst nom sg f head:41 (41 +mięsa mięso subst gen sg n - (42)|41) +, , interp - - - - +gotowane gotowany adj - - - - +buraczki buraczek subst nom pl m2 - +, , interp - - - - +kilka kilka num - - - - (43 +suszonych suszyć ppas - - - - +śliwek śliwka subst gen pl f head:43 43) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (10) +- - interp - - - - +2 2 ign - - - - +jajka jajko subst nom pl n - (11) +na na prep - - - - +miękko miękko adv - - - - +, , interp - - - - +serka serek subst gen sg m3 - +brie brie subst gen sg m3 - (44) +, , interp - - - - +banan banan subst nom sg m3 - (45) + + +6 6 ign - - - - +dzień dzień subst acc sg m3 - + + +- - interp - - - - +obiad obiad subst nom sg m3 - +: : interp - - - - +ryba ryba subst nom sg f - (29) +pieczona piec ppas - - - - +w w prep - - - - +folii folia subst loc sg f - (4) +, , interp - - - - +surówka surówka subst nom sg f - (18) +z z prep - - - - +białej biały adj - - - - (6 +kapusty kapusta subst gen sg f head:6 +i i conj - - - - +marchewki marchewka subst gen sg f - (19)|6) +, , interp - - - - +gruszka Gruszka subst nom sg m1 - (14) +w w prep - - - - +sosie sos subst loc sg m3 head:24 (24 +waniliowym waniliowy adj - - - - 24) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (10) +: : interp - - - - +pieczeń pieczeń subst nom sg f - +z z prep - - - - +królika królik subst gen sg m2 - (16) +, , interp - - - - +sałatka sałatka subst nom sg f - (5) +z z prep - - - - +pomidorów pomidor subst gen pl m2 head:46 (46 +i i conj - - - - +papryki papryka subst gen sg f - (47)|46) +, , interp - - - - +trójkąt trójkąt subst nom sg m3 head:48 (48 +serka serek subst gen sg m3 - (13)|48) +topionego topić ppas - - - - +, , interp - - - - +kiwi kiwi subst nom sg n - (49) + + +7 7 ign - - - - +dzień dzień subst acc sg m3 - + + +- - interp - - - - +obiad obiad subst nom sg m3 - +: : interp - - - - +cielęcina cielęcina subst nom sg f - (50) +pieczona piec ppas - - - - +z z prep - - - - +dodatkiem dodatek subst inst sg 
m3 head:51 (51 +ziół zioło subst gen pl n - (52)|51) +, , interp - - - - +fasolka fasolka subst nom sg f head:53 (53 +szparagowa szparagowy adj - - - - 53) +z z prep - - - - +odrobiną odrobina subst inst sg f head:54 (54 +masła masło subst gen sg n - (55)|54) +, , interp - - - - +jogurt jogurt subst nom sg m3 - (56) + + +- - interp - - - - +kolacja kolacja subst nom sg f - (10) +: : interp - - - - +zapiekanka zapiekanka subst nom sg f - +z z prep - - - - +ziemniaków ziemniak subst gen pl m2 - (57) +, , interp - - - - +odrobiny odrobina subst nom pl f - (58) +startego zetrzeć ppas - - - - +żółtego żółty adj - - - - (38 +sera ser subst gen sg m3 head:38 +i i conj - - - - +szynki szynka subst gen sg f - (59)|38) +, , interp - - - - +sałata sałata subst nom sg f head:60 (60 +zielona zielony adj - - - - 60) +z z prep - - - - +rzodkiewkami rzodkiewka subst inst pl f - (61) +, , interp - - - - +pomarańcza pomarańcza subst nom sg f - (31) + + +#end document /8 +#begin document /9 +Ogród ogród subst nom sg m3 - +na na prep - - - - +. . interp - - - - +. . interp - - - - +. . interp - - - - +balkonie balkon subst voc sg m3 - (0) +Pani pani subst gen sg f - (1) +Stanisławie Stanisława subst loc sg f - (2 +Budkiewicz Budkiewicz ign - - - - 2) +z z prep - - - - +ul ulica brev - - - - +. . interp - - - - + +Piłsudskiego Piłsudski subst gen sg m1 - (3) +na na prep - - - - +powierzchni powierzchnia subst loc sg f - (4) +niewiele niewiele num - - - - (5) +przekraczającej przekraczać pact - - - - +półtora półtora num - - - - (6 +metra metr subst gen sg m3 head:6 6) +kwadratowego kwadratowy adj - - - - +udało udać praet - - - - +się się qub - - - - +" " interp - - - - +upchnąć upchnąć inf - - - - +" " interp - - - - +aż aż qub - - - - +15 15 ign - - - - +skrzynek skrzynka subst gen pl f - (7) +z z prep - - - - +kwiatami kwiat subst inst pl m3 - (8) +i i conj - - - - +kilka kilka num - - - - +doniczek doniczka subst gen pl f - +. . 
interp - - - - +- - interp - - - - +Utrzymanie utrzymać ger nom sg n head:9 (9 +takiego taki adj - - - - (0 +balkonu balkon subst gen sg m3 head:0 9)|0) +wymaga wymagać fin - - - - +wiele wiele num - - - - (10 +pracy praca subst gen sg f head:10 +i i conj - - - - +serca serce subst gen sg n - (11)|10) +- - interp - - - - +przyznaje przyznawać fin - - - - +S syn brev - - - - +. . interp - - - - + +Budkiewicz Budkiewicz ign - - - - +, , interp - - - - +której który adj - - - - +przy przy prep - - - - +kwiatach kwiat subst loc pl m3 - +pomaga pomagać fin - - - - +córka córka subst nom sg f - (12) +- - interp - - - - +Grażyna Grażyna subst nom sg f head:13 (13 +Stańczyk stańczyk subst nom sg m1 - (14)|13) + + +- - interp - - - - +Nie nie qub - - - - +mamy mieć fin - - - - +własnego własny adj - - - - +ogródka ogródek subst gen sg m3 - +, , interp - - - - +a a conj - - - - +bardzo bardzo adv - - - - +kochamy kochać fin - - - - +kwiaty kwiat subst acc pl m3 - (8) +. . interp - - - - + +Dlatego dlatego adv - - - - +każdą każdy adj - - - - +wolną wolny adj - - - - +chwilę chwila subst acc sg f - +poświęcamy poświęcać fin - - - - +balkonowi balkon subst dat sg m3 - (0) +. . interp - - - - + +Nasiona nasiono subst nom pl n - (15) +wysiewane wysiewać ppas - - - - +są być fin - - - - +już już qub - - - - +na na prep - - - - +przełomie przełom subst loc sg m3 - +lutego luty subst gen sg m3 head:16 (16 +i i conj - - - - +marca marzec subst gen sg m3 - 16) +. . interp - - - - + +Później późno adv - - - - +wyrastające wyrastać pact - - - - +z z prep - - - - +nich on ppron3 gen pl m2 - +roślinki roślinka subst gen sg f - (17) +pikuje pikować fin - - - - +się się qub - - - - +do do prep - - - - +skrzynek skrzynka subst gen pl f - (7) +. . 
interp - - - - +- - interp - - - - +Skrzynki skrzynka subst acc pl f - (7) +wystawiamy wystawiać fin - - - - +na na prep - - - - +balkon balkon subst acc sg m3 - (0) +dopiero dopiero qub - - - - +w w prep - - - - +maju maj subst loc sg m3 head:18 (18|(19) +- - interp - - - head:18 +czerwcu czerwiec subst loc sg m3 head:18 (20)|18) +- - interp - - - - +wyjaśnia wyjaśniać fin - - - - +G gram brev - - - - +. . interp - - - - + +Stańczyk stańczyk subst nom sg m1 - +. . interp - - - - +- - interp - - - - +Bardzo bardzo adv - - - - +rzadko rzadko adv - - - - +używamy używać fin - - - - +gotowych gotowy adj - - - - (21 +sadzonek sadzonka subst gen pl f head:21 21) +ze z prep - - - - +sklepu sklep subst gen sg m3 - (22) +, , interp - - - - +bo bo comp - - - - +nic nic subst gen sg n - (23) +nie nie qub - - - - +sprawia sprawiać fin - - - - +takiej taki adj - - - - (24 +przyjemności przyjemność subst gen sg f head:24 24) +jak jak adv - - - - +wyhodowanie wyhodować ger nom sg n head:25 (25 +kwiatka kwiatek subst gen sg m3 - (26)|25) +od od prep - - - - +nasionka nasionko subst gen sg n - (27) +. . interp - - - - + +O o prep - - - - +tej ten adj - - - - +porze pora subst loc sg f - +roku rok subst gen sg m3 head:28 (28 +najwięcej najwięcej num - - - - 28) +pracy praca subst gen sg f - (10) +jest być fin - - - - +przy przy prep - - - - +podlewaniu podlewać ger loc sg n head:29 (29 +roślin roślina subst gen pl f - (30) +i i conj - - - - +zasilaniu zasilać ger loc sg n - (31)|29) +ich on ppron3 gen pl m2 - (8|(32) +odpowiednimi odpowiedni adj - - - - +nawozami nawóz subst inst pl m3 head:8 8) +. . 
interp - - - - + +Na na prep - - - - +balkonie balkon subst loc sg m3 head:0 (0 +pani pani subst gen sg f - (1) +Stanisławy Stanisława subst gen sg f - (33)|0) +rosną rosnąć fin - - - - +: : interp - - - - +surfinie surfinia subst nom pl f - (34) +, , interp - - - - +petunie petunia subst nom pl f - (35) +, , interp - - - - +gardenie gardenia subst nom pl f - (36) +, , interp - - - - +aksamitki aksamitka subst nom pl f - (37) +, , interp - - - - +przypołudniki przypołudnik subst nom pl m3 - (38) +, , interp - - - - +groszek groszek subst nom sg m3 - +pachnący pachnący adj - - - - +, , interp - - - - +kabea kabea ign - - - - +, , interp - - - - +nemezje nemezje ign - - - - +i i conj - - - - +werbeny werbena subst gen sg f - (39) +. . interp - - - - + +W w prep - - - - +sumie suma subst loc sg f - +kilkadziesiąt kilkadziesiąt num - - - - (8 +kwiatów kwiat subst gen pl m3 head:8 8) +, , interp - - - - +z z prep - - - - +których który adj - - - - +każdy każdy adj - - - - +kwitnie kwitnąć fin - - - - +w w prep - - - - +innym inny adj - - - - +kolorze kolor subst loc sg m3 - +i i conj - - - - +w w prep - - - - +różnym różny adj - - - - (40 +czasie czas subst loc sg m3 head:40 40) +. . interp - - - - + +Efekt efekt subst nom sg m3 - +? ? interp - - - - + +Ogród ogród subst nom sg m3 - (41) +na na prep - - - - +balkonie balkon subst loc sg m3 - (0) +cieszy cieszyć fin - - - - +oczy oko subst acc pl n - +właścicielki właścicielka subst gen sg f head:42 (42 +i i conj - - - - +przechodniów przechodzień subst gen pl m1 - (43)|42) +aż aż qub - - - - +do do prep - - - - +późnej późny adj - - - - (44 +jesieni jesień subst gen sg f head:44 44) +. . 
interp - - - - + +Ozdabianie ozdabiać ger nom sg n head:45 (45 +balkonów balkon subst gen pl m3 - (46)|45) +kwiatami kwiat subst inst pl m3 - (8) +staje stawać fin - - - - +się się qub - - - - +w w prep - - - - +Brzezinach Brzeziny subst loc pl n - (47) +coraz coraz adv - - - - +popularniejsze popularny adj - - - - +- - interp - - - - +Teraz teraz adv - - - - +jest być fin - - - - +za za qub - - - - +późno późno adv - - - - +, , interp - - - - +by by comp - - - - +samemu sam adj - - - - +uprawiać uprawiać inf - - - - +rośliny roślina subst acc pl f head:30 (30 +balkonowe balkonowy adj - - - - 30) +- - interp - - - - +mówi mówić fin - - - - +Bożenna Bożenna subst nom sg f - +Kolasa kolasa subst nom sg f - (48) +ze z prep - - - - +sklepu sklep subst gen sg m3 head:22 (22 +ogrodniczego ogrodniczy adj - - - - 22) +. . interp - - - - +- - interp - - - - +Jednak jednak conj - - - - +wciąż wciąż adv - - - - +można można pred - - - - +kupić kupić inf - - - - +kwiaty kwiat subst acc pl m3 head:8 (8 +gotowe gotowy adj - - - - 8) +do do prep - - - - +wystawienia wystawić ger gen sg n - (49) +na na prep - - - - +balkon balkon subst acc sg m3 - (0) +. . interp - - - - + +Najpopularniejsze popularny adj - - - - +są być fin - - - - +petunie petunia subst nom pl f - (35) +, , interp - - - - +surfinie surfinia subst nom pl f - +, , interp - - - - +oraz oraz conj - - - - +czerwone czerwony adj - - - - +, , interp - - - - +białe biały adj - - - - (50 +i i conj - - - - +różowe różowy adj - - - - +pelargonie pelargonia subst nom pl f head:50 50) +. . interp - - - - + +Modnym modny adj - - - - +kwiatem kwiat subst inst sg m3 - +jest być fin - - - - +też też qub - - - - +datura datura subst nom sg f head:51 (51 +- - interp - - - head:51 +bieluń bieluń subst nom sg m3 head:51 51) +. . 
interp - - - - + +Klientom klient subst dat pl m1 - (52) +nie nie qub - - - - +przeszkadza przeszkadzać fin - - - - +, , interp - - - - +że że comp - - - - +jest być fin - - - - +to to pred - - - - +kwiat kwiat subst nom sg m3 head:53 (53 +trujący trujący adj - - - - 53) +. . interp - - - - +tekst tekst subst nom sg m3 head:26 (26 +i i conj - - - - +fot fot subst nom sg m3 - (54)|26) +. . interp - - - - +grzegorz Grzegorz subst nom sg m1 head:55 (55 +kozieł kozieł subst nom sg m2 - 55) + + +#end document /9 diff --git b/src/test/resources/teksty_semeval/detailed.txt a/src/test/resources/teksty_semeval/detailed.txt new file mode 100755 index 0000000..1f04903 --- /dev/null +++ a/src/test/resources/teksty_semeval/detailed.txt @@ -0,0 +1,370 @@ + +METRIC muc: +/4: +Total key mentions: 60 +Total response mentions: 70 +Strictly correct identified mentions: 54 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Recall: (14 / 14) 100% Precision: (14 / 18) 77.77% F1: 87.5% +/5: +Total key mentions: 57 +Total response mentions: 71 +Strictly correct identified mentions: 50 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 21 +Recall: (18 / 21) 85.71% Precision: (18 / 24) 75% F1: 79.99% +/1: +Total key mentions: 75 +Total response mentions: 88 +Strictly correct identified mentions: 73 +Partially correct identified mentions: 0 +No identified: 2 +Invented: 15 +Recall: (11 / 15) 73.33% Precision: (11 / 15) 73.33% F1: 73.33% +/0: +Total key mentions: 81 +Total response mentions: 85 +Strictly correct identified mentions: 75 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 10 +Recall: (19 / 22) 86.36% Precision: (19 / 24) 79.16% F1: 82.6% +/2: +Total key mentions: 84 +Total response mentions: 94 +Strictly correct identified mentions: 75 +Partially correct identified mentions: 0 +No identified: 9 +Invented: 19 +Recall: (11 / 15) 73.33% Precision: (11 / 19) 57.89% F1: 64.7% +/9: +Total key mentions: 75 +Total response 
mentions: 85 +Strictly correct identified mentions: 69 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Recall: (15 / 19) 78.94% Precision: (15 / 19) 78.94% F1: 78.94% +/8: +Total key mentions: 84 +Total response mentions: 99 +Strictly correct identified mentions: 77 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 22 +Recall: (18 / 22) 81.81% Precision: (18 / 28) 64.28% F1: 72% +/3: +Total key mentions: 56 +Total response mentions: 67 +Strictly correct identified mentions: 53 +Partially correct identified mentions: 2 +No identified: 1 +Invented: 12 +Recall: (4 / 7) 57.14% Precision: (4 / 7) 57.14% F1: 57.14% + +====== TOTALS ======= +Identification of Mentions: Recall: (527 / 572) 92.13% Precision: (527 / 659) 79.96% F1: 85.62% +Coreference: Recall: (110 / 135) 81.48% Precision: (110 / 154) 71.42% F1: 76.12% + +METRIC bcub: +/4: +Total key mentions: 60 +Total response mentions: 70 +Strictly correct identified mentions: 54 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Recall: (54 / 60) 90% Precision: (52.3333333333333 / 70) 74.76% F1: 81.67% +/5: +Total key mentions: 57 +Total response mentions: 71 +Strictly correct identified mentions: 50 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 21 +Recall: (47.125 / 57) 82.67% Precision: (47.6666666666667 / 71) 67.13% F1: 74.09% +/1: +Total key mentions: 75 +Total response mentions: 88 +Strictly correct identified mentions: 73 +Partially correct identified mentions: 0 +No identified: 2 +Invented: 15 +Recall: (68.5 / 75) 91.33% Precision: (71.75 / 88) 81.53% F1: 86.15% +/0: +Total key mentions: 81 +Total response mentions: 85 +Strictly correct identified mentions: 75 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 10 +Recall: (73 / 81) 90.12% Precision: (71.5833333333333 / 85) 84.21% F1: 87.06% +/2: +Total key mentions: 84 +Total response mentions: 94 +Strictly correct identified mentions: 75 +Partially 
correct identified mentions: 0 +No identified: 9 +Invented: 19 +Recall: (72 / 84) 85.71% Precision: (70.625 / 94) 75.13% F1: 80.07% +/9: +Total key mentions: 75 +Total response mentions: 85 +Strictly correct identified mentions: 69 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Recall: (66.3095238095238 / 75) 88.41% Precision: (66.6666666666667 / 85) 78.43% F1: 83.12% +/8: +Total key mentions: 84 +Total response mentions: 99 +Strictly correct identified mentions: 77 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 22 +Recall: (74.8333333333333 / 84) 89.08% Precision: (73.9761904761905 / 99) 74.72% F1: 81.27% +/3: +Total key mentions: 56 +Total response mentions: 67 +Strictly correct identified mentions: 53 +Partially correct identified mentions: 2 +No identified: 1 +Invented: 12 +Recall: (52.3333333333333 / 56) 93.45% Precision: (53.8333333333333 / 67) 80.34% F1: 86.4% + +====== TOTALS ======= +Identification of Mentions: Recall: (527 / 572) 92.13% Precision: (527 / 659) 79.96% F1: 85.62% +Coreference: Recall: (508.10119047619 / 572) 88.82% Precision: (508.434523809524 / 659) 77.15% F1: 82.57% + +METRIC ceafm: +/4: +Total key mentions: 60 +Total response mentions: 70 +Strictly correct identified mentions: 54 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Recall: (54 / 60) 90% Precision: (54 / 70) 77.14% F1: 83.07% +/5: +Total key mentions: 57 +Total response mentions: 71 +Strictly correct identified mentions: 50 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 21 +Recall: (49 / 57) 85.96% Precision: (49 / 71) 69.01% F1: 76.56% +/1: +Total key mentions: 75 +Total response mentions: 88 +Strictly correct identified mentions: 73 +Partially correct identified mentions: 0 +No identified: 2 +Invented: 15 +Recall: (69 / 75) 92% Precision: (69 / 88) 78.4% F1: 84.66% +/0: +Total key mentions: 81 +Total response mentions: 85 +Strictly correct identified mentions: 75 +Partially 
correct identified mentions: 0 +No identified: 6 +Invented: 10 +Recall: (74 / 81) 91.35% Precision: (74 / 85) 87.05% F1: 89.15% +/2: +Total key mentions: 84 +Total response mentions: 94 +Strictly correct identified mentions: 75 +Partially correct identified mentions: 0 +No identified: 9 +Invented: 19 +Recall: (74 / 84) 88.09% Precision: (74 / 94) 78.72% F1: 83.14% +/9: +Total key mentions: 75 +Total response mentions: 85 +Strictly correct identified mentions: 69 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Recall: (69 / 75) 92% Precision: (69 / 85) 81.17% F1: 86.25% +/8: +Total key mentions: 84 +Total response mentions: 99 +Strictly correct identified mentions: 77 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 22 +Recall: (77 / 84) 91.66% Precision: (77 / 99) 77.77% F1: 84.15% +/3: +Total key mentions: 56 +Total response mentions: 67 +Strictly correct identified mentions: 53 +Partially correct identified mentions: 2 +No identified: 1 +Invented: 12 +Recall: (53 / 56) 94.64% Precision: (53 / 67) 79.1% F1: 86.17% + +====== TOTALS ======= +Identification of Mentions: Recall: (527 / 572) 92.13% Precision: (527 / 659) 79.96% F1: 85.62% +Coreference: Recall: (519 / 572) 90.73% Precision: (519 / 659) 78.75% F1: 84.32% + +METRIC ceafe: +/4: +Total key mentions: 60 +Total response mentions: 70 +Strictly correct identified mentions: 54 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Recall: (39.1333333333333 / 46) 85.07% Precision: (39.1333333333333 / 52) 75.25% F1: 79.86% +/5: +Total key mentions: 57 +Total response mentions: 71 +Strictly correct identified mentions: 50 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 21 +Recall: (29.0904761904762 / 36) 80.8% Precision: (29.0904761904762 / 47) 61.89% F1: 70.09% +/1: +Total key mentions: 75 +Total response mentions: 88 +Strictly correct identified mentions: 73 +Partially correct identified mentions: 0 +No identified: 2 
+Invented: 15 +Recall: (56.3809523809524 / 60) 93.96% Precision: (56.3809523809524 / 73) 77.23% F1: 84.78% +/0: +Total key mentions: 81 +Total response mentions: 85 +Strictly correct identified mentions: 75 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 10 +Recall: (53.347619047619 / 59) 90.41% Precision: (53.347619047619 / 61) 87.45% F1: 88.91% +/2: +Total key mentions: 84 +Total response mentions: 94 +Strictly correct identified mentions: 75 +Partially correct identified mentions: 0 +No identified: 9 +Invented: 19 +Recall: (59.6833333333333 / 69) 86.49% Precision: (59.6833333333333 / 75) 79.57% F1: 82.89% +/9: +Total key mentions: 75 +Total response mentions: 85 +Strictly correct identified mentions: 69 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Recall: (52.0897435897436 / 56) 93.01% Precision: (52.0897435897436 / 66) 78.92% F1: 85.39% +/8: +Total key mentions: 84 +Total response mentions: 99 +Strictly correct identified mentions: 77 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 22 +Recall: (56.8564102564102 / 62) 91.7% Precision: (56.8564102564102 / 71) 80.07% F1: 85.49% +/3: +Total key mentions: 56 +Total response mentions: 67 +Strictly correct identified mentions: 53 +Partially correct identified mentions: 2 +No identified: 1 +Invented: 12 +Recall: (47.7666666666667 / 49) 97.48% Precision: (47.7666666666667 / 60) 79.61% F1: 87.64% + +====== TOTALS ======= +Identification of Mentions: Recall: (527 / 572) 92.13% Precision: (527 / 659) 79.96% F1: 85.62% +Coreference: Recall: (394.348534798535 / 437) 90.23% Precision: (394.348534798535 / 505) 78.08% F1: 83.72% + +METRIC blanc: +: +Total key mentions: 60 +Total response mentions: 70 +Strictly correct identified mentions: 54 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Coreference links: Recall: (23 / 174) 13.21% Precision: (23 / 27) 85.18% F1: 22.88% +Non-coreference links: Recall: (2237 / 2241) 99.82% 
Precision: (2237 / 2388) 93.67% F1: 96.65% +Mean: Recall: (0.565199495299205 / 1) 56.51% Precision: (0.894309510515541 / 1) 89.43% F1: 59.76% +: +Total key mentions: 57 +Total response mentions: 71 +Strictly correct identified mentions: 50 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 21 +Coreference links: Recall: (42 / 274) 15.32% Precision: (42 / 48) 87.5% F1: 26.08% +Non-coreference links: Recall: (2205 / 2211) 99.72% Precision: (2205 / 2437) 90.48% F1: 94.87% +Mean: Recall: (0.575285483663303 / 1) 57.52% Precision: (0.889900492408699 / 1) 88.99% F1: 60.48% +: +Total key mentions: 75 +Total response mentions: 88 +Strictly correct identified mentions: 73 +Partially correct identified mentions: 0 +No identified: 2 +Invented: 15 +Coreference links: Recall: (19 / 142) 13.38% Precision: (19 / 23) 82.6% F1: 23.03% +Non-coreference links: Recall: (3682 / 3686) 99.89% Precision: (3682 / 3805) 96.76% F1: 98.3% +Mean: Recall: (0.566358814853309 / 1) 56.63% Precision: (0.896880534765469 / 1) 89.68% F1: 60.66% +: +Total key mentions: 81 +Total response mentions: 85 +Strictly correct identified mentions: 75 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 10 +Coreference links: Recall: (38 / 93) 40.86% Precision: (38 / 56) 67.85% F1: 51% +Non-coreference links: Recall: (3459 / 3477) 99.48% Precision: (3459 / 3514) 98.43% F1: 98.95% +Mean: Recall: (0.70171263695993 / 1) 70.17% Precision: (0.831459874786568 / 1) 83.14% F1: 74.98% +: +Total key mentions: 84 +Total response mentions: 94 +Strictly correct identified mentions: 75 +Partially correct identified mentions: 0 +No identified: 9 +Invented: 19 +Coreference links: Recall: (27 / 220) 12.27% Precision: (27 / 41) 65.85% F1: 20.68% +Non-coreference links: Recall: (4137 / 4151) 99.66% Precision: (4137 / 4330) 95.54% F1: 97.55% +Mean: Recall: (0.559677295722827 / 1) 55.96% Precision: (0.806981918548978 / 1) 80.69% F1: 59.12% +: +Total key mentions: 75 +Total response mentions: 85 
+Strictly correct identified mentions: 69 +Partially correct identified mentions: 0 +No identified: 6 +Invented: 16 +Coreference links: Recall: (32 / 248) 12.9% Precision: (32 / 40) 80% F1: 22.22% +Non-coreference links: Recall: (3314 / 3322) 99.75% Precision: (3314 / 3530) 93.88% F1: 96.73% +Mean: Recall: (0.563312035112932 / 1) 56.33% Precision: (0.869405099150142 / 1) 86.94% F1: 59.47% +: +Total key mentions: 84 +Total response mentions: 99 +Strictly correct identified mentions: 77 +Partially correct identified mentions: 0 +No identified: 7 +Invented: 22 +Coreference links: Recall: (41 / 282) 14.53% Precision: (41 / 54) 75.92% F1: 24.4% +Non-coreference links: Recall: (4556 / 4569) 99.71% Precision: (4556 / 4797) 94.97% F1: 97.28% +Mean: Recall: (0.571272404688395 / 1) 57.12% Precision: (0.854509763046349 / 1) 85.45% F1: 60.84% +: +Total key mentions: 56 +Total response mentions: 67 +Strictly correct identified mentions: 53 +Partially correct identified mentions: 2 +No identified: 1 +Invented: 12 +Coreference links: Recall: (5 / 85) 5.88% Precision: (5 / 8) 62.5% F1: 10.75% +Non-coreference links: Recall: (2123 / 2126) 99.85% Precision: (2123 / 2203) 96.36% F1: 98.08% +Mean: Recall: (0.528706214376625 / 1) 52.87% Precision: (0.794342941443486 / 1) 79.43% F1: 54.41% + +====== TOTALS ======= +Identification of Mentions: Recall: (527 / 572) 92.13% Precision: (527 / 659) 79.96% F1: 85.62% + +Coreference: +Coreference links: Recall: (227 / 1518) 14.95% Precision: (227 / 297) 76.43% F1: 25.01% +Non-coreference links: Recall: (25713 / 25783) 99.72% Precision: (25713 / 27004) 95.21% F1: 97.42% +BLANC: Recall: (0.573411949851852 / 1) 57.34% Precision: (0.85825101606097 / 1) 85.82% F1: 61.21% diff --git b/src/test/resources/teksty_semeval/results.txt a/src/test/resources/teksty_semeval/results.txt new file mode 100755 index 0000000..7f5f62c --- /dev/null +++ a/src/test/resources/teksty_semeval/results.txt @@ -0,0 +1,19 @@ +Identification of Mentions: Recall: (527 / 572) 
92.13% Precision: (527 / 659) 79.96% F1: 85.62% + +METRIC muc: +Coreference: Recall: (110 / 135) 81.48% Precision: (110 / 154) 71.42% F1: 76.12% + +METRIC bcub: +Coreference: Recall: (508.10119047619 / 572) 88.82% Precision: (508.434523809524 / 659) 77.15% F1: 82.57% + +METRIC ceafm: +Coreference: Recall: (519 / 572) 90.73% Precision: (519 / 659) 78.75% F1: 84.32% + +METRIC ceafe: +Coreference: Recall: (394.348534798535 / 437) 90.23% Precision: (394.348534798535 / 505) 78.08% F1: 83.72% + +METRIC blanc: +Coreference: +Coreference links: Recall: (227 / 1518) 14.95% Precision: (227 / 297) 76.43% F1: 25.01% +Non-coreference links: Recall: (25713 / 25783) 99.72% Precision: (25713 / 27004) 95.21% F1: 97.42% +BLANC: Recall: (0.573411949851852 / 1) 57.34% Precision: (0.85825101606097 / 1) 85.82% F1: 61.21% diff --git b/src/test/resources/teksty_semeval/teksty_z_testow/all.txt a/src/test/resources/teksty_semeval/teksty_z_testow/all.txt new file mode 100755 index 0000000..d6a6c73 --- /dev/null +++ a/src/test/resources/teksty_semeval/teksty_z_testow/all.txt @@ -0,0 +1,34 @@ + +METRIC muc: + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% +Coreference: Recall: (27 / 36) 75% Precision: (27 / 31) 87.09% F1: 80.59% + +METRIC bcub: + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% +Coreference: Recall: (39 / 48) 81.25% Precision: (32.6428571428571 / 48) 68% F1: 74.04% + +METRIC ceafm: + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% +Coreference: Recall: (25 / 48) 52.08% Precision: (25 / 48) 52.08% F1: 52.08% + +METRIC ceafe: + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% +Coreference: Recall: (5.42156862745098 / 12) 45.17% Precision: (5.42156862745098 / 17) 31.89% F1: 37.39% + +METRIC blanc: + +====== TOTALS ======= +Identification 
of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% + +Coreference: +Coreference links: Recall: (63 / 84) 75% Precision: (63 / 143) 44.05% F1: 55.5% +Non-coreference links: Recall: (100 / 180) 55.55% Precision: (100 / 121) 82.64% F1: 66.44% +BLANC: Recall: (0.652777777777778 / 1) 65.27% Precision: (0.633502860775588 / 1) 63.35% F1: 60.97% diff --git b/src/test/resources/teksty_semeval/teksty_z_testow/all_golden.txt a/src/test/resources/teksty_semeval/teksty_z_testow/all_golden.txt new file mode 100755 index 0000000..2f9f936 --- /dev/null +++ a/src/test/resources/teksty_semeval/teksty_z_testow/all_golden.txt @@ -0,0 +1,59 @@ +#begin document A +1 (1) +2 (1) +3 (1) +4 (1) +5 (1) +6 (2) +7 (2) +8 (3) +9 (3) +10 (3) +11 (3) +12 (3) +#end document A + +#begin document B +1 (1) +2 (1) +3 (1) +4 (1) +5 (1) +6 (2) +7 (2) +8 (3) +9 (3) +10 (3) +11 (3) +12 (3) +#end document B + +#begin document C +1 (1) +2 (1) +3 (1) +4 (1) +5 (1) +6 (2) +7 (2) +8 (3) +9 (3) +10 (3) +11 (3) +12 (3) +#end document C + +#begin document D +1 (1) +2 (1) +3 (1) +4 (1) +5 (1) +6 (2) +7 (2) +8 (3) +9 (3) +10 (3) +11 (3) +12 (3) +#end document D diff --git b/src/test/resources/teksty_semeval/teksty_z_testow/all_system.txt a/src/test/resources/teksty_semeval/teksty_z_testow/all_system.txt new file mode 100755 index 0000000..c4ce79f --- /dev/null +++ a/src/test/resources/teksty_semeval/teksty_z_testow/all_system.txt @@ -0,0 +1,59 @@ +#begin document A +1 (1) +2 (1) +3 (1) +4 (1) +5 (1) +6 (3) +7 (3) +8 (3) +9 (3) +10 (3) +11 (3) +12 (3) +#end document A + +#begin document B +1 (1) +2 (1) +3 (1) +4 (1) +5 (1) +6 (2) +7 (2) +8 (1) +9 (1) +10 (1) +11 (1) +12 (1) +#end document B + +#begin document C +1 (1) +2 (1) +3 (1) +4 (1) +5 (1) +6 (1) +7 (1) +8 (1) +9 (1) +10 (1) +11 (1) +12 (1) +#end document C + +#begin document D +1 (1) +2 (2) +3 (3) +4 (4) +5 (5) +6 (6) +7 (7) +8 (8) +9 (9) +10 (10) +11 (11) +12 (12) +#end document D diff --git 
b/src/test/resources/teksty_semeval/teksty_z_testow/det.txt a/src/test/resources/teksty_semeval/teksty_z_testow/det.txt new file mode 100755 index 0000000..14f3596 --- /dev/null +++ a/src/test/resources/teksty_semeval/teksty_z_testow/det.txt @@ -0,0 +1,202 @@ + +METRIC muc: +A: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (9 / 9) 100% Precision: (9 / 10) 90% F1: 94.73% +D: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (0 / 9) 0% Precision: (0 / 0) 0% F1: 0% +C: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (9 / 9) 100% Precision: (9 / 11) 81.81% F1: 90% +B: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (9 / 9) 100% Precision: (9 / 10) 90% F1: 94.73% + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% +Coreference: Recall: (27 / 36) 75% Precision: (27 / 31) 87.09% F1: 80.59% + +METRIC bcub: +A: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (12 / 12) 100% Precision: (9.14285714285714 / 12) 76.19% F1: 86.48% +D: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (3 / 12) 25% Precision: (12 / 12) 100% F1: 40% +C: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified 
mentions: 0 +No identified: 0 +Invented: 0 +Recall: (12 / 12) 100% Precision: (4.5 / 12) 37.5% F1: 54.54% +B: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (12 / 12) 100% Precision: (7 / 12) 58.33% F1: 73.68% + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% +Coreference: Recall: (39 / 48) 81.25% Precision: (32.6428571428571 / 48) 68% F1: 74.04% + +METRIC ceafm: +A: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (10 / 12) 83.33% Precision: (10 / 12) 83.33% F1: 83.33% +D: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (3 / 12) 25% Precision: (3 / 12) 25% F1: 25% +C: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (5 / 12) 41.66% Precision: (5 / 12) 41.66% F1: 41.66% +B: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (7 / 12) 58.33% Precision: (7 / 12) 58.33% F1: 58.33% + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% +Coreference: Recall: (25 / 48) 52.08% Precision: (25 / 48) 52.08% F1: 52.08% + +METRIC ceafe: +A: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (1.83333333333333 / 3) 61.11% Precision: (1.83333333333333 / 2) 91.66% F1: 73.33% +D: +Total key mentions: 12 +Total 
response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (1.33333333333333 / 3) 44.44% Precision: (1.33333333333333 / 12) 11.11% F1: 17.77% +C: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (0.588235294117647 / 3) 19.6% Precision: (0.588235294117647 / 1) 58.82% F1: 29.41% +B: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Recall: (1.66666666666667 / 3) 55.55% Precision: (1.66666666666667 / 2) 83.33% F1: 66.66% + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% +Coreference: Recall: (5.42156862745098 / 12) 45.17% Precision: (5.42156862745098 / 17) 31.89% F1: 37.39% + +METRIC blanc: +: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Coreference links: Recall: (21 / 21) 100% Precision: (21 / 31) 67.74% F1: 80.76% +Non-coreference links: Recall: (35 / 45) 77.77% Precision: (35 / 35) 100% F1: 87.5% +Mean: Recall: (0.888888888888889 / 1) 88.88% Precision: (0.838709677419355 / 1) 83.87% F1: 84.13% +: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Coreference links: Recall: (0 / 21) 0% Precision: (0 / 0) 0% F1: 0% +Non-coreference links: Recall: (45 / 45) 100% Precision: (45 / 66) 68.18% F1: 81.08% +Mean: Recall: (0.5 / 1) 50% Precision: (0.340909090909091 / 1) 34.09% F1: 40.54% +: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 
+Invented: 0 +Coreference links: Recall: (21 / 21) 100% Precision: (21 / 66) 31.81% F1: 48.27% +Non-coreference links: Recall: (0 / 45) 0% Precision: (0 / 0) 0% F1: 0% +Mean: Recall: (0.5 / 1) 50% Precision: (0.159090909090909 / 1) 15.9% F1: 24.13% +: +Total key mentions: 12 +Total response mentions: 12 +Strictly correct identified mentions: 12 +Partially correct identified mentions: 0 +No identified: 0 +Invented: 0 +Coreference links: Recall: (21 / 21) 100% Precision: (21 / 46) 45.65% F1: 62.68% +Non-coreference links: Recall: (20 / 45) 44.44% Precision: (20 / 20) 100% F1: 61.53% +Mean: Recall: (0.722222222222222 / 1) 72.22% Precision: (0.728260869565217 / 1) 72.82% F1: 62.11% + +====== TOTALS ======= +Identification of Mentions: Recall: (48 / 48) 100% Precision: (48 / 48) 100% F1: 100% + +Coreference: +Coreference links: Recall: (63 / 84) 75% Precision: (63 / 143) 44.05% F1: 55.5% +Non-coreference links: Recall: (100 / 180) 55.55% Precision: (100 / 121) 82.64% F1: 66.44% +BLANC: Recall: (0.652777777777778 / 1) 65.27% Precision: (0.633502860775588 / 1) 63.35% F1: 60.97% diff --git b/src/test/resources/teksty_tei/teksty_gold/0/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_gold/0/ann_coreference.xml.gz new file mode 100755 index 0000000..76a9cf2 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/0/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/0/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_gold/0/ann_mentions.xml.gz new file mode 100755 index 0000000..d51ae07 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/0/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/0/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_gold/0/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..ce7d825 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/0/ann_morphosyntax.xml.gz diff --git 
b/src/test/resources/teksty_tei/teksty_gold/0/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_gold/0/ann_segmentation.xml.gz new file mode 100755 index 0000000..0aed7a0 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/0/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/0/header.xml a/src/test/resources/teksty_tei/teksty_gold/0/header.xml new file mode 100755 index 0000000..7c47cd1 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/0/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-57,p-58,p-59,p-60 from NKJP text with id: IPIPAN_1301919980826</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_gold/0/text.xml a/src/test/resources/teksty_tei/teksty_gold/0/text.xml new file mode 100755 index 0000000..2c3048e --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/0/text.xml @@ -0,0 +1,15 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">W spotkaniu weźmie udział blisko 7 tysięcy braci z całej Europy, ale tylko 206 z nich będzie ubiegało się o tytuł Europejskiego Króla Kurkowego. - Wezmę udział w strzelaniu, choć moje szanse są marne. 
Wynika to przede wszystkim z moich obowiązków gospodarza spotkań; w tym nawale pracy ciężko mi będzie się skupić na strzelaniu - przewiduje Zdzisław Maj, prezes krakowskiego Bractwa Kurkowego, panujący Król Kurkowy.</p> + <p xml:id="p-2">Strzelanie o tytuł Europejskiego Króla Kurkowego będzie się odbywało w kilku etapach. Do finału zostanie dopuszczonych 27 braci - jeden z nich otrzyma tytuł Europejskiego Króla Kurkowego odbierając go obecnie panującemu Wilfriedowi Stammermannowi. - Król nie otrzymuje żadnych nagród finansowych, ale taki tytuł jest ogromnym zaszczytem; król jest np. zapraszany na posiedzenia Parlamentu Europejskiego - mówi Zdzisław Maj.</p> + <p xml:id="p-3">Największą atrakcją 12. Europejskich Spotkań Bractw Strzeleckich będzie wielka parada, która rozpocznie się w niedzielę o godz. 13. Kilkuset braci w historycznych strojach przejdzie z Błoń na Rynek ulicami: Piłsudskiego, Straszewskiego, Franciszkańską i Grodzką.</p> + <p xml:id="p-4">Początki istnienia Bractwa Kurkowego w Krakowie sięgają XIII wieku. Skupiało ono znamienitych obywateli, kupców i rzemieślników pragnących wspomóc obronność miasta. Wielkim świętem bractwa był turniej, który odbywał się na strzelnicy zwanej Celestatem. Zawody trwały zwykle trzy dni. Strzelano do drewnianego kura umocowanego na wysokiej żerdzi. Brat, który zdołał celnym strzałem strącić ostatni jego fragment zdobywał miano Króla Kurkowego. Z tym tytułem wiązały się nie tylko honory, ale także przywileje: Rada Miejska zwalniała jego posiadacza m.in. 
z obowiązku płacenia podatków (ten zwyczaj utrzymał się do dziś).</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_gold/1/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_gold/1/ann_coreference.xml.gz new file mode 100755 index 0000000..9ad1be6 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/1/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/1/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_gold/1/ann_mentions.xml.gz new file mode 100755 index 0000000..0f6d1cb --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/1/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/1/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_gold/1/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..62d6f6a --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/1/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/1/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_gold/1/ann_segmentation.xml.gz new file mode 100755 index 0000000..9ff6101 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/1/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/1/header.xml a/src/test/resources/teksty_tei/teksty_gold/1/header.xml new file mode 100755 index 0000000..5723f42 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/1/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-21,p-22,p-23,p-24,p-25,p-26,p-27 from NKJP text with id: IJPPAN_PolPr_GWlk01253</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff 
--git b/src/test/resources/teksty_tei/teksty_gold/1/text.xml a/src/test/resources/teksty_tei/teksty_gold/1/text.xml new file mode 100755 index 0000000..e77e7ea --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/1/text.xml @@ -0,0 +1,18 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Z kolei we Wrocławiu płace kontrolerów zostały zupełnie uniezależnione od liczby wystawionych wezwań do zapłaty.</p> + <p xml:id="p-2">- Nie oznacza to jednak, że nie nagradzamy najskuteczniejszych pracowników. Kilka razy w roku przyznawane są premie. Bierzemy wtedy pod uwagę skuteczność i ewentualne skargi pasażerów - wyjaśnia Monika Poważna, kierownik Wydziału Transportu wrocławskiego Urzędu Miasta.</p> + <p xml:id="p-3">Tamtejsi kontrolerzy zarabiają (bez premii) około 1200 złotych miesięcznie (netto).</p> + <p xml:id="p-4">Miasto postanowiło za jednym zamachem trzy spółki połączyć w jedną. Przygotowany jest projekt uchwały, który przewiduje wniesienie udziałów w Towarzystwie Budownictwa Społecznego "Wielkopolska" oraz Towarzystwie Budownictwa Społecznego "Nasz Dom" do Poznańskiego Towarzystwa Budownictwa Społecznego. W piątek opiniować tę propozycję będzie Komisja Gospodarki Komunalnej i Polityki Mieszkaniowej, a we wtorek zajmie się nią Rada Miasta.</p> + <p xml:id="p-5">- Pomysł połączenia TBS-ów nie budzi wątpliwości z punktu widzenia racjonalizacji kosztów - twierdzi Tomasz Lewandowski, radny LiD i członek komisji. - Potrzebna jest jednak dyskusja o przyszłości towarzystw. Obecnie rząd pracuje nad zmianą ustawy, która przewiduje wykup mieszkań w towarzystwach budownictwa społecznego. To stworzy zupełnie nową sytuację. 
W związku z tym konieczne będzie podjęcie odpowiednich kroków przez miasto.</p> + <p xml:id="p-6">Norbert Napieraj, szef klubu radnych PiS również uważa, że ze względów ekonomicznych utworzenie jednej spółki jest zasadne.</p> + <p xml:id="p-7">- Na razie jest to jednak luźny pomysł. Nie ma konkretów - dodaje N. Napieraj. - Nasz klub jeszcze nie wypracował w sprawie tej uchwały stanowiska.</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_gold/2/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_gold/2/ann_coreference.xml.gz new file mode 100755 index 0000000..d047d38 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/2/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/2/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_gold/2/ann_mentions.xml.gz new file mode 100755 index 0000000..fa20d2c --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/2/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/2/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_gold/2/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..62f8665 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/2/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/2/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_gold/2/ann_segmentation.xml.gz new file mode 100755 index 0000000..0fe2536 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/2/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/2/header.xml a/src/test/resources/teksty_tei/teksty_gold/2/header.xml new file mode 100755 index 0000000..769bd91 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/2/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" 
xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-923,p-924,p-925,p-926,p-927,p-928,p-929,p-930,p-931 from NKJP text with id: IJPPAN_PolPr_DBb01448</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_gold/2/text.xml a/src/test/resources/teksty_tei/teksty_gold/2/text.xml new file mode 100755 index 0000000..d2f8e1d --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/2/text.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Potem znalazł zatrudnienie w Fundacji Europejskie Spotkania Kaszubskie Centrum Kultury. Był prezesem utworzonej przez fundację spółki Zamek.</p> + <p xml:id="p-2">W międzyczasie został radnym. Pod koniec ubiegłej kadencji Rada Gminy Krokowa wybrała go na wójta. Jesienią 2002 r. został wójtem w wyborach powszechnych.</p> + <p xml:id="p-3">- Co skłoniło mnie do zostania samorządowcem? W pewnym momencie życia mężczyzny przychodzi taka potrzeba, aby sprawdzić się np. w życiu publicznym - twierdzi krokowski kandydat do tytułu Wójta Pomorza. - Poza tym interesowały mnie sprawy komunalne. Chciałem się nimi bliżej zająć.</p> + <p xml:id="p-4">Co wójt gminy Krokowa uważa za swój największy sukces i największą porażkę?</p> + <p xml:id="p-5">- Sukcesem jest to, że udaje się wreszcie opracowywać plany zagospodarowania przestrzennego. Gotowe są już dla Białogóry i części Dębek. Tych ostatnich przez wiele lat nie można było uchwalić - uważa wójt. - Natomiast za porażkę uważam decyzję Rady gminy, aby nie przystępować w ramach Komunalnego Związku Gmin do programu uporządkowania gospodarki ściekowej. 
Mogliśmy uzyskać wiele milionów euro. Boję się, że to nie tylko moja porażka...</p> + <p xml:id="p-6">Od 5 lat ulubionym hobby Henryka Doeringa są narty. Dlatego urlop najchętniej bierze zimą, aby udać się na stoki Szklarskiej Poręby.</p> + <p xml:id="p-7">- Tej zimy niestety nie mogłem wyjechać - przyznaje wójt Krokowej. - Czasu wolnego mam bardzo mało, jeśli się taki pojawia, to staram się go spędzać razem z bliskimi.</p> + <p xml:id="p-8">Nasz plebiscyt</p> + <p xml:id="p-9">„Dziennik Bałtycki” rozpoczął kolejną edycję konkursu Wójt Pomorza. Nasz powiat reprezentują trzej włodarze gmin wiejskich. To Henryk Doering (Krokowa), Tadeusz Puszkarczuk (gmina Puck) i Jerzy Włudzik (Kosakowo). W gronie kilkudziesięciu kolegów po fachu walczyć będą o miano najpopularniejszego wójta województwa. O tym, kto wygra, zadecydują swoimi głosami Czytelnicy „Dziennika”.</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_gold/3/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_gold/3/ann_coreference.xml.gz new file mode 100755 index 0000000..0e5a175 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/3/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/3/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_gold/3/ann_mentions.xml.gz new file mode 100755 index 0000000..15e813f --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/3/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/3/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_gold/3/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..4b8e8f8 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/3/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/3/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_gold/3/ann_segmentation.xml.gz new file mode 100755 index 0000000..39d2a21 --- /dev/null +++ 
a/src/test/resources/teksty_tei/teksty_gold/3/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/3/header.xml a/src/test/resources/teksty_tei/teksty_gold/3/header.xml new file mode 100755 index 0000000..efca8ca --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/3/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-39,p-40,p-41,p-42,p-43,p-44,p-45,p-46,p-47,p-48,p-49 from NKJP text with id: IJPPAN_PolPr5b00121</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_gold/3/text.xml a/src/test/resources/teksty_tei/teksty_gold/3/text.xml new file mode 100755 index 0000000..5da18a5 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/3/text.xml @@ -0,0 +1,22 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Paweł Kryszałowicz</p> + <p xml:id="p-2">(Eintracht Frankfurt): - Ukraińcy postawili nam wysokie wymagania. Remis jest sprawiedliwy, choć przy odrobinie szczęścia mogliśmy wygrać. Mam do siebie pretensję, bo przy lepszej koncentracji mogłem zdobyć bramkę.</p> + <p xml:id="p-3">ś</p> + <p xml:id="p-4">Jacek Krzynówek</p> + <p xml:id="p-5">(1. FC Nuernberg): - Cieszę się, że mogłem wystąpić w meczu kończącym eliminacje. Ze swej strony zrobię wszystko, aby nie wypaść z kadry, mimo że nie mam ugruntowanej pozycji w swoim klubie. 
Zasłużyliśmy na awans, bo byliśmy zespołem, który grał najrówniej i w głupi sposób nie tracił punktów.</p> + <p xml:id="p-6">ś</p> + <p xml:id="p-7">Marek Koźmiński:</p> + <p xml:id="p-8">Mecze z Ukrainą spinają klamrą eliminacje, które będziemy pamiętać do końca życia. Uważam, że kibice oglądali dzisiaj dobry mecz dwóch równych zespołów. Moim zdaniem Ukraińcy zasłużenie zajęli drugie miejsce , jednak nie chciałbym spotkać się z nimi na mundialu. To bardzo niewygodny przeciwnik. Może wygrać z każdym.</p> + <p xml:id="p-9">ś</p> + <p xml:id="p-10">Zbigniew Boniek</p> + <p xml:id="p-11">(wiceprezes PZPN): - Najważniejsze, że eliminacje zakończyły się sukcesem. Jestem usatysfakcjonowany. Chcę podkreślić, że kibice i dziennikarze dostrzegają tylko to, co dzieje się na boisku i rzadko kiedy zwracają uwagę na kulisy sukcesów piłkarzy, a oznaczają one ogromny wysiłek organizacyjny, wiele pracy rzeszy ludzi, którzy nie stoją w pierwszym szeregu, ale wykonują nieraz ciężkie i niewdzięczne zadania. 
Moim zdaniem między innymi dlatego, że sprawy organizacyjne zostały ułożone na odpowiednim poziomie, wszyscy możemy się dzisiaj cieszyć z awansu do mistrzostw świata.</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_gold/4/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_gold/4/ann_coreference.xml.gz new file mode 100755 index 0000000..82d83c1 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/4/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/4/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_gold/4/ann_mentions.xml.gz new file mode 100755 index 0000000..3d8d69e --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/4/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/4/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_gold/4/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..d6bd89a --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/4/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/4/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_gold/4/ann_segmentation.xml.gz new file mode 100755 index 0000000..e568746 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/4/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/4/header.xml a/src/test/resources/teksty_tei/teksty_gold/4/header.xml new file mode 100755 index 0000000..17a75d9 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/4/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-437,p-438,p-439,p-440,p-441,p-442,p-443,p-444,p-445 from NKJP text with id: IJPPAN_PolPr_SlP00841</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + 
<catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_gold/4/text.xml a/src/test/resources/teksty_tei/teksty_gold/4/text.xml new file mode 100755 index 0000000..97371c9 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/4/text.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Ernest i Agnieszka nie planowali, że będą mieli wielką, babską rodzinę. Ale tak wyszło. – I całe szczęście. Lepiej się dogaduję z dziewczętami – cieszy się Ernest Kwiecień.</p> + <p xml:id="p-2">W Wigilię do jego obowiązków, poza dostarczeniem choinki, należeć będzie zmywanie naczyń. Agnieszka zrobi pierogi, ugotuje barszcz z uszkami, usmaży karpia. Córki upieką ciasta. Potem przyjdzie czas na prezenty. Może to nawet będą empetrójki, o których marzą starsze dziewczyny.</p> + <p xml:id="p-3">Jodełek sadzimy mniej</p> + <p xml:id="p-4">Leśniczy, od którego pan Ernest przywozi choinkę, mieszka kilka kilometrów od domu Kwietniów. On także nie wyobraża sobie świąt bez prawdziwego świerku. – I musi być kiczowaty – uśmiecha się Gabriel Grobelny, nadleśniczy wałbrzyski.</p> + <p xml:id="p-5">To znaczy, że powinny na nim wisieć ozdoby zrobione przez dzieci, przechowywane latami, wyciągane na tę jedyną okazję.</p> + <p xml:id="p-6">Pan Gabriel ma dwóch synów i trzy córki. W domu została najmłodsza, 12-letnia, ale na święta zjadą wszyscy. I ubiorą choinkę. – Żona rozwiesi anielskie włosy, ja podłączę lampki – w domu nadleśniczego podział świątecznych ról jest określony.</p> + <p xml:id="p-7">W dolnośląskich lasach najwięcej jest świerków. 
Na plantacjach sadzą także coraz popularniejsze jodły z miękkimi igłami.</p> + <p xml:id="p-8">– Ale i tych jodełek sadzimy już mniej. To nie lata dziewięćdziesiąte, gdy sprzedawaliśmy prawie wszystkie wyhodowane drzewka – wspomina nadleśniczy.</p> + <p xml:id="p-9">U Grobelnego choinkę można sobie wybrać. – Mamy rodziny, w których co roku ojciec przyjeżdża z synem, by samemu ściąć drzewko. Taką mają tradycję – dodaje pan Gabriel.</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_gold/5/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_gold/5/ann_coreference.xml.gz new file mode 100755 index 0000000..57c7a04 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/5/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/5/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_gold/5/ann_mentions.xml.gz new file mode 100755 index 0000000..150c9ed --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/5/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/5/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_gold/5/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..b0e9efe --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/5/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/5/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_gold/5/ann_segmentation.xml.gz new file mode 100755 index 0000000..2ef9b75 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/5/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/5/header.xml a/src/test/resources/teksty_tei/teksty_gold/5/header.xml new file mode 100755 index 0000000..7d37881 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/5/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-6,p-7,p-8,p-9 from NKJP text with id: PELCRA_1303919960926</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_gold/5/text.xml a/src/test/resources/teksty_tei/teksty_gold/5/text.xml new file mode 100755 index 0000000..0d517c7 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/5/text.xml @@ -0,0 +1,15 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Cena życia</p> + <p xml:id="p-2">Z tego pogromu ocalało kilkudziesięciu Żydów, a wśród nich rodzina Mosze Sonensona. Przed wojną była to w skali miasteczka rodzina bogata. Sonensonowie mieli garbarnię. Nie udało mi się dociec, u kogo mianowicie przechowywali się Sonensonowie oraz pozostali Żydzi w czasie okupacji niemieckiej. Faktem pozostaje natomiast, że okupację tę przeżyli. Faktem oczywistym pozostaje i to, że liczne rodziny polskie - w Ejszyszkach i w pobliskich okolicach - przechowywały Żydów. Parę kilometrów od Ejszyszek, w Korkucianach (w folwarku Lebiedniki), żołnierz AK Kazimierz Korkuć w czasie wojny w swoim domu przechowywał 28 Żydów. Od studni do piwnic domu był przekopany tunel, dzięki czemu mieli wodę. Natomiast w skali siatki AK Kazimierz Korkuć przechowywał około 70 Żydów. Rodzina Świeczków również przechowywała Żydów. W tamtych stronach liczne rodziny polskie postępowały podobnie.</p> + <p xml:id="p-3">Prawdą jest również i to, że Żydzi za swe przechowanie płacili. Płacili za utrzymanie i chyba jeszcze - za ryzyko. 
O tym dzisiaj raczej tu się nie mówi, ale prawdopodobnie różnie z tym było: jedni za pieniądze, inni - z odruchu serca. Ryzykowali i Polacy, i Żydzi. Te rachunki mogły wyglądać bardzo różnie.</p> + <p xml:id="p-4">Mieszkam w jednej z podwileńskich wsi. Otóż w tej mojej wsi pewien gospodarz - Polak - przechowywał w czasie wojny młodą Żydówkę. Spodobała mu się, z czego wynikł dramat. Zdenerwowana żona doniosła na policję. Aresztowano Żydówkę razem z gospodarzem, przerażona kobieta próbowała ocalić męża. Zanim uzbierała potrzebną sumę na łapówkę, było już za późno - rozstrzelano nie tylko Żydówkę, ale i gospodarza. Czy żonę tego straceńca można nazwać antysemitką?</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_gold/8/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_gold/8/ann_coreference.xml.gz new file mode 100755 index 0000000..bbc1874 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/8/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/8/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_gold/8/ann_mentions.xml.gz new file mode 100755 index 0000000..fd2b157 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/8/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/8/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_gold/8/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..0c0be0e --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/8/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/8/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_gold/8/ann_segmentation.xml.gz new file mode 100755 index 0000000..4280861 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/8/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/8/header.xml a/src/test/resources/teksty_tei/teksty_gold/8/header.xml new file 
mode 100755 index 0000000..b6c7d38 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/8/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-596,p-597,p-598,p-599,p-600,p-601,p-602,p-603,p-604,p-605,p-606,p-607,p-608,p-609,p-610,p-611,p-612,p-613,p-614,p-615,p-616,p-617,p-618,p-619,p-620,p-621 from NKJP text with id: IJPPAN_PolPr_GWroc00348</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_gold/8/text.xml a/src/test/resources/teksty_tei/teksty_gold/8/text.xml new file mode 100755 index 0000000..8dcb1b6 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/8/text.xml @@ -0,0 +1,37 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">- 150 g owoców</p> + <p xml:id="p-2">Joanna Kuc</p> + <p xml:id="p-3">(PAI)</p> + <p xml:id="p-4">Diety są różne, ścisłe i urozmaicone, eliminacyjne, dziwne, racjonalne i wreszcie te zalecane przez lekarzy. 
Dziś dieta burgundzka.</p> + <p xml:id="p-5">Jak wygląda przykładowe tygodniowe menu?</p> + <p xml:id="p-6">1 dzień</p> + <p xml:id="p-7">-obiad: pierś kurczaka pieczona w folii, sałatka z czerwonej kapusty doprawiona octem winnym i oliwą, pieczone jabłko</p> + <p xml:id="p-8">- kolacja: 2 sadzone jajka, pomidor, trójkąt serka topionego, gruszka</p> + <p xml:id="p-9">2 dzień</p> + <p xml:id="p-10">- obiad: królik w potrawce, surówka z marchewki i chrzanu, brzoskwinia</p> + <p xml:id="p-11">- kolacja: befsztyk z polędwicy usmażony bez tłuszczu, zielona sałata z sosem vinegrette, 2 mandarynki</p> + <p xml:id="p-12">3 dzień</p> + <p xml:id="p-13">-obiad: kura z rosołu, gotowana kapusta bez zasmażki, mały grejpfrut</p> + <p xml:id="p-14">- kolacja : ryba w warzywach, surówka z buraczków, pomarańcza</p> + <p xml:id="p-15">4 dzień</p> + <p xml:id="p-16">-obiad: pieczona wieprzowina, brokuły ugotowane na parze, jabłko</p> + <p xml:id="p-17">- kolacja: szklanka ugotowanego ryżu zalana chudym mlekiem, plaster białego chudego sera, kiść winogron</p> + <p xml:id="p-18">5 dzień</p> + <p xml:id="p-19">- obiad – sztuka mięsa, gotowane buraczki, kilka suszonych śliwek</p> + <p xml:id="p-20">- kolacja - 2 jajka na miękko, serka brie, banan</p> + <p xml:id="p-21">6 dzień</p> + <p xml:id="p-22">- obiad: ryba pieczona w folii, surówka z białej kapusty i marchewki, gruszka w sosie waniliowym</p> + <p xml:id="p-23">- kolacja: pieczeń z królika, sałatka z pomidorów i papryki, trójkąt serka topionego, kiwi</p> + <p xml:id="p-24">7 dzień</p> + <p xml:id="p-25">- obiad: cielęcina pieczona z dodatkiem ziół, fasolka szparagowa z odrobiną masła, jogurt</p> + <p xml:id="p-26">- kolacja: zapiekanka z ziemniaków, odrobiny startego żółtego sera i szynki, sałata zielona z rzodkiewkami, pomarańcza</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_gold/9/ann_coreference.xml.gz 
a/src/test/resources/teksty_tei/teksty_gold/9/ann_coreference.xml.gz new file mode 100755 index 0000000..dbbb49f --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/9/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/9/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_gold/9/ann_mentions.xml.gz new file mode 100755 index 0000000..e8692fc --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/9/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/9/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_gold/9/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..a0c48b2 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/9/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/9/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_gold/9/ann_segmentation.xml.gz new file mode 100755 index 0000000..a7fc20c --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/9/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_gold/9/header.xml a/src/test/resources/teksty_tei/teksty_gold/9/header.xml new file mode 100755 index 0000000..c644153 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/9/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-215,p-216 from NKJP text with id: IJPPAN_PolPr0300662</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_gold/9/text.xml a/src/test/resources/teksty_tei/teksty_gold/9/text.xml new file mode 100755 index 0000000..e605436 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_gold/9/text.xml @@ -0,0 +1,13 
@@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Ogród na... balkonie Pani Stanisławie Budkiewicz z ul. Piłsudskiego na powierzchni niewiele przekraczającej półtora metra kwadratowego udało się "upchnąć" aż 15 skrzynek z kwiatami i kilka doniczek. - Utrzymanie takiego balkonu wymaga wiele pracy i serca - przyznaje S. Budkiewicz, której przy kwiatach pomaga córka - Grażyna Stańczyk</p> + <p xml:id="p-2">- Nie mamy własnego ogródka, a bardzo kochamy kwiaty. Dlatego każdą wolną chwilę poświęcamy balkonowi. Nasiona wysiewane są już na przełomie lutego i marca. Później wyrastające z nich roślinki pikuje się do skrzynek. - Skrzynki wystawiamy na balkon dopiero w maju-czerwcu - wyjaśnia G. Stańczyk. - Bardzo rzadko używamy gotowych sadzonek ze sklepu, bo nic nie sprawia takiej przyjemności jak wyhodowanie kwiatka od nasionka. O tej porze roku najwięcej pracy jest przy podlewaniu roślin i zasilaniu ich odpowiednimi nawozami. Na balkonie pani Stanisławy rosną: surfinie, petunie, gardenie, aksamitki, przypołudniki, groszek pachnący, kabea, nemezje i werbeny. W sumie kilkadziesiąt kwiatów, z których każdy kwitnie w innym kolorze i w różnym czasie. Efekt? Ogród na balkonie cieszy oczy właścicielki i przechodniów aż do późnej jesieni. Ozdabianie balkonów kwiatami staje się w Brzezinach coraz popularniejsze - Teraz jest za późno, by samemu uprawiać rośliny balkonowe - mówi Bożenna Kolasa ze sklepu ogrodniczego. - Jednak wciąż można kupić kwiaty gotowe do wystawienia na balkon. Najpopularniejsze są petunie, surfinie, oraz czerwone, białe i różowe pelargonie. Modnym kwiatem jest też datura-bieluń. Klientom nie przeszkadza, że jest to kwiat trujący. tekst i fot. 
grzegorz kozieł</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_sys/0/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_sys/0/ann_coreference.xml.gz new file mode 100755 index 0000000..1612ef3 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/0/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/0/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_sys/0/ann_mentions.xml.gz new file mode 100755 index 0000000..101d6f7 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/0/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/0/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_sys/0/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..ce7d825 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/0/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/0/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_sys/0/ann_segmentation.xml.gz new file mode 100755 index 0000000..0aed7a0 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/0/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/0/header.xml a/src/test/resources/teksty_tei/teksty_sys/0/header.xml new file mode 100755 index 0000000..7c47cd1 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/0/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-57,p-58,p-59,p-60 from NKJP text with id: IPIPAN_1301919980826</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_sys/0/text.xml 
a/src/test/resources/teksty_tei/teksty_sys/0/text.xml new file mode 100755 index 0000000..2c3048e --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/0/text.xml @@ -0,0 +1,15 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">W spotkaniu weźmie udział blisko 7 tysięcy braci z całej Europy, ale tylko 206 z nich będzie ubiegało się o tytuł Europejskiego Króla Kurkowego. - Wezmę udział w strzelaniu, choć moje szanse są marne. Wynika to przede wszystkim z moich obowiązków gospodarza spotkań; w tym nawale pracy ciężko mi będzie się skupić na strzelaniu - przewiduje Zdzisław Maj, prezes krakowskiego Bractwa Kurkowego, panujący Król Kurkowy.</p> + <p xml:id="p-2">Strzelanie o tytuł Europejskiego Króla Kurkowego będzie się odbywało w kilku etapach. Do finału zostanie dopuszczonych 27 braci - jeden z nich otrzyma tytuł Europejskiego Króla Kurkowego odbierając go obecnie panującemu Wilfriedowi Stammermannowi. - Król nie otrzymuje żadnych nagród finansowych, ale taki tytuł jest ogromnym zaszczytem; król jest np. zapraszany na posiedzenia Parlamentu Europejskiego - mówi Zdzisław Maj.</p> + <p xml:id="p-3">Największą atrakcją 12. Europejskich Spotkań Bractw Strzeleckich będzie wielka parada, która rozpocznie się w niedzielę o godz. 13. Kilkuset braci w historycznych strojach przejdzie z Błoń na Rynek ulicami: Piłsudskiego, Straszewskiego, Franciszkańską i Grodzką.</p> + <p xml:id="p-4">Początki istnienia Bractwa Kurkowego w Krakowie sięgają XIII wieku. Skupiało ono znamienitych obywateli, kupców i rzemieślników pragnących wspomóc obronność miasta. Wielkim świętem bractwa był turniej, który odbywał się na strzelnicy zwanej Celestatem. Zawody trwały zwykle trzy dni. Strzelano do drewnianego kura umocowanego na wysokiej żerdzi. 
Brat, który zdołał celnym strzałem strącić ostatni jego fragment zdobywał miano Króla Kurkowego. Z tym tytułem wiązały się nie tylko honory, ale także przywileje: Rada Miejska zwalniała jego posiadacza m.in. z obowiązku płacenia podatków (ten zwyczaj utrzymał się do dziś).</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_sys/1/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_sys/1/ann_coreference.xml.gz new file mode 100755 index 0000000..cc81bd5 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/1/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/1/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_sys/1/ann_mentions.xml.gz new file mode 100755 index 0000000..50ad7ab --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/1/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/1/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_sys/1/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..62d6f6a --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/1/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/1/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_sys/1/ann_segmentation.xml.gz new file mode 100755 index 0000000..9ff6101 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/1/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/1/header.xml a/src/test/resources/teksty_tei/teksty_sys/1/header.xml new file mode 100755 index 0000000..5723f42 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/1/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-21,p-22,p-23,p-24,p-25,p-26,p-27 from NKJP text with id: 
IJPPAN_PolPr_GWlk01253</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_sys/1/text.xml a/src/test/resources/teksty_tei/teksty_sys/1/text.xml new file mode 100755 index 0000000..e77e7ea --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/1/text.xml @@ -0,0 +1,18 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Z kolei we Wrocławiu płace kontrolerów zostały zupełnie uniezależnione od liczby wystawionych wezwań do zapłaty.</p> + <p xml:id="p-2">- Nie oznacza to jednak, że nie nagradzamy najskuteczniejszych pracowników. Kilka razy w roku przyznawane są premie. Bierzemy wtedy pod uwagę skuteczność i ewentualne skargi pasażerów - wyjaśnia Monika Poważna, kierownik Wydziału Transportu wrocławskiego Urzędu Miasta.</p> + <p xml:id="p-3">Tamtejsi kontrolerzy zarabiają (bez premii) około 1200 złotych miesięcznie (netto).</p> + <p xml:id="p-4">Miasto postanowiło za jednym zamachem trzy spółki połączyć w jedną. Przygotowany jest projekt uchwały, który przewiduje wniesienie udziałów w Towarzystwie Budownictwa Społecznego "Wielkopolska" oraz Towarzystwie Budownictwa Społecznego "Nasz Dom" do Poznańskiego Towarzystwa Budownictwa Społecznego. W piątek opiniować tę propozycję będzie Komisja Gospodarki Komunalnej i Polityki Mieszkaniowej, a we wtorek zajmie się nią Rada Miasta.</p> + <p xml:id="p-5">- Pomysł połączenia TBS-ów nie budzi wątpliwości z punktu widzenia racjonalizacji kosztów - twierdzi Tomasz Lewandowski, radny LiD i członek komisji. - Potrzebna jest jednak dyskusja o przyszłości towarzystw. 
Obecnie rząd pracuje nad zmianą ustawy, która przewiduje wykup mieszkań w towarzystwach budownictwa społecznego. To stworzy zupełnie nową sytuację. W związku z tym konieczne będzie podjęcie odpowiednich kroków przez miasto.</p> + <p xml:id="p-6">Norbert Napieraj, szef klubu radnych PiS również uważa, że ze względów ekonomicznych utworzenie jednej spółki jest zasadne.</p> + <p xml:id="p-7">- Na razie jest to jednak luźny pomysł. Nie ma konkretów - dodaje N. Napieraj. - Nasz klub jeszcze nie wypracował w sprawie tej uchwały stanowiska.</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_sys/2/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_sys/2/ann_coreference.xml.gz new file mode 100755 index 0000000..a12d576 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/2/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/2/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_sys/2/ann_mentions.xml.gz new file mode 100755 index 0000000..19cb576 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/2/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/2/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_sys/2/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..62f8665 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/2/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/2/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_sys/2/ann_segmentation.xml.gz new file mode 100755 index 0000000..0fe2536 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/2/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/2/header.xml a/src/test/resources/teksty_tei/teksty_sys/2/header.xml new file mode 100755 index 0000000..769bd91 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/2/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" 
encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-923,p-924,p-925,p-926,p-927,p-928,p-929,p-930,p-931 from NKJP text with id: IJPPAN_PolPr_DBb01448</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_sys/2/text.xml a/src/test/resources/teksty_tei/teksty_sys/2/text.xml new file mode 100755 index 0000000..d2f8e1d --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/2/text.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Potem znalazł zatrudnienie w Fundacji Europejskie Spotkania Kaszubskie Centrum Kultury. Był prezesem utworzonej przez fundację spółki Zamek.</p> + <p xml:id="p-2">W międzyczasie został radnym. Pod koniec ubiegłej kadencji Rada Gminy Krokowa wybrała go na wójta. Jesienią 2002 r. został wójtem w wyborach powszechnych.</p> + <p xml:id="p-3">- Co skłoniło mnie do zostania samorządowcem? W pewnym momencie życia mężczyzny przychodzi taka potrzeba, aby sprawdzić się np. w życiu publicznym - twierdzi krokowski kandydat do tytułu Wójta Pomorza. - Poza tym interesowały mnie sprawy komunalne. Chciałem się nimi bliżej zająć.</p> + <p xml:id="p-4">Co wójt gminy Krokowa uważa za swój największy sukces i największą porażkę?</p> + <p xml:id="p-5">- Sukcesem jest to, że udaje się wreszcie opracowywać plany zagospodarowania przestrzennego. Gotowe są już dla Białogóry i części Dębek. Tych ostatnich przez wiele lat nie można było uchwalić - uważa wójt. 
- Natomiast za porażkę uważam decyzję Rady gminy, aby nie przystępować w ramach Komunalnego Związku Gmin do programu uporządkowania gospodarki ściekowej. Mogliśmy uzyskać wiele milionów euro. Boję się, że to nie tylko moja porażka...</p> + <p xml:id="p-6">Od 5 lat ulubionym hobby Henryka Doeringa są narty. Dlatego urlop najchętniej bierze zimą, aby udać się na stoki Szklarskiej Poręby.</p> + <p xml:id="p-7">- Tej zimy niestety nie mogłem wyjechać - przyznaje wójt Krokowej. - Czasu wolnego mam bardzo mało, jeśli się taki pojawia, to staram się go spędzać razem z bliskimi.</p> + <p xml:id="p-8">Nasz plebiscyt</p> + <p xml:id="p-9">„Dziennik Bałtycki” rozpoczął kolejną edycję konkursu Wójt Pomorza. Nasz powiat reprezentują trzej włodarze gmin wiejskich. To Henryk Doering (Krokowa), Tadeusz Puszkarczuk (gmina Puck) i Jerzy Włudzik (Kosakowo). W gronie kilkudziesięciu kolegów po fachu walczyć będą o miano najpopularniejszego wójta województwa. O tym, kto wygra, zadecydują swoimi głosami Czytelnicy „Dziennika”.</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_sys/3/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_sys/3/ann_coreference.xml.gz new file mode 100755 index 0000000..d2f0f87 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/3/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/3/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_sys/3/ann_mentions.xml.gz new file mode 100755 index 0000000..3eafcd2 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/3/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/3/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_sys/3/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..4b8e8f8 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/3/ann_morphosyntax.xml.gz diff --git 
b/src/test/resources/teksty_tei/teksty_sys/3/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_sys/3/ann_segmentation.xml.gz new file mode 100755 index 0000000..39d2a21 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/3/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/3/header.xml a/src/test/resources/teksty_tei/teksty_sys/3/header.xml new file mode 100755 index 0000000..efca8ca --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/3/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-39,p-40,p-41,p-42,p-43,p-44,p-45,p-46,p-47,p-48,p-49 from NKJP text with id: IJPPAN_PolPr5b00121</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_sys/3/text.xml a/src/test/resources/teksty_tei/teksty_sys/3/text.xml new file mode 100755 index 0000000..5da18a5 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/3/text.xml @@ -0,0 +1,22 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Paweł Kryszałowicz</p> + <p xml:id="p-2">(Eintracht Frankfurt): - Ukraińcy postawili nam wysokie wymagania. Remis jest sprawiedliwy, choć przy odrobinie szczęścia mogliśmy wygrać. Mam do siebie pretensję, bo przy lepszej koncentracji mogłem zdobyć bramkę.</p> + <p xml:id="p-3">ś</p> + <p xml:id="p-4">Jacek Krzynówek</p> + <p xml:id="p-5">(1. FC Nuernberg): - Cieszę się, że mogłem wystąpić w meczu kończącym eliminacje. 
Ze swej strony zrobię wszystko, aby nie wypaść z kadry, mimo że nie mam ugruntowanej pozycji w swoim klubie. Zasłużyliśmy na awans, bo byliśmy zespołem, który grał najrówniej i w głupi sposób nie tracił punktów.</p> + <p xml:id="p-6">ś</p> + <p xml:id="p-7">Marek Koźmiński:</p> + <p xml:id="p-8">Mecze z Ukrainą spinają klamrą eliminacje, które będziemy pamiętać do końca życia. Uważam, że kibice oglądali dzisiaj dobry mecz dwóch równych zespołów. Moim zdaniem Ukraińcy zasłużenie zajęli drugie miejsce , jednak nie chciałbym spotkać się z nimi na mundialu. To bardzo niewygodny przeciwnik. Może wygrać z każdym.</p> + <p xml:id="p-9">ś</p> + <p xml:id="p-10">Zbigniew Boniek</p> + <p xml:id="p-11">(wiceprezes PZPN): - Najważniejsze, że eliminacje zakończyły się sukcesem. Jestem usatysfakcjonowany. Chcę podkreślić, że kibice i dziennikarze dostrzegają tylko to, co dzieje się na boisku i rzadko kiedy zwracają uwagę na kulisy sukcesów piłkarzy, a oznaczają one ogromny wysiłek organizacyjny, wiele pracy rzeszy ludzi, którzy nie stoją w pierwszym szeregu, ale wykonują nieraz ciężkie i niewdzięczne zadania. 
Moim zdaniem między innymi dlatego, że sprawy organizacyjne zostały ułożone na odpowiednim poziomie, wszyscy możemy się dzisiaj cieszyć z awansu do mistrzostw świata.</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_sys/4/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_sys/4/ann_coreference.xml.gz new file mode 100755 index 0000000..4567335 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/4/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/4/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_sys/4/ann_mentions.xml.gz new file mode 100755 index 0000000..1eaa201 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/4/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/4/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_sys/4/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..d6bd89a --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/4/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/4/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_sys/4/ann_segmentation.xml.gz new file mode 100755 index 0000000..e568746 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/4/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/4/header.xml a/src/test/resources/teksty_tei/teksty_sys/4/header.xml new file mode 100755 index 0000000..17a75d9 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/4/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-437,p-438,p-439,p-440,p-441,p-442,p-443,p-444,p-445 from NKJP text with id: IJPPAN_PolPr_SlP00841</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef 
scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_sys/4/text.xml a/src/test/resources/teksty_tei/teksty_sys/4/text.xml new file mode 100755 index 0000000..97371c9 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/4/text.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Ernest i Agnieszka nie planowali, że będą mieli wielką, babską rodzinę. Ale tak wyszło. – I całe szczęście. Lepiej się dogaduję z dziewczętami – cieszy się Ernest Kwiecień.</p> + <p xml:id="p-2">W Wigilię do jego obowiązków, poza dostarczeniem choinki, należeć będzie zmywanie naczyń. Agnieszka zrobi pierogi, ugotuje barszcz z uszkami, usmaży karpia. Córki upieką ciasta. Potem przyjdzie czas na prezenty. Może to nawet będą empetrójki, o których marzą starsze dziewczyny.</p> + <p xml:id="p-3">Jodełek sadzimy mniej</p> + <p xml:id="p-4">Leśniczy, od którego pan Ernest przywozi choinkę, mieszka kilka kilometrów od domu Kwietniów. On także nie wyobraża sobie świąt bez prawdziwego świerku. – I musi być kiczowaty – uśmiecha się Gabriel Grobelny, nadleśniczy wałbrzyski.</p> + <p xml:id="p-5">To znaczy, że powinny na nim wisieć ozdoby zrobione przez dzieci, przechowywane latami, wyciągane na tę jedyną okazję.</p> + <p xml:id="p-6">Pan Gabriel ma dwóch synów i trzy córki. W domu została najmłodsza, 12-letnia, ale na święta zjadą wszyscy. I ubiorą choinkę. – Żona rozwiesi anielskie włosy, ja podłączę lampki – w domu nadleśniczego podział świątecznych ról jest określony.</p> + <p xml:id="p-7">W dolnośląskich lasach najwięcej jest świerków. 
Na plantacjach sadzą także coraz popularniejsze jodły z miękkimi igłami.</p> + <p xml:id="p-8">– Ale i tych jodełek sadzimy już mniej. To nie lata dziewięćdziesiąte, gdy sprzedawaliśmy prawie wszystkie wyhodowane drzewka – wspomina nadleśniczy.</p> + <p xml:id="p-9">U Grobelnego choinkę można sobie wybrać. – Mamy rodziny, w których co roku ojciec przyjeżdża z synem, by samemu ściąć drzewko. Taką mają tradycję – dodaje pan Gabriel.</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_sys/5/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_sys/5/ann_coreference.xml.gz new file mode 100755 index 0000000..535c47c --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/5/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/5/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_sys/5/ann_mentions.xml.gz new file mode 100755 index 0000000..70c6db8 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/5/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/5/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_sys/5/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..b0e9efe --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/5/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/5/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_sys/5/ann_segmentation.xml.gz new file mode 100755 index 0000000..2ef9b75 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/5/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/5/header.xml a/src/test/resources/teksty_tei/teksty_sys/5/header.xml new file mode 100755 index 0000000..7d37881 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/5/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" 
xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-6,p-7,p-8,p-9 from NKJP text with id: PELCRA_1303919960926</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_sys/5/text.xml a/src/test/resources/teksty_tei/teksty_sys/5/text.xml new file mode 100755 index 0000000..0d517c7 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/5/text.xml @@ -0,0 +1,15 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Cena życia</p> + <p xml:id="p-2">Z tego pogromu ocalało kilkudziesięciu Żydów, a wśród nich rodzina Mosze Sonensona. Przed wojną była to w skali miasteczka rodzina bogata. Sonensonowie mieli garbarnię. Nie udało mi się dociec, u kogo mianowicie przechowywali się Sonensonowie oraz pozostali Żydzi w czasie okupacji niemieckiej. Faktem pozostaje natomiast, że okupację tę przeżyli. Faktem oczywistym pozostaje i to, że liczne rodziny polskie - w Ejszyszkach i w pobliskich okolicach - przechowywały Żydów. Parę kilometrów od Ejszyszek, w Korkucianach (w folwarku Lebiedniki), żołnierz AK Kazimierz Korkuć w czasie wojny w swoim domu przechowywał 28 Żydów. Od studni do piwnic domu był przekopany tunel, dzięki czemu mieli wodę. Natomiast w skali siatki AK Kazimierz Korkuć przechowywał około 70 Żydów. Rodzina Świeczków również przechowywała Żydów. W tamtych stronach liczne rodziny polskie postępowały podobnie.</p> + <p xml:id="p-3">Prawdą jest również i to, że Żydzi za swe przechowanie płacili. Płacili za utrzymanie i chyba jeszcze - za ryzyko. 
O tym dzisiaj raczej tu się nie mówi, ale prawdopodobnie różnie z tym było: jedni za pieniądze, inni - z odruchu serca. Ryzykowali i Polacy, i Żydzi. Te rachunki mogły wyglądać bardzo różnie.</p> + <p xml:id="p-4">Mieszkam w jednej z podwileńskich wsi. Otóż w tej mojej wsi pewien gospodarz - Polak - przechowywał w czasie wojny młodą Żydówkę. Spodobała mu się, z czego wynikł dramat. Zdenerwowana żona doniosła na policję. Aresztowano Żydówkę razem z gospodarzem, przerażona kobieta próbowała ocalić męża. Zanim uzbierała potrzebną sumę na łapówkę, było już za późno - rozstrzelano nie tylko Żydówkę, ale i gospodarza. Czy żonę tego straceńca można nazwać antysemitką?</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_sys/8/ann_coreference.xml.gz a/src/test/resources/teksty_tei/teksty_sys/8/ann_coreference.xml.gz new file mode 100755 index 0000000..b8a3634 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/8/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/8/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_sys/8/ann_mentions.xml.gz new file mode 100755 index 0000000..3ebeaf5 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/8/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/8/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_sys/8/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..0c0be0e --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/8/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/8/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_sys/8/ann_segmentation.xml.gz new file mode 100755 index 0000000..4280861 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/8/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/8/header.xml a/src/test/resources/teksty_tei/teksty_sys/8/header.xml new file mode 100755 index 
0000000..b6c7d38 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/8/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-596,p-597,p-598,p-599,p-600,p-601,p-602,p-603,p-604,p-605,p-606,p-607,p-608,p-609,p-610,p-611,p-612,p-613,p-614,p-615,p-616,p-617,p-618,p-619,p-620,p-621 from NKJP text with id: IJPPAN_PolPr_GWroc00348</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_sys/8/text.xml a/src/test/resources/teksty_tei/teksty_sys/8/text.xml new file mode 100755 index 0000000..8dcb1b6 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/8/text.xml @@ -0,0 +1,37 @@ +<?xml version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">- 150 g owoców</p> + <p xml:id="p-2">Joanna Kuc</p> + <p xml:id="p-3">(PAI)</p> + <p xml:id="p-4">Diety są różne, ścisłe i urozmaicone, eliminacyjne, dziwne, racjonalne i wreszcie te zalecane przez lekarzy. 
Dziś dieta burgundzka.</p> + <p xml:id="p-5">Jak wygląda przykładowe tygodniowe menu?</p> + <p xml:id="p-6">1 dzień</p> + <p xml:id="p-7">-obiad: pierś kurczaka pieczona w folii, sałatka z czerwonej kapusty doprawiona octem winnym i oliwą, pieczone jabłko</p> + <p xml:id="p-8">- kolacja: 2 sadzone jajka, pomidor, trójkąt serka topionego, gruszka</p> + <p xml:id="p-9">2 dzień</p> + <p xml:id="p-10">- obiad: królik w potrawce, surówka z marchewki i chrzanu, brzoskwinia</p> + <p xml:id="p-11">- kolacja: befsztyk z polędwicy usmażony bez tłuszczu, zielona sałata z sosem vinegrette, 2 mandarynki</p> + <p xml:id="p-12">3 dzień</p> + <p xml:id="p-13">-obiad: kura z rosołu, gotowana kapusta bez zasmażki, mały grejpfrut</p> + <p xml:id="p-14">- kolacja : ryba w warzywach, surówka z buraczków, pomarańcza</p> + <p xml:id="p-15">4 dzień</p> + <p xml:id="p-16">-obiad: pieczona wieprzowina, brokuły ugotowane na parze, jabłko</p> + <p xml:id="p-17">- kolacja: szklanka ugotowanego ryżu zalana chudym mlekiem, plaster białego chudego sera, kiść winogron</p> + <p xml:id="p-18">5 dzień</p> + <p xml:id="p-19">- obiad – sztuka mięsa, gotowane buraczki, kilka suszonych śliwek</p> + <p xml:id="p-20">- kolacja - 2 jajka na miękko, serka brie, banan</p> + <p xml:id="p-21">6 dzień</p> + <p xml:id="p-22">- obiad: ryba pieczona w folii, surówka z białej kapusty i marchewki, gruszka w sosie waniliowym</p> + <p xml:id="p-23">- kolacja: pieczeń z królika, sałatka z pomidorów i papryki, trójkąt serka topionego, kiwi</p> + <p xml:id="p-24">7 dzień</p> + <p xml:id="p-25">- obiad: cielęcina pieczona z dodatkiem ziół, fasolka szparagowa z odrobiną masła, jogurt</p> + <p xml:id="p-26">- kolacja: zapiekanka z ziemniaków, odrobiny startego żółtego sera i szynki, sałata zielona z rzodkiewkami, pomarańcza</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file diff --git b/src/test/resources/teksty_tei/teksty_sys/9/ann_coreference.xml.gz 
a/src/test/resources/teksty_tei/teksty_sys/9/ann_coreference.xml.gz new file mode 100755 index 0000000..4bcd42e --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/9/ann_coreference.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/9/ann_mentions.xml.gz a/src/test/resources/teksty_tei/teksty_sys/9/ann_mentions.xml.gz new file mode 100755 index 0000000..0bd7310 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/9/ann_mentions.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/9/ann_morphosyntax.xml.gz a/src/test/resources/teksty_tei/teksty_sys/9/ann_morphosyntax.xml.gz new file mode 100755 index 0000000..a0c48b2 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/9/ann_morphosyntax.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/9/ann_segmentation.xml.gz a/src/test/resources/teksty_tei/teksty_sys/9/ann_segmentation.xml.gz new file mode 100755 index 0000000..a7fc20c --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/9/ann_segmentation.xml.gz diff --git b/src/test/resources/teksty_tei/teksty_sys/9/header.xml a/src/test/resources/teksty_tei/teksty_sys/9/header.xml new file mode 100755 index 0000000..c644153 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/9/header.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<teiHeader xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0" xml:lang="en"> + <fileDesc> + <titleStmt> + <title>Paragraphs: p-215,p-216 from NKJP text with id: IJPPAN_PolPr0300662</title> + </titleStmt> + </fileDesc> + <profileDesc> + <textClass> + <catRef scheme="#taxonomy-CORE" target="Dzienniki"/> + </textClass> + </profileDesc> + <revisionDesc/> +</teiHeader> diff --git b/src/test/resources/teksty_tei/teksty_sys/9/text.xml a/src/test/resources/teksty_tei/teksty_sys/9/text.xml new file mode 100755 index 0000000..e605436 --- /dev/null +++ a/src/test/resources/teksty_tei/teksty_sys/9/text.xml @@ -0,0 +1,13 @@ +<?xml 
version="1.0" ?> +<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" xmlns:xi="http://www.w3.org/2001/XInclude" xmlns:nkjp="http://www.nkjp.pl/ns/1.0"> + <xi:include href="PCC_header.xml"/> + <TEI> + <xi:include href="header.xml"/> + <text> + <body> + <p xml:id="p-1">Ogród na... balkonie Pani Stanisławie Budkiewicz z ul. Piłsudskiego na powierzchni niewiele przekraczającej półtora metra kwadratowego udało się "upchnąć" aż 15 skrzynek z kwiatami i kilka doniczek. - Utrzymanie takiego balkonu wymaga wiele pracy i serca - przyznaje S. Budkiewicz, której przy kwiatach pomaga córka - Grażyna Stańczyk</p> + <p xml:id="p-2">- Nie mamy własnego ogródka, a bardzo kochamy kwiaty. Dlatego każdą wolną chwilę poświęcamy balkonowi. Nasiona wysiewane są już na przełomie lutego i marca. Później wyrastające z nich roślinki pikuje się do skrzynek. - Skrzynki wystawiamy na balkon dopiero w maju-czerwcu - wyjaśnia G. Stańczyk. - Bardzo rzadko używamy gotowych sadzonek ze sklepu, bo nic nie sprawia takiej przyjemności jak wyhodowanie kwiatka od nasionka. O tej porze roku najwięcej pracy jest przy podlewaniu roślin i zasilaniu ich odpowiednimi nawozami. Na balkonie pani Stanisławy rosną: surfinie, petunie, gardenie, aksamitki, przypołudniki, groszek pachnący, kabea, nemezje i werbeny. W sumie kilkadziesiąt kwiatów, z których każdy kwitnie w innym kolorze i w różnym czasie. Efekt? Ogród na balkonie cieszy oczy właścicielki i przechodniów aż do późnej jesieni. Ozdabianie balkonów kwiatami staje się w Brzezinach coraz popularniejsze - Teraz jest za późno, by samemu uprawiać rośliny balkonowe - mówi Bożenna Kolasa ze sklepu ogrodniczego. - Jednak wciąż można kupić kwiaty gotowe do wystawienia na balkon. Najpopularniejsze są petunie, surfinie, oraz czerwone, białe i różowe pelargonie. Modnym kwiatem jest też datura-bieluń. Klientom nie przeszkadza, że jest to kwiat trujący. tekst i fot. grzegorz kozieł</p> + </body> + </text> + </TEI> +</teiCorpus> \ No newline at end of file