(* execTypes.ml *)
(*
 *  ENIAM: Categorial Syntactic-Semantic Parser for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

(* Processing outcome stored in the status field of eniam_parse_result,
   conll_parse_result and result.
   NOTE(review): the meaning of each constructor is not defined in this file;
   the names suggest one failure class per processing stage (preprocessing,
   lexicon, parsing, reduction, semantics) plus Idle as an initial value -
   confirm against the code that sets them.
   Constructor order is part of the runtime representation (polymorphic
   compare, Marshal), so append new constructors rather than reordering. *)
type status = Idle | PreprocessingError | LexiconError | ParseError | ParseTimeout | Parsed | TooManyNodes | NotParsed | NotReduced | ReductionError | SemError | NotTranslated

(* Record carried by the ENIAMSentence constructor of sentence.
   Several labels (id, status, msg, lex_time, parse_time, paths, ...) are
   declared again by conll_parse_result and result later in this file, so an
   unannotated field access resolves to the latest declaration unless the
   expected type is already known; annotate when building or reading values
   of this type. *)
type eniam_parse_result = {
  id: string;
  status: status;
  (* NOTE(review): presumably a human-readable message accompanying status - confirm *)
  msg: string;
  (* Per-stage timings. NOTE(review): presumably differences of time_fun
     readings, i.e. seconds - confirm against the code that fills them *)
  lex_time: float;
  parse_time: float;
  reduction_time: float;
  sem_time: float;
  (* NOTE(review): presumably sizes of the paths, chart and dependency_tree
     fields below - confirm how they are measured *)
  paths_size: int;
  chart_size: int;
  dependency_tree_size: int;
  chart: LCGtypes.chart;
  dependency_tree: LCGtypes.linear_term array;
  paths: PreTypes.token_record array;
  }

(* Record carried by the CONLLSentence constructor of sentence.
   Compared with eniam_parse_result it has no chart / chart_size fields and
   adds dep_graph and dep_graph_parsed.
   Its labels overlap with eniam_parse_result (declared earlier) and result
   (declared later), so field accesses may need a type annotation to pick
   this record. *)
type conll_parse_result = {
  id: string;
  status: status;
  (* NOTE(review): presumably a human-readable message accompanying status - confirm *)
  msg: string;
  (* Per-stage timings. NOTE(review): presumably seconds measured with
     time_fun - confirm against the code that fills them *)
  lex_time: float;
  parse_time: float;
  reduction_time: float;
  sem_time: float;
  paths_size: int;
  dependency_tree_size: int;
  dep_graph: LCGtypes.dep_tree;
  (* Association list from LCGtypes.SymbolMap keys to linear terms.
     NOTE(review): presumably the parsed form of dep_graph - confirm *)
  dep_graph_parsed: (LCGtypes.SymbolMap.key * LCGtypes.linear_term) list;
  dependency_tree: LCGtypes.linear_term array;
  paths: PreTypes.token_record array;
  }

(* Tag that labels each alternative in the AltSentence, AltParagraph and
   AltText constructors declared below.
   NOTE(review): the names suggest the representation or tool that produced
   the alternative (raw string, structured, CoNLL, ENIAM parser, Mate) -
   confirm against the producers. *)
type mode =
    Raw | Struct | CONLL | ENIAM | Mate

(* Mutually recursive representation of sentences and paragraphs:
   a sentence may embed a paragraph (ORSentence), a paragraph_record embeds
   a sentence, and a paragraph is a list of paragraph_records. *)
type sentence =
    RawSentence of string
  (* | CONLL of conll list *)
  | StructSentence of string * PreTypes.token_record list * int (* id * paths * last *)
  (* NOTE(review): the meaning of the two int components is not documented
     in this file - confirm with the code that builds ORSentence *)
  | ORSentence of PreTypes.token_record list * int * int * paragraph
  (* | NKJP1M of nkjp1m list *)
  (* | Skladnica of skladnica_tree *)
  (* Alternative renderings of one sentence, each tagged with a mode.
     NOTE(review): the original comment here read
     string = label, e.g. raw, nkjp, krzaki
     which predates the switch from a string label to mode. *)
  | AltSentence of (mode * sentence) list
  | ENIAMSentence of eniam_parse_result
  | CONLLSentence of conll_parse_result

and paragraph_record = {pid: string; pbeg: int; plen: int; psentence: sentence} (* beg and len are counted in Unicode characters ( * 100 ???) *)

and paragraph =
    RawParagraph of string
  | StructParagraph of paragraph_record list (* sentences *)
  | AltParagraph of (mode * paragraph) list

(* Top-level text: a raw string, a list of paragraphs, or a list of
   alternative texts each tagged with a mode. *)
type text =
    RawText of string
  | StructText of paragraph list * int (* paragraphs * next_id *)
  | AltText of (mode * text) list


(* Overall outcome for one input text; it is the payload of the Work_done
   constructor of message_to_overseer.
   This declaration shadows the built-in ('a, 'b) result type for
   unqualified uses of the name result after this point (and in any module
   that opens this one); the Ok and Error constructors are unaffected.
   Its labels status, msg and parse_time are also declared by
   eniam_parse_result and conll_parse_result; being the latest declaration,
   this record is the default for unannotated accesses to those labels.
   The fields inside the comment below are disabled and kept only for
   reference. *)
type result = {
  input_text: text;
  pre_text: text;
  (* NOTE(review): presumably timings of two preprocessing phases, in
     seconds - confirm against the code that fills them *)
  pre_time1: float;
  pre_time2: float;
  status: status;
  msg: string;
  (* lex_time: float; *)
  parse_time: float;
  parsed_text: text;
  (* reduction_time: float;
  sem_time: float;
  paths_size: int;
  disamb: LCGtypes.linear_term array;
  sem: LCGtypes.linear_term array;
  sem2: LCGtypes.linear_term array;
  sem3: LCGtypes.linear_term;
  trees: LCGtypes.linear_term list;
  mrls: SemTypes.mrl_formula list;
  paths: PreTypes.token_record array; *)
  }

(* Aggregate statistics: integer counters (no_*) followed by summed
   timings (sum_*).
   NOTE(review): apart from no_queries, the counters line up by name with
   the constructors of status other than Idle (one counter per
   constructor); presumably each counts the results that ended in that
   status - confirm against the code that accumulates this record. *)
type sum_result = {
  no_queries: int;
  no_pre_error: int;
  no_lex_error: int;
  no_parse_error: int;
  no_timeout: int;
  no_reduction_error: int;
  no_sem_error: int;
  no_not_parsed: int;
  no_not_reduced: int;
  no_too_many_nodes: int;
  no_not_translated: int;
  no_parsed: int;
  (* NOTE(review): presumably sums of the correspondingly named per-result
     time fields - confirm *)
  sum_pre_time1: float;
  sum_pre_time2: float;
  sum_lex_time: float;
  sum_parse_time: float;
  sum_reduction_time: float;
  sum_sem_time: float;
  }

(* Messages addressed to a worker, as the type name indicates.
   Work_with carries a string and a (string * float) pair that the inline
   comment labels reg_params.
   NOTE(review): presumably the string is the work item to process and
   Kill_yourself asks the worker to terminate; the transport used for these
   messages is not visible in this file - confirm with the overseer and
   worker code. *)
type message_from_overseer =
    Work_with of string * (*reg_params*)(string * float)
  | Kill_yourself

(* Messages addressed to the overseer, as the type name indicates.
   Work_done pairs a string with a result record.
   NOTE(review): presumably the string in both constructors identifies the
   worker or the work item - confirm with the overseer and worker code. *)
type message_to_overseer =
    Ready_to_work of string
  | Work_done of
      string  * result

(* [time_fun ()] returns the current time as reported by Unix.gettimeofday:
   a float number of seconds since the Unix epoch, with sub-second
   resolution.
   Alternative based on processor time instead of wall-clock time:
     let time_fun () = Sys.time () *)
let time_fun () = Unix.gettimeofday ()