Blame view

concraft/src/NLP/Concraft/Polish/Request.hs 2.8 KB
Jan Lupa authored
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}


module NLP.Concraft.Polish.Request
(
-- * Request 
  Request (..)
, Config (..)
-- ** Short
, Short (..)
, short
-- ** Long
, Long (..)
, long
) where


import           Control.Applicative ((<$>), (<*>))
import qualified Control.Monad.LazyIO as LazyIO
import qualified Data.Char as Char
import qualified Data.List.Split as Split
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Binary as B

import           NLP.Concraft.Polish
import           NLP.Concraft.Polish.Maca
import           NLP.Concraft.Polish.Morphosyntax hiding (tag)


-------------------------------------------------
-- Configuration
-------------------------------------------------


-- | A request with configuration.
data Request t = Request {
    -- | The actuall request.
      rqBody    :: t
    -- | Request configuration.
    , rqConf    :: Config }


instance B.Binary t => B.Binary (Request t)  where
    put Request{..} = B.put rqBody >> B.put rqConf
    get = Request <$> B.get <*> B.get


-- | Tagging configuration.
newtype Config = Config {
    -- | Tag with marginal probabilities.
      tagProbs  :: Bool
    } deriving (B.Binary)


-------------------------------------------------
-- Short request
-------------------------------------------------


-- | A short request.
data Short
    = Short T.Text
    | Par [Sent Tag]


instance B.Binary Short where
    put (Short x) = B.putWord8 0 >> B.put x
    put (Par x)   = B.putWord8 1 >> B.put x
    get = B.getWord8 >>= \x -> case x of
        0   -> Short <$> B.get
        _   -> Par   <$> B.get


-- | Process the short request.
short :: MacaPool -> Concraft -> Request Short -> IO [Sent Tag]
short pool concraft Request{..} = case rqBody of
    Short x -> map (tagit concraft) <$> macaPar pool x
    Par x   -> return $ map (tagit concraft) x
  where
    tagit = if tagProbs rqConf then marginals else tag


-------------------------------------------------
-- Long request
-------------------------------------------------


-- | A request to parse a long text.
data Long
    = Long L.Text
    | Doc [[Sent Tag]]


instance B.Binary Long where
    put (Long x) = B.putWord8 0 >> B.put x
    put (Doc x)  = B.putWord8 1 >> B.put x
    get = B.getWord8 >>= \x -> case x of
        0   -> Long <$> B.get
        _   -> Doc  <$> B.get


-- | Process the long request given the processor for the
-- short request. 
long :: (Request Short -> IO a) -> Request Long -> IO [a]
long handler Request{..} = case rqBody of
    Long inp ->
          LazyIO.mapM f . map L.unlines
        . Split.splitWhen (L.all Char.isSpace)
        . L.lines $ inp
    Doc inp -> LazyIO.mapM g inp
  where
    f x = handler . r $ Short $ L.toStrict x
    g x = handler . r $ Par x
    r x = Request {rqBody = x, rqConf = rqConf}