nersuite_tag.sh
978 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
# Simple wrapper for an NERsuite pipeline using a fixed set of models.
# Expects input on STDIN, writes to STDOUT.
# ---------------------------------------------------------------------------
# Pipeline configuration
# ---------------------------------------------------------------------------

# Format handed to the final tagging stage via its -o option.
# "brat" was tried as well, but it didn't work out: NERsuite can't
# guarantee that offsets match, so that setting stays disabled.
#   OUTPUT_FORMAT=brat
OUTPUT_FORMAT="conll"

# Parent directory shared by the models and the dictionaries below.
MODEL_ROOT=~/local/models

# Model locations (gtagger model directory, NERsuite model file).
GTAG_MODEL="${MODEL_ROOT}/models_gtagger/"
NERS_MODEL="${MODEL_ROOT}/models_nersuite/all_merged.data.m"

# Dictionary directory and the dictionary files inside it.
# NOTE: the pipeline has one dictionary-tagger stage per dictionary, so
# changing the number of dictionaries also means editing the pipeline
# code further down (sorry).
DICT_DIR="${MODEL_ROOT}/dic/"
DICT1="EntrezGene.8xpath.dic.cdbpp"
DICT2="UMLS.all_class.dic.cdbpp"

# NERsuite component executables; prefix with a path if you have a
# local installation.
TOKENIZER="nersuite_tokenizer"
GTAGGER="nersuite_gtagger"
DTAGGER="nersuite_dic_tagger"
NERSUITE="nersuite"
# Run as a straight pipeline:
#   tokenizer -> gtagger -> dictionary tagger (once per dictionary)
#   -> NERsuite tagger
# The first stage reads the script's STDIN and the last stage writes to
# the script's STDOUT. Every command-line argument given to this script
# is forwarded unchanged to each stage.
# NOTE(review): forwarding the same arguments to all five stages is kept
# from the original; confirm that every component accepts them.
#
# pipefail: without it the exit status is that of the last stage only,
# so a failing tokenizer/gtagger/dictionary tagger would go unnoticed.
set -o pipefail

# All expansions are quoted so that paths containing whitespace (e.g. in
# $HOME) and arguments containing whitespace or glob characters reach
# the tools intact. Each component variable must therefore hold just the
# executable name or path, without extra options.
# ${DICT_DIR%/} drops one trailing slash to avoid "dir//file".
"$TOKENIZER" "$@" |
  "$GTAGGER" -d "$GTAG_MODEL" "$@" |
  "$DTAGGER" "${DICT_DIR%/}/${DICT1}" "$@" |
  "$DTAGGER" "${DICT_DIR%/}/${DICT2}" "$@" |
  "$NERSUITE" tag -m "$NERS_MODEL" -o "$OUTPUT_FORMAT" "$@"