/* Web-viewer metadata (not part of the original header): file size 13,480 bytes; ref 69fb171. */
/*
  CRF++ -- Yet Another CRF toolkit

  $Id: crfpp.h 1592 2007-02-12 09:40:53Z taku $;

  Copyright(C) 2005-2007 Taku Kudo <[email protected]>
*/
#ifndef CRFPP_CRFPP_H_
#define CRFPP_CRFPP_H_

/* C interface  */
#ifdef __cplusplus
#include <cstdio>
#else
#include <stdio.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
  Linkage decoration macros used on every declaration in this header.

  CRFPP_DLL_EXTERN        prefixes the free-function declarations.
  CRFPP_DLL_CLASS_EXTERN  sits between `class` and the class name in the
                          C++ section.

  On _WIN32 the macros expand to __declspec(dllexport) when DLL_EXPORT is
  defined and CRFPP_DLL_EXTERN expands to __declspec(dllimport) otherwise.
  NOTE(review): DLL_EXPORT is presumably defined only while building the
  library itself -- confirm against the build files.
*/
#ifdef _WIN32
#include <windows.h>
#  ifdef DLL_EXPORT
#    define CRFPP_DLL_EXTERN  __declspec(dllexport)
#    define CRFPP_DLL_CLASS_EXTERN  __declspec(dllexport)
#  else
#    define CRFPP_DLL_EXTERN  __declspec(dllimport)
/* NOTE(review): CRFPP_DLL_CLASS_EXTERN is intentionally or accidentally left
   undefined on this branch, so the classes get the empty fallback below
   (no dllimport) when the header is consumed on Windows. */
#  endif
#endif

/* Fallback when the _WIN32 branch above did not define it: plain `extern`. */
#ifndef CRFPP_DLL_EXTERN
#  define CRFPP_DLL_EXTERN extern
#endif

/* Fallback when the _WIN32 branch above did not define it: no decoration. */
#ifndef CRFPP_DLL_CLASS_EXTERN
#  define CRFPP_DLL_CLASS_EXTERN
#endif

/*
  C interface. The whole section is skipped when SWIG is defined.

  All functions operate on two opaque handle types. The implementation is
  not part of this header.
  NOTE(review): the function names parallel the members of CRFPP::Model and
  CRFPP::Tagger declared in the C++ section of this header (e.g.
  crfpp_prob / crfpp_prob2 / crfpp_prob3 vs. the three Tagger::prob
  overloads); they are presumably thin wrappers -- confirm against the
  implementation before relying on that.
*/
#ifndef SWIG
  /* Opaque handles: the structs are only forward-declared here, so callers
     can hold pointers to them but cannot access their members. */
  typedef struct crfpp_t crfpp_t;
  typedef struct crfpp_model_t crfpp_model_t;

  /* C interface */
  /* Model handle: creation comes in an (int, char**) form and a "2" form
     taking a single const char*; the from_array forms additionally take a
     (const char *, size_t) pair.
     NOTE(review): presumably argv-style options vs. one option string, and
     an in-memory model buffer with its size, as in the C++ factory
     functions -- confirm. */
  CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_new(int,  char**);
  CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_new2(const char*);
  CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_from_array_new(int,  char**, const char *, size_t);
  CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_from_array_new2(const char*, const char *, size_t);
  CRFPP_DLL_EXTERN const char *   crfpp_model_get_template(crfpp_model_t*);
  CRFPP_DLL_EXTERN void           crfpp_model_destroy(crfpp_model_t*);
  CRFPP_DLL_EXTERN const char *   crfpp_model_strerror(crfpp_model_t *);
  CRFPP_DLL_EXTERN crfpp_t*       crfpp_model_new_tagger(crfpp_model_t *);

  /* Tagger handle: creation, destruction and input. */
  CRFPP_DLL_EXTERN crfpp_t* crfpp_new(int,  char**);
  CRFPP_DLL_EXTERN crfpp_t* crfpp_new2(const char*);
  CRFPP_DLL_EXTERN void     crfpp_destroy(crfpp_t*);
  CRFPP_DLL_EXTERN int      crfpp_set_model(crfpp_t *, crfpp_model_t *);
  CRFPP_DLL_EXTERN int      crfpp_add2(crfpp_t*, size_t, const char **);
  CRFPP_DLL_EXTERN int      crfpp_add(crfpp_t*, const char*);

  /* Tagger handle: sizes, results, probabilities and costs. */
  CRFPP_DLL_EXTERN size_t   crfpp_size(crfpp_t*);
  CRFPP_DLL_EXTERN size_t   crfpp_xsize(crfpp_t*);
  CRFPP_DLL_EXTERN size_t   crfpp_dsize(crfpp_t*);
  CRFPP_DLL_EXTERN const float* crfpp_weight_vector(crfpp_t*);
  CRFPP_DLL_EXTERN size_t   crfpp_result(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN size_t   crfpp_answer(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN size_t   crfpp_y(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN size_t   crfpp_ysize(crfpp_t*);
  CRFPP_DLL_EXTERN double   crfpp_prob(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN double   crfpp_prob2(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN double   crfpp_prob3(crfpp_t*);
  CRFPP_DLL_EXTERN void     crfpp_set_penalty(crfpp_t *, size_t i, size_t j, double penalty);
  CRFPP_DLL_EXTERN double   crfpp_penalty(crfpp_t *, size_t i, size_t j);
  CRFPP_DLL_EXTERN double   crfpp_alpha(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN double   crfpp_beta(crfpp_t*, size_t, size_t);
  /* NOTE: "emisstion" (here) and "emittion" (crfpp_emittion_vector below)
     are the spellings of the declared symbols, while the C++ members are
     spelled emission_cost / emission_vector. The names are part of the
     public interface; do not correct them without keeping the old ones. */
  CRFPP_DLL_EXTERN double   crfpp_emisstion_cost(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN double   crfpp_next_transition_cost(crfpp_t*, size_t,
                                                       size_t, size_t);
  CRFPP_DLL_EXTERN double   crfpp_prev_transition_cost(crfpp_t*, size_t,
                                                       size_t, size_t);
  CRFPP_DLL_EXTERN double   crfpp_best_cost(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN const int* crfpp_emittion_vector(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN const int* crfpp_next_transition_vector(crfpp_t*, size_t,
                                                           size_t, size_t);
  CRFPP_DLL_EXTERN const int* crfpp_prev_transition_vector(crfpp_t*, size_t,
                                                           size_t, size_t);
  CRFPP_DLL_EXTERN double   crfpp_Z(crfpp_t*);

  /* Tagger handle: parsing and context control. These return int where the
     corresponding C++ members return bool.
     NOTE(review): the int encoding of success/failure is not visible in
     this header -- confirm before testing the result. */
  CRFPP_DLL_EXTERN int      crfpp_parse(crfpp_t*);
  CRFPP_DLL_EXTERN int      crfpp_empty(crfpp_t*);
  CRFPP_DLL_EXTERN int      crfpp_clear(crfpp_t*);
  CRFPP_DLL_EXTERN int      crfpp_next(crfpp_t*);

  /* Entry points that take no handle, in (int, char**) and const char*
     forms.
     NOTE(review): presumably the command-line tester and trainer -- confirm. */
  CRFPP_DLL_EXTERN int      crfpp_test(int, char **);
  CRFPP_DLL_EXTERN int      crfpp_test2(const char *);
  CRFPP_DLL_EXTERN int      crfpp_learn(int, char **);
  CRFPP_DLL_EXTERN int      crfpp_learn2(const char *);

  /* Tagger handle: string accessors and string-producing parse variants. */
  CRFPP_DLL_EXTERN const char*  crfpp_strerror(crfpp_t*);
  CRFPP_DLL_EXTERN const char*  crfpp_yname(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_y2(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_x(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN const char** crfpp_x2(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr(crfpp_t*, const char*);
  CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr2(crfpp_t*,
                                                   const char*, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr3(crfpp_t*, const char*,
                                                   size_t, char *, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_tostr(crfpp_t*);
  CRFPP_DLL_EXTERN const char*  crfpp_tostr2(crfpp_t*, char *, size_t);

  /* Tagger handle: option setters/getters. Unlike vlevel and cost_factor,
     nbest has a setter declared here but no matching getter. */
  CRFPP_DLL_EXTERN void crfpp_set_vlevel(crfpp_t *, unsigned int);
  CRFPP_DLL_EXTERN unsigned int crfpp_vlevel(crfpp_t *);
  CRFPP_DLL_EXTERN void crfpp_set_cost_factor(crfpp_t *, float);
  CRFPP_DLL_EXTERN float crfpp_cost_factor(crfpp_t *);
  CRFPP_DLL_EXTERN void crfpp_set_nbest(crfpp_t *, size_t);
#endif

#ifdef __cplusplus
}
#endif

/* C++ interface */
#ifdef __cplusplus

namespace CRFPP {

class Tagger;

// Abstract model interface: every member except the destructor is pure
// virtual, so this class cannot be instantiated directly. Instances are
// returned by the createModel() / createModelFromArray() factory functions
// declared later in this namespace.
// Members inside "#ifndef SWIG" are hidden when SWIG is defined.
class CRFPP_DLL_CLASS_EXTERN Model {
 public:
#ifndef SWIG
  // open model with parameters in argv[]
  // e.g, argv[] = {"CRF++", "-m", "model", "-v3"};
  virtual bool open(int argc,  char** argv) = 0;

  // open model with parameter arg, e.g. arg = "-m model -v3";
  virtual bool open(const char* arg) = 0;

  // open model with parameters in argv[].
  // e.g, argv[] = {"CRF++", "-v3"};
  // NOTE(review): presumably the model itself is read from model_buf
  // (model_size chars) rather than from a file, which would explain why
  // the example has no "-m" option -- confirm against the implementation.
  virtual bool openFromArray(int argc,  char** argv,
                             const char *model_buf,
                             size_t model_size) = 0;

  // open model with parameter arg, e.g. arg = "-m model -v3";
  // NOTE(review): this example still passes "-m model" although a model
  // buffer is supplied; it looks copied from open(const char*) -- confirm
  // whether "-m" is needed or ignored here.
  virtual bool openFromArray(const char* arg,
                             const char *model_buf,
                             size_t model_size) = 0;
#endif
  // return template string embedded in this model file.
  virtual const char *getTemplate() const = 0;

  // create Tagger object. The returned object shares the same
  // model object
  virtual Tagger *createTagger() const = 0;

  // NOTE(review): undocumented in the original; by analogy with
  // Tagger::what() this presumably returns the internal error as a
  // string -- confirm.
  virtual const char* what() = 0;

  // virtual so that implementations are destroyed correctly when deleted
  // through a Model pointer
  virtual ~Model() {}
};

// Abstract tagger interface: every member except the destructor is pure
// virtual, so this class cannot be instantiated directly. Instances are
// returned by the createTagger() factory functions declared later in this
// namespace and by Model::createTagger().
// Members inside "#ifndef SWIG" are hidden when SWIG is defined.
// Index convention used by the comments below: i is a token position,
// j and k are tag ids (for x(i, j), j is a column).
class CRFPP_DLL_CLASS_EXTERN Tagger {
 public:
#ifndef SWIG
  // open model with parameters in argv[]
  // e.g, argv[] = {"CRF++", "-m", "model", "-v3"};
  virtual bool open(int argc,  char** argv) = 0;

  // open model with parameter arg, e.g. arg = "-m model -v3";
  virtual bool open(const char* arg) = 0;

  // add str[] as tokens to the current context
  // (size is the number of entries in str[])
  virtual bool add(size_t size, const char **str) = 0;

  // close the current model
  virtual void close() = 0;

  // return parameter vector. the size should be dsize();
  virtual const float *weight_vector() const = 0;
#endif

  // set the Model used by this tagger
  virtual bool set_model(const Model &model) = 0;

  // set vlevel
  virtual void set_vlevel(unsigned int vlevel) = 0;

  // get vlevel
  virtual unsigned int vlevel() const = 0;

  // set cost factor
  virtual void set_cost_factor(float cost_factor) = 0;

  // get cost factor
  virtual float cost_factor() const = 0;

  // set nbest
  virtual void set_nbest(size_t nbest) = 0;

  // get nbest
  virtual size_t nbest() const = 0;

  // add one line to the current context
  virtual bool add(const char* str) = 0;

  // return size of tokens(lines)
  virtual size_t size() const = 0;

  // return size of column
  virtual size_t xsize() const = 0;

  // return size of features
  virtual size_t dsize() const = 0;

  // return output tag-id of i-th token
  virtual size_t result(size_t i) const = 0;

  // return answer tag-id of i-th token if it is available
  virtual size_t answer(size_t i) const = 0;

  // alias of result(i)
  virtual size_t y(size_t i) const = 0;

  // return output tag of i-th token as string
  virtual const char*   y2(size_t i) const = 0;

  // return i-th tag-id as string
  virtual const char*   yname(size_t i) const = 0;

  // return token at [i,j] as string(i:token j:column)
  virtual const char*   x(size_t i, size_t j) const = 0;

#ifndef SWIG
  // return an array of strings at i-th tokens
  virtual const char**  x(size_t) const = 0;
#endif

  // return size of output tags
  virtual size_t ysize() const = 0;

  // return marginal probability of j-th tag id at i-th token
  virtual double prob(size_t i, size_t j) const = 0;

  // return marginal probability of output tag at i-th token
  // same as prob(i, tagger->y(i));
  virtual double prob(size_t i) const = 0;

  // return conditional probability of the entire output
  virtual double prob() const = 0;

  // set token-level penalty. It would be useful for implementing
  // Dual decomposition decoding.
  // e.g.
  // "Dual Decomposition for Parsing with Non-Projective Head Automata"
  // Terry Koo Alexander M. Rush Michael Collins Tommi Jaakkola David Sontag
  virtual void set_penalty(size_t i, size_t j, double penalty) = 0;
  // get the penalty set for [i, j]
  virtual double penalty(size_t i, size_t j) const = 0;

  // return forward log-prob of the j-th tag at i-th token
  virtual double alpha(size_t i, size_t j) const = 0;

  // return backward log-prob of the j-th tag at i-th token
  virtual double beta(size_t i, size_t j) const = 0;

  // return emission cost of the j-th tag at i-th token
  virtual double emission_cost(size_t i, size_t j) const = 0;

  // return transition cost of [j-th tag at i-th token] to
  // [k-th tag at(i+1)-th token]
  virtual double next_transition_cost(size_t i,
                                      size_t j, size_t k) const = 0;

  // return transition cost of [j-th tag at i-th token] to
  // [k-th tag at(i-1)-th token]
  virtual double prev_transition_cost(size_t i,
                                      size_t j, size_t k) const = 0;

  // return the best accumulated cost to the j-th tag at i-th token
  // used in viterbi search
  virtual double best_cost(size_t i, size_t j) const = 0;

#ifndef SWIG
  // return emission feature vector of the j-th tag at i-th token
  virtual const int* emission_vector(size_t i, size_t j) const = 0;

  // return transition feature vector of [j-th tag at i-th token] to
  // [k-th tag at(i+1)-th token]
  virtual const int* next_transition_vector(size_t i,
                                            size_t j, size_t k) const = 0;

  // return transition feature vector of [j-th tag at i-th token] to
  // [k-th tag at(i-1)-th token]
  virtual const int* prev_transition_vector(size_t i,
                                            size_t j, size_t k) const = 0;
#endif

  // normalizing factor(log-prob)
  virtual double Z() const = 0;

  // do parse and change the internal status, if failed, returns false
  virtual bool parse() = 0;

  // return true if the context is empty
  virtual bool empty() const = 0;

  // clear all context
  virtual bool clear() = 0;

  // change the internal state to output the next-optimal output.
  // calling it n times yields the n-best results.
  // Need to specify the -nN option to use this function, where
  // N>=2
  virtual bool next() = 0;

  // parse 'str' and return result as string
  // 'str' must be written in CRF++'s input format
  virtual const char* parse(const char* str) = 0;

#ifndef SWIG
  // return parsed result as string
  virtual const char* toString() = 0;

  // return parsed result as string.
  // Result is saved in the buffer 'result', 'size' is the
  // size of the buffer. if failed, return NULL
  virtual const char* toString(char* result , size_t size) = 0;

  // parse 'str' and return parsed result.
  // You don't need to delete return value, but the buffer
  // is rewritten whenever you call parse method.
  // if failed, return NULL
  // NOTE(review): 'size' is presumably the length of 'str' -- confirm.
  virtual const char* parse(const char *str, size_t size) = 0;

  // parse 'str' and return parsed result.
  // The result is stored in the buffer 'result'.
  // 'size2' is the size of the buffer. if failed, return NULL
  // NOTE(review): 'size1' is presumably the length of 'str' -- confirm.
  virtual const char* parse(const char *str, size_t size1,
                            char *result, size_t size2) = 0;
#endif
  // return internal error code as string
  virtual const char* what() = 0;

  // virtual so that implementations are destroyed correctly when deleted
  // through a Tagger pointer
  virtual ~Tagger() {}
};

/* factory methods */
// Each factory returns a pointer to the abstract Tagger / Model interface.
// NOTE(review): ownership of the returned object is not stated in this
// header; the public virtual destructors suggest the caller deletes it, and
// a failed creation presumably yields a null pointer with the reason
// available from getTaggerError() -- confirm both against the
// implementation.

// create CRFPP::Tagger instance with parameters in argv[]
// e.g, argv[] = {"CRF++", "-m", "model", "-v3"};
CRFPP_DLL_EXTERN Tagger *createTagger(int argc, char **argv);

// create CRFPP::Tagger instance with parameter in arg
// e.g. arg = "-m model -v3";
CRFPP_DLL_EXTERN Tagger *createTagger(const char *arg);

// create CRFPP::Model instance with parameters in argv[]
// e.g, argv[] = {"CRF++", "-m", "model", "-v3"};
CRFPP_DLL_EXTERN Model *createModel(int argc, char **argv);

// create CRFPP::Model instance with parameters in argv[],
// loading the model from [model_buf, model_buf+model_size].
CRFPP_DLL_EXTERN Model *createModelFromArray(int argc, char **argv,
                                             const char *model_buf,
                                             size_t model_size);

// create CRFPP::Model instance with parameter in arg
// e.g. arg = "-m model -v3";
CRFPP_DLL_EXTERN Model *createModel(const char *arg);

// create CRFPP::Model instance with parameter in arg,
// loading the model from [model_buf, model_buf+model_size].
CRFPP_DLL_EXTERN Model *createModelFromArray(const char *arg,
                                             const char *model_buf,
                                             size_t model_size);

// return the error of createTagger() as a string
CRFPP_DLL_EXTERN const char *getTaggerError();

// alias of getTaggerError();
CRFPP_DLL_EXTERN const char *getLastError();
}

#endif
#endif