File size: 13,480 Bytes
69fb171 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 |
/*
CRF++ -- Yet Another CRF toolkit
$Id: crfpp.h 1592 2007-02-12 09:40:53Z taku $;
Copyright(C) 2005-2007 Taku Kudo <[email protected]>
*/
#ifndef CRFPP_CRFPP_H_
#define CRFPP_CRFPP_H_
/* C interface */
#ifdef __cplusplus
#include <cstdio>
#else
#include <stdio.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Linkage decoration macros for the declarations in this header.
 *
 *   CRFPP_DLL_EXTERN        prefix for every exported function
 *   CRFPP_DLL_CLASS_EXTERN  prefix for every exported C++ class
 *
 * On _WIN32 the function macro becomes dllexport when DLL_EXPORT is defined
 * and dllimport otherwise. The class macro is only given a value on the
 * dllexport side; everywhere else it falls back to nothing. Outside _WIN32
 * the function macro is plain "extern".
 */
#if defined(_WIN32)
#  include <windows.h>
#  if !defined(DLL_EXPORT)
#    define CRFPP_DLL_EXTERN __declspec(dllimport)
#  else
#    define CRFPP_DLL_EXTERN __declspec(dllexport)
#    define CRFPP_DLL_CLASS_EXTERN __declspec(dllexport)
#  endif
#endif

/* Fallbacks for whatever the platform section above left undefined. */
#if !defined(CRFPP_DLL_EXTERN)
#  define CRFPP_DLL_EXTERN extern
#endif

#if !defined(CRFPP_DLL_CLASS_EXTERN)
#  define CRFPP_DLL_CLASS_EXTERN
#endif
#ifndef SWIG
typedef struct crfpp_t crfpp_t;
typedef struct crfpp_model_t crfpp_model_t;
/* C interface */
CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_new(int, char**);
CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_new2(const char*);
CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_from_array_new(int, char**, const char *, size_t);
CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_from_array_new2(const char*, const char *, size_t);
CRFPP_DLL_EXTERN const char * crfpp_model_get_template(crfpp_model_t*);
CRFPP_DLL_EXTERN void crfpp_model_destroy(crfpp_model_t*);
CRFPP_DLL_EXTERN const char * crfpp_model_strerror(crfpp_model_t *);
CRFPP_DLL_EXTERN crfpp_t* crfpp_model_new_tagger(crfpp_model_t *);
CRFPP_DLL_EXTERN crfpp_t* crfpp_new(int, char**);
CRFPP_DLL_EXTERN crfpp_t* crfpp_new2(const char*);
CRFPP_DLL_EXTERN void crfpp_destroy(crfpp_t*);
CRFPP_DLL_EXTERN int crfpp_set_model(crfpp_t *, crfpp_model_t *);
CRFPP_DLL_EXTERN int crfpp_add2(crfpp_t*, size_t, const char **);
CRFPP_DLL_EXTERN int crfpp_add(crfpp_t*, const char*);
CRFPP_DLL_EXTERN size_t crfpp_size(crfpp_t*);
CRFPP_DLL_EXTERN size_t crfpp_xsize(crfpp_t*);
CRFPP_DLL_EXTERN size_t crfpp_dsize(crfpp_t*);
CRFPP_DLL_EXTERN const float* crfpp_weight_vector(crfpp_t*);
CRFPP_DLL_EXTERN size_t crfpp_result(crfpp_t*, size_t);
CRFPP_DLL_EXTERN size_t crfpp_answer(crfpp_t*, size_t);
CRFPP_DLL_EXTERN size_t crfpp_y(crfpp_t*, size_t);
CRFPP_DLL_EXTERN size_t crfpp_ysize(crfpp_t*);
CRFPP_DLL_EXTERN double crfpp_prob(crfpp_t*, size_t, size_t);
CRFPP_DLL_EXTERN double crfpp_prob2(crfpp_t*, size_t);
CRFPP_DLL_EXTERN double crfpp_prob3(crfpp_t*);
CRFPP_DLL_EXTERN void crfpp_set_penalty(crfpp_t *, size_t i, size_t j, double penalty);
CRFPP_DLL_EXTERN double crfpp_penalty(crfpp_t *, size_t i, size_t j);
CRFPP_DLL_EXTERN double crfpp_alpha(crfpp_t*, size_t, size_t);
CRFPP_DLL_EXTERN double crfpp_beta(crfpp_t*, size_t, size_t);
CRFPP_DLL_EXTERN double crfpp_emisstion_cost(crfpp_t*, size_t, size_t);
CRFPP_DLL_EXTERN double crfpp_next_transition_cost(crfpp_t*, size_t,
size_t, size_t);
CRFPP_DLL_EXTERN double crfpp_prev_transition_cost(crfpp_t*, size_t,
size_t, size_t);
CRFPP_DLL_EXTERN double crfpp_best_cost(crfpp_t*, size_t, size_t);
CRFPP_DLL_EXTERN const int* crfpp_emittion_vector(crfpp_t*, size_t, size_t);
CRFPP_DLL_EXTERN const int* crfpp_next_transition_vector(crfpp_t*, size_t,
size_t, size_t);
CRFPP_DLL_EXTERN const int* crfpp_prev_transition_vector(crfpp_t*, size_t,
size_t, size_t);
CRFPP_DLL_EXTERN double crfpp_Z(crfpp_t*);
CRFPP_DLL_EXTERN int crfpp_parse(crfpp_t*);
CRFPP_DLL_EXTERN int crfpp_empty(crfpp_t*);
CRFPP_DLL_EXTERN int crfpp_clear(crfpp_t*);
CRFPP_DLL_EXTERN int crfpp_next(crfpp_t*);
CRFPP_DLL_EXTERN int crfpp_test(int, char **);
CRFPP_DLL_EXTERN int crfpp_test2(const char *);
CRFPP_DLL_EXTERN int crfpp_learn(int, char **);
CRFPP_DLL_EXTERN int crfpp_learn2(const char *);
CRFPP_DLL_EXTERN const char* crfpp_strerror(crfpp_t*);
CRFPP_DLL_EXTERN const char* crfpp_yname(crfpp_t*, size_t);
CRFPP_DLL_EXTERN const char* crfpp_y2(crfpp_t*, size_t);
CRFPP_DLL_EXTERN const char* crfpp_x(crfpp_t*, size_t, size_t);
CRFPP_DLL_EXTERN const char** crfpp_x2(crfpp_t*, size_t);
CRFPP_DLL_EXTERN const char* crfpp_parse_tostr(crfpp_t*, const char*);
CRFPP_DLL_EXTERN const char* crfpp_parse_tostr2(crfpp_t*,
const char*, size_t);
CRFPP_DLL_EXTERN const char* crfpp_parse_tostr3(crfpp_t*, const char*,
size_t, char *, size_t);
CRFPP_DLL_EXTERN const char* crfpp_tostr(crfpp_t*);
CRFPP_DLL_EXTERN const char* crfpp_tostr2(crfpp_t*, char *, size_t);
CRFPP_DLL_EXTERN void crfpp_set_vlevel(crfpp_t *, unsigned int);
CRFPP_DLL_EXTERN unsigned int crfpp_vlevel(crfpp_t *);
CRFPP_DLL_EXTERN void crfpp_set_cost_factor(crfpp_t *, float);
CRFPP_DLL_EXTERN float crfpp_cost_factor(crfpp_t *);
CRFPP_DLL_EXTERN void crfpp_set_nbest(crfpp_t *, size_t);
#endif
#ifdef __cplusplus
}
#endif
/* C++ interface */
#ifdef __cplusplus
namespace CRFPP {
// Forward declaration so that Model::createTagger() can name its return
// type before the Tagger class is defined further down.
class Tagger;
// Abstract interface to a CRF++ model. Every operation is pure virtual;
// the createModel() / createModelFromArray() functions declared later in
// this namespace return pointers to this type.
// Members inside "#ifndef SWIG" are left out when SWIG is defined.
// NOTE: the order of the virtual members is part of the binary interface
// of this class; add documentation freely but do not reorder them.
class CRFPP_DLL_CLASS_EXTERN Model {
public:
#ifndef SWIG
// open model with parameters in argv[]
// e.g. argv[] = {"CRF++", "-m", "model", "-v3"};
// NOTE(review): the bool result presumably reports success -- confirm
// against the implementation.
virtual bool open(int argc, char** argv) = 0;
// open model with a single parameter string,
// e.g. arg = "-m model -v3";
virtual bool open(const char* arg) = 0;
// open model with parameters in argv[], taking the model itself from
// model_buf / model_size instead of the command line,
// e.g. argv[] = {"CRF++", "-v3"};
virtual bool openFromArray(int argc, char** argv,
const char *model_buf,
size_t model_size) = 0;
// same as above, with the parameters given as a single string.
// NOTE(review): the original example here was arg = "-m model -v3";
// whether "-m" is still meaningful when a buffer is supplied cannot be
// told from this header -- confirm.
virtual bool openFromArray(const char* arg,
const char *model_buf,
size_t model_size) = 0;
#endif
// return the template string embedded in this model file.
virtual const char *getTemplate() const = 0;
// create a Tagger object. The returned object shares this model object.
// NOTE(review): ownership of the returned pointer is not stated in this
// header -- confirm who deletes it.
virtual Tagger *createTagger() const = 0;
// NOTE(review): presumably returns the last error as a string, like
// Tagger::what() -- confirm.
virtual const char* what() = 0;
// virtual destructor so implementations can be destroyed through Model*.
virtual ~Model() {}
};
// Abstract interface to a CRF++ tagger. Tokens are added to the current
// context with add(), parse() processes that context, and the accessors
// below expose the tokens, the output tags and the associated scores.
// The createTagger() functions declared later in this namespace and
// Model::createTagger() return pointers to this type.
// Members inside "#ifndef SWIG" are left out when SWIG is defined.
// NOTE: the order of the virtual members is part of the binary interface
// of this class; add documentation freely but do not reorder them.
// Throughout, i is a token index, j and k are tag ids (or, for x(), j is
// a column index).
class CRFPP_DLL_CLASS_EXTERN Tagger {
public:
#ifndef SWIG
// open model with parameters in argv[]
// e.g. argv[] = {"CRF++", "-m", "model", "-v3"};
virtual bool open(int argc, char** argv) = 0;
// open model with a single parameter string,
// e.g. arg = "-m model -v3";
virtual bool open(const char* arg) = 0;
// add str[] as tokens to the current context
// NOTE(review): size is presumably the number of entries in str[] --
// confirm.
virtual bool add(size_t size, const char **str) = 0;
// close the current model
virtual void close() = 0;
// return the parameter vector; its size should be dsize()
virtual const float *weight_vector() const = 0;
#endif
// set the Model used by this tagger
virtual bool set_model(const Model &model) = 0;
// set vlevel
virtual void set_vlevel(unsigned int vlevel) = 0;
// get vlevel
virtual unsigned int vlevel() const = 0;
// set cost factor
virtual void set_cost_factor(float cost_factor) = 0;
// get cost factor
virtual float cost_factor() const = 0;
// set nbest
virtual void set_nbest(size_t nbest) = 0;
// get nbest
virtual size_t nbest() const = 0;
// add one line to the current context
virtual bool add(const char* str) = 0;
// return the number of tokens (lines)
virtual size_t size() const = 0;
// return the number of columns
virtual size_t xsize() const = 0;
// return the number of features
virtual size_t dsize() const = 0;
// return the output tag id of the i-th token
virtual size_t result(size_t i) const = 0;
// return the answer tag id of the i-th token, if it is available
virtual size_t answer(size_t i) const = 0;
// alias of result(i)
virtual size_t y(size_t i) const = 0;
// return the output tag of the i-th token as a string
virtual const char* y2(size_t i) const = 0;
// return the i-th tag id as a string
virtual const char* yname(size_t i) const = 0;
// return the token at [i, j] as a string (i: token, j: column)
virtual const char* x(size_t i, size_t j) const = 0;
#ifndef SWIG
// return the i-th token as an array of strings
// NOTE(review): presumably one entry per column, i.e. xsize() entries --
// confirm.
virtual const char** x(size_t) const = 0;
#endif
// return the number of output tags
virtual size_t ysize() const = 0;
// return the marginal probability of the j-th tag id at the i-th token
virtual double prob(size_t i, size_t j) const = 0;
// return the marginal probability of the output tag at the i-th token;
// same as prob(i, tagger->y(i));
virtual double prob(size_t i) const = 0;
// return the conditional probability of the entire output
virtual double prob() const = 0;
// set a token-level penalty. This is useful for implementing
// dual decomposition decoding, e.g.
// "Dual Decomposition for Parsing with Non-Projective Head Automata"
// Terry Koo, Alexander M. Rush, Michael Collins, Tommi Jaakkola,
// David Sontag
virtual void set_penalty(size_t i, size_t j, double penalty) = 0;
// return the penalty stored for [i, j]
virtual double penalty(size_t i, size_t j) const = 0;
// return the forward log-prob of the j-th tag at the i-th token
virtual double alpha(size_t i, size_t j) const = 0;
// return the backward log-prob of the j-th tag at the i-th token
virtual double beta(size_t i, size_t j) const = 0;
// return the emission cost of the j-th tag at the i-th token
virtual double emission_cost(size_t i, size_t j) const = 0;
// return the transition cost of [j-th tag at i-th token] to
// [k-th tag at (i+1)-th token]
virtual double next_transition_cost(size_t i,
size_t j, size_t k) const = 0;
// return the transition cost of [j-th tag at i-th token] to
// [k-th tag at (i-1)-th token]
virtual double prev_transition_cost(size_t i,
size_t j, size_t k) const = 0;
// return the best accumulated cost to the j-th tag at the i-th token,
// as used in the Viterbi search
virtual double best_cost(size_t i, size_t j) const = 0;
#ifndef SWIG
// return the emission feature vector of the j-th tag at the i-th token
// NOTE(review): the length / terminator of the returned int array is
// not stated in this header -- confirm before iterating over it.
virtual const int* emission_vector(size_t i, size_t j) const = 0;
// return the transition feature vector of [j-th tag at i-th token] to
// [k-th tag at (i+1)-th token]
virtual const int* next_transition_vector(size_t i,
size_t j, size_t k) const = 0;
// return the transition feature vector of [j-th tag at i-th token] to
// [k-th tag at (i-1)-th token]
virtual const int* prev_transition_vector(size_t i,
size_t j, size_t k) const = 0;
#endif
// normalizing factor (log-prob)
virtual double Z() const = 0;
// parse the current context and change the internal state;
// returns false on failure
virtual bool parse() = 0;
// return true if the context is empty
virtual bool empty() const = 0;
// clear all context
virtual bool clear() = 0;
// change the internal state to the next-best output.
// Calling it n times yields the n-best results.
// The -nN option, with N >= 2, must be specified to use this function.
virtual bool next() = 0;
// parse 'str' and return the result as a string.
// 'str' must be written in CRF++'s input format
virtual const char* parse(const char* str) = 0;
#ifndef SWIG
// return the parsed result as a string
virtual const char* toString() = 0;
// return the parsed result as a string.
// The result is saved in the buffer 'result'; 'size' is the
// size of that buffer. Returns NULL on failure
virtual const char* toString(char* result , size_t size) = 0;
// parse 'str' and return the parsed result.
// The caller does not need to delete the return value, but the buffer
// is rewritten whenever a parse method is called.
// Returns NULL on failure
// NOTE(review): 'size' is presumably the length of 'str' -- confirm.
virtual const char* parse(const char *str, size_t size) = 0;
// parse 'str' and return the parsed result.
// The result is stored in the buffer 'result';
// 'size2' is the size of that buffer. Returns NULL on failure
// NOTE(review): 'size1' is presumably the length of 'str' -- confirm.
virtual const char* parse(const char *str, size_t size1,
char *result, size_t size2) = 0;
#endif
// return the internal error code as a string
virtual const char* what() = 0;
// virtual destructor so implementations can be destroyed through Tagger*.
virtual ~Tagger() {}
};
/* factory functions */

// --- Tagger ---------------------------------------------------------------
// Build a CRFPP::Tagger from an argument vector,
// e.g. argv[] = {"CRF++", "-m", "model", "-v3"};
CRFPP_DLL_EXTERN Tagger *createTagger(int argc, char **argv);
// Build a CRFPP::Tagger from a single argument string,
// e.g. arg = "-m model -v3";
CRFPP_DLL_EXTERN Tagger *createTagger(const char *arg);

// --- Model ----------------------------------------------------------------
// Build a CRFPP::Model from an argument vector,
// e.g. argv[] = {"CRF++", "-m", "model", "-v3"};
CRFPP_DLL_EXTERN Model *createModel(int argc, char **argv);
// Build a CRFPP::Model from a single argument string,
// e.g. arg = "-m model -v3";
CRFPP_DLL_EXTERN Model *createModel(const char *arg);

// --- Model held in memory -------------------------------------------------
// Same two argument styles as createModel(), but the model is loaded from
// the buffer model_buf, whose size is model_size.
CRFPP_DLL_EXTERN Model *createModelFromArray(int argc, char **argv,
                                             const char *model_buf,
                                             size_t model_size);
CRFPP_DLL_EXTERN Model *createModelFromArray(const char *arg,
                                             const char *model_buf,
                                             size_t model_size);

// --- Errors ---------------------------------------------------------------
// Error of createTagger(), returned as a string.
CRFPP_DLL_EXTERN const char *getTaggerError();
// Alias of getTaggerError().
CRFPP_DLL_EXTERN const char *getLastError();
}
#endif
#endif
|