/*
  CRF++ -- Yet Another CRF toolkit

  $Id: crfpp.h 1592 2007-02-12 09:40:53Z taku $;

  Copyright(C) 2005-2007 Taku Kudo <taku@chasen.org>
*/
#ifndef CRFPP_CRFPP_H_
#define CRFPP_CRFPP_H_

/* C interface */
#ifdef __cplusplus
#include <cstdio>
#else
#include <stdio.h>
#endif

/* size_t is used unqualified by every declaration below; <stddef.h>
   guarantees the global name in both C and C++ translation units. */
#include <stddef.h>

/* Platform headers and linkage macros are set up before the extern "C"
   block so that no system header is pulled in under C linkage. */
#ifdef _WIN32
#include <windows.h>
#  ifdef DLL_EXPORT
#    define CRFPP_DLL_EXTERN  __declspec(dllexport)
#    define CRFPP_DLL_CLASS_EXTERN  __declspec(dllexport)
#  else
#    define CRFPP_DLL_EXTERN  __declspec(dllimport)
#  endif
#endif

/* Fallbacks for every configuration that did not define the macros above. */
#ifndef CRFPP_DLL_EXTERN
#  define CRFPP_DLL_EXTERN extern
#endif

#ifndef CRFPP_DLL_CLASS_EXTERN
#  define CRFPP_DLL_CLASS_EXTERN
#endif

#ifdef __cplusplus
extern "C" {
#endif

#ifndef SWIG
  /* Opaque handle types; only forward-declared in this header. */
  typedef struct crfpp_t crfpp_t;
  typedef struct crfpp_model_t crfpp_model_t;

  /* C interface */

  /* -- model handle -- */
  CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_new(int, char**);
  CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_new2(const char*);
  CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_from_array_new(int, char**,
                                                             const char *,
                                                             size_t);
  CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_from_array_new2(const char*,
                                                              const char *,
                                                              size_t);
  CRFPP_DLL_EXTERN const char *   crfpp_model_get_template(crfpp_model_t*);
  CRFPP_DLL_EXTERN void           crfpp_model_destroy(crfpp_model_t*);
  CRFPP_DLL_EXTERN const char *   crfpp_model_strerror(crfpp_model_t *);
  CRFPP_DLL_EXTERN crfpp_t*       crfpp_model_new_tagger(crfpp_model_t *);

  /* -- tagger handle -- */
  CRFPP_DLL_EXTERN crfpp_t*     crfpp_new(int, char**);
  CRFPP_DLL_EXTERN crfpp_t*     crfpp_new2(const char*);
  CRFPP_DLL_EXTERN void         crfpp_destroy(crfpp_t*);
  CRFPP_DLL_EXTERN int          crfpp_set_model(crfpp_t *, crfpp_model_t *);
  CRFPP_DLL_EXTERN int          crfpp_add2(crfpp_t*, size_t, const char **);
  CRFPP_DLL_EXTERN int          crfpp_add(crfpp_t*, const char*);
  CRFPP_DLL_EXTERN size_t       crfpp_size(crfpp_t*);
  CRFPP_DLL_EXTERN size_t       crfpp_xsize(crfpp_t*);
  CRFPP_DLL_EXTERN size_t       crfpp_dsize(crfpp_t*);
  CRFPP_DLL_EXTERN const float* crfpp_weight_vector(crfpp_t*);
  CRFPP_DLL_EXTERN size_t       crfpp_result(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN size_t       crfpp_answer(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN size_t       crfpp_y(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN size_t       crfpp_ysize(crfpp_t*);
  CRFPP_DLL_EXTERN double       crfpp_prob(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN double       crfpp_prob2(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN double       crfpp_prob3(crfpp_t*);
  CRFPP_DLL_EXTERN void         crfpp_set_penalty(crfpp_t *, size_t i, size_t j,
                                                  double penalty);
  CRFPP_DLL_EXTERN double       crfpp_penalty(crfpp_t *, size_t i, size_t j);
  CRFPP_DLL_EXTERN double       crfpp_alpha(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN double       crfpp_beta(crfpp_t*, size_t, size_t);
  /* NOTE(review): "emisstion" (here) and "emittion" (crfpp_emittion_vector
     below) are spelled differently from the C++ emission_cost() /
     emission_vector(). The names are kept exactly as declared because they
     are exported symbols -- confirm against the implementation before
     renaming. */
  CRFPP_DLL_EXTERN double       crfpp_emisstion_cost(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN double       crfpp_next_transition_cost(crfpp_t*, size_t,
                                                           size_t, size_t);
  CRFPP_DLL_EXTERN double       crfpp_prev_transition_cost(crfpp_t*, size_t,
                                                           size_t, size_t);
  CRFPP_DLL_EXTERN double       crfpp_best_cost(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN const int*   crfpp_emittion_vector(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN const int*   crfpp_next_transition_vector(crfpp_t*, size_t,
                                                             size_t, size_t);
  CRFPP_DLL_EXTERN const int*   crfpp_prev_transition_vector(crfpp_t*, size_t,
                                                             size_t, size_t);
  CRFPP_DLL_EXTERN double       crfpp_Z(crfpp_t*);
  CRFPP_DLL_EXTERN int          crfpp_parse(crfpp_t*);
  CRFPP_DLL_EXTERN int          crfpp_empty(crfpp_t*);
  CRFPP_DLL_EXTERN int          crfpp_clear(crfpp_t*);
  CRFPP_DLL_EXTERN int          crfpp_next(crfpp_t*);
  CRFPP_DLL_EXTERN int          crfpp_test(int, char **);
  CRFPP_DLL_EXTERN int          crfpp_test2(const char *);
  CRFPP_DLL_EXTERN int          crfpp_learn(int, char **);
  CRFPP_DLL_EXTERN int          crfpp_learn2(const char *);
  CRFPP_DLL_EXTERN const char*  crfpp_strerror(crfpp_t*);
  CRFPP_DLL_EXTERN const char*  crfpp_yname(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_y2(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_x(crfpp_t*, size_t, size_t);
  CRFPP_DLL_EXTERN const char** crfpp_x2(crfpp_t*, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr(crfpp_t*, const char*);
  CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr2(crfpp_t*,
                                                   const char*, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr3(crfpp_t*, const char*,
                                                   size_t, char *, size_t);
  CRFPP_DLL_EXTERN const char*  crfpp_tostr(crfpp_t*);
  CRFPP_DLL_EXTERN const char*  crfpp_tostr2(crfpp_t*, char *, size_t);

  /* -- tagger settings -- */
  CRFPP_DLL_EXTERN void         crfpp_set_vlevel(crfpp_t *, unsigned int);
  CRFPP_DLL_EXTERN unsigned int crfpp_vlevel(crfpp_t *);
  CRFPP_DLL_EXTERN void         crfpp_set_cost_factor(crfpp_t *, float);
  CRFPP_DLL_EXTERN float        crfpp_cost_factor(crfpp_t *);
  CRFPP_DLL_EXTERN void         crfpp_set_nbest(crfpp_t *, size_t);
#endif

#ifdef __cplusplus
}
#endif

/* C++ interface */
#ifdef __cplusplus

namespace CRFPP {

class Tagger;

// Abstract interface to a loaded model. Every operation is pure virtual;
// instances are obtained from the createModel*() factory functions declared
// at the end of this namespace.
class CRFPP_DLL_CLASS_EXTERN Model {
 public:
#ifndef SWIG
  // open model with parameters in argv[]
  // e.g., argv[] = {"CRF++", "-m", "model", "-v3"};
  virtual bool open(int argc,  char** argv) = 0;

  // open model with parameter arg, e.g. arg = "-m model -v3";
  virtual bool open(const char* arg) = 0;

  // open model with parameters in argv[].
  // e.g., argv[] = {"CRF++", "-v3"};
  // The model data is read from model_buf, which is model_size bytes long.
  virtual bool openFromArray(int argc,  char** argv,
                             const char *model_buf,
                             size_t model_size) = 0;

  // open model with parameter arg, e.g. arg = "-m model -v3";
  // The model data is read from model_buf, which is model_size bytes long.
  virtual bool openFromArray(const char* arg,
                             const char *model_buf,
                             size_t model_size) = 0;
#endif
  // return template string embedded in this model file.
  virtual const char *getTemplate() const = 0;

  // create Tagger object. The returned object shares the same
  // model object
  virtual Tagger *createTagger() const = 0;

  // return the error message as string
  // NOTE(review): presumably describes the most recent failure -- confirm.
  virtual const char* what() = 0;

  virtual ~Model() {}
};

// Abstract interface to a tagger (decoder). Every operation is pure
// virtual; instances are obtained from createTagger() or
// Model::createTagger().
class CRFPP_DLL_CLASS_EXTERN Tagger {
 public:
#ifndef SWIG
  // open model with parameters in argv[]
  // e.g., argv[] = {"CRF++", "-m", "model", "-v3"};
  virtual bool open(int argc,  char** argv) = 0;

  // open model with parameter arg, e.g. arg = "-m model -v3";
  virtual bool open(const char* arg) = 0;

  // add str[] as tokens to the current context
  virtual bool add(size_t size, const char **str) = 0;

  // close the current model
  virtual void close() = 0;

  // return parameter vector. the size should be dsize();
  virtual const float *weight_vector() const = 0;
#endif

  // set Model
  virtual bool set_model(const Model &model) = 0;

  // set vlevel
  virtual void set_vlevel(unsigned int vlevel) = 0;

  // get vlevel
  virtual unsigned int vlevel() const = 0;

  // set cost factor
  virtual void set_cost_factor(float cost_factor) = 0;

  // get cost factor
  virtual float cost_factor() const = 0;

  // set nbest
  virtual void set_nbest(size_t nbest) = 0;

  // get nbest
  virtual size_t nbest() const = 0;

  // add one line to the current context
  virtual bool add(const char* str) = 0;

  // return size of tokens(lines)
  virtual size_t size() const = 0;

  // return size of column
  virtual size_t xsize() const = 0;

  // return size of features
  virtual size_t dsize() const = 0;

  // return output tag-id of i-th token
  virtual size_t result(size_t i) const = 0;

  // return answer tag-id of i-th token if it is available
  virtual size_t answer(size_t i) const = 0;

  // alias of result(i)
  virtual size_t y(size_t i) const = 0;

  // return output tag of i-th token as string
  virtual const char*   y2(size_t i) const = 0;

  // return i-th tag-id as string
  virtual const char*   yname(size_t i) const = 0;

  // return token at [i,j] as string(i:token j:column)
  virtual const char*   x(size_t i, size_t j) const = 0;

#ifndef SWIG
  // return an array of strings at i-th tokens
  virtual const char**  x(size_t) const = 0;
#endif

  // return size of output tags
  virtual size_t ysize() const = 0;

  // return marginal probability of j-th tag id at i-th token
  virtual double prob(size_t i, size_t j) const = 0;

  // return marginal probability of output tag at i-th token
  // same as prob(i, tagger->y(i));
  virtual double prob(size_t i) const = 0;

  // return conditional probability of entire output
  virtual double prob() const = 0;

  // set token-level penalty. It would be useful for implementing
  // Dual decomposition decoding.
  // e.g.
  // "Dual Decomposition for Parsing with Non-Projective Head Automata"
  // Terry Koo Alexander M. Rush Michael Collins Tommi Jaakkola David Sontag
  virtual void set_penalty(size_t i, size_t j, double penalty) = 0;
  virtual double penalty(size_t i, size_t j) const = 0;

  // return forward log-prob of the j-th tag at i-th token
  virtual double alpha(size_t i, size_t j) const = 0;

  // return backward log-prob of the j-th tag at i-th token
  virtual double beta(size_t i, size_t j) const = 0;

  // return emission cost of the j-th tag at i-th token
  virtual double emission_cost(size_t i, size_t j) const = 0;

  // return transition cost of [j-th tag at i-th token] to
  // [k-th tag at(i+1)-th token]
  virtual double next_transition_cost(size_t i,
                                      size_t j, size_t k) const = 0;

  // return transition cost of [j-th tag at i-th token] to
  // [k-th tag at(i-1)-th token]
  virtual double prev_transition_cost(size_t i,
                                      size_t j, size_t k) const = 0;

  // return the best accumulative cost to the j-th tag at i-th token
  // used in viterbi search
  virtual double best_cost(size_t i, size_t j) const = 0;

#ifndef SWIG
  // return emission feature vector of the j-th tag at i-th token
  virtual const int* emission_vector(size_t i, size_t j) const = 0;

  // return transition feature vector of [j-th tag at i-th token] to
  // [k-th tag at(i+1)-th token]
  virtual const int* next_transition_vector(size_t i,
                                            size_t j, size_t k) const = 0;

  // return transition feature vector of [j-th tag at i-th token] to
  // [k-th tag at(i-1)-th token]
  virtual const int* prev_transition_vector(size_t i,
                                            size_t j, size_t k) const = 0;
#endif

  // normalizing factor(log-prob)
  virtual double Z() const = 0;

  // do parse and change the internal status, if failed, returns false
  virtual bool parse() = 0;

  // return true if the context is empty
  virtual bool empty() const = 0;

  // clear all context
  virtual bool clear() = 0;

  // change the internal state to output the next-optimal output.
  // calling it n times yields the n-best results.
  // Need to specify -nN option to use this function, where
  // N>=2
  virtual bool next() = 0;

  // parse 'str' and return result as string
  // 'str' must be written in CRF++'s input format
  virtual const char* parse(const char* str) = 0;

#ifndef SWIG
  // return parsed result as string
  virtual const char* toString() = 0;

  // return parsed result as string.
  // Result is saved in the buffer 'result', 'size' is the
  // size of the buffer. if failed, return NULL
  virtual const char* toString(char* result , size_t size) = 0;

  // parse 'str' and return parsed result.
  // You don't need to delete return value, but the buffer
  // is rewritten whenever you call parse method.
  // if failed, return NULL
  virtual const char* parse(const char *str, size_t size) = 0;

  // parse 'str' and return parsed result.
  // The result is stored in the buffer 'result'.
  // 'size2' is the size of the buffer. if failed, return NULL
  virtual const char* parse(const char *str, size_t size1,
                            char *result, size_t size2) = 0;
#endif
  // return internal error code as string
  virtual const char* what() = 0;

  virtual ~Tagger() {}
};

/* factory method */

// create CRFPP::Tagger instance with parameters in argv[]
// e.g., argv[] = {"CRF++", "-m", "model", "-v3"};
CRFPP_DLL_EXTERN Tagger *createTagger(int argc, char **argv);

// create CRFPP::Tagger instance with parameter in arg
// e.g. arg = "-m model -v3";
CRFPP_DLL_EXTERN Tagger *createTagger(const char *arg);

// create CRFPP::Model instance with parameters in argv[]
// e.g., argv[] = {"CRF++", "-m", "model", "-v3"};
CRFPP_DLL_EXTERN Model *createModel(int argc, char **argv);

// load model from [model_buf, model_buf + model_size].
CRFPP_DLL_EXTERN Model *createModelFromArray(int argc, char **argv,
                                             const char *model_buf,
                                             size_t model_size);

// create CRFPP::Model instance with parameter in arg
// e.g. arg = "-m model -v3";
CRFPP_DLL_EXTERN Model *createModel(const char *arg);

// load model from [model_buf, model_buf + model_size].
CRFPP_DLL_EXTERN Model *createModelFromArray(const char *arg,
                                             const char *model_buf,
                                             size_t model_size);

// return error code of createTagger();
CRFPP_DLL_EXTERN const char *getTaggerError();

// alias of getTaggerError();
CRFPP_DLL_EXTERN const char *getLastError();
}

#endif
#endif