CRF++
/home/taku/proj/crfpp/crfpp.h
Go to the documentation of this file.
00001 /*
00002   CRF++ -- Yet Another CRF toolkit
00003 
00004   $Id: crfpp.h 1592 2007-02-12 09:40:53Z taku $;
00005 
00006   Copyright(C) 2005-2007 Taku Kudo <taku@chasen.org>
00007 */
00008 #ifndef CRFPP_CRFPP_H_
00009 #define CRFPP_CRFPP_H_
00010 
00011 /* C interface  */
00012 #ifdef __cplusplus
00013 #include <cstdio>
00014 #else
00015 #include <stdio.h>
00016 #endif
00017 
00018 #ifdef __cplusplus
00019 extern "C" {
00020 #endif
00021 
00022 #ifdef _WIN32  /* select the __declspec decoration used on the declarations in this header */
00023 #include <windows.h>
00024 #  ifdef DLL_EXPORT  /* DLL_EXPORT defined: functions and classes are both marked dllexport */
00025 #    define CRFPP_DLL_EXTERN  __declspec(dllexport)
00026 #    define CRFPP_DLL_CLASS_EXTERN  __declspec(dllexport)
00027 #  else  /* otherwise functions are dllimport; CRFPP_DLL_CLASS_EXTERN is left to the empty fallback below */
00028 #    define CRFPP_DLL_EXTERN  __declspec(dllimport)
00029 #  endif  /* DLL_EXPORT */
00030 #endif  /* _WIN32 */
00031 
00032 #ifndef CRFPP_DLL_EXTERN  /* fallback when not set above: plain extern */
00033 #  define CRFPP_DLL_EXTERN extern
00034 #endif
00035 
00036 #ifndef CRFPP_DLL_CLASS_EXTERN  /* fallback when not set above: no decoration on classes */
00037 #  define CRFPP_DLL_CLASS_EXTERN
00038 #endif
00039 
00040 #ifndef SWIG
00041   typedef struct crfpp_t crfpp_t;
00042   typedef struct crfpp_model_t crfpp_model_t;
00043 
00044   /* C interface: functions operating on the crfpp_model_t / crfpp_t handle types declared above */
00045   CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_new(int,  char**);
00046   CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_new2(const char*);
00047   CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_from_array_new(int,  char**, const char *, size_t);
00048   CRFPP_DLL_EXTERN crfpp_model_t* crfpp_model_from_array_new2(const char*, const char *, size_t);
00049   CRFPP_DLL_EXTERN const char *   crfpp_model_get_template(crfpp_model_t*);
00050   CRFPP_DLL_EXTERN void           crfpp_model_destroy(crfpp_model_t*);
00051   CRFPP_DLL_EXTERN const char *   crfpp_model_strerror(crfpp_model_t *);
00052   CRFPP_DLL_EXTERN crfpp_t*       crfpp_model_new_tagger(crfpp_model_t *);
00053 
00054   CRFPP_DLL_EXTERN crfpp_t* crfpp_new(int,  char**);
00055   CRFPP_DLL_EXTERN crfpp_t* crfpp_new2(const char*);
00056   CRFPP_DLL_EXTERN void     crfpp_destroy(crfpp_t*);
00057   CRFPP_DLL_EXTERN int      crfpp_set_model(crfpp_t *, crfpp_model_t *);
00058   CRFPP_DLL_EXTERN int      crfpp_add2(crfpp_t*, size_t, const char **);
00059   CRFPP_DLL_EXTERN int      crfpp_add(crfpp_t*, const char*);
00060   CRFPP_DLL_EXTERN size_t   crfpp_size(crfpp_t*);
00061   CRFPP_DLL_EXTERN size_t   crfpp_xsize(crfpp_t*);
00062   CRFPP_DLL_EXTERN size_t   crfpp_dsize(crfpp_t*);
00063   CRFPP_DLL_EXTERN const float* crfpp_weight_vector(crfpp_t*);
00064   CRFPP_DLL_EXTERN size_t   crfpp_result(crfpp_t*, size_t);
00065   CRFPP_DLL_EXTERN size_t   crfpp_answer(crfpp_t*, size_t);
00066   CRFPP_DLL_EXTERN size_t   crfpp_y(crfpp_t*, size_t);
00067   CRFPP_DLL_EXTERN size_t   crfpp_ysize(crfpp_t*);
00068   CRFPP_DLL_EXTERN double   crfpp_prob(crfpp_t*, size_t, size_t);
00069   CRFPP_DLL_EXTERN double   crfpp_prob2(crfpp_t*, size_t);
00070   CRFPP_DLL_EXTERN double   crfpp_prob3(crfpp_t*);
00071   CRFPP_DLL_EXTERN void     crfpp_set_penalty(crfpp_t *, size_t i, size_t j, double penalty);
00072   CRFPP_DLL_EXTERN double   crfpp_penalty(crfpp_t *, size_t i, size_t j);
00073   CRFPP_DLL_EXTERN double   crfpp_alpha(crfpp_t*, size_t, size_t);
00074   CRFPP_DLL_EXTERN double   crfpp_beta(crfpp_t*, size_t, size_t);
00075   CRFPP_DLL_EXTERN double   crfpp_emisstion_cost(crfpp_t*, size_t, size_t);  /* NOTE(review): "emisstion" is the declared name (cf. Tagger::emission_cost); renaming would change the exported symbol */
00076   CRFPP_DLL_EXTERN double   crfpp_next_transition_cost(crfpp_t*, size_t,
00077                                                        size_t, size_t);
00078   CRFPP_DLL_EXTERN double   crfpp_prev_transition_cost(crfpp_t*, size_t,
00079                                                        size_t, size_t);
00080   CRFPP_DLL_EXTERN double   crfpp_best_cost(crfpp_t*, size_t, size_t);
00081   CRFPP_DLL_EXTERN const int* crfpp_emittion_vector(crfpp_t*, size_t, size_t);  /* NOTE(review): "emittion" is the declared name (cf. Tagger::emission_vector); renaming would change the exported symbol */
00082   CRFPP_DLL_EXTERN const int* crfpp_next_transition_vector(crfpp_t*, size_t,
00083                                                            size_t, size_t);
00084   CRFPP_DLL_EXTERN const int* crfpp_prev_transition_vector(crfpp_t*, size_t,
00085                                                            size_t, size_t);
00086   CRFPP_DLL_EXTERN double   crfpp_Z(crfpp_t*);
00087   CRFPP_DLL_EXTERN int      crfpp_parse(crfpp_t*);
00088   CRFPP_DLL_EXTERN int      crfpp_empty(crfpp_t*);
00089   CRFPP_DLL_EXTERN int      crfpp_clear(crfpp_t*);
00090   CRFPP_DLL_EXTERN int      crfpp_next(crfpp_t*);
00091   CRFPP_DLL_EXTERN int      crfpp_test(int, char **);
00092   CRFPP_DLL_EXTERN int      crfpp_test2(const char *);
00093   CRFPP_DLL_EXTERN int      crfpp_learn(int, char **);
00094   CRFPP_DLL_EXTERN int      crfpp_learn2(const char *);
00095   CRFPP_DLL_EXTERN const char*  crfpp_strerror(crfpp_t*);
00096   CRFPP_DLL_EXTERN const char*  crfpp_yname(crfpp_t*, size_t);
00097   CRFPP_DLL_EXTERN const char*  crfpp_y2(crfpp_t*, size_t);
00098   CRFPP_DLL_EXTERN const char*  crfpp_x(crfpp_t*, size_t, size_t);
00099   CRFPP_DLL_EXTERN const char** crfpp_x2(crfpp_t*, size_t);
00100   CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr(crfpp_t*, const char*);
00101   CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr2(crfpp_t*,
00102                                                    const char*, size_t);
00103   CRFPP_DLL_EXTERN const char*  crfpp_parse_tostr3(crfpp_t*, const char*,
00104                                                    size_t, char *, size_t);
00105   CRFPP_DLL_EXTERN const char*  crfpp_tostr(crfpp_t*);
00106   CRFPP_DLL_EXTERN const char*  crfpp_tostr2(crfpp_t*, char *, size_t);
00107 
00108   CRFPP_DLL_EXTERN void crfpp_set_vlevel(crfpp_t *, unsigned int);
00109   CRFPP_DLL_EXTERN unsigned int crfpp_vlevel(crfpp_t *);
00110   CRFPP_DLL_EXTERN void crfpp_set_cost_factor(crfpp_t *, float);
00111   CRFPP_DLL_EXTERN float crfpp_cost_factor(crfpp_t *);
00112   CRFPP_DLL_EXTERN void crfpp_set_nbest(crfpp_t *, size_t);
00113 #endif
00114 
00115 #ifdef __cplusplus
00116 }
00117 #endif
00118 
00119 /* C++ interface */
00120 #ifdef __cplusplus
00121 
00122 namespace CRFPP {
00123 
00124 class Tagger;
00125 
00126 class CRFPP_DLL_CLASS_EXTERN Model {  // abstract model interface; every operation is pure virtual
00127  public:
00128 #ifndef SWIG
00129   // open model with parameters in argv[],
00130   // e.g., argv[] = {"CRF++", "-m", "model", "-v3"};
00131   virtual bool open(int argc,  char** argv) = 0;
00132 
00133   // open model with parameter string arg, e.g., arg = "-m model -v3";
00134   virtual bool open(const char* arg) = 0;
00135 
00136   // open model from the buffer model_buf of model_size bytes, with parameters in argv[],
00137   // e.g., argv[] = {"CRF++", "-v3"};
00138   virtual bool openFromArray(int argc,  char** argv,
00139                              const char *model_buf,
00140                              size_t model_size) = 0;
00141 
00142   // open model from the buffer model_buf with parameter string arg, e.g., arg = "-m model -v3";
00143   virtual bool openFromArray(const char* arg,
00144                              const char *model_buf,
00145                              size_t model_size) = 0;
00146 #endif
00147   // return the template string embedded in this model file.
00148   virtual const char *getTemplate() const = 0;
00149 
00150   // create a Tagger object. The returned object shares this
00151   // model object
00152   virtual Tagger *createTagger() const = 0;
00153 
00154   virtual const char* what() = 0;  // presumably the last error as a string (cf. Tagger::what) -- TODO confirm
00155 
00156   virtual ~Model() {}
00157 };
00158 
00159 class CRFPP_DLL_CLASS_EXTERN Tagger {  // abstract tagger interface; every operation is pure virtual
00160  public:
00161 #ifndef SWIG
00162   // open model with parameters in argv[],
00163   // e.g., argv[] = {"CRF++", "-m", "model", "-v3"};
00164   virtual bool open(int argc,  char** argv) = 0;
00165 
00166   // open model with parameter string arg, e.g., arg = "-m model -v3";
00167   virtual bool open(const char* arg) = 0;
00168 
00169   // add the size strings in str[] as tokens to the current context
00170   virtual bool add(size_t size, const char **str) = 0;
00171 
00172   // close the current model
00173   virtual void close() = 0;
00174 
00175   // return the parameter vector. The size should be dsize().
00176   virtual const float *weight_vector() const = 0;
00177 #endif
00178 
00179   // set the Model to use
00180   virtual bool set_model(const Model &model) = 0;
00181 
00182   // set vlevel
00183   virtual void set_vlevel(unsigned int vlevel) = 0;
00184 
00185   // get vlevel
00186   virtual unsigned int vlevel() const = 0;
00187 
00188   // set cost factor
00189   virtual void set_cost_factor(float cost_factor) = 0;
00190 
00191   // get cost factor
00192   virtual float cost_factor() const = 0;
00193 
00194   // set nbest
00195   virtual void set_nbest(size_t nbest) = 0;
00196 
00197   // get nbest
00198   virtual size_t nbest() const = 0;
00199 
00200   // add one line to the current context
00201   virtual bool add(const char* str) = 0;
00202 
00203   // return the number of tokens (lines)
00204   virtual size_t size() const = 0;
00205 
00206   // return the number of columns
00207   virtual size_t xsize() const = 0;
00208 
00209   // return the number of features
00210   virtual size_t dsize() const = 0;
00211 
00212   // return the output tag-id of the i-th token
00213   virtual size_t result(size_t i) const = 0;
00214 
00215   // return the answer tag-id of the i-th token if it is available
00216   virtual size_t answer(size_t i) const = 0;
00217 
00218   // alias of result(i)
00219   virtual size_t y(size_t i) const = 0;
00220 
00221   // return the output tag of the i-th token as a string
00222   virtual const char*   y2(size_t i) const = 0;
00223 
00224   // return the i-th tag-id as a string
00225   virtual const char*   yname(size_t i) const = 0;
00226 
00227   // return the token at [i,j] as a string (i: token, j: column)
00228   virtual const char*   x(size_t i, size_t j) const = 0;
00229 
00230 #ifndef SWIG
00231   // return an array of strings for the i-th token
00232   virtual const char**  x(size_t) const = 0;
00233 #endif
00234 
00235   // return the number of output tags
00236   virtual size_t ysize() const = 0;
00237 
00238   // return the marginal probability of the j-th tag id at the i-th token
00239   virtual double prob(size_t i, size_t j) const = 0;
00240 
00241   // return the marginal probability of the output tag at the i-th token;
00242   // same as prob(i, tagger->y(i));
00243   virtual double prob(size_t i) const = 0;
00244 
00245   // return the conditional probability of the entire output
00246   virtual double prob() const = 0;
00247 
00248   // set token-level penalty. It would be useful for implementing
00249   // dual decomposition decoding.
00250   // e.g.
00251   // "Dual Decomposition for Parsing with Non-Projective Head Automata"
00252   // Terry Koo, Alexander M. Rush, Michael Collins, Tommi Jaakkola, David Sontag
00253   virtual void set_penalty(size_t i, size_t j, double penalty) = 0;
00254   virtual double penalty(size_t i, size_t j) const = 0;
00255 
00256   // return the forward log-prob of the j-th tag at the i-th token
00257   virtual double alpha(size_t i, size_t j) const = 0;
00258 
00259   // return the backward log-prob of the j-th tag at the i-th token
00260   virtual double beta(size_t i, size_t j) const = 0;
00261 
00262   // return the emission cost of the j-th tag at the i-th token
00263   virtual double emission_cost(size_t i, size_t j) const = 0;
00264 
00265   // return the transition cost of [j-th tag at i-th token] to
00266   // [k-th tag at (i+1)-th token]
00267   virtual double next_transition_cost(size_t i,
00268                                       size_t j, size_t k) const = 0;
00269 
00270   // return the transition cost of [j-th tag at i-th token] to
00271   // [k-th tag at (i-1)-th token]
00272   virtual double prev_transition_cost(size_t i,
00273                                       size_t j, size_t k) const = 0;
00274 
00275   // return the best accumulated cost to the j-th tag at the i-th token;
00276   // used in Viterbi search
00277   virtual double best_cost(size_t i, size_t j) const = 0;
00278 
00279 #ifndef SWIG
00280   // return the emission feature vector of the j-th tag at the i-th token
00281   virtual const int* emission_vector(size_t i, size_t j) const = 0;
00282 
00283   // return the transition feature vector of [j-th tag at i-th token] to
00284   // [k-th tag at (i+1)-th token]
00285   virtual const int* next_transition_vector(size_t i,
00286                                             size_t j, size_t k) const = 0;
00287 
00288   // return the transition feature vector of [j-th tag at i-th token] to
00289   // [k-th tag at (i-1)-th token]
00290   virtual const int* prev_transition_vector(size_t i,
00291                                             size_t j, size_t k) const = 0;
00292 #endif
00293 
00294   // normalizing factor (log-prob)
00295   virtual double Z() const = 0;
00296 
00297   // parse and change the internal state; returns false on failure
00298   virtual bool parse() = 0;
00299 
00300   // return true if the context is empty
00301   virtual bool empty() const = 0;
00302 
00303   // clear all context
00304   virtual bool clear() = 0;
00305 
00306   // change the internal state to output the next-optimal output.
00307   // Calling it n times yields the n-best results.
00308   // Need to specify the -nN option to use this function, where
00309   // N>=2
00310   virtual bool next() = 0;
00311 
00312   // parse 'str' and return the result as a string.
00313   // 'str' must be written in CRF++'s input format
00314   virtual const char* parse(const char* str) = 0;
00315 
00316 #ifndef SWIG
00317   // return the parsed result as a string
00318   virtual const char* toString() = 0;
00319 
00320   // return the parsed result as a string.
00321   // The result is saved in the buffer 'result'; 'size' is the
00322   // size of the buffer. On failure, returns NULL
00323   virtual const char* toString(char* result , size_t size) = 0;
00324 
00325   // parse 'str' and return the parsed result.
00326   // You don't need to delete the return value, but the buffer
00327   // is rewritten whenever you call a parse method.
00328   // On failure, returns NULL
00329   virtual const char* parse(const char *str, size_t size) = 0;
00330 
00331   // parse 'str' and return the parsed result.
00332   // The result is stored in the buffer 'result'.
00333   // 'size2' is the size of the buffer. On failure, returns NULL
00334   virtual const char* parse(const char *str, size_t size1,
00335                             char *result, size_t size2) = 0;
00336 #endif
00337   // return the internal error code as a string
00338   virtual const char* what() = 0;
00339 
00340   virtual ~Tagger() {}
00341 };
00342 
00343 /* factory method */
00344 
00345 // create a CRFPP::Tagger instance with parameters in argv[],
00346 // e.g., argv[] = {"CRF++", "-m", "model", "-v3"};
00347 CRFPP_DLL_EXTERN Tagger *createTagger(int argc, char **argv);
00348 
00349 // create a CRFPP::Tagger instance with the parameter string arg,
00350 // e.g., arg = "-m model -v3";
00351 CRFPP_DLL_EXTERN Tagger *createTagger(const char *arg);
00352 
00353 // create a CRFPP::Model instance with parameters in argv[],
00354 // e.g., argv[] = {"CRF++", "-m", "model", "-v3"};
00355 CRFPP_DLL_EXTERN Model *createModel(int argc, char **argv);
00356 
00357 // load model from [model_buf, model_buf+model_size], with parameters in argv[].
00358 CRFPP_DLL_EXTERN Model *createModelFromArray(int argc, char **argv,
00359                                              const char *model_buf,
00360                                              size_t model_size);
00361 
00362 // create a CRFPP::Model instance with the parameter string arg,
00363 // e.g., arg = "-m model -v3";
00364 CRFPP_DLL_EXTERN Model *createModel(const char *arg);
00365 
00366 // load model from [model_buf, model_buf+model_size], with the parameter string arg.
00367 CRFPP_DLL_EXTERN Model *createModelFromArray(const char *arg,
00368                                              const char *model_buf,
00369                                              size_t model_size);
00370 
00371 // return the error code of createTagger() as a string;
00372 CRFPP_DLL_EXTERN const char *getTaggerError();
00373 
00374 // alias of getTaggerError();
00375 CRFPP_DLL_EXTERN const char *getLastError();
00376 }
00377 
00378 #endif
00379 #endif