#include "llm.h"

#include <algorithm>
#include <cctype>
#include <cstdio>

#include "llms/dolly.cc"
#include "llms/falcon.cc"
#include "llms/gpt-neox.cc"
#include "llms/gpt2.cc"
#include "llms/gptj.cc"
#include "llms/llama.cc"
#include "llms/mpt.cc"
#include "llms/replit.cc"
#include "llms/starcoder.cc"

#ifdef __cplusplus
extern "C" {
#endif

LLM* ctransformers_llm_create(const char* model_path, const char* model_type,
                              const int context_length, const int gpu_layers) {
  std::string type = model_type;

  // Normalize the model type: strip non-alphanumeric characters so spellings
  // like "gpt-neox" and "gpt_neox" match. The parameter is unsigned char
  // because passing a plain (possibly negative) char to std::isalnum is
  // undefined behavior.
  type.erase(std::remove_if(type.begin(), type.end(),
                            [](const unsigned char c) {
                              return !std::isalnum(c);
                            }),
             type.end());

  LLM* llm = nullptr;
  if (type == "dollyv2") {
    llm = new dollyv2_llm;
  } else if (type == "falcon") {
    llm = new falcon_llm;
  } else if (type == "gpt2") {
    llm = new gpt2_llm;
  } else if (type == "gptj") {
    llm = new gptj_llm;
  } else if (type == "gptneox") {
    llm = new gpt_neox_llm;
  } else if (type == "llama") {
    llm = new llama_llm;
  } else if (type == "mpt") {
    llm = new mpt_llm;
  } else if (type == "replit") {
    llm = new replit_llm;
  } else if (type == "starcoder") {
    llm = new starcoder_llm;
  }

  if (llm == nullptr) {
    fprintf(stderr, "Model type '%s' is not supported.\n", model_type);
    return nullptr;
  }
  if (!llm->Init(model_path, context_length, gpu_layers)) {
    delete llm;
    return nullptr;
  }
  return llm;
}

void ctransformers_llm_delete(LLM* llm) { delete llm; }

int ctransformers_llm_tokenize(LLM* llm, const char* text, int* output) {
  // The caller owns `output` and must make it large enough for the tokenized
  // text; tokens are copied into it without bounds checking.
  const std::vector<gpt_vocab::id> tokens = llm->Tokenize(text);
  std::copy(tokens.begin(), tokens.end(), output);
  return tokens.size();
}
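
// Caller-side sketch (illustrative, not part of the library): one way to size
// the token buffer. It assumes the tokenizer never produces more tokens than
// input bytes, which holds for the BPE-style tokenizers of the models above
// but is an assumption, not an API guarantee.
//
//   std::vector<int> tokens(strlen(text));
//   const int n = ctransformers_llm_tokenize(llm, text, tokens.data());
//   tokens.resize(n);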

const char* ctransformers_llm_detokenize(LLM* llm, const int token) {
  // Assumes Detokenize() returns a reference to a string owned by the LLM
  // object; if it returned by value, the pointer returned here would dangle.
  return llm->Detokenize(token).c_str();
}

bool ctransformers_llm_is_eos_token(LLM* llm, const int token) {
  return llm->IsEosToken(token);
}

int ctransformers_llm_eos_token_id(LLM* llm) { return llm->EosToken(); }

int ctransformers_llm_vocab_size(LLM* llm) { return llm->VocabSize(); }

int ctransformers_llm_context_length(LLM* llm) { return llm->ContextLength(); }

bool ctransformers_llm_batch_eval(LLM* llm, const int* tokens,
                                  const int n_tokens, const int batch_size,
                                  const int threads) {
  return llm->BatchEval(std::vector<gpt_vocab::id>(tokens, tokens + n_tokens),
                        batch_size, threads);
}
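
// Caller-side sketch (illustrative): after a successful eval, the logits for
// the most recently evaluated token are exposed through the accessors below.
//
//   if (ctransformers_llm_batch_eval(llm, tokens, n_tokens, 8, 4)) {
//     const float* logits = ctransformers_llm_logits_data(llm);
//     const int n = ctransformers_llm_logits_size(llm);
//   }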

float* ctransformers_llm_logits_data(LLM* llm) { return llm->Logits().data(); }

int ctransformers_llm_logits_size(LLM* llm) { return llm->Logits().size(); }

const float* ctransformers_llm_embeddings_data(LLM* llm) {
  return llm->Embeddings().data();
}

int ctransformers_llm_embeddings_size(LLM* llm) {
  return llm->Embeddings().size();
}

int ctransformers_llm_sample(LLM* llm, const int top_k, const float top_p,
                             const float temperature,
                             const float repetition_penalty,
                             const int last_n_tokens, const int seed) {
  return llm->Sample(top_k, top_p, temperature, repetition_penalty,
                     last_n_tokens, seed);
}

void ctransformers_llm_reset(LLM* llm) { llm->Reset(); }

#ifdef __cplusplus
}
#endif
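
// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the library): the intended call
// sequence for this C API. The model path, model type, and generation
// parameters are placeholder assumptions; compiled as its own translation
// unit against the declarations in llm.h, this would run as written.
//
//   #include <stdio.h>
//
//   int main(void) {
//     LLM* llm = ctransformers_llm_create("/path/to/model.bin", "gpt2",
//                                         /*context_length=*/2048,
//                                         /*gpu_layers=*/0);
//     if (llm == NULL) return 1;  // Unsupported type or failed Init().
//
//     const char* prompt = "Hello";
//     int tokens[256];  // Assumes the prompt fits in 256 tokens.
//     const int n_tokens = ctransformers_llm_tokenize(llm, prompt, tokens);
//
//     // Evaluate the prompt, then sample, print, and feed back one token at
//     // a time until EOS or a fixed budget is reached.
//     ctransformers_llm_batch_eval(llm, tokens, n_tokens, /*batch_size=*/8,
//                                  /*threads=*/4);
//     for (int i = 0; i < 32; ++i) {
//       const int token = ctransformers_llm_sample(
//           llm, /*top_k=*/40, /*top_p=*/0.95f, /*temperature=*/0.8f,
//           /*repetition_penalty=*/1.1f, /*last_n_tokens=*/64, /*seed=*/-1);
//       if (ctransformers_llm_is_eos_token(llm, token)) break;
//       fputs(ctransformers_llm_detokenize(llm, token), stdout);
//       ctransformers_llm_batch_eval(llm, &token, 1, 1, 4);
//     }
//
//     ctransformers_llm_delete(llm);
//     return 0;
//   }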