#include "llm.h"

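// Each supported backend is compiled directly into this translation unit by
// including its .cc file, so the factory in ctransformers_llm_create() can
// construct any of the model classes without a separate registration step.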
#include "llms/dolly.cc"
#include "llms/gpt-neox.cc"
#include "llms/gpt2.cc"
#include "llms/gptj.cc"
#include "llms/llama.cc"
#include "llms/mpt.cc"
#include "llms/replit.cc"
#include "llms/starcoder.cc"

// Import falcon after llama.
#include "llms/falcon.cc"

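// The functions below form the C interface of the library. extern "C"
// disables C++ name mangling so the symbols can be looked up by a plain FFI
// loader (dlopen/ctypes-style) without knowledge of the C++ ABI.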
#ifdef __cplusplus
extern "C" {
#endif

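// Creates a model instance for the given model type. Returns nullptr if the
// type is not supported or the model fails to load; on success the caller
// owns the returned pointer and must release it with
// ctransformers_llm_delete().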
LLM* ctransformers_llm_create(const char* model_path, const char* model_type,
                              const int context_length, const int gpu_layers) {
  std::string type = model_type;
  // Remove non-alphanumeric characters from model type.
  type.erase(std::remove_if(type.begin(), type.end(),
                            [](const unsigned char c) {
                              // unsigned char avoids undefined behaviour in
                              // std::isalnum for negative char values.
                              return !std::isalnum(c);
                            }),
             type.end());

  LLM* llm = nullptr;
  if (type == "dollyv2") {
    llm = new dollyv2_llm;
  } else if (type == "falcon") {
    llm = new falcon_llm;
  } else if (type == "gpt2") {
    llm = new gpt2_llm;
  } else if (type == "gptj") {
    llm = new gptj_llm;
  } else if (type == "gptneox") {
    llm = new gpt_neox_llm;
  } else if (type == "llama") {
    llm = new llama_llm;
  } else if (type == "mpt") {
    llm = new mpt_llm;
  } else if (type == "replit") {
    llm = new replit_llm;
  } else if (type == "starcoder") {
    llm = new starcoder_llm;
  }

  if (llm == nullptr) {
    fprintf(stderr, "Model type '%s' is not supported.\n", model_type);
    return nullptr;
  }
  if (!llm->Init(model_path, context_length, gpu_layers)) {
    delete llm;
    return nullptr;
  }
  return llm;
}

void ctransformers_llm_delete(LLM* llm) { delete llm; }

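// Tokenizes `text` into `output` and returns the number of tokens written.
// The caller must provide a buffer large enough for the result; no bounds
// checking is performed here.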
int ctransformers_llm_tokenize(LLM* llm, const char* text, int* output) {
  const std::vector<gpt_vocab::id> tokens = llm->Tokenize(text);
  std::copy(tokens.begin(), tokens.end(), output);
  return tokens.size();
}

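// Converts a single token id back to text. This assumes Detokenize() returns
// a reference to a string owned by the LLM object; if it returned by value,
// the pointer would dangle, so treat the result as valid only until the next
// call on the same instance.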
const char* ctransformers_llm_detokenize(LLM* llm, const int token) {
  return llm->Detokenize(token).c_str();
}

bool ctransformers_llm_is_eos_token(LLM* llm, const int token) {
  return llm->IsEosToken(token);
}

int ctransformers_llm_eos_token_id(LLM* llm) { return llm->EosToken(); }

int ctransformers_llm_vocab_size(LLM* llm) { return llm->VocabSize(); }

int ctransformers_llm_context_length(LLM* llm) { return llm->ContextLength(); }

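// Runs `n_tokens` ids from `tokens` through the model, `batch_size` tokens at
// a time using `threads` threads, updating the internally cached logits (and
// embeddings, for backends that produce them).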
bool ctransformers_llm_batch_eval(LLM* llm, const int* tokens,
                                  const int n_tokens, const int batch_size,
                                  const int threads) {
  return llm->BatchEval(std::vector<gpt_vocab::id>(tokens, tokens + n_tokens),
                        batch_size, threads);
}

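// Raw views into buffers owned by the LLM object, exposed as a data pointer
// plus element count. Their contents are overwritten by subsequent
// evaluations, so callers should copy what they need.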
float* ctransformers_llm_logits_data(LLM* llm) { return llm->Logits().data(); }

int ctransformers_llm_logits_size(LLM* llm) { return llm->Logits().size(); }

const float* ctransformers_llm_embeddings_data(LLM* llm) {
  return llm->Embeddings().data();
}

int ctransformers_llm_embeddings_size(LLM* llm) {
  return llm->Embeddings().size();
}

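// Samples the next token id from the current logits using top-k / top-p
// filtering and temperature scaling, with a repetition penalty that is
// presumably applied over the most recent `last_n_tokens` generated tokens;
// `seed` initializes the sampler's random number generator.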
int ctransformers_llm_sample(LLM* llm, const int top_k, const float top_p,
                             const float temperature,
                             const float repetition_penalty,
                             const int last_n_tokens, const int seed) {
  return llm->Sample(top_k, top_p, temperature, repetition_penalty,
                     last_n_tokens, seed);
}

void ctransformers_llm_reset(LLM* llm) { llm->Reset(); }

#ifdef __cplusplus
}
#endif
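
// Example usage (a minimal sketch of the intended call sequence, not part of
// the library; the model path, prompt, buffer size and sampling parameters
// are illustrative assumptions):
//
//   LLM* llm = ctransformers_llm_create("model.bin", "gpt2",
//                                       /*context_length=*/2048,
//                                       /*gpu_layers=*/0);
//   if (llm == nullptr) return 1;
//   int tokens[2048];
//   const int n = ctransformers_llm_tokenize(llm, "Hello", tokens);
//   ctransformers_llm_batch_eval(llm, tokens, n, /*batch_size=*/8,
//                                /*threads=*/4);
//   const int next = ctransformers_llm_sample(
//       llm, /*top_k=*/40, /*top_p=*/0.95f, /*temperature=*/0.8f,
//       /*repetition_penalty=*/1.1f, /*last_n_tokens=*/64, /*seed=*/42);
//   fputs(ctransformers_llm_detokenize(llm, next), stdout);
//   ctransformers_llm_delete(llm);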