Text Generation · Transformers · PyTorch · English · llama · text-generation-inference · Inference Endpoints
Conrad Lippert-Zajaczkowski committed on
Commit 3555efd · 1 Parent(s): 997becd
Files changed (2)
  1. config.json +1 -1
  2. handler.py +3 -4
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "imone/LLaMA2_13B_with_EOT_token",
+  "_name_or_path": "swapnice/swapnice-openorcaxopenchat-preview2-13b",
   "architectures": [
     "LlamaForCausalLM"
   ],
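For context, "_name_or_path" is bookkeeping metadata that save_pretrained records; changing it does not alter the architecture. A minimal verification sketch (not part of the commit; the repo id is taken from the diff above):

from transformers import AutoConfig

# hypothetical check: read the field back from the updated config
config = AutoConfig.from_pretrained("swapnice/swapnice-openorcaxopenchat-preview2-13b")
print(config._name_or_path)   # swapnice/swapnice-openorcaxopenchat-preview2-13b
print(config.architectures)   # ['LlamaForCausalLM']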
handler.py CHANGED
@@ -3,17 +3,16 @@ from typing import Dict, List, Any
 from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline
 
 # get dtype
-dtype = torch.float16
+dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
-        tokenizer = LlamaTokenizer.from_pretrained(".", local_files_only=True)
+        tokenizer = LlamaTokenizer.from_pretrained("/repository/tokenizer", local_files_only=True)
         model = LlamaForCausalLM.from_pretrained(
-            ".",
+            "/repository/pytorch_model",
             device_map="auto",
             torch_dtype=dtype,
-            offload_folder="offload"
         )
         # create inference pipeline
         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
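The new dtype line selects bfloat16 on compute capability 8.x GPUs (Ampere, e.g. A100/A10G), which support bf16 natively, and falls back to float16 elsewhere; it assumes `import torch` appears above the hunk, in the first two lines of the file. The diff also shows only `__init__`: an Inference Endpoints custom handler additionally needs a `__call__` method that receives the request payload and returns predictions. A minimal sketch of the full file under those assumptions (the `__call__` body follows the standard custom-handler contract and is not part of this commit):

from typing import Dict, List, Any

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline

# bfloat16 is natively supported on compute capability 8.x (Ampere); use float16 otherwise
dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16

class EndpointHandler:
    def __init__(self, path=""):
        # load tokenizer and weights from the endpoint's repository snapshot
        tokenizer = LlamaTokenizer.from_pretrained("/repository/tokenizer", local_files_only=True)
        model = LlamaForCausalLM.from_pretrained(
            "/repository/pytorch_model",
            device_map="auto",
            torch_dtype=dtype,
        )
        # create inference pipeline
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Inference Endpoints posts JSON like {"inputs": "...", "parameters": {...}}
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", {})
        return self.pipeline(inputs, **parameters)

Note that dropping offload_folder removes the disk-offload fallback: with device_map="auto" and no offload directory, layers can no longer spill to disk, presumably because the endpoint's GPU fits the full 13B model in 16-bit.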