yuzhe committed on
Commit
719a57d
·
verified ·
1 Parent(s): 904e875

Delete handler.py

Browse files
Files changed (1) hide show
  1. handler.py +0 -48
handler.py DELETED
@@ -1,48 +0,0 @@
1
- from transformers import (
2
- AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
3
- )
4
- import torch, os
5
-
6
- MODEL_ID = "Qwen/Qwen3-32B" # 换成自己的模型
7
-
8
- def get_model():
9
- # ① 先试 bfloat16,A100/H100 都原生支持
10
- return AutoModelForCausalLM.from_pretrained(
11
- MODEL_ID,
12
- torch_dtype=torch.bfloat16,
13
- device_map="auto", # TGI 同款逻辑,自动分片
14
- low_cpu_mem_usage=True, # 先在 CPU 建图,再流式拷到 GPU
15
- trust_remote_code=True
16
- )
17
-
18
- # ---- 如果 bfloat16 仍 OOM,可改成 4-bit 量化 ----
19
- # bnb_cfg = BitsAndBytesConfig(
20
- # load_in_4bit=True,
21
- # bnb_4bit_quant_type="nf4",
22
- # bnb_4bit_use_double_quant=True,
23
- # )
24
- # def get_model():
25
- # return AutoModelForCausalLM.from_pretrained(
26
- # MODEL_ID,
27
- # device_map="auto",
28
- # quantization_config=bnb_cfg,
29
- # trust_remote_code=True
30
- # )
31
-
32
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
33
- model = get_model()
34
- generator = pipeline(
35
- "text-generation",
36
- model=model,
37
- tokenizer=tokenizer,
38
- device_map="auto",
39
- torch_dtype=getattr(model, "dtype", torch.bfloat16),
40
- )
41
-
42
- def __init__(self, *args, **kwargs):
43
- pass
44
-
45
- def __call__(self, data):
46
- prompt = data.get("inputs") if isinstance(data, dict) else data
47
- outputs = generator(prompt, max_new_tokens=256)
48
- return outputs