Update app.py
app.py CHANGED
@@ -33,12 +33,26 @@ h3 {
 }
 """
 
-
-
-
-
-
-
+# -------------------------------------
+# ------- use model stunting V5 -------
+# -------------------------------------
+
+text_pipeline = pipeline(
+    "text-generation",
+    model=MODEL_ID,
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    device_map="auto",
+)
+# -------------------------------------
+# ------- use model stunting V6 -------
+# -------------------------------------
+
+# model = AutoModelForCausalLM.from_pretrained(
+#     MODEL_ID,
+#     torch_dtype=torch.bfloat16,
+#     device_map="auto",
+# )
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 @spaces.GPU
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
@@ -51,7 +65,10 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
 
     print(f"Conversation is -\n{conversation}")
 
-
+    # -------------------------------------
+    # ------- use model stunting V5 -------
+    # -------------------------------------
+
     terminators = [
         text_pipeline.tokenizer.eos_token_id,
         text_pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
@@ -75,9 +92,10 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
         for part in full_text.split(". "):  # stream sentence by sentence
             buffer += part.strip() + ". "
             yield buffer
-
-
-
+
+    # -------------------------------------
+    # ------- use model stunting V6 -------
+    # -------------------------------------
 
     # input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
     # inputs = tokenizer(input_ids, return_tensors="pt").to(0)  # gpu 0, cpu 1
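For context, a minimal self-contained sketch of the V5 pipeline path this commit switches to. It assumes a Llama-3-style chat model behind MODEL_ID (hence the <|eot_id|> terminator) and pair-style Gradio history; the real MODEL_ID value, conversation construction, and pipeline call sit outside the visible hunks, so the generation kwargs below are assumptions mapped from stream_chat's signature, not the Space's exact code.

import torch
from transformers import pipeline

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"  # assumed placeholder; the real ID is defined elsewhere in app.py

# Same construction as the diff's V5 block.
text_pipeline = pipeline(
    "text-generation",
    model=MODEL_ID,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# (The real app decorates this with @spaces.GPU for ZeroGPU allocation.)
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
    # Assumed history format: list of (user, assistant) pairs, as in classic Gradio ChatInterface.
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    # Stop on the model's EOS token or on Llama-3's end-of-turn marker,
    # exactly as the diff's terminators list does.
    terminators = [
        text_pipeline.tokenizer.eos_token_id,
        text_pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    outputs = text_pipeline(
        conversation,
        max_new_tokens=max_new_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=penalty,
    )
    # For chat-style input the pipeline returns the whole conversation;
    # the last message is the assistant's reply.
    full_text = outputs[0]["generated_text"][-1]["content"]

    # Stream sentence by sentence, mirroring the diff's split on ". ".
    buffer = ""
    for part in full_text.split(". "):
        buffer += part.strip() + ". "
        yield buffer

Note that the pipeline only yields after decoding the full reply, so this "streaming" is cosmetic: sentence-sized chunks of an already-finished generation. True token-level streaming would need transformers' TextIteratorStreamer with a background generate() call instead.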