kodetr committed
Commit 3571b0e · verified · 1 Parent(s): 004e137

Update app.py

Files changed (1)
  1. app.py +48 -20
app.py CHANGED
@@ -8,7 +8,7 @@ from threading import Thread
 
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_ID = "kodetr/stunting-qa-v4"
+MODEL_ID = "kodetr/stunting-qa-v5"
 MODELS = os.environ.get("MODELS")
 
 TITLE = "<h1><center>KONSULTASI STUNTING</center></h1>"
@@ -51,32 +51,60 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
 
     print(f"Conversation is -\n{conversation}")
 
-    input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer(input_ids, return_tensors="pt").to(0)  # gpu 0, cpu 1
-
-    streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)
-
-    generate_kwargs = dict(
-        inputs,
-        streamer=streamer,
-        top_k=top_k,
-        top_p=top_p,
-        repetition_penalty=penalty,
-        max_new_tokens=max_new_tokens,
-        do_sample=True,
+    # ------- use model stunting V5 -------
+    terminators = [
+        text_pipeline.tokenizer.eos_token_id,
+        text_pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+
+    # The pipeline returns a list of dictionaries containing the generated text
+    outputs = text_pipeline(
+        conversation,
+        max_new_tokens=max_new_tokens,
+        eos_token_id=terminators,
+        do_sample=True,
         temperature=temperature,
-        pad_token_id=128000,
-        eos_token_id=[128001,128008,128009],
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=penalty
     )
-
-    thread = Thread(target=model.generate, kwargs=generate_kwargs)
-    thread.start()
 
+    # The pipeline does not support per-token streaming, so stream per sentence or per paragraph instead
+    full_text = outputs[0]["generated_text"]
     buffer = ""
-    for new_text in streamer:
-        buffer += new_text
+    for part in full_text.split(". "):  # stream sentence by sentence
+        buffer += part.strip() + ". "
         yield buffer
 
+    # ------- use model stunting V6 -------
+
+
+    # input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
+    # inputs = tokenizer(input_ids, return_tensors="pt").to(0)  # gpu 0, cpu 1
+
+    # streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)
+
+    # generate_kwargs = dict(
+    #     inputs,
+    #     streamer=streamer,
+    #     top_k=top_k,
+    #     top_p=top_p,
+    #     repetition_penalty=penalty,
+    #     max_new_tokens=max_new_tokens,
+    #     do_sample=True,
+    #     temperature=temperature,
+    #     pad_token_id=128000,
+    #     eos_token_id=[128001,128008,128009],
+    # )
+
+    # thread = Thread(target=model.generate, kwargs=generate_kwargs)
+    # thread.start()
+
+    # buffer = ""
+    # for new_text in streamer:
+    #     buffer += new_text
+    #     yield buffer
+
 
 
 chatbot = gr.Chatbot(height=600)
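
The updated code calls `text_pipeline`, whose construction sits outside this diff. Below is a minimal sketch of how such a pipeline is typically built with the `transformers` `pipeline()` factory; the dtype and device placement are assumptions, not taken from the commit:

```python
import torch
from transformers import pipeline

MODEL_ID = "kodetr/stunting-qa-v5"

# Hypothetical construction of text_pipeline; the real definition is not shown in this diff.
text_pipeline = pipeline(
    "text-generation",
    model=MODEL_ID,
    model_kwargs={"torch_dtype": torch.bfloat16},  # assumed dtype
    device_map="auto",                             # assumed device placement
)
```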
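A note on the sentence-based pseudo-streaming: `full_text.split(". ")` discards the delimiter, and the loop re-appends `". "` to every chunk, so output that ends without a period, or contains abbreviations and decimal numbers, can come back slightly altered. A sketch of the same idea with a delimiter-keeping split, so the streamed text matches the generated text exactly (the helper name is illustrative, not from the commit):

```python
import re

def stream_by_sentence(full_text: str):
    """Yield a growing buffer one sentence-like chunk at a time,
    without modifying the underlying text."""
    buffer = ""
    # Split after '.', '!' or '?' followed by whitespace, keeping the punctuation.
    for part in re.split(r"(?<=[.!?])\s+", full_text):
        buffer += part + " "
        yield buffer.rstrip()
```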
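If per-token streaming is wanted back, the thread-plus-`TextIteratorStreamer` pattern from the commented-out V6 block can also be combined with the pipeline itself, since `TextGenerationPipeline` forwards extra call kwargs to `model.generate()`. A sketch under that assumption; whether the kwargs pass through this way depends on the installed transformers version:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_chat_tokens(conversation, max_new_tokens, temperature, top_p, top_k, penalty):
    streamer = TextIteratorStreamer(
        text_pipeline.tokenizer, timeout=60.0,
        skip_prompt=True, skip_special_tokens=True,
    )
    # Run the blocking pipeline call in a background thread so the
    # streamer can be consumed as tokens arrive.
    thread = Thread(target=text_pipeline, args=(conversation,), kwargs=dict(
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=penalty,
    ))
    thread.start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer
```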