ajsbsd committed on
Commit
72d5687
·
verified ·
1 Parent(s): 188cfcd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -11
app.py CHANGED
@@ -98,17 +98,35 @@ def predict_chat(message: str, history: list):
98
  prompt_input += f"Assistant: {msg['content']}\n"
99
  prompt_input += "Assistant:"
100
 
101
- # FIXED: Use max_tokens instead of max_new_tokens for ctransformers
102
- for token in model.generate(
103
- prompt_input,
104
- max_tokens=MAX_NEW_TOKENS, # Changed from max_new_tokens
105
- temperature=TEMPERATURE,
106
- top_k=TOP_K,
107
- top_p=TOP_P,
108
- sample=DO_SAMPLE, # Changed from do_sample
109
- repetition_penalty=1.1,
110
- stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"]
111
- ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  generated_text += token
113
  yield generated_text
114
 
 
98
  prompt_input += f"Assistant: {msg['content']}\n"
99
  prompt_input += "Assistant:"
100
 
101
+ # FIXED: Use the correct ctransformers method - call model() directly for streaming
102
+ try:
103
+ for token in model(
104
+ prompt_input,
105
+ max_new_tokens=MAX_NEW_TOKENS,
106
+ temperature=TEMPERATURE,
107
+ top_k=TOP_K,
108
+ top_p=TOP_P,
109
+ do_sample=DO_SAMPLE,
110
+ repetition_penalty=1.1,
111
+ stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"],
112
+ stream=True
113
+ ):
114
+ generated_text += token
115
+ yield generated_text
116
+ except Exception as e:
117
+ print(f"Error in GGUF generation: {e}")
118
+ # Fallback to non-streaming generation
119
+ output = model(
120
+ prompt_input,
121
+ max_new_tokens=MAX_NEW_TOKENS,
122
+ temperature=TEMPERATURE,
123
+ top_k=TOP_K,
124
+ top_p=TOP_P,
125
+ do_sample=DO_SAMPLE,
126
+ repetition_penalty=1.1,
127
+ stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"]
128
+ )
129
+ yield output
130
  generated_text += token
131
  yield generated_text
132