Update app.py
app.py CHANGED

@@ -8,7 +8,7 @@ from threading import Thread
 
 # Loading the tokenizer and model from Hugging Face's model hub.
 tokenizer = AutoTokenizer.from_pretrained("soketlabs/pragna-1b", token=os.environ.get('HF_TOKEN'))
-model = AutoModelForCausalLM.from_pretrained("soketlabs/pragna-1b", token=os.environ.get('HF_TOKEN'))
+model = AutoModelForCausalLM.from_pretrained("soketlabs/pragna-1b-it-v0.1", token=os.environ.get('HF_TOKEN'))
 
 # using CUDA for an optimal experience
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -38,7 +38,7 @@ def predict(message, history):
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
-        max_new_tokens=
+        max_new_tokens=300,
         do_sample=True,
         top_p=0.95,
         top_k=50,
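For context (not part of the commit), here is a minimal sketch of how generate_kwargs like these are typically consumed inside a streaming predict function: a TextIteratorStreamer yields text while model.generate runs on a background Thread, matching the `from threading import Thread` context in the first hunk. The helper name stream_reply and its arguments are illustrative assumptions, not code taken from this Space.

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, tokenizer, prompt, device):
    # Tokenize the prompt and move tensors to the same device as the model.
    model_inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # The streamer receives tokens as generate() produces them.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Same sampling settings as the updated diff; max_new_tokens=300 caps the reply length.
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=300,
        do_sample=True,
        top_p=0.95,
        top_k=50,
    )

    # Run generation in a background thread so we can consume the streamer here.
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    # Yield the growing reply chunk by chunk, as a Gradio chat callback would.
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial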