Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -49,10 +49,12 @@ def install(package):
|
|
49 |
install('accelerate')
|
50 |
MODEL_NAME = "tiiuae/falcon-7b-instruct"
|
51 |
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
54 |
)
|
55 |
-
model = model.eval()
|
56 |
|
57 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
58 |
print(f"Model device: {model.device}")
|
@@ -216,28 +218,31 @@ def get_llama_response(message: str, history: list) -> str:
|
|
216 |
|
217 |
|
218 |
context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
|
|
|
|
|
|
241 |
|
242 |
import gradio as gr
|
243 |
|
|
|
49 |
install('accelerate')
|
50 |
MODEL_NAME = "tiiuae/falcon-7b-instruct"
|
51 |
|
52 |
+
llama_pipeline = pipeline(
|
53 |
+
"text-generation",
|
54 |
+
model=model,
|
55 |
+
torch_dtype=torch.float16,
|
56 |
+
device_map="auto",
|
57 |
)
|
|
|
58 |
|
59 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
60 |
print(f"Model device: {model.device}")
|
|
|
218 |
|
219 |
|
220 |
context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
|
221 |
+
query = """
|
222 |
+
Answer the question based only on the following context. Dont provide any information out of the context:
|
223 |
+
|
224 |
+
{context}
|
225 |
+
|
226 |
+
---
|
227 |
+
|
228 |
+
Answer the question based on the above context: {question}
|
229 |
+
"""
|
230 |
+
|
231 |
+
|
232 |
+
query=query.format(context=context_text,question=message)
|
233 |
+
|
234 |
+
sequences = llama_pipeline(
|
235 |
+
query,
|
236 |
+
do_sample=True,
|
237 |
+
top_k=10,
|
238 |
+
num_return_sequences=1,
|
239 |
+
eos_token_id=tokenizer.eos_token_id,
|
240 |
+
max_length=1024,
|
241 |
+
)
|
242 |
+
|
243 |
+
generated_text = sequences[0]['generated_text']
|
244 |
+
response = generated_text[len(query):]
|
245 |
+
return response.strip()
|
246 |
|
247 |
import gradio as gr
|
248 |
|