Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,25 @@ import streamlit as st
|
|
3 |
from phi.assistant import Assistant
|
4 |
from phi.tools.arxiv_toolkit import ArxivToolkit
|
5 |
from huggingface_hub import InferenceClient
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
# Initialize the Hugging Face Inference Client
|
9 |
raw_client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
|
@@ -37,7 +55,7 @@ if st.button("Search") and query:
|
|
37 |
# Generate response using Zephyr
|
38 |
response = ""
|
39 |
for message in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
|
40 |
-
token = message
|
41 |
response += token
|
42 |
|
43 |
# Search arXiv and parse results
|
|
|
3 |
from phi.assistant import Assistant
|
4 |
from phi.tools.arxiv_toolkit import ArxivToolkit
|
5 |
from huggingface_hub import InferenceClient
|
6 |
+
|
7 |
+
# Define a wrapper for Hugging Face LLM
|
8 |
+
class HuggingFaceLLM:
    """Thin adapter that exposes ``chat_completion`` over a wrapped client.

    The wrapped object is expected to provide a ``chat_completion`` method
    with the same keyword arguments (e.g. ``huggingface_hub.InferenceClient``).
    """

    def __init__(self, client):
        """Keep a reference to the underlying inference client."""
        self.client = client

    def chat_completion(self, messages, max_tokens=512, stream=False, temperature=0.7, top_p=0.95):
        """Run a chat completion through the wrapped client.

        Args:
            messages: Chat messages, forwarded to the client unchanged.
            max_tokens: Upper bound on the number of generated tokens.
            stream: When true, the client is asked for a streamed response.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling probability mass.

        Returns:
            Whatever the wrapped client's ``chat_completion`` returns. The
            caller in this app iterates the streamed result and reads
            ``message["choices"][0]["delta"]["content"]`` from each chunk.
        """
        # Delegate to the client's chat-completion endpoint rather than a raw
        # ``post(payload=...)`` call: ``post`` has no ``payload`` keyword and
        # returns undecoded bytes, which the caller's per-chunk indexing
        # cannot consume.
        return self.client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=stream,
            temperature=temperature,
            top_p=top_p,
        )
|
25 |
|
26 |
# Initialize the Hugging Face Inference Client
|
27 |
raw_client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
|
|
|
55 |
# Generate response using Zephyr
|
56 |
response = ""
|
57 |
for message in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
|
58 |
+
token = message["choices"][0]["delta"]["content"]
|
59 |
response += token
|
60 |
|
61 |
# Search arXiv and parse results
|