working
app.py CHANGED
@@ -40,7 +40,7 @@ else:
     joblib.dump(docs_processed, cache_file)
     print("Created and saved docs_processed to cache.")

-class RetrieverTool(Tool):
+class RetrieverTool():
     name = "retriever"
     description = "Uses semantic search to retrieve the parts of documentation that could be most relevant to answer your query."
     inputs = {
@@ -52,14 +52,14 @@ class RetrieverTool(Tool):
     output_type = "string"

     def __init__(self, docs, **kwargs):
-        super().__init__(**kwargs)
+        #super().__init__(**kwargs)

         self.retriever = BM25Retriever.from_documents(
             docs,
             k=7,
         )

-    def forward(self, query: str) -> str:
+    def __call__(self, query: str) -> str:
         assert isinstance(query, str), "Your search query must be a string"

         docs = self.retriever.invoke(
@@ -72,6 +72,8 @@ class RetrieverTool(Tool):
         ]
     )

+
+
 retriever_tool = RetrieverTool(docs_processed)
 # Download gguf model files
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
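The net effect of these three hunks: RetrieverTool no longer subclasses smolagents' Tool. The super().__init__ call is commented out and the truncated method (presumably smolagents' forward) becomes __call__, so the instance stays callable as retriever_tool(query=...). A minimal sketch of the resulting pattern, assuming the method joins the retrieved page_content (the body past invoke() is outside the hunks shown); PlainRetrieverTool, the separator string, and the sample document are illustrative, not from app.py.

from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document

class PlainRetrieverTool:
    """Plain-class version: no Tool base, so __call__ replaces forward()."""

    def __init__(self, docs):
        # BM25Retriever builds a lexical index over the docs; k=7 mirrors the diff.
        self.retriever = BM25Retriever.from_documents(docs, k=7)

    def __call__(self, query: str) -> str:
        assert isinstance(query, str), "Your search query must be a string"
        docs = self.retriever.invoke(query)
        # Separator is an assumption; the real return format is not in the diff.
        return "\n\n===Document===\n".join(d.page_content for d in docs)

tool = PlainRetrieverTool([Document(page_content="Gradio builds ML demos in Python.")])
print(tool(query="build a demo"))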
@@ -85,7 +87,7 @@ hf_hub_download(
 t5_size="base"
 hf_hub_download(
     repo_id=f"Felladrin/gguf-flan-t5-{t5_size}",
-    filename=f"flan-t5-{
+    filename=f"flan-t5-{t5_size}.Q8_0.gguf",
     local_dir="./models",
 )

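This hunk completes the filename argument so the Flan-T5 gguf resolves. A hedged sketch of how such a file is typically consumed with llama-cpp-python, which the generate_t5(llama, message) signature in the next hunk suggests; the prompt and sampling parameters below are illustrative, not taken from app.py.

from llama_cpp import Llama

t5_size = "base"
# Path mirrors local_dir/filename from the hf_hub_download call above.
llama = Llama(
    model_path=f"./models/flan-t5-{t5_size}.Q8_0.gguf",
    n_ctx=512,  # matches the "text size must be smaller than ctx(default=512)" note below
)
out = llama("Rewrite as a search query: how do I cache results?", max_tokens=64)
print(out["choices"][0]["text"])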
@@ -159,6 +161,7 @@ def generate_t5(llama,message):#text size must be smaller than ctx(default=512)
     return None


+llama = None
 def to_query(question):
     system = """
 You are a query rewriter. Your task is to convert a user's question into a concise search query suitable for information retrieval.
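The new module-level llama = None reads like a lazy-init sentinel (or simply a guard against a NameError before the first load). A sketch of the lazy-load pattern under that assumption; ensure_llama is a hypothetical helper, not a function in app.py.

from llama_cpp import Llama

llama = None

def ensure_llama(model_path: str = "./models/flan-t5-base.Q8_0.gguf") -> Llama:
    global llama
    if llama is None:  # first call pays the load cost; later calls reuse the instance
        llama = Llama(model_path=model_path, n_ctx=512)
    return llama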
@@ -200,6 +203,17 @@ Search Query:
     return None


+qwen_prompt = """<|im_start|>system
+You answer questions from the user, always using the context provided as a basis.
+Write down your reasoning for answering the question, between the <think> and </think> tags.<|im_end|>
+<|im_start|>user
+Context:
+%s
+Question:
+%s<|im_end|>
+<|im_start|>assistant
+<think>"""
+
 def answer(document:str,question:str,model:str="Qwen2.5-0.5B-Rag-Thinking.i1-Q6_K.gguf")->str:
     global llm
     global llm_model
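The added template carries two printf-style %s slots and deliberately stops at an open <think> tag. Presumably it is filled with the % operator, leaving the assistant turn unfinished so the model continues with its reasoning before the final answer. The sketch below reuses qwen_prompt as defined in the hunk above; the two strings are placeholders, not values from app.py.

document = "Gradio is a Python library for building ML demos."  # placeholder
question = "What is Gradio?"                                     # placeholder
prompt = qwen_prompt % (document, question)
# prompt now ends with "<think>", priming the model to emit reasoning first.
print(prompt)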
@@ -251,7 +265,9 @@ def respond(
     if model is None:#
         return

-
+    query = to_query(message)
+    document = retriever_tool(query=query)
+    return answer(document,message)

 # Create a chat interface
 demo = gr.ChatInterface(
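The three added lines wire up the whole RAG pipeline: rewrite the user message into a search query, retrieve matching documentation, then answer over it. A standalone sketch of that flow with each stage stubbed out so the sequence runs on its own; the stub bodies stand in for the real flan-t5, BM25, and Qwen implementations above.

def to_query(question: str) -> str:
    return question  # stub; app.py rewrites the question with flan-t5

def retriever_tool(query: str) -> str:
    return f"(docs matching: {query})"  # stub; app.py uses BM25 retrieval

def answer(document: str, question: str) -> str:
    return f"Answer to {question!r} grounded in {document}"  # stub; app.py uses the Qwen gguf

def respond(message: str) -> str:
    query = to_query(message)
    document = retriever_tool(query=query)
    return answer(document, message)

print(respond("How do I cache docs_processed?"))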