Update app.py
Browse files
app.py
CHANGED
@@ -110,6 +110,55 @@ description = """Gemma 3 is a family of lightweight, multimodal open models that
|
|
110 |
llm = None
|
111 |
llm_model = None
|
112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
def respond(
|
114 |
message: str,
|
115 |
history: List[Tuple[str, str]],
|
@@ -150,14 +199,16 @@ def respond(
|
|
150 |
flash_attn=False,
|
151 |
n_gpu_layers=0,
|
152 |
n_batch=8,
|
153 |
-
n_ctx=
|
154 |
n_threads=2,
|
155 |
n_threads_batch=2,
|
156 |
)
|
157 |
llm_model = model
|
158 |
provider = LlamaCppPythonProvider(llm)
|
159 |
|
160 |
-
|
|
|
|
|
161 |
|
162 |
retriever_system="""
|
163 |
You are an AI assistant that answers questions based on documents provided by the user. Wait for the user to send a document. Once you receive the document, carefully read its contents and then answer the following question:
|
@@ -239,7 +290,7 @@ demo = gr.ChatInterface(
|
|
239 |
value="You are a helpful assistant.",
|
240 |
label="System Prompt",
|
241 |
info="Define the AI assistant's personality and behavior",
|
242 |
-
lines=2,
|
243 |
),
|
244 |
gr.Slider(
|
245 |
minimum=512,
|
|
|
110 |
llm = None
|
111 |
llm_model = None
|
112 |
|
113 |
+
|
114 |
+
# System prompt for the query-rewriting agent built in `to_query`: a short
# instruction followed by few-shot "User Question" / "Search Query" pairs.
# The text is passed to the model verbatim, so it must not contain anything
# other than the prompt itself.
query_system = """
You are a query rewriter. Your task is to convert a user's question into a concise search query suitable for information retrieval.
The goal is to identify the most important keywords for a search engine.

Here are some examples:

User Question: What is transformer?
Search Query: transformer

User Question: How does a transformer model work in natural language processing?
Search Query: transformer model natural language processing

User Question: What are the advantages of using transformers over recurrent neural networks?
Search Query: transformer vs recurrent neural network advantages

User Question: Explain the attention mechanism in transformers.
Search Query: transformer attention mechanism

User Question: What are the different types of transformer architectures?
Search Query: transformer architectures

User Question: What is the history of the transformer model?
Search Query: transformer model history
"""
|
138 |
+
def to_query(provider, message):
    """Ask the model to rewrite a user question as a search query.

    A fresh ``LlamaCppAgent`` is created on top of ``provider`` with
    ``query_system`` as its system prompt and the Gemma 2 message format.
    ``message`` is then sent with an empty chat history and the provider's
    default sampling settings, as a single non-streaming request.

    Args:
        provider: Provider object handed to ``LlamaCppAgent``; it must also
            expose ``get_provider_default_settings()``.
        message: The user's question to be rewritten.

    Returns:
        The value returned by ``agent.get_chat_response`` for a
        non-streaming call (presumably the rewritten query text; confirm
        against the llama-cpp-agent documentation).

    Raises:
        CustomExceptionHandling: Wraps any exception raised while building
            the agent or generating the response; the original exception is
            chained as ``__cause__``.
    """
    try:
        agent = LlamaCppAgent(
            provider,
            # Already a plain string, so no f-string wrapper is needed.
            system_prompt=query_system,
            predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
            debug_output=True,
        )

        settings = provider.get_provider_default_settings()
        # Empty history: each rewrite is independent of the conversation.
        messages = BasicChatHistory()
        return agent.get_chat_response(
            message,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=False,
            print_output=False,
        )
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e
|
161 |
+
|
162 |
def respond(
|
163 |
message: str,
|
164 |
history: List[Tuple[str, str]],
|
|
|
199 |
flash_attn=False,
|
200 |
n_gpu_layers=0,
|
201 |
n_batch=8,
|
202 |
+
n_ctx=4096,
|
203 |
n_threads=2,
|
204 |
n_threads_batch=2,
|
205 |
)
|
206 |
llm_model = model
|
207 |
provider = LlamaCppPythonProvider(llm)
|
208 |
|
209 |
+
query = to_query(provider,message)
|
210 |
+
|
211 |
+
text = retriever_tool(query=f"{query}")
|
212 |
|
213 |
retriever_system="""
|
214 |
You are an AI assistant that answers questions based on documents provided by the user. Wait for the user to send a document. Once you receive the document, carefully read its contents and then answer the following question:
|
|
|
290 |
value="You are a helpful assistant.",
|
291 |
label="System Prompt",
|
292 |
info="Define the AI assistant's personality and behavior",
|
293 |
+
lines=2,visible=False
|
294 |
),
|
295 |
gr.Slider(
|
296 |
minimum=512,
|