Update app.py
Browse files
app.py
CHANGED
|
@@ -110,6 +110,55 @@ description = """Gemma 3 is a family of lightweight, multimodal open models that
|
|
| 110 |
llm = None
|
| 111 |
llm_model = None
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
def respond(
|
| 114 |
message: str,
|
| 115 |
history: List[Tuple[str, str]],
|
|
@@ -150,14 +199,16 @@ def respond(
|
|
| 150 |
flash_attn=False,
|
| 151 |
n_gpu_layers=0,
|
| 152 |
n_batch=8,
|
| 153 |
-
n_ctx=
|
| 154 |
n_threads=2,
|
| 155 |
n_threads_batch=2,
|
| 156 |
)
|
| 157 |
llm_model = model
|
| 158 |
provider = LlamaCppPythonProvider(llm)
|
| 159 |
|
| 160 |
-
|
|
|
|
|
|
|
| 161 |
|
| 162 |
retriever_system="""
|
| 163 |
You are an AI assistant that answers questions based on documents provided by the user. Wait for the user to send a document. Once you receive the document, carefully read its contents and then answer the following question:
|
|
@@ -239,7 +290,7 @@ demo = gr.ChatInterface(
|
|
| 239 |
value="You are a helpful assistant.",
|
| 240 |
label="System Prompt",
|
| 241 |
info="Define the AI assistant's personality and behavior",
|
| 242 |
-
lines=2,
|
| 243 |
),
|
| 244 |
gr.Slider(
|
| 245 |
minimum=512,
|
|
|
|
| 110 |
llm = None
|
| 111 |
llm_model = None
|
| 112 |
|
| 113 |
+
|
| 114 |
+
query_system = """
|
| 115 |
+
You are a query rewriter. Your task is to convert a user's question into a concise search query suitable for information retrieval.
|
| 116 |
+
The goal is to identify the most important keywords for a search engine.
|
| 117 |
+
|
| 118 |
+
Here are some examples:
|
| 119 |
+
|
| 120 |
+
User Question: What is transformer?
|
| 121 |
+
Search Query: transformer
|
| 122 |
+
|
| 123 |
+
User Question: How does a transformer model work in natural language processing?
|
| 124 |
+
Search Query: transformer model natural language processing
|
| 125 |
+
|
| 126 |
+
User Question: What are the advantages of using transformers over recurrent neural networks?
|
| 127 |
+
Search Query: transformer vs recurrent neural network advantages
|
| 128 |
+
|
| 129 |
+
User Question: Explain the attention mechanism in transformers.
|
| 130 |
+
Search Query: transformer attention mechanism
|
| 131 |
+
|
| 132 |
+
User Question: What are the different types of transformer architectures?
|
| 133 |
+
Search Query: transformer architectures
|
| 134 |
+
|
| 135 |
+
User Question: What is the history of the transformer model?
|
| 136 |
+
Search Query: transformer model history
|
| 137 |
+
"""
|
| 138 |
+
def to_query(provider, message):
    """Rewrite a user message into a search query via the LLM.

    A ``LlamaCppAgent`` is created on ``provider`` with ``query_system`` as
    its system prompt and the GEMMA_2 message formatter. ``message`` is then
    sent with the provider's default sampling settings and a fresh, empty
    ``BasicChatHistory``, and the non-streamed response is returned.

    Args:
        provider: Provider object handed to ``LlamaCppAgent``; it must also
            expose ``get_provider_default_settings()``.
        message: The user's message to be rewritten.

    Returns:
        Whatever ``agent.get_chat_response`` returns for a non-streaming call.

    Raises:
        CustomExceptionHandling: Wraps any exception raised while building
            the agent or generating the response (original kept as cause).
    """
    try:
        rewriter = LlamaCppAgent(
            provider,
            system_prompt=query_system,
            predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
            debug_output=True,
        )
        sampling_settings = provider.get_provider_default_settings()
        # A new history per call: the rewrite must not see earlier turns.
        return rewriter.get_chat_response(
            message,
            llm_sampling_settings=sampling_settings,
            chat_history=BasicChatHistory(),
            returns_streaming_generator=False,
            print_output=False,
        )
    except Exception as err:
        # Funnel every failure into the project's exception wrapper.
        raise CustomExceptionHandling(err, sys) from err
|
| 161 |
+
|
| 162 |
def respond(
|
| 163 |
message: str,
|
| 164 |
history: List[Tuple[str, str]],
|
|
|
|
| 199 |
flash_attn=False,
|
| 200 |
n_gpu_layers=0,
|
| 201 |
n_batch=8,
|
| 202 |
+
n_ctx=4096,
|
| 203 |
n_threads=2,
|
| 204 |
n_threads_batch=2,
|
| 205 |
)
|
| 206 |
llm_model = model
|
| 207 |
provider = LlamaCppPythonProvider(llm)
|
| 208 |
|
| 209 |
+
query = to_query(provider,message)
|
| 210 |
+
|
| 211 |
+
text = retriever_tool(query=f"{query}")
|
| 212 |
|
| 213 |
retriever_system="""
|
| 214 |
You are an AI assistant that answers questions based on documents provided by the user. Wait for the user to send a document. Once you receive the document, carefully read its contents and then answer the following question:
|
|
|
|
| 290 |
value="You are a helpful assistant.",
|
| 291 |
label="System Prompt",
|
| 292 |
info="Define the AI assistant's personality and behavior",
|
| 293 |
+
lines=2,visible=False
|
| 294 |
),
|
| 295 |
gr.Slider(
|
| 296 |
minimum=512,
|