Spaces:

Luigi
/

ZeroGPU-LLM-Inference

Runtime error

App Files Files Community

Luigi committed on Apr 10

Commit

c9fd924

1 Parent(s): f7a541f

open web search settings to user

Browse files

Files changed (1) hide show

app.py +12 -4

app.py CHANGED Viewed

@@ -102,6 +102,7 @@ MODELS = {
     },
 }
 # ----- Sidebar settings -----
 with st.sidebar:
     st.header("⚙️ Settings")
@@ -114,6 +115,10 @@ with st.sidebar:
     repeat_penalty = st.slider("Repetition Penalty", 1.0, 2.0, 1.1)
     enable_search = st.checkbox("Enable Web Search", value=False)
 # ---- Define selected model and manage its download/load ----
 selected_model = MODELS[selected_model_name]
 model_path = os.path.join("models", selected_model["filename"])
@@ -123,7 +128,7 @@ def try_load_model(path):
     try:
         return Llama(
             model_path=path,
-            n_ctx=2048,  # Reduced context window
             n_threads=2,
             n_threads_batch=1,
             n_batch=256,
@@ -194,12 +199,15 @@ if user_input:
         st.session_state.chat_history.append({"role": "user", "content": user_input})
         st.session_state.pending_response = True
-        # Optionally retrieve extra context
-        retrieved_context = retrieve_context(user_input, max_results=6, max_chars_per_result=600) if enable_search else ""
         st.sidebar.markdown("### Retrieved Context" if enable_search else "Web Search Disabled")
         st.sidebar.text(retrieved_context or "No context found.")
-        # Build augmented query
         if enable_search and retrieved_context:
             augmented_user_input = (
                 f"{system_prompt_base.strip()}\n\n"

     },
 }
+# ----- Sidebar settings -----
 # ----- Sidebar settings -----
 with st.sidebar:
     st.header("⚙️ Settings")
     repeat_penalty = st.slider("Repetition Penalty", 1.0, 2.0, 1.1)
     enable_search = st.checkbox("Enable Web Search", value=False)
+    # NEW SETTINGS: Expose search configuration
+    max_results = st.number_input("Max Results for Context", min_value=1, max_value=20, value=6, step=1)
+    max_chars_per_result = st.number_input("Max Chars Per Result", min_value=100, max_value=2000, value=600, step=50)
 # ---- Define selected model and manage its download/load ----
 selected_model = MODELS[selected_model_name]
 model_path = os.path.join("models", selected_model["filename"])
     try:
         return Llama(
             model_path=path,
+            n_ctx=4096,  # Context window (raised from 2048)
             n_threads=2,
             n_threads_batch=1,
             n_batch=256,
         st.session_state.chat_history.append({"role": "user", "content": user_input})
         st.session_state.pending_response = True
+        # Use the new settings when retrieving web search context
+        retrieved_context = (
+            retrieve_context(user_input, max_results=max_results, max_chars_per_result=max_chars_per_result)
+            if enable_search else ""
+        )
         st.sidebar.markdown("### Retrieved Context" if enable_search else "Web Search Disabled")
         st.sidebar.text(retrieved_context or "No context found.")
+        # Build augmented query as before...
         if enable_search and retrieved_context:
             augmented_user_input = (
                 f"{system_prompt_base.strip()}\n\n"