Vendor-contract-extractor

Running

App Files Community

Jyothikamalesh committed on Jan 13

Commit

d3a5649

verified ·

1 Parent(s): 2efa6f5

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -85

app.py CHANGED Viewed

@@ -1,98 +1,51 @@
 import gradio as gr
-from openai import OpenAI, APIError
-import os
-import tenacity
-import asyncio
-ACCESS_TOKEN = os.getenv("HF_TOKEN")
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1/",
-    api_key=ACCESS_TOKEN,
-)
-# Retry logic with tenacity for handling API rate limits
-@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5))
-async def respond(
     message,
     system_message,
     max_tokens,
     temperature,
     top_p,
 ):
-    try:
-        # Only use the system message and the current message for the response
-        messages = [{"role": "system", "content": system_message},
-                    {"role": "user", "content": message}]
-        response = ""
-        # Properly stream chat completions using dot notation
-        stream = client.chat.completions.create(
-            model="NousResearch/Hermes-3-Llama-3.1-8B",
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-            messages=messages,
-        )
-        # Stream response and concatenate tokens
-        for chunk in stream:
-            if hasattr(chunk.choices[0].delta, 'content'):
-                token = chunk.choices[0].delta.content
-                response += token
-        return response
-    except APIError as e:
-        # Handle both string and dict types of error bodies
-        error_details = e.body
-        if isinstance(error_details, dict):
-            error_type = error_details.get("type", "Unknown")
-            error_code = error_details.get("code", "Unknown")
-            error_param = error_details.get("param", "Unknown")
-            error_message = error_details.get("message", "An error occurred.")
-            error_str = f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
-        else:
-            error_str = f"Error: {error_details}"
-        print(f"APIError: {error_str}")
-        return error_str
-    except Exception as e:
-        print(f"Exception: {e}")
-        return "Error occurred. Please try again."
-# Async Gradio function to handle user input and response generation without history
-async def generate_response(message, system_message, max_tokens, temperature, top_p):
-    response = await respond(message, system_message, max_tokens, temperature, top_p)
-    return response
-def launch_app():
-    try:
-        demo = gr.Blocks()
-        with demo:
-            gr.Markdown("# Chatbot")
-            message = gr.Textbox(label="Message")
-            system_message = gr.Textbox(label="System message")
-            max_tokens = gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens")
-            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
-            response = gr.Text(label="Response")
-            # Use the async version of generate_response without history
-            gr.Button("Generate Response").click(
-                generate_response,
-                inputs=[message, system_message, max_tokens, temperature, top_p],
-                outputs=[response],
-                show_progress=False,
-            )
-        demo.launch(show_error=True)
-    except KeyError as e:
-        print(f"Error: {e}")
-        print("Please try again.")
 if __name__ == "__main__":
-    launch_app()

 import gradio as gr
+from huggingface_hub import InferenceClient
+# Initialize the InferenceClient with the model name
+client = InferenceClient("NousResearch/Hermes-3-Llama-3.1-8B")
+def respond(
     message,
+    history,
     system_message,
     max_tokens,
     temperature,
     top_p,
 ):
+    # Create a list of messages with the system message and user input
+    messages = [{"role": "system", "content": system_message}, {"role": "user", "content": message}]
+    # Get the response from the model
+    response = client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=False,
+        temperature=temperature,
+        top_p=top_p,
+    )
+    # Return the response
+    return response.choices[0].message.content
+# Create a ChatInterface with the respond function and additional inputs
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
+)
 if __name__ == "__main__":
+    demo.launch()