Vendor-contract-extractor

Running

File size: 3,453 Bytes

47fcff2
2efa6f5
79cade0
2efa6f5
 
79cade0
0c5007d
79cade0
a5bb25c
f4f89be
a5bb25c
 
 
2efa6f5
 
 
79cade0
 
 
 
 
 
2efa6f5
 
 
 
f4f89be
2efa6f5
 
 
 
 
 
 
 
 
 
f4f89be
2efa6f5
 
 
 
 
f4f89be
2efa6f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5bb25c
47fcff2
2efa6f5

import gradio as gr
from openai import OpenAI, APIError
import os
import tenacity
import asyncio

# Access token for the inference endpoint, read from the HF_TOKEN environment
# variable; it is None when the variable is not set.
ACCESS_TOKEN = os.environ.get("HF_TOKEN")

# Shared OpenAI client pointed at the Hugging Face inference API base URL.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)

# HTTP statuses treated as transient and therefore worth retrying:
# 429 is rate limiting, the 5xx codes are server-side failures.
_RETRYABLE_STATUS_CODES = frozenset({429, 500, 502, 503, 504})


def _is_retryable(exc):
    """Return True when *exc* is an APIError carrying a transient HTTP status."""
    return (
        isinstance(exc, APIError)
        and getattr(exc, "status_code", None) in _RETRYABLE_STATUS_CODES
    )


def _format_api_error(exc):
    """Render the body of an APIError (dict or any other type) as one line."""
    error_details = exc.body
    if isinstance(error_details, dict):
        error_type = error_details.get("type", "Unknown")
        error_code = error_details.get("code", "Unknown")
        error_param = error_details.get("param", "Unknown")
        error_message = error_details.get("message", "An error occurred.")
        return f"{error_type}: {error_message} (code: {error_code}, param: {error_param})"
    return f"Error: {error_details}"


# Retry logic with tenacity for handling API rate limits. The decorator lives
# on the helper that actually raises: a function that converts every exception
# into a return value would never trigger a retry. reraise=True makes the last
# APIError surface unchanged once the attempts are exhausted.
@tenacity.retry(
    retry=tenacity.retry_if_exception(_is_retryable),
    wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
    stop=tenacity.stop_after_attempt(5),
    reraise=True,
)
def _stream_completion(messages, max_tokens, temperature, top_p):
    """Stream one chat completion and return the concatenated text.

    This is a blocking call (network I/O, plus sleeps between retries), so it
    is meant to run outside the event loop.
    """
    stream = client.chat.completions.create(
        model="NousResearch/Hermes-3-Llama-3.1-8B",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    )

    pieces = []
    for chunk in stream:
        # Some chunks carry no choices at all; skip them instead of indexing.
        if not chunk.choices:
            continue
        # The attribute exists even when it holds None (e.g. role-only or
        # final chunks), so test the value rather than the attribute.
        token = getattr(chunk.choices[0].delta, "content", None)
        if token:
            pieces.append(token)
    return "".join(pieces)


async def respond(
    message,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate a single-turn reply for *message*.

    Only the system message and the current user message are sent; no chat
    history is kept.

    Args:
        message: The user's message text.
        system_message: The system prompt sent ahead of the user message.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.

    Returns:
        The generated text, or a human-readable error string when the request
        fails. Errors are returned rather than raised.
    """
    # Only use the system message and the current message for the response
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": message},
    ]

    try:
        # The client call is synchronous; run it in a worker thread so the
        # event loop stays responsive while tokens stream in (and while
        # tenacity sleeps between retries).
        return await asyncio.to_thread(
            _stream_completion, messages, max_tokens, temperature, top_p
        )

    except APIError as e:
        # Handle both string and dict types of error bodies
        error_str = _format_api_error(e)
        print(f"APIError: {error_str}")
        return error_str

    except Exception as e:
        print(f"Exception: {e}")
        return "Error occurred. Please try again."


async def generate_response(message, system_message, max_tokens, temperature, top_p):
    """Async handler for the Gradio button: forward the inputs to ``respond``.

    No conversation history is passed along; the awaited result of ``respond``
    is returned unchanged.
    """
    return await respond(
        message=message,
        system_message=system_message,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )


def launch_app():
    """Build the Gradio Blocks UI and launch it.

    The page has a message box, a system-message box, three sampling sliders,
    a response field and a button that calls ``generate_response``. A
    ``KeyError`` raised while building or launching is printed, not re-raised.
    """
    try:
        with gr.Blocks() as demo:
            gr.Markdown("# Chatbot")

            # Input widgets, created in the order they appear on the page.
            message_box = gr.Textbox(label="Message")
            system_box = gr.Textbox(label="System message")
            max_tokens_slider = gr.Slider(
                minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"
            )
            temperature_slider = gr.Slider(
                minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
            )
            top_p_slider = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"
            )
            response_field = gr.Text(label="Response")

            # Wire the button to the async handler; no history is involved.
            generate_button = gr.Button("Generate Response")
            generate_button.click(
                generate_response,
                inputs=[
                    message_box,
                    system_box,
                    max_tokens_slider,
                    temperature_slider,
                    top_p_slider,
                ],
                outputs=[response_field],
                show_progress=False,
            )

        demo.launch(show_error=True)
    except KeyError as e:
        print(f"Error: {e}")
        print("Please try again.")

# Launch the UI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    launch_app()