# Autofilling / app.py
# Author: Michael Brunzel
# Commit: 3b05c47 ("Switch to production")
import gradio as gr
import os
from dotenv import load_dotenv
from text_generation import Client
# Load variables from a local .env file into the process environment.
load_dotenv()
# Bearer token for the inference endpoint; None when unset (the auth
# header sent by `evaluate` would then literally read "Bearer None").
API_TOKEN = os.environ.get("API_TOKEN", None)
# URL of the hosted text-generation-inference server queried by `evaluate`.
MODEL_URL = os.environ.get("MODEL_URL", None)
def evaluate(hotel_request: str, **kwargs):
    """Stream a model completion for *hotel_request* from the hosted endpoint.

    Opens a client against the text-generation-inference server at
    ``MODEL_URL`` (authenticated with ``API_TOKEN``) and yields the
    partially accumulated generation after every received token, so the
    Gradio UI can render the output incrementally.

    Args:
        hotel_request: Raw request text sent to the model as the prompt.
        **kwargs: Extra generation parameters forwarded verbatim to
            ``Client.generate_stream`` (e.g. ``temperature``,
            ``max_new_tokens``).

    Yields:
        str: The generation accumulated so far, growing with each token.
    """
    hf_client = Client(
        MODEL_URL,
        headers={"Authorization": f"Bearer {API_TOKEN}"},
    )
    stream = hf_client.generate_stream(
        hotel_request,
        **kwargs,
    )
    # Bug fix: `output` was previously used before assignment, so the very
    # first token raised UnboundLocalError. Initialize the accumulator.
    output = ""
    for response in stream:
        output += response.token.text
        yield output
    # NOTE: the original trailing `return output` only set
    # StopIteration.value inside a generator and was dropped as dead code.
# Build and launch the Gradio UI: one free-text hotel-request box in,
# four extracted fields (guest name, hotel, location, date) out.
# NOTE(review): `evaluate` yields a single accumulated string while four
# output components are declared — confirm Gradio splits the result as
# intended (looks like the model emits all four fields in one text).
gr.Interface(
    fn=evaluate,
    inputs=[
        gr.components.Textbox(
            lines=2, label="Input", placeholder="Request for the Hotel"
        ),
    ],
    outputs=[
        # Fixed: outputs previously used the deprecated `gr.inputs.Textbox`
        # namespace; `gr.components` matches the input above and the
        # current Gradio API (`gr.inputs` was removed in Gradio 4.x).
        gr.components.Textbox(lines=1, label="Guest Name"),
        gr.components.Textbox(lines=1, label="Hotel"),
        gr.components.Textbox(lines=1, label="Location"),
        gr.components.Textbox(lines=1, label="Date"),
    ],
    allow_flagging="never",
    title="Falcon-LoRA",
    description="Falcon-LoRA is a 1B-parameter LLM finetuned to follow instructions. It is trained on the [Hotel Requests](https://huggingface.co/datasets/MichaelAI23/hotel_requests) dataset.",  # noqa: E501
).queue().launch()  # server_name="0.0.0.0", server_port=8080 for container use