import os
import gradio as gr
from huggingface_hub import InferenceClient
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
client = InferenceClient("meta-llama/Meta-Llama-3-8B", token=os.getenv("HF_API_TOKEN"))
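# Note (assumption): Meta-Llama-3-8B is the base, non-instruct checkpoint, so the
# plain-prompt text_generation call below is the safer choice; chat_completion
# relies on a chat template, which is likely why the disabled attempt below failed.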
# ## Earlier attempt (kept for reference): chat_completion streaming, disabled after a NoneType issue
# def respond(
# message: str,
# history: list[tuple[str, str]], # This will not be used
# system_message: str,
# max_tokens: int,
# temperature: float,
# top_p: float,
# ):
# messages = [{"role": "system", "content": system_message}]
# # Append only the latest user message
# messages.append({"role": "user", "content": message})
# response = ""
# try:
# # Generate response from the model
# for message in client.chat_completion(
# messages,
# max_tokens=max_tokens,
# stream=True,
# temperature=temperature,
# top_p=top_p,
# ):
# if message.choices[0].delta.content is not None:
# token = message.choices[0].delta.content
# response += token
# yield response
# except Exception as e:
# yield f"An error occurred: {e}"
# """
# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
# """
# demo = gr.ChatInterface(
# respond,
# additional_inputs=[
# gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
# gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
# gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
# gr.Slider(
# minimum=0.1,
# maximum=1.0,
# value=0.95,
# step=0.05,
# label="Top-p (nucleus sampling)",
# ),
# ],
# )
# if __name__ == "__main__":
# demo.launch()
# Active implementation: stream tokens via text_generation
def respond(
message: str,
history: list[tuple[str, str]], # This will not be used
system_message: str,
max_tokens: int,
temperature: float,
top_p: float,
):
# Combine the system message and user input into a single prompt
prompt = f"{system_message}\n{message}"
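    # If multi-turn context were desired, the unused history could be folded in
    # ahead of the current message (hypothetical sketch, not wired up here):
    # context = "".join(f"{user}\n{bot}\n" for user, bot in history)
    # prompt = f"{system_message}\n{context}{message}"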
response = ""
try:
        # Stream the response from the model; with stream=True (and the default
        # details=False), text_generation yields each new token as a plain string
        for token in client.text_generation(
            prompt=prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        ):
            response += token
            yield response
except Exception as e:
yield f"An error occurred: {e}"
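# Quick local sanity check (hypothetical usage; assumes HF_API_TOKEN is set):
# for partial in respond("Hello!", [], "You are a friendly Chatbot.", 64, 0.7, 0.95):
#     print(partial)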
# Define the Gradio interface
demo = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
],
)
if __name__ == "__main__":
demo.launch()
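# On Spaces the default launch() is all that's needed; when running locally,
# Gradio can also create a temporary public link via demo.launch(share=True).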
# import os
# import gradio as gr
# from huggingface_hub import InferenceClient
# # Initialize the Hugging Face Inference Client
# client = InferenceClient(
# "meta-llama/Meta-Llama-3.1-8B-Instruct",
#     token=os.getenv("HF_API_TOKEN"),  # read the token from the environment
# )
# # Define a function to handle the chat input and get a response from the model
# def chat_with_model(user_input):
# # Call the client to get the model's response
# response = ""
# for message in client.chat_completion(
# messages=[{"role": "user", "content": user_input}],
# max_tokens=500,
# stream=True,
# ):
#         if message.choices[0].delta.content is not None:
#             response += message.choices[0].delta.content
#     # gr.Chatbot expects a list of (user, bot) message pairs, not a bare string
#     return [(user_input, response)]
# # Create a Gradio interface with a chat component
# with gr.Blocks() as demo:
# chatbot = gr.Chatbot()
# with gr.Row():
# txt = gr.Textbox(show_label=False, placeholder="Type your message here...")
# txt.submit(chat_with_model, inputs=txt, outputs=chatbot)
# demo.launch()