"""
# Inference

import gradio as gr

app = gr.load(
    "google/gemma-2-2b-it",
    src = "models",
    inputs = [gr.Textbox(label = "Input")],
    outputs = [gr.Textbox(label = "Output")],
    title = "Google Gemma",
    description = "Inference",
    examples = [
        ["Hello, World."]
    ]
).launch()
"""
"""
# Pipeline

import gradio as gr
from transformers import pipeline

pipe = pipeline(model = "google/gemma-2-2b-it")

def fn(input):
    output = pipe(
        input,
        max_new_tokens = 2048
    )
    return output[0]["generated_text"]#[len(input):]

app = gr.Interface(
    fn = fn,
    inputs = [gr.Textbox(label = "Input")],
    outputs = [gr.Textbox(label = "Output")],
    title = "Google Gemma",
    description = "Pipeline",
    examples = [
        ["Hello, World."]
    ]
).launch()
"""

import gradio as gr
from huggingface_hub import InferenceClient
import os

# Initialize Hugging Face Inference Client
hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=hf_token)
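# Note: HF_TOKEN must be set in the environment (on a Hugging Face Space,
# add it as a repository secret); without a token, requests run
# unauthenticated and hit stricter rate limits.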

# Function to handle user inputs and fetch model responses
def chatbot(input_text, history=None):
    # Gradio passes None for the state on the first call, so normalize it
    # here (this also avoids the mutable-default-argument pitfall)
    history = history or []

    # Rebuild the chat transcript from the (user, assistant) history pairs
    messages = []
    for user_input, bot_response in history:
        messages.append({"role": "user", "content": user_input})
        messages.append({"role": "assistant", "content": bot_response})
    messages.append({"role": "user", "content": input_text})
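    # e.g. after one prior exchange the payload looks like:
    # [{"role": "user", "content": "Hi"},
    #  {"role": "assistant", "content": "Hello! How can I help?"},
    #  {"role": "user", "content": input_text}]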

    stream = client.chat.completions.create(
        model="google/gemma-2-2b-it",
        messages=messages,
        # optional sampling parameters:
        #temperature=0.5,
        #max_tokens=2048,
        #top_p=0.7,
        stream=True
    )

    # Concatenate the streamed chunks (delta.content can be None on the
    # final chunk, so substitute an empty string)
    bot_response = "".join(chunk.choices[0].delta.content or "" for chunk in stream)

    # Update conversation history
    history.append((input_text, bot_response))
    return bot_response, history

# Gradio Interface
demo = gr.Interface(
    fn=chatbot, 
    inputs=["text", "state"], 
    outputs=["text", "state"], 
    title="Gemma Chatbot"
)

# Launch Gradio App
demo.launch()
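
# Run locally with, e.g.:
#   HF_TOKEN=<your token> python app.py
# then open the local URL that launch() prints in a browser.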