import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Initialize the Inference API client
client = InferenceClient("01-ai/Yi-Coder-9B-Chat")

# Initialize the local tokenizer and model
model_path = "01-ai/Yi-Coder-9B-Chat"  # Make sure this is correct
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto").eval()
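
# Optional helper, a minimal sketch and not part of the original app: build the
# prompt with the tokenizer's chat template so role boundaries are preserved,
# instead of concatenating raw message contents as respond() does below. This
# assumes the checkpoint ships a chat template, which chat-tuned models such as
# Yi-Coder-9B-Chat normally do.
def build_chat_prompt(chat_messages):
    return tokenizer.apply_chat_template(
        chat_messages,
        tokenize=False,
        add_generation_prompt=True,
    )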

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    use_local_model: bool,
):
    messages = [{"role": "system", "content": system_message}]
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    if use_local_model:
        # Use the local model: flatten the conversation into a single prompt string
        prompt = "".join([m["content"] for m in messages])
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        input_ids = input_ids.to(model.device)

        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens, not the echoed prompt
        response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
        yield response

    else:
        # Use the Hugging Face Inference API with streaming generation
        response = ""
        for token in client.text_generation(
            "".join([m["content"] for m in messages]),
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            response += token
            yield response

# Create the Gradio chat interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            # Default system prompt (Polish): "You answer in Polish. You are a
            # coder/developer/programmer and you produce complete code."
            value="Odpowiadasz w Jezyku Polskim jesteś Coder/Developer/Programista tworzysz pełny kod..",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Checkbox(label="Use Local Model", value=False),
    ],
    title="Advanced Chat Interface",
    description="Chat with an AI model using either the Hugging Face Inference API or a local model.",
)

if __name__ == "__main__":
    demo.launch()