hackergeek98 committed
Commit e5a2042 · verified · 1 parent: e9027a1

Update app.py

Files changed (1):
  1. app.py (+48, -44)
app.py CHANGED
@@ -1,52 +1,56 @@
 import gradio as gr
-import torch
-from peft import PeftModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import login
-import os
-
-# Get token from environment (automatically loaded from secrets)
-hf_token = os.getenv("gemma3")
-login(hf_token)
-
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-pt")
-
-# Load base model on CPU with optimizations
-base_model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-3-1b-pt",
-    torch_dtype=torch.bfloat16,  # Efficient memory usage
-    low_cpu_mem_usage=True
-)
-
-# Load fine-tuned model
-model = PeftModel.from_pretrained(base_model, "hackergeek98/gemma-finetuned")
-model = model.to("cpu")  # Ensure it runs on CPU
-
-# Chatbot function
-def chat(message, history=[]):
-    messages = [{"role": "user", "content": message}]
-
-    input_ids = tokenizer(message, return_tensors="pt").input_ids.to("cpu")
-
-    with torch.no_grad():  # Disable gradient calculations for efficiency
-        output_ids = model.generate(input_ids, max_length=100)
-
-    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
-    history.append((message, response))  # Store conversation history
-    return history, history
-
-# Gradio UI
+from huggingface_hub import InferenceClient
+
+client = InferenceClient("hackergeek98/gemma-finetuned")
+
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    # Prepare the messages list, starting with the system prompt
+    messages = [{"role": "system", "content": system_message}]
+
+    # Add prior turns from the conversation history
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
+    # Add the new user message
+    messages.append({"role": "user", "content": message})
+
+    # Initialize the response string
+    response = ""
+
+    # Stream the chat completion from the Inference API
+    for chunk in client.chat_completion(
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        stream=True  # stream the response token by token
+    ):
+        # Accumulate the response from the streaming output
+        token = chunk.choices[0].delta.content
+        response += token or ""  # guard against empty deltas
+        yield response
+
+# Gradio interface setup
 demo = gr.ChatInterface(
-    chat,
-    chatbot=gr.Chatbot(height=400),
+    respond,
     additional_inputs=[
-        gr.Textbox(value="Welcome to the chatbot!", label="System message")
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
-    title="Fine-Tuned Gemma Chatbot",
-    description="This chatbot is fine-tuned on Persian text using Gemma.",
 )
 
+# Run the app
 if __name__ == "__main__":
     demo.launch()
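
The rewrite moves generation off the Space's CPU: instead of loading the base Gemma model plus the PEFT adapter locally, the app now sends OpenAI-style chat messages to the hosted hackergeek98/gemma-finetuned model through InferenceClient. A minimal sketch of a one-shot (non-streaming) call, useful for checking the endpoint outside Gradio, assuming the model is deployed and reachable via the serverless Inference API; the prompt is illustrative only:

from huggingface_hub import InferenceClient

client = InferenceClient("hackergeek98/gemma-finetuned")

# stream=False (the default) returns a single ChatCompletionOutput
out = client.chat_completion(
    messages=[
        {"role": "system", "content": "You are a friendly Chatbot."},
        {"role": "user", "content": "Hello!"},
    ],
    max_tokens=64,
    stream=False,
)
print(out.choices[0].message.content)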
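
Because respond is a generator that yields the accumulated text after every streamed chunk, gr.ChatInterface renders the reply progressively, and the four additional_inputs map positionally onto the system_message, max_tokens, temperature, and top_p parameters. The generator can also be driven directly for a quick sanity check; a sketch with illustrative argument values:

# Drive the generator outside Gradio; each yield is the response so far.
for partial in respond(
    "Hi there!",
    history=[],
    system_message="You are a friendly Chatbot.",
    max_tokens=32,
    temperature=0.7,
    top_p=0.95,
):
    print(partial)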