nazimali committed on
Commit
d8ce0fe
·
verified ·
1 Parent(s): cbc504a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -44
app.py CHANGED
@@ -1,62 +1,77 @@
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
3
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
9
 
 
10
  def respond(
11
  message,
12
  history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
  ):
18
- messages = [{"role": "system", "content": system_message}]
19
 
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
 
 
25
 
26
- messages.append({"role": "user", "content": message})
 
 
 
 
 
27
 
28
- response = ""
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
 
39
- response += token
40
- yield response
 
 
 
 
41
 
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
- )
60
 
61
 
62
  if __name__ == "__main__":
 
1
+ import spaces
2
+ import torch
3
+
4
  import gradio as gr
5
+ from huggingface_hub import snapshot_download
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
7
 
8
+
9
+ model = None
10
+ model_id = "nazimali/Mistral-Nemo-Kurdish-Instruct"
11
+
12
+ infer_prompt = """Li jêr rêwerzek heye ku peywirek rave dike, bi têketinek ku çarçoveyek din peyda dike ve tê hev kirin. Bersivek ku daxwazê ​​bi guncan temam dike binivîsin.
13
+ ### Telîmat:
14
+ {}
15
+ ### Têketin:
16
+ {}
17
+ ### Bersiv:
18
  """
19
+
20
# Fetch both repositories into the local Hub cache when the module is imported.
# NOTE(review): presumably this front-loads the download so the first chat
# request does not wait on it; why the base repo is also needed is not visible
# here -- confirm.
snapshot_download(repo_id="nazimali/Mistral-Nemo-Kurdish")
snapshot_download(repo_id=model_id)
22
 
23
 
24
# Cached next to the module-level ``model`` so that calls which skip the model
# load still have a tokenizer to work with.
_tokenizer = None


@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
):
    """Generate a single reply to ``message`` with the Kurdish instruct model.

    The model is loaded lazily on first use with 4-bit NF4 quantization and
    stored in the module-level ``model`` global. The tokenizer is cached
    independently of the model, so a call that finds ``model`` already loaded
    never hits an unbound ``tokenizer``.

    Args:
        message: The latest user message from the chat UI.
        history: Earlier (user, assistant) turns passed in by
            ``gr.ChatInterface``. Unused: the prompt is built from the current
            message only.

    Returns:
        The decoded text of the newly generated tokens, without the prompt
        and without special tokens.
    """
    global model, _tokenizer

    if model is None:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            quantization_config=bnb_config,
            device_map="auto",
        )
        model.eval()

    if _tokenizer is None:
        _tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer = _tokenizer

    prompt = infer_prompt.format("tu arîkarek alîkar î", message)

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
        return_token_type_ids=False,
    ).to("cuda")
    # Remember how many tokens belong to the prompt so they can be dropped
    # from the generated sequence below.
    prompt_length = inputs["input_ids"].shape[1]

    with torch.inference_mode():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=120,
            do_sample=True,
            temperature=0.7,
            top_p=0.7,
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Slice them off by
    # position instead of string-replacing the prompt: decoding does not always
    # reproduce the prompt text exactly, and a failed match would leak the whole
    # prompt into the reply.
    new_tokens = generated_ids[0, prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
72
+
73
+
74
# Chat UI backed by `respond`, offering one Arabic-script and one Latin-script
# example prompt.
demo = gr.ChatInterface(
    respond,
    examples=[
        "سڵاو ئەلیکوم، چۆنیت؟",
        "Selam alikum, tu çawa yî?",
    ],
    title="Mistral Nemo Kurdish Instruct",
)
75
 
76
 
77
  if __name__ == "__main__":