michailroussos committed
Commit 1960c65
1 Parent(s): 9764582

more changes to work with our model

Files changed (2)
  1. app.py +18 -41
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,9 +1,12 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
+from ctransformers import AutoModelForCausalLM
 
-client = AutoModelForCausalLM.from_pretrained("michailroussos/model-mistral_CP1250", torch_dtype=torch.float16)
-tokenizer = AutoTokenizer.from_pretrained("michailroussos/model-mistral_CP1250")
+# Use ctransformers for GGUF models
+client = AutoModelForCausalLM.from_pretrained(
+    "michailroussos/model-mistral_CP1250",
+    model_type='mistral',
+    gpu_layers=0  # Set to 0 for CPU, or appropriate number for GPU
+)
 
 def respond(
     message,
@@ -13,44 +16,19 @@ def respond(
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    # Convert messages to prompt
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
+    # Combine system message and current message
+    full_prompt = f"{system_message}\n{message}"
+
     # Generate response
-    inputs = tokenizer(prompt, return_tensors="pt").to(client.device)
+    response = client(
+        full_prompt,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p
+    )
 
-    response = ""
-    for _ in range(max_tokens):
-        with torch.no_grad():
-            outputs = client.generate(
-                inputs.input_ids,
-                max_new_tokens=1,
-                temperature=temperature,
-                top_p=top_p,
-                do_sample=True,
-            )
-
-        new_token = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
-        response += new_token
-
-        inputs = tokenizer(inputs.input_ids.tolist()[0] + outputs[0][inputs.input_ids.shape[1]:].tolist(), return_tensors="pt")
-
-        yield response
+    return response
 
-
-    """
-    For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-    """
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -67,6 +45,5 @@ demo = gr.ChatInterface(
     ],
 )
 
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
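
Note: the rewritten respond drops two things the old loop provided, multi-turn history and incremental streaming. Both can be restored with ctransformers, which streams natively when called with stream=True (the call then yields decoded text chunks). A minimal sketch under stated assumptions: the model_file name and the plain "User:/Assistant:" turn format below are illustrative placeholders, not taken from this commit, and this assumes the Hub repo actually contains a GGUF file.

import gradio as gr
from ctransformers import AutoModelForCausalLM

client = AutoModelForCausalLM.from_pretrained(
    "michailroussos/model-mistral_CP1250",
    model_file="model.gguf",  # hypothetical filename; point at the actual GGUF file in the repo
    model_type="mistral",
    gpu_layers=0,  # 0 = CPU only
)

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the whole conversation as a plain-text prompt; ctransformers has no
    # apply_chat_template equivalent, so this turn format is an assumption.
    prompt = f"{system_message}\n"
    for user_turn, assistant_turn in history:
        if user_turn:
            prompt += f"User: {user_turn}\n"
        if assistant_turn:
            prompt += f"Assistant: {assistant_turn}\n"
    prompt += f"User: {message}\nAssistant:"

    # stream=True yields text chunks as they are generated, so gr.ChatInterface
    # renders the reply incrementally, like the old token-by-token loop but
    # without re-encoding the prompt on every step.
    response = ""
    for chunk in client(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        response += chunk
        yield response

Mistral instruct checkpoints are usually trained on an [INST] ... [/INST] template, so matching the model's real chat format would likely behave better than the generic framing above.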
requirements.txt CHANGED
@@ -1,3 +1,5 @@
 huggingface_hub==0.25.2
 transformers==4.47.0
-torch
+torch
+ctransformers
+gradio
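
After this change app.py no longer imports transformers or torch, and the two new entries are unpinned, so installs can drift between deployments. A leaner pinned alternative might look like the sketch below; the version numbers for ctransformers and gradio are illustrative assumptions, not part of the commit.

huggingface_hub==0.25.2
ctransformers==0.2.27
gradio==5.9.1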