beingcognitive committed
Commit 37e7b97 · 1 Parent(s): 461910a

which model would work?

Files changed (1)
  app.py +32 -42
app.py CHANGED
@@ -2,67 +2,58 @@ import os
 from datetime import datetime
 import uuid
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 from huggingface_hub import login
-from threading import Thread
 
 from dotenv import load_dotenv
-
 # Load environment variables
 load_dotenv()
 
-# Get the Hugging Face token from environment variables
-hf_token = os.getenv("HUGGINGFACE_TOKEN")
+# Authenticate with Hugging Face
+login(token=os.getenv("HUGGINGFACE_TOKEN"))
 
 # Load model and tokenizer
-model_name = "google/gemma-2-2b-it"
+model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", token=True)
+
 
-tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    token=hf_token
-)
+# Set pad_token_id if it's not already set
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eos_token_id
 
 def chat_with_model(messages):
     # Prepare the input
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    input_ids = tokenizer.encode(str(messages), return_tensors="pt").to(model.device)
+    attention_mask = torch.ones_like(input_ids)
 
     # Generate response
-    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
-    generation_kwargs = dict(
-        inputs,
-        max_new_tokens=1000,
-        temperature=0.7,
-        do_sample=True,
-        streamer=streamer,
-    )
-
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-
-    return streamer
+    with torch.no_grad():
+        output = model.generate(
+            input_ids,
+            attention_mask=attention_mask,
+            max_length=1000,
+            num_return_sequences=1,
+            temperature=0.7,
+            pad_token_id=tokenizer.pad_token_id
+        )
+
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return response
 
 def chat_with_model_gradio(message, history, session_id):
-    system_message = f"Your name is ChatMBTI. You can provide counseling tailored to people's MBTI types. Ask for the other person's MBTI type first, then counsel them in a way that suits that type. For reference, the current time is {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}."
-
     messages = [
-        # {"role": "system", "content": f"Your name is ChatMBTI. You can provide counseling tailored to people's MBTI types. Ask for the other person's MBTI type first, then counsel them in a way that suits that type. For reference, the current time is {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}."},
-        {"role": "user", "content": system_message},
-        {"role": "assistant", "content": "Hello, this is ChatMBTI. How has your day been?"},
+        {"role": "system", "content": f"Your name is ChatMBTI. You can provide counseling tailored to people's MBTI types. Ask for the other person's MBTI type first, then counsel them in a way that suits that type. For reference, the current time is {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}."},
     ]
-    messages.extend([{"role": "user" if i % 2 == 0 else "assistant", "content": m} for i, (m, _) in enumerate(history)])
+    messages.extend([{"role": "user" if i % 2 == 0 else "assistant", "content": m} for i, m in enumerate(history)])
    messages.append({"role": "user", "content": message})
 
-    streamer = chat_with_model(messages)
-
-    partial_message = ""
-    for new_token in streamer:
-        partial_message += new_token
-        yield "", history + [(message, partial_message)]
+    response = chat_with_model(messages)
+    history.append((message, response))
+
+    return "", history
 
 def main():
     session_id = str(uuid.uuid4())
@@ -74,8 +65,7 @@ def main():
     msg.submit(chat_with_model_gradio, [msg, chatbot, gr.State(session_id)], [msg, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
 
-    demo.queue()
     demo.launch()
 
 if __name__ == "__main__":
     main()
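Two notes on the new generation path.

First, the new chat_with_model tokenizes str(messages), the Python repr of the message list, so Meta-Llama-3.1-8B-Instruct never sees its chat template's role headers or special tokens, and decoding output[0] returns the echoed prompt along with the reply. max_length=1000 also caps prompt plus completion together, and temperature=0.7 has no effect while do_sample is left at its default of False. Below is a minimal sketch of the same step built on apply_chat_template, which the removed Gemma path already used; max_new_tokens=1000 and do_sample=True are assumptions carried over from the old streaming code, not part of this commit:

    def chat_with_model(messages):
        # Render the Llama 3.1 chat template instead of tokenizing str(messages)
        input_ids = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        attention_mask = torch.ones_like(input_ids)

        with torch.no_grad():
            output = model.generate(
                input_ids,
                attention_mask=attention_mask,
                max_new_tokens=1000,  # assumption: cap the completion, not prompt + completion
                do_sample=True,       # assumption: temperature is ignored when sampling is off
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
            )

        # Decode only the newly generated tokens, not the echoed prompt
        return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)

Second, with the tuple-format Gradio Chatbot this app uses (history.append((message, response))), each history element is a (user, assistant) pair, so {"content": m} stores a tuple rather than a string. A sketch of flattening those pairs into chat messages, against that same tuple format:

    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg is not None:  # the newest pair may not have an answer yet
            messages.append({"role": "assistant", "content": bot_msg})

Dropping demo.queue() is consistent with the handler now returning one final value instead of yielding partial ones, at the cost of token streaming in the UI.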