elapt1c committed on
Commit b8b1c07 · verified · 1 Parent(s): 807d739

Update app.py

Files changed (1)
  1. app.py +155 -21
app.py CHANGED
@@ -1,21 +1,155 @@
- from transformers import AutoTokenizer, AutoModelForCausalLM
-
- # Load the tokenizer and model
- tokenizer = AutoTokenizer.from_pretrained("elapt1c/ElapticAI-1a")
- model = AutoModelForCausalLM.from_pretrained("elapt1c/ElapticAI-1a")
-
- # Define a function for chatbot interaction
- def chat(user_input):
-     input_ids = tokenizer(user_input, return_tensors="pt").input_ids
-     output = model.generate(input_ids)
-     response = tokenizer.decode(output[0], skip_special_tokens=True)
-     return response
-
- # Start the chatbot loop
- print("Chatbot ready. Type 'exit' to quit.")
- while True:
-     user_input = input("You: ")
-     if user_input.lower() == 'exit':
-         break
-     response = chat(user_input)
-     print(f"Bot: {response}")
+ import os
+ import torch
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
+ import torch.nn as nn
+
+ # ----- Model Definition -----
+ class CustomDialoGPT(nn.Module):
+     def __init__(self, vocab_size, n_embd=768, n_head=12, n_layer=12):
+         super().__init__()
+
+         config = AutoConfig.from_pretrained("microsoft/DialoGPT-medium",
+                                             vocab_size=vocab_size,
+                                             n_embd=n_embd,
+                                             n_head=n_head,
+                                             n_layer=n_layer,
+                                             bos_token_id=50256,
+                                             eos_token_id=50256,
+                                             pad_token_id=50256)
+         self.transformer = AutoModelForCausalLM.from_config(config)
+         self.lm_head = nn.Linear(n_embd, vocab_size, bias=False)
+
+     def forward(self, input_ids):
+         transformer_outputs = self.transformer(input_ids=input_ids, output_hidden_states=True)
+         hidden_states = transformer_outputs.hidden_states[-1]  # get the last hidden state
+         logits = self.lm_head(hidden_states)
+         return logits
+
+     def get_num_params(self):
+         return sum(p.numel() for p in self.parameters())
+
+ def build_model(vocab_size, target_params=128_000_000):
+     """Build a model with roughly the target parameter count, ensuring n_embd is divisible by n_head."""
+     n_embd_options = [512, 768, 1024]
+     n_head_options = [8, 12, 16]
+     n_layer_options = [6, 8, 12, 16]
+
+     best_params_diff = float('inf')
+     best_n_embd = 0
+     best_n_head = 0
+     best_n_layer = 0
+
+     for n_embd in n_embd_options:
+         for n_head in n_head_options:
+             if n_embd % n_head != 0:
+                 continue
+             for n_layer in n_layer_options:
+                 model = CustomDialoGPT(vocab_size, n_embd, n_head, n_layer)
+                 params = model.get_num_params()
+                 params_diff = abs(params - target_params)
+
+                 if params_diff < best_params_diff:
+                     best_params_diff = params_diff
+                     best_n_embd = n_embd
+                     best_n_head = n_head
+                     best_n_layer = n_layer
+
+                 del model
+     print("Model parameters:", best_n_embd, best_n_head, best_n_layer)
+
+     return CustomDialoGPT(vocab_size, best_n_embd, best_n_head, best_n_layer)
+
+ def chat_with_model(user_input, model, tokenizer, device="cpu", max_length=100, temperature=0.8):
+     """
+     Generates a response from the model given the user input.
+     """
+     input_ids = tokenizer.encode(user_input, return_tensors='pt').to(device)
+
+     with torch.no_grad():
+         output = model.transformer.generate(
+             inputs=input_ids,
+             max_length=max_length,
+             temperature=temperature,
+             do_sample=True,  # temperature only takes effect when sampling is enabled
+             pad_token_id=tokenizer.eos_token_id,
+         )
+     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+     return generated_text
+
+ def load_model_and_tokenizer(model_repo, device):
+     """Loads the tokenizer from the Hub and the model weights from a local .pth checkpoint."""
+     try:
+         # Check if running on Hugging Face
+         if 'HF_MODEL_ID' in os.environ:
+             # Load tokenizer
+             tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
+             vocab_size = len(tokenizer)
+
+             model = build_model(vocab_size)
+
+             # Load model weights from the first local checkpoint
+             checkpoint_files = [f for f in os.listdir(".") if f.endswith('.pth')]
+             if not checkpoint_files:
+                 print("No checkpoint found. Please train the model first.")
+                 return None, None
+
+             checkpoint_path = checkpoint_files[0]
+
+             checkpoint = torch.load(checkpoint_path, map_location=device)
+             model.load_state_dict(checkpoint['model_state_dict'])
+
+             model.to(device)
+             model.eval()
+             print(f"Model loaded on device: {device}")
+         else:
+             # Load tokenizer
+             tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
+             vocab_size = len(tokenizer)
+
+             model = build_model(vocab_size)
+
+             # Load model weights from a user-supplied checkpoint
+             checkpoint_path = input("Enter the path to your .pth checkpoint file: ")
+
+             checkpoint = torch.load(checkpoint_path, map_location=device)
+             model.load_state_dict(checkpoint['model_state_dict'])
+
+             model.to(device)
+             model.eval()
+             print(f"Model loaded on device: {device}")
+         return model, tokenizer
+     except Exception as e:
+         print(f"Error loading model or tokenizer: {e}")
+         return None, None
+
+
+ def gradio_chat(model, tokenizer, device="cpu", max_length=100, temperature=0.8):
+     """Defines the Gradio chatbot interaction."""
+     def respond(message, chat_history):
+         bot_message = chat_with_model(message, model, tokenizer, device=device, max_length=max_length, temperature=temperature)
+         chat_history.append((message, bot_message))
+         return "", chat_history
+
+     with gr.Blocks() as demo:
+         chatbot = gr.Chatbot()
+         msg = gr.Textbox()
+         clear = gr.Button("Clear")
+
+         msg.submit(respond, [msg, chatbot], [msg, chatbot])
+         clear.click(lambda: None, None, chatbot, queue=False)
+
+     return demo
+
+
+ if __name__ == "__main__":
+     # Load the model and tokenizer (tokenizer from the Hub, weights from a local .pth checkpoint).
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     print(f"Using device: {device}")
+
+     model, tokenizer = load_model_and_tokenizer("elapt1c/ElapticAI-1a", device=device)
+     if model and tokenizer:
+         # Launch the Gradio interface.
+         demo = gradio_chat(model, tokenizer, device=device)
+         demo.launch()
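For reference, the loader above expects the .pth checkpoint to be a dict with the weights stored under a 'model_state_dict' key. Below is a minimal sketch of producing a compatible checkpoint and chatting through the new functions; it assumes app.py is importable from the working directory, and "my_checkpoint.pth" is a hypothetical filename:

import torch
from transformers import AutoTokenizer
from app import build_model, chat_with_model, load_model_and_tokenizer

# Save an (untrained) model in the layout load_model_and_tokenizer expects:
# a dict with the weights under 'model_state_dict'. Filename is hypothetical.
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
model = build_model(len(tokenizer))
torch.save({"model_state_dict": model.state_dict()}, "my_checkpoint.pth")

# When HF_MODEL_ID is set, the loader picks up the first .pth file in the
# working directory; otherwise it prompts for the checkpoint path.
model, tokenizer = load_model_and_tokenizer("elapt1c/ElapticAI-1a", device="cpu")
if model and tokenizer:
    print(chat_with_model("Hello!", model, tokenizer, device="cpu"))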