ElapticAI-1a-chat

Sleeping

App Files Files Community

elapt1c committed on Jan 22

Commit

b8b1c07

verified ·

1 Parent(s): 807d739

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -21

app.py CHANGED Viewed

@@ -1,21 +1,155 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("elapt1c/ElapticAI-1a")
-model = AutoModelForCausalLM.from_pretrained("elapt1c/ElapticAI-1a")
-# Define a function for chatbot interaction
-def chat(user_input):
-    input_ids = tokenizer(user_input, return_tensors="pt").input_ids
-    output = model.generate(input_ids)
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    return response
-# Start the chatbot loop
-print("Chatbot ready. Type 'exit' to quit.")
-while True:
-    user_input = input("You: ")
-    if user_input.lower() == 'exit':
-        break
-    response = chat(user_input)
-    print(f"Bot: {response}")

+import os
+import gradio as gr
+import torch
+import torch.nn as nn
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+# ----- Model Definition -----
+class CustomDialoGPT(nn.Module):
+    """Causal language model built from the DialoGPT-medium config with custom sizes.
+
+    ``self.transformer`` holds a model created from the config via
+    ``AutoModelForCausalLM.from_config``; ``self.lm_head`` is a separate
+    bias-free linear projection that ``forward`` applies to the last entry of
+    the transformer's hidden states.
+    """
+    def __init__(self, vocab_size, n_embd=768, n_head=12, n_layer=12):
+        super().__init__()
+        # Start from the published DialoGPT-medium configuration and override
+        # the size fields; token id 50256 is used for BOS, EOS and PAD alike.
+        overrides = {
+            "vocab_size": vocab_size,
+            "n_embd": n_embd,
+            "n_head": n_head,
+            "n_layer": n_layer,
+            "bos_token_id": 50256,
+            "eos_token_id": 50256,
+            "pad_token_id": 50256,
+        }
+        config = AutoConfig.from_pretrained("microsoft/DialoGPT-medium", **overrides)
+        self.transformer = AutoModelForCausalLM.from_config(config)
+        self.lm_head = nn.Linear(n_embd, vocab_size, bias=False)
+    def forward(self, input_ids):
+        """Return logits from ``lm_head`` applied to the final hidden-state entry."""
+        outputs = self.transformer(input_ids=input_ids, output_hidden_states=True)
+        final_hidden = outputs.hidden_states[-1]
+        return self.lm_head(final_hidden)
+    def get_num_params(self):
+        """Return the total element count over all parameters of this module."""
+        total = 0
+        for param in self.parameters():
+            total += param.numel()
+        return total
+def build_model(vocab_size, target_params=128_000_000):
+    """Return a CustomDialoGPT whose parameter count is closest to ``target_params``.
+
+    Every (n_embd, n_head, n_layer) combination from the fixed option lists is
+    instantiated and measured; combinations where n_embd is not divisible by
+    n_head are skipped. On ties the first combination tried is kept.
+    """
+    best_combo = (0, 0, 0)
+    smallest_gap = float('inf')
+    for n_embd in (512, 768, 1024):
+        for n_head in (8, 12, 16):
+            if n_embd % n_head:
+                # The embedding width must split evenly across the heads.
+                continue
+            for n_layer in (6, 8, 12, 16):
+                candidate = CustomDialoGPT(vocab_size, n_embd, n_head, n_layer)
+                gap = abs(candidate.get_num_params() - target_params)
+                # Release the reference before the next candidate is built.
+                del candidate
+                if gap < smallest_gap:
+                    smallest_gap = gap
+                    best_combo = (n_embd, n_head, n_layer)
+    print("Model parameters:", *best_combo)
+    return CustomDialoGPT(vocab_size, *best_combo)
+def chat_with_model(user_input, model, tokenizer, device="cpu", max_length=100, temperature=0.8):
+    """Generate the model's reply to ``user_input``.
+
+    Args:
+        user_input: Text typed by the user.
+        model: Object whose ``transformer`` attribute provides ``generate``.
+        tokenizer: Tokenizer used to encode the prompt and decode the reply.
+        device: Device the prompt tensor is moved to.
+        max_length: Limit passed to ``generate`` as ``max_length``; it covers
+            the prompt tokens as well as the generated ones.
+        temperature: Sampling temperature. A positive value enables sampling;
+            ``None`` or ``0`` falls back to the default (non-sampling) decoding.
+
+    Returns:
+        The newly generated text only, without the prompt. An empty
+        ``user_input`` yields an empty string.
+    """
+    if not user_input:
+        # An empty prompt encodes to zero tokens, leaving nothing to continue.
+        return ""
+    # NOTE(review): DialoGPT-style prompts usually end each turn with
+    # tokenizer.eos_token; confirm how the checkpoint was trained before
+    # appending it here.
+    input_ids = tokenizer.encode(user_input, return_tensors='pt').to(device)
+    generate_kwargs = {
+        "inputs": input_ids,
+        "max_length": max_length,
+        "pad_token_id": tokenizer.eos_token_id,
+    }
+    if temperature is not None and temperature > 0:
+        # temperature only takes effect when sampling is switched on.
+        generate_kwargs["do_sample"] = True
+        generate_kwargs["temperature"] = temperature
+    with torch.no_grad():
+        output = model.transformer.generate(**generate_kwargs)
+    # The returned sequence starts with the prompt tokens; drop them so the
+    # reply does not repeat the user's own message.
+    reply_ids = output[0][input_ids.shape[-1]:]
+    return tokenizer.decode(reply_ids, skip_special_tokens=True)
+def _resolve_checkpoint_path():
+    """Return the checkpoint path to load, or None when a hosted run has none."""
+    # HF_MODEL_ID is the original marker; SPACE_ID is also accepted because
+    # Hugging Face Spaces document it as a helper environment variable.
+    hosted = 'HF_MODEL_ID' in os.environ or 'SPACE_ID' in os.environ
+    if not hosted:
+        return input("Enter the path to your .pth checkpoint file: ")
+    # Sort so the pick is stable; os.listdir returns names in arbitrary order.
+    checkpoint_files = sorted(f for f in os.listdir(".") if f.endswith('.pth'))
+    if not checkpoint_files:
+        print("No checkpoint found. Please train the model first.")
+        return None
+    return checkpoint_files[0]
+def load_model_and_tokenizer(model_repo, device):
+    """Build the model, restore its weights from a local .pth file, and load the tokenizer.
+
+    Args:
+        model_repo: Repository id supplied by the caller. Currently unused;
+            kept so existing call sites continue to work.
+        device: Device used for ``map_location`` and for ``model.to``.
+
+    Returns:
+        ``(model, tokenizer)`` on success, or ``(None, None)`` when no
+        checkpoint is available or any step raises.
+    """
+    try:
+        # Locate the checkpoint first so the costly model search is skipped
+        # when there is nothing to load.
+        checkpoint_path = _resolve_checkpoint_path()
+        if checkpoint_path is None:
+            # Always return a pair: the caller unpacks two values.
+            return None, None
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
+        model = build_model(len(tokenizer))
+        # NOTE(security): torch.load unpickles the file; only load checkpoints
+        # from a trusted source.
+        checkpoint = torch.load(checkpoint_path, map_location=device)
+        model.load_state_dict(checkpoint['model_state_dict'])
+        model.to(device)
+        model.eval()
+        print(f"Model loaded on device: {device}")
+        return model, tokenizer
+    except Exception as e:
+        print(f"Error loading model or tokenizer: {e}")
+        return None, None
+def gradio_chat(model, tokenizer, device="cpu", max_length = 100, temperature = 0.8):
+    """Build and return the Gradio Blocks chat UI backed by ``chat_with_model``.
+
+    The UI has a chat window, a text box whose submit event asks the model for
+    a reply, and a "Clear" button that sends ``None`` to the chat window.
+    """
+    def respond(message, chat_history):
+        # Record the (user, bot) pair and blank the text box for the next turn.
+        reply = chat_with_model(
+            message,
+            model,
+            tokenizer,
+            device=device,
+            max_length=max_length,
+            temperature=temperature,
+        )
+        chat_history.append((message, reply))
+        return "", chat_history
+    with gr.Blocks() as ui:
+        history_box = gr.Chatbot()
+        prompt_box = gr.Textbox()
+        clear_btn = gr.Button("Clear")
+        prompt_box.submit(fn=respond, inputs=[prompt_box, history_box], outputs=[prompt_box, history_box])
+        clear_btn.click(fn=lambda: None, inputs=None, outputs=history_box, queue=False)
+    return ui
+if __name__ == "__main__":
+    # Use the GPU when torch reports one as available; otherwise use the CPU.
+    if torch.cuda.is_available():
+        device = "cuda"
+    else:
+        device = "cpu"
+    print(f"Using device: {device}")
+    # Build the model, restore its checkpoint and load the tokenizer.
+    model, tokenizer = load_model_and_tokenizer("elapt1c/ElapticAI-1a", device=device)
+    # Start the Gradio interface only when both objects are truthy.
+    if model and tokenizer:
+        demo = gradio_chat(model, tokenizer, device=device)
+        demo.launch()