Spaces:

alibayram
/

usta-llm-demo

Sleeping

App Files Community

alibayram committed on Jun 10

Commit

8d6020c

1 Parent(s): 8d4b0c7

space update

Browse files

Files changed (2) hide show

app.py +8 -10
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from v1.usta_tokenizer import UstaTokenizer
 def load_model():
     try:
         u_tokenizer = UstaTokenizer("v1/tokenizer.json")
         # Model parameters - adjust these to match your trained model
         context_length = 32
@@ -27,11 +28,13 @@ def load_model():
             context_length=context_length,
             num_layers=num_layers
         )
         # Load the trained weights if available
         model_path = "v1/u_model.pth"
         if not os.path.exists(model_path):
             # Download the model file from GitHub
             try:
                 print("📥 Downloading model weights from GitHub...")
@@ -39,10 +42,12 @@ def load_model():
                 url = "https://github.com/malibayram/llm-from-scratch/raw/main/u_model.pth"
                 response = requests.get(url)
                 response.raise_for_status()  # Raise an exception for bad status codes
                 # Create v1 directory if it doesn't exist
                 os.makedirs("v1", exist_ok=True)
                 with open(model_path, "wb") as f:
                     f.write(response.content)
                 print("✅ Model weights downloaded successfully!")
@@ -52,7 +57,7 @@ def load_model():
         if os.path.exists(model_path):
             try:
-                u_model.load_state_dict(torch.load(model_path, map_location="cpu"))
                 u_model.eval()
                 print("✅ Model weights loaded successfully!")
             except Exception as e:
@@ -150,14 +155,7 @@ demo = gr.ChatInterface(
         ),
     ],
     title="🤖 Usta Model Chat",
-    description="Chat with a custom transformer language model built from scratch! This model specializes in geographical knowledge including countries, capitals, and cities.",
-    examples=[
-        "the capital of france",
-        "tell me about spain",
-        "what is the capital of united states",
-        "paris is in",
-        "germany and its capital"
-    ]
 )
 if __name__ == "__main__":

 def load_model():
     try:
         u_tokenizer = UstaTokenizer("v1/tokenizer.json")
+        print("✅ Tokenizer loaded successfully! vocab size:", len(u_tokenizer.vocab))
         # Model parameters - adjust these to match your trained model
         context_length = 32
             context_length=context_length,
             num_layers=num_layers
         )
+        print("✅ Model loaded successfully! vocab size:", len(u_model.vocab))
         # Load the trained weights if available
         model_path = "v1/u_model.pth"
         if not os.path.exists(model_path):
+            print("❌ Model file not found at", model_path)
             # Download the model file from GitHub
             try:
                 print("📥 Downloading model weights from GitHub...")
                 url = "https://github.com/malibayram/llm-from-scratch/raw/main/u_model.pth"
                 response = requests.get(url)
                 response.raise_for_status()  # Raise an exception for bad status codes
+                print("✅ Model weights downloaded successfully!")
                 # Create v1 directory if it doesn't exist
                 os.makedirs("v1", exist_ok=True)
+                # Save the model weights to the local file system
                 with open(model_path, "wb") as f:
                     f.write(response.content)
                 print("✅ Model weights downloaded successfully!")
         if os.path.exists(model_path):
             try:
+                u_model.load_state_dict(torch.load(model_path, map_location="cpu", weights_only=False))
                 u_model.eval()
                 print("✅ Model weights loaded successfully!")
             except Exception as e:
         ),
     ],
     title="🤖 Usta Model Chat",
+    description="Chat with a custom transformer language model built from scratch! This model specializes in geographical knowledge including countries, capitals, and cities."
 )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 torch>=2.7.1
 requests>=2.32.4

+gradio>=5.33.1
 torch>=2.7.1
 requests>=2.32.4