Spaces:

cheberle
/

testtrain

Runtime error

App Files Files Community

cheberle committed on Jan 17

Commit

bf07e8f

1 Parent(s): 4d35d17

f

Browse files

Files changed (2) hide show

app.py +27 -41
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,74 +1,60 @@
 import gradio as gr
-from datasets import Dataset
-from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
 import pandas as pd
-from huggingface_hub import login
 import torch
 def train_model(file, hf_token):
     try:
-        # Login to Hugging Face
-        if not hf_token:
-            return "Please provide a Hugging Face token"
-        login(hf_token)
-        # Load and prepare data
         df = pd.read_csv(file.name)
-        dataset = Dataset.from_pandas(df)
-        # Model setup - force CPU
         model_name = "facebook/opt-125m"
-        device_map = "cpu"  # Force CPU usage
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            device_map=device_map,
-            torch_dtype=torch.float32  # Use float32 for CPU
         )
-        # Training configuration
-        training_args = TrainingArguments(
             output_dir="./results",
-            num_train_epochs=3,
-            per_device_train_batch_size=1,  # Reduced for CPU
-            learning_rate=3e-5,
-            save_strategy="epoch",
-            push_to_hub=True,
-            hub_token=hf_token,
-            no_cuda=True,  # Force CPU usage
-            report_to="none"  # Disable wandb logging
         )
-        # Initialize trainer
         trainer = Trainer(
             model=model,
-            args=training_args,
             train_dataset=dataset,
             tokenizer=tokenizer
         )
-        # Run training
-        trainer.train()
-        # Push to hub
-        model.push_to_hub(f"cheberle/product-classifier-{pd.Timestamp.now().strftime('%Y%m%d')}")
-        return "Training completed successfully!"
     except Exception as e:
-        return f"Error occurred: {str(e)}"
-# Create Gradio interface
 demo = gr.Interface(
     fn=train_model,
     inputs=[
-        gr.File(label="Upload your CSV file"),
-        gr.Textbox(label="Hugging Face Token", type="password")
     ],
     outputs="text",
-    title="Product Classifier Training",
-    description="Upload your CSV data to train a product classifier model on CPU."
 )
 if __name__ == "__main__":
-    demo.launch(share=False)

 import gradio as gr
 import pandas as pd
+from datasets import Dataset
+from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForCausalLM
 import torch
+print("CUDA available:", torch.cuda.is_available())
+print("Device:", torch.device('cpu'))
 def train_model(file, hf_token):
     try:
+        # Basic data loading test
         df = pd.read_csv(file.name)
+        print(f"Loaded CSV with {len(df)} rows")
+        # Load tokenizer and model
         model_name = "facebook/opt-125m"
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map=None,  # Force simple device mapping
+            low_cpu_mem_usage=True
         )
+        model = model.to('cpu')  # Explicitly move to CPU
+        # Basic dataset creation
+        dataset = Dataset.from_pandas(df)
+        args = TrainingArguments(
             output_dir="./results",
+            per_device_train_batch_size=1,
+            num_train_epochs=1,
+            no_cuda=True,
+            local_rank=-1
         )
         trainer = Trainer(
             model=model,
+            args=args,
             train_dataset=dataset,
             tokenizer=tokenizer
         )
+        return f"Setup successful! Loaded {len(df)} rows"
     except Exception as e:
+        return f"Error: {str(e)}\nType: {type(e)}"
 demo = gr.Interface(
     fn=train_model,
     inputs=[
+        gr.File(label="Upload CSV file"),
+        gr.Textbox(label="HF Token", type="password")
     ],
     outputs="text",
+    title="Product Classifier Training (CPU)",
 )
 if __name__ == "__main__":
+    demo.launch(debug=True)  # Enable debug mode

requirements.txt CHANGED Viewed

@@ -3,3 +3,4 @@ transformers==4.37.2
 torch==2.1.2
 datasets==2.16.1
 pandas==2.2.0

 torch==2.1.2
 datasets==2.16.1
 pandas==2.2.0
+huggingface-hub==0.27.0