Second Commit
app.py CHANGED

@@ -20,10 +20,10 @@ from peft import (
 )
 from datetime import datetime
 
-# …
-MODEL_NAME = "deepseek-ai/…
-OUTPUT_DIR = "/tmp/finetuned_models"
-LOGS_DIR = "/tmp/training_logs"
+# Changed to a model that doesn't require flash-attention
+MODEL_NAME = "deepseek-ai/deepseek-coder-6.7b-base"
+OUTPUT_DIR = "/tmp/finetuned_models"
+LOGS_DIR = "/tmp/training_logs"
 
 class TrainingInterface:
     def __init__(self):
@@ -32,14 +32,12 @@ class TrainingInterface:
         self.is_training = False
 
     def get_database_url(self):
-        """Get database URL from HF Space secrets"""
         database_url = os.environ.get('DATABASE_URL')
         if not database_url:
             raise Exception("DATABASE_URL not found in environment variables")
         return database_url
 
     def fetch_training_data(self, progress=gr.Progress()):
-        """Fetch training data from database"""
         try:
             database_url = self.get_database_url()
             engine = create_engine(database_url)
@@ -60,7 +58,6 @@
             raise gr.Error(f"Database error: {str(e)}")
 
     def prepare_training_data(self, df, progress=gr.Progress()):
-        """Convert DataFrame into training format"""
         formatted_data = []
         try:
             total_rows = len(df)
@@ -71,7 +68,7 @@
                 text = str(row_data['text']).strip()
 
                 if chunk_id and text:
-                    formatted_text = f"…
+                    formatted_text = f"Question: {chunk_id}\nAnswer: {text}" # Changed format for deepseek-coder
                     formatted_data.append({"text": formatted_text})
 
             if not formatted_data:
@@ -82,7 +79,6 @@
             raise gr.Error(f"Data preparation error: {str(e)}")
 
     def stop_training(self):
-        """Stop the training process"""
         self.is_training = False
         return "Training stopped by user."
 
@@ -93,17 +89,14 @@
         batch_size=4,
         progress=gr.Progress()
     ):
-        """Main training function"""
         try:
             self.is_training = True
 
-            # Create directories in /tmp for HF Spaces
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             specific_output_dir = os.path.join(OUTPUT_DIR, f"run_{timestamp}")
             os.makedirs(specific_output_dir, exist_ok=True)
             os.makedirs(LOGS_DIR, exist_ok=True)
 
-            # Data preparation
             progress(0.1, desc="Fetching data...")
             if not self.is_training:
                 return "Training cancelled."
@@ -111,32 +104,27 @@
             df = self.fetch_training_data()
             formatted_data = self.prepare_training_data(df)
 
-            # Model initialization
             progress(0.2, desc="Loading model...")
             if not self.is_training:
                 return "Training cancelled."
 
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME…
+            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
             model = AutoModelForCausalLM.from_pretrained(
                 MODEL_NAME,
-                trust_remote_code=True,
                 torch_dtype=torch.float16,
                 load_in_8bit=True,
-                device_map="auto"
+                device_map="auto"
             )
 
-            # LoRA configuration
             progress(0.3, desc="Setting up LoRA...")
             if not self.is_training:
                 return "Training cancelled."
 
+            # Updated LoRA config for deepseek-coder model
             lora_config = LoraConfig(
                 r=16,
                 lora_alpha=32,
-                target_modules=[
-                    "q_proj", "k_proj", "v_proj", "o_proj",
-                    "gate_proj", "up_proj", "down_proj"
-                ],
+                target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
                 lora_dropout=0.05,
                 bias="none",
                 task_type="CAUSAL_LM"
@@ -145,7 +133,6 @@
             model = prepare_model_for_kbit_training(model)
             model = get_peft_model(model, lora_config)
 
-            # Training setup
             progress(0.4, desc="Configuring training...")
             if not self.is_training:
                 return "Training cancelled."
@@ -161,9 +148,9 @@
                 logging_dir=os.path.join(LOGS_DIR, f"run_{timestamp}"),
                 logging_steps=10,
                 save_strategy="epoch",
-                evaluation_strategy="…
+                evaluation_strategy="no", # Changed to "no" since we don't have eval data
                 save_total_limit=2,
-                remove_unused_columns=False,
+                remove_unused_columns=False,
             )
 
             dataset = Dataset.from_dict({
@@ -175,7 +162,6 @@
                 mlm=False
             )
 
-            # Custom progress callback
             class ProgressCallback(gr.Progress):
                 def __init__(self, progress_callback, training_interface):
                     self.progress_callback = progress_callback
@@ -210,7 +196,6 @@
             if not self.is_training:
                 return "Training cancelled."
 
-            # Save model
             progress(0.9, desc="Saving model...")
             trainer.save_model()
             tokenizer.save_pretrained(specific_output_dir)
@@ -223,11 +208,10 @@
             raise gr.Error(f"Training error: {str(e)}")
 
 def create_training_interface():
-    """Create Gradio interface"""
     interface = TrainingInterface()
 
-    with gr.Blocks(title="DeepSeek…
-    gr.Markdown("# DeepSeek…
+    with gr.Blocks(title="DeepSeek Coder Training Interface") as app:
+        gr.Markdown("# DeepSeek Coder Fine-tuning Interface")
 
     with gr.Row():
         with gr.Column():
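Note on the new prompt format: deepseek-coder-6.7b-base is a base (non-instruct) model with no chat template, so the commit switches to plain Question/Answer strings. Below is a minimal, self-contained sketch of that formatting step; the column names chunk_id and text come from the diff, while the sample row is invented for illustration.

# Sketch of the Question/Answer formatting used in prepare_training_data.
# Column names ('chunk_id', 'text') match the diff; the sample row is made up.
import pandas as pd

df = pd.DataFrame({
    "chunk_id": ["What does this Space do?"],
    "text": ["It fine-tunes deepseek-coder on rows fetched from a database."],
})

formatted_data = []
for _, row in df.iterrows():
    chunk_id = str(row["chunk_id"]).strip()
    text = str(row["text"]).strip()
    if chunk_id and text:
        # Same f-string as the "+" line in the diff
        formatted_data.append({"text": f"Question: {chunk_id}\nAnswer: {text}"})

print(formatted_data[0]["text"])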
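The 8-bit load itself is unchanged (load_in_8bit=True plus torch_dtype=torch.float16 and device_map="auto"); only trust_remote_code is dropped, which deepseek-coder does not need. Recent transformers releases prefer expressing the quantization through a BitsAndBytesConfig object rather than the bare flag; a minimal sketch of the equivalent load, assuming the bitsandbytes and accelerate packages are installed:

# Equivalent 8-bit load via BitsAndBytesConfig (newer transformers idiom).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "deepseek-ai/deepseek-coder-6.7b-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    torch_dtype=torch.float16,
    device_map="auto",  # requires the accelerate package
)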