Update app.py

app.py CHANGED
@@ -23,6 +23,7 @@ from accelerate import Accelerator
 import subprocess
 import sys
 import json
+import shutil
 
 # --- Configuration ---
 YOUR_HF_USERNAME = "Twelve2five"

@@ -333,19 +334,52 @@ def train_model(
     learning_rate=1e-4,
     progress=gr.Progress()
 ):
-    progress(0, desc="
+    progress(0, desc="Setting up environment...")
     log = []
 
-    #
-    log.append("
+    # Completely clean up transformers installation
+    log.append("Completely reinstalling transformers and dependencies...")
+
+    # First uninstall any existing transformers
+    subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "transformers"])
+
+    # Clean any cached files that might be causing issues
+    cache_dirs = [
+        os.path.expanduser("~/.cache/huggingface"),
+        os.path.expanduser("~/.cache/pip")
+    ]
+
+    for cache_dir in cache_dirs:
+        if os.path.exists(cache_dir):
+            log.append(f"Cleaning cache directory: {cache_dir}")
+            try:
+                shutil.rmtree(cache_dir)
+            except Exception as e:
+                log.append(f"Warning: Could not clean {cache_dir}: {e}")
+
+    # Install a stable version of transformers known to work with Llama models
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers==4.35.2", "sentencepiece"])
+
+    # Install other dependencies
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
+                           "accelerate", "bitsandbytes==0.41.1", "peft==0.6.1",
+                           "datasets", "huggingface_hub", "deepspeed==0.12.3"])
 
     # Now import everything after installation to ensure we use the correct versions
+    try:
+        from datasets import Dataset
+        from huggingface_hub import snapshot_download
+        import torch
+        import transformers
+        from transformers import AutoModelForCausalLM, LlamaConfig, LlamaForCausalLM
+        from transformers import BitsAndBytesConfig, TrainingArguments, Trainer
+        from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
+
+        log.append(f"Transformers version: {transformers.__version__}")
+        log.append(f"PyTorch version: {torch.__version__}")
+    except ImportError as e:
+        log.append(f"Error importing libraries: {e}")
+        return "\n".join(log)
 
     # --- Configuration ---
     progress(0.05, desc="Setting up configuration...")

@@ -371,8 +405,10 @@ def train_model(
     progress(0.1, desc="Loading base model...")
     try:
         # First try to download the repo without loading the model
-        # to see what files are available
         local_model_path = "./model_files"
+        if os.path.exists(local_model_path):
+            shutil.rmtree(local_model_path)  # Clean up any previous files
+
         snapshot_download(
             repo_id=hf_model_repo_id,
             local_dir=local_model_path,

@@ -387,29 +423,31 @@ def train_model(
             config_data = json.load(f)
         log.append(f"Model architecture type: {config_data.get('model_type', 'unknown')}")
 
-        # Force model_type to llama
+        # Force model_type to llama
+        config_data["model_type"] = "llama"
+        if "architectures" in config_data:
+            config_data["architectures"] = ["LlamaForCausalLM"]
+
+        with open(os.path.join(local_model_path, "config.json"), "w") as f:
+            json.dump(config_data, f)
+        log.append("Updated config.json to use llama model_type")
 
-        # Now try to load
-        config =
+        # Now try to load with explicit Llama classes
+        config = LlamaConfig.from_pretrained(
             local_model_path,
-            trust_remote_code=False
+            trust_remote_code=False
         )
 
         log.append(f"Successfully loaded config: {config.model_type}")
 
-        # Load model with
-        model =
+        # Load model with specific Llama class
+        model = LlamaForCausalLM.from_pretrained(
             local_model_path,
             config=config,
             quantization_config=bnb_config,
             device_map="auto",
+            torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True
         )
 
         log.append(f"Loaded model vocab size: {model.config.vocab_size}")

@@ -417,7 +455,20 @@ def train_model(
     except Exception as e:
         error_msg = f"Error loading model: {str(e)}"
         log.append(error_msg)
+
+        # Try a fallback approach
+        try:
+            log.append("Trying fallback approach with AutoModelForCausalLM...")
+            model = AutoModelForCausalLM.from_pretrained(
+                local_model_path,
+                device_map="auto",
+                torch_dtype=torch.bfloat16,
+                low_cpu_mem_usage=True
+            )
+            log.append(f"Fallback model loaded successfully")
+        except Exception as e2:
+            log.append(f"Fallback approach also failed: {str(e2)}")
+            return "\n".join(log)
 
     # --- Prepare for K-bit Training & Apply LoRA ---
     progress(0.15, desc="Preparing model for fine-tuning...")
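The reinstall-then-import sequence in the second hunk relies on none of those packages having been imported earlier in the process: pip can replace what is on disk, but a module already present in sys.modules keeps running at its old version until the interpreter restarts. A minimal sketch of that constraint, separate from app.py and with an illustrative helper name:

import importlib
import subprocess
import sys

def reinstall_and_import(package: str, spec: str):
    """Reinstall `spec` via pip, then import `package`, flagging the case where
    a previously loaded copy of the module is still the one in memory."""
    already_loaded = package in sys.modules
    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", spec])
    module = importlib.import_module(package)
    if already_loaded:
        # The freshly installed files only take effect after a process restart
        # (or an explicit importlib.reload); until then this is the old module.
        print(f"{package} was already imported; still running "
              f"{getattr(module, '__version__', 'unknown')}")
    return module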
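The LlamaForCausalLM.from_pretrained call in the fourth hunk passes a bnb_config that is defined earlier in app.py and does not appear in this diff. With transformers 4.35.2 and bitsandbytes 0.41.1 it is presumably a 4-bit BitsAndBytesConfig along these lines; the specific flags below are assumptions, not the values actually used in the Space:

import torch
from transformers import BitsAndBytesConfig

# Assumed 4-bit quantization settings; illustrative only.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)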
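The last hunk ends at the "# --- Prepare for K-bit Training & Apply LoRA ---" marker. Given the peft 0.6.1 imports added above, the section that follows plausibly looks like the sketch below; the rank, alpha, dropout, and target modules are placeholder values rather than the ones in app.py:

# Sketch of the k-bit preparation and LoRA wrapping that the new imports support;
# hyperparameters here are assumptions for illustration only.
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # logs how small the trainable LoRA subset is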