vibesNature / app.py
Ahmadkhan12's picture
Update app.py
14984e1 verified
raw
history blame
2.5 kB
# Import libraries
import os
import gradio as gr
import torch
import soundfile as sf
import numpy as np
from PIL import Image
import torch.nn.functional as F
import logging
from scipy.io.wavfile import write as write_wav
from scipy import signal
from moviepy.editor import VideoFileClip, AudioFileClip
from transformers import AutoProcessor, AutoModelForAudioGeneration
import requests # Add this line
# Configure the root logger: emit DEBUG and above, each record stamped with
# its time and level, and keep a module-level handle to that root logger.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)
logger = logging.getLogger()
# Download the Places365 class-label file into the working directory.
# Any failure is logged and re-raised so startup aborts.
try:
    logging.info("Downloading Places365 class labels...")
    url = "http://places2.csail.mit.edu/models_places365/categories_places365.txt"
    # Bound the wait so an unresponsive server cannot hang startup indefinitely.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx responses instead of saving an error page as the label file.
    response.raise_for_status()
    with open("categories_places365.txt", "wb") as f:
        f.write(response.content)
    logging.info("Places365 class labels downloaded successfully.")
except Exception as e:
    logging.error(f"Error downloading Places365 class labels: {e}")
    raise
# Load the ResNet-18 scene classifier with Places365 weights
# (kept on CPU to save GPU memory). Failures are logged and re-raised.
try:
    logging.info("Loading Places365 model for scene detection...")
    # pretrained=True would fetch torchvision's stock ResNet-18 checkpoint,
    # an ImageNet classifier with a 1000-way head whose outputs cannot be
    # mapped onto the 365 labels in categories_places365.txt. Build a
    # 365-way network and load the Places365 checkpoint instead.
    places365 = torch.hub.load(
        'pytorch/vision:v0.10.0', 'resnet18', pretrained=False, num_classes=365
    )
    _places365_ckpt = torch.hub.load_state_dict_from_url(
        "http://places2.csail.mit.edu/models_places365/resnet18_places365.pth.tar",
        map_location="cpu",
    )
    # Mirrors the official Places365 demo script: the weights sit under
    # "state_dict" and their keys carry a "module." prefix that has to be
    # removed before they match the plain ResNet parameter names.
    places365.load_state_dict({
        (key[len("module."):] if key.startswith("module.") else key): value
        for key, value in _places365_ckpt["state_dict"].items()
    })
    del _places365_ckpt  # the model now owns its own copy of the weights
    places365.eval()
    places365.to("cpu")  # Move to CPU
    logging.info("Places365 model loaded successfully.")
except Exception as e:
    logging.error(f"Error loading Places365 model: {e}")
    raise
# Build the list of scene names from the label file.
# For every line: take the text before the first space and drop its first
# three characters (presumably a "/<letter>/" prefix - confirm against the file).
with open("categories_places365.txt", "r") as f:
    SCENE_CLASSES = [
        entry.strip().partition(" ")[0][3:]
        for entry in f
    ]
# Load the AudioGen Medium and MusicGen Medium processor/model pairs and
# place both models on the GPU when one is available (CPU otherwise).
# Any failure is logged and re-raised so startup aborts.
# NOTE(review): confirm that the installed transformers release exports
# AutoModelForAudioGeneration and that both checkpoints load through it.
try:
    logging.info("Loading AudioGen Medium and MusicGen Medium models...")

    # Each pair is evaluated left to right: processor first, then model.
    audiogen_processor, audiogen_model = (
        AutoProcessor.from_pretrained("facebook/audiogen-medium"),
        AutoModelForAudioGeneration.from_pretrained("facebook/audiogen-medium"),
    )
    musicgen_processor, musicgen_model = (
        AutoProcessor.from_pretrained("facebook/musicgen-medium"),
        AutoModelForAudioGeneration.from_pretrained("facebook/musicgen-medium"),
    )

    # Move models to GPU if available
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    audiogen_model.to(device)
    musicgen_model.to(device)

    logging.info("AudioGen Medium and MusicGen Medium models loaded successfully.")
except Exception as e:
    logging.error(f"Error loading AudioGen/MusicGen models: {e}")
    raise
# Rest of the code remains the same...