Bey / main.py
Luisgust's picture
Create main.py
c533a73 verified
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles
import torch
import cv2
import numpy as np
import logging
from io import BytesIO
import tempfile
import os
from insightface.app import FaceAnalysis
# Toonification model shared by all requests; it stays None until
# load_model() assigns it.
model = None
# Application object on which the route handlers below are registered.
app = FastAPI()
def load_model():
    """Create the toonify model and publish it in the module-level ``model``.

    The instance is constructed and ``load_model('cartoon4')`` is called on it
    *before* the global is assigned. If either step raises, ``model`` is left
    untouched, so a later call can retry instead of silently reusing a
    half-initialised object.

    Raises:
        Any exception raised while importing ``vtoonify_model``, constructing
        ``Model`` or calling its ``load_model``.
    """
    global model
    # Imported inside the function, so vtoonify_model is not imported until
    # the first call.
    from vtoonify_model import Model

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    candidate = Model(device=device)
    candidate.load_model('cartoon4')
    # Publish only a fully loaded instance.
    model = candidate
# Initialize the InsightFace model for face detection.
# This runs at import time; only the 'detection' module is requested.
face_detector = FaceAnalysis(allowed_modules=['detection'])
# ctx_id is 0 when CUDA is available and -1 otherwise (presumably "GPU 0" vs.
# "CPU" in InsightFace -- confirm). det_size matches the 640x640 resize that
# detect_and_crop_face applies before calling the detector.
face_detector.prepare(ctx_id=0 if torch.cuda.is_available() else -1, det_size=(640, 640))
# Configure the root logger so INFO-level messages and above are emitted.
logging.basicConfig(level=logging.INFO)
def detect_and_crop_face(image, padding=0.6):
    """Crop the leftmost detected face out of ``image`` with a padded margin.

    Detection runs on a 640x640 resized copy of the image. The chosen bounding
    box is mapped back to the original resolution, grown on every side by
    ``padding`` times its width/height, and clamped to the image borders.

    Args:
        image: Array whose first two ``shape`` entries are height and width
            and which supports ``image[y1:y2, x1:x2]`` slicing.
        padding: Fraction of the face box width/height added on each side.

    Returns:
        The cropped region (a slice of ``image``), or ``None`` when no face is
        detected or the resulting crop would be empty.
    """
    detect_size = 640
    orig_h, orig_w = image.shape[:2]

    # NOTE(review): this resize ignores the aspect ratio, so non-square inputs
    # are stretched before detection -- confirm this is intentional.
    resized_image = cv2.resize(image, (detect_size, detect_size))
    faces = face_detector.get(resized_image)
    if not faces:
        return None

    # Leftmost face = smallest left edge. min() returns the same element the
    # stable sort put first, without sorting the whole list.
    face = min(faces, key=lambda candidate: candidate.bbox[0])

    # Factors that map detector coordinates back to the original image.
    w_scale = orig_w / detect_size
    h_scale = orig_h / detect_size

    # Scale the detector's box first and truncate afterwards. Truncating
    # before scaling would throw away up to (scale - 1) pixels on big images.
    left, top, right, bottom = face.bbox
    x1 = int(left * w_scale)
    y1 = int(top * h_scale)
    x2 = int(right * w_scale)
    y2 = int(bottom * h_scale)

    # Grow the box by the requested margin, then clamp to the image borders.
    pad_x = int(padding * (x2 - x1))
    pad_y = int(padding * (y2 - y1))
    x1 = max(0, x1 - pad_x)
    y1 = max(0, y1 - pad_y)
    x2 = min(orig_w, x2 + pad_x)
    y2 = min(orig_h, y2 + pad_y)

    # A degenerate or fully out-of-frame box would yield an empty array that
    # later stages cannot write or process; report it like "no face found".
    if x2 <= x1 or y2 <= y1:
        return None

    return image[y1:y2, x1:x2]
@app.post("/upload/")
async def process_image(
    file: UploadFile = File(...),
    top: int = Form(...),
    bottom: int = Form(...),
    left: int = Form(...),
    right: int = Form(...),
):
    """Toonify the leftmost face of an uploaded image and return it as JPEG.

    The upload is decoded, the face is cropped with ``detect_and_crop_face``,
    written to a temporary JPEG file, aligned and toonified by the global
    ``model`` (loaded on first use), and the result is JPEG-encoded.

    Args:
        file: Uploaded image file.
        top, bottom, left, right: Integers forwarded unchanged to
            ``model.detect_and_align_image`` (presumably crop margins --
            confirm against the model implementation).

    Returns:
        A ``StreamingResponse`` with media type ``image/jpeg`` on success,
        otherwise a dict with a single ``"error"`` key describing the failure.
    """
    if model is None:
        load_model()

    contents = await file.read()

    # An empty upload cannot be decoded (OpenCV rejects an empty buffer with
    # an exception instead of returning None), so route it through the same
    # decode-failure response as any other invalid image.
    frame_bgr = None
    if contents:
        nparr = np.frombuffer(contents, np.uint8)
        frame_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)  # decoded as BGR
    if frame_bgr is None:
        logging.error("Failed to decode the image.")
        return {"error": "Failed to decode the image. Please ensure the file is a valid image format."}
    logging.info(f"Uploaded image shape: {frame_bgr.shape}")

    cropped_face = detect_and_crop_face(frame_bgr)
    # An empty crop is as unusable as no crop at all.
    if cropped_face is None or cropped_face.size == 0:
        return {"error": "No face detected or alignment failed."}

    # Only reserve a path here. The handle is closed before OpenCV writes to
    # the file, and everything that can fail afterwards sits inside the
    # try/finally so the temporary file is always removed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        temp_file_path = temp_file.name
    try:
        if not cv2.imwrite(temp_file_path, cropped_face):
            logging.error("Failed to write the cropped face to a temporary file.")
            return {"error": "Failed to save the cropped face for processing."}

        # The model API is driven through the file path of the cropped face.
        aligned_face, instyle, message = model.detect_and_align_image(temp_file_path, top, bottom, left, right)
        if aligned_face is None or instyle is None:
            logging.error("Failed to process the image: No face detected or alignment failed.")
            return {"error": message}

        processed_image, message = model.image_toonify(aligned_face, instyle, model.exstyle, style_degree=0.5, style_type='cartoon4')
        if processed_image is None:
            logging.error("Failed to toonify the image.")
            return {"error": message}

        # NOTE(review): cv2.imencode treats 3-channel input as BGR, so this
        # channel swap yields correct colours only if image_toonify returns
        # RGB data -- confirm against the model implementation.
        processed_image_rgb = cv2.cvtColor(processed_image, cv2.COLOR_BGR2RGB)

        encoded_ok, encoded_image = cv2.imencode('.jpg', processed_image_rgb)
        if not encoded_ok:
            logging.error("Failed to encode the processed image.")
            return {"error": "Failed to encode the processed image."}

        return StreamingResponse(BytesIO(encoded_image.tobytes()), media_type="image/jpeg")
    finally:
        # Clean up the temporary file on every exit path.
        os.remove(temp_file_path)