"""FastAPI service: crop the leftmost detected face from an upload and toonify it."""
import logging
import os
import tempfile
from io import BytesIO
from typing import Optional

import cv2
import numpy as np
import torch
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from insightface.app import FaceAnalysis

app = FastAPI()

# Side length (pixels) of the square image handed to the face detector. The
# same value is used for the detector's det_size and for mapping detected
# boxes back onto the original image, so it lives in one place.
DETECTION_SIZE = 640

# Load model and necessary components
model = None


def load_model():
    """Create the toonify model and store it in the module-level ``model``.

    The model is placed on CUDA when torch reports it as available, otherwise
    on the CPU, and the 'cartoon4' style is loaded.
    """
    global model
    # Imported here rather than at module import time, as in the original.
    from vtoonify_model import Model

    model = Model(device='cuda' if torch.cuda.is_available() else 'cpu')
    model.load_model('cartoon4')


# Initialize the InsightFace model for face detection
face_detector = FaceAnalysis(allowed_modules=['detection'])
face_detector.prepare(
    ctx_id=0 if torch.cuda.is_available() else -1,
    det_size=(DETECTION_SIZE, DETECTION_SIZE),
)

# Configure logging
logging.basicConfig(level=logging.INFO)


def detect_and_crop_face(image, padding=0.6) -> Optional[np.ndarray]:
    """Return a padded crop around the leftmost face in ``image``.

    Detection runs on a copy of the image resized to
    ``DETECTION_SIZE x DETECTION_SIZE``; the resulting box is scaled back to
    the original resolution, grown by ``padding`` times its width/height on
    each side, and clamped to the image bounds.

    Args:
        image: Image array; the first two dimensions are read as
            (height, width).
        padding: Fraction of the box width/height added on every side.

    Returns:
        The cropped region of ``image``, or ``None`` when the image is
        missing/empty, no face is detected, or the resulting crop has no area.
    """
    # cv2.resize raises on a missing or zero-sized image; report "no face"
    # through the normal return path instead.
    if image is None or image.size == 0:
        return None

    # Get original dimensions
    orig_h, orig_w = image.shape[:2]

    # Detect faces on a fixed-size (aspect-distorting) resize of the image.
    resized_image = cv2.resize(image, (DETECTION_SIZE, DETECTION_SIZE))
    faces = face_detector.get(resized_image)
    if not faces:
        return None

    # Select the leftmost face (smallest left edge; first one wins on ties).
    face = min(faces, key=lambda candidate: candidate.bbox[0])
    x1, y1, x2, y2 = face.bbox.astype(int)

    # Map the bounding box from detector space back to the original image.
    w_scale = orig_w / DETECTION_SIZE
    h_scale = orig_h / DETECTION_SIZE
    x1 = int(x1 * w_scale)
    y1 = int(y1 * h_scale)
    x2 = int(x2 * w_scale)
    y2 = int(y2 * h_scale)

    # Pad using the unpadded box size, then clamp to the image bounds.
    pad_x = int(padding * (x2 - x1))
    pad_y = int(padding * (y2 - y1))
    x1 = max(0, x1 - pad_x)
    y1 = max(0, y1 - pad_y)
    x2 = min(orig_w, x2 + pad_x)
    y2 = min(orig_h, y2 + pad_y)

    cropped_face = image[y1:y2, x1:x2]
    # A degenerate box (or negative padding) yields a zero-area crop that the
    # caller could not write or process; treat it as "no face".
    if cropped_face.size == 0:
        return None
    return cropped_face


@app.post("/upload/")
async def process_image(
    file: UploadFile = File(...),
    top: int = Form(...),
    bottom: int = Form(...),
    left: int = Form(...),
    right: int = Form(...),
):
    """Toonify the leftmost face found in the uploaded image.

    Args:
        file: Uploaded image file.
        top, bottom, left, right: Integers forwarded unchanged to
            ``model.detect_and_align_image``.

    Returns:
        A JPEG ``StreamingResponse`` on success, otherwise a dict with an
        ``"error"`` key describing what failed.
    """
    if model is None:
        load_model()

    # Read the uploaded image file
    contents = await file.read()

    # An empty upload cannot be decoded; cv2.imdecode raises on an empty
    # buffer, so leave the frame unset and fall through to the decode error.
    frame_bgr = None
    if contents:
        # Convert the uploaded image to a numpy array and decode it.
        nparr = np.frombuffer(contents, np.uint8)
        frame_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)  # Read as BGR format by default

    if frame_bgr is None:
        logging.error("Failed to decode the image.")
        return {"error": "Failed to decode the image. Please ensure the file is a valid image format."}

    logging.info("Uploaded image shape: %s", frame_bgr.shape)

    # Detect and crop face
    cropped_face = detect_and_crop_face(frame_bgr)
    if cropped_face is None:
        return {"error": "No face detected or alignment failed."}

    # Reserve a temporary .jpg path; the handle is closed before OpenCV writes
    # to the path so the write does not race with an open handle.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        temp_file_path = temp_file.name

    try:
        # Save the cropped face temporarily (inside the try so the file is
        # always cleaned up, even if the write fails).
        if not cv2.imwrite(temp_file_path, cropped_face):
            logging.error("Failed to write the cropped face to a temporary file.")
            return {"error": "Failed to save the cropped face for processing."}

        # Process the cropped face using the file path
        aligned_face, instyle, message = model.detect_and_align_image(
            temp_file_path, top, bottom, left, right
        )
        if aligned_face is None or instyle is None:
            logging.error("Failed to process the image: No face detected or alignment failed.")
            return {"error": message}

        processed_image, message = model.image_toonify(
            aligned_face, instyle, model.exstyle, style_degree=0.5, style_type='cartoon4'
        )
        if processed_image is None:
            logging.error("Failed to toonify the image.")
            return {"error": message}

        # Swap the first and third colour channels before encoding.
        # NOTE(review): cv2.imencode interprets its input as BGR, so this swap
        # is only right if image_toonify returns RGB - confirm against
        # vtoonify_model.
        processed_image_rgb = cv2.cvtColor(processed_image, cv2.COLOR_BGR2RGB)

        # Convert processed image to JPEG bytes
        encoded_ok, encoded_image = cv2.imencode('.jpg', processed_image_rgb)
        if not encoded_ok:
            logging.error("Failed to encode the processed image.")
            return {"error": "Failed to encode the processed image."}

        # Return the processed image as a streaming response
        return StreamingResponse(BytesIO(encoded_image.tobytes()), media_type="image/jpeg")
    finally:
        # Clean up the temporary file
        os.remove(temp_file_path)