"""Gradio app that scores uploaded images with several aesthetic/quality models."""

import atexit
import os
import shutil
import tempfile

import cv2
import gradio as gr
import numpy as np
import onnxruntime as rt
import pandas as pd
import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import pipeline


# Utility classes and functions from provided code
class MLP(torch.nn.Module):
    """Regression head mapping an embedding vector to a single score.

    The layer order inside ``self.layers`` determines the state-dict keys
    (``layers.0.weight`` ...), so it must not be rearranged if existing
    checkpoints are to keep loading.

    Args:
        input_size: Dimensionality of the input embedding.
        xcol: Stored on the instance; not used by ``forward``.
        ycol: Stored on the instance; not used by ``forward``.
        batch_norm: When False, every ``BatchNorm1d`` is replaced by
            ``Identity`` (layer indices stay the same).
    """

    def __init__(self, input_size, xcol='emb', ycol='avg_rating', batch_norm=True):
        super().__init__()
        self.input_size = input_size
        self.xcol = xcol
        self.ycol = ycol
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(self.input_size, 2048),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(2048) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(2048, 512),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(512) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(512, 256),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(256) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(128) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(128, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 1),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.layers(x)


class WaifuScorer(object):
    """Score images with a CLIP ViT-L/14 image encoder followed by an ``MLP`` head.

    Args:
        model_path: Local path to the MLP weights, or a
            ``<user>/<repo>/<file>`` string that is fetched from the
            Hugging Face Hub when no such local file exists.
        device: Torch device used for both CLIP and the MLP.
        cache_dir: Forwarded to ``hf_hub_download``.
        verbose: Print a notice when the default model path is used.
    """

    def __init__(self, model_path=None, device='cuda', cache_dir=None, verbose=False):
        self.verbose = verbose

        # Import clip here to avoid global import
        import clip

        if model_path is None:
            model_path = "Eugeoter/waifu-scorer-v4-beta/model.pth"
            if self.verbose:
                print(f"model path not set, switch to default: `{model_path}`")

        # Download from HuggingFace if needed
        if not os.path.isfile(model_path):
            split = model_path.split("/")
            username, repo_id, model_name = split[-3], split[-2], split[-1]
            model_path = hf_hub_download(f"{username}/{repo_id}", model_name, cache_dir=cache_dir)

        print(f"Loading WaifuScorer model from `{model_path}`")

        # Load MLP model
        self.mlp = MLP(input_size=768)
        # NOTE(security): torch.load unpickles the checkpoint; only point
        # model_path at sources you trust.
        s = torch.load(model_path, map_location=device)
        self.mlp.load_state_dict(s)
        self.mlp.to(device)

        # Load CLIP model
        self.model2, self.preprocess = clip.load("ViT-L/14", device=device)
        self.device = device
        self.dtype = torch.float32
        self.mlp.eval()

    @torch.no_grad()
    def __call__(self, images):
        """Return one score per image, each clamped to the range [0, 10].

        Args:
            images: A single ``PIL.Image.Image`` or a list of them.

        Returns:
            A list of floats with the same length as the input (an empty
            list for empty input).
        """
        if isinstance(images, Image.Image):
            images = [images]
        n = len(images)
        if n == 0:
            # torch.cat() on an empty list raises; nothing to score anyway.
            return []
        if n == 1:
            # Kept from the original scorer: BatchNorm in training mode needs
            # more than one sample. The MLP is in eval mode here, so this is
            # purely defensive; the duplicate is dropped before returning.
            images = images * 2

        # Preprocess and encode images
        image_tensors = [self.preprocess(img).unsqueeze(0) for img in images]
        image_batch = torch.cat(image_tensors).to(self.device)
        image_features = self.model2.encode_image(image_batch)

        # Normalize features (guard against division by a zero norm)
        l2 = image_features.norm(2, dim=-1, keepdim=True)
        l2[l2 == 0] = 1
        im_emb_arr = (image_features / l2).to(device=self.device, dtype=self.dtype)

        # Get predictions
        predictions = self.mlp(im_emb_arr)
        scores = predictions.clamp(0, 10).cpu().numpy().reshape(-1).tolist()

        # Return only the requested number of scores
        return scores[:n]


def load_aesthetic_predictor_v2_5():
    """Return a placeholder Aesthetic Predictor V2.5 object.

    No real model is loaded: the returned object's ``inference`` method
    yields a uniformly random value between 1 and 10, so its scores are
    only useful for exercising the surrounding pipeline.
    """

    class AestheticPredictorV2_5:
        def __init__(self):
            print("Loading Aesthetic Predictor V2.5...")
            # In a real implementation, this would load the actual model

        def inference(self, image):
            # Placeholder: random value between 1 and 10 for testing.
            # To be replaced with actual model inference.
            return np.random.uniform(1, 10)

    return AestheticPredictorV2_5()


def load_anime_aesthetic_model():
    """Download ``skytnt/anime-aesthetic`` and open it as a CPU ONNX session."""
    model_path = hf_hub_download(repo_id="skytnt/anime-aesthetic", filename="model.onnx")
    model = rt.InferenceSession(model_path, providers=['CPUExecutionProvider'])
    return model


def predict_anime_aesthetic(img, model):
    """Score an image with the anime-aesthetic ONNX model.

    The image is scaled so its longer side is 768 px, centred on a
    zero-filled 768x768 canvas, converted to channel-first layout with a
    leading batch axis, and fed to the session under the input name
    ``"img"``.

    Args:
        img: Array-like image with three colour channels in the last axis
            and values in 0-255.
        model: Object exposing ``run(output_names, feed_dict)`` like an
            ``onnxruntime.InferenceSession``.

    Returns:
        The scalar prediction extracted from the first model output.
    """
    img = np.array(img).astype(np.float32) / 255
    s = 768
    h, w = img.shape[:2]
    # Clamp the shorter side to >= 1 px: for extreme aspect ratios the
    # integer truncation would otherwise yield 0 and cv2.resize would fail.
    if h > w:
        h, w = s, max(1, int(s * w / h))
    else:
        h, w = max(1, int(s * h / w)), s
    ph, pw = s - h, s - w
    img_input = np.zeros([s, s, 3], dtype=np.float32)
    img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = cv2.resize(img, (w, h))
    img_input = np.transpose(img_input, (2, 0, 1))
    img_input = img_input[np.newaxis, :]
    pred = model.run(None, {"img": img_input})[0].item()
    return pred


class ImageEvaluationTool:
    """Load every scoring model once and evaluate images against all of them."""

    def __init__(self):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")

        # Load all models
        print("Loading models... This may take some time.")

        # 1. Aesthetic Shadow
        print("Loading Aesthetic Shadow model...")
        self.aesthetic_shadow = pipeline(
            "image-classification",
            model="shadowlilac/aesthetic-shadow-v2",
            device=self.device,
        )

        try:
            # 2. Waifu Scorer (requires CLIP)
            print("Loading Waifu Scorer model...")
            self.waifu_scorer = WaifuScorer(device=self.device, verbose=True)
        except Exception as e:
            # Best effort: the tool keeps working without this scorer.
            print(f"Error loading Waifu Scorer: {e}")
            self.waifu_scorer = None

        # 3. Aesthetic Predictor V2.5 (placeholder)
        print("Loading Aesthetic Predictor V2.5...")
        self.aesthetic_predictor_v2_5 = load_aesthetic_predictor_v2_5()

        # 4. Cafe Aesthetic models
        print("Loading Cafe Aesthetic models...")
        self.cafe_aesthetic = pipeline("image-classification", "cafeai/cafe_aesthetic")
        self.cafe_style = pipeline("image-classification", "cafeai/cafe_style")
        self.cafe_waifu = pipeline("image-classification", "cafeai/cafe_waifu")

        # 5. Anime Aesthetic
        print("Loading Anime Aesthetic model...")
        self.anime_aesthetic = load_anime_aesthetic_model()

        print("All models loaded successfully!")

        # Create temp directory for storing processed images
        self.temp_dir = tempfile.mkdtemp()

    def evaluate_image(self, image):
        """Evaluate a single image with all models.

        Each model runs inside its own ``try`` block; when one fails, its
        result keys are set to ``None`` and the remaining models still run.

        Args:
            image: A ``PIL.Image.Image`` or an array accepted by
                ``Image.fromarray``.

        Returns:
            Dict with keys ``aesthetic_shadow``, ``waifu_scorer``,
            ``aesthetic_predictor_v2_5``, ``cafe_aesthetic_good``,
            ``cafe_aesthetic_bad``, ``cafe_style``, ``cafe_waifu`` and
            ``anime_aesthetic``.
        """
        results = {}

        # Convert to PIL Image if not already
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        # The anime-aesthetic preprocessing writes into a 3-channel canvas,
        # so RGBA / greyscale / palette inputs must be normalised first.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # 1. Aesthetic Shadow
        try:
            shadow_result = self.aesthetic_shadow(images=[image])[0]
            hq_score = [p for p in shadow_result if p['label'] == 'hq'][0]['score']
            results['aesthetic_shadow'] = round(hq_score, 2)
        except Exception as e:
            print(f"Error in Aesthetic Shadow: {e}")
            results['aesthetic_shadow'] = None

        # 2. Waifu Scorer
        if self.waifu_scorer:
            try:
                waifu_score = self.waifu_scorer([image])[0]
                results['waifu_scorer'] = round(waifu_score, 2)
            except Exception as e:
                print(f"Error in Waifu Scorer: {e}")
                results['waifu_scorer'] = None
        else:
            results['waifu_scorer'] = None

        # 3. Aesthetic Predictor V2.5
        try:
            v2_5_score = self.aesthetic_predictor_v2_5.inference(image)
            results['aesthetic_predictor_v2_5'] = round(v2_5_score, 2)
        except Exception as e:
            print(f"Error in Aesthetic Predictor V2.5: {e}")
            results['aesthetic_predictor_v2_5'] = None

        # 4. Cafe Aesthetic
        try:
            cafe_aesthetic_result = self.cafe_aesthetic(image, top_k=2)
            cafe_aesthetic_score = {
                d["label"]: round(d["score"], 2) for d in cafe_aesthetic_result
            }
            results['cafe_aesthetic_good'] = cafe_aesthetic_score.get('good', 0)
            results['cafe_aesthetic_bad'] = cafe_aesthetic_score.get('bad', 0)

            cafe_style_result = self.cafe_style(image, top_k=1)
            results['cafe_style'] = cafe_style_result[0]["label"]

            cafe_waifu_result = self.cafe_waifu(image, top_k=1)
            results['cafe_waifu'] = cafe_waifu_result[0]["label"]
        except Exception as e:
            print(f"Error in Cafe Aesthetic: {e}")
            results['cafe_aesthetic_good'] = None
            results['cafe_aesthetic_bad'] = None
            results['cafe_style'] = None
            results['cafe_waifu'] = None

        # 5. Anime Aesthetic
        try:
            img_array = np.array(image)
            anime_score = predict_anime_aesthetic(img_array, self.anime_aesthetic)
            results['anime_aesthetic'] = round(anime_score, 2)
        except Exception as e:
            print(f"Error in Anime Aesthetic: {e}")
            results['anime_aesthetic'] = None

        return results

    def process_images(self, image_files):
        """Process multiple image files and return results.

        Args:
            image_files: Iterable of image file paths.

        Returns:
            A list with one dict per successfully processed file holding
            ``file_name``, ``thumbnail`` (path of a JPEG thumbnail written
            to ``self.temp_dir``) and every key produced by
            ``evaluate_image``. Files that raise are reported via ``print``
            and skipped.
        """
        results = []

        for i, file_path in enumerate(image_files):
            try:
                # Open image
                img = Image.open(file_path).convert("RGB")

                # Get image evaluation results
                eval_results = self.evaluate_image(img)

                # Save a thumbnail for the results table
                # (thumbnail() shrinks in place, so it runs after evaluation)
                thumbnail_path = os.path.join(self.temp_dir, f"thumbnail_{i}.jpg")
                img.thumbnail((200, 200))
                img.save(thumbnail_path)

                # Add file info and thumbnail path to results
                result = {
                    'file_name': os.path.basename(file_path),
                    'thumbnail': thumbnail_path,
                    **eval_results,
                }
                results.append(result)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

        return results

    def cleanup(self):
        """Clean up temporary files"""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)


# Create the Gradio interface
def create_interface():
    """Build the Gradio Blocks UI and return it (not yet launched).

    Instantiates ``ImageEvaluationTool`` (which loads all models) and
    registers its ``cleanup`` to run at interpreter exit so the thumbnail
    directory does not outlive the process.
    """
    evaluator = ImageEvaluationTool()
    # Remove the thumbnail temp directory when the process exits.
    atexit.register(evaluator.cleanup)

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # Comprehensive Image Evaluation Tool

        Upload images to evaluate them using multiple aesthetic and quality prediction models:

        - **Aesthetic Shadow**: Evaluates high-quality vs low-quality images
        - **Waifu Scorer**: Rates anime/illustration quality from 0-10
        - **Aesthetic Predictor V2.5**: General aesthetic quality prediction
        - **Cafe Aesthetic**: Multiple models for style and quality analysis
        - **Anime Aesthetic**: Specific model for anime style images

        Upload multiple images to get a comprehensive evaluation table.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                input_images = gr.Files(label="Upload Images")
                process_btn = gr.Button("Evaluate Images", variant="primary")
                clear_btn = gr.Button("Clear Results")

            with gr.Column(scale=2):
                output_gallery = gr.Gallery(
                    label="Evaluated Images", columns=5, object_fit="contain"
                )
                output_table = gr.Dataframe(label="Evaluation Results")

        def process_images(files):
            """Evaluate the uploaded files; return (gallery items, table)."""
            # The button can be pressed before anything is uploaded.
            if not files:
                return None, None

            # Get file paths (upload objects expose .name; plain paths pass through)
            file_paths = [getattr(f, "name", f) for f in files]

            # Process images
            results = evaluator.process_images(file_paths)

            # Gallery accepts (image, caption) tuples, not dicts.
            gallery_images = [(r["thumbnail"], r["file_name"]) for r in results]

            # Create DataFrame for the table
            table_data = [
                {
                    "File Name": r["file_name"],
                    "Aesthetic Shadow": r["aesthetic_shadow"],
                    "Waifu Scorer": r["waifu_scorer"],
                    "Aesthetic V2.5": r["aesthetic_predictor_v2_5"],
                    "Cafe (Good)": r["cafe_aesthetic_good"],
                    "Cafe (Bad)": r["cafe_aesthetic_bad"],
                    "Cafe Style": r["cafe_style"],
                    "Cafe Waifu": r["cafe_waifu"],
                    "Anime Score": r["anime_aesthetic"],
                }
                for r in results
            ]
            df = pd.DataFrame(table_data)

            return gallery_images, df

        def clear_results():
            """Reset both output components."""
            return None, None

        process_btn.click(
            process_images,
            inputs=[input_images],
            outputs=[output_gallery, output_table],
        )
        clear_btn.click(
            clear_results,
            inputs=[],
            outputs=[output_gallery, output_table],
        )

        gr.Markdown("""
        ### Notes
        - The evaluation may take some time depending on the number and size of images
        - For best results, use high-quality images
        - Scores are on different scales depending on the model
        """)

    return demo


# Launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.queue().launch()