# Source: Hugging Face Space "VOIDER/CIET" (Space status when captured: Runtime error).
# File size: 19,967 bytes.

import gradio as gr
import torch
import os
import numpy as np
import cv2
import onnxruntime as rt
from PIL import Image
from transformers import pipeline
from huggingface_hub import hf_hub_download
import pandas as pd
import tempfile
import shutil
import base64
from io import BytesIO

# Import necessary function from aesthetic_predictor_v2_5
from aesthetic_predictor_v2_5 import convert_v2_5_from_siglip

class MLP(torch.nn.Module):
    """Fully connected regression head that reduces an embedding to one value.

    The network is four "Linear -> ReLU -> (BatchNorm1d | Identity) -> Dropout"
    stages of width 2048, 512, 256 and 128, followed by
    Linear(128, 32) -> ReLU -> Linear(32, 1).

    Args:
        input_size: Number of features in each input vector.
        xcol: Stored on the instance as ``self.xcol``; not used by the network.
        ycol: Stored on the instance as ``self.ycol``; not used by the network.
        batch_norm: When False, every BatchNorm1d slot is filled with an
            Identity module so the positions inside ``self.layers`` stay the
            same either way.
    """

    def __init__(self, input_size, xcol='emb', ycol='avg_rating', batch_norm=True):
        super().__init__()
        self.input_size = input_size
        self.xcol = xcol
        self.ycol = ycol

        nn = torch.nn
        # (output width, dropout probability) of each normalised hidden stage.
        hidden_stages = ((2048, 0.3), (512, 0.3), (256, 0.2), (128, 0.1))

        stack = []
        width_in = self.input_size
        for width_out, drop_p in hidden_stages:
            stack.append(nn.Linear(width_in, width_out))
            stack.append(nn.ReLU())
            stack.append(nn.BatchNorm1d(width_out) if batch_norm else nn.Identity())
            stack.append(nn.Dropout(drop_p))
            width_in = width_out

        # Un-normalised tail that produces the single output value.
        stack.append(nn.Linear(width_in, 32))
        stack.append(nn.ReLU())
        stack.append(nn.Linear(32, 1))

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)

class WaifuScorer(object):
    """Score images with a CLIP ViT-L/14 image encoder followed by an MLP head.

    Construction never raises: any failure while importing ``clip``, fetching
    the weights or loading the models is printed and recorded as
    ``self.available = False``. An unavailable scorer returns ``None`` scores.

    Args:
        model_path: Local weights file, or a ``"<user>/<repo>/<file>"`` string
            that is downloaded with ``hf_hub_download`` when no such local
            file exists. Defaults to ``"Eugeoter/waifu-scorer-v3/model.pth"``.
        device: Torch device string used for both the MLP and CLIP.
        cache_dir: Forwarded to ``hf_hub_download``.
        verbose: When True, prints a notice if the default path is used.
    """

    def __init__(self, model_path=None, device='cuda', cache_dir=None, verbose=False):
        self.verbose = verbose

        try:
            # Imported lazily so a missing ``clip`` package only disables this scorer.
            import clip

            if model_path is None:
                model_path = "Eugeoter/waifu-scorer-v3/model.pth"
                if self.verbose:
                    print(f"model path not set, switch to default: `{model_path}`")

            if not os.path.isfile(model_path):
                # Not a local file: treat the last three path segments as
                # "<user>/<repo>/<file>" on the Hugging Face Hub.
                split = model_path.split("/")
                username, repo_id, model_name = split[-3], split[-2], split[-1]
                model_path = hf_hub_download(f"{username}/{repo_id}", model_name, cache_dir=cache_dir)

            print(f"Loading WaifuScorer model from `{model_path}`")

            self.mlp = MLP(input_size=768)
            if model_path.endswith(".safetensors"):
                from safetensors.torch import load_file
                state_dict = load_file(model_path)
            else:
                state_dict = torch.load(model_path, map_location=device)
            self.mlp.load_state_dict(state_dict)
            self.mlp.to(device)

            self.model2, self.preprocess = clip.load("ViT-L/14", device=device)
            self.device = device
            self.dtype = torch.float32
            self.mlp.eval()
            self.available = True
        except Exception as e:
            # Deliberate best-effort: the app keeps running without this scorer.
            print(f"Unable to initialize WaifuScorer: {e}")
            self.available = False

    @torch.no_grad()
    def __call__(self, images):
        """Return one score per image, each clamped to the range 0-10.

        Args:
            images: A single PIL image, or a list/tuple of PIL images.

        Returns:
            A list of floats, one per input image. When the scorer failed to
            initialise, a list of ``None`` of the same length. An empty batch
            yields an empty list.
        """
        is_batch = isinstance(images, (list, tuple))

        if not self.available:
            return [None] * (len(images) if is_batch else 1)

        # Nothing to score; torch.cat() on an empty list would raise.
        if is_batch and len(images) == 0:
            return []

        if isinstance(images, Image.Image):
            images = [images]
        n = len(images)
        if n == 1:
            # A lone image is duplicated and the extra result dropped below.
            # NOTE(review): presumably guards against single-sample batch
            # issues in the MLP - confirm whether this is still needed in eval mode.
            images = images*2

        image_tensors = [self.preprocess(img).unsqueeze(0) for img in images]
        image_batch = torch.cat(image_tensors).to(self.device)
        image_features = self.model2.encode_image(image_batch)

        # L2-normalise the embeddings; zero norms are replaced by 1 to avoid
        # dividing by zero.
        l2 = image_features.norm(2, dim=-1, keepdim=True)
        l2[l2 == 0] = 1
        im_emb_arr = (image_features / l2).to(device=self.device, dtype=self.dtype)

        predictions = self.mlp(im_emb_arr)
        scores = predictions.clamp(0, 10).cpu().numpy().reshape(-1).tolist()

        return scores[:n]

def load_aesthetic_predictor_v2_5():
    """Build and return an Aesthetic Predictor V2.5 wrapper object.

    The returned object exposes ``inference(image) -> float``. The model and
    its preprocessor come from ``convert_v2_5_from_siglip``; when CUDA is
    available the model is cast to bfloat16 and moved to the GPU.
    """
    class AestheticPredictorV2_5_Impl: # Renamed class to avoid confusion
        def __init__(self):
            print("Loading Aesthetic Predictor V2.5...")
            self.model, self.preprocessor = convert_v2_5_from_siglip(
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )
            if torch.cuda.is_available():
                self.model = self.model.to(torch.bfloat16).cuda()

        def inference(self, image: Image.Image) -> float:
            """Return the predicted score for one PIL image as a Python float."""
            # preprocess image
            pixel_values = self.preprocessor(
                images=image.convert("RGB"), return_tensors="pt"
            ).pixel_values

            # Keep the input on the same device/dtype the model was moved to.
            if torch.cuda.is_available():
                pixel_values = pixel_values.to(torch.bfloat16).cuda()

            # predict aesthetic score
            with torch.inference_mode():
                score = self.model(pixel_values).logits.squeeze().float().cpu().numpy()

            # ``score`` is a 0-d array for a single image; convert it so the
            # declared ``float`` return type actually holds.
            return float(score)

    return AestheticPredictorV2_5_Impl() # Return an instance of the implementation class

def load_anime_aesthetic_model():
    """Fetch ``model.onnx`` from ``skytnt/anime-aesthetic`` and open it on the CPU provider."""
    onnx_file = hf_hub_download(repo_id="skytnt/anime-aesthetic", filename="model.onnx")
    return rt.InferenceSession(onnx_file, providers=['CPUExecutionProvider'])

def predict_anime_aesthetic(img, model):
    """Run the anime-aesthetic ONNX model on one image and return its raw score.

    The image is scaled to 0-1 floats, resized so its longer side is 768
    pixels (aspect ratio preserved), centred on a zero-filled 768x768x3
    canvas, reordered to channels-first with a leading batch axis, and fed to
    the model under the input name ``"img"``.

    Args:
        img: Array-like image with values in 0-255. Expected shape is
            (height, width, 3); 2-D and single-channel input is replicated to
            three channels and channels beyond the third are dropped.
        model: Object with an ONNX-Runtime style ``run(output_names, feeds)``.

    Returns:
        The first model output as a Python scalar.
    """
    img = np.array(img).astype(np.float32) / 255

    # The canvas below has exactly three channels, so coerce other layouts
    # instead of failing on the shape unpack or the slice assignment.
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
    if img.shape[2] == 1:
        img = np.repeat(img, 3, axis=2)
    elif img.shape[2] > 3:
        # NOTE(review): assumes the extra channel is alpha - confirm.
        img = img[:, :, :3]

    s = 768
    h, w = img.shape[:2]
    # Scale the longer side to ``s``. The shorter side is floored at 1 pixel
    # because a very elongated image would otherwise truncate to 0, which
    # cv2.resize rejects.
    if h > w:
        h, w = s, max(1, int(s * w / h))
    else:
        h, w = max(1, int(s * h / w)), s
    ph, pw = s - h, s - w

    img_input = np.zeros([s, s, 3], dtype=np.float32)
    img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = cv2.resize(img, (w, h))
    img_input = np.transpose(img_input, (2, 0, 1))  # HWC -> CHW
    img_input = img_input[np.newaxis, :]            # add batch axis
    pred = model.run(None, {"img": img_input})[0].item()
    return pred

class ImageEvaluationTool:
    """Load the four scoring models and evaluate, sort and render image scores.

    Each per-image result is a dict with the keys ``file_name``, ``img_data``
    (base64 JPEG thumbnail), ``aesthetic_shadow``, ``waifu_scorer``,
    ``aesthetic_predictor_v2_5``, ``anime_aesthetic`` and ``final_score``.
    A score is ``None`` when its model failed for that image.
    """

    # UI column labels whose result key is NOT simply the label lower-cased
    # with spaces replaced by underscores.
    _SORT_KEY_OVERRIDES = {
        "Aesthetic V2.5": "aesthetic_predictor_v2_5",
        "Anime Score": "anime_aesthetic",
    }

    def __init__(self):
        """Load every model and create a scratch directory (see ``cleanup``)."""
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")

        print("Loading models... This may take some time.")

        print("Loading Aesthetic Shadow model...")
        self.aesthetic_shadow = pipeline("image-classification", model="NeoChen1024/aesthetic-shadow-v2-backup", device=self.device)

        print("Loading Waifu Scorer model...")
        self.waifu_scorer = WaifuScorer(device=self.device, verbose=True)

        print("Loading Aesthetic Predictor V2.5...")
        self.aesthetic_predictor_v2_5 = load_aesthetic_predictor_v2_5()

        print("Loading Anime Aesthetic model...")
        self.anime_aesthetic = load_anime_aesthetic_model()

        print("All models loaded successfully!")

        self.temp_dir = tempfile.mkdtemp()

    def evaluate_image(self, image):
        """Score one image with every model and return a dict of results.

        Args:
            image: A PIL image, or an array accepted by ``Image.fromarray``.

        Returns:
            Dict with one entry per model plus ``final_score`` (the mean of
            the available model scores). Every value is a float clamped to
            0-10, or ``None`` when that model raised or was unavailable.
            Model failures are printed, never propagated.
        """
        results = {}

        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        try:
            shadow_result = self.aesthetic_shadow(images=[image])[0]
            hq_score = [p for p in shadow_result if p['label'] == 'hq'][0]['score']
            # Scale aesthetic_shadow to 0-10 and clamp
            results['aesthetic_shadow'] = float(np.clip(hq_score * 10.0, 0.0, 10.0))
        except Exception as e:
            print(f"Error in Aesthetic Shadow: {e}")
            results['aesthetic_shadow'] = None

        try:
            waifu_score = self.waifu_scorer([image])[0]
            if waifu_score is None:
                # The scorer reports None when it could not be initialised;
                # record N/A directly instead of relying on np.clip raising.
                results['waifu_scorer'] = None
            else:
                results['waifu_scorer'] = float(np.clip(waifu_score, 0.0, 10.0))
        except Exception as e:
            print(f"Error in Waifu Scorer: {e}")
            results['waifu_scorer'] = None

        try:
            v2_5_score = self.aesthetic_predictor_v2_5.inference(image)
            # Clamp v2.5 score
            v2_5_score_clamped = np.clip(v2_5_score, 0.0, 10.0)
            results['aesthetic_predictor_v2_5'] = float(np.round(v2_5_score_clamped, 4)) # Keep 4 decimal places after clamping
        except Exception as e:
            print(f"Error in Aesthetic Predictor V2.5: {e}")
            results['aesthetic_predictor_v2_5'] = None

        try:
            img_array = np.array(image)
            anime_score = predict_anime_aesthetic(img_array, self.anime_aesthetic)
            # Scale Anime Score to 0-10 and clamp
            results['anime_aesthetic'] = float(np.clip(anime_score * 10.0, 0.0, 10.0))
        except Exception as e:
            print(f"Error in Anime Aesthetic: {e}")
            results['anime_aesthetic'] = None

        # Calculate Final Score (simple average of available scores)
        valid_scores = [v for v in results.values() if v is not None]
        if valid_scores:
            results['final_score'] = float(np.clip(np.mean(valid_scores), 0.0, 10.0)) # Clamp final score too
        else:
            results['final_score'] = None

        return results

    def image_to_base64(self, image):
        """Encode a PIL image as a base64 string of its JPEG bytes."""
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    def process_single_image(self, file_path):
        """Evaluate one image file and return its result dict.

        Returns ``None`` (after printing the error) when the file cannot be
        opened or processed.
        """
        try:
            # The context manager closes the file handle once the pixels have
            # been copied into the converted image.
            with Image.open(file_path) as opened:
                img = opened.convert("RGB")
            eval_results = self.evaluate_image(img)
            thumbnail = img.copy()
            thumbnail.thumbnail((200, 200))
            img_base64 = self.image_to_base64(thumbnail)
            result = {
                'file_name': os.path.basename(file_path),
                'img_data': img_base64,
                **eval_results
            }
            return result
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            return None

    def process_images_evaluation(self, image_files): # Renamed and now for evaluation only
        """Evaluate several image files; files that fail are skipped."""
        results = []
        for file_path in image_files:
            result = self.process_single_image(file_path)
            if result is not None:
                results.append(result)
        return results

    def sort_results(self, results, sort_by="Final Score"): # New function for sorting
        """Sort ``results`` in place by a UI column label and return the list.

        Score columns sort highest first and "File Name" sorts ascending.
        Rows whose value for the column is missing or ``None`` always go last,
        keeping their relative order.

        Args:
            results: List of result dicts; reordered in place.
            sort_by: Column label as shown in the UI, e.g. "Anime Score".
        """
        key = self._SORT_KEY_OVERRIDES.get(sort_by)
        if key is None:
            key = sort_by.lower().replace(" ", "_") # Handle spaces and case

        present = [row for row in results if row.get(key) is not None]
        missing = [row for row in results if row.get(key) is None]
        present.sort(key=lambda row: row[key], reverse=sort_by != "File Name")

        # Slice assignment keeps the caller's list object (in-place contract).
        results[:] = present + missing
        return results

    @staticmethod
    def _score_cell(score, medium_threshold=5):
        """Render one score as a colour-coded table cell; ``None`` becomes N/A."""
        if score is None:
            return '<td class="bad-score">N/A</td>'
        if score >= 7:
            score_class = "good-score"
        elif score >= medium_threshold:
            score_class = "medium-score"
        else:
            score_class = "bad-score"
        return f'<td class="{score_class}">{score:.4f}</td>' # Format to 4 decimal places

    def generate_html_table(self, results):
        """Render result dicts as a styled HTML table string.

        Scores are shown with four decimals and colour-coded (good >= 7,
        medium >= 5, or >= 4 for Aesthetic Shadow); missing scores show
        "N/A". File names are HTML-escaped because they come from uploads.
        """
        # Function-scope import keeps this method self-contained.
        from html import escape

        table_head = """
        <style>
            .results-table {
                width: 100%;
                border-collapse: collapse;
                margin: 20px 0;
                font-family: Arial, sans-serif;
                background-color: transparent;
            }

            .results-table th,
            .results-table td {
                color: #eee;
                border: 1px solid #ddd;
                padding: 8px;
                text-align: center;
                background-color: transparent;
            }

            .results-table th {
                font-weight: bold;
            }

            .results-table tr:nth-child(even) {
                background-color: transparent;
            }

            .results-table tr:hover {
                background-color: rgba(255, 255, 255, 0.1);
            }

            .image-preview {
                max-width: 150px;
                max-height: 150px;
                display: block;
                margin: 0 auto;
            }

            .good-score {
                color: #0f0;
                font-weight: bold;
            }
            .bad-score {
                color: #f00;
                font-weight: bold;
            }
            .medium-score {
                color: orange;
                font-weight: bold;
            }
        </style>

        <table class="results-table">
            <thead>
                <tr>
                    <th>Image</th>
                    <th>File Name</th>
                    <th>Aesthetic Shadow</th>
                    <th>Waifu Scorer</th>
                    <th>Aesthetic V2.5</th>
                    <th>Anime Score</th>
                    <th>Final Score</th>
                </tr>
            </thead>
            <tbody>
        """

        table_tail = """
            </tbody>
        </table>
        """

        # (result key, threshold for the "medium" colour) in column order.
        score_columns = (
            ("aesthetic_shadow", 4),
            ("waifu_scorer", 5),
            ("aesthetic_predictor_v2_5", 5),
            ("anime_aesthetic", 5),
            ("final_score", 5),
        )

        parts = [table_head]
        for result in results:
            parts.append("<tr>")
            parts.append(f'<td><img src="data:image/jpeg;base64,{result["img_data"]}" class="image-preview"></td>')
            parts.append(f'<td>{escape(str(result["file_name"]))}</td>')
            for key, medium_threshold in score_columns:
                parts.append(self._score_cell(result[key], medium_threshold))
            parts.append("</tr>")
        parts.append(table_tail)

        return "".join(parts)

    def cleanup(self):
        """Delete the scratch directory created in ``__init__`` if it exists."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)

# Global variable to store evaluation results
# None until an evaluation has run (and again after "Clear Results"); otherwise
# the list of per-image result dicts written by the callbacks defined inside
# create_interface(). Being module-level, the same value is seen by every
# invocation of those callbacks.
global_results = None

def create_interface():
    """Build the Gradio Blocks UI for uploading, scoring and sorting images.

    Loads all models by constructing an ``ImageEvaluationTool``, lays out the
    upload / sort / button column and the progress / results column, and wires
    three callbacks: evaluate, re-sort, and clear.

    Returns:
        The ``gr.Blocks`` application object (not yet launched).
    """
    global global_results # Use the global variable

    evaluator = ImageEvaluationTool()
    sort_options = ["Final Score", "File Name", "Aesthetic Shadow", "Waifu Scorer", "Aesthetic V2.5", "Anime Score"] # Sort options

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # Comprehensive Image Evaluation Tool

        Upload images to evaluate them using multiple aesthetic and quality prediction models:

        - **Aesthetic Shadow**: Evaluates high-quality vs low-quality images (scaled to 0-10)
        - **Waifu Scorer**: Rates anime/illustration quality from 0-10
        - **Aesthetic Predictor V2.5**: General aesthetic quality prediction (clamped to 0-10)
        - **Anime Aesthetic**: Specific model for anime style images (scaled and clamped to 0-10)
        - **Final Score**: Average of available scores (clamped to 0-10)

        Upload multiple images to get a comprehensive evaluation table. Scores are clamped to the range 0.0000 - 10.0000.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                input_images = gr.Files(label="Upload Images")
                sort_dropdown = gr.Dropdown(sort_options, value="Final Score", label="Sort by") # Dropdown for sorting
                process_btn = gr.Button("Evaluate Images", variant="primary")
                clear_btn = gr.Button("Clear Results")

            with gr.Column(scale=2):
                progress_html = gr.HTML(label="Progress") # Keep progress_html if you want to show initial progress
                output_html = gr.HTML(label="Evaluation Results")

        def process_images_and_update(files, sort_by="Final Score"):
            """Evaluate the uploaded files one by one, streaming progress.

            This is a generator: every ``yield`` is a
            ``(progress_html, output_html)`` update. Values must be yielded,
            not returned, or they never reach the UI.
            """
            global global_results
            from html import escape

            # ``files`` is None when nothing was uploaded. Entries may be file
            # objects exposing ``.name`` or plain path strings.
            file_paths = [getattr(f, "name", f) for f in (files or [])]
            total = len(file_paths)

            if not file_paths: # Handle no files uploaded
                global_results = []
                yield "", evaluator.generate_html_table([]) # Empty table
                return

            results = []
            for i, file_path in enumerate(file_paths):
                percent = (i / total) * 100
                progress_bar = f"""
                <div>
                    <p>Processing {escape(os.path.basename(file_path))}</p>
                    <progress value="{percent}" max="100"></progress>
                    <p>{percent:.1f}% complete</p>
                </div>
                """
                yield progress_bar, gr.update() # Yield progress update
                # Evaluate inside the loop so the bar tracks real work.
                result = evaluator.process_single_image(file_path)
                if result is not None:
                    results.append(result)

            global_results = results
            # Honour the current dropdown choice so table and selector agree.
            sorted_results = evaluator.sort_results(global_results, sort_by=sort_by or "Final Score")
            html_table = evaluator.generate_html_table(sorted_results)
            yield "<p>Processing complete</p>", html_table # Final progress and table

        def update_table_sort(sort_by_column): # New function for sorting update
            """Re-render the stored results sorted by the selected column."""
            global global_results
            if global_results is None:
                return "No images evaluated yet." # Or handle case when no images are evaluated
            sorted_results = evaluator.sort_results(global_results, sort_by=sort_by_column or "Final Score")
            html_table = evaluator.generate_html_table(sorted_results)
            return html_table

        def clear_results():
            """Forget the stored results and blank both HTML panels."""
            global global_results
            global_results = None # Clear stored results
            return gr.update(value=""), gr.update(value="")


        process_btn.click(
            process_images_and_update,
            inputs=[input_images, sort_dropdown],
            outputs=[progress_html, output_html]
        )
        sort_dropdown.change( # Only update table on sort change
            update_table_sort,
            inputs=[sort_dropdown],
            outputs=[output_html] # Only update output_html
        )
        clear_btn.click(
            clear_results,
            inputs=[],
            outputs=[progress_html, output_html]
        )

        demo.load(lambda: None, inputs=None, outputs=None)

        gr.Markdown("""
        ### Notes
        - The evaluation may take some time depending on the number and size of images
        - For best results, use high-quality images
        - Scores are color-coded: green for good (>=7), orange for medium (>=5), and red for poor scores (<5, or <4 for Aesthetic Shadow)
        - Some models may fail for certain image types, shown as "N/A" in the results
        - "Final Score" is a simple average of available model scores.
        - Table is sortable by clicking the dropdown above the "Evaluate Images" button. Default sort is by "Final Score". Sorting happens instantly without re-evaluating images.
        """)

    return demo

# Script entry point: build the UI (this loads every model), enable Gradio's
# request queue, then start the web server.
if __name__ == "__main__":
    demo = create_interface()
    demo.queue().launch()