import json
import os
from datetime import datetime

from PIL import Image

import torch
import torchvision


__all__ = [
    'current_time',
    'relative_path',
    'NeuralNet',
    'DEVICE',
    'IMAGE_SIZE',
    'TRANSFORM',
    'MARGIN',
    'GRID_SIZE',
    'decrease_size',
    'PROMPT_LLM',
    'PROMPT_CLAUDE',
    'PROMPT_VISION',
    'EOS',
    'GRAMMAR',
    'SYSTEM_PROMPT',
    'ANIMAL',
    'SOMETIMES_ANIMAL',
    'MILK',
    'GLUTEN',
    'LEGAL_NOTICE',
]


MARGIN = 0.1
GRID_SIZE = 4096
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
IMAGE_SIZE = (224, 224)
TRANSFORM = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])
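# Example usage sketch ('example.jpg' is a hypothetical file): the transform
# above turns a PIL image into a model-ready tensor:
#
#     img = Image.open(relative_path('example.jpg')).convert('RGB')
#     tensor = TRANSFORM(img).to(DEVICE)  # float tensor [3, H, W], values in [0, 1]
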
# Prompt templates are read once at import time; note that these paths are
# resolved relative to the current working directory, not to this file.
with open('prompt_llm.md', 'r', encoding='utf-8') as _f:
    PROMPT_LLM = _f.read()
with open('prompt_claude.md', 'r', encoding='utf-8') as _f:
    PROMPT_CLAUDE = _f.read()
with open('prompt_vision.md', 'r', encoding='utf-8') as _f:
    PROMPT_VISION = _f.read()
EOS = '\n<|im_end|>'  # ChatML end-of-turn marker
SYSTEM_PROMPT = 'Du bist ein hilfreicher Assistent.'  # German: "You are a helpful assistant."
with open('grammar.gbnf', 'r', encoding='utf-8') as _f:
    GRAMMAR = _f.read()
with open('animal.json', 'r', encoding='utf-8') as _f:
    ANIMAL = json.load(_f)
with open('sometimes_animal.json', 'r', encoding='utf-8') as _f:
    SOMETIMES_ANIMAL = json.load(_f)
with open('milk.json', 'r', encoding='utf-8') as _f:
    MILK = json.load(_f)
with open('gluten.json', 'r', encoding='utf-8') as _f:
    GLUTEN = json.load(_f)


# German legal notice; roughly: "This program is intended for research purposes
# only. Errors cannot be ruled out and are likely present. Ingredient and
# contamination detection is meant only for quick pre-sorting, not for
# verification."
LEGAL_NOTICE = ('Dieses Programm ist nur für Forschungszwecke gedacht. Fehler können nicht ausgeschlossen werden und '
                'sind wahrscheinlich vorhanden. Die Erkennung von Zutaten und Verunreinigungen ist nur zum schnellen '
                'Aussortieren und nicht zum Überprüfen gedacht.')


def current_time() -> str:
    """Return the current local time as a filesystem-safe timestamp string."""
    return datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


def relative_path(string: str) -> str:
    """Resolve a path relative to the directory containing this file."""
    return os.path.join(os.path.dirname(__file__), string)


class NeuralNet(torch.nn.Module):
    def __init__(self):
        super().__init__()

        # Load a ResNet-18 backbone pre-trained on ImageNet
        # (the 'weights' API replaces the deprecated 'pretrained=True')
        self.backbone = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.DEFAULT)

        # Replace the final layer to output 12 values. Note that forward()
        # below bypasses backbone.avgpool and backbone.fc and uses the custom
        # head instead, so this layer is effectively unused.
        self.backbone.fc = torch.nn.Linear(self.backbone.fc.in_features, 12)

        # Add a custom head for key-point detection
        self.head = torch.nn.Sequential(
            torch.nn.Conv2d(512, 256, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(256, 128, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(128, 64, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(64, 12, kernel_size=1),
            torch.nn.AdaptiveAvgPool2d(1)
        )

    def forward(self, x):
        # Check if we need to unsqueeze
        if len(x.shape) == 3:  # Shape [C, H, W]
            x = x.unsqueeze(0)  # Shape [1, C, H, W]

        # Resize input to match ResNet input size if necessary
        if x.shape[-2:] != (224, 224):
            x = torch.nn.functional.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)

        # Pass input through the backbone
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        # Pass input through the custom head
        x = self.head(x)

        # Flatten the pooled head output from [N, 12, 1, 1] to [N, 12]
        x = x.view(x.size(0), -1)

        return x


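# Minimal usage sketch for NeuralNet (whether the 12 outputs encode six (x, y)
# key points is an assumption; this file does not say):
#
#     model = NeuralNet().to(DEVICE)
#     model.eval()
#     with torch.no_grad():
#         out = model(torch.randn(3, *IMAGE_SIZE, device=DEVICE))
#     out.shape  # -> torch.Size([1, 12]); 3D input is auto-batched in forward()
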
def decrease_size(input_path, output_path, max_size, max_side):
    """Re-encode an image and shrink it in 10% steps until the file is at most
    max_size bytes and both sides are at most max_side pixels."""
    # Derive the PIL format name from the extension; PIL expects 'JPEG', not 'JPG'
    fmt = os.path.splitext(output_path)[1].lstrip('.').upper()
    if fmt == 'JPG':
        fmt = 'JPEG'
    with Image.open(input_path) as img:
        original_size = os.path.getsize(input_path)
        width, height = img.size
        if original_size <= max_size and width <= max_side and height <= max_side:
            img.save(output_path, format=fmt)
            print("Image is already below the maximum size.")
            return
        # The first iteration re-encodes at the original resolution, which may
        # already be enough; afterwards both sides shrink by 10% per pass.
        while width > 24 and height > 24:
            img_resized = img.resize((width, height), Image.Resampling.LANCZOS)
            img_resized.save(output_path, format=fmt)
            if os.path.getsize(output_path) <= max_size and width <= max_side and height <= max_side:
                print(f"Reduced image size to {os.path.getsize(output_path)} bytes.")
                break
            width, height = int(width * 0.9), int(height * 0.9)
        if os.path.getsize(output_path) > max_size:
            raise ValueError("Could not reduce image size below max_size by reducing resolution.")