Fix several bugs in main.py

a876c78 verified 12 months ago

9.54 kB

	import numpy as np
	import torch
	import math
	import easyocr
	import cv2
	import os
	import base64
	import json
	import requests
	from llama_cpp import Llama
	from PIL import Image
	from dotenv import load_dotenv

	from utils import *

	load_dotenv()

	SCALE_FACTOR = 4
	MAX_SIZE = 5_000_000
	MAX_SIDE = 8_000
	# ENGINE = ['easyocr']
	# ENGINE = ['anthropic', 'claude-3-5-sonnet-20240620']
	ENGINE = ['llama_cpp/v2/vision', 'qwen-vl-next_b2583']


	def main() -> None:
	model_weights = torch.load(relative_path('vision_model.pt'))
	model = NeuralNet()
	model.load_state_dict(model_weights)
	model.to(DEVICE)
	model.eval()
	with torch.no_grad():
	file_path = input('Enter file path: ')
	with Image.open(file_path) as image:
	image_size = image.size
	image = image.resize(IMAGE_SIZE, Image.Resampling.LANCZOS)
	image = TRANSFORM(image).to(DEVICE)
	output = model(image).tolist()[0]
	data = {
	'top': {
	'left': {
	'x': output[0] * image_size[0],
	'y': output[1] * image_size[1],
	},
	'right': {
	'x': output[2] * image_size[0],
	'y': output[3] * image_size[1],
	},
	},
	'bottom': {
	'left': {
	'x': output[4] * image_size[0],
	'y': output[5] * image_size[1],
	},
	'right': {
	'x': output[6] * image_size[0],
	'y': output[7] * image_size[1],
	},
	},
	'curvature': {
	'top': {
	'x': output[8] * image_size[0],
	'y': output[9] * image_size[1],
	},
	'bottom': {
	'x': output[10] * image_size[0],
	'y': output[11] * image_size[1],
	},
	},
	}
	print(f"{data=}")
	image = cv2.imread(file_path)
	size_x = ((data['top']['right']['x'] - data['top']['left']['x']) +
	(data['bottom']['right']['x'] - data['bottom']['left']['x'])) / 2
	size_y = ((data['top']['right']['y'] - data['top']['left']['y']) +
	(data['bottom']['right']['y'] - data['bottom']['left']['y'])) / 2
	margin_x = size_x * MARGIN
	margin_y = size_y * MARGIN
	points = np.array([
	(max(data['top']['left']['x'] - margin_x, 0),
	max(data['top']['left']['y'] - margin_y, 0)),
	(min(data['top']['right']['x'] + margin_x, image_size[0]),
	max(data['top']['right']['y'] - margin_y, 0)),
	(min(data['bottom']['right']['x'] + margin_x, image_size[0]),
	min(data['bottom']['right']['y'] + margin_y, image_size[1])),
	(max(data['bottom']['left']['x'] - margin_x, 0),
	min(data['bottom']['left']['y'] + margin_y, image_size[1])),
	(data['curvature']['top']['x'],
	max(data['curvature']['top']['y'] - margin_y, 0)),
	(data['curvature']['bottom']['x'],
	min(data['curvature']['bottom']['y'] + margin_y, image_size[1])),
	], dtype=np.float32)
	points_float: list[list[float]] = points.tolist()
	max_height = int(max([ # y: top left - bottom left, top right - bottom right, curvature top - curvature bottom
	abs(points_float[0][1] - points_float[3][1]),
	abs(points_float[1][1] - points_float[4][1]),
	abs(points_float[2][1] - points_float[5][1]),
	])) * SCALE_FACTOR
	max_width = int(max([ # x: top left - top right, bottom left - bottom right
	abs(points_float[0][0] - points_float[1][0]),
	abs(points_float[3][0] - points_float[2][0]),
	])) * SCALE_FACTOR
	destination_points = np.array([
	[0, 0],
	[max_width - 1, 0],
	[max_width - 1, max_height - 1],
	[0, max_height - 1],
	[max_width // 2, 0],
	[max_width // 2, max_height - 1],
	], dtype=np.float32)
	homography, _ = cv2.findHomography(points, destination_points)
	warped_image = cv2.warpPerspective(image, homography, (max_width, max_height))
	cv2.imwrite('_warped_image.png', warped_image)
	del data
	if ENGINE[0] == 'easyocr':
	reader = easyocr.Reader(['de', 'fr', 'en'], gpu=True)
	result = reader.readtext('_warped_image.png')
	os.remove('_warped_image.png')
	text = '\n'.join([r[1] for r in result])
	ingredients = {}
	elif ENGINE[0] == 'anthropic':
	decrease_size('_warped_image.png', '_warped_image.webp', MAX_SIZE, MAX_SIDE)
	os.remove('_warped_image.png')
	with open('_warped_image.webp', 'rb') as f:
	base64_image = base64.b64encode(f.read()).decode('utf-8')
	response = requests.post(
	url='https://api.anthropic.com/v1/messages',
	headers={
	'x-api-key': os.environ['ANTHROPIC_API_KEY'],
	'anthropic-version': '2023-06-01',
	'content-type': 'application/json',
	},
	data=json.dumps({
	'model': ENGINE[1],
	'max_tokens': 1024,
	'messages': [
	{
	'role': 'user', 'content': [
	{
	'type': 'image',
	'source': {
	'type': 'base64',
	'media_type': 'image/webp',
	'data': base64_image,
	},
	},
	{
	'type': 'text',
	'text': PROMPT_CLAUDE,
	},
	],
	},
	],
	}),
	)
	os.remove('_warped_image.webp')
	try:
	data = response.json()
	ingredients = json.loads('{' + data['content'][0]['text'].split('{', 1)[-1].rsplit('}', 1)[0] + '}')
	except Exception as e:
	print(data)
	raise e
	text = ''
	elif ENGINE[0] == 'llama_cpp/v2/vision':
	decrease_size('_warped_image.png', '_warped_image.webp', MAX_SIZE, MAX_SIDE)
	# os.remove('_warped_image.png')
	response = requests.post(
	url='http://127.0.0.1:11434/llama_cpp/v2/vision',
	headers={
	'x-version': '2024-05-21',
	'content-type': 'application/json',
	},
	data=json.dumps({
	'task': PROMPT_VISION,
	'model': ENGINE[1],
	'image_path': relative_path('_warped_image.webp'),
	}),
	)
	os.remove('_warped_image.webp')
	text: str = response.json()['text']
	ingredients = {}
	else:
	raise ValueError(f'Unknown engine: {ENGINE[0]}')
	if text != '':
	if DEVICE == 'cuda':
	n_gpu_layers = -1
	else:
	n_gpu_layers = 0
	llm = Llama(
	model_path=relative_path('llm.Q4_K_M.gguf'),
	n_gpu_layers=n_gpu_layers,
	)
	llm_result = llm.create_chat_completion(
	messages=[
	{
	'role': 'system',
	'content': SYSTEM_PROMPT,
	},
	{
	'role': 'user',
	'content': PROMPT_LLM.replace('{{old_data}}', text),
	},
	],
	max_tokens=1024,
	temperature=0,
	# grammar=GRAMMAR,
	)
	try:
	ingredients = json.loads(
	'{' + llm_result['choices'][0]['message']['content'].split('{', 1)[-1].rsplit('}', 1)[0] + '}')
	except Exception as e:
	print(f"{llm_result=}")
	raise e
	animal_ingredients = [item for item in ingredients['Zutaten'] if item in ANIMAL]
	sometimes_animal_ingredients = [item for item in ingredients['Zutaten'] if item in SOMETIMES_ANIMAL]
	milk_ingredients = ([item for item in ingredients['Zutaten'] if item in MILK]
	+ [item for item in ingredients['Verunreinigungen'] if item in MILK])
	gluten_ingredients = ([item for item in ingredients['Zutaten'] if item in GLUTEN]
	+ [item for item in ingredients['Verunreinigungen'] if item in GLUTEN])
	print('=' * 64)
	print('Zutaten: ' + ', '.join(ingredients['Zutaten']))
	print('=' * 64)
	print(('Kann Spuren von ' + ', '.join(ingredients['Verunreinigungen']) + ' enthalten.')
	if len(ingredients['Verunreinigungen']) > 0 else 'ohne Verunreinigungen')
	print('=' * 64)
	print('Gefundene tierische Zutaten: '
	+ (', '.join(animal_ingredients) if len(animal_ingredients) > 0 else 'keine'))
	print('=' * 64)
	print('Gefundene potenzielle tierische Zutaten: '
	+ (', '.join(sometimes_animal_ingredients) if len(sometimes_animal_ingredients) > 0 else 'keine'))
	print('=' * 64)
	print('Gefundene Milchprodukte: ' + (', '.join(milk_ingredients) if len(milk_ingredients) > 0 else 'keine'))
	print('=' * 64)
	print('Gefundene Gluten: ' + (', '.join(gluten_ingredients) if len(gluten_ingredients) > 0 else 'keine'))
	print('=' * 64)
	print(LEGAL_NOTICE)
	print('=' * 64)


	if __name__ == '__main__':
	main()