"""Gradio demo: assign a product name to the closest of several categories.

The product name and every category name are embedded with a selectable
Hugging Face encoder; the category whose embedding has the highest cosine
similarity to the product embedding is reported.
"""
import functools

import gradio as gr
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModel, AutoTokenizer

# Display name shown in the UI -> Hugging Face Hub checkpoint id.
MODELS = {
    "rubert-tiny2": "cointegrated/rubert-tiny2",
    "sbert": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "LaBSE": "sentence-transformers/LaBSE",
    "ruRoberta": "sberbank-ai/ruRoberta-large",
}

# Upper bound on how many (tokenizer, model) pairs stay resident at once.
# Kept small on purpose: some checkpoints are large, so caching every model
# could exhaust memory on a modest host.
_MODEL_CACHE_SIZE = 2


@functools.lru_cache(maxsize=_MODEL_CACHE_SIZE)
def _load_model(model_name):
    """Return the ``(tokenizer, model)`` pair for a key of ``MODELS``.

    Results are memoized so repeated requests with the same model do not
    reload the checkpoint on every call.

    Raises:
        KeyError: if ``model_name`` is not a key of ``MODELS``.
    """
    checkpoint = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModel.from_pretrained(checkpoint)
    return tokenizer, model


def get_embeddings(model, tokenizer, texts):
    """Embed ``texts`` and return one vector per text as a NumPy array.

    The texts are tokenized as a single padded batch (truncated to 512
    tokens) and each text is represented by the last-layer hidden state at
    token position 0.

    Args:
        model: encoder whose output exposes ``last_hidden_state``.
        tokenizer: tokenizer matching ``model``.
        texts: list of strings to embed.

    Returns:
        Array with one row per input text.
    """
    inputs = tokenizer(
        texts,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=512,
    )
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE(review): position 0 is presumably the [CLS]/<s> token for these
    # tokenizers; some sentence-transformers checkpoints are trained with
    # mean pooling instead - confirm this pooling choice is intended.
    return outputs.last_hidden_state[:, 0].numpy()


def classify(model_name: str, item: str, categories: str) -> str:
    """Pick the category most similar to ``item``.

    Args:
        model_name: key of ``MODELS`` selecting the encoder.
        item: product name to classify.
        categories: comma-separated category names; surrounding whitespace
            is stripped and blank entries are ignored.

    Returns:
        ``"<category> (<similarity>)"`` with the cosine similarity rounded
        to two decimals.

    Raises:
        gr.Error: if no known model is selected or no non-blank category
            is given (shown to the user by Gradio).
    """
    if model_name not in MODELS:
        raise gr.Error("Выберите модель.")

    # Drop blanks so that "" or a stray comma is never scored as a category.
    names = [name.strip() for name in (categories or "").split(",")]
    names = [name for name in names if name]
    if not names:
        raise gr.Error("Укажите хотя бы одну категорию.")

    tokenizer, model = _load_model(model_name)

    # Row 0 is the item; the remaining rows line up with ``names``.
    embeddings = get_embeddings(model, tokenizer, [item] + names)
    similarities = cosine_similarity(embeddings[:1], embeddings[1:])[0]

    best_idx = int(np.argmax(similarities))
    return f"{names[best_idx]} ({similarities[best_idx]:.2f})"


iface = gr.Interface(
    fn=classify,
    inputs=[
        gr.Dropdown(list(MODELS.keys()), label="Модель"),
        gr.Textbox(label="Товар"),
        gr.Textbox(label="Категории", value="Инструменты, Овощи, Техника"),
    ],
    outputs=gr.Textbox(label="Результат"),
)

# Start the web server only when executed as a script, so importing this
# module (e.g. to reuse ``classify``) has no side effects.
if __name__ == "__main__":
    iface.launch()