"""Gradio demo: pick the best-matching category for a product description.

The product text and every candidate category are embedded with the selected
transformer encoder (first-token / CLS vector of the last hidden state) and
the category with the highest cosine similarity to the product wins.
"""

from functools import lru_cache

import gradio as gr
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModel, AutoTokenizer

# Display name shown in the dropdown -> Hugging Face Hub checkpoint id.
MODELS = {
    "rubert-tiny2": "cointegrated/rubert-tiny2",
    "sbert": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "LaBSE": "sentence-transformers/LaBSE",
    "ruRoberta": "sberbank-ai/ruRoberta-large",
}


@lru_cache(maxsize=None)  # key space is bounded by MODELS, so this cannot grow unbounded
def _load_model(model_name):
    """Load (once per process) the tokenizer and encoder for *model_name*.

    Raises:
        KeyError: if *model_name* is not a key of ``MODELS``.
    """
    checkpoint = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModel.from_pretrained(checkpoint)
    return tokenizer, model


def get_embeddings(model, tokenizer, text):
    """Return the first-token embedding of *text* wrapped in the prompt.

    Args:
        model: encoder returned by ``AutoModel.from_pretrained``.
        tokenizer: matching tokenizer.
        text: raw text (a product description or a category name).

    Returns:
        A NumPy array holding ``last_hidden_state[:, 0]`` for the single
        prompted input (one row per input sequence).
    """
    # Wrap the text in the prompt template. Note that classify() sends both
    # the product and the category names through this same template.
    prompted_text = f"Товар: {text}. Категория:"
    inputs = tokenizer(
        prompted_text,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=512,
    )
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[:, 0].cpu().numpy()


def classify(model_name: str, item: str, categories: str) -> str:
    """Pick the category most similar to *item*.

    Args:
        model_name: key of ``MODELS`` selecting the encoder.
        item: product description.
        categories: comma-separated candidate category names; surrounding
            whitespace is stripped and blank entries are ignored.

    Returns:
        ``"<best category> (<cosine similarity with two decimals>)"``.

    Raises:
        gr.Error: if no known model is selected or no non-blank category
            was supplied (shown to the user by Gradio).
    """
    if model_name not in MODELS:
        # Covers the dropdown being left empty (None) as well as unknown names.
        raise gr.Error("Выберите модель из списка.")

    # Parse the category list once; drop blanks produced by "a,,b" or "a, ".
    labels = [part.strip() for part in (categories or "").split(",")]
    labels = [label for label in labels if label]
    if not labels:
        raise gr.Error("Укажите хотя бы одну категорию через запятую.")

    tokenizer, model = _load_model(model_name)

    # Embedding of the product.
    item_embedding = get_embeddings(model, tokenizer, item)

    # Embeddings of the candidate categories, stacked one row per label.
    category_embeddings = np.vstack(
        [get_embeddings(model, tokenizer, label) for label in labels]
    )

    # Compare the product against every category and keep the best match.
    similarities = cosine_similarity(item_embedding, category_embeddings)[0]
    best_idx = int(np.argmax(similarities))
    return f"{labels[best_idx]} ({similarities[best_idx]:.2f})"


demo = gr.Interface(
    fn=classify,
    inputs=[
        gr.Dropdown(list(MODELS.keys())),
        gr.Textbox(),
        gr.Textbox(value="Инструменты, Овощи, Техника"),
    ],
    outputs=gr.Textbox(),
)

if __name__ == "__main__":
    demo.launch()