NomClass / app.py
hh1199's picture
Update app.py
0803d70 verified
raw
history blame
1.76 kB
from functools import lru_cache

import gradio as gr
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
# Short display name (shown in the UI dropdown) -> Hugging Face Hub checkpoint id
# passed to ``from_pretrained``. Insertion order is the order of the dropdown.
MODELS = {
    "rubert-tiny2": "cointegrated/rubert-tiny2",
    "sbert": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "LaBSE": "sentence-transformers/LaBSE",
    "ruRoberta": "sberbank-ai/ruRoberta-large",
}
def get_embeddings(model, tokenizer, texts):
    """Encode *texts* and return one embedding vector per text.

    Every text is represented by the last-layer hidden state at the first
    token position (index 0 along the sequence axis).

    Args:
        model: Callable that accepts the tokenizer output as keyword
            arguments and returns an object exposing ``last_hidden_state``.
        tokenizer: Callable that turns a list of strings into a mapping of
            PyTorch tensors (it is called with ``return_tensors="pt"``).
        texts: List of strings to encode.

    Returns:
        A NumPy array with one row per input text.
    """
    inputs = tokenizer(
        texts,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=512,
    )
    # Inference only: disabling autograd avoids building (and holding in
    # memory) a computation graph that would be thrown away right after.
    with torch.no_grad():
        outputs = model(**inputs)
    # .cpu() is a no-op for CPU tensors and keeps .numpy() valid otherwise.
    return outputs.last_hidden_state[:, 0].detach().cpu().numpy()
@lru_cache(maxsize=None)
def _load_model(model_name: str):
    """Load and memoise the (tokenizer, model) pair registered under *model_name*.

    The cache is unbounded on purpose: a failed lookup raises ``KeyError``
    before anything is stored, so it can hold at most ``len(MODELS)`` entries.
    """
    checkpoint = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModel.from_pretrained(checkpoint)
    return tokenizer, model


def classify(model_name: str, item: str, categories: str) -> str:
    """Pick the category whose embedding is closest to the item's embedding.

    Args:
        model_name: Key of ``MODELS`` selecting the encoder to use.
        item: Text describing the item to classify.
        categories: Comma-separated candidate category names; surrounding
            whitespace is stripped and empty entries are ignored.

    Returns:
        The best-matching category followed by its cosine similarity to the
        item, formatted as ``"<category> (<score with 2 decimals>)"``.

    Raises:
        gr.Error: If the model name is not registered, the item is blank, or
            no non-empty category was supplied.
    """
    if model_name not in MODELS:
        raise gr.Error("Выберите модель из списка")
    if not item or not item.strip():
        raise gr.Error("Введите название товара")

    # Drop blank entries ("a,,b", trailing commas, empty input) so they are
    # never embedded and can never win the comparison.
    labels = [c.strip() for c in (categories or "").split(",") if c.strip()]
    if not labels:
        raise gr.Error("Укажите хотя бы одну категорию")

    # Loading the weights is by far the slowest step; reuse them across calls.
    tokenizer, model = _load_model(model_name)

    # Encode the item and all categories in one batch: row 0 is the item,
    # the remaining rows follow the order of ``labels``.
    embeddings = get_embeddings(model, tokenizer, [item, *labels])
    item_embedding = embeddings[0].reshape(1, -1)
    category_embeddings = embeddings[1:]

    similarities = cosine_similarity(item_embedding, category_embeddings)[0]
    best_idx = int(np.argmax(similarities))
    return f"{labels[best_idx]} ({similarities[best_idx]:.2f})"
# Web UI: model selector, item text and a comma-separated category list in,
# best-matching category (with its similarity score) out.
iface = gr.Interface(
    fn=classify,
    inputs=[
        # Preselect the first registered model explicitly: on Gradio versions
        # where a dropdown starts empty, submitting without touching it would
        # pass None as the model name.
        gr.Dropdown(list(MODELS.keys()), value=next(iter(MODELS)), label="Модель"),
        gr.Textbox(label="Товар"),
        gr.Textbox(label="Категории", value="Инструменты, Овощи, Техника"),
    ],
    outputs=gr.Textbox(label="Результат"),
)
iface.launch()