import gradio as gr import pdfplumber from transformers import pipeline # Lżejszy model NER (publicznie dostępny) extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple") def extract_seller(pdf_file): with pdfplumber.open(pdf_file) as pdf: # Pobranie tekstu z PDF full_text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text()) # Podział tekstu na krótkie fragmenty (maks. 512 znaków) chunks = [full_text[i:i+512] for i in range(0, len(full_text), 512)] seller_name = None for chunk in chunks: entities = extractor(chunk) for entity in entities: if "ORG" in entity["entity_group"]: #