Spaces:
Sleeping
Sleeping
import re | |
class TextProcessor:
    """Stateless helpers for cleaning text and formatting paper summaries."""

    @staticmethod
    def clean_text(text: str) -> str:
        """Clean and normalize text to single-spaced ASCII.

        Characters other than word characters, whitespace and the
        punctuation ``. , ; : ( ) - ' "`` are replaced by a space, runs of
        whitespace are collapsed to one space, non-ASCII characters are
        dropped, and the result is stripped.

        Args:
            text: Raw text. Any falsy value (``""``, ``None``) yields ``""``.

        Returns:
            The cleaned ASCII string, with no leading/trailing whitespace
            and no consecutive spaces.
        """
        if not text:
            return ""

        # Anything outside the allowed character set becomes a separator.
        text = re.sub(r'[^\w\s.,;:()\-\'"]', ' ', text)
        # Collapse whitespace *before* the ASCII step so that non-ASCII
        # whitespace (e.g. a no-break space) turns into a plain space
        # instead of being deleted, which would glue two words together.
        text = re.sub(r'\s+', ' ', text)
        text = text.encode('ascii', 'ignore').decode('ascii')
        # Dropping a token made only of non-ASCII characters leaves the
        # spaces on both sides of it adjacent; collapse those again.
        text = re.sub(r' {2,}', ' ', text)
        return text.strip()

    @staticmethod
    def format_paper(title: str, abstract: str, max_length: int = 1000) -> str:
        """Format a paper's title and abstract as a small text record.

        Both fields are passed through :meth:`clean_text`. An abstract
        longer than ``max_length`` is truncated so that the result,
        including the trailing ``"..."``, is exactly ``max_length``
        characters long.

        Args:
            title: Paper title.
            abstract: Paper abstract.
            max_length: Maximum length of the abstract in the output. When
                it is smaller than 3 there is no room for the ellipsis, so
                the abstract is cut hard to ``max_length`` characters
                (negative values are treated as 0).

        Returns:
            A string with a ``Title:`` line, an ``Abstract:`` line and a
            closing ``---`` line.
        """
        title = TextProcessor.clean_text(title)
        abstract = TextProcessor.clean_text(abstract)

        if len(abstract) > max_length:
            if max_length < 3:
                # A negative slice bound would count from the end of the
                # string and produce a result longer than max_length.
                abstract = abstract[:max(max_length, 0)]
            else:
                abstract = abstract[:max_length - 3] + "..."

        return f"""Title: {title}\nAbstract: {abstract}\n---"""