Spaces:
Sleeping
Sleeping
| import re | |
class TextProcessor:
    """Stateless helpers for cleaning text and formatting paper summaries.

    Both helpers are static methods, so they can be called on the class
    (``TextProcessor.clean_text(...)``) or on an instance.
    """

    @staticmethod
    def clean_text(text: str) -> str:
        """Clean and normalize text content.

        Every character that is not a word character, whitespace, or one of
        ``. , ; : ( ) - ' "`` is replaced by a space; runs of whitespace are
        collapsed to a single space; remaining non-ASCII characters are
        dropped; and leading/trailing whitespace is stripped.

        Args:
            text: Raw text. Falsy values (``None``, ``""``) are accepted.

        Returns:
            The cleaned, ASCII-only string, or ``""`` for falsy input.
        """
        if not text:
            return ""

        # Replace disallowed symbols with a space so neighbouring words
        # do not get glued together.
        text = re.sub(r'[^\w\s.,;:()\-\'"]', ' ', text)
        # Normalize all whitespace (including Unicode spaces such as NBSP)
        # to single ASCII spaces *before* non-ASCII characters are dropped,
        # otherwise those separators would vanish entirely.
        text = re.sub(r'\s+', ' ', text)
        # Drop whatever non-ASCII characters are left (e.g. accented letters
        # that matched ``\w`` above).
        text = text.encode('ascii', 'ignore').decode('ascii')
        # Dropping a character that sat between two spaces leaves a double
        # space behind, so collapse once more.
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    @staticmethod
    def format_paper(
        title: str, abstract: str, max_length: int = 1000
    ) -> str:
        """Format a paper's title and abstract as a small text record.

        Both fields are passed through :meth:`clean_text`. An abstract longer
        than ``max_length`` characters is shortened to exactly ``max_length``
        characters, ending in ``"..."`` when there is room for it.

        Args:
            title: Paper title.
            abstract: Paper abstract.
            max_length: Maximum length of the abstract in the output.
                Values below 3 leave no room for the ellipsis, so the
                abstract is hard-truncated; negative values are treated
                as 0.

        Returns:
            A string of the form ``"Title: ...\\nAbstract: ...\\n---"``.
        """
        title = TextProcessor.clean_text(title)
        abstract = TextProcessor.clean_text(abstract)

        if len(abstract) > max_length:
            if max_length >= 3:
                abstract = abstract[:max_length - 3] + "..."
            else:
                # A negative slice end would keep almost the whole abstract
                # and overshoot the limit; truncate without the ellipsis.
                abstract = abstract[:max(max_length, 0)]

        return f"Title: {title}\nAbstract: {abstract}\n---"