Spaces:
Sleeping
Sleeping
File size: 908 Bytes
3af593c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import re
import unicodedata
class TextProcessor:
    """Stateless helpers for cleaning text and formatting paper records."""

    # Anything that is not a word character, whitespace, or basic punctuation.
    _DISALLOWED = re.compile(r'[^\w\s.,;:()\-\'"]')
    # Any run of whitespace (spaces, tabs, newlines).
    _WHITESPACE = re.compile(r'\s+')
    # Suffix appended to a truncated abstract.
    _ELLIPSIS = "..."

    @staticmethod
    def clean_text(text: str) -> str:
        """Return *text* reduced to single-spaced, ASCII-only content.

        Accented letters and compatibility characters are folded to their
        ASCII base form where one exists (``"café"`` -> ``"cafe"``);
        symbols outside the allowed punctuation set become a space;
        remaining non-ASCII characters are dropped.

        Args:
            text: Raw text. Any falsy value (``None``, ``""``) yields ``""``.

        Returns:
            The cleaned string, with no leading or trailing whitespace.
        """
        if not text:
            return ""

        # Filter symbols first, while they are still single characters, so
        # that e.g. an em dash becomes a word-separating space.
        text = TextProcessor._DISALLOWED.sub(' ', text)

        # Decompose what survived the filter and drop the combining marks so
        # base letters outlive the ASCII conversion instead of vanishing.
        decomposed = unicodedata.normalize('NFKD', text)
        text = ''.join(
            ch for ch in decomposed if not unicodedata.combining(ch)
        )
        text = text.encode('ascii', 'ignore').decode('ascii')

        # Collapse whitespace last: dropping characters above can leave
        # adjacent spaces behind.
        text = TextProcessor._WHITESPACE.sub(' ', text)
        return text.strip()

    @staticmethod
    def format_paper(title: str, abstract: str, max_length: int = 1000) -> str:
        """Format a paper's title and abstract as a text record.

        Both fields are passed through ``clean_text``. An abstract longer
        than *max_length* is cut so that the result, including a trailing
        ``"..."``, is exactly *max_length* characters long.

        Args:
            title: Paper title.
            abstract: Paper abstract.
            max_length: Maximum length of the abstract in the output. When
                smaller than the ellipsis itself, the abstract is
                hard-truncated without an ellipsis; negative values are
                treated as zero.

        Returns:
            ``"Title: <title>\\nAbstract: <abstract>\\n---"``.
        """
        title = TextProcessor.clean_text(title)
        abstract = TextProcessor.clean_text(abstract)

        if len(abstract) > max_length:
            ellipsis = TextProcessor._ELLIPSIS
            if max_length >= len(ellipsis):
                abstract = abstract[:max_length - len(ellipsis)] + ellipsis
            else:
                # No room for the ellipsis; a negative slice index here
                # would cut from the wrong end, so clamp at zero.
                abstract = abstract[:max(max_length, 0)]

        return f"""Title: {title}\nAbstract: {abstract}\n---"""
|