Spaces:
Sleeping
Sleeping
import os | |
import sqlite3 | |
from docx import Document | |
# Initialize tokenizer
def read_file(file_path):
    """Read text from a Word (.docx) or plain-text (.txt) file.

    The file type is chosen from the file name's extension, compared
    case-insensitively, so ``NOTES.TXT`` and ``Report.Docx`` are accepted.

    Args:
        file_path: Location of the file, as a ``str`` or any
            ``os.PathLike`` object (for example ``pathlib.Path``).

    Returns:
        The file's text. For ``.docx`` files this is the text of every
        paragraph joined with newlines; for ``.txt`` files it is the full
        content decoded as UTF-8.

    Raises:
        ValueError: If the name ends in neither ``.docx`` nor ``.txt``.
    """
    # Normalise PathLike objects to a plain string so the suffix checks
    # below work for pathlib.Path arguments as well as str.
    path = os.fspath(file_path)
    # Compare against a lower-cased copy so the extension's case is ignored;
    # the original spelling is still used to open the file.
    lowered = path.lower()

    if lowered.endswith('.docx'):
        doc = Document(path)
        return "\n".join(para.text for para in doc.paragraphs)

    if lowered.endswith('.txt'):
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()

    raise ValueError("Unsupported file format. Only .docx and .txt are allowed.")