Spaces:
Sleeping
Sleeping
File size: 502 Bytes
beccb39 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
import os
import sqlite3
from docx import Document
# File-reading helper for .docx and .txt inputs
def read_file(file_path):
    """Read text from a Word (.docx) or plain-text (.txt) file.

    Args:
        file_path: Path to the file, as a ``str`` or any ``os.PathLike``
            object (e.g. ``pathlib.Path``). The extension is matched
            case-insensitively, so ``REPORT.DOCX`` and ``notes.Txt`` are
            accepted.

    Returns:
        The file's text. For ``.docx`` files this is the text of every
        entry in ``Document(...).paragraphs`` joined with newlines; for
        ``.txt`` files it is the whole file decoded as UTF-8.

    Raises:
        ValueError: If the path ends with neither ``.docx`` nor ``.txt``.
    """
    # Normalize once so path-like objects work; they have no ``endswith``.
    path = os.fspath(file_path)
    # Compare against a lower-cased copy only; the real path keeps its case
    # because file systems may be case-sensitive.
    lowered = path.lower()

    if lowered.endswith('.docx'):
        doc = Document(path)
        return "\n".join(para.text for para in doc.paragraphs)

    if lowered.endswith('.txt'):
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()

    raise ValueError("Unsupported file format. Only .docx and .txt are allowed.")
|