Spaces:

muryshev
/

generic-chatbot-backend

Runtime error

init

57cf043 9 months ago

1.61 kB

	import logging
	import zipfile
	from pathlib import Path

	logger = logging.getLogger(__name__)

	class DocxToXml:
	    """Extract the raw ``word/document.xml`` part from a DOCX archive.

	    A DOCX file is opened here as a ZIP archive and the member named
	    ``word/document.xml`` is read and returned as text.
	    """

	    def __init__(self, docx_path: str):
	        """Initialize the converter with the path to a DOCX file.

	        Args:
	            docx_path (str): Path to the DOCX file.

	        Raises:
	            FileNotFoundError: If nothing exists at ``docx_path``.
	        """
	        self.docx_path = Path(docx_path)
	        if not self.docx_path.exists():
	            raise FileNotFoundError(f"File not found: {docx_path}")

	    def extract_document_xml(self) -> str:
	        """Return the text of ``word/document.xml`` from the DOCX file.

	        Returns:
	            str: Decoded content of ``word/document.xml``. A leading UTF-8
	            byte-order mark, if present, is not included.

	        Raises:
	            ValueError: If the archive has no ``word/document.xml`` member.
	            Exception: If the archive cannot be opened, read or decoded;
	                the underlying error is attached as ``__cause__``.
	        """
	        try:
	            with zipfile.ZipFile(self.docx_path) as docx_zip:
	                xml_bytes = docx_zip.read('word/document.xml')
	            # 'utf-8-sig' decodes exactly like 'utf-8' but drops a leading
	            # BOM, which would otherwise surface as U+FEFF in the result.
	            return xml_bytes.decode('utf-8-sig')
	        except KeyError as err:
	            # ZipFile.read raises KeyError for a missing member name.
	            raise ValueError("document.xml not found in the DOCX file") from err
	        except Exception as e:
	            # Keep the generic Exception type callers may already catch, but
	            # chain the original error so the root cause stays inspectable.
	            raise Exception(f"Error extracting XML: {e}") from e

	    @staticmethod
	    def convert_file(docx_path: str) -> str:
	        """Convert a DOCX file to its ``document.xml`` text in one call.

	        Args:
	            docx_path (str): Path to the DOCX file.

	        Returns:
	            str: Content of ``word/document.xml``.

	        Raises:
	            FileNotFoundError: If nothing exists at ``docx_path``.
	            ValueError: If the archive has no ``word/document.xml`` member.
	            Exception: If the archive cannot be opened, read or decoded.
	        """
	        return DocxToXml(docx_path).extract_document_xml()