|
import gradio as gr |
|
from utils import update_db_hub |
|
from preprocessing import read_file, smart_chunking |
|
import datetime |
|
|
|
def process_file(files, topic):
    """
    Read uploaded files, split their text into chunks, and save the chunks to the database.

    This function performs the following steps for every uploaded file:
    1. Resolves the file path: ``file.name`` when the item has a ``name``
       attribute, otherwise the item itself (a plain path string).
    2. Reads the content of the file using the `read_file` function
       (the supported formats are whatever `read_file` accepts).
    3. Splits the extracted text into chunks using the `smart_chunking` function.
    4. Queues every chunk together with ``topic`` and an ISO-8601 timestamp.

    Once all files have been handled, the queued chunks are written with a
    single call to `update_db_hub`. Nothing is written when no chunk was
    produced.

    If an error occurs while reading or chunking a file, the exception is
    caught, an error line is recorded for that file, none of its chunks are
    queued, and processing continues with the next file.

    Parameters:
    ----------
    files : list, object, str or None
        The uploaded files. Either a list/tuple of items or a single item;
        each item is a file object exposing ``name`` or a path string.
        ``None`` or an empty list means nothing was uploaded.
    topic : list or str
        The topic(s) associated with the files. The value is stored unchanged
        next to every chunk.

    Returns:
    -------
    str
        - One status line per file (success or error), joined by newlines.
        - ``"No files were uploaded."`` when ``files`` is ``None`` or empty.

    Raises:
    ------
    Exception
        Errors raised by `update_db_hub` are not caught here and propagate
        to the caller.

    Example:
    --------
    >>> process_file(None, "Science")
    'No files were uploaded.'
    """
    # Gradio passes None when the user submits without uploading anything.
    if not files:
        return "No files were uploaded."

    # Accept a single upload as well as a list of uploads.
    if not isinstance(files, (list, tuple)):
        files = [files]

    texts = []
    topics = []
    dates = []
    log_history = []

    for file in files:
        # Resolve the path before entering the try block so the error message
        # below can always name the file it refers to.
        file_path = getattr(file, "name", file)

        try:
            text = read_file(file_path)
            # Materialize the chunks first: a file that fails midway must not
            # leave partial content queued for the database.
            chunks = list(smart_chunking(text))
        except Exception as e:
            log_history.append(f"Error processing for file {file_path}: {str(e)}")
            continue

        for chunk in chunks:
            texts.append(chunk)
            topics.append(topic)
            dates.append(datetime.datetime.now().isoformat())

        log_history.append(f"File {file_path} processed successfully! file saved to the database.")

    # Skip the database round-trip when there is nothing to store.
    if texts:
        print("save in db")
        update_db_hub(texts, topics, dates)

    return "\n".join(log_history)
|
|
|
|
|
# Gradio UI: a file picker and a topic box feed `process_file`; the returned
# status text is shown in the "Status" textbox.
with gr.Blocks() as demo:
    gr.Markdown("# Dataset Upload Interface")
    # NOTE(review): the original indentation was lost; this assumes only the
    # two input widgets sit inside the Row - confirm against the intended layout.
    with gr.Row():
        file_input = gr.File(label="Upload File (.docx or .txt or .pdf)", file_count="multiple")
        topic_input = gr.Textbox(label="Topics (comma-separated)", placeholder="e.g., science, technology, law, medicin")
    submit_button = gr.Button("Upload and Process")
    output_text = gr.Textbox(label="Status")

    # The topic text is passed to `process_file` exactly as typed.
    submit_button.click(process_file, inputs=[file_input, topic_input], outputs=output_text)


# Launch only when executed as a script so that importing this module
# (e.g. to reuse `process_file`) does not start a web server.
if __name__ == "__main__":
    demo.launch()