Spaces:
Sleeping
Sleeping
File size: 2,963 Bytes
b59ab77 b856854 b59ab77 033e470 09d42b9 07a4d78 09d42b9 69fb57b 09d42b9 876505c 095e948 69fb57b 095e948 876505c 095e948 033e470 de57b99 033e470 69fb57b 033e470 69fb57b 095e948 876505c 69fb57b b59ab77 033e470 07a4d78 b59ab77 07a4d78 b59ab77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
from database import save_to_db
from preprocessing import read_file
def process_file(files, topic):
    """
    Read each uploaded file, save its text to the database, and report per-file status.

    For every entry in `files` the function:
    1. Resolves the file path: `entry.name` when the entry has that attribute,
       otherwise the entry itself (so plain path strings are accepted too).
    2. Reads the text content with `read_file`.
       - Expected to handle `.docx`, `.txt`, and `.pdf` (per the upload label);
         confirm against `preprocessing.read_file`.
    3. Saves the whole text as a single-element list, together with `topic`,
       using `save_to_db`. No chunking is performed here.
    4. Appends a success or error line to the status log.

    An error on one file is caught and logged; the remaining files are still processed.

    Parameters:
    ----------
    files : list or None
        Uploaded file objects (anything with a `.name` path attribute) or path strings.
        `None` or an empty list means nothing was uploaded.
    topic : list or str
        Topic(s) associated with the files. Passed unchanged to `save_to_db`.

    Returns:
    -------
    str
        - One status line per file, joined with newlines.
        - 'No files uploaded.' when `files` is `None` or empty.

    Example:
    --------
    >>> process_file(None, "science")
    'No files uploaded.'
    """
    # The upload widget hands over None when the user submits without files;
    # iterating over None would raise TypeError.
    if not files:
        return "No files uploaded."

    log_history = []  # One status line per file
    for file in files:
        # Resolve the path BEFORE the try block so the error message below can
        # always name the file that actually failed.
        file_path = getattr(file, "name", file)
        try:
            # Read the file content
            text = read_file(file_path)
            # Debug preview of the extracted text (first 1000 characters)
            print(f"for file {file_path}", text[:1000])
            # Save the full text (as a single chunk) to the database
            save_to_db([text], topic)
        except Exception as e:
            # Boundary handler: one bad file must not abort the whole batch.
            log_history.append(f"Error processing for file {file_path}: {str(e)}")
        else:
            log_history.append(f"File {file_path} processed successfully! file saved to the database.")
    return "\n".join(log_history)
# Define Gradio interface: a multi-file upload and a topics text box side by side,
# followed by a submit button and a status box that shows the text returned by
# `process_file`.
with gr.Blocks() as demo:
    gr.Markdown("# Dataset Upload Interface")
    with gr.Row():
        # file_count="multiple" lets the user select several files, so the
        # handler receives a list.
        file_input = gr.File(label="Upload File (.docx or .txt or .pdf)", file_count="multiple")
        topic_input = gr.Textbox(label="Topics (comma-separated)", placeholder="e.g., science, technology, law, medicin")
    submit_button = gr.Button("Upload and Process")
    output_text = gr.Textbox(label="Status")
    # Clicking the button calls process_file(files, topic) and writes the
    # returned status string into the status box.
    submit_button.click(process_file, inputs=[file_input, topic_input], outputs=output_text)

# Launch the app only when run as a script, so importing this module (e.g. to
# reuse `process_file` or `demo`) does not start a server as a side effect.
if __name__ == "__main__":
    demo.launch()