import datetime

import gradio as gr

from preprocessing import read_file, smart_chunking
from utils import update_db_hub


def process_file(files, topic):
    """Read, chunk and store every uploaded file, returning a status log.

    For each uploaded file the text is extracted with ``read_file`` and split
    with ``smart_chunking``.  The chunks of all files that were read
    successfully are then written in a single ``update_db_hub`` call, each
    chunk paired with ``topic`` and the ISO-8601 timestamp taken when the
    chunk was collected.

    A failure while reading or chunking one file is recorded in the log and
    does not stop the remaining files from being processed.  A failure of the
    database write itself is reported for every file whose chunks were part
    of that write.

    Parameters
    ----------
    files : list or None
        The uploaded files as delivered by the ``gr.File`` component.  Each
        item's ``name`` attribute is used as the path; an item without that
        attribute is used as the path itself.  ``None`` or an empty list
        means nothing was uploaded.
    topic : str
        The topic value stored with every chunk.  It is passed to
        ``update_db_hub`` unchanged; the UI supplies the raw content of the
        "Topics" textbox, which is not split on commas here.

    Returns
    -------
    str
        One status line per file, joined with newlines, or a single notice
        when no files were uploaded.
    """
    # Gradio hands over None when the button is clicked without an upload.
    if not files:
        return "No files were uploaded."

    texts = []
    topics = []
    dates = []
    # One (file_path, chunk_count, error) entry per file, in upload order, so
    # the final wording can depend on whether the database write succeeded.
    outcomes = []

    for file in files:
        # Resolved outside the try block so the error message below can
        # always name the file that actually failed.
        file_path = getattr(file, "name", file)
        try:
            # Materialise the chunks inside the try block: if chunking is
            # lazy and fails midway, nothing from this file gets queued.
            chunks = list(smart_chunking(read_file(file_path)))
        except Exception as e:
            outcomes.append((file_path, 0, str(e)))
            continue

        texts.extend(chunks)
        topics.extend([topic] * len(chunks))
        dates.extend(datetime.datetime.now().isoformat() for _ in chunks)
        outcomes.append((file_path, len(chunks), None))

    save_error = None
    if texts:
        print("save in db")
        try:
            update_db_hub(texts, topics, dates)
        except Exception as e:
            # Report the failure in the status box instead of letting the
            # exception escape into the Gradio event handler.
            save_error = str(e)

    log_history = []
    for file_path, chunk_count, error in outcomes:
        if error is not None:
            log_history.append(
                f"Error processing for file {file_path}: {error}")
        elif chunk_count == 0:
            log_history.append(
                f"File {file_path} produced no text chunks; "
                "nothing was saved to the database.")
        elif save_error is not None:
            log_history.append(
                f"Error saving file {file_path} to the database: {save_error}")
        else:
            log_history.append(
                f"File {file_path} processed successfully! file saved to the database.")

    return "\n".join(log_history)


# Define Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Dataset Upload Interface")

    with gr.Row():
        file_input = gr.File(label="Upload File (.docx or .txt or .pdf)",
                             file_count="multiple")
        topic_input = gr.Textbox(label="Topics (comma-separated)",
                                 placeholder="e.g., science, technology, law, medicin")

    submit_button = gr.Button("Upload and Process")
    output_text = gr.Textbox(label="Status")

    # The status textbox shows the newline-joined log returned by process_file.
    submit_button.click(process_file,
                        inputs=[file_input, topic_input],
                        outputs=output_text)

# Launch the app
demo.launch()