File size: 2,963 Bytes
b59ab77
b856854
 
b59ab77
033e470
09d42b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
07a4d78
09d42b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69fb57b
09d42b9
876505c
095e948
69fb57b
095e948
876505c
095e948
033e470
 
 
 
de57b99
033e470
 
 
 
 
69fb57b
033e470
69fb57b
095e948
 
876505c
69fb57b
b59ab77
 
 
 
 
033e470
07a4d78
b59ab77
 
 
07a4d78
b59ab77
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
from database import save_to_db
from preprocessing import read_file

def process_file(files, topic):
    """
    Read each uploaded file and store its extracted text in the database.

    For every entry in `files` the function:
    1. Resolves the file path: the entry's `.name` attribute when it has one,
       otherwise the entry itself (so plain path strings are accepted too).
    2. Extracts the text with `read_file`.
    3. Passes the whole text as a single-element list, together with `topic`,
       to `save_to_db`. No chunking is performed here.
    4. Records one success or error line for that file.

    A failure on one file is recorded in the result and does not stop the
    remaining files from being processed.

    Parameters:
    ----------
    files : list, tuple, object, str or None
        The uploaded files. Each entry is either an object exposing the file
        path as `.name` or a path string. A single entry that is not wrapped
        in a list/tuple is treated as a one-element list. `None` or an empty
        sequence means nothing was uploaded.
    topic : list or str
        Topic information associated with the files. It is forwarded to
        `save_to_db` unchanged.

    Returns:
    -------
    str
        - "No files were uploaded." when there is nothing to process.
        - Otherwise one status line per file, joined with newlines: a success
          message, or an error message containing the exception text.

    Example:
    --------
    >>> process_file([uploaded_file], ["Persian Literature", "History"])
    'File /tmp/doc.txt processed successfully! file saved to the database.'

    >>> process_file(None, ["Science"])
    'No files were uploaded.'
    """
    # The UI hands over None when the user submits without selecting a file.
    if files is None:
        files = []
    elif not isinstance(files, (list, tuple)):
        # Tolerate a single file passed without a surrounding list.
        files = [files]

    if not files:
        return "No files were uploaded."

    log_history = []  # One status line per processed file
    for file in files:
        # Resolve the path before entering the try block so the error message
        # below can always name the file that actually failed.
        file_path = getattr(file, "name", file)

        try:
            text = read_file(file_path)
            # Debug preview of the extracted text on stdout.
            print(f"for file {file_path}", text[:1000])

            # The full text is stored as a single chunk.
            save_to_db([text], topic)
        except Exception as e:
            # Per-file boundary: report the failure and continue with the rest.
            log_history.append(f"Error processing for file {file_path}: {str(e)}")
        else:
            log_history.append(f"File {file_path} processed successfully! file saved to the database.")

    return "\n".join(log_history)

# Build the upload UI: a file picker and a topic box side by side, followed by
# the trigger button and a status box.
with gr.Blocks() as demo:
    gr.Markdown("# Dataset Upload Interface")

    with gr.Row():
        file_input = gr.File(
            label="Upload File (.docx or .txt or .pdf)",
            file_count="multiple",
        )
        topic_input = gr.Textbox(
            label="Topics (comma-separated)",
            placeholder="e.g., science, technology, law, medicin",
        )

    submit_button = gr.Button("Upload and Process")
    output_text = gr.Textbox(label="Status")

    # Clicking the button runs process_file on the selected files and the
    # topic text, and shows the returned status string in the status box.
    submit_button.click(
        fn=process_file,
        inputs=[file_input, topic_input],
        outputs=output_text,
    )

# Start the Gradio server when this module is executed.
demo.launch()