Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,39 @@ from database import save_to_db
|
|
| 3 |
from preprocessing import read_file
|
| 4 |
|
| 5 |
def process_file(file, topics):
|
| 6 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
try:
|
| 8 |
# Read the file content
|
| 9 |
file_path = file.name
|
|
@@ -21,7 +53,7 @@ def process_file(file, topics):
|
|
| 21 |
with gr.Blocks() as demo:
|
| 22 |
gr.Markdown("# Dataset Upload Interface")
|
| 23 |
with gr.Row():
|
| 24 |
-
file_input = gr.File(label="Upload File (.docx or .txt)")
|
| 25 |
topics_input = gr.Textbox(label="Topics (comma-separated)", placeholder="e.g., science, technology, law, medicin")
|
| 26 |
submit_button = gr.Button("Upload and Process")
|
| 27 |
output_text = gr.Textbox(label="Status")
|
|
|
|
| 3 |
from preprocessing import read_file
|
| 4 |
|
| 5 |
def process_file(file, topics):
|
| 6 |
+
"""
|
| 7 |
+
Processes an uploaded file, extracts its text content, and saves it to the database.
|
| 8 |
+
|
| 9 |
+
This function performs the following steps:
|
| 10 |
+
1. Reads the content of the uploaded file using the `read_file` function.
|
| 11 |
+
- Supports `.docx`, `.txt`, and `.pdf` file formats.
|
| 12 |
+
2. Splits the extracted text into chunks (if applicable).
|
| 13 |
+
3. Saves the processed text and associated topics to the database using the `save_to_db` function.
|
| 14 |
+
4. Returns a success message if the file is processed and saved successfully.
|
| 15 |
+
|
| 16 |
+
If any error occurs during processing, the function catches the exception and returns an error message.
|
| 17 |
+
|
| 18 |
+
Parameters
|
| 19 |
+
----------
|
| 20 |
+
file : object
|
| 21 |
+
The uploaded file object. The file's name (`file.name`) is used to determine the file path.
|
| 22 |
+
topics : list or str
|
| 23 |
+
A list of topics or a single topic string associated with the file. These are saved to the database along with the file content.
|
| 24 |
+
|
| 25 |
+
Returns
|
| 26 |
+
-------
|
| 27 |
+
str
|
| 28 |
+
- A success message indicating that the file was processed and saved successfully.
|
| 29 |
+
- An error message if an exception occurs during processing.
|
| 30 |
+
|
| 31 |
+
Examples
|
| 32 |
+
--------
|
| 33 |
+
>>> process_file(uploaded_file, ["Persian Literature", "History"])
|
| 34 |
+
'File processed successfully! File saved to the database.'
|
| 35 |
+
|
| 36 |
+
>>> process_file(unsupported_file, ["Science"])
|
| 37 |
+
'Error processing file: Unsupported file format. Only .docx, .txt, and .pdf are allowed.'
|
| 38 |
+
"""
|
| 39 |
try:
|
| 40 |
# Read the file content
|
| 41 |
file_path = file.name
|
|
|
|
| 53 |
with gr.Blocks() as demo:
|
| 54 |
gr.Markdown("# Dataset Upload Interface")
|
| 55 |
with gr.Row():
|
| 56 |
+
file_input = gr.File(label="Upload File (.docx or .txt or .pdf)")
|
| 57 |
topics_input = gr.Textbox(label="Topics (comma-separated)", placeholder="e.g., science, technology, law, medicin")
|
| 58 |
submit_button = gr.Button("Upload and Process")
|
| 59 |
output_text = gr.Textbox(label="Status")
|