Update app.py
Browse files
app.py
CHANGED
@@ -125,19 +125,18 @@ def extract_full_paper_with_labels(pdf_path, progress=None):
|
|
125 |
def process_pdf_file(pdf_file, api_key, repo_address):
|
126 |
if pdf_file is None:
|
127 |
return None, "No PDF file uploaded."
|
128 |
-
|
129 |
-
#
|
130 |
file_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file['name']
|
131 |
result = extract_full_paper_with_labels(file_path)
|
132 |
|
133 |
-
# Convert the result dictionary
|
134 |
df = pd.DataFrame([result])
|
135 |
base = os.path.splitext(result['filename'])[0]
|
136 |
parquet_filename = f"{base}.parquet"
|
137 |
df.to_parquet(parquet_filename, index=False)
|
138 |
|
139 |
repo_status = ""
|
140 |
-
# If API key and repo address are provided, attempt to upload the parquet file.
|
141 |
if api_key and repo_address:
|
142 |
api = HfApi()
|
143 |
try:
|
@@ -153,10 +152,9 @@ def process_pdf_file(pdf_file, api_key, repo_address):
|
|
153 |
else:
|
154 |
repo_status = "API key or repo address not provided, skipping repo upload."
|
155 |
|
156 |
-
# Return the parquet file for local download and the status message.
|
157 |
return parquet_filename, repo_status
|
158 |
|
159 |
-
#
|
160 |
def clear_files():
|
161 |
return None, None, ""
|
162 |
|
@@ -178,7 +176,6 @@ with gr.Blocks() as demo:
|
|
178 |
inputs=[pdf_file_input, api_key_input, repo_address_input],
|
179 |
outputs=[download_file_output, repo_status_output]
|
180 |
)
|
181 |
-
# The clear button now only clears file-related components; API key and Repo Address remain unchanged.
|
182 |
clear_button.click(
|
183 |
clear_files,
|
184 |
inputs=None,
|
@@ -186,3 +183,4 @@ with gr.Blocks() as demo:
|
|
186 |
)
|
187 |
|
188 |
demo.launch()
|
|
|
|
125 |
def process_pdf_file(pdf_file, api_key, repo_address):
|
126 |
if pdf_file is None:
|
127 |
return None, "No PDF file uploaded."
|
128 |
+
|
129 |
+
# Determine file path (Gradio returns a file object or dict)
|
130 |
file_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file['name']
|
131 |
result = extract_full_paper_with_labels(file_path)
|
132 |
|
133 |
+
# Convert the result dictionary to a DataFrame and write it as a parquet file.
|
134 |
df = pd.DataFrame([result])
|
135 |
base = os.path.splitext(result['filename'])[0]
|
136 |
parquet_filename = f"{base}.parquet"
|
137 |
df.to_parquet(parquet_filename, index=False)
|
138 |
|
139 |
repo_status = ""
|
|
|
140 |
if api_key and repo_address:
|
141 |
api = HfApi()
|
142 |
try:
|
|
|
152 |
else:
|
153 |
repo_status = "API key or repo address not provided, skipping repo upload."
|
154 |
|
|
|
155 |
return parquet_filename, repo_status
|
156 |
|
157 |
+
# Clear only file-related inputs/outputs, preserving API key and repo address.
|
158 |
def clear_files():
|
159 |
return None, None, ""
|
160 |
|
|
|
176 |
inputs=[pdf_file_input, api_key_input, repo_address_input],
|
177 |
outputs=[download_file_output, repo_status_output]
|
178 |
)
|
|
|
179 |
clear_button.click(
|
180 |
clear_files,
|
181 |
inputs=None,
|
|
|
183 |
)
|
184 |
|
185 |
demo.launch()
|
186 |
+
|