Spaces:
Running
on
Zero
Running
on
Zero
added more timings
Browse files
app.py
CHANGED
@@ -254,21 +254,21 @@ def predict(text_input, sample_size_slider, reduce_sample_checkbox, sample_reduc
|
|
254 |
stacked_df['parsed_field'] = [get_field(row) for ix, row in stacked_df.iterrows()]
|
255 |
extra_data = pd.DataFrame(stacked_df['doi'])
|
256 |
print(f"Visualization data prepared in {time.time() - viz_prep_start:.2f} seconds")
|
257 |
-
|
258 |
if citation_graph_checkbox:
|
|
|
259 |
citation_graph = create_citation_graph(records_df)
|
260 |
graph_file_name = f"{filename}_citation_graph.jpg"
|
261 |
graph_file_path = static_dir / graph_file_name
|
262 |
draw_citation_graph(citation_graph,path=graph_file_path,bundle_edges=True,
|
263 |
min_max_coordinates=[np.min(stacked_df['x']),np.max(stacked_df['x']),np.min(stacked_df['y']),np.max(stacked_df['y'])])
|
264 |
-
|
265 |
|
266 |
|
267 |
|
268 |
|
269 |
# Create and save plot
|
270 |
plot_start = time.time()
|
271 |
-
progress(0.7, desc="Creating plot...")
|
272 |
# Create a solid black colormap
|
273 |
black_cmap = mcolors.LinearSegmentedColormap.from_list('black', ['#000000', '#000000'])
|
274 |
|
@@ -337,7 +337,7 @@ def predict(text_input, sample_size_slider, reduce_sample_checkbox, sample_reduc
|
|
337 |
|
338 |
# Get the 30 most common labels
|
339 |
unique_labels, counts = np.unique(combined_labels, return_counts=True)
|
340 |
-
top_30_labels = set(unique_labels[np.argsort(counts)[-30:]])
|
341 |
|
342 |
# Replace less common labels with 'Unlabelled'
|
343 |
combined_labels = np.array(['Unlabelled' if label not in top_30_labels else label for label in combined_labels])
|
|
|
254 |
stacked_df['parsed_field'] = [get_field(row) for ix, row in stacked_df.iterrows()]
|
255 |
extra_data = pd.DataFrame(stacked_df['doi'])
|
256 |
print(f"Visualization data prepared in {time.time() - viz_prep_start:.2f} seconds")
|
|
|
257 |
if citation_graph_checkbox:
|
258 |
+
citation_graph_start = time.time()
|
259 |
citation_graph = create_citation_graph(records_df)
|
260 |
graph_file_name = f"{filename}_citation_graph.jpg"
|
261 |
graph_file_path = static_dir / graph_file_name
|
262 |
draw_citation_graph(citation_graph,path=graph_file_path,bundle_edges=True,
|
263 |
min_max_coordinates=[np.min(stacked_df['x']),np.max(stacked_df['x']),np.min(stacked_df['y']),np.max(stacked_df['y'])])
|
264 |
+
print(f"Citation graph created and saved in {time.time() - citation_graph_start:.2f} seconds")
|
265 |
|
266 |
|
267 |
|
268 |
|
269 |
# Create and save plot
|
270 |
plot_start = time.time()
|
271 |
+
progress(0.7, desc="Creating interactive plot...")
|
272 |
# Create a solid black colormap
|
273 |
black_cmap = mcolors.LinearSegmentedColormap.from_list('black', ['#000000', '#000000'])
|
274 |
|
|
|
337 |
|
338 |
# Get the 50 most common labels (the variable is still named top_30_labels)
|
339 |
unique_labels, counts = np.unique(combined_labels, return_counts=True)
|
340 |
+
top_30_labels = set(unique_labels[np.argsort(counts)[-50:]])
|
341 |
|
342 |
# Replace less common labels with 'Unlabelled'
|
343 |
combined_labels = np.array(['Unlabelled' if label not in top_30_labels else label for label in combined_labels])
|