# NOTE: this source was captured from a Hugging Face Spaces page that was
# showing "Runtime error"; the application code follows below.
# Standard library
import base64
import os
import tarfile
import time

# Third-party
import altair as alt
import numpy as np
import pandas as pd
import requests
import streamlit as st

# Local helpers — the star import presumably also supplies get_timestamp,
# make_dir_if_not_exist, untar, archive_dir, ToBase64, tqdm and datetime,
# which are used below without an explicit import (TODO confirm in backend.py).
from backend import *
# Hard cap on papers per run. The prompt below tells the user "up to 10
# links", but the original code set this to 10000 — the two now agree.
predefined_limits = 10

st.set_page_config(
    page_title="arXiv2Latex Downloader",
    page_icon=":page_with_curl:",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        "About": "Download the source latex code of multiple arXiv paper with one click"
    },
)

# Page title
st.title("arXiv2Latex Downloader")

# One arXiv abstract URL per line
pdf_links_input = st.text_area("Please input the paper links you want to download following the format (Currently supports up to 10 links).", "")
# Usage example shown under the text area. The fenced code block must be
# closed before the string ends, otherwise the markdown renders broken
# (the original omitted the closing fence).
st.markdown("""
Input example:
```Plain Text
https://arxiv.org/abs/1512.03385
https://arxiv.org/abs/1706.03762
https://arxiv.org/abs/2009.09724
```
""")
## One-click download: fetch each paper's LaTeX source tarball, extract it,
## zip everything up, and offer the zip as a base64 data-URL download link.
crawling_or_not = st.button("Crawling the latex Code")
if crawling_or_not:
    print("Crawling...")
    pdf_lists = pdf_links_input.split("\n")
    print(pdf_lists)
    # Strip first, then filter: the original filtered on raw length, which
    # let whitespace-only lines through as empty strings.
    pdf_lists = [link.strip() for link in pdf_lists if link.strip()]
    if len(pdf_lists) > predefined_limits:
        st.warning(f"Currently only support up to {predefined_limits} papers. Please input less than {predefined_limits} papers.")
    else:
        # One timestamped working directory per run: ./download/<timestamp>/
        base = './download/'
        project_name = get_timestamp().replace(" ", "-")
        base = os.path.join(base, project_name)
        make_dir_if_not_exist(base)
        with st.spinner("Downloading papers..."):
            bar = st.progress(0)
            download_status = st.empty()
            N = len(pdf_lists)
            for i, pdf_link in tqdm(enumerate(pdf_lists)):
                title = get_name_from_arvix(pdf_link)
                # arXiv serves the LaTeX source tarball at /e-print/<id>
                file_stamp = pdf_link.split("/")[-1]
                source_link = "https://arxiv.org/e-print/" + file_stamp
                inp = os.path.join(base, 'input')
                make_dir_if_not_exist(inp)
                out = os.path.join(base, 'output')
                make_dir_if_not_exist(out)
                response = requests.get(source_link)
                # Fail fast on a bad link instead of untarring an error page.
                response.raise_for_status()
                filename = file_stamp + ".tar.gz"
                filepath = os.path.join(inp, filename)
                # Context manager closes the handle (the original leaked it).
                with open(filepath, "wb") as f:
                    f.write(response.content)
                outpath = os.path.join(out, title)
                untar(filepath, outpath)
                # Advance the progress bar after each finished paper.
                bar.progress((i + 1) / N)
                download_status.text(f"Iteration [{i+1}/{N}]: Finish Downloading of " + title)
        with st.spinner("Archiving as Zip Files..."):
            # Bundle the extracted sources into one archive.
            filepath = archive_dir(out, os.path.join(base, project_name))
            # Embed the archive as a base64 data URL; the zip MIME type is
            # application/zip (the original mislabelled it file/csv).
            b64 = ToBase64(filepath).decode()
            href = f"<a href='data:application/zip;base64,{b64}' download='arxiv2latex-output-{datetime.datetime.now()}.zip' color='red'>Click here to Download the Output Latex Zip Files</a>"
            st.markdown(href, unsafe_allow_html=True)
        # Status (the original comment was mojibake of the Chinese for "status").
        st.success("Finished")