Spaces:
Sleeping
Sleeping
File size: 1,513 Bytes
18160a7 ce57c5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import time
import requests
import pandas as pd
import streamlit as st
from utils.basic_crawl_gg_scholar import scrape_gg_scholar
# from utils.retrieve_doi_by_name import get_doi_by_title
# from utils.get_abstract_by_doi import get_abstract_by_doi
# --- Page setup --------------------------------------------------------------
# Kept as the first Streamlit call in the script, as in the original.
st.set_page_config(page_title="GG Scholar Crawler :v", page_icon=":book:", layout="centered")
st.title("Google Scholar Crawler :book:")

# Bounds for the year pickers. The upper bound follows the system clock so the
# current year stays selectable after 2025; it never drops below 2025, which
# was the limit this script originally hard-coded.
EARLIEST_YEAR = 1900
LATEST_YEAR = max(time.localtime().tm_year, 2025)

# --- Search form -------------------------------------------------------------
# One wide column for the keyword, three narrow ones for the numeric inputs.
col_1, col_2, col_3, col_4 = st.columns(spec=[5, 1, 1, 1])
keyword = col_1.text_input(
    "Keyword to search:",
    key="keyword",
    placeholder="Enter keyword to search...",
    label_visibility="hidden",
)
num_pages = col_2.number_input(
    "Pages:",
    key="pages",
    placeholder="Number of pages:...",
    min_value=1,
    max_value=9999999,
    value=1,
    step=1,
)
start_year = col_3.number_input(
    "Start:",
    min_value=EARLIEST_YEAR,
    max_value=LATEST_YEAR,
    value=2020,
    key="start_year",
)
end_year = col_4.number_input(
    "End:",
    min_value=EARLIEST_YEAR,
    max_value=LATEST_YEAR,
    value=LATEST_YEAR,
    key="end_year",
)
is_start = st.button("Crawl!", key="crawl_button")

# --- Crawl handler -----------------------------------------------------------
if is_start:
    query = keyword.strip()
    if not query:
        # Do not hit the scraper with an empty search string.
        st.warning("Please enter a keyword to search.")
    elif start_year > end_year:
        # An inverted range cannot match anything; report it instead of crawling.
        st.error("Start year must not be greater than end year.")
    else:
        # The spinner is shown only while the scraper call is running.
        with st.spinner("Crawling basic info..."):
            basic_crawled_data = scrape_gg_scholar(
                query=query,
                num_pages=num_pages,
                start_year=start_year,
                end_year=end_year,
            )
        st.dataframe(basic_crawled_data, use_container_width=True)
        st.success("Crawled basic info successfully!")

        # TODO(review): DOI lookup step is disabled; the matching import of
        # get_doi_by_title is also commented out at the top of the file.
        # with st.spinner("Retrieving DOI..."):
        #     doi_crawled_data = get_doi_by_title(basic_crawled_data)
        # st.dataframe(doi_crawled_data, use_container_width=True)
        # st.success("Retrieved DOI successfully!")