# Page-capture residue (Spaces status: Sleeping); not part of the program.
import html
import re
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
# Matches a ".pdf" extension in any letter case that is not immediately
# followed by another word character, so ".PDF" and ".pdf?dl=1" qualify while
# ".pdfx" or ".pdfviewer" do not.
_PDF_HREF_RE = re.compile(r'\.pdf\b', re.IGNORECASE)


def extract_pdf_links_and_title(url, *, timeout=10):
    """Fetch a web page and collect its PDF links and its title.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it the call could block
            indefinitely on an unresponsive host.

    Returns:
        A ``(pdf_links, page_title)`` tuple. ``pdf_links`` holds at most the
        first 100 matching ``<a href>`` targets in document order, each
        resolved to an absolute URL. ``page_title`` is the text of the
        ``<title>`` element, or ``"No title found"`` when the page has none.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the page title.
    page_title = soup.title.text if soup.title else "No title found"

    # Resolve against the final (post-redirect) URL so that relative hrefs
    # such as "/files/report.pdf" become usable outside the source page.
    base_url = response.url or url
    pdf_links = [
        urljoin(base_url, anchor['href'])
        for anchor in soup.find_all('a', href=True)
        if _PDF_HREF_RE.search(anchor['href'])
    ]

    # Return the (capped) PDF links together with the page title.
    return pdf_links[:100], page_title
def generate_html(pdf_links_and_title):
    """Render PDF links and a page title as an HTML fragment.

    Args:
        pdf_links_and_title: A two-item sequence ``(pdf_links, page_title)``,
            where ``pdf_links`` is an iterable of link strings and
            ``page_title`` is the heading text.

    Returns:
        A string consisting of an ``<h1>`` heading with the title followed by
        one ``<a ... download>`` element and a ``<br/>`` per link. An empty
        link list yields only the heading.
    """
    pdf_links = pdf_links_and_title[0]   # list of PDF links
    page_title = pdf_links_and_title[1]  # page title

    # Both values originate from a fetched page, so they are escaped before
    # being placed into markup; otherwise a crafted title or href could
    # inject arbitrary HTML or break out of the href attribute.
    parts = [f"<h1>{html.escape(str(page_title))}</h1>"]
    for link in pdf_links:
        safe_link = html.escape(str(link), quote=True)
        parts.append(
            f'<a href="{safe_link}" target="_blank" download>{safe_link}</a><br/>'
        )
    return "".join(parts)
# Heading shown at the top of the Gradio page. The Korean text reads
# "Naver Securities research links" and is followed by the URL of the page
# the tool is meant to be pointed at.
title = "네이버 증권 리서치 링크- https://finance.naver.com/research/company_list.naver"

# One text input (the page URL) feeds extract_pdf_links_and_title; its two
# return values are shown as a text output (the link list) and an HTML output
# (the page title).
# NOTE(review): generate_html is defined in this file but is not used here, so
# the HTML output only ever receives the bare title — confirm whether the
# rendered link list was meant to be displayed instead.
iface = gr.Interface(
    fn=extract_pdf_links_and_title,
    inputs="text",
    outputs=["text", "html"],
    title=title,
)
iface.launch()