Spaces:
Sleeping
Sleeping
import re
from html import escape
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
def extract_pdf_links(url, timeout=10.0, max_links=100):
    """Collect links to PDF documents from the page at *url*.

    Args:
        url: Address of the page to scan. It must start with ``http://`` or
            ``https://``; surrounding whitespace is ignored.
        timeout: Seconds to wait for the HTTP request before giving up.
        max_links: Maximum number of links to return.

    Returns:
        A list of absolute PDF URLs in document order, capped at
        *max_links*. If *url* is not an http(s) URL, the single-item list
        ``["Invalid URL"]`` is returned instead.

    Exceptions raised by ``requests.get`` (connection errors, timeouts)
    are not caught here and propagate to the caller.
    """
    url = (url or "").strip()

    # Validate the URL before touching the network.
    if not re.match(r'http[s]?://', url):
        return ["Invalid URL"]

    # Without a timeout an unresponsive server would block the request forever.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    pdf_links = []
    for link in soup.find_all('a', href=True):
        href = link['href']
        if re.search(r'\.pdf', href, re.IGNORECASE):
            # Resolve relative hrefs against the page URL so every returned
            # link is usable on its own.
            pdf_links.append(urljoin(url, href))
    return pdf_links[:max_links]
def filter_links_by_keyword(pdf_links, keyword):
    """Return the links that contain *keyword*, ignoring case.

    Args:
        pdf_links: Iterable of link strings.
        keyword: Substring to look for. An empty or ``None`` keyword
            matches every link.

    Returns:
        A new list with the matching links in their original order.
    """
    if not keyword:
        return list(pdf_links)

    # Lower-case the keyword once instead of once per link.
    needle = keyword.lower()
    return [link for link in pdf_links if needle in link.lower()]
def generate_html(pdf_links):
    """Render *pdf_links* as HTML download anchors, one per line.

    Each link becomes ``<a href=... target="_blank" download>`` followed by
    ``<br/>``. The link is HTML-escaped both in the ``href`` attribute and in
    the visible text, because it comes from a scraped page and may contain
    quotes, ``&`` or angle brackets.

    Args:
        pdf_links: Iterable of link strings.

    Returns:
        The concatenated HTML markup, or ``""`` when there are no links.
    """
    anchors = []
    for link in pdf_links:
        safe_link = escape(link, quote=True)
        anchors.append(
            f'<a href="{safe_link}" target="_blank" download>{safe_link}</a><br/>'
        )
    return "".join(anchors)
def main(url, keyword):
    """Build the HTML list of PDF links found at *url*.

    Args:
        url: Page to scan for PDF links.
        keyword: Optional case-insensitive filter; blank or whitespace-only
            means "no filtering".

    Returns:
        HTML markup with one download anchor per matching link, or the plain
        text ``"Invalid URL"`` when *url* fails validation.
    """
    pdf_links = extract_pdf_links(url)

    # extract_pdf_links signals a rejected URL with this sentinel list. Show
    # the message as-is rather than filtering it out or rendering it as a
    # download link.
    if pdf_links == ["Invalid URL"]:
        return "Invalid URL"

    keyword = (keyword or "").strip()
    if keyword:  # Filter only when a keyword was actually provided.
        pdf_links = filter_links_by_keyword(pdf_links, keyword)
    return generate_html(pdf_links)
# Page title shown above the form; it names the default research listing
# this tool is meant to be pointed at.
title = "네이버 증권 리서치 링크 - https://finance.naver.com/research/company_list.naver"

iface = gr.Interface(
    main,
    inputs=["text", "text"],  # URL and keyword inputs
    # main() returns anchor markup, so render it as HTML; a plain text
    # output would show the raw tags instead of clickable links.
    outputs="html",
    title=title,
)
iface.launch()