"""Gradio app that lists research-report PDF links scraped from Naver Finance."""

import html
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Page that is scraped for report links.
RESEARCH_LIST_URL = "https://finance.naver.com/research/company_list.naver"

# Raw string with escaped dots so that "." only matches a literal dot in the
# host name and in the ".pdf" suffix. Compiled once at import time.
PDF_HREF_PATTERN = re.compile(
    r"^https://ssl\.pstatic\.net/imgstock/upload/research/company/.*\.pdf$"
)

# Upper bound for the HTTP request so a stalled server cannot hang the UI
# callback forever.
REQUEST_TIMEOUT_SECONDS = 10


def fetch_pdf_links_and_titles():
    """Fetch the research list page and render its PDF links as an HTML list.

    Every ``<a>`` element whose ``href`` matches ``PDF_HREF_PATTERN`` is
    collected; duplicate URLs are emitted only once, in first-seen order.

    Returns:
        str: An HTML ``<ul>`` with one ``<li><a href=...>title</a></li>`` per
        unique PDF URL, the text ``"No PDF links found."`` when nothing
        matched, or ``"An error occurred: <message>"`` when the request or
        the parsing failed. The function never raises; failures are reported
        through the returned string so they can be shown in the UI.
    """
    try:
        response = requests.get(RESEARCH_LIST_URL, timeout=REQUEST_TIMEOUT_SECONDS)
        # Do not parse an HTTP error page as if it were the report list.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        seen_urls = set()
        items = []
        for link in soup.find_all("a", href=PDF_HREF_PATTERN):
            full_url = link["href"]
            if full_url in seen_urls:
                continue
            seen_urls.add(full_url)

            # Fall back to the URL when the anchor has no text, so the entry
            # is still visible and clickable.
            title = link.text.strip() or full_url

            # Add the link to the HTML output, including both title and URL.
            # Scraped values are escaped before being embedded in markup.
            items.append(
                f'<li><a href="{html.escape(full_url, quote=True)}" '
                f'target="_blank" rel="noopener noreferrer">'
                f"{html.escape(title)}</a></li>"
            )

        if not items:
            return "No PDF links found."
        return "<ul>\n" + "\n".join(items) + "\n</ul>"
    except Exception as e:
        # UI boundary: report the failure as text instead of raising. The
        # message is escaped because it is rendered by an HTML component.
        return f"An error occurred: {html.escape(str(e))}"


# Gradio interface
with gr.Blocks() as app:
    btn_fetch = gr.Button("PDF 링크 및 정보 조회")
    output_html = gr.HTML()
    btn_fetch.click(
        fn=fetch_pdf_links_and_titles,
        outputs=output_html,
    )

app.launch()