import gradio as gr
import re
import requests
from bs4 import BeautifulSoup
def extract_pdf_links(url):
    # Basic URL validation before making the request.
    if not re.match(r'https?://', url):
        return ["Invalid URL"]
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        return [f"Request failed: {e}"]
    soup = BeautifulSoup(response.text, 'html.parser')
    # Collect every anchor whose href points at a .pdf file.
    pdf_links = []
    for link in soup.find_all('a', href=True):
        if re.search(r'\.pdf', link['href']):
            pdf_links.append(link['href'])
    # Cap the result at 100 links.
    return pdf_links[:100]

def filter_links_by_keyword(pdf_links, keyword):
    # Keep only links that contain the keyword (case-insensitive).
    filtered_links = [link for link in pdf_links if keyword.lower() in link.lower()]
    return filtered_links

def generate_html(pdf_links):
    # Render each link as a downloadable anchor that opens in a new tab.
    html = ""
    for link in pdf_links:
        html += f'<a href="{link}" target="_blank" download>{link}</a><br/>'
    return html

def main(url, keyword):
    pdf_links = extract_pdf_links(url)
    if keyword:  # Filter only when a keyword was actually provided.
        pdf_links = filter_links_by_keyword(pdf_links, keyword)
    return generate_html(pdf_links)
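
# A minimal sketch of exercising the helpers directly, without the Gradio UI.
# It reuses the research-list URL from the title below; the keyword "company" is
# only an illustrative value, and this function is not called anywhere in the app.
def run_demo():
    links = extract_pdf_links("https://finance.naver.com/research/company_list.naver")
    print(generate_html(filter_links_by_keyword(links, "company")))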

title = "Naver Securities Research Links - https://finance.naver.com/research/company_list.naver"

iface = gr.Interface(main,
                     inputs=["text", "text"],  # URL and keyword inputs
                     outputs="html",           # main() returns HTML anchor tags
                     title=title)
iface.launch()