File size: 1,344 Bytes
738953f
4012bf8
43a0009
 
 
4012bf8
dc39e39
 
 
 
4012bf8
 
 
 
 
 
 
 
405a8de
7e27e95
dc39e39
 
 
 
4012bf8
 
 
dfb729a
4012bf8
0750144
dc39e39
 
 
 
 
ae656a9
dc39e39
43a0009
dc39e39
 
75fb651
0290677
75fb651
dc39e39
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import re
from html import escape
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup

def extract_pdf_links(url):
    """Fetch *url* and return the PDF links found in its anchor tags.

    Args:
        url: Address of the page to scan. Must start with ``http://`` or
            ``https://``.

    Returns:
        A list of at most 100 links, in document order, taken from every
        ``<a href=...>`` whose ``href`` contains ``.pdf``. Relative hrefs
        are resolved against the final page URL so they remain usable
        outside the scraped page. When *url* does not start with an
        HTTP(S) scheme, the single-element list ``["Invalid URL"]`` is
        returned instead.

    Raises:
        requests.RequestException: If the page cannot be fetched
            (connection failure, timeout, ...).
    """
    # Reject anything that is not an http(s) URL before touching the network.
    if not re.match(r'http[s]?://', url):
        return ["Invalid URL"]

    # Without a timeout a stalled server would block the UI callback forever.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    pdf_links = [
        # response.url is the address after redirects, i.e. the correct base
        # for resolving relative hrefs; absolute hrefs pass through as-is.
        urljoin(response.url, anchor['href'])
        for anchor in soup.find_all('a', href=True)
        if re.search(r'\.pdf', anchor['href'])
    ]

    # Cap the result so a huge index page does not flood the output.
    return pdf_links[:100]

def filter_links_by_keyword(pdf_links, keyword):
    """Return the links from *pdf_links* that contain *keyword*.

    The comparison is case-insensitive and the original order of the links
    is preserved. A new list is always returned.
    """
    matches = []
    for candidate in pdf_links:
        # Lower-case both sides so the substring test ignores case.
        if keyword.lower() in candidate.lower():
            matches.append(candidate)
    return matches

def generate_html(pdf_links):
    """Render *pdf_links* as a run of HTML download anchors.

    Each link becomes ``<a href="..." target="_blank" download>...</a><br/>``
    and the pieces are concatenated in input order. An empty input yields
    an empty string.

    The links originate from scraped pages, so they are HTML-escaped before
    being placed in the attribute and the anchor text; otherwise a crafted
    href could break out of the attribute and inject markup.
    """
    anchors = []
    for link in pdf_links:
        # quote=True also escapes quotes, which is required inside href="...".
        safe = escape(str(link), quote=True)
        anchors.append(f'<a href="{safe}" target="_blank" download>{safe}</a><br/>')
    return "".join(anchors)

def main(url, keyword):
    """Gradio callback: list the PDF links on *url*, optionally filtered.

    Args:
        url: Page address typed by the user. Surrounding whitespace is
            ignored.
        keyword: Optional case-insensitive substring the links must contain.
            Empty or whitespace-only means "no filtering".

    Returns:
        The HTML produced by ``generate_html`` for the matching links, or the
        plain message ``"Invalid URL"`` when *url* is rejected.
    """
    # Pasted URLs often carry stray whitespace that would fail validation.
    url = (url or "").strip()
    pdf_links = extract_pdf_links(url)

    # extract_pdf_links signals a rejected URL with this sentinel list.
    # Report it directly instead of letting the keyword filter drop it or
    # rendering it as a bogus download link.
    if pdf_links == ["Invalid URL"]:
        return "Invalid URL"

    keyword = keyword.strip() if keyword else ""
    if keyword:  # Filter only when a keyword was actually supplied.
        pdf_links = filter_links_by_keyword(pdf_links, keyword)
    return generate_html(pdf_links)

title = "๋„ค์ด๋ฒ„ ์ฆ๊ถŒ ๋ฆฌ์„œ์น˜ ๋งํฌ - https://finance.naver.com/research/company_list.naver"

# Build the web UI: two text boxes (page URL, optional keyword) feed main().
# The result is rendered with the "html" output component because main()
# returns anchor markup; a "text" output would show the raw tags instead of
# clickable links.
iface = gr.Interface(main,
                     inputs=["text", "text"],  # URL and keyword inputs
                     outputs="html",
                     title=title)

iface.launch()