import gradio as gr
import re
import requests
from bs4 import BeautifulSoup

def extract_pdf_links_and_title(url):
    # Fetch the page; a timeout keeps the request from hanging indefinitely
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the page title
    page_title = soup.title.text if soup.title else "No title found"

    # Collect anchors whose href points at a PDF file
    pdf_links = []
    for link in soup.find_all('a', href=True):
        if re.search(r'\.pdf', link['href']):
            pdf_links.append(link['href'])

    # Return up to the first 100 PDF links along with the page title
    return pdf_links[:100], page_title

def generate_html(pdf_links_and_title):
    pdf_links, page_title = pdf_links_and_title  # (list of PDF links, page title)

    html = f"<h1>{page_title}</h1>"  # add the page title as a heading
    for link in pdf_links:
        html += f'<a href="{link}" target="_blank" download>{link}</a><br/>'

    return html
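
# A hypothetical helper for quick manual checks outside the Gradio UI (it is
# never called by the app itself); it assumes the Naver listing URL used in
# the interface title below is reachable, and simply prints the page title
# and how many PDF links the extractor found.
def _smoke_test(url="https://finance.naver.com/research/company_list.naver"):
    links, page_title = extract_pdf_links_and_title(url)
    print(page_title, len(links))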

title = "Naver Finance research report links - https://finance.naver.com/research/company_list.naver"

def extract_and_render(url):
    # Run the extractor, then render the result through generate_html so the
    # "html" output shows clickable download links rather than the bare title
    pdf_links, page_title = extract_pdf_links_and_title(url)
    return pdf_links, generate_html((pdf_links, page_title))

iface = gr.Interface(fn=extract_and_render,
                     inputs="text",
                     outputs=["text", "html"],
                     title=title)

iface.launch()