Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,78 +3,72 @@ from bs4 import BeautifulSoup
|
|
| 3 |
import pandas as pd
|
| 4 |
import gradio as gr
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
def scrape_kosdaq_data():
|
| 10 |
-
print("๋๋ฒ๊น
: ๋ค์ด๋ฒ ์ฆ๊ถ ํ์ด์ง ์์ฒญ ์์...")
|
| 11 |
-
|
| 12 |
-
# ์์ฒญ ํค๋ ์ถ๊ฐ
|
| 13 |
headers = {
|
| 14 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
| 15 |
}
|
| 16 |
-
|
| 17 |
-
# ํ์ด์ง ์์ฒญ
|
| 18 |
-
response = requests.get(KOSDAQ_URL, headers=headers)
|
| 19 |
-
if response.status_code == 200:
|
| 20 |
-
print("๋๋ฒ๊น
: ๋ค์ด๋ฒ ์ฆ๊ถ ํ์ด์ง ์์ฒญ ์ฑ๊ณต")
|
| 21 |
-
else:
|
| 22 |
-
print(f"๋๋ฒ๊น
: ์์ฒญ ์คํจ, ์ํ ์ฝ๋: {response.status_code}")
|
| 23 |
-
return pd.DataFrame() # ๋น DataFrame ๋ฐํ
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
# ๊ฐ ์ด์ ํ
์คํธ๋ฅผ ์ถ์ถ
|
| 46 |
-
row_data = [col.get_text(strip=True) for col in cols]
|
| 47 |
-
# ๋งํฌ๊ฐ ์๋ ๊ฒฝ์ฐ ์ข
๋ชฉ๋ช
์ ์ถ๊ฐ
|
| 48 |
-
link = row.find("a", class_="tltle")
|
| 49 |
-
if link:
|
| 50 |
-
row_data[1] = link.text.strip()
|
| 51 |
-
data.append(row_data)
|
| 52 |
-
print(f"๋๋ฒ๊น
: ์ถ์ถ๋ ๋ฐ์ดํฐ ํ ์ - {len(data)}")
|
| 53 |
-
|
| 54 |
-
if data:
|
| 55 |
-
print(f"๋๋ฒ๊น
: ์ฒซ ๋ฒ์งธ ๋ฐ์ดํฐ ํ - {data[0]}")
|
| 56 |
-
else:
|
| 57 |
-
print("๋๋ฒ๊น
: ๋ฐ์ดํฐ ์ถ์ถ ์คํจ")
|
| 58 |
-
return pd.DataFrame(columns=headers) # ๋น DataFrame ๋ฐํ
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
return df
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
fn=display_kosdaq_info,
|
| 76 |
-
inputs=None,
|
| 77 |
-
outputs="dataframe",
|
| 78 |
-
title="์ฝ์ค๋ฅ ์ข
๋ชฉ ์ ๋ณด ์คํฌ๋ํผ",
|
| 79 |
-
description="๋ค์ด๋ฒ ์ฆ๊ถ ์ฌ์ดํธ์์ ์ฝ์ค๋ฅ ์ข
๋ชฉ ์ ๋ณด๋ฅผ ์คํฌ๋ํํ์ฌ ํ์๏ฟฝ๏ฟฝ๋๋ค."
|
| 80 |
-
).launch()
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
import gradio as gr
|
| 5 |
|
| 6 |
+
def scrape_kosdaq():
    """Scrape the KOSDAQ top-risers table from Naver Finance.

    Fetches https://finance.naver.com/sise/sise_rise.naver?sosok=1 with a
    browser-like User-Agent, parses the ``type_2`` table, and collects one
    dict per stock row.

    Returns:
        pandas.DataFrame: columns Rank, Name, Price, Change, Change_Rate,
        Volume, Buy_Price, Sell_Price, Total_Buy_Quantity,
        Total_Sell_Quantity, PER, ROE. Empty DataFrame on any failure
        (network error, or the expected table is missing from the page).
    """
    url = "https://finance.naver.com/sise/sise_rise.naver?sosok=1"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
    }

    try:
        # Request the webpage. A timeout prevents the UI from hanging
        # indefinitely if Naver does not respond.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        print("[INFO] Page fetched successfully.")

        # Parse the HTML
        soup = BeautifulSoup(response.content, "html.parser")

        # Locate the table; bail out gracefully if the page layout changed,
        # otherwise .find_all on None would raise AttributeError (which the
        # RequestException handler below would NOT catch).
        table = soup.find("table", class_="type_2")
        if table is None:
            print("[ERROR] Stock table not found on the page.")
            return pd.DataFrame()

        rows = table.find_all("tr")[2:]  # Skip the header rows

        data = []

        # Extract data row by row
        for row in rows:
            cols = row.find_all("td")
            if len(cols) < 12:  # Skip blank or irrelevant rows
                continue

            entry = {
                "Rank": cols[0].get_text(strip=True),
                "Name": cols[1].get_text(strip=True),
                "Price": cols[2].get_text(strip=True),
                "Change": cols[3].get_text(strip=True),
                "Change_Rate": cols[4].get_text(strip=True),
                "Volume": cols[5].get_text(strip=True),
                "Buy_Price": cols[6].get_text(strip=True),
                "Sell_Price": cols[7].get_text(strip=True),
                "Total_Buy_Quantity": cols[8].get_text(strip=True),
                "Total_Sell_Quantity": cols[9].get_text(strip=True),
                "PER": cols[10].get_text(strip=True),
                "ROE": cols[11].get_text(strip=True),
            }
            data.append(entry)

        print(f"[DEBUG] Extracted {len(data)} rows.")
        return pd.DataFrame(data)

    except requests.exceptions.RequestException as e:
        print(f"[ERROR] Failed to fetch page: {e}")
        return pd.DataFrame()
def display_data():
    """Fetch scraped KOSDAQ data for the UI.

    Returns:
        pandas.DataFrame with the scraped rows, or an error-message string
        when the scrape produced no data.
    """
    frame = scrape_kosdaq()
    return "Failed to fetch data or no data available." if frame.empty else frame
|
| 62 |
+
# Gradio Interface
|
| 63 |
+
def gradio_interface():
|
| 64 |
+
with gr.Blocks() as demo:
|
| 65 |
+
gr.Markdown("### Naver Kosdaq Stock Scraper")
|
| 66 |
+
output = gr.Dataframe()
|
| 67 |
+
fetch_button = gr.Button("Fetch Data")
|
| 68 |
+
|
| 69 |
+
fetch_button.click(display_data, inputs=[], outputs=output)
|
| 70 |
+
|
| 71 |
+
return demo
|
| 72 |
|
| 73 |
+
if __name__ == "__main__":
|
| 74 |
+
gradio_interface().launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|