Update extract.py
extract.py  CHANGED  (+63 -1)
@@ -3,7 +3,9 @@ from selenium.common.exceptions import WebDriverException
 from PIL import Image
 from io import BytesIO
 
-import time
+import time, requests
+
+from bs4 import BeautifulSoup
 
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
@@ -121,6 +123,66 @@ def scrape_vehicle(driver):
     return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
 
 
+def get_vehicle_info_bs4(nopol: str):
+    content = requests.get("https://www.jambisamsat.net/infopkb.php", params={"no_polisi": nopol}).content
+    soup = BeautifulSoup(content, "html.parser")
+
+    # === 1. Data Kendaraan (vehicle data) ===
+    data_kendaraan = {}
+    table = soup.find("table")
+    if table:
+        for row in table.find_all("tr"):
+            cols = row.find_all("td")
+            if len(cols) >= 3:
+                key = cols[0].get_text(strip=True).lower().replace(".", "").replace(" ", "_")
+                val = cols[2].get_text(strip=True)
+                data_kendaraan[key] = val
+
+    # === 2. Total Tagihan (total amount due; div.row outside det_pkb and det_swd) ===
+    all_rows = soup.find_all("div", class_="row")
+    total_tagihan = []
+    for row in all_rows:
+        if row.find_parent(id="det_pkb") or row.find_parent(id="det_swd"):
+            continue  # skip rows inside det_pkb or det_swd
+        ps = row.find_all("p")
+        if len(ps) >= 3 and all(keyword in ps[i].text for i, keyword in enumerate(["Pokok", "Denda", "Total"])):
+            total_tagihan.append({
+                "pokok": ps[0].get_text(strip=True),
+                "denda": ps[1].get_text(strip=True),
+                "total": ps[2].get_text(strip=True)
+            })
+
+    # === 3. Rincian PKB (vehicle tax breakdown) ===
+    rincians_pkb = []
+    pkb_div = soup.find("div", id="det_pkb")
+    if pkb_div:
+        rows = pkb_div.find_all("div", class_="row")[1:]  # skip header row
+        for row in rows:
+            cols = row.find_all("p")
+            if len(cols) >= 3:
+                rincians_pkb.append({
+                    "pokok": cols[0].get_text(strip=True),
+                    "denda": cols[1].get_text(strip=True),
+                    "total": cols[2].get_text(strip=True)
+                })
+
+    # === 4. Rincian SWDKLLJ (mandatory traffic-accident fund breakdown) ===
+    rincians_swd = []
+    swd_div = soup.find("div", id="det_swd")
+    if swd_div:
+        rows = swd_div.find_all("div", class_="row")[1:]  # skip header row
+        for row in rows:
+            cols = row.find_all("p")
+            if len(cols) >= 3:
+                rincians_swd.append({
+                    "pokok": cols[0].get_text(strip=True),
+                    "denda": cols[1].get_text(strip=True),
+                    "total": cols[2].get_text(strip=True)
+                })
+    return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
+
+
+
 def get_vehicle_info(driver, plate_number: str):
     # options = webdriver.ChromeOptions()
     # options.add_argument("--headless")
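
The commit adds get_vehicle_info_bs4 as a requests/BeautifulSoup alternative to the Selenium-based get_vehicle_info, returning the same four-part tuple. A minimal usage sketch, assuming extract.py is importable as a module and requests plus beautifulsoup4 are installed; the plate number shown is only an example taken from the code:

    # hypothetical caller, not part of the commit
    from extract import get_vehicle_info_bs4

    data_kendaraan, total_tagihan, rincians_pkb, rincians_swd = get_vehicle_info_bs4("BH 1337 NN")
    print(data_kendaraan)   # dict of table labels -> values scraped from the page
    print(total_tagihan)    # list of {"pokok", "denda", "total"} dicts
    print(rincians_pkb)     # PKB breakdown rows
    print(rincians_swd)     # SWDKLLJ breakdown rows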