|
from selenium import webdriver |
|
from selenium.common.exceptions import WebDriverException |
|
from PIL import Image |
|
from io import BytesIO |
|
|
|
import time |
|
|
|
from selenium.webdriver.common.by import By |
|
from selenium.webdriver.support.ui import WebDriverWait |
|
from selenium.webdriver.support import expected_conditions as EC |
|
|
|
def take_webdata(url):
    """Open *url* in headless Chrome and capture its title and a screenshot.

    Args:
        url: Address passed to ``WebDriver.get``.

    Returns:
        A ``(image, page_title)`` tuple. ``image`` is a PIL image decoded from
        the PNG screenshot. If a ``WebDriverException`` is raised at any point
        (including while starting Chrome), ``image`` is a 1x1 RGB placeholder
        and ``page_title`` is whatever had been read so far -- an empty string
        when the failure happened before the title was available.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Both names are read in the except/finally clauses, so they must exist
    # even when webdriver.Chrome() or wd.get() fails.
    wd = None
    page_title = ""

    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)
        wd.get(url)

        # No implicit wait is configured: it only affects element lookups,
        # and none are performed before the session is closed.
        page_title = wd.title
        screenshot = wd.get_screenshot_as_png()
    except WebDriverException:
        return Image.new('RGB', (1, 1)), page_title
    finally:
        # Runs on success and on the early return above; skip when Chrome
        # never started.
        if wd is not None:
            wd.quit()

    return Image.open(BytesIO(screenshot)), page_title
|
|
|
|
|
def _scrape_detail_rows(driver, section_id, label):
    """Collect the pokok/denda/total entries listed inside one detail section.

    Args:
        driver: Object exposing ``find_elements`` (a Selenium WebDriver).
        section_id: DOM id of the section container, e.g. ``"det_pkb"``.
        label: Short tag used in the debug output, e.g. ``"PKB"``.

    Returns:
        A list of dicts with the keys ``pokok``, ``denda`` and ``total``.
        Parsing is best effort: on any exception the error is printed and the
        entries gathered so far are returned.
    """
    details = []
    try:
        # The first .row of the section is skipped -- presumably a header
        # row; confirm against the page markup.
        section_rows = driver.find_elements(
            By.CSS_SELECTOR, f"#{section_id} .row"
        )[1:]
        for row in section_rows:
            print(f"[ROW {label}]", row.text)
            cols = row.find_elements(By.TAG_NAME, "p")
            print(f"[COLS {label}]", [x.text for x in cols])
            if len(cols) >= 3:
                details.append({
                    "pokok": cols[0].text.strip(),
                    "denda": cols[1].text.strip(),
                    "total": cols[2].text.strip(),
                })
    except Exception as e:
        print(f"Gagal parsing {section_id}:", e)
    return details


def scrape_vehicle(driver):
    """Extract vehicle data and billing details from the page loaded in *driver*.

    Every section is parsed independently and on a best-effort basis: an
    exception in one section is printed and does not prevent the others from
    being parsed, so this function does not raise for scraping failures.

    Args:
        driver: Selenium WebDriver positioned on the result page.

    Returns:
        A 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb, rincians_swd)``:

        * ``data_kendaraan`` -- dict built from ``table tr`` rows that have at
          least three ``td`` cells; the key is the normalised first cell and
          the value is the third cell.
        * ``total_tagihan`` -- list of dicts (``pokok``, ``denda``, ``total``,
          ``jenis``) taken from ``div.row`` elements.
        * ``rincians_pkb`` -- list of dicts (``pokok``, ``denda``, ``total``)
          from the ``#det_pkb`` section.
        * ``rincians_swd`` -- same shape, from the ``#det_swd`` section.
    """
    data_kendaraan = {}
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "table tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            if len(cols) >= 3:
                # Key: lower-cased label with dots removed and spaces turned
                # into underscores. The middle cell is not used.
                key = cols[0].text.strip().lower().replace(".", "").replace(" ", "_")
                data_kendaraan[key] = cols[2].text.strip()
    except Exception as e:
        print("Gagal parsing tabel:", e)

    total_tagihan = []
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "div.row"):
            # Each .text access is a round trip to the browser; read it once.
            row_text = row.text
            print("[ROW TOTAL]", row_text)
            # Rows mentioning any of these words are not treated as data rows.
            if any(word in row_text for word in ("Pokok", "Denda", "Total")):
                continue
            cols = row.find_elements(By.TAG_NAME, "p")
            print("[COLS TOTAL]", [x.text for x in cols])
            if len(cols) >= 4:
                total_tagihan.append({
                    "pokok": cols[0].text.strip(),
                    "denda": cols[1].text.strip(),
                    "total": cols[2].text.strip(),
                    "jenis": cols[3].text.strip(),
                })
    except Exception as e:
        print("Gagal parsing total tagihan:", e)

    rincians_pkb = _scrape_detail_rows(driver, "det_pkb", "PKB")
    rincians_swd = _scrape_detail_rows(driver, "det_swd", "SWD")

    return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
|
|
|
|
|
def get_vehicle_info(driver, plate_number: str):
    """Submit *plate_number* on the jambisamsat.net infopkb form and scrape the result.

    A dedicated headless Chrome session is started for every call and is
    always closed before returning.

    Args:
        driver: Ignored. The function has always replaced this argument with
            its own Chrome session; the parameter is kept so existing callers
            keep working.
        plate_number: Text typed into the ``no_polisi`` input field.

    Returns:
        The 4-tuple produced by ``scrape_vehicle``:
        ``(data_kendaraan, total_tagihan, rincians_pkb, rincians_swd)``.
        If a ``WebDriverException`` (which includes wait timeouts) is raised
        while loading or interacting with the page, the error is printed and
        the same shape is returned with empty containers: ``({}, [], [], [])``.

    Raises:
        WebDriverException: If the Chrome session itself cannot be started.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")

    # A separate local name makes it explicit that the caller's ``driver``
    # argument is neither used nor closed here.
    browser = webdriver.Chrome(options=options)

    try:
        browser.get("https://www.jambisamsat.net/infopkb.html")
        time.sleep(1)

        # The wait returns the located element, so no second lookup is needed.
        input_field = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.ID, "no_polisi"))
        )
        input_field.clear()
        input_field.send_keys(plate_number)

        submit_button = browser.find_element(
            By.CSS_SELECTOR, 'button.btn.btn-primary[type="submit"]'
        )
        submit_button.click()

        # Submitting the form navigates to the result page.
        WebDriverWait(browser, 10).until(EC.url_contains("infopkb.php"))

        browser.implicitly_wait(3)

        # Resize the window to the full document height plus a margin.
        scroll_height = browser.execute_script("return document.body.scrollHeight")
        browser.set_window_size(1920, scroll_height + 200)

        # Click both detail toggles before scraping the #det_pkb / #det_swd
        # sections.
        for toggle_id in ("show_det_pkb", "show_det_swd"):
            toggle = WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable((By.ID, toggle_id))
            )
            toggle.click()

        # Short pause after the clicks -- presumably lets the detail sections
        # finish rendering; confirm against the page behaviour.
        time.sleep(0.6)

        return scrape_vehicle(browser)
    except WebDriverException as e:
        # Same shape as the success path so callers can always unpack four
        # values.
        print("Gagal mengambil data kendaraan:", e)
        return {}, [], [], []
    finally:
        browser.quit()