"""Selenium helpers: page screenshots and vehicle-tax (PKB/SWD) scraping."""

import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Entry form of the vehicle-tax lookup site driven by get_vehicle_info().
INFO_PKB_URL = "https://www.jambisamsat.net/infopkb.html"

# Labels that mark a header row in the "total tagihan" section.
_HEADER_LABELS = ("Pokok", "Denda", "Total")


def take_webdata(url):
    """Load *url* in headless Chrome and capture a screenshot and the title.

    Args:
        url: Address of the page to open.

    Returns:
        A ``(image, page_title)`` tuple. ``image`` is a PIL image decoded
        from the PNG screenshot. If Selenium raises a
        ``WebDriverException`` at any point, a 1x1 RGB placeholder image is
        returned instead, together with whatever title had been read so far
        (an empty string if none).
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Pre-bind both names so the except/finally clauses can never hit an
    # unbound local when the browser fails to start or the page fails to load.
    wd = None
    page_title = ""
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # NOTE(review): implicitly_wait() only sets the element-lookup
        # timeout; it does not pause for rendering. Kept as in the original.
        wd.implicitly_wait(5)
        page_title = wd.title
        screenshot = wd.get_screenshot_as_png()
    except WebDriverException as e:
        print("Gagal memuat halaman:", e)
        return Image.new('RGB', (1, 1)), page_title
    finally:
        if wd is not None:
            wd.quit()

    return Image.open(BytesIO(screenshot)), page_title


def _amounts_from_cols(cols):
    """Build the pokok/denda/total dict from the first three ``<p>`` cells."""
    return {
        "pokok": cols[0].text.strip(),
        "denda": cols[1].text.strip(),
        "total": cols[2].text.strip(),
    }


def _scrape_detail_rows(driver, section_id, tag):
    """Parse the ``.row`` elements under ``#<section_id>`` into amount dicts.

    The first matching row is skipped as a header. Rows with fewer than
    three ``<p>`` children are ignored. ``tag`` only labels the debug output.
    """
    details = []
    try:
        rows = driver.find_elements(
            By.CSS_SELECTOR, f"#{section_id} .row"
        )[1:]  # skip header
        for row in rows:
            print(f"[ROW {tag}]", row.text)
            cols = row.find_elements(By.TAG_NAME, "p")
            print(f"[COLS {tag}]", [x.text for x in cols])
            if len(cols) >= 3:
                details.append(_amounts_from_cols(cols))
    except Exception as e:
        # Deliberately best-effort: a failure in one section must not
        # prevent the other sections from being returned.
        print(f"Gagal parsing {section_id}:", e)
    return details


def scrape_vehicle(driver):
    """Extract vehicle data and tax breakdowns from the loaded result page.

    Each section is parsed independently and best-effort: if parsing one
    section raises, the error is printed and that section's result holds
    whatever was collected before the failure.

    Args:
        driver: A Selenium WebDriver already showing the result page.

    Returns:
        A 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb,
        rincians_swd)``:

        * ``data_kendaraan`` - dict built from every ``table tr`` having at
          least three ``td`` cells. The key is the first cell's text,
          lower-cased, with dots removed and spaces replaced by
          underscores; the value is the third cell's text.
        * ``total_tagihan`` - list of dicts with keys ``pokok``, ``denda``,
          ``total`` and ``jenis``, one per non-header ``div.row`` that has
          at least four ``<p>`` children.
        * ``rincians_pkb`` / ``rincians_swd`` - lists of dicts with keys
          ``pokok``, ``denda`` and ``total`` taken from ``#det_pkb`` and
          ``#det_swd`` respectively.
    """
    data_kendaraan = {}
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "table tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            if len(cols) < 3:
                continue
            # presumably the middle cell is a separator column, hence
            # cols[2] for the value - verify against the page markup.
            key = (
                cols[0].text.strip().lower()
                .replace(".", "").replace(" ", "_")
            )
            data_kendaraan[key] = cols[2].text.strip()
    except Exception as e:
        print("Gagal parsing tabel:", e)

    total_tagihan = []
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "div.row"):
            # Read the text once; every .text access is a WebDriver call.
            row_text = row.text
            print("[ROW TOTAL]", row_text)
            if any(label in row_text for label in _HEADER_LABELS):
                continue  # header row
            cols = row.find_elements(By.TAG_NAME, "p")
            print("[COLS TOTAL]", [x.text for x in cols])
            if len(cols) >= 4:
                entry = _amounts_from_cols(cols)
                entry["jenis"] = cols[3].text.strip()
                total_tagihan.append(entry)
    except Exception as e:
        print("Gagal parsing total tagihan:", e)

    rincians_pkb = _scrape_detail_rows(driver, "det_pkb", "PKB")
    rincians_swd = _scrape_detail_rows(driver, "det_swd", "SWD")

    return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd


def get_vehicle_info(driver, plate_number: str):
    """Look up *plate_number* on the PKB info site and scrape the result.

    A fresh headless Chrome instance is started for the lookup and always
    quit before returning.

    Args:
        driver: Unused. The original code overwrote this argument with a
            newly created browser; the parameter is kept only so existing
            call sites continue to work.
        plate_number: Value typed into the ``no_polisi`` form field.

    Returns:
        The 4-tuple produced by :func:`scrape_vehicle`. If Selenium raises
        a ``WebDriverException`` (including a wait timeout or a failure to
        start the browser), ``({}, [], [], [])`` is returned so callers can
        unpack the result the same way in both cases.
    """
    # Configure headless Chrome
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")

    browser = None
    try:
        browser = webdriver.Chrome(options=options)
        browser.get(INFO_PKB_URL)
        time.sleep(1)

        input_field = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.ID, "no_polisi"))
        )
        input_field.clear()
        input_field.send_keys(plate_number)

        submit_button = browser.find_element(
            By.CSS_SELECTOR, 'button.btn.btn-primary[type="submit"]'
        )
        submit_button.click()

        # Wait for the new page to load
        WebDriverWait(browser, 10).until(EC.url_contains("infopkb.php"))
        browser.implicitly_wait(3)

        # Grow the window to the full document height before clicking the
        # detail toggles below.
        scroll_height = browser.execute_script(
            "return document.body.scrollHeight"
        )
        browser.set_window_size(1920, scroll_height + 200)

        # Expand the PKB and SWD detail sections so their rows are present.
        for toggle_id in ("show_det_pkb", "show_det_swd"):
            WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable((By.ID, toggle_id))
            ).click()

        # Short pause after the clicks, as in the original, before scraping.
        time.sleep(0.6)

        return scrape_vehicle(browser)
    except WebDriverException as e:
        print("Gagal mengambil info kendaraan:", e)
        return {}, [], [], []
    finally:
        if browser is not None:
            browser.quit()