jonathanjordan21 committed on
Commit
4843d7c
·
verified ·
1 Parent(s): 8ad7319

Update extract.py

Browse files
Files changed (1) hide show
  1. extract.py +63 -1
extract.py CHANGED
@@ -3,7 +3,9 @@ from selenium.common.exceptions import WebDriverException
3
  from PIL import Image
4
  from io import BytesIO
5
 
6
- import time
 
 
7
 
8
  from selenium.webdriver.common.by import By
9
  from selenium.webdriver.support.ui import WebDriverWait
@@ -121,6 +123,66 @@ def scrape_vehicle(driver):
121
  return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
122
 
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  def get_vehicle_info(driver, plate_number: str):
125
  # options = webdriver.ChromeOptions()
126
  # options.add_argument("--headless")
 
3
  from PIL import Image
4
  from io import BytesIO
5
 
6
+ import time, requests
7
+
8
+ from bs4 import BeautifulSoup
9
 
10
  from selenium.webdriver.common.by import By
11
  from selenium.webdriver.support.ui import WebDriverWait
 
123
  return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
124
 
125
 
126
def _parse_rincian(soup, div_id: str):
    """Collect the pokok/denda/total rows found inside ``<div id=div_id>``.

    Args:
        soup: Parsed document (or tag) to search in.
        div_id: ``id`` attribute of the container div, e.g. ``"det_pkb"``.

    Returns:
        A list of ``{"pokok", "denda", "total"}`` dicts, one per ``div.row``
        that holds at least three ``<p>`` elements. Empty when the container
        is not present in the document.
    """
    container = soup.find("div", id=div_id)
    if container is None:
        return []

    rincian = []
    # The first div.row is treated as the header line and skipped.
    for row in container.find_all("div", class_="row")[1:]:
        cols = row.find_all("p")
        if len(cols) >= 3:
            rincian.append({
                "pokok": cols[0].get_text(strip=True),
                "denda": cols[1].get_text(strip=True),
                "total": cols[2].get_text(strip=True),
            })
    return rincian


def get_vehicle_info_bs4(nopol: str):
    """Fetch and parse the vehicle tax info page for a plate number.

    Issues a GET request to ``infopkb.php`` with ``nopol`` as the
    ``no_polisi`` query parameter and extracts four pieces of data from the
    returned HTML using BeautifulSoup.

    Args:
        nopol: Plate number sent as the ``no_polisi`` query parameter
            (for example ``"BH 1337 NN"``).

    Returns:
        A 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb,
        rincians_swd)``:

        * ``data_kendaraan`` - dict built from the first ``<table>``; keys are
          the first cell's text lower-cased with dots removed and spaces
          replaced by underscores, values are the third cell's text.
        * ``total_tagihan`` - list of ``{"pokok", "denda", "total"}`` dicts
          taken from ``div.row`` elements outside ``#det_pkb``/``#det_swd``.
        * ``rincians_pkb`` - detail rows found inside ``#det_pkb``.
        * ``rincians_swd`` - detail rows found inside ``#det_swd``.

        Any section missing from the page yields an empty dict/list.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Use the caller's plate number; previously a fixed plate was always sent
    # and the ``nopol`` argument was ignored.
    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(
        "https://www.jambisamsat.net/infopkb.php",
        params={"no_polisi": nopol},
        timeout=30,
    )
    soup = BeautifulSoup(response.content, "html.parser")

    # === 1. Vehicle data ===
    data_kendaraan = {}
    table = soup.find("table")
    if table is not None:
        for row in table.find_all("tr"):
            cols = row.find_all("td")
            # Only rows with at least three cells are used: cell 0 is the
            # label and cell 2 the value (cell 1 is not read).
            if len(cols) >= 3:
                key = (
                    cols[0]
                    .get_text(strip=True)
                    .lower()
                    .replace(".", "")
                    .replace(" ", "_")
                )
                data_kendaraan[key] = cols[2].get_text(strip=True)

    # === 2. Bill totals (div.row outside det_pkb and det_swd) ===
    keywords = ("Pokok", "Denda", "Total")
    total_tagihan = []
    for row in soup.find_all("div", class_="row"):
        # Rows nested in the detail sections are handled separately below.
        if row.find_parent(id="det_pkb") or row.find_parent(id="det_swd"):
            continue
        ps = row.find_all("p")
        # The first three <p> elements must mention Pokok, Denda and Total,
        # in that order.
        if len(ps) >= 3 and all(kw in p.text for kw, p in zip(keywords, ps)):
            total_tagihan.append({
                "pokok": ps[0].get_text(strip=True),
                "denda": ps[1].get_text(strip=True),
                "total": ps[2].get_text(strip=True),
            })

    # === 3. PKB details / 4. SWDKLLJ details ===
    rincians_pkb = _parse_rincian(soup, "det_pkb")
    rincians_swd = _parse_rincian(soup, "det_swd")

    return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
183
+
184
+
185
+
186
  def get_vehicle_info(driver, plate_number: str):
187
  # options = webdriver.ChromeOptions()
188
  # options.add_argument("--headless")