from pprint import pprint
from urllib.parse import quote

from bs4 import BeautifulSoup

from ..utils import SeleniumScraper
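
# One SeleniumScraper is kept at module level and created lazily on the first
# call, so repeated searches reuse the same browser session.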
scraper = None
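
# For reference, a minimal sketch of the SeleniumScraper interface this module
# relies on (an assumption -- the real class lives in ..utils and may differ;
# all this module needs is a .driver() accessor returning a Selenium WebDriver):
#
#     from selenium import webdriver
#
#     class SeleniumScraper:
#         def __init__(self):
#             options = webdriver.ChromeOptions()
#             options.add_argument("--headless=new")
#             self._driver = webdriver.Chrome(options=options)
#
#         def driver(self):
#             return self._driver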

def scrap(search: dict = None, page: int = 1):
    """Search colamanga.com; return a list of {"id", "cover", "title"} dicts."""
    if not search:
        raise ValueError("The 'search' parameter is required.")
    global scraper
    # Build the search URL; the query text is percent-encoded (the original
    # nested double quotes inside the f-string, a syntax error before 3.12).
    search_type = search.get("type", "")
    search_text = quote(search.get("text", ""))
    url = f"https://www.colamanga.com/search?type={search_type}&page={page}&searchString={search_text}"
    print(url)
    if not scraper:
        scraper = SeleniumScraper()
    driver = scraper.driver()
    driver.get(url)
    source = BeautifulSoup(driver.page_source, "html.parser")
    # select()[0] raised IndexError on an unexpected page; fail with a clear message instead.
    layout = source.select_one("div.fed-part-layout")
    if layout is None:
        raise RuntimeError("Unexpected page structure: 'div.fed-part-layout' not found.")
    dl_list = layout.find_all("dl", {"class": "fed-deta-info"})
    DATA = []
    for dl in dl_list:
        item = {}  # renamed from 'object' to avoid shadowing the builtin
        dt = dl.find("dt", {"class": "fed-deta-images"})
        a = dt.find("a", {"class": "fed-list-pics"})
        manga_id = a.get("href").strip("/")
        item["id"] = manga_id
        # The cover id is the second-to-last path segment of the thumbnail URL.
        cover_link_split = a.get("data-original").split("/")
        cover_id = cover_link_split[-2]
        item["cover"] = f"/api/web_scrap/get_cover/colamanga/{manga_id}/{cover_id}/"
        dd = dl.find("dd", {"class": "fed-deta-content"})
        h1 = dd.find("h1", {"class": "fed-part-eone"})
        item["title"] = h1.find("a").text
        DATA.append(item)
    return DATA
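
# Shape of each item returned by scrap() (illustrative values, per the parsing above):
#     {"id": "<href slug>",
#      "cover": "/api/web_scrap/get_cover/colamanga/<id>/<cover_id>/",
#      "title": "<manga title>"}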
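# NOTE: because of the relative import at the top, run this file as a module
# from the package root (e.g. `python -m <package>.colamanga`; the exact module
# path depends on the repo layout), not as a standalone script.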
if __name__ == "__main__":
    DATA = scrap(page=1, search={"text": "妖"})
    # with open("./temp.html", "w", encoding="utf-8") as f:
    #     f.write(ul.prettify())  # Write each element prettified
    pprint(DATA)