FN_scrapping / make_csv.py
BarngSJ
ew
34d098b
import ast
import pandas as pd
import datetime
today_format = datetime.datetime.today().strftime("%Y%m%d")
# Load the CSV into a DataFrame
df = pd.read_csv("all_data.csv")
df_name = pd.read_csv("tempo_list.csv")
# Split the 'ratings' column into separate columns
ratings_df = df["ratings"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else {})
ratings_split = pd.json_normalize(ratings_df)
# Merge the new ratings columns with the original DataFrame
df = df.drop("ratings", axis=1).join(ratings_split)
df = df.dropna(subset=["name"])
# Extract tempo_code from the source_url
df["tempo_code"] = df["source_url"].apply(lambda x: x.split("/")[4] if isinstance(x, str) else "")
df["tempo_code"] = df["tempo_code"].apply(lambda x: x[3:])
# Merge with df_name based on tempo_code
df = pd.merge(df, df_name, left_on="tempo_code", right_on="サロンID", how="left")
df["review_date"] = df["review_date"].str.replace("[投稿日] ", "")
df["review_date"] = pd.to_datetime(df["review_date"])
# If you need the date and time separately:
df["date"] = df["review_date"].dt.date
df["time"] = df["review_date"].dt.time
df = df.dropna(subset=["review_date"])
df = df[["サロンID", "サロン名", "name", "details", "date", "time", "total_rating", "雰囲気", "接客サービス", "技術・仕上がり", "メニュー・料金", "review"]]
df = df.rename(
columns={
"name": "客様",
"details": "詳細情報",
"date": "日程",
"time": "時刻",
"total_rating": "総合評価",
"review": "コメント",
}
)
df = df.drop_duplicates()
# Save the new DataFrame to a new CSV file
df.to_csv(f"{today_format}.csv", index=False)