Spaces:
Sleeping
Sleeping
import ast | |
import pandas as pd | |
import datetime | |
today_format = datetime.datetime.today().strftime("%Y%m%d") | |
# Load the CSV into a DataFrame | |
df = pd.read_csv("all_data.csv") | |
df_name = pd.read_csv("tempo_list.csv") | |
# Split the 'ratings' column into separate columns | |
ratings_df = df["ratings"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else {}) | |
ratings_split = pd.json_normalize(ratings_df) | |
# Merge the new ratings columns with the original DataFrame | |
df = df.drop("ratings", axis=1).join(ratings_split) | |
df = df.dropna(subset=["name"]) | |
# Extract tempo_code from the source_url | |
df["tempo_code"] = df["source_url"].apply(lambda x: x.split("/")[4] if isinstance(x, str) else "") | |
df["tempo_code"] = df["tempo_code"].apply(lambda x: x[3:]) | |
# Merge with df_name based on tempo_code | |
df = pd.merge(df, df_name, left_on="tempo_code", right_on="サロンID", how="left") | |
df["review_date"] = df["review_date"].str.replace("[投稿日] ", "") | |
df["review_date"] = pd.to_datetime(df["review_date"]) | |
# If you need the date and time separately: | |
df["date"] = df["review_date"].dt.date | |
df["time"] = df["review_date"].dt.time | |
df = df.dropna(subset=["review_date"]) | |
df = df[["サロンID", "サロン名", "name", "details", "date", "time", "total_rating", "雰囲気", "接客サービス", "技術・仕上がり", "メニュー・料金", "review"]] | |
df = df.rename( | |
columns={ | |
"name": "客様", | |
"details": "詳細情報", | |
"date": "日程", | |
"time": "時刻", | |
"total_rating": "総合評価", | |
"review": "コメント", | |
} | |
) | |
df = df.drop_duplicates() | |
# Save the new DataFrame to a new CSV file | |
df.to_csv(f"{today_format}.csv", index=False) | |