File size: 1,704 Bytes
34d098b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import ast
import pandas as pd

import datetime

today_format = datetime.datetime.today().strftime("%Y%m%d")

# Load the CSV into a DataFrame
df = pd.read_csv("all_data.csv")
df_name = pd.read_csv("tempo_list.csv")

# Split the 'ratings' column into separate columns
ratings_df = df["ratings"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else {})
ratings_split = pd.json_normalize(ratings_df)

# Merge the new ratings columns with the original DataFrame
df = df.drop("ratings", axis=1).join(ratings_split)
df = df.dropna(subset=["name"])

# Extract tempo_code from the source_url
df["tempo_code"] = df["source_url"].apply(lambda x: x.split("/")[4] if isinstance(x, str) else "")
df["tempo_code"] = df["tempo_code"].apply(lambda x: x[3:])

# Merge with df_name based on tempo_code
df = pd.merge(df, df_name, left_on="tempo_code", right_on="サロンID", how="left")
df["review_date"] = df["review_date"].str.replace("[投稿日] ", "")
df["review_date"] = pd.to_datetime(df["review_date"])

# If you need the date and time separately:
df["date"] = df["review_date"].dt.date
df["time"] = df["review_date"].dt.time

df = df.dropna(subset=["review_date"])

df = df[["サロンID", "サロン名", "name", "details", "date", "time", "total_rating", "雰囲気", "接客サービス", "技術・仕上がり", "メニュー・料金", "review"]]
df = df.rename(
    columns={
        "name": "客様",
        "details": "詳細情報",
        "date": "日程",
        "time": "時刻",
        "total_rating": "総合評価",
        "review": "コメント",
    }
)


df = df.drop_duplicates()
# Save the new DataFrame to a new CSV file
df.to_csv(f"{today_format}.csv", index=False)