Kaung Myat Htet committed on
Commit
34e1933
·
1 Parent(s): 6cab7bb

initialize project

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. app.py +123 -0
  3. data/sample_gpg_data.jsonl +0 -0
  4. requirements.txt +2 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import hashlib
3
+
4
+ import pandas as pd
5
+ from openai import OpenAI
6
+ import gradio as gr
7
+
8
# Resolve the dataset relative to this script rather than the current working
# directory, so the app starts no matter where it is launched from.
input_file = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "data", "sample_gpg_data.jsonl"
)
# One JSON object per line; each row describes one user.
user_df = pd.read_json(input_file, lines=True)

# De-duplicated user IDs, in order of first appearance.
user_ids = user_df["user_id"].unique().tolist()
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))


# Simple in-memory caches of model responses, keyed by the hash of a user's
# book titles (see hash_titles). They live only as long as the process.
guidance_cache = {}
profile_cache = {}
18
+
19
def hash_titles(titles):
    """Return an order-independent MD5 hex digest for a collection of titles.

    The titles are sorted and joined with newlines before hashing, so the same
    set of titles always produces the same key regardless of input order.
    """
    canonical = "\n".join(sorted(titles))
    digest = hashlib.md5()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
22
+
23
def _no_result(message):
    """Return a placeholder for each of the eight UI outputs, led by ``message``."""
    return message, pd.DataFrame(), "", "", "", pd.DataFrame(), "", ""


def _ask_model(prompt, max_tokens=150):
    """Send a single-turn prompt to the chat model and return its stripped reply."""
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3,
        max_tokens=max_tokens,
    )
    # ``content`` may be None; normalise to an empty string.
    return (completion.choices[0].message.content or "").strip()


def get_books(user_id):
    """Build every UI output for the selected user.

    Args:
        user_id: Value chosen in the user dropdown, or None when nothing is
            selected.

    Returns:
        An 8-tuple matching the outputs wired to the dropdown's change event:
        (user id or status message, table of purchased books, themes text,
        profile text, recommendation prompt, table of candidate books,
        target book id, predicted book id). Every code path returns all eight
        values so Gradio never receives a short tuple.
    """
    if user_id is None:
        return _no_result("Please select a user.")

    user_rows = user_df.loc[user_df["user_id"] == user_id]
    if user_rows.empty:
        return _no_result(f"No books found for {user_id}.")

    # As before, only the first matching row is used.
    record = user_rows.iloc[0]
    books = record.get("purchased_books")
    if not isinstance(books, (list, tuple)) or len(books) == 0:
        return _no_result(f"No books found for {user_id}.")

    books_table = pd.DataFrame(books)[['title', 'author', 'categories']].rename(
        columns={'title': 'Title', 'author': 'Author', 'categories': 'Category'}
    )

    # generate_books reads ``entry[0]`` of each entry, so wrap every book in
    # its own list; passing the row array described only the first book.
    books_info = generate_books([[book] for book in books])
    titles = [book["title"] for book in books if "title" in book]
    cache_key = hash_titles(titles)

    # Look the two caches up independently: a failure between the two model
    # calls can leave guidance cached without a matching profile.
    guidance_response = guidance_cache.get(cache_key)
    profile_response = profile_cache.get(cache_key)
    if guidance_response is not None and profile_response is not None:
        print("✅ Using cached response")
    else:
        print("🧠 Calling OpenAI API")
        if guidance_response is None:
            guidance_response = _ask_model(guidance_prompt(books_info))
            guidance_cache[cache_key] = guidance_response
        if profile_response is None:
            profile_response = _ask_model(profile_prompt(books_info, guidance_response))
            profile_cache[cache_key] = profile_response

    candidates = record.get("candidate_options")
    candidates = list(candidates) if isinstance(candidates, (list, tuple)) else []
    # Same ``entry[0]`` convention as generate_books: one wrapped candidate per entry.
    rec_prompt = build_recommendation_prompt(profile_response, [[c] for c in candidates])

    # Ask the model to choose; the choice must be parsed from its reply, not
    # from the prompt text (which always contains "[1]").
    choice = extract_choice(_ask_model(rec_prompt, max_tokens=10)) if candidates else None
    if choice is not None and 1 <= choice <= len(candidates):
        predicted_asin = candidates[choice - 1].get("asin", "")
    else:
        predicted_asin = ""

    target = record.get("target_asin", "")
    target_asin = "" if target is None else str(target)

    return (
        f"{user_id}",
        books_table,
        guidance_response,
        profile_response,
        rec_prompt,
        pd.DataFrame(candidates),
        target_asin,
        predicted_asin,
    )
71
+
72
def extract_choice(response_text):
    """Return the first integer found in a model reply, or None.

    Tokens are split on whitespace and stripped of surrounding brackets and
    common punctuation, so replies such as ``[2]``, ``2.`` or
    ``The answer is [3].`` are all recognised.

    Args:
        response_text: Text of the model reply; may be empty or None.

    Returns:
        The first whole-number token as an int, or None when there is none.
    """
    if not response_text:
        return None
    for token in response_text.split():
        core = token.strip("[]().,:;!?\"'")
        # isdigit() alone accepts characters like "²" that int() rejects.
        if core.isascii() and core.isdigit():
            return int(core)
    return None
77
+
78
def generate_books(books):
    """Render book records as one descriptive sentence each.

    Args:
        books: Iterable whose entries are either a single book dict or a
            sequence of book dicts. Every record in a nested entry is
            described; previously only the first one was. Each record must
            provide ``title``, ``categories`` (an iterable of strings) and
            ``description``.

    Returns:
        A list of description strings, one per book record, in input order.
    """
    book_combos = []
    for entry in books:
        records = [entry] if isinstance(entry, dict) else entry
        for record in records:
            categories = ', '.join(record['categories'])
            book_combos.append(
                f"Title of the book is {record['title']} and the category of the book is {categories}. "
                f"Description of the book is {record['description']}"
            )
    return book_combos
84
+
85
def guidance_prompt(titles):
    """Build the prompt asking the model which genres/themes the books share.

    ``titles`` is an iterable of strings; each becomes one ``- `` bullet line.
    """
    bullet_lines = ["- " + entry for entry in titles]
    header = "Here is a list of books a person has read:\n"
    question = "\n\nWhat genres or themes do you notice across these books? Please list them concisely."
    return header + "\n".join(bullet_lines) + question
87
+
88
def profile_prompt(titles, guidance):
    """Build the prompt asking the model for a one-paragraph reader profile.

    ``titles`` is an iterable of strings rendered as ``- `` bullets;
    ``guidance`` is the genres/themes text inserted verbatim.
    """
    bullet_lines = ["- " + entry for entry in titles]
    sections = (
        "Here is a list of books a person has read:\n" + "\n".join(bullet_lines),
        f"Based on the following genres/themes: {guidance}",
        "Summarize this person's book preferences in one paragraph.",
    )
    return "\n\n".join(sections)
90
+
91
def build_recommendation_prompt(profile, candidates):
    """Build the prompt asking the model to pick the best-matching candidate.

    Args:
        profile: Text describing the user's reading preference.
        candidates: Iterable whose entries are either a single candidate dict
            or a sequence of candidate dicts. Every record is listed;
            previously only the first record of each entry was. A record
            without ``title`` is shown as "Unknown Title".

    Returns:
        The prompt text, with candidates numbered from 1 and the allowed
        answer range matching the number of candidates listed.
    """
    titles = []
    for entry in candidates:
        records = [entry] if isinstance(entry, dict) else entry
        titles.extend(record.get('title', 'Unknown Title') for record in records)

    numbered = "".join(f"[{i}] {title}\n" for i, title in enumerate(titles, start=1))
    # Keep the advertised range in step with the list (was hard-coded to 4).
    last = max(len(titles), 1)
    return (
        f"A user has the following reading preference:\n\"{profile}\"\n\n"
        "Here are some books they might consider next:\n"
        f"{numbered}"
        "\nWhich of these books best matches the user's preference? "
        f"Respond ONLY with the number [1-{last}]."
    )
97
+
98
+
99
def get_books_theme(books):
    """Placeholder with no implementation yet; always returns None."""
    return None
101
+
102
# Gradio UI: a user dropdown drives every output below through get_books.
with gr.Blocks() as demo:
    gr.Markdown("## Select User")
    # Use the de-duplicated ID list so a user with several rows is listed once.
    user_dropdown = gr.Dropdown(choices=user_ids, value=None, label="User ID")

    gr.Markdown("## Selected User")
    output_text = gr.Textbox(show_label=False)
    gr.Markdown("## Books read")
    output_table = gr.Dataframe(label="Books Read", interactive=False, show_label=False)
    gr.Markdown("## User Books Theme")
    output_theme = gr.Textbox(label="User Books Theme", lines=8, show_label=False)
    gr.Markdown("## User Profile")
    output_profile = gr.Textbox(label="User Profile", show_label=False, lines=6)
    output_rec_prompt = gr.Textbox(label="Recommendation Prompt")
    output_candidate_options = gr.Dataframe(label="Candidate Books")
    output_target_id = gr.Textbox(label="Target Book")
    output_predicted_book = gr.Textbox(label="Predicted Book")

    # get_books must return one value per component listed here, in this order.
    user_dropdown.change(
        fn=get_books,
        inputs=user_dropdown,
        outputs=[
            output_text,
            output_table,
            output_theme,
            output_profile,
            output_rec_prompt,
            output_candidate_options,
            output_target_id,
            output_predicted_book,
        ],
    )


if __name__ == "__main__":
    demo.launch()
data/sample_gpg_data.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pandas
2
+ openai