ezequiellopez committed on
Commit
8ebe686
·
1 Parent(s): b21b232

refinements and multiple platform

Browse files
Dockerfile CHANGED
@@ -15,6 +15,7 @@ RUN useradd -m -u 1000 user
15
  # Switch to the "user" user
16
  USER user
17
 
 
18
  # Set the working directory in the container
19
  WORKDIR /app
20
 
 
15
  # Switch to the "user" user
16
  USER user
17
 
18
+
19
  # Set the working directory in the container
20
  WORKDIR /app
21
 
app/app.py CHANGED
@@ -36,7 +36,7 @@ async def rerank_items(input_data: Input) -> Output:
36
  # who is the user?
37
  user = input_data.session.user_id
38
  date = input_data.session.current_time
39
- platform = input_data.session.platform
40
  items = input_data.items
41
  # TODO consider sampling them?
42
 
@@ -51,6 +51,7 @@ async def rerank_items(input_data: Input) -> Output:
51
  # if user already exists -> has boosting records
52
  if user_in_db:
53
  # has been boosted today?
 
54
  if user_in_db.is_boosted_today():
55
  # return only reranked items, no insertion
56
  return Output(reranked_ids=reranked_ids, new_items=[])
@@ -79,14 +80,11 @@ async def rerank_items(input_data: Input) -> Output:
79
 
80
  # user doesn't exist
81
  else:
82
- print(first_topic)
83
- print(platform)
84
  if first_topic != "non-civic":
85
  fetched_boost = boost_db.get_random_boost(topic=first_topic,
86
  platform=platform,
87
  blacklist_ids=[])
88
- print(fetched_boost)
89
- print(type(fetched_boost))
90
  user_db.add_user(user_id=user,
91
  user=User(user_id=user, last_boost=date, boosts=[fetched_boost]))
92
 
 
36
  # who is the user?
37
  user = input_data.session.user_id
38
  date = input_data.session.current_time
39
+ platform = input_data.session.platform.value
40
  items = input_data.items
41
  # TODO consider sampling them?
42
 
 
51
  # if user already exists -> has boosting records
52
  if user_in_db:
53
  # has been boosted today?
54
+ print(user_in_db)
55
  if user_in_db.is_boosted_today():
56
  # return only reranked items, no insertion
57
  return Output(reranked_ids=reranked_ids, new_items=[])
 
80
 
81
  # user doesn't exist
82
  else:
 
 
83
  if first_topic != "non-civic":
84
  fetched_boost = boost_db.get_random_boost(topic=first_topic,
85
  platform=platform,
86
  blacklist_ids=[])
87
+
 
88
  user_db.add_user(user_id=user,
89
  user=User(user_id=user, last_boost=date, boosts=[fetched_boost]))
90
 
app/modules/database.py CHANGED
@@ -60,14 +60,14 @@ class User:
60
  self.boosts.append(boost)
61
 
62
  def is_boosted_today(self):
63
- self.is_boosted_today = compare_date_with_today(self.last_boost)
64
 
65
  def update_boosted_today(self, status):
66
  """Update the boosted_today status."""
67
  self.boosted_today = status
68
 
69
- def __repr__(self):
70
- return f"User(boosts={self.boosts}, boosted_today={self.boosted_today})"
71
 
72
  class UserDatabase:
73
  def __init__(self):
 
60
  self.boosts.append(boost)
61
 
62
  def is_boosted_today(self):
63
+ return compare_date_with_today(self.last_boost)
64
 
65
  def update_boosted_today(self, status):
66
  """Update the boosted_today status."""
67
  self.boosted_today = status
68
 
69
+ #def __repr__(self):
70
+ # return f"User(boosts={self.boosts}, boosted_today={self.boosted_today})"
71
 
72
  class UserDatabase:
73
  def __init__(self):
app/modules/models/api.py CHANGED
@@ -2,6 +2,13 @@
2
  from pydantic import BaseModel, Field
3
  from typing import List, Optional
4
  from uuid import UUID
 
 
 
 
 
 
 
5
 
6
 
7
  class Engagement(BaseModel):
@@ -11,21 +18,27 @@ class Engagement(BaseModel):
11
  award: int
12
 
13
  class Item(BaseModel):
14
- id: UUID
15
- post_id: Optional[UUID] = None
16
- parent_id: Optional[UUID] = None
17
- title: Optional[str] = None
18
- text: Optional[str] = None
19
- author_name_hash: str
20
- type: str
21
- created_at: str
22
- engagements: Engagement
 
 
 
 
 
23
 
24
  class Session(BaseModel):
25
- user_id: UUID
26
  user_name_hash: str
27
- platform: str
28
- current_time: str
 
29
 
30
  class Input(BaseModel):
31
  session: Session
@@ -38,3 +51,114 @@ class NewItem(BaseModel):
38
  class Output(BaseModel):
39
  ranked_ids: List[UUID]
40
  new_items: List[Optional[NewItem]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from pydantic import BaseModel, Field
3
  from typing import List, Optional
4
  from uuid import UUID
5
+ from datetime import datetime
6
+ from typing import Literal, Optional, Union
7
+
8
+ from pydantic import BaseModel, Field, HttpUrl
9
+ from pydantic.types import NonNegativeInt
10
+ from enum import Enum
11
+
12
 
13
 
14
  class Engagement(BaseModel):
 
18
  award: int
19
 
20
  class Item(BaseModel):
21
+ id: UUID = Field(..., example=UUID("3fa85f64-5717-4562-b3fc-2c963f66afa6"))
22
+ #post_id: Optional[UUID] = None
23
+ #parent_id: Optional[UUID] = None
24
+ #title: Optional[str] = None
25
+ text: str = Field(..., example="I never liked politicians anyway, they don't care much about citizens")
26
+ #author_name_hash: str
27
+ #type: str
28
+ #created_at: str
29
+ #engagements: Engagement
30
+
31
+ class Platform(Enum):
32
+ TWITTER = "twitter"
33
+ FACEBOOK = "facebook"
34
+ REDDIT = "reddit"
35
 
36
  class Session(BaseModel):
37
+ user_id: UUID = Field(..., example=UUID("4fa85f64-5717-4562-b3fc-2c963f66afd6"))
38
  user_name_hash: str
39
+ platform: Platform = Field(..., example=Platform.TWITTER)
40
+ current_time: datetime = Field(..., example="2024-04-09T19:29:38.072017Z")
41
+
42
 
43
  class Input(BaseModel):
44
  session: Session
 
51
  class Output(BaseModel):
52
  ranked_ids: List[UUID]
53
  new_items: List[Optional[NewItem]]
54
+
55
+
56
+ class TwitterEngagements(BaseModel):
57
+ """Engagement counts from Twitter"""
58
+
59
+ retweet: NonNegativeInt
60
+ like: NonNegativeInt
61
+ comment: NonNegativeInt
62
+ share: NonNegativeInt
63
+
64
+
65
+ class RedditEngagements(BaseModel):
66
+ """Engagement counts from Reddit"""
67
+
68
+ upvote: NonNegativeInt
69
+ downvote: NonNegativeInt
70
+ comment: NonNegativeInt
71
+ award: NonNegativeInt
72
+
73
+
74
+ class FacebookEngagements(BaseModel):
75
+ """Engagement counts from Facebook"""
76
+
77
+ like: NonNegativeInt
78
+ love: NonNegativeInt
79
+ care: NonNegativeInt
80
+ haha: NonNegativeInt
81
+ wow: NonNegativeInt
82
+ sad: NonNegativeInt
83
+ angry: NonNegativeInt
84
+ comment: NonNegativeInt
85
+ share: NonNegativeInt
86
+
87
+
88
+ class ContentItem(BaseModel):
89
+ """A content item to be ranked"""
90
+
91
+ id: str = Field(
92
+ description="A unique ID describing a specific piece of content. We will do our best to make an ID for a given item persist between requests, but that property is not guaranteed."
93
+ )
94
+
95
+ post_id: Optional[str] = Field(
96
+ description="The ID of the post to which this comment belongs. Useful for linking comments to their post when comments are shown in a feed. Currently this UX only exists on Facebook.",
97
+ default=None,
98
+ )
99
+
100
+ parent_id: Optional[str] = Field(
101
+ description="For threaded comments, this identifies the comment to which this one is a reply. Blank for top-level comments.",
102
+ default=None,
103
+ )
104
+
105
+ title: Optional[str] = Field(
106
+ description="The post title, only available on reddit posts.", default=None
107
+ )
108
+
109
+ text: str = Field(
110
+ description="The text of the content item. Assume UTF-8, and that leading and trailing whitespace have been trimmed."
111
+ )
112
+
113
+ author_name_hash: str = Field(
114
+ description="A hash of the author's name (salted). Use this to determine which posts are by the same author. When the post is by the current user, this should match `session.user_name_hash`."
115
+ )
116
+
117
+ type: Literal["post", "comment"] = Field(
118
+ description="Whether the content item is a `post` or `comment`. On Twitter, tweets will be identified as `comment` when they are replies displayed on the page for a single tweet."
119
+ )
120
+
121
+ embedded_urls: Optional[list[HttpUrl]] = Field(
122
+ description="A list of URLs that are embedded in the content item. This could be links to images, videos, or other content. They may or may not also appear in the text of the item."
123
+ )
124
+
125
+ created_at: datetime = Field(
126
+ description="The time that the item was created in UTC, in `YYYY-MM-DD hh:mm:ss` format, at the highest resolution available (which may be as low as the hour)."
127
+ )
128
+
129
+ engagements: Union[TwitterEngagements, RedditEngagements, FacebookEngagements] = (
130
+ Field(description="Engagement counts for the content item.")
131
+ )
132
+
133
+
134
+ class Session(BaseModel):
135
+ """Data that is scoped to the user's browsing session (generally a single page view)"""
136
+
137
+ user_id: str = Field(
138
+ description="A unique id for this study participant. Will remain fixed for the duration of the experiment."
139
+ )
140
+ user_name_hash: str = Field(
141
+ description="A (salted) hash of the user's username. We'll do our best to make it match the `item.author_name_hash` on posts authored by the current user."
142
+ )
143
+ cohort: str = Field(
144
+ description="The cohort to which the user has been assigned. You can safely ignore this. It is used by the PRC request router."
145
+ )
146
+ platform: Literal["twitter", "reddit", "facebook"] = Field(
147
+ description="The platform on which the user is viewing content."
148
+ )
149
+ current_time: datetime = Field(
150
+ description="The current time according to the user's browser, in UTC, in `YYYY-MM-DD hh:mm:ss` format."
151
+ )
152
+
153
+
154
+ class RankingRequest(BaseModel):
155
+ """A complete ranking request"""
156
+
157
+ session: Session = Field(
158
+ description="Data that is scoped to the user's browsing session"
159
+ )
160
+ #survey: Optional[SurveyResponse] = Field(
161
+ # description="Responses to PRC survey. Added by the request router.",
162
+ # default=None,
163
+ #)
164
+ items: list[ContentItem] = Field(description="The content items to be ranked.")
app/modules/utils.py CHANGED
@@ -10,15 +10,16 @@ def load_csv_to_dict(filename):
10
  data_list = list(dict_reader)
11
  return data_list
12
 
13
- def get_stringified_date(date):
14
- """
15
- Gets a date like 2024-04-09T19:29:38.072017Z and returns the day
16
- """
17
- return str(datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ").date())
18
 
19
- def compare_date_with_today(date_str):
20
  # Parse the input date string
21
- input_date = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%fZ")
 
22
 
23
  # Get today's date (without time component)
24
  today_date = datetime.now().date()
 
10
  data_list = list(dict_reader)
11
  return data_list
12
 
13
+ #def get_stringified_date(date):
14
+ # """
15
+ # Gets a date like 2024-04-09T19:29:38.072017Z and returns the day
16
+ # """
17
+ # return str(datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ").date())
18
 
19
+ def compare_date_with_today(input_date:datetime):
20
  # Parse the input date string
21
+
22
+ #input_date = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%fZ")
23
 
24
  # Get today's date (without time component)
25
  today_date = datetime.now().date()