Spaces:
Sleeping
Sleeping
ezequiellopez
committed on
Commit
·
8ebe686
1
Parent(s):
b21b232
refinements and multiple platforms
Browse files
- Dockerfile +1 -0
- app/app.py +3 -5
- app/modules/database.py +3 -3
- app/modules/models/api.py +136 -12
- app/modules/utils.py +8 -7
Dockerfile
CHANGED
@@ -15,6 +15,7 @@ RUN useradd -m -u 1000 user
|
|
15 |
# Switch to the "user" user
|
16 |
USER user
|
17 |
|
|
|
18 |
# Set the working directory in the container
|
19 |
WORKDIR /app
|
20 |
|
|
|
15 |
# Switch to the "user" user
|
16 |
USER user
|
17 |
|
18 |
+
|
19 |
# Set the working directory in the container
|
20 |
WORKDIR /app
|
21 |
|
app/app.py
CHANGED
@@ -36,7 +36,7 @@ async def rerank_items(input_data: Input) -> Output:
|
|
36 |
# who is the user?
|
37 |
user = input_data.session.user_id
|
38 |
date = input_data.session.current_time
|
39 |
-
platform = input_data.session.platform
|
40 |
items = input_data.items
|
41 |
# TODO consider sampling them?
|
42 |
|
@@ -51,6 +51,7 @@ async def rerank_items(input_data: Input) -> Output:
|
|
51 |
# if user already exists -> has boosting records
|
52 |
if user_in_db:
|
53 |
# has been boosted today?
|
|
|
54 |
if user_in_db.is_boosted_today():
|
55 |
# return only reranked items, no insertion
|
56 |
return Output(reranked_ids=reranked_ids, new_items=[])
|
@@ -79,14 +80,11 @@ async def rerank_items(input_data: Input) -> Output:
|
|
79 |
|
80 |
# user doesn't exist
|
81 |
else:
|
82 |
-
print(first_topic)
|
83 |
-
print(platform)
|
84 |
if first_topic != "non-civic":
|
85 |
fetched_boost = boost_db.get_random_boost(topic=first_topic,
|
86 |
platform=platform,
|
87 |
blacklist_ids=[])
|
88 |
-
|
89 |
-
print(type(fetched_boost))
|
90 |
user_db.add_user(user_id=user,
|
91 |
user=User(user_id=user, last_boost=date, boosts=[fetched_boost]))
|
92 |
|
|
|
36 |
# who is the user?
|
37 |
user = input_data.session.user_id
|
38 |
date = input_data.session.current_time
|
39 |
+
platform = input_data.session.platform.value
|
40 |
items = input_data.items
|
41 |
# TODO consider sampling them?
|
42 |
|
|
|
51 |
# if user already exists -> has boosting records
|
52 |
if user_in_db:
|
53 |
# has been boosted today?
|
54 |
+
print(user_in_db)
|
55 |
if user_in_db.is_boosted_today():
|
56 |
# return only reranked items, no insertion
|
57 |
return Output(reranked_ids=reranked_ids, new_items=[])
|
|
|
80 |
|
81 |
# user doesn't exist
|
82 |
else:
|
|
|
|
|
83 |
if first_topic != "non-civic":
|
84 |
fetched_boost = boost_db.get_random_boost(topic=first_topic,
|
85 |
platform=platform,
|
86 |
blacklist_ids=[])
|
87 |
+
|
|
|
88 |
user_db.add_user(user_id=user,
|
89 |
user=User(user_id=user, last_boost=date, boosts=[fetched_boost]))
|
90 |
|
app/modules/database.py
CHANGED
@@ -60,14 +60,14 @@ class User:
|
|
60 |
self.boosts.append(boost)
|
61 |
|
62 |
def is_boosted_today(self):
|
63 |
-
|
64 |
|
65 |
def update_boosted_today(self, status):
|
66 |
"""Update the boosted_today status."""
|
67 |
self.boosted_today = status
|
68 |
|
69 |
-
def __repr__(self):
|
70 |
-
|
71 |
|
72 |
class UserDatabase:
|
73 |
def __init__(self):
|
|
|
60 |
self.boosts.append(boost)
|
61 |
|
62 |
def is_boosted_today(self):
|
63 |
+
return compare_date_with_today(self.last_boost)
|
64 |
|
65 |
def update_boosted_today(self, status):
|
66 |
"""Update the boosted_today status."""
|
67 |
self.boosted_today = status
|
68 |
|
69 |
+
#def __repr__(self):
|
70 |
+
# return f"User(boosts={self.boosts}, boosted_today={self.boosted_today})"
|
71 |
|
72 |
class UserDatabase:
|
73 |
def __init__(self):
|
app/modules/models/api.py
CHANGED
@@ -2,6 +2,13 @@
|
|
2 |
from pydantic import BaseModel, Field
|
3 |
from typing import List, Optional
|
4 |
from uuid import UUID
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
class Engagement(BaseModel):
|
@@ -11,21 +18,27 @@ class Engagement(BaseModel):
|
|
11 |
award: int
|
12 |
|
13 |
class Item(BaseModel):
|
14 |
-
id: UUID
|
15 |
-
post_id: Optional[UUID] = None
|
16 |
-
parent_id: Optional[UUID] = None
|
17 |
-
title: Optional[str] = None
|
18 |
-
text:
|
19 |
-
author_name_hash: str
|
20 |
-
type: str
|
21 |
-
created_at: str
|
22 |
-
engagements: Engagement
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
class Session(BaseModel):
|
25 |
-
user_id: UUID
|
26 |
user_name_hash: str
|
27 |
-
platform:
|
28 |
-
current_time:
|
|
|
29 |
|
30 |
class Input(BaseModel):
|
31 |
session: Session
|
@@ -38,3 +51,114 @@ class NewItem(BaseModel):
|
|
38 |
class Output(BaseModel):
|
39 |
ranked_ids: List[UUID]
|
40 |
new_items: List[Optional[NewItem]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from pydantic import BaseModel, Field
|
3 |
from typing import List, Optional
|
4 |
from uuid import UUID
|
5 |
+
from datetime import datetime
|
6 |
+
from typing import Literal, Optional, Union
|
7 |
+
|
8 |
+
from pydantic import BaseModel, Field, HttpUrl
|
9 |
+
from pydantic.types import NonNegativeInt
|
10 |
+
from enum import Enum
|
11 |
+
|
12 |
|
13 |
|
14 |
class Engagement(BaseModel):
|
|
|
18 |
award: int
|
19 |
|
20 |
class Item(BaseModel):
|
21 |
+
id: UUID = Field(..., example=UUID("3fa85f64-5717-4562-b3fc-2c963f66afa6"))
|
22 |
+
#post_id: Optional[UUID] = None
|
23 |
+
#parent_id: Optional[UUID] = None
|
24 |
+
#title: Optional[str] = None
|
25 |
+
text: str = Field(..., example="I never liked politicians anyway, they don't care much about citizens")
|
26 |
+
#author_name_hash: str
|
27 |
+
#type: str
|
28 |
+
#created_at: str
|
29 |
+
#engagements: Engagement
|
30 |
+
|
31 |
+
class Platform(Enum):
|
32 |
+
TWITTER = "twitter"
|
33 |
+
FACEBOOK = "facebook"
|
34 |
+
REDDIT = "reddit"
|
35 |
|
36 |
class Session(BaseModel):
|
37 |
+
user_id: UUID = Field(..., example=UUID("4fa85f64-5717-4562-b3fc-2c963f66afd6"))
|
38 |
user_name_hash: str
|
39 |
+
platform: Platform = Field(..., example=Platform.TWITTER)
|
40 |
+
current_time: datetime = Field(..., example="2024-04-09T19:29:38.072017Z")
|
41 |
+
|
42 |
|
43 |
class Input(BaseModel):
|
44 |
session: Session
|
|
|
51 |
class Output(BaseModel):
|
52 |
ranked_ids: List[UUID]
|
53 |
new_items: List[Optional[NewItem]]
|
54 |
+
|
55 |
+
|
56 |
+
class TwitterEngagements(BaseModel):
|
57 |
+
"""Engagement counts from Twitter"""
|
58 |
+
|
59 |
+
retweet: NonNegativeInt
|
60 |
+
like: NonNegativeInt
|
61 |
+
comment: NonNegativeInt
|
62 |
+
share: NonNegativeInt
|
63 |
+
|
64 |
+
|
65 |
+
class RedditEngagements(BaseModel):
|
66 |
+
"""Engagement counts from Reddit"""
|
67 |
+
|
68 |
+
upvote: NonNegativeInt
|
69 |
+
downvote: NonNegativeInt
|
70 |
+
comment: NonNegativeInt
|
71 |
+
award: NonNegativeInt
|
72 |
+
|
73 |
+
|
74 |
+
class FacebookEngagements(BaseModel):
|
75 |
+
"""Engagement counts from Facebook"""
|
76 |
+
|
77 |
+
like: NonNegativeInt
|
78 |
+
love: NonNegativeInt
|
79 |
+
care: NonNegativeInt
|
80 |
+
haha: NonNegativeInt
|
81 |
+
wow: NonNegativeInt
|
82 |
+
sad: NonNegativeInt
|
83 |
+
angry: NonNegativeInt
|
84 |
+
comment: NonNegativeInt
|
85 |
+
share: NonNegativeInt
|
86 |
+
|
87 |
+
|
88 |
+
class ContentItem(BaseModel):
|
89 |
+
"""A content item to be ranked"""
|
90 |
+
|
91 |
+
id: str = Field(
|
92 |
+
description="A unique ID describing a specific piece of content. We will do our best to make an ID for a given item persist between requests, but that property is not guaranteed."
|
93 |
+
)
|
94 |
+
|
95 |
+
post_id: Optional[str] = Field(
|
96 |
+
description="The ID of the post to which this comment belongs. Useful for linking comments to their post when comments are shown in a feed. Currently this UX only exists on Facebook.",
|
97 |
+
default=None,
|
98 |
+
)
|
99 |
+
|
100 |
+
parent_id: Optional[str] = Field(
|
101 |
+
description="For threaded comments, this identifies the comment to which this one is a reply. Blank for top-level comments.",
|
102 |
+
default=None,
|
103 |
+
)
|
104 |
+
|
105 |
+
title: Optional[str] = Field(
|
106 |
+
description="The post title, only available on reddit posts.", default=None
|
107 |
+
)
|
108 |
+
|
109 |
+
text: str = Field(
|
110 |
+
description="The text of the content item. Assume UTF-8, and that leading and trailing whitespace have been trimmed."
|
111 |
+
)
|
112 |
+
|
113 |
+
author_name_hash: str = Field(
|
114 |
+
description="A hash of the author's name (salted). Use this to determine which posts are by the same author. When the post is by the current user, this should match `session.user_name_hash`."
|
115 |
+
)
|
116 |
+
|
117 |
+
type: Literal["post", "comment"] = Field(
|
118 |
+
description="Whether the content item is a `post` or `comment`. On Twitter, tweets will be identified as `comment` when they are replies displayed on the page for a single tweet."
|
119 |
+
)
|
120 |
+
|
121 |
+
embedded_urls: Optional[list[HttpUrl]] = Field(
|
122 |
+
description="A list of URLs that are embedded in the content item. This could be links to images, videos, or other content. They may or may not also appear in the text of the item."
|
123 |
+
)
|
124 |
+
|
125 |
+
created_at: datetime = Field(
|
126 |
+
description="The time that the item was created in UTC, in `YYYY-MM-DD hh:mm:ss` format, at the highest resolution available (which may be as low as the hour)."
|
127 |
+
)
|
128 |
+
|
129 |
+
engagements: Union[TwitterEngagements, RedditEngagements, FacebookEngagements] = (
|
130 |
+
Field(description="Engagement counts for the content item.")
|
131 |
+
)
|
132 |
+
|
133 |
+
|
134 |
+
class Session(BaseModel):
|
135 |
+
"""Data that is scoped to the user's browsing session (generally a single page view)"""
|
136 |
+
|
137 |
+
user_id: str = Field(
|
138 |
+
description="A unique id for this study participant. Will remain fixed for the duration of the experiment."
|
139 |
+
)
|
140 |
+
user_name_hash: str = Field(
|
141 |
+
description="A (salted) hash of the user's username. We'll do our best to make it match the `item.author_name_hash` on posts authored by the current user."
|
142 |
+
)
|
143 |
+
cohort: str = Field(
|
144 |
+
description="The cohort to which the user has been assigned. You can safely ignore this. It is used by the PRC request router."
|
145 |
+
)
|
146 |
+
platform: Literal["twitter", "reddit", "facebook"] = Field(
|
147 |
+
description="The platform on which the user is viewing content."
|
148 |
+
)
|
149 |
+
current_time: datetime = Field(
|
150 |
+
description="The current time according to the user's browser, in UTC, in `YYYY-MM-DD hh:mm:ss` format."
|
151 |
+
)
|
152 |
+
|
153 |
+
|
154 |
+
class RankingRequest(BaseModel):
|
155 |
+
"""A complete ranking request"""
|
156 |
+
|
157 |
+
session: Session = Field(
|
158 |
+
description="Data that is scoped to the user's browsing session"
|
159 |
+
)
|
160 |
+
#survey: Optional[SurveyResponse] = Field(
|
161 |
+
# description="Responses to PRC survey. Added by the request router.",
|
162 |
+
# default=None,
|
163 |
+
#)
|
164 |
+
items: list[ContentItem] = Field(description="The content items to be ranked.")
|
app/modules/utils.py
CHANGED
@@ -10,15 +10,16 @@ def load_csv_to_dict(filename):
|
|
10 |
data_list = list(dict_reader)
|
11 |
return data_list
|
12 |
|
13 |
-
def get_stringified_date(date):
|
14 |
-
"""
|
15 |
-
Gets a date like 2024-04-09T19:29:38.072017Z and returns the day
|
16 |
-
"""
|
17 |
-
return str(datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ").date())
|
18 |
|
19 |
-
def compare_date_with_today(
|
20 |
# Parse the input date string
|
21 |
-
|
|
|
22 |
|
23 |
# Get today's date (without time component)
|
24 |
today_date = datetime.now().date()
|
|
|
10 |
data_list = list(dict_reader)
|
11 |
return data_list
|
12 |
|
13 |
+
#def get_stringified_date(date):
|
14 |
+
# """
|
15 |
+
# Gets a date like 2024-04-09T19:29:38.072017Z and returns the day
|
16 |
+
# """
|
17 |
+
# return str(datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%fZ").date())
|
18 |
|
19 |
+
def compare_date_with_today(input_date:datetime):
|
20 |
# Parse the input date string
|
21 |
+
|
22 |
+
#input_date = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%fZ")
|
23 |
|
24 |
# Get today's date (without time component)
|
25 |
today_date = datetime.now().date()
|