Sam Fred committed · 58e450d
Parent(s): 12f0926
- __init__.py +0 -0
- app.py +208 -0
- endpoints/__pycache__/analytics.cpython-312.pyc +0 -0
- endpoints/__pycache__/database.cpython-312.pyc +0 -0
- endpoints/__pycache__/images.cpython-312.pyc +0 -0
- endpoints/__pycache__/models.cpython-312.pyc +0 -0
- endpoints/__pycache__/posts.cpython-312.pyc +0 -0
- endpoints/analytics.py +20 -0
- endpoints/database.py +18 -0
- endpoints/images.py +0 -0
- endpoints/models.py +28 -0
- endpoints/posts.py +63 -0
- google_trends.py +0 -0
- models/engagement_rate_model.pkl +3 -0
- models/models.txt +0 -0
- models/promotion_strategy_model.pkl +3 -0
- models/prophet_model.pkl +3 -0
- models/viral_potential_model.pkl +3 -0
- requirements.txt +25 -0
- schemas.js +11 -0
- utils/__pycache__/database.cpython-312.pyc +0 -0
- utils/__pycache__/image_processing.cpython-312.pyc +0 -0
- utils/__pycache__/instaloader_utils.cpython-312.pyc +0 -0
- utils/__pycache__/logging_utils.cpython-312.pyc +0 -0
- utils/__pycache__/preprocessing.cpython-312.pyc +0 -0
- utils/__pycache__/visualization.cpython-312.pyc +0 -0
- utils/database.py +114 -0
- utils/image_processing.py +43 -0
- utils/instaloader_utils.py +71 -0
- utils/logging_utils.py +5 -0
- utils/preprocessing.py +43 -0
- utils/visualization.py +20 -0
__init__.py
ADDED
File without changes
app.py
ADDED
@@ -0,0 +1,208 @@
# api/main.py
from fastapi import FastAPI, HTTPException, Depends
from pydantic import BaseModel
from typing import List, Dict
from collections import Counter  # needed by recommend_hashtags below
import logging
import requests
from io import BytesIO
from PIL import Image
import pytesseract
from textblob import TextBlob
import pandas as pd
import joblib
from sqlalchemy.orm import Session
from utils.database import init_db, save_to_db, fetch_posts_from_db, get_db
from utils.instaloader_utils import fetch_user_posts, fetch_competitors_data
import torch
from torchvision import transforms
from transformers import ResNetForImageClassification

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize FastAPI app
app = FastAPI()

# Initialize database
init_db()

# Load models
viral_model = joblib.load("api/models/viral_potential_model.pkl")
engagement_model = joblib.load("api/models/engagement_rate_model.pkl")
promotion_model = joblib.load("api/models/promotion_strategy_model.pkl")

class UserRequest(BaseModel):
    username: str

class AnalyzePostRequest(BaseModel):
    caption: str
    hashtags: str
    image_url: str

@app.post("/fetch-posts")
async def fetch_posts(user: UserRequest):
    """
    Fetch posts from a given Instagram profile (public data only).
    """
    username = user.username
    logging.info(f"Fetching posts for user: {username}")

    try:
        # Fetch user's posts
        user_posts = fetch_user_posts(username)
        if not user_posts:
            raise HTTPException(status_code=404, detail="No posts found for the user.")

        # Fetch competitors' posts (uses the default RapidAPI key from instaloader_utils)
        competitors_posts = fetch_competitors_data(username)

        # Combine user and competitor data
        all_posts = user_posts + competitors_posts

        # Save data to the database
        save_to_db(all_posts)

        return {"status": "success", "data": all_posts}
    except Exception as e:
        logging.error(f"Error fetching posts: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/analyze")
async def analyze(user: UserRequest, db: Session = Depends(get_db)):
    """
    Analyze user and competitor data.
    """
    username = user.username
    logging.info(f"Analyzing data for user: {username}")

    try:
        # Fetch data from the database
        user_posts = fetch_posts_from_db(username)
        if not user_posts:
            raise HTTPException(status_code=404, detail="No posts found for the user.")

        # Perform analysis (e.g., viral potential, engagement rate, etc.)
        analysis_results = {
            "viral_potential": predict_viral_potential(user_posts),
            "top_hashtags": recommend_hashtags(user_posts),
            "engagement_stats": {
                "mean_likes": sum(post['likes'] for post in user_posts) / len(user_posts),
                "mean_comments": sum(post['comments'] for post in user_posts) / len(user_posts)
            }
        }

        return {"status": "success", "results": analysis_results}
    except Exception as e:
        logging.error(f"Error analyzing data: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/analyze-post")
async def analyze_post(post: AnalyzePostRequest, db: Session = Depends(get_db)):
    """
    Analyze a single post (caption, hashtags, and image).
    """
    try:
        # Download and analyze the image
        response = requests.get(post.image_url)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))

        # Extract text from the image
        extracted_text = extract_text_from_image(image)

        # Analyze the image content
        image_analysis = analyze_image(image)

        # Preprocess input for models
        features = {
            'caption_length': len(post.caption),
            'hashtag_count': len(post.hashtags.split(",")),
            'sentiment': TextBlob(post.caption).sentiment.polarity
        }
        features_df = pd.DataFrame([features])

        # Make predictions
        viral_score = viral_model.predict_proba(features_df)[0][1]
        engagement_rate = engagement_model.predict(features_df)[0]
        promote = promotion_model.predict(features_df)[0]

        # Save post to database
        post_data = {
            "caption": post.caption,
            "hashtags": post.hashtags,
            "image_url": post.image_url,
            "engagement_rate": engagement_rate,
            "viral_score": viral_score,
            "promote": bool(promote)
        }
        save_to_db([post_data])

        return {
            "extracted_text": extracted_text,
            "image_analysis": image_analysis,
            "viral_score": viral_score,
            "engagement_rate": engagement_rate,
            "promote": bool(promote)
        }
    except Exception as e:
        logging.error(f"Error analyzing post: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# Image processing functions
def resize_image(image, max_size=(800, 600)):
    """Resize an image to the specified maximum size."""
    image.thumbnail(max_size)
    return image

def extract_text_from_image(image):
    """Extract text from an image using OCR."""
    try:
        image = resize_image(image)
        text = pytesseract.image_to_string(image)
        return text
    except Exception as e:
        logging.error(f"Error extracting text from image: {e}")
        return ""

def analyze_image(image):
    """Analyze image content using a pre-trained model."""
    try:
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        image_tensor = preprocess(image).unsqueeze(0)

        # Load ResNet model
        model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
        model.eval()

        with torch.no_grad():
            output = model(image_tensor)
        return output.logits.tolist()  # Return the logits as a list
    except Exception as e:
        logging.error(f"Error analyzing image: {e}")
        return None

# Helper functions
def predict_viral_potential(posts: List[Dict]) -> List[Dict]:
    """
    Predict viral potential for posts.
    """
    # Placeholder for viral potential prediction logic
    return [{"caption": post["caption"], "viral_score": 0.8} for post in posts]

def recommend_hashtags(posts: List[Dict]) -> List[str]:
    """
    Recommend trending hashtags.
    """
    hashtags = [hashtag for post in posts for hashtag in post['hashtags']]
    hashtag_counts = Counter(hashtags)
    return [hashtag for hashtag, _ in hashtag_counts.most_common(10)]

# Run the API
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
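For orientation (not part of the commit): a minimal sketch of how these endpoints could be exercised once the server is running. The host and port match the uvicorn.run call above; the username and post fields are placeholders.

# client_example.py — hypothetical smoke test for the API above
import requests

BASE_URL = "http://localhost:8000"  # matches uvicorn.run(app, host="0.0.0.0", port=8000)

# Fetch and store posts for a placeholder public profile
resp = requests.post(f"{BASE_URL}/fetch-posts", json={"username": "some_public_account"})
print(resp.status_code)

# Score a single post; AnalyzePostRequest expects caption, hashtags (comma-separated), image_url
resp = requests.post(f"{BASE_URL}/analyze-post", json={
    "caption": "Sunset over the bay",
    "hashtags": "sunset,travel",
    "image_url": "https://example.com/photo.jpg",  # placeholder image URL
})
print(resp.json())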
endpoints/__pycache__/analytics.cpython-312.pyc
ADDED
Binary file (163 Bytes).
endpoints/__pycache__/database.cpython-312.pyc
ADDED
Binary file (925 Bytes).
endpoints/__pycache__/images.cpython-312.pyc
ADDED
Binary file (160 Bytes).
endpoints/__pycache__/models.cpython-312.pyc
ADDED
Binary file (1.3 kB).
endpoints/__pycache__/posts.cpython-312.pyc
ADDED
Binary file (2.87 kB).
endpoints/analytics.py
ADDED
@@ -0,0 +1,20 @@
from fastapi import APIRouter, HTTPException
from api.utils.preprocessing import preprocess_data
from api.utils.visualization import generate_engagement_heatmap
from api.utils.logging_utils import logger

router = APIRouter()

@router.post("/analyze")
async def analyze(username: str):
    try:
        # Preprocess data
        data = preprocess_data(username)

        # Generate engagement heatmap
        heatmap = generate_engagement_heatmap(data)

        return {"status": "success", "heatmap": heatmap}
    except Exception as e:
        logger.error(f"Error analyzing data: {e}")
        raise HTTPException(status_code=500, detail=str(e))
endpoints/database.py
ADDED
@@ -0,0 +1,18 @@
from sqlalchemy.orm import Session
from models import Post, SessionLocal

# Get database session
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

# Save post to database
def save_post(db: Session, post_data: dict):
    post = Post(**post_data)
    db.add(post)
    db.commit()
    db.refresh(post)
    return post
endpoints/images.py
ADDED
File without changes
endpoints/models.py
ADDED
@@ -0,0 +1,28 @@
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Database URL (e.g., SQLite, PostgreSQL, MySQL)
DATABASE_URL = "sqlite:///./instagram_ai.db"

# Create engine and session
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Base class for models
Base = declarative_base()

# Define Post model
class Post(Base):
    __tablename__ = "posts"
    id = Column(Integer, primary_key=True, index=True)
    caption = Column(String, nullable=False)
    hashtags = Column(String)
    image_url = Column(String)
    posting_time = Column(DateTime)
    engagement_rate = Column(Float)
    viral_score = Column(Float)
    promote = Column(Integer)  # 1 for promote, 0 for don't promote

# Create tables
Base.metadata.create_all(bind=engine)
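As a quick illustration (not in the commit), the Post model and SessionLocal defined above can be used directly; the field values below are placeholders.

# Hypothetical usage of the SQLAlchemy model above
from endpoints.models import Post, SessionLocal  # assumes the repo root is on sys.path

db = SessionLocal()
try:
    # Insert a placeholder row and read it back
    db.add(Post(caption="demo", hashtags="demo,test", engagement_rate=0.0, viral_score=0.0, promote=0))
    db.commit()
    print(db.query(Post).filter(Post.caption == "demo").count())
finally:
    db.close()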
endpoints/posts.py
ADDED
@@ -0,0 +1,63 @@
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from database import get_db, save_post
from utils.preprocessing import preprocess_data
from utils.image_processing import extract_text_from_image, analyze_image
import joblib
import requests
from PIL import Image
from io import BytesIO
from textblob import TextBlob
import pandas as pd

# Load models
viral_model = joblib.load("models/viral_potential_model.pkl")
engagement_model = joblib.load("models/engagement_rate_model.pkl")
promotion_model = joblib.load("models/promotion_strategy_model.pkl")

router = APIRouter()

# Endpoint to analyze and save a post
@router.post("/analyze-post")
async def analyze_post(caption: str, hashtags: str, image_url: str, db: Session = Depends(get_db)):
    try:
        # Download and analyze the image
        response = requests.get(image_url)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
        extracted_text = extract_text_from_image(image)
        image_analysis = analyze_image(image)

        # Preprocess input for models
        features = {
            'caption_length': len(caption),
            'hashtag_count': len(hashtags.split(",")),
            'sentiment': TextBlob(caption).sentiment.polarity
        }
        features_df = pd.DataFrame([features])

        # Make predictions
        viral_score = viral_model.predict_proba(features_df)[0][1]
        engagement_rate = engagement_model.predict(features_df)[0]
        promote = promotion_model.predict(features_df)[0]

        # Save post to database
        post_data = {
            "caption": caption,
            "hashtags": hashtags,
            "image_url": image_url,
            "engagement_rate": engagement_rate,
            "viral_score": viral_score,
            "promote": promote
        }
        save_post(db, post_data)

        return {
            "extracted_text": extracted_text,
            "image_analysis": image_analysis,
            "viral_score": viral_score,
            "engagement_rate": engagement_rate,
            "promote": bool(promote)
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
google_trends.py
ADDED
File without changes
models/engagement_rate_model.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aafaee190ed53d6859751ab011e123134648137afcf840f57ae58e8d1e97445a
size 73122
models/models.txt
ADDED
File without changes
models/promotion_strategy_model.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1d9f980263c17b1c8f05732ede25d5235caf5138f8d00620d457762ae0da21ec
size 1247
models/prophet_model.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:359ea47c7383c48dcaee07b947b8c6ef4f77694a1f2d39dc194861eee9b3e84c
size 178763
models/viral_potential_model.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:497dd50e7db8b9b7419121babc96a01efd5fbdb35a365971a289f9bba7c8d784
size 48185
requirements.txt
ADDED
@@ -0,0 +1,25 @@
pandas
numpy
scikit-learn
xgboost
statsmodels
tensorflow
textblob
imbalanced-learn
matplotlib
uvicorn
prophet
seaborn
pytesseract
torch
torchvision
Pillow
python-dotenv
transformers
python-dotenv
requests
datasets
plotly
fastapi
huggingface_hub
kaggle
schemas.js
ADDED
@@ -0,0 +1,11 @@
const googleTrends = require('google-trends-api');

const keyword = process.argv[2];

googleTrends.interestOverTime({ keyword: keyword, startTime: new Date('2023-01-01') })
    .then(function(results) {
        console.log(results);
    })
    .catch(function(err) {
        console.error(err);
    });
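The committed google_trends.py is empty; one plausible way it could eventually drive this script is a subprocess call. A minimal sketch, assuming Node and the google-trends-api package are installed locally:

# Hypothetical Python bridge to schemas.js (not part of the commit)
import json
import subprocess

def interest_over_time(keyword: str) -> dict:
    """Run the Node script with a keyword and parse its JSON output."""
    result = subprocess.run(
        ["node", "schemas.js", keyword],
        capture_output=True, text=True, check=True,
    )
    return json.loads(result.stdout)

if __name__ == "__main__":
    print(interest_over_time("instagram"))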
utils/__pycache__/database.cpython-312.pyc
ADDED
Binary file (4.35 kB).
utils/__pycache__/image_processing.cpython-312.pyc
ADDED
Binary file (2.38 kB).
utils/__pycache__/instaloader_utils.cpython-312.pyc
ADDED
Binary file (4.03 kB).
utils/__pycache__/logging_utils.cpython-312.pyc
ADDED
Binary file (494 Bytes).
utils/__pycache__/preprocessing.cpython-312.pyc
ADDED
Binary file (2.36 kB).
utils/__pycache__/visualization.cpython-312.pyc
ADDED
Binary file (1.63 kB).
utils/database.py
ADDED
@@ -0,0 +1,114 @@
# api/utils/database.py
import sqlite3
import json
from typing import List, Dict
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session

# SQLite database setup
DATABASE = "instagram_ai.db"

def init_db():
    """
    Initialize the SQLite database.
    """
    conn = sqlite3.connect(DATABASE)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS posts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username TEXT NOT NULL,
            caption TEXT,
            hashtags TEXT,
            likes INTEGER,
            comments INTEGER,
            date TEXT,
            image_url TEXT UNIQUE, -- Ensure image_url is unique (SQL comments use --, not #)
            engagement_rate REAL,
            viral_score REAL,
            promote BOOLEAN
        )
    ''')
    conn.commit()
    conn.close()

def post_exists(image_url: str) -> bool:
    """
    Check if a post with the given image_url already exists in the database.
    """
    conn = sqlite3.connect(DATABASE)
    cursor = conn.cursor()
    cursor.execute('SELECT id FROM posts WHERE image_url = ?', (image_url,))
    result = cursor.fetchone()
    conn.close()
    return result is not None

def save_to_db(data: List[Dict]):
    """
    Save data to the SQLite database, avoiding duplicates.
    """
    conn = sqlite3.connect(DATABASE)
    cursor = conn.cursor()
    for post in data:
        # Check if the post already exists
        if not post_exists(post.get('image_url')):
            cursor.execute('''
                INSERT INTO posts (username, caption, hashtags, likes, comments, date, image_url, engagement_rate, viral_score, promote)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                post.get('username', ''),
                post.get('caption', ''),
                json.dumps(post.get('hashtags', [])),  # Store hashtags as JSON
                post.get('likes', 0),
                post.get('comments', 0),
                post.get('date', ''),  # Date is already a string
                post.get('image_url', ''),
                post.get('engagement_rate', 0.0),
                post.get('viral_score', 0.0),
                post.get('promote', False)
            ))
    conn.commit()
    conn.close()
    print(f"Data saved to database: {DATABASE}")

def fetch_posts_from_db(username: str) -> List[Dict]:
    """
    Fetch posts from the database for a given username.
    """
    conn = sqlite3.connect(DATABASE)
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM posts WHERE username = ?', (username,))
    rows = cursor.fetchall()
    conn.close()

    # Convert rows to a list of dictionaries
    posts = []
    for row in rows:
        posts.append({
            "username": row[1],
            "caption": row[2],
            "hashtags": json.loads(row[3]),  # Convert JSON back to list
            "likes": row[4],
            "comments": row[5],
            "date": row[6],  # Date is already a string
            "image_url": row[7],
            "engagement_rate": row[8],
            "viral_score": row[9],
            "promote": bool(row[10])
        })
    return posts

# SQLAlchemy setup for dependency injection
SQLALCHEMY_DATABASE_URL = f"sqlite:///{DATABASE}"
engine = create_engine(SQLALCHEMY_DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

def get_db():
    """
    Dependency to get a database session.
    """
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
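A small round-trip check of these helpers (illustrative only; the dict mirrors the columns created in init_db):

# Hypothetical usage of utils/database.py
from utils.database import init_db, save_to_db, fetch_posts_from_db

init_db()
save_to_db([{
    "username": "demo_user",          # placeholder account
    "caption": "hello",
    "hashtags": ["hello", "demo"],    # stored as JSON text
    "likes": 10,
    "comments": 2,
    "date": "2024-01-01T00:00:00",
    "image_url": "https://example.com/unique.jpg",  # duplicates on this URL are skipped
}])
print(fetch_posts_from_db("demo_user"))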
utils/image_processing.py
ADDED
@@ -0,0 +1,43 @@
from PIL import Image
import pytesseract
import torch
from torchvision import transforms
from transformers import ResNetForImageClassification
import logging

def resize_image(image, max_size=(800, 600)):
    """Resize an image to the specified maximum size."""
    image.thumbnail(max_size)
    return image

def extract_text_from_image(image):
    """Extract text from an image using OCR."""
    try:
        image = resize_image(image)
        text = pytesseract.image_to_string(image)
        return text
    except Exception as e:
        logging.error(f"Error extracting text from image: {e}")
        return ""

def analyze_image(image):
    """Analyze image content using a pre-trained model."""
    try:
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        image_tensor = preprocess(image).unsqueeze(0)

        # Load ResNet model
        model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
        model.eval()

        with torch.no_grad():
            output = model(image_tensor)
        return output.logits.tolist()  # Plain lists are JSON-serializable (matches the app.py copy)
    except Exception as e:
        logging.error(f"Error analyzing image: {e}")
        return None
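Typical usage with a local file (illustrative; the Tesseract binary itself must be installed for pytesseract to work):

# Hypothetical usage of utils/image_processing.py
from PIL import Image
from utils.image_processing import extract_text_from_image, analyze_image

img = Image.open("sample.jpg").convert("RGB")  # placeholder path; RGB avoids channel mismatches
print(extract_text_from_image(img))
logits = analyze_image(img)  # ImageNet-style logits from microsoft/resnet-50, or None on failure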
utils/instaloader_utils.py
ADDED
@@ -0,0 +1,71 @@
# api/utils/instaloader_utils.py
import instaloader
from typing import List, Dict
import requests

# Initialize Instaloader
L = instaloader.Instaloader()

# Key committed with this repo; callers should prefer passing their own.
DEFAULT_RAPIDAPI_KEY = "d14b901fa8mshdafabd10d36f007p1ff602jsn91766325646f"

def fetch_user_posts(username: str, max_posts: int = 50) -> List[Dict]:
    """
    Fetch posts from a given Instagram profile (public data only).
    """
    try:
        profile = instaloader.Profile.from_username(L.context, username)
        posts = []
        for post in profile.get_posts():
            posts.append({
                "username": username,
                "caption": post.caption,
                "hashtags": post.caption_hashtags,
                "likes": post.likes,
                "comments": post.comments,
                "date": post.date_utc.isoformat(),
                "image_url": post.url
            })
            if len(posts) >= max_posts:  # Limit the number of posts
                break
        return posts
    except Exception as e:
        print(f"Error fetching posts for {username}: {e}")
        return []

def find_similar_accounts(username: str, rapidapi_key: str = DEFAULT_RAPIDAPI_KEY) -> List[str]:
    """
    Fetch similar accounts using the RapidAPI endpoint.
    """
    url = "https://instagram-scraper-api2.p.rapidapi.com/v1/similar_accounts"
    querystring = {"username_or_id_or_url": username}

    headers = {
        "x-rapidapi-host": "instagram-scraper-api2.p.rapidapi.com",
        "x-rapidapi-key": rapidapi_key  # was hardcoded; now uses the caller-supplied key
    }

    try:
        response = requests.get(url, headers=headers, params=querystring)
        response.raise_for_status()  # Raise an error for bad status codes
        data = response.json()

        # Extract similar accounts from the API response
        if data.get("status") == "success":
            return data.get("data", {}).get("similar_accounts", [])
        else:
            print(f"Error fetching similar accounts: {data.get('message')}")
            return []
    except requests.exceptions.RequestException as e:
        print(f"API request failed: {e}")
        return []

def fetch_competitors_data(username: str, rapidapi_key: str = DEFAULT_RAPIDAPI_KEY, max_posts: int = 50) -> List[Dict]:
    """
    Fetch data for similar accounts (competitors) using the RapidAPI endpoint.
    """
    similar_accounts = find_similar_accounts(username, rapidapi_key)
    all_posts = []
    for account in similar_accounts:
        print(f"Fetching posts for competitor: {account}")
        competitor_posts = fetch_user_posts(account, max_posts)
        all_posts.extend(competitor_posts)
    return all_posts
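For example (placeholder username and key; unauthenticated Instaloader access is heavily rate-limited by Instagram):

# Hypothetical usage of utils/instaloader_utils.py
from utils.instaloader_utils import fetch_user_posts, fetch_competitors_data

posts = fetch_user_posts("some_public_account", max_posts=10)
print(len(posts), "posts fetched")

competitor_posts = fetch_competitors_data("some_public_account", rapidapi_key="YOUR_RAPIDAPI_KEY")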
utils/logging_utils.py
ADDED
@@ -0,0 +1,5 @@
import logging

def setup_logging():
    """Set up logging configuration."""
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
utils/preprocessing.py
ADDED
@@ -0,0 +1,43 @@
import pandas as pd
from textblob import TextBlob
from sklearn.preprocessing import LabelEncoder
import logging

def preprocess_data(df):
    """Preprocess the input DataFrame."""
    # Ensure required columns exist
    required_columns = ['likes', 'comments', 'shares', 'posting_time', 'caption', 'hashtags']
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        logging.warning(f"Missing required columns: {missing_columns}")
        for col in missing_columns:
            if col in ['likes', 'comments', 'shares']:
                df[col] = 0  # Fill with default value (integer)
            elif col == 'caption':
                df[col] = ''  # Fill with default value (empty string)
            elif col == 'hashtags':
                df[col] = [[] for _ in range(len(df))]  # Fill with default value (list of empty lists)
            elif col == 'posting_time':
                df[col] = pd.NaT  # No timestamps available; these rows are dropped below

    # Convert posting_time to datetime
    df['posting_time'] = pd.to_datetime(df['posting_time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
    df = df[df['posting_time'].notna()]

    # Calculate engagement rate (here: total interactions per post)
    df['engagement_rate'] = df['likes'] + df['comments'] + df['shares']

    # Calculate caption length and hashtag count
    df['caption_length'] = df['caption'].apply(len)
    df['hashtag_count'] = df['hashtags'].apply(len)

    # Calculate sentiment
    df['caption_sentiment'] = df['caption'].apply(lambda x: TextBlob(x).sentiment.polarity)
    df['sentiment'] = df['caption_sentiment']

    # Encode categorical columns
    if 'content_type' in df.columns and 'media_type' in df.columns:
        label_encoder = LabelEncoder()
        df['content_type_encoded'] = label_encoder.fit_transform(df['content_type'])
        df['media_type_encoded'] = label_encoder.fit_transform(df['media_type'])

    return df
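A minimal illustration of the input shape preprocess_data expects (values made up):

# Hypothetical usage of utils/preprocessing.py
import pandas as pd
from utils.preprocessing import preprocess_data

df = pd.DataFrame([{
    "likes": 120,
    "comments": 8,
    "shares": 3,
    "posting_time": "2024-01-01 12:00:00",
    "caption": "Great day at the beach!",
    "hashtags": ["beach", "summer"],
}])
out = preprocess_data(df)
print(out[["engagement_rate", "caption_length", "hashtag_count", "sentiment"]])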
utils/visualization.py
ADDED
@@ -0,0 +1,20 @@
import matplotlib.pyplot as plt
import seaborn as sns

def plot_engagement_heatmap(engagement_by_hour):
    """Plot engagement heatmap by time of day."""
    plt.figure(figsize=(10, 6))
    sns.heatmap(engagement_by_hour.pivot_table(index='hour', values='engagement_rate'), annot=True, cmap='YlGnBu')
    plt.title('Engagement Heatmap by Time of Day')
    plt.xlabel('Engagement Rate')
    plt.ylabel('Hour of Day')
    plt.show()

def plot_engagement_over_time(engagement_summary):
    """Plot engagement rate over time."""
    plt.figure(figsize=(10, 6))
    plt.plot(engagement_summary['posting_time'], engagement_summary['engagement_rate'])
    plt.title('Engagement Rate Over Time')
    plt.xlabel('Time')
    plt.ylabel('Engagement Rate')
    plt.show()
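Both plotting helpers expect pre-aggregated frames; for instance, with synthetic data:

# Hypothetical usage of utils/visualization.py
import pandas as pd
from utils.visualization import plot_engagement_heatmap, plot_engagement_over_time

hourly = pd.DataFrame({"hour": range(24), "engagement_rate": [i % 7 for i in range(24)]})
plot_engagement_heatmap(hourly)

timeline = pd.DataFrame({
    "posting_time": pd.date_range("2024-01-01", periods=7, freq="D"),
    "engagement_rate": [5, 9, 4, 7, 6, 8, 10],
})
plot_engagement_over_time(timeline)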