import pandas as pd
import gradio as gr
import gspread
from google.oauth2.service_account import Credentials
import os
import time
import requests
import json
import math
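
# Configuration is read from environment variables (set as Space secrets):
# SHEETID, USER, PASSWORD, CONF (service-account JSON), and XKEY (Serper API key).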
SHEET_ID = os.getenv('SHEETID')   # ID of the target Google Sheet
log = os.getenv('USER')           # Gradio login username
password = os.getenv('PASSWORD')  # Gradio login password
SOCIAL_SHEET = 'SocialSheet'      # worksheet that receives the results

# Service-account credentials are passed as a JSON string in CONF
# (a local credentials.json file would also work via Credentials.from_service_account_file)
conf_dict = json.loads(os.getenv('CONF'))
XKEY = os.getenv('XKEY')

# Initialize the Google Sheets client
SCOPES = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
creds = Credentials.from_service_account_info(conf_dict, scopes=SCOPES)
gc = gspread.authorize(creds)
sheet = gc.open_by_key(SHEET_ID)
# Serper API endpoint and request headers
url = "https://google.serper.dev/search"
headers = {
    'X-API-KEY': XKEY,
    'Content-Type': 'application/json'
}
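
# A successful Serper response carries its results under an 'organic' key; the
# fields read below (link, title, snippet, date) follow that result shape, with
# 'date' present only on some results — hence the .get() defaults.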
def json_serialize(obj):
    """Coerce NaN/inf floats (which Sheets cannot store) to None; pass everything else through."""
    if isinstance(obj, float) and (math.isnan(obj) or math.isinf(obj)):
        return None
    return obj
def search_and_store(locations, keywords):
    all_dfs = []
    locations_list = [loc.strip() for loc in locations.split(',')]
    keywords_list = [kw.strip() for kw in keywords.split(',')]
    for location in locations_list:
        for keyword in keywords_list:
            # Restrict results to Facebook pages, excluding group and biz URLs
            query = f"{keyword} {location} site:facebook.com/ -inurl:groups -inurl:biz"
            payload = json.dumps({
                "q": query,
                "num": 100,      # up to 100 results per query
                "tbs": "qdr:m"   # restrict to results from the past month
            })
            try:
                response = requests.post(url, headers=headers, data=payload)
                response.raise_for_status()
                response_data = response.json()
                if 'organic' in response_data:
                    rows = [{
                        'link': item.get('link', ''),
                        'title': item.get('title', ''),
                        'snippet': item.get('snippet', ''),
                        'date': item.get('date', ''),
                        'query': f"{keyword} {location}"
                    } for item in response_data['organic']]
                    df = pd.DataFrame(rows, columns=['link', 'title', 'snippet', 'date', 'query'])
                    # Sheets rejects NaN/inf values, so scrub them up front
                    df = df.replace([float('inf'), -float('inf'), float('nan')], None)
                    all_dfs.append(df)
                else:
                    print(f"No 'organic' results for query: {query}")
            except Exception as e:
                print(f"Error for query: {query} - {str(e)}")
            time.sleep(1)  # crude rate limit between Serper calls
    if all_dfs:
        result_df = pd.concat(all_dfs, ignore_index=True)
        social_sheet = sheet.worksheet(SOCIAL_SHEET)
        existing_data = social_sheet.get_all_records()
        existing_df = pd.DataFrame(existing_data)
        # Deduplicate against links already in the sheet (which may be empty)
        if not existing_df.empty and 'link' in existing_df.columns:
            new_rows = result_df[~result_df['link'].isin(existing_df['link'])]
        else:
            new_rows = result_df
        if not new_rows.empty:
            # Apply json_serialize per cell to guard against non-finite floats
            new_values = [[json_serialize(row[col]) for col in new_rows.columns]
                          for row in new_rows.to_dict('records')]
            social_sheet.append_rows(new_values)
            return f"{len(new_rows)} new rows have been appended to {SOCIAL_SHEET} in Google Sheets"
        else:
            return "No new data to append."
    else:
        return "No data was retrieved from the queries."
def start_search(locations, keywords):
    return search_and_store(locations, keywords)
iface = gr.Interface(
    fn=start_search,
    inputs=[
        gr.Textbox(label="Locations (comma-separated)"),
        gr.Textbox(label="Keywords (comma-separated)")
    ],
    outputs=gr.Textbox(label="Result"),
    title="Facebook Page Search and Store",
    description="Enter locations and keywords to search for Facebook pages and store the results in Google Sheets.",
    live=False
)

iface.launch(auth=(log, password))
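# launch() blocks until the app is stopped; the USER and PASSWORD secrets must
# both be set for the login prompt to accept credentials.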