import json
import math
import os
import time

import gradio as gr
import gspread
import pandas as pd
import requests
from google.oauth2.service_account import Credentials

# Configuration is supplied entirely via environment variables.
SHEET_ID = os.getenv('SHEETID')    # ID of the target Google Sheet
log = os.getenv('USER')            # Basic-auth username for the Gradio UI
password = os.getenv('PASSWORD')   # Basic-auth password for the Gradio UI
SOCIAL_SHEET = 'SocialSheet'       # Worksheet (tab) that receives the results
XKEY = os.getenv('XKEY')           # Serper.dev API key

# Service account credentials are passed as a JSON string in CONF
# instead of a credentials.json file on disk.
service_account_info = json.loads(os.getenv('CONF'))

# Initialize the Google Sheets client.
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]
creds = Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
gc = gspread.authorize(creds)
sheet = gc.open_by_key(SHEET_ID)

# Serper search API endpoint and request headers.
url = "https://google.serper.dev/search"
headers = {
    'X-API-KEY': XKEY,
    'Content-Type': 'application/json',
}


def json_serialize(obj):
    """Fallback for json.dumps: map NaN/Inf floats to None, stringify anything else."""
    if isinstance(obj, float) and (math.isnan(obj) or math.isinf(obj)):
        return None
    return str(obj)


def search_and_store(locations, keywords):
    all_dfs = []
    locations_list = [loc.strip() for loc in locations.split(',')]
    keywords_list = [kw.strip() for kw in keywords.split(',')]

    # Run one Serper query per (keyword, location) pair, restricted to
    # Facebook pages (excluding groups and biz URLs) from the past month.
    for location in locations_list:
        for keyword in keywords_list:
            query = f"{keyword} {location} site:facebook.com/ -inurl:groups -inurl:biz"
            payload = json.dumps({
                "q": query,
                "num": 100,
                "tbs": "qdr:m",  # restrict results to the past month
            })
            try:
                response = requests.post(url, headers=headers, data=payload)
                response.raise_for_status()
                response_data = response.json()
                if 'organic' in response_data:
                    # Collect rows first, then build the DataFrame in one pass
                    # (cheaper than pd.concat inside the loop).
                    rows = [{
                        'link': item.get('link', ''),
                        'title': item.get('title', ''),
                        'snippet': item.get('snippet', ''),
                        'date': item.get('date', ''),
                        'query': f"{keyword} {location}",
                    } for item in response_data['organic']]
                    df = pd.DataFrame(rows, columns=['link', 'title', 'snippet', 'date', 'query'])
                    # Safety net: Google Sheets rejects NaN/Inf values.
                    df = df.replace([float('inf'), -float('inf'), float('nan')], None)
                    all_dfs.append(df)
                else:
                    print(f"No 'organic' results for query: {query}")
            except Exception as e:
                print(f"Error for query: {query} - {e}")
            time.sleep(1)  # throttle requests between queries

    if not all_dfs:
        return "No data was retrieved from the queries."

    result_df = pd.concat(all_dfs, ignore_index=True)
    social_sheet = sheet.worksheet(SOCIAL_SHEET)
    existing_data = social_sheet.get_all_records()
    existing_df = pd.DataFrame(existing_data)

    # Deduplicate against links already stored in the worksheet; guard
    # against an empty sheet, which has no 'link' column yet.
    if 'link' in existing_df.columns:
        new_rows = result_df[~result_df['link'].isin(existing_df['link'])]
    else:
        new_rows = result_df

    if new_rows.empty:
        return "No new data to append."

    # Round-trip through JSON to coerce values into Sheets-safe types.
    new_values = json.loads(json.dumps(new_rows.to_dict('records'), default=json_serialize))
    new_values = [[row.get(col) for col in new_rows.columns] for row in new_values]
    social_sheet.append_rows(new_values)
    return f"{len(new_rows)} new rows have been appended to {SOCIAL_SHEET} in Google Sheets"


def start_search(locations, keywords):
    return search_and_store(locations, keywords)


iface = gr.Interface(
    fn=start_search,
    inputs=[
        gr.Textbox(label="Locations (comma-separated)"),
        gr.Textbox(label="Keywords (comma-separated)"),
    ],
    outputs=gr.Textbox(label="Result"),
    title="Facebook Page Search and Store",
    description="Enter locations and keywords to search for Facebook pages and store the results in Google Sheets.",
    live=False,
)

iface.launch(auth=(log, password))
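
# --- Usage sketch (illustrative; the variable values and filename below are
# placeholder assumptions, not part of the original script) ---
# The app reads all of its configuration from environment variables, e.g.:
#
#   export SHEETID='your-google-sheet-id'
#   export USER='admin'                        # Gradio login username
#   export PASSWORD='change-me'                # Gradio login password
#   export XKEY='your-serper-api-key'
#   export CONF="$(cat service_account.json)"  # service-account JSON as a string
#
#   python app.py
#
# The target spreadsheet must already contain a worksheet named 'SocialSheet'
# with a header row including a 'link' column for deduplication to apply.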