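"""Gradio app that searches Google (via the Serper API) for Facebook pages
matching location/keyword pairs and appends new results to a Google Sheet."""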
import pandas as pd
import gradio as gr
from datetime import datetime
import gspread
from google.oauth2.service_account import Credentials
import os
import time
import requests
import json
import math

SHEET_ID = os.getenv('SHEETID')    # Google Sheet ID, read from the environment
log = os.getenv('USER')            # Gradio basic-auth username
password = os.getenv('PASSWORD')   # Gradio basic-auth password
SOCIAL_SHEET = 'SocialSheet'       # Worksheet (tab) that receives the results

# Service account credentials file path
CREDENTIALS_FILE = 'credentials.json'
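# credentials.json is a Google service-account key file (downloadable from the
# Google Cloud console). The spreadsheet must be shared with the service
# account's client_email for the reads and appends below to succeed.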

# Initialize Google Sheets client
SCOPES = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
creds = Credentials.from_service_account_file(CREDENTIALS_FILE, scopes=SCOPES)
gc = gspread.authorize(creds)
sheet = gc.open_by_key(SHEET_ID)

# Serper search API endpoint and request headers. The key is read from the
# environment instead of being hardcoded in source (SERPER_API_KEY is an
# assumed variable name; set it to your Serper key).
url = "https://google.serper.dev/search"
headers = {
    'X-API-KEY': os.getenv('SERPER_API_KEY'),
    'Content-Type': 'application/json'
}


def json_serialize(obj):
    """Map non-finite floats (NaN/Inf) to None so Sheets can accept them."""
    if isinstance(obj, float) and (math.isnan(obj) or math.isinf(obj)):
        return None
    return obj
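# For example: json_serialize(float('nan')) -> None, json_serialize(1.5) -> 1.5,
# and any non-float value (strings, ints) passes through unchanged.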


def search_and_store(locations, keywords):
    all_dfs = []
    locations_list = [loc.strip() for loc in locations.split(',')]
    keywords_list = [kw.strip() for kw in keywords.split(',')]

    for location in locations_list:
        for keyword in keywords_list:
            # Limit results to facebook.com pages, excluding /groups and /biz URLs.
            query = f"{keyword} {location} site:facebook.com/ -inurl:groups -inurl:biz"
            payload = json.dumps({
                "q": query,
                "num": 100,      # request up to 100 results per query
                "tbs": "qdr:m"   # Google time filter: results from the past month
            })

            try:
                response = requests.post(url, headers=headers, data=payload, timeout=30)
                response.raise_for_status()
                response_data = response.json()

                if 'organic' in response_data:
                    # Build the frame in one pass instead of concatenating row by row.
                    df = pd.DataFrame(
                        [{
                            'link': item.get('link', ''),
                            'title': item.get('title', ''),
                            'snippet': item.get('snippet', ''),
                            'date': item.get('date', ''),
                            'query': f"{keyword} {location}"
                        } for item in response_data['organic']],
                        columns=['link', 'title', 'snippet', 'date', 'query'])

                    # Replace non-finite values, then map NaN to None for Sheets.
                    df = df.replace([float('inf'), -float('inf')], float('nan'))
                    df = df.where(pd.notnull(df), None)
                    all_dfs.append(df)
                else:
                    print(f"No 'organic' results for query: {query}")
            except Exception as e:
                print(f"Error for query: {query} - {str(e)}")
            time.sleep(1)  # simple rate limiting between API calls

    if all_dfs:
        result_df = pd.concat(all_dfs, ignore_index=True)

        social_sheet = sheet.worksheet(SOCIAL_SHEET)
        existing_data = social_sheet.get_all_records()
        existing_df = pd.DataFrame(existing_data)

        # Guard against an empty sheet, where existing_df has no 'link' column yet.
        if 'link' in existing_df.columns:
            new_rows = result_df[~result_df['link'].isin(existing_df['link'])]
        else:
            new_rows = result_df

        if not new_rows.empty:
            # json.dumps only invokes `default` for values it cannot serialize,
            # which never includes floats, so clean each cell directly instead.
            new_values = [[json_serialize(row[col]) for col in new_rows.columns]
                          for _, row in new_rows.iterrows()]
            social_sheet.append_rows(new_values)
            return f"{len(new_rows)} new rows have been appended to {SOCIAL_SHEET} in Google Sheets"
        else:
            return "No new data to append."
    else:
        return "No data was retrieved from the queries."


def start_search(locations, keywords):
    return search_and_store(locations, keywords)
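# A direct (non-UI) call would look like the following, with example arguments,
# assuming the environment and credentials above are configured:
#   print(search_and_store("Austin, Dallas", "plumber, electrician"))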


iface = gr.Interface(
    fn=start_search,
    inputs=[
        gr.Textbox(label="Locations (comma-separated)"),
        gr.Textbox(label="Keywords (comma-separated)")
    ],
    outputs=gr.Textbox(label="Result"),
    title="Facebook Page Search and Store",
    description="Enter locations and keywords to search for Facebook pages and store the results in Google Sheets.",
    live=False
)

# HTTP basic auth: both USER and PASSWORD must be set for login to succeed.
iface.launch(auth=(log, password))
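# Expected environment for deployment (names match the os.getenv calls above;
# SERPER_API_KEY is the name assumed here for the Serper key):
#   export SHEETID=<google-sheet-id>
#   export USER=<login-username>
#   export PASSWORD=<login-password>
#   export SERPER_API_KEY=<serper-api-key>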