|
import streamlit as st |
|
import pandas as pd |
|
import requests |
|
import re |
|
import tempfile |
|
import shutil |
|
import os |
|
from difflib import SequenceMatcher |
|
|
|
def construct_query(row):
    """Build the Google search query string for one applicant row.

    The query always starts with the applicant's name, appends any of the
    optional fields that are present and non-empty, and ends with the
    literal token "linkedin" to bias results toward profile pages.
    """
    parts = [str(row['Applicant Name'])]
    for field in ('Job Title', 'State', 'City', 'Skills'):
        if field not in row or not pd.notna(row[field]):
            continue
        value = row[field]
        if isinstance(value, str):
            cleaned = value.strip()
            # Skip strings that are empty once whitespace is removed.
            if cleaned:
                parts.append(cleaned)
        else:
            # Non-string values (numbers, dates, ...) are stringified as-is.
            parts.append(str(value).strip())
    parts.append("linkedin")
    return " ".join(parts)
|
|
|
def get_name_from_url(link):
    """Extract the profile-name slug from a LinkedIn URL.

    Returns the ``/in/<slug>`` segment with hyphens turned into spaces
    (e.g. ``.../in/john-doe-123`` -> ``"john doe 123"``), or None when
    the URL has no such segment.
    """
    found = re.search(r'linkedin\.com/in/([a-zA-Z0-9-]+)', link)
    return found.group(1).replace('-', ' ') if found else None
|
|
|
def calculate_similarity(name1, name2):
    """Return a similarity ratio in [0, 1] between two names.

    Comparison is case-insensitive and ignores leading/trailing
    whitespace; 1.0 means the normalized names are identical.
    """
    left = name1.lower().strip()
    right = name2.lower().strip()
    return SequenceMatcher(None, left, right).ratio()
|
|
|
def fetch_linkedin_links(query, api_key, applicant_name):
    """Fetch the best-matching LinkedIn profile link for an applicant.

    Queries the BrightData SERP API with *query*, scans the organic
    results for LinkedIn URLs, and returns the first link whose URL slug
    is at least 50% similar to *applicant_name* (per
    ``calculate_similarity``). Returns None when no acceptable match is
    found or when the request fails (the error is surfaced via
    ``st.error`` rather than raised).
    """
    linkedin_regex = r'https://(www|[a-z]{2})\.linkedin\.com/.*'

    try:
        response = requests.get(
            "https://serpapi.brightdata.com/google/search",
            params={
                "q": query,
                "num": 5,
                "api_key": api_key
            },
            # Without a timeout a stalled request would hang the Streamlit
            # app indefinitely; requests has no default timeout.
            timeout=30
        )
        response.raise_for_status()
        results = response.json()
        organic_results = results.get("organic_results", [])

        for result in organic_results:
            link = result.get("link")
            # BUG FIX: a result may lack a "link" key, and re.match(pattern,
            # None) raises TypeError — guard before matching.
            if not link or not re.match(linkedin_regex, link):
                continue
            profile_name = get_name_from_url(link)
            if profile_name:
                similarity = calculate_similarity(applicant_name, profile_name)
                if similarity >= 0.5:
                    return link
        return None
    except Exception as e:
        # Top-level boundary for this lookup: report in the UI and return
        # None so one bad row doesn't abort the whole batch.
        st.error(f"Error fetching link for query '{query}': {e}")
        return None
|
|
|
def process_file(file, api_key):
    """Process an uploaded Excel file and produce a CSV with LinkedIn links.

    Reads the spreadsheet, drops rows whose 'Applicant Name' is missing or
    blank, builds a search query per row, looks up a LinkedIn link for each
    applicant, and writes the augmented table to a CSV in a fresh temp
    directory. Returns the CSV path, or None after reporting the error in
    the UI.
    """
    try:
        data = pd.read_excel(file)

        # Keep only rows with a usable applicant name.
        data = data[data['Applicant Name'].notna()]
        data = data[data['Applicant Name'].str.strip() != '']

        data['Search Query'] = data.apply(construct_query, axis=1)
        data['LinkedIn Link'] = data.apply(
            lambda r: fetch_linkedin_links(r['Search Query'], api_key, r['Applicant Name']),
            axis=1
        )

        # Write the result into its own temp directory; the caller is
        # responsible for cleaning it up after serving the download.
        out_path = os.path.join(tempfile.mkdtemp(), "updated_with_linkedin_links.csv")
        data.to_csv(out_path, index=False)
        return out_path
    except Exception as exc:
        st.error(f"Error processing file: {exc}")
        return None
|
|
|
|
|
# --- Streamlit UI ------------------------------------------------------
# Page header and usage instructions.
st.title("LinkedIn Profile Link Scraper")

st.markdown("Upload an Excel file with applicant details, and get a CSV with LinkedIn profile links.")


# The two required inputs: the BrightData SERP API key (masked) and the
# applicant spreadsheet (.xlsx only).
api_key = st.text_input("Enter your BrightData SERP API Key", type="password")

uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"])


# Run the pipeline only once both inputs are provided.
if uploaded_file and api_key:

    st.write("Processing file...")

    output_file = process_file(uploaded_file, api_key)

    # process_file returns None on failure (it reports the error itself).
    if output_file:

        with open(output_file, "rb") as f:

            st.download_button(

                label="Download Updated CSV",

                data=f,

                file_name="updated_with_linkedin_links.csv",

                mime="text/csv"

            )

        # Remove the temp directory created by process_file once the
        # download button has consumed the file contents.
        shutil.rmtree(os.path.dirname(output_file))

# NOTE(review): when an API key IS entered but no file is uploaded, neither
# branch fires and the user gets no prompt — confirm whether a "please
# upload a file" message is wanted here.
elif not api_key:

    st.warning("Please enter your BrightData SERP API key to proceed.")