File size: 3,812 Bytes
0297e38
 
 
 
 
 
 
 
94bd43b
 
 
0297e38
94bd43b
 
0297e38
94bd43b
0297e38
 
 
94bd43b
0297e38
94bd43b
 
 
 
0297e38
94bd43b
0297e38
94bd43b
 
 
 
0297e38
94bd43b
 
0297e38
 
 
94bd43b
0297e38
 
 
 
 
 
 
 
 
 
 
 
94bd43b
0297e38
 
 
94bd43b
0297e38
 
94bd43b
 
 
 
 
 
 
 
 
0297e38
94bd43b
 
 
 
 
 
 
0297e38
 
94bd43b
0297e38
94bd43b
 
 
 
 
0297e38
 
 
94bd43b
0297e38
 
94bd43b
 
 
0297e38
94bd43b
0297e38
94bd43b
 
 
 
 
 
 
 
0297e38
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
import pandas as pd
import requests
import re
import tempfile
import shutil
import os
from difflib import SequenceMatcher

def construct_query(row):
    """Build the search string for one applicant row.

    The query always starts with the applicant's name and ends with the
    word "linkedin". In between, each of the optional columns
    ('Job Title', 'State', 'City', 'Skills') is added, in that order, when
    the column exists in ``row`` and holds a non-missing value. String
    values are stripped and skipped if blank; any other value is converted
    with ``str`` and stripped.

    Args:
        row: A mapping-like record (e.g. a pandas Series) keyed by column name.

    Returns:
        The space-separated query string.
    """
    pieces = [str(row['Applicant Name'])]

    for column in ('Job Title', 'State', 'City', 'Skills'):
        # Ignore columns that are absent or hold a missing value (NaN/None).
        if column not in row or not pd.notna(row[column]):
            continue

        raw = row[column]
        if isinstance(raw, str):
            cleaned = raw.strip()
            if not cleaned:
                # Whitespace-only text contributes nothing to the query.
                continue
        else:
            cleaned = str(raw).strip()
        pieces.append(cleaned)

    pieces.append("linkedin")
    return " ".join(pieces)

def get_name_from_url(link):
    """Return the profile slug of a LinkedIn ``/in/`` URL as space-separated words.

    Args:
        link: URL string to inspect.

    Returns:
        The slug following ``linkedin.com/in/`` with every hyphen turned into
        a space, or None when the URL contains no such profile path.
    """
    found = re.search(r'linkedin\.com/in/([a-zA-Z0-9-]+)', link)
    if found is None:
        return None

    slug = found.group(1)
    return " ".join(slug.split("-"))

def calculate_similarity(name1, name2):
    """Score how alike two names are.

    Both names are lower-cased and stripped of surrounding whitespace before
    being compared with ``difflib.SequenceMatcher``.

    Returns:
        The matcher's ratio, a float between 0.0 and 1.0.
    """
    left = name1.lower().strip()
    right = name2.lower().strip()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()

def fetch_linkedin_links(query, api_key, applicant_name, *, timeout=30, min_similarity=0.5):
    """Fetches a LinkedIn profile link for an applicant using BrightData SERP API.

    The organic results are examined in the order the API returns them. The
    first link that is on linkedin.com, contains an ``/in/<slug>`` profile
    path, and whose slug is at least ``min_similarity`` similar to
    ``applicant_name`` is returned.

    Args:
        query: Search string sent as the ``q`` parameter.
        api_key: API key sent as the ``api_key`` parameter.
        applicant_name: Name the profile slug is compared against.
        timeout: Seconds to wait for the HTTP request before giving up.
        min_similarity: Minimum similarity ratio (0.0-1.0) for a link to be
            accepted.

    Returns:
        The matching URL, or None when no result qualifies or the lookup
        fails. Failures are reported with ``st.error`` instead of being
        raised, so one bad lookup does not stop the remaining rows.
    """
    linkedin_regex = r'https://(www|[a-z]{2})\.linkedin\.com/.*'

    try:
        response = requests.get(
            "https://serpapi.brightdata.com/google/search",
            params={
                "q": query,
                "num": 5,
                "api_key": api_key,
            },
            # requests has no default timeout; without one a stalled
            # connection would block the app indefinitely.
            timeout=timeout,
        )
        response.raise_for_status()
        results = response.json()
        # `or []` also covers a response where the key is present but null.
        organic_results = results.get("organic_results") or []

        for result in organic_results:
            link = result.get("link")
            # Skip entries without a usable URL instead of letting re.match
            # raise TypeError and abort the whole lookup.
            if not isinstance(link, str):
                continue
            if not re.match(linkedin_regex, link):
                continue
            profile_name = get_name_from_url(link)
            if not profile_name:
                continue
            if calculate_similarity(applicant_name, profile_name) >= min_similarity:
                return link
        return None
    except Exception as e:
        # Deliberately broad: this is the per-applicant boundary, and any
        # failure (network, HTTP status, malformed JSON) should be shown to
        # the user while processing continues with the next row.
        st.error(f"Error fetching link for query '{query}': {e}")
        return None

def process_file(file, api_key):
    """Processes the uploaded Excel file to fetch LinkedIn profile links.

    Reads the spreadsheet, drops rows whose 'Applicant Name' is missing or
    blank, adds a 'Search Query' column and a 'LinkedIn Link' column, and
    writes the result as CSV into a fresh temporary directory.

    Args:
        file: Path or file-like object accepted by ``pd.read_excel``.
        api_key: API key forwarded to ``fetch_linkedin_links``.

    Returns:
        Path of the written CSV file, or None if processing failed (the
        failure is reported with ``st.error``). The caller is responsible
        for removing the CSV's parent directory when done with it.
    """
    temp_dir = None
    try:
        df = pd.read_excel(file)

        # Report a missing required column explicitly; otherwise the user
        # would only see the bare KeyError text.
        if 'Applicant Name' not in df.columns:
            st.error("Error processing file: required column 'Applicant Name' is missing")
            return None

        names = df['Applicant Name']
        # astype(str) keeps the blank check working when the column holds
        # non-string values, where the .str accessor would raise.
        has_name = names.notna() & (names.astype(str).str.strip() != '')
        # Copy so the column assignments below act on an independent frame
        # rather than on a slice of the original.
        df = df[has_name].copy()

        # Plain row iteration is used instead of df.apply(axis=1) because
        # apply on an empty frame can return a DataFrame instead of a
        # Series, which breaks the column assignment.
        df['Search Query'] = [construct_query(row) for _, row in df.iterrows()]
        df['LinkedIn Link'] = [
            fetch_linkedin_links(query, api_key, str(name))
            for query, name in zip(df['Search Query'], df['Applicant Name'])
        ]

        temp_dir = tempfile.mkdtemp()
        output_file = os.path.join(temp_dir, "updated_with_linkedin_links.csv")
        df.to_csv(output_file, index=False)
        return output_file
    except Exception as e:
        # Do not leave the temporary directory behind when writing failed.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        st.error(f"Error processing file: {e}")
        return None

# Streamlit UI
st.title("LinkedIn Profile Link Scraper")
st.markdown("Upload an Excel file with applicant details, and get a CSV with LinkedIn profile links.")

api_key = st.text_input("Enter your BrightData SERP API Key", type="password")
uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"])

if uploaded_file and api_key:
    # Streamlit re-executes this script on every widget interaction,
    # including a click on the download button. The generated CSV is kept in
    # session state, keyed on the file content and API key, so the search API
    # is only queried again when one of those inputs actually changes.
    cache_key = (uploaded_file.name, hash(uploaded_file.getvalue()), api_key)

    if st.session_state.get("linkedin_csv_key") != cache_key:
        st.write("Processing file...")
        output_file = process_file(uploaded_file, api_key)
        if output_file:
            try:
                with open(output_file, "rb") as f:
                    st.session_state["linkedin_csv_bytes"] = f.read()
                st.session_state["linkedin_csv_key"] = cache_key
            finally:
                # The CSV is now held in memory; always remove the temp dir,
                # even if reading the file failed.
                shutil.rmtree(os.path.dirname(output_file), ignore_errors=True)

    # Offer the download only when a result for the current inputs exists
    # (processing may have failed, in which case nothing was cached).
    if st.session_state.get("linkedin_csv_key") == cache_key:
        st.download_button(
            label="Download Updated CSV",
            data=st.session_state["linkedin_csv_bytes"],
            file_name="updated_with_linkedin_links.csv",
            mime="text/csv"
        )
elif not api_key:
    st.warning("Please enter your BrightData SERP API key to proceed.")