ElegantSolutions committed on
Commit
f953f49
·
verified ·
1 Parent(s): a9ddd8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -20
app.py CHANGED
@@ -7,6 +7,7 @@ import shutil
7
  import os
8
  from difflib import SequenceMatcher
9
  import json
 
10
 
11
  def construct_query(row):
12
  """Constructs the Google search query using applicant data."""
@@ -73,41 +74,36 @@ def fetch_linkedin_links(query, api_key, applicant_name):
73
  "Content-Type": "application/json",
74
  "Authorization": f"Bearer {api_key}"
75
  }
76
-
 
 
 
77
  payload = {
78
- "zone": "serp_api2", # Or your configured BrightData zone name
79
- "url": f"https://www.google.com/search?q={query}",
80
- "format": "json" # Or "raw" if you want HTML
81
  }
82
-
83
  response = requests.post("https://api.brightdata.com/request", headers=headers, data=json.dumps(payload))
84
  response.raise_for_status()
85
-
86
- data = response.json()
87
-
88
- # If "json" format is used and BrightData parses the page:
89
- if "results" in data:
90
- results = data["results"]
91
- else:
92
- # Fallback: Parse raw HTML if format is "raw"
93
- results = []
94
-
95
- # Search for LinkedIn links in response content (raw or parsed)
96
- links = re.findall(linkedin_regex, response.text)
97
- for link in links:
98
  profile_name = get_name_from_url(link)
99
  if profile_name:
100
  similarity = calculate_similarity(applicant_name, profile_name)
101
  if similarity >= 0.5:
102
  return link
103
-
104
  return None
105
 
106
  except Exception as e:
107
  st.error(f"Error fetching link for query '{query}': {e}")
108
  return None
109
 
110
-
111
 
112
  def process_file(file, api_key):
113
  """Processes the uploaded Excel file to fetch LinkedIn profile links."""
 
7
  import os
8
  from difflib import SequenceMatcher
9
  import json
10
+ from urllib.parse import quote_plus
11
 
12
  def construct_query(row):
13
  """Constructs the Google search query using applicant data."""
 
74
  "Content-Type": "application/json",
75
  "Authorization": f"Bearer {api_key}"
76
  }
77
+
78
+ encoded_query = quote_plus(query)
79
+ search_url = f"https://www.google.com/search?q={encoded_query}"
80
+
81
  payload = {
82
+ "zone": "serp_api2", # Must match your BrightData dashboard zone
83
+ "url": search_url,
84
+ "format": "raw" # Use "raw" if "json" gives problems
85
  }
86
+
87
  response = requests.post("https://api.brightdata.com/request", headers=headers, data=json.dumps(payload))
88
  response.raise_for_status()
89
+
90
+ html = response.text
91
+ linkedin_links = re.findall(r'https://(www|[a-z]{2})\.linkedin\.com/in/[a-zA-Z0-9\-]+', html)
92
+ linkedin_links = list(set(["https://" + link for link in linkedin_links])) # De-duplicate
93
+
94
+ for link in linkedin_links:
 
 
 
 
 
 
 
95
  profile_name = get_name_from_url(link)
96
  if profile_name:
97
  similarity = calculate_similarity(applicant_name, profile_name)
98
  if similarity >= 0.5:
99
  return link
100
+
101
  return None
102
 
103
  except Exception as e:
104
  st.error(f"Error fetching link for query '{query}': {e}")
105
  return None
106
 
 
107
 
108
  def process_file(file, api_key):
109
  """Processes the uploaded Excel file to fetch LinkedIn profile links."""