# NOTE(review): the lines below were extraction artifacts from the hosting
# page (a Spaces-style file viewer header), not Python code. Preserved here
# as a comment so the file parses:
#   Spaces: Running / Running / File size: 4,017 Bytes / commit 9df4cc0
#   (followed by the viewer's line-number gutter, 1..97)
import random
import time
import html
import requests
from zenrows import ZenRowsClient
from urllib.parse import urlparse
from proxies import headers
# logging.basicConfig()
# logging.getLogger().setLevel(logging.DEBUG)
# requests_log = logging.getLogger("requests.packages.urllib3")
# requests_log.setLevel(logging.DEBUG)
# requests_log.propagate = True
# global proxies
# proxies = headers.getProxy()
def requests_get(url, proxy=None):
    """GET *url* with a browser-like User-Agent after a short random delay.

    Args:
        url: Target URL to fetch.
        proxy: Unused; kept in the signature for backward compatibility
            with existing callers.

    Returns:
        The ``requests.Response`` on success, or ``None`` on any error.
    """
    try:
        # Random 1-5 second delay to avoid hammering the target site.
        time.sleep(random.randint(1, 5))
        # Rotate among a few desktop User-Agent strings per request.
        user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
            # Add more User-Agent strings as needed
        ]
        # Named request_headers so it does not shadow the `headers` module
        # imported from `proxies` at the top of the file.
        request_headers = {
            'User-Agent': random.choice(user_agents),
            'Referer': 'https://seekingalpha.com/search?q=&tab=headlines'
        }
        # NOTE(review): the original code also built an unused ZenRowsClient
        # with a hard-coded API key — dead code and a credential leak, both
        # removed. If ZenRows is needed, load the key from configuration.
        session = requests.Session()
        session.headers.update(request_headers)
        # Timeout prevents the request from hanging indefinitely.
        response = session.get(url, timeout=30)
        return response
    except Exception as e:
        print("Error: " + str(e))
        return None
def requests_get_for_seeking_alpha(url, subject):
    """Query Seeking Alpha's CloudSearch endpoint for news matching *subject*.

    NOTE: the *url* argument is ignored — the request always goes to the
    hard-coded AWS CloudSearch endpoint below. It is kept in the signature
    for backward compatibility with existing callers.

    Args:
        url: Ignored (see note above).
        subject: Search term inserted into the structured query.

    Returns:
        The tuple ``("N/A", subject)`` regardless of the response; matching
        highlights are only printed, never returned.
    """
    print("amazon.com method for requesting seeking alpha")
    headers = {
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
        "cache-control": "no-cache",
        "origin": "https://seekingalpha.com",
        "pragma": "no-cache",
        "referer": "https://seekingalpha.com/",
        "sec-ch-ua": "\"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Google Chrome\";v=\"114\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "cross-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }
    url = "https://r4rrlsfs4a.execute-api.us-west-2.amazonaws.com/production/search"
    params = {
        "q": "(and '{}' content_type:'news')".format(subject),
        "q.parser": "structured",
        "sort": "rank1 desc",
        "size": "10",
        "start": "0",
        "q.options": "{\"fields\":[\"author\",\"author_url\",\"content^1\",\"content_type\",\"image_url\",\"primary_symbols\",\"secondary_symbols\",\"summary\",\"tags\",\"title^3\",\"uri\"]}",
        "highlight.title": "{pre_tag:'<strong>',post_tag:'<<<<strong>'},",
        "highlight.summary": "{pre_tag:'<strong>',post_tag:'<<<<strong>'},",
        "highlight.content": "{pre_tag:'<strong>',post_tag:'<<<<strong>'},",
        "highlight.author": "{pre_tag:'<strong>',post_tag:'<<<<strong>'},",
        "highlight.primary_symbols": "{pre_tag:'<strong>',post_tag:'<<<<strong>'}"
    }
    print("Sending request to", url, "with headers", headers, "with params", params)
    # Timeout keeps the call from hanging indefinitely on a dead endpoint.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    response.encoding = 'utf-8'
    # Guard against an empty or malformed response instead of raising
    # AttributeError/IndexError on the chained .get(...)[0] lookups.
    hits = response.json().get("hits", {}).get("hit", [])
    if hits:
        print(html.unescape(hits[0].get("highlights")))
    else:
        print("No hits returned for subject:", subject)
    return "N/A", subject
def get_redirected_domain(url):
    """Follow redirects for the first URL in *url* and return the final URL.

    Args:
        url: A sequence of URLs; only the first element is used.

    Returns:
        The final URL (str) after following all redirects, or ``None`` when
        the sequence is empty/None or the request fails.
    """
    # Guard clause hoisted out of the try so the try body stays minimal.
    if not url:
        return None
    try:
        # HEAD is cheaper than GET for resolving redirects; the timeout
        # prevents an indefinite hang on an unresponsive host.
        response = requests.head(url[0], allow_redirects=True, timeout=30)
        return response.url
    except requests.exceptions.RequestException as e:
        print("Error:", e)
        return None