File size: 3,159 Bytes
9df4cc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from finnlp.utils.get_proxy import get_china_free_proxy, get_us_free_proxy, Kuaidaili
import requests

class FinNLP_Downloader:
    def __init__(self, args = {}):
        self.use_proxy = True if "use_proxy" in args.keys() else False
        if self.use_proxy:
            self.country = args["use_proxy"]
        else:
            self.country = None
        self.max_retry = args["max_retry"] if "max_retry" in args.keys() else 1
        self.proxy_pages = args["proxy_pages"] if "proxy_pages" in args.keys() else 5
        if self.use_proxy:
            if "kuaidaili" in self.country:
                # tunnel, username, password
                assert "tunnel" in args.keys(), "Please make sure \'tunnel\' in your keys"
                assert "username" in args.keys(), "Please make sure \'username\' in your keys"
                assert "password" in args.keys(), "Please make sure \'password\' in your keys"
                self.proxy_list = Kuaidaili(args["tunnel"], args["username"], args["password"])
            else:
                self.proxy_id = 0
                self.proxy_list = self._update_proxy()
        else:
            self.proxy_list = []   
        
    def _get_proxy(self):
        if self.use_proxy:
            if "kuaidaili" in self.country:
                proxy = self.proxy_list.get_kuaidaili_tunnel_proxy()
                return proxy
            elif len(self.proxy_list) >0:
                proxy = self.proxy_list[self.proxy_id]
                self.proxy_id += 1
                if self.proxy_id == len(self.proxy_list):
                    self.proxy_id = 0 
                return proxy
        else:
            return None

    def _update_proxy(self):
        if "china" in self.country or "China" in self.country:
            return get_china_free_proxy(self.proxy_pages)
        else:
            return get_us_free_proxy(self.proxy_pages)

    def _request_get(self, url, headers = None, verify = None, params = None):
        if headers is None:
            headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0"
            }
        max_retry = self.max_retry
        proxies = self._get_proxy()
        for _ in range(max_retry):
            try:
                response = requests.get(url = url, proxies = proxies, headers = headers, verify = verify, params = params)
                if response.status_code == 200:
                    break
            except:
                response = None

        if response is not None and response.status_code != 200:
            response = None

        return response
    
    def _request_post(self, url, headers, json):
        max_retry = self.max_retry
        proxies = self._get_proxy()
        for _ in range(max_retry):
            try:
                response = requests.post(url = url, headers = headers, json = json, proxies = proxies)
                if response.status_code == 200:
                    break
            except:
                response = None

        if response is not None and response.status_code != 200:
            response = None

        return response