File size: 1,909 Bytes
a2ee974
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Created by Leandro Carneiro at 19/01/2024
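# Description: Helpers to search Google for news on given domains, download the page text, and store it in a local folder.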
# ------------------------------------------------
import os.path
import time

from googlesearch import search
import requests
from bs4 import BeautifulSoup

import constants


def search_google(subject, sites):
    """Search Google for *subject*, restricted to each domain in *sites*.

    One query of the form ``"<subject> site:<domain>"`` is issued per domain
    through ``googlesearch.search``, asking for at most
    ``constants.num_sites`` results each time.

    Args:
        subject: Text to search for.
        sites: Iterable of domain strings used in the ``site:`` filter.

    Returns:
        A list with every result yielded by the searches, in query order.
        If anything raises, the exception message is printed and returned
        as a string instead (results gathered so far are discarded).
    """
    found_urls = []
    try:
        for domain in sites:
            print('    Buscando notícias no domínio: ' + domain)
            # NOTE: queries are issued back-to-back, with no throttling delay.
            hits = search(f"{subject} site:{domain}", num_results=constants.num_sites)
            found_urls.extend(hits)
        print('    Total de sites encontrados: ' + str(len(found_urls)))
    except Exception as err:
        message = str(err)
        print(message)
        return message
    return found_urls

def retrieve_text_from_site(sites, timeout=30):
    """Download every URL in *sites* and return the text of each page.

    Each page is fetched with ``requests.get``, non-success HTTP statuses are
    turned into exceptions via ``raise_for_status``, and the body is parsed
    with BeautifulSoup's ``html.parser`` to extract its text.

    Args:
        sites: Iterable of URL strings to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without it a single unresponsive server would block the whole
            run indefinitely.

    Returns:
        A list with one text string per URL, in the same order as *sites*.
        If any step raises (including a timeout), the exception message is
        returned as a string instead and no partial results are kept.
    """
    try:
        result = []
        for site in sites:
            print('    Baixando texto do site: ' + site)
            # Bound the wait so a stalled server cannot hang the pipeline.
            response = requests.get(site, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            result.append(soup.get_text())
        return result
    except Exception as e:
        return str(e)

def delete_base(local_base):
    """Remove every entry found directly inside the *local_base* directory.

    Args:
        local_base: Path of the directory to empty.

    Returns:
        ``0`` when all entries were removed. If listing the directory or
        removing an entry raises, the exception message is returned as a
        string and the remaining entries are left in place.
    """
    try:
        entries = os.listdir(local_base)
        for entry in entries:
            # os.remove only handles files; a subdirectory makes it raise.
            os.remove(os.path.join(local_base, entry))
    except Exception as err:
        return str(err)
    return 0

def save_on_base(sites, texts, local_base):
    """Write each downloaded text to *local_base* and index it by URL.

    For position ``i`` the text ``texts[i]`` is written to ``news<i>.txt``
    (UTF-8, overwriting any existing file) and a line
    ``news<i>.txt;<sites[i]>`` is appended to ``filename_url.csv`` in the
    same directory.

    Args:
        sites: Sequence of URL strings; its length drives the loop.
        texts: Sequence of page texts, indexed in parallel with *sites*.
        local_base: Directory where the files are written.

    Returns:
        ``0`` on success. If anything raises, the exception message is
        returned as a string; files written before the failure remain.
    """
    try:
        for position in range(len(sites)):
            news_name = f'news{position}.txt'
            news_path = os.path.join(local_base, news_name)
            index_path = os.path.join(local_base, 'filename_url.csv')
            url = sites[position]

            # texts is indexed only after the news file is opened, so the
            # file exists (possibly empty) even when the lookup fails.
            with open(news_path, 'w', encoding='utf-8') as news_file:
                news_file.write(texts[position])

            # The index is reopened in append mode for every entry.
            with open(index_path, 'a', encoding='utf-8') as index_file:
                index_file.write(news_name + ';' + url + '\n')
    except Exception as err:
        return str(err)
    else:
        return 0