#-------------------------------------libraries ----------------------------------
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json
import os
import pandas as pd
import numpy as np
import logging
from dotenv import load_dotenv
load_dotenv()
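# The .env file is expected to define the two variables read below. A minimal
# example, assuming the standard CoinMarketCap Pro base URL (swap in the
# sandbox URL for testing):
#   URL_CMC=https://pro-api.coinmarketcap.com
#   API_KEY_CMC=<your CoinMarketCap API key>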
#-------------------------------------env vars----------------------------------
url = os.getenv("URL_CMC")
endpoints = ["v1/cryptocurrency/listings/latest",
"/v1/cryptocurrency/trending/latest",
]
start = "1"
stop = "100"
parameters = {
    'start': start,
    'limit': stop,
    'convert': 'USD',
}
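# Per the listings/latest docs, 'start' is the 1-based rank to begin at and
# 'limit' the number of assets returned, so this requests the top 100 by market cap.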
headers = {
    'Accepts': 'application/json',
    'X-CMC_PRO_API_KEY': os.getenv("API_KEY_CMC"),
}
# Configure the logging settings
log_folder = "./logs/scraping/"
os.makedirs(log_folder, exist_ok=True)  # Ensure the log folder exists
log_file = os.path.join(log_folder, "scraping.log")
log_format = "%(asctime)s [%(levelname)s] - %(message)s"
logging.basicConfig(filename=log_file, level=logging.INFO, format=log_format)
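# With this format, a line in scraping.log looks like:
#   2024-01-01 12:00:00,000 [INFO] - Successfully fetched data from <target>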
#-------------------------------------api call----------------------------------
session = Session()
session.headers.update(headers)  # the API key header is sent with every request on this session
os.makedirs("output", exist_ok=True)  # the JSON/CSV writes below assume this folder exists
for endpoint in endpoints:
    target = f"{url}/{endpoint}"
    try:
        response = session.get(target, params=parameters)
        data = response.json()
        # keep the listings payload around: the processing step below needs it,
        # and `data` only holds the last endpoint fetched once the loop ends
        if endpoint == "v1/cryptocurrency/listings/latest":
            listings = data
        with open(f'output/cmc_data_{endpoint.replace("/", "_")}_{stop}.json', 'w') as f:
            json.dump(data, f)
        logging.info(f"Successfully fetched data from {target}")
    except (ConnectionError, Timeout, TooManyRedirects) as e:
        logging.error(f"Error while fetching data from {target}: {e}")
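# For reference, the listings/latest payload consumed below has roughly this
# shape (abridged; values elided):
#   {"data": [{"name": "Bitcoin", "symbol": "BTC", "circulating_supply": ...,
#              "total_supply": ..., "quote": {"USD": {"price": ..., ...}}}]}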
#-------------------------------------process data----------------------------------
# create the data frame from the listings payload, keeping only the chosen columns
df = pd.DataFrame(listings["data"])[["name", "symbol", "circulating_supply", "total_supply", "quote"]]
# flatten the nested USD quote, then choose the columns to keep
quote_df = pd.json_normalize(df["quote"].apply(lambda x: x["USD"]))[
    ["price", "percent_change_24h", "percent_change_7d", "percent_change_90d",
     "market_cap", "fully_diluted_market_cap", "last_updated"]
]
# drop the nested quote column now that it has been flattened
df = df.drop("quote", axis=1)
# create features: share of the total supply currently in circulation, in percent
df["percent_tokens_circulation"] = np.round((df["circulating_supply"] / df["total_supply"]) * 100, 1)
# join the flattened quote columns back onto the main frame
df = df.join(quote_df)
df["last_updated"] = pd.to_datetime(df["last_updated"])
#df.to_csv(f"output/top_{stop}_update.csv")
#-------------------------------------save data----------------------------------
# Check if the file exists
output_file = f"output/top_{stop}_update.csv"
if os.path.isfile(output_file):
    logging.info(f"Updating dataset top_{stop}_update.")
    # Read the existing data; parse last_updated so its dtype matches the new
    # frame, otherwise the duplicate check below compares strings to timestamps
    existing_data = pd.read_csv(output_file, parse_dates=["last_updated"])
    # Concatenate the existing data with the new data vertically
    updated_data = pd.concat([existing_data, df], axis=0, ignore_index=True)
    # Remove duplicates (if any) based on the (symbol, last_updated) identifier pair
    updated_data.drop_duplicates(subset=["symbol", "last_updated"], inplace=True)
    # Save the updated data back to the same file
    updated_data.to_csv(output_file, index=False)
else:
    # If the file doesn't exist, save the current data to it
    df.to_csv(output_file, index=False)
logging.info("Script execution completed.")
#-------------------------------------end----------------------------------