mohcineelharras committed on
Commit
349c960
·
1 Parent(s): 1a57d8f

working scheduler

Browse files
.env CHANGED
@@ -1,3 +1,14 @@
 
1
  AIRFLOW_UID=1000
 
 
2
  URL_CMC=https://pro-api.coinmarketcap.com
3
- API_KEY_CMC=8057498e-ad35-465c-8359-8f6cc9d1ae1b
 
 
 
 
 
 
 
 
 
1
+ # Airflow
2
  AIRFLOW_UID=1000
3
+
4
+ # CMC
5
  URL_CMC=https://pro-api.coinmarketcap.com
6
+ API_KEY_CMC=8057498e-ad35-465c-8359-8f6cc9d1ae1b
7
+
8
+ # Etherscan API
9
+ URL_ETHERSCAN=https://api.etherscan.io/api
10
+ API_KEY_ETHERSCAN=VVJFE7IG5WSK8P2UJIJN5MST5UBQEZYVYW
11
+
12
+ # Logging
13
+ LOG_FOLDER=./logs/scrapping/
14
+ LOG_STREAMLIT=./logs/streamlit/
app.py CHANGED
@@ -1,39 +1,76 @@
1
- #-------------------------------------libraries ----------------------------------
2
-
3
  import os
4
  import pandas as pd
5
  import streamlit as st
6
  import plotly.graph_objs as go
7
- import numpy as np
8
- import plotly.express as px
9
  import logging
10
- # Set up logging basic configuration
11
- logging.basicConfig(level=logging.INFO)
12
- # Example of logging
 
 
 
 
 
 
 
 
 
 
 
 
13
  logging.info("Streamlit app has started")
 
 
 
 
 
 
14
 
 
 
 
 
 
 
15
 
16
- #-------------------------------------back ----------------------------------
17
 
18
  # etherscan
19
  ## Load the data from the CSV files
20
- dataframes = []
21
  for filename in os.listdir('output'):
22
- if filename.endswith('.csv'):
23
- df_temp = pd.read_csv(os.path.join('output', filename), sep=';')
24
- dataframes.append(df_temp)
25
- df_etherscan = pd.concat(dataframes)
26
- del df_temp
27
 
28
  # CMC
29
  ## Load cmc data
30
- df_temp = pd.read_csv("output/top_100_update.csv", sep=',')
31
- df_cmc = df_temp[df_temp["last_updated"] == df_temp["last_updated"].max()]
32
- del df_temp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  #-------------------------------------streamlit ----------------------------------
34
 
35
  # Set the title and other page configurations
36
  st.title('Crypto Analysis')
 
37
  # Create two columns for the two plots
38
  col1, col2 = st.columns(2)
39
 
 
1
+ # ------------------------ Libraries --------------------------
 
2
  import os
3
  import pandas as pd
4
  import streamlit as st
5
  import plotly.graph_objs as go
 
 
6
  import logging
7
+ import subprocess
8
+ import threading
9
+ from dotenv import load_dotenv
10
+ from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
11
+
12
+ # ------------------------ Environment Variables --------------------------
13
+
14
load_dotenv()

# Logging: the Streamlit front end writes to the folder named by
# LOG_STREAMLIT. Fall back to the path used in .env so that a missing
# variable does not make os.makedirs(None) raise TypeError at start-up.
log_folder = os.getenv("LOG_STREAMLIT") or "./logs/streamlit/"
os.makedirs(log_folder, exist_ok=True)
log_file = os.path.join(log_folder, "front.log")
log_format = "%(asctime)s [%(levelname)s] - %(message)s"
logging.basicConfig(filename=log_file, level=logging.INFO, format=log_format)
logging.info("Streamlit app has started")

# Create output folder if it doesn't exist. exist_ok=True avoids the
# check-then-create race when the script is executed several times at once.
os.makedirs("output", exist_ok=True)
26
+
27
+
28
+ #-------------------------------------back----------------------------------
29
 
30
def safe_read_csv(file_path, sep=','):
    """Read a CSV file, falling back to an empty DataFrame when there is no data.

    Args:
        file_path: Path of the CSV file to load.
        sep: Field delimiter forwarded to ``pandas.read_csv``.

    Returns:
        The parsed DataFrame, or an empty ``pd.DataFrame()`` when the file is
        missing, zero bytes long, or contains no parsable columns.
    """
    if not os.path.exists(file_path) or os.path.getsize(file_path) == 0:
        logging.warning(f"File {file_path} is empty or does not exist.")
        return pd.DataFrame()  # return an empty DataFrame

    try:
        return pd.read_csv(file_path, sep=sep)
    except (pd.errors.EmptyDataError, FileNotFoundError):
        # A file holding only blank lines has a non-zero size but no columns,
        # and a file can disappear between the check above and the read.
        # Both cases are treated the same as "empty or missing".
        logging.warning(f"File {file_path} is empty or does not exist.")
        return pd.DataFrame()
36
 
 
37
 
38
# etherscan
## Load the data from the CSV files
# Read every transactions_*.csv file first and concatenate once; growing a
# DataFrame inside the loop re-copies all previously loaded rows each time.
etherscan_frames = [
    safe_read_csv(os.path.join('output', filename), sep=',')
    for filename in os.listdir('output')
    if filename.endswith('.csv') and 'transactions_' in filename
]
# pd.concat raises ValueError on an empty list, so keep the empty-frame
# default when no transactions file exists yet.
df_etherscan = (
    pd.concat(etherscan_frames, ignore_index=True)
    if etherscan_frames
    else pd.DataFrame()
)

# CMC
## Load cmc data
df_cmc = safe_read_csv("output/top_100_update.csv", sep=',')
# safe_read_csv returns a column-less DataFrame when the file is missing or
# empty; indexing "last_updated" on it would raise KeyError, so only keep the
# most recent snapshot when that column is actually present.
if "last_updated" in df_cmc.columns:
    df_cmc = df_cmc[df_cmc["last_updated"] == df_cmc["last_updated"].max()]
50
+
51
# ------------------------ Background scraping --------------------------

# Thread names double as process-wide "already running" markers. Streamlit
# keeps st.session_state per session, so a session-level flag alone lets every
# new session start its own self-rescheduling scraper chain; the resulting
# concurrent runs are what trigger "Max rate limit reached" from Etherscan.
# threading.enumerate() sees all live threads of the process instead.
ETHERSCAN_THREAD_NAME = "etherscan-scraper"
CMC_THREAD_NAME = "cmc-scraper"

# Delay, in seconds, between the end of one run and the start of the next.
ETHERSCAN_INTERVAL_S = 3600
# 2592000 s is 30 days; dividing by 9000 gives one run every 288 s.
# NOTE(review): presumably sized to a 9000-calls-per-month quota - confirm.
CMC_INTERVAL_S = 2592000 / 9000


def _run_scraper(script_path, label):
    """Run one scraper script in a child process and log how it ended."""
    try:
        return_code = subprocess.call(["python", script_path])
    except OSError:
        # e.g. the interpreter is not on PATH; keep the schedule alive.
        logging.exception(f"{label} scraping could not be started")
        return
    if return_code == 0:
        logging.info(f"{label} scraping completed")
    else:
        logging.error(f"{label} scraping failed with exit code {return_code}")


def _schedule(interval, func, thread_name):
    """Arm a named daemon timer that calls ``func`` after ``interval`` seconds."""
    timer = threading.Timer(interval, func)
    timer.name = thread_name
    # Daemon so a pending timer does not keep the interpreter alive at exit.
    timer.daemon = True
    timer.start()


def _is_running(thread_name):
    """Return True when a live thread of this process carries ``thread_name``."""
    return any(thread.name == thread_name for thread in threading.enumerate())


# Function to execute the Etherscan scraping script
def execute_etherscan_scraping():
    """Run the Etherscan scraper once, then re-arm itself for the next run."""
    try:
        _run_scraper("utils/scrap_etherscan.py", "Etherscan")
    finally:
        # Re-arm before this thread ends so a thread with this name is always
        # alive while the chain is running.
        _schedule(ETHERSCAN_INTERVAL_S, execute_etherscan_scraping,
                  ETHERSCAN_THREAD_NAME)


# Function to execute the CoinMarketCap scraping script
def execute_cmc_scraping():
    """Run the CMC scraper once, then re-arm itself for the next run."""
    try:
        _run_scraper("utils/scrap_cmc.py", "CMC")
    finally:
        _schedule(CMC_INTERVAL_S, execute_cmc_scraping, CMC_THREAD_NAME)


if "initialized" not in st.session_state:
    # Start the scraping threads, but at most one chain per scraper for the
    # whole process, no matter how many sessions run this script.
    for thread_name, target in (
        (ETHERSCAN_THREAD_NAME, execute_etherscan_scraping),
        (CMC_THREAD_NAME, execute_cmc_scraping),
    ):
        if not _is_running(thread_name):
            threading.Thread(target=target, name=thread_name, daemon=True).start()
    st.session_state["initialized"] = True
68
+
69
  #-------------------------------------streamlit ----------------------------------
70
 
71
  # Set the title and other page configurations
72
  st.title('Crypto Analysis')
73
+
74
  # Create two columns for the two plots
75
  col1, col2 = st.columns(2)
76
 
logs/scrapping/cmc_scrapping.log ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-11-27 01:13:54,747 [INFO] - CMC data script execution completed.
2
+ 2023-11-27 01:13:54,748 [INFO] - Function fetch_and_process_cmc_data executed in 0.37 seconds
3
+ 2023-11-27 01:13:55,094 [INFO] - CMC data script execution completed.
4
+ 2023-11-27 01:13:55,095 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
5
+ 2023-11-27 01:13:55,162 [INFO] - CMC data script execution completed.
6
+ 2023-11-27 01:13:55,163 [INFO] - Function fetch_and_process_cmc_data executed in 0.25 seconds
7
+ 2023-11-27 01:13:55,971 [INFO] - CMC data script execution completed.
8
+ 2023-11-27 01:13:55,972 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
9
+ 2023-11-27 01:14:01,229 [INFO] - CMC data script execution completed.
10
+ 2023-11-27 01:14:01,230 [INFO] - Function fetch_and_process_cmc_data executed in 0.43 seconds
11
+ 2023-11-27 01:14:01,957 [INFO] - CMC data script execution completed.
12
+ 2023-11-27 01:14:01,958 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
13
+ 2023-11-27 01:14:08,224 [INFO] - CMC data script execution completed.
14
+ 2023-11-27 01:14:08,225 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
15
+ 2023-11-27 01:14:25,974 [INFO] - CMC data script execution completed.
16
+ 2023-11-27 01:14:25,975 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
17
+ 2023-11-27 01:14:28,006 [INFO] - CMC data script execution completed.
18
+ 2023-11-27 01:14:28,006 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
19
+ 2023-11-27 01:14:28,131 [INFO] - CMC data script execution completed.
20
+ 2023-11-27 01:14:28,132 [INFO] - Function fetch_and_process_cmc_data executed in 0.14 seconds
21
+ 2023-11-27 01:14:28,994 [INFO] - CMC data script execution completed.
22
+ 2023-11-27 01:14:28,995 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
23
+ 2023-11-27 01:14:29,992 [INFO] - CMC data script execution completed.
24
+ 2023-11-27 01:14:29,993 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
25
+ 2023-11-27 01:14:30,178 [INFO] - CMC data script execution completed.
26
+ 2023-11-27 01:14:30,178 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
27
+ 2023-11-27 01:14:32,026 [INFO] - CMC data script execution completed.
28
+ 2023-11-27 01:14:32,027 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
29
+ 2023-11-27 01:14:32,136 [INFO] - CMC data script execution completed.
30
+ 2023-11-27 01:14:32,136 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
31
+ 2023-11-27 01:14:33,268 [INFO] - CMC data script execution completed.
32
+ 2023-11-27 01:14:33,269 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
33
+ 2023-11-27 01:14:33,274 [INFO] - CMC data script execution completed.
34
+ 2023-11-27 01:14:33,275 [INFO] - Function fetch_and_process_cmc_data executed in 0.23 seconds
35
+ 2023-11-27 01:14:33,609 [INFO] - CMC data script execution completed.
36
+ 2023-11-27 01:14:33,610 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
37
+ 2023-11-27 01:14:33,702 [INFO] - CMC data script execution completed.
38
+ 2023-11-27 01:14:33,703 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
39
+ 2023-11-27 01:14:33,705 [INFO] - CMC data script execution completed.
40
+ 2023-11-27 01:14:33,707 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
41
+ 2023-11-27 01:14:34,272 [INFO] - CMC data script execution completed.
42
+ 2023-11-27 01:14:34,273 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
43
+ 2023-11-27 01:14:34,402 [INFO] - CMC data script execution completed.
44
+ 2023-11-27 01:14:34,403 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
45
+ 2023-11-27 01:14:34,962 [INFO] - CMC data script execution completed.
46
+ 2023-11-27 01:14:34,963 [INFO] - Function fetch_and_process_cmc_data executed in 0.14 seconds
47
+ 2023-11-27 01:14:36,072 [INFO] - CMC data script execution completed.
48
+ 2023-11-27 01:14:36,073 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
49
+ 2023-11-27 01:14:36,137 [INFO] - CMC data script execution completed.
50
+ 2023-11-27 01:14:36,137 [INFO] - Function fetch_and_process_cmc_data executed in 0.14 seconds
51
+ 2023-11-27 01:14:36,979 [INFO] - CMC data script execution completed.
52
+ 2023-11-27 01:14:36,979 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
53
+ 2023-11-27 01:14:38,087 [INFO] - CMC data script execution completed.
54
+ 2023-11-27 01:14:38,088 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
55
+ 2023-11-27 01:14:38,274 [INFO] - CMC data script execution completed.
56
+ 2023-11-27 01:14:38,275 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
57
+ 2023-11-27 01:14:38,291 [INFO] - CMC data script execution completed.
58
+ 2023-11-27 01:14:38,292 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
59
+ 2023-11-27 01:18:43,253 [INFO] - CMC data script execution completed.
60
+ 2023-11-27 01:18:43,254 [INFO] - Function fetch_and_process_cmc_data executed in 0.20 seconds
61
+ 2023-11-27 01:18:43,564 [INFO] - CMC data script execution completed.
62
+ 2023-11-27 01:18:43,565 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
63
+ 2023-11-27 01:18:43,626 [INFO] - CMC data script execution completed.
64
+ 2023-11-27 01:18:43,627 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
65
+ 2023-11-27 01:18:44,377 [INFO] - CMC data script execution completed.
66
+ 2023-11-27 01:18:44,377 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
67
+ 2023-11-27 01:18:49,644 [INFO] - CMC data script execution completed.
68
+ 2023-11-27 01:18:49,645 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
69
+ 2023-11-27 01:18:50,367 [INFO] - CMC data script execution completed.
70
+ 2023-11-27 01:18:50,368 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
71
+ 2023-11-27 01:18:56,635 [INFO] - CMC data script execution completed.
72
+ 2023-11-27 01:18:56,636 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
73
+ 2023-11-27 01:19:14,411 [INFO] - CMC data script execution completed.
74
+ 2023-11-27 01:19:14,412 [INFO] - Function fetch_and_process_cmc_data executed in 0.19 seconds
75
+ 2023-11-27 01:19:16,433 [INFO] - CMC data script execution completed.
76
+ 2023-11-27 01:19:16,434 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
77
+ 2023-11-27 01:19:16,630 [INFO] - CMC data script execution completed.
78
+ 2023-11-27 01:19:16,631 [INFO] - Function fetch_and_process_cmc_data executed in 0.24 seconds
79
+ 2023-11-27 01:19:17,395 [INFO] - CMC data script execution completed.
80
+ 2023-11-27 01:19:17,396 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
81
+ 2023-11-27 01:19:18,456 [INFO] - CMC data script execution completed.
82
+ 2023-11-27 01:19:18,457 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
83
+ 2023-11-27 01:19:18,606 [INFO] - CMC data script execution completed.
84
+ 2023-11-27 01:19:18,606 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
85
+ 2023-11-27 01:19:20,439 [INFO] - CMC data script execution completed.
86
+ 2023-11-27 01:19:20,440 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
87
+ 2023-11-27 01:19:20,549 [INFO] - CMC data script execution completed.
88
+ 2023-11-27 01:19:20,550 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
89
+ 2023-11-27 01:19:21,867 [INFO] - CMC data script execution completed.
90
+ 2023-11-27 01:19:21,869 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
91
+ 2023-11-27 01:19:21,897 [INFO] - CMC data script execution completed.
92
+ 2023-11-27 01:19:21,899 [INFO] - Function fetch_and_process_cmc_data executed in 0.20 seconds
93
+ 2023-11-27 01:19:22,052 [INFO] - CMC data script execution completed.
94
+ 2023-11-27 01:19:22,053 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
95
+ 2023-11-27 01:19:22,231 [INFO] - CMC data script execution completed.
96
+ 2023-11-27 01:19:22,231 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
97
+ 2023-11-27 01:19:22,243 [INFO] - CMC data script execution completed.
98
+ 2023-11-27 01:19:22,244 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
99
+ 2023-11-27 01:19:22,724 [INFO] - CMC data script execution completed.
100
+ 2023-11-27 01:19:22,725 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
101
+ 2023-11-27 01:19:22,857 [INFO] - CMC data script execution completed.
102
+ 2023-11-27 01:19:22,858 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
103
+ 2023-11-27 01:19:23,381 [INFO] - CMC data script execution completed.
104
+ 2023-11-27 01:19:23,382 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
105
+ 2023-11-27 01:19:24,509 [INFO] - CMC data script execution completed.
106
+ 2023-11-27 01:19:24,510 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
107
+ 2023-11-27 01:19:24,553 [INFO] - CMC data script execution completed.
108
+ 2023-11-27 01:19:24,554 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
109
+ 2023-11-27 01:19:25,377 [INFO] - CMC data script execution completed.
110
+ 2023-11-27 01:19:25,378 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
111
+ 2023-11-27 01:19:26,517 [INFO] - CMC data script execution completed.
112
+ 2023-11-27 01:19:26,518 [INFO] - Function fetch_and_process_cmc_data executed in 0.18 seconds
113
+ 2023-11-27 01:19:26,729 [INFO] - CMC data script execution completed.
114
+ 2023-11-27 01:19:26,730 [INFO] - Function fetch_and_process_cmc_data executed in 0.15 seconds
115
+ 2023-11-27 01:19:27,019 [INFO] - CMC data script execution completed.
116
+ 2023-11-27 01:19:27,020 [INFO] - Function fetch_and_process_cmc_data executed in 0.46 seconds
117
+ 2023-11-27 01:23:31,981 [INFO] - CMC data script execution completed.
118
+ 2023-11-27 01:23:31,982 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
119
+ 2023-11-27 01:23:32,065 [INFO] - CMC data script execution completed.
120
+ 2023-11-27 01:23:32,066 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
121
+ 2023-11-27 01:23:32,087 [INFO] - CMC data script execution completed.
122
+ 2023-11-27 01:23:32,088 [INFO] - Function fetch_and_process_cmc_data executed in 0.58 seconds
123
+ 2023-11-27 01:23:32,782 [INFO] - CMC data script execution completed.
124
+ 2023-11-27 01:23:32,782 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
125
+ 2023-11-27 01:23:39,214 [INFO] - CMC data script execution completed.
126
+ 2023-11-27 01:23:39,215 [INFO] - Function fetch_and_process_cmc_data executed in 1.33 seconds
127
+ 2023-11-27 01:23:39,226 [INFO] - CMC data script execution completed.
128
+ 2023-11-27 01:23:39,227 [INFO] - Function fetch_and_process_cmc_data executed in 0.62 seconds
129
+ 2023-11-27 01:23:45,140 [INFO] - CMC data script execution completed.
130
+ 2023-11-27 01:23:45,141 [INFO] - Function fetch_and_process_cmc_data executed in 0.27 seconds
131
+ 2023-11-27 01:24:02,946 [INFO] - CMC data script execution completed.
132
+ 2023-11-27 01:24:02,946 [INFO] - Function fetch_and_process_cmc_data executed in 0.30 seconds
133
+ 2023-11-27 01:24:05,151 [INFO] - CMC data script execution completed.
134
+ 2023-11-27 01:24:05,152 [INFO] - Function fetch_and_process_cmc_data executed in 0.48 seconds
135
+ 2023-11-27 01:24:05,276 [INFO] - CMC data script execution completed.
136
+ 2023-11-27 01:24:05,276 [INFO] - Function fetch_and_process_cmc_data executed in 0.41 seconds
137
+ 2023-11-27 01:24:06,876 [INFO] - CMC data script execution completed.
138
+ 2023-11-27 01:24:06,877 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
139
+ 2023-11-27 01:24:07,313 [INFO] - CMC data script execution completed.
140
+ 2023-11-27 01:24:07,314 [INFO] - Function fetch_and_process_cmc_data executed in 0.45 seconds
141
+ 2023-11-27 01:24:08,859 [INFO] - CMC data script execution completed.
142
+ 2023-11-27 01:24:08,860 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
143
+ 2023-11-27 01:24:08,976 [INFO] - CMC data script execution completed.
144
+ 2023-11-27 01:24:08,976 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
145
+ 2023-11-27 01:24:10,373 [INFO] - CMC data script execution completed.
146
+ 2023-11-27 01:24:10,375 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
147
+ 2023-11-27 01:24:10,620 [INFO] - CMC data script execution completed.
148
+ 2023-11-27 01:24:10,620 [INFO] - Function fetch_and_process_cmc_data executed in 0.40 seconds
149
+ 2023-11-27 01:24:10,689 [INFO] - CMC data script execution completed.
150
+ 2023-11-27 01:24:10,689 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
151
+ 2023-11-27 01:24:10,790 [INFO] - CMC data script execution completed.
152
+ 2023-11-27 01:24:10,792 [INFO] - Function fetch_and_process_cmc_data executed in 5.16 seconds
153
+ 2023-11-27 01:24:11,279 [INFO] - CMC data script execution completed.
154
+ 2023-11-27 01:24:11,280 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
155
+ 2023-11-27 01:24:11,298 [INFO] - CMC data script execution completed.
156
+ 2023-11-27 01:24:11,299 [INFO] - Function fetch_and_process_cmc_data executed in 0.30 seconds
157
+ 2023-11-27 01:24:11,781 [INFO] - CMC data script execution completed.
158
+ 2023-11-27 01:24:11,782 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
159
+ 2023-11-27 01:24:12,955 [INFO] - CMC data script execution completed.
160
+ 2023-11-27 01:24:12,956 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
161
+ 2023-11-27 01:24:14,058 [INFO] - CMC data script execution completed.
162
+ 2023-11-27 01:24:14,059 [INFO] - Function fetch_and_process_cmc_data executed in 0.44 seconds
163
+ 2023-11-27 01:24:14,921 [INFO] - CMC data script execution completed.
164
+ 2023-11-27 01:24:14,922 [INFO] - Function fetch_and_process_cmc_data executed in 0.16 seconds
165
+ 2023-11-27 01:24:15,135 [INFO] - CMC data script execution completed.
166
+ 2023-11-27 01:24:15,136 [INFO] - Function fetch_and_process_cmc_data executed in 0.17 seconds
167
+ 2023-11-27 01:24:15,500 [INFO] - CMC data script execution completed.
168
+ 2023-11-27 01:24:15,501 [INFO] - Function fetch_and_process_cmc_data executed in 0.25 seconds
169
+ 2023-11-27 01:24:15,701 [INFO] - CMC data script execution completed.
170
+ 2023-11-27 01:24:15,701 [INFO] - Function fetch_and_process_cmc_data executed in 5.18 seconds
171
+ 2023-11-27 01:24:15,742 [INFO] - CMC data script execution completed.
172
+ 2023-11-27 01:24:15,742 [INFO] - Function fetch_and_process_cmc_data executed in 5.44 seconds
173
+ 2023-11-27 01:24:17,986 [INFO] - CMC data script execution completed.
174
+ 2023-11-27 01:24:17,987 [INFO] - Function fetch_and_process_cmc_data executed in 5.17 seconds
175
+ 2023-11-27 23:21:54,744 [INFO] - CMC data script execution completed.
176
+ 2023-11-27 23:21:54,745 [INFO] - Function fetch_and_process_cmc_data executed in 0.45 seconds
177
+ 2023-11-27 23:26:43,314 [INFO] - CMC data script execution completed.
178
+ 2023-11-27 23:26:43,315 [INFO] - Function fetch_and_process_cmc_data executed in 0.28 seconds
logs/scrapping/etherscan_scrap.log ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-11-27 01:14:01,381 [ERROR] - Invalid data format for transactions: Max rate limit reached
2
+ 2023-11-27 01:14:01,382 [ERROR] - 'timeStamp' key not found in the response data.
3
+ 2023-11-27 01:14:02,293 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
4
+ 2023-11-27 01:14:02,293 [ERROR] - Failed to retrieve latest block number for token: APE
5
+ 2023-11-27 01:14:02,305 [ERROR] - Invalid data format for transactions: Max rate limit reached
6
+ 2023-11-27 01:14:02,305 [ERROR] - 'timeStamp' key not found in the response data.
7
+ 2023-11-27 01:14:02,364 [ERROR] - Invalid data format for transactions: Max rate limit reached
8
+ 2023-11-27 01:14:02,364 [ERROR] - 'timeStamp' key not found in the response data.
9
+ 2023-11-27 01:14:02,397 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
10
+ 2023-11-27 01:14:16,638 [INFO] - Created files:
11
+ 2023-11-27 01:14:16,638 [INFO] - Updated files: output/transactions_APE.csv, output/transactions_AXIE.csv, output/transactions_GALA.csv, output/transactions_MANA.csv, output/transactions_PET.csv, output/transactions_WEAOPON.csv
12
+ 2023-11-27 01:14:16,638 [INFO] - Etherscan scraping script execution completed.
13
+ 2023-11-27 01:14:16,638 [INFO] - Function fetch_and_update_etherscan executed in 21.73 seconds
14
+ 2023-11-27 01:14:19,015 [INFO] - Created files:
15
+ 2023-11-27 01:14:19,015 [INFO] - Updated files: output/transactions_APE.csv, output/transactions_AXIE.csv, output/transactions_GALA.csv, output/transactions_MANA.csv, output/transactions_PET.csv, output/transactions_WEAOPON.csv
16
+ 2023-11-27 01:14:19,015 [INFO] - Etherscan scraping script execution completed.
17
+ 2023-11-27 01:14:19,015 [INFO] - Function fetch_and_update_etherscan executed in 17.22 seconds
18
+ 2023-11-27 01:14:28,488 [INFO] - Created files:
19
+ 2023-11-27 01:14:28,488 [INFO] - Updated files: output/transactions_APE.csv, output/transactions_AXIE.csv, output/transactions_GALA.csv, output/transactions_MANA.csv, output/transactions_PET.csv, output/transactions_WEAOPON.csv
20
+ 2023-11-27 01:14:28,488 [INFO] - Etherscan scraping script execution completed.
21
+ 2023-11-27 01:14:28,488 [INFO] - Function fetch_and_update_etherscan executed in 20.44 seconds
22
+ 2023-11-27 01:14:29,290 [ERROR] - Invalid data format for transactions: Max rate limit reached
23
+ 2023-11-27 01:14:29,290 [ERROR] - 'timeStamp' key not found in the response data.
24
+ 2023-11-27 01:14:32,935 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
25
+ 2023-11-27 01:14:33,267 [ERROR] - Invalid data format for transactions: Max rate limit reached
26
+ 2023-11-27 01:14:33,268 [ERROR] - 'timeStamp' key not found in the response data.
27
+ 2023-11-27 01:14:33,347 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
28
+ 2023-11-27 01:14:33,349 [ERROR] - Failed to retrieve latest block number for token: AXIE
29
+ 2023-11-27 01:14:33,987 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
30
+ 2023-11-27 01:14:33,988 [ERROR] - Failed to retrieve latest block number for token: APE
31
+ 2023-11-27 01:14:34,029 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
32
+ 2023-11-27 01:14:34,043 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
33
+ 2023-11-27 01:14:34,044 [ERROR] - Failed to retrieve latest block number for token: APE
34
+ 2023-11-27 01:14:34,088 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
35
+ 2023-11-27 01:14:34,211 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
36
+ 2023-11-27 01:14:34,364 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
37
+ 2023-11-27 01:14:34,402 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
38
+ 2023-11-27 01:14:34,458 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
39
+ 2023-11-27 01:14:34,459 [ERROR] - Failed to retrieve latest block number for token: AXIE
40
+ 2023-11-27 01:14:34,517 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
41
+ 2023-11-27 01:14:34,518 [ERROR] - Failed to retrieve latest block number for token: AXIE
42
+ 2023-11-27 01:14:34,995 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
43
+ 2023-11-27 01:14:34,995 [ERROR] - Failed to retrieve latest block number for token: GALA
44
+ 2023-11-27 01:14:35,058 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
45
+ 2023-11-27 01:14:35,096 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
46
+ 2023-11-27 01:14:35,116 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
47
+ 2023-11-27 01:14:35,189 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
48
+ 2023-11-27 01:14:35,217 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
49
+ 2023-11-27 01:14:35,218 [ERROR] - Failed to retrieve latest block number for token: AXIE
50
+ 2023-11-27 01:14:35,298 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
51
+ 2023-11-27 01:14:35,299 [ERROR] - Failed to retrieve latest block number for token: APE
52
+ 2023-11-27 01:14:35,418 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
53
+ 2023-11-27 01:14:35,464 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
54
+ 2023-11-27 01:14:35,465 [ERROR] - Failed to retrieve latest block number for token: MANA
55
+ 2023-11-27 01:14:36,369 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
56
+ 2023-11-27 01:14:36,370 [ERROR] - Failed to retrieve latest block number for token: APE
57
+ 2023-11-27 01:14:36,378 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
58
+ 2023-11-27 01:14:36,436 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
59
+ 2023-11-27 01:14:36,437 [ERROR] - Failed to retrieve latest block number for token: APE
60
+ 2023-11-27 01:14:36,646 [ERROR] - Invalid data format for transactions: Max rate limit reached
61
+ 2023-11-27 01:14:36,647 [ERROR] - 'timeStamp' key not found in the response data.
62
+ 2023-11-27 01:14:37,386 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
63
+ 2023-11-27 01:14:38,647 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
64
+ 2023-11-27 01:14:38,648 [ERROR] - Failed to retrieve latest block number for token: APE
65
+ 2023-11-27 01:14:39,378 [ERROR] - Invalid data format for transactions: Max rate limit reached
66
+ 2023-11-27 01:14:39,379 [ERROR] - 'timeStamp' key not found in the response data.
67
+ 2023-11-27 01:14:39,533 [ERROR] - Invalid data format for transactions: Max rate limit reached
68
+ 2023-11-27 01:14:39,533 [ERROR] - 'timeStamp' key not found in the response data.
69
+ 2023-11-27 01:14:39,576 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
70
+ 2023-11-27 01:14:39,835 [ERROR] - Invalid data format for transactions: Max rate limit reached
71
+ 2023-11-27 01:14:39,836 [ERROR] - 'timeStamp' key not found in the response data.
72
+ 2023-11-27 01:14:39,847 [ERROR] - Invalid response format or missing data in response: {'status': '0', 'message': 'NOTOK', 'result': 'Max rate limit reached'}
73
+ 2023-11-27 01:14:39,848 [ERROR] - Failed to retrieve latest block number for token: GALA
74
+ 2023-11-27 01:14:49,754 [INFO] - Created files:
75
+ 2023-11-27 01:14:49,754 [INFO] - Updated files: output/transactions_APE.csv, output/transactions_AXIE.csv, output/transactions_GALA.csv, output/transactions_MANA.csv, output/transactions_PET.csv, output/transactions_WEAOPON.csv
76
+ 2023-11-27 01:14:49,754 [INFO] - Etherscan scraping script execution completed.
77
+ 2023-11-27 01:14:49,754 [INFO] - Function fetch_and_update_etherscan executed in 13.88 seconds
78
+ 2023-11-27 01:14:56,933 [INFO] - Created files:
79
+ 2023-11-27 01:14:56,934 [INFO] - Updated files: output/transactions_APE.csv, output/transactions_AXIE.csv, output/transactions_GALA.csv, output/transactions_MANA.csv, output/transactions_PET.csv, output/transactions_WEAOPON.csv
80
+ 2023-11-27 01:14:56,934 [INFO] - Etherscan scraping script execution completed.
81
+ 2023-11-27 01:14:56,934 [INFO] - Function fetch_and_update_etherscan executed in 20.11 seconds
82
+ 2023-11-27 23:22:15,713 [INFO] - Created files:
83
+ 2023-11-27 23:22:15,713 [INFO] - Updated files: output/transactions_APE.csv, output/transactions_AXIE.csv, output/transactions_GALA.csv, output/transactions_MANA.csv, output/transactions_PET.csv, output/transactions_WEAOPON.csv
84
+ 2023-11-27 23:22:15,713 [INFO] - Etherscan scraping script execution completed.
85
+ 2023-11-27 23:22:15,713 [INFO] - Function fetch_and_update_etherscan executed in 21.43 seconds
logs/streamlit/front.log ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-11-27 01:13:54,065 [INFO] - Streamlit app has started
2
+ 2023-11-27 01:13:54,506 [INFO] - Streamlit app has started
3
+ 2023-11-27 01:13:54,509 [INFO] - Streamlit app has started
4
+ 2023-11-27 01:13:54,822 [INFO] - CMC scraping completed
5
+ 2023-11-27 01:13:55,142 [INFO] - CMC scraping completed
6
+ 2023-11-27 01:13:55,205 [INFO] - CMC scraping completed
7
+ 2023-11-27 01:13:55,499 [INFO] - Streamlit app has started
8
+ 2023-11-27 01:13:56,005 [INFO] - CMC scraping completed
9
+ 2023-11-27 01:14:00,500 [INFO] - Streamlit app has started
10
+ 2023-11-27 01:14:01,263 [INFO] - CMC scraping completed
11
+ 2023-11-27 01:14:01,435 [INFO] - Etherscan scraping completed
12
+ 2023-11-27 01:14:01,500 [INFO] - Streamlit app has started
13
+ 2023-11-27 01:14:01,991 [INFO] - CMC scraping completed
14
+ 2023-11-27 01:14:02,347 [INFO] - Etherscan scraping completed
15
+ 2023-11-27 01:14:02,409 [INFO] - Etherscan scraping completed
16
+ 2023-11-27 01:14:02,444 [INFO] - Etherscan scraping completed
17
+ 2023-11-27 01:14:07,733 [INFO] - Streamlit app has started
18
+ 2023-11-27 01:14:08,262 [INFO] - CMC scraping completed
19
+ 2023-11-27 01:14:16,675 [INFO] - Etherscan scraping completed
20
+ 2023-11-27 01:14:19,051 [INFO] - Etherscan scraping completed
21
+ 2023-11-27 01:14:25,491 [INFO] - Streamlit app has started
22
+ 2023-11-27 01:14:26,011 [INFO] - CMC scraping completed
23
+ 2023-11-27 01:14:27,490 [INFO] - Streamlit app has started
24
+ 2023-11-27 01:14:27,540 [INFO] - Streamlit app has started
25
+ 2023-11-27 01:14:28,047 [INFO] - CMC scraping completed
26
+ 2023-11-27 01:14:28,170 [INFO] - CMC scraping completed
27
+ 2023-11-27 01:14:28,488 [INFO] - Streamlit app has started
28
+ 2023-11-27 01:14:28,532 [INFO] - Etherscan scraping completed
29
+ 2023-11-27 01:14:29,028 [INFO] - CMC scraping completed
30
+ 2023-11-27 01:14:29,332 [INFO] - Etherscan scraping completed
31
+ 2023-11-27 01:14:29,489 [INFO] - Streamlit app has started
32
+ 2023-11-27 01:14:29,502 [INFO] - Streamlit app has started
33
+ 2023-11-27 01:14:30,050 [INFO] - CMC scraping completed
34
+ 2023-11-27 01:14:30,212 [INFO] - CMC scraping completed
35
+ 2023-11-27 01:14:31,489 [INFO] - Streamlit app has started
36
+ 2023-11-27 01:14:31,502 [INFO] - Streamlit app has started
37
+ 2023-11-27 01:14:32,060 [INFO] - CMC scraping completed
38
+ 2023-11-27 01:14:32,171 [INFO] - CMC scraping completed
39
+ 2023-11-27 01:14:32,490 [INFO] - Streamlit app has started
40
+ 2023-11-27 01:14:32,502 [INFO] - Streamlit app has started
41
+ 2023-11-27 01:14:32,513 [INFO] - Streamlit app has started
42
+ 2023-11-27 01:14:32,530 [INFO] - Streamlit app has started
43
+ 2023-11-27 01:14:32,550 [INFO] - Streamlit app has started
44
+ 2023-11-27 01:14:33,150 [INFO] - Etherscan scraping completed
45
+ 2023-11-27 01:14:33,453 [INFO] - CMC scraping completed
46
+ 2023-11-27 01:14:33,453 [INFO] - Etherscan scraping completed
47
+ 2023-11-27 01:14:33,455 [INFO] - CMC scraping completed
48
+ 2023-11-27 01:14:33,649 [INFO] - CMC scraping completed
49
+ 2023-11-27 01:14:33,688 [INFO] - Streamlit app has started
50
+ 2023-11-27 01:14:33,702 [INFO] - Streamlit app has started
51
+ 2023-11-27 01:14:33,815 [INFO] - CMC scraping completed
52
+ 2023-11-27 01:14:33,819 [INFO] - CMC scraping completed
53
+ 2023-11-27 01:14:34,143 [INFO] - Etherscan scraping completed
54
+ 2023-11-27 01:14:34,176 [INFO] - Etherscan scraping completed
55
+ 2023-11-27 01:14:34,295 [INFO] - Etherscan scraping completed
56
+ 2023-11-27 01:14:34,338 [INFO] - CMC scraping completed
57
+ 2023-11-27 01:14:34,423 [INFO] - Etherscan scraping completed
58
+ 2023-11-27 01:14:34,457 [INFO] - Etherscan scraping completed
59
+ 2023-11-27 01:14:34,467 [INFO] - CMC scraping completed
60
+ 2023-11-27 01:14:34,489 [INFO] - Streamlit app has started
61
+ 2023-11-27 01:14:35,000 [INFO] - CMC scraping completed
62
+ 2023-11-27 01:14:35,105 [INFO] - Etherscan scraping completed
63
+ 2023-11-27 01:14:35,148 [INFO] - Etherscan scraping completed
64
+ 2023-11-27 01:14:35,169 [INFO] - Etherscan scraping completed
65
+ 2023-11-27 01:14:35,231 [INFO] - Etherscan scraping completed
66
+ 2023-11-27 01:14:35,459 [INFO] - Etherscan scraping completed
67
+ 2023-11-27 01:14:35,488 [INFO] - Streamlit app has started
68
+ 2023-11-27 01:14:35,501 [INFO] - Streamlit app has started
69
+ 2023-11-27 01:14:36,109 [INFO] - CMC scraping completed
70
+ 2023-11-27 01:14:36,174 [INFO] - CMC scraping completed
71
+ 2023-11-27 01:14:36,417 [INFO] - Etherscan scraping completed
72
+ 2023-11-27 01:14:36,488 [INFO] - Streamlit app has started
73
+ 2023-11-27 01:14:36,714 [INFO] - Etherscan scraping completed
74
+ 2023-11-27 01:14:37,016 [INFO] - CMC scraping completed
75
+ 2023-11-27 01:14:37,429 [INFO] - Etherscan scraping completed
76
+ 2023-11-27 01:14:37,491 [INFO] - Streamlit app has started
77
+ 2023-11-27 01:14:37,538 [INFO] - Streamlit app has started
78
+ 2023-11-27 01:14:37,547 [INFO] - Streamlit app has started
79
+ 2023-11-27 01:14:38,138 [INFO] - CMC scraping completed
80
+ 2023-11-27 01:14:38,314 [INFO] - CMC scraping completed
81
+ 2023-11-27 01:14:38,336 [INFO] - CMC scraping completed
82
+ 2023-11-27 01:14:39,425 [INFO] - Etherscan scraping completed
83
+ 2023-11-27 01:14:39,575 [INFO] - Etherscan scraping completed
84
+ 2023-11-27 01:14:39,616 [INFO] - Etherscan scraping completed
85
+ 2023-11-27 01:14:39,886 [INFO] - Etherscan scraping completed
86
+ 2023-11-27 01:14:49,790 [INFO] - Etherscan scraping completed
87
+ 2023-11-27 01:14:56,975 [INFO] - Etherscan scraping completed
88
+ 2023-11-27 01:18:43,301 [INFO] - CMC scraping completed
89
+ 2023-11-27 01:18:43,600 [INFO] - CMC scraping completed
90
+ 2023-11-27 01:18:43,662 [INFO] - CMC scraping completed
91
+ 2023-11-27 01:18:44,413 [INFO] - CMC scraping completed
92
+ 2023-11-27 01:18:49,681 [INFO] - CMC scraping completed
93
+ 2023-11-27 01:18:50,404 [INFO] - CMC scraping completed
94
+ 2023-11-27 01:18:56,671 [INFO] - CMC scraping completed
95
+ 2023-11-27 01:19:14,447 [INFO] - CMC scraping completed
96
+ 2023-11-27 01:19:16,469 [INFO] - CMC scraping completed
97
+ 2023-11-27 01:19:16,665 [INFO] - CMC scraping completed
98
+ 2023-11-27 01:19:17,431 [INFO] - CMC scraping completed
99
+ 2023-11-27 01:19:18,492 [INFO] - CMC scraping completed
100
+ 2023-11-27 01:19:18,642 [INFO] - CMC scraping completed
101
+ 2023-11-27 01:19:20,475 [INFO] - CMC scraping completed
102
+ 2023-11-27 01:19:20,583 [INFO] - CMC scraping completed
103
+ 2023-11-27 01:19:21,955 [INFO] - CMC scraping completed
104
+ 2023-11-27 01:19:21,961 [INFO] - CMC scraping completed
105
+ 2023-11-27 01:19:22,091 [INFO] - CMC scraping completed
106
+ 2023-11-27 01:19:22,277 [INFO] - CMC scraping completed
107
+ 2023-11-27 01:19:22,290 [INFO] - CMC scraping completed
108
+ 2023-11-27 01:19:22,761 [INFO] - CMC scraping completed
109
+ 2023-11-27 01:19:22,892 [INFO] - CMC scraping completed
110
+ 2023-11-27 01:19:23,416 [INFO] - CMC scraping completed
111
+ 2023-11-27 01:19:24,545 [INFO] - CMC scraping completed
112
+ 2023-11-27 01:19:24,589 [INFO] - CMC scraping completed
113
+ 2023-11-27 01:19:25,414 [INFO] - CMC scraping completed
114
+ 2023-11-27 01:19:26,562 [INFO] - CMC scraping completed
115
+ 2023-11-27 01:19:26,763 [INFO] - CMC scraping completed
116
+ 2023-11-27 01:19:27,054 [INFO] - CMC scraping completed
117
+ 2023-11-27 01:23:32,015 [INFO] - CMC scraping completed
118
+ 2023-11-27 01:23:32,104 [INFO] - CMC scraping completed
119
+ 2023-11-27 01:23:32,132 [INFO] - CMC scraping completed
120
+ 2023-11-27 01:23:32,822 [INFO] - CMC scraping completed
121
+ 2023-11-27 01:23:39,263 [INFO] - CMC scraping completed
122
+ 2023-11-27 01:23:39,268 [INFO] - CMC scraping completed
123
+ 2023-11-27 01:23:45,177 [INFO] - CMC scraping completed
124
+ 2023-11-27 01:24:02,981 [INFO] - CMC scraping completed
125
+ 2023-11-27 01:24:05,187 [INFO] - CMC scraping completed
126
+ 2023-11-27 01:24:05,310 [INFO] - CMC scraping completed
127
+ 2023-11-27 01:24:06,910 [INFO] - CMC scraping completed
128
+ 2023-11-27 01:24:07,347 [INFO] - CMC scraping completed
129
+ 2023-11-27 01:24:08,894 [INFO] - CMC scraping completed
130
+ 2023-11-27 01:24:09,011 [INFO] - CMC scraping completed
131
+ 2023-11-27 01:24:10,421 [INFO] - CMC scraping completed
132
+ 2023-11-27 01:24:10,653 [INFO] - CMC scraping completed
133
+ 2023-11-27 01:24:10,723 [INFO] - CMC scraping completed
134
+ 2023-11-27 01:24:10,837 [INFO] - CMC scraping completed
135
+ 2023-11-27 01:24:11,319 [INFO] - CMC scraping completed
136
+ 2023-11-27 01:24:11,339 [INFO] - CMC scraping completed
137
+ 2023-11-27 01:24:11,819 [INFO] - CMC scraping completed
138
+ 2023-11-27 01:24:12,989 [INFO] - CMC scraping completed
139
+ 2023-11-27 01:24:14,094 [INFO] - CMC scraping completed
140
+ 2023-11-27 01:24:14,959 [INFO] - CMC scraping completed
141
+ 2023-11-27 01:24:15,177 [INFO] - CMC scraping completed
142
+ 2023-11-27 01:24:15,537 [INFO] - CMC scraping completed
143
+ 2023-11-27 01:24:15,737 [INFO] - CMC scraping completed
144
+ 2023-11-27 01:24:15,777 [INFO] - CMC scraping completed
145
+ 2023-11-27 01:24:18,023 [INFO] - CMC scraping completed
146
+ 2023-11-27 23:13:14,736 [INFO] - Streamlit app has started
147
+ 2023-11-27 23:13:30,823 [INFO] - Streamlit app has started
148
+ 2023-11-27 23:13:49,122 [INFO] - Streamlit app has started
149
+ 2023-11-27 23:13:56,305 [INFO] - Streamlit app has started
150
+ 2023-11-27 23:13:56,320 [INFO] - Streamlit app has started
151
+ 2023-11-27 23:13:56,328 [INFO] - Streamlit app has started
152
+ 2023-11-27 23:13:56,351 [INFO] - Streamlit app has started
153
+ 2023-11-27 23:13:56,355 [INFO] - Streamlit app has started
154
+ 2023-11-27 23:13:56,366 [INFO] - Streamlit app has started
155
+ 2023-11-27 23:13:56,399 [INFO] - Streamlit app has started
156
+ 2023-11-27 23:13:56,414 [INFO] - Streamlit app has started
157
+ 2023-11-27 23:13:56,443 [INFO] - Streamlit app has started
158
+ 2023-11-27 23:13:56,472 [INFO] - Streamlit app has started
159
+ 2023-11-27 23:13:56,502 [INFO] - Streamlit app has started
160
+ 2023-11-27 23:13:56,558 [INFO] - Streamlit app has started
161
+ 2023-11-27 23:13:56,611 [INFO] - Streamlit app has started
162
+ 2023-11-27 23:13:56,647 [INFO] - Streamlit app has started
163
+ 2023-11-27 23:13:56,674 [INFO] - Streamlit app has started
164
+ 2023-11-27 23:13:56,688 [INFO] - Streamlit app has started
165
+ 2023-11-27 23:13:56,724 [INFO] - Streamlit app has started
166
+ 2023-11-27 23:13:56,733 [INFO] - Streamlit app has started
167
+ 2023-11-27 23:13:56,782 [INFO] - Streamlit app has started
168
+ 2023-11-27 23:13:56,787 [INFO] - Streamlit app has started
169
+ 2023-11-27 23:13:57,217 [INFO] - Streamlit app has started
170
+ 2023-11-27 23:13:58,139 [INFO] - Streamlit app has started
171
+ 2023-11-27 23:13:58,148 [INFO] - Streamlit app has started
172
+ 2023-11-27 23:14:00,124 [INFO] - Streamlit app has started
173
+ 2023-11-27 23:14:01,129 [INFO] - Streamlit app has started
174
+ 2023-11-27 23:14:07,138 [INFO] - Streamlit app has started
175
+ 2023-11-27 23:21:53,901 [INFO] - Streamlit app has started
176
+ 2023-11-27 23:21:54,811 [INFO] - CMC scraping completed
177
+ 2023-11-27 23:22:15,775 [INFO] - Etherscan scraping completed
178
+ 2023-11-27 23:26:43,359 [INFO] - CMC scraping completed
output/{transactions_ILV.csv → cmc_data_v1_cryptocurrency_listings_latest_100.json} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5ce31d9e8d9c7b39bf1f73c2e05ac655f652ebffca121a6a662a0a89eaa62c9
3
- size 5552703
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6565d58fdcb4f22b4ba93cb879ca3c74d71096a5b1ed911e625c35ae56432a0
3
+ size 119080
output/top_100_update.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ff89b933c5ee4694a0ec72fe7660677ed15012d95a0d6184767d05eb33fd397
3
- size 16258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:767a255420ffd1a3e5480673610f1ca21a10b9c2a9f402d8a421212f5e63270e
3
+ size 129574
output/transactions_APE.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c6094c453a4ae217cd7e6334ad0b92880e042d698db8b19978af61f42ceda1f
3
- size 25981544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fe609b08835b469af3f98a03959a75ecaaa119d50d222bd95981117cce95a56
3
+ size 3430047
output/transactions_AXIE.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21eddd1decbb2f70f6fe9102cf81f0a4309c10d838bc9a172255ff70a2461cb8
3
- size 7599371
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:497efc710a52dfb17f9dacc50500293cde3450644a84069fbdd4c1cae8462b37
3
+ size 1586211
output/transactions_GALA.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd264d44fff732f21b170dcd839968ecb0408fba89cc6a993fa0da8f20fa8e05
3
- size 32066355
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbdcc912f4193543018955be769a5418e98ab8dd3ebfaec7e47b290ac7c74f5d
3
+ size 4094956
output/transactions_MANA.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8162afa63a588a222422d06a1f93508f92d225d124915c6ea51d5d051d4db1e
3
- size 12039331
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d530a0dd8a8f62f11f8c912b0f90cccc849349fa743873d05fbc5c14254fa84b
3
+ size 2223053
output/transactions_PET.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa42451d8ab6696d44d5b648754791e1462f630de79439c580b8e92be5f016df
3
  size 885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffde90e51dd3bedbe01ab04e38ea840fd7d9c01094c6f0b6d4ee3779ed3e09c
3
  size 885
output/transactions_WEAOPON.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35e366dc8930a78bd4f37409447da3c6b7f53f3b6a699c89a4c9d9d5740622f5
3
- size 537
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beac438541c99c972ae4322faae5e06a27171e15bb73436849aeed530e80b4da
3
+ size 6082
requirements.txt CHANGED
@@ -9,4 +9,5 @@ python-dotenv
9
  streamlit
10
  requests
11
  plotly
12
- nbformat
 
 
9
  streamlit
10
  requests
11
  plotly
12
+ nbformat
13
+ schedule
ressources/dict_influencers_addr.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {"alexBecker":"0x4d224452801aced8b2f0aebe155379bb5d594381",
2
+ "elliotrades":"0xc02f318365582557a482eb5d1834dfd7af4a3f59",
3
+ "jrny":"0x08c1ae7e46d4a13b766566033b5c47c735e19f6f"
4
+ }
ressources/dict_tokens_addr.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"APE":"0x4d224452801aced8b2f0aebe155379bb5d594381",
2
+ "AXIE":"0xbb0e17ef65f82ab018d8edd776e8dd940327b28b",
3
+ "GALA":"0xd1d2Eb1B1e90B638588728b4130137D262C87cae",
4
+ "MANA":"0x0f5d2fb29fb7d3cfee444a200298f468908cc942",
5
+ "PET":"0xb870679a7fa65b924026f496de7f27c1dd0e5c5f",
6
+ "WEAOPON":"0x0b8057c3cc676c329c25c1d0cd27776efa73762d"
7
+ }
scrap_data_CMC.py → scrap_data_cmc.py RENAMED
@@ -14,7 +14,7 @@ load_dotenv()
14
 
15
  url = os.getenv("URL_CMC")
16
  endpoints = ["v1/cryptocurrency/listings/latest",
17
- "/v1/cryptocurrency/trending/latest",
18
  ]
19
  start = "1"
20
  stop = "100"
@@ -64,7 +64,7 @@ df["percent_tokens_circulation"] = np.round((df["circulating_supply"]/df["total_
64
  # merge dataframe
65
  df = df.join(quote_df)
66
  df["last_updated"] = pd.to_datetime(df["last_updated"])
67
- #df.to_csv(f"output/top_{stop}_update.csv")
68
 
69
  #-------------------------------------save data----------------------------------
70
 
@@ -85,4 +85,6 @@ else:
85
  df.to_csv(output_file, index=False)
86
  logging.info("Script execution completed.")
87
 
88
- #-------------------------------------end----------------------------------
 
 
 
14
 
15
  url = os.getenv("URL_CMC")
16
  endpoints = ["v1/cryptocurrency/listings/latest",
17
+ #"/v1/cryptocurrency/trending/latest",
18
  ]
19
  start = "1"
20
  stop = "100"
 
64
  # merge dataframe
65
  df = df.join(quote_df)
66
  df["last_updated"] = pd.to_datetime(df["last_updated"])
67
+ df.to_csv(f"output/top_{stop}_update.csv")
68
 
69
  #-------------------------------------save data----------------------------------
70
 
 
85
  df.to_csv(output_file, index=False)
86
  logging.info("Script execution completed.")
87
 
88
+
89
+
90
+ #-------------------------------------end----------------------------------
scrap_data_etherscan.py CHANGED
@@ -3,7 +3,7 @@ import time
3
  import pandas as pd
4
  import json
5
  import os
6
- from utils.functions import update_and_save_csv
7
 
8
  # Create output folder
9
  if not os.path.exists("output"):
 
3
  import pandas as pd
4
  import json
5
  import os
6
+ from utils.scrap import update_and_save_csv
7
 
8
  # Create output folder
9
  if not os.path.exists("output"):
utils/__pycache__/functions.cpython-310.pyc ADDED
Binary file (2.67 kB). View file
 
utils/__pycache__/functions.cpython-311.pyc ADDED
Binary file (5.5 kB). View file
 
utils/__pycache__/scrap.cpython-311.pyc ADDED
Binary file (12.3 kB). View file
 
utils/scrap.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------- Library Imports ----------------------
2
+ import time
3
+ import os
4
+ import json
5
+ import pandas as pd
6
+ import numpy as np
7
+ import logging
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from requests import Session
11
+
12
+ # ---------------------- Environment Variables ----------------------
13
+
14
load_dotenv()
# Etherscan API
url_etherscan = os.getenv("URL_ETHERSCAN")
api_key_etherscan = os.getenv("API_KEY_ETHERSCAN")

# CoinMarketCap API
url_cmc = os.getenv("URL_CMC")
api_key_cmc = os.getenv("API_KEY_CMC")

# Logging
# os.getenv returns None when LOG_FOLDER is unset, and os.makedirs(None) would
# raise TypeError at import time; fall back to the project's usual log folder.
log_folder = os.getenv("LOG_FOLDER") or "./logs/scrapping/"
os.makedirs(log_folder, exist_ok=True)
log_file = os.path.join(log_folder, "scrapping.log")
log_format = "%(asctime)s [%(levelname)s] - %(message)s"
logging.basicConfig(filename=log_file, level=logging.INFO, format=log_format)

# Load the token symbol -> contract address mapping
with open("ressources/dict_tokens_addr.json", "r", encoding="utf-8") as tokens_file:
    dict_addresses = json.load(tokens_file)

# Files created / updated by the most recent fetch_and_update_etherscan() run
L_created = []
L_updated = []

# Define the number of blocks to retrieve transactions from;
# get_coin_data queries them in windows of 10,000 blocks.
n_blocks = 20000
n_loop = n_blocks // 10_000
40
+
41
+ # ---------------------- Processing ----------------------
42
+
43
# Helper function for logging execution time
def log_execution_time(func):
    """Decorate ``func`` so every completed call logs its duration at INFO level.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same signature and metadata as ``func`` that
        returns whatever ``func`` returns. If ``func`` raises, the exception
        propagates and nothing is logged.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)  # preserve __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()  # monotonic clock: immune to wall-clock jumps
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        logging.info(f"Function {func.__name__} executed in {elapsed:.2f} seconds")
        return result

    return wrapper
52
+
53
+
54
# function 1: Fetch and Update Etherscan Data
@log_execution_time
def fetch_and_update_etherscan():
    """Create or refresh ``output/transactions_<symbol>.csv`` for every tracked token.

    For each ``symbol -> contract address`` pair in ``dict_addresses``:

    * if the CSV does not exist, the last ``n_loop`` block windows are
      fetched and written;
    * otherwise the highest stored block number is compared with the latest
      block, enough windows to cover the gap are fetched, and the result is
      merged with the stored rows (de-duplicated on ``hash``, sorted by
      ``blockNumber``).

    Tokens for which no data could be retrieved are logged and skipped
    without touching their file. The files written are recorded in the
    module-level ``L_created`` / ``L_updated`` lists and logged at the end.
    """
    # Reset the shared report lists in place: when this function is called
    # repeatedly in one process, earlier runs must not leak into this report.
    L_created.clear()
    L_updated.clear()
    # Columns returned by the API that are not persisted.
    dropped_columns = ["confirmations", "input"]

    for tokenSymbol, contractAddr in dict_addresses.items():
        file = f"output/transactions_{tokenSymbol}.csv"

        if not os.path.exists(file):
            df_transactions = get_coin_data(contractAddr, n_loop)
            if df_transactions.empty:
                # get_coin_data returns an empty frame on failure; dropping
                # columns from it would raise KeyError.
                logging.error(f"No transactions retrieved for token: {tokenSymbol}")
                continue
            df_transactions_no_dup = df_transactions.drop(
                columns=dropped_columns, errors="ignore"
            ).drop_duplicates(subset="hash")
            df_transactions_no_dup.to_csv(file, sep=",", index=False)
            L_created.append(file)  # reported only once the file really exists
            continue

        df_existing = pd.read_csv(file, sep=",")
        start_block = df_existing["blockNumber"].max()

        # Retrieve latest block number and calculate the difference
        latest_block_number, diff = latest_block(start_block)
        if latest_block_number is None:
            logging.error(f"Failed to retrieve latest block number for token: {tokenSymbol}")
            continue  # Skip to the next token if the latest block number could not be retrieved

        n_loop_to_concat = (diff // 10000) + 1
        df_transactions = get_coin_data(contractAddr, n_loop_to_concat)
        if df_transactions.empty:
            logging.warning(f"No new transactions retrieved for token: {tokenSymbol}")
            continue

        # New rows come first so that, for a duplicated hash, the fresh copy is kept.
        # The stored CSV no longer has the dropped columns, hence errors="ignore".
        df_latest_no_dup = (
            pd.concat([df_transactions, df_existing])
            .drop(columns=dropped_columns, errors="ignore")
            .drop_duplicates(subset="hash")
        )
        # API values are strings while CSV values are ints: normalise before sorting.
        df_latest_no_dup = df_latest_no_dup.assign(
            blockNumber=df_latest_no_dup["blockNumber"].astype(int)
        ).sort_values(by="blockNumber")
        df_latest_no_dup.to_csv(file, sep=",", index=False)
        L_updated.append(file)

    logging.info("Created files: " + ", ".join(L_created))
    logging.info("Updated files: " + ", ".join(L_updated))
    logging.info("Script execution completed.")
87
+
88
# Helper function to get latest block number
def latest_block(start_block=None):
    """Return the latest block number reported by the Etherscan proxy API.

    Args:
        start_block: Optional reference block number. When given, the gap
            between the latest block and this block is returned as well.

    Returns:
        * ``start_block is None``: the latest block number as ``int``, or
          ``None`` on failure.
        * otherwise: ``(latest_block_number, latest_block_number - start_block)``,
          or ``(None, None)`` on failure.
    """
    # The failure value mirrors the success shape, so both calling styles
    # (`x = latest_block()` and `x, diff = latest_block(b)`) can test for None.
    failure = None if start_block is None else (None, None)
    params = {
        "module": "proxy",
        "action": "eth_blockNumber",
        "apikey": api_key_etherscan,
    }
    try:
        # A timeout keeps a stalled connection from blocking the caller indefinitely.
        response = requests.get(url_etherscan, params=params, timeout=30)
    except requests.exceptions.RequestException as e:
        logging.error(f"Request for latest block number failed: {e}")
        return failure

    if response.status_code != 200:
        # Log the raw text: the body of an error response may not be valid JSON.
        logging.error(f"API call failed with status code {response.status_code}: {response.text}")
        return failure

    try:
        # The proxy endpoint returns the block number as a hexadecimal string.
        # Rate-limit replies carry a plain message instead (ValueError); a
        # missing or null result gives KeyError / TypeError.
        latest_block_number = int(response.json()["result"], 16)
    except (ValueError, KeyError, TypeError):
        logging.error(f"Invalid response format or missing data in response: {response.text}")
        return failure

    if start_block is not None:
        return latest_block_number, latest_block_number - start_block
    return latest_block_number
108
+
109
def get_coin_data(contractAddr, n):
    """Fetch token transfers for ``contractAddr`` over the last ``n`` block windows.

    The latest block number is looked up first; then ``n`` consecutive
    windows of 10,000 blocks ending at that block are queried with the
    ``tokentx`` action, oldest window first, pausing one second after each call.

    Args:
        contractAddr: Contract address of the token.
        n: Number of 10,000-block windows to query.

    Returns:
        A DataFrame of transfers with ``timeStamp`` converted to datetimes and
        ``value`` divided by 1e18, or an empty DataFrame when the latest block
        could not be determined or no usable data was returned.
    """
    latest_block_number = latest_block()
    # latest_block() may signal failure as None or as a (None, None) tuple,
    # so accept only a real block number here.
    if latest_block_number is None or isinstance(latest_block_number, tuple):
        logging.error(f"Could not retrieve latest block number for contract address {contractAddr}")
        return pd.DataFrame()  # Return an empty DataFrame

    blocks_per_call = 10_000
    frames = []
    for i in range(n):
        start_block = latest_block_number - (n - i) * blocks_per_call
        end_block = latest_block_number - (n - 1 - i) * blocks_per_call
        params = {
            "module": "account",
            "action": "tokentx",
            "contractaddress": contractAddr,
            "startblock": start_block,
            "endblock": end_block,
            "sort": "asc",
            "apikey": api_key_etherscan,
        }
        try:
            response = requests.get(url_etherscan, params=params, timeout=30)
            transactions = response.json().get("result", [])
        except (requests.exceptions.RequestException, ValueError) as e:
            logging.error(f"Request for transactions failed: {e}")
            transactions = None
        else:
            # Error replies (e.g. rate limiting) carry a message string instead
            # of a list of transaction dicts.
            if not isinstance(transactions, list) or not all(isinstance(item, dict) for item in transactions):
                logging.error(f"Invalid data format for transactions: {transactions}")
                transactions = None

        if transactions:
            frames.append(pd.DataFrame(transactions))
        # Always pause between calls, including after a failed one: skipping
        # the pause on a rate-limit reply only makes the rate limiting worse.
        time.sleep(1)

    # Concatenate once instead of growing a DataFrame inside the loop.
    df_transactions = pd.concat(frames) if frames else pd.DataFrame()

    if 'timeStamp' not in df_transactions:
        logging.error("'timeStamp' key not found in the response data.")
        return pd.DataFrame()  # Return an empty DataFrame if key is missing

    df_transactions['timeStamp'] = pd.to_datetime(df_transactions['timeStamp'].astype(int), unit='s')
    # NOTE(review): dividing by 1e18 assumes every token uses 18 decimals — confirm per token.
    df_transactions['value'] = df_transactions['value'].astype(float) / 1e18
    return df_transactions
149
+
150
# function 2: Fetch and Process CMC Data
@log_execution_time
def fetch_and_process_cmc_data():
    """Download the CoinMarketCap top-100 listings, archive the raw JSON and process it.

    For each endpoint the response is parsed as JSON, written to
    ``output/cmc_data_<endpoint>_100.json`` and, if it contains a ``"data"``
    entry, passed to ``process_cmc_data``. Network errors, invalid JSON and
    payloads without ``"data"`` are logged and the endpoint is skipped.
    """
    headers = {
        'Accept': 'application/json',  # was 'Accepts', which is not an HTTP header
        'X-CMC_PRO_API_KEY': api_key_cmc,
    }
    parameters = {
        'start': '1',
        'limit': '100',
        'convert': 'USD'
    }

    # The context manager closes the session's connections when done.
    with Session() as session:
        session.headers.update(headers)
        for endpoint in ["v1/cryptocurrency/listings/latest"]:
            target = f"{url_cmc}/{endpoint}"
            try:
                response = session.get(target, params=parameters, timeout=30)
                data = json.loads(response.text)
            # RequestException is the base class of requests' ConnectionError,
            # Timeout and TooManyRedirects; ValueError covers invalid JSON.
            except (requests.exceptions.RequestException, ValueError) as e:
                logging.error(f"Error while fetching data from {target}: {e}")
                continue

            with open(f'output/cmc_data_{endpoint.replace("/", "_")}_100.json', 'w') as f:
                json.dump(data, f)

            if not isinstance(data, dict) or "data" not in data:
                # Without a "data" entry there is nothing to process; log the
                # payload instead of failing with KeyError downstream.
                logging.error(f"Unexpected response from {target}: {data}")
                continue
            process_cmc_data(data, '100')
174
+
175
def process_cmc_data(data, stop):
    """Flatten a CoinMarketCap listings payload into a table and hand it to ``save_cmc_data``.

    Args:
        data: Parsed API response; its ``"data"`` entry is a list of listings,
            each carrying a ``"quote"`` dict with a ``"USD"`` entry.
        stop: Suffix forwarded to ``save_cmc_data``.
    """
    listing_columns = ["name", "symbol", "circulating_supply", "total_supply", "quote"]
    quote_columns = [
        "price",
        "percent_change_24h",
        "percent_change_7d",
        "percent_change_90d",
        "market_cap",
        "fully_diluted_market_cap",
        "last_updated",
    ]

    listings = pd.DataFrame(data["data"])[listing_columns]
    # One row of USD quote fields per listing, in the same order as `listings`.
    usd_quotes = pd.json_normalize([quote["USD"] for quote in listings["quote"]])[quote_columns]

    listings = listings.drop(columns="quote")
    circulating_ratio = listings["circulating_supply"] / listings["total_supply"]
    listings["percent_tokens_circulation"] = np.round(circulating_ratio * 100, 1)

    # Both frames share the default 0..n-1 index, so join aligns them row by row.
    listings = listings.join(usd_quotes)
    listings["last_updated"] = pd.to_datetime(listings["last_updated"])
    save_cmc_data(listings, stop)
183
+
184
def save_cmc_data(df, stop):
    """Append a CMC snapshot to ``output/top_{stop}_update.csv`` without duplicating rows.

    Args:
        df: Snapshot with at least ``symbol`` and ``last_updated`` columns.
        stop: Suffix used in the output file name.

    If the file exists, its rows are merged with ``df`` and rows sharing the
    same ``(symbol, last_updated)`` are kept once; otherwise ``df`` is written
    as a new file.
    """
    output_file = f"output/top_{stop}_update.csv"
    if os.path.isfile(output_file):
        existing_data = pd.read_csv(output_file)
        # The CSV round-trip turns timestamps into strings, which never compare
        # equal to the Timestamps in `df`; normalise both sides to UTC datetimes
        # so drop_duplicates can recognise repeated snapshots.
        existing_data["last_updated"] = pd.to_datetime(existing_data["last_updated"], utc=True)
        new_data = df.assign(last_updated=pd.to_datetime(df["last_updated"], utc=True))
        updated_data = pd.concat([existing_data, new_data], axis=0, ignore_index=True)
        updated_data = updated_data.drop_duplicates(subset=["symbol", "last_updated"])
        updated_data.to_csv(output_file, index=False)
    else:
        df.to_csv(output_file, index=False)
    logging.info("CMC data script execution completed.")
194
+
195
+ # ---------------------- Execution ----------------------
196
+ if __name__ == "__main__":
197
+ fetch_and_update_etherscan()
198
+ fetch_and_process_cmc_data()
utils/scrap_cmc.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------- Library Imports ----------------------
2
+ import os
3
+ import json
4
+ import pandas as pd
5
+ import numpy as np
6
+ import logging
7
+ import requests
8
+ from dotenv import load_dotenv
9
+ from requests import Session
10
+ from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
11
+ import time
12
+
13
+ # ---------------------- Environment Variables ----------------------
14
load_dotenv()
url_cmc = os.getenv("URL_CMC")
api_key_cmc = os.getenv("API_KEY_CMC")
# os.getenv returns None when LOG_FOLDER is unset, and os.makedirs(None) would
# raise TypeError at import time; fall back to the project's usual log folder.
log_folder = os.getenv("LOG_FOLDER") or "./logs/scrapping/"
os.makedirs(log_folder, exist_ok=True)
log_file = os.path.join(log_folder, "cmc_scrapping.log")
log_format = "%(asctime)s [%(levelname)s] - %(message)s"
logging.basicConfig(filename=log_file, level=logging.INFO, format=log_format)
22
+
23
+ # ---------------------- Helper Functions ----------------------
24
+
25
def log_execution_time(func):
    """Decorate ``func`` so every completed call logs its duration at INFO level.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same signature and metadata as ``func`` that
        returns whatever ``func`` returns. If ``func`` raises, the exception
        propagates and nothing is logged.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)  # preserve __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()  # monotonic clock: immune to wall-clock jumps
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        logging.info(f"Function {func.__name__} executed in {elapsed:.2f} seconds")
        return result

    return wrapper
33
+
34
def process_cmc_data(data, stop):
    """Flatten a CoinMarketCap listings payload into a table and hand it to ``save_cmc_data``.

    Args:
        data: Parsed API response; its ``"data"`` entry is a list of listings,
            each carrying a ``"quote"`` dict with a ``"USD"`` entry.
        stop: Suffix forwarded to ``save_cmc_data``.
    """
    listing_columns = ["name", "symbol", "circulating_supply", "total_supply", "quote"]
    quote_columns = [
        "price",
        "percent_change_24h",
        "percent_change_7d",
        "percent_change_90d",
        "market_cap",
        "fully_diluted_market_cap",
        "last_updated",
    ]

    listings = pd.DataFrame(data["data"])[listing_columns]
    # One row of USD quote fields per listing, in the same order as `listings`.
    usd_quotes = pd.json_normalize([quote["USD"] for quote in listings["quote"]])[quote_columns]

    listings = listings.drop(columns="quote")
    circulating_ratio = listings["circulating_supply"] / listings["total_supply"]
    listings["percent_tokens_circulation"] = np.round(circulating_ratio * 100, 1)

    # Both frames share the default 0..n-1 index, so join aligns them row by row.
    listings = listings.join(usd_quotes)
    listings["last_updated"] = pd.to_datetime(listings["last_updated"])
    save_cmc_data(listings, stop)
42
+
43
def save_cmc_data(df, stop):
    """Append a CMC snapshot to ``output/top_{stop}_update.csv`` without duplicating rows.

    Args:
        df: Snapshot with at least ``symbol`` and ``last_updated`` columns.
        stop: Suffix used in the output file name.

    If the file exists, its rows are merged with ``df`` and rows sharing the
    same ``(symbol, last_updated)`` are kept once; otherwise ``df`` is written
    as a new file.
    """
    output_file = f"output/top_{stop}_update.csv"
    if os.path.isfile(output_file):
        existing_data = pd.read_csv(output_file)
        # The CSV round-trip turns timestamps into strings, which never compare
        # equal to the Timestamps in `df`; normalise both sides to UTC datetimes
        # so drop_duplicates can recognise repeated snapshots.
        existing_data["last_updated"] = pd.to_datetime(existing_data["last_updated"], utc=True)
        new_data = df.assign(last_updated=pd.to_datetime(df["last_updated"], utc=True))
        updated_data = pd.concat([existing_data, new_data], axis=0, ignore_index=True)
        updated_data = updated_data.drop_duplicates(subset=["symbol", "last_updated"])
        updated_data.to_csv(output_file, index=False)
    else:
        df.to_csv(output_file, index=False)
    logging.info("CMC data script execution completed.")
53
+
54
+ # ---------------------- CMC Scraping Function ----------------------
55
+
56
@log_execution_time
def fetch_and_process_cmc_data():
    """Download the CoinMarketCap top-100 listings, archive the raw JSON and process it.

    For each endpoint the response is parsed as JSON, written to
    ``output/cmc_data_<endpoint>_100.json`` and, if it contains a ``"data"``
    entry, passed to ``process_cmc_data``. Network errors, invalid JSON and
    payloads without ``"data"`` are logged and the endpoint is skipped.
    """
    headers = {
        'Accept': 'application/json',  # was 'Accepts', which is not an HTTP header
        'X-CMC_PRO_API_KEY': api_key_cmc,
    }
    parameters = {
        'start': '1',
        'limit': '100',
        'convert': 'USD'
    }

    # The context manager closes the session's connections when done.
    with Session() as session:
        session.headers.update(headers)
        for endpoint in ["v1/cryptocurrency/listings/latest"]:
            target = f"{url_cmc}/{endpoint}"
            try:
                response = session.get(target, params=parameters, timeout=30)
                data = json.loads(response.text)
            # ValueError covers a body that is not valid JSON.
            except (ConnectionError, Timeout, TooManyRedirects, ValueError) as e:
                logging.error(f"Error while fetching data from {target}: {e}")
                continue

            with open(f'output/cmc_data_{endpoint.replace("/", "_")}_100.json', 'w') as f:
                json.dump(data, f)

            if not isinstance(data, dict) or "data" not in data:
                # Without a "data" entry there is nothing to process; log the
                # payload instead of failing with KeyError downstream.
                logging.error(f"Unexpected response from {target}: {data}")
                continue
            process_cmc_data(data, '100')
79
+
80
+ # ---------------------- Execution ----------------------
81
+ if __name__ == "__main__":
82
+ fetch_and_process_cmc_data()
utils/scrap_etherscan.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------- Library Imports ----------------------
2
+ import time
3
+ import os
4
+ import json
5
+ import pandas as pd
6
+ import logging
7
+ import requests
8
+ from dotenv import load_dotenv
9
+
10
+ # ---------------------- Environment Variables ----------------------
11
load_dotenv()
url_etherscan = os.getenv("URL_ETHERSCAN")
api_key_etherscan = os.getenv("API_KEY_ETHERSCAN")
# Fall back to the location used in the project's .env so that a missing
# LOG_FOLDER does not make os.makedirs(None) raise TypeError at import time.
log_folder = os.getenv("LOG_FOLDER") or "./logs/scrapping/"
os.makedirs(log_folder, exist_ok=True)
log_file = os.path.join(log_folder, "etherscan_scrap.log")
log_format = "%(asctime)s [%(levelname)s] - %(message)s"
logging.basicConfig(filename=log_file, level=logging.INFO, format=log_format)

# Load the JSON file into a dictionary (token symbol -> contract address)
with open("ressources/dict_tokens_addr.json", "r", encoding="utf-8") as file:
    dict_addresses = json.load(file)

# Paths of the CSV files created / updated by fetch_and_update_etherscan
L_created = []
L_updated = []
# Depth of the initial history to download, in blocks
n_blocks = 20000
# Number of 10,000-block windows needed to cover n_blocks
n_loop = n_blocks // 10_000
28
+
29
+ # ---------------------- Helper Functions ----------------------
30
def log_execution_time(func):
    """Decorate ``func`` so that the duration of every successful call is logged.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper that forwards all arguments to ``func``, logs the elapsed
        time at INFO level and returns ``func``'s result unchanged.
    """
    # Local import keeps this block self-contained with respect to the
    # module's import section.
    from functools import wraps

    @wraps(func)  # keep __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        logging.info(f"Function {func.__name__} executed in {elapsed:.2f} seconds")
        return result

    return wrapper
38
+
39
def latest_block(start_block=None):
    """Return the most recent block number reported by the Etherscan proxy API.

    Args:
        start_block: Optional reference block. When given, the distance
            between the latest block and this block is returned as well.

    Returns:
        ``latest_block_number`` when ``start_block`` is None, otherwise the
        tuple ``(latest_block_number, latest_block_number - start_block)``.
        On failure the result mirrors that shape: ``None`` without
        ``start_block`` and ``(None, None)`` with it, so both
        ``x = latest_block()`` followed by ``x is None`` and
        ``a, b = latest_block(n)`` behave as callers expect.
    """
    failure = None if start_block is None else (None, None)
    params = {
        "module": "proxy",
        "action": "eth_blockNumber",
        "apikey": api_key_etherscan
    }
    try:
        # Without a timeout a stalled connection would hang the whole scrape.
        response = requests.get(url_etherscan, params=params, timeout=30)
    except requests.RequestException as exc:
        logging.error(f"Request for latest block number failed: {exc}")
        return failure

    if response.status_code != 200:
        # Log the raw text: the body of a failed call is not guaranteed to be
        # JSON, and decoding it inside the log call could raise.
        logging.error(f"API call failed with status code {response.status_code}: {response.text}")
        return failure

    try:
        # The block number is parsed as a hexadecimal string.
        latest_block_number = int(response.json()["result"], 16)
    except (ValueError, KeyError, TypeError):
        # ValueError: non-JSON body or non-hex result (e.g. an error message);
        # KeyError: no "result" entry; TypeError: result is null / not a string.
        logging.error(f"Invalid response format or missing data in response: {response.text}")
        return failure

    if start_block is not None:
        return latest_block_number, latest_block_number - start_block
    return latest_block_number
58
+
59
def get_coin_data(contractAddr, n):
    """Download token transfers for ``contractAddr`` over the last ``n`` block windows.

    The range ending at the latest block is split into ``n`` consecutive
    windows of 10,000 blocks, each queried with the ``account`` / ``tokentx``
    action. Windows whose request fails or whose payload is malformed are
    logged and skipped.

    Args:
        contractAddr: Contract address passed as ``contractaddress``.
        n: Number of 10,000-block windows to fetch.

    Returns:
        A DataFrame of the collected rows with ``timeStamp`` converted to
        datetimes and ``value`` divided by 1e18, or an empty DataFrame when
        the latest block is unavailable or nothing usable was retrieved.
    """
    latest_block_number = latest_block()
    # A failed lookup may surface as None or as a (None, None) tuple; anything
    # that is not an integer cannot be used for the window arithmetic below.
    if not isinstance(latest_block_number, int):
        logging.error(f"Could not retrieve latest block number for contract address {contractAddr}")
        return pd.DataFrame()  # Return an empty DataFrame

    blocks_per_call = 10_000
    frames = []
    for i in range(n):
        start_block = latest_block_number - (n - i) * blocks_per_call
        end_block = latest_block_number - (n - 1 - i) * blocks_per_call
        params = {
            "module": "account",
            "action": "tokentx",
            "contractaddress": contractAddr,
            "startblock": start_block,
            "endblock": end_block,
            "sort": "asc",
            "apikey": api_key_etherscan
        }
        try:
            response = requests.get(url_etherscan, params=params, timeout=30)
            transactions = response.json().get("result", [])
        except (requests.RequestException, ValueError, AttributeError) as exc:
            # Network failure, non-JSON body, or a JSON body that is not an object.
            logging.error(f"Request for blocks {start_block}-{end_block} failed: {exc}")
            transactions = None
        finally:
            # Throttle after every request, including failed ones, so an
            # error response does not trigger a burst of immediate retries.
            time.sleep(1)

        if transactions is None:
            continue
        if not isinstance(transactions, list) or not all(isinstance(item, dict) for item in transactions):
            logging.error(f"Invalid data format for transactions: {transactions}")
            continue  # Skip this window if transactions data is invalid

        if transactions:
            frames.append(pd.DataFrame(transactions))

    # Concatenate once instead of growing a DataFrame inside the loop.
    df_transactions = pd.concat(frames) if frames else pd.DataFrame()

    if 'timeStamp' not in df_transactions:
        logging.error("'timeStamp' key not found in the response data.")
        return pd.DataFrame()  # Return an empty DataFrame if key is missing

    df_transactions['timeStamp'] = pd.to_datetime(df_transactions['timeStamp'].astype(int), unit='s')
    # NOTE(review): the fixed 1e18 divisor assumes every token uses 18
    # decimals; confirm against the tokens listed in dict_addresses.
    df_transactions['value'] = df_transactions['value'].astype(float) / 1e18
    return df_transactions
98
+
99
+ # ---------------------- Main Function ----------------------
100
@log_execution_time
def fetch_and_update_etherscan():
    """Create or refresh ``output/transactions_<symbol>.csv`` for every known token.

    For a token without a CSV, the last ``n_loop`` block windows are
    downloaded and stored. For a token with an existing CSV, only the windows
    needed to reach the newest stored block are downloaded, merged with the
    stored rows, de-duplicated on ``hash`` and written back sorted by
    ``blockNumber``. Tokens for which nothing could be fetched are logged and
    skipped, leaving any existing file untouched.
    """
    dropped_columns = ["confirmations", "input"]
    os.makedirs("output", exist_ok=True)

    for tokenSymbol, contractAddr in dict_addresses.items():
        file = f"output/transactions_{tokenSymbol}.csv"
        if not os.path.exists(file):
            df_transactions = get_coin_data(contractAddr, n_loop)
            if df_transactions.empty:
                # Nothing to store: dropping columns from an empty frame would
                # raise KeyError and abort the remaining tokens.
                logging.error(f"No transactions retrieved for token: {tokenSymbol}")
                continue
            df_transactions_no_dup = (
                df_transactions
                .drop(columns=dropped_columns, errors="ignore")
                .drop_duplicates(subset="hash")
            )
            df_transactions_no_dup.to_csv(file, sep=",", index=False)
            L_created.append(file)  # record only files that were actually written
        else:
            df_temp = pd.read_csv(file, sep=",")
            # Newest block already stored on disk.
            start_block = int(df_temp["blockNumber"].max())

            latest_block_number, diff = latest_block(start_block)
            if latest_block_number is None:
                logging.error(f"Failed to retrieve latest block number for token: {tokenSymbol}")
                continue

            # One extra window so the range reaches back to the stored block.
            n_loop_to_concat = (diff // 10000) + 1
            df_transactions = get_coin_data(contractAddr, n_loop_to_concat)
            if df_transactions.empty:
                logging.info(f"No new transactions retrieved for token: {tokenSymbol}")
                continue

            # errors="ignore": rows read back from the CSV no longer carry the
            # dropped columns, so their absence must not be fatal.
            df_latest = pd.concat([df_transactions, df_temp]).drop(columns=dropped_columns, errors="ignore")
            # Work on an explicit copy so the dtype fix below does not write
            # into a slice of df_latest.
            df_latest_no_dup = df_latest.drop_duplicates(subset="hash").copy()
            # Freshly fetched block numbers and those read from disk can differ
            # in type; normalise before sorting.
            df_latest_no_dup["blockNumber"] = df_latest_no_dup["blockNumber"].astype(int)
            df_latest_no_dup = df_latest_no_dup.sort_values(by="blockNumber")
            df_latest_no_dup.to_csv(file, sep=",", index=False)
            L_updated.append(file)

    logging.info("Created files: " + ", ".join(L_created))
    logging.info("Updated files: " + ", ".join(L_updated))
    logging.info("Etherscan scraping script execution completed.")
131
+
132
+ # ---------------------- Script Execution ----------------------
133
# Run the Etherscan scrape only when this file is executed as a script,
# not when it is imported by another module.
if __name__ == "__main__":
    fetch_and_update_etherscan()