In [3]:
import os
import pickle
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone
from functools import partial
from pathlib import Path

import pandas as pd
from tqdm import tqdm
from web3 import Web3


### Build t_map (request_block → request_time lookup)

In [None]:
# Load the tools table; the cells below read its request_block / request_time columns.
tools = pd.read_csv("../data/tools.csv")

In [None]:
# Show the column labels of the loaded tools table.
tools.columns

In [None]:
# Build the request_block -> request_time lookup and cache it on disk.
# Rows with a missing value are skipped so the map never stores NaN in place of
# a timestamp; the cell that refreshes t_map.pkl later in this notebook applies
# the same dropna rule.
t_map = (
    tools[['request_block', 'request_time']]
    .dropna()
    .set_index('request_block')['request_time']
    .to_dict()
)

with open('../data/t_map.pkl', 'wb') as f:
    pickle.dump(t_map, f)



In [None]:
# Restore the cached request_block -> request_time lookup from its pickle file.
# Only load pickles written by this notebook: unpickling an untrusted file can
# execute arbitrary code.
with open('../data/t_map.pkl', 'rb') as pickle_file:
    t_map = pickle.load(pickle_file)

### Markets

In [4]:
# Load the fpmms table and show its column labels.
fpmms = pd.read_csv("../data/fpmms.csv")
fpmms.columns

Index(['id', 'currentAnswer', 'title'], dtype='object')

In [6]:
# Load the delivers table and report its (rows, columns) shape.
# NOTE(review): the recorded output echoes this read_csv line, which looks like
# a pandas warning raised while parsing (presumably DtypeWarning for mixed-type
# columns) — confirm, and consider passing explicit dtypes.
delivers = pd.read_csv("../data/delivers.csv")
delivers.shape


  delivers = pd.read_csv("../data/delivers.csv")


(263613, 12)

In [7]:
# Load the requests table.
requests = pd.read_csv("../data/requests.csv")
# Not the last expression of the cell, so this value is not displayed.
requests.columns

# Only this shape appears as the cell output.
requests.shape

(245092, 6)

In [8]:
# Reload the tools table (the same CSV already read near the top of the
# notebook) and show its column labels.
# NOTE(review): the recorded output echoes this read_csv line, which looks like
# a pandas warning raised while parsing (presumably DtypeWarning for mixed-type
# columns) — confirm, and consider passing explicit dtypes.
tools = pd.read_csv("../data/tools.csv")
tools.columns

  tools = pd.read_csv("../data/tools.csv")


Index(['request_id', 'request_block', 'prompt_request', 'tool', 'nonce',
       'trader_address', 'deliver_block', 'error', 'error_message',
       'prompt_response', 'mech_address', 'p_yes', 'p_no', 'confidence',
       'info_utility', 'vote', 'win_probability', 'title', 'currentAnswer',
       'request_time', 'request_month_year', 'request_month_year_week'],
      dtype='object')

In [9]:
# Count the rows that have no request_time yet.
tools['request_time'].isna().sum()

841

In [10]:
def block_number_to_timestamp(block_number: int, web3: Web3) -> str:
    """Return the UTC time of a block as a ``YYYY-MM-DD HH:MM:SS`` string.

    Args:
        block_number: Number of the block to look up.
        web3: ``Web3`` client used to fetch the block.

    Returns:
        The block's ``timestamp`` field rendered as a UTC date-time string.
    """
    block = web3.eth.get_block(block_number)
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12; converting
    # with an explicit UTC tzinfo produces the same wall-clock string.
    block_time = datetime.fromtimestamp(block['timestamp'], tz=timezone.utc)
    return block_time.strftime('%Y-%m-%d %H:%M:%S')


def parallelize_timestamp_conversion(
    df: pd.DataFrame, function: callable, max_workers: int = 10
) -> list:
    """Apply ``function`` to every ``request_block`` value using a thread pool.

    Args:
        df: Frame that has a ``request_block`` column.
        function: Callable invoked once per block number.
        max_workers: Size of the thread pool. Defaults to 10, the value that
            used to be hard-coded, so existing callers behave as before.

    Returns:
        One result per row of ``df``, in row order.
    """
    block_numbers = df['request_block'].tolist()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order; tqdm only wraps the
        # iterator to show progress.
        results = list(tqdm(executor.map(function, block_numbers), total=len(block_numbers)))
    return results


In [11]:
# RPC endpoint used for block lookups. A non-empty RPC_URL environment variable
# takes precedence, so the endpoint can be changed without editing the notebook.
# NOTE(review): the fallback URL embeds what looks like an access key that is
# stored with the notebook — consider rotating it and removing the literal.
rpc = os.environ.get("RPC_URL") or "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
web3 = Web3(Web3.HTTPProvider(rpc))

# Bind the client once so the converter can be mapped over bare block numbers.
partial_block_number_to_timestamp = partial(block_number_to_timestamp, web3=web3)

In [15]:
# Back-fill request_time for the rows where it is missing by resolving each
# row's request_block to a timestamp.
missing_time_indices = tools.index[tools['request_time'].isna()]
if not missing_time_indices.empty:
    partial_block_number_to_timestamp = partial(block_number_to_timestamp, web3=web3)
    missing_timestamps = parallelize_timestamp_conversion(
        tools.loc[missing_time_indices], partial_block_number_to_timestamp
    )

    # The results come back in row order, so they can be written with a single
    # aligned assignment instead of one .at call per row.
    tools.loc[missing_time_indices, 'request_time'] = missing_timestamps

100%|██████████| 841/841 [00:25<00:00, 33.18it/s]


In [16]:
# Re-count missing request_time values after the back-fill cell above.
tools['request_time'].isna().sum()

0

In [17]:
# Derive the month and week labels from request_time. The column is parsed once
# and the parsed values are reused for both derived columns.
request_datetimes = pd.to_datetime(tools['request_time'])
tools['request_month_year'] = request_datetimes.dt.strftime('%Y-%m')
tools['request_month_year_week'] = request_datetimes.dt.to_period('W').astype(str)

In [18]:
# Count rows without a week label.
# NOTE(review): the column was produced with astype(str), so a missing period
# may show up as the string 'NaT' rather than NaN — confirm this check can
# actually report a non-zero count.
tools['request_month_year_week'].isna().sum()


0

In [19]:
# Write the updated table back to the CSV it was loaded from (overwrites the
# file), leaving out the DataFrame index.
tools.to_csv("../data/tools.csv", index=False)

In [23]:
# Merge the request_block -> request_time pairs currently in `tools` into the
# pickled lookup, then write the merged lookup back to the same file.
with open('../data/t_map.pkl', 'rb') as src:
    t_map = pickle.load(src)

new_timestamps = (
    tools[['request_block', 'request_time']]
    .dropna()
    .set_index('request_block')['request_time']
    .to_dict()
)
# Entries from `tools` overwrite any value already stored for the same block.
t_map.update(new_timestamps)

with open('../data/t_map.pkl', 'wb') as dst:
    pickle.dump(t_map, dst)

