seanpedrickcase committed
Commit 94e514b · 1 Parent(s): 36f8e9f

Updated logging format for timestamps to be compatible with AWS. Added load_dynamo_logs.py example file.

load_dynamo_logs.py ADDED
@@ -0,0 +1,56 @@
+ import boto3
+ import csv
+ from decimal import Decimal
+ from boto3.dynamodb.conditions import Key
+
+ from tools.config import AWS_REGION, ACCESS_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME, OUTPUT_FOLDER
+
+ # Replace with your actual table name and region
+ TABLE_NAME = USAGE_LOG_DYNAMODB_TABLE_NAME # Choose as appropriate
+ REGION = AWS_REGION
+ CSV_OUTPUT = OUTPUT_FOLDER + 'dynamodb_logs_export.csv'
+
+ # Create DynamoDB resource
+ dynamodb = boto3.resource('dynamodb', region_name=REGION)
+ table = dynamodb.Table(TABLE_NAME)
+
+ # Helper function to convert Decimal to float or int
+ def convert_types(item):
+     for key, value in item.items():
+         if isinstance(value, Decimal):
+             # Convert to int if no decimal places, else float
+             item[key] = int(value) if value % 1 == 0 else float(value)
+     return item
+
+ # Paginated scan
+ def scan_table():
+     items = []
+     response = table.scan()
+     items.extend(response['Items'])
+
+     while 'LastEvaluatedKey' in response:
+         response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
+         items.extend(response['Items'])
+
+     return items
+
+ # Export to CSV
+ def export_to_csv(items, output_path):
+     if not items:
+         print("No items found.")
+         return
+
+     fieldnames = sorted(items[0].keys())
+
+     with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
+         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+         writer.writeheader()
+
+         for item in items:
+             writer.writerow(convert_types(item))
+
+     print(f"Exported {len(items)} items to {output_path}")
+
+ # Run export
+ items = scan_table()
+ export_to_csv(items, CSV_OUTPUT)
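
The new script exports the table with a full paginated scan. It also imports Key from boto3.dynamodb.conditions, which is the natural building block if only a subset of the logs is needed. A minimal sketch of a keyed query, assuming a hypothetical partition key named id; the real key schema of the log tables may differ:

from boto3.dynamodb.conditions import Key

def query_by_id(table, id_value):
    # Query a single partition key value instead of scanning the whole table
    items = []
    response = table.query(KeyConditionExpression=Key('id').eq(id_value))
    items.extend(response['Items'])

    # Page through results, as in scan_table() above
    while 'LastEvaluatedKey' in response:
        response = table.query(
            KeyConditionExpression=Key('id').eq(id_value),
            ExclusiveStartKey=response['LastEvaluatedKey'],
        )
        items.extend(response['Items'])

    return items

# e.g. export_to_csv(query_by_id(table, 'some-id'), CSV_OUTPUT)
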
load_s3_logs.py CHANGED
@@ -2,7 +2,7 @@ import boto3
  import pandas as pd
  from io import StringIO
  from datetime import datetime
- from tools.config import DOCUMENT_REDACTION_BUCKET, AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION
+ from tools.config import DOCUMENT_REDACTION_BUCKET, AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION, OUTPUT_FOLDER
 
  # Combine together log files that can be then used for e.g. dashboarding and financial tracking.
 
@@ -71,7 +71,7 @@ if df_list:
      concatenated_df = pd.concat(df_list, ignore_index=True)
 
      # Save the concatenated DataFrame to a CSV file
-     concatenated_df.to_csv('consolidated_logs.csv', index=False)
-     print("Consolidated CSV saved as 'consolidated_logs.csv'")
+     concatenated_df.to_csv(OUTPUT_FOLDER + 'consolidated_s3_logs.csv', index=False)
+     print("Consolidated CSV saved as 'consolidated_s3_logs.csv'")
  else:
      print("No log files found in the given date range.")
tools/custom_csvlogger.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
  import contextlib
  import csv
  import datetime
+ from datetime import datetime
  import os
  import re
  import boto3
 
@@ -177,7 +178,7 @@ class CSVLogger_custom(FlaggingCallback):
          csv_data.append(username)
 
 
-         timestamp = str(datetime.datetime.now())
+         timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] # Correct format for Amazon Athena
          csv_data.append(timestamp)
 
          generated_id = str(uuid.uuid4())
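
As a quick illustration of the timestamp change above: str(datetime.datetime.now()) keeps full microsecond precision, while the new line formats the value explicitly and drops the last three digits, leaving the millisecond-precision form the commit targets for Amazon Athena. A minimal sketch comparing the two:

from datetime import datetime

now = datetime.now()

# Previous behaviour: full microsecond precision, e.g. '2025-01-01 12:34:56.789123'
old_timestamp = str(now)

# New behaviour: explicit format truncated to milliseconds, e.g. '2025-01-01 12:34:56.789'
new_timestamp = now.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]

print(old_timestamp)
print(new_timestamp)
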