### Create the database structure

In [22]:
import json
import re

In [23]:
# Root URL for the hosted images; each image link below is built as
# "<base_url>/<id>/<file name>".
base_url = "https://raw.githubusercontent.com/piadonabauer/magicbrush-dev/main/images"

In [24]:
# Collects one document per edit-session turn; the list is serialised to JSON
# at the end of this cell.
output_data = []

def extract_turn(output_filename):
    """Return the turn number embedded in an output file name.

    Looks for the first occurrence of ``output`` immediately followed by
    digits and returns those digits as an ``int``. Returns ``None`` when the
    name contains no such pattern.
    """
    found = re.search(r"output(\d+)", output_filename)
    if not found:
        return None
    return int(found.group(1))

# Load the raw edit sessions. The loop below treats the file as a mapping of
# image id -> list of session dicts with "input", "output" and "instruction"
# keys. UTF-8 is requested explicitly so reading does not depend on the
# platform's default encoding.
with open("edit_sessions.json", "r", encoding="utf-8") as file:
    edit_sessions = json.load(file)

# The key is bound to `image_id` rather than `id` so the builtin id() is not
# shadowed for the remainder of the kernel session.
for image_id, sessions in edit_sessions.items():
    for session in sessions:
        input_link = f"{base_url}/{image_id}/{session['input']}"
        output_link = f"{base_url}/{image_id}/{session['output']}"

        # The turn number is encoded in the output file name; entries without
        # one are reported and left out of the result.
        turn = extract_turn(session['output'])
        if turn is None:
            print(f"No turn value found in {session['output']} - skip.")
            continue

        document = {
            "meta_information": {
                "id": int(image_id),
                "turn": turn,  # extract_turn already returns an int
                "input_img_link": input_link,
                "output_img_link": output_link,
                "instruction": session["instruction"]
            },
            # Starts empty; nothing in this notebook fills it.
            "ratings": []
        }
        output_data.append(document)

# Persist the documents so the upload step can read them back from disk.
output_json_path = "mongo_init.json"
with open(output_json_path, "w", encoding="utf-8") as outfile:
    json.dump(output_data, outfile, indent=4)

print(f"Data saved at {output_json_path}")

Data saved at mongo_init.json


### Upload structure to MongoDB

In [25]:
from pymongo import MongoClient
from dotenv import load_dotenv
import os

In [26]:
# Print the installed PyMongo version so the notebook output records which
# driver release was used.
import pymongo
print(pymongo.__version__)

4.8.0


In [29]:
# Read the MongoDB credentials from a local .env file (presumably git-ignored)
# instead of hard-coding them in the notebook.
# NOTE: by default load_dotenv() does not override variables that are already
# set in the process environment. After editing .env, restart the kernel (or
# call load_dotenv(override=True)) to pick up the new values.
load_dotenv()

mongo_user = os.getenv('MONGO_USER')
mongo_password = os.getenv('MONGO_PASSWORD')
cluster_url = os.getenv('MONGO_CLUSTER_URL')

# Fail fast with a clear message: an unset variable would otherwise be
# formatted as the literal string "None" in the connection URI. Only the
# variable names are reported, never their values, so no secret ends up in
# the notebook output.
_missing_vars = [
    name
    for name, value in (
        ('MONGO_USER', mongo_user),
        ('MONGO_PASSWORD', mongo_password),
        ('MONGO_CLUSTER_URL', cluster_url),
    )
    if not value
]
if _missing_vars:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_vars)}"
    )

In [30]:
from urllib.parse import quote_plus

# PyMongo requires the username and password in a connection URI to be
# percent-escaped; without this, credentials containing characters such as
# '@', ':', '/' or '%' produce an invalid URI. The values in the environment
# are therefore expected to be stored raw (not already escaped).
connection_url = (
    f"mongodb+srv://{quote_plus(mongo_user)}:{quote_plus(mongo_password)}@{cluster_url}"
)
client = MongoClient(connection_url)

db = client["thesis"]
collection = db["labeling"]

# Read the generated documents back from disk and close the file before the
# network round-trip starts.
with open(output_json_path, "r", encoding="utf-8") as infile:
    documents = json.load(infile)

# WARNING: insert_many() adds new documents on every run, so re-executing this
# cell inserts the same data again.
collection.insert_many(documents)

print("Data added.")

Data added.
