import json
import logging
import argparse
import numpy as np
import sys
import os
import re
from collections import defaultdict
from gensim.models import Word2Vec
from gematria import letter_to_value, HEBREW_GEMATRIA_VALUES

# --- Configuration ---
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
HOLOGRAPHIC_STATE_SIZE_BITS = 4096  # fixed size of the folded state, in bits
BITS_PER_CHAR = 16                  # bits used to encode one gematria value
BOOK_RANGE = range(1, 40)           # books 1 through 39
MODELS_DIR = "models_by_book"
INDICES_DIR = "indices_by_book"
# Items of HEBREW_GEMATRIA_VALUES ordered by their second element (the value),
# largest first, so a greedy decomposition can walk the list front to back.
SORTED_GEMATRIA = sorted(
    HEBREW_GEMATRIA_VALUES.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

# --- Hilfsfunktionen für das Laden von Daten ---

def load_all_data():
    """Load the JSON index and Word2Vec model of every book in ``BOOK_RANGE``.

    A book is registered only when BOTH its index and its model were loaded
    successfully, so the two returned dicts always share the same keys.
    Books whose index or model file does not exist are skipped silently;
    books whose files exist but fail to load are logged and skipped.

    Returns:
        tuple: ``(all_indices, all_models)``, both keyed by book number, or
        ``(None, None)`` when not a single book could be loaded.
    """
    all_indices = {}
    all_models = {}
    logging.info("Lade Daten für alle Bücher...")
    for i in BOOK_RANGE:
        index_path = os.path.join(INDICES_DIR, f"book_{i:02}_index.json")
        model_path = os.path.join(MODELS_DIR, f"book_{i:02}.w2v")
        if not (os.path.exists(index_path) and os.path.exists(model_path)):
            continue

        try:
            with open(index_path, 'r', encoding='utf-8') as f:
                book_index = json.load(f)
            book_model = Word2Vec.load(model_path)
        except Exception as e:
            # Broad on purpose: loading is best-effort per book and the model
            # loader can fail in many ways; the book is simply left out.
            logging.error(f"Konnte Daten für Buch {i:02} nicht laden: {e}")
            continue

        # Commit only after both loads succeeded; otherwise a failed model
        # load would leave an index behind without a matching model.
        all_indices[i] = book_index
        all_models[i] = book_model

    if not all_indices:
        logging.error("Keine Index/Modell-Dateien gefunden. Bitte 'build_all_indices.py' ausführen.")
        return None, None
    logging.info(f"{len(all_indices)} Buch-Netzwerke erfolgreich geladen.")
    return all_indices, all_models

# --- Kernlogik: Holographic State Engine ---

def text_to_gematria_binary(text):
    """Encode the Hebrew letters of ``text`` as one concatenated bit string.

    Every character outside U+05D0..U+05EA is dropped. Each remaining
    character is passed to ``letter_to_value`` and the result is rendered
    in binary, zero-padded to at least ``BITS_PER_CHAR`` digits.

    Returns:
        str: the concatenated binary digits; empty when ``text`` contains
        no character from that range.
    """
    hebrew_only = re.sub(r"[^\u05D0-\u05EA]+", "", text)
    encoded_letters = []
    for letter in hebrew_only:
        encoded_letters.append(format(letter_to_value(letter), f'0{BITS_PER_CHAR}b'))
    return "".join(encoded_letters)

def fold_into_state(binary_string, state_size=HOLOGRAPHIC_STATE_SIZE_BITS):
    """Fold a bit string of any length into a state of ``state_size`` bits.

    The input is cut into consecutive blocks of ``state_size`` characters.
    A short final block is right-padded with '0', and all blocks are
    XOR-ed together element-wise.

    Args:
        binary_string: string of binary digit characters.
        state_size: number of bits in the resulting state.

    Returns:
        str: the folded state, one character per bit; all zeros when
        ``binary_string`` is empty.
    """
    accumulator = np.zeros(state_size, dtype=np.int8)
    for start in range(0, len(binary_string), state_size):
        # Pad so the trailing block lines up with the accumulator.
        chunk = binary_string[start:start + state_size].ljust(state_size, '0')
        chunk_bits = np.array(list(chunk), dtype=np.int8)
        accumulator = accumulator ^ chunk_bits
    return "".join(accumulator.astype(str))

def create_holographic_context():
    """Build the fixed-size state by XOR-folding every book into it in turn.

    For each book number in ``BOOK_RANGE`` the file
    ``texts/torah/<NN>.json`` is read, its ``"text"`` blocks are joined,
    bracketed passages and all non-Hebrew characters are removed, and the
    remaining letters are converted to bits and folded to the state size.
    Each book's folded state is then XOR-ed into the running state, so every
    readable book contributes to the result (not only the last one).

    Missing files are logged and skipped. Any other read or parse error
    propagates to the caller.

    Returns:
        str: a string of '0'/'1' characters, ``HOLOGRAPHIC_STATE_SIZE_BITS``
        long. It is all zeros when no book text could be folded in.
    """
    logging.info("Erstelle holographischen State durch sequentielles Einfalten aller Bücher...")
    final_state = '0' * HOLOGRAPHIC_STATE_SIZE_BITS
    folded_books = 0

    for i in BOOK_RANGE:
        # Keep the try body to the only statements that can legitimately
        # raise FileNotFoundError.
        try:
            with open(f"texts/torah/{i:02}.json", 'r', encoding='utf-8') as file:
                logging.info(f"Falte Buch {i:02} in den State ein...")
                data = json.load(file)
        except FileNotFoundError:
            logging.warning(f"Datei für Buch {i:02} nicht gefunden, wird übersprungen.")
            continue

        full_text = ' '.join(' '.join(block) for block in data.get("text", []))
        # Drop bracketed passages first, then everything that is not a Hebrew letter.
        without_brackets = re.sub(r"\[.*?\]", "", full_text, flags=re.DOTALL)
        clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", without_brackets)
        if not clean_text:
            continue

        book_binary = text_to_gematria_binary(clean_text)
        book_state = fold_into_state(book_binary, HOLOGRAPHIC_STATE_SIZE_BITS)
        # Accumulate instead of overwrite: XOR of two bit characters is '1'
        # exactly when they differ.
        final_state = "".join(
            '1' if a != b else '0' for a, b in zip(final_state, book_state)
        )
        folded_books += 1

    if folded_books:
        logging.info("Holographischer Tanach-State wurde erfolgreich erstellt.")
    else:
        logging.warning("Kein Buchtext konnte eingefaltet werden; der State besteht nur aus Nullen.")
    return final_state

# --- Funktionen zur Phrasen-Auswahl und -Verarbeitung ---

def cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors.

    A small constant (1e-9) is added to the denominator so that zero
    vectors yield 0 instead of a division by zero.
    """
    dot_product = np.dot(v1, v2)
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    return dot_product / (norm_product + 1e-9)

def get_decomposed_word(number):
    """Fallback: greedily decompose ``number`` into gematria letters.

    Walks ``SORTED_GEMATRIA`` (largest value first) and emits each letter
    as long as its value still fits into what is left of ``number``.

    Returns:
        str: the emitted letters in order; empty when nothing fits.
    """
    letters = []
    remaining = number
    for char, value in SORTED_GEMATRIA:
        while remaining >= value:
            letters.append(char)
            remaining -= value
    return "".join(letters)

def get_best_phrase_from_book(gematria_val, book_index, method, query_vector=None):
    """Pick one phrase for ``gematria_val`` from a SINGLE book index.

    Args:
        gematria_val: value to look up; the index is keyed by ``str(value)``.
        book_index: mapping whose entries may hold ``'phrases'`` (a list of
            phrase dicts) and ``'pagerank'``.
        method: selection strategy.
            ``'frequency'``: the phrase with the smallest ``'count'``.
            ``'semantic'``: the phrase whose ``'vector'`` is most similar to
            ``query_vector``; only applied when that vector is given and
            not all zeros.
            ``'network'``: the phrase maximising ``pagerank / count``.
            Anything else, or a semantic request without a usable vector:
            the first phrase.
        query_vector: optional query embedding for ``'semantic'``.

    Returns:
        The chosen phrase dict, or ``None`` when the index has no phrases
        for this value.
    """
    entry = book_index.get(str(gematria_val), {})
    phrases = entry.get('phrases', [])
    if not phrases:
        return None

    if method == 'frequency':
        return min(phrases, key=lambda phrase: phrase.get('count', 1))

    if method == 'semantic':
        if query_vector is not None and not np.all(query_vector == 0):
            def similarity(phrase):
                return cosine_similarity(np.array(phrase['vector']), query_vector)
            return max(phrases, key=similarity)
    elif method == 'network':
        rank = entry.get('pagerank', 0)
        return max(phrases, key=lambda phrase: rank / phrase.get('count', 1))

    # Default strategy, also used when no usable query vector is available.
    return phrases[0]

def process_query_holographic(query_text, tanakh_state, all_indices, all_models, method):
    """Run a query against the holographic state and group results by book.

    Steps:
      1. Convert the query to bits, fold it to the state size and XOR it
         with ``tanakh_state`` to obtain the final "concept" bit string.
      2. Read the concept in chunks of ``BITS_PER_CHAR`` bits. Every
         non-zero chunk is treated as a gematria value and looked up in
         each book index with ``get_best_phrase_from_book``.
      3. Format the phrases per book, in ascending book order, with
         duplicates removed while keeping first-occurrence order.

    Args:
        query_text: the raw query phrase.
        tanakh_state: bit string as produced by ``create_holographic_context``.
        all_indices: mapping book number -> book index.
        all_models: mapping book number -> Word2Vec model; only consulted
            when ``method == 'semantic'``.
        method: phrase selection method passed on to
            ``get_best_phrase_from_book``.

    Returns:
        str: one ``"\\n--- Buch NN ---\\n<phrases>"`` section per book that
        produced results, concatenated; empty when nothing matched.
    """
    # Step 1: fold the query into the state to obtain the final "concept".
    query_binary = text_to_gematria_binary(query_text)
    konzept_state = fold_into_state(query_binary, HOLOGRAPHIC_STATE_SIZE_BITS)
    final_konzept = "".join(str(int(a) ^ int(b)) for a, b in zip(tanakh_state, konzept_state))

    # The query embedding depends only on the book, not on the chunk, so it
    # is computed once per book here instead of once per (chunk, book) pair.
    query_vectors = {}
    if method == 'semantic':
        query_words = query_text.split()
        for book_num in all_indices:
            book_model = all_models.get(book_num)
            if book_model is None:
                # No model for this book: the lookup falls back to the
                # first candidate phrase instead of raising KeyError.
                continue
            known_vectors = [book_model.wv[w] for w in query_words if w in book_model.wv]
            query_vectors[book_num] = np.mean(
                known_vectors or [np.zeros(book_model.vector_size)], axis=0
            )

    # Step 2: extract gematria values from the concept and query each book.
    results_by_book = defaultdict(list)
    for i in range(0, HOLOGRAPHIC_STATE_SIZE_BITS, BITS_PER_CHAR):
        gematria_val = int(final_konzept[i:i + BITS_PER_CHAR], 2)
        if gematria_val == 0:
            continue

        for book_num, book_index in all_indices.items():
            best_phrase_data = get_best_phrase_from_book(
                gematria_val, book_index, method, query_vectors.get(book_num)
            )
            if best_phrase_data:
                results_by_book[book_num].append(best_phrase_data['text'])

    # Step 3: format the output.
    sections = []
    for book_num in sorted(results_by_book):
        # dict.fromkeys removes duplicates while preserving first-seen order.
        phrases_str = " | ".join(dict.fromkeys(results_by_book[book_num]))
        if phrases_str:
            sections.append(f"\n--- Buch {book_num:02} ---\n{phrases_str}")

    return "".join(sections)

# --- Hauptprogramm ---
def main(args):
    """Load the data, build the state and run the holographic query.

    Args:
        args: parsed command-line arguments; ``args.query`` and
            ``args.method`` are read.

    Exits with status 1 when no book data could be loaded or the state is
    falsy.
    """
    indices, models = load_all_data()
    if not indices:
        sys.exit(1)

    tanakh_state = create_holographic_context()
    if not tanakh_state:
        sys.exit(1)

    # This model has no iterations any more: the query itself is part of
    # building the state.
    banner_edge = "=" * 15
    print(f"\n" + banner_edge + f" HOLOGRAPHISCHE ABFRAGE (Methode: {args.method}) " + banner_edge)
    logging.info(f"Aktuelle Abfrage: '{args.query}'")

    answer = process_query_holographic(
        args.query,
        tanakh_state,
        indices,
        models,
        args.method,
    )

    print("\nErgebnis aus dem holographischen State, geordnet nach Büchern:")
    print(answer)

if __name__ == "__main__":
    # Command-line entry point: one positional query plus an optional
    # phrase-selection method.
    parser = argparse.ArgumentParser(description="Holographic XOR Gematria Machine.")
    parser.add_argument("query", type=str, help="Die anfängliche Abfragephrase.")
    parser.add_argument(
        "--method",
        type=str,
        choices=['frequency', 'semantic', 'network', 'default'],
        default='default',
        help="Die Gewichtungsmethode für die Phrasenauswahl.",
    )

    main(parser.parse_args())