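"""Holographic XOR Gematria Machine.

Folds the Hebrew text of all 39 Tanakh books into a single fixed-size XOR
state, folds the query phrase into that state, and maps the resulting 16-bit
gematria values back to candidate phrases from the per-book indices and
Word2Vec models (see build_all_indices.py).
"""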
import json
import logging
import argparse
import numpy as np
import sys
import os
import re
from collections import defaultdict
from gensim.models import Word2Vec
from gematria import letter_to_value, HEBREW_GEMATRIA_VALUES

# --- Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
HOLOGRAPHIC_STATE_SIZE_BITS = 4096  # Fixed state size in bits
BITS_PER_CHAR = 16                  # Bits per gematria value
BOOK_RANGE = range(1, 40)           # Books 1 through 39
MODELS_DIR = "models_by_book"
INDICES_DIR = "indices_by_book"
SORTED_GEMATRIA = sorted(HEBREW_GEMATRIA_VALUES.items(), key=lambda item: item[1], reverse=True)
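# Note: with a 4096-bit state and 16 bits per value, the folded state exposes
# 4096 / 16 = 256 gematria slots when it is read back in process_query_holographic.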

# --- Helper functions for loading data ---

def load_all_data():
    """Lädt alle 39 Index- und Modelldateien."""
    all_indices = {}
    all_models = {}
    logging.info("Lade Daten für alle Bücher...")
    for i in BOOK_RANGE:
        index_path = os.path.join(INDICES_DIR, f"book_{i:02}_index.json")
        model_path = os.path.join(MODELS_DIR, f"book_{i:02}.w2v")
        if os.path.exists(index_path) and os.path.exists(model_path):
            try:
                with open(index_path, 'r', encoding='utf-8') as f:
                    all_indices[i] = json.load(f)
                all_models[i] = Word2Vec.load(model_path)
            except Exception as e:
                logging.error(f"Konnte Daten für Buch {i:02} nicht laden: {e}")
                continue
    if not all_indices:
        logging.error("Keine Index/Modell-Dateien gefunden. Bitte 'build_all_indices.py' ausführen.")
        return None, None
    logging.info(f"{len(all_indices)} Buch-Netzwerke erfolgreich geladen.")
    return all_indices, all_models
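# Expected on-disk layout (derived from the paths used above and in
# create_holographic_context below):
#   indices_by_book/book_01_index.json ... book_39_index.json
#   models_by_book/book_01.w2v         ... book_39.w2v
#   texts/torah/01.json                ... 39.json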

# --- Core logic: Holographic State Engine ---

def text_to_gematria_binary(text):
    """Wandelt einen Text in einen Gematria-basierten Binärstring um."""
    clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", text)
    return "".join(format(letter_to_value(c), f'0{BITS_PER_CHAR}b') for c in clean_text)

def fold_into_state(binary_string, state_size=HOLOGRAPHIC_STATE_SIZE_BITS):
    """
    Folds a binary string of arbitrary length into a fixed-size state.
    This is the core function for avoiding padding.
    """
    state = np.zeros(state_size, dtype=np.int8)
    for i in range(0, len(binary_string), state_size):
        block = binary_string[i:i+state_size]
        block_padded = block.ljust(state_size, '0')
        block_array = np.array(list(block_padded), dtype=np.int8)
        state = np.bitwise_xor(state, block_array)
    return "".join(state.astype(str))

def create_holographic_context():
    """
    Creates the sequential, holographic state by folding all books,
    one after another, into a fixed-size state.
    """
    logging.info("Creating holographic state by sequentially folding in all books...")
    final_state = '0' * HOLOGRAPHIC_STATE_SIZE_BITS

    for i in BOOK_RANGE:
        try:
            with open(f"texts/torah/{i:02}.json", 'r', encoding='utf-8') as file:
                logging.info(f"Falte Buch {i:02} in den State ein...")
                data = json.load(file)
                full_text = ' '.join([' '.join(block) for block in data.get("text", [])])
                clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", re.sub(r"\[.*?\]", "", full_text, flags=re.DOTALL))

                if not clean_text:
                    continue

                book_binary = text_to_gematria_binary(clean_text)
                book_state = fold_into_state(book_binary, HOLOGRAPHIC_STATE_SIZE_BITS)
                # XOR the book's state into the accumulated state so every book contributes.
                final_state = "".join(str(int(a) ^ int(b)) for a, b in zip(final_state, book_state))
        except FileNotFoundError:
            logging.warning(f"Datei für Buch {i:02} nicht gefunden, wird übersprungen.")
            continue

    logging.info("Holographischer Tanach-State wurde erfolgreich erstellt.")
    return final_state

# --- Functions for phrase selection and processing ---

def cosine_similarity(v1, v2):
    """Berechnet die Kosinus-Ähnlichkeit zwischen zwei Vektoren."""
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + 1e-9)
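# Example: cosine_similarity(np.array([1.0, 0.0]), np.array([0.0, 1.0])) returns 0.0;
# the 1e-9 term only guards against division by zero for all-zero vectors.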

def get_decomposed_word(number):
    """Fallback: Zerlegt eine Zahl algorithmisch in hebräische Buchstaben."""
    text, remainder = "", number
    for char, value in SORTED_GEMATRIA:
        while remainder >= value:
            text += char
            remainder -= value
    return text
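# Example (assuming standard letter values such as כ=20 and ו=6):
# get_decomposed_word(26) greedily takes the largest values first and returns "כו".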

def get_best_phrase_from_book(gematria_val, book_index, method, query_vector=None):
    """Findet die beste Phrase in einem EINZELNEN Buch-Index basierend auf der Methode."""
    candidates = book_index.get(str(gematria_val), {}).get('phrases', [])
    if not candidates: return None
    if method == 'frequency': return min(candidates, key=lambda p: p.get('count', 1))
    if method == 'semantic' and query_vector is not None and not np.all(query_vector == 0):
        return max(candidates, key=lambda p: cosine_similarity(np.array(p['vector']), query_vector))
    if method == 'network':
        pagerank_score = book_index.get(str(gematria_val), {}).get('pagerank', 0)
        return max(candidates, key=lambda p: pagerank_score / p.get('count', 1))
    return candidates[0]
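# Index entry shape expected by this function (the index files are built by
# build_all_indices.py):
#   { "<gematria value>": { "phrases": [ {"text": ..., "count": ..., "vector": [...]} ],
#                           "pagerank": ... } }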

def process_query_holographic(query_text, tanakh_state, all_indices, all_models, method):
    """
    Processes the query against the holographic state and returns the
    results grouped by book.
    """
    # Step 1: Fold the query into the state to obtain the final "concept"
    query_binary = text_to_gematria_binary(query_text)
    konzept_state = fold_into_state(query_binary, HOLOGRAPHIC_STATE_SIZE_BITS)
    final_konzept = "".join(str(int(a) ^ int(b)) for a, b in zip(tanakh_state, konzept_state))

    # Step 2: Extract gematria values from the concept and query the book networks.
    # The per-book query vector depends only on the query text, so it is computed
    # once per book instead of once per extracted value.
    query_vectors = {}
    if method == 'semantic':
        for book_num, book_model in all_models.items():
            word_vectors = [book_model.wv[w] for w in query_text.split() if w in book_model.wv]
            query_vectors[book_num] = np.mean(word_vectors or [np.zeros(book_model.vector_size)], axis=0)

    results_by_book = defaultdict(list)
    for i in range(0, HOLOGRAPHIC_STATE_SIZE_BITS, BITS_PER_CHAR):
        gematria_val = int(final_konzept[i:i+BITS_PER_CHAR], 2)
        if gematria_val == 0:
            continue

        for book_num, book_index in all_indices.items():
            query_vector = query_vectors.get(book_num)
            best_phrase_data = get_best_phrase_from_book(gematria_val, book_index, method, query_vector)
            if best_phrase_data:
                results_by_book[book_num].append(best_phrase_data['text'])

    # Step 3: Format the output
    output_string = ""
    for book_num in sorted(results_by_book.keys()):
        unique_phrases = sorted(list(set(results_by_book[book_num])), key=results_by_book[book_num].index)
        phrases_str = " | ".join(unique_phrases)
        if phrases_str:
            output_string += f"\n--- Book {book_num:02} ---\n{phrases_str}"

    return output_string

# --- Main program ---
def main(args):
    """Lädt Daten, erstellt den State und führt die holographische Abfrage aus."""
    all_indices, all_models = load_all_data()
    if not all_indices:
        sys.exit(1)

    holographic_tanakh_state = create_holographic_context()
    if not holographic_tanakh_state:
        sys.exit(1)

    # There are no iterations in this model anymore, since the query is part of the state creation.
    print("\n" + "="*15 + f" HOLOGRAPHIC QUERY (method: {args.method}) " + "="*15)
    logging.info(f"Current query: '{args.query}'")

    result_text = process_query_holographic(args.query, holographic_tanakh_state, all_indices, all_models, args.method)

    print("\nErgebnis aus dem holographischen State, geordnet nach Büchern:")
    print(result_text)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Holographic XOR Gematria Machine.")
    parser.add_argument("query", type=str, help="Die anfängliche Abfragephrase.")
    parser.add_argument("--method", type=str, choices=['frequency', 'semantic', 'network', 'default'], default='default', help="Die Gewichtungsmethode für die Phrasenauswahl.")

    args = parser.parse_args()
    main(args)
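# Example invocation (the script filename and query text are illustrative):
#   python holographic_query.py "בראשית ברא אלהים" --method semantic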