import json
import logging
import argparse
import numpy as np
import sys
import os
import re
from collections import defaultdict
from gensim.models import Word2Vec
from gematria import letter_to_value, HEBREW_GEMATRIA_VALUES
# --- Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
HOLOGRAPHIC_STATE_SIZE_BITS = 4096  # Fixed state size in bits
BITS_PER_CHAR = 16  # Bits per gematria value
BOOK_RANGE = range(1, 40)  # Books 1 through 39
MODELS_DIR = "models_by_book"
INDICES_DIR = "indices_by_book"
SORTED_GEMATRIA = sorted(HEBREW_GEMATRIA_VALUES.items(), key=lambda item: item[1], reverse=True)
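# Note: with these defaults the 4096-bit state is later read back as 4096 / 16 = 256
# gematria slots of BITS_PER_CHAR bits each (see process_query_holographic below).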
# --- Helper functions for loading data ---
def load_all_data():
    """Loads all 39 index and model files."""
    all_indices = {}
    all_models = {}
    logging.info("Loading data for all books...")
    for i in BOOK_RANGE:
        index_path = os.path.join(INDICES_DIR, f"book_{i:02}_index.json")
        model_path = os.path.join(MODELS_DIR, f"book_{i:02}.w2v")
        if os.path.exists(index_path) and os.path.exists(model_path):
            try:
                with open(index_path, 'r', encoding='utf-8') as f:
                    all_indices[i] = json.load(f)
                all_models[i] = Word2Vec.load(model_path)
            except Exception as e:
                logging.error(f"Could not load data for book {i:02}: {e}")
                continue
    if not all_indices:
        logging.error("No index/model files found. Please run 'build_all_indices.py'.")
        return None, None
    logging.info(f"Successfully loaded {len(all_indices)} book networks.")
    return all_indices, all_models
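# Expected on-disk layout (derived from the paths above; the files are built by 'build_all_indices.py'):
#   indices_by_book/book_01_index.json ... book_39_index.json
#   models_by_book/book_01.w2v         ... book_39.w2v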
# --- Core logic: Holographic State Engine ---
def text_to_gematria_binary(text):
    """Converts a text into a gematria-based binary string."""
    clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", text)
    return "".join(format(letter_to_value(c), f'0{BITS_PER_CHAR}b') for c in clean_text)
def fold_into_state(binary_string, state_size=HOLOGRAPHIC_STATE_SIZE_BITS):
    """
    Folds a binary string of arbitrary length into a fixed-size state.
    This is the core function for avoiding padding.
    """
    state = np.zeros(state_size, dtype=np.int8)
    for i in range(0, len(binary_string), state_size):
        block = binary_string[i:i+state_size]
        block_padded = block.ljust(state_size, '0')
        block_array = np.array(list(block_padded), dtype=np.int8)
        state = np.bitwise_xor(state, block_array)
    return "".join(state.astype(str))
def create_holographic_context():
    """
    Creates the sequential holographic state by folding all books,
    one after another, into a fixed-size state.
    """
    logging.info("Creating holographic state by sequentially folding in all books...")
    final_state = '0' * HOLOGRAPHIC_STATE_SIZE_BITS
    for i in BOOK_RANGE:
        try:
            with open(f"texts/torah/{i:02}.json", 'r', encoding='utf-8') as file:
                logging.info(f"Folding book {i:02} into the state...")
                data = json.load(file)
                full_text = ' '.join([' '.join(block) for block in data.get("text", [])])
                clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", re.sub(r"\[.*?\]", "", full_text, flags=re.DOTALL))
                if not clean_text:
                    continue
                book_binary = text_to_gematria_binary(clean_text)
                # XOR the folded book into the accumulated state instead of overwriting it,
                # so that every book contributes to the final holographic state.
                book_state = fold_into_state(book_binary, HOLOGRAPHIC_STATE_SIZE_BITS)
                final_state = "".join(str(int(a) ^ int(b)) for a, b in zip(final_state, book_state))
        except FileNotFoundError:
            logging.warning(f"File for book {i:02} not found, skipping.")
            continue
    logging.info("Holographic Tanakh state created successfully.")
    return final_state
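# Note: the returned state is a plain string of '0'/'1' characters of length
# HOLOGRAPHIC_STATE_SIZE_BITS; it is later combined with a folded query state via bitwise
# XOR (see process_query_holographic). Since XOR is associative and commutative, the order
# in which texts are folded together does not affect the resulting state.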
# --- Functions for phrase selection and processing ---
def cosine_similarity(v1, v2):
    """Computes the cosine similarity between two vectors (with a small epsilon against division by zero)."""
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + 1e-9)
def get_decomposed_word(number):
    """Fallback: algorithmically decomposes a number into Hebrew letters (greedy, largest value first)."""
    text, remainder = "", number
    for char, value in SORTED_GEMATRIA:
        while remainder >= value:
            text += char
            remainder -= value
    return text
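# Worked example of the greedy decomposition (assuming the standard value scheme in which
# Tav = 400 is the largest letter value):
#   get_decomposed_word(613) -> "תריג"   # 400 (Tav) + 200 (Resh) + 10 (Yod) + 3 (Gimel)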
def get_best_phrase_from_book(gematria_val, book_index, method, query_vector=None):
    """Finds the best phrase in a SINGLE book index based on the selected method."""
    candidates = book_index.get(str(gematria_val), {}).get('phrases', [])
    if not candidates:
        return None
    if method == 'frequency':
        # Prefer the rarest phrase with this gematria value.
        return min(candidates, key=lambda p: p.get('count', 1))
    if method == 'semantic' and query_vector is not None and not np.all(query_vector == 0):
        return max(candidates, key=lambda p: cosine_similarity(np.array(p['vector']), query_vector))
    if method == 'network':
        pagerank_score = book_index.get(str(gematria_val), {}).get('pagerank', 0)
        return max(candidates, key=lambda p: pagerank_score / p.get('count', 1))
    return candidates[0]
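# Index schema assumed by the lookups above (field names inferred from the accesses;
# the files themselves come from 'build_all_indices.py'):
#   book_index[str(gematria_value)] == {
#       'pagerank': <float>,
#       'phrases': [{'text': <str>, 'count': <int>, 'vector': <list of float>}, ...]
#   }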
def process_query_holographic(query_text, tanakh_state, all_indices, all_models, method):
    """
    Processes the query against the holographic state and returns the
    results grouped by book.
    """
    # Step 1: Fold the query into the state to obtain the final "concept"
    query_binary = text_to_gematria_binary(query_text)
    konzept_state = fold_into_state(query_binary, HOLOGRAPHIC_STATE_SIZE_BITS)
    final_konzept = "".join(str(int(a) ^ int(b)) for a, b in zip(tanakh_state, konzept_state))
    # Step 2: Extract gematria values from the concept and query the book networks
    results_by_book = defaultdict(list)
    # Precompute the per-book query vector once; it is only needed for the 'semantic' method.
    query_vectors = {}
    if method == 'semantic':
        for book_num, book_model in all_models.items():
            word_vectors = [book_model.wv[w] for w in query_text.split() if w in book_model.wv]
            query_vectors[book_num] = np.mean(word_vectors or [np.zeros(book_model.vector_size)], axis=0)
    for i in range(0, HOLOGRAPHIC_STATE_SIZE_BITS, BITS_PER_CHAR):
        gematria_val = int(final_konzept[i:i+BITS_PER_CHAR], 2)
        if gematria_val == 0:
            continue
        for book_num, book_index in all_indices.items():
            query_vector = query_vectors.get(book_num) if method == 'semantic' else None
            best_phrase_data = get_best_phrase_from_book(gematria_val, book_index, method, query_vector)
            if best_phrase_data:
                results_by_book[book_num].append(best_phrase_data['text'])
    # Step 3: Format the output
    output_string = ""
    for book_num in sorted(results_by_book.keys()):
        # De-duplicate while preserving first-occurrence order.
        unique_phrases = list(dict.fromkeys(results_by_book[book_num]))
        phrases_str = " | ".join(unique_phrases)
        if phrases_str:
            output_string += f"\n--- Book {book_num:02} ---\n{phrases_str}"
    return output_string
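# Minimal usage sketch (assumes the index/model files and the texts/torah/*.json sources exist;
# "שלום" is just an arbitrary example query):
#   indices, models = load_all_data()
#   state = create_holographic_context()
#   print(process_query_holographic("שלום", state, indices, models, method='frequency'))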
# --- Main program ---
def main(args):
    """Loads data, creates the state, and runs the holographic query."""
    all_indices, all_models = load_all_data()
    if not all_indices:
        sys.exit(1)
    holographic_tanakh_state = create_holographic_context()
    if not holographic_tanakh_state:
        sys.exit(1)
    # There are no iterations in this model anymore, since the query is part of the state creation.
    print("\n" + "=" * 15 + f" HOLOGRAPHIC QUERY (method: {args.method}) " + "=" * 15)
    logging.info(f"Current query: '{args.query}'")
    result_text = process_query_holographic(args.query, holographic_tanakh_state, all_indices, all_models, args.method)
    print("\nResult from the holographic state, ordered by book:")
    print(result_text)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Holographic XOR Gematria Machine.")
    parser.add_argument("query", type=str, help="The initial query phrase.")
    parser.add_argument("--method", type=str, choices=['frequency', 'semantic', 'network', 'default'], default='default', help="The weighting method for phrase selection.")
    args = parser.parse_args()
    main(args)
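# CLI usage sketch (the script filename below is assumed; substitute the actual file name):
#   python holographic_query.py "<Hebrew query phrase>" --method semantic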