# xor_tanakh_2/explore_tanakh.py
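#
# Builds a 4096-bit "holographic" state by XOR-folding the gematria values of
# every Hebrew letter in the Tanakh (16 bits per letter), then expands a query
# phrase fractally: each round XORs the query against the Tanakh state and
# decodes the result back into gematria values, which are resolved to phrases
# via per-book indices.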
import json
import logging
import argparse
import numpy as np
import sys
import os
import re
from collections import Counter
import pickle
from gematria import letter_to_value, HEBREW_GEMATRIA_VALUES, linearize_umlauts, decompose_to_latin
# --- Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.getLogger("gensim").setLevel(logging.WARNING)
HOLOGRAPHIC_STATE_SIZE_BITS = 4096
BITS_PER_CHAR = 16
BOOK_RANGE = range(1, 40)
MODELS_DIR = "models_by_book"
INDICES_DIR = "indices_by_book"
CACHE_FILE = "tanakh_data.cache"
SORTED_GEMATRIA = sorted(HEBREW_GEMATRIA_VALUES.items(), key=lambda item: item[1], reverse=True)
def setup_logging(debug_mode):
    level = logging.DEBUG if debug_mode else logging.INFO
    logging.getLogger().setLevel(level)
# --- Core engine as a class ---
class TanakhExplorer:
    def __init__(self, use_cache=True):
        self.all_indices = {}
        self.tanakh_state = None
        cache_valid = use_cache and os.path.exists(CACHE_FILE)
        if cache_valid:
            try:
                logging.info(f"Loading data from cache file: {CACHE_FILE}")
                with open(CACHE_FILE, 'rb') as f:
                    cached_data = pickle.load(f)
                self.all_indices = cached_data.get('indices', {})
                self.tanakh_state = cached_data.get('state')
                logging.info("Data successfully loaded from cache.")
            except Exception as e:
                logging.warning(f"Cache file is corrupt or could not be loaded: {e}. Reloading data.")
                cache_valid = False
        if not cache_valid or not self.all_indices or not self.tanakh_state:
            self._load_all_indices()
            self._create_tanakh_holographic_state()
            if use_cache:
                self._save_to_cache()
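    # Expected index layout, as consumed below: each book index maps
    # str(gematria_value) -> {"pagerank": float,
    #                         "phrases": [{"text": str, "position": int, "count": int}, ...]}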
    def _load_all_indices(self):
        logging.info("Loading index files for all books...")
        for i in BOOK_RANGE:
            index_path = os.path.join(INDICES_DIR, f"book_{i:02}_index.json")
            if os.path.exists(index_path):
                with open(index_path, 'r', encoding='utf-8') as f:
                    self.all_indices[i] = json.load(f)
        if not self.all_indices:
            sys.exit("No index files found. Please run 'build_indices.py' first.")
        logging.info(f"{len(self.all_indices)} book indices loaded.")
    def _create_tanakh_holographic_state(self):
        logging.info("Creating holographic Tanakh state...")
        full_binary_text = ""
        for i in BOOK_RANGE:
            try:
                with open(f"texts/torah/{i:02}.json", 'r', encoding='utf-8') as file:
                    data = json.load(file)
                full_text = ' '.join([' '.join(block) for block in data.get("text", [])])
                # Strip bracketed annotations, then keep only Hebrew letters.
                clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", re.sub(r"\[.*?\]", "", full_text, flags=re.DOTALL))
                if clean_text:
                    full_binary_text += self._text_to_gematria_binary(clean_text, for_state=True)
            except Exception as e:
                logging.debug(f"Skipping book {i:02}: {e}")
                continue
        self.tanakh_state = self._fold_into_state(full_binary_text)
        logging.info("Holographic Tanakh state created.")
    def _save_to_cache(self):
        logging.info(f"Saving data to cache file: {CACHE_FILE}")
        data_to_cache = {'indices': self.all_indices, 'state': self.tanakh_state}
        with open(CACHE_FILE, 'wb') as f:
            pickle.dump(data_to_cache, f)
    @staticmethod
    def _text_to_gematria_binary(text, for_state=False):
        text_for_calc = linearize_umlauts(text.lower())
        if for_state:
            clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", text_for_calc)
        else:
            clean_text = re.sub(r"[^a-z\u05D0-\u05EA]+", "", text_for_calc)
        logging.debug(f"text_to_gematria_binary (for_state={for_state}): original='{text[:30]}...', cleaned='{clean_text[:30]}...'")
        binary_string = "".join(format(letter_to_value(c), f'0{BITS_PER_CHAR}b') for c in clean_text)
        logging.debug(f" -> generated binary string (first 64 bits): {binary_string[:64]}")
        return binary_string
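    # XOR folding: the input bit string is cut into HOLOGRAPHIC_STATE_SIZE_BITS
    # blocks (the last block is zero-padded on the right) and all blocks are
    # XOR-accumulated into a single fixed-size state string of '0'/'1' characters.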
    @staticmethod
    def _fold_into_state(binary_string, initial_state=None):
        state = np.array(list(initial_state), dtype=np.int8) if initial_state else np.zeros(HOLOGRAPHIC_STATE_SIZE_BITS, dtype=np.int8)
        for i in range(0, len(binary_string), HOLOGRAPHIC_STATE_SIZE_BITS):
            block = binary_string[i:i+HOLOGRAPHIC_STATE_SIZE_BITS].ljust(HOLOGRAPHIC_STATE_SIZE_BITS, '0')
            state = np.bitwise_xor(state, np.array(list(block), dtype=np.int8))
        return "".join(state.astype(str))
    def get_best_phrase_from_all_books(self, gematria_val, method):
        best_overall_phrase_obj = None
        best_overall_score = -1.0
        for book_num, book_index in self.all_indices.items():
            candidates = book_index.get(str(gematria_val), {}).get('phrases', [])
            if not candidates:
                continue
            pg_score = book_index.get(str(gematria_val), {}).get('pagerank', 0)
            best_in_book = max(candidates, key=lambda p: pg_score / p.get('count', 1) if p.get('count', 0) > 0 else 0)
            current_score = pg_score / best_in_book.get('count', 1) if best_in_book.get('count', 0) > 0 else 0
            if current_score > best_overall_score:
                best_overall_score = current_score
                best_in_book['source_book'] = book_num
                best_overall_phrase_obj = best_in_book
        if best_overall_phrase_obj:
            return best_overall_phrase_obj, "exact"
        for offset in [1, -1]:
            for book_num, book_index in self.all_indices.items():
                candidates = book_index.get(str(gematria_val + offset), {}).get('phrases', [])
                if candidates:
                    best_in_book = min(candidates, key=lambda p: p.get('position', float('inf')))
                    best_in_book['source_book'] = book_num
                    return best_in_book, f"neighbor(d={offset})"
        decomposed = decompose_to_latin(gematria_val)
        if decomposed:
            return {"text": f"[{decomposed}]", "position": -2, "source_book": "N/A"}, "decomposed"
        return None, None
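    # Fractal expansion: at each depth, every phrase found so far is combined
    # with the initial query, folded into a state, XORed against the Tanakh
    # state, and the result is read back as a sequence of 16-bit gematria
    # values that are resolved to phrases via the book indices.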
    def run_fractal_mode(self, query, depth, method):
        print("\n" + "="*15 + f" FRACTAL LOGOS EXPANSION (depth: {depth}, method: {method}) " + "="*15)
        initial_logos = query
        # Use 0 as the query's source book so that sorting by book does not
        # raise a TypeError from mixing ints and strings.
        all_found_phrases_map = {initial_logos: {"text": initial_logos, "position": -1, "depth": 0, "count": 1, "source_book": 0}}
        phrases_to_process_this_level = {initial_logos}
        for d in range(depth):
            logging.info(f"--- Starting depth {d + 1}/{depth} with {len(phrases_to_process_this_level)} phrases ---")
            phrases_for_next_level = set()
            for p_current in phrases_to_process_this_level:
                combined_query = f"{initial_logos} {p_current}"
                query_binary = self._text_to_gematria_binary(combined_query)
                konzept_state = self._fold_into_state(query_binary)
                final_konzept = "".join(str(int(a) ^ int(b)) for a, b in zip(self.tanakh_state, konzept_state))
                for i in range(0, len(final_konzept), BITS_PER_CHAR):
                    gematria_val = int(final_konzept[i:i+BITS_PER_CHAR], 2)
                    if gematria_val == 0:
                        continue
                    phrase_obj, _ = self.get_best_phrase_from_all_books(gematria_val, method)
                    if phrase_obj:
                        phrase_text = phrase_obj['text']
                        if phrase_text not in all_found_phrases_map:
                            phrase_obj['depth'] = d + 1
                            phrase_obj['count'] = 1
                            all_found_phrases_map[phrase_text] = phrase_obj
                            phrases_for_next_level.add(phrase_text)
                        else:
                            all_found_phrases_map[phrase_text]['count'] += 1
            if not phrases_for_next_level:
                logging.info(f"No new phrases found at depth {d + 1}.")
                break
            phrases_to_process_this_level = phrases_for_next_level
        # Sort by book and then by position to preserve the narrative order.
        # Non-integer source books (e.g. "N/A" from decomposition) sort last.
        def book_sort_key(p):
            book = p.get('source_book', 99)
            return (book if isinstance(book, int) else 99, p.get('position', -1))
        sorted_by_position = sorted(all_found_phrases_map.values(), key=book_sort_key)
        print("\n--- Final synthesis (ordered by book and occurrence in the text) ---")
        current_book = -1
        for p in sorted_by_position:
            book = p.get('source_book')
            if book != current_book:
                # Print a header for each new book.
                if isinstance(book, int) and book > 0:
                    print(f"\n--- Book {book:02d} ---")
                elif book == 0:
                    print("--- Query ---")
                current_book = book
            print(f"{p['text']}", end=" | ")
        print("\n")
        # Sort by frequency for the top concepts.
        sorted_by_count = sorted(all_found_phrases_map.values(), key=lambda x: x['count'], reverse=True)
        print("\n--- Top 25 resonance concepts (ordered by frequency in the fractal) ---")
        for p in sorted_by_count[:25]:
            source = f"B{p.get('source_book', '??'):02d}" if isinstance(p.get('source_book'), int) and p.get('source_book') > 0 else p.get('source_book', 'N/A')
            print(f"[{p['count']:2d}x] {p['text']} (original in {source}, pos: {p.get('position', 'N/A')})")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Tanakh Holographic Explorer (v13, Final).")
    parser.add_argument("query", type=str, help="The initial query phrase (Logos).")
    parser.add_argument("--method", type=str, choices=['frequency', 'network', 'default'], default='network', help="Weighting method.")
    parser.add_argument("--depth", type=int, default=1, help="Maximum depth of the fractal search.")
    parser.add_argument("--no-cache", action="store_true", help="Forces a reload of the data.")
    parser.add_argument("--debug", action="store_true", help="Enables detailed debug output.")
    args = parser.parse_args()
    setup_logging(args.debug)
    engine = TanakhExplorer(use_cache=not args.no_cache)
    engine.run_fractal_mode(args.query, args.depth, args.method)
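# Example invocation (the query phrase below is illustrative; any string works):
#   python explore_tanakh.py "bereshit" --depth 2 --method network --debug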