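"""Tanakh Holographic Explorer (v13).

Folds the Hebrew text of the Tanakh into a fixed 4096-bit XOR state, combines
it with a query phrase and decodes the result into gematria values, which are
resolved against the per-book phrase indices produced by build_indices.py.
"""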
import json
import logging
import argparse
import numpy as np
import sys
import os
import re
from collections import Counter
import pickle
from gematria import letter_to_value, HEBREW_GEMATRIA_VALUES, linearize_umlauts, decompose_to_latin
# --- Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.getLogger("gensim").setLevel(logging.WARNING)
HOLOGRAPHIC_STATE_SIZE_BITS = 4096
BITS_PER_CHAR = 16
BOOK_RANGE = range(1, 40)
MODELS_DIR = "models_by_book"
INDICES_DIR = "indices_by_book"
CACHE_FILE = "tanakh_data.cache"
SORTED_GEMATRIA = sorted(HEBREW_GEMATRIA_VALUES.items(), key=lambda item: item[1], reverse=True)
def setup_logging(debug_mode):
level = logging.DEBUG if debug_mode else logging.INFO
logging.getLogger().setLevel(level)
# --- Core engine as a class ---
class TanakhExplorer:
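    """Loads the per-book indices, builds the holographic Tanakh state and runs fractal queries."""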
def __init__(self, use_cache=True):
self.all_indices = {}
self.tanakh_state = None
cache_valid = use_cache and os.path.exists(CACHE_FILE)
if cache_valid:
try:
logging.info(f"Lade Daten aus Cache-Datei: {CACHE_FILE}")
with open(CACHE_FILE, 'rb') as f:
cached_data = pickle.load(f)
self.all_indices = cached_data.get('indices', {})
self.tanakh_state = cached_data.get('state')
logging.info("Daten erfolgreich aus Cache geladen.")
except Exception as e:
logging.warning(f"Cache-Datei ist korrupt oder konnte nicht geladen werden: {e}. Lade Daten neu.")
cache_valid = False
if not cache_valid or not self.all_indices or not self.tanakh_state:
self._load_all_indices()
self._create_tanakh_holographic_state()
if use_cache:
self._save_to_cache()
def _load_all_indices(self):
logging.info("Lade Index-Dateien für alle Bücher...")
for i in BOOK_RANGE:
index_path = os.path.join(INDICES_DIR, f"book_{i:02}_index.json")
if os.path.exists(index_path):
                with open(index_path, 'r', encoding='utf-8') as f:
                    self.all_indices[i] = json.load(f)
        if not self.all_indices:
            sys.exit("No index files found. Please run 'build_indices.py' first.")
        logging.info(f"{len(self.all_indices)} book indices loaded.")
def _create_tanakh_holographic_state(self):
logging.info("Erstelle holographischen Tanach-State...")
final_state = '0' * HOLOGRAPHIC_STATE_SIZE_BITS
full_binary_text = ""
for i in BOOK_RANGE:
try:
with open(f"texts/torah/{i:02}.json", 'r', encoding='utf-8') as file:
data = json.load(file)
full_text = ' '.join([' '.join(block) for block in data.get("text", [])])
clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", re.sub(r"\[.*?\]", "", full_text, flags=re.DOTALL))
if clean_text:
full_binary_text += self._text_to_gematria_binary(clean_text, for_state=True)
            except Exception:
                # Skip books whose source file is missing or malformed.
                continue
self.tanakh_state = self._fold_into_state(full_binary_text)
logging.info("Holographischer Tanach-State wurde erstellt.")
def _save_to_cache(self):
logging.info(f"Speichere Daten in Cache-Datei: {CACHE_FILE}")
data_to_cache = {'indices': self.all_indices, 'state': self.tanakh_state}
        with open(CACHE_FILE, 'wb') as f:
            pickle.dump(data_to_cache, f)
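    # Encodes each character of the cleaned text as its gematria value in a
    # fixed-width BITS_PER_CHAR binary field. With for_state=True only Hebrew
    # letters are kept; otherwise Latin letters survive the cleanup as well.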
@staticmethod
def _text_to_gematria_binary(text, for_state=False):
text_for_calc = linearize_umlauts(text.lower())
if for_state:
clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", text_for_calc)
else:
clean_text = re.sub(r"[^a-z\u05D0-\u05EA]+", "", text_for_calc)
logging.debug(f"text_to_gematria_binary (for_state={for_state}): Original='{text[:30]}...', Bereinigt='{clean_text[:30]}...'")
binary_string = "".join(format(letter_to_value(c), f'0{BITS_PER_CHAR}b') for c in clean_text)
logging.debug(f" -> erzeugter Binärstring (erste 64 Bits): {binary_string[:64]}")
return binary_string
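    # XOR-folds an arbitrarily long bit string into a fixed-width state: the
    # input is split into HOLOGRAPHIC_STATE_SIZE_BITS blocks (the last one is
    # zero-padded) and the blocks are XORed together, e.g. for a 4-bit state
    # '1100' folded with '1010' gives '0110'.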
@staticmethod
def _fold_into_state(binary_string, initial_state=None):
state = np.array(list(initial_state), dtype=np.int8) if initial_state else np.zeros(HOLOGRAPHIC_STATE_SIZE_BITS, dtype=np.int8)
for i in range(0, len(binary_string), HOLOGRAPHIC_STATE_SIZE_BITS):
block = binary_string[i:i+HOLOGRAPHIC_STATE_SIZE_BITS].ljust(HOLOGRAPHIC_STATE_SIZE_BITS, '0')
state = np.bitwise_xor(state, np.array(list(block), dtype=np.int8))
return "".join(state.astype(str))
def get_best_phrase_from_all_books(self, gematria_val, method):
best_overall_phrase_obj = None
best_overall_score = -1.0
for book_num, book_index in self.all_indices.items():
candidates = book_index.get(str(gematria_val), {}).get('phrases', [])
if not candidates: continue
pg_score = book_index.get(str(gematria_val), {}).get('pagerank', 0)
best_in_book = max(candidates, key=lambda p: pg_score / p.get('count', 1) if p.get('count', 0) > 0 else 0)
current_score = pg_score / best_in_book.get('count', 1) if best_in_book.get('count', 0) > 0 else 0
if current_score > best_overall_score:
best_overall_score = current_score
best_in_book['source_book'] = book_num
best_overall_phrase_obj = best_in_book
if best_overall_phrase_obj:
return best_overall_phrase_obj, "exact"
for offset in [1, -1]:
for book_num, book_index in self.all_indices.items():
candidates = book_index.get(str(gematria_val + offset), {}).get('phrases', [])
if candidates:
best_in_book = min(candidates, key=lambda p: p.get('position', float('inf')))
best_in_book['source_book'] = book_num
return best_in_book, f"neighbor(d={offset})"
decomposed = decompose_to_latin(gematria_val)
if decomposed:
return {"text": f"[{decomposed}]", "position": -2, "source_book": "N/A"}, "decomposed"
return None, None
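    # Fractal expansion: each round combines the initial query with every phrase
    # found so far, folds the combination against the Tanakh state, decodes the
    # XOR result into BITS_PER_CHAR-wide gematria values and resolves them to
    # phrases, which then seed the next round until `depth` is reached.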
def run_fractal_mode(self, query, depth, method):
print(f"\n" + "="*15 + f" FRAKTALE LOGOS-AUSSCHÖPFUNG (Tiefe: {depth}, Methode: {method}) " + "="*15)
initial_logos = query
        # Use 0 as the source book for the initial query so the sort key stays
        # an integer and sorting by source_book cannot raise a TypeError.
        all_found_phrases_map = {initial_logos: {"text": initial_logos, "position": -1, "depth": 0, "count": 1, "source_book": 0}}
phrases_to_process_this_level = {initial_logos}
for d in range(depth):
logging.info(f"--- Starte Tiefe {d + 1}/{depth} mit {len(phrases_to_process_this_level)} Phrasen ---")
phrases_for_next_level = set()
for p_current in phrases_to_process_this_level:
combined_query = f"{initial_logos} {p_current}"
query_binary = self._text_to_gematria_binary(combined_query)
konzept_state = self._fold_into_state(query_binary)
final_konzept = "".join(str(int(a)^int(b)) for a,b in zip(self.tanakh_state, konzept_state))
for i in range(0, len(final_konzept), BITS_PER_CHAR):
gematria_val = int(final_konzept[i:i+BITS_PER_CHAR], 2)
if gematria_val == 0: continue
phrase_obj, _ = self.get_best_phrase_from_all_books(gematria_val, method)
if phrase_obj:
phrase_text = phrase_obj['text']
if phrase_text not in all_found_phrases_map:
phrase_obj['depth'] = d + 1
phrase_obj['count'] = 1
all_found_phrases_map[phrase_text] = phrase_obj
phrases_for_next_level.add(phrase_text)
else:
all_found_phrases_map[phrase_text]['count'] += 1
if not phrases_for_next_level:
logging.info(f"Keine neuen Phrasen in Tiefe {d + 1} gefunden.")
break
phrases_to_process_this_level = phrases_for_next_level
        # Sort by book and then by position to preserve the narrative order.
sorted_by_position = sorted(all_found_phrases_map.values(), key=lambda x: (x.get('source_book', 99), x.get('position', -1)))
print("\n--- Finale Synthese (geordnet nach Buch und Auftreten im Text) ---")
current_book = -1
for p in sorted_by_position:
book = p.get('source_book')
if book != current_book:
                # Print a header for each new book.
if isinstance(book, int) and book > 0:
print(f"\n--- Buch {book:02d} ---")
elif book == 0:
print(f"--- Query ---")
current_book = book
print(f"{p['text']}", end=" | ")
print("\n")
        # Sort by frequency for the top concepts.
sorted_by_count = sorted(all_found_phrases_map.values(), key=lambda x: x['count'], reverse=True)
print("\n--- Top 25 Resonanz-Konzepte (geordnet nach Häufigkeit im Fraktal) ---")
for p in sorted_by_count[:25]:
source = f"B{p.get('source_book', '??'):02d}" if isinstance(p.get('source_book'), int) and p.get('source_book') > 0 else p.get('source_book', 'N/A')
print(f"[{p['count']:2d}x] {p['text']} (Original in {source}, Pos: {p.get('position', 'N/A')})")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Tanakh Holographic Explorer (v13, Final).")
parser.add_argument("query", type=str, help="Die anfängliche Abfragephrase (Logos).")
parser.add_argument("--method", type=str, choices=['frequency', 'network', 'default'], default='network', help="Gewichtungsmethode.")
parser.add_argument("--depth", type=int, default=1, help="Maximale Tiefe der fraktalen Suche.")
parser.add_argument("--no-cache", action="store_true", help="Erzwingt das Neuladen der Daten.")
parser.add_argument("--debug", action="store_true", help="Aktiviert detaillierte Debug-Ausgaben.")
args = parser.parse_args()
setup_logging(args.debug)
engine = TanakhExplorer(use_cache=not args.no_cache)
engine.run_fractal_mode(args.query, args.depth, args.method)
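    # Example invocation (script filename and query are illustrative):
    #   python holographic_explorer.py "logos" --depth 2 --method network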