import json
import logging
import argparse
import numpy as np
import sys
import os
import re
import pickle
from gematria import letter_to_value, HEBREW_GEMATRIA_VALUES, linearize_umlauts, decompose_to_latin
# --- Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.getLogger("gensim").setLevel(logging.WARNING)
HOLOGRAPHIC_STATE_SIZE_BITS = 4096
BITS_PER_CHAR = 16
BOOK_RANGE = range(1, 40)
MODELS_DIR = "models_by_book"
INDICES_DIR = "indices_by_book"
CACHE_FILE = "tanakh_data.cache"
SORTED_GEMATRIA = sorted(HEBREW_GEMATRIA_VALUES.items(), key=lambda item: item[1], reverse=True)
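# Each character is encoded as a BITS_PER_CHAR-bit gematria value, so one state
# window of HOLOGRAPHIC_STATE_SIZE_BITS bits holds 4096 / 16 = 256 character slots;
# longer inputs are XOR-folded into the window block by block (see _fold_into_state).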
def setup_logging(debug_mode):
    level = logging.DEBUG if debug_mode else logging.INFO
    logging.getLogger().setLevel(level)
# --- Core engine as a class ---
class TanakhExplorer:
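    """Core engine: loads the per-book gematria indices, folds the full Tanakh
    text into a fixed-size XOR state, and resolves gematria values back to
    phrases from that state.
    """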
    def __init__(self, use_cache=True):
        self.all_indices = {}
        self.tanakh_state = None
        cache_valid = use_cache and os.path.exists(CACHE_FILE)
        if cache_valid:
            try:
                logging.info(f"Loading data from cache file: {CACHE_FILE}")
                with open(CACHE_FILE, 'rb') as f:
                    cached_data = pickle.load(f)
                self.all_indices = cached_data.get('indices', {})
                self.tanakh_state = cached_data.get('state')
                logging.info("Data loaded successfully from cache.")
            except Exception as e:
                logging.warning(f"Cache file is corrupt or could not be loaded: {e}. Reloading data.")
                cache_valid = False
        if not cache_valid or not self.all_indices or not self.tanakh_state:
            self._load_all_indices()
            self._create_tanakh_holographic_state()
            if use_cache:
                self._save_to_cache()
    def _load_all_indices(self):
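        """Read every available book index (JSON) from INDICES_DIR into self.all_indices."""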
logging.info("Lade Index-Dateien für alle Bücher...") | |
for i in BOOK_RANGE: | |
index_path = os.path.join(INDICES_DIR, f"book_{i:02}_index.json") | |
if os.path.exists(index_path): | |
with open(index_path, 'r', encoding='utf-8') as f: self.all_indices[i] = json.load(f) | |
if not self.all_indices: sys.exit("Keine Index-Dateien gefunden. Bitte 'build_indices.py' ausführen.") | |
logging.info(f"{len(self.all_indices)} Buch-Indizes geladen.") | |
    def _create_tanakh_holographic_state(self):
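        """Concatenate the cleaned Hebrew text of all books and XOR-fold it into one state."""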
logging.info("Erstelle holographischen Tanach-State...") | |
final_state = '0' * HOLOGRAPHIC_STATE_SIZE_BITS | |
full_binary_text = "" | |
for i in BOOK_RANGE: | |
try: | |
with open(f"texts/torah/{i:02}.json", 'r', encoding='utf-8') as file: | |
data = json.load(file) | |
full_text = ' '.join([' '.join(block) for block in data.get("text", [])]) | |
clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", re.sub(r"\[.*?\]", "", full_text, flags=re.DOTALL)) | |
if clean_text: | |
full_binary_text += self._text_to_gematria_binary(clean_text, for_state=True) | |
except Exception: continue | |
self.tanakh_state = self._fold_into_state(full_binary_text) | |
logging.info("Holographischer Tanach-State wurde erstellt.") | |
    def _save_to_cache(self):
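        """Pickle the loaded indices and the folded state so later runs can skip rebuilding."""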
logging.info(f"Speichere Daten in Cache-Datei: {CACHE_FILE}") | |
data_to_cache = {'indices': self.all_indices, 'state': self.tanakh_state} | |
with open(CACHE_FILE, 'wb') as f: pickle.dump(data_to_cache, f) | |
    def _text_to_gematria_binary(self, text, for_state=False):
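        """Map each remaining letter to its gematria value and encode it as a
        BITS_PER_CHAR-bit binary string; for_state keeps only Hebrew letters."""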
        text_for_calc = linearize_umlauts(text.lower())
        if for_state:
            clean_text = re.sub(r"[^\u05D0-\u05EA]+", "", text_for_calc)
        else:
            clean_text = re.sub(r"[^a-z\u05D0-\u05EA]+", "", text_for_calc)
        logging.debug(f"text_to_gematria_binary (for_state={for_state}): original='{text[:30]}...', cleaned='{clean_text[:30]}...'")
        binary_string = "".join(format(letter_to_value(c), f'0{BITS_PER_CHAR}b') for c in clean_text)
        logging.debug(f" -> generated binary string (first 64 bits): {binary_string[:64]}")
        return binary_string
    def _fold_into_state(self, binary_string, initial_state=None):
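        """XOR successive HOLOGRAPHIC_STATE_SIZE_BITS-sized blocks of the input
        into a single fixed-size state, zero-padding the final block."""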
        state = np.array(list(initial_state), dtype=np.int8) if initial_state else np.zeros(HOLOGRAPHIC_STATE_SIZE_BITS, dtype=np.int8)
        for i in range(0, len(binary_string), HOLOGRAPHIC_STATE_SIZE_BITS):
            block = binary_string[i:i+HOLOGRAPHIC_STATE_SIZE_BITS].ljust(HOLOGRAPHIC_STATE_SIZE_BITS, '0')
            state = np.bitwise_xor(state, np.array(list(block), dtype=np.int8))
        return "".join(state.astype(str))
    def get_best_phrase_from_all_books(self, gematria_val, method):
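        """Resolve a gematria value to a phrase.

        Tries an exact match first (best pagerank/count ratio across all books),
        then the +/-1 neighbor values, then a Latin decomposition. Note: `method`
        is accepted for CLI compatibility but not used in the ranking yet.
        """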
        best_overall_phrase_obj = None
        best_overall_score = -1.0
        for book_num, book_index in self.all_indices.items():
            candidates = book_index.get(str(gematria_val), {}).get('phrases', [])
            if not candidates:
                continue
            pg_score = book_index.get(str(gematria_val), {}).get('pagerank', 0)
            best_in_book = max(candidates, key=lambda p: pg_score / p.get('count', 1) if p.get('count', 0) > 0 else 0)
            current_score = pg_score / best_in_book.get('count', 1) if best_in_book.get('count', 0) > 0 else 0
            if current_score > best_overall_score:
                best_overall_score = current_score
                best_in_book['source_book'] = book_num
                best_overall_phrase_obj = best_in_book
        if best_overall_phrase_obj:
            return best_overall_phrase_obj, "exact"
        for offset in [1, -1]:
            for book_num, book_index in self.all_indices.items():
                candidates = book_index.get(str(gematria_val + offset), {}).get('phrases', [])
                if candidates:
                    best_in_book = min(candidates, key=lambda p: p.get('position', float('inf')))
                    best_in_book['source_book'] = book_num
                    return best_in_book, f"neighbor(d={offset})"
        decomposed = decompose_to_latin(gematria_val)
        if decomposed:
            return {"text": f"[{decomposed}]", "position": -2, "source_book": "N/A"}, "decomposed"
        return None, None
    def run_fractal_mode(self, query, depth, method):
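        """Run the fractal search: XOR each query phrase against the Tanakh state,
        decode every BITS_PER_CHAR-bit slice of the result back into a phrase, and
        feed newly found phrases into the next depth level; finally print the
        results ordered by position and by frequency."""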
print(f"\n" + "="*15 + f" FRAKTALE LOGOS-AUSSCHÖPFUNG (Tiefe: {depth}, Methode: {method}) " + "="*15) | |
initial_logos = query | |
# <<<<<<<<<<<<<<<<<< HIER IST DIE KORREKTUR >>>>>>>>>>>>>>>>>>>> | |
# Wir verwenden 0 für das Quell-Buch, um den TypeError zu vermeiden | |
all_found_phrases_map = {initial_logos: {"text": initial_logos, "position": -1, "depth": 0, "count":1, "source_book": 0}} | |
# <<<<<<<<<<<<<<<<<< ENDE DER KORREKTUR >>>>>>>>>>>>>>>>>>>>> | |
phrases_to_process_this_level = {initial_logos} | |
for d in range(depth): | |
logging.info(f"--- Starte Tiefe {d + 1}/{depth} mit {len(phrases_to_process_this_level)} Phrasen ---") | |
phrases_for_next_level = set() | |
for p_current in phrases_to_process_this_level: | |
combined_query = f"{initial_logos} {p_current}" | |
query_binary = self._text_to_gematria_binary(combined_query) | |
konzept_state = self._fold_into_state(query_binary) | |
final_konzept = "".join(str(int(a)^int(b)) for a,b in zip(self.tanakh_state, konzept_state)) | |
for i in range(0, len(final_konzept), BITS_PER_CHAR): | |
gematria_val = int(final_konzept[i:i+BITS_PER_CHAR], 2) | |
if gematria_val == 0: continue | |
phrase_obj, _ = self.get_best_phrase_from_all_books(gematria_val, method) | |
if phrase_obj: | |
phrase_text = phrase_obj['text'] | |
if phrase_text not in all_found_phrases_map: | |
phrase_obj['depth'] = d + 1 | |
phrase_obj['count'] = 1 | |
all_found_phrases_map[phrase_text] = phrase_obj | |
phrases_for_next_level.add(phrase_text) | |
else: | |
all_found_phrases_map[phrase_text]['count'] += 1 | |
if not phrases_for_next_level: | |
logging.info(f"Keine neuen Phrasen in Tiefe {d + 1} gefunden.") | |
break | |
phrases_to_process_this_level = phrases_for_next_level | |
# Sortiere nach Buch und dann nach Position, um die narrative Ordnung beizubehalten | |
sorted_by_position = sorted(all_found_phrases_map.values(), key=lambda x: (x.get('source_book', 99), x.get('position', -1))) | |
print("\n--- Finale Synthese (geordnet nach Buch und Auftreten im Text) ---") | |
current_book = -1 | |
for p in sorted_by_position: | |
book = p.get('source_book') | |
if book != current_book: | |
# Gib eine Kopfzeile für jedes neue Buch aus | |
if isinstance(book, int) and book > 0: | |
print(f"\n--- Buch {book:02d} ---") | |
elif book == 0: | |
print(f"--- Query ---") | |
current_book = book | |
print(f"{p['text']}", end=" | ") | |
print("\n") | |
# Sortiere nach Häufigkeit für die Top-Konzepte | |
sorted_by_count = sorted(all_found_phrases_map.values(), key=lambda x: x['count'], reverse=True) | |
print("\n--- Top 25 Resonanz-Konzepte (geordnet nach Häufigkeit im Fraktal) ---") | |
for p in sorted_by_count[:25]: | |
source = f"B{p.get('source_book', '??'):02d}" if isinstance(p.get('source_book'), int) and p.get('source_book') > 0 else p.get('source_book', 'N/A') | |
print(f"[{p['count']:2d}x] {p['text']} (Original in {source}, Pos: {p.get('position', 'N/A')})") | |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Tanakh Holographic Explorer (v13, final).")
    parser.add_argument("query", type=str, help="The initial query phrase (Logos).")
    parser.add_argument("--method", type=str, choices=['frequency', 'network', 'default'], default='network', help="Weighting method.")
    parser.add_argument("--depth", type=int, default=1, help="Maximum depth of the fractal search.")
    parser.add_argument("--no-cache", action="store_true", help="Forces the data to be reloaded.")
    parser.add_argument("--debug", action="store_true", help="Enables detailed debug output.")
    args = parser.parse_args()
    setup_logging(args.debug)
    engine = TanakhExplorer(use_cache=not args.no_cache)
    engine.run_fractal_mode(args.query, args.depth, args.method)
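
# Example invocation (filename and query are assumptions; adjust to your setup,
# and run build_indices.py first so INDICES_DIR is populated):
#   python explorer.py "bereshit bara" --depth 2 --method network --debug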