import json
import logging
import os
import sqlite3
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


def process_quran_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields.
    """
    base_path = "texts/quran"
    results = {}
    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
            if data:
                # Extract name and verses
                name = data.get("name", "No title")
                verses = data.get("verse", {})
                # Sort verse keys numerically so e.g. "verse_10" follows
                # "verse_9"; a plain lexicographic sort would misorder them.
                text = [verses[key] for key in sorted(
                    verses.keys(),
                    key=lambda k: int(''.join(filter(str.isdigit, k)) or 0))]
                # Store sura ID as key and sura data as value
                results[i] = {"name": name, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except KeyError as e:
            logger.warning(f"Expected key 'verse' is missing in {file_name}: {e}")
    return results
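
# Example (illustrative sketch): load the first two suras and print a summary.
# Assumes texts/quran/001.json and 002.json exist relative to the working
# directory, in the layout process_quran_files expects.
#
#   suras = process_quran_files(1, 2)
#   for sura_id, info in sorted(suras.items()):
#       print(f"Sura {sura_id}: {info['name']} ({len(info['text'])} verses)")
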
def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest matching Quran verse phrase in abjad.db.

    Args:
        gematria_sum: The gematria value to search for.
        db_file: The database file to search in.

    Returns:
        A dictionary containing the matched verse information, or None if no
        match is found.
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # First check if there are any Quran entries in the database
        cursor.execute('''
            SELECT COUNT(*) FROM results
            WHERE book != 'Psalms'
        ''')
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None

        # Search for a match, prioritizing shorter phrases
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND book != 'Psalms'
            ORDER BY phrase_length ASC, LENGTH(words) ASC
            LIMIT 1
        ''', (gematria_sum,))
        result = cursor.fetchone()

        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return {
                "words": result[0],
                "book": result[1],
                "chapter": result[2],
                "verse": result[3],
                "phrase_length": result[4],
                "word_position": result[5] if len(result) > 5 else None
            }

        # If no exact match, try to find the closest match.
        # This is similar to how quran_network handles it.
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) as diff
            FROM results
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        closest = cursor.fetchone()

        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            # Find the shortest verse with this gematria
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book != 'Psalms'
                ORDER BY phrase_length ASC, LENGTH(words) ASC
                LIMIT 1
            ''', (closest_gematria,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return {
                    "words": result[0],
                    "book": result[1],
                    "chapter": result[2],
                    "verse": result[3],
                    "phrase_length": result[4],
                    "word_position": result[5] if len(result) > 5 else None
                }

    logger.debug("No matching sura found.")
    return None


def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Creates an iframe HTML string for displaying a Quran verse."""
    logger.debug(f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}")
    # Use the surahquran.com URL format
    url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    # The iframe markup was lost in the original source; this minimal tag
    # embedding the URL above is an assumed reconstruction.
    iframe = f'<iframe src="{url}"></iframe>'
    logger.debug(f"Generated iframe: {iframe}")
    return iframe


def get_sura_count() -> int:
    """Returns the total number of suras in the Quran."""
    base_path = "texts/quran"
    # Count the number of JSON files in the quran directory
    try:
        files = [f for f in os.listdir(base_path) if f.endswith('.json')]
        return len(files)
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran
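
# Example (illustrative sketch): find the shortest phrase with gematria 786
# and build an embeddable viewer for it. Assumes abjad.db was populated via
# initialize_quran_database(); 786 is an arbitrary demonstration value.
#
#   match = find_shortest_sura_match(786)
#   if match:
#       html = create_quran_display_iframe(match["book"], match["chapter"], match["verse"])
#       print(match["words"], html)
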
def get_first_els_result_quran(gematria_sum: int, tlang: str = "en") -> Optional[Dict[str, Any]]:
    """
    Gets the first ELS result from the Quran using the gematria sum as the step.

    Args:
        gematria_sum: The gematria value to use as the ELS step.
        tlang: Target language for results.

    Returns:
        The first ELS result found, or None.
    """
    import hashlib
    from gematria import strip_diacritics

    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}")

    # Create a cache key
    cache_key = f"quran_els_{gematria_sum}_{tlang}"
    cache_file = "els_cache.db"

    # Check cache first
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?",
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")

    # Cache miss, perform ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")

    # Load all Quran text
    sura_count = get_sura_count()
    quran_data = process_quran_files(1, sura_count)

    # Concatenate all verses from all suras into a single text
    all_text = ""
    for sura_id, sura_info in sorted(quran_data.items()):
        # Add a space between suras to prevent cross-sura word formation
        if all_text:
            all_text += " "
        # Add all verses from this sura
        verses = sura_info['text']
        all_text += " ".join(verses)

    # Clean up the text: strip diacritics, remove any special characters, etc.
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())

    # Perform ELS search with the gematria_sum as the step
    result = None
    if clean_text:
        # Remove spaces for ELS search
        text_no_spaces = clean_text.replace(" ", "")

        # Track character positions back to their original sura/verse
        char_map = []  # List of (sura_id, sura_name, verse_idx) per character

        # Build the character position mapping
        current_pos = 0
        for sura_id, sura_info in sorted(quran_data.items()):
            sura_name = sura_info['name']
            verses = sura_info['text']
            for verse_idx, verse in enumerate(verses, 1):
                # Apply the same cleaning used for text_no_spaces (strip
                # diacritics, keep letters only) so positions stay aligned.
                cleaned_verse = ''.join(
                    c for c in strip_diacritics(verse) if c.isalpha())
                for _ in cleaned_verse:
                    if current_pos < len(text_no_spaces):
                        char_map.append((sura_id, sura_name, verse_idx))
                        current_pos += 1

        # Try the first 100 start positions for better coverage
        for start_pos in range(min(100, len(text_no_spaces))):
            # Extract characters at positions start_pos, start_pos+step,
            # start_pos+2*step, etc.
            extracted = ""
            positions = []
            pos = start_pos
            # Extract up to 7 characters (typical ELS result length)
            for _ in range(7):
                if pos < len(text_no_spaces):
                    extracted += text_no_spaces[pos]
                    positions.append(pos)
                    pos += gematria_sum
                else:
                    break

            if len(extracted) >= 3:  # At least 3 characters
                # Look up the sura/verse for the first and last character
                first_pos = positions[0]
                last_pos = positions[-1]
                if first_pos < len(char_map) and last_pos < len(char_map):
                    first_loc = char_map[first_pos]
                    last_loc = char_map[last_pos]
                    result = {
                        "result_text": extracted,
                        "source": "Quran",
                        "start_position": start_pos,
                        "step": gematria_sum,
                        "start_sura": first_loc[0],
                        "start_sura_name": first_loc[1],
                        "start_verse": first_loc[2],
                        "end_sura": last_loc[0],
                        "end_sura_name": last_loc[1],
                        "end_verse": last_loc[2],
                        "positions": positions
                    }
                    break  # Found a result, stop searching
                else:
                    logger.warning(
                        f"Character position mapping inconsistency: "
                        f"{first_pos}, {last_pos} vs {len(char_map)}")
                    continue

    # Cache the result
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                # Make sure the table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache "
                    "(query_hash, function_name, args, kwargs, results) "
                    "VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(),
                     "get_first_els_result_quran",
                     json.dumps([gematria_sum]),
                     json.dumps({"tlang": tlang}),
                     json.dumps(result)))
                conn.commit()
            logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")

    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
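
# Example (illustrative sketch): run an ELS search with step 913 over the
# concatenated Quran text. Assumes the texts/quran JSON files are present;
# the result is cached in els_cache.db, so repeated calls are cheap.
#
#   els = get_first_els_result_quran(913)
#   if els:
#       print(els["result_text"], "spans sura", els["start_sura"],
#             "to sura", els["end_sura"])
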
def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1):
    """
    Initializes the abjad database with Quran verses. This function processes
    all Quran JSON files and adds their gematria values to the database.

    Args:
        db_file: The SQLite database file to use.
        max_phrase_length: Maximum phrase length to process.
    """
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars

    logger.info(f"Initializing Quran database: {db_file}")

    # Create the database tables if they don't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create processed_books table to track processing
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()

    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")

    # Global counter for word position tracking
    total_word_count = 0
    book_names = {}

    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each sura (book)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data
            sura_data = process_quran_files(sura_id, sura_id)
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                book_names[sura_id] = sura_name

                # Check if this sura has already been processed
                cursor.execute(
                    '''SELECT max_phrase_length FROM processed_books WHERE book = ?''',
                    (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(
                        f"Skipping sura {sura_name}: Already processed with "
                        f"max_phrase_length {result[0]}")
                    continue

                verses = sura_info['text']
                phrases_to_insert = []
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split verse into words
                    words = verse_text.split()
                    # Process phrases of different lengths
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            # Calculate word position range
                            word_position_range = (
                                f"{total_word_count + start + 1}-"
                                f"{total_word_count + start + length}")
                            # Add to batch insert list
                            phrases_to_insert.append((
                                gematria_sum, cleaned_phrase, "", sura_name,
                                sura_id, verse_idx, length, word_position_range
                            ))
                    # Update total word count after processing each verse
                    total_word_count += len(words)

                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR IGNORE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Update processed_books after processing each book
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")

    logger.info("Quran database initialization completed successfully")
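

# Minimal usage sketch: build the database, then run a lookup. Assumes the
# gematria module and texts/quran/*.json are available; 66 is an arbitrary
# demonstration value.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    initialize_quran_database('abjad.db', max_phrase_length=1)
    print(find_shortest_sura_match(66))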