# daily_psalm/quran.py
import json
import os
import logging
import sqlite3
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)

def process_quran_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields.
    """
    base_path = "texts/quran"
    results = {}
    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
            if data:
                # Extract name and verses; sort verse keys numerically where
                # possible, since a plain string sort would put "10" before "2"
                name = data.get("name", "No title")
                verses = data.get("verse", {})
                sorted_keys = sorted(verses.keys(), key=lambda k: int(''.join(filter(str.isdigit, k)) or "0"))
                text = [verses[key] for key in sorted_keys]
                # Store sura ID as key and sura data as value
                results[i] = {"name": name, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except KeyError as e:
            logger.warning(f"Expected key 'verse' is missing in {file_name}: {e}")
    return results
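
# Illustrative usage (assumes texts/quran/001.json etc. exist in the expected
# {"name": ..., "verse": {...}} layout):
#
#     suras = process_quran_files(1, 2)
#     for sura_id, info in suras.items():
#         print(sura_id, info["name"], len(info["text"]), "verses")
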

def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest Quran phrase in abjad.db matching the given gematria value.

    Args:
        gematria_sum: The gematria value to search for.
        db_file: The database file to search in.

    Returns:
        A dictionary containing the matched verse information, or None if no
        match is found.
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # First check whether there are any Quran entries in the database
        cursor.execute('''
            SELECT COUNT(*)
            FROM results
            WHERE book != 'Psalms'
        ''')
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None
        # Search for an exact match, prioritizing shorter phrases
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND book != 'Psalms'
            ORDER BY phrase_length ASC, LENGTH(words) ASC
            LIMIT 1
        ''', (gematria_sum,))
        result = cursor.fetchone()
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return {
                "words": result[0],
                "book": result[1],
                "chapter": result[2],
                "verse": result[3],
                "phrase_length": result[4],
                "word_position": result[5],
            }
        # If there is no exact match, fall back to the closest gematria value
        # (similar to how quran_network handles it)
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) AS diff
            FROM results
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            # Find the shortest phrase with this gematria value
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book != 'Psalms'
                ORDER BY phrase_length ASC, LENGTH(words) ASC
                LIMIT 1
            ''', (closest_gematria,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return {
                    "words": result[0],
                    "book": result[1],
                    "chapter": result[2],
                    "verse": result[3],
                    "phrase_length": result[4],
                    "word_position": result[5],
                }
    logger.debug("No matching sura found.")
    return None
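
# Illustrative usage (assumes abjad.db was built by initialize_quran_database;
# the gematria value 786 is an arbitrary example):
#
#     match = find_shortest_sura_match(786)
#     if match:
#         print(match["book"], match["chapter"], match["verse"], match["words"])
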

def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Creates an iframe HTML string for displaying a Quran verse."""
    logger.debug(f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}")
    # Use the surahquran.com URL format
    url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    iframe = f'<iframe src="{url}" width="800" height="600"></iframe>'
    logger.debug(f"Generated iframe: {iframe}")
    return iframe
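
# Illustrative usage: chapter 1, verse 1 produces
# <iframe src="https://surahquran.com/aya-1-sora-1.html" width="800" height="600"></iframe>
#
#     html = create_quran_display_iframe("Al-Fatiha", 1, 1)
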

def get_sura_count() -> int:
    """Returns the total number of suras in the Quran."""
    base_path = "texts/quran"
    # Count the JSON files in the quran directory
    try:
        files = [f for f in os.listdir(base_path) if f.endswith('.json')]
        return len(files)
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran

def get_first_els_result_quran(gematria_sum: int, tlang: str = "en", rounds_combination: str = "1,-1") -> Optional[Dict[str, Any]]:
    """
    Gets the first ELS result from the Quran, using the gematria sum as the step
    and following the same method as the Torah ELS: combined +1/-1 rounds.

    For the Quran, the implementation specifically:
    1. Takes a +1 ELS round from the start of sura 1 to the end of sura 2.
    2. Takes a -1 ELS round from the end of sura 2 back to the start of sura 1.

    Args:
        gematria_sum: The gematria value to use as the ELS step.
        tlang: Target language for results.
        rounds_combination: Comma-separated string of round directions, defaults to "1,-1".

    Returns:
        The first ELS result found, or None.
    """
    import hashlib
    from gematria import strip_diacritics, calculate_gematria
    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}, rounds_combination: {rounds_combination}")
    # Create a cache key including the rounds_combination
    cache_key = f"quran_els_{gematria_sum}_{tlang}_{rounds_combination}"
    cache_file = "els_cache.db"
    # Check the cache first
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?",
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")
    # Cache miss, perform the ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")
    # Load Quran text for suras 1 and 2 only, as specified
    quran_data = process_quran_files(1, 2)
    # Concatenate verses into a single text
    all_text = ""
    sura_verse_map = []  # Track (sura_id, sura_name, verse_idx) for each character
    for sura_id, sura_info in sorted(quran_data.items()):
        sura_name = sura_info['name']
        verses = sura_info['text']
        # Add a space between suras to prevent cross-sura word formation;
        # map the separator to the previous entry so sura_verse_map stays
        # aligned with all_text
        if all_text:
            all_text += " "
            sura_verse_map.append(sura_verse_map[-1])
        # Add all verses from this sura and track the mapping
        all_text += " ".join(verses)
        # Track character positions to their original sura/verse for later lookup
        for verse_idx, verse in enumerate(verses, 1):
            # Add 1 for the space between verses (except after the last verse)
            for _ in range(len(verse) + (1 if verse_idx < len(verses) else 0)):
                sura_verse_map.append((sura_id, sura_name, verse_idx))
    # Clean up the text: strip diacritics, remove special characters
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())
    # Remove spaces for the ELS search
    text_no_spaces = clean_text.replace(" ", "")
    text_length = len(text_no_spaces)
    if text_length == 0:
        logger.warning("No text available after cleaning")
        return None
    # Build a character map without spaces. Note: strip_diacritics may shorten
    # the text, so positions after a removed diacritic map only approximately.
    char_map = []
    for i, c in enumerate(clean_text):
        if c.isalpha():
            if i < len(sura_verse_map):
                char_map.append(sura_verse_map[i])
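    # Illustration of the walk below: with gematria_sum = 4, the forward (+1)
    # round visits positions 0, 4, 8, ... from the start of the text, while
    # the backward (-1) round starts at the last letter and visits
    # N-1, N-5, N-9, ... where N is the text length.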
    # Parse the rounds combination; the default is "1,-1"
    rounds_list = list(map(float, rounds_combination.split(',')))
    result = None
    complete_result = ""
    complete_positions = []
    first_position = None
    last_position = None
    # Process each round direction (similar to the Torah ELS)
    for round_dir in rounds_list:
        # Determine whether this is a forward or backward search
        is_forward = round_dir > 0
        start_index = 0 if is_forward else (text_length - 1)
        # Set the step and direction
        step = gematria_sum
        direction = 1 if is_forward else -1
        # Extract the ELS characters
        round_text = ""
        positions = []
        pos = start_index
        # Extract up to 10 characters; at least 3 are needed for a valid result
        for _ in range(10):
            if 0 <= pos < text_length:
                round_text += text_no_spaces[pos]
                positions.append(pos)
                pos += direction * step
            else:
                break
        if len(round_text) >= 3:
            # Save this round's results
            complete_result += round_text
            complete_positions.extend(positions)
            # Track the first and last positions for the overall result
            if first_position is None or (is_forward and positions[0] < first_position):
                first_position = positions[0]
            if last_position is None or (not is_forward and positions[-1] > last_position):
                last_position = positions[-1]
    # Create the result if we found something
    if complete_result and len(complete_result) >= 3 and first_position is not None and last_position is not None:
        if first_position < len(char_map) and last_position < len(char_map):
            first_loc = char_map[first_position]
            last_loc = char_map[last_position]
            result = {
                "result_text": complete_result,
                "source": "Quran",
                "step": gematria_sum,
                "start_sura": first_loc[0],
                "start_sura_name": first_loc[1],
                "start_verse": first_loc[2],
                "end_sura": last_loc[0],
                "end_sura_name": last_loc[1],
                "end_verse": last_loc[2],
                "positions": complete_positions,
                "rounds_combination": rounds_combination
            }
            # Calculate the gematria of the result text
            result["result_sum"] = calculate_gematria(complete_result)
            logger.debug(f"Found ELS result: {complete_result} with gematria {result['result_sum']}")
        else:
            logger.warning(f"Character position mapping inconsistency: {first_position}, {last_position} vs {len(char_map)}")
    # Cache the result
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                # Make sure the cache table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran",
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang, "rounds_combination": rounds_combination}), json.dumps(result)))
                conn.commit()
                logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")
    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
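
# Illustrative usage (assumes the gematria module and texts/quran are present;
# the first call populates els_cache.db, later calls hit the cache):
#
#     els = get_first_els_result_quran(42)
#     if els:
#         print(els["result_text"], els["start_sura_name"], els["end_sura_name"])
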

def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1):
    """
    Initializes the abjad database with Quran verses.

    This function processes all Quran JSON files and adds their gematria values
    to the database.

    Args:
        db_file: The SQLite database file to use.
        max_phrase_length: Maximum phrase length to process.
    """
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # tqdm provides progress bars
    logger.info(f"Initializing Quran database: {db_file}")
    # Create the database tables if they don't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create the results table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create the processed_books table to track processing
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()
    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")
    # Global counter for word-position tracking across all suras
    total_word_count = 0
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each sura (stored as a "book" in the database)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load the sura data
            sura_data = process_quran_files(sura_id, sura_id)
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                verses = sura_info['text']
                # Check whether this sura has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: already processed with max_phrase_length {result[0]}")
                    # Still advance the global word counter so word positions
                    # stay consistent for the suras that follow
                    total_word_count += sum(len(verse.split()) for verse in verses)
                    continue
                phrases_to_insert = []
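                # Illustration: with max_phrase_length=2, a verse "A B C"
                # yields the phrases "A", "B", "C", "A B", "B C", each stored
                # with its gematria value and global word-position range.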
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split the verse into words
                    words = verse_text.split()
                    # Process phrases of each length up to max_phrase_length
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            # Calculate the global word-position range
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            # Add to the batch-insert list
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    # Update the total word count after processing each verse
                    total_word_count += len(words)
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR IGNORE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Mark this sura as processed
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")
    logger.info("Quran database initialization completed successfully")