# daily_psalm / quran.py
# Simple calculation for Sura (author: neuralworm, commit 5d23d59).
import json
import logging
import os
import sqlite3
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def process_quran_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Each sura is expected at texts/quran/NNN.json with a "name" field and a
    "verse" mapping of verse keys to verse text.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields. Suras whose files are missing
        or unreadable are skipped with a warning.
    """
    base_path = "texts/quran"
    results: Dict[int, Dict[str, Any]] = {}

    def _verse_order(key: str):
        # Sort verse keys by their numeric part. A plain string sort would
        # order "verse_10" before "verse_2" and scramble the verse sequence;
        # keys without digits fall back to lexicographic order after all
        # numeric keys.
        digits = ''.join(ch for ch in key if ch.isdigit())
        return (0, int(digits)) if digits else (1, key)

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
            if data:
                # Extract name and verses
                name = data.get("name", "No title")
                verses = data.get("verse", {})
                text = [verses[key] for key in sorted(verses.keys(), key=_verse_order)]
                # Store sura ID as key and sura data as value
                results[i] = {"name": name, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except KeyError as e:
            logger.warning(f"Expected key 'verse' is missing in {file_name}: {e}")
    return results
def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest Quran sura verse in the abjad database.

    First looks for an exact gematria match; if none exists, falls back to
    the entry whose gematria value is numerically closest.

    Args:
        gematria_sum: The gematria value to search for
        db_file: The database file to search in

    Returns:
        A dictionary containing the matched verse information, or None when
        the database has no Quran entries or no candidate row is found.
    """

    def _row_to_match(row) -> Dict[str, Any]:
        # Shape one 6-column results row into the public match dictionary.
        return {
            "words": row[0],
            "book": row[1],
            "chapter": row[2],
            "verse": row[3],
            "phrase_length": row[4],
            "word_position": row[5],
        }

    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Quran rows are stored with book != 'Psalms'; bail out early if none exist.
        cursor.execute('''
            SELECT COUNT(*)
            FROM results
            WHERE book != 'Psalms'
        ''')
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None

        # Exact match, preferring the shortest phrase (fewest words, then
        # fewest characters).
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND book != 'Psalms'
            ORDER BY phrase_length ASC, LENGTH(words) ASC
            LIMIT 1
        ''', (gematria_sum,))
        result = cursor.fetchone()
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return _row_to_match(result)

        # If no exact match, find the numerically closest gematria value
        # (mirrors how quran_network handles it).
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) as diff
            FROM results
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            # Find the shortest verse carrying the closest gematria value.
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book != 'Psalms'
                ORDER BY phrase_length ASC, LENGTH(words) ASC
                LIMIT 1
            ''', (closest_gematria,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return _row_to_match(result)
    logger.debug("No matching sura found.")
    return None
def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Build the HTML <iframe> snippet that embeds a single Quran verse.

    The verse page comes from surahquran.com; ``sura_name`` is only used for
    logging — the URL itself is derived from the chapter and verse numbers.
    """
    logger.debug(f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}")
    # surahquran.com addresses a verse as aya-<verse>-sora-<chapter>.html
    verse_url = "https://surahquran.com/aya-{}-sora-{}.html".format(verse, chapter)
    markup = '<iframe src="{}" width="800" height="600"></iframe>'.format(verse_url)
    logger.debug(f"Generated iframe: {markup}")
    return markup
def get_sura_count() -> int:
    """Returns the total number of suras in the Quran.

    Counts the JSON files under texts/quran (one file per sura); if the
    directory is missing, falls back to the canonical count of 114.
    """
    base_path = "texts/quran"
    try:
        entries = os.listdir(base_path)
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran
    # Each sura is stored as a single .json file, so file count == sura count.
    return sum(1 for entry in entries if entry.endswith('.json'))
def get_first_els_result_quran(gematria_sum: int, tlang: str = "en") -> Optional[Dict[str, Any]]:
    """
    Gets the first ELS (Equidistant Letter Sequence) result from the Quran
    using the gematria sum as the skip step.

    Results are memoized in the els_cache.db SQLite file keyed by a SHA-256
    hash of the query string, so repeated queries return the cached result.

    Args:
        gematria_sum: The gematria value to use as the ELS step
        tlang: Target language for results (only stored in the cache key/kwargs)

    Returns:
        The first ELS result found, or None if no sequence of at least 3
        letters could be located.
    """
    import hashlib
    import json
    from gematria import strip_diacritics
    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}")
    # Create a cache key unique to (step, language)
    cache_key = f"quran_els_{gematria_sum}_{tlang}"
    cache_file = "els_cache.db"
    # Check cache first; cache errors are logged and treated as a miss.
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?",
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")
    # Cache miss, perform ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")
    # Load all Quran text
    sura_count = get_sura_count()
    quran_data = process_quran_files(1, sura_count)
    # Concatenate all verses from all suras into a single text
    all_text = ""
    for sura_id, sura_info in sorted(quran_data.items()):
        # Add a space between suras to prevent cross-sura word formation
        if all_text:
            all_text += " "
        # Add all verses from this sura
        verses = sura_info['text']
        all_text += " ".join(verses)
    # Clean up the text: strip diacritics, then keep only letters and spaces.
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())
    # Perform ELS search with the gematria_sum as the step
    result = None
    if clean_text:
        # Remove spaces for ELS search
        text_no_spaces = clean_text.replace(" ", "")
        # Map each letter position back to its origin. Each entry is a
        # (sura_id, sura_name, verse_idx) triple for the corresponding
        # character of text_no_spaces.
        char_map = []
        # Build character position mapping by re-cleaning each verse the same
        # way the concatenated text was cleaned.
        # NOTE(review): the per-verse cleaning here strips diacritics and
        # spaces but does not apply the isalpha() filter used above — if
        # strip_diacritics leaves any non-letter characters, char_map can
        # drift out of sync with text_no_spaces; confirm against gematria.
        current_pos = 0
        for sura_id, sura_info in sorted(quran_data.items()):
            sura_name = sura_info['name']
            verses = sura_info['text']
            for verse_idx, verse in enumerate(verses, 1):
                cleaned_verse = strip_diacritics(verse).replace(" ", "")
                for _ in cleaned_verse:
                    if current_pos < len(text_no_spaces):
                        char_map.append((sura_id, sura_name, verse_idx))
                        current_pos += 1
        # Start positions to try (we'll try the first 100 positions for better coverage)
        for start_pos in range(min(100, len(text_no_spaces))):
            # Extract characters at positions: start_pos, start_pos+step, start_pos+2*step, etc.
            extracted = ""
            positions = []
            pos = start_pos
            # Extract up to 7 characters (typical ELS result length)
            for _ in range(7):
                if pos < len(text_no_spaces):
                    extracted += text_no_spaces[pos]
                    positions.append(pos)
                    pos += gematria_sum
                else:
                    break
            if len(extracted) >= 3:  # At least 3 characters
                # Look up the sura/verse for the first and last character
                first_pos = positions[0]
                last_pos = positions[-1]
                if first_pos < len(char_map) and last_pos < len(char_map):
                    first_loc = char_map[first_pos]
                    last_loc = char_map[last_pos]
                    result = {
                        "result_text": extracted,
                        "source": "Quran",
                        "start_position": start_pos,
                        "step": gematria_sum,
                        "start_sura": first_loc[0],
                        "start_sura_name": first_loc[1],
                        "start_verse": first_loc[2],
                        "end_sura": last_loc[0],
                        "end_sura_name": last_loc[1],
                        "end_verse": last_loc[2],
                        "positions": positions
                    }
                    break  # Found a result, stop searching
                else:
                    logger.warning(f"Character position mapping inconsistency: {first_pos}, {last_pos} vs {len(char_map)}")
                    continue
    # Cache the result (only successful searches are cached, so a failed
    # search will be retried on the next call).
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                # Make sure the table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran",
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang}), json.dumps(result)))
                conn.commit()
                logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")
    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1):
    """
    Initializes the abjad database with Quran verses.

    This function processes all Quran JSON files and adds their gematria
    values to the database. Each sura is recorded in processed_books once
    inserted, so re-running with the same (or lower) max_phrase_length skips
    already-processed suras.

    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length (in words) to process
    """
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars
    logger.info(f"Initializing Quran database: {db_file}")
    # Create the database schema if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table; the composite primary key makes re-inserts of
        # the same phrase at the same location no-ops (INSERT OR IGNORE below).
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        # Index to speed up lookups by gematria value (see find_shortest_sura_match)
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create processed_books table to track processing progress per sura
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()
    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")
    # Global counter for word position tracking across all suras
    # NOTE(review): this counter only advances for suras that are actually
    # processed — when a sura is skipped as already processed, the positions
    # computed for later suras will not line up with a full fresh run; confirm
    # whether word_position is ever compared across runs.
    total_word_count = 0
    book_names = {}  # sura_id -> sura name, populated as suras are loaded
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each sura (book)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data (one sura at a time)
            sura_data = process_quran_files(sura_id, sura_id)
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                book_names[sura_id] = sura_name
                # Check if this sura has already been processed at this depth
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: Already processed with max_phrase_length {result[0]}")
                    continue
                verses = sura_info['text']
                phrases_to_insert = []
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split verse into words
                    words = verse_text.split()
                    # Process all phrases from 1 word up to max_phrase_length words
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            # Gematria is computed over the letters only (spaces removed)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            # Calculate the 1-based global word position range "first-last"
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            # Add to batch insert list
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    # Update total word count after processing each verse
                    total_word_count += len(words)
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR IGNORE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Mark this book as processed at the current depth
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")
    logger.info("Quran database initialization completed successfully")