# Spaces: Running  (HuggingFace Spaces status banner captured with the file; not code)
import json | |
import os | |
import logging | |
import sqlite3 | |
from typing import Dict, List, Any | |
logger = logging.getLogger(__name__) | |
def process_quran_files(start: int, end: int, base_path: str = "texts/quran") -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Each sura is expected at "<base_path>/NNN.json" (zero-padded sura ID) and to
    contain a "name" field plus a "verse" object mapping verse keys to verse text.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).
        base_path: Directory containing the per-sura JSON files.

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields. Suras whose files are missing
        or unreadable are logged and omitted.
    """
    results = {}

    def _verse_order(key: str):
        # Order verse keys by their embedded number (e.g. "verse_2" before
        # "verse_10"). A plain lexicographic sort would place "verse_10"
        # before "verse_2" and scramble the verse order.
        digits = ''.join(ch for ch in key if ch.isdigit())
        return (int(digits), key) if digits else (float('inf'), key)

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
            if data:
                # Extract name and verses
                name = data.get("name", "No title")
                verses = data.get("verse", {})
                text = [verses[key] for key in sorted(verses.keys(), key=_verse_order)]
                # Store sura ID as key and sura data as value
                results[i] = {"name": name, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except KeyError as e:
            logger.warning(f"Expected key 'verse' is missing in {file_name}: {e}")
    return results
def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Dict[str, Any]:
    """
    Finds the shortest Quran sura verse in abjad.db.

    Prefers an exact gematria match; when none exists, falls back to the
    entry whose gematria value is numerically closest. Psalms rows are
    excluded throughout (the table mixes both corpora).

    Args:
        gematria_sum: The gematria value to search for
        db_file: The database file to search in

    Returns:
        A dictionary containing the matched verse information, or None if the
        database has no Quran entries or no candidate row at all.
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")

    # Shared SELECT used by both the exact-match and closest-match paths:
    # shortest phrase first, ties broken by raw text length.
    shortest_query = '''
        SELECT words, book, chapter, verse, phrase_length, word_position
        FROM results
        WHERE gematria_sum = ? AND book != 'Psalms'
        ORDER BY phrase_length ASC, LENGTH(words) ASC
        LIMIT 1
    '''

    def _row_to_dict(row) -> Dict[str, Any]:
        # Map the SELECT column order onto the documented result keys.
        return {
            "words": row[0],
            "book": row[1],
            "chapter": row[2],
            "verse": row[3],
            "phrase_length": row[4],
            "word_position": row[5] if len(row) > 5 else None
        }

    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # First check if there are any Quran entries in the database
        cursor.execute('''
            SELECT COUNT(*)
            FROM results
            WHERE book != 'Psalms'
        ''')
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None
        # Search for a match, prioritizing shorter phrases
        cursor.execute(shortest_query, (gematria_sum,))
        result = cursor.fetchone()
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return _row_to_dict(result)
        # If no exact match, try to find the closest match
        # This is similar to how quran_network handles it
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) as diff
            FROM results
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            # Find the shortest verse with this gematria
            cursor.execute(shortest_query, (closest_gematria,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return _row_to_dict(result)
    logger.debug("No matching sura found.")
    return None
def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Builds the HTML <iframe> snippet that embeds a Quran verse viewer."""
    logger.debug(
        f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}"
    )
    # surahquran.com exposes one page per (chapter, verse) pair; the sura
    # name is only used for logging, never for the URL itself.
    verse_url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    embed_markup = f'<iframe src="{verse_url}" width="800" height="600"></iframe>'
    logger.debug(f"Generated iframe: {embed_markup}")
    return embed_markup
def get_sura_count() -> int:
    """Returns the total number of suras in the Quran."""
    base_path = "texts/quran"
    # Derive the count from the JSON files on disk; when the directory is
    # missing, fall back to the canonical count of 114.
    try:
        return sum(1 for entry in os.listdir(base_path) if entry.endswith('.json'))
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran
def get_first_els_result_quran(gematria_sum: int, tlang: str = "en", rounds_combination: str = "1,-1") -> Dict[str, Any]:
    """
    Gets the first ELS result from the Quran using the gematria sum as the step,
    following the same method as Torah ELS: combined +1/-1 rounds.

    For Quran, the implementation specifically:
    1. Takes +1 ELS round from the start of book 1 to the end of book 2
    2. Takes -1 ELS round from the end of book 2 to the start of book 1

    Results are cached in the ``els_cache.db`` SQLite database, keyed by a
    SHA-256 hash of the (gematria_sum, tlang, rounds_combination) cache key.

    Args:
        gematria_sum: The gematria value to use as the ELS step
        tlang: Target language for results
        rounds_combination: Comma-separated string of round directions, defaults to "1,-1"

    Returns:
        The first ELS result found (dict with result text, start/end sura and
        verse, letter positions, and the result's gematria sum) or None
    """
    # Local imports keep these out of module import time.
    import hashlib
    import json
    import math  # NOTE(review): unused in this function — candidate for removal
    from gematria import strip_diacritics, calculate_gematria
    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}, rounds_combination: {rounds_combination}")
    # Create a cache key including the rounds_combination
    cache_key = f"quran_els_{gematria_sum}_{tlang}_{rounds_combination}"
    cache_file = "els_cache.db"
    # Check cache first; any DB error is logged and treated as a cache miss.
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?",
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")
    # Cache miss, perform ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")
    # Load Quran text for books 1 and 2 only (based on the requirement)
    quran_data = process_quran_files(1, 2)  # Only books 1 and 2 as specified
    # Concatenate verses into a single text
    all_text = ""
    sura_verse_map = []  # Track (sura_id, sura_name, verse_idx) for each character
    for sura_id, sura_info in sorted(quran_data.items()):
        sura_name = sura_info['name']
        verses = sura_info['text']
        # Add a space between suras to prevent cross-sura word formation
        if all_text:
            all_text += " "
        # Add all verses from this sura and track the mapping
        sura_start_pos = len(all_text)
        all_text += " ".join(verses)
        # Track character positions to their original sura/verse for later lookup
        # NOTE(review): current_verse_start is never read after assignment.
        current_verse_start = sura_start_pos
        for verse_idx, verse in enumerate(verses, 1):
            for _ in range(len(verse) + (1 if verse_idx < len(verses) else 0)):  # Add 1 for space between verses
                sura_verse_map.append((sura_id, sura_name, verse_idx))
    # Clean up the text: strip diacritics, remove special characters
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())
    # Remove spaces for ELS search
    text_no_spaces = clean_text.replace(" ", "")
    text_length = len(text_no_spaces)
    if text_length == 0:
        logger.warning("No text available after cleaning")
        return None
    # Build a more accurate character map without spaces
    # NOTE(review): this indexes sura_verse_map with i (a clean_text index),
    # not char_idx; the map was built over all_text, so if strip_diacritics
    # changed the text length the two indexings diverge — TODO confirm.
    char_map = []
    char_idx = 0
    for i, c in enumerate(clean_text):
        if c.isalpha():
            if char_idx < len(sura_verse_map):
                char_map.append(sura_verse_map[i])
            char_idx += 1
    # Parse rounds combination - default is "1,-1"
    rounds_list = list(map(float, rounds_combination.split(',')))
    result = None
    complete_result = ""
    complete_positions = []
    first_position = None
    last_position = None
    # Process each round direction (similar to Torah ELS)
    for round_dir in rounds_list:
        # Determine if this is a forward or backward search
        is_forward = round_dir > 0
        start_index = 0 if is_forward else (text_length - 1)
        # Set step and direction
        step = gematria_sum
        direction = 1 if is_forward else -1
        # Extract ELS characters
        round_text = ""
        positions = []
        pos = start_index
        # Extract up to 10 characters, but we'll use at least 3 for a valid result
        for _ in range(10):
            if 0 <= pos < text_length:
                round_text += text_no_spaces[pos]
                positions.append(pos)
                pos += direction * step
            else:
                break
        if len(round_text) >= 3:
            # Save this round's results
            complete_result += round_text
            complete_positions.extend(positions)
            # Track first and last positions for the overall result.
            # NOTE(review): the direction guards (is_forward / not is_forward)
            # mean a backward round never lowers first_position and a forward
            # round never raises last_position — presumably intentional, but
            # worth confirming against the Torah ELS implementation.
            if first_position is None or (is_forward and positions[0] < first_position):
                first_position = positions[0]
                # first_loc/last_loc set here are recomputed below before use.
                first_loc = char_map[first_position] if first_position < len(char_map) else None
            if last_position is None or (not is_forward and positions[-1] > last_position):
                last_position = positions[-1]
                last_loc = char_map[last_position] if last_position < len(char_map) else None
    # Create result if we found something
    if complete_result and len(complete_result) >= 3 and first_position is not None and last_position is not None:
        if first_position < len(char_map) and last_position < len(char_map):
            first_loc = char_map[first_position]
            last_loc = char_map[last_position]
            result = {
                "result_text": complete_result,
                "source": "Quran",
                "step": gematria_sum,
                "start_sura": first_loc[0],
                "start_sura_name": first_loc[1],
                "start_verse": first_loc[2],
                "end_sura": last_loc[0],
                "end_sura_name": last_loc[1],
                "end_verse": last_loc[2],
                "positions": complete_positions,
                "rounds_combination": rounds_combination
            }
            # Calculate gematria of the result text
            result["result_sum"] = calculate_gematria(complete_result)
            logger.debug(f"Found ELS result: {complete_result} with gematria {result['result_sum']}")
        else:
            logger.warning(f"Character position mapping inconsistency: {first_position}, {last_position} vs {len(char_map)}")
    # Cache the result so identical queries are served from els_cache.db.
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                # Make sure the table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran",
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang, "rounds_combination": rounds_combination}), json.dumps(result)))
                conn.commit()
            logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")
    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1) -> None:
    """
    Initializes the abjad database with Quran verses.

    This function processes all Quran JSON files and adds their gematria values
    to the database. Suras recorded in ``processed_books`` with a
    max_phrase_length >= the requested one are skipped, so re-runs resume
    where an earlier run left off.

    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length to process
    """
    # Local imports: gematria is a project module, tqdm is only needed here.
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars
    logger.info(f"Initializing Quran database: {db_file}")
    # Create the database if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table; the composite primary key deduplicates
        # identical phrases via INSERT OR IGNORE below.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        # Index supports the gematria_sum lookups in find_shortest_sura_match.
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create processed_books table to track processing
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()
    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")
    # Global counter for word position tracking across all suras.
    # NOTE(review): the skip path ("continue" below) does not advance this
    # counter, so word positions can differ between a fresh run and a
    # resumed run — TODO confirm whether that matters to consumers.
    total_word_count = 0
    # NOTE(review): book_names is populated but never read afterwards.
    book_names = {}
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each sura (book)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data (one sura at a time)
            sura_data = process_quran_files(sura_id, sura_id)
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                book_names[sura_id] = sura_name
                # Check if this sura has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: Already processed with max_phrase_length {result[0]}")
                    continue
                verses = sura_info['text']
                phrases_to_insert = []
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split verse into words
                    words = verse_text.split()
                    # Process phrases of different lengths (sliding windows)
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            # Gematria is computed over the phrase with spaces removed.
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            # Calculate word position range (1-based, global across suras)
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            # Add to batch insert list
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    # Update total word count after processing each verse
                    total_word_count += len(words)
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR IGNORE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Update processed_books after processing each book
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")
    logger.info("Quran database initialization completed successfully")