import hashlib
import json
import logging
import os
import sqlite3
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


def process_quran_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields.
    """
    base_path = "texts/quran"
    results = {}

    def verse_number(key: str) -> int:
        # Verse keys are assumed to embed the verse number (e.g. "verse_1");
        # sorting by it avoids a plain string sort putting "verse_10" before
        # "verse_2".
        digits = ''.join(c for c in key if c.isdigit())
        return int(digits) if digits else 0

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
                if data:
                    # Extract the sura name and its verses.
                    name = data.get("name", "No title")
                    verses = data.get("verse", {})
                    text = [verses[key] for key in sorted(verses.keys(), key=verse_number)]
                    # Store the sura ID as key and the sura data as value.
                    results[i] = {"name": name, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except KeyError as e:
            logger.warning(f"Expected key 'verse' is missing in {file_name}: {e}")
    return results
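
# Usage sketch (hypothetical values; assumes sura files such as
# texts/quran/001.json exist in the {"name": ..., "verse": {...}} layout
# read above):
#
#     suras = process_quran_files(1, 3)
#     for sura_id, info in sorted(suras.items()):
#         print(f"{sura_id:03d} {info['name']}: {len(info['text'])} verses")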


def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest Quran phrase in the database that matches a gematria sum.

    Falls back to the phrase with the closest gematria value when no exact
    match exists.

    Args:
        gematria_sum: The gematria value to search for.
        db_file: The database file to search in.

    Returns:
        A dictionary containing the matched phrase information, or None if no
        match is found.
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # First check whether there are any Quran entries in the database
        # (the results table also holds Psalms rows, which are filtered out).
        cursor.execute('''
            SELECT COUNT(*)
            FROM results
            WHERE book != 'Psalms'
        ''')
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None
        # Search for an exact match, prioritizing shorter phrases.
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND book != 'Psalms'
            ORDER BY phrase_length ASC, LENGTH(words) ASC
            LIMIT 1
        ''', (gematria_sum,))
        result = cursor.fetchone()
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return {
                "words": result[0],
                "book": result[1],
                "chapter": result[2],
                "verse": result[3],
                "phrase_length": result[4],
                "word_position": result[5]
            }
        # If there is no exact match, fall back to the closest gematria value.
        # This mirrors how quran_network handles it.
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) AS diff
            FROM results
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            # Find the shortest phrase with this gematria value.
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book != 'Psalms'
                ORDER BY phrase_length ASC, LENGTH(words) ASC
                LIMIT 1
            ''', (closest_gematria,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return {
                    "words": result[0],
                    "book": result[1],
                    "chapter": result[2],
                    "verse": result[3],
                    "phrase_length": result[4],
                    "word_position": result[5]
                }
    logger.debug("No matching sura found.")
    return None
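
# Usage sketch (hypothetical value; assumes abjad.db has been populated,
# e.g. via initialize_quran_database below):
#
#     match = find_shortest_sura_match(786)
#     if match:
#         print(match["words"], match["book"], match["chapter"], match["verse"])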


def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Creates an iframe HTML string for displaying a Quran verse."""
    logger.debug(f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}")
    # Use the surahquran.com URL format.
    url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    iframe = f'<iframe src="{url}" width="800" height="600"></iframe>'
    logger.debug(f"Generated iframe: {iframe}")
    return iframe
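
# Usage sketch:
#
#     html = create_quran_display_iframe("Al-Fatiha", chapter=1, verse=1)
#     # -> '<iframe src="https://surahquran.com/aya-1-sora-1.html" ...></iframe>'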


def get_sura_count() -> int:
    """Returns the number of sura JSON files found in texts/quran, falling
    back to 114 (the canonical number of suras) if the directory is missing."""
    base_path = "texts/quran"
    # Count the number of JSON files in the quran directory.
    try:
        files = [f for f in os.listdir(base_path) if f.endswith('.json')]
        return len(files)
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran.


def get_first_els_result_quran(gematria_sum: int, tlang: str = "en") -> Optional[Dict[str, Any]]:
    """
    Gets the first ELS (Equidistant Letter Sequence) result from the Quran,
    using the gematria sum as the skip step.

    Args:
        gematria_sum: The gematria value to use as the ELS step.
        tlang: Target language for results.

    Returns:
        The first ELS result found, or None.
    """
    from gematria import strip_diacritics

    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}")
    # Create a cache key.
    cache_key = f"quran_els_{gematria_sum}_{tlang}"
    cache_file = "els_cache.db"

    # Check the cache first.
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?",
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")
    # Cache miss: perform the ELS search.
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")

    # Load the full Quran text.
    sura_count = get_sura_count()
    quran_data = process_quran_files(1, sura_count)

    # Concatenate all verses from all suras into a single text.
    all_text = ""
    for sura_id, sura_info in sorted(quran_data.items()):
        # Add a space between suras to prevent cross-sura word formation.
        if all_text:
            all_text += " "
        # Add all verses from this sura.
        verses = sura_info['text']
        all_text += " ".join(verses)

    # Clean up the text: strip diacritics and drop non-letter characters.
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())

    # Perform the ELS search with gematria_sum as the step.
    result = None
    if clean_text:
        # Remove spaces for the ELS search.
        text_no_spaces = clean_text.replace(" ", "")
        # Map each character position back to its original sura and verse.
        char_map = []  # One (sura_id, sura_name, verse_idx) tuple per character.
        current_pos = 0
        for sura_id, sura_info in sorted(quran_data.items()):
            sura_name = sura_info['name']
            verses = sura_info['text']
            for verse_idx, verse in enumerate(verses, 1):
                # Apply the same cleaning as for text_no_spaces (diacritics
                # stripped, non-letter characters dropped) so the positions
                # stay aligned.
                cleaned_verse = ''.join(c for c in strip_diacritics(verse) if c.isalpha())
                for _ in cleaned_verse:
                    if current_pos < len(text_no_spaces):
                        char_map.append((sura_id, sura_name, verse_idx))
                        current_pos += 1
        # Try the first 100 start positions for better coverage.
        for start_pos in range(min(100, len(text_no_spaces))):
            # Extract characters at start_pos, start_pos + step, start_pos + 2 * step, ...
            extracted = ""
            positions = []
            pos = start_pos
            # Extract up to 7 characters (a typical ELS result length).
            for _ in range(7):
                if pos < len(text_no_spaces):
                    extracted += text_no_spaces[pos]
                    positions.append(pos)
                    pos += gematria_sum
                else:
                    break
            if len(extracted) >= 3:  # Require at least 3 characters.
                # Look up the sura/verse for the first and last character.
                first_pos = positions[0]
                last_pos = positions[-1]
                if first_pos < len(char_map) and last_pos < len(char_map):
                    first_loc = char_map[first_pos]
                    last_loc = char_map[last_pos]
                    result = {
                        "result_text": extracted,
                        "source": "Quran",
                        "start_position": start_pos,
                        "step": gematria_sum,
                        "start_sura": first_loc[0],
                        "start_sura_name": first_loc[1],
                        "start_verse": first_loc[2],
                        "end_sura": last_loc[0],
                        "end_sura_name": last_loc[1],
                        "end_verse": last_loc[2],
                        "positions": positions
                    }
                    break  # Found a result, stop searching.
                else:
                    logger.warning(
                        f"Character position mapping inconsistency: {first_pos}, {last_pos} vs {len(char_map)}")
    # Cache the result.
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                # Make sure the cache table exists.
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) "
                    "VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran",
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang}), json.dumps(result)))
                conn.commit()
                logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")

    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
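
# Usage sketch (hypothetical value; the first run performs the full search
# and caches the result in els_cache.db, later runs with the same arguments
# hit the cache):
#
#     els = get_first_els_result_quran(92)
#     if els:
#         print(els["result_text"], els["start_sura_name"], els["start_verse"])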


def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1):
    """
    Initializes the abjad database with Quran verses.

    Processes all Quran JSON files and adds the gematria values of their
    phrases to the database.

    Args:
        db_file: The SQLite database file to use.
        max_phrase_length: Maximum phrase length (in words) to process.
    """
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Progress bars for the per-sura loop.

    logger.info(f"Initializing Quran database: {db_file}")
    # Create the tables if they don't exist.
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create the results table.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create the processed_books table to track processing.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()

    # Process all Quran files.
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")

    # Global counter for word position tracking.
    total_word_count = 0
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each sura (book).
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data.
            sura_data = process_quran_files(sura_id, sura_id)
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                # Check whether this sura has already been processed.
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: Already processed with max_phrase_length {result[0]}")
                    # Still advance the global word counter so word positions
                    # in later suras stay consistent with a full run.
                    total_word_count += sum(len(v.split()) for v in sura_info['text'])
                    continue
                verses = sura_info['text']
                phrases_to_insert = []
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split the verse into words.
                    words = verse_text.split()
                    # Generate phrases of each length up to max_phrase_length.
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            # Calculate the global word position range.
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            # Add to the batch insert list.
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    # Update the total word count after each verse.
                    total_word_count += len(words)
                # Batch-insert the collected phrases.
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR IGNORE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Mark the sura as processed.
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")

    logger.info("Quran database initialization completed successfully")
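

if __name__ == "__main__":
    # Minimal end-to-end sketch: populate the database, then run the two
    # lookups defined above. The gematria value 786 is an arbitrary example.
    logging.basicConfig(level=logging.INFO)
    initialize_quran_database(max_phrase_length=1)
    print(find_shortest_sura_match(786))
    print(get_first_els_result_quran(786))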