# Spaces: Running  (HuggingFace Spaces status banner captured with the file; not code)
import json | |
import os | |
import logging | |
import sqlite3 | |
from typing import Dict, List, Any | |
logger = logging.getLogger(__name__) | |
def process_quran_files(start: int, end: int, base_path: str = "texts/quran") -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Each sura is expected at "<base_path>/NNN.json" (zero-padded sura ID) and to
    contain a "name" field plus a "verse" object mapping verse keys to verse text.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).
        base_path: Directory containing the per-sura JSON files.

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields. Suras whose files are missing
        or unreadable are logged and omitted.
    """
    results = {}

    def _verse_order(key: str):
        # Order verse keys by their embedded number (e.g. "verse_2" before
        # "verse_10"). A plain lexicographic sort would place "verse_10"
        # before "verse_2" and scramble the verse order.
        digits = ''.join(ch for ch in key if ch.isdigit())
        return (int(digits), key) if digits else (float('inf'), key)

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
            if data:
                # Extract name and verses
                name = data.get("name", "No title")
                verses = data.get("verse", {})
                text = [verses[key] for key in sorted(verses.keys(), key=_verse_order)]
                # Store sura ID as key and sura data as value
                results[i] = {"name": name, "text": text}
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except KeyError as e:
            logger.warning(f"Expected key 'verse' is missing in {file_name}: {e}")
    return results
def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Dict[str, Any]:
    """
    Finds the shortest Quran sura verse in abjad.db.

    Prefers an exact gematria match; when none exists, falls back to the
    entry whose gematria value is numerically closest. Psalms rows are
    excluded throughout (the table mixes both corpora).

    Args:
        gematria_sum: The gematria value to search for
        db_file: The database file to search in

    Returns:
        A dictionary containing the matched verse information, or None if the
        database has no Quran entries or no candidate row at all.
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")

    # Shared SELECT used by both the exact-match and closest-match paths:
    # shortest phrase first, ties broken by raw text length.
    shortest_query = '''
        SELECT words, book, chapter, verse, phrase_length, word_position
        FROM results
        WHERE gematria_sum = ? AND book != 'Psalms'
        ORDER BY phrase_length ASC, LENGTH(words) ASC
        LIMIT 1
    '''

    def _row_to_dict(row) -> Dict[str, Any]:
        # Map the SELECT column order onto the documented result keys.
        return {
            "words": row[0],
            "book": row[1],
            "chapter": row[2],
            "verse": row[3],
            "phrase_length": row[4],
            "word_position": row[5] if len(row) > 5 else None
        }

    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # First check if there are any Quran entries in the database
        cursor.execute('''
            SELECT COUNT(*)
            FROM results
            WHERE book != 'Psalms'
        ''')
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None
        # Search for a match, prioritizing shorter phrases
        cursor.execute(shortest_query, (gematria_sum,))
        result = cursor.fetchone()
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return _row_to_dict(result)
        # If no exact match, try to find the closest match
        # This is similar to how quran_network handles it
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) as diff
            FROM results
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            # Find the shortest verse with this gematria
            cursor.execute(shortest_query, (closest_gematria,))
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return _row_to_dict(result)
    logger.debug("No matching sura found.")
    return None
def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Builds the HTML <iframe> snippet that embeds a Quran verse viewer."""
    logger.debug(
        f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}"
    )
    # surahquran.com exposes one page per (chapter, verse) pair; the sura
    # name is only used for logging, never for the URL itself.
    verse_url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    embed_markup = f'<iframe src="{verse_url}" width="800" height="600"></iframe>'
    logger.debug(f"Generated iframe: {embed_markup}")
    return embed_markup
def get_sura_count() -> int:
    """Returns the total number of suras in the Quran."""
    base_path = "texts/quran"
    # Derive the count from the JSON files on disk; when the directory is
    # missing, fall back to the canonical count of 114.
    try:
        return sum(1 for entry in os.listdir(base_path) if entry.endswith('.json'))
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran
def get_first_els_result_quran(gematria_sum: int, tlang: str = "en", rounds_combination: str = "1,-1") -> Dict[str, Any]:
    """
    Gets the first ELS result from the Quran using the gematria sum as the step,
    following the same method as Torah ELS: combined +1/-1 rounds.

    For Quran, the implementation specifically:
    1. Takes +1 ELS round from the start of book 1 to the end of book 2
    2. Takes -1 ELS round from the end of book 2 to the start of book 1

    Results are cached in the ``els_cache.db`` SQLite database, keyed by a
    SHA-256 hash of the (gematria_sum, tlang, rounds_combination) cache key.

    Args:
        gematria_sum: The gematria value to use as the ELS step
        tlang: Target language for results
        rounds_combination: Comma-separated string of round directions, defaults to "1,-1"

    Returns:
        The first ELS result found (dict with result text, start/end sura and
        verse, letter positions, and the result's gematria sum) or None
    """
    # Local imports keep these out of module import time.
    import hashlib
    import json
    import math  # NOTE(review): unused in this function — candidate for removal
    from gematria import strip_diacritics, calculate_gematria
    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}, rounds_combination: {rounds_combination}")
    # Create a cache key including the rounds_combination
    cache_key = f"quran_els_{gematria_sum}_{tlang}_{rounds_combination}"
    cache_file = "els_cache.db"
    # Check cache first; any DB error is logged and treated as a cache miss.
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?",
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")
    # Cache miss, perform ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")
    # Load Quran text for books 1 and 2 only (based on the requirement)
    quran_data = process_quran_files(1, 2)  # Only books 1 and 2 as specified
    # Concatenate verses into a single text
    all_text = ""
    sura_verse_map = []  # Track (sura_id, sura_name, verse_idx) for each character
    for sura_id, sura_info in sorted(quran_data.items()):
        sura_name = sura_info['name']
        verses = sura_info['text']
        # Add a space between suras to prevent cross-sura word formation
        if all_text:
            all_text += " "
        # Add all verses from this sura and track the mapping
        sura_start_pos = len(all_text)
        all_text += " ".join(verses)
        # Track character positions to their original sura/verse for later lookup
        # NOTE(review): current_verse_start is never read after assignment.
        current_verse_start = sura_start_pos
        for verse_idx, verse in enumerate(verses, 1):
            for _ in range(len(verse) + (1 if verse_idx < len(verses) else 0)):  # Add 1 for space between verses
                sura_verse_map.append((sura_id, sura_name, verse_idx))
    # Clean up the text: strip diacritics, remove special characters
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())
    # Remove spaces for ELS search
    text_no_spaces = clean_text.replace(" ", "")
    text_length = len(text_no_spaces)
    if text_length == 0:
        logger.warning("No text available after cleaning")
        return None
    # Build a more accurate character map without spaces
    # NOTE(review): this indexes sura_verse_map with i (a clean_text index),
    # not char_idx; the map was built over all_text, so if strip_diacritics
    # changed the text length the two indexings diverge — TODO confirm.
    char_map = []
    char_idx = 0
    for i, c in enumerate(clean_text):
        if c.isalpha():
            if char_idx < len(sura_verse_map):
                char_map.append(sura_verse_map[i])
            char_idx += 1
    # Parse rounds combination - default is "1,-1"
    rounds_list = list(map(float, rounds_combination.split(',')))
    result = None
    complete_result = ""
    complete_positions = []
    first_position = None
    last_position = None
    # Process each round direction (similar to Torah ELS)
    for round_dir in rounds_list:
        # Determine if this is a forward or backward search
        is_forward = round_dir > 0
        start_index = 0 if is_forward else (text_length - 1)
        # Set step and direction
        step = gematria_sum
        direction = 1 if is_forward else -1
        # Extract ELS characters
        round_text = ""
        positions = []
        pos = start_index
        # Extract up to 10 characters, but we'll use at least 3 for a valid result
        for _ in range(10):
            if 0 <= pos < text_length:
                round_text += text_no_spaces[pos]
                positions.append(pos)
                pos += direction * step
            else:
                break
        if len(round_text) >= 3:
            # Save this round's results
            complete_result += round_text
            complete_positions.extend(positions)
            # Track first and last positions for the overall result.
            # NOTE(review): the direction guards (is_forward / not is_forward)
            # mean a backward round never lowers first_position and a forward
            # round never raises last_position — presumably intentional, but
            # worth confirming against the Torah ELS implementation.
            if first_position is None or (is_forward and positions[0] < first_position):
                first_position = positions[0]
                # first_loc/last_loc set here are recomputed below before use.
                first_loc = char_map[first_position] if first_position < len(char_map) else None
            if last_position is None or (not is_forward and positions[-1] > last_position):
                last_position = positions[-1]
                last_loc = char_map[last_position] if last_position < len(char_map) else None
    # Create result if we found something
    if complete_result and len(complete_result) >= 3 and first_position is not None and last_position is not None:
        if first_position < len(char_map) and last_position < len(char_map):
            first_loc = char_map[first_position]
            last_loc = char_map[last_position]
            result = {
                "result_text": complete_result,
                "source": "Quran",
                "step": gematria_sum,
                "start_sura": first_loc[0],
                "start_sura_name": first_loc[1],
                "start_verse": first_loc[2],
                "end_sura": last_loc[0],
                "end_sura_name": last_loc[1],
                "end_verse": last_loc[2],
                "positions": complete_positions,
                "rounds_combination": rounds_combination
            }
            # Calculate gematria of the result text
            result["result_sum"] = calculate_gematria(complete_result)
            logger.debug(f"Found ELS result: {complete_result} with gematria {result['result_sum']}")
        else:
            logger.warning(f"Character position mapping inconsistency: {first_position}, {last_position} vs {len(char_map)}")
    # Cache the result so identical queries are served from els_cache.db.
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                # Make sure the table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran",
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang, "rounds_combination": rounds_combination}), json.dumps(result)))
                conn.commit()
            logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")
    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1) -> None:
    """
    Initializes the abjad database with Quran verses.

    This function processes all Quran JSON files and adds their gematria values
    to the database. Suras recorded in ``processed_books`` with a
    max_phrase_length >= the requested one are skipped, so re-runs resume
    where an earlier run left off.

    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length to process
    """
    # Local imports: gematria is a project module, tqdm is only needed here.
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars
    logger.info(f"Initializing Quran database: {db_file}")
    # Create the database if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table; the composite primary key deduplicates
        # identical phrases via INSERT OR IGNORE below.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                gematria_sum INTEGER,
                words TEXT,
                translation TEXT,
                book TEXT,
                chapter INTEGER,
                verse INTEGER,
                phrase_length INTEGER,
                word_position TEXT,
                PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
            )
        ''')
        # Index supports the gematria_sum lookups in find_shortest_sura_match.
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_results_gematria
            ON results (gematria_sum)
        ''')
        # Create processed_books table to track processing
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS processed_books (
                book TEXT PRIMARY KEY,
                max_phrase_length INTEGER
            )
        ''')
        conn.commit()
    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")
    # Global counter for word position tracking across all suras.
    # NOTE(review): the skip path ("continue" below) does not advance this
    # counter, so word positions can differ between a fresh run and a
    # resumed run — TODO confirm whether that matters to consumers.
    total_word_count = 0
    # NOTE(review): book_names is populated but never read afterwards.
    book_names = {}
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Process each sura (book)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data (one sura at a time)
            sura_data = process_quran_files(sura_id, sura_id)
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                book_names[sura_id] = sura_name
                # Check if this sura has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: Already processed with max_phrase_length {result[0]}")
                    continue
                verses = sura_info['text']
                phrases_to_insert = []
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split verse into words
                    words = verse_text.split()
                    # Process phrases of different lengths (sliding windows)
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            # Gematria is computed over the phrase with spaces removed.
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            # Calculate word position range (1-based, global across suras)
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            # Add to batch insert list
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    # Update total word count after processing each verse
                    total_word_count += len(words)
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                            INSERT OR IGNORE INTO results
                            (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        # Update processed_books after processing each book
                        cursor.execute('''
                            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                            VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")
    logger.info("Quran database initialization completed successfully")