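"""Quran text utilities for the gematria toolchain.

Loads sura JSON files from texts/quran/, looks up gematria matches in the
abjad.db SQLite database, performs a simple ELS (equidistant letter
sequence) search over the Quran text, and initializes the database with
per-phrase gematria values.
"""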
import json
import os
import logging
import sqlite3
from typing import Dict, Any, Optional

logger = logging.getLogger(__name__)

def process_quran_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).

    Returns:
        A dictionary where keys are sura IDs and values are dictionaries
        containing 'name' and 'text' fields.
    """
    base_path = "texts/quran"
    results = {}

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
                if data:
                    # Extract name and verses
                    name = data.get("name", "No title")
                    verses = data.get("verse", {})
                    # Sort verse keys numerically (keys like "verse_1", "verse_2", ...);
                    # plain lexicographic sorting would put "verse_10" before "verse_2".
                    def verse_number(key: str) -> int:
                        digits = ''.join(ch for ch in key if ch.isdigit())
                        return int(digits) if digits else 0

                    text = [verses[key] for key in sorted(verses, key=verse_number)]

                    # Store sura ID as key and sura data as value
                    results[i] = {"name": name, "text": text}

        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")

    return results

def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest Quran phrase in abjad.db whose gematria matches the
    given sum, falling back to the phrase with the closest gematria sum if
    there is no exact match.

    Args:
        gematria_sum: The gematria value to search for
        db_file: The database file to search in

    Returns:
        A dictionary containing the matched phrase information, or None if
        no match is found
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")
    
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        
        # First check if there are any Quran entries in the database
        cursor.execute('''
            SELECT COUNT(*) 
            FROM results 
            WHERE book != 'Psalms'
        ''')
        
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None
            
        # Search for a match, prioritizing shorter phrases
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND book != 'Psalms'
            ORDER BY phrase_length ASC, LENGTH(words) ASC
            LIMIT 1
        ''', (gematria_sum,))
        result = cursor.fetchone()
        
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return {
                "words": result[0],
                "book": result[1],
                "chapter": result[2],
                "verse": result[3],
                "phrase_length": result[4],
                "word_position": result[5],
            }

        # If no exact match, try to find the closest match
        # This is similar to how quran_network handles it
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) as diff
            FROM results 
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            
            # Find the shortest verse with this gematria
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book != 'Psalms'
                ORDER BY phrase_length ASC, LENGTH(words) ASC
                LIMIT 1
            ''', (closest_gematria,))
            
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return {
                    "words": result[0],
                    "book": result[1],
                    "chapter": result[2],
                    "verse": result[3],
                    "phrase_length": result[4],
                    "word_position": result[5],
                }

        logger.debug("No matching sura found.")
        return None

def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Creates an iframe HTML string for displaying a Quran verse."""
    logger.debug(f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}")
    
    # Use surahquran.com URL format
    url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    iframe = f'<iframe src="{url}" width="800" height="600"></iframe>'
    
    logger.debug(f"Generated iframe: {iframe}")
    return iframe

def get_sura_count() -> int:
    """Returns the total number of suras in the Quran."""
    base_path = "texts/quran"
    
    # Count the number of JSON files in the quran directory
    try:
        files = [f for f in os.listdir(base_path) if f.endswith('.json')]
        return len(files)
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran


def get_first_els_result_quran(gematria_sum: int, tlang: str = "en", rounds_combination: str = "1,-1") -> Optional[Dict[str, Any]]:
    """
    Gets the first ELS result from the Quran using the gematria sum as the step,
    following the same method as Torah ELS: combined +1/-1 rounds.
    
    For Quran, the implementation specifically:
    1. Takes +1 ELS round from the start of book 1 to the end of book 2
    2. Takes -1 ELS round from the end of book 2 to the start of book 1
    
    Args:
        gematria_sum: The gematria value to use as the ELS step
        tlang: Target language for results (currently only used in the cache key)
        rounds_combination: Comma-separated string of round directions, defaults to "1,-1"
        
    Returns:
        The first ELS result found or None
    """
    import hashlib
    from gematria import strip_diacritics, calculate_gematria
    
    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}, rounds_combination: {rounds_combination}")
    
    # Create a cache key including the rounds_combination
    cache_key = f"quran_els_{gematria_sum}_{tlang}_{rounds_combination}"
    cache_file = "els_cache.db"
    
    # Check cache first
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?", 
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        # On a first run the cache table may not exist yet, which is expected.
        logger.debug(f"Cache lookup skipped: {e}")
    
    # Cache miss, perform ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")
    
    # Load Quran text for books (suras) 1 and 2 only, as the method requires
    quran_data = process_quran_files(1, 2)
    
    # Concatenate verses into a single text
    all_text = ""
    sura_verse_map = []  # Track (sura_id, sura_name, verse_idx) for each character
    
    for sura_id, sura_info in sorted(quran_data.items()):
        sura_name = sura_info['name']
        verses = sura_info['text']
        
        # Add a space between suras to prevent cross-sura word formation.
        # Record a map entry for the separator too, so that sura_verse_map
        # stays index-aligned with all_text.
        if all_text:
            all_text += " "
            sura_verse_map.append((sura_id, sura_name, 1))

        # Add all verses from this sura and track the mapping
        all_text += " ".join(verses)

        # Track each character position back to its sura/verse for later lookup
        for verse_idx, verse in enumerate(verses, 1):
            # +1 accounts for the joining space after every verse except the last
            for _ in range(len(verse) + (1 if verse_idx < len(verses) else 0)):
                sura_verse_map.append((sura_id, sura_name, verse_idx))
    
    # Clean up the text: strip diacritics, remove special characters
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())
    
    # Remove spaces for ELS search
    text_no_spaces = clean_text.replace(" ", "")
    text_length = len(text_no_spaces)
    
    if text_length == 0:
        logger.warning("No text available after cleaning")
        return None

    if gematria_sum <= 0:
        logger.warning(f"Invalid ELS step: {gematria_sum} (must be a positive integer)")
        return None
    
    # Map each letter of text_no_spaces back to a sura/verse. Indexing
    # sura_verse_map by the clean_text position is approximate when
    # strip_diacritics shortens the string, so guard against running
    # past the end of the map.
    char_map = []
    for i, c in enumerate(clean_text):
        if c.isalpha():
            if i < len(sura_verse_map):
                char_map.append(sura_verse_map[i])
    
    # Parse rounds combination - default is "1,-1"
    rounds_list = list(map(float, rounds_combination.split(',')))
    
    result = None
    complete_result = ""
    complete_positions = []
    first_position = None
    last_position = None
    
    # Process each round direction (similar to Torah ELS)
    for round_dir in rounds_list:
        # Determine if this is a forward or backward search
        is_forward = round_dir > 0
        start_index = 0 if is_forward else (text_length - 1)
        
        # Set step and direction
        step = gematria_sum
        direction = 1 if is_forward else -1
        
        # Extract ELS characters
        round_text = ""
        positions = []
        pos = start_index
        
        # Extract up to 10 characters, but we'll use at least 3 for a valid result
        for _ in range(10):
            if 0 <= pos < text_length:
                round_text += text_no_spaces[pos]
                positions.append(pos)
                pos += direction * step
            else:
                break
        
        if len(round_text) >= 3:
            # Save this round's results
            complete_result += round_text
            complete_positions.extend(positions)
            
            # Track first and last positions for the overall result
            # Track the overall first and last positions; the sura/verse
            # locations are resolved once, after all rounds are processed.
            if first_position is None or (is_forward and positions[0] < first_position):
                first_position = positions[0]

            if last_position is None or (not is_forward and positions[-1] > last_position):
                last_position = positions[-1]
    
    # Create result if we found something
    if complete_result and len(complete_result) >= 3 and first_position is not None and last_position is not None:
        if first_position < len(char_map) and last_position < len(char_map):
            first_loc = char_map[first_position]
            last_loc = char_map[last_position]
            
            result = {
                "result_text": complete_result,
                "source": "Quran",
                "step": gematria_sum,
                "start_sura": first_loc[0],
                "start_sura_name": first_loc[1],
                "start_verse": first_loc[2],
                "end_sura": last_loc[0],
                "end_sura_name": last_loc[1],
                "end_verse": last_loc[2],
                "positions": complete_positions,
                "rounds_combination": rounds_combination
            }
            
            # Calculate gematria of the result text
            result["result_sum"] = calculate_gematria(complete_result)
            
            logger.debug(f"Found ELS result: {complete_result} with gematria {result['result_sum']}")
        else:
            logger.warning(f"Character position mapping inconsistency: {first_position}, {last_position} vs {len(char_map)}")
    
    # Cache the result
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                
                # Make sure the table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran", 
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang, "rounds_combination": rounds_combination}), json.dumps(result)))
                conn.commit()
                logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")
    
    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
        
def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1) -> None:
    """
    Initializes the abjad database with Quran verses.
    This function processes all Quran JSON files and adds their gematria values to the database.
    
    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length to process
    """
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars
    
    logger.info(f"Initializing Quran database: {db_file}")
    
    # Create the database if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
        ''')
        
        cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
        ''')
        
        # Create processed_books table to track processing
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
        ''')
        
        conn.commit()
    
    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")
    
    # Global counter for word position tracking
    total_word_count = 0
    
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        
        # Process each sura (book)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data
            sura_data = process_quran_files(sura_id, sura_id)
            
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                
                verses = sura_info['text']

                # Check if this sura has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: Already processed with max_phrase_length {result[0]}")
                    # Keep the global word counter consistent so that the
                    # word_position ranges of later suras match a full run.
                    total_word_count += sum(len(v.split()) for v in verses)
                    continue

                phrases_to_insert = []
                
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split verse into words
                    words = verse_text.split()
                    
                    # Process phrases of different lengths
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            
                            # Calculate word position range
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            
                            # Add to batch insert list
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    
                    # Update total word count after processing each verse
                    total_word_count += len(words)
                    
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                        INSERT OR IGNORE INTO results 
                        (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        
                        # Update processed_books after processing each book
                        cursor.execute('''
                        INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                        VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")
    
    logger.info("Quran database initialization completed successfully")