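"""Utilities for Quran gematria lookups and ELS searches.

Loads Quran JSON files from texts/quran/, populates and queries the
abjad.db SQLite database of gematria sums, and performs simple
equidistant letter sequence (ELS) searches over the concatenated
Quran text.
"""
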
import hashlib
import json
import logging
import os
import sqlite3
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)

def process_quran_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Processes Quran JSON files and returns a dictionary mapping sura IDs to their data.

    Args:
        start: The starting sura ID (inclusive).
        end: The ending sura ID (inclusive).

    Returns:
        A dictionary mapping each sura ID to a dictionary with 'name'
        (the sura title) and 'text' (its verses, in order).
    """
    base_path = "texts/quran"
    results = {}

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:03d}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
                if data:
                    # Extract name and verses
                    name = data.get("name", "No title")
                    verses = data.get("verse", {})
                    # Sort verse keys by their numeric suffix (keys look like
                    # "verse_1", "verse_2", ...); plain lexicographic sorting
                    # would put "verse_10" before "verse_2".
                    ordered_keys = sorted(
                        verses,
                        key=lambda k: int("".join(ch for ch in k if ch.isdigit()) or 0),
                    )
                    text = [verses[key] for key in ordered_keys]

                    # Store sura ID as key and sura data as value
                    results[i] = {"name": name, "text": text}

        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except KeyError as e:
            logger.warning(f"Expected key 'verse' is missing in {file_name}: {e}")

    return results
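
# Illustrative usage, assuming JSON files under texts/quran/ shaped like
# {"name": "...", "verse": {"verse_1": "...", "verse_2": "..."}}
# (e.g. texts/quran/001.json):
#
#   suras = process_quran_files(1, 3)
#   for sura_id, info in suras.items():
#       print(sura_id, info["name"], len(info["text"]), "verses")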

def find_shortest_sura_match(gematria_sum: int, db_file: str = 'abjad.db') -> Optional[Dict[str, Any]]:
    """
    Finds the shortest Quran phrase in the database whose gematria sum
    matches the given value, falling back to the closest available
    gematria sum when there is no exact match.

    Args:
        gematria_sum: The gematria value to search for.
        db_file: The SQLite database file to search in.

    Returns:
        A dictionary describing the matched phrase, or None if the
        database contains no Quran entries or no match is found.
    """
    logger.debug(f"Entering find_shortest_sura_match with gematria_sum: {gematria_sum}")
    
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        
        # First check if there are any Quran entries in the database
        cursor.execute('''
            SELECT COUNT(*) 
            FROM results 
            WHERE book != 'Psalms'
        ''')
        
        count = cursor.fetchone()[0]
        if count == 0:
            logger.warning("No Quran entries found in database. Run initialize_quran_db.py first.")
            return None
            
        # Search for a match, prioritizing shorter phrases
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND book != 'Psalms'
            ORDER BY phrase_length ASC, LENGTH(words) ASC
            LIMIT 1
        ''', (gematria_sum,))
        result = cursor.fetchone()
        
        if result:
            logger.debug(f"Shortest sura match found: {result}")
            return {
                "words": result[0], 
                "book": result[1], 
                "chapter": result[2], 
                "verse": result[3], 
                "phrase_length": result[4],
                "word_position": result[5] if len(result) > 5 else None
            }

        # If no exact match, try to find the closest match
        # This is similar to how quran_network handles it
        cursor.execute('''
            SELECT gematria_sum, ABS(gematria_sum - ?) as diff
            FROM results 
            WHERE book != 'Psalms'
            GROUP BY gematria_sum
            ORDER BY diff ASC
            LIMIT 1
        ''', (gematria_sum,))
        
        closest = cursor.fetchone()
        if closest:
            closest_gematria = closest[0]
            logger.debug(f"No exact match found. Closest gematria: {closest_gematria}")
            
            # Find the shortest verse with this gematria
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book != 'Psalms'
                ORDER BY phrase_length ASC, LENGTH(words) ASC
                LIMIT 1
            ''', (closest_gematria,))
            
            result = cursor.fetchone()
            if result:
                logger.debug(f"Closest sura match found: {result}")
                return {
                    "words": result[0], 
                    "book": result[1], 
                    "chapter": result[2], 
                    "verse": result[3], 
                    "phrase_length": result[4],
                    "word_position": result[5] if len(result) > 5 else None
                }

        logger.debug("No matching sura found.")
        return None
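
# Illustrative usage, assuming abjad.db has been populated via
# initialize_quran_database() below:
#
#   match = find_shortest_sura_match(786)
#   if match:
#       print(match["book"], match["chapter"], match["verse"], "->", match["words"])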

def create_quran_display_iframe(sura_name: str, chapter: int, verse: int) -> str:
    """Creates an iframe HTML string for displaying a Quran verse."""
    logger.debug(f"Creating Quran display iframe for sura: {sura_name}, chapter: {chapter}, verse: {verse}")
    
    # Use surahquran.com URL format
    url = f"https://surahquran.com/aya-{verse}-sora-{chapter}.html"
    iframe = f'<iframe src="{url}" width="800" height="600"></iframe>'
    
    logger.debug(f"Generated iframe: {iframe}")
    return iframe
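
# Illustrative usage:
#
#   html = create_quran_display_iframe("Al-Fatiha", chapter=1, verse=1)
#   # yields: <iframe src="https://surahquran.com/aya-1-sora-1.html"
#   #         width="800" height="600"></iframe>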

def get_sura_count() -> int:
    """Returns the total number of suras in the Quran."""
    base_path = "texts/quran"
    
    # Count the number of JSON files in the quran directory
    try:
        files = [f for f in os.listdir(base_path) if f.endswith('.json')]
        return len(files)
    except FileNotFoundError:
        logger.error(f"Directory {base_path} not found.")
        return 114  # Default number of suras in the Quran


def get_first_els_result_quran(gematria_sum: int, tlang: str = "en") -> Optional[Dict[str, Any]]:
    """
    Gets the first ELS (equidistant letter sequence) result from the
    Quran, using the gematria sum as the skip step.

    Args:
        gematria_sum: The gematria value to use as the ELS step.
        tlang: Target language for results.

    Returns:
        A dictionary describing the first ELS result found, or None.
    """
    # Local import to avoid a potential circular dependency at module load.
    from gematria import strip_diacritics
    
    logger.debug(f"Entering get_first_els_result_quran with gematria_sum: {gematria_sum}, tlang: {tlang}")
    
    # Create a cache key
    cache_key = f"quran_els_{gematria_sum}_{tlang}"
    cache_file = "els_cache.db"
    
    # Check cache first
    try:
        with sqlite3.connect(cache_file) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?", 
                (hashlib.sha256(cache_key.encode()).hexdigest(),))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for Quran ELS query: {cache_key}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")
    
    # Cache miss, perform ELS search
    logger.info(f"Cache miss for Quran ELS query: {cache_key}, performing search")
    
    # Load all Quran text
    sura_count = get_sura_count()
    quran_data = process_quran_files(1, sura_count)
    
    # Concatenate all verses from all suras into a single text
    all_text = ""
    for sura_id, sura_info in sorted(quran_data.items()):
        # Add a space between suras to prevent cross-sura word formation
        if all_text:
            all_text += " "
        
        # Add all verses from this sura
        verses = sura_info['text']
        all_text += " ".join(verses)
    
    # Clean up the text: strip diacritics, remove any special characters, etc.
    clean_text = strip_diacritics(all_text)
    clean_text = ''.join(c for c in clean_text if c.isalpha() or c.isspace())
    
    # Perform ELS search with the gematria_sum as the step
    result = None
    if clean_text:
        # Remove spaces for ELS search
        text_no_spaces = clean_text.replace(" ", "")
        
        # Track character positions to their original sura/verse
        char_map = []  # (sura_id, sura_name, verse_idx) for each character
        
        # Build character position mapping
        current_pos = 0
        for sura_id, sura_info in sorted(quran_data.items()):
            sura_name = sura_info['name']
            verses = sura_info['text']
            
            for verse_idx, verse in enumerate(verses, 1):
                # Apply the same cleaning as text_no_spaces (diacritics and
                # non-letter characters removed) so positions stay aligned.
                cleaned_verse = ''.join(c for c in strip_diacritics(verse) if c.isalpha())
                for _ in cleaned_verse:
                    if current_pos < len(text_no_spaces):
                        char_map.append((sura_id, sura_name, verse_idx))
                        current_pos += 1
        
        # Start positions to try (we'll try the first 100 positions for better coverage)
        for start_pos in range(min(100, len(text_no_spaces))):
            # Extract characters at positions: start_pos, start_pos+step, start_pos+2*step, etc.
            extracted = ""
            positions = []
            pos = start_pos
            
            # Extract up to 7 characters (typical ELS result length)
            for _ in range(7):
                if pos < len(text_no_spaces):
                    extracted += text_no_spaces[pos]
                    positions.append(pos)
                    pos += gematria_sum
                else:
                    break
            
            if len(extracted) >= 3:  # At least 3 characters
                # Look up the sura/verse for the first and last character
                first_pos = positions[0]
                last_pos = positions[-1]
                
                if first_pos < len(char_map) and last_pos < len(char_map):
                    first_loc = char_map[first_pos]
                    last_loc = char_map[last_pos]
                    
                    result = {
                        "result_text": extracted,
                        "source": "Quran",
                        "start_position": start_pos,
                        "step": gematria_sum,
                        "start_sura": first_loc[0],
                        "start_sura_name": first_loc[1],
                        "start_verse": first_loc[2],
                        "end_sura": last_loc[0],
                        "end_sura_name": last_loc[1],
                        "end_verse": last_loc[2],
                        "positions": positions
                    }
                    break  # Found a result, stop searching
                else:
                    logger.warning(f"Character position mapping inconsistency: {first_pos}, {last_pos} vs {len(char_map)}")
                    continue
    
    # Cache the result
    if result:
        try:
            with sqlite3.connect(cache_file) as conn:
                cursor = conn.cursor()
                
                # Make sure the table exists
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS els_cache (
                        query_hash TEXT PRIMARY KEY,
                        function_name TEXT,
                        args TEXT,
                        kwargs TEXT,
                        results TEXT
                    )
                ''')
                
                cursor.execute(
                    "INSERT OR REPLACE INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                    (hashlib.sha256(cache_key.encode()).hexdigest(), "get_first_els_result_quran", 
                     json.dumps([gematria_sum]), json.dumps({"tlang": tlang}), json.dumps(result)))
                conn.commit()
                logger.debug("Cached Quran ELS results in database.")
        except sqlite3.Error as e:
            logger.error(f"Database error caching results: {e}")
    
    logger.debug(f"Exiting get_first_els_result_quran, returning: {result}")
    return result
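
# Illustrative usage (assumes the texts/quran/ JSON files are present and
# gematria.strip_diacritics is importable):
#
#   els = get_first_els_result_quran(113)
#   if els:
#       print(els["result_text"], "step:", els["step"],
#             "from", els["start_sura_name"], "to", els["end_sura_name"])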
        
def initialize_quran_database(db_file: str = 'abjad.db', max_phrase_length: int = 1) -> None:
    """
    Initializes the abjad database with Quran verses.
    This function processes all Quran JSON files and adds their gematria values to the database.
    
    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length to process
    """
    # Local imports: gematria to avoid a potential circular dependency,
    # tqdm for the per-sura progress bar.
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm
    
    logger.info(f"Initializing Quran database: {db_file}")
    
    # Create the database if it doesn't exist
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        # Create results table
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
        ''')
        
        cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
        ''')
        
        # Create processed_books table to track processing
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
        ''')
        
        conn.commit()
    
    # Process all Quran files
    sura_count = get_sura_count()
    logger.info(f"Found {sura_count} suras to process")
    
    # Global counter for word position tracking
    total_word_count = 0
    
    with sqlite3.connect(db_file) as conn:
        cursor = conn.cursor()
        
        # Process each sura (book)
        for sura_id in tqdm(range(1, sura_count + 1), desc="Processing Suras"):
            # Load sura data
            sura_data = process_quran_files(sura_id, sura_id)
            
            if sura_id in sura_data:
                sura_info = sura_data[sura_id]
                sura_name = sura_info['name']
                
                # Check if this sura has already been processed
                cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (sura_name,))
                result = cursor.fetchone()
                if result and result[0] >= max_phrase_length:
                    logger.info(f"Skipping sura {sura_name}: Already processed with max_phrase_length {result[0]}")
                    # Keep the global word counter consistent when skipping a
                    # sura, so word_position ranges stay stable across
                    # partial re-runs.
                    total_word_count += sum(len(v.split()) for v in sura_info['text'])
                    continue
                
                verses = sura_info['text']
                phrases_to_insert = []
                
                for verse_idx, verse_text in enumerate(verses, 1):
                    # Split verse into words
                    words = verse_text.split()
                    
                    # Process phrases of different lengths
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))
                            
                            # Calculate word position range
                            word_position_range = f"{total_word_count + start + 1}-{total_word_count + start + length}"
                            
                            # Add to batch insert list
                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", sura_name, sura_id, verse_idx, length, word_position_range)
                            )
                    
                    # Update total word count after processing each verse
                    total_word_count += len(words)
                    
                # If we have phrases to insert, do a batch insert
                if phrases_to_insert:
                    try:
                        cursor.executemany('''
                        INSERT OR IGNORE INTO results 
                        (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        ''', phrases_to_insert)
                        
                        # Update processed_books after processing each book
                        cursor.execute('''
                        INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                        VALUES (?, ?)
                        ''', (sura_name, max_phrase_length))
                        
                        conn.commit()
                    except sqlite3.Error as e:
                        logger.error(f"Database error: {e} for sura {sura_id}")
            else:
                logger.warning(f"Sura {sura_id} not found in processed data")
    
    logger.info("Quran database initialization completed successfully")