File size: 1,496 Bytes
79c7b05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import requests
import psycopg2
import math

def chunk_text(text, chunk_size=4096):
    """Split *text* into chunks of at most *chunk_size* words.

    Words are obtained with ``str.split()``, so any run of whitespace
    (spaces, tabs, newlines) is collapsed and the words of each chunk are
    re-joined with single spaces. The original line structure of the text is
    therefore not preserved.

    Args:
        text: The string to split.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings, in order. Empty when *text* has no words.

    Raises:
        ValueError: If *chunk_size* is less than 1.
    """
    if chunk_size < 1:
        # A negative step would make range() empty and silently drop every
        # word; zero would fail with an unrelated-looking range() error.
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

def is_screenplay_in_db(cursor, title):
    """Return whether chunks of the screenplay *title* exist in ``items``.

    Looks for rows whose ``title`` column has the form ``<title>_chunk_<n>``,
    which is the naming used when chunks are uploaded by this script.

    A bare prefix match (``<title>%``) would report "Alien" as present as
    soon as "Aliens_chunk_1" exists, and any ``%`` or ``_`` inside the title
    would act as a LIKE wildcard. Both are avoided by escaping the title and
    anchoring on the ``_chunk_`` suffix.

    Args:
        cursor: A DB-API cursor (``execute`` / ``fetchone``) using the
            ``%s`` parameter style, e.g. a psycopg2 cursor.
        title: Screenplay title without the ``_chunk_<n>`` suffix.

    Returns:
        True if at least one matching row exists, otherwise False.
    """
    # '!' is declared as the escape character in the query so the pattern
    # does not depend on the server's backslash-handling settings.
    escaped_title = (
        title.replace("!", "!!").replace("%", "!%").replace("_", "!_")
    )
    pattern = f"{escaped_title}!_chunk!_%"

    cursor.execute(
        "SELECT COUNT(*) FROM items WHERE title LIKE %s ESCAPE '!'",
        (pattern,),
    )
    return cursor.fetchone()[0] > 0

def send_screenplay(
    file_path,
    cursor,
    *,
    endpoint="http://localhost:8080/add_document",
    timeout=30,
):
    """Upload one screenplay file to the document service in word chunks.

    The screenplay title is the file name without its extension. If
    ``is_screenplay_in_db`` reports the title as already present, nothing is
    sent. Otherwise the file content is split with ``chunk_text`` and each
    chunk is POSTed as JSON with the keys ``title`` (``<title>_chunk_<n>``,
    1-based) and ``doc_text``.

    Args:
        file_path: Path to a UTF-8 encoded text file.
        cursor: Database cursor passed to ``is_screenplay_in_db``.
        endpoint: URL that receives the POST requests.
        timeout: Seconds to wait for each request before giving up, so an
            unresponsive service cannot hang the run forever.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        requests.RequestException: If a request fails at the transport level
            (connection error, timeout); remaining chunks are not sent.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    title = os.path.splitext(os.path.basename(file_path))[0]

    if is_screenplay_in_db(cursor, title):
        print(f"Screenplay '{title}' already exists in the database. Skipping.")
        return

    for chunk_number, chunk in enumerate(chunk_text(content), start=1):
        payload = {
            "title": f"{title}_chunk_{chunk_number}",
            "doc_text": chunk,
        }
        response = requests.post(endpoint, json=payload, timeout=timeout)
        print(f"Chunk {chunk_number} response: {response.status_code}")
        if not response.ok:
            # Make rejected chunks visible: a later run sees the chunks that
            # did succeed and skips this screenplay, so the gap would
            # otherwise go unnoticed.
            print(
                f"Warning: chunk {chunk_number} of '{title}' was not accepted "
                f"(HTTP {response.status_code})."
            )

def process_scripts_folder(scripts_folder="scripts", dsn=None):
    """Upload every ``.txt`` screenplay found in *scripts_folder*.

    Opens one PostgreSQL connection, calls ``send_screenplay`` for each
    ``.txt`` file in the folder (in sorted file-name order, so runs are
    reproducible), and always closes the cursor and connection afterwards,
    even when a file fails.

    Args:
        scripts_folder: Directory to scan (not recursive).
        dsn: libpq connection string. When None, the ``RAGTAG_DSN``
            environment variable is used if set, otherwise the built-in
            development default.

    Raises:
        FileNotFoundError: If *scripts_folder* does not exist.
        psycopg2.Error: If the database connection cannot be established.
    """
    if dsn is None:
        # NOTE(review): the fallback embeds a password in source control; set
        # RAGTAG_DSN (or pass dsn) in any shared or production environment.
        dsn = os.environ.get(
            "RAGTAG_DSN",
            "dbname=ragtag user=jc password=!1newmedia host=localhost",
        )

    conn = psycopg2.connect(dsn)
    try:
        cursor = conn.cursor()
        try:
            for filename in sorted(os.listdir(scripts_folder)):
                if filename.endswith(".txt"):
                    file_path = os.path.join(scripts_folder, filename)
                    send_screenplay(file_path, cursor)
        finally:
            cursor.close()
    finally:
        # Close even if a file raised, so the connection is not leaked.
        conn.close()

# Run the upload only when executed as a script, not when imported.
if __name__ == "__main__":
    process_scripts_folder()