Spaces:
Runtime error
Runtime error
File size: 1,496 Bytes
79c7b05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import os
import requests
import psycopg2
import math
def chunk_text(text: str, chunk_size: int = 4096) -> list:
    """Split *text* into chunks of at most *chunk_size* words.

    Words are produced by ``str.split()``, so any run of whitespace
    (spaces, tabs, newlines) becomes a single space inside a chunk.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.

    Returns:
        A list of chunk strings in original word order. The list is empty
        when *text* contains no words.

    Raises:
        ValueError: If *chunk_size* is less than 1.
    """
    if chunk_size < 1:
        # A negative step makes range() empty, which would silently drop the
        # whole text; zero fails with an unrelated-looking range() error.
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
def is_screenplay_in_db(cursor, title):
    """Return True if chunks of the screenplay *title* are already stored.

    ``send_screenplay`` stores each chunk under ``<title>_chunk_<n>``, so this
    looks for rows in ``items`` whose title starts with ``<title>_chunk_``.
    Matching the full ``_chunk_`` marker (rather than just the title prefix)
    keeps a title such as "Alien" from being reported as present merely
    because "Aliens_chunk_1" exists.

    Args:
        cursor: An open DB-API cursor using the ``%s`` parameter style.
        title: Screenplay title (file name without extension).

    Returns:
        True when at least one matching row exists, otherwise False.
    """
    # '%' and '_' are LIKE wildcards; escape them so a title such as
    # "star_wars" only matches itself. An explicit ESCAPE character is used so
    # the result does not depend on the server's backslash handling.
    escaped_title = (
        title.replace("!", "!!").replace("%", "!%").replace("_", "!_")
    )
    pattern = f"{escaped_title}!_chunk!_%"
    cursor.execute(
        "SELECT COUNT(*) FROM items WHERE title LIKE %s ESCAPE '!'",
        (pattern,),
    )
    return cursor.fetchone()[0] > 0
def send_screenplay(
    file_path,
    cursor,
    endpoint="http://localhost:8080/add_document",
    timeout=120,
):
    """Upload one screenplay file to the document service in word chunks.

    The title is the file name without its extension. If the database already
    holds that screenplay (per ``is_screenplay_in_db``) the file is skipped.
    Otherwise the text is split with ``chunk_text`` and each chunk is POSTed
    as JSON with the keys ``title`` (``<title>_chunk_<n>``, 1-based) and
    ``doc_text``. The HTTP status code of every request is printed.

    Args:
        file_path: Path of the UTF-8 text file to upload.
        cursor: Open database cursor used for the duplicate check.
        endpoint: URL that receives each chunk.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        OSError: If the file cannot be read.
        requests.RequestException: If a request fails or times out.
    """
    title = os.path.splitext(os.path.basename(file_path))[0]

    # Check for duplicates first so skipped files are never read from disk.
    if is_screenplay_in_db(cursor, title):
        print(f"Screenplay '{title}' already exists in the database. Skipping.")
        return

    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()

    for number, chunk in enumerate(chunk_text(content), start=1):
        payload = {
            "title": f"{title}_chunk_{number}",
            "doc_text": chunk,
        }
        # Without a timeout a stalled server would hang the whole run forever.
        response = requests.post(endpoint, json=payload, timeout=timeout)
        print(f"Chunk {number} response: {response.status_code}")
def process_scripts_folder(scripts_folder="scripts", dsn=None):
    """Upload every ``.txt`` screenplay found in *scripts_folder*.

    Opens one PostgreSQL connection, passes each ``.txt`` file to
    ``send_screenplay`` in sorted file-name order, and always closes the
    cursor and connection afterwards, even if a file fails.

    Args:
        scripts_folder: Directory to scan (not recursive).
        dsn: libpq connection string. When omitted, the script's original
            built-in connection string is used.
    """
    if dsn is None:
        # NOTE(review): credentials are hard-coded in source; pass ``dsn``
        # from configuration or the environment and rotate this password.
        dsn = "dbname=ragtag user=jc password=!1newmedia host=localhost"

    conn = psycopg2.connect(dsn)
    try:
        cursor = conn.cursor()
        try:
            # os.listdir() order is arbitrary; sort for reproducible runs.
            for filename in sorted(os.listdir(scripts_folder)):
                if not filename.endswith(".txt"):
                    continue
                send_screenplay(os.path.join(scripts_folder, filename), cursor)
        finally:
            cursor.close()
    finally:
        conn.close()
if __name__ == "__main__":
    # Run the upload only when executed as a script, not when imported.
    process_scripts_folder()