Spaces:
Sleeping
Sleeping
import sqlite3 | |
import os | |
from datetime import datetime | |
def initialize_database():
    """
    Initialize the SQLite database and create the 'documents' table if it doesn't exist.

    This function performs the following steps:
    1. Connects to the SQLite database file 'dataset.db' (a relative path, so it
       is resolved against the current working directory); SQLite creates the
       file if it doesn't exist.
    2. Creates the 'documents' table with the following columns:
       - `id`: An auto-incrementing integer primary key.
       - `text`: The main text content of the document (required, non-nullable).
       - `topics`: A string representing associated topics (optional).
       - `date`: A timestamp indicating when the row was inserted
         (default: current timestamp).
    3. Commits the changes and closes the connection.

    The statement uses `CREATE TABLE IF NOT EXISTS`, so calling this function
    repeatedly is safe and leaves an existing table (and its rows) untouched.

    Raises:
    -------
    sqlite3.Error
        If the database cannot be opened or the statement fails. The
        connection is closed before the exception propagates.

    Example:
    --------
    >>> initialize_database()
    # Creates 'dataset.db' (if missing) containing the 'documents' table.
    """
    conn = sqlite3.connect('dataset.db')
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                text TEXT NOT NULL,
                topics TEXT,
                date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.commit()
    finally:
        # Always release the file handle, even when the DDL or commit fails;
        # otherwise the connection (and its lock on the file) would linger.
        conn.close()
from huggingface_hub import HfApi | |
def commit_to_huggingface():
    """
    Upload the local 'dataset.db' file to the Hugging Face Space repository.

    The access token is read from the `hf_key` environment variable and passed
    to `HfApi`. The file is uploaded to the path 'dataset.db' inside the Space
    'Danielrahmai1991/dataset_interface'. Nothing is returned; the result of
    `upload_file` is discarded.
    """
    # Replace with your Space's repository name
    space_repo = "Danielrahmai1991/dataset_interface"

    hub = HfApi(token=os.getenv("hf_key"))

    # Upload and commit the database file to the Space repository
    hub.upload_file(
        repo_type="space",
        repo_id=space_repo,
        path_in_repo="dataset.db",
        path_or_fileobj="dataset.db",
    )
def save_to_db(chunks, topics=None):
    """
    Save chunks of text to the SQLite database.

    This function performs the following steps:
    1. Ensures the database and 'documents' table exist by calling
       `initialize_database`.
    2. Connects to the SQLite database 'dataset.db'.
    3. Inserts one row per chunk into the 'documents' table:
       - The `text` column stores the chunk of text.
       - The `topics` column stores the associated topics (optional); the same
         value is used for every chunk in this call.
       - The `date` column is not set here, so the table's default applies.
    4. Commits the changes and closes the connection.
    5. Calls `commit_to_huggingface` to upload the database file.

    All rows are inserted in a single transaction: if any insert fails, nothing
    from this call is committed, the connection is still closed, and the upload
    is skipped because the exception propagates.

    Parameters:
    ----------
    chunks : iterable of str
        The text chunks to be saved to the database.
    topics : str or None, optional
        A string representing the topics associated with the chunks.
        Defaults to None.

    Raises:
    -------
    sqlite3.Error
        If an insert or the commit fails (for example, a chunk of `None`
        violates the NOT NULL constraint on `text`).

    Example:
    --------
    >>> save_to_db(["This is the first chunk.", "This is the second chunk."], "Example Topics")
    # Saves two rows to the 'documents' table with the provided text and topics.
    """
    # Ensure the database and table are initialized
    initialize_database()

    conn = sqlite3.connect('dataset.db')
    try:
        # Parameterized bulk insert; values are bound, never interpolated.
        conn.executemany(
            'INSERT INTO documents (text, topics) VALUES (?, ?)',
            ((chunk, topics) for chunk in chunks),
        )
        conn.commit()
    finally:
        # Close even on failure so the connection does not leak; closing
        # without a commit discards the partial transaction.
        conn.close()

    # Only reached after a successful commit.
    commit_to_huggingface()