DamarJati's picture
Tag character selection and processing with DuckDB
ae4be59 verified
raw
history blame
3.22 kB
import gradio as gr
import pandas as pd
import duckdb
# Maps the dataset label offered in the UI dropdown to the Parquet file
# that is read when that label is selected.
PARQUET_FILES = {
    "Animagine XL 3.1": "data.parquet",
}
# Example tag strings: used as the fallback search terms when the search box
# is blank, and as the initial content of the results table.
preset_values = [
    "1girl, kamado nezuko, kimetsu no yaiba",
    "1girl, kanroji mitsuri, kimetsu no yaiba",
    "1girl, fern (sousou no frieren), sousou no frieren",
    "1girl, elaina (majo no tabitabi), majo no tabitabi",
]
def search_data(search_term, selected_file, use_duckdb=False):
    """Search the ``teks`` column of the selected Parquet file.

    Matching is a case-insensitive, *literal* substring test in both
    backends: regex metacharacters (pandas) and ``%``/``_`` wildcards (SQL)
    in the search text are matched as ordinary characters, so tags that
    contain parentheses behave the same whichever backend is used.

    Args:
        search_term: Text to look for. When it is ``None``, empty or only
            whitespace, rows matching any entry of ``preset_values`` are
            returned instead.
        selected_file: Key of ``PARQUET_FILES`` naming the file to search.
        use_duckdb: If true, filter with DuckDB; otherwise with pandas.

    Returns:
        A DataFrame with the matching rows, limited to the first 12 columns.
        On failure, a one-column DataFrame named ``Error`` that carries a
        human-readable message (exceptions are not propagated).
    """
    try:
        parquet_path = PARQUET_FILES[selected_file]

        # Treat a missing (None) term like a blank one instead of crashing.
        lowered_term = (search_term or "").lower()
        if lowered_term.strip():
            needles = [lowered_term]
        else:
            needles = [preset.lower() for preset in preset_values]

        if use_duckdb:
            # One bound placeholder per needle: user text never becomes part
            # of the SQL string, so quotes in it cannot break or alter the
            # query.
            where_clause = " OR ".join(
                ["contains(lower(teks), ?)"] * len(needles)
            )
            # The path comes from the module-level mapping, not from the
            # user, but quotes are still doubled so it stays a valid literal.
            quoted_path = parquet_path.replace("'", "''")
            con = duckdb.connect(database=':memory:', read_only=False)
            try:
                results = con.execute(
                    f"SELECT * FROM read_parquet('{quoted_path}') "
                    f"WHERE {where_clause}",
                    needles,
                ).fetchdf()
            finally:
                # Release the connection even when the query fails.
                con.close()
        else:
            df = pd.read_parquet(parquet_path)
            lowered_column = df['teks'].str.lower()
            matches = pd.Series(False, index=df.index)
            for needle in needles:
                # regex=False: "(" and ")" inside tags must match literally.
                matches |= lowered_column.str.contains(
                    needle, regex=False, na=False
                )
            results = df[matches]

        # Keep the result table narrow: at most the first 12 columns.
        if len(results.columns) > 12:
            results = results.iloc[:, :12]
        return results
    except FileNotFoundError:
        return pd.DataFrame({'Error': ['Parquet file not found. Please check the file path.']})
    except Exception as e:
        # UI boundary: report the problem in the results table rather than
        # letting the exception escape into the event handler.
        return pd.DataFrame({'Error': [f'An error occurred: {e}']})
if __name__ == "__main__":
    # NOTE(review): the original indentation was lost; the Row/Column nesting
    # below is reconstructed from the inline comments -- confirm the layout.
    with gr.Blocks() as app:
        gr.Markdown("## Text Search for Animagine tag characters")

        # Row holding the dataset dropdown and the backend checkbox.
        with gr.Row():
            dataset_names = list(PARQUET_FILES)
            file_dropdown = gr.Dropdown(
                choices=dataset_names,
                label="Select Parquet File",
                value=dataset_names[0],  # first dataset is the default
            )
            # Backend toggle; DuckDB is enabled by default.
            use_duckdb_checkbox = gr.Checkbox(label="Use DuckDB", value=True)

        with gr.Column():
            search_input = gr.Textbox(
                label="Search for characters or series:",
                placeholder="sousou no frieren",
            )
            # The table initially lists the preset tags until a search runs.
            search_output = gr.Dataframe(
                label="Search Results",
                value=pd.DataFrame({'Characters tag': preset_values}),
                headers="auto",
            )

        # Every control feeds the same three values into search_data.
        inputs = [search_input, file_dropdown, use_duckdb_checkbox]

        # Re-run the search whenever the text, the dataset or the backend
        # toggle changes.
        for component in (search_input, file_dropdown, use_duckdb_checkbox):
            component.change(
                fn=search_data,
                inputs=inputs,
                outputs=search_output,
            )

    app.launch()