Update database.py
database.py CHANGED (+86 -92)
@@ -1,102 +1,96 @@
-from sqlalchemy import (
-    create_engine,
-    MetaData,
-    Table,
-    Column,
-    String,
-    Integer,
-    Float,
-    insert,
-    text,
-)
-
-# Use a persistent SQLite database file
-engine = create_engine("sqlite:///database.db")
-metadata_obj = MetaData()
-
-def create_dynamic_table(df):
-    """
-    Creates a table dynamically based on DataFrame schema.
-
-    Args:
-        df: pandas DataFrame containing the data
-
-    Returns:
-        table: SQLAlchemy Table object
-    """
-    # Remove any previously registered table definition
-    if 'data_table' in metadata_obj.tables:
-        metadata_obj.remove(metadata_obj.tables['data_table'])
-
-    # Define columns with a new auto-incrementing primary key
-    columns = [Column("uuid", Integer, primary_key=True, autoincrement=True)]  # New primary key
-
-    # Map each DataFrame column to a SQLAlchemy type
-    for col_name, dtype in df.dtypes.items():
-        if "int" in str(dtype):
-            col_type = Integer
-        elif "float" in str(dtype):
-            col_type = Float
-        else:
-            col_type = String
-        columns.append(Column(col_name, col_type))
-
-    # Create new table
-    table = Table('data_table', metadata_obj, *columns)
-
-    # Create table in database
-    metadata_obj.create_all(engine, tables=[table])
-
-    return table
-
-def clear_database():
-    """
-    Removes all tables from the database.
-    """
-    metadata_obj.drop_all(engine)
-    metadata_obj.clear()
-
-def insert_rows_into_table(rows, table):
-    """
-    Inserts rows into the specified table.
-
-    Args:
-        rows: List of dictionaries containing the row data
-        table: SQLAlchemy Table object
-    """
-    with engine.begin() as connection:
-        connection.execute(insert(table), rows)
-
-def get_table_schema():
-    """
-    Returns the current table schema as a string.
-    """
-    try:
-        with engine.connect() as con:
-            # List user tables, skipping SQLite internals
-            tables = con.execute(text(
-                "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
-            )).fetchall()
-
-            if not tables:
-                return None
-
-            # Use the first table found
-            table_name = tables[0][0]
-
-            # Inspect its columns
-            columns = con.execute(text(f"PRAGMA table_info({table_name})")).fetchall()
-
-            schema = []
-            for col in columns:
-                col_name = col[1]
-                col_type = col[2]
-                is_primary = "primary key" if col[5] == 1 else ""
-                schema.append(f"- {col_name} ({col_type}) {is_primary}".strip())
-
-            return "\n".join(schema)
-    except Exception as e:
-        return
+import json
+import gradio as gr
+from smolagents import CodeAgent, HfApiModel
+import pandas as pd
+from io import StringIO
+
+agent = CodeAgent(
+    tools=[],
+    model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
+    additional_authorized_imports=['json']
+)
+
+def parse_text_content(content):
+    """Universal text parser with multiple pattern handling"""
+    lines = content.split('\n')
+    csv_lines = ["id,description"]
+
+    for line in lines:
+        line = line.strip()
+        if not line:
+            continue
+
+        # Handle ID-description pattern
+        if ' ' in line and len(line.split()) > 1:
+            parts = line.split(' ', 1)
+            clean_desc = parts[1].replace('(edited)', '').strip()
+            csv_lines.append(f"{parts[0]},{clean_desc}")
+
+        # Handle separator pattern
+        elif '!' in line:
+            csv_lines.append(line.replace('!', ',', 1))
+
+        # Handle single-value lines
+        else:
+            csv_lines.append(f"{line},")
+
+    return '\n'.join(csv_lines)
+
+def process_file(file_path):
+    """Process individual files with error handling"""
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        # Generate CSV
+        csv_data = parse_text_content(content)
+
+        # Convert to DataFrame
+        df = pd.read_csv(StringIO(csv_data), keep_default_na=False)
+
+        return True, df, csv_data
+
+    except Exception as e:
+        return False, pd.DataFrame(), str(e)
+
+def handle_upload(files):
+    """Process multiple files"""
+    all_dfs = []
+    full_content = ""
+
+    for file in files:
+        success, df, content = process_file(file)
+        if success:
+            all_dfs.append(df)
+            full_content += f"\n\n--- {file.name} ---\n{content}"
+
+    combined_df = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()
+
+    # Generate analysis
+    analysis = analyze_content(full_content)
+
+    return combined_df, analysis
+
+with gr.Blocks() as demo:
+    gr.Markdown("# Advanced Text Analyzer")
+
+    with gr.Row():
+        file_input = gr.File(
+            file_count="multiple",
+            file_types=[".txt"],
+            label="Upload Documents"
+        )
+        submit_btn = gr.Button("Analyze")
+
+    with gr.Row():
+        data_output = gr.Dataframe(label="Structured Data")
+        json_output = gr.JSON(label="Content Analysis")
+
+    submit_btn.click(
+        handle_upload,
+        inputs=file_input,
+        outputs=[data_output, json_output]
+    )
+
+if __name__ == "__main__":
+    demo.launch()
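Note on the new file: handle_upload calls analyze_content, but no such function is defined anywhere in this diff, so clicking Analyze would raise a NameError at runtime. The sketch below is one way the missing helper could look, assuming it is meant to route the combined text through the CodeAgent declared at the top of the file and return something JSON-serializable for the gr.JSON output; the function body, prompt wording, and result keys are assumptions, not part of this commit.

def analyze_content(text_blob):
    # Hypothetical helper (not in the commit): ask the agent for a JSON summary.
    if not text_blob.strip():
        return {"error": "no readable content"}
    result = agent.run(
        "Summarize the following documents as a JSON object with keys "
        "'line_count' and 'summary'. Return only the JSON.\n\n" + text_blob
    )
    try:
        # agent.run returns the agent's final answer; parse it if it is a string
        return json.loads(result) if isinstance(result, str) else result
    except json.JSONDecodeError:
        return {"raw_output": str(result)}

One further caveat: depending on the Gradio version, gr.File with file_count="multiple" may deliver plain file paths (strings) rather than objects with a .name attribute, in which case the f"--- {file.name} ---" header in handle_upload would need something like os.path.basename(file) instead.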