Quazim0t0 committed on
Commit
ca8ef7d
·
verified ·
1 Parent(s): fd26ad9

Update database.py

Browse files
Files changed (1) hide show
  1. database.py +86 -92
database.py CHANGED
@@ -1,102 +1,96 @@
1
- from sqlalchemy import (
2
- create_engine,
3
- MetaData,
4
- Table,
5
- Column,
6
- String,
7
- Integer,
8
- Float,
9
- insert,
10
- text,
11
- )
12
-
13
# Use a persistent SQLite database file (created in the working directory on
# first use; SQLAlchemy opens connections lazily).
engine = create_engine("sqlite:///database.db")
# Shared in-memory registry of Table definitions; create_dynamic_table()
# registers tables here and clear_database() drops and clears everything.
metadata_obj = MetaData()
16
-
17
def create_dynamic_table(df):
    """
    Create (or recreate) the 'data_table' table from a DataFrame's schema.

    A surrogate auto-incrementing ``uuid`` primary key is always added;
    DataFrame columns are mapped by dtype name: int -> Integer,
    float -> Float, anything else -> String(255).

    Args:
        df: pandas DataFrame whose dtypes define the table columns.

    Returns:
        SQLAlchemy Table object registered on the shared metadata_obj.
    """
    # Remove any previous definition from the in-memory registry so the
    # Table() constructor below does not raise on a duplicate name.
    if 'data_table' in metadata_obj.tables:
        metadata_obj.remove(metadata_obj.tables['data_table'])

    # Surrogate auto-incrementing primary key.
    columns = [Column("uuid", Integer, primary_key=True, autoincrement=True)]

    # Map DataFrame dtypes onto SQLAlchemy column types.
    for col_name, dtype in df.dtypes.items():
        dtype_name = str(dtype)
        if 'int' in dtype_name:
            col_type = Integer
        elif 'float' in dtype_name:
            col_type = Float
        else:
            col_type = String(255)  # generous length for string columns
        columns.append(Column(col_name, col_type))

    table = Table('data_table', metadata_obj, *columns)

    # BUG FIX: the original only removed the table from the in-memory
    # MetaData. If database.db already contained a 'data_table' with a
    # different schema, create_all (which is checkfirst by default) silently
    # kept the stale table. Drop it in the database first so the new schema
    # always takes effect.
    table.drop(engine, checkfirst=True)

    metadata_obj.create_all(engine, tables=[table])

    return table
52
-
53
def clear_database():
    """Drop every tracked table and reset the in-memory schema registry.

    Tables are first dropped from the SQLite database itself, then the
    shared MetaData object is emptied so stale Table definitions do not
    linger between uploads.
    """
    # Order matters: drop_all needs the Table objects still registered.
    metadata_obj.drop_all(engine)
    metadata_obj.clear()
59
-
60
def insert_rows_into_table(rows, table):
    """Bulk-insert rows into the given table inside one transaction.

    Args:
        rows: list of dicts, one per row, keyed by column name.
        table: SQLAlchemy Table object to insert into.
    """
    # engine.begin() commits on success and rolls back on any exception.
    stmt = insert(table)
    with engine.begin() as conn:
        conn.execute(stmt, rows)
70
 
71
def get_table_schema():
    """
    Return the current table schema as a human-readable string.

    Looks up the first user table in the SQLite database and formats one
    "- name (TYPE) [primary key]" line per column.

    Returns:
        str: newline-joined column descriptions, or None when the database
        has no user tables or any error occurs (best-effort, never raises).
    """
    try:
        # One connection for both queries (the original opened two).
        with engine.connect() as con:
            tables = con.execute(text(
                "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
            )).fetchall()

            if not tables:
                return None

            # Use the first table found.
            table_name = tables[0][0]

            # Quote the identifier (doubling embedded quotes) so table names
            # with spaces or special characters do not break the PRAGMA —
            # the original interpolated the raw name unquoted.
            quoted = table_name.replace('"', '""')
            columns = con.execute(
                text(f'PRAGMA table_info("{quoted}")')
            ).fetchall()

        schema = []
        for col in columns:
            col_name = col[1]
            col_type = col[2]
            # PRAGMA table_info's pk field is the 1-based position of the
            # column within the primary key (0 = not part of it); the
            # original's `== 1` missed later columns of a composite key.
            is_primary = "primary key" if col[5] > 0 else ""
            schema.append(f"- {col_name} ({col_type}) {is_primary}".strip())

        return "\n".join(schema)

    except Exception:
        # Deliberate best-effort: callers treat None as "no schema".
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import gradio as gr
3
+ from smolagents import CodeAgent, HfApiModel
4
+ import pandas as pd
5
+ from io import StringIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
# Module-level smolagents agent backed by the hosted Qwen coder model.
# NOTE(review): `agent` is not referenced anywhere in this file's visible
# code (handle_upload calls analyze_content instead) — confirm it is used
# elsewhere, e.g. by analyze_content.
agent = CodeAgent(
    tools=[],  # no extra tools registered
    model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
    additional_authorized_imports=['json']  # allow `import json` in agent-generated code
)
 
 
 
 
12
 
13
def parse_text_content(content):
    """Universal text parser with multiple pattern handling.

    Converts free-form text into two-column CSV ("id,description"):
      * "<id> <description>" — split on the first space, with any
        "(edited)" marker stripped from the description;
      * "<id>!<description>" — space-free lines split on the first '!';
      * anything else        — whole line as the id, empty description.

    Fields are RFC-4180 quoted when needed, so values containing commas or
    quotes no longer produce rows with extra columns (bug fix: the original
    interpolated raw text, which made pd.read_csv fail downstream on any
    description containing a comma).

    Args:
        content: raw text of one uploaded document.

    Returns:
        str: CSV text starting with the "id,description" header row.
    """
    def _field(value):
        # RFC 4180: quote a field containing a comma or a quote,
        # doubling any embedded quotes.
        if ',' in value or '"' in value:
            return '"' + value.replace('"', '""') + '"'
        return value

    csv_lines = ["id,description"]

    for raw in content.split('\n'):
        line = raw.strip()
        if not line:
            continue

        if ' ' in line:
            # ID-description pattern. (A stripped, non-empty line with a
            # space always splits into >= 2 tokens, so the original's extra
            # len(line.split()) > 1 check was redundant.)
            ident, desc = line.split(' ', 1)
            desc = desc.replace('(edited)', '').strip()
            csv_lines.append(f"{_field(ident)},{_field(desc)}")
        elif '!' in line:
            # Separator pattern: first '!' divides id from description.
            ident, desc = line.split('!', 1)
            csv_lines.append(f"{_field(ident)},{_field(desc)}")
        else:
            # Single-value line: id only, empty description.
            csv_lines.append(f"{_field(line)},")

    return '\n'.join(csv_lines)
 
 
 
 
 
38
 
39
def process_file(file_path):
    """Read one uploaded file and turn it into structured data.

    Args:
        file_path: path of the file to read (UTF-8 text expected).

    Returns:
        (ok, df, payload): ok is True on success; df is the parsed
        DataFrame (empty on failure); payload is the generated CSV text on
        success or the error message on failure.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()

        # Text -> CSV -> DataFrame. keep_default_na=False keeps empty
        # description cells as "" instead of NaN.
        csv_text = parse_text_content(raw_text)
        frame = pd.read_csv(StringIO(csv_text), keep_default_na=False)
        return True, frame, csv_text
    except Exception as exc:
        # Best-effort: report the failure rather than crash the batch.
        return False, pd.DataFrame(), str(exc)
55
+
56
def handle_upload(files):
    """Process every uploaded file and produce combined data plus analysis.

    Args:
        files: uploads from gr.File (tempfile-like objects or path strings).

    Returns:
        (combined_df, analysis): all successfully parsed rows concatenated
        into one DataFrame (empty if nothing parsed), and the content
        analysis for the JSON panel ({"error": ...} when analysis fails).
    """
    all_dfs = []
    sections = []  # collect pieces and join once instead of += in the loop

    for file in files:
        success, df, content = process_file(file)
        if success:
            all_dfs.append(df)
            # gr.File may hand back tempfile-like objects (.name) or plain
            # path strings; support both (the original assumed .name and
            # raised AttributeError on strings).
            label = getattr(file, 'name', str(file))
            sections.append(f"\n\n--- {label} ---\n{content}")

    full_content = "".join(sections)
    combined_df = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()

    # Generate analysis. analyze_content is not defined anywhere in this
    # file (TODO(review): confirm it exists elsewhere in the project);
    # guard so a missing or failing analyzer degrades to an error payload
    # instead of crashing the whole upload callback with a NameError.
    try:
        analysis = analyze_content(full_content)
    except Exception as exc:
        analysis = {"error": str(exc)}

    return combined_df, analysis
73
+
74
# Gradio UI: upload .txt files, run handle_upload, and show the parsed
# table next to the JSON analysis.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Text Analyzer")

    # Row 1: multi-file .txt picker plus the trigger button.
    with gr.Row():
        file_input = gr.File(
            file_count="multiple",
            file_types=[".txt"],
            label="Upload Documents"
        )
        submit_btn = gr.Button("Analyze")

    # Row 2: outputs — structured rows and the content analysis.
    with gr.Row():
        data_output = gr.Dataframe(label="Structured Data")
        json_output = gr.JSON(label="Content Analysis")

    # Wire the button: handle_upload(files) -> (DataFrame, analysis).
    submit_btn.click(
        handle_upload,
        inputs=file_input,
        outputs=[data_output, json_output]
    )

if __name__ == "__main__":
    demo.launch()