Quazim0t0 committed on
Commit
dbbcf50
·
verified ·
1 Parent(s): ca8ef7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -157
app.py CHANGED
@@ -1,185 +1,78 @@
1
- import os
2
  import gradio as gr
3
- from sqlalchemy import text
4
- from smolagents import CodeAgent, HfApiModel
5
  import pandas as pd
6
  from io import StringIO
7
- import tempfile
8
- from datetime import datetime
9
- from database import (
10
- engine,
11
- create_dynamic_table,
12
- clear_database,
13
- insert_rows_into_table
14
- )
15
 
 
16
  agent = CodeAgent(
17
  tools=[],
18
  model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
19
  )
20
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def analyze_content(full_text):
22
- """Determine document type and key themes"""
23
  analysis_prompt = f"""
24
- Analyze this text and identify its primary domain:
25
- {full_text[:10000]} # First 10k characters for analysis
26
 
27
- Possible domains:
28
- - Business/Financial
29
- - Historical
30
- - Scientific
31
- - Technical
32
- - Legal
33
- - Literary
34
 
35
- Return JSON format:
36
- {{
37
- "domain": "primary domain",
38
- "keywords": ["list", "of", "key", "terms"],
39
- "report_type": "business|historical|scientific|technical|legal|literary"
40
- }}
41
  """
42
- return agent.run(analysis_prompt, output_type="json")
43
 
44
- def generate_report(full_text, domain, file_names):
45
- """Generate domain-specific report"""
46
- report_prompt = f"""
47
- Create a comprehensive {domain} report from these documents:
48
- Files: {', '.join(file_names)}
49
-
50
- Content:
51
- {full_text[:20000]} # First 20k chars for report
52
-
53
- Report structure:
54
- 1. Executive Summary
55
- 2. Key Findings/Analysis
56
- 3. Important Metrics/Statistics (if applicable)
57
- 4. Timeline of Events (historical) or Financial Overview (business)
58
- 5. Conclusions/Recommendations
59
-
60
- Include markdown formatting with headings, bullet points, and tables where appropriate.
61
- """
62
- return agent.run(report_prompt)
63
-
64
- def process_files(file_paths):
65
- """Process multiple files and generate report"""
66
  full_text = ""
67
- file_names = []
68
- structured_data = []
69
-
70
- for file_path in file_paths:
71
- try:
72
- with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
73
- content = f.read()
74
- full_text += f"\n\n--- {os.path.basename(file_path)} ---\n{content}"
75
- file_names.append(os.path.basename(file_path))
76
-
77
- # Structure detection for tables
78
- structure_prompt = f"Convert to CSV:\n{content}\nReturn ONLY CSV:"
79
- csv_output = agent.run(structure_prompt)
80
- df = pd.read_csv(StringIO(csv_output), dtype=str).dropna(how='all')
81
- structured_data.append(df)
82
-
83
- except Exception as e:
84
- print(f"Error processing {file_path}: {str(e)}")
85
-
86
- # Domain analysis
87
- domain_info = analyze_content(full_text)
88
 
89
- # Report generation
90
- report = generate_report(full_text, domain_info["report_type"], file_names)
 
 
 
91
 
92
- # Combine structured data
93
- combined_df = pd.concat(structured_data, ignore_index=True) if structured_data else pd.DataFrame()
94
 
95
- return domain_info, report, combined_df
96
-
97
- def handle_upload(files):
98
- """Handle multiple file uploads"""
99
- if not files:
100
- return [gr.update()]*6 + [gr.update(visible=False)]
101
-
102
- domain_info, report, df = process_files(files)
103
-
104
- outputs = [
105
- gr.Markdown(value=f"**Document Type:** {domain_info['domain']}"),
106
- gr.Markdown(value=f"**Key Themes:** {', '.join(domain_info['keywords'][:5])}"),
107
- gr.Dataframe(value=df.head(10) if not df.empty else None),
108
- gr.Markdown(value=report),
109
- gr.update(visible=True),
110
- gr.update(visible=True),
111
- gr.update(visible=not df.empty)
112
- ]
113
-
114
- return outputs
115
-
116
- def download_report(report_type):
117
- """Generate downloadable reports"""
118
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
119
- filename = f"{report_type}_report_{timestamp}"
120
-
121
- temp_dir = tempfile.gettempdir()
122
- formats = {
123
- 'pdf': f"{filename}.pdf",
124
- 'docx': f"{filename}.docx",
125
- 'csv': f"{filename}.csv"
126
- }
127
-
128
- # Generate files (implementation depends on your PDF/DOCX libraries)
129
- # Add your preferred reporting libraries here
130
-
131
- return [os.path.join(temp_dir, f) for f in formats.values()]
132
 
133
  with gr.Blocks() as demo:
134
- gr.Markdown("# Multi-Document Analysis System")
135
 
136
  with gr.Row():
137
- with gr.Column(scale=1):
138
- file_input = gr.File(
139
- label="Upload Documents",
140
- file_count="multiple",
141
- file_types=[".txt", ".doc", ".docx"],
142
- type="filepath"
143
- )
144
- process_btn = gr.Button("Analyze Documents", variant="primary")
145
-
146
- with gr.Group(visible=False) as meta_group:
147
- domain_display = gr.Markdown()
148
- keywords_display = gr.Markdown()
149
-
150
- with gr.Column(scale=2):
151
- with gr.Tabs():
152
- with gr.TabItem("Structured Data"):
153
- data_table = gr.Dataframe(label="Combined Data Preview", interactive=False)
154
-
155
- with gr.TabItem("Analysis Report"):
156
- report_display = gr.Markdown()
157
-
158
- with gr.Group(visible=False) as download_group:
159
- gr.Markdown("### Download Options")
160
- with gr.Row():
161
- pdf_btn = gr.DownloadButton("PDF Report")
162
- docx_btn = gr.DownloadButton("Word Report")
163
- csv_btn = gr.DownloadButton("CSV Data")
164
 
165
- process_btn.click(
166
- fn=handle_upload,
167
  inputs=file_input,
168
- outputs=[
169
- domain_display,
170
- keywords_display,
171
- data_table,
172
- report_display,
173
- meta_group,
174
- download_group,
175
- csv_btn
176
- ]
177
  )
178
-
179
- # Connect download buttons (implement actual file generation)
180
- # pdf_btn.click(fn=lambda: download_report("pdf"), outputs=pdf_btn)
181
- # docx_btn.click(fn=lambda: download_report("docx"), outputs=docx_btn)
182
- # csv_btn.click(fn=lambda: download_report("csv"), outputs=csv_btn)
183
 
184
  if __name__ == "__main__":
185
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
1
  import gradio as gr
 
 
2
  import pandas as pd
3
  from io import StringIO
4
+ from smolagents import CodeAgent, HfApiModel
 
 
 
 
 
 
 
5
 
6
# Module-wide agent shared by every request handler below. It is given no
# extra tools and is driven by an HfApiModel configured with the
# Qwen/Qwen2.5-Coder-32B-Instruct model id.
agent = CodeAgent(
    model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
    tools=[],
)
11
 
12
def process_text(content):
    """Ask the agent to convert raw text to CSV and parse a preview of it.

    Args:
        content: Text to convert; it is interpolated verbatim into the
            agent prompt.

    Returns:
        A ``(preview, raw)`` tuple. On success ``preview`` holds the first
        10 rows of the parsed DataFrame and ``raw`` is whatever the agent
        returned. If parsing raises, ``preview`` is an empty DataFrame and
        ``raw`` is an ``"Error processing data: ..."`` message instead.
    """
    conversion_prompt = f"Convert to CSV:\n{content}\nReturn ONLY valid CSV:"
    agent_csv = agent.run(conversion_prompt)

    try:
        # keep_default_na=False turns off pandas' default NaN-marker parsing.
        parsed = pd.read_csv(StringIO(agent_csv), keep_default_na=False)
        preview = parsed.head(10)
        return preview, agent_csv
    except Exception as exc:
        # Best-effort: a malformed agent reply yields an empty table plus a
        # readable message rather than an exception.
        return pd.DataFrame(), f"Error processing data: {str(exc)}"
23
+
24
def analyze_content(full_text):
    """Have the agent write a markdown report about the given text.

    Only the first 5000 characters of ``full_text`` are embedded in the
    prompt. The return value is whatever ``agent.run`` returns for it.
    """
    # Cap the amount of document text that is placed into the prompt.
    excerpt = full_text[:5000]
    report_prompt = f"""
    Analyze this text and generate a structured report:
    {excerpt}

    Include:
    1. Key themes/topics
    2. Important entities
    3. Summary statistics
    4. Recommendations/insights

    Use markdown formatting with headers.
    """
    return agent.run(report_prompt)
39
 
40
def _read_upload(upload):
    """Return ``(label, text)`` for one upload given as a path or file-like object."""
    import os  # local import so this block does not rely on a file-level one

    if isinstance(upload, (str, os.PathLike)):
        # Upload delivered as a filesystem path: read the bytes ourselves.
        path = os.fspath(upload)
        with open(path, "rb") as handle:
            raw = handle.read()
    elif hasattr(upload, "read"):
        # Upload delivered as an open file-like object.
        path = str(getattr(upload, "name", "") or "")
        raw = upload.read()
    else:
        raise TypeError(f"Unsupported upload type: {type(upload).__name__}")

    if isinstance(raw, (bytes, bytearray)):
        # errors="replace" keeps a single bad byte from aborting the upload.
        text = raw.decode("utf-8", errors="replace")
    else:
        text = str(raw)

    # Label sections with the bare file name rather than a full temp path.
    label = os.path.basename(path) or "uploaded file"
    return label, text


def handle_upload(*files):
    """Process uploaded files into a combined data preview and a report.

    The click handler wires a single file component as input, so the
    uploads may arrive as ONE argument holding a list (or ``None`` when
    nothing was selected) instead of one argument per file. Both call
    shapes are accepted: list/tuple arguments are flattened and ``None``
    entries are dropped.

    Args:
        *files: Uploaded items, either path strings / ``os.PathLike``
            objects or file-like objects exposing ``read()`` (and
            optionally ``name``), possibly wrapped in a list.

    Returns:
        A ``(combined_df, report)`` tuple: the per-file previews returned
        by ``process_text`` concatenated into one DataFrame, and the
        result of ``analyze_content`` over all file contents. With no
        uploads this is an empty DataFrame and ``"No content to analyze"``.

    Raises:
        TypeError: If an item is neither a path nor a file-like object.
    """
    # Normalise ``(f1, f2)``, ``([f1, f2],)`` and ``(None,)`` into a flat list.
    uploads = []
    for item in files:
        if isinstance(item, (list, tuple)):
            uploads.extend(item)
        else:
            uploads.append(item)
    uploads = [entry for entry in uploads if entry is not None]

    if not uploads:
        return pd.DataFrame(), "No content to analyze"

    frames = []
    sections = []
    for upload in uploads:
        label, content = _read_upload(upload)
        preview, _ = process_text(content)
        frames.append(preview)
        sections.append(f"\n\n--- {label} ---\n{content}")

    combined_df = pd.concat(frames, ignore_index=True)
    # Join once instead of growing a string with += inside the loop.
    report = analyze_content("".join(sections))

    return combined_df, report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
with gr.Blocks() as demo:
    # Page title.
    gr.Markdown("# Document Analysis System")

    # Input row: a multi-file picker restricted to .txt, plus the button
    # that triggers processing.
    with gr.Row():
        file_input = gr.File(
            file_count="multiple",
            file_types=[".txt"],
        )
        upload_btn = gr.Button("Process Files", variant="primary")

    # Output row: the table preview next to the markdown report.
    with gr.Row():
        data_output = gr.Dataframe(label="Structured Data Preview")
        report_output = gr.Markdown(label="Analysis Report")

    # handle_upload returns (dataframe, report); the two values are mapped
    # onto the two output components in that order.
    upload_btn.click(
        fn=handle_upload,
        inputs=file_input,
        outputs=[data_output, report_output],
    )
 
 
 
 
 
72
 
73
if __name__ == "__main__":
    # Only start the server when run as a script, not on import.
    # Binds to 0.0.0.0 on port 7860 and passes show_error=True to launch().
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)