ReportAgent

Build error

App Files Files Community

Quazim0t0 committed on Feb 18

Commit

a808dce

verified ·

1 Parent(s): 31ed30a

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -185

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from smolagents import CodeAgent, HfApiModel
 import pandas as pd
 from io import StringIO
 import tempfile
 from database import (
     engine,
     create_dynamic_table,
@@ -12,211 +13,173 @@ from database import (
     insert_rows_into_table
 )
-# Initialize the AI agent
 agent = CodeAgent(
     tools=[],
     model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
 )
-def get_data_table():
-    """Fetch and return the current table data as DataFrame"""
-    try:
-        with engine.connect() as con:
-            tables = con.execute(text(
-                "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
-            )).fetchall()
-        if not tables:
-            return pd.DataFrame()
-        table_name = tables[0][0]
-        with engine.connect() as con:
-            result = con.execute(text(f"SELECT * FROM {table_name}"))
-            rows = result.fetchall()
-            columns = result.keys()
-        return pd.DataFrame(rows, columns=columns) if rows else pd.DataFrame()
-    except Exception as e:
-        return pd.DataFrame({"Error": [str(e)]})
-def process_txt_file(file_path):
-    """Analyze text file and convert to structured table"""
-    try:
-        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
-            content = f.read()
-        structure_prompt = f"""
-        Convert this text into valid CSV format:
-        {content}
-        Requirements:
-        1. First row must be headers
-        2. Consistent columns per row
-        3. Quote fields containing commas
-        4. Maintain original data relationships
-        Return ONLY the CSV content.
-        """
-        csv_output = agent.run(structure_prompt)
-        try:
-            df = pd.read_csv(
-                StringIO(csv_output),
-                on_bad_lines='warn',
-                dtype=str,
-                encoding_errors='ignore'
-            ).dropna(how='all')
-        except pd.errors.ParserError as pe:
-            return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
-        if df.empty or len(df.columns) == 0:
-            return False, "No structured data found", pd.DataFrame()
-        clear_database()
-        table = create_dynamic_table(df)
-        insert_rows_into_table(df.to_dict('records'), table)
-        return True, "Text analyzed successfully!", df.head(10)
-    except Exception as e:
-        return False, f"Processing error: {str(e)}", pd.DataFrame()
-def handle_upload(file_obj):
-    """Handle file upload and processing"""
-    if file_obj is None:
-        return [
-            "Please upload a text file.",
-            None,
-            "No schema",
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False)
-        ]
-    success, message, df = process_txt_file(file_obj)
-    if success:
-        schema = "\n".join([f"- {col} (text)" for col in df.columns])
-        return [
-            message,
-            df,
-            f"### Detected Schema:\n```\n{schema}\n```",
-            gr.update(visible=False),
-            gr.update(visible=True),
-            gr.update(visible=True)
-        ]
-    return [
-        message,
-        None,
-        "No schema",
-        gr.update(visible=True),
-        gr.update(visible=False),
-        gr.update(visible=False)
     ]
-def query_analysis(user_query: str) -> str:
-    """Handle natural language queries about the data"""
-    try:
-        df = get_data_table()
-        if df.empty:
-            return "Please upload and process a file first."
-        analysis_prompt = f"""
-        Analyze this data:
-        {df.head().to_csv()}
-        Question: {user_query}
-        Provide:
-        1. Direct answer
-        2. Numerical formatting
-        3. Data references
-        Use Markdown formatting.
-        """
-        return agent.run(analysis_prompt)
-    except Exception as e:
-        return f"Query error: {str(e)}"
-def download_csv():
-    """Generate CSV file for download"""
-    df = get_data_table()
-    if not df.empty:
-        temp_dir = tempfile.gettempdir()
-        file_path = os.path.join(temp_dir, "processed_data.csv")
-        df.to_csv(file_path, index=False)
-        return file_path
-    return None
-# Gradio interface setup
 with gr.Blocks() as demo:
-    with gr.Group() as upload_group:
-        gr.Markdown("""
-        # Text Data Analyzer
-        Upload unstructured text files to analyze and query their data
-        """)
-        file_input = gr.File(
-            label="Upload Text File",
-            file_types=[".txt"],
-            type="filepath"
-        )
-        status = gr.Textbox(label="Processing Status", interactive=False)
-    with gr.Group(visible=False) as query_group:
-        with gr.Row():
-            with gr.Column(scale=1):
                 with gr.Row():
-                    user_input = gr.Textbox(label="Ask about the data", scale=4)
-                    submit_btn = gr.Button("Submit", scale=1)
-                query_output = gr.Markdown(label="Analysis Results")
-            with gr.Column(scale=2):
-                gr.Markdown("### Extracted Data Preview")
-                data_table = gr.Dataframe(
-                    label="Structured Data",
-                    interactive=False
-                )
-                download_btn = gr.DownloadButton(
-                    "Download as CSV",
-                    visible=False
-                )
-        schema_display = gr.Markdown()
-        refresh_btn = gr.Button("Refresh View")
-    # Event handlers
-    file_input.upload(
         fn=handle_upload,
         inputs=file_input,
-        outputs=[status, data_table, schema_display, upload_group, query_group, download_btn]
-    )
-    submit_btn.click(
-        fn=query_analysis,
-        inputs=user_input,
-        outputs=query_output
-    )
-    user_input.submit(
-        fn=query_analysis,
-        inputs=user_input,
-        outputs=query_output
-    )
-    refresh_btn.click(
-        fn=lambda: (get_data_table().head(10), "Schema refreshed"),
-        outputs=[data_table, schema_display]
     )
-    download_btn.click(
-        fn=download_csv,
-        outputs=download_btn
-    )
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import pandas as pd
 from io import StringIO
 import tempfile
+from datetime import datetime
 from database import (
     engine,
     create_dynamic_table,
     insert_rows_into_table
 )
 agent = CodeAgent(
     tools=[],
     model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
 )
+def analyze_content(full_text):
+    """Determine document type and key themes"""
+    analysis_prompt = f"""
+    Analyze this text and identify its primary domain:
+    {full_text[:10000]}  # First 10k characters for analysis
+    Possible domains:
+    - Business/Financial
+    - Historical
+    - Scientific
+    - Technical
+    - Legal
+    - Literary
+    Return JSON format:
+    {{
+        "domain": "primary domain",
+        "keywords": ["list", "of", "key", "terms"],
+        "report_type": "business|historical|scientific|technical|legal|literary"
+    }}
+    """
+    return agent.run(analysis_prompt, output_type="json")
+def generate_report(full_text, domain, file_names):
+    """Generate domain-specific report"""
+    report_prompt = f"""
+    Create a comprehensive {domain} report from these documents:
+    Files: {', '.join(file_names)}
+    Content:
+    {full_text[:20000]}  # First 20k chars for report
+    Report structure:
+    1. Executive Summary
+    2. Key Findings/Analysis
+    3. Important Metrics/Statistics (if applicable)
+    4. Timeline of Events (historical) or Financial Overview (business)
+    5. Conclusions/Recommendations
+    Include markdown formatting with headings, bullet points, and tables where appropriate.
+    """
+    return agent.run(report_prompt)
+def process_files(file_paths):
+    """Process multiple files and generate report"""
+    full_text = ""
+    file_names = []
+    structured_data = []
+    for file_path in file_paths:
+        try:
+            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                content = f.read()
+                full_text += f"\n\n--- {os.path.basename(file_path)} ---\n{content}"
+                file_names.append(os.path.basename(file_path))
+                # Structure detection for tables
+                structure_prompt = f"Convert to CSV:\n{content}\nReturn ONLY CSV:"
+                csv_output = agent.run(structure_prompt)
+                df = pd.read_csv(StringIO(csv_output), dtype=str).dropna(how='all')
+                structured_data.append(df)
+        except Exception as e:
+            print(f"Error processing {file_path}: {str(e)}")
+    # Domain analysis
+    domain_info = analyze_content(full_text)
+    # Report generation
+    report = generate_report(full_text, domain_info["report_type"], file_names)
+    # Combine structured data
+    combined_df = pd.concat(structured_data, ignore_index=True) if structured_data else pd.DataFrame()
+    return domain_info, report, combined_df
+def handle_upload(files):
+    """Handle multiple file uploads"""
+    if not files:
+        return [gr.update()]*6 + [gr.update(visible=False)]
+    domain_info, report, df = process_files(files)
+    outputs = [
+        gr.Markdown(value=f"**Document Type:** {domain_info['domain']}"),
+        gr.Markdown(value=f"**Key Themes:** {', '.join(domain_info['keywords'][:5])}"),
+        gr.Dataframe(value=df.head(10) if not df.empty else None),
+        gr.Markdown(value=report),
+        gr.update(visible=True),
+        gr.update(visible=True),
+        gr.update(visible=not df.empty)
     ]
+    return outputs
+def download_report(report_type):
+    """Generate downloadable reports"""
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"{report_type}_report_{timestamp}"
+    temp_dir = tempfile.gettempdir()
+    formats = {
+        'pdf': f"{filename}.pdf",
+        'docx': f"{filename}.docx",
+        'csv': f"{filename}.csv"
+    }
+    # Generate files (implementation depends on your PDF/DOCX libraries)
+    # Add your preferred reporting libraries here
+    return [os.path.join(temp_dir, f) for f in formats.values()]
 with gr.Blocks() as demo:
+    # Layout: left column = upload + analysis metadata,
+    # right column = data/report tabs + download buttons.
+    gr.Markdown("# Multi-Document Analysis System")
+    with gr.Row():
+        with gr.Column(scale=1):
+            # NOTE(review): .doc/.docx are accepted here, but process_files
+            # reads every upload as UTF-8 text with errors ignored, so binary
+            # Word files will not be decoded meaningfully. Confirm intent.
+            file_input = gr.File(
+                label="Upload Documents",
+                file_count="multiple",
+                file_types=[".txt", ".doc", ".docx"],
+                type="filepath"
+            )
+            process_btn = gr.Button("Analyze Documents", variant="primary")
+            # Hidden until handle_upload returns a visible=True update for it.
+            with gr.Group(visible=False) as meta_group:
+                domain_display = gr.Markdown()
+                keywords_display = gr.Markdown()
+        with gr.Column(scale=2):
+            with gr.Tabs():
+                with gr.TabItem("Structured Data"):
+                    data_table = gr.Dataframe(label="Combined Data Preview", interactive=False)
+                with gr.TabItem("Analysis Report"):
+                    report_display = gr.Markdown()
+            # Hidden until handle_upload returns a visible=True update for it.
+            with gr.Group(visible=False) as download_group:
+                gr.Markdown("### Download Options")
                 with gr.Row():
+                    pdf_btn = gr.DownloadButton("PDF Report")
+                    docx_btn = gr.DownloadButton("Word Report")
+                    csv_btn = gr.DownloadButton("CSV Data")
+    # The order of `outputs` must match the seven-item list that
+    # handle_upload returns.
+    process_btn.click(
         fn=handle_upload,
         inputs=file_input,
+        outputs=[
+            domain_display,
+            keywords_display,
+            data_table,
+            report_display,
+            meta_group,
+            download_group,
+            csv_btn
+        ]
     )
+    # The download buttons are not wired to any handler yet; the calls below
+    # are left disabled until file generation is implemented.
+    # Connect download buttons (implement actual file generation)
+    # pdf_btn.click(fn=lambda: download_report("pdf"), outputs=pdf_btn)
+    # docx_btn.click(fn=lambda: download_report("docx"), outputs=docx_btn)
+    # csv_btn.click(fn=lambda: download_report("csv"), outputs=csv_btn)
 if __name__ == "__main__":
+    # Listen on all interfaces on port 7860.
+    demo.launch(server_name="0.0.0.0", server_port=7860)