File size: 2,335 Bytes
6a0ec6a
1767e22
6d4e0a3
dbbcf50
91561ce
dbbcf50
6d4e0a3
e1e2089
6d4e0a3
 
 
dbbcf50
e3ecb0f
 
dbbcf50
e3ecb0f
 
 
dbbcf50
a808dce
e3ecb0f
 
 
dbbcf50
a808dce
dbbcf50
e3ecb0f
 
 
 
a808dce
dbbcf50
e3ecb0f
 
a573881
e3ecb0f
 
dbbcf50
e3ecb0f
a808dce
dbbcf50
e3ecb0f
 
 
 
 
 
 
a808dce
dbbcf50
e3ecb0f
a808dce
dbbcf50
f3a5662
6d4e0a3
e3ecb0f
a808dce
 
e3ecb0f
 
 
 
 
 
dbbcf50
 
e3ecb0f
dbbcf50
237bccb
e3ecb0f
dbbcf50
3df9eeb
dbbcf50
811c7ec
6a0ec6a
 
dbbcf50
 
 
e3ecb0f
dbbcf50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gradio as gr
import pandas as pd
from io import StringIO
from smolagents import CodeAgent, HfApiModel

# Initialize the AI agent.
# A tool-less CodeAgent backed by a hosted Qwen coder model; shared by
# process_text() and analyze_content() below. NOTE(review): module-level
# construction means this hits the HF API config at import time — confirm
# that is intended for deployment.
agent = CodeAgent(
    tools=[],
    model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
)

def process_text(content):
    """Convert free-form text content into a structured DataFrame.

    Prompts the agent to emit CSV for *content*, then parses that CSV
    with pandas.

    Args:
        content: Raw text to convert (typically one uploaded file's text).

    Returns:
        pandas.DataFrame parsed from the agent's CSV output, or an empty
        DataFrame when the output is not parseable CSV.
    """
    csv_output = agent.run(f"Convert to CSV (include headers):\n{content}\nOutput ONLY valid CSV:")
    try:
        # keep_default_na=False preserves literal strings like "NA"/"null"
        # instead of coercing them to NaN.
        return pd.read_csv(StringIO(csv_output), keep_default_na=False)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, ValueError):
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit): a malformed model response falls
        # back to an empty frame instead of crashing the upload flow.
        return pd.DataFrame()

def analyze_content(full_text):
    """Ask the agent for a markdown analysis report of *full_text*.

    Only the first 5000 characters are included, keeping the prompt
    bounded regardless of how much text was uploaded.
    """
    prompt = f"""
    Create detailed analysis report from this data:
    {full_text[:5000]}
    
    Include:
    1. Key insights and patterns
    2. Important statistics
    3. Actionable recommendations
    4. Potential anomalies
    
    Use markdown formatting with headers.
    """
    return agent.run(prompt)

def handle_upload(files):
    """Turn a batch of uploaded text files into a DataFrame plus a report.

    Args:
        files: The Gradio multi-file upload value — temp-file objects with
            a ``.name`` path attribute, or plain path strings depending on
            Gradio version.

    Returns:
        Tuple of (combined DataFrame of all structured rows, markdown
        report string). Files that fail to read or parse are logged and
        skipped rather than aborting the batch.
    """
    all_dfs = []
    full_content = []

    for file in files:
        # Gradio returns either a closed temp-file wrapper or a bare path
        # string; calling .read() on the wrapper fails because its handle
        # is already closed. Resolve a filesystem path and reopen it.
        path = getattr(file, "name", file)
        try:
            with open(path, "r", encoding="utf-8", errors="replace") as fh:
                content = path + "\n" + fh.read()
            df = process_text(content)
            all_dfs.append(df)
            full_content.append(content)
        except Exception as e:
            # Best-effort per-file handling: report and continue.
            print(f"Error processing {path}: {str(e)}")

    combined_df = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()
    report = analyze_content("\n\n".join(full_content)) if full_content else "No valid content found"

    return combined_df, report

# UI layout: an upload row on top, results (table + report) underneath.
with gr.Blocks() as demo:
    gr.Markdown("# Multi-Document Analyzer")

    with gr.Row():
        uploads = gr.File(
            file_count="multiple",
            file_types=[".txt"],
            label="Upload Text Files",
        )
        analyze_btn = gr.Button("Analyze Documents", variant="primary")

    with gr.Row():
        table_view = gr.Dataframe(label="Structured Data Preview", wrap=True)
        report_view = gr.Markdown(label="Analysis Report")

    # Wire the button to the batch handler; outputs map 1:1 to the
    # (DataFrame, report) tuple returned by handle_upload.
    analyze_btn.click(
        handle_upload,
        inputs=uploads,
        outputs=[table_view, report_view],
    )

if __name__ == "__main__":
    # Bind on all interfaces at the conventional Gradio port;
    # share=True additionally publishes a temporary public gradio.live URL.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)