File size: 2,227 Bytes
6a0ec6a
1767e22
6d4e0a3
dbbcf50
91561ce
dbbcf50
6d4e0a3
e1e2089
6d4e0a3
 
 
dbbcf50
 
 
 
 
 
 
 
 
 
 
 
a808dce
dbbcf50
a808dce
dbbcf50
 
a808dce
dbbcf50
 
 
 
 
a808dce
dbbcf50
a808dce
dbbcf50
a573881
dbbcf50
 
 
a808dce
 
dbbcf50
 
 
 
 
a808dce
dbbcf50
 
a808dce
dbbcf50
f3a5662
6d4e0a3
dbbcf50
a808dce
 
dbbcf50
 
 
 
 
 
237bccb
dbbcf50
 
3df9eeb
dbbcf50
811c7ec
6a0ec6a
 
dbbcf50
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
import pandas as pd
from io import StringIO
from smolagents import CodeAgent, HfApiModel

# Module-wide agent shared by every prompt helper below. It is created with
# an empty tool list and an HfApiModel pointing at the model id held in
# _MODEL_ID.
_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
agent = CodeAgent(tools=[], model=HfApiModel(model_id=_MODEL_ID))

def _strip_code_fence(text):
    """Return *text* without a surrounding markdown code fence, if it has one.

    Non-string values and strings that do not start with a fence are
    returned unchanged so the caller's existing error handling still applies.
    """
    if not isinstance(text, str):
        return text
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text
    lines = stripped.splitlines()
    if len(lines) < 2:
        # A lone fenced line has no separate body to extract; leave it alone.
        return text
    body = lines[1:]  # drop the opening fence (with its optional language tag)
    if body and body[-1].strip().startswith("```"):
        body.pop()  # drop the closing fence
    return "\n".join(body)


def process_text(content):
    """Convert free text to CSV via the agent and parse a preview of it.

    The agent is asked to rewrite ``content`` as CSV. Because language
    models frequently wrap their reply in a markdown code fence, a
    surrounding fence is removed before the reply is handed to pandas.

    Args:
        content: Raw text to convert.

    Returns:
        A ``(preview, csv_text)`` tuple. On success ``preview`` is the first
        10 rows of the parsed DataFrame and ``csv_text`` is the CSV that was
        parsed. If parsing fails, ``preview`` is an empty DataFrame and the
        second element is an ``"Error processing data: ..."`` message.

    Raises:
        Whatever ``agent.run`` raises; only the CSV parsing step is guarded.
    """
    csv_output = agent.run(f"Convert to CSV:\n{content}\nReturn ONLY valid CSV:")

    try:
        csv_text = _strip_code_fence(csv_output)
        # keep_default_na=False keeps empty cells as empty strings, not NaN.
        df = pd.read_csv(StringIO(csv_text), keep_default_na=False)
    except Exception as e:
        # Best-effort boundary: any parse failure becomes an empty preview
        # plus a readable message instead of an exception.
        return pd.DataFrame(), f"Error processing data: {str(e)}"
    return df.head(10), csv_text

def analyze_content(full_text):
    """Ask the agent for a markdown report about the supplied text.

    Only the first 5000 characters of ``full_text`` are embedded in the
    prompt. The agent's response is returned as-is.
    """
    excerpt = full_text[:5000]
    # The prompt is assembled line by line; the leading empty entry and the
    # whitespace-only entries reproduce the original layout exactly.
    prompt_lines = [
        "",
        "    Analyze this text and generate a structured report:",
        f"    {excerpt}",
        "    ",
        "    Include:",
        "    1. Key themes/topics",
        "    2. Important entities",
        "    3. Summary statistics",
        "    4. Recommendations/insights",
        "    ",
        "    Use markdown formatting with headers.",
        "    ",
    ]
    return agent.run("\n".join(prompt_lines))

def _iter_uploads(items):
    """Yield individual uploads from arbitrarily nested lists/tuples, skipping None."""
    for item in items:
        if item is None:
            continue
        if isinstance(item, (list, tuple)):
            yield from _iter_uploads(item)
        else:
            yield item


def _read_upload(upload):
    """Return ``(label, text)`` for one upload given as bytes, a path, or a file object."""
    import os  # local import keeps this block self-contained

    if isinstance(upload, (bytes, bytearray)):
        return "uploaded file", bytes(upload).decode("utf-8", errors="replace")

    if isinstance(upload, (str, os.PathLike)):
        path = upload
    else:
        path = getattr(upload, "name", None)

    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        # Prefer re-reading from disk: a file object handed over by the UI
        # may already be closed or positioned at end-of-file.
        with open(path, "rb") as handle:
            raw = handle.read()
    elif hasattr(upload, "read"):
        raw = upload.read()
    else:
        raise ValueError(f"Cannot read uploaded item: {upload!r}")

    if isinstance(raw, (bytes, bytearray)):
        # Undecodable bytes are replaced rather than aborting the whole batch.
        text = bytes(raw).decode("utf-8", errors="replace")
    else:
        text = str(raw)
    label = str(path) if path is not None else "uploaded file"
    return label, text


def handle_upload(*files):
    """Process uploaded files into a combined preview table and a report.

    The uploads may be passed as separate positional arguments or, as the
    click handler in this file does via ``inputs=file_input``, as a single
    list argument. ``None`` (nothing uploaded) is treated as no files.

    Args:
        *files: Uploads as path strings, raw bytes, objects exposing a
            ``name`` path and/or ``read()``, or lists/tuples of those.

    Returns:
        A ``(combined_df, report)`` tuple: the concatenated per-file
        previews from ``process_text`` and the output of
        ``analyze_content`` over all file contents. With no uploads, an
        empty DataFrame and ``"No content to analyze"`` are returned.

    Raises:
        ValueError: If an upload is neither bytes, an existing path, nor
            a readable object.
    """
    frames = []
    sections = []

    for upload in _iter_uploads(files):
        label, content = _read_upload(upload)
        frame, _ = process_text(content)
        frames.append(frame)
        sections.append(f"\n\n--- {label} ---\n{content}")

    full_text = "".join(sections)
    combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    report = analyze_content(full_text) if full_text else "No content to analyze"

    return combined_df, report

# Build the Gradio UI. Components are created in the order they should appear,
# so the statement order inside this block defines the page layout.
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("# Document Analysis System")
    
    # Input row: a multi-file picker restricted to .txt files, next to the
    # button that triggers processing.
    with gr.Row():
        file_input = gr.File(file_count="multiple", file_types=[".txt"])
        upload_btn = gr.Button("Process Files", variant="primary")
    
    # Output row: the tabular preview next to the markdown report.
    with gr.Row():
        data_output = gr.Dataframe(label="Structured Data Preview")
        report_output = gr.Markdown(label="Analysis Report")

    # Clicking the button calls handle_upload with the file picker's value;
    # its two return values fill data_output and report_output, in that order.
    upload_btn.click(
        handle_upload,
        inputs=file_input,
        outputs=[data_output, report_output]
    )

if __name__ == "__main__":
    # Script entry point: launch the app defined above with an explicit
    # host and port, and with show_error enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)