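# Gradio app: parses uploaded .txt files into a structured DataFrame and
# asks a smolagents CodeAgent for a JSON analysis of their content.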
import json
import os
import gradio as gr
from smolagents import CodeAgent, HfApiModel
import pandas as pd
from io import StringIO

# Code agent used for the analysis step: the model writes Python snippets,
# and only the json module is whitelisted for that generated code.
agent = CodeAgent(
    tools=[],
    model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
    additional_authorized_imports=['json']
)

def parse_text_content(content):
    """Heuristic parser: maps loosely structured text lines to id,description CSV."""
    lines = content.split('\n')
    csv_lines = ["id,description"]

    def quote(field):
        # Quote every field so commas or quotes inside the text
        # cannot break the CSV structure downstream
        return '"' + field.replace('"', '""') + '"'

    for line in lines:
        line = line.strip()
        if not line:
            continue

        # Handle "ID description" pattern
        if ' ' in line:
            ident, desc = line.split(' ', 1)
            clean_desc = desc.replace('(edited)', '').strip()
            csv_lines.append(f"{quote(ident)},{quote(clean_desc)}")

        # Handle "id!description" separator pattern
        elif '!' in line:
            ident, desc = line.split('!', 1)
            csv_lines.append(f"{quote(ident)},{quote(desc)}")

        # Handle single-value lines (empty description)
        else:
            csv_lines.append(f"{quote(line)},")

    return '\n'.join(csv_lines)

def process_file(file_path):
    """Read a single text file into a DataFrame, trapping read/parse errors."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        
        # Generate CSV
        csv_data = parse_text_content(content)
        
        # Convert to DataFrame
        df = pd.read_csv(StringIO(csv_data), keep_default_na=False)
        
        return True, df, csv_data
    
    except Exception as e:
        return False, pd.DataFrame(), str(e)
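
# analyze_content() is called below but was missing from the original file,
# so running it as-is raises a NameError. The sketch here is an assumed
# reconstruction: it sends the combined text to the smolagents CodeAgent and
# coerces the answer into something gr.JSON can display. The prompt wording
# and the fallback keys are illustrative, not from the source.
def analyze_content(content):
    """Ask the agent for a structured analysis of the parsed text."""
    prompt = (
        "Analyze the following document content and return a JSON object "
        "describing its structure and notable patterns:\n\n" + content
    )
    try:
        result = agent.run(prompt)
    except Exception as e:
        return {"error": str(e)}
    if isinstance(result, str):
        try:
            # Agents often answer with a JSON string; parse it when possible
            return json.loads(result)
        except json.JSONDecodeError:
            return {"analysis": result}
    return result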

def handle_upload(files):
    """Process multiple files"""
    all_dfs = []
    full_content = ""
    
    for file in files:
        # Older Gradio versions hand back tempfile wrappers whose .name is
        # the path; newer ones hand back the path string directly. Support
        # both, rather than calling .name on what may be a plain string.
        path = file.name if hasattr(file, "name") else file
        success, df, content = process_file(path)
        if success:
            all_dfs.append(df)
            full_content += f"\n\n--- {os.path.basename(path)} ---\n{content}"
    
    combined_df = pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()
    
    # Generate analysis
    analysis = analyze_content(full_content)
    
    return combined_df, analysis

with gr.Blocks() as demo:
    gr.Markdown("# Advanced Text Analyzer")
    
    with gr.Row():
        file_input = gr.File(
            file_count="multiple",
            file_types=[".txt"],
            label="Upload Documents"
        )
        submit_btn = gr.Button("Analyze")
    
    with gr.Row():
        data_output = gr.Dataframe(label="Structured Data")
        json_output = gr.JSON(label="Content Analysis")

    submit_btn.click(
        handle_upload,
        inputs=file_input,
        outputs=[data_output, json_output]
    )

if __name__ == "__main__":
    demo.launch()