File size: 4,696 Bytes
513882d
 
 
 
 
 
 
 
 
0b3ce2d
513882d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8765c2a
 
513882d
0b3ce2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513882d
8765c2a
513882d
8765c2a
513882d
 
8765c2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513882d
 
 
 
 
2623a01
513882d
 
 
 
 
 
 
 
 
 
 
 
0b3ce2d
 
 
 
 
 
 
 
 
513882d
0b3ce2d
513882d
 
 
8765c2a
513882d
 
 
 
 
 
 
8765c2a
 
 
 
513882d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b3ce2d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import pandas as pd
import re
from openai import OpenAI
import concurrent.futures
import json
import os

def extract_and_parse_json_from_markdown(markdown_text: str) -> dict:
    """Parse the JSON payload contained in an LLM reply.

    The payload is looked for in this order:

    1. the contents of the first fenced code block (optionally tagged
       ``json``);
    2. otherwise the whole text, stripped of surrounding whitespace.

    If that candidate is not valid JSON on its own, the span from its first
    ``{`` to its last ``}`` is tried, which covers replies that wrap the
    object in a sentence of prose.

    Args:
        markdown_text: Raw text returned by the model.

    Returns:
        Whatever ``json.loads`` produces for the payload (a dict for the
        schema requested elsewhere in this file).

    Raises:
        ValueError: If no parseable JSON is found. The original
            ``json.JSONDecodeError`` is attached as ``__cause__``.
    """
    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)```", markdown_text)
    candidate = (fenced.group(1) if fenced else markdown_text).strip()

    try:
        return json.loads(candidate)
    except json.JSONDecodeError as first_error:
        # Models frequently add chatter around the object; retry on the
        # outermost brace-delimited span before giving up.
        start = candidate.find("{")
        end = candidate.rfind("}")
        if start != -1 and end > start:
            try:
                return json.loads(candidate[start:end + 1])
            except json.JSONDecodeError:
                pass  # fall through and report the original failure
        raise ValueError(f"Invalid JSON format: {first_error}") from first_error

def process_event(event, *, model: str = "Qwen/Qwen2.5-Coder-32B-Instruct", max_attempts: int = 2):
    """Ask the LLM to convert one event's text into a flat JSON record.

    A client is created against the DeepInfra OpenAI-compatible endpoint
    using the ``DEEP_API_KEY`` environment variable. The prompt embeds
    ``event`` and the target schema, and the reply is parsed with
    ``extract_and_parse_json_from_markdown``. Each attempt that fails
    (API error, empty reply, unparseable JSON, or JSON that is not an
    object) is retried until ``max_attempts`` is exhausted.

    Args:
        event: Text describing a single concert campaign.
        model: Model identifier sent to the chat-completions endpoint.
        max_attempts: Total number of requests to try before giving up.

    Returns:
        The parsed dict on success, or ``None`` if every attempt failed.
    """
    # Imported locally so the file's top-level import block stays unchanged.
    from openai import OpenAIError

    client = OpenAI(
        api_key=os.environ.get('DEEP_API_KEY'),
        base_url="https://api.deepinfra.com/v1/openai",
    )

    llm_prompt = f""" 
    You are a digital marketing campaign analyst designed to analyze and report digital marketing campaign data for Rod Wave concerts. Your job is to convert the given text into JSON.
    Don't make any assumptions; if a value doesn't exist, consider it as zero.
    {{
    "market": "str",
    "total_spend": "float",
    "impressions": "float",
    "clicks": "float",
    "metrics_cpc": "float",
    "metrics_cpm": "float",
    "metrics_ctr": "float",
    "metrics_cpa": "float",
    "platform_spend_meta_total": "float",
    "platform_spend_meta_instagram": "float",
    "platform_spend_meta_facebook": "float",
    "platform_spend_google_total": "float",
    "platform_spend_google_youtube": "float",
    "platform_spend_google_search_display": "float",
    "platform_spend_programmatic": "float",
    "revenue_average_ticket_price": "float",
    "revenue_total_revenue": "float",
    "revenue_roi": "float"
    }}
    Here is the text for it:
    {event}
    Return in only JSON adhering to the above schema.
    """

    # NOTE(review): process_all_events runs this function in worker threads;
    # Streamlit calls made outside the script thread may not render -- confirm
    # whether these warnings/errors actually reach the page.
    for attempt in range(max_attempts):
        final_attempt = attempt == max_attempts - 1

        try:
            chat_completion = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": llm_prompt}],
            )
        except OpenAIError as exc:
            # One failed request must not abort the whole batch.
            if final_attempt:
                st.error(f"LLM request failed: {exc}")
                return None
            st.warning(f"LLM request failed ({exc}), retrying...")
            continue

        # ``content`` may be None; treat that as an empty (invalid) reply
        # instead of letting the regex search raise TypeError.
        json_output = chat_completion.choices[0].message.content or ""

        try:
            parsed = extract_and_parse_json_from_markdown(json_output)
            if not isinstance(parsed, dict):
                # A list or scalar cannot become a single record row.
                raise ValueError("Expected a JSON object")
        except ValueError:
            if final_attempt:
                st.error("Failed to validate JSON after retrying.")
                return None  # Give up on this event only
            st.warning("JSON validation failed, retrying...")
            continue

        return parsed

    return None

def process_all_events(events, max_workers: int = 8):
    """Run ``process_event`` over all events concurrently.

    Each event is submitted to a thread pool and a Streamlit progress bar
    is advanced as futures complete. Results are stored at the index of the
    event that produced them, so the returned list lines up with ``events``
    regardless of the order in which requests finish.

    Args:
        events: Sized sequence of event texts.
        max_workers: Maximum number of concurrent worker threads.

    Returns:
        A list the same length as ``events`` holding whatever
        ``process_event`` returned for each one (``None`` for failures).

    Raises:
        Any exception raised inside ``process_event`` is re-raised here by
        ``future.result()``.
    """
    total = len(events)
    results = [None] * total
    progress_bar = st.progress(0)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Remember each future's input position; as_completed yields them in
        # completion order, not submission order.
        position_of = {
            executor.submit(process_event, event): position
            for position, event in enumerate(events)
        }

        for finished, future in enumerate(concurrent.futures.as_completed(position_of), start=1):
            progress_bar.progress(finished / total)
            results[position_of[future]] = future.result()

    return results

def main():
    """Render the Streamlit page: collect input, process it, offer a CSV.

    The user supplies text either through a text area or an uploaded
    ``.txt`` file. The text is split into events at every newline that is
    followed by ``Rod Wave Concert``. When "Process Data" is pressed, the
    events go through ``process_all_events``; each successful record is
    shown as JSON, then the records are previewed as a DataFrame and
    offered as a CSV download. Failed events are counted and reported, and
    no CSV is offered when nothing succeeded.
    """
    st.title("Rod Wave Concert Marketing Data Processor")

    input_method = st.radio("Choose input method:", ["Text Area", "File Upload"])

    text = None
    if input_method == "Text Area":
        text = st.text_area("Enter concert marketing data:", height=300)
    else:
        uploaded_file = st.file_uploader("Choose a text file", type="txt")
        if uploaded_file is not None:
            try:
                # utf-8-sig accepts plain UTF-8 and also drops a leading BOM.
                text = uploaded_file.read().decode("utf-8-sig")
            except UnicodeDecodeError:
                st.error("The uploaded file is not valid UTF-8 text.")
                return

    if not text:
        return

    events = [
        chunk
        for chunk in re.split(r'\n(?=Rod Wave Concert)', text)
        if chunk.strip()
    ]

    st.write(f"Found **{len(events)}** events to process")

    if not st.button("Process Data"):
        return

    with st.spinner("Processing events..."):
        json_all = process_all_events(events)

        # Keep only events that produced a record.
        json_sanity = [ele for ele in json_all if ele is not None]
        for ele in json_sanity:
            st.subheader("Processed JSON for Event")
            st.json(ele)

        failed_count = len(json_all) - len(json_sanity)
        if failed_count:
            st.warning(f"{failed_count} of {len(json_all)} events could not be processed.")

        if not json_sanity:
            # Do not claim success or offer an empty CSV.
            st.error("No events were successfully processed; nothing to export.")
            return

        df = pd.DataFrame(json_sanity)

        st.success("Processing complete!")
        st.write("Preview of processed data:")
        st.dataframe(df.head())

        csv = df.to_csv(index=False)
        st.download_button(
            label="Download CSV",
            data=csv,
            file_name="processed_concert_data.csv",
            mime="text/csv"
        )

# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()