test / app.py
ngrigg's picture
Fix processing to handle subset of descriptions and match DataFrame length
cfa4436
raw
history blame
1.6 kB
import streamlit as st
import pandas as pd
import asyncio
from llama_models import process_text
from dotenv import load_dotenv
import os
# Load environment variables from a local .env file into os.environ before any
# model/API call runs (presumably API credentials for the model backend — TODO confirm).
load_dotenv()
async def process_csv(file, sample_size: int = 5):
    """Read a headerless CSV of descriptions and predict on a leading sample.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like, e.g. a
            Streamlit upload).
        sample_size: Maximum number of leading rows sent to the model.
            Defaults to 5, matching the previously hard-coded sample size.

    Returns:
        The DataFrame with an added ``predictions`` column: model output for
        the sampled rows, empty strings for the remainder so the column
        length matches the DataFrame.
    """
    df = pd.read_csv(file, header=None)  # no header row: first column is keyed 0
    descriptions = df[0].tolist()

    # Only a leading subset is processed (keeps latency/cost bounded).
    n_sampled = min(sample_size, len(descriptions))
    model_name = "instruction-pretrain/finance-Llama3-8B"  # or any other model you want to use

    results = []
    for desc in descriptions[:n_sampled]:
        # Sequential awaits: one description at a time, preserving row order.
        results.append(await process_text(model_name, desc))

    # Pad with empty strings to match the length of the DataFrame.
    results.extend([''] * (len(descriptions) - n_sampled))
    df['predictions'] = results
    return df
# --- Streamlit page: upload a CSV, run predictions, offer the result as a download ---
st.title("Finance Model Deployment")
st.write("""
### Upload a CSV file with company descriptions to extract key products, geographies, and important keywords:
""")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

# Guard clause: only run when a file is present AND the button was clicked
# (equivalent to the nested-if form; st.button is not evaluated without a file).
if uploaded_file is not None and st.button("Predict"):
    with st.spinner("Processing..."):
        predictions_df = asyncio.run(process_csv(uploaded_file))
    st.write(predictions_df)
    csv_bytes = predictions_df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Download Predictions as CSV",
        data=csv_bytes,
        file_name='predictions.csv',
        mime='text/csv'
    )