Upload 6 files
Browse files
- bank_statement.csv +12 -0
- pages/1_a.py +66 -0
- pages/2_hotdog.py +19 -0
- pages/3_hf_tutorial.py +84 -0
- pages/4_all.py +36 -0
- pages/5_Uber_from_doc.py +36 -0
bank_statement.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Date,Description,Amount (USD)
|
2 |
+
2024-01-01,Walmart,35.20
|
3 |
+
2024-01-02,Shell Gas Station,45.00
|
4 |
+
2024-01-03,Netflix,15.99
|
5 |
+
2024-01-04,Starbucks,4.50
|
6 |
+
2024-01-05,Verizon Wireless,60.00
|
7 |
+
2024-01-06,Target,85.00
|
8 |
+
2024-01-07,Whole Foods,70.30
|
9 |
+
2024-01-08,AMC Theatres,25.00
|
10 |
+
2024-01-09,Spotify,9.99
|
11 |
+
2024-01-10,Chick-fil-A,18.40
|
12 |
+
2024-01-11,ExxonMobil,50.00
|
pages/1_a.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from langchain.llms.openai import OpenAI
|
3 |
+
from langchain.llms.bedrock import Bedrock
|
4 |
+
from langchain_google_genai import GoogleGenerativeAI
|
5 |
+
from langchain.prompts import PromptTemplate
|
6 |
+
from langchain.chains import SimpleSequentialChain
|
7 |
+
from langchain.chains import LLMChain
|
8 |
+
# Pixegami
|
9 |
+
|
10 |
+
# Prompt for step 1: ask the model to add a "Category" column to the statement.
PROMPT_TEMPLATE_TEXT = """
Generate a table in CSV format from the following bank statement data.

Add a column called "Category" and populate it with one of the following values:
[Groceries, Transport, Entertainment, Shopping, Utilities, Eating Out, Unknown]

{statement}
"""

# Prompt for step 2: ask the model to write a script summing the categorized rows.
CODING_TEMPLATE_TEXT = """
First, hard-code this data as a Python variable called 'items', with the category name and value.
Then write a Python script to sum this data by 'Category' and print the results.

{categorized_transactions}
"""

# AWS Bedrock model identifiers.
BEDROCK_CLAUDE_MODEL = "anthropic.claude-v2"
BEDROCK_LLAMA_MODEL = "meta.llama2-70b-chat-v1"


# The LLM clients are built by small factory functions so that only the
# provider actually selected below is constructed; the others no longer need
# their credentials/environment to be present just to run this script.
def _build_open_ai():
    """Build the OpenAI (GPT-3.5) LLM with a 1024-token completion limit."""
    return OpenAI(max_tokens=1024)


def _build_bedrock():
    """Build the AWS Bedrock LLM (Claude v2) using the "default" credentials profile."""
    return Bedrock(
        credentials_profile_name="default",
        model_id=BEDROCK_CLAUDE_MODEL,
        model_kwargs={"max_tokens_to_sample": 1024},
    )


def _build_gemini():
    """Build the Google Gemini LLM.

    Raises:
        KeyError: if the GOOGLE_AI_API_KEY environment variable is not set.
    """
    return GoogleGenerativeAI(
        model="gemini-pro",
        max_output_tokens=1024,
        google_api_key=os.environ["GOOGLE_AI_API_KEY"],
    )


llm = _build_gemini()  # Or _build_bedrock() or _build_open_ai().

# Create the individual prompt templates.
categorization_template = PromptTemplate.from_template(PROMPT_TEMPLATE_TEXT)
coding_template = PromptTemplate.from_template(CODING_TEMPLATE_TEXT)

# Create the chains.
categorization_chain = LLMChain(llm=llm, prompt=categorization_template)
coding_chain = LLMChain(llm=llm, prompt=coding_template)

# Join them into a sequential chain: the categorization output becomes the
# input of the coding step.
overall_chain = SimpleSequentialChain(
    chains=[categorization_chain, coding_chain], verbose=True
)

# Load the bank statement data. The encoding is pinned so the read does not
# depend on the platform's default locale.
with open("bank_statement.csv", "r", encoding="utf-8") as f:
    bank_statement_data = f.read()

# Run the chain using the bank statement data as input.
overall_chain.run(bank_statement_data)
|
pages/2_hotdog.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from transformers import pipeline
|
3 |
+
from PIL import Image
|
4 |
+
|
5 |
+
|
6 |
+
st.header(':red[2]', divider='violet')

st.subheader('Hotdog or Not Hotdog?')


@st.cache_resource
def load_classifier():
    """Return the hotdog / not-hotdog image-classification pipeline.

    Wrapped in ``st.cache_resource`` so the pipeline is built once and reused
    instead of being reconstructed each time the page script runs.
    """
    return pipeline(task='image-classification', model='julien-c/hotdog-not-hotdog')


# Bound to its own name so the imported ``pipeline`` factory is not shadowed.
classifier = load_classifier()
file_name = st.file_uploader("Upload a hotdog candidate image")

if file_name is not None:
    col1, col2 = st.columns(2)
    image = Image.open(file_name)
    col1.image(image, use_column_width=True)
    predictions = classifier(image)
    col2.header("Probabilities")
    # One sub-header per predicted label, with the score shown as a percentage.
    for p in predictions:
        col2.subheader(f"{ p['label'] }: { round(p['score'] * 100, 1)}%")
|
pages/3_hf_tutorial.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from transformers import pipeline
|
5 |
+
import time
|
6 |
+
import html
|
7 |
+
|
8 |
+
st.title('3 - *HuggingFace* :blue[Tutorial]')

# CSS for the container that holds the incrementally revealed text.
_TEXT_CONTAINER_CSS = """
<style>
.text-container {
width: 80%;
max-width: 600px;
white-space: pre-wrap; /* Ensure text wraps */
word-wrap: break-word; /* Ensure long words wrap */
font-family: 'Courier New', Courier, monospace;
font-size: 1.1em;
line-height: 1.5;
}
</style>
"""


def slowly_display_text(text, delay=0.05):
    """Reveal ``text`` one character at a time inside a single placeholder.

    After each character the placeholder is re-rendered with everything shown
    so far (HTML-escaped, newlines turned into ``<br>``), then the function
    sleeps for ``delay`` seconds.
    """
    slot = st.empty()
    pieces = []

    for ch in text:
        # Escape HTML special characters, then turn newlines into <br> tags
        # so empty lines are rendered.
        pieces.append(html.escape(ch).replace("\n", "<br>"))
        shown = "".join(pieces)
        slot.markdown(
            _TEXT_CONTAINER_CSS + f'<div class="text-container">{shown}</div>',
            unsafe_allow_html=True,
        )
        time.sleep(delay)
|
37 |
+
|
38 |
+
@st.cache_resource
def _load_pipeline(task, **kwargs):
    """Build a transformers pipeline for ``task`` and cache it.

    ``st.cache_resource`` keeps one pipeline per distinct argument set, so a
    section does not rebuild its pipeline every time the page script runs.
    Extra keyword arguments are forwarded to ``pipeline`` unchanged.
    """
    return pipeline(task, **kwargs)


######################################################
st.subheader('Pipe1 :- Sentiment Analysis', divider='orange')

if st.checkbox(label='Show Pipe1'):
    classifier = _load_pipeline('sentiment-analysis')

    x = st.text_input(
        label='Enter text',
        value="I've been waiting for a huggingface course my whole life.",
        key='pipe1_text',
    )
    if x:
        res = classifier(x)
        col1, col2 = st.columns(2)
        col1.metric(label='Prediction', value=res[0]['label'])
        col2.metric(label='Score', value=res[0]['score'])
        st.write(res)
    else:
        st.warning('Enter some text to classify.')

######################################################
st.subheader('Pipe2 :- Text Generation', divider='orange')

if st.checkbox(label='Show Pipe2'):
    generator = _load_pipeline('text-generation', model='distilgpt2')
    x = st.text_input(
        label='Enter text',
        value="In this course we'll teach you how to",
        key='pipe2_text',
    )
    if x:
        # Generate once, from the user's text only (the earlier throw-away
        # generation on a hard-coded sentence was discarded and is removed).
        res2 = generator(x, max_length=70)
        st.write("Generated text is:")
        slowly_display_text(res2[0]['generated_text'])
        st.write(res2)
    else:
        st.warning('Enter some text to continue from.')


######################################################
st.subheader('Pipe3 :- Zero-shot classification', divider='orange')

if st.checkbox(label='Show Pipe3'):
    # NOTE(review): going by its name, this checkpoint is fine-tuned for SST-2
    # sentiment; zero-shot classification pipelines normally expect an NLI
    # model. Confirm the scores below are meaningful before relying on them.
    clf2 = _load_pipeline(
        task='zero-shot-classification',
        model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
        framework='pt',
    )
    x = st.text_input(
        label='Enter text',
        value="This is a course about python list comprehension",
        key='pipe3_text',
    )
    if x:
        res3 = clf2(
            x,
            candidate_labels=['education', 'politics', 'business'],
        )
        st.write(res3)
    else:
        st.warning('Enter some text to classify.')
|
pages/4_all.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import time
|
5 |
+
|
6 |
+
st.header(body=':red[4]', divider='violet')
st.sidebar.markdown('Page 4 :snowflake:')

# Optional demo table: 20 rows of random normal values in three columns.
if st.checkbox(label='Show DataFrame'):
    chart_data = pd.DataFrame(
        np.random.randn(20, 3),
        columns=['a', 'b', 'c'],
    )
    # Written explicitly instead of relying on a bare expression.
    st.write(chart_data)

# Optional demo of a progress bar driven by a 100-step loop.
if st.checkbox(label='Show time progress bar'):
    st.write('Starting a long computation')

    # Placeholder that is overwritten with the current iteration number.
    latest_iteration = st.empty()
    bar = st.progress(0)

    for i in range(100):
        # Update the progress bar with each iteration.
        latest_iteration.text(f"Iteration {i+1}")
        bar.progress(i + 1)
        time.sleep(0.2)

    st.write('.. and now we\'re done!')

# Count script runs in session state, starting from zero on first use.
if "counter" not in st.session_state:
    st.session_state.counter = 0

st.session_state.counter += 1

st.write(f"This page has run {st.session_state.counter} times.")
st.button("Run it again")
|
pages/5_Uber_from_doc.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Page heading.
st.title('Uber pickups in NYC')

# Column holding the pickup timestamp.
DATE_COLUMN = 'date/time'
# Location of the gzipped CSV with the raw pickup records.
DATA_URL = 'https://s3-us-west-2.amazonaws.com/streamlit-demo-data/uber-raw-data-sep14.csv.gz'
|
10 |
+
|
11 |
+
@st.cache_data
def load_data(nrows):
    """Read the first ``nrows`` rows of ``DATA_URL`` into a DataFrame.

    Column labels are lower-cased and the ``DATE_COLUMN`` column is converted
    with ``pd.to_datetime`` before the frame is returned.
    """
    frame = pd.read_csv(DATA_URL, nrows=nrows)
    frame = frame.rename(columns=lambda label: str(label).lower())
    frame[DATE_COLUMN] = pd.to_datetime(frame[DATE_COLUMN])
    return frame
|
18 |
+
|
19 |
+
# Show a status line while the data loads, then overwrite it.
data_load_state = st.text('Loading data...')
data = load_data(10000)
data_load_state.text("Done! (using st.cache_data)")

if st.checkbox('Show raw data'):
    st.subheader('Raw data')
    st.write(data)

# Histogram of pickups per hour of day (24 one-hour bins).
st.subheader('Number of pickups by hour')
pickup_hours = data[DATE_COLUMN].dt.hour
hist_values, _ = np.histogram(pickup_hours, bins=24, range=(0, 24))
st.bar_chart(hist_values)

# Some number in the range 0-23
hour_to_filter = st.slider('hour', 0, 23, 17)
filtered_data = data[pickup_hours == hour_to_filter]

st.subheader(f'Map of all pickups at {hour_to_filter}:00')
st.map(filtered_data)
|