loubnabnl's picture
loubnabnl HF Staff
Update app.py
bbfd188
raw
history blame
1.09 kB
import streamlit as st
from datasets import load_dataset
import os
# Optional Hugging Face access token taken from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Shared by the browser tab title and the on-page heading.
_PAGE_TITLE = "Synthetic textbooks inspection"

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(layout="wide", page_title=_PAGE_TITLE)
st.title(_PAGE_TITLE)
st.markdown("Inspection of synthetic textbooks generated by `Falcon-180B-chat`")
@st.cache_data()
def load_data(source="all"):
    """Load the synthetic textbooks subset, optionally restricted to one source.

    Reads the "train" split of ``HuggingFaceTB/synthetic_textbooks_subset``,
    authenticating with ``HF_TOKEN``. The function is wrapped in
    ``st.cache_data`` so results are cached per ``source`` argument.

    Args:
        source: Value to match against each row's "source" field, or
            "all" (the default) to return the split unfiltered.

    Returns:
        The loaded dataset, filtered to rows whose "source" equals
        ``source`` unless ``source`` is "all".
    """
    dataset = load_dataset(
        "HuggingFaceTB/synthetic_textbooks_subset",
        split="train",
        use_auth_token=HF_TOKEN,
    )
    # "all" is the sentinel for "no filtering".
    if source == "all":
        return dataset
    return dataset.filter(lambda row: row["source"] == source)
# Let the user pick which generation source to inspect; "all" disables filtering.
# NOTE(review): these strings are compared verbatim against the dataset's
# "source" field in load_data, so the 'stanford_cources' spelling presumably
# mirrors the dataset value -- confirm before "fixing" it.
source = st.selectbox(
    "Data source",
    ['all', 'wikihow', 'khan_academy', 'stanford_cources', 'rw_wikihow', 'rw_stanford'],
)
samples = load_data(source)
n_samples = len(samples)

# A source filter can match no rows. st.number_input would then be called with
# max_value=-1 below min_value=0 and raise, so report the situation and halt
# this script run instead.
if n_samples == 0:
    st.warning(f"No samples found for source: {source}")
    st.stop()

index_example = st.number_input(
    f"Index of the sample (out of {n_samples}):",
    min_value=0,
    max_value=n_samples - 1,
    value=0,
    step=1,
)

# Fetch the selected row once rather than re-indexing the dataset per field.
sample = samples[index_example]

st.markdown(f"Example belongs to source: {sample['source']}")
st.subheader("Prompt")
st.markdown(sample["prompt"])
st.subheader("Textbook")
st.markdown(sample["textbook"])