# -*- coding: utf-8 -*-
"""Streamlit page that loads the Observatory News dataset and displays it.

The page logs in to the Hugging Face Hub (when a token is available in the
environment), downloads the ``strauss-oak/observatory-brazilian-news``
dataset and renders it with ``st.dataframe``. Two helpers for loading the
data from local CSV files are also defined.
"""
__company__ = ''
__project__ = 'Observatory News - Final Project'
__author__ = 'Strauss'
__intial__ = '2023-08-07'  # (sic) spelling kept in case other code reads it

import glob
import os
import sys

import pandas as pd
import streamlit as st
from datasets import load_dataset
from huggingface_hub import login

from database.data import Data
from utils.graphs import Altair

# NOTE(review): this runs after the project imports above, so it cannot be
# what makes them resolvable; presumably it serves later/lazy imports --
# confirm before moving or removing it.
sys.path.append(os.path.dirname(os.getcwd()))

# Identifier of the dataset on the Hugging Face Hub.
HF_DATASET_ID = "strauss-oak/observatory-brazilian-news"

# Environment variables consulted (in order) for the Hub access token.
HF_TOKEN_ENV_VARS = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN")

## Label Metrics Size
st.markdown("""""", unsafe_allow_html=True,)

data = Data()


def load_ds(media):
    """Load every CSV file of one media folder into a single DataFrame.

    Args:
        media: Name of the sub-directory of ``data.pth_data`` to read.

    Returns:
        A DataFrame with the rows of all ``*.csv`` files in that directory
        concatenated (index reset). An empty DataFrame is returned, after a
        warning is shown, when the directory contains no CSV file.
    """
    media_dir = os.path.join(data.pth_data, media)
    st.info(media_dir)

    # Sorted so the row order does not depend on the file system's listing.
    files = sorted(glob.glob(os.path.join(media_dir, '*.csv')))
    st.info(files)

    if not files:
        # pd.concat raises ValueError on an empty sequence; report instead.
        st.warning('No CSV files found in {0}'.format(media_dir))
        return pd.DataFrame()

    df = pd.concat((pd.read_csv(file) for file in files), ignore_index=True)
    st.info('Loaded {0} rows and {1} columns'.format(df.shape[0], df.shape[1]))
    return df


def load_hugging_face_ds(media, filename='202101.csv'):
    """Load one local CSV file through the ``datasets`` CSV builder.

    Args:
        media: Name of the sub-directory of ``data.pth_data`` to read.
        filename: CSV file inside that directory; defaults to the file the
            function originally hard-coded.

    Returns:
        The object returned by ``load_dataset('csv', ...)`` for that file.
    """
    # The original also downloaded HF_DATASET_ID here and discarded the
    # result immediately; that unused network call has been dropped.
    csv_path = os.path.join(data.pth_data, media, filename)
    st.info(csv_path)
    return load_dataset('csv', data_files=[csv_path], delimiter=',')


def _hf_token_from_env():
    """Return the first non-empty Hub token found in the environment, or None."""
    for var_name in HF_TOKEN_ENV_VARS:
        value = os.environ.get(var_name)
        if value:
            return value
    return None


c1 = st.container()
with c1:
    # Earlier experiments, kept for reference:
    #col1, col2 = st.columns(2)
    #df_news = load_ds('jpan')
    #st.dataframe(df_news)
    #df_news = load_ds('globo')
    #st.dataframe(df_news)
    #ds = load_hugging_face_ds('jpan')
    #st.dataframe(ds)
    #df = load_ds('jpan')
    #st.dataframe(df)
    #dataset = load_dataset("strauss-oak/observatory-brazilian-news")
    #df = pd.DataFrame.from_dict(dataset["train"])
    #st.dataframe(df)

    # SECURITY: the access token used to be hard-coded on this line. It is
    # now read from the environment; the token that was committed should be
    # revoked in the Hugging Face account settings.
    access_token = _hf_token_from_env()
    if access_token:
        login(access_token)
    # Without an environment token, ``token=True`` relies on credentials
    # cached by a previous ``huggingface-cli login``.
    dataset = load_dataset(HF_DATASET_ID, token=True)
    st.dataframe(dataset)

    df = pd.DataFrame.from_dict(dataset["train"])
    st.dataframe(df)