Spaces:

HamidBekam
/

Test_LLM

Runtime error

App Files Files Community

HamidBekam committed on Mar 6, 2023

Commit

5ac59b2

1 Parent(s): 5146ca2

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -90

app.py CHANGED Viewed

@@ -1,91 +1,2 @@
-import streamlit as st
-import pandas as pd
-import re
-import nltk
-from PIL import Image
-import os
-import numpy as np
-import seaborn as sns
-from wordcloud import WordCloud, STOPWORDS
-from nltk.corpus import stopwords
-import datasets
-from datasets import load_dataset
-import matplotlib.pyplot as plt
-import sklearn
-from sklearn.preprocessing import LabelEncoder
-sns.set_palette("RdBu")
-# loading dataset
-dataset = load_dataset("merve/poetry", streaming=True)
-df = pd.DataFrame.from_dict(dataset["train"])
-d = os.path.dirname(__file__) if "__file__" in locals() else os.getcwd()
-nltk.download("stopwords")
-stop = stopwords.words('english')
-# standardizing dataset by removing special characters and lowercasing
-def standardize(text, remove_digits=True):
-    text=re.sub('[^a-zA-Z\d\s]', '',text)
-    text = text.lower()
-    return text
-st.set_option('deprecation.showPyplotGlobalUse', False)
-st.write("Poetry dataset, content column cleaned from special characters and lowercased")
-df.content = df.content.apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
-df.content=df.content.apply(standardize)
-st.dataframe(df)
-st.subheader("Visualization on dataset statistics")
-st.write("Number of poems written in each type")
-sns.catplot(x="type", data=df, kind="count")
-plt.xticks(rotation=0)
-st.pyplot()
-st.write("Number of poems for each age")
-sns.catplot(x="age", data=df, kind="count")
-plt.xticks(rotation=0)
-st.pyplot()
-st.write("Number of poems for each author")
-sns.catplot(x="author", data=df, kind="count", aspect = 4)
-plt.xticks(rotation=90)
-st.pyplot()
-# distributions of poem types according to ages and authors
-st.write("Distributions of poem types according to ages and authors, seems that folks in renaissance loved the love themed poems  and nature themed poems became popular later")
-le = LabelEncoder()
-df.author = le.fit_transform(df.author)
-sns.catplot(x="age", y="author",hue="type", data=df)
-st.pyplot()
-#words = df.content.str.split(expand=True).unstack().value_counts()
-# most appearing words other than stop words
-words = df.content.str.split(expand=True).unstack().value_counts()
-renaissance = df.content.loc[df.age == "Renaissance"].str.split(expand=True).unstack().value_counts()
-modern = df.content.loc[df.age == "Modern"].str.split(expand=True).unstack().value_counts()
-st.subheader("Visualizing content")
-mask = np.array(Image.open(os.path.join(d, "poet.png")))
-import matplotlib.pyplot as plt
-def word_cloud(content, title):
-    wc = WordCloud(background_color="white", max_words=200,contour_width=3,
-                  stopwords=STOPWORDS, max_font_size=50)
-    wc.generate(" ".join(content.index.values))
-    fig = plt.figure(figsize=(10, 10))
-    plt.title(title, fontsize=20)
-    plt.imshow(wc.recolor(colormap='magma', random_state=42), cmap=plt.cm.gray, interpolation = "bilinear", alpha=0.98)
-    plt.axis('off')
-    st.pyplot()
-st.subheader("Most appearing words excluding stopwords in poems according to ages")
-word_cloud(modern, "Word Cloud of Modern Poems")
-word_cloud(renaissance, "Word Cloud Renaissance Poems")
-# most appearing words including stopwords
-st.write("Most appearing words including stopwords")
-st.bar_chart(words[0:50])


1	+ gr.Interface.load("huggingface/bigscience/bloom-560m",title="Text Generator Five w/ Variables", description="Input your text, submit and the machine willoutput text.").launch()


















2