# gradiotest / app.py
import torch
import os
import random
import gradio as gr
from TTS.api import TTS
from transformers import pipeline
from diffusers import DiffusionPipeline
from huggingface_hub import login
import numpy as np
import spaces
import time
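# On Hugging Face ZeroGPU Spaces, @spaces.GPU attaches a GPU to each call for
# its duration; the bare decorator uses the default window, while
# @spaces.GPU(duration=120) below reserves a longer one for the slow TTS load.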
@spaces.GPU
def guessanImage(model, image):
    # Classify the uploaded image with the selected checkpoint.
    if image is None:
        return None
    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)
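# Same image-classification pipeline as above, pointed at an age-estimation checkpoint.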
@spaces.GPU
def guessanAge(model, image):
    # Predict an age bracket for the person in the image.
    if image is None:
        return None
    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)
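# XTTS v2 performs zero-shot voice cloning: it mimics whatever reference clip
# is passed as speaker_wav, so a few seconds of recorded audio is enough.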
@spaces.GPU(duration=120)
def text2speech(text, _note, sample):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    os.environ["COQUI_TOS_AGREED"] = "1"  # accept the Coqui model licence non-interactively
    if sample is None:
        sample = "sampleaudio/abraham.wav"  # fall back to the bundled sample voice
    if len(text) == 0:
        return None
    epoch_time = str(int(time.time()))
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
    # tts_to_file returns the path of the generated wav, which gr.Audio can play.
    return tts.tts_to_file(
        text=text,
        file_path="output-" + epoch_time + ".wav",
        speaker_wav=sample,
        language="en",
    )
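# FLUX.1-schnell is a timestep-distilled model, which is why 4 inference steps
# and guidance_scale=0.0 (no classifier-free guidance) are used below; the
# Space's "fluxauth" secret supplies the Hub token for login().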
@spaces.GPU
def ImageGenFromText(text, model):
    api_key = os.getenv("fluxauth")
    login(token=api_key)
    if len(text) == 0:
        return None
    device = "cuda" if torch.cuda.is_available() else "cpu"
    MAX_SEED = np.iinfo(np.int32).max
    seed = random.randint(0, MAX_SEED)  # fresh random seed on every call
    pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=torch.bfloat16).to(device)
    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=text,
        width=512,
        height=512,
        num_inference_steps=4,
        generator=generator,
        guidance_scale=0.0,
    ).images[0]
    return image
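# Plain causal-LM completion: the model continues the supplied statute text.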
@spaces.GPU
def RunLegalModel(text, model):
    if not text:
        return None
    pettyfogger = pipeline("text-generation", model=model)
    shoddyadvice = pettyfogger(text)
    return shoddyadvice[0]["generated_text"]
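# ---- UI wiring: one gr.Interface per tab, collected in a TabbedInterface ----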
radio1 = gr.Radio(["microsoft/resnet-50", "google/vit-base-patch16-224", "apple/mobilevit-small"], value="microsoft/resnet-50", label="Select a Classifier", info="Image Classifier")
tab1 = gr.Interface(
    fn=guessanImage,
    inputs=[radio1, gr.Image(type="pil")],
    outputs=["text"],
)
radio2 = gr.Radio(["nateraw/vit-age-classifier"], value="nateraw/vit-age-classifier", label="Select an Age Classifier", info="Age Classifier")
tab2 = gr.Interface(
    fn=guessanAge,
    inputs=[radio2, gr.Image(type="pil")],
    outputs=["text"],
)
textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!", label="Type text to convert to your voice:")
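# Static audio player for the default voice. Gradio passes its value through
# as the second input, which text2speech deliberately ignores (_note).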
sampletext = gr.HTML("""
    <h3>If you do not record a voice sample, the default voice below will be used:</h3>
    <audio controls>
      <source src="https://huggingface.co/spaces/Abrahamau/gradiotest/resolve/main/sampleaudio/abraham.wav" type="audio/wav">
      Your browser does not support the audio element.
    </audio>
""")
micinput = gr.Audio(sources=['microphone'], type="filepath", format="wav", label="Please Provide a Sample Voice for the Model to Mimic")
outaudio = gr.Audio(show_download_button=True, show_share_button=True)
tab3 = gr.Interface(
    fn=text2speech,
    inputs=[textbox, sampletext, micinput],
    outputs=[outaudio],
)
radio4 = gr.Radio(["black-forest-labs/FLUX.1-schnell"], value="black-forest-labs/FLUX.1-schnell", label="Select", info="text to image")
tab4 = gr.Interface(
    fn=ImageGenFromText,
    inputs=["text", radio4],
    outputs=["image"],
)
classifiertypes = ["umarbutler/open-australian-legal-llm"]
radio5 = gr.Radio(classifiertypes, value="umarbutler/open-australian-legal-llm", label="Select", info="Legal Model")
textinput5 = gr.Textbox(value="Under the purposes of Part 6 Division 2 of the Act, regulations may confer power on an applicant for")
tab5 = gr.Interface(
    fn=RunLegalModel,
    inputs=[textinput5, radio5],
    outputs=["text"],
)
demo = gr.TabbedInterface([tab1, tab2, tab3, tab4, tab5], ["Describe", "Estimate Age", "Speak", "Generate Image", "Aus. Legal"])
demo.launch()