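"""Gradio demo Space with four tabs: image classification ("Describe"),
age estimation, voice-cloned text-to-speech via Coqui XTTS v2 ("Speak"),
and text-to-image via FLUX.1-schnell ("Generate Image").

Intended to run on a Hugging Face Space; `@spaces.GPU` requests a GPU
slot per call (ZeroGPU scheduling)."""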
import os
import random
import time

import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import DiffusionPipeline
from huggingface_hub import login
from transformers import pipeline
from TTS.api import TTS
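# --- GPU-backed inference helpers ---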
@spaces.GPU
def guessanImage(model, image):
    # Classify the uploaded image with the selected model.
    imgclassifier = pipeline("image-classification", model=model)
    if image is not None:
        description = imgclassifier(image)
        return description
@spaces.GPU
def guessanAge(model, image):
    # Estimate the age range of the person in the image.
    imgclassifier = pipeline("image-classification", model=model)
    if image is not None:
        description = imgclassifier(image)
        return description
@spaces.GPU(duration=120)
def text2speech(text, no, sample):
    # `no` receives the value of the gr.HTML component and is unused.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    os.environ["COQUI_TOS_AGREED"] = "1"
    # Fall back to the bundled reference voice when no microphone sample was recorded.
    if sample is None:
        sample = "sampleaudio/abraham.wav"
    if len(text) > 0:
        epoch_time = str(int(time.time()))
        # Move the XTTS model (not the returned file path) onto the device,
        # then clone the reference voice onto the input text.
        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
        wav = tts.tts_to_file(text=text, file_path="output-" + epoch_time + ".wav", speaker_wav=sample, language="en")
        return wav
@spaces.GPU
def ImageGenFromText(text, model):
    # Authenticate with the token stored in the Space secret "fluxauth".
    api_key = os.getenv("fluxauth")
    login(token=api_key)
    if len(text) > 0:
        dtype = torch.bfloat16
        device = "cuda" if torch.cuda.is_available() else "cpu"
        MAX_SEED = np.iinfo(np.int32).max
        seed = random.randint(0, MAX_SEED)
        pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=dtype).to(device)
        generator = torch.Generator().manual_seed(seed)
        image = pipe(
            prompt=text,
            width=512,
            height=512,
            num_inference_steps=4,  # FLUX.1-schnell is distilled for few-step sampling
            generator=generator,
            guidance_scale=0.0,
        ).images[0]
        return image
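# --- Gradio UI: one tab per function above ---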
radio1 = gr.Radio(["microsoft/resnet-50", "google/vit-base-patch16-224", "apple/mobilevit-small"], value="microsoft/resnet-50", label="Select a Classifier", info="Image Classifier")
tab1 = gr.Interface(
    fn=guessanImage,
    inputs=[radio1, gr.Image(type="pil")],
    outputs=["text"],
)
radio2 = gr.Radio(["nateraw/vit-age-classifier"], value="nateraw/vit-age-classifier", label="Select an Age Classifier", info="Age Classifier")
tab2 = gr.Interface(
    fn=guessanAge,
    inputs=[radio2, gr.Image(type="pil")],
    outputs=["text"],
)
textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!")
sampletext = gr.HTML("""
    <span>If you do not record a sample of your voice, this sample voice will be used as input:</span>
    <audio controls autoplay>
        <source src="https://huggingface.co/spaces/Abrahamau/gradiotest/resolve/main/sampleaudio/abraham.wav" type="audio/wav">
        Your browser does not support the audio element.
    </audio>
""")
micinput = gr.Audio(sources=['microphone'], type="filepath", format="wav")
tab3 = gr.Interface(
    fn=text2speech,
    inputs=[textbox, sampletext, micinput],
    outputs=["audio"],
)
radio4 = gr.Radio(["black-forest-labs/FLUX.1-schnell"], value="black-forest-labs/FLUX.1-schnell", label="Select a Model", info="Text to Image")
tab4 = gr.Interface(
    fn=ImageGenFromText,
    inputs=["text", radio4],
    outputs=["image"],
)
demo = gr.TabbedInterface([tab1, tab2, tab3, tab4], ["Describe", "Estimate Age", "Speak", "Generate Image"])
demo.launch()