Spaces:
Sleeping
Sleeping
File size: 3,413 Bytes
cf49c4b 801ca00 cf49c4b 801ca00 a1ff6d2 cf49c4b 5d7c9cd cf49c4b 801ca00 5614a83 313d70a 2997b62 72ec095 0ca3b80 ba6a087 71a19ea 460308e ba6a087 72ec095 460308e c70c2b2 71a19ea 2997b62 abc4ec6 8fb404e c3f5319 cf49c4b b3fa9dd e02c7a0 8fb404e c3f5319 313d70a b3fa9dd c3f5319 8984851 8fb404e f5044e0 ba6a087 8984851 c70c2b2 ba6a087 abc4ec6 e02c7a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import torch
import os
import random
import gradio as gr
from transformers import pipeline
import base64
from datasets import load_dataset
from diffusers import DiffusionPipeline
from huggingface_hub import login
import numpy as np
def guessanImage(model, image):
    """Classify an image with a Hugging Face image-classification pipeline.

    Args:
        model: Hub model id handed to ``pipeline`` as ``model=``.
        image: The image to classify, or ``None`` when nothing was supplied.

    Returns:
        The classification pipeline's output for ``image``, or ``None`` when
        no image was supplied.
    """
    # Guard first: with no image there is nothing to classify and no result
    # to return, so skip the (expensive) model load entirely.
    if image is None:
        return None

    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)
def guessanAge(model, image):
    """Run an age-classification model over an image.

    Args:
        model: Hub model id handed to ``pipeline`` as ``model=``.
        image: The image to classify, or ``None`` when nothing was supplied.

    Returns:
        The classification pipeline's output for ``image``, or ``None`` when
        no image was supplied.
    """
    # Without an image there is no prediction to report; return early so the
    # model is not loaded and no unbound result is ever returned.
    if image is None:
        return None

    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)
def text2speech(model, text, voice):
    """Synthesise ``text`` to 16-bit PCM audio using a speaker embedding.

    Args:
        model: Hub model id handed to the text-to-speech ``pipeline``.
        text: Text to speak. ``None`` or an empty string yields no audio.
        voice: Index into the "Matthijs/cmu-arctic-xvectors" validation
            split; the row's ``"xvector"`` is used as the speaker embedding.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``samples`` is a 1-D
        ``int16`` NumPy array, or ``None`` when there is no text to speak.
    """
    print(voice)

    # Covers both None and "" (len(None) would raise TypeError).
    if not text:
        return None

    synthesiser = pipeline("text-to-speech", model=model)
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[voice]["xvector"]).unsqueeze(0)
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})

    # Convert by value rather than reinterpreting raw bytes, so a non-float32
    # waveform is cast instead of being misread; reshape(-1) keeps the 1-D
    # layout that the byte-buffer view used to produce.
    audio_data = np.asarray(speech["audio"], dtype=np.float32).reshape(-1)

    # Clip to [-1, 1] before scaling so out-of-range samples saturate instead
    # of wrapping around when cast to int16.
    audio_data_16bit = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)
    return speech["sampling_rate"], audio_data_16bit
def ImageGenFromText(text, model):
    """Generate a 512x512 image from a text prompt with a diffusers pipeline.

    Args:
        text: The prompt. ``None`` or an empty string yields no image.
        model: Hub model id handed to ``DiffusionPipeline.from_pretrained``.

    Returns:
        The first image produced by the pipeline, or ``None`` when the prompt
        is empty.
    """
    # Check the prompt before doing any work: no login, no model load, and a
    # well-defined result (covers None as well as "").
    if not text:
        return None

    # Authenticate only when a token is configured. Calling login() with
    # token=None makes huggingface_hub fall back to an interactive prompt,
    # which cannot be answered from a headless web app.
    api_key = os.getenv("fluxauthtoken")
    if api_key:
        login(token=api_key)

    dtype = torch.bfloat16
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Fresh random seed per request, bounded to the int32 range.
    max_seed = np.iinfo(np.int32).max
    seed = random.randint(0, max_seed)

    pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=dtype).to(device)
    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=text,
        width=512,
        height=512,
        num_inference_steps=4,
        generator=generator,
        guidance_scale=0.0,
    ).images[0]
    print(image)
    return image
# Tab 1: image classification. The radio picks the model id and the image
# widget hands the upload to guessanImage as a PIL image.
radio1 = gr.Radio(
    choices=[
        "microsoft/resnet-50",
        "google/vit-base-patch16-224",
        "apple/mobilevit-small",
    ],
    value="microsoft/resnet-50",
    label="Select a Classifier",
    info="Image Classifier",
)
tab1 = gr.Interface(
    fn=guessanImage,
    inputs=[radio1, gr.Image(type="pil")],
    outputs=["text"],
)
# Tab 2: age classification. Only one model is offered, but it is still
# passed through a radio so guessanAge receives (model, image).
radio2 = gr.Radio(
    choices=["nateraw/vit-age-classifier"],
    value="nateraw/vit-age-classifier",
    label="Select an Age Classifier",
    info="Age Classifier",
)
tab2 = gr.Interface(
    fn=guessanAge,
    inputs=[radio2, gr.Image(type="pil")],
    outputs=["text"],
)
# Tab 3: text to speech. Inputs are ordered to match
# text2speech(model, text, voice).
textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!")
radio3 = gr.Radio(
    ["microsoft/speecht5_tts"],
    value="microsoft/speecht5_tts",
    label="Select an tts",
    # Was "Age Classifier", copied from the previous tab; this radio selects
    # the text-to-speech model.
    info="Text-to-Speech Model",
)
# Each choice is (display name, value); the value is the integer that
# text2speech uses to index the speaker-embedding dataset.
radio3_1 = gr.Radio(
    [
        ("Scottish male (awb)", 0),
        ("US male (bdl)", 1138),
        ("US female (clb)", 2271),
        ("Canadian male (jmk)", 3403),
        ("Indian male (ksp)", 4535),
        ("US male (rms)", 5667),
        ("US female (slt)", 6799),
    ],
    value=4535,
)
tab3 = gr.Interface(
    fn=text2speech,
    inputs=[radio3, textbox, radio3_1],
    outputs=["audio"],
)
# Tab 4: text to image. Inputs are ordered to match
# ImageGenFromText(text, model).
# The model selector gets its own name (radio4) instead of rebinding the
# text-to-speech tab's radio3.
radio4 = gr.Radio(
    ["black-forest-labs/FLUX.1-schnell"],
    value="black-forest-labs/FLUX.1-schnell",
    label="Select",
    info="text to image",
)
tab4 = gr.Interface(
    fn=ImageGenFromText,
    # "model" is not a Gradio component shortcut, so the selector component
    # itself is passed as the second input.
    inputs=["text", radio4],
    outputs=["image"],
)
# Assemble the four interfaces into one tabbed app and start serving it.
tab_interfaces = [tab1, tab2, tab3, tab4]
tab_titles = ["tab1", "tab2", "tab3", "tab4"]
demo = gr.TabbedInterface(interface_list=tab_interfaces, tab_names=tab_titles)
demo.launch()
|