File size: 3,413 Bytes
cf49c4b
 
 
801ca00
cf49c4b
 
 
 
 
 
801ca00
a1ff6d2
cf49c4b
 
5d7c9cd
cf49c4b
801ca00
5614a83
313d70a
 
 
 
2997b62
72ec095
0ca3b80
ba6a087
71a19ea
460308e
ba6a087
72ec095
460308e
 
c70c2b2
 
71a19ea
2997b62
abc4ec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fb404e
c3f5319
cf49c4b
b3fa9dd
e02c7a0
 
 
8fb404e
c3f5319
313d70a
b3fa9dd
c3f5319
 
8984851
8fb404e
f5044e0
ba6a087
 
8984851
c70c2b2
ba6a087
 
abc4ec6
 
 
 
 
 
 
 
e02c7a0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import torch
import os
import random
import gradio as gr
from transformers import pipeline
import base64
from datasets import load_dataset
from diffusers import DiffusionPipeline
from huggingface_hub import login
import numpy as np

def guessanImage(model, image):
    """Classify an image with the selected image-classification model.

    Args:
        model: Hugging Face model id for an image-classification pipeline
            (e.g. "microsoft/resnet-50").
        image: PIL image from the Gradio component, or None when the user
            submitted without uploading anything.

    Returns:
        The pipeline's prediction list (dicts with "label"/"score"), or
        None when no image was supplied.
    """
    # Guard first: the original referenced `description` unbound (raising
    # UnboundLocalError) when image was None, and built the pipeline even
    # for empty input.
    if image is None:
        return None
    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)

def guessanAge(model, image):
    """Estimate the age bracket of a face image with a classifier model.

    Args:
        model: Hugging Face model id for an image-classification pipeline
            (e.g. "nateraw/vit-age-classifier").
        image: PIL image from the Gradio component, or None when the user
            submitted without uploading anything.

    Returns:
        The pipeline's prediction list (dicts with "label"/"score"), or
        None when no image was supplied.
    """
    # Guard first: the original referenced `description` unbound (raising
    # UnboundLocalError) when image was None, and built the pipeline even
    # for empty input.
    if image is None:
        return None
    imgclassifier = pipeline("image-classification", model=model)
    return imgclassifier(image)

def text2speech(model, text, voice):
    """Synthesize speech from text using a Hugging Face TTS pipeline.

    Args:
        model: Hugging Face model id for a text-to-speech pipeline
            (e.g. "microsoft/speecht5_tts").
        text: Text to synthesize. When empty, the function falls through
            and implicitly returns None.
        voice: Integer row index into the "Matthijs/cmu-arctic-xvectors"
            validation split, selecting a speaker embedding (the values
            wired in by the UI radio buttons, e.g. 0, 1138, ..., 6799).

    Returns:
        (sampling_rate, int16 numpy waveform) suitable for a Gradio Audio
        output, or None when *text* is empty.
    """
    print(voice)
    if len(text) > 0:
        synthesiser = pipeline("text-to-speech", model=model)
        
        # Speaker x-vector selected by row index; the dataset is downloaded
        # on every call. NOTE(review): consider caching it module-level if
        # latency matters.
        embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        speaker_embedding = torch.tensor(embeddings_dataset[voice]["xvector"]).unsqueeze(0)
        
        speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
        # Convert the float32 waveform to 16-bit PCM for the Audio widget.
        # NOTE(review): assumes speech["audio"] exposes the buffer protocol
        # as float32 samples — confirm for the chosen model.
        audio_data = np.frombuffer(speech["audio"], dtype=np.float32)
        audio_data_16bit = (audio_data * 32767).astype(np.int16)
        return speech["sampling_rate"], audio_data_16bit

def ImageGenFromText(text, model):
    """Generate a 512x512 image from a text prompt with a diffusion model.

    Args:
        text: Prompt string; an empty prompt returns None.
        model: diffusers model id (e.g. "black-forest-labs/FLUX.1-schnell").

    Returns:
        A PIL image, or None when the prompt is empty.
    """
    # Bail out early: the original logged in to the Hub even when the
    # prompt was empty and nothing would be generated.
    if not text:
        return None

    # Hub token from the environment; login() raises if missing/invalid.
    api_key = os.getenv("fluxauthtoken")
    login(token=api_key)

    dtype = torch.bfloat16
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Fresh random seed per call so repeated prompts yield varied images.
    MAX_SEED = np.iinfo(np.int32).max
    seed = random.randint(0, MAX_SEED)

    pipe = DiffusionPipeline.from_pretrained(model, torch_dtype=dtype).to(device)
    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=text,
        width=512,
        height=512,
        num_inference_steps=4,  # FLUX.1-schnell is distilled for few steps
        generator=generator,
        guidance_scale=0.0,
    ).images[0]
    print(image)
    return image


# --- Gradio UI wiring --------------------------------------------------------

# Tab 1: general-purpose image classification.
radio1 = gr.Radio(
    ["microsoft/resnet-50", "google/vit-base-patch16-224", "apple/mobilevit-small"],
    value="microsoft/resnet-50",
    label="Select a Classifier",
    info="Image Classifier",
)
tab1 = gr.Interface(
    fn=guessanImage,
    inputs=[radio1, gr.Image(type="pil")],
    outputs=["text"],
)

# Tab 2: age estimation from a face image.
radio2 = gr.Radio(
    ["nateraw/vit-age-classifier"],
    value="nateraw/vit-age-classifier",
    label="Select an Age Classifier",
    info="Age Classifier",
)
tab2 = gr.Interface(
    fn=guessanAge,
    inputs=[radio2, gr.Image(type="pil")],
    outputs=["text"],
)

# Tab 3: text-to-speech; the second radio picks a speaker x-vector index.
textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!")
radio3 = gr.Radio(
    ["microsoft/speecht5_tts"],
    value="microsoft/speecht5_tts",
    label="Select an tts",
    info="Text to Speech",  # was "Age Classifier" — copy-paste mistake
)
radio3_1 = gr.Radio(
    [
        ("Scottish male (awb)", 0),
        ("US male (bdl)", 1138),
        ("US female (clb)", 2271),
        ("Canadian male (jmk)", 3403),
        ("Indian male (ksp)", 4535),
        ("US male (rms)", 5667),
        ("US female (slt)", 6799),
    ],
    value=4535,
)
tab3 = gr.Interface(
    fn=text2speech,
    inputs=[radio3, textbox, radio3_1],
    outputs=["audio"],
)

# Tab 4: text-to-image. Renamed to radio4 (it previously shadowed radio3)
# and wired into the interface: the original passed the string "model",
# which is not a valid Gradio component shortcut and fails at startup.
radio4 = gr.Radio(
    ["black-forest-labs/FLUX.1-schnell"],
    value="black-forest-labs/FLUX.1-schnell",
    label="Select",
    info="text to image",
)
tab4 = gr.Interface(
    fn=ImageGenFromText,
    inputs=["text", radio4],
    outputs=["image"],
)

demo = gr.TabbedInterface([tab1, tab2, tab3, tab4], ["tab1", "tab2", "tab3", "tab4"])
demo.launch()