gskdsrikrishna committed on
Commit b5ab290 · verified · 1 Parent(s): 6559825

Update app.py

Files changed (1)
  1. app.py +48 -15
app.py CHANGED
@@ -1,21 +1,54 @@
 import gradio as gr
-from diffusers import StableDiffusionPipeline
 import torch
+from transformers import CLIPProcessor, CLIPModel
+from torch import nn
+import numpy as np
+import PIL
+from PIL import Image
+from torchvision import transforms
 
-# Load the Stable Diffusion model from Hugging Face's diffusers library
-pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4-original", torch_dtype=torch.float16)
-pipe.to("cuda")
+# Load CLIP model and processor
+model_name = "openai/clip-vit-base-patch16"
+clip_model = CLIPModel.from_pretrained(model_name)
+clip_processor = CLIPProcessor.from_pretrained(model_name)
 
-def generate_image(prompt):
-    # Generate the image based on the text prompt
-    image = pipe(prompt).images[0]
-    return image
-
-# Create Gradio interface
-interface = gr.Interface(fn=generate_image,
-                         inputs="text",
-                         outputs="image",
-                         title="Text to Image Generator",
-                         description="Generate images from text using Stable Diffusion.")
-
-interface.launch()
+# Generate an image from a text prompt by decoding a random noise tensor
+def generate_image_from_text(text_input):
+    # Preprocess the input text for the CLIP model
+    inputs = clip_processor(text=text_input, return_tensors="pt", padding=True)
+
+    # Extract text features using CLIP
+    text_features = clip_model.get_text_features(**inputs)
+
+    # A simple GAN-like generator that maps a noise vector to an image tensor
+    class SimpleGenerator(nn.Module):
+        def __init__(self):
+            super(SimpleGenerator, self).__init__()
+            self.fc = nn.Linear(512, 256*256*3)  # Output size matches a 256x256 RGB image
+            self.relu = nn.ReLU()
+
+        def forward(self, z):
+            x = self.fc(z)
+            x = self.relu(x)
+            x = x.view(-1, 3, 256, 256)  # Reshape to (N, C, H, W)
+            return x
+
+    # Initialize the generator
+    generator = SimpleGenerator()
+
+    # Sample a random noise vector with the same size as the CLIP text features (512)
+    random_input = torch.randn(1, 512)
+    generated_image_tensor = generator(random_input)
+
+    # Convert the generated image tensor to a PIL Image
+    generated_image = generated_image_tensor.squeeze().permute(1, 2, 0).detach().numpy()
+    generated_image = np.clip(generated_image, 0, 1)  # Clamp pixel values to [0, 1]
+    generated_image = (generated_image * 255).astype(np.uint8)
+    generated_image = Image.fromarray(generated_image)
+
+    return generated_image
+
+# Gradio interface
+iface = gr.Interface(fn=generate_image_from_text, inputs="text", outputs="image", live=True)
+
+iface.launch()
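
A possible next step, sketched only as an illustration and not part of this commit: in generate_image_from_text, the noise vector random_input is sampled independently of text_features, so the output image does not vary with the prompt. One way to wire the prompt through is to feed the CLIP text features, concatenated with noise, into the generator. ConditionedGenerator and generate_conditioned below are hypothetical names, and the generator is still untrained, so the result is prompt-dependent noise rather than a meaningful picture.

import torch
from torch import nn
from transformers import CLIPModel, CLIPProcessor

model_name = "openai/clip-vit-base-patch16"
clip_model = CLIPModel.from_pretrained(model_name)
clip_processor = CLIPProcessor.from_pretrained(model_name)

class ConditionedGenerator(nn.Module):
    # Hypothetical variant of SimpleGenerator: concatenates a 512-d CLIP text
    # embedding with a 512-d noise vector before projecting to pixel space.
    def __init__(self, text_dim=512, noise_dim=512):
        super().__init__()
        self.fc = nn.Linear(text_dim + noise_dim, 256 * 256 * 3)
        self.act = nn.Sigmoid()  # keeps outputs in [0, 1], so no clipping is needed

    def forward(self, text_features, noise):
        x = self.fc(torch.cat([text_features, noise], dim=-1))
        return self.act(x).view(-1, 3, 256, 256)  # (N, C, H, W)

def generate_conditioned(prompt):
    # Encode the prompt with CLIP, then decode text features plus noise into an image tensor
    inputs = clip_processor(text=prompt, return_tensors="pt", padding=True)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**inputs)  # shape (1, 512)
        noise = torch.randn(1, 512)
        return ConditionedGenerator()(text_features, noise)     # shape (1, 3, 256, 256)

The returned tensor can be converted to a PIL image the same way generated_image_tensor is handled in app.py, so the Gradio wiring would stay unchanged.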
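
Separately, torchvision.transforms is imported in the new app.py but never used. A minimal sketch, shown as an alternative rather than what the commit does, of routing the tensor-to-PIL step through it instead of the manual NumPy round trip, assuming a (1, 3, 256, 256) float tensor such as generated_image_tensor:

import torch
from torchvision import transforms

def tensor_to_pil(generated_image_tensor):
    # ToPILImage expects a (C, H, W) float tensor with values in [0, 1]
    image = generated_image_tensor.squeeze(0).detach().clamp(0, 1)
    return transforms.ToPILImage()(image)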