{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# pip install torch_xla -q","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-23T07:29:06.028570Z","iopub.execute_input":"2024-12-23T07:29:06.028897Z","iopub.status.idle":"2024-12-23T07:29:06.040164Z","shell.execute_reply.started":"2024-12-23T07:29:06.028804Z","shell.execute_reply":"2024-12-23T07:29:06.039321Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# ! nvidia-smi -L","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-23T07:29:06.046744Z","iopub.execute_input":"2024-12-23T07:29:06.047018Z","iopub.status.idle":"2024-12-23T07:29:06.051114Z","shell.execute_reply.started":"2024-12-23T07:29:06.046989Z","shell.execute_reply":"2024-12-23T07:29:06.050374Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"!pip install gradio diffusers gTTS together -q","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:17:36.027884Z","iopub.execute_input":"2024-12-25T00:17:36.028181Z","iopub.status.idle":"2024-12-25T00:17:50.860673Z","shell.execute_reply.started":"2024-12-25T00:17:36.028159Z","shell.execute_reply":"2024-12-25T00:17:50.859768Z"}},"outputs":[{"name":"stdout","text":"\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.8/41.8 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.2/57.2 MB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m320.4/320.4 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.2/3.2 MB\u001b[0m \u001b[31m82.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.6/70.6 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.8/94.8 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.5/73.5 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.6/78.6 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m450.5/450.5 kB\u001b[0m \u001b[31m25.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m131.3/131.3 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.2/11.2 MB\u001b[0m \u001b[31m106.1 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.2/73.2 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.3/62.3 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25h","output_type":"stream"}],"execution_count":1},{"cell_type":"code","source":"# import torch_xla.core.xla_model as xm\n# tpu = xm.xla_device()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-23T07:29:20.075258Z","iopub.execute_input":"2024-12-23T07:29:20.075501Z","iopub.status.idle":"2024-12-23T07:29:20.078665Z","shell.execute_reply.started":"2024-12-23T07:29:20.075480Z","shell.execute_reply":"2024-12-23T07:29:20.078023Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"import os\nimport torch\nfrom diffusers import TextToVideoSDPipeline, DiffusionPipeline\nfrom diffusers.utils import export_to_video\nimport gradio as gr\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\nimport PIL.Image  # bind PIL.Image explicitly; a bare 'import PIL' does not guarantee the Image submodule is loaded\nfrom io import BytesIO\nfrom gtts import gTTS\nimport time\nfrom pydub import AudioSegment\nimport nltk\nfrom together import Together\nimport base64\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:17:53.042072Z","iopub.execute_input":"2024-12-25T00:17:53.042422Z","iopub.status.idle":"2024-12-25T00:18:10.158571Z","shell.execute_reply.started":"2024-12-25T00:17:53.042362Z","shell.execute_reply":"2024-12-25T00:18:10.157888Z"}},"outputs":[{"name":"stderr","text":"The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. 
You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"af4ba4ed88764d96a965454ca9f53a61"}},"metadata":{}}],"execution_count":2},{"cell_type":"code","source":"# tokenizer = AutoTokenizer.from_pretrained(\"MBZUAI/LaMini-GPT-774M\")\n# model0 = AutoModelForCausalLM.from_pretrained(\"MBZUAI/LaMini-GPT-774M\")\n\ntokenizer = AutoTokenizer.from_pretrained(\"ParisNeo/LLama-3.2-3B-Lollms-Finetuned-GGUF\")\nmodel0 = AutoModelForCausalLM.from_pretrained(\"ParisNeo/LLama-3.2-3B-Lollms-Finetuned-GGUF\", ignore_mismatched_sizes=True)\n\n# tokenizer = AutoTokenizer.from_pretrained(\"gokaygokay/tiny_llama_chat_description_to_prompt\", cache_dir = '/kaggle/working')\n# model0 = AutoModelForCausalLM.from_pretrained(\"gokaygokay/tiny_llama_chat_description_to_prompt\", ignore_mismatched_sizes=True, cache_dir = '/kaggle/working')\n# model0 = AutoModelForCausalLM.from_pretrained(\"MJ199999/gpt3_model\",ignore_mismatched_sizes=True, from_tf=True)\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:18:12.707855Z","iopub.execute_input":"2024-12-25T00:18:12.708546Z","iopub.status.idle":"2024-12-25T00:21:21.315758Z","shell.execute_reply.started":"2024-12-25T00:18:12.708513Z","shell.execute_reply":"2024-12-25T00:21:21.314829Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"7235efc1f1b148c3b7e26d17f2142e12"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b79e61717a4b4370b36d9eca861fcf6e"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d654e5a8f22b42f1a07005ad6b59e9a6"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f34bfdcbb15b49428684c0aa736dc5af"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model.bin.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c20f6bbdae684e8bbe45670fd13e1e1f"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0ebcef1a01874b588ca94e1a8850b371"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model-00001-of-00002.bin:   0%|          | 0.00/4.97G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0ceedcb97ee54916811dfbf0c6769884"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"pytorch_model-00002-of-00002.bin:   0%|          | 0.00/1.46G [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d224f4f9a18d42439e6ffd092666604c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6eae4086663a4eb28e3032a12d71ac22"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"60e9176e1c044beaae7bd7cbcd2abc92"}},"metadata":{}}],"execution_count":3},{"cell_type":"code","source":"device = torch.device(\"cuda:1\" if torch.cuda.is_available() else \"cpu\")\nmodel0 = model0.to(device)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:21.316973Z","iopub.execute_input":"2024-12-25T00:21:21.317277Z","iopub.status.idle":"2024-12-25T00:21:25.573305Z","shell.execute_reply.started":"2024-12-25T00:21:21.317253Z","shell.execute_reply":"2024-12-25T00:21:25.572655Z"}},"outputs":[],"execution_count":4},{"cell_type":"code","source":"# Chat helper for the locally loaded model; history is carried in the prompt\ndef chat_with_llama(user_input, chat_history):\n    # Prepare formatted prompt\n    prompt = \"You are a helpful, respectful and honest general-purpose assistant.\\n\"\n    for user_content, assist_content in chat_history:\n        prompt += f\"user: {user_content}\\n\"\n        prompt += f\"assistant: {assist_content}\\n\"\n    prompt += f\"user: {user_input}\\nassistant:\"\n\n    # Tokenize and generate a response; pass the attention mask along with input_ids,\n    # and set do_sample=True so the temperature actually takes effect\n    inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n    output = model0.generate(**inputs, max_new_tokens = 1024, do_sample = True, temperature = 0.7, max_time = 10.0, repetition_penalty = 1.0, pad_token_id = tokenizer.eos_token_id)\n    response = tokenizer.decode(output[0], skip_special_tokens=True)\n\n    # Extract and append assistant's response\n    assistant_reply = response.split(\"assistant:\")[-1].split('user:')[0].strip()\n    chat_history.append((user_input, assistant_reply))\n\n    return assistant_reply, chat_history\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.574627Z","iopub.execute_input":"2024-12-25T00:21:25.574948Z","iopub.status.idle":"2024-12-25T00:21:25.580502Z","shell.execute_reply.started":"2024-12-25T00:21:25.574919Z","shell.execute_reply":"2024-12-25T00:21:25.579626Z"}},"outputs":[],"execution_count":5},
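{"cell_type":"markdown","source":"*Optional:* the local model will happily keep writing further `user:` turns, and `chat_with_llama` trims them after the fact with a string split. The next cell is a minimal hedged sketch, assuming the `user:`/`assistant:` prompt format above, of a `StoppingCriteria` that ends generation at the first new turn instead; `StopOnMarker` is a hypothetical helper, not part of the original pipeline.","metadata":{}},{"cell_type":"code","source":"# Hedged sketch, assuming the user:/assistant: prompt format used above.\n# StopOnMarker is a hypothetical helper, not part of the original notebook.\nfrom transformers import StoppingCriteria, StoppingCriteriaList\n\nclass StopOnMarker(StoppingCriteria):\n    def __init__(self, tok, marker=\"\\nuser:\"):\n        self.tok = tok\n        self.marker = marker\n    def __call__(self, input_ids, scores, **kwargs):\n        # Decode only a short tail of the sequence so the check stays cheap\n        tail = self.tok.decode(input_ids[0][-8:], skip_special_tokens=True)\n        return self.marker in tail\n\n# Usage (inside chat_with_llama):\n# model0.generate(**inputs, stopping_criteria=StoppingCriteriaList([StopOnMarker(tokenizer)]), ...)","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# chat_history = []\n# answer0, chat_history = chat_with_llama('Hi. 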
My name is Smith', [])\n# print(answer0)\n# answer1, chat_history = chat_with_llama('What is my name?', chat_history)\n# print(answer1)\n# answer2, chat_history = chat_with_llama('Can you guess my wife\\'s name?', chat_history)\n# print(answer2)\n# print(chat_history)","metadata":{"trusted":true,"execution":{"execution_failed":"2024-12-23T07:33:54.513Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# Read the Together API key from the environment instead of hardcoding a secret in the notebook\napi_key = os.environ.get('TOGETHER_API_KEY')\nclient = Together(api_key=api_key)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.581779Z","iopub.execute_input":"2024-12-25T00:21:25.582063Z","iopub.status.idle":"2024-12-25T00:21:25.599681Z","shell.execute_reply.started":"2024-12-25T00:21:25.582042Z","shell.execute_reply":"2024-12-25T00:21:25.598921Z"}},"outputs":[],"execution_count":6},{"cell_type":"code","source":"def chat_api(user_input, chat_history):\n    messages = []\n    for user_content, assist_content in chat_history:\n        messages += [\n            {\"role\":\"user\", \"content\":user_content},\n            {\"role\":\"assistant\", \"content\":assist_content}\n        ]\n    messages += [{\"role\":\"user\", \"content\":user_input}]\n    \n    response = client.chat.completions.create(\n        model=\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\n        messages=messages,\n    )\n    reply = response.choices[0].message.content\n    chat_history.append((user_input, reply))\n    return reply, chat_history","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.600510Z","iopub.execute_input":"2024-12-25T00:21:25.600797Z","iopub.status.idle":"2024-12-25T00:21:25.615159Z","shell.execute_reply.started":"2024-12-25T00:21:25.600769Z","shell.execute_reply":"2024-12-25T00:21:25.614457Z"}},"outputs":[],"execution_count":7},
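{"cell_type":"markdown","source":"`chat_api` raises on network or API errors, while the Gradio handler further down only checks whether the reply is `None`. The wrapper below is a hedged addition (`chat_api_safe` is a hypothetical helper, not part of the Together SDK) that converts failures into a `None` reply so that check can actually fire; swap it in for `chat_api` in `generate_txt` if you want that behavior.","metadata":{}},{"cell_type":"code","source":"# Hedged addition: surface API failures as a None reply instead of an exception\ndef chat_api_safe(user_input, chat_history):\n    try:\n        return chat_api(user_input, chat_history)\n    except Exception as e:\n        print(f\"Together API call failed: {e}\")\n        return None, chat_history","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# chat_history = []\n# answer0, chat_history = chat_api('Hi. 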
My name is Smith', [])\n# print(answer0, '\\n-----------------------------------------\\n')\n# answer1, chat_history = chat_api('What is my name?', chat_history)\n# print(answer1, '\\n-----------------------------------------\\n')\n# answer2, chat_history = chat_api('Can you guess my wife\\'s name?', chat_history)\n\n# # Chat Example\n# print(answer2, '\\n-----------------------------------------\\n')\n# print(chat_history)","metadata":{"trusted":true,"execution":{"execution_failed":"2024-12-23T07:33:54.513Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"def tti_api(prompt, num_steps = 25, width = 512, heights = 512):\n    response = client.images.generate(\n        prompt=prompt,\n        model=\"black-forest-labs/FLUX.1-dev\",\n        width=width,\n        height=heights,\n        steps=num_steps,\n        n=1,\n        response_format=\"b64_json\"\n    )\n    \n    # Decode the base64 payload into raw image bytes\n    image_data = base64.b64decode(response.data[0].b64_json)\n    return image_data","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.615864Z","iopub.execute_input":"2024-12-25T00:21:25.616087Z","iopub.status.idle":"2024-12-25T00:21:25.629523Z","shell.execute_reply.started":"2024-12-25T00:21:25.616067Z","shell.execute_reply":"2024-12-25T00:21:25.628707Z"}},"outputs":[],"execution_count":8},{"cell_type":"code","source":"prompt = 'A nice black lexus 570 car running on the snowy road.'\nimage = tti_api(prompt, num_steps = 25)\nimage = PIL.Image.open(BytesIO(image))\nimage.save('result.png')\nimage.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:25.630369Z","iopub.execute_input":"2024-12-25T00:21:25.630605Z","iopub.status.idle":"2024-12-25T00:21:27.106671Z","shell.execute_reply.started":"2024-12-25T00:21:25.630586Z","shell.execute_reply":"2024-12-25T00:21:27.105616Z"}},"outputs":[],"execution_count":9},{"cell_type":"code","source":"def ttv(prompt, num_steps = 50):\n    # Load the text-to-video model from Hugging Face\n    model_id = \"damo-vilab/text-to-video-ms-1.7b\"  # ModelScope Text-to-Video model\n    #model_id = \"guoyww/animatediff-motion-adapter-v1-5-2\"  # AnimateDiff motion adapter (alternative; needs AnimateDiffPipeline, not this pipeline)\n    \n    pipe = TextToVideoSDPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant=\"fp16\")\n    pipe.to(\"cuda:0\")  # Move the pipeline to the first GPU\n\n    # Generate video frames\n    print(\"Generating video... 
This may take some time.\")\n    with torch.no_grad():\n        video_frames = pipe(prompt, num_frames=32, height=256, width=256, num_inference_steps=num_steps).frames[0]\n    # Save the generated video\n    video_path = export_to_video(video_frames, output_video_path=\"output_video.mp4\")\n    return video_path\ntest_video = ttv('An awesome lexus 570 car running on the snowy road, high quality', num_steps = 50)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:21:27.109018Z","iopub.execute_input":"2024-12-25T00:21:27.109275Z","iopub.status.idle":"2024-12-25T00:22:58.858457Z","shell.execute_reply.started":"2024-12-25T00:21:27.109252Z","shell.execute_reply":"2024-12-25T00:22:58.857305Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"model_index.json:   0%|          | 0.00/384 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a0b27eacce0a4d7c98b7046ac0823f3b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e950f3d637f544e3bc06117c0757049a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"text_encoder/config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3d672c53342640d0825b8999a03da969"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"scheduler/scheduler_config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a64ba6a5a03f4882a63799c7cd8fca73"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"unet/config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"be96cdd3a8544e3b96e8e5e15d5ffb30"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/tokenizer_config.json:   0%|          | 0.00/755 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ceadd9718d0f44ef837ea787213019c4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.fp16.safetensors:   0%|          | 0.00/681M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5c50cc639f074f5fbc4f69cc7042ce21"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6845746769ce42a7baaacdf4122f6ddf"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"439b3f3d79434ba097a91c8890e08b55"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8a6c73d18ba34fe3b94ee026735a6cb8"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vae/config.json:   0%|          | 0.00/657 [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"246245042f9f4dcf8c516b1e22098b9d"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9ebd3903ed6e4b658f8bac11173bcdaa"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/2.82G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b1f7d772727a4dc99571bce356ef8f53"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"baf0f1b569fa43ea99bb417301b3a80f"}},"metadata":{}},{"name":"stderr","text":"/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n  warnings.warn(\n","output_type":"stream"},{"name":"stdout","text":"Generating video... This may take some time.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"  0%|          | 0/50 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3248592582c44362aac5fa52be5853e6"}},"metadata":{}},{"name":"stderr","text":"/usr/lib/python3.10/subprocess.py:1796: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n  self.pid = _posixsubprocess.fork_exec(\n","output_type":"stream"}],"execution_count":10},{"cell_type":"code","source":"# Ensure the sentence tokenizer is downloaded (if not already)\nnltk.download('punkt')\n\n# Function to convert text to speech and generate SRT content\ndef tts(text):\n    # Initialize the Google TTS engine with language (e.g., 'en' for English)\n    tts = gTTS(text=text, lang='en', slow=False)\n    \n    # Save to an audio file\n    audio_path = \"output.mp3\"\n    tts.save(audio_path)\n    \n    # Load the audio file with pydub to get the duration\n    audio = AudioSegment.from_mp3(audio_path)\n    duration_ms = len(audio)  # Duration in milliseconds\n    \n    # Split the text into sentences using NLTK\n    sentences = nltk.sent_tokenize(text)\n    \n    # Estimate the duration per sentence (guard against empty input)\n    chunk_duration_ms = duration_ms // max(len(sentences), 1)\n    \n    # Generate SRT content\n    srt_content = \"\"\n    start_time = 0  # Start time of the first subtitle\n    for idx, sentence in enumerate(sentences):\n        end_time = start_time + chunk_duration_ms\n        start_time_formatted = time.strftime('%H:%M:%S', time.gmtime(start_time / 1000)) + ',' + f'{start_time % 1000:03d}'\n        end_time_formatted = time.strftime('%H:%M:%S', time.gmtime(end_time / 1000)) + ',' + f'{end_time % 1000:03d}'\n        \n        srt_content += f\"{idx + 1}\\n\"\n        srt_content += f\"{start_time_formatted} --> {end_time_formatted}\\n\"\n        srt_content += f\"{sentence}\\n\\n\"\n        \n        start_time = end_time  # Update start time for the next sentence\n    \n    return audio_path, srt_content","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:22:58.860290Z","iopub.execute_input":"2024-12-25T00:22:58.860668Z","iopub.status.idle":"2024-12-25T00:22:59.006324Z","shell.execute_reply.started":"2024-12-25T00:22:58.860633Z","shell.execute_reply":"2024-12-25T00:22:59.005656Z"}},"outputs":[{"name":"stdout","text":"[nltk_data] Downloading package punkt to /usr/share/nltk_data...\n[nltk_data]   Package punkt is already up-to-date!\n","output_type":"stream"}],"execution_count":11},{"cell_type":"code","source":"def tti(prompt, num_steps = 50, width = 512, heights = 512):\n    # Load the pre-trained Stable Diffusion pipeline from Hugging Face\n    pipe = DiffusionPipeline.from_pretrained(\"stabilityai/stable-diffusion-2-1\")\n    #pipe.load_lora_weights(\"FradigmaDangerYT/dalle-e-mini\")\n    \n    # Move the pipeline to the first GPU, falling back to CPU if none is available\n    device0 = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n    pipe.to(device0)\n\n    # Generate an image from the prompt\n    image = pipe(prompt, num_inference_steps = num_steps, width = width, height = heights).images[0]\n    return image\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:22:59.007053Z","iopub.execute_input":"2024-12-25T00:22:59.007324Z","iopub.status.idle":"2024-12-25T00:22:59.011814Z","shell.execute_reply.started":"2024-12-25T00:22:59.007302Z","shell.execute_reply":"2024-12-25T00:22:59.010754Z"}},"outputs":[],"execution_count":12},
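{"cell_type":"markdown","source":"Both `tti` and `ttv` reload a full diffusion pipeline on every call, which can exhaust a 16 GB T4 over repeated runs. The next cell is a small hedged helper (`free_gpu` is an addition, not part of the original flow) for releasing cached GPU memory between generations.","metadata":{}},{"cell_type":"code","source":"# Hedged helper (an addition): release cached GPU memory between pipeline runs\nimport gc\n\ndef free_gpu():\n    gc.collect()\n    if torch.cuda.is_available():\n        torch.cuda.empty_cache()\n\n# Call free_gpu() after a tti()/ttv() run, before loading the next pipeline","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"prompt = 'A nice black lexus 570 car running on the snowy road.'\nimage = tti(prompt, num_steps = 25, width = 320, heights = 240)\n# image = 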
PIL.Image.open(BytesIO(image))\nimage.save('result.png')\nimage.show()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:22:59.012694Z","iopub.execute_input":"2024-12-25T00:22:59.012942Z","iopub.status.idle":"2024-12-25T00:23:23.213670Z","shell.execute_reply.started":"2024-12-25T00:22:59.012911Z","shell.execute_reply":"2024-12-25T00:23:23.212714Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"model_index.json:   0%|          | 0.00/537 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"00315ece5482462094ef53e3ccc0ad69"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"624b98d12a21487293ade164658da7d4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"text_encoder/config.json:   0%|          | 0.00/633 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"7e263911d57945308978e63ad218c197"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1fdce8f0fabd4f92b1a48f1fe144b4ed"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ff7371dec38c49118ac6ec9a87e38db3"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"scheduler/scheduler_config.json:   0%|          | 0.00/345 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1c11753bf15a4cea8782bea421fc5978"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"688221b8e00c48598659f4a5fa999b21"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/tokenizer_config.json:   0%|          | 0.00/824 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"96f56565644346d2b4ae667ee6cccccd"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"823d1d5a1e8e4330916b66263db32ed5"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"312ef1679c8a4af7ab15f634c4b67dcd"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"unet/config.json:   0%|          | 0.00/939 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6ec60a454ba347958279d82fd21955f7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.46G [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"09536bea3884449c85a44a2793133c8a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f4dd6779748945309137f21c1d23bb33"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vae/config.json:   0%|          | 0.00/611 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"021d7c9db3024591b1990d3d116abcf4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3beb2daec308468c894d5ba06e9feacb"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"  0%|          | 0/25 [00:00<?, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8a38b7b1101c4325bd50bf5974274d15"}},"metadata":{}}],"execution_count":13},{"cell_type":"code","source":"\n# Close any previously launched demo before relaunching\ntry:\n    demo.close()\nexcept Exception:\n    pass\n\nwith gr.Blocks() as demo:\n    gr.Markdown(\"\"\"\n    # Gradio-based Text-to-Any Project\n    \"\"\")\n    with gr.Tab(label=\"Llama-Chat\"):\n        radios0 = gr.Radio(['use api', 'use loaded model'], value=\"use api\", show_label = False)\n        gptDialog = gr.Chatbot(label = \"Llama-Chat\", max_height=512, min_height=512,\n                                       autoscroll= True)\n        with gr.Row(equal_height=True):\n            prompt0 = gr.Textbox(label = 'Prompt Input', lines = 1, scale = 9, max_lines=2,\n                                autofocus=True, autoscroll=True, placeholder='Type your message here...')\n            with gr.Column(scale = 1):\n                generate_btn0 = gr.Button('generate')\n                clear_btn0 = gr.Button('clear')\n            \n    with gr.Tab(label=\"Text-to-Image/Video\"):\n        with gr.Row():\n            radios1 = gr.Radio(['use api', 'use loaded model'], value=\"use api\", show_label = False)\n            steps = gr.Slider(value = 50, minimum = 20, maximum = 100, step = 1, label = 'num_steps')\n            width = gr.Slider(value = 1024, minimum = 240, maximum = 1792, step = 16, label = 'width')\n            heights = gr.Slider(value = 512, minimum = 160, maximum = 1792, step = 16, label = 'heights')\n            \n        with gr.Row():\n            outputImg = gr.Image(type='pil',height= 512, width=512, label=\"Output Image\", interactive=False)\n            outputVideo = gr.Video(width=512, height=512, label = \"Output Video\", interactive=False)\n        with gr.Row(equal_height=True):\n            prompt1 = gr.Textbox(label = 'Prompt Input', lines = 1, scale = 9, max_lines=2,\n                                autofocus=True, autoscroll=True, placeholder='Type your message here...')\n            with gr.Column(scale = 1):\n                generate_btn1 = gr.Button('generate image')\n                generate_btn11 = gr.Button('generate video')\n\n    with gr.Tab(label = \"Text-to-Speech\"):\n        outputAudio = gr.Audio(label=\"Audio Output\", interactive = False)\n        outputSrt = gr.Textbox(label = 'Script Output', lines = 10, max_lines = 10, placeholder = 'Script output here')\n        with gr.Row(equal_height=False):\n            prompt2 = gr.Textbox(label = 'Prompt Input', lines = 5, scale = 9, max_lines=5,\n                                autofocus=True, autoscroll=True, placeholder='Type your message here...')\n            with gr.Column(scale = 1):\n                generate_btn2 = gr.Button('generate')\n                clear_btn2 = gr.Button('clear')\n\n    with gr.Tab(label = 'About'):\n        pass\n\n    def generate_txt(prompt, check, history):\n        if check == 'use api':\n            response, history = chat_api(prompt, history)\n            if response is None:\n                gr.Warning('Cannot reach the API.')\n        else:\n            response, history = chat_with_llama(prompt, history)\n            if response is None:\n                gr.Warning('Failed to load model.')\n        return '', history\n    \n    def clear_chat():\n        history = []\n        gr.Info('Cleaned successfully!')\n        return history\n\n    def generate_img(prompt, check, num_steps, width, heights):\n        if check == 'use api':\n            image_bytes = tti_api(prompt, num_steps = num_steps, width = width, heights = heights)\n            # Check the API result before trying to decode it\n            if not image_bytes:\n                gr.Warning('Cannot reach the API.')\n                return None\n            image = PIL.Image.open(BytesIO(image_bytes))\n            gr.Info('Generated Image Successfully!')\n        else:\n            image = tti(prompt, num_steps = num_steps, width = width, heights = heights)\n            gr.Info('Generated Image Successfully!')\n        return image\n        \n    def generate_video(prompt, num_steps):\n        video = ttv(prompt, num_steps)\n        gr.Info('Generated Video Successfully!')\n        return video\n        \n    def generate_speech(prompt):\n        audio, script = tts(prompt)\n        gr.Info('Generated Speech Successfully!')\n        return audio, script\n        \n    def clear_speech():\n        gr.Info('Cleaned Successfully!')\n        return None, ''\n    \n    prompt0.submit(generate_txt, [prompt0, radios0, gptDialog], [prompt0, gptDialog])\n    prompt1.submit(generate_img, [prompt1, radios1, steps, width, heights], [outputImg])\n\n    # generate button click event\n    generate_btn0.click(generate_txt, [prompt0, radios0, gptDialog], [prompt0, gptDialog])\n    generate_btn1.click(generate_img, [prompt1, radios1, steps, width, heights], [outputImg])\n    generate_btn11.click(generate_video, [prompt1, steps], [outputVideo])\n    generate_btn2.click(generate_speech, [prompt2], [outputAudio, outputSrt])\n    \n    # clear button click event\n    clear_btn0.click(clear_chat, [], [gptDialog])\n    clear_btn2.click(clear_speech, [], [outputAudio, outputSrt])\ndemo.launch()\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-25T00:23:23.214982Z","iopub.execute_input":"2024-12-25T00:23:23.215347Z","iopub.status.idle":"2024-12-25T00:23:24.396633Z","shell.execute_reply.started":"2024-12-25T00:23:23.215306Z","shell.execute_reply":"2024-12-25T00:23:24.395829Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.10/dist-packages/gradio/components/chatbot.py:242: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. 
Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.\n  warnings.warn(\n","output_type":"stream"},{"name":"stdout","text":"* Running on local URL:  http://127.0.0.1:7860\nKaggle notebooks require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n\n* Running on public URL: https://d2c6c018093abcee72.gradio.live\n\nThis share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"<div><iframe src=\"https://d2c6c018093abcee72.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"},"metadata":{}},{"execution_count":14,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}],"execution_count":14},{"cell_type":"code","source":"demo.close()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2024-12-23T08:38:27.489912Z","iopub.execute_input":"2024-12-23T08:38:27.490274Z","iopub.status.idle":"2024-12-23T08:38:27.609053Z","shell.execute_reply.started":"2024-12-23T08:38:27.490243Z","shell.execute_reply":"2024-12-23T08:38:27.607832Z"}},"outputs":[],"execution_count":null},
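{"cell_type":"markdown","source":"To close the loop between the text-to-video and text-to-speech pieces, the next cell is a minimal hedged sketch, assuming `ffmpeg` is available on the PATH (as on Kaggle images) and that `output_video.mp4` / `output.mp3` were produced by `ttv()` and `tts()` above; `mux_av` is a hypothetical helper, not part of the original notebook.","metadata":{}},{"cell_type":"code","source":"# Hedged sketch: mux the generated video and speech into one clip.\n# Assumes ffmpeg is on the PATH and that output_video.mp4 / output.mp3\n# were produced by ttv() and tts() above.\nimport subprocess\n\ndef mux_av(video_path=\"output_video.mp4\", audio_path=\"output.mp3\", out_path=\"final.mp4\"):\n    # -shortest trims to the shorter stream so the clip ends cleanly\n    subprocess.run([\n        \"ffmpeg\", \"-y\", \"-i\", video_path, \"-i\", audio_path,\n        \"-c:v\", \"copy\", \"-c:a\", \"aac\", \"-shortest\", out_path,\n    ], check=True)\n    return out_path\n\n# mux_av()","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"","metadata":{"trusted":true},"outputs":[],"execution_count":null}]}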