Spaces:

Hieucyber2208
/

know-flow

Running

File size: 7,303 Bytes

e62cec6

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import InferenceClient\n",
    "import os\n",
    "import glob\n",
    "from collections import defaultdict\n",
    "import google.generativeai as genai\n",
    "from tqdm import tqdm\n",
    "from huggingface_hub.utils import HfHubHTTPError\n",
    "import random\n",
    "from dotenv import load_dotenv\n",
    "import time\n",
    "\n",
    "\n",
    "load_dotenv()  # read HUGGINGFACE_API_KEY / GOOGLE_API_KEY from a local .env file, if one exists\n",
    "HF_API_KEY = os.getenv(\"HUGGINGFACE_API_KEY\")\n",
    "GOOGLE_API_KEY = os.getenv(\"GOOGLE_API_KEY\")\n",
    "\n",
    "# Report missing credentials here rather than as an opaque auth error in a later cell.\n",
    "# Only a warning (not an exception) so runs that obtain credentials some other way keep working.\n",
    "for _name, _value in ((\"HUGGINGFACE_API_KEY\", HF_API_KEY), (\"GOOGLE_API_KEY\", GOOGLE_API_KEY)):\n",
    "    if not _value:\n",
    "        print(f\"Cảnh báo: biến môi trường {_name} chưa được thiết lập.\")\n",
    "\n",
    "genai.configure(api_key=GOOGLE_API_KEY)\n",
    "# Shared Hugging Face client used by generate_image().\n",
    "client = InferenceClient(provider=\"hf-inference\", api_key=HF_API_KEY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _strip_part_label(piece):\n",
    "    \"\"\"Remove the leading '<number>:' label left over from a '- Phần <number>: ...' line.\"\"\"\n",
    "    label, separator, body = piece.partition(\":\")\n",
    "    if separator and label.strip().isdigit():\n",
    "        return body.strip()\n",
    "    return piece.strip()\n",
    "\n",
    "\n",
    "def split_text_by_semantics(number_of_images, text_path=\"../data/text/text.txt\", model_name=\"gemini-pro\"):\n",
    "    \"\"\"\n",
    "    Ask Gemini to split the text stored in a file into semantically coherent parts.\n",
    "\n",
    "    Args:\n",
    "        number_of_images (int): Number of parts requested from the model.\n",
    "        text_path (str): UTF-8 text file to split. Defaults to the original hard-coded path.\n",
    "        model_name (str): Gemini model id passed to ``genai.GenerativeModel``.\n",
    "            NOTE(review): the default is kept for backward compatibility; confirm it is still served.\n",
    "\n",
    "    Returns:\n",
    "        list[str]: The content of each part, without the '- Phần N:' label and without\n",
    "        any text the model wrote before the first part. If the reply contains no\n",
    "        '- Phần ' marker, the whole reply is returned as a single part. Returns an\n",
    "        empty list when the Gemini call fails (the error is printed).\n",
    "\n",
    "    Raises:\n",
    "        OSError: If ``text_path`` cannot be opened (the read happens outside the try block).\n",
    "    \"\"\"\n",
    "    with open(text_path, \"r\", encoding=\"utf-8\") as file:\n",
    "        text = file.read()\n",
    "    prompt = f\"\"\"\n",
    "    Bạn là một chuyên gia xử lý văn bản. Hãy chia văn bản sau thành {number_of_images} đoạn có ý nghĩa sao cho mỗi đoạn vừa đủ để giải thích trong khoảng 3 đến 5 câu.\n",
    "\n",
    "    Văn bản:\n",
    "    {text}\n",
    "\n",
    "    Định dạng đầu ra:\n",
    "    - Phần 1: [Nội dung]\n",
    "    - Phần 2: [Nội dung]\n",
    "    - Phần 3: [Nội dung]\n",
    "    \"\"\"\n",
    "\n",
    "    try:\n",
    "        model = genai.GenerativeModel(model_name)\n",
    "        response = model.generate_content(prompt)\n",
    "        result_text = response.text.strip()\n",
    "    except Exception as e:\n",
    "        # Best effort: report the failure and let the caller iterate over nothing.\n",
    "        print(f\"Lỗi khi gọi API Gemini: {e}\")\n",
    "        return []\n",
    "\n",
    "    pieces = result_text.split(\"- Phần \")\n",
    "    if len(pieces) > 1:\n",
    "        # Whatever precedes the first marker is model preamble (or empty), not a part.\n",
    "        pieces = pieces[1:]\n",
    "\n",
    "    chunks = [_strip_part_label(piece) for piece in pieces]\n",
    "    # Filter after stripping so whitespace-only pieces do not survive as empty strings.\n",
    "    return [chunk for chunk in chunks if chunk]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def describe_image(description, detail_level=\"short\", perspective=\"neutral\", emotion=None, time_setting=None, art_style=None, model_name=\"gemini-pro\"):\n",
    "    \"\"\"\n",
    "    Turn a descriptive passage into an English image description using Gemini.\n",
    "\n",
    "    Args:\n",
    "        description (str): The passage to describe.\n",
    "        detail_level (str): \"short\" requests a concise description; any other value requests a detailed one.\n",
    "        perspective (str): \"subjective\" requests a subjective viewpoint; any other value requests a neutral one.\n",
    "        emotion (str, optional): Dominant emotion to mention in the prompt (e.g. \"mysterious\", \"romantic\").\n",
    "        time_setting (str, optional): Time setting to mention in the prompt (e.g. \"modern\", \"medieval\").\n",
    "        art_style (str, optional): Art style to mention in the prompt (e.g. \"realistic\", \"sketch\").\n",
    "        model_name (str): Gemini model id passed to ``genai.GenerativeModel``.\n",
    "            NOTE(review): the default is kept for backward compatibility; confirm it is still served.\n",
    "\n",
    "    Returns:\n",
    "        str: The stripped model reply, or \"\" when ``description`` is empty/blank\n",
    "        or the Gemini call fails (the error is printed).\n",
    "    \"\"\"\n",
    "    # Nothing to describe: skip the API call instead of sending an empty passage.\n",
    "    if not description or not description.strip():\n",
    "        return \"\"\n",
    "\n",
    "    prompt = f\"\"\"\n",
    "    Bạn là chuyên gia mô tả hình ảnh. Hãy đọc đoạn mô tả dưới đây và tạo một mô tả hình ảnh theo các tiêu chí sau:\n",
    "    - Mức độ chi tiết: {\"Ngắn gọn\" if detail_level == \"short\" else \"Chi tiết\"}.\n",
    "    - Góc nhìn: {\"Chủ quan\" if perspective == \"subjective\" else \"Trung lập\"}.\n",
    "    {f\"- Cảm xúc chủ đạo: {emotion}.\" if emotion else \"\"}\n",
    "    {f\"- Bối cảnh thời gian: {time_setting}.\" if time_setting else \"\"}\n",
    "    {f\"- Phong cách nghệ thuật: {art_style}.\" if art_style else \"\"}\n",
    "\n",
    "    Đoạn mô tả:\n",
    "    {description}\n",
    "\n",
    "    Hãy tạo một mô tả hình ảnh phù hợp với yêu cầu trên bằng Tiếng Anh.\n",
    "    \"\"\"\n",
    "\n",
    "    try:\n",
    "        model = genai.GenerativeModel(model_name)\n",
    "        response = model.generate_content(prompt)\n",
    "        return response.text.strip()\n",
    "    except Exception as e:\n",
    "        # Best effort: report the failure and signal it to the caller with an empty string.\n",
    "        print(f\"Lỗi khi gọi API Gemini: {e}\")\n",
    "        return \"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_image(prompt, output_path, model=\"stabilityai/stable-diffusion-3.5-large\", resolution=(512, 512), style=None, color_palette=None):\n",
    "    \"\"\"\n",
    "    Generate an image from a text prompt and save it to ``output_path``.\n",
    "\n",
    "    :param prompt: Text description of the image.\n",
    "    :param output_path: Where the generated image is saved; the parent directory is created if missing.\n",
    "    :param model: Model id passed to ``client.text_to_image``.\n",
    "    :param resolution: Output size in pixels, read as ``(width, height)`` and forwarded as the\n",
    "        ``width`` / ``height`` arguments; ``None`` leaves the size to the model's default.\n",
    "    :param style: Optional style appended to the prompt (e.g. 'realistic', 'anime', 'cyberpunk').\n",
    "    :param color_palette: Optional colour scheme appended to the prompt (e.g. 'vibrant', 'monochrome').\n",
    "    \"\"\"\n",
    "    custom_prompt = prompt\n",
    "    if style:\n",
    "        custom_prompt += f\" in {style} style\"\n",
    "    if color_palette:\n",
    "        custom_prompt += f\" with {color_palette} color scheme\"\n",
    "\n",
    "    # text_to_image takes the size as separate width/height arguments, not as `resolution`.\n",
    "    width, height = resolution if resolution else (None, None)\n",
    "\n",
    "    # Create the target directory before calling the API so a generated image is never\n",
    "    # lost to a missing folder.\n",
    "    output_dir = os.path.dirname(output_path)\n",
    "    if output_dir:\n",
    "        os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "    image = client.text_to_image(custom_prompt, model=model, width=width, height=height)\n",
    "    image.save(output_path)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUMBER_OF_IMAGES = 3\n",
    "MAX_RETRIES = 5  # attempts per image before giving up\n",
    "COOLDOWN_SECONDS = 60  # pause between successful generations\n",
    "\n",
    "texts = split_text_by_semantics(number_of_images=NUMBER_OF_IMAGES)\n",
    "for index, merged_text in enumerate(tqdm(texts, desc=\"Processing\", unit=\"image\")):\n",
    "    output_path = f\"../data/image/{index}.png\"\n",
    "    prompt = describe_image(merged_text)\n",
    "    print(prompt)\n",
    "\n",
    "    if not prompt:\n",
    "        # describe_image returns \"\" when the Gemini call fails; there is nothing to render.\n",
    "        print(f\"Bỏ qua ảnh {index}: không có mô tả.\")\n",
    "        continue\n",
    "\n",
    "    # Retry with exponential backoff plus jitter.\n",
    "    for attempt in range(1, MAX_RETRIES + 1):\n",
    "        try:\n",
    "            generate_image(prompt, output_path)\n",
    "            break  # success: leave the retry loop\n",
    "        except HfHubHTTPError as e:\n",
    "            print(f\"Lỗi khi gọi API: {e}\")\n",
    "            if attempt < MAX_RETRIES:  # no point waiting after the final attempt\n",
    "                wait_time = 2 ** attempt + random.uniform(0, 1)\n",
    "                print(f\"Thử lại sau {wait_time:.2f} giây...\")\n",
    "                time.sleep(wait_time)\n",
    "    else:\n",
    "        # The loop ended without a break, so every attempt failed.\n",
    "        print(f\"Không thể tạo ảnh {index} sau {MAX_RETRIES} lần thử.\")\n",
    "        continue\n",
    "\n",
    "    # Pause between images; skipped after the last one.\n",
    "    if index < len(texts) - 1:\n",
    "        time.sleep(COOLDOWN_SECONDS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}