diff --git a/dataset/.DS_Store b/dataset/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..686e03e0e960978a24145b62606ae0f47cdca707
Binary files /dev/null and b/dataset/.DS_Store differ
diff --git a/dataset/3d_effect_generation_single_reference_0002/auto_eval.jsonl b/dataset/3d_effect_generation_single_reference_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..620039eafd88e996cd86e9b78505178004a210cb
--- /dev/null
+++ b/dataset/3d_effect_generation_single_reference_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two images, with the top image as the reference picture for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided design sketch and text requirements.\nThe text requirement is:\n\"Please generate a 3D rendering based on the provided interior design sketch. The task objective is to accurately convert the room layout, furniture, and decorative elements in the image into a three-dimensional rendering. The model should infer the appropriate spatial structure of the room, the proportions and placement of the furniture, and render realistic lighting, shadows, and material effects. Ensure that the generated 3D rendering aligns with the design details in the sketch, showcasing the overall spatial feel and visual impact of the bedroom, ultimately producing a high-quality, realistic 3D rendering.\"\nYour review question is:\nDoes the generated 3D rendering accurately retain every detail of the shapes and outlines of the line drawing? 0 points: The shapes and outlines in the 3D rendering show noticeable deviations or distortions compared to the line drawing. 1 point: The 3D rendering accurately preserves the shapes and outlines from the line drawing in every detail.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two images, with the top image as the reference picture for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided design sketch and text requirements.\nThe text requirement is:\n\"Please generate a 3D rendering based on the provided interior design sketch. The task objective is to accurately convert the room layout, furniture, and decorative elements in the image into a three-dimensional rendering. The model should infer the appropriate spatial structure of the room, the proportions and placement of the furniture, and render realistic lighting, shadows, and material effects. Ensure that the generated 3D rendering aligns with the design details in the sketch, showcasing the overall spatial feel and visual impact of the bedroom, ultimately producing a high-quality, realistic 3D rendering.\"\nYour review question is:\nDoes the generated 3D rendering maintain the overall structure and proportions of the line drawing, ensuring consistency between the line drawing and the generated image? 0 points: The object's structure in the 3D rendering has been noticeably altered, with unbalanced proportions. 1 point: The structure and proportions of the object in the 3D rendering are consistent with the line drawing and are well-balanced.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two images, with the top image as the reference picture for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided design sketch and text requirements.\nThe text requirement is:\n\"Please generate a 3D rendering based on the provided interior design sketch. The task objective is to accurately convert the room layout, furniture, and decorative elements in the image into a three-dimensional rendering. The model should infer the appropriate spatial structure of the room, the proportions and placement of the furniture, and render realistic lighting, shadows, and material effects. Ensure that the generated 3D rendering aligns with the design details in the sketch, showcasing the overall spatial feel and visual impact of the bedroom, ultimately producing a high-quality, realistic 3D rendering.\"\nYour review question is:\nDoes the generated image effectively convey a sense of depth and three-dimensionality, presenting as a realistic 3D rendered image? 0 points: The image lacks a convincing 3D appearance, with minimal or no sense of depth. Key elements like lighting, shadows, and material effects are either absent or insufficiently applied, resulting in a flat or two-dimensional look. 1 point: The image successfully conveys a realistic 3D appearance, with well-applied lighting, shadows, and material effects that contribute to a sense of depth and dimensionality. The rendering effectively represents a believable 3D object with spatial volume.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two images, with the top image as the reference picture for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided design sketch and text requirements.\nThe text requirement is:\n\"Please generate a 3D rendering based on the provided interior design sketch. The task objective is to accurately convert the room layout, furniture, and decorative elements in the image into a three-dimensional rendering. The model should infer the appropriate spatial structure of the room, the proportions and placement of the furniture, and render realistic lighting, shadows, and material effects. Ensure that the generated 3D rendering aligns with the design details in the sketch, showcasing the overall spatial feel and visual impact of the bedroom, ultimately producing a high-quality, realistic 3D rendering.\"\nYour review question is:\nIf there are multiple angles, does the generated 3D rendering maintain consistency of the object across multiple angles in every detail? 0 points: The object appears inconsistent across different angles, with noticeable discrepancies in shape, proportions, or details between views. 1 point: There are no multiple angles, or the object remains consistent across all angles, with uniform shape, proportions, and details, creating a coherent and accurate representation from every perspective.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two images, with the top image as the reference picture for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided design sketch and text requirements.\nThe text requirement is:\n\"Please generate a 3D rendering based on the provided interior design sketch. The task objective is to accurately convert the room layout, furniture, and decorative elements in the image into a three-dimensional rendering. The model should infer the appropriate spatial structure of the room, the proportions and placement of the furniture, and render realistic lighting, shadows, and material effects. Ensure that the generated 3D rendering aligns with the design details in the sketch, showcasing the overall spatial feel and visual impact of the bedroom, ultimately producing a high-quality, realistic 3D rendering.\"\nYour review question is:\nDoes the rendering display realistic and well-defined shadows that enhance the 3D appearance of the object? 0 points: Shadows are poorly rendered, with inconsistent or unrealistic positioning, depth, or softness.  1 point: Shadows are accurately rendered, with appropriate depth, softness, and alignment to the light source.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two images, with the top image as the reference picture for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided design sketch and text requirements.\nThe text requirement is:\n\"Please generate a 3D rendering based on the provided interior design sketch. The task objective is to accurately convert the room layout, furniture, and decorative elements in the image into a three-dimensional rendering. The model should infer the appropriate spatial structure of the room, the proportions and placement of the furniture, and render realistic lighting, shadows, and material effects. Ensure that the generated 3D rendering aligns with the design details in the sketch, showcasing the overall spatial feel and visual impact of the bedroom, ultimately producing a high-quality, realistic 3D rendering.\"\nYour review question is:\nDoes the generated 3D rendering have an overall aesthetic appeal, with high-quality visual detailing and a cohesive style that meets professional standards? 0 points: The 3D rendering lacks aesthetic appeal, with poor visual detailing and an incohesive style. 1 point: The 3D rendering demonstrates strong aesthetic appeal, with high-quality visual detailing and a cohesive style.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
diff --git a/dataset/3d_effect_generation_single_reference_0002/eval.json b/dataset/3d_effect_generation_single_reference_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..cdb35772ec80d7da7daa0d927335a18ee3366abf
--- /dev/null
+++ b/dataset/3d_effect_generation_single_reference_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated 3D rendering accurately preserve every detail of the line drawing, including shapes and contours?",
+            "0_point_standard": "There are noticeable deviations or distortions in shapes and contours in the 3D rendering compared to the line drawing.",
+            "1_point_standard": "The 3D rendering accurately preserves every detail of the shapes and contours in the line drawing."
+        },
+        {
+            "question": "Does the generated 3D rendering maintain the overall structure and proportions of the line drawing to ensure consistency between the sketch and the generated image?",
+            "0_point_standard": "There are significant changes in the structure of objects in the 3D rendering, with imbalanced proportions.",
+            "1_point_standard": "The structure and proportions of objects in the 3D rendering are consistent with the line drawing, and the proportions are balanced."
+        },
+        {
+            "question": "Does the generated image effectively convey depth and a sense of three-dimensionality, presenting a realistic 3D rendering effect?",
+            "0_point_standard": "The image lacks a convincing 3D appearance, with little sense of depth. Key elements like lighting and material effects are missing or inadequately applied, causing the image to appear flat.",
+            "1_point_standard": "The image successfully conveys a realistic 3D appearance, with well-applied lighting and material effects enhancing depth and dimensionality, presenting a believable spatial volume."
+        },
+        {
+            "question": "If the generated 3D rendering includes multiple angles, does it maintain consistency of the object in each angle, ensuring no deviation in details?",
+            "0_point_standard": "The object appears inconsistent across different angles, with noticeable differences in shape, proportion, or detail between perspectives.",
+            "1_point_standard": "Either there are no multiple angles, or the object remains consistent across all angles, with uniform shape, proportion, and details, ensuring accurate representation from every perspective."
+        },
+        {
+            "question": "Does the rendering display realistic and clear shadow effects, enhancing the 3D appearance of the object?",
+            "0_point_standard": "The shadow effects are poorly rendered, with inaccuracies in position, depth, or softness.",
+            "1_point_standard": "The shadow effects are accurately rendered, with appropriate depth, softness, and alignment with the light source."
+        },
+        {
+            "question": "Does the generated 3D rendering have an overall aesthetic appeal, with high-quality visual details and a unified style that meets professional standards?",
+            "0_point_standard": "The 3D rendering lacks aesthetic appeal, with poor visual details and an inconsistent style.",
+            "1_point_standard": "The 3D rendering exhibits strong aesthetic appeal, with high-quality visual details and a unified style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/3d_effect_generation_single_reference_0002/images.txt b/dataset/3d_effect_generation_single_reference_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4df9ab222d8b59962ab31f861d407dbd3bec4e74
--- /dev/null
+++ b/dataset/3d_effect_generation_single_reference_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01l6dTKS1mfoDwCHFdm_!!6000000004982-0-tps-1794-1280.jpg
diff --git a/dataset/3d_effect_generation_single_reference_0002/instruction.txt b/dataset/3d_effect_generation_single_reference_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3cba7fa783fdaee99a21b0dcc17b6ca1a4c10805
--- /dev/null
+++ b/dataset/3d_effect_generation_single_reference_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a 3D rendering based on the provided interior design sketch. The task objective is to accurately convert the room layout, furniture, and decorative elements in the image into a three-dimensional rendering. The model should infer the appropriate spatial structure of the room, the proportions and placement of the furniture, and render realistic lighting, shadows, and material effects. Ensure that the generated 3D rendering aligns with the design details in the sketch, showcasing the overall spatial feel and visual impact of the bedroom, ultimately producing a high-quality, realistic 3D rendering.
\ No newline at end of file
diff --git a/dataset/3d_effect_generation_single_reference_0002/meta.json b/dataset/3d_effect_generation_single_reference_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..94f5d840ee2bef111ab654cb12d075d4eb4f211e
--- /dev/null
+++ b/dataset/3d_effect_generation_single_reference_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "3D effect generation with single reference",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0072",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/3d_effect_generation_three-view_reference_0001/auto_eval.jsonl b/dataset/3d_effect_generation_three-view_reference_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..773b9052c021d21ec82fd3b645ea40e2a69ea49a
--- /dev/null
+++ b/dataset/3d_effect_generation_three-view_reference_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference three-view pictures for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided three-view pictures and text requirements.\nThe text requirement is:\n\"The image provided is a three view of a kettle with a stainless steel subject and a plastic base and lid.Please generate a 3D rendering of the item based on the three views (front view, side view, top view) and text description of the item provided. The 3D form of the item should be accurately constructed by combining the information from all angles in the three views and refining the details of the material, colour, light and shadow effects of the item. The final 3D rendering should show the complete three-dimensional appearance of the item, conform to the scale and structure provided in the three-view drawings, and be consistent with the features of the item.\"\nYour review question is:\nLogical Consistency with Input Reference: 0 points: The output 3D rendering lacks logical consistency with the input three-view sketches, showing significant discrepancies in shape or structure that contradict the references. 1 point: The output 3D rendering logically aligns with the input three-view sketches, maintaining structural coherence and faithfully representing the object's intended form.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference three-view pictures for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided three-view pictures and text requirements.\nThe text requirement is:\n\"The image provided is a three view of a kettle with a stainless steel subject and a plastic base and lid.Please generate a 3D rendering of the item based on the three views (front view, side view, top view) and text description of the item provided. The 3D form of the item should be accurately constructed by combining the information from all angles in the three views and refining the details of the material, colour, light and shadow effects of the item. The final 3D rendering should show the complete three-dimensional appearance of the item, conform to the scale and structure provided in the three-view drawings, and be consistent with the features of the item.\"\nYour review question is:\nCompleteness of Detail Representation: 0 points: The 3D rendering lacks certain intricate details or specific design elements present in the input sketches, resulting in a simplified or incomplete representation. 1 point: The 3D rendering fully captures all details and intricate features from the input sketches, providing a complete and precise depiction of the object.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference three-view pictures for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided three-view pictures and text requirements.\nThe text requirement is:\n\"The image provided is a three view of a kettle with a stainless steel subject and a plastic base and lid.Please generate a 3D rendering of the item based on the three views (front view, side view, top view) and text description of the item provided. The 3D form of the item should be accurately constructed by combining the information from all angles in the three views and refining the details of the material, colour, light and shadow effects of the item. The final 3D rendering should show the complete three-dimensional appearance of the item, conform to the scale and structure provided in the three-view drawings, and be consistent with the features of the item.\"\nYour review question is:\nConsistency in Material and Texture Representation: 0 points: The 3D rendering fails to accurately represent materials or textures specified or implied in the input sketches, with inconsistencies that detract from realism. 1 point: The 3D rendering maintains consistent and realistic material and texture representations across all views, effectively enhancing the appearance and believability of the object.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference three-view pictures for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided three-view pictures and text requirements.\nThe text requirement is:\n\"The image provided is a three view of a kettle with a stainless steel subject and a plastic base and lid.Please generate a 3D rendering of the item based on the three views (front view, side view, top view) and text description of the item provided. The 3D form of the item should be accurately constructed by combining the information from all angles in the three views and refining the details of the material, colour, light and shadow effects of the item. The final 3D rendering should show the complete three-dimensional appearance of the item, conform to the scale and structure provided in the three-view drawings, and be consistent with the features of the item.\"\nYour review question is:\nAdherence to Text Description Instructions: 0 points: The model-generated 3D rendering does not fulfill the specific requirements outlined in the text description, failing to incorporate directed changes or enhancements. 1 point: The model effectively incorporates and adheres to the specific instructions provided in the text description, accurately reflecting all requested modifications or features.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference three-view pictures for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided three-view pictures and text requirements.\nThe text requirement is:\n\"The image provided is a three view of a kettle with a stainless steel subject and a plastic base and lid.Please generate a 3D rendering of the item based on the three views (front view, side view, top view) and text description of the item provided. The 3D form of the item should be accurately constructed by combining the information from all angles in the three views and refining the details of the material, colour, light and shadow effects of the item. The final 3D rendering should show the complete three-dimensional appearance of the item, conform to the scale and structure provided in the three-view drawings, and be consistent with the features of the item.\"\nYour review question is:\nIntegration of Realistic Lighting and Shadows: 0 points: The 3D rendering shows unrealistic or poorly implemented lighting and shadow effects, with unnatural or inconsistent lighting that detracts from the image’s overall quality. 1 point: The lighting and shadows are realistically integrated into the rendering, creating a natural sense of depth and enhancing the three-dimensional effect in a way that complements the object’s design.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference three-view pictures for the design task and the bottom image as the response provided by a student. The task objective is to generate a realistic 3D rendering based on the provided three-view pictures and text requirements.\nThe text requirement is:\n\"The image provided is a three view of a kettle with a stainless steel subject and a plastic base and lid.Please generate a 3D rendering of the item based on the three views (front view, side view, top view) and text description of the item provided. The 3D form of the item should be accurately constructed by combining the information from all angles in the three views and refining the details of the material, colour, light and shadow effects of the item. The final 3D rendering should show the complete three-dimensional appearance of the item, conform to the scale and structure provided in the three-view drawings, and be consistent with the features of the item.\"\nYour review question is:\nAesthetic Cohesion and Visual Appeal: 0 points: The 3D rendering lacks visual harmony, with elements that appear disjointed or poorly composed. The design may have awkward proportions, distracting artifacts, or an inconsistent style, resulting in a rendering that feels visually unbalanced or unpolished. 1 point: The 3D rendering demonstrates strong aesthetic cohesion, with a harmonious composition and well-balanced proportions. The style is consistent throughout, and the image has a polished, visually appealing look that aligns with professional design standards.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
diff --git a/dataset/3d_effect_generation_three-view_reference_0001/eval.json b/dataset/3d_effect_generation_three-view_reference_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..4b798808aebd756853ca9ae2df48b471267d483b
--- /dev/null
+++ b/dataset/3d_effect_generation_three-view_reference_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Consistency with the input reference image:",
+            "0_point_standard": "The 3D rendering output lacks logical consistency with the input three-view sketch, showing significant differences in shape or structure, contradicting the reference image.",
+            "1_point_standard": "The 3D rendering output maintains logical consistency with the input three-view sketch, with coherent structure, faithfully presenting the object's intended form."
+        },
+        {
+            "question": "Completeness of detail representation:",
+            "0_point_standard": "The 3D rendering lacks some fine details or specific design elements from the input sketch, resulting in a simplified or incomplete representation.",
+            "1_point_standard": "The 3D rendering fully captures all details and intricate features from the input sketch, providing a complete and accurate representation of the object."
+        },
+        {
+            "question": "Consistency of material and texture representation:",
+            "0_point_standard": "The 3D rendering fails to accurately represent the materials or textures specified or implied in the input sketch, leading to inconsistencies that affect realism.",
+            "1_point_standard": "The 3D rendering consistently and realistically represents materials and textures from all angles, effectively enhancing the object's appearance and credibility."
+        },
+        {
+            "question": "Adherence to text description instructions:",
+            "0_point_standard": "The 3D rendering generated by the model fails to meet specific requirements in the text description, lacking the changes or enhancements specified in the instructions.",
+            "1_point_standard": "The model effectively follows and incorporates the specific instructions provided in the text description, accurately reflecting all required modifications or features."
+        },
+        {
+            "question": "Integration of realistic lighting and shadows:",
+            "0_point_standard": "The lighting and shadow effects in the 3D rendering are unrealistic or poorly handled, with unnatural or inconsistent light source effects, affecting the overall image quality.",
+            "1_point_standard": "Lighting and shadow effects are realistically integrated in the rendering, creating a natural sense of depth and enhancing the three-dimensional effect, complementing the object design."
+        },
+        {
+            "question": "Aesthetic unity and visual appeal:",
+            "0_point_standard": "The 3D rendering lacks visual harmony, with elements appearing uncoordinated or poorly combined. The design may suffer from disproportionate elements, distracting artifacts, or inconsistent styles, making the image appear visually unbalanced or unrefined.",
+            "1_point_standard": "The 3D rendering displays a strong aesthetic unity, with harmonious combinations and balanced proportions. The style is consistent, and the image has a refined, appealing appearance that meets professional design standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/3d_effect_generation_three-view_reference_0001/images.txt b/dataset/3d_effect_generation_three-view_reference_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fe6089fc03ff45695a3c6b48dabb5781d26b831c
--- /dev/null
+++ b/dataset/3d_effect_generation_three-view_reference_0001/images.txt
@@ -0,0 +1,3 @@
+https://img.alicdn.com/imgextra/i2/O1CN01NM3tRQ1c9cE5Cc1xe_!!6000000003558-0-tps-575-583.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01SBwJC225rhCZpyi0m_!!6000000007580-0-tps-443-586.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01ek3ujs1e3mPVAGkrw_!!6000000003816-0-tps-583-401.jpg
diff --git a/dataset/3d_effect_generation_three-view_reference_0001/instruction.txt b/dataset/3d_effect_generation_three-view_reference_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f3aab7189a0a0cbc99cfacd81c6643a734997404
--- /dev/null
+++ b/dataset/3d_effect_generation_three-view_reference_0001/instruction.txt
@@ -0,0 +1 @@
+The image provided is a three-view drawing of a kettle with a stainless steel body and a plastic base and lid. Please generate a 3D rendering of the item based on the three views (front view, side view, top view) and text description of the item provided. The 3D form of the item should be accurately constructed by combining the information from all angles in the three views and refining the details of the material, colour, light and shadow effects of the item. The final 3D rendering should show the complete three-dimensional appearance of the item, conform to the scale and structure provided in the three-view drawings, and be consistent with the features of the item.
\ No newline at end of file
diff --git a/dataset/3d_effect_generation_three-view_reference_0001/meta.json b/dataset/3d_effect_generation_three-view_reference_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a8354ca01011899a1328a5a59f13e122155a07d
--- /dev/null
+++ b/dataset/3d_effect_generation_three-view_reference_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "3D effect generation with three-view reference",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0073",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_hair_editing_0001/eval.json b/dataset/animal_attribute_editing_hair_editing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f7233116ff1cc3be4b8c60af5499c711e3bcc21
--- /dev/null
+++ b/dataset/animal_attribute_editing_hair_editing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the edited image accurately retain the unchanged parts of the original animal image, ensuring no unintended alterations were made?",
+            "0_point_standard": "The edited image shows noticeable changes in parts that should not have been altered.",
+            "1_point_standard": "The edited image accurately retains the unchanged parts, with no unintended modifications outside the specified areas."
+        },
+        {
+            "question": "Does the edited image preserve the overall content and identity of the original animal, ensuring consistency between the input and output images?",
+            "0_point_standard": "The edited image significantly alters the identity or main features of the animal, making it look different from the original image.",
+            "1_point_standard": "The edited image preserves the identity and main features of the animal, ensuring consistency with the original image."
+        },
+        {
+            "question": "Does the hair editing comply with the specific requirements provided in the text description, such as style, color, or length modifications?",
+            "0_point_standard": "The hair editing does not meet the specific requirements described in the text, showing noticeable differences in style, color, or length.",
+            "1_point_standard": "The hair editing accurately meets the requirements specified in the text description."
+        },
+        {
+            "question": "Does the edited image effectively integrate the hair modifications, providing a seamless and natural appearance consistent with the animal's existing features?",
+            "0_point_standard": "The hair modifications appear unnatural or poorly integrated, disrupting the overall appearance of the animal.",
+            "1_point_standard": "The hair modifications are seamlessly integrated, maintaining a natural, cohesive appearance with the animal's existing features."
+        },
+        {
+            "question": "Are the texture and details of the edited hair realistic, enhancing the visual quality of the animal image?",
+            "0_point_standard": "The texture and details of the edited hair appear unrealistic or lack depth, reducing the visual quality of the image.",
+            "1_point_standard": "The texture and details of the edited hair are realistic, enhancing the overall visual quality of the image."
+        },
+        {
+            "question": "Does the edited image possess strong aesthetic appeal, providing a visually attractive and professionally executed final result?",
+            "0_point_standard": "The edited image lacks aesthetic appeal, appearing unprofessional or visually unpleasing.",
+            "1_point_standard": "The edited image exhibits strong aesthetic appeal, with high visual attractiveness and professional execution."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_hair_editing_0001/images.txt b/dataset/animal_attribute_editing_hair_editing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae8a454ed5edf5f6a96280fdb6d4071112e9adbc
--- /dev/null
+++ b/dataset/animal_attribute_editing_hair_editing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01EMWYoj1meR2Fa4Bol_!!6000000004979-0-tps-1440-2560.jpg
diff --git a/dataset/animal_attribute_editing_hair_editing_0001/instruction.txt b/dataset/animal_attribute_editing_hair_editing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5620075b4d309558941a2c5ec43e8ac136cb4147
--- /dev/null
+++ b/dataset/animal_attribute_editing_hair_editing_0001/instruction.txt
@@ -0,0 +1 @@
+Please modify the hair color of the Golden Retriever in the input image from golden yellow to dark brown. The goal is to keep the dog's posture and background unchanged but adjust the hair color to dark brown, ensuring the texture and lighting effects of the fur remain natural and realistic. The generated image should maintain the overall style of the original, but the fur color should clearly change to dark brown.
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_hair_editing_0001/meta.json b/dataset/animal_attribute_editing_hair_editing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..08848c97bcd2275d2e6a4e82c855aa2a79978398
--- /dev/null
+++ b/dataset/animal_attribute_editing_hair_editing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "animal hair editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0087",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_posture_editing_0002/eval.json b/dataset/animal_attribute_editing_posture_editing_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..7ba9a27e1feea96bf55010f47feccc31eb6873c9
--- /dev/null
+++ b/dataset/animal_attribute_editing_posture_editing_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the edited image accurately preserve the original state of the unmodified parts of the animal, ensuring consistency with the original image where no changes were specified?",
+            "0_point_standard": "The unmodified parts of the animal show obvious changes or inconsistencies compared to the original image.",
+            "1_point_standard": "The unmodified parts of the animal remain consistent and unchanged, accurately reflecting the original image."
+        },
+        {
+            "question": "Does the edited image maintain the overall style and characteristics of the animal, ensuring that the edited pose does not alter its recognizable features or species-specific traits?",
+            "0_point_standard": "The edited pose alters the animal's recognizable features or species-specific traits, making it difficult to identify.",
+            "1_point_standard": "The edited pose retains the animal's style and features, keeping its recognizable features and species-specific traits intact."
+        },
+        {
+            "question": "Does the edited animal pose accurately reflect the requirements described in the text, effectively conveying the intended pose adjustments?",
+            "0_point_standard": "The edited pose does not reflect the changes specified in the text description, or misrepresents the intended pose.",
+            "1_point_standard": "The edited pose accurately reflects the changes specified in the text description, conveying the intended pose adjustments."
+        },
+        {
+            "question": "Does the edited image include the modifications specified in the text without introducing inconsistencies or unrealistic elements in the animal's anatomical structure and positioning?",
+            "0_point_standard": "The modifications introduce inconsistencies or unrealistic elements in the animal's anatomical structure and positioning.",
+            "1_point_standard": "The modifications are consistent with real anatomical structure and positioning, without introducing unrealistic elements."
+        },
+        {
+            "question": "Does the edited image showcase high-quality texture details, maintaining the natural appearance and texture of the animal's fur, skin, or scales?",
+            "0_point_standard": "The texture details are poor, resulting in an unnatural appearance of the animal's fur, skin, or scales.",
+            "1_point_standard": "The texture details are of high quality, preserving the natural appearance of the animal's fur, skin, or scales."
+        },
+        {
+            "question": "Does the edited image possess overall aesthetic appeal, with visually pleasing composition that meets professional standards and user expectations?",
+            "0_point_standard": "The edited image lacks aesthetic appeal, with poor visual composition.",
+            "1_point_standard": "The edited image showcases strong aesthetic appeal, with high-quality visual composition meeting professional standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_posture_editing_0002/images.txt b/dataset/animal_attribute_editing_posture_editing_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ff3ce48c66f7ebb38bc8d5e852783d97251fcbd1
--- /dev/null
+++ b/dataset/animal_attribute_editing_posture_editing_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01DcZHQM1fcsNPgBWks_!!6000000004028-0-tps-1140-641.jpg
diff --git a/dataset/animal_attribute_editing_posture_editing_0002/instruction.txt b/dataset/animal_attribute_editing_posture_editing_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..12a84b06cda6469e4a343c555936767d1e65c4fd
--- /dev/null
+++ b/dataset/animal_attribute_editing_posture_editing_0002/instruction.txt
@@ -0,0 +1 @@
+Please modify the posture of the two horses in the input image from leaning against each other to walking. The goal is to keep the relative position of the two horses and the background unchanged, but adjust their posture to a natural walking stance, ensuring that the movement of the legs and the dynamic positioning of their bodies are consistent with the natural gait of horses. The generated image should look natural and realistic, with a smooth and accurate posture transition.
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_posture_editing_0002/meta.json b/dataset/animal_attribute_editing_posture_editing_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..342e7364b0ad7e4a1db3562890c1fa79d0e07012
--- /dev/null
+++ b/dataset/animal_attribute_editing_posture_editing_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "animal posture editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0088",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_species_editing_0001/eval.json b/dataset/animal_attribute_editing_species_editing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..1fabf7cfbcc9208c1f3c2d418bc3016681e4f18d
--- /dev/null
+++ b/dataset/animal_attribute_editing_species_editing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the edited image accurately preserve the basic shape and outline of the original animal, except for the specified species modifications?",
+            "0_point_standard": "The shape and outline of the animal show significant deviations or distortions beyond the specified species modifications.",
+            "1_point_standard": "The edited image accurately preserves the shape and outline of the original animal, except for the specified species modifications."
+        },
+        {
+            "question": "Aside from the specified modifications, does the edited image maintain the overall structure and proportions of the animal, ensuring consistency between the original and edited images?",
+            "0_point_standard": "The structure of the animal in the edited image has significantly changed in unintended areas, with imbalanced proportions.",
+            "1_point_standard": "The structure and proportions of the animal in the edited image are consistent with the original (except for intended modifications) and are well-proportioned."
+        },
+        {
+            "question": "Does the edited image accurately reflect the specific species changes described in the text input?",
+            "0_point_standard": "The species changes are not accurately represented or are inconsistent with the description provided in the text input.",
+            "1_point_standard": "The species changes are accurately represented and consistent with the description provided in the text input."
+        },
+        {
+            "question": "Does the edited image correctly implement any additional features specified in the text description, such as colors, patterns, or unique traits?",
+            "0_point_standard": "The edited image fails to include the specified additional features, or includes them inaccurately.",
+            "1_point_standard": "The edited image correctly includes all specified additional features, such as colors, patterns, or unique traits."
+        },
+        {
+            "question": "Does the edited image seamlessly integrate the modified species features with the rest of the image, ensuring a natural appearance?",
+            "0_point_standard": "The integration of modified species features is poor, leading to a disjointed or unnatural appearance.",
+            "1_point_standard": "The modified species features are seamlessly integrated with the rest of the image, presenting a natural and harmonious appearance."
+        },
+        {
+            "question": "Does the edited image have an overall aesthetic appeal, being visually attractive and meeting the professional expectations of digital image editing?",
+            "0_point_standard": "The edited image lacks aesthetic appeal and has poor visual quality.",
+            "1_point_standard": "The edited image exhibits strong aesthetic appeal, with high visual attractiveness, meeting professional standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_species_editing_0001/images.txt b/dataset/animal_attribute_editing_species_editing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6c920fb976c3fc090be3c13f3400d7b5665c2d91
--- /dev/null
+++ b/dataset/animal_attribute_editing_species_editing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01x2FT6o1PRqzbDozJk_!!6000000001838-0-tps-1899-2048.jpg
diff --git a/dataset/animal_attribute_editing_species_editing_0001/instruction.txt b/dataset/animal_attribute_editing_species_editing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e0bd2c96ddcf051a47ce8fefc8826ed3667f779d
--- /dev/null
+++ b/dataset/animal_attribute_editing_species_editing_0001/instruction.txt
@@ -0,0 +1 @@
+Please transform the input image of a domestic cat into a wolf. The goal is to keep the cat's posture and background unchanged but edit its species characteristics to resemble a wolf. Ensure the edited image reflects the wolf's facial structure, ear shape, body proportions, and fur characteristics while maintaining the same pose and background as the original image. The resulting image should look natural and realistic, accurately showcasing the wolf's species traits.
\ No newline at end of file
diff --git a/dataset/animal_attribute_editing_species_editing_0001/meta.json b/dataset/animal_attribute_editing_species_editing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..2704609613bbb6bcafb6e14fd87f12247044bc80
--- /dev/null
+++ b/dataset/animal_attribute_editing_species_editing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "animal species editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0089",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_with_reference_0002/eval.json b/dataset/animal_growth_process_generation_with_reference_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e086dc690dec5c0417cff5d9f6111c998b84e440
--- /dev/null
+++ b/dataset/animal_growth_process_generation_with_reference_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image originate from the input image and maintain a clear association in content and style?",
+            "0_point_standard": "The output image shows no apparent association with the input image, with elements not matching the content or style of the original animal.",
+            "1_point_standard": "The output image clearly originates from the input image, maintaining consistent content and style while preserving recognizable features of the original animal."
+        },
+        {
+            "question": "Does the generated growth process image follow the specified timeline and show reasonable progression?",
+            "0_point_standard": "The image does not follow a reasonable timeline, with growth stages appearing inconsistent or in a jumbled order.",
+            "1_point_standard": "The image clearly depicts the reasonable progression of animal growth stages, following the specified timeline."
+        },
+        {
+            "question": "Did the model correctly implement any specific modifications mentioned in the text description, such as changes in size or color?",
+            "0_point_standard": "The specified modifications were not accurately implemented, with changes not matching the text description.",
+            "1_point_standard": "The model accurately implemented the specified modifications mentioned in the text description, such as changes in size or color."
+        },
+        {
+            "question": "Do the unspecified parts of the image remain unchanged and maintain integrity with the input image?",
+            "0_point_standard": "Parts of the image that should not be altered have undergone unnecessary changes or distortion, affecting the overall content.",
+            "1_point_standard": "The unspecified parts of the image remain unchanged, preserving the integrity and structure of the input image."
+        },
+        {
+            "question": "Is the image style consistent across the entire sequence of growth process images?",
+            "0_point_standard": "The image style is inconsistent, with variations disrupting the visual coherence of the growth stages.",
+            "1_point_standard": "The images maintain a consistent style throughout the sequence, ensuring visual coherence and continuity."
+        },
+        {
+            "question": "Does each image in the growth process sequence retain the original animal's key details and recognizable features, ensuring ID consistency?",
+            "0_point_standard": "Key details and recognizable features of the original animal are lost in the growth process images, making it difficult to identify them as the same animal.",
+            "1_point_standard": "Each image retains the original animal's key details and recognizable features, ensuring identity consistency throughout the growth process."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_with_reference_0002/images.txt b/dataset/animal_growth_process_generation_with_reference_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..880cf10c49dbcd61c7ef108539988a500edb28ef
--- /dev/null
+++ b/dataset/animal_growth_process_generation_with_reference_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01xOvcfj1OpNitxskMq_!!6000000001754-0-tps-889-500.jpg
diff --git a/dataset/animal_growth_process_generation_with_reference_0002/instruction.txt b/dataset/animal_growth_process_generation_with_reference_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7a7c79d2b2fe3ea54fe89f9d54e82fbcea9b1a86
--- /dev/null
+++ b/dataset/animal_growth_process_generation_with_reference_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate 3 images showing the intermediate growth stages of this adult fox based on the provided picture. The first image should depict the fox in its cub stage, with a smaller body, shorter fur, and undeveloped facial features, with relatively larger ears. The second image should show the fox in its juvenile stage, where the body is growing larger, the fur is becoming denser, but it has not yet fully developed the appearance of an adult fox. The third image should depict the fox in its sub-adult stage, with a body close to adult size, thicker fur, and a bushier tail, though the overall appearance is still between a cub and an adult fox. Ensure that all the generated images clearly show the same fox, with visible continuity as the fox progresses through different growth stages, making it feel like the same animal at different points in time.
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_with_reference_0002/meta.json b/dataset/animal_growth_process_generation_with_reference_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..2b05bd9cc703b0da2600e1f0d23f37625003fefd
--- /dev/null
+++ b/dataset/animal_growth_process_generation_with_reference_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "animal growth process generation with reference",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0048",
+    "output_image_count": 3,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0001/eval.json b/dataset/animal_growth_process_generation_without_reference_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d4c8ea35a425613aadf47c41bab98a1251d82f6
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the animal's growth process in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks logical flow, failing to illustrate the animal's growth process.",
+            "1_point_standard": "The sequence of images clearly presents the animal's growth stages in a logical chronological order."
+        },
+        {
+            "question": "Does the content of the images match the animal growth process specified in the text description?",
+            "0_point_standard": "The content of the images fails to accurately reflect the animal growth stages described in the text, with noticeable deviations.",
+            "1_point_standard": "The content of the images completely matches the text description, accurately depicting the specified animal growth stages."
+        },
+        {
+            "question": "Is the style and overall visual effect of the images consistent?",
+            "0_point_standard": "The style of the images is inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a coherent visual effect."
+        },
+        {
+            "question": "Does the generated image sequence maintain the consistency of the animal's identity (e.g., recognizable as the same animal throughout the process)?",
+            "0_point_standard": "The appearance of the animal is inconsistent across different images, making it difficult to recognize as the same animal.",
+            "1_point_standard": "The appearance of the animal is consistent, clearly recognizable as the same animal in all images."
+        },
+        {
+            "question": "Is the description of the animal's growth process reasonable and logical?",
+            "0_point_standard": "The description of the animal's growth process is illogical or unreasonable, with obvious errors or unrealistic portrayals.",
+            "1_point_standard": "The description of the animal's growth process is reasonable, logical, and accurately reflects expected biological principles."
+        },
+        {
+            "question": "Do the details and aesthetics of the images meet professional standards and possess visual appeal?",
+            "0_point_standard": "The images lack detail, have poor aesthetics, and do not meet visual standards.",
+            "1_point_standard": "The images are rich in detail, have excellent aesthetics, meet professional standards, and possess visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0001/images.txt b/dataset/animal_growth_process_generation_without_reference_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/animal_growth_process_generation_without_reference_0001/instruction.txt b/dataset/animal_growth_process_generation_without_reference_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..043028f3536d518b8121eee7e71c681fdc877be9
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images showcasing the growth process of a real-world kitten from birth to adulthood. The first image shows a newborn kitten in a nest with its siblings, eyes still closed; the second image shows a young kitten learning to walk, attempting to climb onto a sofa in a cozy living room; the third image shows a half-grown cat curiously looking out the window as sunlight streams into the room; the fourth image shows an adult cat lounging elegantly on a balcony, with a cityscape in the background.
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0001/meta.json b/dataset/animal_growth_process_generation_without_reference_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..8147ed558508f86a941dcd3f9673f452e13816ba
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "animal growth process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0021",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0002/eval.json b/dataset/animal_growth_process_generation_without_reference_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..9568824f36d761b054e2c918d01a18c6a5b50cd7
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the growth process of the animal in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks logical flow, failing to illustrate the animal's growth process.",
+            "1_point_standard": "The sequence of images clearly presents the growth stages of the animal in logical chronological order."
+        },
+        {
+            "question": "Does the content of the images match the animal growth process specified in the text description?",
+            "0_point_standard": "The content of the images fails to accurately reflect the animal growth stages described in the text, with significant deviations.",
+            "1_point_standard": "The content of the images completely matches the text description, accurately displaying the specified animal growth stages."
+        },
+        {
+            "question": "Is the style and overall visual effect of the images consistent?",
+            "0_point_standard": "The style of the images is inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect."
+        },
+        {
+            "question": "Does the generated sequence of images maintain the consistency of the animal's identity (e.g., can it be recognized as the same animal throughout the process)?",
+            "0_point_standard": "The appearance of the animal is inconsistent across different images, making it difficult to recognize as the same animal.",
+            "1_point_standard": "The appearance of the animal remains consistent, clearly recognizable as the same animal in all images."
+        },
+        {
+            "question": "Is the description of the animal growth process reasonable and logical?",
+            "0_point_standard": "The description of the animal growth process is illogical or unreasonable, with obvious errors or unrealistic descriptions.",
+            "1_point_standard": "The description of the animal growth process is reasonable, logical, and accurately reflects expected biological principles."
+        },
+        {
+            "question": "Do the details and aesthetics of the images meet professional standards and possess visual appeal?",
+            "0_point_standard": "The images lack detail, have poor aesthetics, and do not meet visual standards.",
+            "1_point_standard": "The images are rich in detail, have excellent aesthetics, meet professional standards, and possess visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0002/images.txt b/dataset/animal_growth_process_generation_without_reference_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/animal_growth_process_generation_without_reference_0002/instruction.txt b/dataset/animal_growth_process_generation_without_reference_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..91cf0abc77824b6d587c9ae3c6433a45025d88ac
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images depicting the growth process of a mythical dragon from egg to adulthood. The first image shows the dragon hatching from its egg in a mysterious cave; the second image depicts a young dragon with underdeveloped wings exploring the cave around it; the third image shows a half-grown dragon, its wings strong enough for short flights, soaring above vast mountains and sky; the fourth image shows the adult dragon, spiraling through the sky and overlooking the land below, with an ancient castle in the background.
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0002/meta.json b/dataset/animal_growth_process_generation_without_reference_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..46549534125432ebea8cef264a824686c7aa729e
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "animal growth process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0021",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0003/eval.json b/dataset/animal_growth_process_generation_without_reference_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f7afc7767ed19fbe13eb7615207020e5a9b82ff
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the growth process of an animal in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks logical flow, failing to illustrate the animal's growth process.",
+            "1_point_standard": "The sequence of images clearly presents the stages of the animal's growth in a logical chronological order."
+        },
+        {
+            "question": "Does the content of the images match the animal growth process specified in the text description?",
+            "0_point_standard": "The content of the images fails to accurately reflect the animal growth stages described in the text, showing significant deviation.",
+            "1_point_standard": "The content of the images completely matches the text description, accurately depicting the specified animal growth stages."
+        },
+        {
+            "question": "Is the style and overall visual effect of the images consistent?",
+            "0_point_standard": "The image styles are inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect."
+        },
+        {
+            "question": "Does the generated sequence of images maintain the consistency of the animal's identity (e.g., recognizable as the same animal throughout the process)?",
+            "0_point_standard": "The appearance of the animal is inconsistent across different images, making it difficult to recognize as the same animal.",
+            "1_point_standard": "The appearance of the animal remains consistent, clearly recognizable as the same animal in all images."
+        },
+        {
+            "question": "Is the description of the animal's growth process reasonable and logical?",
+            "0_point_standard": "The description of the animal's growth process is illogical or unreasonable, containing obvious errors or unrealistic descriptions.",
+            "1_point_standard": "The description of the animal's growth process is reasonable, logical, and accurately reflects expected biological principles."
+        },
+        {
+            "question": "Do the details and aesthetics of the images meet professional standards and have visual appeal?",
+            "0_point_standard": "The images lack detail, have poor aesthetics, and do not meet visual standards.",
+            "1_point_standard": "The images are rich in detail, have excellent aesthetics, meet professional standards, and are visually appealing."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0003/images.txt b/dataset/animal_growth_process_generation_without_reference_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/animal_growth_process_generation_without_reference_0003/instruction.txt b/dataset/animal_growth_process_generation_without_reference_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a163a740ffcd7d9ebbe39899def37fe86cc3e57f
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images depicting the growth of a sea turtle from hatching to adulthood. The first image shows a baby sea turtle crawling from the beach toward the ocean, with the vast shore and waves in the background; the second image shows a young sea turtle swimming in shallow waters, surrounded by colorful coral and fish; the third image shows a half-grown sea turtle gliding through the deep ocean, with a mysterious underwater world around it; the fourth image shows an adult sea turtle swimming leisurely through the ocean, with towering kelp forests in the background.
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0003/meta.json b/dataset/animal_growth_process_generation_without_reference_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9bcd882d95a778ce393953e8327c49fafb27c931
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "animal growth process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0021",
+    "output_image_count": 4,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0004/eval.json b/dataset/animal_growth_process_generation_without_reference_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d26887daaf9cc651a578b0523e2d8d87f1568e8d
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the growth process of the animal in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks a logical flow, failing to illustrate the animal's growth process.",
+            "1_point_standard": "The sequence of images clearly presents the stages of the animal's growth in a logical chronological order."
+        },
+        {
+            "question": "Does the content of the images match the animal growth process specified in the text description?",
+            "0_point_standard": "The content of the images does not accurately reflect the stages of animal growth described in the text, showing significant discrepancies.",
+            "1_point_standard": "The content of the images completely matches the text description, accurately depicting the specified stages of animal growth."
+        },
+        {
+            "question": "Is the style and overall visual effect of the images consistent?",
+            "0_point_standard": "The style of the images is inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect."
+        },
+        {
+            "question": "Does the generated sequence of images maintain consistency in the animal's identity (e.g., can the same animal be recognized throughout the process)?",
+            "0_point_standard": "The appearance of the animal is inconsistent across different images, making it difficult to recognize it as the same animal.",
+            "1_point_standard": "The appearance of the animal remains consistent, allowing it to be clearly recognized as the same animal in all images."
+        },
+        {
+            "question": "Is the description of the animal's growth process reasonable and logical?",
+            "0_point_standard": "The description of the animal's growth process is illogical or unreasonable, with obvious errors or unrealistic descriptions.",
+            "1_point_standard": "The description of the animal's growth process is reasonable, logical, and accurately reflects expected biological principles."
+        },
+        {
+            "question": "Do the details and aesthetics of the images meet professional standards and have visual appeal?",
+            "0_point_standard": "The images lack detail, are aesthetically poor, and do not meet visual standards.",
+            "1_point_standard": "The images are rich in detail, aesthetically excellent, meet professional standards, and are visually appealing."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0004/images.txt b/dataset/animal_growth_process_generation_without_reference_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/animal_growth_process_generation_without_reference_0004/instruction.txt b/dataset/animal_growth_process_generation_without_reference_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a8a0d3158f6b7cfde66848636897459e7b8adcfe
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images showing the growth of a virtual robotic dog. The first image shows a newly constructed robotic puppy with parts still being assembled on a lab table; the second image shows a semi-assembled robotic dog testing its movements in the lab; the third image shows a more complete robotic dog with its intelligence activated, playing with children in a home setting; the fourth image shows a fully mature robotic dog walking through city streets, fully integrated into human society.
\ No newline at end of file
diff --git a/dataset/animal_growth_process_generation_without_reference_0004/meta.json b/dataset/animal_growth_process_generation_without_reference_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9978fac95a821f813133a9d566cdbb013e84fd54
--- /dev/null
+++ b/dataset/animal_growth_process_generation_without_reference_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "animal growth process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0021",
+    "output_image_count": 4,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0001/eval.json b/dataset/architectural_style_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5c7dde62fe07b42e48f0bdda79ebf7e0894923d
--- /dev/null
+++ b/dataset/architectural_style_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated architectural style accurately reflect the basic elements of architecture described in the text?",
+            "0_point_standard": "The architectural style does not match the text description and fails to accurately depict the basic elements of architecture.",
+            "1_point_standard": "The architectural style fully reflects the basic elements of architecture described in the text."
+        },
+        {
+            "question": "Is the outdoor photography effect applied consistently throughout the entire image, creating a realistic and coherent representation?",
+            "0_point_standard": "The outdoor photography effect is inconsistent or unrealistic, resulting in a disjointed representation of the architecture.",
+            "1_point_standard": "The outdoor photography effect is applied consistently, making the architectural representation realistic and coherent."
+        },
+        {
+            "question": "Does the model accurately follow the specific architectural style or materials mentioned in the text description (e.g., Gothic, glass curtain walls, brickwork)?",
+            "0_point_standard": "The model fails to accurately represent the specific architectural style or materials mentioned in the text description.",
+            "1_point_standard": "The model accurately showcases the specific architectural style or materials described in the text."
+        },
+        {
+            "question": "Are structural details, such as windows, doors, and facade elements, rendered accurately and clearly, consistent with the described architectural style?",
+            "0_point_standard": "Structural details are inaccurate or unclear and do not reflect the specified architectural style.",
+            "1_point_standard": "Structural details are rendered accurately and clearly, closely matching the architectural style described in the text."
+        },
+        {
+            "question": "Does the overall composition and logical consistency of the image meet professional architectural photography standards?",
+            "0_point_standard": "The image lacks logical consistency or proper composition, not meeting professional architectural photography standards.",
+            "1_point_standard": "The image is logically consistent and well-composed, meeting professional architectural photography standards."
+        },
+        {
+            "question": "Does the image exhibit captivating aesthetic appeal and visual impact, meeting professional architectural visualization standards?",
+            "0_point_standard": "The image lacks aesthetic appeal and does not provide significant visual impact, failing to meet professional visualization standards.",
+            "1_point_standard": "The image has strong aesthetic appeal and visual impact, meeting professional architectural visualization standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0001/images.txt b/dataset/architectural_style_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/architectural_style_generation_0001/instruction.txt b/dataset/architectural_style_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fb61e5da3eb62bc6ed2a808ac2282f052074301e
--- /dev/null
+++ b/dataset/architectural_style_generation_0001/instruction.txt
@@ -0,0 +1 @@
+This is a rendering of a modern architectural design showcasing a single-family home nestled within a dense pine forest. The rectangular house features a clean, contemporary aesthetic with dark gray metal cladding, creating a striking contrast against the surrounding natural environment. The house is elevated on a substantial wooden deck, beneath which lies lush green grass and scattered boulders, connected to the ground by a wooden staircase. Large glass windows and doors allow ample natural light to fill the interior, offering a glimpse into the sparsely visible interior furnishings. Tall pine trees encircle the home, their full foliage and dappled sunlight casting shadows on the grassy terrain, establishing a serene and peaceful atmosphere. The overall color palette is subdued, contrasted with bright sunlight highlighting the texture of the building and layering of the surrounding environment. 
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0001/meta.json b/dataset/architectural_style_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..3b6d9500812610725f0caf7ffda7100a9b708296
--- /dev/null
+++ b/dataset/architectural_style_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "architectural style generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0027",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0002/eval.json b/dataset/architectural_style_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..74e2db9e920873238b0f3191dc148ac18c344ad9
--- /dev/null
+++ b/dataset/architectural_style_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated architectural style accurately reflect the basic elements of architecture described in the text?",
+            "0_point_standard": "The architectural style does not match the text description, failing to accurately depict the basic elements of the architecture.",
+            "1_point_standard": "The architectural style fully reflects the basic elements of architecture described in the text."
+        },
+        {
+            "question": "Is the outdoor photographic effect applied consistently throughout the image, creating a realistic and coherent representation?",
+            "0_point_standard": "The outdoor photographic effect is inconsistent or unrealistic, leading to a disjointed architectural representation.",
+            "1_point_standard": "The outdoor photographic effect is applied consistently, making the architectural representation realistic and coherent."
+        },
+        {
+            "question": "Does the model accurately follow specific architectural styles or materials mentioned in the text (e.g., Gothic, glass curtain walls, brickwork)?",
+            "0_point_standard": "The model fails to accurately represent the specific architectural styles or materials mentioned in the text.",
+            "1_point_standard": "The model accurately exhibits the specific architectural styles or materials described in the text."
+        },
+        {
+            "question": "Are the structural details, such as windows, doors, and facade elements, rendered accurately and clearly in accordance with the described architectural style?",
+            "0_point_standard": "The structural details are inaccurate or unclear, failing to reflect the specified architectural style.",
+            "1_point_standard": "The structural details are rendered accurately and clearly, closely matching the architectural style described in the text."
+        },
+        {
+            "question": "Does the overall composition and logical consistency of the image meet professional architectural photography standards?",
+            "0_point_standard": "The image lacks logical consistency or is poorly composed, not meeting professional architectural photography standards.",
+            "1_point_standard": "The image is logically consistent and well-composed, meeting professional architectural photography standards."
+        },
+        {
+            "question": "Does the image exhibit compelling aesthetic appeal and visual impact, meeting professional architectural visualization standards?",
+            "0_point_standard": "The image lacks aesthetic appeal and does not provide significant visual impact, falling short of professional visualization standards.",
+            "1_point_standard": "The image possesses strong aesthetic appeal and visual impact, meeting professional architectural visualization standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0002/images.txt b/dataset/architectural_style_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/architectural_style_generation_0002/instruction.txt b/dataset/architectural_style_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..049830a18f8b538d442370249ba2cfaf024ed54e
--- /dev/null
+++ b/dataset/architectural_style_generation_0002/instruction.txt
@@ -0,0 +1 @@
+This is a rendering of a modern office building complex integrated with a commercial street. The main building is three stories high, exhibiting a clean and contemporary style with a glass curtain wall and light-colored metal panels. A green roof tops the structure. The ground floor houses a bustling commercial street with open shops and seating areas; people are walking, talking, and interacting, creating a lively and vibrant atmosphere. A spacious plaza with lush landscaping and trees sits in front of the building. Steps provide easy access to the building's entrance. In the background, taller buildings and dense trees are visible. The sunlight is bright, the sky is clear blue, and birds are flying in the distance. The overall image has bright colors, high clarity, rich details, and conveys a comfortable, modern, and energetic feeling.
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0002/meta.json b/dataset/architectural_style_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b4a6631a3ea07f092cf7501ccd3ac85eb929ab39
--- /dev/null
+++ b/dataset/architectural_style_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "architectural style generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0027",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0003/eval.json b/dataset/architectural_style_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..86d1769f6966d29fc704a9d8cdcdc838b5d80423
--- /dev/null
+++ b/dataset/architectural_style_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated architectural style accurately reflect the basic elements of architecture described in the text?",
+            "0_point_standard": "The architectural style does not match the text description and fails to accurately depict the basic elements of architecture.",
+            "1_point_standard": "The architectural style fully embodies the basic elements of architecture described in the text."
+        },
+        {
+            "question": "Is the outdoor photography effect consistently applied throughout the entire image, creating a realistic and coherent representation?",
+            "0_point_standard": "The outdoor photography effect is inconsistent or unrealistic, leading to a disjointed architectural representation.",
+            "1_point_standard": "The outdoor photography effect is consistently applied, making the architectural representation realistic and coherent."
+        },
+        {
+            "question": "Does the model accurately follow the specific architectural style or materials mentioned in the text (e.g., Gothic, glass curtain walls, brickwork)?",
+            "0_point_standard": "The model fails to accurately represent the specific architectural style or materials mentioned in the text.",
+            "1_point_standard": "The model accurately showcases the specific architectural style or materials described in the text."
+        },
+        {
+            "question": "Are structural details such as windows, doors, and facade elements rendered accurately and clearly, consistent with the described architectural style?",
+            "0_point_standard": "The structural details are inaccurate or unclear, failing to reflect the specified architectural style.",
+            "1_point_standard": "The structural details are rendered accurately and clearly, closely matching the architectural style described in the text."
+        },
+        {
+            "question": "Does the overall composition and logical consistency of the image meet professional architectural photography standards?",
+            "0_point_standard": "The image lacks logical consistency or is poorly composed, not meeting professional architectural photography standards.",
+            "1_point_standard": "The image is logically consistent and well-composed, meeting professional architectural photography standards."
+        },
+        {
+            "question": "Does the image exhibit captivating aesthetic appeal and visual impact, in line with professional architectural visualization standards?",
+            "0_point_standard": "The image lacks aesthetic appeal and does not provide significant visual impact, falling short of professional visualization standards.",
+            "1_point_standard": "The image has strong aesthetic appeal and visual impact, meeting professional architectural visualization standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0003/images.txt b/dataset/architectural_style_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/architectural_style_generation_0003/instruction.txt b/dataset/architectural_style_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..795e74eb95f00ab81c3fb21b9d07d2dfb4b393d1
--- /dev/null
+++ b/dataset/architectural_style_generation_0003/instruction.txt
@@ -0,0 +1 @@
+The image showcases a modern architectural complex, primarily composed of two interconnected white buildings. The main building is larger, rectangular in shape, featuring multi-story glass curtain walls and partial white metal paneling, with some sunshades on the top floor. The facade is clean and simple, with smooth lines and a unified color scheme. A smaller secondary building is connected to the main building via a gentle ramp, maintaining a consistent architectural style. In front of the buildings is a wide road with several parked private cars. On both sides of the road are landscaped areas with short grass and a mix of shrubs of varying heights, creating a peaceful and comfortable environment. The background features a clear blue sky with abundant sunlight, making the buildings appear bright and modern. Near the entrance, several people in business attire are seen conversing, adding a touch of life to the scene. The overall image is simple, grand, and possesses a strong sense of modernity and artistry.
\ No newline at end of file
diff --git a/dataset/architectural_style_generation_0003/meta.json b/dataset/architectural_style_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..220d6b9a424051d5d4f47b6200e08738d8bbb52a
--- /dev/null
+++ b/dataset/architectural_style_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "architectural style generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0027",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/brand_merchandise_generation_0001/auto_eval.jsonl b/dataset/brand_merchandise_generation_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..ca2c2dcfb56938b9849e1e5fa19113abda852e11
--- /dev/null
+++ b/dataset/brand_merchandise_generation_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to generate brand peripheral products containing the given brand visual pattern based on the text requirements.\nThe text requirement is:\n\"Generate an image of a canvas tote bag designed to match the visual identity of the original green character with sneakers on stairs. The character should be printed on the bag, maintaining the same playful, minimalist style and color scheme as the original image. The tote bag is white, with the green character design prominently displayed in the center. The background is a minimalist studio setup with a soft gray backdrop, enhancing the focus on the bag. The bag is displayed hanging on a hook, with subtle shadows adding depth. The design should clearly reflect the original character, so it’s immediately recognizable as a brand product.\"\nYour review question is:\nBrand Visual Pattern Consistency: 0 points: The brand’s unique visual elements are missing or altered, making it hard to recognize the brand in the merchandise. 1 point: The merchandise clearly reflects every detail of the visual elements in the referenced pattern image, accurately transferring the brand’s pattern and style as specified. The visual pattern in the two images is exactly the same. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to generate brand peripheral products containing the given brand visual pattern based on the text requirements.\nThe text requirement is:\n\"Generate an image of a canvas tote bag designed to match the visual identity of the original green character with sneakers on stairs. The character should be printed on the bag, maintaining the same playful, minimalist style and color scheme as the original image. The tote bag is white, with the green character design prominently displayed in the center. The background is a minimalist studio setup with a soft gray backdrop, enhancing the focus on the bag. The bag is displayed hanging on a hook, with subtle shadows adding depth. The design should clearly reflect the original character, so it’s immediately recognizable as a brand product.\"\nYour review question is:\nProduct Type Accuracy: 0 points: The generated product does not match the specified type (e.g., a mug instead of a tote bag), or the structure is inconsistent with the description. 1 point: The merchandise accurately aligns with the specified product type, showing the correct form and structure as outlined.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to generate brand peripheral products containing the given brand visual pattern based on the text requirements.\nThe text requirement is:\n\"Generate an image of a canvas tote bag designed to match the visual identity of the original green character with sneakers on stairs. The character should be printed on the bag, maintaining the same playful, minimalist style and color scheme as the original image. The tote bag is white, with the green character design prominently displayed in the center. The background is a minimalist studio setup with a soft gray backdrop, enhancing the focus on the bag. The bag is displayed hanging on a hook, with subtle shadows adding depth. The design should clearly reflect the original character, so it’s immediately recognizable as a brand product.\"\nYour review question is:\nFidelity to Visual Details: 0 points: Key visual details requested, such as specific color adjustments or logo placements, are missing or incorrectly applied. 1 point: All specified visual details, including color adjustments and logo placement, are accurately and thoughtfully applied as per the description.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to generate brand peripheral products containing the given brand visual pattern based on the text requirements.\nThe text requirement is:\n\"Generate an image of a canvas tote bag designed to match the visual identity of the original green character with sneakers on stairs. The character should be printed on the bag, maintaining the same playful, minimalist style and color scheme as the original image. The tote bag is white, with the green character design prominently displayed in the center. The background is a minimalist studio setup with a soft gray backdrop, enhancing the focus on the bag. The bag is displayed hanging on a hook, with subtle shadows adding depth. The design should clearly reflect the original character, so it’s immediately recognizable as a brand product.\"\nYour review question is:\nPositioning and Proportion of the Visual Pattern: 0 points: The brand’s visual pattern is positioned awkwardly or disproportionately, failing to integrate naturally with the product’s shape and surface. 1 point: The brand’s visual pattern is applied with correct positioning and proportion, fitting naturally onto the product’s surface and enhancing its visual appeal.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to generate brand peripheral products containing the given brand visual pattern based on the text requirements.\nThe text requirement is:\n\"Generate an image of a canvas tote bag designed to match the visual identity of the original green character with sneakers on stairs. The character should be printed on the bag, maintaining the same playful, minimalist style and color scheme as the original image. The tote bag is white, with the green character design prominently displayed in the center. The background is a minimalist studio setup with a soft gray backdrop, enhancing the focus on the bag. The bag is displayed hanging on a hook, with subtle shadows adding depth. The design should clearly reflect the original character, so it’s immediately recognizable as a brand product.\"\nYour review question is:\nClarity and Quality of Text or Graphics: 0 points: The text or graphics appear blurry, pixelated, or washed out, reducing the professional quality and clarity of the brand. 1 point: The text and graphics are rendered sharply and vividly, enhancing both readability and the brand’s visual appeal.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to generate brand peripheral products containing the given brand visual pattern based on the text requirements.\nThe text requirement is:\n\"Generate an image of a canvas tote bag designed to match the visual identity of the original green character with sneakers on stairs. The character should be printed on the bag, maintaining the same playful, minimalist style and color scheme as the original image. The tote bag is white, with the green character design prominently displayed in the center. The background is a minimalist studio setup with a soft gray backdrop, enhancing the focus on the bag. The bag is displayed hanging on a hook, with subtle shadows adding depth. The design should clearly reflect the original character, so it’s immediately recognizable as a brand product.\"\nYour review question is:\nOverall Aesthetic and Professional Quality: 0 points: The merchandise lacks aesthetic cohesion or professionalism, with issues like poor composition, lighting, or unrealistic presentation, making it unfit for brand representation. 1 point: The merchandise displays high aesthetic appeal and professional quality, with balanced composition, effective lighting, and a realistic appearance suitable for showcasing as branded merchandise.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
diff --git a/dataset/brand_merchandise_generation_0001/eval.json b/dataset/brand_merchandise_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d13366cb7cb8d2aaa6de2f810dab967e0163f99e
--- /dev/null
+++ b/dataset/brand_merchandise_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Brand visual pattern consistency:",
+            "0_point_standard": "The unique visual elements of the brand are missing or altered, making it difficult to recognize the brand in the product.",
+            "1_point_standard": "The product clearly reflects every visual element detail from the reference pattern image, accurately conveying the brand's pattern and style. The visual patterns in both images are identical."
+        },
+        {
+            "question": "Product type accuracy:",
+            "0_point_standard": "The generated product does not match the specified type (e.g., a cup is generated instead of a handbag), or its structure is inconsistent with the description.",
+            "1_point_standard": "The product accurately matches the specified product type, displaying the correct form and structure as described."
+        },
+        {
+            "question": "Faithfulness to visual details:",
+            "0_point_standard": "Key required visual details are missing, such as specific color adjustments or logo placements, or are applied incorrectly.",
+            "1_point_standard": "All specified visual details, including color adjustments and logo placements, are accurately and thoughtfully applied according to the description."
+        },
+        {
+            "question": "Positioning and proportion of visual patterns:",
+            "0_point_standard": "The brand's visual pattern is improperly positioned or disproportionate, failing to blend naturally with the product's shape and surface.",
+            "1_point_standard": "The brand's visual pattern is applied with correct positioning and proportion, naturally adapting to the product's surface, enhancing visual appeal."
+        },
+        {
+            "question": "Clarity and quality of text or graphics:",
+            "0_point_standard": "Text or graphics appear blurred, pixelated, or faded, reducing the brand's professional quality and clarity.",
+            "1_point_standard": "Text and graphics are presented clearly and vividly, enhancing readability and the brand's visual appeal."
+        },
+        {
+            "question": "Overall aesthetic and professional quality:",
+            "0_point_standard": "The product lacks aesthetic unity or professionalism, with poor composition, insufficient lighting, or unrealistic presentation, making it unsuitable for brand display.",
+            "1_point_standard": "The product demonstrates high aesthetic and professional quality, with balanced composition, good lighting effects, and a realistic appearance suitable for brand product display."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/brand_merchandise_generation_0001/images.txt b/dataset/brand_merchandise_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..aba6f6e9e317b8ae9af5b1e385c4f3112bc8ffa8
--- /dev/null
+++ b/dataset/brand_merchandise_generation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01Z1uzKk1QMBi0SCZW0_!!6000000001961-0-tps-1200-1200.jpg
diff --git a/dataset/brand_merchandise_generation_0001/instruction.txt b/dataset/brand_merchandise_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0146d57e6427c438e8bb8b2631b5393761a27b62
--- /dev/null
+++ b/dataset/brand_merchandise_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Generate an image of a canvas tote bag designed to match the visual identity of the original green character with sneakers on stairs. The character should be printed on the bag, maintaining the same playful, minimalist style and color scheme as the original image. The tote bag is white, with the green character design prominently displayed in the center. The background is a minimalist studio setup with a soft gray backdrop, enhancing the focus on the bag. The bag is displayed hanging on a hook, with subtle shadows adding depth. The design should clearly reflect the original character, so it’s immediately recognizable as a brand product.
\ No newline at end of file
diff --git a/dataset/brand_merchandise_generation_0001/meta.json b/dataset/brand_merchandise_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..c4930de12aa6448966de6b7df8515299e248769e
--- /dev/null
+++ b/dataset/brand_merchandise_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "brand merchandise generation",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0063",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/business_card_generation_0001/eval.json b/dataset/business_card_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..0db96362792310987d8316abd2d640fe31710b1b
--- /dev/null
+++ b/dataset/business_card_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the business card design match the text description and include all key information (e.g., name, position, contact information)?",
+            "0_point_standard": "The business card design does not match the description, and key information is missing or displayed incorrectly.",
+            "1_point_standard": "The business card design matches the description, accurately displaying all key information."
+        },
+        {
+            "question": "Is the text on the business card clear and easy to read, and do the font style and layout meet design requirements?",
+            "0_point_standard": "The text is unclear, and the font style or layout does not meet the requirements, affecting overall readability.",
+            "1_point_standard": "The text is clear and easy to read, and the font style and layout meet the design requirements."
+        },
+        {
+            "question": "Does the overall color scheme and visual style of the business card match the style requirements described in the text (e.g., simple, modern)?",
+            "0_point_standard": "The color scheme and visual style do not match the text description and fail to convey the intended style.",
+            "1_point_standard": "The color scheme and visual style match the text description, conveying the intended design style."
+        },
+        {
+            "question": "Has the model accurately implemented the special design requirements mentioned in the text (e.g., logo, icon, or background pattern)?",
+            "0_point_standard": "The special design requirements mentioned in the text are not accurately implemented or lack detail.",
+            "1_point_standard": "The special design requirements mentioned in the text are accurately implemented with precise details."
+        },
+        {
+            "question": "Is the layout of the business card clear and logical, and is the information organization reasonable and easy to understand?",
+            "0_point_standard": "The layout is cluttered, the organization of information is poor, and the visual effect is chaotic.",
+            "1_point_standard": "The layout is clear and logical, the organization of information is reasonable, and it is easy to understand and read."
+        },
+        {
+            "question": "Does the overall aesthetic and design quality of the business card meet professional standards, and does it have strong visual appeal?",
+            "0_point_standard": "The overall aesthetic of the business card is lacking, with weak design sense and insufficient visual appeal.",
+            "1_point_standard": "The business card has high aesthetic quality, strong design sense, and good visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/business_card_generation_0001/images.txt b/dataset/business_card_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/business_card_generation_0001/instruction.txt b/dataset/business_card_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8eb9b55483f9bb255b05e454b41b0c0ddf682790
--- /dev/null
+++ b/dataset/business_card_generation_0001/instruction.txt
@@ -0,0 +1 @@
+This business card design is vibrant and modern, using a bold color scheme of red, blue, white, and orange on both sides. The top side has a white background with large, bold, red letters spelling “kylie” in a playful, diagonal layout that fills most of the card. Smaller blue text reading “GRAPHIC” is placed above the “k” and “DESIGNER” below the “e,” indicating the profession. Around the text, various blue and orange circles of different sizes are scattered randomly, adding a playful and dynamic element. The bottom side of the card has a solid blue background with “kylie” written again in large, white letters, tilted diagonally similar to the top side but slightly smaller. This side also features scattered orange and blue circles, maintaining the cohesive design. On the bottom right, contact details are displayed in white text: “Kylie Herrera,” “Graphic Designer,” followed by placeholders for the website, email, social media handle, and phone number as “www.website.com | mail@website.com | @socialmedia | +123 639 8726.” The card is set against a bright yellow background, adding extra vibrancy to the overall design. The only generated image contains both sides of the business card.
\ No newline at end of file
diff --git a/dataset/business_card_generation_0001/meta.json b/dataset/business_card_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b2d21f855da5255faeb165e5a6cd8dfd4ca09f4d
--- /dev/null
+++ b/dataset/business_card_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "business card generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0032",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_0001/eval.json b/dataset/childrens_book_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..0d1c2092025a28c96ed7e81f06cce2257eb79a25
--- /dev/null
+++ b/dataset/childrens_book_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present a coherent narrative?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks logical flow, disrupting the narrative.",
+            "1_point_standard": "The sequence of images clearly presents a coherent narrative in logical chronological order."
+        },
+        {
+            "question": "Do the image contents match the text description provided in the children's book?",
+            "0_point_standard": "The images do not accurately reflect the text description, showing significant deviations from the story.",
+            "1_point_standard": "The images are completely consistent with the text description, accurately portraying the story elements."
+        },
+        {
+            "question": "Is the illustration style consistent throughout the entire book?",
+            "0_point_standard": "The illustration style is inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All illustrations maintain a consistent style, creating a harmonious visual effect throughout the book."
+        },
+        {
+            "question": "Are the depictions of main characters or objects consistent across all illustrations?",
+            "0_point_standard": "The depictions of main characters or objects are inconsistent across different images, making them hard to recognize as the same character or object.",
+            "1_point_standard": "The depictions of main characters or objects are consistent, clearly identifiable as the same character or object across all illustrations."
+        },
+        {
+            "question": "Is the portrayal of narrative and characters logically accurate and suitable for the children's age group?",
+            "0_point_standard": "The portrayal is illogical, inaccurate, or inappropriate for the target age group, with noticeable errors.",
+            "1_point_standard": "The portrayal is logical, accurate, and suitable for the target age group, reflecting the expected narrative standards."
+        },
+        {
+            "question": "Do the illustrations exhibit professional-level detail and aesthetics, enhancing the book's visual appeal?",
+            "0_point_standard": "The illustrations lack detail, aesthetic quality is poor, and they do not meet the visual standards for children's books.",
+            "1_point_standard": "The illustrations are rich in detail, aesthetically excellent, meet professional standards, and have significant visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_0001/images.txt b/dataset/childrens_book_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/childrens_book_generation_0001/instruction.txt b/dataset/childrens_book_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6dd71a38021e0e5879cf8a0d28deb1ae1f5c8887
--- /dev/null
+++ b/dataset/childrens_book_generation_0001/instruction.txt
@@ -0,0 +1 @@
+This is a children's picture book illustration generation task, consisting of 5-10 pages. The story is titled “The Animal Orchestra's Musical Journey.” Scene and character IDs need to remain consistent throughout the book to ensure stylistic uniformity and character continuity. The main characters include a conductor fox (ID: Foxy), a drumming bear (ID: Bruno), a flute-playing rabbit (ID: Tino), a guitar-playing squirrel (ID: Nelly), and a piano-playing elephant (ID: Ellie). The following are detailed descriptions for each page: Page 1: It's early morning in the forest, with sunlight streaming through the leaves onto a stage where the animals are rehearsing. The conductor Foxy stands in the center, raising the baton, preparing to start. The other animals are by their instruments, ready to play. The atmosphere is serene, and the background is a lush forest, with simple music stands and sheet music on stage. Character IDs: Foxy, Bruno, Tino, Nelly, Ellie. Page 2: As the music begins, a gust of wind blows away one of the sheets of music, carrying it toward the distant plains. The animals stop playing, watching the sheet float away, while Foxy looks worried. The background transitions from the forest to the wide-open plains, with grass swaying in the wind, and the music sheet flying off. The animals look puzzled. Character IDs: Foxy, Bruno, Tino, Nelly, Ellie. Page 3: The animals decide to embark on a journey to retrieve the lost music. They cross the plains, and the sound of their music grows louder with each step. The scene shows the animals walking: Bruno with his drum, Tino holding his flute, Nelly with her guitar, and Ellie rolling her piano with her trunk. The background is an expansive plain, with sunlight illuminating their adventure. Character IDs: Foxy, Bruno, Tino, Nelly, Ellie. Page 4: Deep in the plains, they encounter a group of confused birds who have lost their song. Foxy suggests that they help the birds find their melody with music. 
The scene is set in the open plains, with a lake in the distance. The birds sit on branches, looking worried. The animal orchestra starts playing, and their music helps the birds sing again. Character IDs: Foxy, Bruno, Tino, Nelly, Ellie. Page 5: The animals continue their journey and arrive at the seashore. The ocean waves are rough, and a fish (ID: Finny) is trapped between rocks, unable to return to the sea. Ellie uses her music to summon gentle tides, helping the fish swim back into the ocean. The scene shows a vast beach with waves crashing on the rocks, while a few seabirds fly overhead. The atmosphere is peaceful and harmonious. Character IDs: Foxy, Bruno, Tino, Nelly, Ellie, Finny. Page 6: Finally, the animals find the lost music sheet, which has blown to the top of a mountain village. The villagers are worried about an approaching storm, unable to celebrate their festival. Foxy decides to hold a concert to drive away their fears with music. The scene is set in the village square, with mountains and clearing storm clouds in the background. The villagers gather around the animals, looking hopeful. Character IDs: Foxy, Bruno, Tino, Nelly, Ellie. Page 7: The animal orchestra plays a warm melody, and the music fills the village. The storm gradually dissipates, and the sun emerges from behind the clouds. The villagers start dancing and celebrating. The scene is set in the village square, with colorful flags waving, and the villagers cheering and dancing, while the animals are immersed in their music. Character IDs: Foxy, Bruno, Tino, Nelly, Ellie. Page 8: After the concert, the animals are surrounded by the grateful villagers, who thank them for bringing beautiful music and the power of unity. The orchestra members smile with satisfaction, knowing that this journey was not only about retrieving the music but also strengthening their friendship. 
The scene shows the village square at sunset, with a warm evening glow in the background, as the animals and villagers celebrate together. Character IDs: Foxy, Bruno, Tino, Nelly, Ellie.
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_0001/meta.json b/dataset/childrens_book_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab08478b0a732258016290c6c63d52fdf127edde
--- /dev/null
+++ b/dataset/childrens_book_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "childrens book generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0019",
+    "output_image_count": 8,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_0002/eval.json b/dataset/childrens_book_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a114298644ca5fb4129ecc9c615c9616f3f49ea
--- /dev/null
+++ b/dataset/childrens_book_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images present a coherent narrative in a logical order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks logical flow, disrupting the narrative.",
+            "1_point_standard": "The sequence of images clearly presents a coherent narrative in a logical chronological order."
+        },
+        {
+            "question": "Do the images correspond to the text descriptions provided in the children's book?",
+            "0_point_standard": "The images do not accurately reflect the text descriptions and have noticeable deviations from the story.",
+            "1_point_standard": "The images completely match the text descriptions and accurately portray the story elements."
+        },
+        {
+            "question": "Is the illustration style consistent throughout the entire book?",
+            "0_point_standard": "The illustration style is inconsistent, resulting in a disjointed visual effect.",
+            "1_point_standard": "All illustrations maintain a consistent style, creating a harmonious visual effect throughout the entire book."
+        },
+        {
+            "question": "Is the depiction of main characters or objects consistent across all illustrations?",
+            "0_point_standard": "The depiction of main characters or objects is inconsistent across different images, making it difficult to recognize them as the same characters or objects.",
+            "1_point_standard": "The depiction of main characters or objects is consistent, clearly recognizable as the same characters or objects across all illustrations."
+        },
+        {
+            "question": "Is the portrayal of the narrative and characters logically accurate and suitable for the children's age group?",
+            "0_point_standard": "The portrayal is illogical, inaccurate, or unsuitable for the target age group, with noticeable errors.",
+            "1_point_standard": "The portrayal is logically accurate, suitable for the target age group, and reflects the expected narrative standards."
+        },
+        {
+            "question": "Do the illustrations exhibit a professional level of detail and aesthetic, enhancing the book's visual appeal?",
+            "0_point_standard": "The illustrations lack detail, have poor aesthetics, and do not meet the visual standards of children's books.",
+            "1_point_standard": "The illustrations are rich in detail, have excellent aesthetics, meet professional standards, and have significant visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_0002/images.txt b/dataset/childrens_book_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/childrens_book_generation_0002/instruction.txt b/dataset/childrens_book_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..953008dcee9150d9cdce722c99002f3cb643699a
--- /dev/null
+++ b/dataset/childrens_book_generation_0002/instruction.txt
@@ -0,0 +1 @@
+This is a children's picture book illustration generation task consisting of 7 pages, titled “The Lost Rainbow Stone.” Scene and character IDs need to remain consistent throughout the book to ensure stylistic uniformity and character continuity. The main characters include a little girl named Lily (ID: Lily), her friend Tom (ID: Tom), and their three animal friends: a rabbit named Fanny (ID: Fanny), a squirrel named Rocky (ID: Rocky), and a bird named Bobby (ID: Bobby). The following are detailed descriptions for each page: Page 1: The story begins on a sunny afternoon, with Lily and Tom playing in the park. Lily finds a mysterious stone glowing faintly with rainbow colors in the grass. She curiously picks up the stone, and suddenly a rainbow pattern appears on it. The background is the park's grassy field, with sunlight filtering through the leaves, and children playing in the distance, creating a warm and relaxed atmosphere. Character IDs: Lily, Tom. Page 2: As Lily and Tom marvel at the stone, a sudden gust of wind blows it out of Lily's hands, carrying it into the sky. Lily and Tom watch in alarm as the stone disappears into the distance. The scene transitions from the park to the windy sky, with trees swaying and the sky growing darker in the distance as the stone is carried away. Character IDs: Lily, Tom. Page 3: Lily and Tom decide to embark on a journey with their animal friends to find the lost rainbow stone. They arrive at the entrance of the forest, where the rabbit Fanny jumps out, offering to help. The scene is set at the edge of the forest, with tall, dense trees, and Fanny standing beneath them, beckoning Lily and Tom to venture deeper into the forest. The background is lush and green, with a sense of mystery and anticipation at the forest's entrance. Character IDs: Lily, Tom, Fanny. Page 4: As they enter the forest, Lily, Tom, and Fanny encounter Rocky the squirrel. 
Rocky tells them that the rainbow stone may have been blown to the other side of the forest, near a flowing river. Rocky joins them as they continue their journey. The scene is set in the heart of the forest, with towering trees and a thick carpet of leaves, with a distant glimpse of the river. The forest looks deep and beautiful in the background. Character IDs: Lily, Tom, Fanny, Rocky. Page 5: When they reach the riverbank, Bobby the bird flies over and tells them that he saw the rainbow stone land on a large rock across the river. Lily and Tom decide to build a bridge with their animal friends to cross the river. The scene is set by the river, with clear water and Bobby flying above, pointing toward the distant rock. Lily and Tom are working with Fanny and Rocky, building the bridge with wooden logs. Character IDs: Lily, Tom, Fanny, Rocky, Bobby. Page 6: Finally, Lily and her friends cross the river and find the rainbow stone, still glowing with beautiful colors. Lily gently picks up the stone and discovers that if she wishes upon it while looking at the sky, a rainbow will appear. The scene is set on the grassy field on the other side of the river, with the rainbow stone casting a glow on everyone's faces, and a rainbow beginning to form in the sky. Character IDs: Lily, Tom, Fanny, Rocky, Bobby. Page 7: Lily lifts the rainbow stone, and a rainbow descends from the sky, covering the entire field. Everyone cheers for finding the stone, which symbolizes their friendship and teamwork. The story ends with a peaceful scene of the rainbow spanning across the sky, with sunshine on their faces and smiles filled with joy. The scene is a vast grassy field with a rainbow arching across the sky, and Lily and her friends standing under it, smiling up at the sky. Character IDs: Lily, Tom, Fanny, Rocky, Bobby.
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_0002/meta.json b/dataset/childrens_book_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..415b1c77db6636b94fe9a645753d3f3da5c37e7e
--- /dev/null
+++ b/dataset/childrens_book_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "childrens book generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0019",
+    "output_image_count": 7,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_role_definition_0002/auto_eval.jsonl b/dataset/childrens_book_generation_role_definition_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..41f396a04220c18e6b357e2de3c49f9638b6007b
--- /dev/null
+++ b/dataset/childrens_book_generation_role_definition_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg", "0005.jpg", "0006.jpg", "0007.jpg", "0008.jpg"], "question": "Is the number in the image the digit 8? 0 points: The number in the image is not the digit 8; 1 point: The number in the image is the digit 8. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first input image and first output image of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided character definitions and text descriptions. \nThe text requirement is:\nPlease generate the illustrations based on the following page descriptions, ensuring all characters are consistent in appearance and style, matching the five defined characters. Page 1: Lulu stands in the center of the image, with a focused and serious expression on her cute giraffe face. She holds one hoof near her face, preparing to count, and leans slightly forward as if she can't wait to find her friends. Lulu's innocence and energy should be fully displayed in this image. Page 2: Yoko is sneakily hiding behind a large group of mushrooms, with her signature big ears peeking out. The image should emphasize Yoko's playful and expectant mood, with her eyes slightly squinted and a small smile on her face, capturing the joy she feels while hiding. Page 3: Noah hides behind a tree, with his tail sticking out a little. His eyes glint mischievously, and he leans against the tree in a relaxed, confident pose. The image should highlight Noah's clever and agile personality, reflecting his sense of satisfaction with his hiding spot. Page 4: DonDon reluctantly joins the game. In the image, he's standing near a mailbox, half-hidden by his tail. Despite his usual cool demeanor, there's a hint of playfulness in his gaze. This contrast between DonDon's cold exterior and his subtle enjoyment should be a key focus of the image. Page 5: Ajim sits on a floating cloud high above the ground. He props his head up with his hands, wearing a smug smile, his eyes filled with confidence. 
Ajim believes he has found the perfect hiding place, and the image should reflect his self-assured and boastful personality. Page 6: Lulu opens her eyes and begins to search for her friends. She first finds Yoko hiding behind the mushrooms. In the image, Lulu and Yoko are laughing together, with Lulu pointing at Yoko, who stands up and pats Lulu on the back. The image should capture the warmth and joy of their interaction. Page 7: Lulu spots Noah's tail sticking out, sneaks around the tree, and suddenly jumps out to catch him. Noah pretends to be scared, with his mouth open and eyes wide in mock surprise, but quickly breaks into laughter. Lulu is laughing so hard that she's almost falling over. The image should show the fun and friendly bond between the two characters. Page 8: All the friends have been found, even Ajim floating in the sky. They gather together, laughing and smiling. Lulu, Yoko, Noah, Ajim, and even DonDon are all laughing, with DonDon showing a slight smile despite his usual cool demeanor. This image should radiate the warmth of friendship and the joy of their shared fun.\nYour review question is:\nIs Lulu’s appearance in the Page 1 output image consistent with her defined character traits, specifically her innocence, energy, and giraffe-like features? 0 points: Lulu’s appearance deviates from her character definition, missing key features such as her giraffe characteristics or expressions of innocence and energy. 1 point: Lulu’s appearance is consistent with her character traits, accurately capturing her giraffe features, innocence, and energetic personality.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and third output images of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided character definitions and text descriptions. \nThe text requirement is:\nPlease generate images based on the description of each page, ensuring that all images reflect the story's narrative and maintain the consistency of the characters, with all characters matching the given definitions of the four characters. Page 1: Lina (character 1) is happily walking down a bright city street. She's wearing a green outfit, looking cheerful, with her hand raised slightly as if waving to the world. The background features a warm orange sky, with simple modern buildings on both sides of the street, and light-colored brick paving. The entire city exudes energy and vibrancy. The model should reflect Lina's lively personality, with the bright tones of the street complementing the city's energetic atmosphere. Page 2: Lina meets Ajay (character 2), a boy with blue hair and a floral shirt, at the corner of the street. They stand in a candy-colored park, surrounded by dense trees and benches, with colorful flowers dotting the grass. Sunlight filters through the leaves, casting light on them as Ajay shares some recent fun stories, and Lina listens with a bright smile. The model should depict their friendly interaction, with the warm sunlight and natural background enhancing the harmony of the scene. Page 3: As they walk further, they meet Kyle (character 3), a cool boy wearing gray sportswear and sunglasses, performing a high-energy skateboarding trick in the skate park. Kyle is airborne, his skateboard spinning beneath him. 
The background is a modern skate park, with a dark backdrop highlighting his impressive moves. The model should capture Kyle's dynamic action in the air, emphasizing his personality and the movement of the skateboard. Page 4: Lina, Ajay, and Kyle arrive at the beach, where they meet Xiaohai (character 4), a lively boy holding a surfboard, getting ready to surf. He's wearing a T-shirt with eye patterns, with the backdrop of a vast ocean. The blue sky and white waves meet at the horizon, with the water gently lapping at the shore. The model should convey the freshness of the beach and Xiaohai's excitement, showcasing his anticipation for surfing. Page 5: The four friends play together on the beach under the setting sun, holding hands and forming a circle as laughter fills the air. The background is the golden sunlight reflecting off the sea, with waves gently hitting the shore. The entire scene radiates warmth and the bond of friendship. The model should use the warm glow of the sunset and the friends' smiles to convey the joy and beauty of their time together.\nYour review question is:\nDo the expressions and body language of Yoko in Page 2 and Noah in Page 3 accurately reflect their defined personalities (Yoko’s playful and expectant mood, and Noah’s mischievous, confident demeanor)? 0 points: The expressions or poses do not match the character descriptions, failing to reflect Yoko’s playful hiding or Noah’s mischievous, relaxed attitude. 1 point: Both Yoko and Noah’s expressions and body language align well with their personalities, showcasing Yoko’s playfulness and Noah’s clever confidence.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0006.jpg", "0007.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the sixth and seventh output images of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided character definitions and text descriptions. \nThe text requirement is:\nPlease generate images based on the description of each page, ensuring that all images reflect the story's narrative and maintain the consistency of the characters, with all characters matching the given definitions of the four characters. Page 1: Lina (character 1) is happily walking down a bright city street. She's wearing a green outfit, looking cheerful, with her hand raised slightly as if waving to the world. The background features a warm orange sky, with simple modern buildings on both sides of the street, and light-colored brick paving. The entire city exudes energy and vibrancy. The model should reflect Lina's lively personality, with the bright tones of the street complementing the city's energetic atmosphere. Page 2: Lina meets Ajay (character 2), a boy with blue hair and a floral shirt, at the corner of the street. They stand in a candy-colored park, surrounded by dense trees and benches, with colorful flowers dotting the grass. Sunlight filters through the leaves, casting light on them as Ajay shares some recent fun stories, and Lina listens with a bright smile. The model should depict their friendly interaction, with the warm sunlight and natural background enhancing the harmony of the scene. Page 3: As they walk further, they meet Kyle (character 3), a cool boy wearing gray sportswear and sunglasses, performing a high-energy skateboarding trick in the skate park. Kyle is airborne, his skateboard spinning beneath him. 
The background is a modern skate park, with a dark backdrop highlighting his impressive moves. The model should capture Kyle's dynamic action in the air, emphasizing his personality and the movement of the skateboard. Page 4: Lina, Ajay, and Kyle arrive at the beach, where they meet Xiaohai (character 4), a lively boy holding a surfboard, getting ready to surf. He's wearing a T-shirt with eye patterns, with the backdrop of a vast ocean. The blue sky and white waves meet at the horizon, with the water gently lapping at the shore. The model should convey the freshness of the beach and Xiaohai's excitement, showcasing his anticipation for surfing. Page 5: The four friends play together on the beach under the setting sun, holding hands and forming a circle as laughter fills the air. The background is the golden sunlight reflecting off the sea, with waves gently hitting the shore. The entire scene radiates warmth and the bond of friendship. The model should use the warm glow of the sunset and the friends' smiles to convey the joy and beauty of their time together.\nYour review question is:\nAre the interactions between Lulu and Yoko in Page 6, and between Lulu and Noah in Page 7, depicted as described, capturing the warmth and playfulness of their friendships? 0 points: The interactions do not accurately reflect the warmth or playfulness described, missing elements like shared laughter, physical gestures, or friendly expressions. 1 point: The interactions are accurately illustrated, showing Lulu and Yoko’s joyful encounter and Lulu and Noah’s playful moment with authentic expressions and body language.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0004.jpg", "0005.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the fourth and fifth output images of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided character definitions and text descriptions. \nThe text requirement is:\nPlease generate images based on the description of each page, ensuring that all images reflect the story's narrative and maintain the consistency of the characters, with all characters matching the given definitions of the four characters. Page 1: Lina (character 1) is happily walking down a bright city street. She's wearing a green outfit, looking cheerful, with her hand raised slightly as if waving to the world. The background features a warm orange sky, with simple modern buildings on both sides of the street, and light-colored brick paving. The entire city exudes energy and vibrancy. The model should reflect Lina's lively personality, with the bright tones of the street complementing the city's energetic atmosphere. Page 2: Lina meets Ajay (character 2), a boy with blue hair and a floral shirt, at the corner of the street. They stand in a candy-colored park, surrounded by dense trees and benches, with colorful flowers dotting the grass. Sunlight filters through the leaves, casting light on them as Ajay shares some recent fun stories, and Lina listens with a bright smile. The model should depict their friendly interaction, with the warm sunlight and natural background enhancing the harmony of the scene. Page 3: As they walk further, they meet Kyle (character 3), a cool boy wearing gray sportswear and sunglasses, performing a high-energy skateboarding trick in the skate park. Kyle is airborne, his skateboard spinning beneath him. 
The background is a modern skate park, with a dark backdrop highlighting his impressive moves. The model should capture Kyle's dynamic action in the air, emphasizing his personality and the movement of the skateboard. Page 4: Lina, Ajay, and Kyle arrive at the beach, where they meet Xiaohai (character 4), a lively boy holding a surfboard, getting ready to surf. He's wearing a T-shirt with eye patterns, with the backdrop of a vast ocean. The blue sky and white waves meet at the horizon, with the water gently lapping at the shore. The model should convey the freshness of the beach and Xiaohai's excitement, showcasing his anticipation for surfing. Page 5: The four friends play together on the beach under the setting sun, holding hands and forming a circle as laughter fills the air. The background is the golden sunlight reflecting off the sea, with waves gently hitting the shore. The entire scene radiates warmth and the bond of friendship. The model should use the warm glow of the sunset and the friends' smiles to convey the joy and beauty of their time together.\nYour review question is:\nDo the illustrations maintain a cohesive style, ensuring that DonDon in Page 4 and Ajim in Page 5 appear visually consistent within the same illustration style, even as their unique traits (DonDon’s cool demeanor, Ajim’s smugness) are highlighted? 0 points: The style between the two images is inconsistent, with variations in line work, shading, or other stylistic elements that detract from a unified look. 1 point: Both characters are depicted with a consistent illustration style, while also capturing DonDon’s cool demeanor and Ajim’s smug personality accurately.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0008.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third and eighth output images of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided character definitions and text descriptions. \nThe text requirement is:\nPlease generate images based on the description of each page, ensuring that all images reflect the story's narrative and maintain the consistency of the characters, with all characters matching the given definitions of the four characters. Page 1: Lina (character 1) is happily walking down a bright city street. She's wearing a green outfit, looking cheerful, with her hand raised slightly as if waving to the world. The background features a warm orange sky, with simple modern buildings on both sides of the street, and light-colored brick paving. The entire city exudes energy and vibrancy. The model should reflect Lina's lively personality, with the bright tones of the street complementing the city's energetic atmosphere. Page 2: Lina meets Ajay (character 2), a boy with blue hair and a floral shirt, at the corner of the street. They stand in a candy-colored park, surrounded by dense trees and benches, with colorful flowers dotting the grass. Sunlight filters through the leaves, casting light on them as Ajay shares some recent fun stories, and Lina listens with a bright smile. The model should depict their friendly interaction, with the warm sunlight and natural background enhancing the harmony of the scene. Page 3: As they walk further, they meet Kyle (character 3), a cool boy wearing gray sportswear and sunglasses, performing a high-energy skateboarding trick in the skate park. Kyle is airborne, his skateboard spinning beneath him. 
The background is a modern skate park, with a dark backdrop highlighting his impressive moves. The model should capture Kyle's dynamic action in the air, emphasizing his personality and the movement of the skateboard. Page 4: Lina, Ajay, and Kyle arrive at the beach, where they meet Xiaohai (character 4), a lively boy holding a surfboard, getting ready to surf. He's wearing a T-shirt with eye patterns, with the backdrop of a vast ocean. The blue sky and white waves meet at the horizon, with the water gently lapping at the shore. The model should convey the freshness of the beach and Xiaohai's excitement, showcasing his anticipation for surfing. Page 5: The four friends play together on the beach under the setting sun, holding hands and forming a circle as laughter fills the air. The background is the golden sunlight reflecting off the sea, with waves gently hitting the shore. The entire scene radiates warmth and the bond of friendship. The model should use the warm glow of the sunset and the friends' smiles to convey the joy and beauty of their time together.\nYour review question is:\nDoes Noah appear consistent in his character design from Page 3 to Page 8, with his unique features and personality clearly recognizable? 0 points: Noah’s design in Page 8 does not match his character design from Page 3, with noticeable differences in his features or overall appearance. 1 point: Noah’s design is consistent in both Page 3 and Page 8, with his unique features and personality clearly recognizable.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/childrens_book_generation_role_definition_0002/eval.json b/dataset/childrens_book_generation_role_definition_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..05b9cb75fdcf6eb036300f8138fda1e6a4f781a0
--- /dev/null
+++ b/dataset/childrens_book_generation_role_definition_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images match the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "In the output image on Page 1, does Lulu's appearance align with her defined character traits, specifically her innocence, energy, and giraffe-like features?",
+            "0_point_standard": "Lulu's appearance deviates from her character definition, lacking key features such as her giraffe-like traits or expressions of innocence and energy.",
+            "1_point_standard": "Lulu's appearance is consistent with her character traits, accurately capturing her giraffe-like features, innocence, and vibrant personality."
+        },
+        {
+            "question": "Do the expressions and body language of Yoko on Page 2 and Noah on Page 3 accurately reflect their defined personalities (Yoko's playfulness and anticipation, Noah's mischievousness and confidence)?",
+            "0_point_standard": "The expressions or postures do not match the character descriptions, failing to reflect Yoko's playful hiding or Noah's relaxed mischievous attitude.",
+            "1_point_standard": "Yoko and Noah's expressions and body language highly match their personalities, showcasing Yoko's playfulness and Noah's clever confidence."
+        },
+        {
+            "question": "Do the interactions between Lulu and Yoko on Page 6, and Lulu and Noah on Page 7, as described, show the warmth and playfulness of their friendship?",
+            "0_point_standard": "The interactions fail to accurately reflect the described warmth or playfulness, lacking elements such as shared laughter, physical contact, or friendly expressions.",
+            "1_point_standard": "The interactions are accurately portrayed, showing Lulu and Yoko's joyful encounter and Lulu and Noah's playful moments with genuine expressions and body language."
+        },
+        {
+            "question": "Is the illustration style consistent, ensuring that DonDon on Page 4 and Ajim on Page 5 appear visually coordinated within the same illustration style, even while highlighting their unique traits (DonDon's coolness, Ajim's arrogance)?",
+            "0_point_standard": "The style between the two images is inconsistent, with differences in lines, shading, or other stylistic elements disrupting a unified appearance.",
+            "1_point_standard": "Both characters are depicted in a consistent illustration style while accurately capturing DonDon's coolness and Ajim's arrogant personality."
+        },
+        {
+            "question": "From Page 3 to Page 8, is Noah's character design consistent, with unique traits and personality clearly identifiable?",
+            "0_point_standard": "Noah's design on Page 8 does not match that on Page 3, with noticeable differences in features or overall appearance.",
+            "1_point_standard": "Noah's design is consistent between Page 3 and Page 8, with unique traits and personality clearly identifiable."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_role_definition_0002/images.txt b/dataset/childrens_book_generation_role_definition_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..273403a11f93494167f201c72befe752ff5860fd
--- /dev/null
+++ b/dataset/childrens_book_generation_role_definition_0002/images.txt
@@ -0,0 +1,5 @@
+https://img.alicdn.com/imgextra/i2/O1CN01nGEdTx1eR8bskLzKE_!!6000000003867-0-tps-3334-6828.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01L9nnCE1kfgtDMpPKX_!!6000000004711-0-tps-3334-6828.jpg
+https://img.alicdn.com/imgextra/i4/O1CN01dl8HBW1pX3V9Ihjsi_!!6000000005369-0-tps-3334-6828.jpg
+https://img.alicdn.com/imgextra/i3/O1CN017nyzts1LritviWXCc_!!6000000001353-0-tps-3334-6828.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01nJbafS1EE9Mwh7Gzp_!!6000000000319-0-tps-3334-6828.jpg
diff --git a/dataset/childrens_book_generation_role_definition_0002/instruction.txt b/dataset/childrens_book_generation_role_definition_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f6fae5a6e45e61f18ecc4a3b49fafecf6f60d656
--- /dev/null
+++ b/dataset/childrens_book_generation_role_definition_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate the illustrations based on the following page descriptions, ensuring all characters are consistent in appearance and style, matching the five defined characters. Page 1: Lulu stands in the center of the image, with a focused and serious expression on her cute giraffe face. She holds one hoof near her face, preparing to count, and leans slightly forward as if she can't wait to find her friends. Lulu's innocence and energy should be fully displayed in this image. Page 2: Yoko is sneakily hiding behind a large group of mushrooms, with her signature big ears peeking out. The image should emphasize Yoko's playful and expectant mood, with her eyes slightly squinted and a small smile on her face, capturing the joy she feels while hiding. Page 3: Noah hides behind a tree, with his tail sticking out a little. His eyes glint mischievously, and he leans against the tree in a relaxed, confident pose. The image should highlight Noah's clever and agile personality, reflecting his sense of satisfaction with his hiding spot. Page 4: DonDon reluctantly joins the game. In the image, he's standing near a mailbox, half-hidden by his tail. Despite his usual cool demeanor, there's a hint of playfulness in his gaze. This contrast between DonDon's cold exterior and his subtle enjoyment should be a key focus of the image. Page 5: Ajim sits on a floating cloud high above the ground. He props his head up with his hands, wearing a smug smile, his eyes filled with confidence. Ajim believes he has found the perfect hiding place, and the image should reflect his self-assured and boastful personality. Page 6: Lulu opens her eyes and begins to search for her friends. She first finds Yoko hiding behind the mushrooms. In the image, Lulu and Yoko are laughing together, with Lulu pointing at Yoko, who stands up and pats Lulu on the back. The image should capture the warmth and joy of their interaction. 
Page 7: Lulu spots Noah's tail sticking out, sneaks around the tree, and suddenly jumps out to catch him. Noah pretends to be scared, with his mouth open and eyes wide in mock surprise, but quickly breaks into laughter. Lulu is laughing so hard that she's almost falling over. The image should show the fun and friendly bond between the two characters. Page 8: All the friends have been found, even Ajim floating in the sky. They gather together, laughing and smiling. Lulu, Yoko, Noah, Ajim, and even DonDon are all laughing, with DonDon showing a slight smile despite his usual cool demeanor. This image should radiate the warmth of friendship and the joy of their shared fun.
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_role_definition_0002/meta.json b/dataset/childrens_book_generation_role_definition_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..38876c77798929705b6fd6ced97cb48206c47e8b
--- /dev/null
+++ b/dataset/childrens_book_generation_role_definition_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "childrens book generation with role definition",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": true,
+    "uid": "0044",
+    "output_image_count": 8,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_scenario_definition_0001/auto_eval.jsonl b/dataset/childrens_book_generation_scenario_definition_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..1f088b78bcc94ac1414356f435a85e0766e01757
--- /dev/null
+++ b/dataset/childrens_book_generation_scenario_definition_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg", "0005.jpg", "0006.jpg"], "question": "Is the number in the image the digit 6? 0 points: The number in the image is not the digit 6; 1 point: The number in the image is the digit 6. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and third output images of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided scene definitions and text descriptions. \nThe text requirement is:\nPlease generate images according to the descriptions for each page below, ensuring consistency with the given scenes. There may be new scenes in a similar style, or variations of the provided scenes. Page 1: Spring Awakening. The magical forest welcomes the spring as warm sunlight filters through the tree branches, illuminating the forest. In Scene 1, the yellow mushroom is surrounded by glowing fireflies, and small green plants are sprouting from the ground. A curious rabbit peeks out from behind the large mushroom, looking around with wonder. The entire scene is filled with warmth and vitality. Page 2: The Animals' Home. This page shows a cozy little house in the forest, with a sleepy fox just waking up from hibernation at the door. In Scene 2, the house is surrounded by lush greenery and large blooming flowers. The fox, carrying a small backpack, looks eager to start its adventure for the day. Page 3: Magical Summer Night. As night falls, Scene 1 reappears, but this time under the moonlight. The yellow mushroom emits a warm glow, creating a mysterious atmosphere in the forest. Small fairies flutter around the mushroom, their wings reflecting the light of twinkling stars. The whole scene feels magical and enchanting. Page 4: Autumn Harvest. Scene 2 returns, showing a cozy autumn landscape in the forest. The plants around the house have turned golden, and leaves are falling gently. A small hedgehog walks by, carrying apples on its back, preparing for winter storage. 
The trees in the background are painted in shades of gold and orange, creating a warm and bountiful autumn scene. Page 5: Winter Wonderland. Scene 3 depicts the forest blanketed in thick snow. The large mushroom is covered in snow, and the forest feels calm and serene. Most animals have retreated into their warm homes, but a few birds hop around in the snow, adding a touch of life to the peaceful, wintry scene. Page 6: The Cycle of Seasons. Scene 1 returns to spring, where the plants around the mushroom begin to grow again. The forest wakes up from its winter slumber, and the animals start moving around, ready for new adventures. The warm sunlight shines once more on the forest, symbolizing the cycle of life and the promise of renewal.\nYour review question is:\nDoes the scene on Page 3 (Magical Summer Night) effectively recreate the scene from Page 1 (Spring Awakening), with appropriate nighttime elements and atmosphere? 0 points: The nighttime scene does not resemble the spring setting from Page 1, or it lacks expected nighttime elements like moonlight and stars, creating a disjointed effect. 1 point: The nighttime scene successfully reflects the original setting from Page 1 with adjustments for a magical nighttime atmosphere, including moonlight and stars, preserving continuity.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the second and fourth output images of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided scene definitions and text descriptions. \nThe text requirement is:\nPlease generate images according to the descriptions for each page below, ensuring consistency with the given scenes. There may be new scenes in a similar style, or variations of the provided scenes. Page 1: Spring Awakening. The magical forest welcomes the spring as warm sunlight filters through the tree branches, illuminating the forest. In Scene 1, the yellow mushroom is surrounded by glowing fireflies, and small green plants are sprouting from the ground. A curious rabbit peeks out from behind the large mushroom, looking around with wonder. The entire scene is filled with warmth and vitality. Page 2: The Animals' Home. This page shows a cozy little house in the forest, with a sleepy fox just waking up from hibernation at the door. In Scene 2, the house is surrounded by lush greenery and large blooming flowers. The fox, carrying a small backpack, looks eager to start its adventure for the day. Page 3: Magical Summer Night. As night falls, Scene 1 reappears, but this time under the moonlight. The yellow mushroom emits a warm glow, creating a mysterious atmosphere in the forest. Small fairies flutter around the mushroom, their wings reflecting the light of twinkling stars. The whole scene feels magical and enchanting. Page 4: Autumn Harvest. Scene 2 returns, showing a cozy autumn landscape in the forest. The plants around the house have turned golden, and leaves are falling gently. A small hedgehog walks by, carrying apples on its back, preparing for winter storage. 
The trees in the background are painted in shades of gold and orange, creating a warm and bountiful autumn scene. Page 5: Winter Wonderland. Scene 3 depicts the forest blanketed in thick snow. The large mushroom is covered in snow, and the forest feels calm and serene. Most animals have retreated into their warm homes, but a few birds hop around in the snow, adding a touch of life to the peaceful, wintry scene. Page 6: The Cycle of Seasons. Scene 1 returns to spring, where the plants around the mushroom begin to grow again. The forest wakes up from its winter slumber, and the animals start moving around, ready for new adventures. The warm sunlight shines once more on the forest, symbolizing the cycle of life and the promise of renewal.\nYour review question is:\nAre the elements in the scene on Page 4 (Autumn Harvest) appropriately transformed to represent autumn, while maintaining consistency with the spring scene from Page 2 (The Animals’ Home)? 0 points: The autumn scene lacks expected seasonal changes, such as golden hues and fallen leaves, or it fails to resemble the spring scene’s structure. 1 point: The autumn scene is consistent with the spring scene’s structure, with clear seasonal changes like golden colors and fallen leaves, enhancing the feeling of a natural progression.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0005.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third and fifth output images of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided scene definitions and text descriptions. \nThe text requirement is:\nPlease generate images according to the descriptions for each page below, ensuring consistency with the given scenes. There may be new scenes in a similar style, or variations of the provided scenes. Page 1: Spring Awakening. The magical forest welcomes the spring as warm sunlight filters through the tree branches, illuminating the forest. In Scene 1, the yellow mushroom is surrounded by glowing fireflies, and small green plants are sprouting from the ground. A curious rabbit peeks out from behind the large mushroom, looking around with wonder. The entire scene is filled with warmth and vitality. Page 2: The Animals' Home. This page shows a cozy little house in the forest, with a sleepy fox just waking up from hibernation at the door. In Scene 2, the house is surrounded by lush greenery and large blooming flowers. The fox, carrying a small backpack, looks eager to start its adventure for the day. Page 3: Magical Summer Night. As night falls, Scene 1 reappears, but this time under the moonlight. The yellow mushroom emits a warm glow, creating a mysterious atmosphere in the forest. Small fairies flutter around the mushroom, their wings reflecting the light of twinkling stars. The whole scene feels magical and enchanting. Page 4: Autumn Harvest. Scene 2 returns, showing a cozy autumn landscape in the forest. The plants around the house have turned golden, and leaves are falling gently. A small hedgehog walks by, carrying apples on its back, preparing for winter storage. 
The trees in the background are painted in shades of gold and orange, creating a warm and bountiful autumn scene. Page 5: Winter Wonderland. Scene 3 depicts the forest blanketed in thick snow. The large mushroom is covered in snow, and the forest feels calm and serene. Most animals have retreated into their warm homes, but a few birds hop around in the snow, adding a touch of life to the peaceful, wintry scene. Page 6: The Cycle of Seasons. Scene 1 returns to spring, where the plants around the mushroom begin to grow again. The forest wakes up from its winter slumber, and the animals start moving around, ready for new adventures. The warm sunlight shines once more on the forest, symbolizing the cycle of life and the promise of renewal.\nYour review question is:\nDoes the scene on Page 5 (Winter Wonderland) accurately depict the transition to winter, while maintaining the fundamental layout of the scene from Page 3? 0 points: The winter scene does not adequately reflect expected winter elements like snow and a serene atmosphere, or it significantly deviates from the layout of the scene on Page 3. 1 point: The winter scene maintains the structure of the scene from Page 3 and accurately incorporates winter elements, such as snow-covered features and a calm ambiance, indicating a seasonal transition.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first input image and the first output image of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided scene definitions and text descriptions. \nThe text requirement is:\nPlease generate images according to the descriptions for each page below, ensuring consistency with the given scenes. There may be new scenes in a similar style, or variations of the provided scenes. Page 1: Spring Awakening. The magical forest welcomes the spring as warm sunlight filters through the tree branches, illuminating the forest. In Scene 1, the yellow mushroom is surrounded by glowing fireflies, and small green plants are sprouting from the ground. A curious rabbit peeks out from behind the large mushroom, looking around with wonder. The entire scene is filled with warmth and vitality. Page 2: The Animals' Home. This page shows a cozy little house in the forest, with a sleepy fox just waking up from hibernation at the door. In Scene 2, the house is surrounded by lush greenery and large blooming flowers. The fox, carrying a small backpack, looks eager to start its adventure for the day. Page 3: Magical Summer Night. As night falls, Scene 1 reappears, but this time under the moonlight. The yellow mushroom emits a warm glow, creating a mysterious atmosphere in the forest. Small fairies flutter around the mushroom, their wings reflecting the light of twinkling stars. The whole scene feels magical and enchanting. Page 4: Autumn Harvest. Scene 2 returns, showing a cozy autumn landscape in the forest. The plants around the house have turned golden, and leaves are falling gently. A small hedgehog walks by, carrying apples on its back, preparing for winter storage. 
The trees in the background are painted in shades of gold and orange, creating a warm and bountiful autumn scene. Page 5: Winter Wonderland. Scene 3 depicts the forest blanketed in thick snow. The large mushroom is covered in snow, and the forest feels calm and serene. Most animals have retreated into their warm homes, but a few birds hop around in the snow, adding a touch of life to the peaceful, wintry scene. Page 6: The Cycle of Seasons. Scene 1 returns to spring, where the plants around the mushroom begin to grow again. The forest wakes up from its winter slumber, and the animals start moving around, ready for new adventures. The warm sunlight shines once more on the forest, symbolizing the cycle of life and the promise of renewal.\nYour review question is:\nDoes the generated image on Page 1 align stylistically with the given Input Scene Definition Image 1, maintaining a consistent illustrative style and magical forest theme? 0 points: The style of Page 1 differs significantly from the Input Scene Definition Image 1, appearing inconsistent in illustration style or lacking the magical forest theme. 1 point: The style of Page 1 is consistent with the Input Scene Definition Image 1, displaying a unified illustration style and capturing the intended magical forest atmosphere.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0006.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and sixth output images of the response provided by a student. The task objective is to generate a children's storybook with a sequence of images based on the provided scene definitions and text descriptions. \nThe text requirement is:\nPlease generate images according to the descriptions for each page below, ensuring consistency with the given scenes. There may be new scenes in a similar style, or variations of the provided scenes. Page 1: Spring Awakening. The magical forest welcomes the spring as warm sunlight filters through the tree branches, illuminating the forest. In Scene 1, the yellow mushroom is surrounded by glowing fireflies, and small green plants are sprouting from the ground. A curious rabbit peeks out from behind the large mushroom, looking around with wonder. The entire scene is filled with warmth and vitality. Page 2: The Animals' Home. This page shows a cozy little house in the forest, with a sleepy fox just waking up from hibernation at the door. In Scene 2, the house is surrounded by lush greenery and large blooming flowers. The fox, carrying a small backpack, looks eager to start its adventure for the day. Page 3: Magical Summer Night. As night falls, Scene 1 reappears, but this time under the moonlight. The yellow mushroom emits a warm glow, creating a mysterious atmosphere in the forest. Small fairies flutter around the mushroom, their wings reflecting the light of twinkling stars. The whole scene feels magical and enchanting. Page 4: Autumn Harvest. Scene 2 returns, showing a cozy autumn landscape in the forest. The plants around the house have turned golden, and leaves are falling gently. A small hedgehog walks by, carrying apples on its back, preparing for winter storage. 
The trees in the background are painted in shades of gold and orange, creating a warm and bountiful autumn scene. Page 5: Winter Wonderland. Scene 3 depicts the forest blanketed in thick snow. The large mushroom is covered in snow, and the forest feels calm and serene. Most animals have retreated into their warm homes, but a few birds hop around in the snow, adding a touch of life to the peaceful, wintry scene. Page 6: The Cycle of Seasons. Scene 1 returns to spring, where the plants around the mushroom begin to grow again. The forest wakes up from its winter slumber, and the animals start moving around, ready for new adventures. The warm sunlight shines once more on the forest, symbolizing the cycle of life and the promise of renewal.\nYour review question is:\nDoes the scene on Page 6 (The Cycle of Seasons) successfully evoke the rebirth and continuity of the forest, reminiscent of the initial elements on Page 1? 0 points: The cyclical theme is unclear, and the spring elements on Page 6 do not effectively echo those from Page 1. 1 point: The cyclical theme is well-executed, with spring elements on Page 6 that effectively mirror Page 1, creating a sense of renewal and continuity.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/childrens_book_generation_scenario_definition_0001/eval.json b/dataset/childrens_book_generation_scenario_definition_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..58f0b423c31219bf9142a3048e3d3b4029cff0df
--- /dev/null
+++ b/dataset/childrens_book_generation_scenario_definition_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements of the text description?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Does the scene in Page 3 (Magical Summer Night) effectively recreate the scene from Page 1 (Spring Awakening) with appropriate nighttime elements and atmosphere?",
+            "0_point_standard": "The nighttime scene does not resemble the spring scene from Page 1, or it lacks expected nighttime elements (such as moonlight and stars), resulting in a disconnected effect.",
+            "1_point_standard": "The nighttime scene successfully reflects the original spring scene and adjusts to a magical nighttime atmosphere, including moonlight and stars, maintaining continuity."
+        },
+        {
+            "question": "Do the elements in Page 4 (Autumn Harvest) appropriately transform into an autumn theme while maintaining consistency with the spring scene in Page 2 (The Animals' Home)?",
+            "0_point_standard": "The autumn scene lacks expected seasonal changes (such as golden hues and fallen leaves) or fails to maintain structural consistency with the spring scene.",
+            "1_point_standard": "The autumn scene is structurally consistent with the spring scene and features clear seasonal changes (such as golden colors and fallen leaves), enhancing the sense of natural transition."
+        },
+        {
+            "question": "Does the scene in Page 5 (Winter Wonderland) accurately portray the transition to winter while maintaining the basic layout of the scene from Page 3?",
+            "0_point_standard": "The winter scene fails to adequately reflect expected winter elements (such as snow and a tranquil atmosphere) or significantly deviates from the layout of the scene from Page 3.",
+            "1_point_standard": "The winter scene maintains the structure of the scene from Page 3 and accurately incorporates winter elements, such as snow-covered features and a tranquil atmosphere, reflecting the seasonal transition."
+        },
+        {
+            "question": "Does the generated image for Page 1 align stylistically with the given input scene definition image 1, maintaining a consistent illustration style and magical forest theme?",
+            "0_point_standard": "The style of Page 1 is significantly different from the input scene definition image 1, with an inconsistent illustration style or lacking a magical forest theme.",
+            "1_point_standard": "The style of Page 1 is consistent with the input scene definition image 1, showcasing a unified illustration style and capturing the intended magical forest atmosphere."
+        },
+        {
+            "question": "Does the scene in Page 6 (The Cycle of Seasons) successfully evoke the rebirth and continuity of the forest, similar to the initial elements in Page 1?",
+            "0_point_standard": "The cycle theme is unclear, and the spring elements in Page 6 fail to effectively echo Page 1.",
+            "1_point_standard": "The cycle theme is well presented, and the spring elements in Page 6 effectively echo Page 1, creating a sense of rebirth and continuity."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_scenario_definition_0001/images.txt b/dataset/childrens_book_generation_scenario_definition_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..aad55177048279c165e1078c482730a56cdebbbb
--- /dev/null
+++ b/dataset/childrens_book_generation_scenario_definition_0001/images.txt
@@ -0,0 +1,3 @@
+https://img.alicdn.com/imgextra/i2/O1CN01hC59Ri1knTcdA5NZ7_!!6000000004728-0-tps-1280-720.jpg
+https://img.alicdn.com/imgextra/i2/O1CN017yju4F1zSboV3ieCY_!!6000000006713-0-tps-1280-720.jpg
+https://img.alicdn.com/imgextra/i4/O1CN01NeUsQL1Zi17ziwwQo_!!6000000003227-0-tps-1280-720.jpg
diff --git a/dataset/childrens_book_generation_scenario_definition_0001/instruction.txt b/dataset/childrens_book_generation_scenario_definition_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2f6a4c0a1735f24a24817a678fcd9cd80bc516e2
--- /dev/null
+++ b/dataset/childrens_book_generation_scenario_definition_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate images according to the descriptions for each page below, ensuring consistency with the given scenes. There may be new scenes in a similar style, or variations of the provided scenes. Page 1: Spring Awakening. The magical forest welcomes the spring as warm sunlight filters through the tree branches, illuminating the forest. In Scene 1, the yellow mushroom is surrounded by glowing fireflies, and small green plants are sprouting from the ground. A curious rabbit peeks out from behind the large mushroom, looking around with wonder. The entire scene is filled with warmth and vitality. Page 2: The Animals' Home. This page shows a cozy little house in the forest, with a sleepy fox just waking up from hibernation at the door. In Scene 2, the house is surrounded by lush greenery and large blooming flowers. The fox, carrying a small backpack, looks eager to start its adventure for the day. Page 3: Magical Summer Night. As night falls, Scene 1 reappears, but this time under the moonlight. The yellow mushroom emits a warm glow, creating a mysterious atmosphere in the forest. Small fairies flutter around the mushroom, their wings reflecting the light of twinkling stars. The whole scene feels magical and enchanting. Page 4: Autumn Harvest. Scene 2 returns, showing a cozy autumn landscape in the forest. The plants around the house have turned golden, and leaves are falling gently. A small hedgehog walks by, carrying apples on its back, preparing for winter storage. The trees in the background are painted in shades of gold and orange, creating a warm and bountiful autumn scene. Page 5: Winter Wonderland. Scene 3 depicts the forest blanketed in thick snow. The large mushroom is covered in snow, and the forest feels calm and serene. Most animals have retreated into their warm homes, but a few birds hop around in the snow, adding a touch of life to the peaceful, wintry scene. Page 6: The Cycle of Seasons. 
Scene 1 returns to spring, where the plants around the mushroom begin to grow again. The forest wakes up from its winter slumber, and the animals start moving around, ready for new adventures. The warm sunlight shines once more on the forest, symbolizing the cycle of life and the promise of renewal.
\ No newline at end of file
diff --git a/dataset/childrens_book_generation_scenario_definition_0001/meta.json b/dataset/childrens_book_generation_scenario_definition_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..066f2531d730a2c4ca4c7691cca337d14bde77e2
--- /dev/null
+++ b/dataset/childrens_book_generation_scenario_definition_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "childrens book generation with scenario definition",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": true,
+    "uid": "0045",
+    "output_image_count": 6,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/concept_visualization_0001/auto_eval.jsonl b/dataset/concept_visualization_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..05833ed4f794c81792706e5523d1d2a9375d5ec9
--- /dev/null
+++ b/dataset/concept_visualization_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "question": "Is the number in the image the digit 3? 0 points: The number in the image is not the digit 3; 1 point: The number in the image is the digit 3. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and second images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that illustrate the phenomenon of quantum entanglement between particles. The first image should depict two particles spatially separated by a large distance but connected through entangled light beams. The second image should show how one particle reacts instantly when the other is observed. The third image should present a holistic view where the states of both particles are correlated, overcoming spatial distance and forming an inseparable unified field.\nYour review question is:\nDo the particles in these two images maintain a consistent representation, with identifiable features that allow them to be recognized as the same entangled pair? 0 points: The particle representations differ significantly between the first and second images, making it unclear that they are the same entangled pair. 1 point: The particles are represented consistently in both images, with identifiable features that link them as the same entangled pair.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second image of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that illustrate the phenomenon of quantum entanglement between particles. The first image should depict two particles spatially separated by a large distance but connected through entangled light beams. The second image should show how one particle reacts instantly when the other is observed. The third image should present a holistic view where the states of both particles are correlated, overcoming spatial distance and forming an inseparable unified field.\nYour review question is:\nDoes the second image clearly show the concept of instantaneous reaction between the particles, illustrating the phenomenon of non-locality in quantum entanglement? 0 points: The second image fails to depict the instantaneous interaction clearly, making the concept of non-locality hard to understand. 1 point: The second image effectively shows the instantaneous interaction, providing a clear visualization of non-locality.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third image of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that illustrate the phenomenon of quantum entanglement between particles. The first image should depict two particles spatially separated by a large distance but connected through entangled light beams. The second image should show how one particle reacts instantly when the other is observed. The third image should present a holistic view where the states of both particles are correlated, overcoming spatial distance and forming an inseparable unified field.\nYour review question is:\nDoes the image effectively represent the correlation of states and the unified field concept, visually depicting the inseparable nature of the particles across spatial distance? 0 points: The third image lacks visual indicators of correlation or unity, failing to convey the inseparable nature of entangled particles. 1 point: The third image effectively shows the correlated states and unified field, illustrating the inseparable nature of the entangled particles.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and third images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that illustrate the phenomenon of quantum entanglement between particles. The first image should depict two particles spatially separated by a large distance but connected through entangled light beams. The second image should show how one particle reacts instantly when the other is observed. The third image should present a holistic view where the states of both particles are correlated, overcoming spatial distance and forming an inseparable unified field.\nYour review question is:\nAre these images consistent in visual style, maintaining similar aesthetic qualities and artistic coherence? 0 points: The first and third images have noticeable differences in style, reducing the coherence of the series. 1 point: The first and third images maintain a consistent style, enhancing the visual coherence of the series.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and second images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that illustrate the phenomenon of quantum entanglement between particles. The first image should depict two particles spatially separated by a large distance but connected through entangled light beams. The second image should show how one particle reacts instantly when the other is observed. The third image should present a holistic view where the states of both particles are correlated, overcoming spatial distance and forming an inseparable unified field.\nYour review question is:\nDo the first and second images exhibit high aesthetic quality and scientific accuracy, with visuals that are both conceptually faithful to quantum entanglement and visually appealing? 0 points: The first and second images lack aesthetic appeal or fail to accurately represent quantum entanglement. 1 point: The first and second images are aesthetically pleasing, professionally rendered, and accurately represent the principles of quantum entanglement.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/concept_visualization_0001/eval.json b/dataset/concept_visualization_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..54a5a608cc9faba08988cc2cd341ae2f5b79a5ce
--- /dev/null
+++ b/dataset/concept_visualization_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do the particles in the first and second images maintain a consistent form, with recognizable features, allowing them to be identified as the same entangled pair?",
+            "0_point_standard": "The particles in the first and second images differ significantly in form, making it difficult to recognize them as the same entangled pair.",
+            "1_point_standard": "The particles are consistent in both images, with recognizable features linking them as the same entangled pair."
+        },
+        {
+            "question": "Does the second image clearly demonstrate the instantaneous reaction between particles, reflecting the phenomenon of non-locality in quantum entanglement?",
+            "0_point_standard": "The second image fails to clearly demonstrate instantaneous interaction, making the concept of non-locality difficult to understand.",
+            "1_point_standard": "The second image effectively demonstrates instantaneous interaction, providing a clear visualization of non-locality."
+        },
+        {
+            "question": "Does the third image effectively represent the concept of state association and unified field, visually demonstrating the inseparable nature of particles across spatial distances?",
+            "0_point_standard": "The third image lacks visual indications of state association or unity, failing to convey the inseparable nature of entangled particles.",
+            "1_point_standard": "The third image effectively demonstrates state association and unified field, illustrating the inseparable nature of entangled particles."
+        },
+        {
+            "question": "Do the first and third images maintain consistency in visual style, possessing similar aesthetic qualities and artistic coherence?",
+            "0_point_standard": "The first and third images differ significantly in style, reducing the coherence of the series.",
+            "1_point_standard": "The first and third images are consistent in style, enhancing the visual coherence of the series."
+        },
+        {
+            "question": "Do the first and second images possess a high level of aesthetic quality and scientific accuracy, being visually faithful to the concept of quantum entanglement while also being visually appealing?",
+            "0_point_standard": "The first and second images lack aesthetic appeal or fail to accurately represent quantum entanglement.",
+            "1_point_standard": "The first and second images are visually appealing, professionally rendered, and accurately represent the principles of quantum entanglement."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/concept_visualization_0001/images.txt b/dataset/concept_visualization_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/concept_visualization_0001/instruction.txt b/dataset/concept_visualization_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..584c8a3b4e8cdc8350ee2ae791ba48a1328d4caa
--- /dev/null
+++ b/dataset/concept_visualization_0001/instruction.txt
@@ -0,0 +1 @@
+Generate three images that illustrate the phenomenon of quantum entanglement between particles. The first image should depict two particles spatially separated by a large distance but connected through entangled light beams. The second image should show how one particle reacts instantly when the other is observed. The third image should present a holistic view where the states of both particles are correlated, overcoming spatial distance and forming an inseparable unified field.
\ No newline at end of file
diff --git a/dataset/concept_visualization_0001/meta.json b/dataset/concept_visualization_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..5e22b341dad69a2c1fb97c2537b60851db832f44
--- /dev/null
+++ b/dataset/concept_visualization_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "concept visualization",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0014",
+    "output_image_count": 3,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/concept_visualization_0002/auto_eval.jsonl b/dataset/concept_visualization_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..7dbc1776c50d1f8f9ae481b97936dd183d5758b3
--- /dev/null
+++ b/dataset/concept_visualization_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "question": "Is the number in the image the digit 4? 0 points: The number in the image is not the digit 4; 1 point: The number in the image is the digit 4. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and second images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate four images that show how electromagnetic fields propagate around a charge. The first image depicts a single charge with electromagnetic waves radiating around it. The second image shows how these waves gradually expand to a larger area. The third image illustrates the interaction between multiple charges, forming a complex electromagnetic field structure. The fourth image presents the overall structure of these fields and their propagation paths through space.\nYour review question is:\nDo the first and second images show a consistent and logical representation of electromagnetic waves expanding from a single charge, with clear progression from a localized field to a broader area? 0 points: The wave representation between the first and second images is inconsistent or fails to show a clear progression in wave expansion. 1 point: The waves are represented consistently across both images, with a logical progression from a localized field to a broader area.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the second and third images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate four images that show how electromagnetic fields propagate around a charge. The first image depicts a single charge with electromagnetic waves radiating around it. The second image shows how these waves gradually expand to a larger area. The third image illustrates the interaction between multiple charges, forming a complex electromagnetic field structure. The fourth image presents the overall structure of these fields and their propagation paths through space.\nYour review question is:\nDoes the third image effectively expand upon the second image’s single-charge field to illustrate the interaction between multiple charges, forming a complex electromagnetic field structure? 0 points: The transition from single-charge fields in the second image to complex, multi-charge interactions in the third image is unclear or lacks detail. 1 point: The transition is clear, with the third image effectively building on the previous field structure to show interactions between multiple charges.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and fourth images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate four images that show how electromagnetic fields propagate around a charge. The first image depicts a single charge with electromagnetic waves radiating around it. The second image shows how these waves gradually expand to a larger area. The third image illustrates the interaction between multiple charges, forming a complex electromagnetic field structure. The fourth image presents the overall structure of these fields and their propagation paths through space.\nYour review question is:\nDo the first and fourth images maintain a consistent visual style, creating a cohesive aesthetic across the series? 0 points: The first and fourth images differ significantly in style, creating a visual dissonance that detracts from the series’ coherence. 1 point: The first and fourth images maintain a consistent style, enhancing the series’ visual cohesion.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the third and fourth images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate four images that show how electromagnetic fields propagate around a charge. The first image depicts a single charge with electromagnetic waves radiating around it. The second image shows how these waves gradually expand to a larger area. The third image illustrates the interaction between multiple charges, forming a complex electromagnetic field structure. The fourth image presents the overall structure of these fields and their propagation paths through space.\nYour review question is:\nDoes the fourth image clearly represent the overall field structure, expanding upon the multi-charge field complexity depicted in the third image and showing a cohesive propagation path through space? 0 points: The fourth image does not fully convey the overall field structure or lacks cohesion with the complex field shown in the third image. 1 point: The fourth image provides a complete view of the overall field structure, logically extending from the third image to show propagation paths through space.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the second and fourth images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate four images that show how electromagnetic fields propagate around a charge. The first image depicts a single charge with electromagnetic waves radiating around it. The second image shows how these waves gradually expand to a larger area. The third image illustrates the interaction between multiple charges, forming a complex electromagnetic field structure. The fourth image presents the overall structure of these fields and their propagation paths through space.\nYour review question is:\nDo the second and fourth images accurately reflect the scientific concept of electromagnetic wave propagation, showing clear and realistic wave patterns and field behavior? 0 points: The wave patterns and field behavior lack scientific accuracy or realism, making it difficult to interpret the field dynamics. 1 point: The wave patterns and field behavior are depicted accurately, enhancing the scientific clarity and realism of electromagnetic wave propagation.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/concept_visualization_0002/eval.json b/dataset/concept_visualization_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..73259a89dd7bae0874fda80e2a4b7529904890f7
--- /dev/null
+++ b/dataset/concept_visualization_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements of the text description?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do the first and second images consistently and reasonably depict the process of electromagnetic waves expanding from a single charge, showing a clear progression from a local field to a larger area?",
+            "0_point_standard": "The wave representations in the first and second images are inconsistent or fail to show a clear progression of wave expansion.",
+            "1_point_standard": "The wave representations in both images are consistent, and the progression from a local field to a larger area is logically reasonable."
+        },
+        {
+            "question": "Does the third image effectively expand on the single charge field in the second image to demonstrate the interaction of multiple charges, forming a complex electromagnetic field structure?",
+            "0_point_standard": "The transition from the single charge field in the second image to the interaction of multiple charges in the third image is unclear or lacking in detail.",
+            "1_point_standard": "The transition is clear, and the third image effectively demonstrates the interaction of multiple charges on the previous field structure."
+        },
+        {
+            "question": "Do the first and fourth images maintain consistency in visual style, creating a harmonious aesthetic effect for the entire series?",
+            "0_point_standard": "There are significant style differences between the first and fourth images, affecting the visual coherence of the series.",
+            "1_point_standard": "The first and fourth images have a consistent style, enhancing the visual coherence of the series."
+        },
+        {
+            "question": "Does the fourth image clearly display the overall field structure, expand on the complexity of the multi-charge field in the third image, and show its propagation path in space?",
+            "0_point_standard": "The fourth image fails to adequately represent the overall field structure or lacks coherence with the complex field structure in the third image.",
+            "1_point_standard": "The fourth image fully displays the overall field structure, logically extending from the third image, and shows the propagation path in space."
+        },
+        {
+            "question": "Do the second and fourth images accurately reflect the scientific concepts of electromagnetic wave propagation, displaying clear and realistic waveforms and field behaviors?",
+            "0_point_standard": "The waveforms and field behaviors lack scientific accuracy or realism, making the field dynamics difficult to interpret.",
+            "1_point_standard": "The waveforms and field behaviors are accurately depicted, enhancing the scientific clarity and realism of electromagnetic wave propagation."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/concept_visualization_0002/images.txt b/dataset/concept_visualization_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/concept_visualization_0002/instruction.txt b/dataset/concept_visualization_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5072b6d59d09fd51cb3f8646d8ee3a1ac8979a82
--- /dev/null
+++ b/dataset/concept_visualization_0002/instruction.txt
@@ -0,0 +1 @@
+Generate four images that show how electromagnetic fields propagate around a charge. The first image depicts a single charge with electromagnetic waves radiating around it. The second image shows how these waves gradually expand to a larger area. The third image illustrates the interaction between multiple charges, forming a complex electromagnetic field structure. The fourth image presents the overall structure of these fields and their propagation paths through space.
\ No newline at end of file
diff --git a/dataset/concept_visualization_0002/meta.json b/dataset/concept_visualization_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..de4d3c6a2b52a393934aad0d0937ba8c6605d5b1
--- /dev/null
+++ b/dataset/concept_visualization_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "concept visualization",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0014",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/concept_visualization_0003/auto_eval.jsonl b/dataset/concept_visualization_0003/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e1666b339de223f19228b30ff32c107f09c95203
--- /dev/null
+++ b/dataset/concept_visualization_0003/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "Is the number in the image the digit 2? 0 points: The number in the image is not the digit 2; 1 point: The number in the image is the digit 2. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first image of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate two images that depict the formation and propagation of gravitational waves. The first image should show how the interaction between two celestial bodies generates gravitational waves, with ripples emanating from their mutual rotation. The second image should present these gravitational waves propagating outward, creating ripples in spacetime as they spread through the universe.\nYour review question is:\nDoes the first image effectively illustrate gravitational waves being generated by the interaction between two celestial bodies, with clear depictions of ripples emanating from their rotation? 0 points: The image does not clearly represent gravitational wave formation, lacking distinct ripples or a clear indication of interaction between celestial bodies. 1 point: The image effectively shows gravitational wave formation, with clear ripples emanating from the rotating celestial bodies.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second image of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate two images that depict the formation and propagation of gravitational waves. The first image should show how the interaction between two celestial bodies generates gravitational waves, with ripples emanating from their mutual rotation. The second image should present these gravitational waves propagating outward, creating ripples in spacetime as they spread through the universe.\nYour review question is:\nDoes the second image accurately depict the propagation of gravitational waves through spacetime, with ripples extending outward in a way that represents spreading waves? 0 points: The image fails to show a clear or realistic depiction of gravitational wave propagation, making the concept hard to interpret. 1 point: The image clearly depicts gravitational waves propagating through spacetime, with ripples spreading outward in a realistic manner.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and second images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate two images that depict the formation and propagation of gravitational waves. The first image should show how the interaction between two celestial bodies generates gravitational waves, with ripples emanating from their mutual rotation. The second image should present these gravitational waves propagating outward, creating ripples in spacetime as they spread through the universe.\nYour review question is:\nDo the first and second images maintain a consistent visual style, creating a cohesive aesthetic that connects the two stages of gravitational wave formation and propagation? 0 points: The images have noticeable differences in style, reducing the coherence of the visual sequence. 1 point: The images maintain a consistent visual style, enhancing the series’ cohesion and presenting a unified portrayal of gravitational waves.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and second images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate two images that depict the formation and propagation of gravitational waves. The first image should show how the interaction between two celestial bodies generates gravitational waves, with ripples emanating from their mutual rotation. The second image should present these gravitational waves propagating outward, creating ripples in spacetime as they spread through the universe.\nYour review question is:\nDo both images accurately reflect scientific concepts related to gravitational waves, including the realistic representation of wave formation and propagation in spacetime? 0 points: The images lack scientific accuracy, with unrealistic depictions of wave formation or propagation that do not align with gravitational wave theory. 1 point: Both images are scientifically accurate, with realistic portrayals of wave formation and propagation in spacetime.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and second images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate two images that depict the formation and propagation of gravitational waves. The first image should show how the interaction between two celestial bodies generates gravitational waves, with ripples emanating from their mutual rotation. The second image should present these gravitational waves propagating outward, creating ripples in spacetime as they spread through the universe.\nYour review question is:\nDo both images exhibit a high level of aesthetic quality, with a visually appealing composition and strong visual impact that effectively communicates the concept of gravitational waves? 0 points: The images lack aesthetic appeal, with weak composition or unprofessional design that detracts from the concept’s visual impact. 1 point: The images have strong aesthetic appeal, with balanced composition and impactful visuals that effectively communicate the concept of gravitational waves.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/concept_visualization_0003/eval.json b/dataset/concept_visualization_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..97b350b7fbe105dc3b7e3225adca340de0cc14d1
--- /dev/null
+++ b/dataset/concept_visualization_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the number of output images consistent with the requirements of the text description?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Does the first image effectively demonstrate the gravitational waves produced by the interaction of two celestial bodies, clearly depicting the ripples emitted by their rotation?",
+            "0_point_standard": "The image fails to clearly represent the formation of gravitational waves, lacking distinct ripples or clear indications of celestial interaction.",
+            "1_point_standard": "The image effectively demonstrates the formation of gravitational waves, clearly depicting the ripples emitted by rotating celestial bodies."
+        },
+        {
+            "question": "Does the second image accurately depict the process of gravitational waves propagating through spacetime, with ripples expanding outward to illustrate the wave's diffusion?",
+            "0_point_standard": "The image fails to clearly or realistically demonstrate the propagation of gravitational waves, making the concept difficult to understand.",
+            "1_point_standard": "The image clearly demonstrates the propagation of gravitational waves through spacetime, with ripples expanding outward in a realistic manner."
+        },
+        {
+            "question": "Do the first and second images maintain a consistent visual style, creating a harmonious aesthetic that connects the two stages of gravitational wave formation and propagation?",
+            "0_point_standard": "The images have noticeable stylistic differences, reducing the coherence of the visual sequence.",
+            "1_point_standard": "The images maintain a consistent visual style, enhancing the coherence of the series and presenting a unified depiction of gravitational waves."
+        },
+        {
+            "question": "Do these two images accurately reflect the scientific concepts related to gravitational waves, including the true representation of their formation and propagation through spacetime?",
+            "0_point_standard": "The images lack scientific accuracy, with unrealistic representations of wave formation or propagation that do not align with gravitational wave theory.",
+            "1_point_standard": "Both images are scientifically accurate, realistically representing the formation and propagation of gravitational waves through spacetime."
+        },
+        {
+            "question": "Do these two images possess a high level of aesthetic quality, with visually appealing composition and strong visual impact, effectively conveying the concept of gravitational waves?",
+            "0_point_standard": "The images lack aesthetic appeal, with weak composition or unprofessional design, diminishing the visual impact of the concept.",
+            "1_point_standard": "The images have strong aesthetic appeal, with balanced composition and strong visual impact, effectively conveying the concept of gravitational waves."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/concept_visualization_0003/images.txt b/dataset/concept_visualization_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/concept_visualization_0003/instruction.txt b/dataset/concept_visualization_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..59a32140cf048f80044085fcbd2b66e3f517fd35
--- /dev/null
+++ b/dataset/concept_visualization_0003/instruction.txt
@@ -0,0 +1 @@
+Generate two images that depict the formation and propagation of gravitational waves. The first image should show how the interaction between two celestial bodies generates gravitational waves, with ripples emanating from their mutual rotation. The second image should present these gravitational waves propagating outward, creating ripples in spacetime as they spread through the universe.
\ No newline at end of file
diff --git a/dataset/concept_visualization_0003/meta.json b/dataset/concept_visualization_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..65833bb56d598459de37a3eccf656cfb51112e7c
--- /dev/null
+++ b/dataset/concept_visualization_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "concept visualization",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0014",
+    "output_image_count": 2,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/concept_visualization_0004/auto_eval.jsonl b/dataset/concept_visualization_0004/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..6fe74e25c20093480027508c7683a4eb36af9c4e
--- /dev/null
+++ b/dataset/concept_visualization_0004/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "question": "Is the number in the image the digit 3? 0 points: The number in the image is not the digit 3; 1 point: The number in the image is the digit 3. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and second images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that visualize the process of signal transmission between biological cells. The first image should depict two cells separated by a distance, with signaling molecules being emitted from one cell. The second image should show the signaling molecules traveling toward the target cell. The third image should present the target cell receiving the signal and initiating an internal response, dynamically unfolding within the cellular microenvironment.\nYour review question is:\nDo the first and second images clearly illustrate the process of signal emission and the movement of signaling molecules from one cell toward the target cell? 0 points: The emission or movement of signaling molecules is unclear or inconsistent, making it difficult to follow the signal transmission process. 1 point: The emission and movement of signaling molecules are clearly depicted in both images, effectively conveying the signal transmission process.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and third images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that visualize the process of signal transmission between biological cells. The first image should depict two cells separated by a distance, with signaling molecules being emitted from one cell. The second image should show the signaling molecules traveling toward the target cell. The third image should present the target cell receiving the signal and initiating an internal response, dynamically unfolding within the cellular microenvironment.\nYour review question is:\nDo the first and third images maintain a consistent visual style, creating a cohesive aesthetic that connects the initial signaling stage with the target cell’s response? 0 points: The first and third images differ noticeably in style, disrupting the visual coherence of the sequence. 1 point: The first and third images maintain a consistent style, providing a cohesive and unified visual representation of the signaling process.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the second and third images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that visualize the process of signal transmission between biological cells. The first image should depict two cells separated by a distance, with signaling molecules being emitted from one cell. The second image should show the signaling molecules traveling toward the target cell. The third image should present the target cell receiving the signal and initiating an internal response, dynamically unfolding within the cellular microenvironment.\nYour review question is:\nDoes the third image clearly show the target cell receiving the signal and initiating an internal response, following from the second image’s portrayal of signal movement? 0 points: The target cell’s response or the reception of the signal is unclear, making the sequence difficult to interpret. 1 point: The third image clearly shows the target cell’s response, logically following the movement of signaling molecules shown in the second image.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the first and third images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that visualize the process of signal transmission between biological cells. The first image should depict two cells separated by a distance, with signaling molecules being emitted from one cell. The second image should show the signaling molecules traveling toward the target cell. The third image should present the target cell receiving the signal and initiating an internal response, dynamically unfolding within the cellular microenvironment.\nYour review question is:\nDo the first and third images accurately depict the biological process of cell signaling, including the realistic emission, reception, and response of signaling molecules? 0 points: The biological process is inaccurately or unrealistically depicted, reducing the scientific validity of the images. 1 point: Both images accurately represent the biological process, showing realistic emission, reception, and cellular response.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the second and third images of the response provided by a student. The task objective is to generate a series of images to express an abstract concept based on the text requirements.\nThe text requirement is:\nGenerate three images that visualize the process of signal transmission between biological cells. The first image should depict two cells separated by a distance, with signaling molecules being emitted from one cell. The second image should show the signaling molecules traveling toward the target cell. The third image should present the target cell receiving the signal and initiating an internal response, dynamically unfolding within the cellular microenvironment.\nYour review question is:\nDo the second and third images exhibit high aesthetic quality and visual impact, enhancing the clarity and appeal of the cell signaling process? 0 points: The images lack aesthetic appeal, with weak composition or unprofessional design that detracts from the visual impact. 1 point: The images are visually appealing, with balanced composition and strong visual impact that effectively illustrates the cell signaling process.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/concept_visualization_0004/eval.json b/dataset/concept_visualization_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a045811d842d3d39e50b2177162a737c78a83e2
--- /dev/null
+++ b/dataset/concept_visualization_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements of the text description?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do the first and second images clearly demonstrate the emission and movement of signal molecules from one cell to the target cell?",
+            "0_point_standard": "The emission or movement of signal molecules is unclear or inconsistent, making the signal transmission process difficult to understand.",
+            "1_point_standard": "The emission and movement of signal molecules are clearly demonstrated in both images, effectively conveying the signal transmission process."
+        },
+        {
+            "question": "Do the first and third images maintain a consistent visual style, creating a harmonious aesthetic that connects the initial signal stage with the target cell's response?",
+            "0_point_standard": "There are noticeable style differences between the first and third images, disrupting the visual coherence of the sequence.",
+            "1_point_standard": "The first and third images maintain a consistent style, providing a unified visual representation of the signal transmission process."
+        },
+        {
+            "question": "Does the third image clearly show the process of the target cell receiving the signal and starting an internal response, consistent with the signal movement process in the second image?",
+            "0_point_standard": "The response of the target cell or the reception of the signal is unclear, making the sequence difficult to understand.",
+            "1_point_standard": "The third image clearly shows the target cell's response, logically consistent with the signal molecule movement process in the second image."
+        },
+        {
+            "question": "Do the first and third images accurately depict the biological process of cell signal transduction, including the realistic emission and reception of signal molecules and the resulting cell response?",
+            "0_point_standard": "The depiction of the biological process is inaccurate or unrealistic, reducing the scientific validity of the images.",
+            "1_point_standard": "The two images accurately depict the biological process, showing the realistic emission and reception of signal molecules and the resulting cell response."
+        },
+        {
+            "question": "Do the second and third images possess a high level of aesthetic quality and visual impact, enhancing the clarity and appeal of the cell signaling process?",
+            "0_point_standard": "The images lack aesthetic appeal, with weak composition or unprofessional design, diminishing visual impact.",
+            "1_point_standard": "The images are visually appealing, with balanced composition and strong visual impact, effectively showcasing the cell signaling process."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/concept_visualization_0004/images.txt b/dataset/concept_visualization_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/concept_visualization_0004/instruction.txt b/dataset/concept_visualization_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93f18d729d624d5bf9b6d33c54a2d3da056edaff
--- /dev/null
+++ b/dataset/concept_visualization_0004/instruction.txt
@@ -0,0 +1 @@
+Generate three images that visualize the process of signal transmission between biological cells. The first image should depict two cells separated by a distance, with signaling molecules being emitted from one cell. The second image should show the signaling molecules traveling toward the target cell. The third image should present the target cell receiving the signal and initiating an internal response, dynamically unfolding within the cellular microenvironment.
\ No newline at end of file
diff --git a/dataset/concept_visualization_0004/meta.json b/dataset/concept_visualization_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9ad02b249f7c93a4e305829581d278d2fcb2e5e9
--- /dev/null
+++ b/dataset/concept_visualization_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "concept visualization",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0014",
+    "output_image_count": 3,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0001/auto_eval.jsonl b/dataset/couple_icon_generation_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..8680d248a12fd9e64135562f5f40aa6420e15a5c
--- /dev/null
+++ b/dataset/couple_icon_generation_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "Is the number in the image the digit 2? 0 points: The number in the image is not the digit 2; 1 point: The number in the image is the digit 2. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male wearing black glasses and a denim jacket, and the female in a white dress with flowing hair. Both share a nighttime city skyline background, with the male's background featuring skyscrapers and lights, while the female's has a starry sky.\nYour review question is:\nDo the two avatars accurately reflect the specified genders as described in the text prompt (e.g., one male and one female)? 0 points: The gender representation does not align with the prompt, lacking clear distinction or accuracy. 1 point: The avatars clearly represent the specified genders, accurately reflecting the text description.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male wearing black glasses and a denim jacket, and the female in a white dress with flowing hair. Both share a nighttime city skyline background, with the male's background featuring skyscrapers and lights, while the female's has a starry sky.\nYour review question is:\nAre the two avatars visually consistent in style, including art style, color palette, and overall aesthetic? 0 points: The avatars display noticeable style differences, lacking coherence as a matching pair. 1 point: The avatars maintain a consistent style, including color palette and aesthetic, creating a cohesive and matching appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male wearing black glasses and a denim jacket, and the female in a white dress with flowing hair. Both share a nighttime city skyline background, with the male's background featuring skyscrapers and lights, while the female's has a starry sky.\nYour review question is:\nDo the two avatars reflect a similar emotional tone or theme (e.g., both smiling, both with matching expressions) that conveys the concept of a couple? 0 points: The avatars show conflicting expressions or themes, making it hard to interpret them as a couple. 1 point: The avatars share a harmonious emotional tone or theme, making them recognizable as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male wearing black glasses and a denim jacket, and the female in a white dress with flowing hair. Both share a nighttime city skyline background, with the male's background featuring skyscrapers and lights, while the female's has a starry sky.\nYour review question is:\nDo the two avatars feature similar character design elements that connect them as a couple (e.g., matching accessories, similar clothing style)? 0 points: The avatars lack connecting design elements, reducing their perceived connection as a pair. 1 point: The avatars include similar or complementary design details, enhancing their connection as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male wearing black glasses and a denim jacket, and the female in a white dress with flowing hair. Both share a nighttime city skyline background, with the male's background featuring skyscrapers and lights, while the female's has a starry sky.\nYour review question is:\nDo the two avatars exhibit a high level of aesthetic quality and balance, creating a visually appealing pair? 0 points: The avatars lack aesthetic appeal or balance, appearing less polished or visually inconsistent as a pair. 1 point: The avatars are visually appealing, with balanced composition and high aesthetic quality that enhances their presentation as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/couple_icon_generation_0001/eval.json b/dataset/couple_icon_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..fb7c4265e44c0196c8a6f7e51fa521c83d19a87b
--- /dev/null
+++ b/dataset/couple_icon_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the number of output images consistent with the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do these two avatars accurately reflect the specified gender described in the text prompt (e.g., one male, one female)?",
+            "0_point_standard": "The gender representation does not match the prompt, lacking clear distinction or accuracy.",
+            "1_point_standard": "The avatars clearly reflect the specified gender, accurately embodying the text description."
+        },
+        {
+            "question": "Do these two avatars maintain consistency in visual style, including artistic style, color scheme, and overall aesthetics?",
+            "0_point_standard": "The avatars exhibit noticeable differences in style, lacking coherence as a matching pair.",
+            "1_point_standard": "The avatars maintain consistency in style, including color scheme and aesthetics, forming a cohesive matching appearance."
+        },
+        {
+            "question": "Do these two avatars present a similar emotional tone or theme (e.g., both smiling, expressing a consistent emotion) that conveys the concept of a couple?",
+            "0_point_standard": "The expressions or themes of the avatars conflict with each other, making it difficult to interpret them as a couple.",
+            "1_point_standard": "The avatars have a harmonious emotional tone or theme, making them easily recognizable as a couple."
+        },
+        {
+            "question": "Do these two avatars have similar character design elements that connect them as a couple (e.g., matching accessories, similar clothing style)?",
+            "0_point_standard": "The avatars lack connecting design elements, reducing their recognition as a couple pair.",
+            "1_point_standard": "The avatars include similar or complementary design details, enhancing their association as a couple."
+        },
+        {
+            "question": "Do these two avatars exhibit a high level of aesthetic quality and balance, forming a visually appealing combination?",
+            "0_point_standard": "The avatars lack aesthetic appeal or balance, appearing less refined or visually inconsistent as a pair.",
+            "1_point_standard": "The avatars are highly visually appealing, well-balanced in composition, and of high aesthetic quality, enhancing their presentation as a couple."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0001/images.txt b/dataset/couple_icon_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/couple_icon_generation_0001/instruction.txt b/dataset/couple_icon_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..37b237c40069c6b5a2d56a9895d931b3c8cf3793
--- /dev/null
+++ b/dataset/couple_icon_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate two images forming a pair of couple avatars: a male wearing black glasses and a denim jacket, and a female wearing a white dress with flowing hair. Both have a city skyline at night in the background, with lights from skyscrapers in the male's background and a sky full of stars in the female's.
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0001/meta.json b/dataset/couple_icon_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..4a99a24211a5ee8b37e75ad9e45661cddb496bda
--- /dev/null
+++ b/dataset/couple_icon_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "couple icon generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0017",
+    "output_image_count": 2,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0002/auto_eval.jsonl b/dataset/couple_icon_generation_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..12534202ae1b3c821a68d36e75d445b60e3fd764
--- /dev/null
+++ b/dataset/couple_icon_generation_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "Is the number in the image the digit 2? 0 points: The number in the image is not the digit 2; 1 point: The number in the image is the digit 2. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with the male holding a cup of coffee and wearing a plaid shirt, while the female wears a straw hat and holds a book. Both share a park scene, with a cat on a bench in the male's background and blooming roses in the female's.\nYour review question is:\nDo the two avatars accurately reflect the specified genders as described in the text prompt (e.g., one male and one female)? 0 points: The gender representation does not align with the prompt, lacking clear distinction or accuracy. 1 point: The avatars clearly represent the specified genders, accurately reflecting the text description.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with the male holding a cup of coffee and wearing a plaid shirt, while the female wears a straw hat and holds a book. Both share a park scene, with a cat on a bench in the male's background and blooming roses in the female's.\nYour review question is:\nAre the two avatars visually consistent in style, including art style, color palette, and overall aesthetic? 0 points: The avatars display noticeable style differences, lacking coherence as a matching pair. 1 point: The avatars maintain a consistent style, including color palette and aesthetic, creating a cohesive and matching appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with the male holding a cup of coffee and wearing a plaid shirt, while the female wears a straw hat and holds a book. Both share a park scene, with a cat on a bench in the male's background and blooming roses in the female's.\nYour review question is:\nDo the two avatars reflect a similar emotional tone or theme (e.g., both smiling, both with matching expressions) that conveys the concept of a couple? 0 points: The avatars show conflicting expressions or themes, making it hard to interpret them as a couple. 1 point: The avatars share a harmonious emotional tone or theme, making them recognizable as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with the male holding a cup of coffee and wearing a plaid shirt, while the female wears a straw hat and holds a book. Both share a park scene, with a cat on a bench in the male's background and blooming roses in the female's.\nYour review question is:\nDo the two avatars feature similar character design elements that connect them as a couple (e.g., matching accessories, similar clothing style)? 0 points: The avatars lack connecting design elements, reducing their perceived connection as a pair. 1 point: The avatars include similar or complementary design details, enhancing their connection as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. These are the two images of the response provided by a student. The task objective is to generate a pair of couple icons based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with the male holding a cup of coffee and wearing a plaid shirt, while the female wears a straw hat and holds a book. Both share a park scene, with a cat on a bench in the male's background and blooming roses in the female's.\nYour review question is:\nDo the two avatars exhibit a high level of aesthetic quality and balance, creating a visually appealing pair? 0 points: The avatars lack aesthetic appeal or balance, appearing less polished or visually inconsistent as a pair. 1 point: The avatars are visually appealing, with balanced composition and high aesthetic quality that enhances their presentation as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/couple_icon_generation_0002/eval.json b/dataset/couple_icon_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..c9d46fabfa7399a94eefb904282ff5d911859903
--- /dev/null
+++ b/dataset/couple_icon_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do these two avatars accurately reflect the specified genders described in the text prompt (e.g., one male, one female)?",
+            "0_point_standard": "The gender representation does not match the prompt, lacking clear distinction or accuracy.",
+            "1_point_standard": "The avatars clearly reflect the specified genders, accurately representing the text description."
+        },
+        {
+            "question": "Do these two avatars maintain consistency in visual style, including art style, color scheme, and overall aesthetics?",
+            "0_point_standard": "The avatars have noticeable style differences, lacking coherence as a matching pair.",
+            "1_point_standard": "The avatars maintain consistency in style, including color scheme and aesthetics, creating a harmonious matching appearance."
+        },
+        {
+            "question": "Do these two avatars present a similar emotional tone or theme (e.g., both smiling, expressing consistent emotions) conveying the concept of a couple?",
+            "0_point_standard": "The expressions or themes of the avatars are conflicting, making it difficult to interpret them as a couple.",
+            "1_point_standard": "The avatars have a harmonious emotional tone or theme, making them easily recognizable as a couple."
+        },
+        {
+            "question": "Do these two avatars have similar character design elements that connect them as a couple (e.g., matching accessories, similar clothing style)?",
+            "0_point_standard": "The avatars lack connecting design elements, reducing their recognition as a couple pair.",
+            "1_point_standard": "The avatars contain similar or complementary design details, enhancing their association as a couple."
+        },
+        {
+            "question": "Do these two avatars exhibit a high level of aesthetic quality and balance, forming a visually appealing composition?",
+            "0_point_standard": "The avatars lack aesthetic appeal or balance, appearing less refined or visually inconsistent as a pair.",
+            "1_point_standard": "The avatars are visually appealing, well-balanced, and of high aesthetic quality, enhancing their presentation as a couple."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0002/images.txt b/dataset/couple_icon_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/couple_icon_generation_0002/instruction.txt b/dataset/couple_icon_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e6cac34f117aad027d8dfc7c89c511f135468204
--- /dev/null
+++ b/dataset/couple_icon_generation_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate two images featuring couple headshots: a male holding a cup of coffee and wearing a plaid shirt, and a female wearing a straw hat and holding a book. Both have the same park scene in the background, with a cat on a bench in the male's background and blooming roses in the female's background.
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0002/meta.json b/dataset/couple_icon_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c248a3f1f36897789b1c1f58fbf497344246a35
--- /dev/null
+++ b/dataset/couple_icon_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "couple icon generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0017",
+    "output_image_count": 2,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0003/auto_eval.jsonl b/dataset/couple_icon_generation_0003/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..7ac67a7a1964fde3a778537ca47f522cdb30a09d
--- /dev/null
+++ b/dataset/couple_icon_generation_0003/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "Is the number in the image the digit 2? 0 points: The number in the image is not the digit 2; 1 point: The number in the image is the digit 2. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male dressed in a classic medieval knight's armor holding a sword, and the female in an elegant court dress holding a rose. Both have a castle in the background, with dawn in the male's and sunset in the female's.\nYour review question is:\nDo the two avatars accurately reflect the specified genders as described in the text prompt (e.g., one male and one female)? 0 points: The gender representation does not align with the prompt, lacking clear distinction or accuracy. 1 point: The avatars clearly represent the specified genders, accurately reflecting the text description.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male dressed in a classic medieval knight's armor holding a sword, and the female in an elegant court dress holding a rose. Both have a castle in the background, with dawn in the male's and sunset in the female's.\nYour review question is:\nAre the two avatars visually consistent in style, including art style, color palette, and overall aesthetic? 0 points: The avatars display noticeable style differences, lacking coherence as a matching pair. 1 point: The avatars maintain a consistent style, including color palette and aesthetic, creating a cohesive and matching appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male dressed in a classic medieval knight's armor holding a sword, and the female in an elegant court dress holding a rose. Both have a castle in the background, with dawn in the male's and sunset in the female's.\nYour review question is:\nDo the two avatars reflect a similar emotional tone or theme (e.g., both smiling, both with matching expressions) that conveys the concept of a couple? 0 points: The avatars show conflicting expressions or themes, making it hard to interpret them as a couple. 1 point: The avatars share a harmonious emotional tone or theme, making them recognizable as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male dressed in a classic medieval knight's armor holding a sword, and the female in an elegant court dress holding a rose. Both have a castle in the background, with dawn in the male's and sunset in the female's.\nYour review question is:\nDo the two avatars feature similar character design elements that connect them as a couple (e.g., matching accessories, similar clothing style)? 0 points: The avatars lack connecting design elements, reducing their perceived connection as a pair. 1 point: The avatars include similar or complementary design details, enhancing their connection as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nPlease generate a couple of avatars, with the male dressed in a classic medieval knight's armor holding a sword, and the female in an elegant court dress holding a rose. Both have a castle in the background, with dawn in the male's and sunset in the female's.\nYour review question is:\nDo the two avatars exhibit a high level of aesthetic quality and balance, creating a visually appealing pair? 0 points: The avatars lack aesthetic appeal or balance, appearing less polished or visually inconsistent as a pair. 1 point: The avatars are visually appealing, with balanced composition and high aesthetic quality that enhances their presentation as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/couple_icon_generation_0003/eval.json b/dataset/couple_icon_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..addb89ca3df5d014aadf9f958d13427a4fccc2a8
--- /dev/null
+++ b/dataset/couple_icon_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do these two avatars accurately reflect the specified gender described in the text prompt (e.g., one male, one female)?",
+            "0_point_standard": "The gender representation does not match the prompt, lacking clear distinction or accuracy.",
+            "1_point_standard": "The avatars clearly reflect the specified gender, accurately representing the text description."
+        },
+        {
+            "question": "Do these two avatars maintain consistency in visual style, including artistic style, color scheme, and overall aesthetics?",
+            "0_point_standard": "The avatars have noticeable style differences, lacking coherence as a matching pair.",
+            "1_point_standard": "The avatars maintain consistent style, including color scheme and aesthetics, creating a harmonious matching appearance."
+        },
+        {
+            "question": "Do these two avatars present a similar emotional tone or theme (e.g., both are smiling, expressing a consistent emotion) that conveys the concept of a couple?",
+            "0_point_standard": "The expressions or themes of the avatars are conflicting, difficult to interpret as a couple.",
+            "1_point_standard": "The avatars have a harmonious emotional tone or theme, making them easily recognizable as a couple."
+        },
+        {
+            "question": "Do these two avatars have similar character design elements that connect them as a couple (e.g., matching accessories, similar clothing style)?",
+            "0_point_standard": "The avatars lack connecting design elements, reducing their recognition as a couple.",
+            "1_point_standard": "The avatars contain similar or complementary design details, enhancing their association as a couple."
+        },
+        {
+            "question": "Do these two avatars exhibit a high level of aesthetic quality and balance, forming a visually appealing combination?",
+            "0_point_standard": "The avatars lack aesthetic appeal or balance, appearing less refined or visually inconsistent as a pair.",
+            "1_point_standard": "The avatars have strong visual appeal, balanced composition, and high aesthetic quality, enhancing their presentation as a couple."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0003/images.txt b/dataset/couple_icon_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/couple_icon_generation_0003/instruction.txt b/dataset/couple_icon_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..83071dc9d091a6bf3111d3b48e805b760aab23b4
--- /dev/null
+++ b/dataset/couple_icon_generation_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate two images featuring couple avatars: a male in classical medieval knight's armor, holding a sword, and a female in a gorgeous court dress, holding a rose. Both have an ancient castle in the background, with dawn behind the male and a dusk sunset behind the female.
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0003/meta.json b/dataset/couple_icon_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b1550f27bf0744da46cbc48b3444dbb578b955dd
--- /dev/null
+++ b/dataset/couple_icon_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "couple icon generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0017",
+    "output_image_count": 2,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0004/auto_eval.jsonl b/dataset/couple_icon_generation_0004/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..48c470b6d1acf13f5a696f4f22c8a93aad061599
--- /dev/null
+++ b/dataset/couple_icon_generation_0004/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "Is the number in the image the digit 2? 0 points: The number in the image is not the digit 2; 1 point: The number in the image is the digit 2. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with both the male and female wearing spacesuits. The male's background shows the curve of Earth, while the female's shows a galaxy of stars. The male gazes straight ahead while the female smiles looking into the distance.\nYour review question is:\nDo the two avatars accurately reflect the specified genders as described in the text prompt (e.g., one male and one female)? 0 points: The gender representation does not align with the prompt, lacking clear distinction or accuracy. 1 point: The avatars clearly represent the specified genders, accurately reflecting the text description.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with both the male and female wearing spacesuits. The male's background shows the curve of Earth, while the female's shows a galaxy of stars. The male gazes straight ahead while the female smiles looking into the distance.\nYour review question is:\nAre the two avatars visually consistent in style, including art style, color palette, and overall aesthetic? 0 points: The avatars display noticeable style differences, lacking coherence as a matching pair. 1 point: The avatars maintain a consistent style, including color palette and aesthetic, creating a cohesive and matching appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with both the male and female wearing spacesuits. The male's background shows the curve of Earth, while the female's shows a galaxy of stars. The male gazes straight ahead while the female smiles looking into the distance.\nYour review question is:\nDo the two avatars reflect a similar emotional tone or theme (e.g., both smiling, both with matching expressions) that conveys the concept of a couple? 0 points: The avatars show conflicting expressions or themes, making it hard to interpret them as a couple. 1 point: The avatars share a harmonious emotional tone or theme, making them recognizable as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with both the male and female wearing spacesuits. The male's background shows the curve of Earth, while the female's shows a galaxy of stars. The male gazes straight ahead while the female smiles looking into the distance.\nYour review question is:\nDo the two avatars feature similar character design elements that connect them as a couple (e.g., matching accessories, similar clothing style)? 0 points: The avatars lack connecting design elements, reducing their perceived connection as a pair. 1 point: The avatars include similar or complementary design details, enhancing their connection as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the two images of the response provided by a student. The task objective is to generate a pair of couple icon based on the text requirements.\nThe text requirement is:\nGenerate a couple of avatars, with both the male and female wearing spacesuits. The male's background shows the curve of Earth, while the female's shows a galaxy of stars. The male gazes straight ahead while the female smiles looking into the distance.\nYour review question is:\nDo the two avatars exhibit a high level of aesthetic quality and balance, creating a visually appealing pair? 0 points: The avatars lack aesthetic appeal or balance, appearing less polished or visually inconsistent as a pair. 1 point: The avatars are visually appealing, with balanced composition and high aesthetic quality that enhances their presentation as a couple.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/couple_icon_generation_0004/eval.json b/dataset/couple_icon_generation_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e837d45e1bda2cb6cc253c35d81c25fe7a6566cc
--- /dev/null
+++ b/dataset/couple_icon_generation_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do these two avatars accurately reflect the specified gender described in the text prompt (e.g., one male, one female)?",
+            "0_point_standard": "The gender representation does not match the prompt, lacking clear distinction or accuracy.",
+            "1_point_standard": "The avatars clearly reflect the specified gender, accurately portraying the text description."
+        },
+        {
+            "question": "Do these two avatars maintain consistency in visual style, including art style, color scheme, and overall aesthetics?",
+            "0_point_standard": "The avatars have noticeable differences in style, lacking coherence as a matching pair.",
+            "1_point_standard": "The avatars maintain consistency in style, including color scheme and aesthetics, forming a harmonious matching appearance."
+        },
+        {
+            "question": "Do these two avatars present a similar emotional tone or theme (e.g., both smiling, expressing a consistent emotion), conveying the concept of a couple?",
+            "0_point_standard": "The expressions or themes of the avatars conflict, making it hard to interpret them as a couple.",
+            "1_point_standard": "The avatars have a harmonious emotional tone or theme, making them easily recognizable as a couple."
+        },
+        {
+            "question": "Do these two avatars have similar character design elements that connect them as a couple (e.g., matching accessories, similar clothing style)?",
+            "0_point_standard": "The avatars lack connecting design elements, reducing their recognizability as a couple.",
+            "1_point_standard": "The avatars include similar or complementary design details, enhancing their association as a couple."
+        },
+        {
+            "question": "Do these two avatars have a high level of aesthetic quality and sense of balance, forming a visually appealing combination?",
+            "0_point_standard": "The avatars lack aesthetic appeal or balance, appearing less refined or visually inconsistent as a pair.",
+            "1_point_standard": "The avatars are visually appealing, well-composed, and of high aesthetic quality, enhancing their presentation as a couple."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0004/images.txt b/dataset/couple_icon_generation_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/couple_icon_generation_0004/instruction.txt b/dataset/couple_icon_generation_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc3cc6940bc260b17a75c69a10d6e20f79ec2f07
--- /dev/null
+++ b/dataset/couple_icon_generation_0004/instruction.txt
@@ -0,0 +1 @@
+Generate two images featuring couple avatars, with the male and the female both wearing space suits; the arc of the Earth is visible in the male's background, and nebulae fill the sky in the female's background. The two are in slightly different poses, with the male gazing ahead and the female smiling and looking into the distance.
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_0004/meta.json b/dataset/couple_icon_generation_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ad1d9b09600274866138bee67a549ecece8b14ab
--- /dev/null
+++ b/dataset/couple_icon_generation_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "couple icon generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0017",
+    "output_image_count": 2,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_with_reference_0002/eval.json b/dataset/couple_icon_generation_with_reference_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6f0363ba02220e07b009bd95a0c7373fa90a0dcf
--- /dev/null
+++ b/dataset/couple_icon_generation_with_reference_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated dual icon retain the basic style and artistic elements of the reference avatar?",
+            "0_point_standard": "The style of the dual icon is significantly different from the reference avatar, with notable discrepancies in artistic elements.",
+            "1_point_standard": "The dual icon accurately retains the style and artistic elements of the reference avatar, ensuring visual consistency."
+        },
+        {
+            "question": "Does the generated dual icon accurately reflect the expected gender pairing, complement the reference avatar, or follow specific gender indications in the text?",
+            "0_point_standard": "The gender presentation does not match the expected gender pairing, fails to create a complementary appearance, or ignores specified gender indications.",
+            "1_point_standard": "The gender presentation appropriately complements the reference avatar, or aligns with the specific gender specifications provided in the text input."
+        },
+        {
+            "question": "Does the generated dual icon accurately execute any specific instructions from the text input, such as adding specific accessories or expressions?",
+            "0_point_standard": "The dual icon does not include the specified changes or executes them inaccurately, failing to meet the text input requirements.",
+            "1_point_standard": "The dual icon correctly and accurately incorporates the specified changes based on the text input."
+        },
+        {
+            "question": "Do the expressions and character dynamics in the generated dual icon align with the reference avatar, creating a cohesive and complementary pairing?",
+            "0_point_standard": "Expressions, postures, or visual cues do not match the emotion or dynamics of the reference avatar, leading to a disconnect between the two icons.",
+            "1_point_standard": "Expressions, postures, and visual cues align well with the reference avatar, creating a harmonious and consistent dynamic between the two icons."
+        },
+        {
+            "question": "Does the generated dual icon exhibit high-quality rendering in terms of detail, clarity, and resolution?",
+            "0_point_standard": "The rendering quality of the dual icon is poor, with noticeable issues in detail, clarity, or resolution.",
+            "1_point_standard": "The dual icon is rendered with high detail, clarity, and resolution, reflecting professional quality."
+        },
+        {
+            "question": "Does the overall aesthetic of the generated dual icon meet professional standards, providing a visually appealing and cohesive image?",
+            "0_point_standard": "The dual icon lacks aesthetic appeal, with poorly matched elements leading to a disjointed or unattractive image.",
+            "1_point_standard": "The dual icon is aesthetically pleasing, with a cohesive and visually appealing appearance that meets professional standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_with_reference_0002/images.txt b/dataset/couple_icon_generation_with_reference_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bcaabccc68191e41807474ecc41b83a607dc15ae
--- /dev/null
+++ b/dataset/couple_icon_generation_with_reference_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01tAqqRD1XQUkANUy0a_!!6000000002918-0-tps-736-1308.jpg
diff --git a/dataset/couple_icon_generation_with_reference_0002/instruction.txt b/dataset/couple_icon_generation_with_reference_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8642c26e12ca314aecbe6978abafb7e547eae1d2
--- /dev/null
+++ b/dataset/couple_icon_generation_with_reference_0002/instruction.txt
@@ -0,0 +1 @@
+Generate a male avatar that matches the style of the given female avatar. The male can wear similar sunglasses and maintain a cool expression, while the overall color scheme remains consistent with the female. The smoke element (from smoking) can be retained. The background should remain the same as the female avatar, creating a unique sense of coordination between the couple.
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_with_reference_0002/meta.json b/dataset/couple_icon_generation_with_reference_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..545045480ec09fc79b4e3651a62d4374485503b2
--- /dev/null
+++ b/dataset/couple_icon_generation_with_reference_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "couple icon generation with single reference",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0071",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_with_reference_0003/eval.json b/dataset/couple_icon_generation_with_reference_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d8039a2bffb25792a3e518e365832081afee44ad
--- /dev/null
+++ b/dataset/couple_icon_generation_with_reference_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated duo icon retain the basic style and artistic elements of the reference avatar?",
+            "0_point_standard": "The style of the duo icon is significantly different from the reference avatar, with notable discrepancies in artistic elements.",
+            "1_point_standard": "The duo icon accurately retains the style and artistic elements of the reference avatar, ensuring visual consistency."
+        },
+        {
+            "question": "Does the generated duo icon accurately reflect the expected gender pairing, complementing the reference avatar or adhering to specific gender instructions in the text?",
+            "0_point_standard": "The gender presentation does not match the expected pairing, fails to create a complementary appearance, or ignores specified gender instructions.",
+            "1_point_standard": "The gender presentation appropriately complements the reference avatar or aligns with specific gender specifications provided in the text input."
+        },
+        {
+            "question": "Does the generated duo icon accurately execute any specific instructions from the text input, such as adding specific accessories or expressions?",
+            "0_point_standard": "The duo icon does not include the specified changes or executes them inaccurately, failing to meet the text input requirements.",
+            "1_point_standard": "The duo icon correctly and accurately incorporates the specified changes based on the text input."
+        },
+        {
+            "question": "Are the expressions and character dynamics in the generated duo icon consistent with the reference avatar, creating a cohesive and complementary pairing?",
+            "0_point_standard": "Expressions, poses, or visual cues do not match the mood or dynamics of the reference avatar, resulting in a disconnect between the two icons.",
+            "1_point_standard": "Expressions, poses, and visual cues align well with the reference avatar, creating a harmonious and consistent dynamic between the two icons."
+        },
+        {
+            "question": "Does the generated duo icon demonstrate high-quality rendering in terms of detail, clarity, and resolution?",
+            "0_point_standard": "The duo icon suffers from poor rendering quality, with notable issues in detail, clarity, or resolution.",
+            "1_point_standard": "The duo icon is rendered with high detail, clarity, and resolution, reflecting professional quality."
+        },
+        {
+            "question": "Does the overall aesthetics of the generated duo icon meet professional standards, providing a visually appealing and cohesive image?",
+            "0_point_standard": "The duo icon lacks aesthetic appeal, with poorly combined elements, resulting in a discordant or unattractive image.",
+            "1_point_standard": "The duo icon is aesthetically pleasing, with a cohesive and visually appealing appearance that meets professional standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_with_reference_0003/images.txt b/dataset/couple_icon_generation_with_reference_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a5f7798dcfe7a27e93d7048d9f0ebf5bb162f460
--- /dev/null
+++ b/dataset/couple_icon_generation_with_reference_0003/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01qA5kPb25VFnN0kPYo_!!6000000007531-0-tps-736-1104.jpg
diff --git a/dataset/couple_icon_generation_with_reference_0003/instruction.txt b/dataset/couple_icon_generation_with_reference_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de4224b67a7ea869e37051a00dd8aeb959e24bb5
--- /dev/null
+++ b/dataset/couple_icon_generation_with_reference_0003/instruction.txt
@@ -0,0 +1 @@
+Generate a male avatar that matches the style of the given female avatar. The male can have a different hairstyle but should retain the bright rainbow colors and shared decorative elements like stars, maintaining consistency in the overall visual style. The background color and decorations should match the female avatar, highlighting the perfect harmony of the couple's avatars.
\ No newline at end of file
diff --git a/dataset/couple_icon_generation_with_reference_0003/meta.json b/dataset/couple_icon_generation_with_reference_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..18d312184bdee1152adce3234d6072aecd19a672
--- /dev/null
+++ b/dataset/couple_icon_generation_with_reference_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "couple icon generation with single reference",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0071",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0001/eval.json b/dataset/creativity_transfer_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6fc81b37feb6afd5df9a2137cd0a55d28c011e4d
--- /dev/null
+++ b/dataset/creativity_transfer_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does each generated image clearly reflect the core creative concept of the reference image (e.g., merging instruments with letters or combining food with animals)?",
+            "0_point_standard": "The generated image fails to clearly convey the specified creative concept, deviating from the theme shown in the reference image.",
+            "1_point_standard": "Each generated image clearly reflects the core creative concept of the reference image, remaining true to the specified theme."
+        },
+        {
+            "question": "Is the creative concept consistently applied across all generated images, with each image exhibiting similar creativity and thematic focus?",
+            "0_point_standard": "The creative concept is applied inconsistently, with some images not fully capturing the intended theme.",
+            "1_point_standard": "The creative concept is consistently applied across all images, with each image clearly embodying the theme in a focused way."
+        },
+        {
+            "question": "Do the generated images show variation in design while staying within the bounds of the original creative concept, demonstrating a diverse interpretation of the theme?",
+            "0_point_standard": "The generated images lack design diversity or fail to explore different interpretations within the creative theme.",
+            "1_point_standard": "The generated images showcase a diverse interpretation of the theme, with each image providing a unique and cohesive understanding of the original concept."
+        },
+        {
+            "question": "Are the details and level of refinement in each generated image sufficient to make the concept clear and engaging, thus enhancing the overall quality of the image set?",
+            "0_point_standard": "The generated images lack detail or refinement, making the creative concept unclear or less engaging.",
+            "1_point_standard": "Each generated image possesses a high level of detail and refinement, clearly conveying the concept and making the entire set visually appealing."
+        },
+        {
+            "question": "Do the elements within each image (e.g., color, texture, and lighting) harmoniously blend together, contributing to a cohesive and polished appearance?",
+            "0_point_standard": "The elements within the image do not blend well, resulting in a disjointed or unpolished appearance.",
+            "1_point_standard": "The elements within each image blend well, with color, texture, and lighting harmoniously contributing to a cohesive and polished look."
+        },
+        {
+            "question": "Does the final image set display a high level of aesthetic quality, with each image enhancing the creative concept and contributing to an appealing and professional-looking collection?",
+            "0_point_standard": "The final image set lacks aesthetic appeal or consistency, detracting from the overall visual impact of the collection.",
+            "1_point_standard": "The final image set displays high aesthetic quality, with each image enhancing the creative concept and creating an appealing, professional-looking collection."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0001/images.txt b/dataset/creativity_transfer_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b0543dee40ec23e4227a5d9f2924f1a479d4290
--- /dev/null
+++ b/dataset/creativity_transfer_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01WlSESQ1uVI3JjtTTy_!!6000000006042-0-tps-393-390.jpg
diff --git a/dataset/creativity_transfer_0001/instruction.txt b/dataset/creativity_transfer_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..13c4e8a76638a44593d0efe28fecd90a36d2a3d4
--- /dev/null
+++ b/dataset/creativity_transfer_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a series of images that follow the same creative style as the provided image. The image I provided shows a combination of garlic and a swan. You are required to generate four additional creative images, each combining different elements in a similar creative fashion. The first image should feature a combination of corn and a bee, with the bee's body resembling an ear of corn; the second image should combine a carrot and a cat, with the cat's body shaped like a carrot; the third image should combine an orange and a fish, with the fish's body being replaced by an orange; the fourth image should combine leaves and a butterfly, where the butterfly's wings are shaped like leaves. All images must maintain the same creative style, reflecting the same level of imagination and fusion of elements.
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0001/meta.json b/dataset/creativity_transfer_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..61875bf7ef299164fe39f0560380b33d03f5b198
--- /dev/null
+++ b/dataset/creativity_transfer_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "creativity transfer",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0042",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0003/eval.json b/dataset/creativity_transfer_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..24d77ba2e439654e72398ac47b2e209b06efb51e
--- /dev/null
+++ b/dataset/creativity_transfer_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does each generated image clearly reflect the core creative concept of the reference image (e.g., merging instruments with letters, or combining food with animals)?",
+            "0_point_standard": "The generated image fails to clearly convey the specified creative concept, deviating from the theme shown in the reference image.",
+            "1_point_standard": "Each generated image clearly reflects the core creative concept of the reference image, staying true to the specified theme."
+        },
+        {
+            "question": "Is the creative concept consistently applied across all generated images, with each image showcasing similar creativity and thematic focus?",
+            "0_point_standard": "The creative concept is applied inconsistently, with some images failing to fully capture the intended theme.",
+            "1_point_standard": "The creative concept is consistently applied across all images, with each image clearly reflecting the theme in a focused way."
+        },
+        {
+            "question": "Do the generated images exhibit variation in design while remaining within the scope of the original creative concept, demonstrating diverse interpretations of the theme?",
+            "0_point_standard": "The generated images lack design diversity or fail to explore different interpretations within the creative theme.",
+            "1_point_standard": "The generated images demonstrate diverse interpretations of the theme, with each image offering a unique and cohesive understanding of the original concept."
+        },
+        {
+            "question": "Are the details and level of refinement in each generated image sufficient to make the concept clear and engaging, thereby enhancing the overall quality of the entire set of images?",
+            "0_point_standard": "The generated images lack detail or refinement, making the creative concept unclear or less engaging.",
+            "1_point_standard": "Each generated image features a high level of detail and refinement, clearly conveying the concept and providing visual appeal to the entire set of images."
+        },
+        {
+            "question": "Do the elements within each image (e.g., color, texture, and lighting) harmoniously blend together, contributing to a cohesive and polished appearance?",
+            "0_point_standard": "The elements within the image do not blend well, resulting in a disjointed or unpolished appearance.",
+            "1_point_standard": "The elements within each image blend well together, with harmonious color, texture, and lighting, contributing to a cohesive and polished look."
+        },
+        {
+            "question": "Does the final set of images exhibit a high level of aesthetic quality, with each image enhancing the creative concept and contributing to an appealing and professional-looking collection?",
+            "0_point_standard": "The final set of images lacks aesthetic appeal or consistency, detracting from the overall visual impact of the collection.",
+            "1_point_standard": "The final set of images exhibits high aesthetic quality, with each image enhancing the creative concept and creating an appealing, professional-looking collection."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0003/images.txt b/dataset/creativity_transfer_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e4105bb6aabda0ed6af9613c47c75160c4ad3e0d
--- /dev/null
+++ b/dataset/creativity_transfer_0003/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01jVgVm51mHzcyNANO1_!!6000000004930-0-tps-382-484.jpg
diff --git a/dataset/creativity_transfer_0003/instruction.txt b/dataset/creativity_transfer_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2a69003a2ab5c5fef037c19f59935b9962811b5a
--- /dev/null
+++ b/dataset/creativity_transfer_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate a series of images that follow the same creative style as the provided image. The image merges Leonardo da Vinci's “Vitruvian Man” with modern rock elements, showing a man dynamically playing a guitar. You are required to generate four additional creative images, each retaining the basic form of the “Vitruvian Man” while incorporating modern or creative elements. The first image should depict the “Vitruvian Man” wearing a spacesuit, floating in space, symbolizing exploration and technology; the second image should show the “Vitruvian Man” holding a laptop and smartphone, representing modern connectivity and technology; the third image should show the “Vitruvian Man” riding a skateboard, in a dynamic pose symbolizing movement and sport; the fourth image should show the “Vitruvian Man” holding a paintbrush and palette, symbolizing creativity and art. All images must maintain a consistent creative style, blending the classical figure with modern elements.
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0003/meta.json b/dataset/creativity_transfer_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..cc3ad503535db2ff199b3f242398d0f26fbc418e
--- /dev/null
+++ b/dataset/creativity_transfer_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "creativity transfer",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0042",
+    "output_image_count": 4,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0004/eval.json b/dataset/creativity_transfer_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..34320a5dfc3ef99dd4414e7bd0960d8b3c8840fb
--- /dev/null
+++ b/dataset/creativity_transfer_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does each generated image clearly reflect the core creative concept of the reference image (e.g., merging musical instruments with letters, or combining food with animals)?",
+            "0_point_standard": "The generated images fail to clearly convey the specified creative concept, deviating from the theme shown in the reference image.",
+            "1_point_standard": "Each generated image clearly reflects the core creative concept of the reference image, staying true to the specified theme."
+        },
+        {
+            "question": "Is the creative concept consistently applied across all generated images, with each image showcasing similar creativity and thematic focus?",
+            "0_point_standard": "The application of the creative concept is inconsistent, with some images not fully capturing the intended theme.",
+            "1_point_standard": "The creative concept is consistently applied across all images, with each image clearly reflecting the theme in a focused way."
+        },
+        {
+            "question": "Do the generated images exhibit variation in design while remaining within the scope of the original creative concept, showcasing diverse interpretations of the theme?",
+            "0_point_standard": "The generated images lack design diversity or fail to explore different interpretations within the creative theme.",
+            "1_point_standard": "The generated images showcase diverse interpretations of the theme, with each image offering a unique and cohesive understanding of the original concept."
+        },
+        {
+            "question": "Is the level of detail and refinement in each generated image sufficient to make the concept clear and engaging, thereby enhancing the overall quality of the set?",
+            "0_point_standard": "The generated images lack detail or refinement, making the creative concept unclear or less engaging.",
+            "1_point_standard": "Each generated image has a high level of detail and refinement, clearly conveying the concept and enhancing the visual appeal of the entire set."
+        },
+        {
+            "question": "Do the elements within each image (e.g., color, texture, and lighting) harmoniously blend together, contributing to a cohesive and polished appearance?",
+            "0_point_standard": "The elements within the images do not blend well, resulting in a disjointed or unpolished appearance.",
+            "1_point_standard": "The elements within each image blend well, with harmonious color, texture, and lighting, contributing to a cohesive and polished look."
+        },
+        {
+            "question": "Does the final set of images exhibit a high level of aesthetic quality, with each image enhancing the creative concept and contributing to an appealing and professional-looking collection?",
+            "0_point_standard": "The final image set lacks aesthetic appeal or consistency, detracting from the overall visual impact of the collection.",
+            "1_point_standard": "The final image set exhibits high aesthetic quality, with each image enhancing the creative concept and creating an appealing, professional-looking collection."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0004/images.txt b/dataset/creativity_transfer_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..67d87d7cb913ed13056c6fecdacf3bceb09a1e1b
--- /dev/null
+++ b/dataset/creativity_transfer_0004/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01j82WvA1qRqc6rxiX2_!!6000000005493-0-tps-809-311.jpg
diff --git a/dataset/creativity_transfer_0004/instruction.txt b/dataset/creativity_transfer_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1548f8b9a17f7cbe0c50a9bd71fc40c2237e5204
--- /dev/null
+++ b/dataset/creativity_transfer_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate a series of images that follow the same creative style as the provided image. The image features the letters A, B, and C composed of musical instruments. You are required to generate four additional letters, each incorporating different musical instruments while maintaining the same style. The first image should generate the letter D, made from the strings and shape of a harp; the second image should generate the letter E, incorporating the keyboard of an electric piano; the third image should generate the letter F, formed by the shape and keys of a flute; the fourth image should generate the letter G, combining the curves and shape of a saxophone. All images must maintain the same creative style, blending letters with musical instruments.
\ No newline at end of file
diff --git a/dataset/creativity_transfer_0004/meta.json b/dataset/creativity_transfer_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..a31bca9b4b36ec75b857e7453e9f4a24cff61eeb
--- /dev/null
+++ b/dataset/creativity_transfer_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "creativity transfer",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0042",
+    "output_image_count": 4,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0002/eval.json b/dataset/drawing_process_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..f21a16524544977728f33f16fe7954fc75c54924
--- /dev/null
+++ b/dataset/drawing_process_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the image sequence logically depict the painting process from sketch to final piece?",
+            "0_point_standard": "The sequence lacks clear progression or logical order, failing to illustrate the painting process.",
+            "1_point_standard": "The sequence clearly and logically depicts the painting process from the initial sketch to the final piece."
+        },
+        {
+            "question": "Does the final piece match the description provided in the text input?",
+            "0_point_standard": "The final piece significantly deviates from the description in the text input.",
+            "1_point_standard": "The final piece accurately matches the description provided in the text input."
+        },
+        {
+            "question": "Is the style of the intermediate process images consistent throughout the sequence?",
+            "0_point_standard": "There is a significant difference in image styles, leading to a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect throughout the process."
+        },
+        {
+            "question": "Is the depiction of main objects or characters consistent throughout the image sequence?",
+            "0_point_standard": "Main objects or characters differ greatly between images, making it difficult to recognize them as the same entity.",
+            "1_point_standard": "Main objects or characters are consistent and easily recognizable as the same entity in all images."
+        },
+        {
+            "question": "Is the image sequence logically accurate in representing the expected steps of the painting process (e.g., sketching, inking, coloring)?",
+            "0_point_standard": "The representation of the painting process is illogical or unrealistic, with clear errors in the sequence of steps.",
+            "1_point_standard": "The sequence accurately and logically represents the expected steps of the painting process."
+        },
+        {
+            "question": "Does the final piece's detail and aesthetic quality meet professional standards and have visual appeal?",
+            "0_point_standard": "The final piece lacks detail, has poor aesthetics, and does not meet professional standards.",
+            "1_point_standard": "The final piece is rich in detail, has excellent aesthetics, meets professional standards, and is visually appealing."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0002/images.txt b/dataset/drawing_process_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/drawing_process_generation_0002/instruction.txt b/dataset/drawing_process_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bd6d18233a253c17707d75aa437681f85ad1b713
--- /dev/null
+++ b/dataset/drawing_process_generation_0002/instruction.txt
@@ -0,0 +1 @@
+This artwork ultimately portrays a bustling market scene, with colorful fruits and vegetables piled on stalls, people moving through narrow alleys, and old red-roofed buildings in the background. The steps are: first, sketch the layout of the market and the outlines of the stalls; then add basic tones and shadows to the buildings and people; next, enrich the details of the goods, more colors appear on the stalls, and the people's figures become clearer; finally, refine the textures of the distant buildings, expressions of the people, and the sunlight reflecting on the streets. The model should generate these steps progressively, ensuring the details increase as the process unfolds.
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0002/meta.json b/dataset/drawing_process_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..c528d97487210657095d4e2ee901fdb78cfbb547
--- /dev/null
+++ b/dataset/drawing_process_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "drawing process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0011",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0003/eval.json b/dataset/drawing_process_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..843772e55a49a731b14601c2e7e6ed43ccbcb02e
--- /dev/null
+++ b/dataset/drawing_process_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the image sequence logically depict the painting process from sketch to final work?",
+            "0_point_standard": "The sequence lacks clear progression or logical order, failing to illustrate the painting process.",
+            "1_point_standard": "The sequence clearly and logically depicts the painting process from the initial sketch to the final work."
+        },
+        {
+            "question": "Does the final work align with the description provided in the text input?",
+            "0_point_standard": "The final work significantly deviates from the description in the text input.",
+            "1_point_standard": "The final work accurately matches the description provided in the text input."
+        },
+        {
+            "question": "Is the style of the intermediate process images consistent throughout the sequence?",
+            "0_point_standard": "There is a significant difference in style among the images, leading to an incoherent visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect throughout the process."
+        },
+        {
+            "question": "Is the depiction of the main object or character consistent across the entire image sequence?",
+            "0_point_standard": "The main object or character varies greatly between images, making it difficult to identify as the same entity.",
+            "1_point_standard": "The main object or character is consistent and easily identifiable as the same entity across all images."
+        },
+        {
+            "question": "Is the image sequence logically accurate in representing the expected steps of the painting process (e.g., sketch, inking, coloring)?",
+            "0_point_standard": "The representation of the painting process is illogical or unrealistic, with clear errors in the step order.",
+            "1_point_standard": "The sequence accurately and logically represents the expected steps of the painting process."
+        },
+        {
+            "question": "Do the details and aesthetics of the final work meet professional standards and possess visual appeal?",
+            "0_point_standard": "The final work lacks detail, has poor aesthetics, and does not meet professional standards.",
+            "1_point_standard": "The final work is rich in detail, has excellent aesthetics, meets professional standards, and is visually appealing."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0003/images.txt b/dataset/drawing_process_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/drawing_process_generation_0003/instruction.txt b/dataset/drawing_process_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5d3a0fdf5857c375969db98fbe4f5068e72c9a69
--- /dev/null
+++ b/dataset/drawing_process_generation_0003/instruction.txt
@@ -0,0 +1 @@
+The final piece is a vibrant city night scene, with neon lights shining, cars flowing through the streets, and tall buildings lit up against a dark sky with a few scattered clouds. The process starts with sketching the city's outline; then add basic colors and shadows to the buildings, with initial light reflections on the streets; next, enrich the lighting details of the buildings, and the figures of pedestrians and cars become clearer; finally, refine the light reflections and sky tones, making the city feel more dynamic and atmospheric. The model should follow these steps, gradually enhancing the scene's details.
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0003/meta.json b/dataset/drawing_process_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..921458d6c7f19be110d50a96177c7abea4b62535
--- /dev/null
+++ b/dataset/drawing_process_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "drawing process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0011",
+    "output_image_count": 4,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0004/eval.json b/dataset/drawing_process_generation_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..fa2930b2d7601904c57ece08885b062952395037
--- /dev/null
+++ b/dataset/drawing_process_generation_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the image sequence logically depict the painting process from sketch to final work?",
+            "0_point_standard": "The sequence lacks clear progression or logical order, failing to illustrate the painting process.",
+            "1_point_standard": "The sequence clearly and logically depicts the painting process from the initial sketch to the final work."
+        },
+        {
+            "question": "Does the final work match the description provided in the text input?",
+            "0_point_standard": "The final work significantly deviates from the description in the text input.",
+            "1_point_standard": "The final work accurately matches the description provided in the text input."
+        },
+        {
+            "question": "Is the style of the intermediate process images consistent throughout the sequence?",
+            "0_point_standard": "The style of the images varies greatly, resulting in a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect throughout the process."
+        },
+        {
+            "question": "Is the depiction of the main objects or characters consistent throughout the image sequence?",
+            "0_point_standard": "The main objects or characters vary greatly between different images, making them difficult to recognize as the same entity.",
+            "1_point_standard": "The main objects or characters are consistent and easily recognizable as the same entity across all images."
+        },
+        {
+            "question": "Is the image sequence logically accurate in representing the expected steps of the painting process (e.g., sketch, ink, coloring)?",
+            "0_point_standard": "The representation of the painting process is illogical or unrealistic, with apparent errors in the step sequence.",
+            "1_point_standard": "The sequence accurately and logically represents the expected steps of the painting process."
+        },
+        {
+            "question": "Do the details and aesthetics of the final work meet professional standards and possess visual appeal?",
+            "0_point_standard": "The final work lacks detail, has poor aesthetics, and does not meet professional standards.",
+            "1_point_standard": "The final work is rich in detail, has excellent aesthetics, meets professional standards, and possesses visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0004/images.txt b/dataset/drawing_process_generation_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/drawing_process_generation_0004/instruction.txt b/dataset/drawing_process_generation_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cecd781fdf2d4df3bc125f15675ed056bc6ab201
--- /dev/null
+++ b/dataset/drawing_process_generation_0004/instruction.txt
@@ -0,0 +1 @@
+The final painting shows a winter village scene, with snow covering rooftops and trees, smoke rising from chimneys, distant mountains faintly visible, and soft clouds in the sky. The steps start by outlining the basic shapes of the houses and trees; then add soft tones for the snow and sky, and smoke begins to appear from chimneys; next, enrich the details of the houses, with shadows and reflections on the snow becoming more defined; finally, refine the light effects of the smoke, the distant village becomes clearer, and the clouds in the sky gain more depth. The model must generate each step sequentially, showcasing the serenity and details of a winter scene.
\ No newline at end of file
diff --git a/dataset/drawing_process_generation_0004/meta.json b/dataset/drawing_process_generation_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..93e1c6f7793ef1ee3cfa291cbe1428930a45d788
--- /dev/null
+++ b/dataset/drawing_process_generation_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "drawing process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0011",
+    "output_image_count": 4,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_expression_design_0001/auto_eval.jsonl b/dataset/dynamic_character_design_expression_design_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..61ad722de8538cf1ba9249a46ef3175382445257
--- /dev/null
+++ b/dataset/dynamic_character_design_expression_design_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg", "0005.jpg"], "question": "Is the number in the image the digit 5? 0 points: The number in the image is not the digit 5; 1 point: The number in the image is the digit 5. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0005.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and fifth images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a knight character in a realistic style, showing only the head or upper body. He is wearing silver armor, with a determined face and short hair. Generate a set of 5 images with different expressions: the first image shows him smiling confidently; the second image shows him frowning in tension and seriousness; the third image shows him laughing with joy, eyes gleaming with victory; the fourth image shows him expressing fatigue and pain, with furrowed brows; the fifth image shows him with an angry expression, teeth clenched, and eyes sharp. Ensure all facial expressions are diverse, while the head and upper body remain consistent, with the same character ID in every image.\nYour review question is:\nDo the first and fifth images maintain a consistent character identity, with facial features, armor, and hairstyle clearly representing the same knight? 0 points: The character identity appears inconsistent, with differences in facial features, armor, or hairstyle that make it hard to recognize the same knight. 1 point: The character identity is consistent, with matching facial features, armor, and hairstyle clearly indicating the same knight.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and third images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a knight character in a realistic style, showing only the head or upper body. He is wearing silver armor, with a determined face and short hair. Generate a set of 5 images with different expressions: the first image shows him smiling confidently; the second image shows him frowning in tension and seriousness; the third image shows him laughing with joy, eyes gleaming with victory; the fourth image shows him expressing fatigue and pain, with furrowed brows; the fifth image shows him with an angry expression, teeth clenched, and eyes sharp. Ensure all facial expressions are diverse, while the head and upper body remain consistent, with the same character ID in every image.\nYour review question is:\nDo the first and third images maintain a consistent realistic style, with the same level of detail and color scheme in both expressions? 0 points: The style differs noticeably between the two images, reducing the coherence of the image set. 1 point: The style is consistent, with the same level of detail and color scheme, making the images cohesive as a set.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and fourth images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a knight character in a realistic style, showing only the head or upper body. He is wearing silver armor, with a determined face and short hair. Generate a set of 5 images with different expressions: the first image shows him smiling confidently; the second image shows him frowning in tension and seriousness; the third image shows him laughing with joy, eyes gleaming with victory; the fourth image shows him expressing fatigue and pain, with furrowed brows; the fifth image shows him with an angry expression, teeth clenched, and eyes sharp. Ensure all facial expressions are diverse, while the head and upper body remain consistent, with the same character ID in every image.\nYour review question is:\nDo the second and fourth images show clear and distinct expressions (seriousness/tension vs. fatigue/pain) that effectively convey different emotional states? 0 points: The expressions lack distinctness or are too similar, failing to clearly convey different emotional states. 1 point: The expressions are clearly distinct and effectively convey the intended emotions, adding diversity to the image set.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and fourth images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a knight character in a realistic style, showing only the head or upper body. He is wearing silver armor, with a determined face and short hair. Generate a set of 5 images with different expressions: the first image shows him smiling confidently; the second image shows him frowning in tension and seriousness; the third image shows him laughing with joy, eyes gleaming with victory; the fourth image shows him expressing fatigue and pain, with furrowed brows; the fifth image shows him with an angry expression, teeth clenched, and eyes sharp. Ensure all facial expressions are diverse, while the head and upper body remain consistent, with the same character ID in every image.\nYour review question is:\nDo the first and fourth images display consistent details in armor and clothing, maintaining the design elements specified for the knight character? 0 points: The armor or clothing details differ significantly between images, reducing the visual coherence of the character. 1 point: The armor and clothing details are consistent across both images, reinforcing the character’s continuity.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0005.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third and fifth images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a knight character in a realistic style, showing only the head or upper body. He is wearing silver armor, with a determined face and short hair. Generate a set of 5 images with different expressions: the first image shows him smiling confidently; the second image shows him frowning in tension and seriousness; the third image shows him laughing with joy, eyes gleaming with victory; the fourth image shows him expressing fatigue and pain, with furrowed brows; the fifth image shows him with an angry expression, teeth clenched, and eyes sharp. Ensure all facial expressions are diverse, while the head and upper body remain consistent, with the same character ID in every image.\nYour review question is:\nDo the third and fifth images exhibit high aesthetic quality, with visually appealing composition and realistic detail in the knight's features and expressions? 0 points: The images lack aesthetic appeal, with weak composition or unrealistic details that detract from the visual impact. 1 point: The images are visually appealing, with realistic detail and strong composition that enhance the knight’s expressions and character presence.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/dynamic_character_design_expression_design_0001/eval.json b/dataset/dynamic_character_design_expression_design_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..9ede204a1a2f53d9fa917fb2b451cbb5c6585cee
--- /dev/null
+++ b/dataset/dynamic_character_design_expression_design_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do the first and fifth images maintain consistent character identity, with facial features, armor, and hairstyle clearly representing the same knight?",
+            "0_point_standard": "Character identity is inconsistent, with differences in facial features, armor, or hairstyle making it difficult to recognize the same knight.",
+            "1_point_standard": "Character identity is consistent, with matching facial features, armor, and hairstyle clearly indicating the same knight."
+        },
+        {
+            "question": "Do the first and third images maintain a consistent realistic style, with the same level of detail and color scheme?",
+            "0_point_standard": "There is a significant style difference between the two images, reducing the coherence of the image set.",
+            "1_point_standard": "The style is consistent, with the same level of detail and color scheme, making the image set appear coherent."
+        },
+        {
+            "question": "Do the second and fourth images display distinct and different expressions (serious/tense vs tired/pained), effectively conveying different emotional states?",
+            "0_point_standard": "The expressions lack significant differences or are too similar, failing to clearly convey different emotional states.",
+            "1_point_standard": "The expressions are distinctly different, effectively conveying the intended emotional states, adding diversity to the image set."
+        },
+        {
+            "question": "Do the first and fourth images maintain consistency in armor and clothing details, in line with the design elements of the knight character?",
+            "0_point_standard": "There are significant differences in armor or clothing details between the images, reducing the visual coherence of the character.",
+            "1_point_standard": "Armor and clothing details are consistent across both images, strengthening the continuity of the character."
+        },
+        {
+            "question": "Do the third and fifth images exhibit a high level of aesthetic quality, with visually appealing composition and realistic details of the knight's features and expressions?",
+            "0_point_standard": "The images lack aesthetic appeal, with weak composition or unrealistic details, diminishing visual impact.",
+            "1_point_standard": "The images are visually appealing, with realistic details and composition enhancing the knight's expressions and presence."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_expression_design_0001/images.txt b/dataset/dynamic_character_design_expression_design_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/dynamic_character_design_expression_design_0001/instruction.txt b/dataset/dynamic_character_design_expression_design_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..54e0146414dbed70320e108c472a94356df727ad
--- /dev/null
+++ b/dataset/dynamic_character_design_expression_design_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a knight character in a realistic style, showing only the head or upper body. He is wearing silver armor, with a determined face and short hair. Generate a set of 5 images with different expressions: the first image shows him smiling confidently; the second image shows him frowning in tension and seriousness; the third image shows him laughing with joy, eyes gleaming with victory; the fourth image shows him expressing fatigue and pain, with furrowed brows; the fifth image shows him with an angry expression, teeth clenched, and eyes sharp. Ensure all facial expressions are diverse, while the head and upper body remain consistent, with the same character ID in every image.
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_expression_design_0001/meta.json b/dataset/dynamic_character_design_expression_design_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..338ab51eccf46c134adad1d0c638a1b57313db14
--- /dev/null
+++ b/dataset/dynamic_character_design_expression_design_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "dynamic character expression design",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0016",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_expression_design_0002/auto_eval.jsonl b/dataset/dynamic_character_design_expression_design_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d099eb03439270cdbf5a00e0fa6d602e75c7195d
--- /dev/null
+++ b/dataset/dynamic_character_design_expression_design_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg", "0005.jpg", "0006.jpg"], "question": "Is the number in the image the digit 6? 0 points: The number in the image is not the digit 6; 1 point: The number in the image is the digit 6. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0005.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and fifth images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style, showing only the head or upper body. She is wearing a large black hat, with sharp facial features and intense eyes. Generate a set of 6 images with different expressions: the first image shows her with a mysterious smile, lips slightly curled; the second image shows her angry, eyes full of rage; the third image shows her laughing maniacally, appearing eerie; the fourth image shows her confused, with furrowed brows; the fifth image shows her calm and composed, with deep, thoughtful eyes; the sixth image shows her highly focused, staring straight ahead. All facial expressions should be varied, while the head and upper body remain consistent, with the same character ID throughout.\nYour review question is:\nDo the first and fifth images maintain a consistent character identity, with the same facial features, hat, and hairstyle representing the witch? 0 points: The character identity appears inconsistent, with noticeable differences in facial features, hat, or hairstyle that make it hard to recognize the same witch. 1 point: The character identity is consistent, with matching facial features, hat, and hairstyle clearly indicating the same character.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and third images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style, showing only the head or upper body. She is wearing a large black hat, with sharp facial features and intense eyes. Generate a set of 6 images with different expressions: the first image shows her with a mysterious smile, lips slightly curled; the second image shows her angry, eyes full of rage; the third image shows her laughing maniacally, appearing eerie; the fourth image shows her confused, with furrowed brows; the fifth image shows her calm and composed, with deep, thoughtful eyes; the sixth image shows her highly focused, staring straight ahead. All facial expressions should be varied, while the head and upper body remain consistent, with the same character ID throughout.\nYour review question is:\nDo the first and third images maintain a consistent sketch style, with similar line quality, shading, and overall sketch aesthetic? 0 points: The style differs noticeably between the two images, reducing the cohesion of the image series. 1 point: The style is consistent, with matching line quality, shading, and aesthetic, ensuring a cohesive series.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and fourth images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style, showing only the head or upper body. She is wearing a large black hat, with sharp facial features and intense eyes. Generate a set of 6 images with different expressions: the first image shows her with a mysterious smile, lips slightly curled; the second image shows her angry, eyes full of rage; the third image shows her laughing maniacally, appearing eerie; the fourth image shows her confused, with furrowed brows; the fifth image shows her calm and composed, with deep, thoughtful eyes; the sixth image shows her highly focused, staring straight ahead. All facial expressions should be varied, while the head and upper body remain consistent, with the same character ID throughout.\nYour review question is:\nDo the second and fourth images display clearly distinct expressions (rage vs. confusion) that effectively convey different emotions? 0 points: The expressions lack distinctness or appear too similar, failing to clearly convey different emotional states. 1 point: The expressions are clearly distinct and effectively convey the intended emotions, adding expressive diversity to the series.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0006.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third and sixth images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style, showing only the head or upper body. She is wearing a large black hat, with sharp facial features and intense eyes. Generate a set of 6 images with different expressions: the first image shows her with a mysterious smile, lips slightly curled; the second image shows her angry, eyes full of rage; the third image shows her laughing maniacally, appearing eerie; the fourth image shows her confused, with furrowed brows; the fifth image shows her calm and composed, with deep, thoughtful eyes; the sixth image shows her highly focused, staring straight ahead. All facial expressions should be varied, while the head and upper body remain consistent, with the same character ID throughout.\nYour review question is:\nDo the third and sixth images display consistent details in the witch’s accessories (e.g., hat shape and positioning) without any unnecessary variation? 0 points: The accessories differ noticeably between the images, reducing continuity in the character’s visual presentation. 1 point: The accessories are consistently detailed across both images, reinforcing the character’s identity and visual continuity.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0005.jpg", "0006.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the fifth and sixth images of the response provided by a student. The task objective is to generate different facial expression designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style, showing only the head or upper body. She is wearing a large black hat, with sharp facial features and intense eyes. Generate a set of 6 images with different expressions: the first image shows her with a mysterious smile, lips slightly curled; the second image shows her angry, eyes full of rage; the third image shows her laughing maniacally, appearing eerie; the fourth image shows her confused, with furrowed brows; the fifth image shows her calm and composed, with deep, thoughtful eyes; the sixth image shows her highly focused, staring straight ahead. All facial expressions should be varied, while the head and upper body remain consistent, with the same character ID throughout.\nYour review question is:\nDo the fifth and sixth images demonstrate high aesthetic quality, with clear sketch detail and realistic portrayal of the witch’s calm and focused expressions? 0 points: The images lack aesthetic appeal, with weak sketch quality or unclear expression portrayal. 1 point: The images have strong aesthetic appeal, with refined sketch detail and realistic expressions that enhance the character’s depth.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/dynamic_character_design_expression_design_0002/eval.json b/dataset/dynamic_character_design_expression_design_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..65afe57c118c3b3efbfa7deca918e7d07b11e0e2
--- /dev/null
+++ b/dataset/dynamic_character_design_expression_design_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements of the text description?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Do the first and fifth images maintain a consistent character identity, with matching facial features, hats, and hairstyles to represent the witch?",
+            "0_point_standard": "The character identity is inconsistent, with noticeable differences in facial features, hats, or hairstyles making it difficult to recognize the same witch.",
+            "1_point_standard": "The character identity is consistent, with matching facial features, hats, and hairstyles clearly indicating the same character."
+        },
+        {
+            "question": "Do the first and third images maintain a consistent sketch style, with similar line quality, shading, and overall sketch aesthetics?",
+            "0_point_standard": "There are significant style differences between the two images, reducing the coherence of the series.",
+            "1_point_standard": "The style is consistent, with matching line quality, shading, and aesthetics, ensuring series coherence."
+        },
+        {
+            "question": "Do the second and fourth images display distinct and different expressions (anger and confusion), effectively conveying different emotions?",
+            "0_point_standard": "The expressions lack clear differences or are too similar, failing to convey distinctly different emotional states.",
+            "1_point_standard": "The expressions are distinctly different, effectively conveying the intended emotions, adding expressive diversity to the series."
+        },
+        {
+            "question": "Do the third and sixth images maintain consistency in the details of the witch's accessories (such as the shape and position of the hat), with no unnecessary changes?",
+            "0_point_standard": "There are noticeable differences in accessories between the images, reducing the visual coherence of the character.",
+            "1_point_standard": "Accessory details are consistent across both images, reinforcing character identity and visual continuity."
+        },
+        {
+            "question": "Do the fifth and sixth images exhibit a high level of aesthetic quality, with clear sketch details and a true depiction of the witch's calm and focused expression?",
+            "0_point_standard": "The images lack aesthetic appeal, with weak sketch quality or unclear expression depiction.",
+            "1_point_standard": "The images have strong aesthetic appeal, with refined sketch details and a true depiction of expressions, enhancing the depth of the character."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_expression_design_0002/images.txt b/dataset/dynamic_character_design_expression_design_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/dynamic_character_design_expression_design_0002/instruction.txt b/dataset/dynamic_character_design_expression_design_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..73af09e21d65dbb1fb2cc77a1433d43a894b3f89
--- /dev/null
+++ b/dataset/dynamic_character_design_expression_design_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a mystical witch character in a sketch style, showing only the head or upper body. She is wearing a large black hat, with sharp facial features and intense eyes. Generate a set of 6 images with different expressions: the first image shows her with a mysterious smile, lips slightly curled; the second image shows her angry, eyes full of rage; the third image shows her laughing maniacally, appearing eerie; the fourth image shows her confused, with furrowed brows; the fifth image shows her calm and composed, with deep, thoughtful eyes; the sixth image shows her highly focused, staring straight ahead. All facial expressions should be varied, while the head and upper body remain consistent, with the same character ID throughout.
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_expression_design_0002/meta.json b/dataset/dynamic_character_design_expression_design_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..14e727d74d956c66cca4587a7aaf5952d3d1540b
--- /dev/null
+++ b/dataset/dynamic_character_design_expression_design_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "dynamic character expression design",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0016",
+    "output_image_count": 6,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0002/auto_eval.jsonl b/dataset/dynamic_character_design_pose_design_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d84ffe01d9aec5c798600bb28b6e0a56a0498dc3
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "question": "Is the number in the image the digit 4? 0 points: The number in the image is not the digit 4; 1 point: The number in the image is the digit 4. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and third images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style. She is wearing a large black hat, with a flowing cloak, holding an ancient staff. The first image shows her holding the staff and pointing with one hand, casting a spell; the second image shows her raising the staff high with one hand, while the other hand is summoning power; the third image shows her gripping the staff with both hands, leaning forward as if gathering strength; the fourth image shows her holding the staff horizontally in front of her in a defensive stance. These images should have no background, focusing on different spell-casting actions, with consistent staff and hat design. Ensure the character ID remains consistent across all images, representing the same witch in each.\nYour review question is:\nAre the witch’s clothing details, especially the large black hat and flowing cloak, consistent between the first image (casting spell with one hand pointing) and the third image (leaning forward, gathering strength)? 0 Points: The hat or cloak design differs significantly between the two images, showing inconsistencies in size, shape, or flow. 1 Point: The hat and cloak design remain consistent in size, shape, and flow between the first and third images, presenting a cohesive appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and fourth images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style. She is wearing a large black hat, with a flowing cloak, holding an ancient staff. The first image shows her holding the staff and pointing with one hand, casting a spell; the second image shows her raising the staff high with one hand, while the other hand is summoning power; the third image shows her gripping the staff with both hands, leaning forward as if gathering strength; the fourth image shows her holding the staff horizontally in front of her in a defensive stance. These images should have no background, focusing on different spell-casting actions, with consistent staff and hat design. Ensure the character ID remains consistent across all images, representing the same witch in each.\nYour review question is:\nIs the character’s facial expression and overall ID consistency maintained between the second image (raising the staff with one hand) and the fourth image (holding the staff horizontally in a defensive stance)? 0 Points: The facial expression or identifiable features (such as face structure or overall appearance) differ noticeably, suggesting a change in character ID. 1 Point: The facial expression and identifiable features are consistent between the second and fourth images, confirming it is the same witch character.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and fourth images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style. She is wearing a large black hat, with a flowing cloak, holding an ancient staff. The first image shows her holding the staff and pointing with one hand, casting a spell; the second image shows her raising the staff high with one hand, while the other hand is summoning power; the third image shows her gripping the staff with both hands, leaning forward as if gathering strength; the fourth image shows her holding the staff horizontally in front of her in a defensive stance. These images should have no background, focusing on different spell-casting actions, with consistent staff and hat design. Ensure the character ID remains consistent across all images, representing the same witch in each.\nYour review question is:\nIs the ancient staff’s design consistent between the first image (casting spell) and the fourth image (defensive stance)? 0 Points: The staff’s design, including its shape, size, or any unique markings, appears different between the two images. 1 Point: The staff’s design remains consistent, with no noticeable changes in shape, size, or unique details between the first and fourth images.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and third images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style. She is wearing a large black hat, with a flowing cloak, holding an ancient staff. The first image shows her holding the staff and pointing with one hand, casting a spell; the second image shows her raising the staff high with one hand, while the other hand is summoning power; the third image shows her gripping the staff with both hands, leaning forward as if gathering strength; the fourth image shows her holding the staff horizontally in front of her in a defensive stance. These images should have no background, focusing on different spell-casting actions, with consistent staff and hat design. Ensure the character ID remains consistent across all images, representing the same witch in each.\nYour review question is:\nDoes the witch’s posture accurately represent the specified actions in the second image (raising the staff high while summoning power) and the third image (leaning forward, gripping the staff with both hands)? 0 Points: The postures do not clearly depict the described actions, making it difficult to interpret the specific spell-casting gestures. 1 Point: The postures clearly represent the described actions, showing a distinct power-summoning pose in the second image and a strength-gathering stance in the third image.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and second images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a mystical witch character in a sketch style. She is wearing a large black hat, with a flowing cloak, holding an ancient staff. The first image shows her holding the staff and pointing with one hand, casting a spell; the second image shows her raising the staff high with one hand, while the other hand is summoning power; the third image shows her gripping the staff with both hands, leaning forward as if gathering strength; the fourth image shows her holding the staff horizontally in front of her in a defensive stance. These images should have no background, focusing on different spell-casting actions, with consistent staff and hat design. Ensure the character ID remains consistent across all images, representing the same witch in each.\nYour review question is:\nAre the sketch style and line work applied consistently between the first image (casting spell with one hand pointing) and the second image (raising the staff high with one hand)? 0 Points: The sketch style or line work shows noticeable differences in texture, thickness, or detail, disrupting the visual cohesion. 1 Point: The sketch style and line work are consistent in texture, thickness, and detail between the two images, maintaining a uniform artistic approach.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/dynamic_character_design_pose_design_0002/eval.json b/dataset/dynamic_character_design_pose_design_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..f63bf44c1dd9335d99488a855879ca75653990a4
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the number of output images consistent with the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "In the first image (casting a spell while pointing with one hand) and the third image (leaning forward, gathering strength), do the details of the witch's costume, particularly the large black hat and flowing cloak, remain consistent?",
+            "0_point_standard": "The design of the hat or cloak is significantly different between the two images, showing differences in size, shape, or flow.",
+            "1_point_standard": "The design of the hat and cloak is consistent in the first and third images, with the same size, shape, and flow, presenting a coherent appearance."
+        },
+        {
+            "question": "In the second image (raising the staff high with one hand) and the fourth image (holding the staff horizontally in a defensive stance), is the consistency of the character's facial expression and overall identity maintained?",
+            "0_point_standard": "Facial expressions or recognizable features (such as facial structure or overall appearance) show significant differences, suggesting a change in character identity.",
+            "1_point_standard": "Facial expressions and recognizable features are consistent in the second and fourth images, confirming the same witch character."
+        },
+        {
+            "question": "In the first image (casting spell) and the fourth image (defensive posture), is the design of the ancient staff consistent?",
+            "0_point_standard": "The design of the staff, including shape, size, or any unique markings, shows differences between the two images.",
+            "1_point_standard": "The staff design is consistent, with no significant changes in shape, size, and unique details between the first and fourth images."
+        },
+        {
+            "question": "Does the witch's posture accurately represent the specified actions in the second image (raising the staff high while summoning power) and the third image (leaning forward, gripping the staff with both hands)?",
+            "0_point_standard": "The posture fails to clearly represent the described actions, making the specific spell-casting stance difficult to understand.",
+            "1_point_standard": "The posture clearly represents the described actions, presenting a definitive power summoning stance in the second image and a power gathering stance in the third image."
+        },
+        {
+            "question": "In the first image (casting a spell while pointing with one hand) and the second image (raising the staff high with one hand), are the sketch style and line treatment consistent?",
+            "0_point_standard": "The sketch style or line treatment shows significant differences in texture, thickness, or detail, disrupting visual coherence.",
+            "1_point_standard": "The sketch style and line treatment are consistent in terms of texture, thickness, and detail, maintaining a unified artistic expression."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0002/images.txt b/dataset/dynamic_character_design_pose_design_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/dynamic_character_design_pose_design_0002/instruction.txt b/dataset/dynamic_character_design_pose_design_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..697e0830b42e49ca09c2b91f9dfac228a5a40391
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a mystical witch character in a sketch style. She is wearing a large black hat, with a flowing cloak, holding an ancient staff. The first image shows her holding the staff and pointing with one hand, casting a spell; the second image shows her raising the staff high with one hand, while the other hand is summoning power; the third image shows her gripping the staff with both hands, leaning forward as if gathering strength; the fourth image shows her holding the staff horizontally in front of her in a defensive stance. These images should have no background, focusing on different spell-casting actions, with consistent staff and hat design. Ensure the character ID remains consistent across all images, representing the same witch in each.
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0002/meta.json b/dataset/dynamic_character_design_pose_design_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..eab5728393ba2b1a0d75b9c71c612c1dca918e41
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "dynamic character pose design",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0015",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0003/auto_eval.jsonl b/dataset/dynamic_character_design_pose_design_0003/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..a6418eb6996c600dd36cf174614a8863d12278b2
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0003/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "question": "Is the number in the image the digit 4? 0 points: The number in the image is not the digit 4; 1 point: The number in the image is the digit 4. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and third images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a modern superhero character. He is dressed in a sleek combat suit, wearing a black mask, and wielding high-tech weapons. The first image shows him crossing his arms over his chest in a defensive pose; the second image shows him kneeling on one knee, with his weapon pointed downward; the third image shows him swinging his weapon in a side attack motion; the fourth image shows him jumping into the air, weapon raised for a strike. These images should not include backgrounds, focusing on varied combat stances, with consistent costume and weapon. Ensure the character ID remains consistent across all images, representing the same superhero in each.\nYour review question is:\nIs the superhero’s costume design, including the sleek combat suit and black mask, consistent between the first image (defensive pose with arms crossed) and the third image (side attack motion)? 0 Points: There are noticeable differences in the costume design, such as variations in suit details, mask shape, or overall appearance. 1 Point: The costume design remains consistent in all aspects, with no noticeable changes between the first and third images.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and fourth images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a modern superhero character. He is dressed in a sleek combat suit, wearing a black mask, and wielding high-tech weapons. The first image shows him crossing his arms over his chest in a defensive pose; the second image shows him kneeling on one knee, with his weapon pointed downward; the third image shows him swinging his weapon in a side attack motion; the fourth image shows him jumping into the air, weapon raised for a strike. These images should not include backgrounds, focusing on varied combat stances, with consistent costume and weapon. Ensure the character ID remains consistent across all images, representing the same superhero in each.\nYour review question is:\nIs the character ID, including the facial features under the mask and general appearance, consistent between the second image (kneeling with weapon pointed downward) and the fourth image (jumping into the air with weapon raised)? 0 Points: The character’s ID appears different in the two images, with noticeable discrepancies in identifiable features, body type, or overall look. 1 Point: The character’s ID is clearly consistent, with recognizable features and a cohesive appearance across the second and fourth images.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third and fourth images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a modern superhero character. He is dressed in a sleek combat suit, wearing a black mask, and wielding high-tech weapons. The first image shows him crossing his arms over his chest in a defensive pose; the second image shows him kneeling on one knee, with his weapon pointed downward; the third image shows him swinging his weapon in a side attack motion; the fourth image shows him jumping into the air, weapon raised for a strike. These images should not include backgrounds, focusing on varied combat stances, with consistent costume and weapon. Ensure the character ID remains consistent across all images, representing the same superhero in each.\nYour review question is:\nIs the high-tech weapon design maintained consistently between the third image (swinging weapon in a side attack) and the fourth image (jumping into the air with weapon raised for a strike)? 0 Points: The weapon’s design, including its shape, size, or unique elements, appears different in the two images. 1 Point: The weapon design is consistent, with no differences in shape, size, or details between the third and fourth images.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and second images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a modern superhero character. He is dressed in a sleek combat suit, wearing a black mask, and wielding high-tech weapons. The first image shows him crossing his arms over his chest in a defensive pose; the second image shows him kneeling on one knee, with his weapon pointed downward; the third image shows him swinging his weapon in a side attack motion; the fourth image shows him jumping into the air, weapon raised for a strike. These images should not include backgrounds, focusing on varied combat stances, with consistent costume and weapon. Ensure the character ID remains consistent across all images, representing the same superhero in each.\nYour review question is:\nDoes each image accurately represent the specified action poses, specifically comparing the defensive stance in the first image and the kneeling stance in the second image? 0 Points: The poses are ambiguous or do not clearly convey the specified actions, making it hard to interpret the character’s intended stance. 1 Point: The poses are clearly represented, with the defensive stance and kneeling stance accurately depicted according to the description.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and third images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a modern superhero character. He is dressed in a sleek combat suit, wearing a black mask, and wielding high-tech weapons. The first image shows him crossing his arms over his chest in a defensive pose; the second image shows him kneeling on one knee, with his weapon pointed downward; the third image shows him swinging his weapon in a side attack motion; the fourth image shows him jumping into the air, weapon raised for a strike. These images should not include backgrounds, focusing on varied combat stances, with consistent costume and weapon. Ensure the character ID remains consistent across all images, representing the same superhero in each.\nYour review question is:\nIs the visual style, including line work, shading, and overall aesthetic, consistent between the second image (kneeling with weapon pointed downward) and the third image (side attack motion)? 0 Points: The style varies noticeably between the two images, with differences in line thickness, shading, or texture, disrupting visual cohesion. 1 Point: The visual style, including line work, shading, and texture, is consistent across both images, ensuring a cohesive aesthetic.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/dynamic_character_design_pose_design_0003/eval.json b/dataset/dynamic_character_design_pose_design_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..ae1e5457d442c565bc3fe2fe8e220787f9314472
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "In the first image (defensive pose with arms crossed) and the third image (side attack motion), does the superhero's costume design (including the sleek combat suit and black mask) remain consistent?",
+            "0_point_standard": "There are noticeable differences in the costume design, such as changes in combat suit details, mask shape, or overall appearance.",
+            "1_point_standard": "The costume design remains consistent in all aspects, with no noticeable changes between the first and third images."
+        },
+        {
+            "question": "In the second image (kneeling, weapon pointing downward) and the fourth image (leaping, weapon raised), is the character's identity consistent, including facial features under the mask and overall appearance?",
+            "0_point_standard": "The character's identity appears different in the two images, with noticeable differences in recognizable features, body shape, or overall appearance.",
+            "1_point_standard": "The character's identity is clearly consistent, with coherent facial features and overall appearance in both the second and fourth images."
+        },
+        {
+            "question": "In the third image (swinging the weapon in a side attack) and the fourth image (jumping into the air with the weapon raised for a strike), does the high-tech weapon design remain consistent?",
+            "0_point_standard": "The weapon design, including shape, size, or unique elements, appears different in the two images.",
+            "1_point_standard": "The weapon design remains consistent, with no differences in shape, size, and details between the third and fourth images."
+        },
+        {
+            "question": "Does each image accurately depict the specified action poses, especially the defensive posture in the first image and the kneeling posture in the second image?",
+            "0_point_standard": "The poses are unclear or fail to clearly convey the specified actions, making the character's posture difficult to interpret.",
+            "1_point_standard": "The poses are clearly depicted, with the defensive posture and kneeling posture accurately displayed according to the description."
+        },
+        {
+            "question": "In the second image (kneeling, weapon pointing downward) and the third image (side attack action), does the visual style remain consistent, including line treatment, shading, and overall aesthetics?",
+            "0_point_standard": "There are significant style differences between the two images, with variations in line thickness, shading, or texture disrupting visual coherence.",
+            "1_point_standard": "The visual style, including line treatment, shading, and texture, remains consistent in both images, ensuring a unified aesthetic effect."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0003/images.txt b/dataset/dynamic_character_design_pose_design_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/dynamic_character_design_pose_design_0003/instruction.txt b/dataset/dynamic_character_design_pose_design_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d28795536c296de6b2bda859c4768e19332146b6
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate a modern superhero character. He is dressed in a sleek combat suit, wearing a black mask, and wielding high-tech weapons. The first image shows him crossing his arms over his chest in a defensive pose; the second image shows him kneeling on one knee, with his weapon pointed downward; the third image shows him swinging his weapon in a side attack motion; the fourth image shows him jumping into the air, weapon raised for a strike. These images should not include backgrounds, focusing on varied combat stances, with consistent costume and weapon. Ensure the character ID remains consistent across all images, representing the same superhero in each.
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0003/meta.json b/dataset/dynamic_character_design_pose_design_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..d6e3ef698ecb68f66b1a27ffce0d5ca6de33d422
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "dynamic character pose design",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0015",
+    "output_image_count": 4,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0004/auto_eval.jsonl b/dataset/dynamic_character_design_pose_design_0004/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..546c1c8f6ed57e0d43e7e20e497a44ee06c641fd
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0004/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "question": "Is the number in the image the digit 4? 0 points: The number in the image is not the digit 4; 1 point: The number in the image is the digit 4. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and third images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a samurai warrior character in a realistic style. He is wearing traditional Japanese samurai armor, with a katana at his side. The first image shows him gripping the hilt of his katana, with the tip pointing down; the second image shows him bending forward, drawing the sword in a quick motion; the third image shows him holding the katana at waist level, ready to strike; the fourth image shows him holding the sword horizontally in front of him in a defensive stance. These images should have no background, with varied combat poses, and consistent armor and katana. Ensure the character ID remains consistent across all images, representing the same samurai.\nYour review question is:\nIs the visual style, including the realism of the shading, texture, and overall aesthetic, consistent between the first image (gripping the hilt with tip pointing down) and the third image (holding the katana at waist level, ready to strike)? 0 Points: There are noticeable differences in the visual style, with variations in shading, texture, or details that disrupt the overall cohesion. 1 Point: The visual style is consistent across both images, with a cohesive aesthetic in shading, texture, and details.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and fourth images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a samurai warrior character in a realistic style. He is wearing traditional Japanese samurai armor, with a katana at his side. The first image shows him gripping the hilt of his katana, with the tip pointing down; the second image shows him bending forward, drawing the sword in a quick motion; the third image shows him holding the katana at waist level, ready to strike; the fourth image shows him holding the sword horizontally in front of him in a defensive stance. These images should have no background, with varied combat poses, and consistent armor and katana. Ensure the character ID remains consistent across all images, representing the same samurai.\nYour review question is:\nIs the character ID, including facial features, body build, and general appearance, consistent between the second image (bending forward to draw the sword) and the fourth image (holding the sword horizontally in a defensive stance)? 0 Points: The character appears different between the two images, with variations in facial features, body type, or overall look that make the samurai seem like a different person. 1 Point: The character ID is clearly consistent across both images, maintaining recognizable features and a cohesive appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and fourth images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a samurai warrior character in a realistic style. He is wearing traditional Japanese samurai armor, with a katana at his side. The first image shows him gripping the hilt of his katana, with the tip pointing down; the second image shows him bending forward, drawing the sword in a quick motion; the third image shows him holding the katana at waist level, ready to strike; the fourth image shows him holding the sword horizontally in front of him in a defensive stance. These images should have no background, with varied combat poses, and consistent armor and katana. Ensure the character ID remains consistent across all images, representing the same samurai.\nYour review question is:\nIs the traditional Japanese samurai armor design consistent, with no changes in armor structure, detailing, or arrangement between the first image (gripping the hilt with tip pointing down) and the fourth image (holding the sword horizontally in a defensive stance)? 0 Points: There are noticeable differences in the armor’s structure, details, or arrangement between the two images. 1 Point: The armor design remains consistent, with no noticeable changes in structure, details, or arrangement.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and third images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a samurai warrior character in a realistic style. He is wearing traditional Japanese samurai armor, with a katana at his side. The first image shows him gripping the hilt of his katana, with the tip pointing down; the second image shows him bending forward, drawing the sword in a quick motion; the third image shows him holding the katana at waist level, ready to strike; the fourth image shows him holding the sword horizontally in front of him in a defensive stance. These images should have no background, with varied combat poses, and consistent armor and katana. Ensure the character ID remains consistent across all images, representing the same samurai.\nYour review question is:\nIs the katana design, including the shape, length, and hilt details, consistent between the second image (drawing the sword) and the third image (holding the katana at waist level, ready to strike)? 0 Points: The katana’s design varies between the two images, with noticeable differences in its shape, length, or hilt details. 1 Point: The katana design is consistent in both images, with no significant variations in shape, length, or details.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third and fourth images of the response provided by a student. The task objective is to generate different pose designs for the same character.\nThe text requirement is:\nPlease generate a samurai warrior character in a realistic style. He is wearing traditional Japanese samurai armor, with a katana at his side. The first image shows him gripping the hilt of his katana, with the tip pointing down; the second image shows him bending forward, drawing the sword in a quick motion; the third image shows him holding the katana at waist level, ready to strike; the fourth image shows him holding the sword horizontally in front of him in a defensive stance. These images should have no background, with varied combat poses, and consistent armor and katana. Ensure the character ID remains consistent across all images, representing the same samurai.\nYour review question is:\nDoes each image accurately represent the specified action poses, particularly comparing the third image (holding the katana at waist level, ready to strike) and the fourth image (holding the sword horizontally in a defensive stance)? 0 Points: The poses are not accurately depicted or appear ambiguous, making it unclear if the character is in the specified stance. 1 Point: The poses are clearly depicted, with the character’s stance and sword positioning accurately matching the described actions.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/dynamic_character_design_pose_design_0004/eval.json b/dataset/dynamic_character_design_pose_design_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..b27057d9cd3992947a83b381b36d1d92a1928756
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "In the first image (gripping the katana hilt, tip pointing down) and the third image (katana held at waist level, ready to strike), is the visual style consistent, including the realism of shadows, textures, and overall aesthetics?",
+            "0_point_standard": "There are noticeable differences in visual style; changes in shadows, textures, or details disrupt the overall coherence.",
+            "1_point_standard": "Visual style is consistent across both images, with shadows, textures, and details providing a unified aesthetic effect."
+        },
+        {
+            "question": "In the second image (bending body to draw the sword) and the fourth image (horizontal defensive posture with the sword), is the character identity consistent, including facial features, body shape, and overall appearance?",
+            "0_point_standard": "The characters in the two images look different, with differences in facial features, body shape, or overall appearance making the samurai appear as different individuals.",
+            "1_point_standard": "The character identity is clearly consistent across both images, maintaining recognizable features and a coherent appearance."
+        },
+        {
+            "question": "In the first image (gripping the katana hilt, tip pointing down) and the fourth image (horizontal defensive posture with the sword), is the design of traditional Japanese samurai armor consistent, with no changes in armor structure, details, or arrangement?",
+            "0_point_standard": "There are noticeable differences in armor structure, details, or arrangement between the two images.",
+            "1_point_standard": "The armor design is consistent, with no noticeable changes in structure, details, or arrangement across the two images."
+        },
+        {
+            "question": "In the second image (drawing the sword) and the third image (katana held at waist level, ready to strike), is the design of the samurai sword consistent, including shape, length, and handle details?",
+            "0_point_standard": "The samurai sword design differs between the two images, with noticeable differences in shape, length, or handle details.",
+            "1_point_standard": "The samurai sword design is consistent, with no significant changes in shape, length, or details across the two images."
+        },
+        {
+            "question": "Does each image accurately depict the specified action pose, particularly comparing the third image (katana held at waist level, ready to strike) and the fourth image (horizontal defensive posture with the sword)?",
+            "0_point_standard": "The pose fails to be accurately depicted or appears unclear, making the specified posture of the character ambiguous.",
+            "1_point_standard": "The pose is clearly presented, with the character's posture and the position of the sword accurately matching the described action."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0004/images.txt b/dataset/dynamic_character_design_pose_design_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/dynamic_character_design_pose_design_0004/instruction.txt b/dataset/dynamic_character_design_pose_design_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f491b035bad1a3600f734ba518b5081144b5e1a2
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate a samurai warrior character in a realistic style. He is wearing traditional Japanese samurai armor, with a katana at his side. The first image shows him gripping the hilt of his katana, with the tip pointing down; the second image shows him bending forward, drawing the sword in a quick motion; the third image shows him holding the katana at waist level, ready to strike; the fourth image shows him holding the sword horizontally in front of him in a defensive stance. These images should have no background, with varied combat poses, and consistent armor and katana. Ensure the character ID remains consistent across all images, representing the same samurai.
\ No newline at end of file
diff --git a/dataset/dynamic_character_design_pose_design_0004/meta.json b/dataset/dynamic_character_design_pose_design_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b77384e116763b3c57dfbd7b5b06624915858160
--- /dev/null
+++ b/dataset/dynamic_character_design_pose_design_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "dynamic character pose design",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0015",
+    "output_image_count": 4,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0001/eval.json b/dataset/game_ui_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6145b4799fedbf81b017d6fb908e1a34cf726763
--- /dev/null
+++ b/dataset/game_ui_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the game UI meet the basic requirements of the textual description, accurately including the specified elements and functions?",
+            "0_point_standard": "The game UI does not match the textual description and fails to include the specified elements or functions.",
+            "1_point_standard": "The game UI fully complies with the textual description, accurately including all specified elements and functions."
+        },
+        {
+            "question": "Is the text in the game UI clear and easy to read, and does the font style and size comply with the specifications in the textual description?",
+            "0_point_standard": "The text is unclear, and the font style or size does not comply with the specifications, affecting readability.",
+            "1_point_standard": "The text is clear and easy to read, and the font style and size comply with the specifications in the textual description."
+        },
+        {
+            "question": "Does the generated UI follow the specific layout and design instructions provided in the textual description (e.g., button positions, menu arrangement)?",
+            "0_point_standard": "The UI does not follow the specific layout and design instructions in the text, with incorrect element positions or improper arrangement.",
+            "1_point_standard": "The UI strictly adheres to the specific layout and design instructions, with elements correctly positioned and well-arranged according to the text description."
+        },
+        {
+            "question": "Are the interactive elements (e.g., buttons, sliders, icons) clear and intuitively positioned, enhancing user interaction within the game UI?",
+            "0_point_standard": "Interactive elements are unclear or not intuitively positioned, making it difficult for users to effectively navigate the UI.",
+            "1_point_standard": "Interactive elements are clear and intuitively positioned, enhancing user interaction and ease of navigation."
+        },
+        {
+            "question": "Does the overall visual consistency and style of the game UI remain coherent, maintaining a logical flow and aesthetic that meets professional design standards?",
+            "0_point_standard": "The UI lacks visual consistency and style, with a chaotic flow and insufficient aesthetic appeal, not meeting professional standards.",
+            "1_point_standard": "The UI exhibits strong visual consistency and style, with clear logical flow and great aesthetics, meeting professional design standards."
+        },
+        {
+            "question": "Does the game UI possess strong visual impact and effectively engage users, providing an immersive and intuitive experience?",
+            "0_point_standard": "The UI lacks visual impact and fails to engage users, providing a non-immersive and unintuitive experience.",
+            "1_point_standard": "The UI possesses strong visual impact and effectively engages users, providing an immersive and intuitive experience."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0001/images.txt b/dataset/game_ui_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/game_ui_generation_0001/instruction.txt b/dataset/game_ui_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fb60914c7e69e6ea2bb719e008858baabb91b898
--- /dev/null
+++ b/dataset/game_ui_generation_0001/instruction.txt
@@ -0,0 +1 @@
+This is a game UI screenshot with a cartoon hand-drawn style, featuring bright and lively colors. The background depicts a rural scene with the main element being a red and white barn surrounded by green trees, shrubs, and some flowers. In the top right corner, there is a floating “Surprise” icon, showing a small nest with a stylized bee inside. In front of the barn, on the ground, there is a large yellow hexagonal honeycomb-shaped button with “PLAY” written on it. Below the button are some purple and pink flowers. In the top left corner, there is a countdown timer icon showing “3:00:00,” represented by an orange gear. At the top of the interface, there is an orange coin icon displaying a currency value of “9999,” next to a green fruit icon also showing a “9999” currency value. Next to these values are pink plus signs for adding more currency. In the central area, three small jars are hanging from green vines, labeled “Reward,” “Level,” and “Explore,” each containing cartoon images of rewards, medals, and exploration items.
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0001/meta.json b/dataset/game_ui_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5c1a787e9e95c43a35e905b9a43e24ceba2524b
--- /dev/null
+++ b/dataset/game_ui_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "game UI generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0022",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0002/eval.json b/dataset/game_ui_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217cf3dd37ef6bd1fefe42c420b8539a76604bd
--- /dev/null
+++ b/dataset/game_ui_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the game UI interface meet the basic requirements described in the text, accurately including the specified elements and functions?",
+            "0_point_standard": "The game UI interface does not match the text description and fails to include the specified elements or functions.",
+            "1_point_standard": "The game UI interface fully meets the text description, accurately including all specified elements and functions."
+        },
+        {
+            "question": "Is the text in the game UI clear and easy to read, and does the font style and size meet the specifications described in the text?",
+            "0_point_standard": "The text is unclear, and the font style or size does not meet the specifications, affecting readability.",
+            "1_point_standard": "The text is clear and easy to read, and the font style and size meet the specifications described in the text."
+        },
+        {
+            "question": "Does the generated UI follow the specific layout and design instructions provided in the text description (e.g., button positions, menu arrangement)?",
+            "0_point_standard": "The UI does not follow the specific layout and design instructions in the text, with incorrect element positions or improper arrangement.",
+            "1_point_standard": "The UI strictly follows the specific layout and design instructions, with elements correctly positioned and well-arranged according to the text description."
+        },
+        {
+            "question": "Are the interactive elements (e.g., buttons, sliders, icons) clear and intuitively positioned, enhancing user interaction within the game UI?",
+            "0_point_standard": "The interactive elements are unclear or not intuitively positioned, making it difficult for users to effectively navigate the UI.",
+            "1_point_standard": "The interactive elements are clear and intuitively positioned, enhancing user interaction and ease of navigation."
+        },
+        {
+            "question": "Does the game UI maintain overall visual consistency and style, with a logical flow and aesthetic appeal that meets professional design standards?",
+            "0_point_standard": "The UI lacks visual consistency and style, has a chaotic flow, insufficient aesthetic appeal, and does not meet professional standards.",
+            "1_point_standard": "The UI exhibits strong visual consistency and style, with a clear logical flow and high aesthetic appeal that meets professional design standards."
+        },
+        {
+            "question": "Does the game UI have a strong visual impact and effectively attract users, providing an immersive and intuitive experience?",
+            "0_point_standard": "The UI lacks visual impact, fails to attract users, and provides a non-immersive and unintuitive experience.",
+            "1_point_standard": "The UI has a strong visual impact and effectively attracts users, providing an immersive and intuitive experience."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0002/images.txt b/dataset/game_ui_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/game_ui_generation_0002/instruction.txt b/dataset/game_ui_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..34641582c897a9c2b7f14552a2aafec94e5256a8
--- /dev/null
+++ b/dataset/game_ui_generation_0002/instruction.txt
@@ -0,0 +1 @@
+This is a game UI screenshot with a cartoon hand-drawn style. The overall color scheme is vibrant with bright oranges and greens. At the top of the interface is a navigation area, with a blue hexagonal arrow button on the left, indicating the back function. On the right are two currency counters, one displaying the amount of coins (9999) and the other showing the amount of green fruit-shaped currency (9999), with pink plus buttons next to both, indicating the option to purchase or increase currency. On the right side of each level module, there are “Get” buttons with exchange hints for 800 coins and 1200 green fruit. The main section of the interface shows five “Level Honor” progress bars, with a honeycomb-shaped hexagonal pattern as the background and a dripping honey design at the top. Each level's star ranking is represented by blue stars, ranging from 1 to 30 in sequence. Below each “Level Honor” is a progress bar, with blue star markers indicating the level numbers on the left, and orange and purple “Get” buttons on the right, signifying that players can redeem rewards by collecting a combination of coins and green fruits. For certain levels, glowing item hints appear on the right side of the progress bar, enticing players to click and claim rewards.
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0002/meta.json b/dataset/game_ui_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..f42fa5bcb66684cea8a60f610bce224c619b2feb
--- /dev/null
+++ b/dataset/game_ui_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "game UI generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0022",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0003/eval.json b/dataset/game_ui_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..f7b209cd40c0c8acece616305f022536aab9dc79
--- /dev/null
+++ b/dataset/game_ui_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the game UI interface meet the basic requirements of the text description, accurately including the specified elements and functions?",
+            "0_point_standard": "The game UI interface does not match the text description and fails to include the specified elements or functions.",
+            "1_point_standard": "The game UI interface fully complies with the text description, accurately including all specified elements and functions."
+        },
+        {
+            "question": "Is the text in the game UI clear and readable, and do the font style and size comply with the specifications in the text description?",
+            "0_point_standard": "The text is unclear, and the font style or size does not meet the specifications, affecting readability.",
+            "1_point_standard": "The text is clear and readable, and the font style and size comply with the specifications in the text description."
+        },
+        {
+            "question": "Does the generated UI follow the specific layout and design instructions provided in the text description (e.g., button positions, menu arrangement)?",
+            "0_point_standard": "The UI does not follow the specific layout and design instructions in the text, with incorrect element positions or improper arrangement.",
+            "1_point_standard": "The UI strictly follows the specific layout and design instructions, with elements correctly positioned and well-arranged according to the text description."
+        },
+        {
+            "question": "Are the interactive elements (e.g., buttons, sliders, icons) clear and intuitively positioned, enhancing user interaction in the game UI?",
+            "0_point_standard": "The interactive elements are unclear or not intuitively positioned, making it difficult for users to effectively navigate the UI.",
+            "1_point_standard": "The interactive elements are clear and intuitively positioned, enhancing user interaction and ease of navigation."
+        },
+        {
+            "question": "Does the game UI maintain overall visual consistency and style, adhering to a logical flow and aesthetic that meets professional design standards?",
+            "0_point_standard": "The UI lacks visual consistency and style, with a chaotic flow and insufficient aesthetics, failing to meet professional standards.",
+            "1_point_standard": "The UI exhibits strong visual consistency and style, with a clear logical flow and high aesthetic appeal, meeting professional design standards."
+        },
+        {
+            "question": "Does the game UI have a strong visual impact and effectively attract users, providing an immersive and intuitive experience?",
+            "0_point_standard": "The UI lacks visual impact, fails to attract users, and provides a non-immersive and unintuitive experience.",
+            "1_point_standard": "The UI has a strong visual impact and effectively attracts users, providing an immersive and intuitive experience."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0003/images.txt b/dataset/game_ui_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/game_ui_generation_0003/instruction.txt b/dataset/game_ui_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7444f8fef0a5768419e580782aa989c66ba20a92
--- /dev/null
+++ b/dataset/game_ui_generation_0003/instruction.txt
@@ -0,0 +1 @@
+This game UI image shows a reward settlement screen, designed in a cartoon style. The background has a soft green gradient with curly vines and leaf patterns. At the top, the player's level is displayed as “Level 1,” and to the right, a currency display area shows the player's current balance of 9999 coins and 9999 green fruit icons. In the center, a honey-dripping banner displays the word “Congratulations” in yellow with orange shadows, emphasizing the rewarding nature of the screen. Below the banner is a green leaf-shaped bowl, filled with glowing gold coins featuring a leaf symbol, representing a reward of 200 coins, indicated in red “200.” On the left side, a cartoon bee character points toward the coins, signifying the reward for the player. At the bottom, there are two pink buttons; the left button has a white house icon indicating the return to the home screen, while the right button shows the word “Next,” prompting the player to proceed to the next level. In the bottom right corner, various game tool icons are displayed.
\ No newline at end of file
diff --git a/dataset/game_ui_generation_0003/meta.json b/dataset/game_ui_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..54824d79917f9f7e4b04c55c091230d7d16a6736
--- /dev/null
+++ b/dataset/game_ui_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "game UI generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0022",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/historical_narrative_generation_0001/eval.json b/dataset/historical_narrative_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..955bf9580296eeedba1860b5c7c3b3c223fe57da
--- /dev/null
+++ b/dataset/historical_narrative_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Temporal logic of events: Does the sequence of images logically present historical events in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks a logical flow, failing to illustrate the progression of historical events.",
+            "1_point_standard": "The sequence of images clearly presents historical events in a logical chronological order."
+        },
+        {
+            "question": "Consistency with text description: Does the image content accurately match the historical events specified in the text description?",
+            "0_point_standard": "The image content fails to accurately reflect the historical events in the text description, showing obvious deviations or omissions.",
+            "1_point_standard": "The image content fully matches the text description, accurately depicting the specified historical facts."
+        },
+        {
+            "question": "Consistency of image style: Is the style and overall visual effect of the image sequence consistent?",
+            "0_point_standard": "The image style is inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a coherent visual effect."
+        },
+        {
+            "question": "Object/Character ID consistency: Does the generated image sequence maintain consistency in the identification of objects or characters (e.g., the same person or object)?",
+            "0_point_standard": "The main subjects or characters are inconsistent across different images, making it difficult to identify them as the same person or object.",
+            "1_point_standard": "The main subjects or characters maintain consistency and can be clearly identified as the same person or object throughout the sequence."
+        },
+        {
+            "question": "Logical accuracy of historical reproduction: Is the reproduction of historical events reasonable and logically accurate based on known historical facts?",
+            "0_point_standard": "The reproduction of historical events is illogical or inconsistent with facts, with obvious errors or unrealistic descriptions.",
+            "1_point_standard": "The reproduction of historical events is reasonable, logical, and accurately reflects known historical facts."
+        },
+        {
+            "question": "Detail and aesthetics of images: Do the details and aesthetics of the images meet professional standards and have visual appeal?",
+            "0_point_standard": "The images lack detail, have poor aesthetics, and do not meet visual standards.",
+            "1_point_standard": "The images have rich details, excellent aesthetics, meet professional standards, and have visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/historical_narrative_generation_0001/images.txt b/dataset/historical_narrative_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/historical_narrative_generation_0001/instruction.txt b/dataset/historical_narrative_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..26a92710bc44dae052b630f44aa482a1c9d58d94
--- /dev/null
+++ b/dataset/historical_narrative_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images depicting major historical events of Ancient Greece in the 5th century BCE. The first image shows the Battle of Marathon during the Greco-Persian Wars, where Greek soldiers are fiercely resisting the Persian army; the second image shows the democratic reforms in Athens, with Athenian citizens voting in the public square, and the grand Parthenon in the background; the third image shows the Peloponnesian War, with Spartan and Athenian soldiers clashing on the battlefield; the fourth image shows philosopher Socrates standing in an Athenian court, delivering a speech during his trial. All images must maintain a consistent style, reflecting the historical atmosphere of this period.
\ No newline at end of file
diff --git a/dataset/historical_narrative_generation_0001/meta.json b/dataset/historical_narrative_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..caadcaabd74686e06b2a4f98819af262ba97081c
--- /dev/null
+++ b/dataset/historical_narrative_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "historical narrative generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0012",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/historical_narrative_generation_0002/eval.json b/dataset/historical_narrative_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d98b9f9f2c2a46be1e4f78bb5a945f8f67b3b35
--- /dev/null
+++ b/dataset/historical_narrative_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Temporal Logic of Events: Does the sequence of images logically present historical events in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order, lacks logical flow, and fails to illustrate the progression of historical events.",
+            "1_point_standard": "The sequence of images clearly presents historical events in chronological, logical order."
+        },
+        {
+            "question": "Consistency with Text Description: Does the content of the images accurately match the historical events specified in the text description?",
+            "0_point_standard": "The content of the images fails to accurately reflect the historical events in the text description, with significant deviations or omissions.",
+            "1_point_standard": "The content of the images perfectly matches the text description, accurately depicting the specified historical facts."
+        },
+        {
+            "question": "Consistency of Image Style: Is the style and overall visual effect of the image sequence consistent?",
+            "0_point_standard": "The image styles are inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a coherent visual effect."
+        },
+        {
+            "question": "Consistency of Object/Character ID: Does the generated image sequence maintain consistent IDs for the same objects or characters (e.g., the same person or object)?",
+            "0_point_standard": "The main subjects or characters are inconsistent across different images, making it difficult to identify them as the same person or object.",
+            "1_point_standard": "The main subjects or characters remain consistent and can be clearly identified as the same person or object throughout the sequence."
+        },
+        {
+            "question": "Logical Accuracy of Historical Representation: Is the representation of historical events reasonable and logically accurate according to known historical facts?",
+            "0_point_standard": "The representation of historical events is illogical or inconsistent with facts, with significant errors or unrealistic descriptions.",
+            "1_point_standard": "The representation of historical events is reasonable, logical, and accurately reflects known historical facts."
+        },
+        {
+            "question": "Detail and Aesthetic Quality of Images: Do the details and aesthetic quality of the images meet professional standards and have visual appeal?",
+            "0_point_standard": "The images lack detail, have poor aesthetic quality, and do not meet visual standards.",
+            "1_point_standard": "The images are richly detailed, have excellent aesthetic quality, meet professional standards, and are visually appealing."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/historical_narrative_generation_0002/images.txt b/dataset/historical_narrative_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/historical_narrative_generation_0002/instruction.txt b/dataset/historical_narrative_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c74d0ce065b75f9b8153759aef714ff75a62d98c
--- /dev/null
+++ b/dataset/historical_narrative_generation_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images depicting key historical events of the Roman Republic from the 2nd century BCE to the 1st century BCE. The first image shows the Battle of Carthage during the Third Punic War, with Roman forces clashing with the city's Carthaginian defenders; the second image shows the Gracchi brothers' land reform movement, with Roman citizens gathered to support the reforms; the third image shows Julius Caesar's conquest of Gaul, with Roman legions crossing a vast Gallic plain; the fourth image shows Caesar's assassination, as senators conspire in the Roman Forum. All images must maintain a consistent style, representing the visual characteristics of ancient Rome.
\ No newline at end of file
diff --git a/dataset/historical_narrative_generation_0002/meta.json b/dataset/historical_narrative_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..1eb1ab5ef57a609d753de4ecb8a65c6043895799
--- /dev/null
+++ b/dataset/historical_narrative_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "historical narrative generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0012",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_age_transformation_0002/eval.json b/dataset/human_attribute_editing_age_transformation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..103faac58f6eec226570a14c59ade50362b0b963
--- /dev/null
+++ b/dataset/human_attribute_editing_age_transformation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the age-transformed image retain the essential facial features of the original portrait subject?",
+            "0_point_standard": "The facial features in the transformed image change significantly, making the subject unrecognizable.",
+            "1_point_standard": "The transformed image retains the facial features of the subject, making the subject easily recognizable."
+        },
+        {
+            "question": "Does the age transformation modify the subject's age as specified in the task description (e.g., making them look older or younger)?",
+            "0_point_standard": "The age transformation does not reflect the age modification specified in the task description.",
+            "1_point_standard": "The age transformation accurately reflects the specified age change, making the subject appear older or younger as directed."
+        },
+        {
+            "question": "If the task involves partial modification, do the features unrelated to age (such as background and clothing) remain unchanged in the transformed image?",
+            "0_point_standard": "There are noticeable changes in features unrelated to age, such as alterations in background or clothing.",
+            "1_point_standard": "Features unrelated to age remain unchanged, ensuring that the focus is solely on age transformation."
+        },
+        {
+            "question": "Does the transformed image maintain a style and quality consistent with the original image, with no degradation in image quality?",
+            "0_point_standard": "The style and quality of the transformed image are inconsistent with the original, with a noticeable decline in image quality.",
+            "1_point_standard": "The transformed image maintains the style and quality of the original, preserving high image quality."
+        },
+        {
+            "question": "Is the depiction of age transformation realistic, with appropriate attention to details such as skin texture, hair changes, and facial features?",
+            "0_point_standard": "The age transformation looks unrealistic, with poor handling of details like skin texture, hair, or facial features.",
+            "1_point_standard": "The age transformation looks realistic, with well-handled details in skin texture, hair, and facial features."
+        },
+        {
+            "question": "Does the transformed image exhibit overall aesthetic appeal, being visually attractive and meeting professional presentation standards, fulfilling aesthetic expectations?",
+            "0_point_standard": "The transformed image lacks aesthetic appeal and does not meet professional presentation standards.",
+            "1_point_standard": "The transformed image exhibits strong aesthetic appeal, professionalism, and visual attractiveness."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_age_transformation_0002/images.txt b/dataset/human_attribute_editing_age_transformation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fbb8f2a4e89b17cd6465f3901a86ae08c76d8f07
--- /dev/null
+++ b/dataset/human_attribute_editing_age_transformation_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01fxjKkE1yx0U7CAVJx_!!6000000006644-0-tps-800-1199.jpg
diff --git a/dataset/human_attribute_editing_age_transformation_0002/instruction.txt b/dataset/human_attribute_editing_age_transformation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d4d781dc581760d1f8fd927f658c9d48a5282ad6
--- /dev/null
+++ b/dataset/human_attribute_editing_age_transformation_0002/instruction.txt
@@ -0,0 +1 @@
+Transform this image of the young girl into her as an adult, keeping key facial features such as the shape of her eyes, nose, and mouth, but showing age-related changes like more defined facial contours and possibly a different hairstyle.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_age_transformation_0002/meta.json b/dataset/human_attribute_editing_age_transformation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..2cf8c5a0c4b192f9f1ce47266334f0d011ef43a7
--- /dev/null
+++ b/dataset/human_attribute_editing_age_transformation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human age transformation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0077",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_body_painting_0001/eval.json b/dataset/human_attribute_editing_body_painting_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..b0fa20d55d66765012b3f2b211e901061bd4c758
--- /dev/null
+++ b/dataset/human_attribute_editing_body_painting_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the body paint pattern in the generated image accurately applied to the same body part as described in the text?",
+            "0_point_standard": "The body paint is incorrectly positioned or significantly deviates from the specified area, such as applying the pattern to an unexpected part of the body.",
+            "1_point_standard": "The body paint is applied to the specified body part according to the description, such as the face, shoulders, or torso, without notable deviation."
+        },
+        {
+            "question": "Does the generated image retain the identity and key visual features of the person based on the input reference image?",
+            "0_point_standard": "The output image does not resemble the person in the reference image, with significant differences in facial features, pose, or body structure.",
+            "1_point_standard": "The output image closely resembles the person in the reference image, accurately reflecting facial features, pose, and body structure."
+        },
+        {
+            "question": "Does the generated body paint match the design, style, and level of detail specified in the text description?",
+            "0_point_standard": "There are significant differences in the style, level of detail, or design of the body paint from the specified description, lacking the expected visual effect.",
+            "1_point_standard": "The body paint closely follows the specified design, style, and level of detail, accurately reflecting the expected visual effect."
+        },
+        {
+            "question": "In the generated image, are elements other than the specified body paint unchanged?",
+            "0_point_standard": "Elements unrelated to the body paint (such as the background, clothing, or other visible parts of the body) have been altered or modified, affecting the image's consistency.",
+            "1_point_standard": "All other elements aside from the body paint (such as the background, clothing, or other visible parts of the body) remain unchanged, maintaining logical consistency."
+        },
+        {
+            "question": "Does the body paint seamlessly blend with the person's skin, with natural contours and shadows that match the body's form?",
+            "0_point_standard": "The body paint appears artificially applied, lacking natural blending with the skin's contours, or has issues with shadows and perspective.",
+            "1_point_standard": "The body paint seamlessly blends with the skin, following natural contours and shadows, presenting a realistic and harmonious appearance."
+        },
+        {
+            "question": "Does the overall image have high aesthetic quality, with clear details, smooth lines, and balanced composition?",
+            "0_point_standard": "The image has noticeable defects, such as rough lines, blurriness, or unbalanced composition, impacting its visual appeal.",
+            "1_point_standard": "The image is aesthetically pleasing, with clear details, smooth lines, and balanced composition, creating a high-quality visual output."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_body_painting_0001/images.txt b/dataset/human_attribute_editing_body_painting_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..38056543558903a75914e4e0a4859d55c1949b21
--- /dev/null
+++ b/dataset/human_attribute_editing_body_painting_0001/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i2/O1CN01FEjF4L1zQmEEVIYzt_!!6000000006709-0-tps-2692-3586.jpg
+https://img.alicdn.com/imgextra/i4/O1CN01Wsd5lf1hVfNCrbFDg_!!6000000004283-0-tps-577-864.jpg
diff --git a/dataset/human_attribute_editing_body_painting_0001/instruction.txt b/dataset/human_attribute_editing_body_painting_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d6709a288bbedb200129d1bcf63da833cf57f631
--- /dev/null
+++ b/dataset/human_attribute_editing_body_painting_0001/instruction.txt
@@ -0,0 +1 @@
+Apply the floral pattern from the reference image onto the face in this black-and-white photo. Ensure that the floral design overlays the face with the same color, style, and positioning as seen in the reference. All other elements of the black-and-white photo, including the expression, pose, grayscale tone, and lighting, should remain unchanged. The final image should have the floral pattern blending seamlessly with the face, while maintaining the integrity and monochromatic style of the original portrait.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_body_painting_0001/meta.json b/dataset/human_attribute_editing_body_painting_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..e2f824d9dc54525af2f9e6cd0b9f52264d088f66
--- /dev/null
+++ b/dataset/human_attribute_editing_body_painting_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "body_painting",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0078",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_body_painting_0003/eval.json b/dataset/human_attribute_editing_body_painting_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..c75fbcd5baf53d73031a0bfc690d82ded84e3f3e
--- /dev/null
+++ b/dataset/human_attribute_editing_body_painting_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the body paint pattern accurately applied to the same body part as described in the text in the generated image?",
+            "0_point_standard": "The body paint is misplaced or significantly deviates from the specified area, such as applying the pattern to an unexpected part of the body.",
+            "1_point_standard": "The body paint is applied to the specified body part according to the description, such as the face, shoulder, or torso, without significant deviation."
+        },
+        {
+            "question": "Does the generated image retain the identity and key visual features of the person based on the input reference image?",
+            "0_point_standard": "The output image does not resemble the person in the reference image, with major differences in facial features, posture, or body structure.",
+            "1_point_standard": "The output image closely resembles the person in the reference image, accurately reflecting facial features, posture, and body structure."
+        },
+        {
+            "question": "Does the generated body paint match the design, style, and level of detail specified in the text description?",
+            "0_point_standard": "There are significant differences in the style, level of detail, or design of the body paint compared to the specified description, lacking the expected visual effect.",
+            "1_point_standard": "The body paint closely follows the specified design, style, and level of detail, accurately reflecting the intended visual effect."
+        },
+        {
+            "question": "In the generated image, do elements other than the specified body paint remain unchanged?",
+            "0_point_standard": "Elements unrelated to the body paint, such as the background, clothing, or other visible body parts, are altered or modified, affecting image consistency.",
+            "1_point_standard": "All other elements besides the body paint, such as the background, clothing, or other visible body parts, remain unchanged, maintaining logical consistency."
+        },
+        {
+            "question": "Does the body paint seamlessly blend with the person's skin, with natural contours and shadows matching the body form?",
+            "0_point_standard": "The body paint looks artificially applied, lacking natural blending with skin contours, or has issues with shadows and perspective.",
+            "1_point_standard": "The body paint seamlessly blends with the skin, following natural contours and shadows, presenting a realistic and harmonious appearance."
+        },
+        {
+            "question": "Does the overall image have high aesthetic quality, with clear details, smooth lines, and balanced composition?",
+            "0_point_standard": "The image has noticeable defects such as rough lines, blurriness, or an unbalanced composition, affecting visual appeal.",
+            "1_point_standard": "The image is aesthetically pleasing, with clear details, smooth lines, and balanced composition, creating a high-quality visual output."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_body_painting_0003/images.txt b/dataset/human_attribute_editing_body_painting_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9210fd0e1c11b8d36c92552ba1a746556e5f2443
--- /dev/null
+++ b/dataset/human_attribute_editing_body_painting_0003/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i3/O1CN015skwQ21jo6Y9ZgeUl_!!6000000004594-0-tps-2333-3500.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01Me3Wzw1o3SHn6GAl6_!!6000000005169-0-tps-720-1080.jpg
diff --git a/dataset/human_attribute_editing_body_painting_0003/instruction.txt b/dataset/human_attribute_editing_body_painting_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..012a57bf8e7db03b65ef73c48842a9aa701863cc
--- /dev/null
+++ b/dataset/human_attribute_editing_body_painting_0003/instruction.txt
@@ -0,0 +1 @@
+Apply the intricate white body paint patterns from the reference image onto the face and upper body of the person in this photo. The patterns should be transferred with the same design, shape, and level of detail as seen in the reference, covering similar areas on the face and shoulders. Ensure that all other elements of the original photo—such as the person’s expression, pose, lighting, and natural skin tone—remain unchanged. The final image should blend the patterns seamlessly onto the skin, while preserving the authenticity and style of the original photograph.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_body_painting_0003/meta.json b/dataset/human_attribute_editing_body_painting_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..bfa6cad339e7c79129acdf316af4f8e500310954
--- /dev/null
+++ b/dataset/human_attribute_editing_body_painting_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "body_painting",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0078",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0001/eval.json b/dataset/human_attribute_editing_clothes_transformation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..81df01adafa220e84ac5497722015f09d41c60a8
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately preserve the unchanged elements of the original portrait photo, ensuring only the specified clothing parts are altered?",
+            "0_point_standard": "Image elements other than the specified clothing parts have been altered.",
+            "1_point_standard": "Only the specified clothing parts are altered, and all other elements of the image remain unchanged."
+        },
+        {
+            "question": "Does the transformed clothing in the image maintain consistency with the original content, style, and identity of the person in the reference photo?",
+            "0_point_standard": "The transformed clothing disrupts the overall style or alters the identity of the person in the reference photo.",
+            "1_point_standard": "The transformed clothing maintains consistency with the person's original content, style, and identity."
+        },
+        {
+            "question": "Does the modified clothing in the image align with the specific details and requirements listed in the text description?",
+            "0_point_standard": "The clothing modification does not accurately reflect the details or requirements specified in the text description.",
+            "1_point_standard": "The clothing modification closely matches the specific details and requirements listed in the text description."
+        },
+        {
+            "question": "Does the modified clothing naturally fit the person's body and posture, ensuring a seamless and realistic integration?",
+            "0_point_standard": "The modified clothing does not naturally fit the person's body or posture, appearing misaligned, awkward, or unrealistic.",
+            "1_point_standard": "The modified clothing seamlessly fits the person's body and posture, creating a realistic and natural overall appearance."
+        },
+        {
+            "question": "Does the transformed clothing exhibit high-quality visual elements, such as realistic textures, shadows, and folds, to enhance the overall realism of the image?",
+            "0_point_standard": "The transformed clothing lacks realistic textures or shadows, resulting in a stiff or unnatural appearance.",
+            "1_point_standard": "The transformed clothing exhibits high-quality visual elements, with realistic textures, shadows, and folds."
+        },
+        {
+            "question": "Does the final image possess strong aesthetic appeal, maintaining visual harmony and balance while meeting professional standards of image quality?",
+            "0_point_standard": "The image lacks aesthetic appeal, with poor visual harmony or balance.",
+            "1_point_standard": "The image possesses strong aesthetic appeal, maintaining visual harmony and balance, meeting professional standards of image quality."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0001/images.txt b/dataset/human_attribute_editing_clothes_transformation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..363d09294aaea1bb2f97bc1562c7843d0c047a62
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01gWritl1MdoUGl2wxr_!!6000000001458-0-tps-1363-2048.jpg
diff --git a/dataset/human_attribute_editing_clothes_transformation_0001/instruction.txt b/dataset/human_attribute_editing_clothes_transformation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d780c1123051a33014ede719a9fa3d6c967ac328
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0001/instruction.txt
@@ -0,0 +1 @@
+Please change her dress in the image to a pink checkered pattern, keeping the background and all other details of the person unchanged and modifying only the color and style of the dress.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0001/meta.json b/dataset/human_attribute_editing_clothes_transformation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..29c0213db40e0ac61d3ee08eec2e561c8ad97d8a
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human clothes transformation",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0075",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0002/eval.json b/dataset/human_attribute_editing_clothes_transformation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5989c7b0e2a120eddf69437acc22e4e9e7d3a57a
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately retain the unchanged elements of the original portrait photo, ensuring that only the specified clothing area has been modified?",
+            "0_point_standard": "Elements of the image other than the specified clothing area have been changed.",
+            "1_point_standard": "Only the specified clothing area has been modified, and all other elements of the image remain unchanged."
+        },
+        {
+            "question": "Does the transformed clothing in the image maintain consistency with the original content, style, and identity of the person in the reference photo?",
+            "0_point_standard": "The transformed clothing disrupts the overall style or alters the identity of the person in the reference photo.",
+            "1_point_standard": "The transformed clothing is consistent with the original content, style, and identity of the person."
+        },
+        {
+            "question": "Does the modified clothing in the image match the specific details and requirements listed in the textual description?",
+            "0_point_standard": "The clothing modification does not accurately reflect the specific details or requirements specified in the textual description.",
+            "1_point_standard": "The clothing modification closely matches the specific details and requirements listed in the textual description."
+        },
+        {
+            "question": "Does the modified clothing fit naturally with the person's body and posture, ensuring a seamless and realistic integration?",
+            "0_point_standard": "The modified clothing does not fit naturally with the person's body or posture, appearing misaligned, awkward, or unrealistic.",
+            "1_point_standard": "The modified clothing seamlessly fits with the person's body and posture, creating a realistic and natural overall appearance."
+        },
+        {
+            "question": "Does the transformed clothing exhibit high-quality visual elements, such as realistic textures, shadows, and folds, to enhance the overall realism of the image?",
+            "0_point_standard": "The transformed clothing lacks realistic textures or shadows, resulting in a stiff or unnatural appearance.",
+            "1_point_standard": "The transformed clothing exhibits high-quality visual elements with realistic textures, shadows, and folds."
+        },
+        {
+            "question": "Does the final image possess strong aesthetic appeal, maintaining visual harmony and balance while meeting professional standards of image quality?",
+            "0_point_standard": "The image lacks aesthetic appeal, with poor visual harmony or balance.",
+            "1_point_standard": "The image possesses strong aesthetic appeal, maintaining visual harmony and balance, and meets professional standards of image quality."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0002/images.txt b/dataset/human_attribute_editing_clothes_transformation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..39e5f20d78a255d9ab8d40c9bfd8cbe81bf13aac
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01Jf8JAt1SbsVUoRJzs_!!6000000002266-0-tps-4000-5688.jpg
diff --git a/dataset/human_attribute_editing_clothes_transformation_0002/instruction.txt b/dataset/human_attribute_editing_clothes_transformation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..da1e94b9042e6e1ac345a396616d183175059ded
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0002/instruction.txt
@@ -0,0 +1 @@
+Please change the shirt he is wearing in the image to a gray round-neck T-shirt, keeping the background and all other details of the person unchanged, only modifying the style of the shirt.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0002/meta.json b/dataset/human_attribute_editing_clothes_transformation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..6ef62349f8be8204cae5259cbab664981df358b6
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human clothes transformation",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0075",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0003/eval.json b/dataset/human_attribute_editing_clothes_transformation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..1600a6f9da6639ca12a0f104b15d1bb411778c61
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately preserve the unchanged elements of the original portrait photo, ensuring that only the specified clothing parts have been modified?",
+            "0_point_standard": "Elements of the image other than the specified clothing parts have been altered.",
+            "1_point_standard": "Only the specified clothing parts have been modified, and all other elements of the image remain unchanged."
+        },
+        {
+            "question": "Does the transformed clothing in the image maintain consistency with the original content, style, and identity of the person in the reference photo?",
+            "0_point_standard": "The transformed clothing disrupts the overall style or alters the identity of the person in the reference photo.",
+            "1_point_standard": "The transformed clothing is consistent with the original content, style, and identity of the person."
+        },
+        {
+            "question": "Does the modified clothing in the image match the specific details and requirements listed in the text description?",
+            "0_point_standard": "The clothing modification does not accurately reflect the details or requirements specified in the text description.",
+            "1_point_standard": "The clothing modification closely matches the specific details and requirements listed in the text description."
+        },
+        {
+            "question": "Does the modified clothing naturally fit the person's body and posture, ensuring a seamless and realistic integration?",
+            "0_point_standard": "The modified clothing does not naturally fit the person's body or posture, appearing misaligned, awkward, or unrealistic.",
+            "1_point_standard": "The modified clothing seamlessly fits the person's body and posture, creating a realistic and natural overall appearance."
+        },
+        {
+            "question": "Does the transformed clothing exhibit high-quality visual elements, such as realistic textures, shading, and folds, to enhance the overall realism of the image?",
+            "0_point_standard": "The transformed clothing lacks realistic textures or shading, resulting in a stiff or unnatural appearance.",
+            "1_point_standard": "The transformed clothing exhibits high-quality visual elements, featuring realistic textures, shading, and folds."
+        },
+        {
+            "question": "Does the final image possess a strong aesthetic appeal while maintaining visual harmony and balance, meeting professional standards of image quality?",
+            "0_point_standard": "The image lacks aesthetic appeal, with poor visual harmony or balance.",
+            "1_point_standard": "The image possesses strong aesthetic appeal, maintaining visual harmony and balance, and meets professional standards of image quality."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0003/images.txt b/dataset/human_attribute_editing_clothes_transformation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e68403577396695b339c9097dbfa60c74293a2b0
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0003/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01ewB0tl1qYiYPPmIyN_!!6000000005508-0-tps-3150-5183.jpg
diff --git a/dataset/human_attribute_editing_clothes_transformation_0003/instruction.txt b/dataset/human_attribute_editing_clothes_transformation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..faddd7528764b166bacf060a759ad4e6c465b93f
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0003/instruction.txt
@@ -0,0 +1 @@
+Please change her denim shorts in the image to long denim pants, keeping the background and all other details of the person unchanged, only modifying the style of the pants.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0003/meta.json b/dataset/human_attribute_editing_clothes_transformation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..97f6d6ab029f1decab815fd1032b37b28127ddb6
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human clothes transformation",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0075",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0004/eval.json b/dataset/human_attribute_editing_clothes_transformation_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5747d12829f1ba6c84bfbd31ab88845ab33dcbf3
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately preserve unchanged elements from the original portrait photo, ensuring only the specified clothing parts are modified?",
+            "0_point_standard": "Elements of the image other than the specified clothing parts have been changed.",
+            "1_point_standard": "Only the specified clothing parts are modified, and all other elements of the image remain unchanged."
+        },
+        {
+            "question": "Does the transformed clothing in the image maintain consistency with the original content, style, and identity of the person in the reference photo?",
+            "0_point_standard": "The transformed clothing disrupts the overall style or changes the identity of the person in the reference photo.",
+            "1_point_standard": "The transformed clothing is consistent with the original content, style, and identity of the person."
+        },
+        {
+            "question": "Does the modified clothing in the image align with the specific details and requirements listed in the text description?",
+            "0_point_standard": "The clothing modification does not accurately reflect the details or requirements specified in the text description.",
+            "1_point_standard": "The clothing modification closely matches the specific details and requirements listed in the text description."
+        },
+        {
+            "question": "Does the modified clothing naturally fit with the person's body and pose, ensuring a seamless and realistic integration?",
+            "0_point_standard": "The modified clothing does not naturally fit with the person's body or pose, appearing misaligned, awkward, or unrealistic.",
+            "1_point_standard": "The modified clothing seamlessly fits with the person's body and pose, creating a realistic and natural overall appearance."
+        },
+        {
+            "question": "Does the transformed clothing exhibit high-quality visual elements, such as realistic textures, shadows, and folds, to enhance the overall realism of the image?",
+            "0_point_standard": "The transformed clothing lacks realistic textures or shadows, resulting in a stiff or unnatural appearance.",
+            "1_point_standard": "The transformed clothing exhibits high-quality visual elements with realistic textures, shadows, and folds."
+        },
+        {
+            "question": "Does the final image possess a strong aesthetic appeal, maintaining visual harmony and balance while meeting professional standards of image quality?",
+            "0_point_standard": "The image lacks aesthetic appeal, with poor visual harmony or balance.",
+            "1_point_standard": "The image possesses strong aesthetic appeal, maintaining visual harmony and balance, and meets professional standards of image quality."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0004/images.txt b/dataset/human_attribute_editing_clothes_transformation_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2eb7a450d60caf2d036208fd80cea66f40bb73e9
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0004/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01pjv55S1VHDUMFlruE_!!6000000002627-0-tps-3347-5020.jpg
diff --git a/dataset/human_attribute_editing_clothes_transformation_0004/instruction.txt b/dataset/human_attribute_editing_clothes_transformation_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..99ec47b5297cfdd99265f210d1022b6cfab29034
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0004/instruction.txt
@@ -0,0 +1 @@
+Please change her dress in the image to a sports outfit, keeping the background and all other details of the person unchanged, only modifying the style of the outfit.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_clothes_transformation_0004/meta.json b/dataset/human_attribute_editing_clothes_transformation_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b350a9e0bccd3b3e05f6805ea87eb761edd38b5
--- /dev/null
+++ b/dataset/human_attribute_editing_clothes_transformation_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human clothes transformation",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0075",
+    "output_image_count": 1,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_hairstyle_transformation_0001/eval.json b/dataset/human_attribute_editing_hairstyle_transformation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..90e267875f1fa45e273afb1af942485861731dbf
--- /dev/null
+++ b/dataset/human_attribute_editing_hairstyle_transformation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image retain the original facial features and expressions of the character, ensuring no unexpected changes to the face?",
+            "0_point_standard": "The modified image shows changes or distortions in facial features or expressions.",
+            "1_point_standard": "The facial features and expressions are consistent with the original image, with no unexpected changes."
+        },
+        {
+            "question": "Aside from the hairstyle, does the rest of the image remain unchanged and consistent with the original portrait?",
+            "0_point_standard": "Apart from the hairstyle, there are noticeable changes or distortions in other parts of the image.",
+            "1_point_standard": "The rest of the image remains unchanged, with only the hairstyle being modified."
+        },
+        {
+            "question": "Does the new hairstyle accurately reflect the specifications and style described in the text input?",
+            "0_point_standard": "The hairstyle does not match the style or specifications provided in the text description.",
+            "1_point_standard": "The hairstyle accurately matches the style and specifications outlined in the text input."
+        },
+        {
+            "question": "Is the modified hairstyle correctly aligned and positioned on the character's head, with no misalignment or awkward placement?",
+            "0_point_standard": "The modified hairstyle is misaligned or awkwardly positioned on the character's head, appearing unnatural or out of place.",
+            "1_point_standard": "The modified hairstyle is accurately aligned and positioned, fitting naturally on the character's head with no apparent misalignment."
+        },
+        {
+            "question": "Does the modified hairstyle maintain a natural appearance with appropriate texture and volume that matches the original hair quality?",
+            "0_point_standard": "The modified hairstyle looks fake, with poor texture or volume that is inconsistent with the original hair quality.",
+            "1_point_standard": "The modified hairstyle looks natural, with texture and volume matching the original hair."
+        },
+        {
+            "question": "Does the overall image have high aesthetic appeal, and does the modified hairstyle enhance the character's appearance in a visually pleasing way?",
+            "0_point_standard": "The modified hairstyle reduces the overall aesthetic appeal of the image.",
+            "1_point_standard": "The modified hairstyle enhances the character's appearance, making the image more visually appealing."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_hairstyle_transformation_0001/images.txt b/dataset/human_attribute_editing_hairstyle_transformation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..52120df6702535d4205749551b4f3a370443f54b
--- /dev/null
+++ b/dataset/human_attribute_editing_hairstyle_transformation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01wkOe2x1p6UWptAJS3_!!6000000005311-0-tps-4480-6720.jpg
diff --git a/dataset/human_attribute_editing_hairstyle_transformation_0001/instruction.txt b/dataset/human_attribute_editing_hairstyle_transformation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b0bd77495cfaa12ce7eb57f2896868869c07a591
--- /dev/null
+++ b/dataset/human_attribute_editing_hairstyle_transformation_0001/instruction.txt
@@ -0,0 +1 @@
+Please modify the hairstyle in this image by changing the woman's straight hair into an elegant low bun, with the front bangs left hanging naturally. The rest of the hair should be styled into a simple, neat low bun at the back. Keep all other details, such as clothing and background, unchanged. The goal is to generate a new image that reflects the updated hairstyle, which should look sophisticated and graceful.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_hairstyle_transformation_0001/meta.json b/dataset/human_attribute_editing_hairstyle_transformation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..3d30498b6a479d37583a4b34460bde3f525c3542
--- /dev/null
+++ b/dataset/human_attribute_editing_hairstyle_transformation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human hairstyle transformation",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0080",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0001/eval.json b/dataset/human_attribute_editing_pose_transformation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..9baa59b150116f9dc2442bbb49e23c090dd42b8a
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the edited portrait accurately reflect the specified pose changes while retaining the subject's natural anatomy and proportions?",
+            "0_point_standard": "The pose changes look unnatural or distort the subject's anatomy and proportions, reducing realism.",
+            "1_point_standard": "The pose changes are naturally executed, retaining the subject's anatomy and proportions, achieving a realistic adjustment."
+        },
+        {
+            "question": "Are the areas of the image not related to the pose change kept unchanged, retaining their original appearance and details?",
+            "0_point_standard": "The areas of the image not related to the pose change have been altered or show noticeable changes.",
+            "1_point_standard": "The areas of the image not related to the pose change remain unchanged, retaining their original appearance and details."
+        },
+        {
+            "question": "Does the edited image maintain the original content and style of the input image, ensuring consistency of identity and environment?",
+            "0_point_standard": "The content and style of the edited image significantly deviate from the input image, affecting identity recognition or environmental consistency.",
+            "1_point_standard": "The content and style of the edited image are consistent with the input image, preserving identity recognition and environmental context."
+        },
+        {
+            "question": "Are the limbs and body parts in the edited pose placed in a natural and realistic way, consistent with human anatomy?",
+            "0_point_standard": "The limbs or body parts in the new pose look awkward, improperly positioned, or inconsistent with human anatomy, affecting overall realism.",
+            "1_point_standard": "The placement of limbs and body parts is natural and realistic, consistent with human anatomy, enhancing the credibility of the pose."
+        },
+        {
+            "question": "Does the edited pose seamlessly integrate with the subject's environment, including consistent lighting and shadows?",
+            "0_point_standard": "The pose integrates poorly with the environment, with noticeable inconsistencies in lighting, shadows, or spatial orientation.",
+            "1_point_standard": "The edited pose seamlessly integrates with the environment, with consistent lighting, shadows, and spatial orientation, enhancing overall realism."
+        },
+        {
+            "question": "Does the edited image exhibit a high level of professional aesthetics, paying attention to details such as lighting, color balance, and composition?",
+            "0_point_standard": "The edited image lacks aesthetic appeal, with insufficient attention to details such as lighting, color balance, or composition.",
+            "1_point_standard": "The edited image exhibits professional aesthetics, with meticulous attention to details such as lighting, color balance, and composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0001/images.txt b/dataset/human_attribute_editing_pose_transformation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9bbce09b4cb31441ea3bc4d852635b6f3eda46fa
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01XpvM1F1J1qoObgjb5_!!6000000000969-0-tps-564-846.jpg
diff --git a/dataset/human_attribute_editing_pose_transformation_0001/instruction.txt b/dataset/human_attribute_editing_pose_transformation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..34dc2ed28a4b9e8c7ed3ff63e0f8c669ef9a88b2
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0001/instruction.txt
@@ -0,0 +1 @@
+I am giving you an image of a girl. Please keep the background, facial details, clothing, and all other elements unchanged, but change her action to a jumping pose. The background and all other details must remain the same.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0001/meta.json b/dataset/human_attribute_editing_pose_transformation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..119f0df49c7563a0761c810b2e7d02a3ae77b3db
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human pose transformation",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0079",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0003/eval.json b/dataset/human_attribute_editing_pose_transformation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..75ba4b1322dd45e6c29ae948d5ea01d29b403f83
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the edited portrait accurately reflect the specified pose changes while preserving the natural anatomy and proportions of the figure?",
+            "0_point_standard": "The pose changes appear unnatural or distort the figure's anatomy and proportions, reducing realism.",
+            "1_point_standard": "The pose changes are executed naturally, preserving the figure's anatomy and proportions, achieving a realistic adjustment."
+        },
+        {
+            "question": "Are the areas of the image not related to the pose change left unchanged, maintaining their original appearance and details?",
+            "0_point_standard": "The areas of the image unrelated to the pose change have been altered or show noticeable changes.",
+            "1_point_standard": "The areas of the image unrelated to the pose change remain unchanged, retaining their original appearance and details."
+        },
+        {
+            "question": "Does the edited image maintain the original content and style of the input image, ensuring consistency of identity and environment?",
+            "0_point_standard": "The content and style of the edited image significantly deviate from the input image, affecting identity recognition or environmental consistency.",
+            "1_point_standard": "The content and style of the edited image are consistent with the input image, preserving identity recognition and environmental background."
+        },
+        {
+            "question": "Are the limbs and body parts in the edited pose placed in a natural and realistic manner consistent with human anatomy?",
+            "0_point_standard": "The limbs or body parts in the new pose look awkward, misplaced, or inconsistent with human anatomy, affecting overall realism.",
+            "1_point_standard": "The placement of limbs and body parts is natural and realistic, consistent with human anatomy, enhancing the plausibility of the pose."
+        },
+        {
+            "question": "Does the edited pose seamlessly integrate with the character's environment, including consistent lighting and shadows?",
+            "0_point_standard": "The pose integrates poorly with the environment, with noticeable inconsistencies in lighting, shadows, or spatial orientation.",
+            "1_point_standard": "The edited pose seamlessly integrates with the environment, with consistent lighting, shadows, and spatial orientation, enhancing overall realism."
+        },
+        {
+            "question": "Does the edited image exhibit a high level of professional aesthetics, with attention to detail in lighting, color balance, and composition?",
+            "0_point_standard": "The edited image lacks aesthetic appeal, with insufficient attention to details like lighting, color balance, or composition.",
+            "1_point_standard": "The edited image exhibits professional aesthetics, with meticulous attention to details in lighting, color balance, and composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0003/images.txt b/dataset/human_attribute_editing_pose_transformation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7927065ec8f08d9ad42c2109a60892eb9e279417
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0003/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01A2xABR1NRjerqXOOn_!!6000000001567-0-tps-735-1104.jpg
diff --git a/dataset/human_attribute_editing_pose_transformation_0003/instruction.txt b/dataset/human_attribute_editing_pose_transformation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a15e31c4f80c615e01735d442cf28d917f3c736f
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0003/instruction.txt
@@ -0,0 +1 @@
+I am giving you an image of a girl. Please keep the background, facial details, clothing, and all other elements unchanged, but change her action to a standing pose. The background and all other details must remain the same.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0003/meta.json b/dataset/human_attribute_editing_pose_transformation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b965bf62748a250569190be1c6a89c19c02b456
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human pose transformation",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0079",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0004/eval.json b/dataset/human_attribute_editing_pose_transformation_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..3375e5bc705acbd592271d0ce354d95db80c4653
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the edited portrait accurately reflect the specified pose changes while retaining the subject's natural anatomy and proportions?",
+            "0_point_standard": "The pose changes appear unnatural or distort the subject's anatomy and proportions, reducing realism.",
+            "1_point_standard": "The pose changes are executed naturally, retaining the subject's anatomy and proportions, achieving realistic adjustments."
+        },
+        {
+            "question": "Are image areas unrelated to the pose change preserved, maintaining their original appearance and detail?",
+            "0_point_standard": "Image areas unrelated to the pose change have been altered or show noticeable changes.",
+            "1_point_standard": "Image areas unrelated to the pose change remain unchanged, retaining their original appearance and detail."
+        },
+        {
+            "question": "Does the edited image maintain the original content and style of the input image, ensuring consistency in identity and environment?",
+            "0_point_standard": "The edited image's content and style show significant deviations from the input image, affecting identity recognition or environmental consistency.",
+            "1_point_standard": "The edited image's content and style are consistent with the input image, preserving identity recognition and environmental background."
+        },
+        {
+            "question": "Are the limbs and body parts in the edited pose placed naturally and realistically, consistent with human anatomy?",
+            "0_point_standard": "Limbs or body parts in the new pose appear awkward or misplaced, inconsistent with human anatomy, affecting overall realism.",
+            "1_point_standard": "The placement of limbs and body parts is natural and realistic, consistent with human anatomy, enhancing the credibility of the pose."
+        },
+        {
+            "question": "Does the edited pose seamlessly blend with the subject's environment, including consistent lighting and shadowing?",
+            "0_point_standard": "The pose does not blend well with the environment, with noticeable inconsistencies in lighting, shadowing, or spatial orientation.",
+            "1_point_standard": "The edited pose seamlessly blends with the environment, with consistent lighting, shadowing, and spatial orientation, enhancing overall realism."
+        },
+        {
+            "question": "Does the edited image exhibit a high level of professional aesthetics, with attention to details such as lighting, color balance, and composition?",
+            "0_point_standard": "The edited image lacks aesthetic appeal, with insufficient attention to details such as lighting, color balance, or composition.",
+            "1_point_standard": "The edited image exhibits professional aesthetics, with meticulous attention to lighting, color balance, and composition details."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0004/images.txt b/dataset/human_attribute_editing_pose_transformation_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7321a046a229ff73620f22d36542d44f5748d6a5
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0004/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01DtEUaI1X1lM8T1h53_!!6000000002864-0-tps-736-981.jpg
diff --git a/dataset/human_attribute_editing_pose_transformation_0004/instruction.txt b/dataset/human_attribute_editing_pose_transformation_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a394c281f69ba40c61d59ede18fc527b348d24ad
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0004/instruction.txt
@@ -0,0 +1 @@
+I am giving you an image of a girl. Please keep the background, facial details, clothing, and all other elements unchanged, but change her action to a hand-waving gesture. The background and all other details must remain the same.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_pose_transformation_0004/meta.json b/dataset/human_attribute_editing_pose_transformation_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..87146812bcfae6d20f7d49f1aedd0229ee1dd931
--- /dev/null
+++ b/dataset/human_attribute_editing_pose_transformation_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human pose transformation",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0079",
+    "output_image_count": 1,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_sex_transformation_0002/eval.json b/dataset/human_attribute_editing_sex_transformation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d00cf5f9e4da4ea2d0172095fe5df7dfdae1eae2
--- /dev/null
+++ b/dataset/human_attribute_editing_sex_transformation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image effectively change the gender of the character while preserving the core facial features and identity of the original portrait?",
+            "0_point_standard": "The gender change is ineffective, or the character's core facial features and identity have been significantly altered.",
+            "1_point_standard": "The gender change is effective, and the character's core facial features and identity are preserved."
+        },
+        {
+            "question": "Does the generated image maintain the composition and style of the original portrait, ensuring that only the intended gender change is applied?",
+            "0_point_standard": "Beyond the gender change, the overall composition or style of the original portrait has been noticeably altered.",
+            "1_point_standard": "The composition and style of the original portrait are preserved, with changes limited to the intended gender change."
+        },
+        {
+            "question": "Does the generated image accurately reflect the specific gender change instructions provided in the text description, such as hairstyle or clothing adjustments?",
+            "0_point_standard": "The gender change does not fully comply with the specific instructions given in the text description.",
+            "1_point_standard": "The gender change accurately reflects all the specific instructions provided in the text description."
+        },
+        {
+            "question": "Is the quality of the gender transformation consistent across the entire image, avoiding any artifacts or unnatural elements in the edited areas?",
+            "0_point_standard": "There are noticeable artifacts or unnatural elements in the gender transformation areas of the image.",
+            "1_point_standard": "The gender transformation is seamless, with the edited areas appearing natural and free of artifacts."
+        },
+        {
+            "question": "Does the generated image display a high level of detail and realism in the transformed features, such as skin texture, hair, and facial expressions?",
+            "0_point_standard": "The transformed features lack detail and realism, appearing artificial or poorly executed.",
+            "1_point_standard": "The transformed features exhibit a high level of detail and realism, enhancing the overall quality of the image."
+        },
+        {
+            "question": "Does the generated image possess overall aesthetic appeal, being visually cohesive and pleasing, and meeting professional standards for portrait editing?",
+            "0_point_standard": "The image lacks aesthetic appeal and does not meet professional standards for portrait editing.",
+            "1_point_standard": "The image exhibits strong aesthetic appeal, being visually cohesive and pleasing, and meets professional standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_sex_transformation_0002/images.txt b/dataset/human_attribute_editing_sex_transformation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b1961b7d073b852c09b8dc2a3c02f8b14948600
--- /dev/null
+++ b/dataset/human_attribute_editing_sex_transformation_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01fqL4fk1NWJcUZJcZF_!!6000000001577-0-tps-3456-5184.jpg
diff --git a/dataset/human_attribute_editing_sex_transformation_0002/instruction.txt b/dataset/human_attribute_editing_sex_transformation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9b1c4f988b28a6a7073bfa8d90458cd15b644868
--- /dev/null
+++ b/dataset/human_attribute_editing_sex_transformation_0002/instruction.txt
@@ -0,0 +1 @@
+Transform the male character in the image into a female character, keeping the rest unchanged. Modify the hairstyle to medium-length wavy hair. The body should be slimmer, and the outfit should be adjusted to a stylish business attire suited for a female, maintaining a formal yet softer vibe.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_sex_transformation_0002/meta.json b/dataset/human_attribute_editing_sex_transformation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..048ae81942d976e93ba764c1b718daca8fe17dcd
--- /dev/null
+++ b/dataset/human_attribute_editing_sex_transformation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human sex transformation",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0074",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_sex_transformation_0003/eval.json b/dataset/human_attribute_editing_sex_transformation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c02f63fb8d70dba77fda1b7cbf926db0c30f5e3
--- /dev/null
+++ b/dataset/human_attribute_editing_sex_transformation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image effectively change the gender of the character while retaining the core facial features and identity of the original portrait?",
+            "0_point_standard": "The gender change is ineffective, or the core facial features and identity of the character have significantly changed.",
+            "1_point_standard": "The gender change is effective, and the core facial features and identity of the character are retained."
+        },
+        {
+            "question": "Does the generated image maintain the composition and style of the original portrait, ensuring only the intended gender change is applied?",
+            "0_point_standard": "There is a noticeable change in the overall composition or style of the original portrait besides the gender change.",
+            "1_point_standard": "The composition and style of the original portrait are preserved, with changes limited to the intended gender change."
+        },
+        {
+            "question": "Does the generated image accurately reflect the specific gender change instructions provided in the text description, such as hairstyle or clothing adjustments?",
+            "0_point_standard": "The gender change does not fully comply with the specific instructions given in the text description.",
+            "1_point_standard": "The gender change accurately reflects all specific instructions provided in the text description."
+        },
+        {
+            "question": "Is the quality of the gender change consistent throughout the image, avoiding any artifacts or unnatural elements in the edited areas?",
+            "0_point_standard": "There are noticeable artifacts or unnatural elements in the gender-changed areas of the image.",
+            "1_point_standard": "The gender change is seamless, with the edited areas looking natural and free of artifacts."
+        },
+        {
+            "question": "Do the transformed features (e.g., skin texture, hair, and facial expression) in the generated image display a high level of detail and realism?",
+            "0_point_standard": "The transformed features lack detail and realism, appearing artificial or poorly executed.",
+            "1_point_standard": "The transformed features exhibit a high level of detail and realism, enhancing the overall quality of the image."
+        },
+        {
+            "question": "Does the generated image possess overall aesthetic appeal, being visually cohesive and pleasing, in line with professional standards for portrait editing?",
+            "0_point_standard": "The image lacks aesthetic appeal and does not meet professional standards for portrait editing.",
+            "1_point_standard": "The image demonstrates strong aesthetic appeal, being cohesive and visually pleasing, meeting professional standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_sex_transformation_0003/images.txt b/dataset/human_attribute_editing_sex_transformation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a10e10f152b7c69e6d8a336b093ff0ceb96458ea
--- /dev/null
+++ b/dataset/human_attribute_editing_sex_transformation_0003/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01osXcj41oXE22LVpw3_!!6000000005234-0-tps-1440-2160.jpg
diff --git a/dataset/human_attribute_editing_sex_transformation_0003/instruction.txt b/dataset/human_attribute_editing_sex_transformation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8f032ec36de49dd150a52c6518370f480ef8aeeb
--- /dev/null
+++ b/dataset/human_attribute_editing_sex_transformation_0003/instruction.txt
@@ -0,0 +1 @@
+Change the male character in the image to a female character while keeping the rest of the elements the same. Modify the hairstyle to shoulder-length feminine hair. The body should be slimmer, and the outfit should be changed to an elegant evening dress for a woman, maintaining dark tones but highlighting feminine curves.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_sex_transformation_0003/meta.json b/dataset/human_attribute_editing_sex_transformation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ecee0cef08e43d03b19211db0e97e0547d41d28b
--- /dev/null
+++ b/dataset/human_attribute_editing_sex_transformation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "human sex transformation",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0074",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_tattoo_generation_0001/eval.json b/dataset/human_attribute_editing_tattoo_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c639d1ecf4fb4c22d8db9998ccb99d4596969c7
--- /dev/null
+++ b/dataset/human_attribute_editing_tattoo_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the tattoo generation accurately apply the tattoo effect to the specified part of the image without altering other areas?",
+            "0_point_standard": "The tattoo effect is applied to the incorrect area, or the rest of the image shows unintended modifications.",
+            "1_point_standard": "The tattoo effect is accurately applied only to the specified part, with the rest of the image remaining unchanged."
+        },
+        {
+            "question": "Does the generated tattoo retain the style and content from the reference image or text description?",
+            "0_point_standard": "There is a significant deviation in tattoo style or content from the reference image or text description.",
+            "1_point_standard": "The tattoo style and content closely match the reference image or text description."
+        },
+        {
+            "question": "Does the generated tattoo meet the specifications of the text description, including size, orientation, and position?",
+            "0_point_standard": "The tattoo does not meet the size, orientation, or position requirements specified in the text description.",
+            "1_point_standard": "The tattoo meets all size, orientation, and position specifications described in the text."
+        },
+        {
+            "question": "Does the tattoo effect seamlessly integrate into the existing image, respecting the original image's lighting, shadows, and texture?",
+            "0_point_standard": "The tattoo effect appears unnatural or poorly integrated, with noticeable inconsistencies in lighting, shadows, or texture.",
+            "1_point_standard": "The tattoo effect integrates seamlessly, respecting the original image's lighting, shadows, and texture."
+        },
+        {
+            "question": "Does the generated image maintain high aesthetic quality, ensuring the tattoo complements the overall image composition?",
+            "0_point_standard": "The addition of the tattoo diminishes the aesthetic quality of the image or disrupts the composition.",
+            "1_point_standard": "The tattoo enhances or maintains the overall aesthetic quality and composition of the image."
+        },
+        {
+            "question": "Is there a high level of detail and precision in the tattoo design, including clarity of lines and complexity of patterns?",
+            "0_point_standard": "The tattoo design lacks detail and precision, with blurry lines or undefined patterns.",
+            "1_point_standard": "The tattoo design is detailed and precise, with clear lines and well-defined patterns."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_tattoo_generation_0001/images.txt b/dataset/human_attribute_editing_tattoo_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e3ea9d15b134437f34f899397926143518f815b1
--- /dev/null
+++ b/dataset/human_attribute_editing_tattoo_generation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN016WCabU1DWdk4KP0Kq_!!6000000000224-0-tps-1200-1044.jpg
diff --git a/dataset/human_attribute_editing_tattoo_generation_0001/instruction.txt b/dataset/human_attribute_editing_tattoo_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..46e2c2f7248d401be1c75cf9162a5ebc7c91de8c
--- /dev/null
+++ b/dataset/human_attribute_editing_tattoo_generation_0001/instruction.txt
@@ -0,0 +1 @@
+A tattoo is added to the front of the thigh, featuring a frontal portrait of a wolf surrounded by an intricately detailed arrangement of flowers. The wolf's face is clearly outlined, with sharp eyes and upright ears, exuding an air of authority and calm. The tattoo is rendered in black and white line work, with fine detailing in the wolf's fur using delicate lines and shading, especially around the face and neck, enhancing the sense of depth and texture. Below the wolf's head is a cluster of flowers of varying sizes, accompanied by leaves. The flowers are intricately drawn, with clearly defined petal textures and layers of leaves that gracefully extend from beneath the wolf's jaw, providing a contrast between the softness of the flowers and the wolf's commanding presence. The overall design is both simple and detailed, blending flora and fauna to create a harmonious balance of gentleness and strength, reflecting the wearer's unique aesthetic and personality.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_tattoo_generation_0001/meta.json b/dataset/human_attribute_editing_tattoo_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ccd6385fb7db601cbe29a0115373921ec62f1684
--- /dev/null
+++ b/dataset/human_attribute_editing_tattoo_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "tattoo generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0076",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_tattoo_generation_0002/eval.json b/dataset/human_attribute_editing_tattoo_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..ea275c0cf3e1b4e37e64a97ee00ce9f0bd03b6e3
--- /dev/null
+++ b/dataset/human_attribute_editing_tattoo_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the tattoo generation accurately apply the tattoo effect to the specified part of the image without altering other areas?",
+            "0_point_standard": "The tattoo effect is applied to incorrect areas, or there are unintended modifications to the rest of the image.",
+            "1_point_standard": "The tattoo effect is accurately applied only to the specified part, and the rest of the image remains unchanged."
+        },
+        {
+            "question": "Does the generated tattoo retain the style and content from the reference image or text description?",
+            "0_point_standard": "There is a significant deviation in the tattoo style or content from the reference image or text description.",
+            "1_point_standard": "The tattoo style and content closely match the reference image or text description."
+        },
+        {
+            "question": "Does the generated tattoo conform to the specifications of the text description, including size, orientation, and position?",
+            "0_point_standard": "The tattoo does not meet the size, orientation, or position requirements specified in the text description.",
+            "1_point_standard": "The tattoo conforms to all size, orientation, and position specifications described in the text."
+        },
+        {
+            "question": "Does the tattoo effect seamlessly integrate into the existing image, respecting the original image's lighting, shadows, and texture?",
+            "0_point_standard": "The tattoo effect appears unnatural or poorly integrated, with noticeable inconsistencies in lighting, shadows, or texture.",
+            "1_point_standard": "The tattoo effect seamlessly integrates, respecting the original image's lighting, shadows, and texture."
+        },
+        {
+            "question": "Does the generated image maintain high aesthetic quality, ensuring the tattoo complements the overall composition of the image?",
+            "0_point_standard": "Adding the tattoo reduces the aesthetic quality of the image or disrupts the composition.",
+            "1_point_standard": "The tattoo enhances or maintains the overall aesthetic quality and composition of the image."
+        },
+        {
+            "question": "Is there a high level of detail and precision in the tattoo design, including clarity of lines and complexity of patterns?",
+            "0_point_standard": "The tattoo design lacks detail and precision, with blurry lines or poorly defined patterns.",
+            "1_point_standard": "The tattoo design is detailed and precise, with clear lines and well-defined patterns."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_tattoo_generation_0002/images.txt b/dataset/human_attribute_editing_tattoo_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..831bc7173104248dfcfe56f168f59ad00a87fd30
--- /dev/null
+++ b/dataset/human_attribute_editing_tattoo_generation_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01Byjclj221zdtISjq7_!!6000000007061-0-tps-1197-800.jpg
diff --git a/dataset/human_attribute_editing_tattoo_generation_0002/instruction.txt b/dataset/human_attribute_editing_tattoo_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..884e36e865701e62f79e2c4232d8480dea618892
--- /dev/null
+++ b/dataset/human_attribute_editing_tattoo_generation_0002/instruction.txt
@@ -0,0 +1 @@
+A tattoo is added to the shoulder and upper arm, depicting a large wing that spans across the shoulder and extends down the upper arm. The wing design is highly detailed, starting with shorter feathers at the top of the shoulder and gradually transitioning into longer feathers as it moves down the arm. The arrangement of the feathers creates a natural layered effect, with smooth, dynamic lines. Each feather is sharply outlined, with intricate inner textures and splits that make them appear lifelike. The longer feathers cascade down the upper arm, showcasing fine details that evoke the texture of real feathers. The tattoo is rendered in black and gray tones, using meticulous shading and line work to create a realistic play of light and shadow, enhancing the three-dimensional appearance. The edges of the wing are clean and precise, balancing elegance with strength. Near the bottom of the wing, there is a small section of handwritten-style text, adding a personalized touch to the design. The overall style is both graceful and powerful, symbolizing freedom and resilience.
\ No newline at end of file
diff --git a/dataset/human_attribute_editing_tattoo_generation_0002/meta.json b/dataset/human_attribute_editing_tattoo_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..68322a94fe68841da4ebfa12ca67e84bf72c14da
--- /dev/null
+++ b/dataset/human_attribute_editing_tattoo_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "tattoo generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0076",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/id_photo_generation_0001/eval.json b/dataset/id_photo_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..cf4d1e8ae1eedd24f73abfeeeb6c2f62e269a5fd
--- /dev/null
+++ b/dataset/id_photo_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated ID photo retain the facial features and identity of the person from the informal portrait?",
+            "0_point_standard": "The facial features and identity of the person in the ID photo have significantly changed or are unrecognizable compared to the informal portrait.",
+            "1_point_standard": "The ID photo accurately retains the facial features and identity of the person from the informal portrait."
+        },
+        {
+            "question": "Has the background of the generated ID photo been appropriately adjusted according to the standard ID photo requirements, while the other parts of the photo remain unchanged?",
+            "0_point_standard": "The background has not been appropriately adjusted according to the standard ID photo requirements, or other parts of the photo have been unnecessarily altered.",
+            "1_point_standard": "The background has been appropriately adjusted according to the standard ID photo requirements, with no unnecessary changes to other parts of the photo."
+        },
+        {
+            "question": "Does the generated ID photo follow the specific instructions in the text description, such as clothing adjustments or changes in expression?",
+            "0_point_standard": "The ID photo does not follow the specific instructions listed in the text description.",
+            "1_point_standard": "The ID photo accurately follows the specific instructions provided in the text description."
+        },
+        {
+            "question": "Has the lighting in the generated ID photo been adjusted according to the text description to ensure proper exposure and shadow elimination, presenting a professional appearance?",
+            "0_point_standard": "The lighting does not meet the requirements of the text description, resulting in improper exposure or visible shadows.",
+            "1_point_standard": "The lighting has been correctly adjusted to meet the text description requirements, ensuring proper exposure and shadow elimination."
+        },
+        {
+            "question": "Does the generated ID photo exhibit high-quality image details, such as clarity and sharpness, suitable for professional use?",
+            "0_point_standard": "The ID photo lacks clarity and sharpness, resulting in low image quality not suitable for professional use.",
+            "1_point_standard": "The ID photo exhibits high-quality image details, with clarity and sharpness suitable for professional use."
+        },
+        {
+            "question": "Does the generated ID photo have overall aesthetic consistency, balanced composition, and a professional effect?",
+            "0_point_standard": "The ID photo lacks aesthetic consistency, has poor composition, or does not have a professional effect.",
+            "1_point_standard": "The ID photo exhibits aesthetic consistency, balanced composition, and a professional effect."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/id_photo_generation_0001/images.txt b/dataset/id_photo_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..12f675859063b334913bd614e96340d73ca8c746
--- /dev/null
+++ b/dataset/id_photo_generation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01Wj1iKU1Z0VVAVO8dO_!!6000000003132-0-tps-564-1002.jpg
diff --git a/dataset/id_photo_generation_0001/instruction.txt b/dataset/id_photo_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0b684db558dc9538b8543ad972c16b6eb502c62a
--- /dev/null
+++ b/dataset/id_photo_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a corresponding ID photo based on the provided casual photo, keeping the facial details, hairstyle, and other key identity features unchanged. The background should be changed to a solid color, typically light blue or white, in line with standard ID photo requirements. The facial expression should remain natural without significant adjustments to ensure consistency with the original casual photo.
\ No newline at end of file
diff --git a/dataset/id_photo_generation_0001/meta.json b/dataset/id_photo_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..49ce1a9316e91860713091da577d536e1fe8ddcb
--- /dev/null
+++ b/dataset/id_photo_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "ID photo generation",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0097",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_blending_artisic_collage_0001/auto_eval.jsonl b/dataset/image_blending_artisic_collage_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..391ec1f2d5126f239c7bbca56be9e0b4aed092c9
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Please create a collage using the four provided dog photos in a style similar to a “Pet Daily Life” display. Each image can be cropped slightly during the process to maintain visual balance and aesthetic appeal. The final collage should feature the four images on a light beige background, with each photo framed by white borders to evoke a relaxed and heartwarming atmosphere. Arrange the photos in two rows: one image in the top left and one in the bottom right, with another in the top right and one in the bottom left, creating a slightly asymmetrical layout for added visual interest. Add a title above or below the collage that reads “Pet Daily Life” along with a subtitle “Sharing My Fur Babies,” in brown text, using a simple and soft font that complements the overall design. The final collage should feel warm and natural, with the text and images blending harmoniously without detracting from the main focus on the photos.\"\nYour review question is:\nIs the output image clearly derived from the input photos, maintaining recognizable elements and ensuring that the collage reflects the content of the input images? 0 points: The output image does not clearly derive from the input photos, with key elements unrecognizable or missing, or the collage appears unrelated to the inputs. 
Not every input image can be found in the final collage (the bottom row image) with the image details unchanged. 1 point: The output image is evidently derived from the input photos with every detail consistent with the input photos, with recognizable elements and a coherent reflection of the original content within the collage.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Please create a collage using the four provided dog photos in a style similar to a “Pet Daily Life” display. Each image can be cropped slightly during the process to maintain visual balance and aesthetic appeal. The final collage should feature the four images on a light beige background, with each photo framed by white borders to evoke a relaxed and heartwarming atmosphere. Arrange the photos in two rows: one image in the top left and one in the bottom right, with another in the top right and one in the bottom left, creating a slightly asymmetrical layout for added visual interest. Add a title above or below the collage that reads “Pet Daily Life” along with a subtitle “Sharing My Fur Babies,” in brown text, using a simple and soft font that complements the overall design. The final collage should feel warm and natural, with the text and images blending harmoniously without detracting from the main focus on the photos.\"\nYour review question is:\nAccuracy of Text Display in the Final Collage: 0 points: The text in the final collage does not fully match the requirements outlined in the text prompt, with noticeable inaccuracies or missing elements. 1 point: The text in the final collage accurately reflects the requirements in the text prompt, with all specified wording and details displayed correctly.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Please create a collage using the four provided dog photos in a style similar to a “Pet Daily Life” display. Each image can be cropped slightly during the process to maintain visual balance and aesthetic appeal. The final collage should feature the four images on a light beige background, with each photo framed by white borders to evoke a relaxed and heartwarming atmosphere. Arrange the photos in two rows: one image in the top left and one in the bottom right, with another in the top right and one in the bottom left, creating a slightly asymmetrical layout for added visual interest. Add a title above or below the collage that reads “Pet Daily Life” along with a subtitle “Sharing My Fur Babies,” in brown text, using a simple and soft font that complements the overall design. The final collage should feel warm and natural, with the text and images blending harmoniously without detracting from the main focus on the photos.\"\nYour review question is:\nDoes the assembled collage meet the specific requirements and instructions outlined in the text description, such as layout or thematic elements?   0 points: The collage does not adhere to the specified requirements or instructions given in the text description, with evident deviations from the layout or thematic elements.   
1 point: The collage closely follows the requirements and instructions from the text description, accurately reflecting the specified layout and thematic elements.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Please create a collage using the four provided dog photos in a style similar to a “Pet Daily Life” display. Each image can be cropped slightly during the process to maintain visual balance and aesthetic appeal. The final collage should feature the four images on a light beige background, with each photo framed by white borders to evoke a relaxed and heartwarming atmosphere. Arrange the photos in two rows: one image in the top left and one in the bottom right, with another in the top right and one in the bottom left, creating a slightly asymmetrical layout for added visual interest. Add a title above or below the collage that reads “Pet Daily Life” along with a subtitle “Sharing My Fur Babies,” in brown text, using a simple and soft font that complements the overall design. The final collage should feel warm and natural, with the text and images blending harmoniously without detracting from the main focus on the photos.\"\nYour review question is:\nIs the consistency of style and identity maintained across the collage, ensuring that elements are harmoniously integrated and recognizable as the same subject or theme? 0 points: The collage lacks consistency in style or identity, with elements that clash in appearance or fail to be recognized as the same subject or theme. 
1 point: The collage maintains consistency in style and identity, with harmoniously integrated elements that are easily recognizable as the same subject or theme.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Please create a collage using the four provided dog photos in a style similar to a “Pet Daily Life” display. Each image can be cropped slightly during the process to maintain visual balance and aesthetic appeal. The final collage should feature the four images on a light beige background, with each photo framed by white borders to evoke a relaxed and heartwarming atmosphere. Arrange the photos in two rows: one image in the top left and one in the bottom right, with another in the top right and one in the bottom left, creating a slightly asymmetrical layout for added visual interest. Add a title above or below the collage that reads “Pet Daily Life” along with a subtitle “Sharing My Fur Babies,” in brown text, using a simple and soft font that complements the overall design. The final collage should feel warm and natural, with the text and images blending harmoniously without detracting from the main focus on the photos.\"\nYour review question is:\nAdherence to Specified Layout Requirements: 0 points: The final collage (the bottom row image) does not fully reflect the layout requirements specified in the text description, with deviations from instructions such as asymmetry, relative sizing, or arrangement details strictly. e.g., If the text requirement contains \"asymmetrical layout\", you should pay attention to whether the output collage adopts a completely symmetrical layout. 
If so, it should be scored 0 points. 1 point: The final collage perfectly adheres to the layout requirements outlined in the text description in every detail, accurately implementing details such as asymmetry, size hierarchy, and arrangement exactly as specified.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Please create a collage using the four provided dog photos in a style similar to a “Pet Daily Life” display. Each image can be cropped slightly during the process to maintain visual balance and aesthetic appeal. The final collage should feature the four images on a light beige background, with each photo framed by white borders to evoke a relaxed and heartwarming atmosphere. Arrange the photos in two rows: one image in the top left and one in the bottom right, with another in the top right and one in the bottom left, creating a slightly asymmetrical layout for added visual interest. Add a title above or below the collage that reads “Pet Daily Life” along with a subtitle “Sharing My Fur Babies,” in brown text, using a simple and soft font that complements the overall design. The final collage should feel warm and natural, with the text and images blending harmoniously without detracting from the main focus on the photos.\"\nYour review question is:\nFrom a professional perspective, does the collage exhibit high-quality aesthetics, with attention to detail, composition, and overall visual appeal? 0 points: The collage exhibits poor aesthetics, with a lack of attention to detail, poor composition, and an unappealing visual effect. 
1 point: The collage displays high-quality aesthetics, with careful attention to detail, strong composition, and an appealing overall visual effect.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
diff --git a/dataset/image_blending_artisic_collage_0001/eval.json b/dataset/image_blending_artisic_collage_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..f8f7b240ebbd81cc04be6df4e90a6ddce44a56b7
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image clearly stem from the input photos, maintaining recognizable elements, and ensuring that the collage reflects the content of the input images?",
+            "0_point_standard": "The output image does not clearly stem from the input photos, key elements are unrecognizable or missing, or the collage appears unrelated to the input. The final collage (the image in the bottom row) does not include each input image with details preserved.",
+            "1_point_standard": "The output image clearly stems from the input photos, with every detail consistent with the input photos, elements are recognizable, and the collage clearly reflects the original content."
+        },
+        {
+            "question": "Is the text displayed in the final collage accurate?",
+            "0_point_standard": "The text in the final collage does not fully match the requirements in the text prompt, with noticeable inaccuracies or omissions.",
+            "1_point_standard": "The text in the final collage accurately reflects the requirements in the text prompt, with all specified words and details correctly displayed."
+        },
+        {
+            "question": "Does the collage adhere to the specific requirements and instructions stipulated in the text description, such as layout or thematic elements?",
+            "0_point_standard": "The collage does not follow the requirements or instructions stipulated in the text description, with noticeable deviations in layout or thematic elements.",
+            "1_point_standard": "The collage strictly adheres to the requirements and instructions in the text description, accurately reflecting the specified layout and thematic elements."
+        },
+        {
+            "question": "Is the style and recognizability of the collage consistent, ensuring elements are harmoniously integrated and can be identified as part of the same theme or subject?",
+            "0_point_standard": "The collage's style or recognizability is inconsistent, with elements clashing in appearance or unable to be identified as part of the same theme or subject.",
+            "1_point_standard": "The collage maintains consistency in style and recognizability, elements are harmoniously integrated and easily identifiable as part of the same theme or subject."
+        },
+        {
+            "question": "Are the specified layout requirements followed?",
+            "0_point_standard": "The final collage (the image in the bottom row) does not fully reflect the layout requirements specified in the text description, with deviations in aspects such as asymmetry, relative size, or arrangement details. For example, if the text requirement includes 'asymmetrical layout,' attention should be paid to whether the output collage adopts a fully symmetrical layout, in which case it scores 0 points.",
+            "1_point_standard": "The final collage perfectly follows the layout requirements in the text description, with each detail accurately achieving the specified requirements of asymmetry, size hierarchy, and arrangement."
+        },
+        {
+            "question": "From a professional perspective, does the collage exhibit high-quality aesthetic effects, focusing on detail, composition, and overall visual appeal?",
+            "0_point_standard": "The collage exhibits poor aesthetic effects, lacks attention to detail, has poor composition, and overall visual appeal is unattractive.",
+            "1_point_standard": "The collage exhibits high-quality aesthetic effects, with careful attention to detail, good composition, and overall visual appeal is attractive."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_blending_artisic_collage_0001/images.txt b/dataset/image_blending_artisic_collage_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..508ae94bfa4f8db405c38d651859ee7aa6dcee0b
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0001/images.txt
@@ -0,0 +1,4 @@
+https://img.alicdn.com/imgextra/i3/O1CN018H6NXh28KfUKIR9Vx_!!6000000007914-0-tps-390-260.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01MnzOCb1d4rjbKlVvU_!!6000000003683-0-tps-390-260.jpg
+https://img.alicdn.com/imgextra/i1/O1CN013LlcOf1tYfM4LsL17_!!6000000005914-0-tps-390-260.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01oMRtWR1cHrM3jQNMz_!!6000000003576-0-tps-390-260.jpg
diff --git a/dataset/image_blending_artisic_collage_0001/instruction.txt b/dataset/image_blending_artisic_collage_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0dfe31ab0c0bfe53d9aa4d953fec6d3ac65fd499
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0001/instruction.txt
@@ -0,0 +1 @@
+Please create a collage using the four provided dog photos in a style similar to a “Pet Daily Life” display. Each image can be cropped slightly during the process to maintain visual balance and aesthetic appeal. The final collage should feature the four images on a light beige background, with each photo framed by white borders to evoke a relaxed and heartwarming atmosphere. Arrange the photos in two rows: one image in the top left and one in the bottom right, with another in the top right and one in the bottom left, creating a slightly asymmetrical layout for added visual interest. Add a title above or below the collage that reads “Pet Daily Life” along with a subtitle “Sharing My Fur Babies,” in brown text, using a simple and soft font that complements the overall design. The final collage should feel warm and natural, with the text and images blending harmoniously without detracting from the main focus on the photos.
\ No newline at end of file
diff --git a/dataset/image_blending_artisic_collage_0001/meta.json b/dataset/image_blending_artisic_collage_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..bac96178172debac2e7e4583d76e41c23a7ed10c
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "artistic collage",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0065",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_blending_artisic_collage_0002/auto_eval.jsonl b/dataset/image_blending_artisic_collage_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..30a647ec076c13f00aa8ff4ac125cf7acbca83c7
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": ["0001.jpg", "0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Combine the two input images into a cohesive artistic collage. Use soft pastel colors as the background, such as light blues and pinks, blending them seamlessly behind the images. Place the first image on the left and the second on the right. Ensure that the two images are neatly aligned and can be slightly cropped if necessary to fit the overall design. Add decorative borders around each image with thin, rounded lines, and create a visually pleasing spacing between them. Above the images, add the text ‘Happy Birthday!' in a large, elegant, light blue font. Below the images, place a small cake icon with three candles in matching blue tones. Maintain the simplicity and elegance of the layout, ensuring that the final composition feels balanced and festive.\"\nYour review question is:\nIs the output image clearly derived from the input photos, maintaining recognizable elements and ensuring that the collage reflects the content of the input images? 0 points: The output image does not clearly derive from the input photos, with key elements unrecognizable or missing, or the collage appears unrelated to the inputs. 
Not every input image can be found in the final collage (the bottom row image) with the image details unchanged. 1 point: The output image is evidently derived from the input photos with every detail consistent with the input photos, with recognizable elements and a coherent reflection of the original content within the collage.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Combine the two input images into a cohesive artistic collage. Use soft pastel colors as the background, such as light blues and pinks, blending them seamlessly behind the images. Place the first image on the left and the second on the right. Ensure that the two images are neatly aligned and can be slightly cropped if necessary to fit the overall design. Add decorative borders around each image with thin, rounded lines, and create a visually pleasing spacing between them. Above the images, add the text ‘Happy Birthday!' in a large, elegant, light blue font. Below the images, place a small cake icon with three candles in matching blue tones. Maintain the simplicity and elegance of the layout, ensuring that the final composition feels balanced and festive.\"\nYour review question is:\nAccuracy of Text Display in the Final Collage: 0 points: The text in the final collage does not fully match the requirements outlined in the text prompt, with noticeable inaccuracies or missing elements. 1 point: The text in the final collage accurately reflects the requirements in the text prompt, with all specified wording and details displayed correctly.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Combine the two input images into a cohesive artistic collage. Use soft pastel colors as the background, such as light blues and pinks, blending them seamlessly behind the images. Place the first image on the left and the second on the right. Ensure that the two images are neatly aligned and can be slightly cropped if necessary to fit the overall design. Add decorative borders around each image with thin, rounded lines, and create a visually pleasing spacing between them. Above the images, add the text ‘Happy Birthday!' in a large, elegant, light blue font. Below the images, place a small cake icon with three candles in matching blue tones. Maintain the simplicity and elegance of the layout, ensuring that the final composition feels balanced and festive.\"\nYour review question is:\nDoes the assembled collage meet the specific requirements and instructions outlined in the text description, such as layout or thematic elements?   0 points: The collage does not adhere to the specified requirements or instructions given in the text description, with evident deviations from the layout or thematic elements.   1 point: The collage closely follows the requirements and instructions from the text description, accurately reflecting the specified layout and thematic elements.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Combine the two input images into a cohesive artistic collage. Use soft pastel colors as the background, such as light blues and pinks, blending them seamlessly behind the images. Place the first image on the left and the second on the right. Ensure that the two images are neatly aligned and can be slightly cropped if necessary to fit the overall design. Add decorative borders around each image with thin, rounded lines, and create a visually pleasing spacing between them. Above the images, add the text ‘Happy Birthday!' in a large, elegant, light blue font. Below the images, place a small cake icon with three candles in matching blue tones. Maintain the simplicity and elegance of the layout, ensuring that the final composition feels balanced and festive.\"\nYour review question is:\nIs the consistency of style and identity maintained across the collage, ensuring that elements are harmoniously integrated and recognizable as the same subject or theme? 0 points: The collage lacks consistency in style or identity, with elements that clash in appearance or fail to be recognized as the same subject or theme. 1 point: The collage maintains consistency in style and identity, with harmoniously integrated elements that are easily recognizable as the same subject or theme.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Combine the two input images into a cohesive artistic collage. Use soft pastel colors as the background, such as light blues and pinks, blending them seamlessly behind the images. Place the first image on the left and the second on the right. Ensure that the two images are neatly aligned and can be slightly cropped if necessary to fit the overall design. Add decorative borders around each image with thin, rounded lines, and create a visually pleasing spacing between them. Above the images, add the text ‘Happy Birthday!' in a large, elegant, light blue font. Below the images, place a small cake icon with three candles in matching blue tones. Maintain the simplicity and elegance of the layout, ensuring that the final composition feels balanced and festive.\"\nYour review question is:\nAdherence to Specified Layout Requirements: 0 points: The final collage (the bottom row image) does not fully reflect the layout requirements specified in the text description, with deviations from instructions such as asymmetry, relative sizing, or arrangement details strictly. e.g., If the text requirement contains \"asymmetrical layout\", you should pay attention to whether the output collage adopts a completely symmetrical layout. 
If so, it should be scored 0 points. 1 point: The final collage perfectly adheres to the layout requirements outlined in the text description in every detail, accurately implementing details such as asymmetry, size hierarchy, and arrangement exactly as specified.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
+{"input_images": ["0001.jpg", "0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of two rows of images, with the top row as the reference brand visual pattern for the design task and the bottom image as the response provided by a student. The task objective is to combine the given input images into a single image in the specified form. based on the text requirements.\nThe text requirement is:\n\"Combine the two input images into a cohesive artistic collage. Use soft pastel colors as the background, such as light blues and pinks, blending them seamlessly behind the images. Place the first image on the left and the second on the right. Ensure that the two images are neatly aligned and can be slightly cropped if necessary to fit the overall design. Add decorative borders around each image with thin, rounded lines, and create a visually pleasing spacing between them. Above the images, add the text ‘Happy Birthday!' in a large, elegant, light blue font. Below the images, place a small cake icon with three candles in matching blue tones. Maintain the simplicity and elegance of the layout, ensuring that the final composition feels balanced and festive.\"\nYour review question is:\nFrom a professional perspective, does the collage exhibit high-quality aesthetics, with attention to detail, composition, and overall visual appeal? 0 points: The collage exhibits poor aesthetics, with a lack of attention to detail, poor composition, and an unappealing visual effect. 1 point: The collage displays high-quality aesthetics, with careful attention to detail, strong composition, and an appealing overall visual effect.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}\nReturn: Evaluation"}
diff --git a/dataset/image_blending_artisic_collage_0002/eval.json b/dataset/image_blending_artisic_collage_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..fdaf632035c5f978b4e730679dcffbcfaf8dc6f0
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image clearly originate from the input photo, maintaining recognizable elements, and ensuring that the collage reflects the content of the input image?",
+            "0_point_standard": "The output image does not clearly originate from the input photo; key elements are unrecognizable or missing, or the collage appears unrelated to the input. The final collage (images in the bottom row) does not include every input image with details preserved.",
+            "1_point_standard": "The output image clearly originates from the input photo, with every detail consistent with the input photo, elements are recognizable, and the collage clearly reflects the original content."
+        },
+        {
+            "question": "Is the text displayed in the final collage accurate?",
+            "0_point_standard": "The text in the final collage does not fully match the requirements of the text prompt, with noticeable inaccuracies or omissions.",
+            "1_point_standard": "The text in the final collage accurately reflects the requirements of the text prompt, with all specified words and details correctly displayed."
+        },
+        {
+            "question": "Does the collage meet the specific requirements and instructions specified in the text description, such as layout or thematic elements?",
+            "0_point_standard": "The collage does not follow the requirements or instructions specified in the text description; the layout or thematic elements are noticeably deviated.",
+            "1_point_standard": "The collage strictly follows the requirements and instructions in the text description, accurately reflecting the specified layout and thematic elements."
+        },
+        {
+            "question": "Is the style and recognizability within the collage consistent, ensuring elements are harmoniously integrated and recognizable as part of the same theme or subject?",
+            "0_point_standard": "The collage's style or recognizability is inconsistent; elements appear conflicting in appearance or cannot be recognized as part of the same theme or subject.",
+            "1_point_standard": "The collage maintains consistency in style and recognizability, with elements harmoniously integrated and easily recognizable as part of the same theme or subject."
+        },
+        {
+            "question": "Are the specified layout requirements followed?",
+            "0_point_standard": "The final collage (images in the bottom row) does not fully reflect the layout requirements specified in the text description, such as deviations in asymmetry, relative sizes, or arrangement details. For example, if the text requires an 'asymmetrical layout,' attention should be paid to whether the output collage adopts a fully symmetrical layout; if so, it scores 0 points.",
+            "1_point_standard": "The final collage perfectly follows the layout requirements in the text description, with every detail accurately implementing specified requirements such as asymmetry, size hierarchy, and arrangement."
+        },
+        {
+            "question": "From a professional standpoint, does the collage possess a high-quality aesthetic effect, with attention to detail, composition, and overall visual appeal?",
+            "0_point_standard": "The collage has poor aesthetic effects, lacking attention to detail, with poor composition and overall visual appeal.",
+            "1_point_standard": "The collage exhibits high-quality aesthetic effects, with careful attention to detail, good composition, and overall visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_blending_artisic_collage_0002/images.txt b/dataset/image_blending_artisic_collage_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9dffa0326bd5e92a60473e594d556f54efa5b46c
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0002/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i1/O1CN01fOVY3H1rDwCSAdhsY_!!6000000005598-0-tps-3825-5118.jpg
+https://img.alicdn.com/imgextra/i2/O1CN013Wpscq1plFmMa1Dd9_!!6000000005400-0-tps-3745-5021.jpg
diff --git a/dataset/image_blending_artisic_collage_0002/instruction.txt b/dataset/image_blending_artisic_collage_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3d5671d9a58f60da918cb4539035d70b41a007f5
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0002/instruction.txt
@@ -0,0 +1 @@
+Combine the two input images into a cohesive artistic collage. Use soft pastel colors as the background, such as light blues and pinks, blending them seamlessly behind the images. Place the first image on the left and the second on the right. Ensure that the two images are neatly aligned and can be slightly cropped if necessary to fit the overall design. Add decorative borders around each image with thin, rounded lines, and create a visually pleasing spacing between them. Above the images, add the text ‘Happy Birthday!’ in a large, elegant, light blue font. Below the images, place a small cake icon with three candles in matching blue tones. Maintain the simplicity and elegance of the layout, ensuring that the final composition feels balanced and festive.
\ No newline at end of file
diff --git a/dataset/image_blending_artisic_collage_0002/meta.json b/dataset/image_blending_artisic_collage_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ead369d3b0c33bb879599b665c68f1b68835ebf3
--- /dev/null
+++ b/dataset/image_blending_artisic_collage_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "artistic collage",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0065",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_blending_double_exposure_0002/auto_eval.jsonl b/dataset/image_blending_double_exposure_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..3004f466d22f3c34f44f8fe5c4483e7287034fa2
--- /dev/null
+++ b/dataset/image_blending_double_exposure_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first input image and output image of the response provided by a student. The task objective is to use dual exposure effect to merge two images into one image. \nThe text requirement is:\nPlease generate a blended image using the given two images, with the blending effect aiming to achieve a visual style similar to double exposure. The goal is to seamlessly merge the bride from the first image with the beach setting from the second image. Specifically, keep the bride's main outline and details clearly visible, while blending her background with the beach environment, ensuring a smooth transition between the two. Gradient overlay techniques can be used to gradually mix the bride's silhouette with the sky, water, and sand of the beach, creating a soft, dreamlike effect. The final result should evoke a romantic wedding atmosphere, visually giving the impression that the bride is harmoniously integrated with the natural surroundings, with the overall style maintaining a gentle and delicate light treatment.\nYour review question is:\nDoes the output image retain the main features and pose of the bride from the first input image? 0 points: The bride’s features or pose are significantly altered or unclear in the output image. 1 point: The bride’s features and pose are clear and match the original appearance in the first input image.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second input image and output image of the response provided by a student. The task objective is to use dual exposure effect to merge two images into one image. \nThe text requirement is:\nPlease generate a blended image using the given two images, with the blending effect aiming to achieve a visual style similar to double exposure. The goal is to seamlessly merge the bride from the first image with the beach setting from the second image. Specifically, keep the bride's main outline and details clearly visible, while blending her background with the beach environment, ensuring a smooth transition between the two. Gradient overlay techniques can be used to gradually mix the bride's silhouette with the sky, water, and sand of the beach, creating a soft, dreamlike effect. The final result should evoke a romantic wedding atmosphere, visually giving the impression that the bride is harmoniously integrated with the natural surroundings, with the overall style maintaining a gentle and delicate light treatment.\nYour review question is:\nIs the beach scene from the second input image accurately represented in the background of the output image, preserving key elements like the water and sand? 0 points: The beach scene is not clearly visible, or key elements like water and sand are missing or distorted. 1 point: The beach scene, including water and sand, is clear and well-integrated into the background.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the output image of the response provided by a student. The task objective is to use dual exposure effect to merge two images into one image. \nThe text requirement is:\nPlease generate a blended image using the given two images, with the blending effect aiming to achieve a visual style similar to double exposure. The goal is to seamlessly merge the bride from the first image with the beach setting from the second image. Specifically, keep the bride's main outline and details clearly visible, while blending her background with the beach environment, ensuring a smooth transition between the two. Gradient overlay techniques can be used to gradually mix the bride's silhouette with the sky, water, and sand of the beach, creating a soft, dreamlike effect. The final result should evoke a romantic wedding atmosphere, visually giving the impression that the bride is harmoniously integrated with the natural surroundings, with the overall style maintaining a gentle and delicate light treatment.\nYour review question is:\nDoes the blending between the bride’s outline and the beach scene create a smooth, gradient-like transition? 0 points: The transition between the bride’s outline and the beach background is abrupt or inconsistent, with visible boundaries. 1 point: The transition is smooth and gradient-like, providing a natural blend between the bride and the beach setting.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the output image of the response provided by a student. The task objective is to use dual exposure effect to merge two images into one image. \nThe text requirement is:\nPlease generate a blended image using the given two images, with the blending effect aiming to achieve a visual style similar to double exposure. The goal is to seamlessly merge the bride from the first image with the beach setting from the second image. Specifically, keep the bride's main outline and details clearly visible, while blending her background with the beach environment, ensuring a smooth transition between the two. Gradient overlay techniques can be used to gradually mix the bride's silhouette with the sky, water, and sand of the beach, creating a soft, dreamlike effect. The final result should evoke a romantic wedding atmosphere, visually giving the impression that the bride is harmoniously integrated with the natural surroundings, with the overall style maintaining a gentle and delicate light treatment.\nYour review question is:\nDoes the output image evoke a romantic atmosphere suitable for a wedding, with delicate and gentle light treatment? 0 points: The image lacks a romantic or gentle light feel, appearing harsh or inconsistent with a wedding atmosphere. 1 point: The image has a soft, romantic light treatment that aligns well with a wedding theme.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the output image of the response provided by a student. The task objective is to use dual exposure effect to merge two images into one image. \nThe text requirement is:\nPlease generate a blended image using the given two images, with the blending effect aiming to achieve a visual style similar to double exposure. The goal is to seamlessly merge the bride from the first image with the beach setting from the second image. Specifically, keep the bride's main outline and details clearly visible, while blending her background with the beach environment, ensuring a smooth transition between the two. Gradient overlay techniques can be used to gradually mix the bride's silhouette with the sky, water, and sand of the beach, creating a soft, dreamlike effect. The final result should evoke a romantic wedding atmosphere, visually giving the impression that the bride is harmoniously integrated with the natural surroundings, with the overall style maintaining a gentle and delicate light treatment.\nYour review question is:\nIs the bride’s silhouette and outline clearly defined, without being overly blended into the beach background? 0 points: The bride’s silhouette is overly blended or unclear, losing distinction against the beach background. 1 point: The bride’s silhouette remains clearly defined and recognizable, while still being smoothly integrated with the beach background.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the output image of the response provided by a student. The task objective is to use dual exposure effect to merge two images into one image. \nThe text requirement is:\nPlease generate a blended image using the given two images, with the blending effect aiming to achieve a visual style similar to double exposure. The goal is to seamlessly merge the bride from the first image with the beach setting from the second image. Specifically, keep the bride's main outline and details clearly visible, while blending her background with the beach environment, ensuring a smooth transition between the two. Gradient overlay techniques can be used to gradually mix the bride's silhouette with the sky, water, and sand of the beach, creating a soft, dreamlike effect. The final result should evoke a romantic wedding atmosphere, visually giving the impression that the bride is harmoniously integrated with the natural surroundings, with the overall style maintaining a gentle and delicate light treatment.\nYour review question is:\nDoes the final output image appear harmonious, with the bride and beach scene seamlessly integrated as one cohesive image? 0 points: The image appears disjointed, with the bride and background not blending well, giving a layered or artificial look. 1 point: The image is visually harmonious, with the bride and beach scene seamlessly integrated as a cohesive composition.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/image_blending_double_exposure_0002/eval.json b/dataset/image_blending_double_exposure_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..2ce3b669b0d67e01ee826d6152719cbaa99419a3
--- /dev/null
+++ b/dataset/image_blending_double_exposure_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image retain the main features and pose of the bride from the first input image?",
+            "0_point_standard": "The bride's features or pose are significantly altered or unclear in the output image.",
+            "1_point_standard": "The bride's features and pose are clearly visible and consistent with her original appearance in the first input image."
+        },
+        {
+            "question": "Is the beach scene from the second input image accurately represented in the background of the output image, retaining key elements like the water and sand?",
+            "0_point_standard": "The beach scene is unclear, or key elements like the water and sand are missing or distorted.",
+            "1_point_standard": "The beach scene, including the water and sand, is clear and well-integrated into the background."
+        },
+        {
+            "question": "Is there a smooth gradient transition between the bride's silhouette and the beach scene?",
+            "0_point_standard": "The transition between the bride's silhouette and the beach background is abrupt or inconsistent, with noticeable boundaries.",
+            "1_point_standard": "The transition is smooth with a gradient effect, naturally blending the bride with the beach background."
+        },
+        {
+            "question": "Does the output image create a romantic atmosphere suitable for a wedding, with soft lighting?",
+            "0_point_standard": "The image lacks a romantic or soft lighting effect, with lighting appearing harsh or not fitting the wedding atmosphere.",
+            "1_point_standard": "The image features soft, romantic lighting that perfectly aligns with the wedding theme."
+        },
+        {
+            "question": "Are the bride's silhouette and image clearly visible, without being overly merged into the beach background?",
+            "0_point_standard": "The bride's silhouette is overly merged or unclear, losing distinction from the beach background.",
+            "1_point_standard": "The bride's silhouette is clearly discernible, with good recognition, while smoothly blending with the beach background."
+        },
+        {
+            "question": "Is the final output image harmonious, presenting a seamless integration of the bride and the beach scene?",
+            "0_point_standard": "The image appears disjointed, with poor integration of the bride and background, showing layered or unnatural effects.",
+            "1_point_standard": "The image is visually harmonious, with a seamless integration of the bride and beach scene, forming a unified composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_blending_double_exposure_0002/images.txt b/dataset/image_blending_double_exposure_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e23162d06fd0d1647565a5d04d3f914accbedc
--- /dev/null
+++ b/dataset/image_blending_double_exposure_0002/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i1/O1CN01AVWzH127arsjRYkbT_!!6000000007814-0-tps-1080-1080.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01famgVw1CCCRzuv0SK_!!6000000000044-0-tps-1080-1349.jpg
diff --git a/dataset/image_blending_double_exposure_0002/instruction.txt b/dataset/image_blending_double_exposure_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0093c43efb6d7ffeb4b6fb7064461411dae3f45
--- /dev/null
+++ b/dataset/image_blending_double_exposure_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a blended image using the given two images, with the blending effect aiming to achieve a visual style similar to double exposure. The goal is to seamlessly merge the bride from the first image with the beach setting from the second image. Specifically, keep the bride's main outline and details clearly visible, while blending her background with the beach environment, ensuring a smooth transition between the two. Gradient overlay techniques can be used to gradually mix the bride's silhouette with the sky, water, and sand of the beach, creating a soft, dreamlike effect. The final result should evoke a romantic wedding atmosphere, visually giving the impression that the bride is harmoniously integrated with the natural surroundings, with the overall style maintaining a gentle and delicate light treatment.
\ No newline at end of file
diff --git a/dataset/image_blending_double_exposure_0002/meta.json b/dataset/image_blending_double_exposure_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..67f8da872f42c37dabdec68b0fc44f8bf7cca55b
--- /dev/null
+++ b/dataset/image_blending_double_exposure_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "double exposure",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0064",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_blur_filed_blur_0001/eval.json b/dataset/image_blur_filed_blur_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..00996c10e5771b4c57e9903a6ffa79780baeada8
--- /dev/null
+++ b/dataset/image_blur_filed_blur_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately retain the unblurred parts of the original image after the background blur is applied?",
+            "0_point_standard": "The unblurred parts of the image have noticeable changes or distortions compared to the original image.",
+            "1_point_standard": "The unblurred parts of the image are accurately retained without any changes or distortions."
+        },
+        {
+            "question": "Does the generated image retain the key elements and features of the original image, ensuring consistency between the input and output images?",
+            "0_point_standard": "The key elements or features of the image have been altered or are inconsistent with the original image.",
+            "1_point_standard": "The key elements and features of the image are consistent with the original image, retaining its essential characteristics."
+        },
+        {
+            "question": "Does the blurred background effectively meet the requirement in the text description to separate or emphasize the main subject?",
+            "0_point_standard": "The blurred background fails to effectively separate or emphasize the main subject as per the text description.",
+            "1_point_standard": "The blurred background successfully separates or emphasizes the main subject as per the text description."
+        },
+        {
+            "question": "Does the generated image accurately follow the specific instructions in the text description regarding the degree or style of blurring (e.g., soft, intense)?",
+            "0_point_standard": "The degree or style of blurring in the generated image does not match the specific instructions given in the text.",
+            "1_point_standard": "The degree or style of blurring in the generated image accurately follows the specific instructions in the text."
+        },
+        {
+            "question": "Does the quality of the blur effect enhance the overall image, with smooth transitions and no noticeable flaws?",
+            "0_point_standard": "The blur effect has noticeable flaws or rough transitions, reducing the overall image quality.",
+            "1_point_standard": "The blur effect is applied smoothly without noticeable flaws, enhancing the overall quality of the image."
+        },
+        {
+            "question": "Does the generated image have a high aesthetic appeal, with effective composition and visual balance between blurred and unblurred areas?",
+            "0_point_standard": "The image lacks aesthetic appeal, with poor composition or visual balance between blurred and unblurred areas.",
+            "1_point_standard": "The image displays strong aesthetic appeal, with good composition and visual balance between blurred and unblurred areas."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_blur_filed_blur_0001/images.txt b/dataset/image_blur_filed_blur_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0b5dab5322cc213b93a1add90a410e6ae8559f45
--- /dev/null
+++ b/dataset/image_blur_filed_blur_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN010KVBff1Z3iH6N458q_!!6000000003139-0-tps-1280-853.jpg
diff --git a/dataset/image_blur_filed_blur_0001/instruction.txt b/dataset/image_blur_filed_blur_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..341665a5d34826d1efa296fbde5ca0595404b712
--- /dev/null
+++ b/dataset/image_blur_filed_blur_0001/instruction.txt
@@ -0,0 +1 @@
+Apply a background blur to this image, keeping the person and foreground in sharp focus to highlight the subject's details and expression, while softening the plants in the background to make them less distracting.
\ No newline at end of file
diff --git a/dataset/image_blur_filed_blur_0001/meta.json b/dataset/image_blur_filed_blur_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..d051e8a3bbb283be5ecd86ac5ea7427750bf7f12
--- /dev/null
+++ b/dataset/image_blur_filed_blur_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "field blur",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0068",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_blur_motion_blur_0001/eval.json b/dataset/image_blur_motion_blur_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..bd80fcbb484038a9262342fa3d124463457f928a
--- /dev/null
+++ b/dataset/image_blur_motion_blur_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately apply motion blur only to the specified moving parts without affecting other static elements?",
+            "0_point_standard": "Motion blur is applied to non-moving parts or is missing from the specified moving parts, resulting in inaccuracy.",
+            "1_point_standard": "Motion blur is correctly applied only to the moving parts, leaving static elements unchanged."
+        },
+        {
+            "question": "Does the generated image retain the overall structure and proportions of the object, ensuring consistency with the original image aside from the motion blur effect?",
+            "0_point_standard": "The overall structure or proportions of the object have been altered, distorting the original image.",
+            "1_point_standard": "Except for the motion blur effect, the structure and proportions of the object remain consistent with the original image."
+        },
+        {
+            "question": "Does the motion blur effect reflect the motion direction and speed described in the text input?",
+            "0_point_standard": "The motion blur effect fails to accurately represent the motion direction or speed specified in the text input.",
+            "1_point_standard": "The motion blur effect accurately captures the motion direction and speed described in the text."
+        },
+        {
+            "question": "Does the generated image meet all specific requirements mentioned in the text input, such as the intensity or length of the motion blur effect?",
+            "0_point_standard": "The image fails to meet certain specific requirements about the motion blur effect mentioned in the text input.",
+            "1_point_standard": "All specific requirements about the motion blur effect mentioned in the text input are fully met."
+        },
+        {
+            "question": "Does the motion blur effect enhance the visual realism of the image, giving it a natural and dynamic appearance?",
+            "0_point_standard": "The motion blur effect reduces the realism of the image, making it appear unnatural or awkward.",
+            "1_point_standard": "The motion blur effect enhances the realism of the image, providing a natural and dynamic appearance."
+        },
+        {
+            "question": "Does the overall image, including the motion blur effect, maintain a high aesthetic quality and clear visual appeal?",
+            "0_point_standard": "Due to the motion blur effect or other factors, the image lacks aesthetic quality and has poor visual appeal.",
+            "1_point_standard": "The image exhibits high aesthetic quality, with the motion blur effect positively contributing to its visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_blur_motion_blur_0001/images.txt b/dataset/image_blur_motion_blur_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d187e923646769ef96581ab1d9367ef3df264fe7
--- /dev/null
+++ b/dataset/image_blur_motion_blur_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN019KAsOH1e3K10ieA8H_!!6000000003815-0-tps-1280-853.jpg
diff --git a/dataset/image_blur_motion_blur_0001/instruction.txt b/dataset/image_blur_motion_blur_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..84f08513b89ece9a8d184e2a007db894761fe6fb
--- /dev/null
+++ b/dataset/image_blur_motion_blur_0001/instruction.txt
@@ -0,0 +1 @@
+Apply motion blur to this image of the motorcycle rider, blurring the wheels and surrounding road to create a strong sense of speed and motion.
\ No newline at end of file
diff --git a/dataset/image_blur_motion_blur_0001/meta.json b/dataset/image_blur_motion_blur_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..4aa6c170880607c71a5fb8dc0d4509690203671d
--- /dev/null
+++ b/dataset/image_blur_motion_blur_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "motion blur",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0070",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_blur_motion_blur_0002/eval.json b/dataset/image_blur_motion_blur_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..c70e29ec415ca0f03e2ee01310901fd920b0a7e7
--- /dev/null
+++ b/dataset/image_blur_motion_blur_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately apply motion blur only to the specified moving parts without affecting other static elements?",
+            "0_point_standard": "Motion blur is applied to non-moving parts or is missing from specified moving parts, resulting in inaccuracy.",
+            "1_point_standard": "Motion blur is correctly applied only to the moving parts, with static elements remaining unchanged."
+        },
+        {
+            "question": "Does the generated image maintain the overall structure and proportions of the object, ensuring consistency with the original image aside from the motion blur effect?",
+            "0_point_standard": "The overall structure or proportions of the object have been altered, distorting the original image.",
+            "1_point_standard": "Apart from the motion blur effect, the object's structure and proportions remain consistent with the original image."
+        },
+        {
+            "question": "Does the motion blur effect reflect the direction and speed of motion described in the text input?",
+            "0_point_standard": "The motion blur effect does not accurately represent the direction or speed of motion specified in the text input.",
+            "1_point_standard": "The motion blur effect accurately captures the direction and speed of motion described in the text."
+        },
+        {
+            "question": "Does the generated image meet all specific requirements mentioned in the text input, such as the intensity or length of the motion blur effect?",
+            "0_point_standard": "The image fails to meet certain specific requirements regarding the motion blur effect mentioned in the text input.",
+            "1_point_standard": "The image fully meets all specific requirements regarding the motion blur effect mentioned in the text input."
+        },
+        {
+            "question": "Does the motion blur effect enhance the visual realism of the image, giving it a natural and dynamic appearance?",
+            "0_point_standard": "The motion blur effect diminishes the realism of the image, making it appear unnatural or awkward.",
+            "1_point_standard": "The motion blur effect enhances the realism of the image, providing a natural and dynamic appearance."
+        },
+        {
+            "question": "Does the overall image, including the motion blur effect, maintain a high aesthetic quality and clear visual appeal?",
+            "0_point_standard": "Due to the motion blur effect or other factors, the image lacks aesthetic quality and has poor visual appeal.",
+            "1_point_standard": "The image exhibits high aesthetic quality, with the motion blur effect positively contributing to its visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_blur_motion_blur_0002/images.txt b/dataset/image_blur_motion_blur_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7a805f9412fe3809138ff60a9a111ab0068dc058
--- /dev/null
+++ b/dataset/image_blur_motion_blur_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01g64mJy1Y4LCWGAI32_!!6000000003005-0-tps-1280-854.jpg
diff --git a/dataset/image_blur_motion_blur_0002/instruction.txt b/dataset/image_blur_motion_blur_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c1259e9b95e7517ca76db29e449908c288619afd
--- /dev/null
+++ b/dataset/image_blur_motion_blur_0002/instruction.txt
@@ -0,0 +1 @@
+Apply motion blur to this image of the runner, blurring the legs and background to emphasize the speed and energy of the run.
\ No newline at end of file
diff --git a/dataset/image_blur_motion_blur_0002/meta.json b/dataset/image_blur_motion_blur_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..91270296bbdedf7eb2a765f5792ab2f856678aa1
--- /dev/null
+++ b/dataset/image_blur_motion_blur_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "motion blur",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0070",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_blur_rotation_blur_0002/eval.json b/dataset/image_blur_rotation_blur_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..58fd4f50dac3753c2629cb61cf5648eb6f4eb5f3
--- /dev/null
+++ b/dataset/image_blur_rotation_blur_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified portion of the image display the rotational blur effect specified in the task description?",
+            "0_point_standard": "The modified portion of the image lacks a rotational blur effect, or the blur effect is incorrectly applied.",
+            "1_point_standard": "The modified portion of the image clearly displays the expected rotational blur effect."
+        },
+        {
+            "question": "Does the rest of the image remain unchanged, maintaining its original quality and detail?",
+            "0_point_standard": "The unmodified part of the image shows noticeable changes or degradation.",
+            "1_point_standard": "The unmodified part of the image remains unchanged, retaining its original quality and detail."
+        },
+        {
+            "question": "Is the rotational blur effect well-associated with the original image content, maintaining consistency in style and recognizability?",
+            "0_point_standard": "The rotational blur effect disrupts the overall style or recognizability of the image, causing inconsistency.",
+            "1_point_standard": "The rotational blur effect maintains consistency with the style and recognizability of the original image."
+        },
+        {
+            "question": "Does the rotational blur effect follow any specific instructions in the text description, such as blur intensity or direction?",
+            "0_point_standard": "The blur application does not follow the specific instructions provided in the text description.",
+            "1_point_standard": "The blur application accurately follows the specific instructions provided in the text description."
+        },
+        {
+            "question": "Is the transition between the blurred and non-blurred parts smooth, without creating unnatural boundaries or artifacts?",
+            "0_point_standard": "The transition between the blurred and non-blurred parts is abrupt or contains noticeable artifacts.",
+            "1_point_standard": "The transition between the blurred and non-blurred parts is smooth and natural."
+        },
+        {
+            "question": "After applying the rotational blur, does the image maintain its overall aesthetic, ensuring it meets professional visual standards?",
+            "0_point_standard": "The image lacks aesthetic appeal or does not meet the professional visual standards after modification.",
+            "1_point_standard": "The image maintains good aesthetic appeal and meets professional visual standards after modification."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_blur_rotation_blur_0002/images.txt b/dataset/image_blur_rotation_blur_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..60205ba53181a5bc69c2c80a5327b46bbd35d75b
--- /dev/null
+++ b/dataset/image_blur_rotation_blur_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01eptYrs1TybmTWsZzc_!!6000000002451-0-tps-2600-1721.jpg
diff --git a/dataset/image_blur_rotation_blur_0002/instruction.txt b/dataset/image_blur_rotation_blur_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0a72a157320f6fea608f54623f7c8d7b8178fa2
--- /dev/null
+++ b/dataset/image_blur_rotation_blur_0002/instruction.txt
@@ -0,0 +1 @@
+Apply rotational blur to the car wheel in this image, blurring the spinning part to create the sensation of the car in motion, while keeping the car body and background in sharp focus.
\ No newline at end of file
diff --git a/dataset/image_blur_rotation_blur_0002/meta.json b/dataset/image_blur_rotation_blur_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..e61f8142f121020108eda83a2c4f18b70180ca73
--- /dev/null
+++ b/dataset/image_blur_rotation_blur_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "rotation blur",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0069",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_completion_0002/eval.json b/dataset/image_completion_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c18cf07caa203096b7b7de86533d048468ac6e8
--- /dev/null
+++ b/dataset/image_completion_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the complete image accurately retain the unchanged areas from the original partial image?",
+            "0_point_standard": "The unchanged areas from the original partial image show noticeable alteration or distortion in the complete image.",
+            "1_point_standard": "The unchanged areas from the original partial image are accurately retained in the complete image, with no noticeable changes."
+        },
+        {
+            "question": "Does the complete image maintain a consistent style and features with the original partial image?",
+            "0_point_standard": "The style or features of the complete image are noticeably different from the original partial image.",
+            "1_point_standard": "The complete image maintains a consistent style and features with the original partial image."
+        },
+        {
+            "question": "Does the complete image accurately reflect the specific content requirements described in the text input?",
+            "0_point_standard": "The complete image fails to include the specific content elements described in the text input.",
+            "1_point_standard": "The complete image successfully includes the specific content elements from the text input."
+        },
+        {
+            "question": "Does the complete image follow any style or thematic guidelines specified in the text description?",
+            "0_point_standard": "The complete image does not adhere to the style or thematic guidelines provided in the text description.",
+            "1_point_standard": "The complete image follows the style or thematic guidelines specified in the text description."
+        },
+        {
+            "question": "Does the complete image exhibit a high level of detail and quality in the completed areas?",
+            "0_point_standard": "The completed areas of the image lack detail or are of poor quality compared to the original image.",
+            "1_point_standard": "The completed areas of the image are rich in detail and of high quality, comparable to or better than the original image."
+        },
+        {
+            "question": "Does the complete image exhibit overall aesthetic appeal and coherence, providing a visually pleasing and complete result?",
+            "0_point_standard": "The complete image lacks aesthetic appeal or appears incoherent, with the extended areas looking disjointed.",
+            "1_point_standard": "The complete image is aesthetically pleasing and coherent, with seamless integration of the extended areas."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_completion_0002/images.txt b/dataset/image_completion_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d7c0662d9bab362e4a92ceb3b580b1729042beef
--- /dev/null
+++ b/dataset/image_completion_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN018rz9qa1bvPwnM6Ih8_!!6000000003527-0-tps-563-854.jpg
diff --git a/dataset/image_completion_0002/instruction.txt b/dataset/image_completion_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..664642a0b69f2f5762039bc2a9e53113fcce4cc1
--- /dev/null
+++ b/dataset/image_completion_0002/instruction.txt
@@ -0,0 +1 @@
+Please extend the image in all directions to reveal the full body of the person, keeping the current face, hairstyle, and all details unchanged. The input image is a cropped region of the final complete image, and the input section must remain unchanged while seamlessly blending into the extended area. The background should match the style of the person, and the extended region should maintain consistency with the overall visual style.
\ No newline at end of file
diff --git a/dataset/image_completion_0002/meta.json b/dataset/image_completion_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..1324ce45c521d498162bf4ae945590c0de7bebba
--- /dev/null
+++ b/dataset/image_completion_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "image completion",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0086",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_completion_0003/eval.json b/dataset/image_completion_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e976f24406ffb7f86356ad023e70a563a74b7f77
--- /dev/null
+++ b/dataset/image_completion_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the complete image accurately preserve the unchanged areas of the original partial image?",
+            "0_point_standard": "The unchanged areas of the original partial image show noticeable alterations or distortions in the complete image.",
+            "1_point_standard": "The unchanged areas of the original partial image are accurately preserved in the complete image without any noticeable changes."
+        },
+        {
+            "question": "Does the complete image maintain a consistent style and features with the original partial image?",
+            "0_point_standard": "The style or features of the complete image are noticeably different from the original partial image.",
+            "1_point_standard": "The complete image maintains a consistent style and features with the original partial image."
+        },
+        {
+            "question": "Does the complete image accurately reflect the specific content requirements described in the text input?",
+            "0_point_standard": "The complete image fails to include specific content elements described in the text input.",
+            "1_point_standard": "The complete image successfully includes the specific content elements from the text input."
+        },
+        {
+            "question": "Does the complete image follow any style or theme guidelines specified in the text description?",
+            "0_point_standard": "The complete image does not conform to the style or theme guidelines provided in the text description.",
+            "1_point_standard": "The complete image follows the style or theme guidelines specified in the text description."
+        },
+        {
+            "question": "Does the completed area of the image exhibit a high level of detail and quality?",
+            "0_point_standard": "The completed area of the image lacks detail or is of poor quality compared to the original image.",
+            "1_point_standard": "The completed area of the image is rich in detail and high in quality, comparable to or better than the original image."
+        },
+        {
+            "question": "Does the complete image exhibit overall aesthetics and coherence, providing a visually pleasing and complete result?",
+            "0_point_standard": "The complete image lacks aesthetics or appears incoherent, with the extended areas seeming disjointed.",
+            "1_point_standard": "The complete image is aesthetically pleasing and coherent, with the extended areas seamlessly integrated."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_completion_0003/images.txt b/dataset/image_completion_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e30eef4ac6fa5be959fbb8e8f39baf0ffced249e
--- /dev/null
+++ b/dataset/image_completion_0003/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01WQpZqe1ESo2gt1aJq_!!6000000000351-0-tps-329-626.jpg
diff --git a/dataset/image_completion_0003/instruction.txt b/dataset/image_completion_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..664642a0b69f2f5762039bc2a9e53113fcce4cc1
--- /dev/null
+++ b/dataset/image_completion_0003/instruction.txt
@@ -0,0 +1 @@
+Please extend the image in all directions to reveal the full body of the person, keeping the current face, hairstyle, and all details unchanged. The input image is a cropped region of the final complete image, and the input section must remain unchanged while seamlessly blending into the extended area. The background should match the style of the person, and the extended region should maintain consistency with the overall visual style.
\ No newline at end of file
diff --git a/dataset/image_completion_0003/meta.json b/dataset/image_completion_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..246f9ba7065cbdcf742a57ccbd4524d4b2d2252d
--- /dev/null
+++ b/dataset/image_completion_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "image completion",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0086",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/image_completion_0004/eval.json b/dataset/image_completion_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..0c169d5ff83e360ef27e17c5048e40713627f88c
--- /dev/null
+++ b/dataset/image_completion_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the complete image accurately retain the unchanged areas of the original partial image?",
+            "0_point_standard": "The unchanged areas of the original partial image show noticeable changes or distortion in the complete image.",
+            "1_point_standard": "The unchanged areas of the original partial image are accurately retained in the complete image without any noticeable changes."
+        },
+        {
+            "question": "Does the complete image maintain consistency in style and features with the original partial image?",
+            "0_point_standard": "The style or features of the complete image are noticeably different from the original partial image.",
+            "1_point_standard": "The complete image maintains consistency in style and features with the original partial image."
+        },
+        {
+            "question": "Does the complete image accurately reflect the specific content requirements described in the text input?",
+            "0_point_standard": "The complete image fails to include the specific content elements described in the text input.",
+            "1_point_standard": "The complete image successfully includes the specific content elements from the text input."
+        },
+        {
+            "question": "Does the complete image follow any style or theme guidelines specified in the text description?",
+            "0_point_standard": "The complete image does not conform to the style or theme guidelines provided in the text description.",
+            "1_point_standard": "The complete image follows the style or theme guidelines specified in the text description."
+        },
+        {
+            "question": "Does the complete image exhibit a high level of detail and quality in the completed areas?",
+            "0_point_standard": "The completed areas of the image lack detail or are of poor quality compared to the original image.",
+            "1_point_standard": "The completed areas of the image are rich in detail, high in quality, and comparable to or better than the original image."
+        },
+        {
+            "question": "Does the complete image exhibit overall aesthetic appeal and coherence, providing a visually pleasing and complete result?",
+            "0_point_standard": "The complete image lacks aesthetic appeal or appears incoherent, with the extended areas looking disjointed.",
+            "1_point_standard": "The complete image is aesthetically pleasing and coherent, with the extended areas seamlessly integrated."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_completion_0004/images.txt b/dataset/image_completion_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ee56f38d5f5fd2f14fa01014fa5cac82a05f8065
--- /dev/null
+++ b/dataset/image_completion_0004/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN016UBHZ71PQTntdqBkq_!!6000000001835-0-tps-564-762.jpg
diff --git a/dataset/image_completion_0004/instruction.txt b/dataset/image_completion_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..664642a0b69f2f5762039bc2a9e53113fcce4cc1
--- /dev/null
+++ b/dataset/image_completion_0004/instruction.txt
@@ -0,0 +1 @@
+Please extend the image in all directions to reveal the full body of the person, keeping the current face, hairstyle, and all details unchanged. The input image is a cropped region of the final complete image, and the input section must remain unchanged while seamlessly blending into the extended area. The background should match the style of the person, and the extended region should maintain consistency with the overall visual style.
\ No newline at end of file
diff --git a/dataset/image_completion_0004/meta.json b/dataset/image_completion_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..30b38452638de76ae80869988fe8da744d44579e
--- /dev/null
+++ b/dataset/image_completion_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "image completion",
+    "num_of_cases": 4,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0086",
+    "output_image_count": 1,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/image_retouching_landscape_photo_retouching_0001/eval.json b/dataset/image_retouching_landscape_photo_retouching_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e52d56d815ddbb88ef6e4c444913f46ed19885f5
--- /dev/null
+++ b/dataset/image_retouching_landscape_photo_retouching_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Do the unspecified areas of the retouched landscape photo retain the original composition and elements, ensuring that only the specified parts are changed?",
+            "0_point_standard": "The areas not specified for retouching show changes or inconsistencies compared to the original image.",
+            "1_point_standard": "The unspecified areas remain unchanged, retaining the original composition and elements."
+        },
+        {
+            "question": "Does the retouched landscape photo retain the overall style and features of the original photo, ensuring seamless integration of the modifications?",
+            "0_point_standard": "The style or features of the original photo have been significantly altered, resulting in a disharmonious appearance.",
+            "1_point_standard": "The style and features of the original photo are retained, and the modifications are seamlessly integrated."
+        },
+        {
+            "question": "Do the modifications in the retouched landscape photo accurately reflect the specific instructions provided in the text description?",
+            "0_point_standard": "The modifications do not match the instructions or fail to meet the specified requirements.",
+            "1_point_standard": "The modifications accurately reflect the instructions and meet the specified requirements."
+        },
+        {
+            "question": "Are all elements specified in the text description, such as changes in color, lighting, or specific details, accurately represented in the retouched landscape photo?",
+            "0_point_standard": "Certain specified elements are missing or not accurately represented in the retouched photo.",
+            "1_point_standard": "All specified elements are accurately represented, with no omissions or major deviations."
+        },
+        {
+            "question": "Does the retouched landscape photo exhibit high-quality editing with smooth transitions, precise adjustments, and no visible flaws?",
+            "0_point_standard": "The editing quality is poor, with visible flaws, harsh transitions, or imprecise adjustments.",
+            "1_point_standard": "The editing quality is high, with smooth transitions, precise adjustments, and no visible flaws."
+        },
+        {
+            "question": "Does the retouched landscape photo have enhanced aesthetic appeal, providing a visually pleasing image of professional quality?",
+            "0_point_standard": "The retouched photo lacks aesthetic appeal and does not meet professional quality standards.",
+            "1_point_standard": "The retouched photo has strong aesthetic appeal, meets professional quality standards, and provides a visually pleasing image."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_retouching_landscape_photo_retouching_0001/images.txt b/dataset/image_retouching_landscape_photo_retouching_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bfd6291df5a251d34663c4102d32625ee6f0bdd7
--- /dev/null
+++ b/dataset/image_retouching_landscape_photo_retouching_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01fCNx2U1YQmblcpcol_!!6000000003054-0-tps-1000-667.jpg
diff --git a/dataset/image_retouching_landscape_photo_retouching_0001/instruction.txt b/dataset/image_retouching_landscape_photo_retouching_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f2638e5804c08077a7e180e7e953eff5a6e3fcb1
--- /dev/null
+++ b/dataset/image_retouching_landscape_photo_retouching_0001/instruction.txt
@@ -0,0 +1 @@
+Adjust the color tones of this grassland landscape image to give it a warmer feel, enhancing the sense of sunset.
\ No newline at end of file
diff --git a/dataset/image_retouching_landscape_photo_retouching_0001/meta.json b/dataset/image_retouching_landscape_photo_retouching_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ca7eb7a233e67618ff493d447611398431c3723
--- /dev/null
+++ b/dataset/image_retouching_landscape_photo_retouching_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "landscape photo retouching",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0053",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_retouching_portrait_photo_retouching_0002/eval.json b/dataset/image_retouching_portrait_photo_retouching_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e13f7f139dab084addf9e0a37bcd26ddd0b5ec68
--- /dev/null
+++ b/dataset/image_retouching_portrait_photo_retouching_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the retouched portrait retain the identity and main facial features of the person in the original image?",
+            "0_point_standard": "The identity or main facial features of the person have been altered to the extent that they are unrecognizable.",
+            "1_point_standard": "The identity and main facial features of the person are retained, ensuring they are clearly recognizable."
+        },
+        {
+            "question": "Has the rest of the image remained unchanged except for the specified retouched areas?",
+            "0_point_standard": "There are noticeable changes or modifications in parts of the image that were not specified for retouching.",
+            "1_point_standard": "Only the specified areas have been modified, with no unexpected changes to the rest of the image."
+        },
+        {
+            "question": "Does the retouched image accurately meet the specific modification requirements described in the text (e.g., smoother skin, adjusted lighting)?",
+            "0_point_standard": "The specified modifications in the text description were not completed or were executed inaccurately.",
+            "1_point_standard": "The retouched image accurately reflects the modification requirements detailed in the text description."
+        },
+        {
+            "question": "Is the retouching style consistent with the guidelines provided in the text description (e.g., natural enhancement vs. dramatic enhancement)?",
+            "0_point_standard": "The retouching style is inconsistent with the provided guidelines, leading to inconsistent or unexpected results.",
+            "1_point_standard": "The retouching style is consistent with the provided guidelines, meeting expected results."
+        },
+        {
+            "question": "Does the retouched portrait maintain consistent and natural skin tone and texture across the entire modified area?",
+            "0_point_standard": "There are noticeable inconsistencies in skin tone or texture in the retouched area, leading to an unnatural or uneven appearance.",
+            "1_point_standard": "The skin tone and texture in the retouched area are consistent and natural, seamlessly blending with the surrounding areas."
+        },
+        {
+            "question": "Does the overall retouched image exhibit high aesthetic quality and enhance the visual appeal of the portrait through balanced modifications?",
+            "0_point_standard": "The retouched image lacks aesthetic appeal, and the modifications have diminished the visual quality.",
+            "1_point_standard": "The retouched image exhibits high aesthetic quality, with modifications enhancing its visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_retouching_portrait_photo_retouching_0002/images.txt b/dataset/image_retouching_portrait_photo_retouching_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d8c0e4def4f7afe38d3c54146894e172f0beefd1
--- /dev/null
+++ b/dataset/image_retouching_portrait_photo_retouching_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01n6AnjY1zTWbUJ9g3f_!!6000000006715-0-tps-1280-1903.jpg
diff --git a/dataset/image_retouching_portrait_photo_retouching_0002/instruction.txt b/dataset/image_retouching_portrait_photo_retouching_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d610e04f39acaa4dac1911ceb8d640a454a8c1f
--- /dev/null
+++ b/dataset/image_retouching_portrait_photo_retouching_0002/instruction.txt
@@ -0,0 +1 @@
+Edit this bright, smiling portrait to enhance the vibrancy of the smile and emphasize the shine around the eyes, making the image more lively and radiant.
\ No newline at end of file
diff --git a/dataset/image_retouching_portrait_photo_retouching_0002/meta.json b/dataset/image_retouching_portrait_photo_retouching_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..eeb06fbd56450fd06123e6aaec465982a1ce3027
--- /dev/null
+++ b/dataset/image_retouching_portrait_photo_retouching_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "portrait photo retouching",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0052",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_straighten_clipping_straighten_0001/eval.json b/dataset/image_straighten_clipping_straighten_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..ccc127c4c50d2698a7a8742d1427c029d44c5eb0
--- /dev/null
+++ b/dataset/image_straighten_clipping_straighten_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Has the corrected image achieved the correct horizontal alignment, ensuring all skewed elements have been straightened according to task requirements?",
+            "0_point_standard": "The image is still noticeably skewed or misaligned after correction.",
+            "1_point_standard": "The image is correctly straightened, with horizontal elements aligned as expected."
+        },
+        {
+            "question": "Have the excess edges been correctly trimmed, ensuring there are no unnecessary parts extending beyond the intended frame?",
+            "0_point_standard": "Excess edges remain or the cropping is uneven or incorrect.",
+            "1_point_standard": "The excess edges are neatly and accurately trimmed, maintaining a clean image border."
+        },
+        {
+            "question": "Apart from the corrected elements, does the rest of the image remain unchanged and consistent with the original input in terms of content and style?",
+            "0_point_standard": "There are noticeable changes or distortions in parts of the image that were not intended to be corrected.",
+            "1_point_standard": "The unaltered parts of the image remain consistent in content and style with the original image."
+        },
+        {
+            "question": "Does the corrected image maintain consistent sharpness and resolution, especially in areas affected by straightening or cropping adjustments?",
+            "0_point_standard": "The image appears blurry or has reduced resolution in areas affected by straightening or cropping, affecting overall clarity.",
+            "1_point_standard": "The image maintains consistent sharpness and resolution even in areas affected by straightening or cropping adjustments."
+        },
+        {
+            "question": "Are the corrected edges cleanly and seamlessly integrated, with no visible artifacts or distortions at the edges or boundaries?",
+            "0_point_standard": "There are visible artifacts or distortions at the corrected edges or boundaries, reducing the clean appearance of the image.",
+            "1_point_standard": "The corrected edges are clean and seamless, with no visible artifacts or distortions, giving the image a refined and elegant appearance."
+        },
+        {
+            "question": "Does the final image maintain a professional aesthetic quality, with balanced composition and clarity, after straightening and cropping?",
+            "0_point_standard": "The image lacks professional aesthetic quality, appearing unbalanced or unclear.",
+            "1_point_standard": "The image maintains a high level of professional aesthetic quality, with balanced composition and clarity."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_straighten_clipping_straighten_0001/images.txt b/dataset/image_straighten_clipping_straighten_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..169e81b38b9caadc70191310dcc15624e60b2e10
--- /dev/null
+++ b/dataset/image_straighten_clipping_straighten_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN012ndBhW2AGfG2gimK0_!!6000000008176-0-tps-1566-1036.jpg
diff --git a/dataset/image_straighten_clipping_straighten_0001/instruction.txt b/dataset/image_straighten_clipping_straighten_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cab8e306fd3ed28f919ec05352efbdcb1bf3eaca
--- /dev/null
+++ b/dataset/image_straighten_clipping_straighten_0001/instruction.txt
@@ -0,0 +1 @@
+Straighten the image so that the horizon in the scene becomes level, then crop away any excess edges after the correction, retaining the core content of the image.
\ No newline at end of file
diff --git a/dataset/image_straighten_clipping_straighten_0001/meta.json b/dataset/image_straighten_clipping_straighten_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b11eba337c151bf1d9fb312d70469eea43e14976
--- /dev/null
+++ b/dataset/image_straighten_clipping_straighten_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "image straighten with clipping",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0091",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_straighten_clipping_straighten_0002/eval.json b/dataset/image_straighten_clipping_straighten_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c538ad4689400b67260d64600d51845a19a2c8d
--- /dev/null
+++ b/dataset/image_straighten_clipping_straighten_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the corrected image achieve proper horizontal alignment, ensuring all tilted elements are straightened as per task requirements?",
+            "0_point_standard": "The image remains noticeably tilted or misaligned after correction.",
+            "1_point_standard": "The image is properly straightened, with horizontal elements aligned as expected."
+        },
+        {
+            "question": "Have the extra edges been correctly cropped, ensuring no unnecessary parts extend beyond the intended frame?",
+            "0_point_standard": "There are still extra edges, or the cropping is uneven or incorrect.",
+            "1_point_standard": "Extra edges are neatly and accurately cropped, maintaining a clean image boundary."
+        },
+        {
+            "question": "Apart from the corrected elements, does the rest of the image remain unchanged, consistent with the original input in terms of content and style?",
+            "0_point_standard": "There are noticeable changes or distortions in parts of the image not intended for correction.",
+            "1_point_standard": "Unchanged parts of the image remain consistent with the original in terms of content and style."
+        },
+        {
+            "question": "Does the corrected image maintain consistent sharpness and resolution, especially in areas affected by straightening or cropping adjustments?",
+            "0_point_standard": "The image appears blurry or reduced in resolution in areas affected by straightening or cropping, impacting overall clarity.",
+            "1_point_standard": "The image maintains consistent sharpness and resolution even in areas affected by straightening or cropping adjustments."
+        },
+        {
+            "question": "Are the corrected edges cleanly and seamlessly integrated, with no visible artifacts or distortions at the edges or boundaries?",
+            "0_point_standard": "Visible artifacts or distortions are present at the edges or boundaries where corrections were applied, reducing the clean appearance of the image.",
+            "1_point_standard": "The corrected edges are clean and seamless, with no noticeable artifacts or distortions, giving the image an elegantly refined appearance."
+        },
+        {
+            "question": "Does the final image maintain a professional aesthetic quality after straightening and cropping, with balanced composition and clarity?",
+            "0_point_standard": "The image lacks professional aesthetic quality, appearing unbalanced or unclear.",
+            "1_point_standard": "The image maintains a high level of professional aesthetic quality, with balanced composition and clarity."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_straighten_clipping_straighten_0002/images.txt b/dataset/image_straighten_clipping_straighten_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6759a91fee37a7ee0bc8f131bac47f4048c41fad
--- /dev/null
+++ b/dataset/image_straighten_clipping_straighten_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN019UlZAU1nNJqdEKBzA_!!6000000005077-0-tps-1200-753.jpg
diff --git a/dataset/image_straighten_clipping_straighten_0002/instruction.txt b/dataset/image_straighten_clipping_straighten_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a413d2344399b1a810782b6ede244de83a4e3b8
--- /dev/null
+++ b/dataset/image_straighten_clipping_straighten_0002/instruction.txt
@@ -0,0 +1 @@
+Straighten the image so that the reflection of the water and iceberg becomes level, then crop any excess edges after the correction, ensuring the subject is centered.
\ No newline at end of file
diff --git a/dataset/image_straighten_clipping_straighten_0002/meta.json b/dataset/image_straighten_clipping_straighten_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..79ac904e8be6b02e2a2765e14c6d2328a38ebf7b
--- /dev/null
+++ b/dataset/image_straighten_clipping_straighten_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "image straighten with clipping",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0091",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_straighten_completing_straighten_0001/eval.json b/dataset/image_straighten_completing_straighten_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..eae6c96284cd3b46e3bcf2630cbe139dc6b7bef3
--- /dev/null
+++ b/dataset/image_straighten_completing_straighten_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the adjusted image achieve correct horizontal alignment, with the tilt of the original fully corrected as required by the task?",
+            "0_point_standard": "The image is not properly straightened, showing a noticeable tilt or misalignment relative to the horizontal axis.",
+            "1_point_standard": "The image is properly straightened, with the horizontal alignment accurately corrected as expected."
+        },
+        {
+            "question": "Do the newly completed edges seamlessly integrate with the existing parts of the image, maintaining consistency in style and content?",
+            "0_point_standard": "The completed edges appear inconsistent or noticeably different from the rest of the image, disrupting visual harmony.",
+            "1_point_standard": "The completed edges seamlessly integrate, maintaining consistent style and content with the original image."
+        },
+        {
+            "question": "Does the rest of the image remain unchanged, preserving the original details and elements, with only the necessary parts modified?",
+            "0_point_standard": "Unnecessary modifications or changes have been made to parts of the image that should have remained unchanged.",
+            "1_point_standard": "The parts of the image not involved in correction remain unchanged, preserving the original details and elements."
+        },
+        {
+            "question": "Do the colors and lighting of the newly completed edges match the original parts of the image, avoiding any noticeable mismatches?",
+            "0_point_standard": "The colors or lighting of the completed edges differ noticeably from the original image, creating a mismatch or incoherent appearance.",
+            "1_point_standard": "The colors and lighting of the completed edges match the original image, blending naturally and maintaining a cohesive appearance."
+        },
+        {
+            "question": "Does the completed image maintain a natural perspective and proportional balance, avoiding any distortions due to straightening or edge adjustments?",
+            "0_point_standard": "The image shows perspective or proportional distortion after adjustments, making it appear unnatural or misaligned.",
+            "1_point_standard": "The image maintains a natural perspective and proportional balance, with the adjustments enhancing realism and visual harmony."
+        },
+        {
+            "question": "Does the modified overall image retain a high aesthetic quality with balanced composition and visual appeal?",
+            "0_point_standard": "The image lacks visual appeal, with poor composition or quality after modifications.",
+            "1_point_standard": "The image retains high aesthetic quality and visual appeal, with a balanced and attractive composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_straighten_completing_straighten_0001/images.txt b/dataset/image_straighten_completing_straighten_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..91f90b9bbc3750bfb94bfc2613853018818c6f51
--- /dev/null
+++ b/dataset/image_straighten_completing_straighten_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN0119SbLl1fXNcdINauA_!!6000000004016-0-tps-1566-1036.jpg
diff --git a/dataset/image_straighten_completing_straighten_0001/instruction.txt b/dataset/image_straighten_completing_straighten_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8ca774d213fc4a344bb42415b6826ed60714e62e
--- /dev/null
+++ b/dataset/image_straighten_completing_straighten_0001/instruction.txt
@@ -0,0 +1 @@
+Straighten the image so that the horizon in the scene becomes level, and after the rotation, fill in the image to ensure the final image retains a complete rectangular shape without losing any content.
\ No newline at end of file
diff --git a/dataset/image_straighten_completing_straighten_0001/meta.json b/dataset/image_straighten_completing_straighten_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ce29576ca2692c6ef2b27ea978c0c55c73ebb9ca
--- /dev/null
+++ b/dataset/image_straighten_completing_straighten_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "image straighten with completing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0090",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_straighten_completing_straighten_0002/eval.json b/dataset/image_straighten_completing_straighten_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..2ad0a552b960bb4235ee0c0d4c1e9d0fe5319210
--- /dev/null
+++ b/dataset/image_straighten_completing_straighten_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the adjusted image achieve correct horizontal alignment, with the tilt of the original fully corrected as required by the task?",
+            "0_point_standard": "The image is not properly straightened, showing a noticeable tilt or misalignment relative to the horizontal axis.",
+            "1_point_standard": "The image is properly straightened, with the horizontal alignment accurately corrected as expected."
+        },
+        {
+            "question": "Do the newly completed edges seamlessly integrate with the existing parts of the image and maintain consistency in style and content?",
+            "0_point_standard": "The completed edges appear inconsistent or noticeably different from the rest of the image, disrupting visual harmony.",
+            "1_point_standard": "The completed edges are seamlessly integrated, maintaining a consistent style and content with the original image."
+        },
+        {
+            "question": "Does the rest of the image remain unchanged, retaining the original details and elements, with only the necessary parts modified?",
+            "0_point_standard": "Unnecessary modifications or changes were made to parts of the image that should remain unchanged.",
+            "1_point_standard": "The parts of the image not involved in the correction remain unchanged, retaining the original details and elements."
+        },
+        {
+            "question": "Are the color and lighting of the newly completed edges consistent with the original parts of the image, avoiding any noticeable mismatch?",
+            "0_point_standard": "The color or lighting of the completed edges is noticeably different from the original image, causing a mismatch or incoherent appearance.",
+            "1_point_standard": "The color and lighting of the completed edges are consistent with the original image, naturally blending and maintaining a cohesive appearance."
+        },
+        {
+            "question": "Does the completed image maintain a natural perspective and proportional balance, avoiding any distortion due to straightening or edge adjustments?",
+            "0_point_standard": "The image exhibits perspective or proportional distortion after adjustments, making it appear unnatural or misaligned.",
+            "1_point_standard": "The image maintains a natural perspective and proportional balance, with adjustments enhancing realism and visual harmony."
+        },
+        {
+            "question": "Does the modified overall image maintain a high aesthetic quality with balanced composition and visual appeal?",
+            "0_point_standard": "The image lacks visual appeal, with poor composition or quality after modifications.",
+            "1_point_standard": "The image maintains high aesthetic quality and visual appeal, with a balanced and attractive composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_straighten_completing_straighten_0002/images.txt b/dataset/image_straighten_completing_straighten_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dcef06847bab045e8921720bede49afe75f1f419
--- /dev/null
+++ b/dataset/image_straighten_completing_straighten_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01WS0wm21UK8OcjwRoV_!!6000000002498-0-tps-1200-753.jpg
diff --git a/dataset/image_straighten_completing_straighten_0002/instruction.txt b/dataset/image_straighten_completing_straighten_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e007286558d09add33aa2c123533e3ddfc1f0eaa
--- /dev/null
+++ b/dataset/image_straighten_completing_straighten_0002/instruction.txt
@@ -0,0 +1 @@
+Straighten the image so that the reflection of the water and iceberg becomes level, and after the rotation, fill in the image to ensure the final image retains a complete rectangular shape, with the iceberg still centered.
\ No newline at end of file
diff --git a/dataset/image_straighten_completing_straighten_0002/meta.json b/dataset/image_straighten_completing_straighten_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..53ddfff967afcaee6896730dc9a39adc3b1bf066
--- /dev/null
+++ b/dataset/image_straighten_completing_straighten_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "image straighten with completing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0090",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_digital_makeup_0001/eval.json b/dataset/image_transfer_digital_makeup_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..050a47e282af8bc84f4ce14b46d80d9521553e87
--- /dev/null
+++ b/dataset/image_transfer_digital_makeup_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the model ensure that the digital makeup applied to the character in image A matches the makeup style of the character in image B?",
+            "0_point_standard": "The makeup style applied to image A significantly deviates from the makeup style in image B, with noticeable differences in color, intensity, or pattern.",
+            "1_point_standard": "The makeup style applied to image A closely matches the makeup style in image B, with precise matching in color, intensity, and pattern."
+        },
+        {
+            "question": "When applying the makeup from image B, does the model retain the original features and identity of the character in image A?",
+            "0_point_standard": "The makeup application alters key facial features or identity of the character in image A, making them unrecognizable or significantly different.",
+            "1_point_standard": "The model retains the original facial features and identity of the character in image A, ensuring they remain recognizable after the makeup application."
+        },
+        {
+            "question": "Does the model accurately interpret and apply specific makeup elements from image B (such as lipstick, eyeshadow, or blush) to image A?",
+            "0_point_standard": "There are noticeable errors in interpreting and applying specific makeup elements, such as mismatched color or placement.",
+            "1_point_standard": "The model accurately interprets and applies all specified makeup elements, with their color and placement in image A matching those in image B."
+        },
+        {
+            "question": "Does the digital makeup transformation only affect the intended makeup areas of image A, leaving other parts of the image unchanged?",
+            "0_point_standard": "The model inadvertently alters other areas of image A that were not meant for makeup application, affecting the overall consistency of the image.",
+            "1_point_standard": "The digital makeup transformation is confined to the intended areas, leaving the rest of image A unchanged and consistent."
+        },
+        {
+            "question": "Does the digital makeup application enhance the aesthetic quality of image A, meeting professional makeup standards?",
+            "0_point_standard": "The makeup application lacks polish or appears unprofessional, with issues such as uneven application or unnatural appearance.",
+            "1_point_standard": "The makeup application is aesthetically pleasing, smoothly and evenly applied, meeting professional makeup standards."
+        },
+        {
+            "question": "Is the digital makeup applied to image A finely detailed, with high-quality rendering in terms of edges, blending, and texture?",
+            "0_point_standard": "The makeup details are coarse, with noticeable defects or blurriness in edges, blending, or texture.",
+            "1_point_standard": "The makeup is finely detailed, with clear edges, seamless blending, and realistic texture, showcasing high-quality rendering."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_digital_makeup_0001/images.txt b/dataset/image_transfer_digital_makeup_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..33f4a61e5fd6eb1d6a8e596d3fb612474ee66b81
--- /dev/null
+++ b/dataset/image_transfer_digital_makeup_0001/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i2/O1CN01V0i85B1x8L3Kqs3PA_!!6000000006398-0-tps-2000-3001.jpg
+https://img.alicdn.com/imgextra/i2/O1CN019oXS5d1JYMvqwPswC_!!6000000001040-0-tps-2730-2048.jpg
diff --git a/dataset/image_transfer_digital_makeup_0001/instruction.txt b/dataset/image_transfer_digital_makeup_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d1b02420094447d2c1df2676e1b8f7bedccda831
--- /dev/null
+++ b/dataset/image_transfer_digital_makeup_0001/instruction.txt
@@ -0,0 +1 @@
+Please transfer the makeup from the second image onto the face of the girl in the first image. The goal is to keep the expression, posture, and other elements of the first image unchanged while only applying the makeup to her face. Ensure that details like eyeshadow, lipstick, and blush are accurately reflected on the face in the first image, making it look natural and well-integrated.
\ No newline at end of file
diff --git a/dataset/image_transfer_digital_makeup_0001/meta.json b/dataset/image_transfer_digital_makeup_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..16bfefb7aab319eeabb8bc8da44f5dca09e85689
--- /dev/null
+++ b/dataset/image_transfer_digital_makeup_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "digital makeup",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0092",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_id_transfer_0002/eval.json b/dataset/image_transfer_id_transfer_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5266aace2aea5ac46e06f29168c1ab704a34a328
--- /dev/null
+++ b/dataset/image_transfer_id_transfer_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the identity replaced in the output image match the person specified in image B?",
+            "0_point_standard": "The identity replaced in the output image is not similar to the person specified in image B, with noticeable differences in facial features or overall appearance.",
+            "1_point_standard": "The identity replaced in the output image is very similar to the person specified in image B, accurately matching facial features and overall appearance."
+        },
+        {
+            "question": "Aside from the specified identity transfer, does the rest of image A remain unchanged?",
+            "0_point_standard": "There are noticeable changes or distortions in parts of image A that were not intended to be modified, affecting the overall integrity of the image.",
+            "1_point_standard": "Aside from the specified identity transfer, the rest of image A remains unchanged, with the environment and other elements intact."
+        },
+        {
+            "question": "Does the output image maintain logical consistency with image A in terms of lighting, shadows, and perspective?",
+            "0_point_standard": "The replaced identity has inconsistencies in lighting, shadows, or perspective compared to the rest of image A, resulting in an unnatural appearance.",
+            "1_point_standard": "The replaced identity seamlessly blends with the lighting, shadows, and perspective of image A, maintaining a naturally coherent appearance."
+        },
+        {
+            "question": "Does the modification meet the specific requirements of the text description (e.g., expression, pose)?",
+            "0_point_standard": "The model fails to incorporate specific details from the text description (e.g., facial expression or pose) into the replaced identity.",
+            "1_point_standard": "The model accurately incorporates specific details from the text description into the replaced identity, fulfilling the task requirements."
+        },
+        {
+            "question": "Do the skin tone and texture of the replaced identity blend naturally with the surrounding area, ensuring a seamless transition?",
+            "0_point_standard": "The skin tone or texture of the replaced identity is noticeably different or poorly blended, causing a clear disconnect with the surrounding area.",
+            "1_point_standard": "The skin tone and texture of the replaced identity naturally blend with the surrounding area, achieving a seamless and realistic transition."
+        },
+        {
+            "question": "Is the output image clearly derived from the input image, maintaining visual and stylistic connections?",
+            "0_point_standard": "The output image appears unrelated, lacking clear visual or stylistic connections with the input image.",
+            "1_point_standard": "The output image maintains clear visual and stylistic connections with the input image, reflecting the intended identity transfer while preserving the overall style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_id_transfer_0002/images.txt b/dataset/image_transfer_id_transfer_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..33107be43e6b1595459691f2ec34fc2e42fe1e19
--- /dev/null
+++ b/dataset/image_transfer_id_transfer_0002/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i1/O1CN01CWicmL1u1WJEjCAMO_!!6000000005977-0-tps-6000-4000.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01drT1Lz1vhwbzcGX4k_!!6000000006205-0-tps-3340-5010.jpg
diff --git a/dataset/image_transfer_id_transfer_0002/instruction.txt b/dataset/image_transfer_id_transfer_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2227f4137b92ab64455ff5e58eebbee634e5c765
--- /dev/null
+++ b/dataset/image_transfer_id_transfer_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate an image by replacing the boy on the far left in the first image with the little girl from the second image, while keeping all other family members and background elements unchanged. The generated image should make appropriate adjustments to the size, angle, or lighting to ensure that the girl's posture and expression blend naturally into the scene, matching the family setting.
\ No newline at end of file
diff --git a/dataset/image_transfer_id_transfer_0002/meta.json b/dataset/image_transfer_id_transfer_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..31f6f3fc13ada331583846cca45163a61c8ddf7c
--- /dev/null
+++ b/dataset/image_transfer_id_transfer_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "ID transfer",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0094",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_light_transfer_0001/eval.json b/dataset/image_transfer_light_transfer_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..7ec3d703b7d02e0b118b622643d0c38001b28d73
--- /dev/null
+++ b/dataset/image_transfer_light_transfer_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the model-generated output maintain the overall structure and content of image A while applying the lighting effects of image B?",
+            "0_point_standard": "The output image significantly alters the main structure or content of image A, deviating from the original depiction.",
+            "1_point_standard": "The output image retains the overall structure and content of image A, only changing the described lighting."
+        },
+        {
+            "question": "Does the lighting effect from image B accurately transfer to image A without introducing irrelevant lighting artifacts?",
+            "0_point_standard": "The lighting effect in the output image is not similar to the lighting in image B or introduces irrelevant lighting artifacts.",
+            "1_point_standard": "The lighting effect from image B is accurately applied to image A without introducing irrelevant effects."
+        },
+        {
+            "question": "Aside from lighting modifications, does the model output keep the other elements of image A unchanged?",
+            "0_point_standard": "In the output image, elements of image A, aside from lighting, are altered or modified.",
+            "1_point_standard": "In the output image, all elements of image A, aside from lighting, remain unchanged."
+        },
+        {
+            "question": "If the text description specifies particular lighting details (e.g., direction, intensity), are these details accurately realized in the output image?",
+            "0_point_standard": "The output image fails to reflect the specific lighting details mentioned in the text description.",
+            "1_point_standard": "The output image accurately realizes the specific lighting details according to the text description."
+        },
+        {
+            "question": "Do the shadows and highlights produced by the transferred lighting effect align with the shapes and surfaces in image A, enhancing the realism of the scene?",
+            "0_point_standard": "The shadows and highlights appear inconsistent with the shapes or surfaces in image A, leading to an unrealistic or incoherent appearance.",
+            "1_point_standard": "The shadows and highlights naturally align with the shapes and surfaces in image A, creating a realistic and coherent lighting effect."
+        },
+        {
+            "question": "Does the output image lack obvious technical defects, such as noise, blur, or artifacts, resulting from the lighting transfer?",
+            "0_point_standard": "The output image displays obvious technical defects, such as noise, blur, or artifacts, due to the lighting transfer.",
+            "1_point_standard": "The output image is free of technical defects, maintaining high-quality rendering throughout the lighting transfer process."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_light_transfer_0001/images.txt b/dataset/image_transfer_light_transfer_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9e18853a703ad42d5196078264ad6ce3df265295
--- /dev/null
+++ b/dataset/image_transfer_light_transfer_0001/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i1/O1CN01kqVIwa1fVY2W8M7xg_!!6000000004012-0-tps-4442-6663.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01YIICCo21TBXiISx1b_!!6000000006985-0-tps-5472-3648.jpg
diff --git a/dataset/image_transfer_light_transfer_0001/instruction.txt b/dataset/image_transfer_light_transfer_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4cd77a4ba9575969ec0c81deb7ae2679b12ab484
--- /dev/null
+++ b/dataset/image_transfer_light_transfer_0001/instruction.txt
@@ -0,0 +1 @@
+Transfer the sunset lighting conditions from the second image into the scene of the first image, while keeping the primary elements (such as the beach and lifeguard tower) unchanged. During the transfer, preserve the composition and details of the first image as much as possible. The lighting should reflect the warm tones of sunset, and slight adjustments to the sky color and shadows may be made to accommodate the new lighting effect.
\ No newline at end of file
diff --git a/dataset/image_transfer_light_transfer_0001/meta.json b/dataset/image_transfer_light_transfer_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e8e92db6194bea21046a548bbc7acbec2e35a10
--- /dev/null
+++ b/dataset/image_transfer_light_transfer_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "light transfer",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0093",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_posture_transfer_0001/eval.json b/dataset/image_transfer_posture_transfer_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..0dcdc8690bc721005833b9e76bf78df317b97491
--- /dev/null
+++ b/dataset/image_transfer_posture_transfer_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the model output maintain the identity of the character from Image A while adopting the pose from Image B?",
+            "0_point_standard": "The identity or recognizable features of the character in the output image significantly deviate from those in Image A.",
+            "1_point_standard": "The character in the output image retains the identity and recognizable features of the character in Image A while adopting the new pose."
+        },
+        {
+            "question": "Does the character in the output image accurately adopt the pose from Image B?",
+            "0_point_standard": "The pose of the character in the output image fails to reflect the pose of the character in Image B, with noticeable differences in limb positioning or body orientation.",
+            "1_point_standard": "The character in the output image accurately reflects the pose of the character in Image B, with correct limb positioning and body orientation."
+        },
+        {
+            "question": "Does the model ensure that the background and other non-character content from Image A remain unchanged in the output image?",
+            "0_point_standard": "There are noticeable changes or inconsistencies in the background or other non-character elements from Image A in the output image.",
+            "1_point_standard": "The background and other non-character content from Image A are preserved in the output image without significant changes."
+        },
+        {
+            "question": "Is the transition of the pose from Image B to the character in Image A logical, maintaining the natural anatomical structure of the character?",
+            "0_point_standard": "The pose transfer results in anatomical inconsistencies or unnatural body postures, disrupting the logical flow of the character's form.",
+            "1_point_standard": "The pose transfer is logical, maintaining the natural anatomical structure and coherent body posture of the character."
+        },
+        {
+            "question": "Does the output image maintain high-quality rendering, especially in areas where the pose has been modified, ensuring clarity and sharpness?",
+            "0_point_standard": "The areas of pose modification in the output image are blurry or exhibit rendering artifacts, reducing overall quality.",
+            "1_point_standard": "The pose-modified areas are rendered clearly and sharply, maintaining high-quality visual output."
+        },
+        {
+            "question": "Is the style consistency between input Image A and the output image maintained, ensuring a seamless visual transition?",
+            "0_point_standard": "The output image exhibits stylistic differences or inconsistencies compared to Image A, causing a disjointed visual effect.",
+            "1_point_standard": "The output image maintains the stylistic elements of Image A, ensuring a seamless and consistent visual transition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_posture_transfer_0001/images.txt b/dataset/image_transfer_posture_transfer_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..95743b077c08be82b5f780b87441db9985d2b5f9
--- /dev/null
+++ b/dataset/image_transfer_posture_transfer_0001/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i4/O1CN01fCkDxE1icpB9fx3S2_!!6000000004434-0-tps-1792-2304.jpg
+https://img.alicdn.com/imgextra/i4/O1CN019RLYwk1ypg9DK6xmD_!!6000000006628-0-tps-2592-3888.jpg
diff --git a/dataset/image_transfer_posture_transfer_0001/instruction.txt b/dataset/image_transfer_posture_transfer_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fdcadb792e287bf190491da62ab2241748cc2294
--- /dev/null
+++ b/dataset/image_transfer_posture_transfer_0001/instruction.txt
@@ -0,0 +1 @@
+The goal is to transfer the pose (action) from the second image to the person in the first image, keeping the other elements in the first image as unchanged as possible. While transferring the action, slight adjustments to the character's body posture, clothing, and surrounding objects may be needed to ensure a natural appearance, but the character's unique features and the overall consistency of the background should remain intact. The generated image should depict the person from the first image in the squatting pose from the second image.
\ No newline at end of file
diff --git a/dataset/image_transfer_posture_transfer_0001/meta.json b/dataset/image_transfer_posture_transfer_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed3a36665d6d2dceda030a61e8b8e27ab03007a7
--- /dev/null
+++ b/dataset/image_transfer_posture_transfer_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "posture transfer",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0096",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_style_transfer_0001/eval.json b/dataset/image_transfer_style_transfer_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..64f01e6a137aeaba85810006b96210e995bded47
--- /dev/null
+++ b/dataset/image_transfer_style_transfer_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the main content of Image B preserved in the output image, with no significant loss or alteration of key elements?",
+            "0_point_standard": "The main content of Image B is significantly altered or lost, making it difficult to recognize the original elements.",
+            "1_point_standard": "The main content of Image B is clearly preserved, with all key elements intact."
+        },
+        {
+            "question": "Does the output image reflect the overall style characteristics of Image A and apply them consistently to Image B as specified?",
+            "0_point_standard": "The style characteristics of Image A are not effectively applied, or the style is inconsistent in the output image.",
+            "1_point_standard": "The style characteristics of Image A have been applied consistently and accurately to Image B as specified in the task."
+        },
+        {
+            "question": "Is the style transfer limited to the specified areas in the output image, with unmodified areas retaining their original appearance?",
+            "0_point_standard": "The style transfer unexpectedly affects areas that should not be modified, disrupting the original appearance of Image B.",
+            "1_point_standard": "The style transfer is accurately applied only to the specified areas, with unmodified areas retaining their original appearance."
+        },
+        {
+            "question": "Does the output image blend the style of Image A and the content of Image B in a coherent and visually harmonious way?",
+            "0_point_standard": "The combination of style and content appears disjointed or mismatched, resulting in a lack of visual harmony between Image A's style and Image B's content.",
+            "1_point_standard": "The output image achieves a visually coherent blend of Image A's style and Image B's content, resulting in a harmonious unity."
+        },
+        {
+            "question": "Are the textures, colors, and shading elements of Image A naturally integrated into Image B, while maintaining realism?",
+            "0_point_standard": "The application of textures, colors, or shading from Image A appears unnatural or out of place, reducing realism.",
+            "1_point_standard": "The textures, colors, and shading of Image A are seamlessly integrated into Image B, creating a realistic and unified appearance."
+        },
+        {
+            "question": "Does the output image exhibit high aesthetic quality, with balanced composition, clarity, and an appealing visual effect after style transfer?",
+            "0_point_standard": "The output image lacks aesthetic appeal, with poor composition, clarity, or visual effect after style transfer.",
+            "1_point_standard": "The output image exhibits high aesthetic quality, with balanced composition, clear details, and an appealing visual effect that enhances the final outcome."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_style_transfer_0001/images.txt b/dataset/image_transfer_style_transfer_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..53a53f8bb95b2319bf39a582c0fe0fd8cfb7c97b
--- /dev/null
+++ b/dataset/image_transfer_style_transfer_0001/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i3/O1CN0187RTM11uWfF03fgSI_!!6000000006045-0-tps-1280-1270.jpg
+https://img.alicdn.com/imgextra/i1/O1CN016Lo7oK1LZP3he92Ja_!!6000000001313-0-tps-895-571.jpg
diff --git a/dataset/image_transfer_style_transfer_0001/instruction.txt b/dataset/image_transfer_style_transfer_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..794c1bdeabd897f52f687fb1fbc5b61a6c1ad0b7
--- /dev/null
+++ b/dataset/image_transfer_style_transfer_0001/instruction.txt
@@ -0,0 +1 @@
+Please make changes to the drawing style of the second drawing based on the drawing style in the first drawing provided. During the change process, keep the overall composition of the original image unchanged. Modify only the drawing style of the image so that it matches the drawing style of the first image, while keeping the image harmonious and aesthetically pleasing.
\ No newline at end of file
diff --git a/dataset/image_transfer_style_transfer_0001/meta.json b/dataset/image_transfer_style_transfer_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..aaa228aadf7fdcabddb02fb8f5f6bf728a7874e0
--- /dev/null
+++ b/dataset/image_transfer_style_transfer_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "style transfer",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0095",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_style_transfer_0002/eval.json b/dataset/image_transfer_style_transfer_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..49dd1b1f62e2289e005b44650c54485db987af6a
--- /dev/null
+++ b/dataset/image_transfer_style_transfer_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the main content of image B preserved in the output image, with no significant loss or alteration of key elements?",
+            "0_point_standard": "The main content of image B is significantly altered or lost, making it difficult to recognize the original elements.",
+            "1_point_standard": "The main content of image B is clearly preserved, with all key elements intact."
+        },
+        {
+            "question": "Does the output image reflect the overall style characteristics of image A and apply them consistently to image B as specified?",
+            "0_point_standard": "The style characteristics of image A are not effectively applied, or the style is inconsistent in the output image.",
+            "1_point_standard": "The style characteristics of image A are consistently and accurately applied to image B as specified in the task."
+        },
+        {
+            "question": "Is the style transfer limited to the specified areas in the output image, and do the unmodified areas retain their original appearance?",
+            "0_point_standard": "The style transfer unexpectedly affects areas that should not be modified, disrupting the original appearance of image B.",
+            "1_point_standard": "The style transfer is accurately applied only to the specified areas, with the unmodified areas retaining their original appearance."
+        },
+        {
+            "question": "Does the output image integrate the style of image A and the content of image B in a coherent and visually harmonious way?",
+            "0_point_standard": "The combination of style and content appears disjointed or mismatched, lacking visual harmony between the style of image A and the content of image B.",
+            "1_point_standard": "The output image achieves a visually coherent integration of the style of image A and the content of image B, resulting in harmonious unity."
+        },
+        {
+            "question": "Are the texture, color, and shadow elements of image A naturally integrated into image B while maintaining realism?",
+            "0_point_standard": "The application of texture, color, or shadows from image A appears unnatural or out of place, diminishing realism.",
+            "1_point_standard": "The texture, color, and shadows of image A are seamlessly integrated into image B, creating a realistic and unified appearance."
+        },
+        {
+            "question": "Does the output image exhibit high aesthetic quality, with balanced composition, clarity, and an appealing visual effect after style transfer?",
+            "0_point_standard": "The output image lacks aesthetic appeal, with poor composition, clarity, or visual effect after style transfer.",
+            "1_point_standard": "The output image exhibits high aesthetic quality, with balanced composition, clear details, and an appealing visual effect that enhances the final result."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/image_transfer_style_transfer_0002/images.txt b/dataset/image_transfer_style_transfer_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3b6f073bb6b3e870c7bbd4fefeb74ebe24134880
--- /dev/null
+++ b/dataset/image_transfer_style_transfer_0002/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i3/O1CN01s9xEZw1W6yFBV00sz_!!6000000002740-0-tps-660-820.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01RFfFbh1VUUyVc35pS_!!6000000002656-0-tps-3840-2160.jpg
diff --git a/dataset/image_transfer_style_transfer_0002/instruction.txt b/dataset/image_transfer_style_transfer_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..794c1bdeabd897f52f687fb1fbc5b61a6c1ad0b7
--- /dev/null
+++ b/dataset/image_transfer_style_transfer_0002/instruction.txt
@@ -0,0 +1 @@
+Please make changes to the drawing style of the second drawing based on the drawing style in the first drawing provided. During the change process, keep the overall composition of the original image unchanged. Modify only the drawing style of the image so that it matches the drawing style of the first image, while keeping the image harmonious and aesthetically pleasing.
\ No newline at end of file
diff --git a/dataset/image_transfer_style_transfer_0002/meta.json b/dataset/image_transfer_style_transfer_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d92b48e6dc834adaf4f76c24c42555c59bbea9f
--- /dev/null
+++ b/dataset/image_transfer_style_transfer_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "style transfer",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0095",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0001/eval.json b/dataset/information_chart_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..f2cacbf0217c4b5e47f8fd743e86dfb90b908bbc
--- /dev/null
+++ b/dataset/information_chart_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the content of the infographic match the text description, and does it accurately present all key information?",
+            "0_point_standard": "The information in the infographic does not match the text description or is missing important information.",
+            "1_point_standard": "The infographic content fully matches the text description, and the information is complete and accurate."
+        },
+        {
+            "question": "Is the text in the infographic clear and easy to read, and does the format meet the design requirements of the chart?",
+            "0_point_standard": "The text is unclear or the layout is chaotic, affecting readability.",
+            "1_point_standard": "The text is clear and easy to read, with a standard format that meets design criteria."
+        },
+        {
+            "question": "Is the overall structure of the infographic consistent with the layout and logical order specified in the text prompt?",
+            "0_point_standard": "The structure of the infographic is confusing and does not follow the layout or order given in the text.",
+            "1_point_standard": "The structure of the infographic is clear, the layout is reasonable, and it follows the description in the text."
+        },
+        {
+            "question": "Did the model accurately understand the specific requirements in the text prompt (e.g., color scheme, font style) and reflect them in the infographic?",
+            "0_point_standard": "The model did not accurately understand the requirements in the text; the infographic does not show the specified design elements.",
+            "1_point_standard": "The model accurately understood the requirements in the text, and all specified design elements are reflected in the infographic."
+        },
+        {
+            "question": "Is the information in the infographic presented in a clear, logical order, with a visual hierarchy that is easy to understand and follow?",
+            "0_point_standard": "The information lacks clarity or logical order, making it difficult to understand or follow.",
+            "1_point_standard": "The information is presented in a clear, logical order with a strong visual hierarchy, making it easy to understand and follow."
+        },
+        {
+            "question": "Does the infographic meet professional standards for overall aesthetic quality and possess strong visual impact?",
+            "0_point_standard": "The infographic lacks aesthetic appeal and has weak design and visual attraction.",
+            "1_point_standard": "The infographic has excellent aesthetic quality, with beautiful design and strong visual impact."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0001/images.txt b/dataset/information_chart_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/information_chart_generation_0001/instruction.txt b/dataset/information_chart_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2be2657c6e90239d2e4820f9aaf6b26354ea2f75
--- /dev/null
+++ b/dataset/information_chart_generation_0001/instruction.txt
@@ -0,0 +1 @@
+The infographic is titled "ARE YOU PREPARED TO RIDE THROUGH LIFE'S BLIND SPOTS?". It uses a road metaphor, showing a road winding through a city landscape with various buildings and vehicles. The main text states, "Be the driver of your life's journey. Fill in your protection gaps for the well-being of you and your loved ones when an unexpected emergency occurs." Five main insurance types are highlighted: Long-term care insurance, with a question: "I want to be responsible to my loved ones if I need 24/7 care when I get older." A statistic is provided: "1 in 2 healthy Singaporeans age 65 and above could become severely disabled (stroke, spinal cord injuries) in their lifetime, and may need long-term care." Another question is asked: "Am I able to lean on my loved ones as caregivers in the future? If I require a helper when I am old, can I afford it?". The second is Critical Illness Insurance, with the question, "I want to be sure my loved ones can cope if I am critically ill (Heart attack, cancer, stroke)." A statistic is given: "Almost 1 out of 3 deaths in Singapore, is due to heart diseases or stroke." It also asks, "Would I be able to find an alternate source of income if I am recovering from a critical illness?". Third is Life Insurance, with the question, "I want to leave my assets to my loved ones and not have any loans that they need to pay off when I'm gone." It states that "The average home loan for a 3-room HDB flat is $330,500" and asks, "Who would take on my loans (credit card, personal loans, education, car) and support my family's future when I am gone?". The fourth is Medical & Hospitalisation, with the question, "I want to be able to pay my household and medical bills even when I am ill or injured." It provides the statistic: "The average cost per day in a class B public hospital ward is $686 to $1,094," and asks, "What recurring bills do I have that would still have to be paid if I am not working? 
[Increasing healthcare costs may mean your medisave and medicare funds may not be sufficient]". Finally, Accident Insurance, Auto Insurance, Auto Excess is covered, asking, "I want to be able to recover and cover repair costs without financial strain." It includes the statistic: "There are 3,635 fatalities and injuries in the first half of 2021, up 17% from 3,108 in the same period last year" and asks, "Am I ready to pay for any damages or medical fees should I get into an accident?". A legend explains the icons used: How would this insurance impact me and my loved ones?, Here's what you didn't know, and Factors for consideration. The infographic is branded with the "Grab" logo.
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0001/meta.json b/dataset/information_chart_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ab0eb7aaf774b45e27301c94c5cfe55be616599
--- /dev/null
+++ b/dataset/information_chart_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "information chart generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0023",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0002/eval.json b/dataset/information_chart_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff1f5928abdee9f5e21dcd9cf688e1aad3647554
--- /dev/null
+++ b/dataset/information_chart_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the content of the infographic match the text description, and does it accurately present all key information?",
+            "0_point_standard": "The information in the infographic does not match the text description, or important information is missing.",
+            "1_point_standard": "The content of the infographic completely matches the text description, and the information is complete and accurate."
+        },
+        {
+            "question": "Is the text in the infographic clear and easy to read, and does the formatting meet the design requirements of the chart?",
+            "0_point_standard": "The text is unclear, or the layout is chaotic, affecting readability.",
+            "1_point_standard": "The text is clear and easy to read, the format is standard, and meets design standards."
+        },
+        {
+            "question": "Is the overall structure of the infographic consistent with the layout and logical order specified in the text prompt?",
+            "0_point_standard": "The structure of the infographic is disorganized and does not conform to the layout or order given in the text.",
+            "1_point_standard": "The structure of the infographic is clear, with a reasonable layout that matches the description in the text."
+        },
+        {
+            "question": "Did the model accurately understand the specific requirements in the text prompt (e.g., color scheme, font style) and reflect them in the infographic?",
+            "0_point_standard": "The model did not accurately understand the requirements in the text, and the infographic does not display the specified design elements.",
+            "1_point_standard": "The model accurately understood the requirements in the text, and all specified design elements are reflected in the infographic."
+        },
+        {
+            "question": "Is the information in the infographic presented in a clear and logical order, with a visual hierarchy that is easy to understand and follow?",
+            "0_point_standard": "The information is presented in a way that lacks clarity or logical order, making it difficult to understand or follow.",
+            "1_point_standard": "The information is presented in a clear and logical order, with a strong visual hierarchy that is easy to understand and follow."
+        },
+        {
+            "question": "Does the infographic meet professional standards for overall aesthetic quality and have strong visual impact?",
+            "0_point_standard": "The infographic lacks aesthetic appeal, with weak design and visual attractiveness.",
+            "1_point_standard": "The infographic has excellent aesthetic quality, with a beautiful design and strong visual impact."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0002/images.txt b/dataset/information_chart_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/information_chart_generation_0002/instruction.txt b/dataset/information_chart_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1c3ce22df24c6244de64a1b2df21aac024b523b2
--- /dev/null
+++ b/dataset/information_chart_generation_0002/instruction.txt
@@ -0,0 +1 @@
+This infographic is titled "BAGEL: Everything about Bagel Sandwiches," offering a comprehensive overview of bagels and bagel sandwiches. The top features a prominent neon-style “BAGEL” title. The left side starts with “What is a Bagel? Definition: A bagel is a ring-shaped bread made from yeast-fermented flour dough that’s boiled in hot water and then baked. The term originates from the medieval German word böugel (meaning “ring” or “circle”).” with a seal-like graphic labeled “ORIGIN.” Next, “TYPES OF BAGELS” shows nine bagel types with images: Plain, Everything, Walnut, Garlic, Basil, Chocolate, Parmesan Cheese, Cinnamon, Blueberry. “TOPPING” lists four categories with images: Cheeses (Cheddar, Mozzarella, Shredded), Fruits (Apple, Banana, Blueberry, Avocado), Meats (Egg, Ham, Bacon, Smoked Salmon), and Vegetables (Raw Cucumber, Pickled Cucumber, Onion, Arugula, Olive, Tomato, Lettuce, Corn, Caper). “SPREAD” displays six spreads with images: Chocolate, Fruit Jam, Cream Cheese, Tomato Sauce, Greek Yogurt, Butter. “Origin and History” outlines bagel history in four stages: Origins: 16th–18th century – Originated as a traditional food of Polish Jews, Spread: 18th century–early 20th century – Spread with Polish Jewish immigrants, Modernization: Early 20th century–21st century – Developed manufacturing techniques in the U.S. and evolved into sandwiches, Diversification: 21st century–present – Gained worldwide popularity with various ingredients and combinations. “Bagel Making Process” uses five illustrations to detail the process: Mix yeast and flour, Roll out the dough, Let the dough rise, Boil the dough, Bake in the oven. “Calories” displays three bagel sandwich calorie counts with images: 600 kcal, 550 kcal, 500 kcal. “Bagels and Donuts” compares bagels and donuts with images and text. “BREAKFAST VS BRUNCH” contrasts breakfast (Schmear) and brunch (Sandwich). 
“FAMOUS SANDWICHES” shows ten famous bagel sandwiches with images and names: New York-Style, Italian Salami, Pesto and Egg, Steak Egg and Cheese, Avocado Egg Salad, Bacon Egg Cheese, Pear Apple Cheddar, Romesco, Egg Pastrami Egg and Cheese, Vegetable and Tofu.
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0002/meta.json b/dataset/information_chart_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..35aecfb009b4733d90ddb366fb833c5b64b3933d
--- /dev/null
+++ b/dataset/information_chart_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "information chart generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0023",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0003/eval.json b/dataset/information_chart_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..4c8ae8b0c80220e95ffff7087c300269f6917ff8
--- /dev/null
+++ b/dataset/information_chart_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the content of the infographic align with the text description, accurately presenting all key information?",
+            "0_point_standard": "The information in the infographic does not match the text description, or important information is missing.",
+            "1_point_standard": "The infographic content is completely consistent with the text description, with complete and accurate information."
+        },
+        {
+            "question": "Is the text in the infographic clear and easy to read, and does the format meet the design requirements of the chart?",
+            "0_point_standard": "The text is unclear, or the layout is chaotic, affecting readability.",
+            "1_point_standard": "The text is clear and easy to read, the format is standard, and meets design specifications."
+        },
+        {
+            "question": "Does the overall structure of the infographic match the layout and logical sequence specified in the text prompt?",
+            "0_point_standard": "The structure of the infographic is chaotic and does not match the layout or sequence given in the text.",
+            "1_point_standard": "The structure of the infographic is clear, with a reasonable layout that matches the description in the text."
+        },
+        {
+            "question": "Did the model accurately understand the specific requirements in the text prompt (such as color scheme, font style) and reflect them in the infographic?",
+            "0_point_standard": "The model did not accurately understand the requirements in the text, and the infographic does not show the specified design elements.",
+            "1_point_standard": "The model accurately understood the requirements in the text, and all specified design elements are reflected in the infographic."
+        },
+        {
+            "question": "Is the information in the infographic presented in a clear, logical order with an easy-to-understand and follow visual hierarchy?",
+            "0_point_standard": "The information lacks clarity or logical order, making it difficult to understand or follow.",
+            "1_point_standard": "The information is presented in a clear, logical order with a strong visual hierarchy, making it easy to understand and follow."
+        },
+        {
+            "question": "Does the infographic meet professional standards for overall aesthetic quality, with a strong visual impact?",
+            "0_point_standard": "The infographic lacks aesthetic appeal, with weak design and visual attraction.",
+            "1_point_standard": "The infographic has excellent aesthetic quality, with exquisite design and strong visual impact."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0003/images.txt b/dataset/information_chart_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/information_chart_generation_0003/instruction.txt b/dataset/information_chart_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..58c319bca2ce231da316ddf60e4b7979185e8a13
--- /dev/null
+++ b/dataset/information_chart_generation_0003/instruction.txt
@@ -0,0 +1 @@
+The infographic is titled "WHAT COVERAGE DO YOU NEED?". It features a green and white color scheme. The left side displays an illustration depicting a family and their home, emphasizing protection for loved ones. The main text on the left reads, "Protect your loved ones and yourself". A table on the right is divided into two sections: "YOUR PRIVILEGES (BASIC)" and "ADDITIONAL PROTECTION". "YOUR PRIVILEGES (BASIC)" lists coverage options with checkmarks indicating inclusion, such as: Cash Payouts, Death, Permanent Disability (LTC, ACI, AI, AE), Medical treatment (in/out patient) (M&H), Surgical and hospitalization (M&H), Critical Illnesses (e.g., Cancer, Stroke), Bones fracture (M&H, ACI, AI, AE), Personal injury/property damage caused by third parties, Long term household bills due to permanent disability (LTC, ACI, AI, AE), Accidents or vehicle breakdowns (excess in the event of an accident), 24/7 worldwide coverage, Coverage for dependents, and Underwriter. Specific monetary values are provided for each benefit under "Prolonged Medical Leave (PML)", "Personal Accident (PA)", and "3rd Party Liability (3PL)". "ADDITIONAL PROTECTION" lists further optional coverages with monetary values including: Partner Cover ($150 per day/$200 per day), Critical Illnesses: Pay Per Trip (Cippt) ($100, $150 per day, $1000, $1500), and Auto Excess ($500-$5,000). A legend explains the symbols used in the table: Applicable, Claims, and Cash payouts. A small section at the bottom details CASH PAYOUTS (IF ANY), specifying amounts for Prolonged Medical Leave and clarifying that Personal Accident Insurance and 3rd Party Liability have no cash payouts. A "Grab" logo is present at the bottom left, along with a copyright disclaimer and an "APPLY NOW" QR code. The insurer logos CHUBB and Income are displayed at the bottom right.
\ No newline at end of file
diff --git a/dataset/information_chart_generation_0003/meta.json b/dataset/information_chart_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..d7ca236afac400cb878b5c0fff43408a25326c61
--- /dev/null
+++ b/dataset/information_chart_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "information chart generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0023",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/interior_design_generation_0002/eval.json b/dataset/interior_design_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..28a71f5b9a73b9659dab8f65d8416dac04a87568
--- /dev/null
+++ b/dataset/interior_design_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly depict an interior space and include recognizable design elements such as furniture, decor, and architectural details?",
+            "0_point_standard": "The image lacks recognizable interior design elements and fails to clearly depict an interior space.",
+            "1_point_standard": "The image clearly depicts an interior space and includes recognizable design elements such as typical interior design furniture and decor."
+        },
+        {
+            "question": "Does the image present a realistic and functional room layout that adheres to interior design principles (e.g., appropriate furniture placement, unobstructed pathways)?",
+            "0_point_standard": "The layout is chaotic or impractical, lacking the functional design principles expected in an interior space.",
+            "1_point_standard": "The layout is realistic and functional, with furniture and decor arranged in a manner consistent with actual interior design standards."
+        },
+        {
+            "question": "Does the generated image accurately reflect the specific style, color scheme, or room features described in the text prompt (e.g., minimalist style, neutral tones, kitchen features)?",
+            "0_point_standard": "The image does not match the described specific style, colors, or room features, deviating from the text requirements.",
+            "1_point_standard": "The image accurately reflects the style, color scheme, and room features specified in the text prompt."
+        },
+        {
+            "question": "Is the lighting in the image realistic, and do the shadows and highlights enhance the depth and sense of space in the interior?",
+            "0_point_standard": "The lighting looks fake or lacks depth, making the image appear unrealistic.",
+            "1_point_standard": "The lighting is realistic, and the shadows and highlights add depth and enhance the sense of space in the interior."
+        },
+        {
+            "question": "Are the materials and textures in the image rendered with high quality, and do the realistic details reflect the described materials (e.g., wood, fabric, metal)?",
+            "0_point_standard": "The materials or textures lack clarity or appear fake, diminishing the realism of the image.",
+            "1_point_standard": "The materials and textures are rendered with realistic detail, accurately reflecting the characteristics of the described materials."
+        },
+        {
+            "question": "Does the image exhibit a high level of aesthetic quality, with a harmonious color scheme, balanced composition, and professional visual appeal?",
+            "0_point_standard": "The image lacks aesthetic appeal, with an incoherent color scheme, poor composition, or an unprofessional effect.",
+            "1_point_standard": "The image exhibits strong aesthetic appeal, with a harmonious color scheme, balanced composition, and a professional, visually pleasing effect."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/interior_design_generation_0002/images.txt b/dataset/interior_design_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/interior_design_generation_0002/instruction.txt b/dataset/interior_design_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6a83649bc16ba4b36e406531f36768155374abee
--- /dev/null
+++ b/dataset/interior_design_generation_0002/instruction.txt
@@ -0,0 +1 @@
+The image depicts a warm and modern dining room design. A rectangular light-colored wooden dining table sits at the center, surrounded by four dining chairs with woven rattan seats, matching the table's color scheme. Simple tableware and a sprig of greenery are arranged on the table, creating a relaxed and natural dining atmosphere. Behind the table is a stunning feature wall composed of multiple wooden lattice panels, showcasing a sophisticated and modern minimalist style. Recessed niches on either side of the lattice wall house books, decorative objects, and yellow ceramic vases, adding depth and visual interest to the space. A matching storage cabinet with woven rattan doors is situated in the corner of the dining room. The floor is light-colored wood, adorned with a light-colored rug, maintaining consistency with the overall color palette. Sheer, flowing curtains hang from a large window, allowing sunlight to gently stream into the room, creating a bright and comfortable ambiance. The walls are painted a soft pink, contributing to the warm and inviting atmosphere. The ceiling is clean and simple, featuring recessed spotlights that provide soft illumination. The overall design is characterized by its simplicity and elegance, emphasizing the careful selection of materials and detailed arrangement, reflecting a fusion of modern minimalism and natural elements.
\ No newline at end of file
diff --git a/dataset/interior_design_generation_0002/meta.json b/dataset/interior_design_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..c91d9963ce7b6fa9c501187ac92901e513f0db2e
--- /dev/null
+++ b/dataset/interior_design_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "interior design specific effect generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0025",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/interior_design_generation_0003/eval.json b/dataset/interior_design_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a239942fa2ade3101901ee831b90493a1fb62b8
--- /dev/null
+++ b/dataset/interior_design_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly depict an interior space and include identifiable design elements such as furniture, decor, and architectural details?",
+            "0_point_standard": "The image lacks recognizable interior design elements and does not clearly depict an interior space.",
+            "1_point_standard": "The image clearly depicts an interior space and includes identifiable design elements, such as typical interior design furniture and decor."
+        },
+        {
+            "question": "Does the image present a realistic and practical room layout in accordance with interior design principles (e.g., appropriate furniture placement, unobstructed pathways)?",
+            "0_point_standard": "The layout is chaotic or impractical, lacking the functional design principles expected in interior spaces.",
+            "1_point_standard": "The layout is realistic and practical, with furniture and decor arranged according to actual interior design standards."
+        },
+        {
+            "question": "Does the generated image accurately reflect the specific style, color scheme, or room features described in the text prompt (e.g., minimalist style, neutral tones, kitchen features)?",
+            "0_point_standard": "The image does not match the described specific style, colors, or room features, deviating from the text requirements.",
+            "1_point_standard": "The image accurately reflects the style, color scheme, and room features specified in the text prompt."
+        },
+        {
+            "question": "Is the lighting in the image realistic, and do shadows and highlights enhance the depth and spatial perception of the interior?",
+            "0_point_standard": "The lighting appears fake or lacks depth, making the image look unrealistic.",
+            "1_point_standard": "The lighting is realistic, with shadows and highlights that enhance depth and the spatial perception of the interior."
+        },
+        {
+            "question": "Are the materials and textures in the image rendered with high quality, and do the realistic details reflect the described materials (e.g., wood, fabric, metal)?",
+            "0_point_standard": "Materials or textures lack clarity or appear fake, reducing the realism of the image.",
+            "1_point_standard": "The materials and texture details are realistic and accurately reflect the characteristics of the described materials."
+        },
+        {
+            "question": "Does the image exhibit a high level of aesthetic quality, with a harmonious color scheme, balanced composition, and professional visual appeal?",
+            "0_point_standard": "The image lacks aesthetic quality, with an incoherent color scheme, poor composition, or unprofessional appearance.",
+            "1_point_standard": "The image exhibits strong aesthetic quality, with a harmonious color scheme, balanced composition, and a professional, pleasing effect."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/interior_design_generation_0003/images.txt b/dataset/interior_design_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/interior_design_generation_0003/instruction.txt b/dataset/interior_design_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cef6e0d2a39119d76499f8afbe82ba0fe023bda1
--- /dev/null
+++ b/dataset/interior_design_generation_0003/instruction.txt
@@ -0,0 +1 @@
+The image showcases a modern minimalist living room design. The overall color scheme is predominantly off-white and light gold, creating a warm and inviting atmosphere. A large window allows ample natural light to flood the space, complemented by beige curtains, giving a soft and natural feel. A cream-colored fabric sectional sofa sits at the center, offering comfort and complemented by throw pillows in black and white checks, light pink, and light green, adding visual interest and texture. A low, dark gray round coffee table sits in front of the sofa, displaying books, a vase, and decorative items. A thick, fluffy white rug underneath the coffee table softens the space and enhances its luxurious feel. A potted green plant adds a touch of life to one corner. The walls are painted in light colors, accented by geometric lines for a clean and simple look. Two uniquely designed pendant lights hang from the ceiling, adding artistic flair. Near the wall, a yellow pear-shaped beanbag chair introduces a playful pop of color. The overall design is clean, spacious, comfortable, and perfectly exemplifies modern minimalist style.
\ No newline at end of file
diff --git a/dataset/interior_design_generation_0003/meta.json b/dataset/interior_design_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..e4067ad01456f28e5e0aac9344304c2067fbd068
--- /dev/null
+++ b/dataset/interior_design_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "interior design specific effect generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0025",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0001/auto_eval.jsonl b/dataset/landmark_building_generation_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..7350e6894c78e3bcfe14cf9c6bd5a6655241675b
--- /dev/null
+++ b/dataset/landmark_building_generation_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases the Eiffel Tower under a vibrant clear blue sky. The tower's structure is prominently featured, exhibiting a rich brownish-tan metallic texture with intricate details clearly visible. Olympic rings are displayed prominently mid-way up the tower, their bright colors contrasting sharply with the tower's muted tones. At the base of the tower, numerous people are visible, their small size emphasizing the tower's immense scale. Lush greenery extends in front of the tower, a dense collection of trees displaying varying shades of green. The sky is a bright, deep blue, speckled with a few fluffy white clouds, predominantly concentrated at the bottom of the frame, with a richer, deeper blue hue above. In the far distance, some buildings are faintly visible, lacking sharp definition. Lampposts stand symmetrically on either side of the tower, blending seamlessly with the surrounding landscape. The overall composition is symmetrical, the colors harmonious, and the light bright, creating a powerful image of the Eiffel Tower's majesty and the tranquil beauty of its surroundings.\"\nYour review question is:\nDoes the generated image clearly represent a recognizable building or architectural structure, typical of a landmark? 0 points: The image lacks identifiable landmark characteristics, making it difficult to recognize as an architectural structure. 1 point: The image has clear landmark qualities, with a distinct structure and recognizable features typical of a notable building.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases the Eiffel Tower under a vibrant clear blue sky. The tower's structure is prominently featured, exhibiting a rich brownish-tan metallic texture with intricate details clearly visible. Olympic rings are displayed prominently mid-way up the tower, their bright colors contrasting sharply with the tower's muted tones. At the base of the tower, numerous people are visible, their small size emphasizing the tower's immense scale. Lush greenery extends in front of the tower, a dense collection of trees displaying varying shades of green. The sky is a bright, deep blue, speckled with a few fluffy white clouds, predominantly concentrated at the bottom of the frame, with a richer, deeper blue hue above. In the far distance, some buildings are faintly visible, lacking sharp definition. Lampposts stand symmetrically on either side of the tower, blending seamlessly with the surrounding landscape. The overall composition is symmetrical, the colors harmonious, and the light bright, creating a powerful image of the Eiffel Tower's majesty and the tranquil beauty of its surroundings.\"\nYour review question is:\nIs the building visually complete, with a cohesive architectural design that appears structurally sound and realistic? 0 points: The building appears incomplete or unbalanced, giving the impression of an unrealistic or poorly designed structure. 1 point: The building is visually complete and cohesive, appearing structurally sound and realistic as a landmark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases the Eiffel Tower under a vibrant clear blue sky. The tower's structure is prominently featured, exhibiting a rich brownish-tan metallic texture with intricate details clearly visible. Olympic rings are displayed prominently mid-way up the tower, their bright colors contrasting sharply with the tower's muted tones. At the base of the tower, numerous people are visible, their small size emphasizing the tower's immense scale. Lush greenery extends in front of the tower, a dense collection of trees displaying varying shades of green. The sky is a bright, deep blue, speckled with a few fluffy white clouds, predominantly concentrated at the bottom of the frame, with a richer, deeper blue hue above. In the far distance, some buildings are faintly visible, lacking sharp definition. Lampposts stand symmetrically on either side of the tower, blending seamlessly with the surrounding landscape. The overall composition is symmetrical, the colors harmonious, and the light bright, creating a powerful image of the Eiffel Tower's majesty and the tranquil beauty of its surroundings.\"\nYour review question is:\nDoes the landmark accurately reflect the style, era, and specific elements (e.g., architectural style, facade details) described in the text prompt? Analyze the text requirements sentence by sentence, and if one sentence does not match what is shown in the picture, score 0 points. 0 points: The landmark does not align with the specified style, era, or elements, deviating from the text requirements. 
1 point: The landmark accurately reflects the architectural style, era, and specific features described in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases the Eiffel Tower under a vibrant clear blue sky. The tower's structure is prominently featured, exhibiting a rich brownish-tan metallic texture with intricate details clearly visible. Olympic rings are displayed prominently mid-way up the tower, their bright colors contrasting sharply with the tower's muted tones. At the base of the tower, numerous people are visible, their small size emphasizing the tower's immense scale. Lush greenery extends in front of the tower, a dense collection of trees displaying varying shades of green. The sky is a bright, deep blue, speckled with a few fluffy white clouds, predominantly concentrated at the bottom of the frame, with a richer, deeper blue hue above. In the far distance, some buildings are faintly visible, lacking sharp definition. Lampposts stand symmetrically on either side of the tower, blending seamlessly with the surrounding landscape. The overall composition is symmetrical, the colors harmonious, and the light bright, creating a powerful image of the Eiffel Tower's majesty and the tranquil beauty of its surroundings.\"\nYour review question is:\nIs the lighting applied realistically, and does the perspective enhance the spatial depth and grandeur of the landmark? 0 points: The lighting or perspective appears artificial, making the building look unrealistic or flat. 1 point: The lighting and perspective are realistic, enhancing the building’s spatial depth and giving it a grand, immersive appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases the Eiffel Tower under a vibrant clear blue sky. The tower's structure is prominently featured, exhibiting a rich brownish-tan metallic texture with intricate details clearly visible. Olympic rings are displayed prominently mid-way up the tower, their bright colors contrasting sharply with the tower's muted tones. At the base of the tower, numerous people are visible, their small size emphasizing the tower's immense scale. Lush greenery extends in front of the tower, a dense collection of trees displaying varying shades of green. The sky is a bright, deep blue, speckled with a few fluffy white clouds, predominantly concentrated at the bottom of the frame, with a richer, deeper blue hue above. In the far distance, some buildings are faintly visible, lacking sharp definition. Lampposts stand symmetrically on either side of the tower, blending seamlessly with the surrounding landscape. The overall composition is symmetrical, the colors harmonious, and the light bright, creating a powerful image of the Eiffel Tower's majesty and the tranquil beauty of its surroundings.\"\nYour review question is:\nAre architectural details like textures, materials, and ornamental features rendered realistically, with high-quality details that add depth and character to the building? 0 points: The details and textures lack clarity or appear artificial, reducing the architectural depth and realism of the building. 
1 point: The details and textures are high-quality, adding depth, realism, and character to the architectural features of the landmark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases the Eiffel Tower under a vibrant clear blue sky. The tower's structure is prominently featured, exhibiting a rich brownish-tan metallic texture with intricate details clearly visible. Olympic rings are displayed prominently mid-way up the tower, their bright colors contrasting sharply with the tower's muted tones. At the base of the tower, numerous people are visible, their small size emphasizing the tower's immense scale. Lush greenery extends in front of the tower, a dense collection of trees displaying varying shades of green. The sky is a bright, deep blue, speckled with a few fluffy white clouds, predominantly concentrated at the bottom of the frame, with a richer, deeper blue hue above. In the far distance, some buildings are faintly visible, lacking sharp definition. Lampposts stand symmetrically on either side of the tower, blending seamlessly with the surrounding landscape. The overall composition is symmetrical, the colors harmonious, and the light bright, creating a powerful image of the Eiffel Tower's majesty and the tranquil beauty of its surroundings.\"\nYour review question is:\nDoes the image exhibit a high level of aesthetic quality, with a visually balanced composition, appealing colors, and strong visual impact? 0 points: The image lacks aesthetic appeal, with poor color balance, weak composition, or an unprofessional look. 1 point: The image has strong aesthetic appeal, with balanced composition, attractive colors, and a visually impactful, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/landmark_building_generation_0001/eval.json b/dataset/landmark_building_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..458b6bc6710816a4ee37d120858873d36ef3f7b0
--- /dev/null
+++ b/dataset/landmark_building_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present a recognizable building or structure with landmark features?",
+            "0_point_standard": "The image lacks recognizable landmark features, making it difficult to identify as a building structure.",
+            "1_point_standard": "The image has clear landmark characteristics, with a distinct structure that features typical recognizable landmark architecture."
+        },
+        {
+            "question": "Is the building visually complete, with coherent architectural design, appearing structurally sound and realistic?",
+            "0_point_standard": "The building appears incomplete or unbalanced, giving an unrealistic or poorly designed impression.",
+            "1_point_standard": "The building is visually complete and coherent, appearing structurally sound and realistic, in line with landmark characteristics."
+        },
+        {
+            "question": "Does the landmark accurately reflect the style, era, and specific elements (such as architectural style, facade details) described in the text prompt? Analyze the text requirements sentence by sentence; if any sentence does not match the content shown in the image, score 0 points.",
+            "0_point_standard": "The landmark fails to embody the specified style, era, or elements, deviating from the text requirements.",
+            "1_point_standard": "The landmark accurately reflects the architectural style, era, and specific features described in the text prompt."
+        },
+        {
+            "question": "Is the lighting applied realistically, and does the perspective enhance the spatial depth and grandeur of the landmark?",
+            "0_point_standard": "The lighting or perspective effects appear unrealistic, making the building look unreal or lacking in depth.",
+            "1_point_standard": "The lighting and perspective effects are realistic, enhancing the spatial depth of the building and giving it a grand, immersive appearance."
+        },
+        {
+            "question": "Are the details such as material, texture, and decorative elements realistically rendered, with high-quality details to add depth and character?",
+            "0_point_standard": "The details and textures lack clarity or appear unrealistic, reducing the depth and authenticity of the building.",
+            "1_point_standard": "The details and textures are high-quality, adding depth, authenticity, and character to the building features."
+        },
+        {
+            "question": "Does the image possess a high level of aesthetic quality, with visually balanced composition, appealing colors, and strong visual impact?",
+            "0_point_standard": "The image lacks aesthetic appeal, with poor color balance, weak composition, or appearing unprofessional.",
+            "1_point_standard": "The image has strong aesthetic appeal, with balanced composition, appealing colors, and strong visual impact, achieving a professional effect."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0001/images.txt b/dataset/landmark_building_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/landmark_building_generation_0001/instruction.txt b/dataset/landmark_building_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b94afe58785541b08879208783aad0daf8be2582
--- /dev/null
+++ b/dataset/landmark_building_generation_0001/instruction.txt
@@ -0,0 +1 @@
+The image showcases the Eiffel Tower under a vibrant clear blue sky. The tower's structure is prominently featured, exhibiting a rich brownish-tan metallic texture with intricate details clearly visible. Olympic rings are displayed prominently mid-way up the tower, their bright colors contrasting sharply with the tower's muted tones. At the base of the tower, numerous people are visible, their small size emphasizing the tower's immense scale. Lush greenery extends in front of the tower, a dense collection of trees displaying varying shades of green. The sky is a bright, deep blue, speckled with a few fluffy white clouds, predominantly concentrated at the bottom of the frame, with a richer, deeper blue hue above. In the far distance, some buildings are faintly visible, lacking sharp definition. Lampposts stand symmetrically on either side of the tower, blending seamlessly with the surrounding landscape. The overall composition is symmetrical, the colors harmonious, and the light bright, creating a powerful image of the Eiffel Tower's majesty and the tranquil beauty of its surroundings.
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0001/meta.json b/dataset/landmark_building_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..505e4427c2469301a56a26d83fc9b686de24bc37
--- /dev/null
+++ b/dataset/landmark_building_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "landmark building generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0028",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0002/auto_eval.jsonl b/dataset/landmark_building_generation_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d30c7725c0b777e41ee1153b505dfd931ef9fd61
--- /dev/null
+++ b/dataset/landmark_building_generation_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"This is a breathtaking panoramic photograph of the Great Wall of China, showcasing its meandering path and harmonious integration with the surrounding landscape. The wall snakes through rolling green hills, rising and falling along the contours of the terrain. Distant mountains appear hazy and blue, veiled in a light mist, while the sky is a clear pale blue, speckled with thin clouds. The Great Wall itself is constructed of brick and stone, displaying earthy yellow tones, warmed by the glow of the setting sun. Several watchtowers are visible along the wall, standing tall and sturdy, demonstrating the superb craftsmanship of ancient architecture. The hillsides are covered in lush green vegetation, with dense, vibrant trees creating a striking contrast with the ancient, muted colors of the wall, resulting in a lively and dynamic scene. The image boasts clear layering, with distant mountains forming the background, the winding Great Wall as the middle ground, and the surrounding vegetation in the foreground, showcasing the perfect blend of the magnificent wall and the natural scenery, creating a serene yet majestic atmosphere.\"\nYour review question is:\nDoes the generated image clearly represent a recognizable building or architectural structure, typical of a landmark? 0 points: The image lacks identifiable landmark characteristics, making it difficult to recognize as an architectural structure. 1 point: The image has clear landmark qualities, with a distinct structure and recognizable features typical of a notable building.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"This is a breathtaking panoramic photograph of the Great Wall of China, showcasing its meandering path and harmonious integration with the surrounding landscape. The wall snakes through rolling green hills, rising and falling along the contours of the terrain. Distant mountains appear hazy and blue, veiled in a light mist, while the sky is a clear pale blue, speckled with thin clouds. The Great Wall itself is constructed of brick and stone, displaying earthy yellow tones, warmed by the glow of the setting sun. Several watchtowers are visible along the wall, standing tall and sturdy, demonstrating the superb craftsmanship of ancient architecture. The hillsides are covered in lush green vegetation, with dense, vibrant trees creating a striking contrast with the ancient, muted colors of the wall, resulting in a lively and dynamic scene. The image boasts clear layering, with distant mountains forming the background, the winding Great Wall as the middle ground, and the surrounding vegetation in the foreground, showcasing the perfect blend of the magnificent wall and the natural scenery, creating a serene yet majestic atmosphere.\"\nYour review question is:\nIs the building visually complete, with a cohesive architectural design that appears structurally sound and realistic? 0 points: The building appears incomplete or unbalanced, giving the impression of an unrealistic or poorly designed structure. 1 point: The building is visually complete and cohesive, appearing structurally sound and realistic as a landmark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"This is a breathtaking panoramic photograph of the Great Wall of China, showcasing its meandering path and harmonious integration with the surrounding landscape. The wall snakes through rolling green hills, rising and falling along the contours of the terrain. Distant mountains appear hazy and blue, veiled in a light mist, while the sky is a clear pale blue, speckled with thin clouds. The Great Wall itself is constructed of brick and stone, displaying earthy yellow tones, warmed by the glow of the setting sun. Several watchtowers are visible along the wall, standing tall and sturdy, demonstrating the superb craftsmanship of ancient architecture. The hillsides are covered in lush green vegetation, with dense, vibrant trees creating a striking contrast with the ancient, muted colors of the wall, resulting in a lively and dynamic scene. The image boasts clear layering, with distant mountains forming the background, the winding Great Wall as the middle ground, and the surrounding vegetation in the foreground, showcasing the perfect blend of the magnificent wall and the natural scenery, creating a serene yet majestic atmosphere.\"\nYour review question is:\nDoes the landmark accurately reflect the style, era, and specific elements (e.g., architectural style, facade details) described in the text prompt? Analyze the text requirements sentence by sentence, and if one sentence does not match what is shown in the picture, score 0 points. 0 points: The landmark does not align with the specified style, era, or elements, deviating from the text requirements. 1 point: The landmark accurately reflects the architectural style, era, and specific features described in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"This is a breathtaking panoramic photograph of the Great Wall of China, showcasing its meandering path and harmonious integration with the surrounding landscape. The wall snakes through rolling green hills, rising and falling along the contours of the terrain. Distant mountains appear hazy and blue, veiled in a light mist, while the sky is a clear pale blue, speckled with thin clouds. The Great Wall itself is constructed of brick and stone, displaying earthy yellow tones, warmed by the glow of the setting sun. Several watchtowers are visible along the wall, standing tall and sturdy, demonstrating the superb craftsmanship of ancient architecture. The hillsides are covered in lush green vegetation, with dense, vibrant trees creating a striking contrast with the ancient, muted colors of the wall, resulting in a lively and dynamic scene. The image boasts clear layering, with distant mountains forming the background, the winding Great Wall as the middle ground, and the surrounding vegetation in the foreground, showcasing the perfect blend of the magnificent wall and the natural scenery, creating a serene yet majestic atmosphere.\"\nYour review question is:\nIs the lighting applied realistically, and does the perspective enhance the spatial depth and grandeur of the landmark? 0 points: The lighting or perspective appears artificial, making the building look unrealistic or flat. 1 point: The lighting and perspective are realistic, enhancing the building’s spatial depth and giving it a grand, immersive appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"This is a breathtaking panoramic photograph of the Great Wall of China, showcasing its meandering path and harmonious integration with the surrounding landscape. The wall snakes through rolling green hills, rising and falling along the contours of the terrain. Distant mountains appear hazy and blue, veiled in a light mist, while the sky is a clear pale blue, speckled with thin clouds. The Great Wall itself is constructed of brick and stone, displaying earthy yellow tones, warmed by the glow of the setting sun. Several watchtowers are visible along the wall, standing tall and sturdy, demonstrating the superb craftsmanship of ancient architecture. The hillsides are covered in lush green vegetation, with dense, vibrant trees creating a striking contrast with the ancient, muted colors of the wall, resulting in a lively and dynamic scene. The image boasts clear layering, with distant mountains forming the background, the winding Great Wall as the middle ground, and the surrounding vegetation in the foreground, showcasing the perfect blend of the magnificent wall and the natural scenery, creating a serene yet majestic atmosphere.\"\nYour review question is:\nAre architectural details like textures, materials, and ornamental features rendered realistically, with high-quality details that add depth and character to the building? 0 points: The details and textures lack clarity or appear artificial, reducing the architectural depth and realism of the building. 1 point: The details and textures are high-quality, adding depth, realism, and character to the architectural features of the landmark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"This is a breathtaking panoramic photograph of the Great Wall of China, showcasing its meandering path and harmonious integration with the surrounding landscape. The wall snakes through rolling green hills, rising and falling along the contours of the terrain. Distant mountains appear hazy and blue, veiled in a light mist, while the sky is a clear pale blue, speckled with thin clouds. The Great Wall itself is constructed of brick and stone, displaying earthy yellow tones, warmed by the glow of the setting sun. Several watchtowers are visible along the wall, standing tall and sturdy, demonstrating the superb craftsmanship of ancient architecture. The hillsides are covered in lush green vegetation, with dense, vibrant trees creating a striking contrast with the ancient, muted colors of the wall, resulting in a lively and dynamic scene. The image boasts clear layering, with distant mountains forming the background, the winding Great Wall as the middle ground, and the surrounding vegetation in the foreground, showcasing the perfect blend of the magnificent wall and the natural scenery, creating a serene yet majestic atmosphere.\"\nYour review question is:\nDoes the image exhibit a high level of aesthetic quality, with a visually balanced composition, appealing colors, and strong visual impact? 0 points: The image lacks aesthetic appeal, with poor color balance, weak composition, or an unprofessional look. 1 point: The image has strong aesthetic appeal, with balanced composition, attractive colors, and a visually impactful, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/landmark_building_generation_0002/eval.json b/dataset/landmark_building_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..9fdce961c068ca58978192f41723a03ca78ffa3a
--- /dev/null
+++ b/dataset/landmark_building_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present a recognizable building or structure with landmark features?",
+            "0_point_standard": "The image lacks recognizable landmark features, making it difficult to identify as a building structure.",
+            "1_point_standard": "The image has clear landmark characteristics, with distinct structures that possess typical recognizable landmark building features."
+        },
+        {
+            "question": "Is the building visually complete, with a coherent architectural design, appearing structurally sound and realistic?",
+            "0_point_standard": "The building appears incomplete or unbalanced, giving an impression of being unrealistic or poorly designed.",
+            "1_point_standard": "The building is visually complete and coherent, appears structurally sound and realistic, aligning with landmark features."
+        },
+        {
+            "question": "Does the landmark accurately reflect the style, era, and specific elements (such as architectural style, facade details) described in the text prompt? Analyze the text requirements sentence by sentence, and if one sentence doesn't match the content shown in the image, score it as 0 points.",
+            "0_point_standard": "The landmark fails to embody the specified style, era, or elements, deviating from the text requirements.",
+            "1_point_standard": "The landmark accurately reflects the architectural style, era, and specific features described in the text prompt."
+        },
+        {
+            "question": "Is the lighting applied realistically, and does the perspective enhance the spatial depth and grandeur of the landmark?",
+            "0_point_standard": "The lighting or perspective effects appear unrealistic, making the building seem unreal or lacking depth.",
+            "1_point_standard": "The lighting and perspective effects are realistic, enhancing the building's spatial depth and giving it a majestic, immersive appearance."
+        },
+        {
+            "question": "Are the building's details such as materials, textures, and decorative elements realistically rendered, with high-quality details to increase depth and features?",
+            "0_point_standard": "Details and textures lack clarity or appear unrealistic, diminishing the building's depth and realism.",
+            "1_point_standard": "Details and textures are of high quality, enhancing the depth, realism, and character of the building's features."
+        },
+        {
+            "question": "Does the image possess a high level of aesthetic quality, with visually balanced composition, appealing colors, and strong visual impact?",
+            "0_point_standard": "The image lacks aesthetic appeal, with poor color balance, weak composition, or appears unprofessional.",
+            "1_point_standard": "The image has strong aesthetic appeal, with balanced composition, appealing colors, strong visual impact, and a professional effect."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0002/images.txt b/dataset/landmark_building_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/landmark_building_generation_0002/instruction.txt b/dataset/landmark_building_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8e4515599608c7eef9bcf413fb1a80bb8b5eab44
--- /dev/null
+++ b/dataset/landmark_building_generation_0002/instruction.txt
@@ -0,0 +1 @@
+This is a breathtaking panoramic photograph of the Great Wall of China, showcasing its meandering path and harmonious integration with the surrounding landscape. The wall snakes through rolling green hills, rising and falling along the contours of the terrain. Distant mountains appear hazy and blue, veiled in a light mist, while the sky is a clear pale blue, speckled with thin clouds. The Great Wall itself is constructed of brick and stone, displaying earthy yellow tones, warmed by the glow of the setting sun. Several watchtowers are visible along the wall, standing tall and sturdy, demonstrating the superb craftsmanship of ancient architecture. The hillsides are covered in lush green vegetation, with dense, vibrant trees creating a striking contrast with the ancient, muted colors of the wall, resulting in a lively and dynamic scene. The image boasts clear layering, with distant mountains forming the background, the winding Great Wall as the middle ground, and the surrounding vegetation in the foreground, showcasing the perfect blend of the magnificent wall and the natural scenery, creating a serene yet majestic atmosphere.
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0002/meta.json b/dataset/landmark_building_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9095da91c95d2917324fa58ca743aef62fbe23ff
--- /dev/null
+++ b/dataset/landmark_building_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "landmark building generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0028",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0003/auto_eval.jsonl b/dataset/landmark_building_generation_0003/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..42cda7f7d2e2325714a6c15b7cbf574de8a518fe
--- /dev/null
+++ b/dataset/landmark_building_generation_0003/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases a panoramic view of London's iconic Big Ben (Elizabeth Tower), rising majestically against a vibrant blue sky dotted with fluffy white clouds. The tower is a light brownish beige, constructed from numerous layers of meticulously detailed brickwork, exhibiting a clear hierarchical structure. Its spire is sharp, topped with intricate metallic details. Big Ben's clock face is clearly visible, its hands pointing to a specific time. To the right of Big Ben, a portion of the Houses of Parliament is visible, its Gothic architecture harmonizing with Big Ben's style, also featuring light brown brickwork, rich details, numerous windows, and tall, pointed roofs. To the left, the River Thames flows with muddy brown water, its banks lined with lush greenery. In the distant right background, other buildings, comparatively modern in style, contrast with the Houses of Parliament. A bridge stretches across the Thames, connecting the two banks. The overall image boasts harmonious colors, ample sunlight, and high clarity, beautifully capturing this quintessential London landmark.\"\nYour review question is:\nDoes the generated image clearly represent a recognizable building or architectural structure, typical of a landmark? 0 points: The image lacks identifiable landmark characteristics, making it difficult to recognize as an architectural structure. 1 point: The image has clear landmark qualities, with a distinct structure and recognizable features typical of a notable building.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases a panoramic view of London's iconic Big Ben (Elizabeth Tower), rising majestically against a vibrant blue sky dotted with fluffy white clouds. The tower is a light brownish beige, constructed from numerous layers of meticulously detailed brickwork, exhibiting a clear hierarchical structure. Its spire is sharp, topped with intricate metallic details. Big Ben's clock face is clearly visible, its hands pointing to a specific time. To the right of Big Ben, a portion of the Houses of Parliament is visible, its Gothic architecture harmonizing with Big Ben's style, also featuring light brown brickwork, rich details, numerous windows, and tall, pointed roofs. To the left, the River Thames flows with muddy brown water, its banks lined with lush greenery. In the distant right background, other buildings, comparatively modern in style, contrast with the Houses of Parliament. A bridge stretches across the Thames, connecting the two banks. The overall image boasts harmonious colors, ample sunlight, and high clarity, beautifully capturing this quintessential London landmark.\"\nYour review question is:\nIs the building visually complete, with a cohesive architectural design that appears structurally sound and realistic? 0 points: The building appears incomplete or unbalanced, giving the impression of an unrealistic or poorly designed structure. 1 point: The building is visually complete and cohesive, appearing structurally sound and realistic as a landmark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases a panoramic view of London's iconic Big Ben (Elizabeth Tower), rising majestically against a vibrant blue sky dotted with fluffy white clouds. The tower is a light brownish beige, constructed from numerous layers of meticulously detailed brickwork, exhibiting a clear hierarchical structure. Its spire is sharp, topped with intricate metallic details. Big Ben's clock face is clearly visible, its hands pointing to a specific time. To the right of Big Ben, a portion of the Houses of Parliament is visible, its Gothic architecture harmonizing with Big Ben's style, also featuring light brown brickwork, rich details, numerous windows, and tall, pointed roofs. To the left, the River Thames flows with muddy brown water, its banks lined with lush greenery. In the distant right background, other buildings, comparatively modern in style, contrast with the Houses of Parliament. A bridge stretches across the Thames, connecting the two banks. The overall image boasts harmonious colors, ample sunlight, and high clarity, beautifully capturing this quintessential London landmark.\"\nYour review question is:\nDoes the landmark accurately reflect the style, era, and specific elements (e.g., architectural style, facade details) described in the text prompt? Analyze the text requirements sentence by sentence, and if one sentence does not match what is shown in the picture, score 0 points. 0 points: The landmark does not align with the specified style, era, or elements, deviating from the text requirements. 1 point: The landmark accurately reflects the architectural style, era, and specific features described in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases a panoramic view of London's iconic Big Ben (Elizabeth Tower), rising majestically against a vibrant blue sky dotted with fluffy white clouds. The tower is a light brownish beige, constructed from numerous layers of meticulously detailed brickwork, exhibiting a clear hierarchical structure. Its spire is sharp, topped with intricate metallic details. Big Ben's clock face is clearly visible, its hands pointing to a specific time. To the right of Big Ben, a portion of the Houses of Parliament is visible, its Gothic architecture harmonizing with Big Ben's style, also featuring light brown brickwork, rich details, numerous windows, and tall, pointed roofs. To the left, the River Thames flows with muddy brown water, its banks lined with lush greenery. In the distant right background, other buildings, comparatively modern in style, contrast with the Houses of Parliament. A bridge stretches across the Thames, connecting the two banks. The overall image boasts harmonious colors, ample sunlight, and high clarity, beautifully capturing this quintessential London landmark.\"\nYour review question is:\nIs the lighting applied realistically, and does the perspective enhance the spatial depth and grandeur of the landmark? 0 points: The lighting or perspective appears artificial, making the building look unrealistic or flat. 1 point: The lighting and perspective are realistic, enhancing the building’s spatial depth and giving it a grand, immersive appearance.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases a panoramic view of London's iconic Big Ben (Elizabeth Tower), rising majestically against a vibrant blue sky dotted with fluffy white clouds. The tower is a light brownish beige, constructed from numerous layers of meticulously detailed brickwork, exhibiting a clear hierarchical structure. Its spire is sharp, topped with intricate metallic details. Big Ben's clock face is clearly visible, its hands pointing to a specific time. To the right of Big Ben, a portion of the Houses of Parliament is visible, its Gothic architecture harmonizing with Big Ben's style, also featuring light brown brickwork, rich details, numerous windows, and tall, pointed roofs. To the left, the River Thames flows with muddy brown water, its banks lined with lush greenery. In the distant right background, other buildings, comparatively modern in style, contrast with the Houses of Parliament. A bridge stretches across the Thames, connecting the two banks. The overall image boasts harmonious colors, ample sunlight, and high clarity, beautifully capturing this quintessential London landmark.\"\nYour review question is:\nAre architectural details like textures, materials, and ornamental features rendered realistically, with high-quality details that add depth and character to the building? 0 points: The details and textures lack clarity or appear artificial, reducing the architectural depth and realism of the building. 1 point: The details and textures are high-quality, adding depth, realism, and character to the architectural features of the landmark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a photo of a landmark building based on the text requirements.\nThe text requirement is:\n\"The image showcases a panoramic view of London's iconic Big Ben (Elizabeth Tower), rising majestically against a vibrant blue sky dotted with fluffy white clouds. The tower is a light brownish beige, constructed from numerous layers of meticulously detailed brickwork, exhibiting a clear hierarchical structure. Its spire is sharp, topped with intricate metallic details. Big Ben's clock face is clearly visible, its hands pointing to a specific time. To the right of Big Ben, a portion of the Houses of Parliament is visible, its Gothic architecture harmonizing with Big Ben's style, also featuring light brown brickwork, rich details, numerous windows, and tall, pointed roofs. To the left, the River Thames flows with muddy brown water, its banks lined with lush greenery. In the distant right background, other buildings, comparatively modern in style, contrast with the Houses of Parliament. A bridge stretches across the Thames, connecting the two banks. The overall image boasts harmonious colors, ample sunlight, and high clarity, beautifully capturing this quintessential London landmark.\"\nYour review question is:\nDoes the image exhibit a high level of aesthetic quality, with a visually balanced composition, appealing colors, and strong visual impact? 0 points: The image lacks aesthetic appeal, with poor color balance, weak composition, or an unprofessional look. 1 point: The image has strong aesthetic appeal, with balanced composition, attractive colors, and a visually impactful, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/landmark_building_generation_0003/eval.json b/dataset/landmark_building_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..8bdbe2a436e4fab68e3547991426d325ad5d33c2
--- /dev/null
+++ b/dataset/landmark_building_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present a recognizable building or structure with landmark features?",
+            "0_point_standard": "The image lacks recognizable landmark features, making it difficult to identify as a building structure.",
+            "1_point_standard": "The image has clear landmark characteristics, with distinct structures typical of recognizable landmark buildings."
+        },
+        {
+            "question": "Is the building visually complete, possessing coherent architectural design, and appearing structurally sound and realistic?",
+            "0_point_standard": "The building appears incomplete or unbalanced, giving an impression of unreality or poor design.",
+            "1_point_standard": "The building is visually complete and coherent, appearing structurally sound and realistic, consistent with landmark characteristics."
+        },
+        {
+            "question": "Does the landmark accurately reflect the style, era, and specific elements (such as architectural style, facade details) described in the text prompt? Analyze the text requirements sentence by sentence; if any sentence does not match the content displayed in the image, score 0 points.",
+            "0_point_standard": "The landmark fails to reflect the specified style, era, or elements, deviating from the text requirements.",
+            "1_point_standard": "The landmark accurately reflects the architectural style, era, and specific features described in the text prompt."
+        },
+        {
+            "question": "Is the lighting realistically applied, and does the perspective enhance the spatial depth and grandeur of the landmark?",
+            "0_point_standard": "The lighting or perspective effects appear unrealistic, making the building look unreal or lack depth.",
+            "1_point_standard": "The lighting and perspective effects are realistic, enhancing the spatial depth of the building and giving it a grand and immersive appearance."
+        },
+        {
+            "question": "Are the details of the building, such as materials, textures, and decorative elements, rendered realistically, with high-quality details to add depth and character?",
+            "0_point_standard": "The details and textures lack clarity or appear unrealistic, reducing the depth and authenticity of the building.",
+            "1_point_standard": "The details and textures are high quality, adding depth, authenticity, and character to the building's features."
+        },
+        {
+            "question": "Does the image have a high level of aesthetic quality, with a composition that is visually balanced, appealing colors, and strong visual impact?",
+            "0_point_standard": "The image lacks aesthetic appeal, with poor color balance, weak composition, or an unprofessional appearance.",
+            "1_point_standard": "The image has strong aesthetic appeal, with balanced composition, appealing colors, and strong visual impact, achieving a professional effect."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0003/images.txt b/dataset/landmark_building_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/landmark_building_generation_0003/instruction.txt b/dataset/landmark_building_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f3804623d8841d0d2c6bee73a217990217e69ebc
--- /dev/null
+++ b/dataset/landmark_building_generation_0003/instruction.txt
@@ -0,0 +1 @@
+The image showcases a panoramic view of London's iconic Big Ben (Elizabeth Tower), rising majestically against a vibrant blue sky dotted with fluffy white clouds. The tower is a light brownish beige, constructed from numerous layers of meticulously detailed brickwork, exhibiting a clear hierarchical structure. Its spire is sharp, topped with intricate metallic details. Big Ben's clock face is clearly visible, its hands pointing to a specific time. To the right of Big Ben, a portion of the Houses of Parliament is visible, its Gothic architecture harmonizing with Big Ben's style, also featuring light brown brickwork, rich details, numerous windows, and tall, pointed roofs. To the left, the River Thames flows with muddy brown water, its banks lined with lush greenery. In the distant right background, other buildings, comparatively modern in style, contrast with the Houses of Parliament. A bridge stretches across the Thames, connecting the two banks. The overall image boasts harmonious colors, ample sunlight, and high clarity, beautifully capturing this quintessential London landmark.
\ No newline at end of file
diff --git a/dataset/landmark_building_generation_0003/meta.json b/dataset/landmark_building_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..4645225c086b8930b75d1978f02773291d58969f
--- /dev/null
+++ b/dataset/landmark_building_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "landmark building generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0028",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/layer_decomposition_0002/eval.json b/dataset/layer_decomposition_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d367adb3613d7d80164daec8a1fc379a5a7ef86
--- /dev/null
+++ b/dataset/layer_decomposition_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the number of output layers consistent with what is specified in the text description, and does each layer correspond to a separate image?",
+            "0_point_standard": "The number of layers does not match the specifications, or some layers are missing.",
+            "1_point_standard": "The output contains the correct number of layers, and each layer is separated into a single image as specified."
+        },
+        {
+            "question": "Are the specified elements accurately separated into their respective layers, with each image containing only the specified content?",
+            "0_point_standard": "Layers contain unrelated elements or fail to accurately isolate the specified content.",
+            "1_point_standard": "Each layer accurately isolates the specified elements, with each image containing only the specified content."
+        },
+        {
+            "question": "Does each layer maintain the integrity of the original content, ensuring isolated elements are not distorted and retain their visual characteristics?",
+            "0_point_standard": "Isolated elements are deformed or altered, compromising the visual integrity of the original content.",
+            "1_point_standard": "Each layer retains the integrity of the original content, with isolated elements appearing undistorted and consistent with the original image."
+        },
+        {
+            "question": "Are the edges of each isolated element clean and precise, without noticeable artifacts or rough edges?",
+            "0_point_standard": "Isolated elements have rough or jagged edges, or there are noticeable artifacts, reducing the quality of each layer.",
+            "1_point_standard": "The edges of each isolated element are clean and precise, with no noticeable artifacts, ensuring high-quality layer separation."
+        },
+        {
+            "question": "Do the color, texture, and lighting of each layer accurately match the original image, maintaining a consistent visual style across all layers?",
+            "0_point_standard": "The color, texture, or lighting of the layers is inconsistent with the original image, disrupting visual harmony.",
+            "1_point_standard": "Each layer accurately retains the original image's color, texture, and lighting, ensuring consistent visual appearance across all layers."
+        },
+        {
+            "question": "Does the final set of layered images exhibit high aesthetic quality and visual clarity, with each layer contributing to a professional and polished presentation?",
+            "0_point_standard": "The final image set lacks aesthetic appeal, has low visual clarity, or contains inconsistencies that undermine a professional appearance.",
+            "1_point_standard": "The final set of layered images is visually clear, aesthetically pleasing, and presents a polished and professional effect."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/layer_decomposition_0002/images.txt b/dataset/layer_decomposition_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..465364cca8492afede4b05c2f42be660410e74d3
--- /dev/null
+++ b/dataset/layer_decomposition_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01BYOmD926rz4EiIi5V_!!6000000007716-0-tps-2880-1327.jpg
diff --git a/dataset/layer_decomposition_0002/instruction.txt b/dataset/layer_decomposition_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1573e064609b4c365c9c3145654638883c85665d
--- /dev/null
+++ b/dataset/layer_decomposition_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate 4 images based on the provided input image, where each image represents a specific layer from the original image. The final goal is that if the layers are merged, they should recreate the original image. The 1st image should be the background layer, including the distant mountains and sky, with the snow-covered peaks and clouds in the blue sky. The 2nd image should be the car layer, containing the gray car positioned in the middle of the image. The 3rd image should be the foreground figure layer, primarily the partially visible person in a brown jacket on the right side of the image. The 4th image should be the road layer, including the road beneath the car and the person. Ensure the layers are extracted from the input image, and when merged, they should match the original image.
\ No newline at end of file
diff --git a/dataset/layer_decomposition_0002/meta.json b/dataset/layer_decomposition_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..44b3d899e6d7a450bf3d81edc5225da7de7a1de1
--- /dev/null
+++ b/dataset/layer_decomposition_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "layer decomposition",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0035",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/lighting_editing_0001/eval.json b/dataset/lighting_editing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..3014e642f61186dde4a9ec34cafc90db9704bd4d
--- /dev/null
+++ b/dataset/lighting_editing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified image retain the same unchanged areas as the original image, ensuring that only the specified lighting conditions have been altered?",
+            "0_point_standard": "Areas outside the requested lighting adjustment show noticeable changes or distortions compared with the original image.",
+            "1_point_standard": "Areas outside the requested lighting adjustment remain consistent with the original image; nothing other than the lighting has changed."
+        },
+        {
+            "question": "Does the modified image retain the content, style, and features of the original image, maintaining consistency with the input image?",
+            "0_point_standard": "The modified image shows significant differences from the original image in terms of content, style, or features.",
+            "1_point_standard": "The modified image retains the content, style, and features of the original image, maintaining consistency with the input image."
+        },
+        {
+            "question": "Does the modified image accurately reflect the lighting changes described in the text input?",
+            "0_point_standard": "The lighting changes do not conform to the specifications in the text description, showing inaccuracies or deviations.",
+            "1_point_standard": "The lighting changes have been accurately implemented according to the text description, with no inaccuracies or deviations."
+        },
+        {
+            "question": "Has the modified image correctly implemented any additional lighting effects or modifications specified in the text description?",
+            "0_point_standard": "Additional lighting effects or modifications specified in the text description are missing or incorrectly applied.",
+            "1_point_standard": "All additional lighting effects or modifications specified in the text description have been correctly and accurately applied."
+        },
+        {
+            "question": "Does the lighting edit enhance the visual quality of the image, providing a realistic or aesthetically pleasing effect?",
+            "0_point_standard": "The lighting edit has reduced the visual quality, making the image appear unrealistic or unappealing.",
+            "1_point_standard": "The lighting edit enhances the visual quality, providing a realistic and aesthetically pleasing effect."
+        },
+        {
+            "question": "Has the overall aesthetic appeal of the modified image been enhanced or maintained, meeting professional visual standards?",
+            "0_point_standard": "The modified image lacks aesthetic appeal and does not meet professional visual standards.",
+            "1_point_standard": "The modified image demonstrates strong aesthetic appeal, meeting or exceeding professional visual standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/lighting_editing_0001/images.txt b/dataset/lighting_editing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..097c7053e8add1a1a4ea98d2f0115a9aa91173a2
--- /dev/null
+++ b/dataset/lighting_editing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN016bYaWe28Hv6zvmY1Z_!!6000000007908-0-tps-3200-1500.jpg
diff --git a/dataset/lighting_editing_0001/instruction.txt b/dataset/lighting_editing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fb12c02a08252e972d1863bdd9d69d34b962e4c4
--- /dev/null
+++ b/dataset/lighting_editing_0001/instruction.txt
@@ -0,0 +1 @@
+Adjust the light angle in this image to come from the top left, with the light slightly slanting downwards. This will alter the shadow direction, creating soft shadows on the back and right side of the chair.
\ No newline at end of file
diff --git a/dataset/lighting_editing_0001/meta.json b/dataset/lighting_editing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c3644f0e7592a3a0e0e642d50f0847ac712d988
--- /dev/null
+++ b/dataset/lighting_editing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "lighting editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0067",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/lighting_effect_simulation_0001/eval.json b/dataset/lighting_effect_simulation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d7dbd2d13da39d5db9e391dd57fa22cecbb9635f
--- /dev/null
+++ b/dataset/lighting_effect_simulation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does each generated image maintain the original perspective and composition of the input image, without any changes to the layout or object positions?",
+            "0_point_standard": "There are changes in perspective or object layout between the images, resulting in a loss of scene consistency.",
+            "1_point_standard": "The perspective and composition of all images have been preserved, with no changes in object layout or position."
+        },
+        {
+            "question": "Is the specified lighting effect correctly applied in each image according to the description, and are the light sources or conditions accurately represented?",
+            "0_point_standard": "The lighting effect does not match the specified conditions, with errors in light direction, intensity, or representation of the light source.",
+            "1_point_standard": "The lighting effect is applied exactly as described, accurately reflecting the specified light direction, intensity, and source."
+        },
+        {
+            "question": "Are areas outside the specified lighting effect unchanged, retaining the original texture and details of the input image?",
+            "0_point_standard": "Unexpected changes occur in areas that should remain consistent, altering textures or details outside the lighting effect.",
+            "1_point_standard": "Areas outside the specified lighting effect remain unchanged, retaining the same textures and details as the original image."
+        },
+        {
+            "question": "Do the images reflect a consistent style across all simulated lighting conditions, with uniform texture, color grading, and visual tone?",
+            "0_point_standard": "There are differences in style, texture, or color grading between the images, resulting in an inconsistent set.",
+            "1_point_standard": "All images maintain a consistent style, texture, and color grading, presenting a cohesive appearance despite different lighting conditions."
+        },
+        {
+            "question": "Does the lighting effect enhance the realism of each image, providing accurate shadows, depth, and reflection quality consistent with the scene's 3D structure?",
+            "0_point_standard": "The lighting effect appears unrealistic, with inaccurate shadows, inconsistent depth, or poorly handled reflections.",
+            "1_point_standard": "The lighting effect enhances realism, with accurate shadows, depth, and reflections that align well with the scene's 3D structure."
+        },
+        {
+            "question": "Does each image retain high-quality details, especially in areas affected by lighting changes, with careful handling of texture, edges, and contrast?",
+            "0_point_standard": "Details are lost or poorly handled in areas affected by lighting, resulting in rough textures or decreased clarity.",
+            "1_point_standard": "Details are preserved, with well-rendered textures, sharp edges, and balanced contrast in areas affected by lighting effects."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/lighting_effect_simulation_0001/images.txt b/dataset/lighting_effect_simulation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a0dce299af490b04c1eb2063d328b2b50adaa96f
--- /dev/null
+++ b/dataset/lighting_effect_simulation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01hzFqmE1e6zBPlSmK4_!!6000000003823-0-tps-1280-544.jpg
diff --git a/dataset/lighting_effect_simulation_0001/instruction.txt b/dataset/lighting_effect_simulation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e9dcc06738cce78ac968a9e112a8e6ac973feb7f
--- /dev/null
+++ b/dataset/lighting_effect_simulation_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate five images based on the following description, showcasing different lighting effect variations of the given interior image. The goal is to apply different lighting conditions to the room to create diverse atmospheres. The first image should feature soft morning sunlight filtering through the curtains, creating a warm and gentle morning ambiance. The second image should display bright midday sunlight filling the room with natural light, conveying a vibrant and energetic scene. The third image should depict the room at midnight, with dim lighting coming only from a few warm-colored lamps and wall lights, creating a calm and quiet nighttime atmosphere. The fourth image should switch all lighting to cool white lights, casting a bright, modern, and tech-inspired feel. The fifth image should showcase warm lighting, with soft, cozy lights that create a comfortable and homey atmosphere. All images should maintain the original room layout while emphasizing the emotional and visual effects brought by the different lighting conditions.
\ No newline at end of file
diff --git a/dataset/lighting_effect_simulation_0001/meta.json b/dataset/lighting_effect_simulation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..6cf65667bc5344b14c4b455631ca3584e959291a
--- /dev/null
+++ b/dataset/lighting_effect_simulation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "lighting effect simulation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0033",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/local_enlargement_0001/eval.json b/dataset/local_enlargement_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..2694cbb1bde01a0198d8662bee0df8b97b666b0c
--- /dev/null
+++ b/dataset/local_enlargement_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the enlarged image accurately focus on the specified area and crop out the parts outside the specified area as described in the task?",
+            "0_point_standard": "The enlargement fails to focus solely on the specified area, including unintended areas or missing parts of the specified area.",
+            "1_point_standard": "The enlargement accurately focuses on the specified area, excluding all unintended areas, in accordance with the instructions."
+        },
+        {
+            "question": "Is the resolution and clarity of the enlarged area preserved, ensuring clear and sharp details?",
+            "0_point_standard": "The enlarged area appears blurry or pixelated, with noticeable loss of detail or clarity.",
+            "1_point_standard": "The enlarged area maintains high resolution and clarity, with clear and sharp details."
+        },
+        {
+            "question": "Does the enlarged image maintain relevance in content and style with the input image, ensuring consistency?",
+            "0_point_standard": "The enlarged image shows inconsistency in content or style, deviating from the characteristics of the original image.",
+            "1_point_standard": "The enlarged image maintains consistent content and style relevance with the original image, accurately reflecting its characteristics."
+        },
+        {
+            "question": "Does the enlargement meet the requirements of the text description, such as specific details mentioned in the task (e.g., focus, orientation)?",
+            "0_point_standard": "The enlargement fails to include specific details or instructions mentioned in the text description.",
+            "1_point_standard": "The enlargement successfully includes all specific details and instructions listed in the text description."
+        },
+        {
+            "question": "Are the edges of the enlarged area smoothly transitioned, avoiding abrupt changes or artifacts that disrupt the natural appearance of the image?",
+            "0_point_standard": "The edges of the enlarged area have noticeable artifacts or abrupt changes, causing a discontinuous or unnatural appearance.",
+            "1_point_standard": "The edges of the enlarged area transition smoothly, blending naturally with the surrounding image, presenting a cohesive appearance."
+        },
+        {
+            "question": "Does the enlarged image have high aesthetic appeal, with pleasing composition and enhanced focus area?",
+            "0_point_standard": "The enlarged image lacks aesthetic appeal, with poor composition or an unattractive focus area.",
+            "1_point_standard": "The enlarged image exhibits strong aesthetic appeal, with pleasing composition and an enhanced focus area."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/local_enlargement_0001/images.txt b/dataset/local_enlargement_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a1d830fce16e4ecafe02df16f07a04843d996e4
--- /dev/null
+++ b/dataset/local_enlargement_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01A9uTko233CIkBNLsp_!!6000000007199-0-tps-1200-800.jpg
diff --git a/dataset/local_enlargement_0001/instruction.txt b/dataset/local_enlargement_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9f422ad7ec6bfb88811c9b61bd5c27d6835b1a75
--- /dev/null
+++ b/dataset/local_enlargement_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a zoomed-in image based on the input picture, focusing on enlarging the ship while cropping out the rest of the image. To ensure image quality and realism, some additional details should be reasonably supplemented during the zooming process. The zoomed-in image should present the ship clearly and realistically, maintaining high quality with rich details, consistent with the style of the original image, emphasizing the ship's shape and texture.
\ No newline at end of file
diff --git a/dataset/local_enlargement_0001/meta.json b/dataset/local_enlargement_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..60f9c2267cf633ff468a993f72c88eef1cd3f64c
--- /dev/null
+++ b/dataset/local_enlargement_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "local enlargement",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0051",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/logo_generation_0001/auto_eval.jsonl b/dataset/logo_generation_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..a7b97bd6778a1d3a7a963a23171c8c308f8728c8
--- /dev/null
+++ b/dataset/logo_generation_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a cartoon crocodile as its main element. The crocodile is green, with two small circular protrusions above its head, resembling eyes. The body is streamlined and connects to a green circular clock at its lower part. The clock has clearly visible hands indicating time. Three downward-pointing triangles are positioned in the middle of the crocodile's body, possibly representing a function or feature. Below the crocodile, the words \"WAKE CROCODILE\" are written in bold, playful grey font. Above \"CROCODILE\", smaller text reads \"COMFORTABLE ELEGANT\", describing the brand's characteristics. The overall color scheme is fresh and natural, with a simple and lively style, conveying a sense of childlike fun. The background is white, providing a stark contrast to the green and grey elements of the logo.\"\nYour review question is:\nDoes the generated image clearly resemble a logo, with distinct and simplified design elements suitable for brand identity? 0 points: The image lacks identifiable logo characteristics, making it unclear as a brand identifier. 1 point: The image has clear logo qualities, with distinct, simplified elements typical of a logo.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a cartoon crocodile as its main element. The crocodile is green, with two small circular protrusions above its head, resembling eyes. The body is streamlined and connects to a green circular clock at its lower part. The clock has clearly visible hands indicating time. Three downward-pointing triangles are positioned in the middle of the crocodile's body, possibly representing a function or feature. Below the crocodile, the words \"WAKE CROCODILE\" are written in bold, playful grey font. Above \"CROCODILE\", smaller text reads \"COMFORTABLE ELEGANT\", describing the brand's characteristics. The overall color scheme is fresh and natural, with a simple and lively style, conveying a sense of childlike fun. The background is white, providing a stark contrast to the green and grey elements of the logo.\"\nYour review question is:\nIs the logo visually complete, with a cohesive design that functions well as a standalone brand mark? 0 points: The logo appears incomplete or unbalanced, lacking cohesion needed for a recognizable brand mark. 1 point: The logo is visually complete and cohesive, suitable for use as a standalone brand mark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a cartoon crocodile as its main element. The crocodile is green, with two small circular protrusions above its head, resembling eyes. The body is streamlined and connects to a green circular clock at its lower part. The clock has clearly visible hands indicating time. Three downward-pointing triangles are positioned in the middle of the crocodile's body, possibly representing a function or feature. Below the crocodile, the words \"WAKE CROCODILE\" are written in bold, playful grey font. Above \"CROCODILE\", smaller text reads \"COMFORTABLE ELEGANT\", describing the brand's characteristics. The overall color scheme is fresh and natural, with a simple and lively style, conveying a sense of childlike fun. The background is white, providing a stark contrast to the green and grey elements of the logo.\"\nYour review question is:\nDoes the logo accurately represent the specified brand theme, color scheme, or symbolic elements described in the text prompt? Compare the image with the text requirement sentence by sentence. 0 points: The logo does not align with the specified brand theme, colors, or symbolism, deviating from the text requirements. 1 point: The logo accurately reflects the brand theme, color scheme, and symbolic elements specified in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a cartoon crocodile as its main element. The crocodile is green, with two small circular protrusions above its head, resembling eyes. The body is streamlined and connects to a green circular clock at its lower part. The clock has clearly visible hands indicating time. Three downward-pointing triangles are positioned in the middle of the crocodile's body, possibly representing a function or feature. Below the crocodile, the words \"WAKE CROCODILE\" are written in bold, playful grey font. Above \"CROCODILE\", smaller text reads \"COMFORTABLE ELEGANT\", describing the brand's characteristics. The overall color scheme is fresh and natural, with a simple and lively style, conveying a sense of childlike fun. The background is white, providing a stark contrast to the green and grey elements of the logo.\"\nYour review question is:\nIf the logo includes text, is it clear, readable, and appropriately integrated into the design? 0 points: The text is unclear or poorly integrated, making it difficult to read or understand within the logo design. 1 point: The text is clear, readable, and well-integrated, complementing the overall logo design effectively.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a cartoon crocodile as its main element. The crocodile is green, with two small circular protrusions above its head, resembling eyes. The body is streamlined and connects to a green circular clock at its lower part. The clock has clearly visible hands indicating time. Three downward-pointing triangles are positioned in the middle of the crocodile's body, possibly representing a function or feature. Below the crocodile, the words \"WAKE CROCODILE\" are written in bold, playful grey font. Above \"CROCODILE\", smaller text reads \"COMFORTABLE ELEGANT\", describing the brand's characteristics. The overall color scheme is fresh and natural, with a simple and lively style, conveying a sense of childlike fun. The background is white, providing a stark contrast to the green and grey elements of the logo.\"\nYour review question is:\nIs the logo design simple and clear enough to remain recognizable and effective at various sizes, supporting scalability for different applications? 0 points: The logo loses clarity or detail when resized, reducing its effectiveness for multiple uses. 1 point: The logo remains recognizable and clear at different sizes, showing good scalability for versatile applications.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a cartoon crocodile as its main element. The crocodile is green, with two small circular protrusions above its head, resembling eyes. The body is streamlined and connects to a green circular clock at its lower part. The clock has clearly visible hands indicating time. Three downward-pointing triangles are positioned in the middle of the crocodile's body, possibly representing a function or feature. Below the crocodile, the words \"WAKE CROCODILE\" are written in bold, playful grey font. Above \"CROCODILE\", smaller text reads \"COMFORTABLE ELEGANT\", describing the brand's characteristics. The overall color scheme is fresh and natural, with a simple and lively style, conveying a sense of childlike fun. The background is white, providing a stark contrast to the green and grey elements of the logo.\"\nYour review question is:\nDoes the logo exhibit a high level of aesthetic quality, with a balanced composition, appealing color choices, and a professional finish? 0 points: The logo lacks aesthetic appeal, with poor color choices, weak composition, or an unprofessional look. 1 point: The logo has strong aesthetic appeal, with balanced composition, attractive colors, and a visually polished, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/logo_generation_0001/eval.json b/dataset/logo_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..4b4a00d4e2b3963f2e53f0e9010808b52f9e5e33
--- /dev/null
+++ b/dataset/logo_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present itself as a logo, featuring unique and simplified design elements suitable for brand recognition?",
+            "0_point_standard": "The image lacks recognizable logo characteristics, making it difficult to serve as a brand identifier.",
+            "1_point_standard": "The image features clear logo characteristics, with typical logo elements, and is simple yet unique in design."
+        },
+        {
+            "question": "Is the logo visually complete with a coherent design, able to serve as an independent brand identifier?",
+            "0_point_standard": "The logo appears incomplete or unbalanced, lacking the coherence needed to be a recognizable brand identifier.",
+            "1_point_standard": "The logo is visually complete and coherent, suitable for use as an independent brand identifier."
+        },
+        {
+            "question": "Does the logo accurately reflect the brand theme, color scheme, or symbolic elements described in the text prompt? Compare the image with the text requirements sentence by sentence.",
+            "0_point_standard": "The logo fails to reflect the specified brand theme, colors, or symbolic elements, deviating from the text requirements.",
+            "1_point_standard": "The logo accurately reflects the brand theme, color scheme, and symbolic elements specified in the text prompt."
+        },
+        {
+            "question": "If the logo contains text, is the text clear, legible, and appropriately integrated into the design?",
+            "0_point_standard": "The text is unclear or poorly integrated into the design, making it difficult to read or understand within the logo.",
+            "1_point_standard": "The text is clear and legible, effectively integrated with the overall logo design, and serves as a good complement."
+        },
+        {
+            "question": "Is the logo design simple and clear enough to maintain recognizability and effectiveness at different sizes, supporting scalability for various applications?",
+            "0_point_standard": "The logo loses clarity or detail when resized, reducing its effectiveness in various applications.",
+            "1_point_standard": "The logo maintains clarity and recognizability at different sizes, demonstrating good scalability suitable for various applications."
+        },
+        {
+            "question": "Does the logo have a high level of aesthetic quality, with balanced composition, appealing color scheme, and a professional appearance?",
+            "0_point_standard": "The logo lacks aesthetic appeal, has poor color scheme, weak composition, or appears unprofessional.",
+            "1_point_standard": "The logo has strong aesthetic appeal, balanced composition, appealing color scheme, and a refined professional appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/logo_generation_0001/images.txt b/dataset/logo_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/logo_generation_0001/instruction.txt b/dataset/logo_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..91dffdc8765136238b274bb0ccdb6626aaf72fb4
--- /dev/null
+++ b/dataset/logo_generation_0001/instruction.txt
@@ -0,0 +1 @@
+The logo features a cartoon crocodile as its main element. The crocodile is green, with two small circular protrusions above its head, resembling eyes. The body is streamlined and connects to a green circular clock at its lower part. The clock has clearly visible hands indicating time. Three downward-pointing triangles are positioned in the middle of the crocodile's body, possibly representing a function or feature. Below the crocodile, the words "WAKE CROCODILE" are written in bold, playful grey font. Above "CROCODILE", smaller text reads "COMFORTABLE ELEGANT", describing the brand's characteristics. The overall color scheme is fresh and natural, with a simple and lively style, conveying a sense of childlike fun. The background is white, providing a stark contrast to the green and grey elements of the logo.
\ No newline at end of file
diff --git a/dataset/logo_generation_0001/meta.json b/dataset/logo_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7adb7023eb58b1b15c7ec79d87b6ac3bbbfd5157
--- /dev/null
+++ b/dataset/logo_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "logo generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0024",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/logo_generation_0002/auto_eval.jsonl b/dataset/logo_generation_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..b270fc91017f54de9415065ab66d5b6cadcc9a00
--- /dev/null
+++ b/dataset/logo_generation_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo is a stylized grayscale image featuring an abstract representation of a woman's face and hair, forming a shape reminiscent of a witch's hat or a flowing, curved form. The face is profile view, partially obscured by the larger, darker gray, flowing lines that suggest hair or a mystical element. These curves are organic and fluid, creating a sense of movement and magic. Below the image, the text \"Lala witch\" is prominently displayed in bold, sans-serif font, with \"THE WITCH'S TRAINEE ROOM\" in a smaller, lighter font underneath. The overall style is modern, minimalist, and slightly mysterious, with the dark gray against the white background enhancing the visual impact.\"\nYour review question is:\nDoes the generated image clearly resemble a logo, with distinct and simplified design elements suitable for brand identity? 0 points: The image lacks identifiable logo characteristics, making it unclear as a brand identifier. 1 point: The image has clear logo qualities, with distinct, simplified elements typical of a logo.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo is a stylized grayscale image featuring an abstract representation of a woman's face and hair, forming a shape reminiscent of a witch's hat or a flowing, curved form. The face is profile view, partially obscured by the larger, darker gray, flowing lines that suggest hair or a mystical element. These curves are organic and fluid, creating a sense of movement and magic. Below the image, the text \"Lala witch\" is prominently displayed in bold, sans-serif font, with \"THE WITCH'S TRAINEE ROOM\" in a smaller, lighter font underneath. The overall style is modern, minimalist, and slightly mysterious, with the dark gray against the white background enhancing the visual impact.\"\nYour review question is:\nIs the logo visually complete, with a cohesive design that functions well as a standalone brand mark? 0 points: The logo appears incomplete or unbalanced, lacking cohesion needed for a recognizable brand mark. 1 point: The logo is visually complete and cohesive, suitable for use as a standalone brand mark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo is a stylized grayscale image featuring an abstract representation of a woman's face and hair, forming a shape reminiscent of a witch's hat or a flowing, curved form. The face is profile view, partially obscured by the larger, darker gray, flowing lines that suggest hair or a mystical element. These curves are organic and fluid, creating a sense of movement and magic. Below the image, the text \"Lala witch\" is prominently displayed in bold, sans-serif font, with \"THE WITCH'S TRAINEE ROOM\" in a smaller, lighter font underneath. The overall style is modern, minimalist, and slightly mysterious, with the dark gray against the white background enhancing the visual impact.\"\nYour review question is:\nDoes the logo accurately represent the specified brand theme, color scheme, or symbolic elements described in the text prompt? Compare the image with the text requirement sentence by sentence. 0 points: The logo does not align with the specified brand theme, colors, or symbolism, deviating from the text requirements. 1 point: The logo accurately reflects the brand theme, color scheme, and symbolic elements specified in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo is a stylized grayscale image featuring an abstract representation of a woman's face and hair, forming a shape reminiscent of a witch's hat or a flowing, curved form. The face is profile view, partially obscured by the larger, darker gray, flowing lines that suggest hair or a mystical element. These curves are organic and fluid, creating a sense of movement and magic. Below the image, the text \"Lala witch\" is prominently displayed in bold, sans-serif font, with \"THE WITCH'S TRAINEE ROOM\" in a smaller, lighter font underneath. The overall style is modern, minimalist, and slightly mysterious, with the dark gray against the white background enhancing the visual impact.\"\nYour review question is:\nIf the logo includes text, is it clear, readable, and appropriately integrated into the design? 0 points: The text is unclear or poorly integrated, making it difficult to read or understand within the logo design. 1 point: The text is clear, readable, and well-integrated, complementing the overall logo design effectively.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo is a stylized grayscale image featuring an abstract representation of a woman's face and hair, forming a shape reminiscent of a witch's hat or a flowing, curved form. The face is profile view, partially obscured by the larger, darker gray, flowing lines that suggest hair or a mystical element. These curves are organic and fluid, creating a sense of movement and magic. Below the image, the text \"Lala witch\" is prominently displayed in bold, sans-serif font, with \"THE WITCH'S TRAINEE ROOM\" in a smaller, lighter font underneath. The overall style is modern, minimalist, and slightly mysterious, with the dark gray against the white background enhancing the visual impact.\"\nYour review question is:\nIs the logo design simple and clear enough to remain recognizable and effective at various sizes, supporting scalability for different applications? 0 points: The logo loses clarity or detail when resized, reducing its effectiveness for multiple uses. 1 point: The logo remains recognizable and clear at different sizes, showing good scalability for versatile applications.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo is a stylized grayscale image featuring an abstract representation of a woman's face and hair, forming a shape reminiscent of a witch's hat or a flowing, curved form. The face is profile view, partially obscured by the larger, darker gray, flowing lines that suggest hair or a mystical element. These curves are organic and fluid, creating a sense of movement and magic. Below the image, the text \"Lala witch\" is prominently displayed in bold, sans-serif font, with \"THE WITCH'S TRAINEE ROOM\" in a smaller, lighter font underneath. The overall style is modern, minimalist, and slightly mysterious, with the dark gray against the white background enhancing the visual impact.\"\nYour review question is:\nDoes the logo exhibit a high level of aesthetic quality, with a balanced composition, appealing color choices, and a professional finish? 0 points: The logo lacks aesthetic appeal, with poor color choices, weak composition, or an unprofessional look. 1 point: The logo has strong aesthetic appeal, with balanced composition, attractive colors, and a visually polished, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/logo_generation_0002/eval.json b/dataset/logo_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6144c6b9dc41a1c9fd24e802a6f79fc536cc96c2
--- /dev/null
+++ b/dataset/logo_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present as a logo with unique and simplified design elements suitable for brand recognition?",
+            "0_point_standard": "The image lacks recognizable logo characteristics, making it difficult to serve as a brand identifier.",
+            "1_point_standard": "The image has clear logo characteristics with typical logo elements, and the design is simple yet unique."
+        },
+        {
+            "question": "Is the logo visually complete with a coherent design that can function as a standalone brand identifier?",
+            "0_point_standard": "The logo appears incomplete or unbalanced, lacking the coherence needed to be a recognizable brand identifier.",
+            "1_point_standard": "The logo is visually complete and coherent, suitable for use as a standalone brand identifier."
+        },
+        {
+            "question": "Does the logo accurately reflect the brand theme, color scheme, or symbolic elements described in the text prompt? Compare the image with the text requirements sentence by sentence.",
+            "0_point_standard": "The logo fails to reflect the specified brand theme, colors, or symbolic elements, deviating from the text requirements.",
+            "1_point_standard": "The logo accurately reflects the brand theme, color scheme, and symbolic elements specified in the text prompt."
+        },
+        {
+            "question": "If the logo includes text, is the text clear, legible, and appropriately integrated into the design?",
+            "0_point_standard": "The text is unclear or not well integrated into the design, making it difficult to read or understand within the logo.",
+            "1_point_standard": "The text is clear and legible, effectively integrated with the overall logo design, serving as a good complement."
+        },
+        {
+            "question": "Is the logo design sufficiently simple and clear to maintain recognizability and effectiveness at different sizes, supporting scalability for various applications?",
+            "0_point_standard": "The logo loses clarity or detail when resized, reducing its effectiveness in various uses.",
+            "1_point_standard": "The logo maintains clarity and recognizability at different sizes, showing good scalability suitable for various applications."
+        },
+        {
+            "question": "Does the logo exhibit a high level of aesthetic quality, with balanced composition, appealing color scheme, and a professional appearance?",
+            "0_point_standard": "The logo lacks aesthetic appeal, has poor color scheme, weak composition, or appears unprofessional.",
+            "1_point_standard": "The logo has strong aesthetic appeal, balanced composition, attractive color scheme, and a refined, professional appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/logo_generation_0002/images.txt b/dataset/logo_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/logo_generation_0002/instruction.txt b/dataset/logo_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cd49ebe0c2e83989f5bb433aea10c9df316b0dee
--- /dev/null
+++ b/dataset/logo_generation_0002/instruction.txt
@@ -0,0 +1 @@
+The logo is a stylized grayscale image featuring an abstract representation of a woman's face and hair, forming a shape reminiscent of a witch's hat or a flowing, curved form. The face is profile view, partially obscured by the larger, darker gray, flowing lines that suggest hair or a mystical element. These curves are organic and fluid, creating a sense of movement and magic. Below the image, the text "Lala witch" is prominently displayed in bold, sans-serif font, with "THE WITCH'S TRAINEE ROOM" in a smaller, lighter font underneath. The overall style is modern, minimalist, and slightly mysterious, with the dark gray against the white background enhancing the visual impact.
\ No newline at end of file
diff --git a/dataset/logo_generation_0002/meta.json b/dataset/logo_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed2fe45f986fae738be21ffb0aab70941bc687cd
--- /dev/null
+++ b/dataset/logo_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "logo generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0024",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/logo_generation_0003/auto_eval.jsonl b/dataset/logo_generation_0003/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..78370b909ff5d154995d07c6c0961f9c40e380ae
--- /dev/null
+++ b/dataset/logo_generation_0003/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a stylized red flame shape as its primary element, within which a simplified white chicken is subtly incorporated. The flame's top is pointed, while the bottom curves gently, creating a circular shape. Outside the flame ring, the words \"Roast chicken\" are written in bold, dark gray text, arcing around the flame. Small dark gray stars and rectangular text boxes flank the flame; the left box reads \"BURN\" and the right box reads \"MEAT\". The overall color scheme uses primarily dark red and dark gray, resulting in a clean and straightforward design that emphasizes the roast chicken theme. The chicken's depiction is abstract, prioritizing the overall visual impact.\"\nYour review question is:\nDoes the generated image clearly resemble a logo, with distinct and simplified design elements suitable for brand identity? 0 points: The image lacks identifiable logo characteristics, making it unclear as a brand identifier. 1 point: The image has clear logo qualities, with distinct, simplified elements typical of a logo.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a stylized red flame shape as its primary element, within which a simplified white chicken is subtly incorporated. The flame's top is pointed, while the bottom curves gently, creating a circular shape. Outside the flame ring, the words \"Roast chicken\" are written in bold, dark gray text, arcing around the flame. Small dark gray stars and rectangular text boxes flank the flame; the left box reads \"BURN\" and the right box reads \"MEAT\". The overall color scheme uses primarily dark red and dark gray, resulting in a clean and straightforward design that emphasizes the roast chicken theme. The chicken's depiction is abstract, prioritizing the overall visual impact.\"\nYour review question is:\nIs the logo visually complete, with a cohesive design that functions well as a standalone brand mark? 0 points: The logo appears incomplete or unbalanced, lacking cohesion needed for a recognizable brand mark. 1 point: The logo is visually complete and cohesive, suitable for use as a standalone brand mark.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a stylized red flame shape as its primary element, within which a simplified white chicken is subtly incorporated. The flame's top is pointed, while the bottom curves gently, creating a circular shape. Outside the flame ring, the words \"Roast chicken\" are written in bold, dark gray text, arcing around the flame. Small dark gray stars and rectangular text boxes flank the flame; the left box reads \"BURN\" and the right box reads \"MEAT\". The overall color scheme uses primarily dark red and dark gray, resulting in a clean and straightforward design that emphasizes the roast chicken theme. The chicken's depiction is abstract, prioritizing the overall visual impact.\"\nYour review question is:\nDoes the logo accurately represent the specified brand theme, color scheme, or symbolic elements described in the text prompt? Compare the image with the text requirement sentence by sentence. 0 points: The logo does not align with the specified brand theme, colors, or symbolism, deviating from the text requirements. 1 point: The logo accurately reflects the brand theme, color scheme, and symbolic elements specified in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a stylized red flame shape as its primary element, within which a simplified white chicken is subtly incorporated. The flame's top is pointed, while the bottom curves gently, creating a circular shape. Outside the flame ring, the words \"Roast chicken\" are written in bold, dark gray text, arcing around the flame. Small dark gray stars and rectangular text boxes flank the flame; the left box reads \"BURN\" and the right box reads \"MEAT\". The overall color scheme uses primarily dark red and dark gray, resulting in a clean and straightforward design that emphasizes the roast chicken theme. The chicken's depiction is abstract, prioritizing the overall visual impact.\"\nYour review question is:\nIf the logo includes text, is it clear, readable, and appropriately integrated into the design? 0 points: The text is unclear or poorly integrated, making it difficult to read or understand within the logo design. 1 point: The text is clear, readable, and well-integrated, complementing the overall logo design effectively.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a stylized red flame shape as its primary element, within which a simplified white chicken is subtly incorporated. The flame's top is pointed, while the bottom curves gently, creating a circular shape. Outside the flame ring, the words \"Roast chicken\" are written in bold, dark gray text, arcing around the flame. Small dark gray stars and rectangular text boxes flank the flame; the left box reads \"BURN\" and the right box reads \"MEAT\". The overall color scheme uses primarily dark red and dark gray, resulting in a clean and straightforward design that emphasizes the roast chicken theme. The chicken's depiction is abstract, prioritizing the overall visual impact.\"\nYour review question is:\nIs the logo design simple and clear enough to remain recognizable and effective at various sizes, supporting scalability for different applications? 0 points: The logo loses clarity or detail when resized, reducing its effectiveness for multiple uses. 1 point: The logo remains recognizable and clear at different sizes, showing good scalability for versatile applications.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a LOGO based on the text requirements.\nThe text requirement is:\n\"The logo features a stylized red flame shape as its primary element, within which a simplified white chicken is subtly incorporated. The flame's top is pointed, while the bottom curves gently, creating a circular shape. Outside the flame ring, the words \"Roast chicken\" are written in bold, dark gray text, arcing around the flame. Small dark gray stars and rectangular text boxes flank the flame; the left box reads \"BURN\" and the right box reads \"MEAT\". The overall color scheme uses primarily dark red and dark gray, resulting in a clean and straightforward design that emphasizes the roast chicken theme. The chicken's depiction is abstract, prioritizing the overall visual impact.\"\nYour review question is:\nDoes the logo exhibit a high level of aesthetic quality, with a balanced composition, appealing color choices, and a professional finish? 0 points: The logo lacks aesthetic appeal, with poor color choices, weak composition, or an unprofessional look. 1 point: The logo has strong aesthetic appeal, with balanced composition, attractive colors, and a visually polished, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/logo_generation_0003/eval.json b/dataset/logo_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..c3ffdae952960a4835bc59995f3a04959c868a47
--- /dev/null
+++ b/dataset/logo_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present itself as a logo with distinctive and simplified design elements suitable for brand recognition?",
+            "0_point_standard": "The image lacks recognizable logo features, making it difficult to serve as a brand identifier.",
+            "1_point_standard": "The image has clear logo characteristics, featuring typical logo elements with a simple and unique design."
+        },
+        {
+            "question": "Is the logo visually complete with a coherent design that can serve as an independent brand identifier?",
+            "0_point_standard": "The logo appears incomplete or unbalanced, lacking the coherence needed to be a recognizable brand identifier.",
+            "1_point_standard": "The logo is visually complete and coherent, suitable for use as an independent brand identifier."
+        },
+        {
+            "question": "Does the logo accurately reflect the brand theme, color scheme, or symbolic elements described in the text prompt? Compare the image to the text requirements sentence by sentence.",
+            "0_point_standard": "The logo fails to represent the specified brand theme, colors, or symbolic elements, deviating from the text requirements.",
+            "1_point_standard": "The logo accurately reflects the brand theme, color scheme, and symbolic elements specified in the text prompt."
+        },
+        {
+            "question": "If the logo contains text, is the text clear, readable, and appropriately integrated into the design?",
+            "0_point_standard": "The text is unclear or not well integrated into the design, making it difficult to read or understand within the logo.",
+            "1_point_standard": "The text is clear and readable, effectively integrated into the overall logo design, and serves as a good complement."
+        },
+        {
+            "question": "Is the logo design simple and clear enough to maintain recognizability and effectiveness at different sizes, supporting scalability for various applications?",
+            "0_point_standard": "The logo loses clarity or detail when resized, reducing its effectiveness in various uses.",
+            "1_point_standard": "The logo remains clear and recognizable at different sizes, demonstrating good scalability suitable for various applications."
+        },
+        {
+            "question": "Does the logo exhibit a high level of aesthetic quality, with balanced composition, appealing color schemes, and a professional appearance?",
+            "0_point_standard": "The logo lacks aesthetic appeal, has poor color choices, weak composition, or appears unprofessional.",
+            "1_point_standard": "The logo has strong aesthetic appeal, balanced composition, appealing color schemes, and a refined, professional appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/logo_generation_0003/images.txt b/dataset/logo_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/logo_generation_0003/instruction.txt b/dataset/logo_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae47186386f7a94c72a5dc5215f6b3e9e23fca5c
--- /dev/null
+++ b/dataset/logo_generation_0003/instruction.txt
@@ -0,0 +1 @@
+The logo features a stylized red flame shape as its primary element, within which a simplified white chicken is subtly incorporated. The flame's top is pointed, while the bottom curves gently, creating a circular shape. Outside the flame ring, the words "Roast chicken" are written in bold, dark gray text, arcing around the flame. Small dark gray stars and rectangular text boxes flank the flame; the left box reads "BURN" and the right box reads "MEAT". The overall color scheme uses primarily dark red and dark gray, resulting in a clean and straightforward design that emphasizes the roast chicken theme. The chicken's depiction is abstract, prioritizing the overall visual impact.
\ No newline at end of file
diff --git a/dataset/logo_generation_0003/meta.json b/dataset/logo_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d39be7c555b95757521e8911b1a32c3614cc80f
--- /dev/null
+++ b/dataset/logo_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "logo generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0024",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_0002/eval.json b/dataset/movie_shots_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..883867ce456779e39c5c5467e3a278e46b662030
--- /dev/null
+++ b/dataset/movie_shots_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the events in the script in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order, or it lacks logical flow, failing to show the progression of events.",
+            "1_point_standard": "The sequence of images clearly presents the events in the script logically and in chronological order."
+        },
+        {
+            "question": "Do the image contents accurately reflect the scenes described in the script?",
+            "0_point_standard": "The image contents do not accurately reflect the scenes described in the script, with significant deviations.",
+            "1_point_standard": "The image contents perfectly match the script and accurately depict the specified scenes."
+        },
+        {
+            "question": "Is the style and overall visual effect of the storyboard images consistent throughout the sequence?",
+            "0_point_standard": "The style of the storyboard images is inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All storyboard images maintain a consistent style, creating a cohesive visual effect."
+        },
+        {
+            "question": "Does the generated storyboard maintain consistency of object or character identities (e.g., the same character or object is recognizable across different images)?",
+            "0_point_standard": "Key characters or objects are inconsistent across different frames, making it difficult to recognize them as the same.",
+            "1_point_standard": "Key characters or objects are consistent and clearly identifiable throughout the storyboard."
+        },
+        {
+            "question": "Given the context of the script, is the logical presentation of the scene reasonable?",
+            "0_point_standard": "The presentation of the scene is illogical or unreasonable, with noticeable errors or unrealistic depictions.",
+            "1_point_standard": "The presentation of the scene is logical, reasonable, and reflects the intended context of the script."
+        },
+        {
+            "question": "Do the details and aesthetics of the storyboard images meet professional standards and possess visual appeal?",
+            "0_point_standard": "The storyboard images lack detail, have poor aesthetics, and do not meet visual standards.",
+            "1_point_standard": "The storyboard images are richly detailed, have excellent aesthetics, meet professional standards, and possess visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_0002/images.txt b/dataset/movie_shots_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/movie_shots_generation_0002/instruction.txt b/dataset/movie_shots_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bb9e7a6207407c2cc9d6f887833326382173d233
--- /dev/null
+++ b/dataset/movie_shots_generation_0002/instruction.txt
@@ -0,0 +1,18 @@
+Please generate a set of storyboard images based on the provided script, with each frame corresponding to one image. Each character's appearance and the overall visual style should remain consistent across all storyboard frames.
+
+**Background Synopsis:** During an archaeological expedition, the young archaeologist Emma and her team discover a mysterious scroll that describes an ancient kingdom forgotten by time. Emma, along with the adventurer Leo and local guide Kadan, decide to search for this kingdom, unaware of the huge conspiracy behind it.
+
+| Shot No. | Location | Shot Size | Camera Angle | Description | Dubbing Content | Subtitles | Music/SFX | Camera/Technique |
+|----------|----------|-----------|--------------|-------------|------------------|-----------|------------|------------------|
+| 1A | Deep in the jungle | Extreme long shot | Eye level | A sea of green leaves, sunlight piercing through the gaps casting mottled shadows. Tall ancient trees intertwine, forming a natural barrier. Birds fly in the sky as monkeys play and leap. | - | - | Sounds of birds chirping, rustling leaves | Zoom out to reveal the lush forest |
+| 1B | Jungle path | Full shot | Eye level | Emma, Leo, and Kadan move along a narrow path. Emma wears a sun hat and carries the scroll, Leo has a large backpack on his shoulder, Kadan swings a machete to clear the bushes. | Kadan: “This is the place described in the scroll.” | "This is the place described in the scroll." | Distant wind sound, footsteps | Dolly in, following the trio walking |
+| 2A | In front of a rock | Medium shot | Eye level | The trio stops in front of a huge rock. The surface of the rock is mottled with ancient symbols, one of which is the shape of a waterfall with a small round hole underneath. | Emma: “This is the keyhole to the entrance!” | "This is the keyhole to the entrance!" | Sound of rock collision | Static, capturing the trio's expressions |
+| 2B | In Emma's hands | Close-up | Eye level | Emma takes out a golden circular object from her bag, with a protruding waterfall shape and ancient patterns etched along the rusty edges. | Leo: “Quick, put it in!” | "Quick, put it in!" | Sound of metal scraping | Push in, close-up of Emma's hands |
+| 3A | In front of the rock | Full shot | Low angle | The rock suddenly shakes, the ground trembles, and small animals scatter in panic. The waterfall symbol on the rock gradually disappears, revealing a hidden entrance. | Kadan: “The kingdom really exists!” | "The kingdom really exists!" | Sound of shaking, entrance opening | Pan up from the ground to reveal the entrance |
+| 3B | Hidden entrance | Medium close-up | Eye level | At the entrance are ancient stone steps flanked by broken torches. At the end of the steps is a dark tunnel emanating a faint blue light. | Emma: “Let's go in.” | "Let's go in." | Echo of stone steps, footsteps | Zoom out as the three walk into the tunnel |
+| 4A | Inside the tunnel | Full shot | Eye level | The trio moves deeper into the tunnel. The walls are covered with moss and ancient murals depicting the kingdom's prosperity. The light from the torches illuminates their path, casting dancing shadows on the walls. | Leo: “These murals...they tell an ancient story.” | "These murals...they tell an ancient story." | Echo inside the tunnel, sound of torches burning | Dolly in, the trio moves deeper into the tunnel |
+| 4B | In front of an ancient door | Medium shot | High angle | They arrive in front of a huge bronze door adorned with a giant lion's head, whose eyes are two blue gems twinkling faintly. | Emma: “This should be the entrance...but how do we open it?” | "This should be the entrance... but how do we open it?" | Wind sound in front of the door, deep background music | Static, focus on the door |
+| 5A | Beside the door | Close-up | Eye level | Emma notices a recess on one side of the door that matches the shape of the golden circular object in her hand, with intricate patterns along its edges. | Kadan: “Try that key?” | "Try that key?" | Sound of metal sliding | Push in, close-up of the recess |
+| 5B | In front of the door | Full shot | Eye level | Emma places the circular object into the recess and the door slowly opens, revealing a vast underground palace. In the center of the palace is a tall jade throne, on which sits a golden crown embedded with five shining gems. | Leo: “This...this is the lost kingdom!” | "This...this is the lost kingdom!" | Sound of the door opening, wind inside the palace | Zoom out to reveal the underground palace |
+| 6A | Palace entrance | Medium Close-Up | Low Angle | The three of them walk into the palace. Emma's gaze is drawn to the crown, and she approaches the throne, ready to take it. | Emma (filled with curiosity): “This is...” | "This is..." | Echo inside the palace, low background music | Pan shot, capturing their movements from the bottom up |
+| 6B | Center of the palace | Close-Up | Eye Level | Just as Emma is about to touch the crown, a shadow swiftly approaches from behind, brandishing a sharp dagger. | Shadowy Figure: “This is mine!” | "This is mine!" | Tense sound effects, the sharp sound of the dagger | Push-in shot, capturing the appearance of the shadowy figure |
diff --git a/dataset/movie_shots_generation_0002/meta.json b/dataset/movie_shots_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..cc2e8af6d3c0d6715aed031a5ef4dbb058802ecd
--- /dev/null
+++ b/dataset/movie_shots_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "movie shots generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0013",
+    "output_image_count": 12,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_0003/eval.json b/dataset/movie_shots_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d985518000cb5d4b0ec537092b6353812e6c6993
--- /dev/null
+++ b/dataset/movie_shots_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the events in the script in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks logical flow, failing to showcase the progression of events.",
+            "1_point_standard": "The sequence of images clearly and logically presents the events in the script in chronological order."
+        },
+        {
+            "question": "Does the content of the images accurately reflect the scenes described in the script?",
+            "0_point_standard": "The content of the images does not accurately reflect the scenes described in the script, with significant deviations.",
+            "1_point_standard": "The content of the images matches the script completely and accurately depicts the specified scenes."
+        },
+        {
+            "question": "Is the style and overall visual effect of the storyboard images consistent throughout the sequence?",
+            "0_point_standard": "The style of the storyboard images is inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All storyboard images maintain a consistent style, creating a coherent visual effect."
+        },
+        {
+            "question": "Does the generated storyboard maintain consistency in the identity of objects or characters (e.g., the same character or object is recognizable across different images)?",
+            "0_point_standard": "The main characters or objects are inconsistent across different frames, making it difficult to recognize them as the same.",
+            "1_point_standard": "The main characters or objects are consistent and clearly recognizable throughout the storyboard."
+        },
+        {
+            "question": "Given the context of the script, is the logical presentation of the scene reasonable?",
+            "0_point_standard": "The presentation of the scene is illogical or unreasonable, with obvious errors or unrealistic depictions.",
+            "1_point_standard": "The presentation of the scene is logical, reasonable, and reflects the intended context of the script."
+        },
+        {
+            "question": "Do the details and aesthetics of the storyboard images meet professional standards and have visual appeal?",
+            "0_point_standard": "The storyboard images lack detail, have poor aesthetics, and do not meet visual standards.",
+            "1_point_standard": "The storyboard images are rich in detail, have excellent aesthetics, meet professional standards, and have visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_0003/images.txt b/dataset/movie_shots_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/movie_shots_generation_0003/instruction.txt b/dataset/movie_shots_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..84ebd0525c3233a849aac6e08e3685bb6dfe9dcb
--- /dev/null
+++ b/dataset/movie_shots_generation_0003/instruction.txt
@@ -0,0 +1,18 @@
+Please generate a set of storyboard images based on the provided script, with each frame corresponding to one image. The appearance of the same characters should remain consistent across different storyboard frames, as well as the overall style.
+
+**Background Synopsis:** In 1940s Shanghai, the bustling Bund coexists with the chaos of war. Su Mei, a young woman studying the piano, and Jerry, an American journalist, meet by chance. Their relationship develops from a shared love of music, blossoming into deep affection. However, as the war spreads, their fate takes a turn, and they face the heart-wrenching decision of separation.
+
+| Shot No. | Location | Shot Size | Camera Angle | Description | Dubbing Content | Subtitles | Music/SFX | Camera/Technique |
+|----------|----------|-----------|--------------|----------------------------------------------------------------|------------------|----------|----------------|-------------------|
+| 1A | Interior of an old western-style house | Full Shot | Eye Level | Spacious living room with opulent Sino-Western murals and gilded frames on the wall. A large black piano sits at the center, its lid open, reflecting faint light. The half-open window allows the curtains to flutter gently, as moonlight streams onto the wooden floor. | - | - | The quiet of the room, with a faint breeze | Subjective shot, slow pan |
+| 1B | Interior of an old western-style house | Medium Close Up | Eye Level | Su Mei, in a light purple cheongsam, stands in front of the piano. Her long hair is done up in a simple bun, and her makeup is understated yet refined. Her fingers glide over each key as if having an intimate conversation with the piano. | Jerry: "Play something for me?" | "Play something for me?" | Faint sound of the wind, Su Mei's soft breathing | Push-in on Su Mei |
+| 2A | Interior of an old western-style house | Close Up | High Angle | Jerry sits on a vintage leather sofa, holding a glass with an amber liquid, giving off a fine mist. His eyes are fixed on Su Mei, a soft smile on his face. | - | - | Gentle clinking of the glass | Tilt up slowly |
+| 2B | Interior of an old western-style house | Extreme Close Up | Eye Level | Su Mei's fingers delicately press the piano keys, which have an ivory-like sheen. Her nails are painted a shade of pale purple, complementing her cheongsam. Each key is touched tenderly by her fingers. | - | - | The piano's prelude | Follow the keys' movement |
+| 3A | Interior of an old western-style house | Full Shot | Eye Level | Su Mei begins to play, the music resonates throughout the living room, as if time has stopped at this moment. Jerry is captivated by the beautiful melody, his gaze locked on Su Mei. The entire room is filled with the music. | - | - | The piano melody intensifies | Crane shot, smooth lift |
+| 3B | Interior of an old western-style house | Close Up | Eye Level | A moved expression surfaces on Jerry's face. His eyes sparkle as if the whole world is reflected in them. His lips curl slightly, as if reminiscing about something pleasant. | - | - | The piano melody deepens | Pan left and right |
+| 4A | Interior of an old western-style house | Extreme Close Up | Eye Level | Tears brim in Su Mei's eyes, shining like stars. A single tear glistens at the corner of her eye like a pearl. | - | - | Piano reaches a climax, filled with emotion | Static, deep focus |
+| 4B | Interior of an old western-style house | Medium Close Up | Eye Level | Jerry slowly rises and steps toward Su Mei. Each step is steady and firm, as if he's approaching his destiny. His suit is crisp, a small pattern on his tie matching perfectly with his wristwatch. | Su Mei sings softly, accompanied by the piano melody | - | Piano softens, Su Mei's singing emerges | Tracking shot on Jerry |
+| 5A | Interior of an old western-style house | Close Up | Low Angle | Su Mei lifts her head, locking eyes with Jerry. Anticipation fills her eyes, her lips part as if awaiting a kiss. A blush tints her cheeks, as if colored by the moonlight. | Jerry: "I love you." | "I love you." | Ambient silence, leaving only their breathing | Slow push-in towards the pair |
+| 5B | Interior of an old western-style house | Extreme Close Up | Eye Level | Jerry gently extends his hand, touching Su Mei's cheek. His palm is large and strong, with a gold ring on his finger. His hand lingers on her face, as if time has paused. | Su Mei softly: "Me too." | "Me too." | Soft piano music starts, filled with subtle emotion | Pan gently left and right |
+| 6A | Interior of an old western-style house | Medium Shot | Eye Level | Jerry and Su Mei slowly draw closer, their lips touching in a long, emotional kiss. Everything else seems to vanish, leaving only the two of them. The lighting in the living room softens, and the moonlight grows brighter, illuminating the lovers. | - | - | Piano fades out, quiet descends | Combined shot, focus on the pair |
+| 6B | Interior of an old western-style house | Full Shot | Eye Level | The couple embraces tightly, as if they are the only two people in the world. The last note of the piano lingers before the room falls into silence. Moonlight bathes them, casting a sacred halo around them. | - | - | All ambient sound disappears, leaving only their breathing | Slow pull-back to reveal the full scene |
diff --git a/dataset/movie_shots_generation_0003/meta.json b/dataset/movie_shots_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..768aeaa7b2a176d074cdb5f0b08c7791ba5b1c6a
--- /dev/null
+++ b/dataset/movie_shots_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "movie shots generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0013",
+    "output_image_count": 12,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_0004/eval.json b/dataset/movie_shots_generation_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..0831ac5b61f8023a9c96a10fe2e50d4f1d3ab247
--- /dev/null
+++ b/dataset/movie_shots_generation_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the events in the script in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged chronologically, or lacks logical flow, failing to show the progression of events.",
+            "1_point_standard": "The sequence of images clearly and logically presents the events in the script in chronological order."
+        },
+        {
+            "question": "Does the image content accurately reflect the scenes described in the script?",
+            "0_point_standard": "The image content does not accurately reflect the scenes described in the script, with noticeable discrepancies.",
+            "1_point_standard": "The image content perfectly matches the script, accurately depicting the specified scenes."
+        },
+        {
+            "question": "Is the style and overall visual effect of the storyboard images consistent throughout the sequence?",
+            "0_point_standard": "The style of the storyboard images is inconsistent, resulting in a disjointed visual effect.",
+            "1_point_standard": "All storyboard images maintain a consistent style, creating a cohesive visual effect."
+        },
+        {
+            "question": "Does the generated storyboard maintain consistency in the identity of objects or characters (e.g., the same character or object is recognizable across different images)?",
+            "0_point_standard": "Key characters or objects are inconsistent between frames, making it difficult to recognize them as the same.",
+            "1_point_standard": "Key characters or objects are consistent and clearly recognizable throughout the storyboard."
+        },
+        {
+            "question": "Considering the context of the script, is the logical presentation of the scenes reasonable?",
+            "0_point_standard": "The presentation of the scenes is illogical or unreasonable, with noticeable errors or unrealistic depictions.",
+            "1_point_standard": "The presentation of the scenes is logical, reasonable, and reflects the intended context of the script."
+        },
+        {
+            "question": "Do the details and aesthetics of the storyboard images meet professional standards and possess visual appeal?",
+            "0_point_standard": "The storyboard images lack detail, have poor aesthetics, and do not meet visual standards.",
+            "1_point_standard": "The storyboard images are rich in detail, have excellent aesthetics, meet professional standards, and possess visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_0004/images.txt b/dataset/movie_shots_generation_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/movie_shots_generation_0004/instruction.txt b/dataset/movie_shots_generation_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c309a49f6ae3017dccaeeb4ddc96f05c65cfa9f0
--- /dev/null
+++ b/dataset/movie_shots_generation_0004/instruction.txt
@@ -0,0 +1,20 @@
+Please generate a set of storyboard images based on the provided script, with each frame corresponding to one image. The appearance of the same characters should remain consistent across different storyboard frames, as well as the overall style.
+
+**Background Synopsis:**
+During a war in the 20th century, a group of soldiers is trapped in a small village in Country A. As the troops of Country B approach, Captain Michael and local villager Sophie fight side by side for survival. While retreating from the pursuit, they find themselves in an abandoned tunnel that holds the key to turning the tide and surviving.
+
+| Shot No. | Location | Shot Size | Camera Angle | Description | Dubbing Content | Subtitles | Music/SFX | Camera/Technique |
+|----------|----------|-----------|--------------|----------------------------------------------------------------|------------------|----------|----------------|-------------------|
+| 1A       | Village Square | Long Shot | Eye Level | A pitch-black night dotted with a few flickering lights. Ancient stone houses and cobblestone paths create a rustic village scene. The fountain in the center of the village is frozen and covered in snow. In the distance, the sound of enemy tanks grows closer. | - | - | Silence, distant tank sounds | Tracking shot, revealing the village |
+| 1B       | Corner of the Village | Medium Shot | High Angle | Michael and other soldiers are hidden behind a stone house, wearing green military uniforms, intently observing their surroundings. Sophie holds an old-fashioned rifle, standing beside the soldiers with other civilians, appearing tense. | Michael: "We need to move!" | "We need to move!" | Footsteps, whispered orders | Follow shot, moving |
+| 2A       | Inside the Stone House | Close-up | Eye Level | Wooden furniture has been overturned, and tattered curtains hang by the window, gently swaying in the wind. A kerosene lamp emits a faint light, reflecting the tense atmosphere inside the room. Bullets and first aid supplies are scattered on the ground. | Sophie: "They're coming. We should hide in the tunnel." | "They're coming. We should hide in the tunnel." | Sound of curtains swaying, wind | Tracking shot, showing the interior of the room |
+| 2B       | Beside the Window | Close-up | Eye Level | The window glass is shattered, and pieces of glass are scattered on the wooden windowsill. In the dim sky outside, the lights of the enemy forces and the dark silhouettes of tanks can be seen. | Soldier: "Not much time!" | "Not much time!" | Distant tank sounds, shouting | Follow shot, capturing the movement of the tanks |
+| 3A       | Outside the Village | Panoramic Shot | Low Angle | On the snow-covered cobblestone road, Michael and Sophie lead a group of people moving swiftly. The ancient village is behind them, with snowflakes fluttering in the air, contrasting sharply with the approaching tanks in the distance. | - | - | Wind sound, tank sounds | Pan shot, showing the moving group |
+| 4A       | Outside the Village in the Snow | Close-up | Eye Level | Michael and Sophie walk side by side, their breath forming white mist in the cold air. Their footsteps leave deep marks in the snow, with the sound of tanks getting closer in the distance. | Michael: "To the tunnel!" | "To the tunnel!" | Footsteps, the sound of tanks getting closer | Follow shot, showing the two side by side |
+| 4B       | Tunnel Entrance | Close-up | Eye Level | The abandoned tunnel entrance is covered in moss, with some bricks coming loose. A wooden door at the entrance is slightly open, looking dilapidated. | Sophie: "Hurry!" | "Hurry!" | Footsteps, creaking door | Tracking shot, entering the tunnel |
+| 5A       | Inside the Tunnel | Panoramic Shot | Eye Level | The tunnel is dimly lit by a few flickering torches. Drops of water fall from the ancient stone walls, and the ground is wet and slippery. People enter one by one, with the sound of breathing and whispering echoing in the tunnel. | Soldier: "Stay low, keep quiet." | "Stay low, keep quiet." | Sound of dripping water, whispering | Tracking shot, showcasing the interior of the tunnel |
+| 5B       | Deep in the Tunnel | Medium Shot | Eye Level | At the end of the tunnel is an old iron door. Sophie finds a set of keys, unlocking it for everyone. | Sophie: "This was used during World War I, a secret storage for the French resistance." | "This was used during World War I, a secret storage for the French resistance." | Sound of keys turning, door creaking | Follow shot, moving |
+| 6A       | Secret Storage Room | Panoramic Shot | Eye Level | The room is filled with various supplies, including old weapons, maps, and food. The flag of the resistance hangs on the wall, and the air has the musty smell of a room sealed for a long time. | Michael: "This could be our chance. We can use these to hold them off!" | "This could be our chance. We can use these to hold them off!" | Sounds of items being handled, whispering | Tracking shot, revealing the interior of the storage room |
+| 6B       | Corner of the Storage Room | Close-up | Eye Level | An old piece of radio equipment sits quietly in the corner; Sophie gently touches it. | Sophie: "We can use this to contact the nearby allied forces. They might help us." | "We can use this to contact the nearby allied forces. They might help us." | Soft metallic friction sound | Follow shot, moving |
+| 7A       | Center of the Storage Room | Medium Shot | Eye Level | Michael and a few soldiers start to organize the weapons, checking the ammunition. Sophie studies an old map carefully on the side. | Soldier: "These weapons are old, but they'll have to do." | "These weapons are old, but they'll have to do." | Sound of bullets being loaded, metal friction | Static shot, showcasing the organizing process |
+| 7B       | Edge of the Storage Room | Close-up | Eye Level | Sophie's fingers move gently on the map, pointing to a place marked "trap" outside the village, her expression determined. | Sophie: "There's a trap set by the resistance back in WWI, just outside the village. This could be our chance." | "There's a trap set by the resistance back in WWI, just outside the village. This could be our chance." | Sound of paper rustling | Follow shot, capturing the movement of the fingers |
diff --git a/dataset/movie_shots_generation_0004/meta.json b/dataset/movie_shots_generation_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..8373d6ce137f61432bdaeff50c6462a10929a40d
--- /dev/null
+++ b/dataset/movie_shots_generation_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "movie shots generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0013",
+    "output_image_count": 13,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_character_definition_0001/eval.json b/dataset/movie_shots_generation_character_definition_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..25ab1f3ddc09f60ffb35cb8941e46e9a575219b2
--- /dev/null
+++ b/dataset/movie_shots_generation_character_definition_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Are the characters in the storyboard clearly derived from the provided character definition images?",
+            "0_point_standard": "The characters in the storyboard are not similar to or recognizable from the provided character definition images.",
+            "1_point_standard": "The characters in the storyboard are clearly derived from the provided character definition images and can be identified accordingly."
+        },
+        {
+            "question": "Are any modifications to characters or scenes limited to the specified requirements, and are unchanged elements consistent with the input?",
+            "0_point_standard": "There are unnecessary modifications or distortions in parts of the image that are not intended to be changed, affecting overall consistency.",
+            "1_point_standard": "Modifications are limited to the specified areas, with other elements remaining consistent and unchanged."
+        },
+        {
+            "question": "Does the storyboard accurately reflect the key elements and instructions given in the text description (e.g., location, emotion, actions)?",
+            "0_point_standard": "The storyboard fails to capture key elements or instructions from the text description, resulting in incoherent or inaccurate presentation.",
+            "1_point_standard": "The storyboard accurately reflects key elements and instructions, effectively capturing the expected scenes and narrative from the text description."
+        },
+        {
+            "question": "Does the generated storyboard maintain temporal and logical consistency throughout the image sequence?",
+            "0_point_standard": "The sequence's chronological order or logical flow is disrupted, with images appearing out of order or lacking narrative coherence.",
+            "1_point_standard": "The storyboard maintains a clear and logical chronological order, with a coherent narrative flow consistent with the story progression."
+        },
+        {
+            "question": "Do the images in the storyboard maintain consistent style and artistic coherence suitable for a professional film storyboard?",
+            "0_point_standard": "The images in the storyboard have inconsistent styles, leading to a visually uncoordinated or unprofessional appearance.",
+            "1_point_standard": "The images maintain a consistent style and artistic coherence, resulting in a professional and visually appealing storyboard."
+        },
+        {
+            "question": "Are the characters in the storyboard consistently recognizable as the same person throughout the sequence?",
+            "0_point_standard": "Characters appear inconsistent or unrecognizable across different images, making it difficult to determine that they are the same person.",
+            "1_point_standard": "Characters are consistently recognizable as the same person throughout the storyboard, ensuring continuity and clarity."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_character_definition_0001/images.txt b/dataset/movie_shots_generation_character_definition_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4782d8a529593e3a396901b0090de83812560f97
--- /dev/null
+++ b/dataset/movie_shots_generation_character_definition_0001/images.txt
@@ -0,0 +1,5 @@
+https://img.alicdn.com/imgextra/i3/O1CN01kzxT0W1OTOi5mHzx9_!!6000000001706-0-tps-3106-2027.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01uLeqUC1QxkBdIvTM9_!!6000000002043-0-tps-1732-1498.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01O0X26B1Sg04dvsh6N_!!6000000002275-0-tps-1280-811.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01HL0tMO20dtBLeN30C_!!6000000006873-0-tps-2480-3406.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01RJsl3e1l1DVOO6Yht_!!6000000004758-0-tps-1997-2882.jpg
diff --git a/dataset/movie_shots_generation_character_definition_0001/instruction.txt b/dataset/movie_shots_generation_character_definition_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..56e48ff71760b48c86ac9b0212367483290c8d40
--- /dev/null
+++ b/dataset/movie_shots_generation_character_definition_0001/instruction.txt
@@ -0,0 +1,21 @@
+Please generate a set of storyboard images based on the provided script, with each storyboard frame corresponding to one image. The character definitions in the storyboard must be based on the provided images, ensuring that the characters in the generated images closely match the predefined ones.
+
+### Background Summary
+In a futuristic city, a group of heroes from different dimensions are suddenly drawn into a massive conspiracy. A mysterious rift appears in the sky above the city, surging with unknown energy, threatening the collapse of reality itself. Each hero comes from a different background, and they must unite, using their unique abilities, to close the rift and save the future. The story focuses on their journey as they face enemies, uncover secrets, and ultimately work together to close the rift. As the battle intensifies, they realize that not only are they saving the future, but they are also shaping their own destinies.
+
+### Script Storyboard
+
+| Shot # | Location       | Shot Size | Camera Angle | Description                                                                                                                            | Dialogue                                       | Subtitles                                    | Music/SFX                            | Camera Movement                     |
+|--------|----------------|-----------|--------------|----------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------|----------------------------------------------|--------------------------------------|-------------------------------------|
+| 1      | Fast Food Shop  | Medium    | Front Angle  | The first character (Green Coat Hero) is sitting at a table in a fast food restaurant, holding a fry, looking serious as he watches the street outside the window. He mumbles to himself occasionally. | "We can't wait any longer, time is running out." | "We can't wait any longer, time is running out." | Soft background sound, mixed with the restaurant's ambient noise. | Stable shot, slight push towards the character's face. |
+| 2      | Dark Room       | Close-up  | Over-the-Shoulder | The second character (Spider Pig) strikes the iconic web-shooting pose, but in front of him is nothing but emptiness. The atmosphere is eerie as he senses an unknown force. | "This time is different, I can feel a new power." | "This time is different, I can feel a new power." | Deep drumbeats, gradually increasing, building mystery. | Stable shot, slowly pushing towards the character. |
+| 3      | Space Rift Area | Wide      | Bird's Eye    | All characters stand among distorted light beams and rifts. Time begins to warp, and the third character (Young Boy) grabs his head in terror, confused symbols floating around him. | "Where are we? What's going on?" | "Where are we? What's going on?" | Electronic noise mixed with deep ambient sounds, reflecting the space distortion. | Camera slowly zooms out, revealing the entire scene. |
+| 4      | Abandoned Classroom | Medium    | Bird's Eye    | The fourth character (Girl) stands in front of a broken desk, looking tense. Sunlight filters through the shattered window, casting overlapping shadows. | "Maybe there's a clue here, we need to find it." | "Maybe there's a clue here, we need to find it." | Background music fades, tension rises. | Fixed shot, playing with light and shadow. |
+| 5      | Underground Tunnel | Wide      | Tracking Shot | All characters run through the dark tunnel, heavy footsteps echo behind them, as if a massive enemy is chasing them. | "Run! They're coming!" | "Run! They're coming!" | Fast footsteps and clanging metal. | Handheld camera following the characters, shaky to create tension. |
+| 6      | Tunnel Exit     | Medium    | Eye Level     | The characters emerge from the tunnel, greeted by a vast cityscape in ruins, buildings are crumbling, strange lights flicker in the sky. | "This can't be… Is this our future?" | "This can't be… Is this our future?" | Dramatic music highlights the shock, electronic SFX intensifies. | Camera zooms in from afar, capturing the characters' shocked faces. |
+| 7      | Rooftop         | Wide      | High Angle    | The fifth character (Angry Blonde Girl) stands on the edge of a city rooftop, wind blowing through her hair, facing the approaching enemy, ready for battle. | "This is our last chance, don't waste it." | "This is our last chance, don't waste it." | Wind sounds mixed with intense orchestral music. | Static shot, contrasting the character against the city background. |
+| 8      | Future City Streets | Medium    | Eye Level     | All characters stand at the end of the street, facing the approaching enemy. Behind them, futuristic skyscrapers tower, with light streaming down, determination on every face. | "This time, we must unite." | "This time, we must unite." | Tension-building background music. | Camera slowly zooms in, focusing on the characters' expressions. |
+| 9      | Sky Rift       | Wide      | Bird's Eye    | The characters look up to see a giant rift in the sky. Unknown forces surge within, distorting the sky, connecting the future and reality. | "This is our only chance, we have to close it!" | "This is our only chance, we have to close it!" | Deep electronic SFX as the rift expands. | Camera tilts up to reveal the full extent of the rift. |
+| 10     | Battle Scene    | Close-up  | Multiple Angles | The characters engage in a fierce battle with the enemy. Superpowers and technological weapons collide in an explosion of fire and light. | "We can't retreat, fight to the end!" | "We can't retreat, fight to the end!" | Intense orchestral music mixed with explosive sound effects. | Quick cuts between shots, creating intense fight choreography. |
+| 11     | Final Moment    | Close-up  | Face Close-up | The first character reaches out towards the rift, releasing a beam of light. The rift begins to close, but his strength is fading, pain etched on his face. | "Just hold on a little longer, we're almost there!" | "Just hold on a little longer, we're almost there!" | The background music reaches its climax, electronic SFX intensifies. | Close-up shot capturing the character's emotional struggle. |
+| 12     | Rift Closed     | Wide      | Bird's Eye    | The rift finally closes, the sky returns to normal. The characters collapse to the ground, exhausted. In the distance, the sun slowly rises, signaling a new beginning. | "We… did it." | "We… did it." | Soft background music with the sound of birds chirping. | The camera slowly zooms out, capturing the peaceful, serene landscape. |
diff --git a/dataset/movie_shots_generation_character_definition_0001/meta.json b/dataset/movie_shots_generation_character_definition_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7581dfe4d781ae68faabedc8957142388248ba47
--- /dev/null
+++ b/dataset/movie_shots_generation_character_definition_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "movie shots generation given character definition",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": true,
+    "uid": "0040",
+    "output_image_count": 12,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_scene_definition_0001/eval.json b/dataset/movie_shots_generation_scene_definition_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a01ebc844fb66e90517943e4038e0f763428861
--- /dev/null
+++ b/dataset/movie_shots_generation_scene_definition_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Are the content and elements in the generated storyboard directly related to the input scene definition diagram?",
+            "0_point_standard": "The elements and content of the storyboard do not clearly relate to the input scene definition diagram, with noticeable deviations in characters, scenes, or actions.",
+            "1_point_standard": "The storyboard accurately reflects the content of the input scene definition diagram, with all major elements and actions correctly represented."
+        },
+        {
+            "question": "After any specified modifications, do the other parts of the storyboard remain unchanged and consistent with the original scene definition?",
+            "0_point_standard": "Parts of the storyboard that were not specified for modification have been altered unnecessarily, affecting the integrity of the scene.",
+            "1_point_standard": "The parts of the storyboard not specified for modification remain consistent with the original scene definition diagram, with no unnecessary changes."
+        },
+        {
+            "question": "Does the generated storyboard follow the specific instructions and requirements listed in the text description?",
+            "0_point_standard": "The storyboard fails to include specific instructions from the text description, missing key directives such as character actions or scene details.",
+            "1_point_standard": "The storyboard effectively includes the specific instructions from the text description, accurately depicting the required actions and details."
+        },
+        {
+            "question": "Is the temporal logic of the storyboard coherent and consistent with the sequence and flow described in the text input?",
+            "0_point_standard": "The storyboard lacks temporal logic, with a chaotic sequence of scenes or no clear narrative progression.",
+            "1_point_standard": "The storyboard follows a coherent temporal sequence, consistent with the narrative flow described in the text input."
+        },
+        {
+            "question": "Do the visual and stylistic elements of the storyboard maintain a consistent aesthetic quality and style overall?",
+            "0_point_standard": "The storyboard is inconsistent in visual style or aesthetic quality, with noticeable differences in the design or tone of various scenes.",
+            "1_point_standard": "The storyboard maintains a consistent visual style and aesthetic quality overall, providing a unified and coherent visual experience."
+        },
+        {
+            "question": "Is the depiction of characters and objects consistent, ensuring they are recognizable and maintain their identity throughout the storyboard?",
+            "0_point_standard": "The depiction of characters or objects is inconsistent, with changes in appearance or features making them unrecognizable across different scenes.",
+            "1_point_standard": "Characters and objects are depicted consistently throughout the storyboard, retaining recognizable features and identity in all scenes."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/movie_shots_generation_scene_definition_0001/images.txt b/dataset/movie_shots_generation_scene_definition_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f2ecedd333a8f392772018c6775ff9f025306a43
--- /dev/null
+++ b/dataset/movie_shots_generation_scene_definition_0001/images.txt
@@ -0,0 +1,4 @@
+https://img.alicdn.com/imgextra/i4/O1CN01ypDM0l1ailOAtOI6p_!!6000000003364-0-tps-4528-2084.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01Qsdr9c1QyeyiApV3G_!!6000000002045-0-tps-4528-2084.jpg
+https://img.alicdn.com/imgextra/i4/O1CN01J8djXf1XASsg5NSjJ_!!6000000002883-0-tps-4528-2084.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01jih7Zt1UcudPeX4an_!!6000000002539-0-tps-2264-1042.jpg
diff --git a/dataset/movie_shots_generation_scene_definition_0001/instruction.txt b/dataset/movie_shots_generation_scene_definition_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..85a55d64ad888b6c28b9a0543d5845e3743391eb
--- /dev/null
+++ b/dataset/movie_shots_generation_scene_definition_0001/instruction.txt
@@ -0,0 +1,17 @@
+**English Summary**  
+The story is set in an ancient, mysterious mansion. The protagonist, a young archaeologist, discovers a hidden room filled with lost manuscripts and strange devices. As he explores deeper, peculiar phenomena begin to occur, revealing an ancient family secret. The protagonist must solve the puzzles and evade an ancient curse.
+
+**English Version**
+
+### Storyboard Table
+
+| Scene No. | Location | Shot Type | Camera Angle | Description | Dialogue | Subtitle | Music/SFX | Camera Movement |
+| --------- | -------- | --------- | ------------ | ----------- | -------- | -------- | ---------- | --------------- |
+| 1 | Scene 1 | Medium Shot | Low-angle upward | The protagonist pushes open an ancient wooden door, dust rises, and the door creaks. | (Protagonist breathing heavily) | "What could be hidden here?" | Eerie background music, creaking door sound | Fixed shot with slight pan |
+| 2 | Scene 2 | Wide Shot | High-angle down | The library in the mansion is surrounded by bookshelves, moonlight streams through the skylight. The protagonist walks towards a bookshelf, observing the ancient books. | (None) | "It's been so long…" | Background music builds tension | Moving camera, slow zoom-in |
+| 3 | Scene 2 | Close-up | The protagonist holds an old, worn book. | The protagonist gently opens the pages, which emit a faint glow. | (Protagonist whispers) | "What is this thing?" | Soft wind sound, pages rustling | Handheld camera, slight shake |
+| 4 | Scene 3 | Medium Shot | Front-facing eye-level | An ancient machine suddenly activates, lights flicker. The protagonist steps back in surprise. | (Protagonist gasps) | "This can't be…" | Loud mechanical humming | Stable camera, quick zoom-out |
+| 5 | Scene 4 | Close-up | Side angle | A hand suddenly reaches out from the shadows, grabbing the protagonist's shoulder. The protagonist turns around sharply. | (Protagonist screams) | "Who is it?" | Tense background sound | Handheld camera, rapid shaking |
+| 6 | Scene 1 | Medium Shot | Low-angle upward | The protagonist frantically runs out of the room, the door slams shut behind him. | (Protagonist panting) | "I need to get out of here…" | Fast-paced background music | Following camera, fast-moving |
+| 7 | Scene 2 | Wide Shot | Outside window looking in | Moonlight streams through the window, illuminating the empty library, books fall onto the floor. | (None) | "The silence was filled with fear." | Wind sound rising | Fixed shot, zooming out to outside |
+| 8 | Scene 3 | Close-up | The core component of the ancient machine is flashing. | The flashes seem to pulse in rhythm, as if conveying a message. | (None) | "What…signal is this?" | Low-pitched mechanical hum | Handheld camera, following the flashing rhythm |
diff --git a/dataset/movie_shots_generation_scene_definition_0001/meta.json b/dataset/movie_shots_generation_scene_definition_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..08f11ce70b8b10f59c5b3f4b4865b23192c34443
--- /dev/null
+++ b/dataset/movie_shots_generation_scene_definition_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "movie shots generation given scene definition",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": true,
+    "uid": "0041",
+    "output_image_count": 8,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/multi-appearance_variant_generation_0001/eval.json b/dataset/multi-appearance_variant_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d59838928e8de86079030d5e5f5d034f3db01a1
--- /dev/null
+++ b/dataset/multi-appearance_variant_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image clearly originate from the input image, maintaining the core structure and features of the product?",
+            "0_point_standard": "There is a significant deviation between the output image and the input image, making the product unrecognizable or altering its core structure.",
+            "1_point_standard": "The output image maintains the core structure and features of the product, ensuring it can be recognized as the same item."
+        },
+        {
+            "question": "For partial color modifications, does the model ensure that unspecified areas of the product remain unchanged?",
+            "0_point_standard": "Non-target areas of the product are altered, causing unnecessary changes to parts of the image that should remain unchanged.",
+            "1_point_standard": "Non-target areas of the product remain unchanged, preserving the initial design and details of the image."
+        },
+        {
+            "question": "Does the model output strictly adhere to the specific color change requirements described in the text?",
+            "0_point_standard": "The color changes do not match the specifications provided in the text description, with colors appearing inaccurately or not as described.",
+            "1_point_standard": "The color changes in the output image are accurate and precisely match the requirements described in the text."
+        },
+        {
+            "question": "Do different color variants maintain logical consistency within the product, ensuring authenticity and feasibility in color application?",
+            "0_point_standard": "Color changes result in an appearance that is unrealistic or illogical, such as unnatural shadows or color bleeding that affects realism.",
+            "1_point_standard": "The color application is logical and realistic, maintaining a feasible and natural appearance across all variants."
+        },
+        {
+            "question": "Do the generated color variants maintain a consistent image style, ensuring uniformity in lighting, texture, and presentation?",
+            "0_point_standard": "There is inconsistency in style between different color variants, with variations in lighting, texture, or presentation disrupting uniformity.",
+            "1_point_standard": "All color variants maintain a consistent image style, with uniform lighting, texture, and presentation."
+        },
+        {
+            "question": "Is the overall aesthetic and visual quality of each color variant maintained, ensuring high image quality and appeal?",
+            "0_point_standard": "The color variant images are of poor quality, exhibiting issues like pixelation, loss of detail, or lack of visual appeal.",
+            "1_point_standard": "Each color variant maintains high image quality, with clear details and an attractive appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/multi-appearance_variant_generation_0001/images.txt b/dataset/multi-appearance_variant_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e271a5775b2bfd17579ab29ab28a76111af99fd3
--- /dev/null
+++ b/dataset/multi-appearance_variant_generation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01yYUG2S1IWFTzwGRBU_!!6000000000900-0-tps-3000-2000.jpg
diff --git a/dataset/multi-appearance_variant_generation_0001/instruction.txt b/dataset/multi-appearance_variant_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8261ba668cee791c721c986c0a4a6e1941b47eb5
--- /dev/null
+++ b/dataset/multi-appearance_variant_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Based on the given controller image, generate four images showcasing the controller in different color variations. Each image should retain the same product design and details as the original image, with only the color scheme changed. The first image should display the controller in the classic black and white color scheme, maintaining a simple and elegant design. The second image should present the controller in a black and red color scheme, emphasizing a stylish and dynamic look. The third image should showcase the controller in a silver-gray color scheme, creating a sense of technological and futuristic appeal. The fourth image should feature a blue and yellow color combination, giving a vibrant and modern feel. Ensure that each image captures the unique style and atmosphere while preserving the original product design.
\ No newline at end of file
diff --git a/dataset/multi-appearance_variant_generation_0001/meta.json b/dataset/multi-appearance_variant_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b2c5cb299de0df8805cd8607987f424a367e31e
--- /dev/null
+++ b/dataset/multi-appearance_variant_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "multi-appearance variant generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0047",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/multi-appearance_variant_generation_0002/eval.json b/dataset/multi-appearance_variant_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d1e981903fdb5716679399f8e2faf50ecb10ecc9
--- /dev/null
+++ b/dataset/multi-appearance_variant_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image clearly originate from the input image and maintain the core structure and features of the product?",
+            "0_point_standard": "The output image significantly deviates from the input image, making the product unrecognizable or altering its core structure.",
+            "1_point_standard": "The output image maintains the core structure and features of the product, ensuring it is recognizable as the same item."
+        },
+        {
+            "question": "For partial color modifications, does the model ensure that unspecified areas of the product remain unchanged?",
+            "0_point_standard": "Non-target areas of the product are altered, resulting in unnecessary changes to parts of the image that should remain unchanged.",
+            "1_point_standard": "Non-target areas of the product remain unchanged, preserving the initial design and details of the image."
+        },
+        {
+            "question": "Does the model output strictly adhere to the requirements for specific color changes as described in the text?",
+            "0_point_standard": "The color changes do not match the specifications provided in the text description, with colors appearing inaccurately or inconsistently with the description.",
+            "1_point_standard": "The color changes in the output image are accurate and precisely match the requirements described in the text."
+        },
+        {
+            "question": "Do different color variants maintain logical consistency of the product, ensuring the authenticity and feasibility of the color application?",
+            "0_point_standard": "Color changes result in an unrealistic or illogical appearance, such as unnatural shadows or color bleeding that affects realism.",
+            "1_point_standard": "Color application is logical and realistic, maintaining a feasible and natural appearance across all variants."
+        },
+        {
+            "question": "Do the generated color variants maintain a consistent image style, ensuring uniformity in lighting, texture, and presentation?",
+            "0_point_standard": "There is inconsistency in style between different color variants, with variations in lighting, texture, or presentation disrupting uniformity.",
+            "1_point_standard": "All color variants have a consistent image style, with uniform lighting, texture, and presentation."
+        },
+        {
+            "question": "Is the overall aesthetic and visual quality of each color variant maintained, ensuring high image quality and attractiveness?",
+            "0_point_standard": "Color variant images have poor quality, with issues such as pixelation, loss of detail, or lack of visual appeal.",
+            "1_point_standard": "Each color variant maintains high image quality, with clear details and an attractive appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/multi-appearance_variant_generation_0002/images.txt b/dataset/multi-appearance_variant_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9f6420097a15c2bbcbd67b289cc26d52c95c0e8b
--- /dev/null
+++ b/dataset/multi-appearance_variant_generation_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01vOR44y20G4aW5oH7a_!!6000000006821-0-tps-3000-2000.jpg
diff --git a/dataset/multi-appearance_variant_generation_0002/instruction.txt b/dataset/multi-appearance_variant_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1b7dcc834d211851cbbaaca041f472a1017188a2
--- /dev/null
+++ b/dataset/multi-appearance_variant_generation_0002/instruction.txt
@@ -0,0 +1 @@
+Generate four images based on the given product image, each showcasing a different color variation of the product. All images must retain the same design, details, and material as the original image, with changes only to the color scheme. The first image should feature a pure white color scheme, highlighting simplicity and modernity. The second image should present a matte black color scheme, evoking a sense of technology and a premium feel. The third image should use a metallic silver color scheme, emphasizing the product's futuristic and high-tech appeal. The fourth image should display a deep blue and light gray color combination, emphasizing the product's fashionable and dynamic character. Ensure each image conveys the unique style of the product with its respective color scheme.
\ No newline at end of file
diff --git a/dataset/multi-appearance_variant_generation_0002/meta.json b/dataset/multi-appearance_variant_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..5c4c039d7114d321f425fa9aa252ed568a42bda7
--- /dev/null
+++ b/dataset/multi-appearance_variant_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "multi-appearance variant generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0047",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/multi-interior_decoration_variants_generation_0001/eval.json b/dataset/multi-interior_decoration_variants_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..3d4edd69f643c7c1b2fa3a3ddca9a684200e8fa9
--- /dev/null
+++ b/dataset/multi-interior_decoration_variants_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does each generated image reflect different styles as specified in the text description, with each image representing a unique interior design style?",
+            "0_point_standard": "The generated images do not clearly reflect the specified different styles, with styles appearing similar or inconsistent with the text description.",
+            "1_point_standard": "Each generated image clearly reflects the unique styles specified in the text description, representing different interior design aesthetics."
+        },
+        {
+            "question": "Are the room structure, camera angle, and furniture layout consistent across all images, ensuring only style elements change?",
+            "0_point_standard": "There are noticeable changes in room structure, camera angle, or furniture layout between images, deviating from the specified requirements.",
+            "1_point_standard": "The room structure, camera angle, and furniture layout are consistent across all images, with only style changes applied."
+        },
+        {
+            "question": "Are the details of specific styles (e.g., color schemes, textures, and decorative elements) accurately adjusted according to the defined styles in each image?",
+            "0_point_standard": "The details of specific styles are inaccurately expressed or do not match the expected characteristics of each style.",
+            "1_point_standard": "Each image accurately reflects the details of specific styles, such as colors, textures, and decorative elements, consistent with the defined styles."
+        },
+        {
+            "question": "Does the lighting and atmosphere in each image match the ambiance typically associated with each style (e.g., warm lighting for rustic styles, bright and airy for minimalism)?",
+            "0_point_standard": "The lighting and atmosphere are inconsistent with the expected ambiance of each style, resulting in a mismatch between style and atmosphere.",
+            "1_point_standard": "The lighting and atmosphere in each image are carefully adjusted to match the ambiance and characteristics of each specified style."
+        },
+        {
+            "question": "Are the materials and finishes of furniture and fixtures appropriately adapted to each style, enhancing the authenticity of the design?",
+            "0_point_standard": "The materials and finishes do not align with the expected style, reducing the authenticity of the design in each image.",
+            "1_point_standard": "The materials and finishes for each style are carefully selected to accurately reflect the aesthetic of the intended design and enhance authenticity."
+        },
+        {
+            "question": "Does the final set of images exhibit a high level of aesthetic quality, with each style variation contributing to a cohesive and professional-looking series?",
+            "0_point_standard": "The final set of images lacks aesthetic appeal, with inconsistencies or low quality diminishing the professional appearance of the series.",
+            "1_point_standard": "The final set of images exhibits high aesthetic quality, with each style variation contributing to a cohesive, visually appealing, and professional series."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/multi-interior_decoration_variants_generation_0001/images.txt b/dataset/multi-interior_decoration_variants_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ef70bf0c0ca127ebe4e5e6d113356d3e9bee1e90
--- /dev/null
+++ b/dataset/multi-interior_decoration_variants_generation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01V5wsPy2A9nJeBGjOY_!!6000000008161-0-tps-1280-853.jpg
diff --git a/dataset/multi-interior_decoration_variants_generation_0001/instruction.txt b/dataset/multi-interior_decoration_variants_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..05144ce82d6fa5f72e2e5e23f2d487f5f035bfa3
--- /dev/null
+++ b/dataset/multi-interior_decoration_variants_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate five images based on the following description, showcasing different style variations of the given interior. The goal is to create style transformations of the original room, including decor styles, material styles, and other styles commonly used in interior design. The first image should feature a Scandinavian style, using light wood, soft lighting, and minimalistic furniture arrangement. The second image should depict an industrial style, with exposed metal elements, dark-toned walls, and more rugged furniture design. The third image should follow a Japanese Zen style, highlighting natural materials like bamboo and paper partitions, using simple lines and an open, serene atmosphere. The fourth image should showcase a modern minimalist style, using bright whites or grays and geometric furniture and decor to create a clean, sleek visual effect. The fifth image should display a vintage style, incorporating dark wood, retro furniture, and metal chandeliers to evoke a nostalgic, time-worn atmosphere. All images should maintain the original layout and structure of the room while reflecting the distinct changes in style.
\ No newline at end of file
diff --git a/dataset/multi-interior_decoration_variants_generation_0001/meta.json b/dataset/multi-interior_decoration_variants_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..83c7db57d01678c041a66abd873fa41b35570457
--- /dev/null
+++ b/dataset/multi-interior_decoration_variants_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "interior design multi-style variant generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0038",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/multi-view_transformation_0001/eval.json b/dataset/multi-view_transformation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6636db31bcc503dc95882180210778ebc5e7bbf0
--- /dev/null
+++ b/dataset/multi-view_transformation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image include multiple different angles and clearly showcase the object's different perspectives as specified?",
+            "0_point_standard": "The generated image does not display different angles and lacks perspective diversity.",
+            "1_point_standard": "The output image includes multiple different angles, clearly showcasing the object's different perspectives."
+        },
+        {
+            "question": "Are the object's features and core structure consistent across all views, ensuring it can be identified in each generated image?",
+            "0_point_standard": "There are differences in the object's structure or features between different views, making it hard to recognize as the same object in each image.",
+            "1_point_standard": "The object's core structure and features are consistent across all images, making it easy to recognize in each view."
+        },
+        {
+            "question": "From each perspective, are the object's proportions and spatial relationships accurate and realistic, avoiding distortion?",
+            "0_point_standard": "In some views, proportions or spatial relationships appear distorted or unrealistic, reducing the accuracy of the representation.",
+            "1_point_standard": "The object's proportions and spatial relationships are accurately presented from each perspective, creating a realistic depiction."
+        },
+        {
+            "question": "Is the lighting across all views consistent with the implied environment, ensuring shadows and highlights correspond naturally?",
+            "0_point_standard": "The lighting between views is inconsistent, and shadows or highlights do not align naturally, creating an unrealistic appearance.",
+            "1_point_standard": "The lighting across all views is consistent and natural, with shadows and highlights corresponding to a cohesive environment."
+        },
+        {
+            "question": "Are the object's surface details, textures, and colors accurately maintained across all views, providing a consistent appearance?",
+            "0_point_standard": "There are noticeable differences in surface details, textures, or colors between views, reducing the realism and continuity of the images.",
+            "1_point_standard": "Surface details, textures, and colors are accurately and consistently presented across all views, enhancing cohesion and realism."
+        },
+        {
+            "question": "Does the final multi-view image set exhibit high-quality visual effects and professional presentation, with each view contributing to a comprehensive understanding of the object?",
+            "0_point_standard": "The image set lacks visual quality or cohesion, making it difficult to gain a complete understanding of the object from the images.",
+            "1_point_standard": "The final image set exhibits high-quality visual effects and professionalism, with each view effectively contributing to a comprehensive and cohesive understanding of the object."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/multi-view_transformation_0001/images.txt b/dataset/multi-view_transformation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..491319fc3f55715b5a7fc64e19e5540d932204cd
--- /dev/null
+++ b/dataset/multi-view_transformation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN014z0G6A1y2fliIemhx_!!6000000006521-0-tps-1280-960.jpg
diff --git a/dataset/multi-view_transformation_0001/instruction.txt b/dataset/multi-view_transformation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..512261db7baef6456547f495df9929b9c926b129
--- /dev/null
+++ b/dataset/multi-view_transformation_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate four different perspective images based on the given car image, ensuring that the car and background elements remain completely consistent, as if a photographer is capturing the car from different angles in the same scene. The four perspectives are: a slightly downward front-facing angle showing the front and hood lines of the car; a 45-degree angle from the left rear highlighting the rear design and side profile; a level side view from the right showcasing the car's length and outline; and a top-down angle showing the roof, hood, and part of the front windshield. Ensure that the lighting, background, mountains, and ground remain consistent to give the impression that these are multi-angle photos taken in the same setting.
\ No newline at end of file
diff --git a/dataset/multi-view_transformation_0001/meta.json b/dataset/multi-view_transformation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e6218e3c3b3eb115654321258b00747187939f6
--- /dev/null
+++ b/dataset/multi-view_transformation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "multi-view transformation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0039",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/multi-view_transformation_0002/eval.json b/dataset/multi-view_transformation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d6dabdd4ad685fc4ce5a2593dbd77714f1099084
--- /dev/null
+++ b/dataset/multi-view_transformation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image contain multiple different angles, and does it clearly present the object from different perspectives as specified?",
+            "0_point_standard": "The generated image does not show different angles and lacks diversity in perspectives.",
+            "1_point_standard": "The output image contains multiple different angles, clearly presenting the object from various perspectives."
+        },
+        {
+            "question": "Are the features and core structure of the object consistent across all views, ensuring it can be recognized in each generated image?",
+            "0_point_standard": "There are differences in the structure or features of the object between different views, making it difficult to recognize as the same object in each image.",
+            "1_point_standard": "The core structure and features of the object are consistent across all images, making it easily recognizable in each view."
+        },
+        {
+            "question": "From each perspective, are the proportions and spatial relationships of the object accurate and realistic, avoiding distortion?",
+            "0_point_standard": "In some views, the proportions or spatial relationships are distorted or unrealistic, reducing the accuracy of representation.",
+            "1_point_standard": "The proportions and spatial relationships of the object are accurately presented in each perspective, creating a realistic depiction."
+        },
+        {
+            "question": "Is the lighting in all views consistent with the implied environment, ensuring shadows and highlights naturally correspond?",
+            "0_point_standard": "The lighting between views is inconsistent, with shadows or highlights unnaturally aligned, creating an unrealistic appearance.",
+            "1_point_standard": "The lighting in all views is consistent and natural, with shadows and highlights corresponding to a cohesive environment."
+        },
+        {
+            "question": "Are the surface details, textures, and colors of the object accurately maintained across all views, providing a consistent appearance?",
+            "0_point_standard": "There are noticeable differences in surface details, textures, or colors between different views, reducing realism and continuity in the images.",
+            "1_point_standard": "Surface details, textures, and colors are accurately and consistently presented across all views, enhancing cohesion and realism."
+        },
+        {
+            "question": "Does the final multi-view image set exhibit high-quality visual effects and professional presentation, with each view contributing to a comprehensive understanding of the object?",
+            "0_point_standard": "The image set lacks visual quality or cohesion, making it difficult to gain a complete understanding of the object from the images.",
+            "1_point_standard": "The final image set exhibits high-quality visual effects and professionalism, with each view effectively contributing to a comprehensive and cohesive understanding of the object."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/multi-view_transformation_0002/images.txt b/dataset/multi-view_transformation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c74d6c2f1780b6e78be169b0f5f6e61bd2fbee2b
--- /dev/null
+++ b/dataset/multi-view_transformation_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01EEKRgR1DBZWWSzhYM_!!6000000000178-0-tps-5421-3614.jpg
diff --git a/dataset/multi-view_transformation_0002/instruction.txt b/dataset/multi-view_transformation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8ecc6ace17d4d1534cd9fc00945d063afd39f8f8
--- /dev/null
+++ b/dataset/multi-view_transformation_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate four different perspective images based on the sailboat picture, ensuring that the sailboat and background elements remain completely consistent, as if a photographer is capturing the sailboat from different angles in the same scene. The four perspectives are: a frontal horizontal view showing the bow and full sails; a 45-degree angle from the right front displaying the side profile and part of the stern; a top-down view showing the overall layout of the sailboat including the sails and deck details; and a horizontal side view from the left showcasing the other side of the boat and the water. Make sure the ocean, sky, and distant horizon in the background remain consistent to create a unified scene.
\ No newline at end of file
diff --git a/dataset/multi-view_transformation_0002/meta.json b/dataset/multi-view_transformation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..8b0bf9cb610c7ad96d5001b24c56d074a409bd1b
--- /dev/null
+++ b/dataset/multi-view_transformation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "multi-view transformation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0039",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/object_editing_background_changing_0001/eval.json b/dataset/object_editing_background_changing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..ebaa2f2088ec3a0870d6a707a0fd82bc3268d370
--- /dev/null
+++ b/dataset/object_editing_background_changing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately preserve the original subject and key elements, only modifying the background?",
+            "0_point_standard": "The subject or key elements of the original image are altered, distorted, or partially modified, not just the background.",
+            "1_point_standard": "The original image's subject and key elements remain unchanged, with only the background modified."
+        },
+        {
+            "question": "Does the new background seamlessly blend with the original image, ensuring no obvious artifacts or edge mismatches?",
+            "0_point_standard": "There are obvious artifacts, jagged edges, or mismatches between the subject and the new background.",
+            "1_point_standard": "The new background blends seamlessly, with smooth transitions and no obvious artifacts."
+        },
+        {
+            "question": "Does the new background align with the style and theme specified in the text description?",
+            "0_point_standard": "The background does not reflect the style or theme described in the text input, lacking thematic coherence.",
+            "1_point_standard": "The background accurately reflects the style and theme specified in the text description, maintaining thematic consistency."
+        },
+        {
+            "question": "Are the specific elements or features required by the text description included in the new background?",
+            "0_point_standard": "The new background fails to include the specific elements or features required by the text description.",
+            "1_point_standard": "The background includes all the specific elements or features required by the text description."
+        },
+        {
+            "question": "Are the lighting and color scheme of the new background consistent with the subject, creating a coherent and realistic image?",
+            "0_point_standard": "The lighting or color scheme of the background conflicts with the subject, resulting in an unrealistic or disjointed image.",
+            "1_point_standard": "The lighting and color scheme of the background are consistent with the subject, creating a coherent and realistic overall image."
+        },
+        {
+            "question": "Does the final image maintain a high level of aesthetic quality and visual appeal, adhering to professional standards?",
+            "0_point_standard": "The final image lacks aesthetic quality and does not meet professional visual standards.",
+            "1_point_standard": "The final image exhibits high aesthetic quality and visual appeal, adhering to professional standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/object_editing_background_changing_0001/images.txt b/dataset/object_editing_background_changing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9d53a409cd28cf9e058b38c7c33892ac67d8201b
--- /dev/null
+++ b/dataset/object_editing_background_changing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01Fieu7e1sQal5uozBm_!!6000000005761-0-tps-4000-5000.jpg
diff --git a/dataset/object_editing_background_changing_0001/instruction.txt b/dataset/object_editing_background_changing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4d55c0b596c98a9f43c4e71435ecd0d81326655f
--- /dev/null
+++ b/dataset/object_editing_background_changing_0001/instruction.txt
@@ -0,0 +1 @@
+Change the background of this image featuring a man in a blue suit and a black hat. Replace the current plain, weathered wall with a modern cityscape, showing tall glass buildings and a busy street in the distance. Ensure the new background integrates smoothly with the lighting on the subject, keeping the city atmosphere vibrant and realistic.
\ No newline at end of file
diff --git a/dataset/object_editing_background_changing_0001/meta.json b/dataset/object_editing_background_changing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..f28b821022cbef6efa354365e45655e8180f68a7
--- /dev/null
+++ b/dataset/object_editing_background_changing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "background changing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0055",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/object_editing_background_changing_0002/eval.json b/dataset/object_editing_background_changing_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..73cffdc8118e52199d78b34d4dd04e7ed89c691f
--- /dev/null
+++ b/dataset/object_editing_background_changing_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image accurately preserve the original subject and key elements, only modifying the background?",
+            "0_point_standard": "The original image's subject or key elements are altered, distorted, or partially modified, rather than just the background.",
+            "1_point_standard": "The original image's subject and key elements remain unchanged, with only the background modified."
+        },
+        {
+            "question": "Does the new background seamlessly blend with the original image, ensuring no noticeable artifacts or edge mismatches?",
+            "0_point_standard": "There are noticeable artifacts, jagged edges, or mismatches between the subject and the new background.",
+            "1_point_standard": "The new background blends seamlessly, with smooth transitions and no noticeable artifacts."
+        },
+        {
+            "question": "Is the new background consistent with the style and theme specified in the text description?",
+            "0_point_standard": "The background does not reflect the style or theme described in the text input, lacking thematic coherence.",
+            "1_point_standard": "The background accurately reflects the style and theme specified in the text description, maintaining thematic consistency."
+        },
+        {
+            "question": "Are the specific elements or features requested in the text description included in the new background?",
+            "0_point_standard": "The new background fails to include specific elements or features requested in the text description.",
+            "1_point_standard": "The background includes all specific elements or features requested in the text description."
+        },
+        {
+            "question": "Do the lighting and color scheme of the new background harmonize with the subject, creating a coherent and realistic image?",
+            "0_point_standard": "The lighting or color scheme of the background conflicts with the subject, resulting in an unrealistic or disjointed image.",
+            "1_point_standard": "The lighting and color scheme of the background harmonize with the subject, creating a coherent and realistic overall image."
+        },
+        {
+            "question": "Does the final image maintain a high level of aesthetic quality and visual appeal, adhering to professional standards?",
+            "0_point_standard": "The final image lacks aesthetic quality and does not meet professional visual standards.",
+            "1_point_standard": "The final image exhibits high aesthetic quality and visual appeal, adhering to professional standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/object_editing_background_changing_0002/images.txt b/dataset/object_editing_background_changing_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6f7383f4e4c7216bef95af5a6eeeff5dff1e2fb9
--- /dev/null
+++ b/dataset/object_editing_background_changing_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01YbT06U1FocWTA78nc_!!6000000000534-0-tps-5472-3648.jpg
diff --git a/dataset/object_editing_background_changing_0002/instruction.txt b/dataset/object_editing_background_changing_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ea59cf8b5841b5df1fee2aea1088d2083c4b40dc
--- /dev/null
+++ b/dataset/object_editing_background_changing_0002/instruction.txt
@@ -0,0 +1 @@
+Change the background of this image of a woman holding a cup in nature. Replace the current greenery background with a cozy indoor cafe scene. The new background should show soft lighting, wooden furniture, and people chatting in the distance, making the environment warm and inviting. Ensure the lighting on the woman aligns with the indoor setting for a seamless result.
\ No newline at end of file
diff --git a/dataset/object_editing_background_changing_0002/meta.json b/dataset/object_editing_background_changing_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..4846f02ff8131eaf8c40e113942e9f613e84921e
--- /dev/null
+++ b/dataset/object_editing_background_changing_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "background changing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0055",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/object_editing_object_adding_0001/eval.json b/dataset/object_editing_object_adding_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..c4121abd5aa8861a884cfec1562343f4fbc78cf8
--- /dev/null
+++ b/dataset/object_editing_object_adding_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the added object retain the basic characteristics and style of the original image?",
+            "0_point_standard": "The added object looks out of place or inconsistent with the overall style and features of the original image.",
+            "1_point_standard": "The added object seamlessly integrates with the original image, maintaining consistent style and features."
+        },
+        {
+            "question": "Apart from the specified modifications, does the rest of the image remain unchanged?",
+            "0_point_standard": "There are noticeable changes or distortions in parts of the image that were not specified for modification.",
+            "1_point_standard": "The unmodified parts of the image remain consistent and unchanged, with no unexpected alterations."
+        },
+        {
+            "question": "Does the added object match the content and specifications given in the text description?",
+            "0_point_standard": "The added object does not match the provided description, including type, location, or any other specified features.",
+            "1_point_standard": "The added object accurately meets the specifications of the text description, including its type, location, and any other detailed features."
+        },
+        {
+            "question": "Is the scale of the added object appropriate relative to existing elements in the image?",
+            "0_point_standard": "The scale of the added object is inconsistent, making it appear too large or too small compared to existing elements.",
+            "1_point_standard": "The scale of the added object is balanced and appropriate relative to existing elements in the image."
+        },
+        {
+            "question": "Are the lighting and shadows of the added object consistent with existing elements in the image?",
+            "0_point_standard": "The lighting or shadows of the added object are inconsistent with the rest of the image, making it appear unnatural or out of place.",
+            "1_point_standard": "The lighting and shadows of the added object are consistent with existing elements, creating a realistic and integrated overall effect."
+        },
+        {
+            "question": "Does the final image exhibit an overall aesthetic appeal with harmonious visual effects?",
+            "0_point_standard": "The final image lacks aesthetic harmony, appearing incoherent or visually unappealing.",
+            "1_point_standard": "The final image exhibits strong aesthetic harmony, with visually pleasing effects and cohesive composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/object_editing_object_adding_0001/images.txt b/dataset/object_editing_object_adding_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3d8cae035655e3eefe9d4cc917598f3ea4ad5313
--- /dev/null
+++ b/dataset/object_editing_object_adding_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01LIlw5t1mVH78Yn3cF_!!6000000004959-0-tps-3024-4032.jpg
diff --git a/dataset/object_editing_object_adding_0001/instruction.txt b/dataset/object_editing_object_adding_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0885389707bd9f4c0f0af234ef13d72a74e90385
--- /dev/null
+++ b/dataset/object_editing_object_adding_0001/instruction.txt
@@ -0,0 +1 @@
+Please add a sika deer to the grassland in the image, placing it near the center of the sunlight beam. Ensure the deer's posture is natural, with its head facing towards the direction of the sunset, and the lighting and shadow effects align with the current scene. The generated image should harmonize with the lighting, shadows, and tone of the grassland, as if the deer has always been part of the scene.
\ No newline at end of file
diff --git a/dataset/object_editing_object_adding_0001/meta.json b/dataset/object_editing_object_adding_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ac26b7c65769cb07ce9bfb9b5a13a4dd848eab22
--- /dev/null
+++ b/dataset/object_editing_object_adding_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "object adding",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0057",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/object_editing_object_adding_0002/eval.json b/dataset/object_editing_object_adding_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..59398680b9fbd764b2c09bdc04aaac01544bcc54
--- /dev/null
+++ b/dataset/object_editing_object_adding_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the added object retain the basic features and style of the original image?",
+            "0_point_standard": "The added object looks out of place or inconsistent with the overall style and features of the original image.",
+            "1_point_standard": "The added object seamlessly integrates with the original image, maintaining consistent style and features."
+        },
+        {
+            "question": "Aside from the specified modifications, does the rest of the image remain unchanged?",
+            "0_point_standard": "There are noticeable changes or distortions in parts of the image that were not supposed to be modified.",
+            "1_point_standard": "The unmodified parts of the image remain consistent and unchanged, with no unexpected alterations."
+        },
+        {
+            "question": "Does the added object match the content and specifications provided in the text description?",
+            "0_point_standard": "The added object does not match the provided description, including aspects like type, location, or any other specified features.",
+            "1_point_standard": "The added object accurately meets the specifications of the text description, including its type, location, and any other detailed features."
+        },
+        {
+            "question": "Is the scale of the added object appropriate relative to the existing elements in the image?",
+            "0_point_standard": "The scale of the added object is inconsistent, making it appear too large or too small compared to existing elements.",
+            "1_point_standard": "The scale of the added object is balanced and appropriate relative to the existing elements in the image."
+        },
+        {
+            "question": "Are the lighting and shadows of the added object consistent with the existing elements in the image?",
+            "0_point_standard": "The lighting or shadows of the added object are inconsistent with the rest of the image, making it look unnatural or out of place.",
+            "1_point_standard": "The lighting and shadows of the added object are consistent with the existing elements, creating a realistic and cohesive overall effect."
+        },
+        {
+            "question": "Does the final image exhibit an overall aesthetic appeal, with harmonious visual effects?",
+            "0_point_standard": "The final image lacks aesthetic harmony, appearing incoherent or visually unappealing.",
+            "1_point_standard": "The final image exhibits strong aesthetic harmony, with visually pleasing effects and harmonious composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/object_editing_object_adding_0002/images.txt b/dataset/object_editing_object_adding_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2cca84ee06adf163c249682f56e6648bdc5d4c37
--- /dev/null
+++ b/dataset/object_editing_object_adding_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01IoRxAT1PFUIYDFbeJ_!!6000000001811-0-tps-4561-2963.jpg
diff --git a/dataset/object_editing_object_adding_0002/instruction.txt b/dataset/object_editing_object_adding_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5cba948bf0a2ea28804d9b08b766f66b028fab51
--- /dev/null
+++ b/dataset/object_editing_object_adding_0002/instruction.txt
@@ -0,0 +1 @@
+Please add a person sitting on the right side of the bench in the image, holding a book and leaning slightly forward in a natural reading posture. Ensure the lighting and shadow effects on the person match the current environment of trees and sunlight. The resulting image should blend seamlessly with the surrounding greenery and the bench, as if the person has been reading there all along.
\ No newline at end of file
diff --git a/dataset/object_editing_object_adding_0002/meta.json b/dataset/object_editing_object_adding_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..5dcaf90fd8ad6a4dc1ec7de4e13878bd258b7d85
--- /dev/null
+++ b/dataset/object_editing_object_adding_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "object adding",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0057",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/object_editing_object_removal_0002/eval.json b/dataset/object_editing_object_removal_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0d261ca3cd4516e95f3f155a17f37a0adcd22bd
--- /dev/null
+++ b/dataset/object_editing_object_removal_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the specified object completely removed from the image without any visible remnants?",
+            "0_point_standard": "The object is still partially visible, or its removal has left noticeable remnants or traces.",
+            "1_point_standard": "The object is completely removed with no visible remnants or traces."
+        },
+        {
+            "question": "Apart from the specified object, does the rest of the image remain unchanged and consistent with the original image?",
+            "0_point_standard": "The rest of the image shows noticeable changes or modifications unrelated to the object removal.",
+            "1_point_standard": "Aside from the removal of the specified object, the rest of the image remains unchanged and consistent with the original image."
+        },
+        {
+            "question": "Does the removal of the object maintain the overall content and style of the original image, ensuring consistency between input and output?",
+            "0_point_standard": "The removal disrupts the overall content or style, leading to an incoherent or inconsistent image.",
+            "1_point_standard": "The content and style are maintained, ensuring consistency between the input and output images."
+        },
+        {
+            "question": "Does the image meet any specific requirements or instructions provided in the text description, such as naturally filling the removed area?",
+            "0_point_standard": "The specific requirements or instructions in the text description are not met or are poorly executed.",
+            "1_point_standard": "The image accurately and effectively meets the specific requirements or instructions in the text description."
+        },
+        {
+            "question": "Is the quality of the fill in the area where the object was removed consistent with the surrounding area, including texture, color, and lighting?",
+            "0_point_standard": "The fill area is inconsistent with the surrounding area, showing noticeable differences in texture, color, or lighting.",
+            "1_point_standard": "The fill area blends seamlessly with the surrounding area, with consistent texture, color, and lighting."
+        },
+        {
+            "question": "Does the modified image have an overall aesthetic appeal, maintaining a high level of visual quality and attractiveness?",
+            "0_point_standard": "The modified image lacks aesthetic appeal and visual quality, appearing unprofessional or unattractive.",
+            "1_point_standard": "The modified image exhibits strong aesthetic appeal, maintaining a high level of visual quality and attractiveness."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/object_editing_object_removal_0002/images.txt b/dataset/object_editing_object_removal_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0cb8a4a6f40e3efafb4baeb89f7bb258fc94015f
--- /dev/null
+++ b/dataset/object_editing_object_removal_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01NPiuRb1qkcqreUFz9_!!6000000005534-0-tps-4775-3183.jpg
diff --git a/dataset/object_editing_object_removal_0002/instruction.txt b/dataset/object_editing_object_removal_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..343a3e7d8d59d990127021c86bb0de18bdbb1eed
--- /dev/null
+++ b/dataset/object_editing_object_removal_0002/instruction.txt
@@ -0,0 +1 @@
+Please remove the two lounge chairs from the foreground of the beach scene. The goal is to keep the sand, background scenery, and lighting effects unchanged, and fill in the area after the chairs are removed, ensuring the beach texture and environment look natural. The resulting image should appear seamless without any visible traces of removal.
\ No newline at end of file
diff --git a/dataset/object_editing_object_removal_0002/meta.json b/dataset/object_editing_object_removal_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..d88e946e557da8671c7080f5efbc556aca0b3eef
--- /dev/null
+++ b/dataset/object_editing_object_removal_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "object removal",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0056",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/object_editing_object_replacing_0001/eval.json b/dataset/object_editing_object_replacing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..004e2007b3c263addaa8c679ad7038678df4ffa8
--- /dev/null
+++ b/dataset/object_editing_object_replacing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified image accurately replace the specified object while retaining the rest of the image?",
+            "0_point_standard": "The replaced object is inaccurately placed or out of context, or there are unexpected changes in other parts of the image.",
+            "1_point_standard": "The specified object is accurately replaced, seamlessly blending into the image, with no unexpected changes to the rest of the image."
+        },
+        {
+            "question": "Does the replaced object remain consistent with the overall content and style of the reference image?",
+            "0_point_standard": "The replaced object noticeably conflicts with the content or style of the reference image, creating a sense of discord.",
+            "1_point_standard": "The replaced object is consistent with the content and style of the reference image, blending naturally and smoothly."
+        },
+        {
+            "question": "Does the replaced object match the lighting and shadows of surrounding elements to ensure natural integration?",
+            "0_point_standard": "The lighting or shadows on the replaced object are inconsistent with surrounding elements, making it appear unnatural or out of place.",
+            "1_point_standard": "The lighting and shadows on the replaced object are consistent with surrounding elements, allowing it to naturally integrate into the scene."
+        },
+        {
+            "question": "Is the scale of the replaced object accurate relative to other elements in the image?",
+            "0_point_standard": "The scale of the replaced object is noticeably off, making it appear too large or too small compared to surrounding elements.",
+            "1_point_standard": "The scale of the replaced object is accurate and consistent with other elements in the image, ensuring a balanced appearance."
+        },
+        {
+            "question": "Do the texture and color of the replaced object match the surrounding environment, contributing to a cohesive appearance?",
+            "0_point_standard": "The texture or color of the replaced object conflicts with the surrounding environment, causing jarring contrast.",
+            "1_point_standard": "The texture and color of the replaced object match the surrounding environment well, enhancing the cohesiveness of the image."
+        },
+        {
+            "question": "Does the modified image exhibit a professional level of visual appeal and quality, with attention to detail and aesthetics?",
+            "0_point_standard": "The modified image lacks professional visual quality, showing noticeable flaws or a lack of attention to detail.",
+            "1_point_standard": "The modified image exhibits high visual appeal and quality, demonstrating meticulous attention to detail and aesthetics."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/object_editing_object_replacing_0001/images.txt b/dataset/object_editing_object_replacing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1099186ab64d5dc191e90fe5ee033dd0c14918dc
--- /dev/null
+++ b/dataset/object_editing_object_replacing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01b9e21Y1JnU0FCgw7d_!!6000000001073-0-tps-3500-2344.jpg
diff --git a/dataset/object_editing_object_replacing_0001/instruction.txt b/dataset/object_editing_object_replacing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..aef7137ad9a448a6d35b5174a4143a45ca2fac0c
--- /dev/null
+++ b/dataset/object_editing_object_replacing_0001/instruction.txt
@@ -0,0 +1 @@
+Please replace all the apples in the image with bananas. The goal is to keep the position and background unchanged, but transform the shape, color, and texture of the apples into the appearance of bananas, ensuring the lighting, shadows, and scene remain consistent with the original image. The generated image should appear natural and realistic, as if bananas were originally placed on the leaves and fabric.
\ No newline at end of file
diff --git a/dataset/object_editing_object_replacing_0001/meta.json b/dataset/object_editing_object_replacing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f6fd6bd6d507aa4b22ff77e6b1ba9137c028428
--- /dev/null
+++ b/dataset/object_editing_object_replacing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "object replacing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0058",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/packaging_rendering_0001/eval.json b/dataset/packaging_rendering_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..ef70f40ab64ddc681b7767a8f7eb94de39848aed
--- /dev/null
+++ b/dataset/packaging_rendering_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated packaging render accurately retain the basic structure and shape of the product as shown in the original product image?",
+            "0_point_standard": "The structure and shape of the product in the packaging render significantly deviate or are distorted compared to the original product image.",
+            "1_point_standard": "The packaging render accurately retains the structure and shape of the product as shown in the original product image."
+        },
+        {
+            "question": "If the task involves partial modification, does the rest of the image remain unchanged, preserving the original context and details?",
+            "0_point_standard": "Parts of the image that were not intended to be modified have been changed, resulting in the loss of original context or details.",
+            "1_point_standard": "The unmodified parts of the image remain unchanged, preserving the original context and details as expected."
+        },
+        {
+            "question": "Does the rendered packaging accurately reflect the content, style, and branding specified in the text description?",
+            "0_point_standard": "The packaging render does not accurately reflect the content, style, or branding specified in the text description.",
+            "1_point_standard": "The packaging render accurately reflects the content, style, and branding specified in the text description."
+        },
+        {
+            "question": "Are the specific text-based instructions (e.g., color changes or branding elements) correctly implemented in the packaging render?",
+            "0_point_standard": "The packaging render fails to correctly implement the specific text-based instructions, such as color changes or branding elements.",
+            "1_point_standard": "The packaging render correctly implements all specific text-based instructions, such as color changes or branding elements."
+        },
+        {
+            "question": "Does the text editing in the packaging render maintain high quality, with text elements being clear, readable, and correctly positioned?",
+            "0_point_standard": "The text in the packaging render is unclear, hard to read, or incorrectly positioned, affecting the overall presentation.",
+            "1_point_standard": "The text in the packaging render is clear, readable, and correctly positioned, enhancing the overall presentation."
+        },
+        {
+            "question": "Does the packaging render exhibit a high degree of professionalism and aesthetic appeal, meeting industry visual quality and design standards?",
+            "0_point_standard": "The packaging render lacks professionalism and aesthetic appeal, not meeting industry visual quality and design standards.",
+            "1_point_standard": "The packaging render exhibits a high degree of professionalism and aesthetic appeal, meeting or exceeding industry visual quality and design standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/packaging_rendering_0001/images.txt b/dataset/packaging_rendering_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..37d061faa1777bff244f5ab0946d5e9188f499a8
--- /dev/null
+++ b/dataset/packaging_rendering_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN012d8LFa1fmx5brNILK_!!6000000004050-0-tps-2800-2100.jpg
diff --git a/dataset/packaging_rendering_0001/instruction.txt b/dataset/packaging_rendering_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3f1ff6beb6ca80ff61de9e6f11225978be7b4eba
--- /dev/null
+++ b/dataset/packaging_rendering_0001/instruction.txt
@@ -0,0 +1 @@
+Design the beauty device with a high-end luxury aesthetic. The entire outer shell should have a matte rose gold finish, giving it a soft metallic sheen that exudes sophistication. The top control panel should be deep black, creating a contrast with the rose gold body and enhancing the sense of technology. The device's shape should be streamlined with rounded edges, providing a comfortable and modern visual appeal. The surface should feature subtle light and shadow reflections to amplify the luxurious feel and add visual depth. A clean and elegant brand logo should be placed on the front, reinforcing the high-end image of the product while maintaining a sleek, modern, and tech-driven design.
\ No newline at end of file
diff --git a/dataset/packaging_rendering_0001/meta.json b/dataset/packaging_rendering_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a160999d1ceeaaece3de84d6a4e7deaec8b8904
--- /dev/null
+++ b/dataset/packaging_rendering_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "package rendering",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0066",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/painting_generation_0001/eval.json b/dataset/painting_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..0dfb5d194ff1699eeaaf6e86d8e094e3512137d4
--- /dev/null
+++ b/dataset/painting_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the generated image clearly recognizable as a painting, with visible brushstrokes, textures, or other artistic qualities?",
+            "0_point_standard": "The image lacks recognizable painting qualities, making it unlike a piece of art.",
+            "1_point_standard": "The image has clear painting qualities, with textures and brushstrokes similar to a hand-drawn piece."
+        },
+        {
+            "question": "Is the image visually complete, with balanced composition, requiring no additional elements to be considered a complete artwork?",
+            "0_point_standard": "The image appears incomplete or lacks a balanced composition, giving an impression of an unfinished work.",
+            "1_point_standard": "The image is visually complete and balanced, serving well as a standalone artwork."
+        },
+        {
+            "question": "Does the painting accurately represent the specific theme, style, or elements described in the text prompt (e.g., landscape, portrait, or surreal theme)?",
+            "0_point_standard": "The painting does not match the described theme, style, or elements, deviating from the textual requirements.",
+            "1_point_standard": "The painting accurately represents the theme, style, and elements specified in the text prompt."
+        },
+        {
+            "question": "Is the artistic style consistently applied throughout the painting, maintaining the expected style described (e.g., realism, impressionism, abstraction)?",
+            "0_point_standard": "The style appears inconsistent or mixed, lacking cohesion with the expected artistic approach.",
+            "1_point_standard": "The artistic style is consistently applied, reflecting the expected approach described in the prompt."
+        },
+        {
+            "question": "Are the details and textures in the painting, such as fine brushstrokes or layering, realistically rendered to add depth and dimension?",
+            "0_point_standard": "The details and textures lack clarity or depth, making the image appear flat or artificial.",
+            "1_point_standard": "The details and textures are rendered with depth and clarity, adding dimension and enhancing the realism of the painting."
+        },
+        {
+            "question": "Does the painting exhibit a high level of aesthetic quality, with balanced colors, pleasing composition, and professional artistic finish?",
+            "0_point_standard": "The painting lacks aesthetic appeal, with poor color balance, weak composition, or an unfinished appearance.",
+            "1_point_standard": "The painting exhibits strong aesthetic appeal, with harmonious colors, balanced composition, and a polished, professional appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/painting_generation_0001/images.txt b/dataset/painting_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/painting_generation_0001/instruction.txt b/dataset/painting_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..77246664c7bc9e0780c3e44917772bbb7ecb8ac0
--- /dev/null
+++ b/dataset/painting_generation_0001/instruction.txt
@@ -0,0 +1 @@
+This is a watercolor painting depicting a stone house with elements of Chinese architectural style, nestled on a tree-lined street. The house is rendered in muted gray-brown tones, with visible wooden structures and eaves on the roof. A dark wooden door and a staircase leading to a second-floor balcony are prominent features on the facade. Tall trees surround the house, their foliage painted in soft greens and grays, contributing to a hazy atmosphere. A couple, dressed in casual attire, strolls along the path, their figures slightly blurred, blending into the surrounding environment. The sky is a pale blue with soft clouds. The overall color palette is dominated by gray-greens and browns, creating a tranquil and serene mood. The painting style is impressionistic, with light and fluid brushstrokes, emphasizing the interplay of light and shadow and creating a poetic atmosphere.
\ No newline at end of file
diff --git a/dataset/painting_generation_0001/meta.json b/dataset/painting_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..aaf22fbcb16260554a7e49a0553af3467c2db96e
--- /dev/null
+++ b/dataset/painting_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "painting generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0030",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/paintings_undo_painting_undo_from_finished_work_0001/eval.json b/dataset/paintings_undo_painting_undo_from_finished_work_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6ae6c89aedb33f22e4e80206edee8f1ec3251253
--- /dev/null
+++ b/dataset/paintings_undo_painting_undo_from_finished_work_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Correlation between input and output images: Does each intermediate process rendering clearly derive its content and style from the completed painting image?",
+            "0_point_standard": "There is no apparent correlation between the intermediate images and the completed painting; elements or styles do not match the original image.",
+            "1_point_standard": "Each intermediate image is closely related to the completed painting, maintaining consistent content and style derived from the original image."
+        },
+        {
+            "question": "Retention of key features: Are the main structural elements and key features of the painting retained in all intermediate images?",
+            "0_point_standard": "Key features or structural elements vary greatly, resulting in a lack of continuity in the representation of the painting process.",
+            "1_point_standard": "The main structural elements and key features are retained in all intermediate images, ensuring continuity and consistency throughout the process."
+        },
+        {
+            "question": "Temporal logic: Do the intermediate process images follow a logical progression, showing a believable development sequence from the initial painting to the final painting?",
+            "0_point_standard": "The image sequence lacks logical progression, with steps appearing out of order or inconsistent with the natural painting process.",
+            "1_point_standard": "The intermediate images progress logically, accurately reflecting a step-by-step development consistent with painting practice."
+        },
+        {
+            "question": "Consistency of image style: Is the style of all intermediate images consistent, maintaining the same artistic approach as the completed painting?",
+            "0_point_standard": "There are significant style differences between intermediate images, lacking cohesion and differing from the style of the completed painting.",
+            "1_point_standard": "The style of all images remains consistent, reflecting a unified artistic approach similar to the completed painting."
+        },
+        {
+            "question": "Aesthetic quality and detail: Do the intermediate images maintain a high level of detail and aesthetic quality, reflecting the professional completion of the original painting?",
+            "0_point_standard": "The intermediate images lack detail or aesthetic appeal, resulting in a decline in image quality.",
+            "1_point_standard": "Each intermediate image is rich in detail and aesthetically pleasing, maintaining the high-quality completion of the original painting."
+        },
+        {
+            "question": "Realism of process details: Do the intermediate images display a realistic progression of details, with textures, shadows, and other artistic elements naturally accumulating over time?",
+            "0_point_standard": "The progression of details is unrealistic, with abrupt changes in textures, shadows, or details not reflecting the natural painting process.",
+            "1_point_standard": "The intermediate images show a realistic accumulation of textures, shadows, and other details, accurately reflecting the natural progression towards the final painting."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/paintings_undo_painting_undo_from_finished_work_0001/images.txt b/dataset/paintings_undo_painting_undo_from_finished_work_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..233e9a933f68b0bf39fd3df8cae44d1555bbf9a7
--- /dev/null
+++ b/dataset/paintings_undo_painting_undo_from_finished_work_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01DMizm51FJTacpxP1k_!!6000000000466-0-tps-2048-2048.jpg
diff --git a/dataset/paintings_undo_painting_undo_from_finished_work_0001/instruction.txt b/dataset/paintings_undo_painting_undo_from_finished_work_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..90aa00d89671fd84861928e0ab0d0838216eac80
--- /dev/null
+++ b/dataset/paintings_undo_painting_undo_from_finished_work_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate 4 images representing the intermediate stages of this lotus painting, with each image depicting a different phase of the painting process. The first image should show the early stage, where the outlines of the lotus flowers, leaves, and buds have been drawn, but there are no details or colors yet. The second image should depict the mid-stage, where the basic colors are applied to the leaves and petals, but the shading and lighting effects have not yet been added, and the colors are still flat. The third image should show the stage where more details, such as light and shadow on the petals and leaves, start to emerge, adding depth to the artwork. The fourth image should represent the nearly finished stage, where all the major elements are fully colored, and the light and shadow effects are prominent, with very detailed rendering, though not yet reaching the final, polished state of the finished artwork.
\ No newline at end of file
diff --git a/dataset/paintings_undo_painting_undo_from_finished_work_0001/meta.json b/dataset/paintings_undo_painting_undo_from_finished_work_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..8e50559ba36d987a206936ce809348aa2e66d3a2
--- /dev/null
+++ b/dataset/paintings_undo_painting_undo_from_finished_work_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "drawing process generation given finished painting",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0036",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/eval.json b/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c6b4631f19b6d3a4c99d12dc88e780be4bb127d
--- /dev/null
+++ b/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does each intermediate image clearly originate from the initial unfinished painting, retaining its fundamental elements and layout?",
+            "0_point_standard": "The intermediate image lacks a clear connection with the initial unfinished painting, altering the fundamental structure.",
+            "1_point_standard": "Each intermediate image is closely related to the initial unfinished painting, retaining the fundamental elements and layout."
+        },
+        {
+            "question": "Does the sequence of images reflect a logical progression, showing step-by-step development towards a completed painting?",
+            "0_point_standard": "The sequence appears disjointed, with steps not following a natural completion process.",
+            "1_point_standard": "The images follow a logical, step-by-step progression consistent with natural painting practices."
+        },
+        {
+            "question": "Does the sequence end with a fully completed painting, exhibiting a refined and finished appearance?",
+            "0_point_standard": "The final image appears unfinished, lacking the refinement or detail expected in a completed painting.",
+            "1_point_standard": "The final image is clearly completed, exhibiting a refined and complete appearance consistent with the expected final effect."
+        },
+        {
+            "question": "Do the intermediate images realistically build textures, shadows, and other details, demonstrating a natural artistic progression?",
+            "0_point_standard": "Details, textures, or shadows develop in an unrealistic or inconsistent manner, lacking a natural process.",
+            "1_point_standard": "The images show a realistic progression of textures, shadows, and details, naturally advancing towards a completed painting."
+        },
+        {
+            "question": "Is the artistic style consistent across all intermediate images, reflecting the same creative approach as the final painting?",
+            "0_point_standard": "There is a significant stylistic difference at various stages, disrupting the overall artistic coherence.",
+            "1_point_standard": "The style remains consistent across all images, aligning with the creative approach of the final painting."
+        },
+        {
+            "question": "Does the set of intermediate images exhibit high aesthetic quality, contributing to a coherent and professional-looking progression?",
+            "0_point_standard": "The images lack aesthetic coherence or professionalism, with low-quality or inconsistent elements detracting from the overall progression.",
+            "1_point_standard": "The images exhibit aesthetic coherence and high quality, presenting a professional-looking progression towards the final painting."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/images.txt b/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3e592475e8d542bef6f224e33ffd1f1033d88eb7
--- /dev/null
+++ b/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01eFEngX1wJxU9M6kLv_!!6000000006288-0-tps-1209-1338.jpg
diff --git a/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/instruction.txt b/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c80baca7b6a57bfd52df95939b37b6aa8c4a9bcf
--- /dev/null
+++ b/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate 4 images based on this unfinished line drawing to show the painting process from the sketch to the final product. The first image should show the phase where basic color blocks are applied to all the main elements such as the fish, lotus flowers, and leaves, but without any details. The second image should depict further refinement of colors, with the fish's scales starting to show dimension, and the light and shadow on the lotus and leaves becoming more defined. The third image should show the addition of details, such as the texture of the fish, the veins of the leaves, and the ripples in the water. The fourth image will be the final product, where all details and colors are completed. The final product should maintain the detailed style of traditional Chinese gongbi painting, with clear lines, rich colors, and a strong sense of detail and layering, reflecting the meticulous and refined nature of the gongbi style.
\ No newline at end of file
diff --git a/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/meta.json b/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b48a15154a2bd9c2fbc43ed2b7d47375de3afd00
--- /dev/null
+++ b/dataset/paintings_undo_painting_undo_from_semi-finished_work_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "drawing process generation given semi-finished reference",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0037",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/panorama_generation_0002/eval.json b/dataset/panorama_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..aa5ccbac421cd3a480bc09594cea98bf35af935c
--- /dev/null
+++ b/dataset/panorama_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the panorama clearly derived from the input images, effectively combining the content of each image into a single, continuous view?",
+            "0_point_standard": "The panorama fails to clearly present the combination of input images, lacking continuity or missing elements from the original images.",
+            "1_point_standard": "The panorama is clearly derived from the input images, integrating the content of each image into a coherent, continuous scene."
+        },
+        {
+            "question": "Are the edges and overlapping parts between the images seamlessly stitched without noticeable misalignments or breaks?",
+            "0_point_standard": "The panorama displays noticeable misalignments or breaks at the edges or overlaps, disrupting the fluidity of the scene.",
+            "1_point_standard": "The panorama smoothly stitches the edges and overlaps without noticeable gaps or breaks, ensuring scene coherence."
+        },
+        {
+            "question": "Does the panorama maintain consistency in content and style throughout the image, accurately reflecting the original input images?",
+            "0_point_standard": "The panorama exhibits inconsistencies in content or style, introducing elements or styles not present in the input images.",
+            "1_point_standard": "The panorama maintains consistency in content and style throughout, accurately reflecting the key elements and appearance of the input images."
+        },
+        {
+            "question": "Does the panorama meet any specific requirements from the text description, such as orientation, specific focal areas, or designated elements?",
+            "0_point_standard": "The panorama does not meet the specific requirements mentioned in the text description, missing key orientations or focal elements.",
+            "1_point_standard": "The panorama accurately meets all specific requirements in the text description, incorporating orientation, focal points, or designated elements as specified."
+        },
+        {
+            "question": "In the stitched images, do color, lighting, and contrast naturally blend together to create a unified appearance throughout the panorama?",
+            "0_point_standard": "The panorama shows noticeable differences in color, lighting, or contrast between stitched parts, resulting in a disjointed appearance.",
+            "1_point_standard": "The panorama achieves a naturally consistent blend in color, lighting, and contrast across all parts, creating a unified, coherent appearance."
+        },
+        {
+            "question": "Does the panorama exhibit high-quality rendering effects, with attention to detail, clarity, and aesthetically pleasing composition?",
+            "0_point_standard": "The panorama lacks high-quality rendering effects, with issues such as blurriness, low resolution, or unbalanced composition affecting its visual appeal.",
+            "1_point_standard": "The panorama exhibits high-quality rendering effects with clear details, balanced composition, and a professionally appealing appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/panorama_generation_0002/images.txt b/dataset/panorama_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4720a5db79eace67de77b557a15626ed42637878
--- /dev/null
+++ b/dataset/panorama_generation_0002/images.txt
@@ -0,0 +1,4 @@
+https://img.alicdn.com/imgextra/i2/O1CN01Yu8mPL25iXHNFB1nF_!!6000000007560-0-tps-431-573.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01i6xHz01sF8rDjvMAi_!!6000000005736-0-tps-431-569.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01qYeZS41iUa3CIim0K_!!6000000004416-0-tps-426-565.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01wsPn6B1LMZy457oZl_!!6000000001285-0-tps-426-570.jpg
diff --git a/dataset/panorama_generation_0002/instruction.txt b/dataset/panorama_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..272b24db0b7c6d4bfe1423541e5c54c60b61630d
--- /dev/null
+++ b/dataset/panorama_generation_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a wide panorama image based on multiple input images, ensuring that the model performs seamless stitching of the images with no visible transition lines between them. The overlapping areas should be handled appropriately to avoid any duplication or distortion, and the perspective across the images should remain consistent. The final panorama should be smooth and cohesive, maintaining continuity in the overall content and scene, resulting in a high-quality, wide panorama that captures the full scene.
\ No newline at end of file
diff --git a/dataset/panorama_generation_0002/meta.json b/dataset/panorama_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..c8f7ac8e3e7892e727a2f0ecfc18b3ef96d049c7
--- /dev/null
+++ b/dataset/panorama_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "panorama generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0054",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/physical_laws_illustration_0001/eval.json b/dataset/physical_laws_illustration_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a146d4552ec9a8654575fd80f1715ac876984da
--- /dev/null
+++ b/dataset/physical_laws_illustration_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Temporal Logic: Does the sequence of images logically present the changes governed by the physical laws in chronological order?",
+            "0_point_standard": "The image sequence is not arranged in chronological order or lacks logical flow, failing to illustrate the process of gradual change.",
+            "1_point_standard": "The image sequence clearly presents the changes governed by the physical laws in a logical chronological order."
+        },
+        {
+            "question": "Consistency with Text Description: Do the contents of the images match the physical laws specified in the text description?",
+            "0_point_standard": "The image contents do not accurately reflect the physical laws in the text description, with noticeable discrepancies.",
+            "1_point_standard": "The image contents completely match the text description, accurately showcasing the specified physical laws."
+        },
+        {
+            "question": "Consistency of Image Style: Are the style and overall visual effect of the images consistent?",
+            "0_point_standard": "The image styles are inconsistent, leading to a disjointed visual effect.",
+            "1_point_standard": "All images maintain a consistent style, creating a coherent visual effect."
+        },
+        {
+            "question": "Consistency of Object/Character ID: Does the generated image sequence keep the identity of each object or character consistent (i.e., the same object or character appears throughout)?",
+            "0_point_standard": "The main subjects are inconsistent between different frames, making it difficult to recognize them as the same object or character.",
+            "1_point_standard": "The main subjects are consistent, clearly identifiable as the same object or character."
+        },
+        {
+            "question": "Logical Accuracy: Is the demonstration of physical laws reasonable and logically accurate?",
+            "0_point_standard": "The representation of physical laws is illogical or unreasonable, with obvious errors or unrealistic depictions.",
+            "1_point_standard": "The representation of physical laws is reasonable, logical, and accurately reflects the expected physical principles."
+        },
+        {
+            "question": "Professional Aesthetics: Do the details and aesthetics of the images meet professional standards, and are the images visually appealing?",
+            "0_point_standard": "The images lack detail, have poor aesthetics, and do not meet visual standards.",
+            "1_point_standard": "The images are rich in detail, have excellent aesthetics, meet professional standards, and are visually appealing."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/physical_laws_illustration_0001/images.txt b/dataset/physical_laws_illustration_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/physical_laws_illustration_0001/instruction.txt b/dataset/physical_laws_illustration_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..45aeea8a6384da7e1cc7235d5cdea16a97481c53
--- /dev/null
+++ b/dataset/physical_laws_illustration_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a scene of an apple falling from a tree, containing 4 images arranged in chronological order, showing the process of the apple detaching from the tree and contacting the ground. All images must follow the physical law of gravity.
\ No newline at end of file
diff --git a/dataset/physical_laws_illustration_0001/meta.json b/dataset/physical_laws_illustration_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..d9bd665a6451949c65d1aba245c4534bfc736d4d
--- /dev/null
+++ b/dataset/physical_laws_illustration_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "physical laws illustration",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0018",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0002/eval.json b/dataset/plant_growth_process_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..47214b91925ca17d53f12ba687318091f583929f
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the stages of plant growth in chronological order?",
+            "0_point_standard": "The sequence of images is not in chronological order or lacks logical progression, failing to illustrate the stages of plant growth.",
+            "1_point_standard": "The sequence of images clearly presents the stages of plant growth in logical chronological order."
+        },
+        {
+            "question": "Does the content of the images accurately reflect the plant growth process specified in the text description?",
+            "0_point_standard": "The content of the images inaccurately represents the stages of plant growth described in the text, showing significant discrepancies.",
+            "1_point_standard": "The content of the images perfectly matches the text description, accurately depicting the specified stages of plant growth."
+        },
+        {
+            "question": "Is the style and overall visual effect of the images consistent throughout the sequence?",
+            "0_point_standard": "The style of the images is inconsistent, leading to a disjointed visual effect that disrupts the sequence's coherence.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect throughout the growth sequence."
+        },
+        {
+            "question": "Does the generated sequence of images maintain consistency of the same plant species or individual plant?",
+            "0_point_standard": "The plant appears inconsistent across different frames, making it difficult to recognize as the same species or individual plant.",
+            "1_point_standard": "The plant is consistent, clearly recognizable as the same species or individual plant throughout the sequence."
+        },
+        {
+            "question": "Considering biological principles, is the demonstration of the plant growth process reasonable and logical?",
+            "0_point_standard": "The demonstration of plant growth is illogical or unreasonable, with descriptions of growth stages being notably inaccurate or unrealistic.",
+            "1_point_standard": "The demonstration of the plant growth process is reasonable, logical, and accurately reflects expected biological growth principles."
+        },
+        {
+            "question": "Do the details and aesthetics of the images meet professional standards and have visual appeal?",
+            "0_point_standard": "The images lack detail, have poor aesthetics, do not meet visual standards, and detract from the overall presentation.",
+            "1_point_standard": "The images are richly detailed, have excellent aesthetics, meet professional standards, and are visually appealing, enhancing the overall presentation."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0002/images.txt b/dataset/plant_growth_process_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/plant_growth_process_generation_0002/instruction.txt b/dataset/plant_growth_process_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9efe1fd8ba484dbfbfe768f861555636f1605f3a
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images depicting the growth of a magical vine from seed to full maturity. The first image shows a strange seed being planted in glowing blue soil, with a mysterious forest in the background; the second image shows the seedling stage, with the vine starting to grow, and magical symbols faintly appearing on its leaves; the third image shows the half-grown vine, already climbing the surrounding stone walls and emitting a soft glow; the fourth image shows the fully grown magical vine coiling around an ancient stone tower, with glowing flowers blooming on the vine.
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0002/meta.json b/dataset/plant_growth_process_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..40ef0f83d19f7c52746980e7917f29af00fcfb60
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "plant growth process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0020",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0003/eval.json b/dataset/plant_growth_process_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..add17c977103ae9fa50d940cc36445bb75370205
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the stages of plant growth in chronological order?",
+            "0_point_standard": "The sequence of images is not in chronological order or lacks logical progression, failing to illustrate the stages of plant growth.",
+            "1_point_standard": "The sequence of images clearly presents the stages of plant growth in a logical chronological order."
+        },
+        {
+            "question": "Does the image content accurately reflect the plant growth process specified in the text description?",
+            "0_point_standard": "The image content inaccurately represents the plant growth stages described in the text, showing significant discrepancies.",
+            "1_point_standard": "The image content completely matches the text description, accurately depicting the specified stages of plant growth."
+        },
+        {
+            "question": "Is the style and overall visual effect of the images consistent throughout the sequence?",
+            "0_point_standard": "The image style is inconsistent, leading to a disjointed visual effect that disrupts the sequence's coherence.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect throughout the growth sequence."
+        },
+        {
+            "question": "Does the generated image sequence maintain consistency of the same plant species or individual plant?",
+            "0_point_standard": "The plant appears inconsistent between frames, making it difficult to recognize as the same species or individual plant.",
+            "1_point_standard": "The plant is consistent and can be clearly identified as the same species or individual plant throughout the sequence."
+        },
+        {
+            "question": "Considering biological principles, is the demonstration of the plant growth process reasonable and logical?",
+            "0_point_standard": "The representation of plant growth is illogical or unreasonable, with descriptions of growth stages that are clearly inaccurate or unrealistic.",
+            "1_point_standard": "The representation of the plant growth process is reasonable, logical, and accurately reflects the expected biological growth principles."
+        },
+        {
+            "question": "Do the details and aesthetics of the images meet professional standards and have visual appeal?",
+            "0_point_standard": "The images lack detail, have poor aesthetics, do not meet visual standards, and detract from the overall presentation.",
+            "1_point_standard": "The images are rich in detail, have excellent aesthetics, meet professional standards, are visually appealing, and enhance the overall presentation."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0003/images.txt b/dataset/plant_growth_process_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/plant_growth_process_generation_0003/instruction.txt b/dataset/plant_growth_process_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2e96e30c01503ee2dfb9367373671dadb1101f18
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images depicting the growth of a sunflower from sprouting to full bloom. The first image shows a tiny sprout emerging from moist soil in the morning dew; the second image shows the young seedling with a few leaves growing, with a bright sky in the background; the third image shows the half-grown sunflower, standing tall with its flower head beginning to form; the fourth image shows the fully bloomed sunflower facing the sun, with a golden field of sunflowers in the background.
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0003/meta.json b/dataset/plant_growth_process_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..034fa97e6e3a6509dadab714df5d24b2db23b858
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "plant growth process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0020",
+    "output_image_count": 4,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0004/eval.json b/dataset/plant_growth_process_generation_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..20e3260bb4921ae30db7420d9b83ab0f99d8ab31
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sequence of images logically present the stages of plant growth in chronological order?",
+            "0_point_standard": "The sequence of images is not arranged in chronological order or lacks logical progression, failing to illustrate the stages of plant growth.",
+            "1_point_standard": "The sequence of images clearly presents the stages of plant growth in a logical chronological order."
+        },
+        {
+            "question": "Does the content of the images accurately reflect the plant growth process specified in the text description?",
+            "0_point_standard": "The content of the images inaccurately represents the plant growth stages described in the text, showing significant discrepancies.",
+            "1_point_standard": "The content of the images matches the text description perfectly, accurately showcasing the specified stages of plant growth."
+        },
+        {
+            "question": "Is the style and overall visual effect of the images consistent throughout the sequence?",
+            "0_point_standard": "The style of the images is inconsistent, causing a disjointed visual effect that disrupts the coherence of the sequence.",
+            "1_point_standard": "All images maintain a consistent style, creating a cohesive visual effect throughout the growth sequence."
+        },
+        {
+            "question": "Does the generated sequence of images maintain consistency of the same plant species or individual plant?",
+            "0_point_standard": "The plant appears inconsistent between frames, making it difficult to recognize as the same species or individual plant.",
+            "1_point_standard": "The plant is consistent, clearly recognizable as the same species or individual plant throughout the sequence."
+        },
+        {
+            "question": "Considering biological principles, is the demonstration of the plant growth process reasonable and logical?",
+            "0_point_standard": "The representation of plant growth is illogical or unreasonable, with inaccurately or unrealistically described growth stages.",
+            "1_point_standard": "The representation of the plant growth process is reasonable, logical, and accurately reflects expected biological growth principles."
+        },
+        {
+            "question": "Do the details and aesthetics of the images meet professional standards and possess visual appeal?",
+            "0_point_standard": "The images lack detail, have poor aesthetics, do not meet visual standards, and detract from the overall presentation.",
+            "1_point_standard": "The images are rich in detail, have excellent aesthetics, meet professional standards, are visually appealing, and enhance the overall presentation."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0004/images.txt b/dataset/plant_growth_process_generation_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/plant_growth_process_generation_0004/instruction.txt b/dataset/plant_growth_process_generation_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d044c162b38efddd7c635db1aa50816ef4490a05
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of images depicting the growth of a cactus from a tiny sprout to full maturity. The first image shows a small cactus sprout growing in dry desert soil, with vast sand dunes in the background; the second image shows a half-grown cactus with spines beginning to develop, surrounded by sparse desert vegetation; the third image shows a fully mature cactus, standing tall against the backdrop of a sunset over the desert and distant mountains.
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_0004/meta.json b/dataset/plant_growth_process_generation_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..39fbc46ec384a8c14804dd552ada8f9d2c94d1bc
--- /dev/null
+++ b/dataset/plant_growth_process_generation_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "plant growth process generation without reference",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0020",
+    "output_image_count": 3,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_with_reference_0001/auto_eval.jsonl b/dataset/plant_growth_process_generation_with_reference_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..5eeaba1281799cf864ae1253e782dc3cfe97601e
--- /dev/null
+++ b/dataset/plant_growth_process_generation_with_reference_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg"], "question": "Is the number in the image the digit 3? 0 points: The number in the image is not the digit 3; 1 point: The number in the image is the digit 3. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first output image and original input image of the response provided by a student. The task objective is to generate a set of plant growth stage images.\nThe text requirement is:\nPlease generate 3 images showing different growth stages of this tree, illustrating the transformation from a sapling to full maturity. The first image should depict the sapling stage, with a thin trunk and fewer leaves. The second image should show the tree growing, with a thicker trunk, more branches, and a moderate amount of foliage. The third image should represent the tree nearing maturity, with a sturdy trunk, dense branches, and a fuller canopy. Ensure that all generated images reflect the same tree as the original, showing its progression over time.\nYour review question is:\nDoes the sapling in the first output image appear as a younger version of the same tree in the original input image, maintaining recognizable characteristics such as branching patterns and leaf shape? 0 points: The sapling does not resemble the tree in the original image, making it difficult to identify as the same tree. 1 point: The sapling retains recognizable features of the original tree, indicating a younger version of the same plant.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and third output images of the response provided by a student. The task objective is to generate a set of plant growth stage images.\nThe text requirement is:\nPlease generate 3 images showing different growth stages of this tree, illustrating the transformation from a sapling to full maturity. The first image should depict the sapling stage, with a thin trunk and fewer leaves. The second image should show the tree growing, with a thicker trunk, more branches, and a moderate amount of foliage. The third image should represent the tree nearing maturity, with a sturdy trunk, dense branches, and a fuller canopy. Ensure that all generated images reflect the same tree as the original, showing its progression over time.\nYour review question is:\nDo the first and third output images show a logical progression in the growth of the tree, with an evident increase in trunk thickness, branching, and foliage? 0 points: The progression in growth appears unrealistic or inconsistent, with abrupt or illogical changes in the tree’s size or structure. 1 point: The growth progression is logical, showing a natural increase in the tree’s size and complexity from the sapling to near maturity.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and third output images of the response provided by a student. The task objective is to generate a set of plant growth stage images.\nThe text requirement is:\nPlease generate 3 images showing different growth stages of this tree, illustrating the transformation from a sapling to full maturity. The first image should depict the sapling stage, with a thin trunk and fewer leaves. The second image should show the tree growing, with a thicker trunk, more branches, and a moderate amount of foliage. The third image should represent the tree nearing maturity, with a sturdy trunk, dense branches, and a fuller canopy. Ensure that all generated images reflect the same tree as the original, showing its progression over time.\nYour review question is:\nDo the second and third output images maintain a consistent visual style, including lighting, shading, and rendering quality? 0 points: The style differs noticeably between the images, reducing the cohesion of the series. 1 point: The style is consistent across both images, with matching lighting, shading, and rendering quality that enhance continuity.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0001.jpg"], "output_images": ["0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the original input image and third output image of the response provided by a student. The task objective is to generate a set of plant growth stage images.\nThe text requirement is:\nPlease generate 3 images showing different growth stages of this tree, illustrating the transformation from a sapling to full maturity. The first image should depict the sapling stage, with a thin trunk and fewer leaves. The second image should show the tree growing, with a thicker trunk, more branches, and a moderate amount of foliage. The third image should represent the tree nearing maturity, with a sturdy trunk, dense branches, and a fuller canopy. Ensure that all generated images reflect the same tree as the original, showing its progression over time.\nYour review question is:\nDoes the background in the third output image remain consistent with the original input image, including the landscape, lighting, and surrounding vegetation? 0 points: The background differs noticeably, causing the scene to look unrelated or inconsistent with the original setting. 1 point: The background is consistent, showing the same landscape and environmental elements, maintaining continuity with the original setting.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and second output images of the response provided by a student. The task objective is to generate a set of plant growth stage images.\nThe text requirement is:\nPlease generate 3 images showing different growth stages of this tree, illustrating the transformation from a sapling to full maturity. The first image should depict the sapling stage, with a thin trunk and fewer leaves. The second image should show the tree growing, with a thicker trunk, more branches, and a moderate amount of foliage. The third image should represent the tree nearing maturity, with a sturdy trunk, dense branches, and a fuller canopy. Ensure that all generated images reflect the same tree as the original, showing its progression over time.\nYour review question is:\nDo the first and second output images accurately reflect the described growth stages, with the first showing a sapling with fewer leaves and the second showing intermediate growth with more branches and foliage? 0 points: The images do not clearly match the described growth stages, with unrealistic or inaccurate representations of the tree’s development. 1 point: The images align well with the descriptions, showing a logical increase in the tree’s structure and foliage as it grows.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/plant_growth_process_generation_with_reference_0001/eval.json b/dataset/plant_growth_process_generation_with_reference_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..b64b1dd6b164624046dd9a2372934f964fc1f4bd
--- /dev/null
+++ b/dataset/plant_growth_process_generation_with_reference_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements of the text description?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "In the first output image, does the sapling appear as a younger version of the tree in the original input image, retaining recognizable features such as branching pattern and leaf shape?",
+            "0_point_standard": "The sapling does not resemble the tree in the original image, making it difficult to identify as the same tree.",
+            "1_point_standard": "The sapling retains recognizable features of the original tree, showing a younger version of the same plant."
+        },
+        {
+            "question": "Do the first and third output images display a reasonable progression of tree growth, showing changes such as trunk thickening, increased branching, and more leaves?",
+            "0_point_standard": "The growth progression appears unrealistic or inconsistent, with abrupt or unreasonable changes in tree size or structure.",
+            "1_point_standard": "The growth progression is reasonable, showing a natural increase in size and complexity from a young sapling to a near-mature tree."
+        },
+        {
+            "question": "Do the second and third output images maintain a consistent visual style, including lighting, shadow, and rendering quality?",
+            "0_point_standard": "The styles of the two images are noticeably different, reducing the coherence of the series.",
+            "1_point_standard": "The styles of the two images are consistent, with matching lighting, shadow, and rendering quality, enhancing continuity."
+        },
+        {
+            "question": "Does the background in the third output image remain consistent with the original input image, including landscape, lighting, and surrounding vegetation?",
+            "0_point_standard": "There are noticeable differences in the background, making the scene appear unrelated or inconsistent with the original environment.",
+            "1_point_standard": "The background is consistent, showing the same landscape and environmental elements, maintaining continuity with the original environment."
+        },
+        {
+            "question": "Do the first and second output images accurately reflect the described growth stages, with the first showing a sapling with fewer leaves and the second showing an intermediate growth state with more branches and leaves?",
+            "0_point_standard": "The images fail to clearly correspond to the described growth stages, with tree growth appearing unreasonable or inaccurate.",
+            "1_point_standard": "The images closely match the description, showing a reasonable increase in tree structure and leaf quantity during growth."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_with_reference_0001/images.txt b/dataset/plant_growth_process_generation_with_reference_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8b06e9310a7142b8e1e84653322fd5f132d2d1ab
--- /dev/null
+++ b/dataset/plant_growth_process_generation_with_reference_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN01ASs2dE1hZKXfVh0A2_!!6000000004291-49-tps-2016-2534.webp
diff --git a/dataset/plant_growth_process_generation_with_reference_0001/instruction.txt b/dataset/plant_growth_process_generation_with_reference_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c46fc945100d29dbb836299665e7f947d8c0c532
--- /dev/null
+++ b/dataset/plant_growth_process_generation_with_reference_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate 3 images showing different growth stages of this tree, illustrating the transformation from a sapling to full maturity. The first image should depict the sapling stage, with a thin trunk and fewer leaves. The second image should show the tree growing, with a thicker trunk, more branches, and a moderate amount of foliage. The third image should represent the tree nearing maturity, with a sturdy trunk, dense branches, and a fuller canopy. Ensure that all generated images reflect the same tree as the original, showing its progression over time.
\ No newline at end of file
diff --git a/dataset/plant_growth_process_generation_with_reference_0001/meta.json b/dataset/plant_growth_process_generation_with_reference_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..e5ad922a55cc08255f11f2567e11626c256c037b
--- /dev/null
+++ b/dataset/plant_growth_process_generation_with_reference_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "plant growth process generation with reference",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0046",
+    "output_image_count": 3,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/poster_generation_0002/auto_eval.jsonl b/dataset/poster_generation_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..4b68cd7b519fcd4ae0bf7174c5830902d4114a65
--- /dev/null
+++ b/dataset/poster_generation_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster features the character Joker, as portrayed by Joaquin Phoenix, in a stylized, minimalist design. The background is a textured, muted green with slight grunge effects that give it a worn, vintage feel. At the top of the poster, “JOAQUIN PHOENIX” is written in small white capital letters. Below this, the word “JOKER” dominates the upper half in large, bold, distressed yellow letters. The left side of the poster reads “OCTOBER 4” in small white text, while the right side has the phrase “PUT ON A HAPPY FACE” also in small white text. The lower half of the poster is dedicated to an illustration of the Joker’s face, shown in profile and looking upwards with a broad, unsettling smile. His face is painted in clown makeup: a white base, red painted smile extending beyond his lips, and blue triangles around his eyes. His hair is slicked back, dark green, and styled in loose waves. He wears a red suit jacket over a green shirt, with a yellow vest visible underneath. The overall style is bold, with thick outlines and a simplified color palette, capturing a dark yet intriguing tone.\"\nYour review question is:\nDoes the generated image clearly resemble a poster, with recognizable elements such as a focal design, layout structure, and text components? 0 points: The image lacks identifiable poster qualities, making it unclear as a promotional or informational design. 1 point: The image has clear poster characteristics, with a defined layout, focal design, and text elements typical of a poster.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster features the character Joker, as portrayed by Joaquin Phoenix, in a stylized, minimalist design. The background is a textured, muted green with slight grunge effects that give it a worn, vintage feel. At the top of the poster, “JOAQUIN PHOENIX” is written in small white capital letters. Below this, the word “JOKER” dominates the upper half in large, bold, distressed yellow letters. The left side of the poster reads “OCTOBER 4” in small white text, while the right side has the phrase “PUT ON A HAPPY FACE” also in small white text. The lower half of the poster is dedicated to an illustration of the Joker’s face, shown in profile and looking upwards with a broad, unsettling smile. His face is painted in clown makeup: a white base, red painted smile extending beyond his lips, and blue triangles around his eyes. His hair is slicked back, dark green, and styled in loose waves. He wears a red suit jacket over a green shirt, with a yellow vest visible underneath. The overall style is bold, with thick outlines and a simplified color palette, capturing a dark yet intriguing tone.\"\nYour review question is:\nIs the image visually complete, with a balanced composition that does not require additional elements to be perceived as a finished painting? 0 points: The image appears incomplete or lacks a balanced composition, giving the impression of an unfinished piece. 1 point: The image is visually complete and balanced, functioning well as a standalone painting.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster features the character Joker, as portrayed by Joaquin Phoenix, in a stylized, minimalist design. The background is a textured, muted green with slight grunge effects that give it a worn, vintage feel. At the top of the poster, “JOAQUIN PHOENIX” is written in small white capital letters. Below this, the word “JOKER” dominates the upper half in large, bold, distressed yellow letters. The left side of the poster reads “OCTOBER 4” in small white text, while the right side has the phrase “PUT ON A HAPPY FACE” also in small white text. The lower half of the poster is dedicated to an illustration of the Joker’s face, shown in profile and looking upwards with a broad, unsettling smile. His face is painted in clown makeup: a white base, red painted smile extending beyond his lips, and blue triangles around his eyes. His hair is slicked back, dark green, and styled in loose waves. He wears a red suit jacket over a green shirt, with a yellow vest visible underneath. The overall style is bold, with thick outlines and a simplified color palette, capturing a dark yet intriguing tone.\"\nYour review question is:\nDoes the painting accurately represent the specific subject, style, or elements described in the text prompt (e.g., a landscape, portrait, or surreal theme)? Read the text requirement sentence by sentence; if any element in a sentence is not reflected in the poster, score 0 points. 0 points: The painting does not align with the described subject, style, or elements, deviating from the text requirements. 1 point: The painting accurately represents the subject, style, and elements specified in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster features the character Joker, as portrayed by Joaquin Phoenix, in a stylized, minimalist design. The background is a textured, muted green with slight grunge effects that give it a worn, vintage feel. At the top of the poster, “JOAQUIN PHOENIX” is written in small white capital letters. Below this, the word “JOKER” dominates the upper half in large, bold, distressed yellow letters. The left side of the poster reads “OCTOBER 4” in small white text, while the right side has the phrase “PUT ON A HAPPY FACE” also in small white text. The lower half of the poster is dedicated to an illustration of the Joker’s face, shown in profile and looking upwards with a broad, unsettling smile. His face is painted in clown makeup: a white base, red painted smile extending beyond his lips, and blue triangles around his eyes. His hair is slicked back, dark green, and styled in loose waves. He wears a red suit jacket over a green shirt, with a yellow vest visible underneath. The overall style is bold, with thick outlines and a simplified color palette, capturing a dark yet intriguing tone.\"\nYour review question is:\nAre the text elements (e.g., title, tagline, body text) in the poster clear, readable, and appropriately placed to convey the intended message? 0 points: The text elements are unclear, difficult to read, or poorly positioned, affecting the communication of the message. 1 point: The text elements are clear, readable, and well-placed, effectively conveying the intended message.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster features the character Joker, as portrayed by Joaquin Phoenix, in a stylized, minimalist design. The background is a textured, muted green with slight grunge effects that give it a worn, vintage feel. At the top of the poster, “JOAQUIN PHOENIX” is written in small white capital letters. Below this, the word “JOKER” dominates the upper half in large, bold, distressed yellow letters. The left side of the poster reads “OCTOBER 4” in small white text, while the right side has the phrase “PUT ON A HAPPY FACE” also in small white text. The lower half of the poster is dedicated to an illustration of the Joker’s face, shown in profile and looking upwards with a broad, unsettling smile. His face is painted in clown makeup: a white base, red painted smile extending beyond his lips, and blue triangles around his eyes. His hair is slicked back, dark green, and styled in loose waves. He wears a red suit jacket over a green shirt, with a yellow vest visible underneath. The overall style is bold, with thick outlines and a simplified color palette, capturing a dark yet intriguing tone.\"\nYour review question is:\nDoes the poster utilize visual hierarchy effectively, with emphasis on key elements such as the main message, imagery, or call-to-action? 0 points: The poster lacks a clear visual hierarchy, making it difficult to distinguish important elements from supporting details. 1 point: The poster uses visual hierarchy effectively, with clear emphasis on key elements, making the design easy to follow.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster features the character Joker, as portrayed by Joaquin Phoenix, in a stylized, minimalist design. The background is a textured, muted green with slight grunge effects that give it a worn, vintage feel. At the top of the poster, “JOAQUIN PHOENIX” is written in small white capital letters. Below this, the word “JOKER” dominates the upper half in large, bold, distressed yellow letters. The left side of the poster reads “OCTOBER 4” in small white text, while the right side has the phrase “PUT ON A HAPPY FACE” also in small white text. The lower half of the poster is dedicated to an illustration of the Joker’s face, shown in profile and looking upwards with a broad, unsettling smile. His face is painted in clown makeup: a white base, red painted smile extending beyond his lips, and blue triangles around his eyes. His hair is slicked back, dark green, and styled in loose waves. He wears a red suit jacket over a green shirt, with a yellow vest visible underneath. The overall style is bold, with thick outlines and a simplified color palette, capturing a dark yet intriguing tone.\"\nYour review question is:\nDoes the poster exhibit a high level of aesthetic quality, with a cohesive design, appealing color choices, and strong visual impact? 0 points: The poster lacks aesthetic appeal, with poor color choices, weak composition, or an unprofessional look. 1 point: The poster has strong aesthetic appeal, with cohesive design elements, attractive colors, and a visually impactful, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/poster_generation_0002/eval.json b/dataset/poster_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..c3882a510efbd88d32f06a988f91b339167f805b
--- /dev/null
+++ b/dataset/poster_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present itself as a poster with recognizable elements such as focal design, layout structure, and text components?",
+            "0_point_standard": "The image lacks recognizable features of a poster, making it difficult to identify as promotional or informational design.",
+            "1_point_standard": "The image clearly exhibits poster characteristics, with a defined layout, focal design, and text elements that typically conform to poster style."
+        },
+        {
+            "question": "Is the image visually complete with a balanced composition, requiring no additional elements to be considered a complete artwork?",
+            "0_point_standard": "The image appears incomplete or lacks a balanced composition, giving an unfinished impression.",
+            "1_point_standard": "The image is visually complete and balanced, functioning well as a standalone artwork."
+        },
+        {
+            "question": "Does the artwork accurately represent the specific theme, style, or elements described in the text prompt (e.g., landscape, portrait, or surreal themes)? Read the text requirements sentence by sentence; if any element in a sentence is missing in the poster, score 0 points.",
+            "0_point_standard": "The artwork fails to capture the described theme, style, or elements, deviating from the text requirements.",
+            "1_point_standard": "The artwork accurately represents the specified theme, style, and elements from the text prompt."
+        },
+        {
+            "question": "Are the text elements in the poster (e.g., title, slogan, body text) clear, legible, and appropriately placed to convey the intended message?",
+            "0_point_standard": "Text elements are unclear, difficult to read, or poorly placed, hindering delivery of the message.",
+            "1_point_standard": "Text elements are clear, legible, and appropriately placed, effectively conveying the intended message."
+        },
+        {
+            "question": "Does the poster effectively utilize visual hierarchy to emphasize key elements such as the main message, image, or call to action?",
+            "0_point_standard": "The poster lacks a clear visual hierarchy, making it difficult to distinguish between important elements and supporting details.",
+            "1_point_standard": "The poster effectively uses visual hierarchy, clearly emphasizing key elements, making the design easy to understand."
+        },
+        {
+            "question": "Does the poster exhibit a high level of aesthetic quality, with cohesive design, appealing color scheme, and strong visual impact?",
+            "0_point_standard": "The poster lacks aesthetic appeal, with poor color scheme, weak composition, or appearing unprofessional.",
+            "1_point_standard": "The poster has strong aesthetic appeal, cohesive design, appealing color scheme, and strong visual impact, presenting a professional appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/poster_generation_0002/images.txt b/dataset/poster_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/poster_generation_0002/instruction.txt b/dataset/poster_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..398ddecf7027b68eda08f10bafb1608ee4bf6988
--- /dev/null
+++ b/dataset/poster_generation_0002/instruction.txt
@@ -0,0 +1 @@
+This poster features the character Joker, as portrayed by Joaquin Phoenix, in a stylized, minimalist design. The background is a textured, muted green with slight grunge effects that give it a worn, vintage feel. At the top of the poster, “JOAQUIN PHOENIX” is written in small white capital letters. Below this, the word “JOKER” dominates the upper half in large, bold, distressed yellow letters. The left side of the poster reads “OCTOBER 4” in small white text, while the right side has the phrase “PUT ON A HAPPY FACE” also in small white text. The lower half of the poster is dedicated to an illustration of the Joker’s face, shown in profile and looking upwards with a broad, unsettling smile. His face is painted in clown makeup: a white base, red painted smile extending beyond his lips, and blue triangles around his eyes. His hair is slicked back, dark green, and styled in loose waves. He wears a red suit jacket over a green shirt, with a yellow vest visible underneath. The overall style is bold, with thick outlines and a simplified color palette, capturing a dark yet intriguing tone.
\ No newline at end of file
diff --git a/dataset/poster_generation_0002/meta.json b/dataset/poster_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ecd63c79bf74566d64157324921d87c5dddaa1be
--- /dev/null
+++ b/dataset/poster_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "poster generation",
+    "num_of_cases": 5,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0026",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/poster_generation_0003/auto_eval.jsonl b/dataset/poster_generation_0003/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..cc776cfb07a279c516a3233def73c8074504e943
--- /dev/null
+++ b/dataset/poster_generation_0003/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is themed around the movie “Harry Potter and the Prisoner of Azkaban” and features a dark, eerie atmosphere with a striking color palette of teal, purple, and black. The scene depicts a dense forest with tall, bare trees, rendered in dark purple and black tones. At the center of the forest path, Harry Potter stands with his back facing the viewer, raising his wand towards the sky in a defensive stance. He is accompanied by a figure lying beside him, presumably Sirius Black. Above Harry, a group of dark, ghostly Dementors with flowing, shadowy forms descends upon him. The Dementors are illustrated as black silhouettes with purple accents, swirling and twisting ominously as they move toward him. In the background, a large, bright full moon illuminates the scene, casting a cold, ethereal light over the trees. The sky is filled with wispy clouds, adding to the foreboding ambiance. At the bottom of the poster, the title “Harry Potter and the Prisoner of Azkaban” is written in a stylized white font, with swirling, wave-like teal shapes beneath it, resembling mist or magical energy. The overall style is intense and mystical, capturing the dark magic of the Dementors in a foreboding forest setting.\"\nYour review question is:\nDoes the generated image clearly resemble a poster, with recognizable elements such as a focal design, layout structure, and text components? 0 points: The image lacks identifiable poster qualities, making it unclear as a promotional or informational design. 1 point: The image has clear poster characteristics, with a defined layout, focal design, and text elements typical of a poster.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is themed around the movie “Harry Potter and the Prisoner of Azkaban” and features a dark, eerie atmosphere with a striking color palette of teal, purple, and black. The scene depicts a dense forest with tall, bare trees, rendered in dark purple and black tones. At the center of the forest path, Harry Potter stands with his back facing the viewer, raising his wand towards the sky in a defensive stance. He is accompanied by a figure lying beside him, presumably Sirius Black. Above Harry, a group of dark, ghostly Dementors with flowing, shadowy forms descends upon him. The Dementors are illustrated as black silhouettes with purple accents, swirling and twisting ominously as they move toward him. In the background, a large, bright full moon illuminates the scene, casting a cold, ethereal light over the trees. The sky is filled with wispy clouds, adding to the foreboding ambiance. At the bottom of the poster, the title “Harry Potter and the Prisoner of Azkaban” is written in a stylized white font, with swirling, wave-like teal shapes beneath it, resembling mist or magical energy. The overall style is intense and mystical, capturing the dark magic of the Dementors in a foreboding forest setting.\"\nYour review question is:\nIs the image visually complete, with a balanced composition that does not require additional elements to be perceived as a finished painting? 0 points: The image appears incomplete or lacks a balanced composition, giving the impression of an unfinished piece. 1 point: The image is visually complete and balanced, functioning well as a standalone painting.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is themed around the movie “Harry Potter and the Prisoner of Azkaban” and features a dark, eerie atmosphere with a striking color palette of teal, purple, and black. The scene depicts a dense forest with tall, bare trees, rendered in dark purple and black tones. At the center of the forest path, Harry Potter stands with his back facing the viewer, raising his wand towards the sky in a defensive stance. He is accompanied by a figure lying beside him, presumably Sirius Black. Above Harry, a group of dark, ghostly Dementors with flowing, shadowy forms descends upon him. The Dementors are illustrated as black silhouettes with purple accents, swirling and twisting ominously as they move toward him. In the background, a large, bright full moon illuminates the scene, casting a cold, ethereal light over the trees. The sky is filled with wispy clouds, adding to the foreboding ambiance. At the bottom of the poster, the title “Harry Potter and the Prisoner of Azkaban” is written in a stylized white font, with swirling, wave-like teal shapes beneath it, resembling mist or magical energy. The overall style is intense and mystical, capturing the dark magic of the Dementors in a foreboding forest setting.\"\nYour review question is:\nDoes the painting accurately represent the specific subject, style, or elements described in the text prompt (e.g., a landscape, portrait, or surreal theme)? Read the text requirement sentence by sentence; if any element in a sentence is not reflected in the poster, score 0 points. 0 points: The painting does not align with the described subject, style, or elements, deviating from the text requirements. 1 point: The painting accurately represents the subject, style, and elements specified in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is themed around the movie “Harry Potter and the Prisoner of Azkaban” and features a dark, eerie atmosphere with a striking color palette of teal, purple, and black. The scene depicts a dense forest with tall, bare trees, rendered in dark purple and black tones. At the center of the forest path, Harry Potter stands with his back facing the viewer, raising his wand towards the sky in a defensive stance. He is accompanied by a figure lying beside him, presumably Sirius Black. Above Harry, a group of dark, ghostly Dementors with flowing, shadowy forms descends upon him. The Dementors are illustrated as black silhouettes with purple accents, swirling and twisting ominously as they move toward him. In the background, a large, bright full moon illuminates the scene, casting a cold, ethereal light over the trees. The sky is filled with wispy clouds, adding to the foreboding ambiance. At the bottom of the poster, the title “Harry Potter and the Prisoner of Azkaban” is written in a stylized white font, with swirling, wave-like teal shapes beneath it, resembling mist or magical energy. The overall style is intense and mystical, capturing the dark magic of the Dementors in a foreboding forest setting.\"\nYour review question is:\nAre the text elements (e.g., title, tagline, body text) in the poster clear, readable, and appropriately placed to convey the intended message? 0 points: The text elements are unclear, difficult to read, or poorly positioned, affecting the communication of the message. 1 point: The text elements are clear, readable, and well-placed, effectively conveying the intended message.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is themed around the movie “Harry Potter and the Prisoner of Azkaban” and features a dark, eerie atmosphere with a striking color palette of teal, purple, and black. The scene depicts a dense forest with tall, bare trees, rendered in dark purple and black tones. At the center of the forest path, Harry Potter stands with his back facing the viewer, raising his wand towards the sky in a defensive stance. He is accompanied by a figure lying beside him, presumably Sirius Black. Above Harry, a group of dark, ghostly Dementors with flowing, shadowy forms descends upon him. The Dementors are illustrated as black silhouettes with purple accents, swirling and twisting ominously as they move toward him. In the background, a large, bright full moon illuminates the scene, casting a cold, ethereal light over the trees. The sky is filled with wispy clouds, adding to the foreboding ambiance. At the bottom of the poster, the title “Harry Potter and the Prisoner of Azkaban” is written in a stylized white font, with swirling, wave-like teal shapes beneath it, resembling mist or magical energy. The overall style is intense and mystical, capturing the dark magic of the Dementors in a foreboding forest setting.\"\nYour review question is:\nDoes the poster utilize visual hierarchy effectively, with emphasis on key elements such as the main message, imagery, or call-to-action? 0 points: The poster lacks a clear visual hierarchy, making it difficult to distinguish important elements from supporting details. 1 point: The poster uses visual hierarchy effectively, with clear emphasis on key elements, making the design easy to follow.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is themed around the movie “Harry Potter and the Prisoner of Azkaban” and features a dark, eerie atmosphere with a striking color palette of teal, purple, and black. The scene depicts a dense forest with tall, bare trees, rendered in dark purple and black tones. At the center of the forest path, Harry Potter stands with his back facing the viewer, raising his wand towards the sky in a defensive stance. He is accompanied by a figure lying beside him, presumably Sirius Black. Above Harry, a group of dark, ghostly Dementors with flowing, shadowy forms descends upon him. The Dementors are illustrated as black silhouettes with purple accents, swirling and twisting ominously as they move toward him. In the background, a large, bright full moon illuminates the scene, casting a cold, ethereal light over the trees. The sky is filled with wispy clouds, adding to the foreboding ambiance. At the bottom of the poster, the title “Harry Potter and the Prisoner of Azkaban” is written in a stylized white font, with swirling, wave-like teal shapes beneath it, resembling mist or magical energy. The overall style is intense and mystical, capturing the dark magic of the Dementors in a foreboding forest setting.\"\nYour review question is:\nDoes the poster exhibit a high level of aesthetic quality, with a cohesive design, appealing color choices, and strong visual impact? 0 points: The poster lacks aesthetic appeal, with poor color choices, weak composition, or an unprofessional look. 1 point: The poster has strong aesthetic appeal, with cohesive design elements, attractive colors, and a visually impactful, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/poster_generation_0003/eval.json b/dataset/poster_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..86337dc991bd6f08067129c856063427fb667f82
--- /dev/null
+++ b/dataset/poster_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present itself as a poster, with recognizable elements such as focal design, layout structure, and text components?",
+            "0_point_standard": "The image lacks recognizable poster features, making it difficult to identify as promotional or informational design.",
+            "1_point_standard": "The image has clear poster features, with a defined layout, focal design, and typical text elements that align with poster style."
+        },
+        {
+            "question": "Is the image visually complete, with a balanced composition, not requiring additional elements to be considered a complete artwork?",
+            "0_point_standard": "The image appears incomplete or lacks balanced composition, giving an unfinished impression.",
+            "1_point_standard": "The image is visually complete and balanced, functioning well as a standalone piece of artwork."
+        },
+        {
+            "question": "Does the artwork accurately represent the specific theme, style, or elements described in the text prompt (e.g., landscape, portrait, or surreal theme)? Read the text requirements sentence by sentence; if any element from a sentence is not depicted in the poster, score it 0 points.",
+            "0_point_standard": "The artwork fails to embody the described theme, style, or elements, deviating from the text requirements.",
+            "1_point_standard": "The artwork accurately represents the specified theme, style, and elements from the text prompt."
+        },
+        {
+            "question": "Are the text elements in the poster (such as title, slogan, body text) clear, legible, and appropriately placed to convey the intended message?",
+            "0_point_standard": "Text elements are unclear, hard to read, or poorly positioned, hindering message conveyance.",
+            "1_point_standard": "Text elements are clear, legible, and appropriately placed, effectively conveying the intended message."
+        },
+        {
+            "question": "Does the poster effectively utilize visual hierarchy to emphasize key elements such as main information, imagery, or calls to action?",
+            "0_point_standard": "The poster lacks a clear visual hierarchy, making it difficult to distinguish important elements from supporting details.",
+            "1_point_standard": "The poster effectively utilizes visual hierarchy, clearly emphasizing key elements, making the design easy to understand."
+        },
+        {
+            "question": "Does the poster exhibit a high level of aesthetic quality, with coherent design, attractive color scheme, and strong visual impact?",
+            "0_point_standard": "The poster lacks aesthetic appeal, has poor color choices, weak composition, or appears unprofessional.",
+            "1_point_standard": "The poster exhibits strong aesthetic appeal, with coherent design, attractive color scheme, strong visual impact, and a professional appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/poster_generation_0003/images.txt b/dataset/poster_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/poster_generation_0003/instruction.txt b/dataset/poster_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8befe1f90a029c67fe66acb28d1c1ce4dad1e4f2
--- /dev/null
+++ b/dataset/poster_generation_0003/instruction.txt
@@ -0,0 +1 @@
+This poster is themed around the movie “Harry Potter and the Prisoner of Azkaban” and features a dark, eerie atmosphere with a striking color palette of teal, purple, and black. The scene depicts a dense forest with tall, bare trees, rendered in dark purple and black tones. At the center of the forest path, Harry Potter stands with his back facing the viewer, raising his wand towards the sky in a defensive stance. He is accompanied by a figure lying beside him, presumably Sirius Black. Above Harry, a group of dark, ghostly Dementors with flowing, shadowy forms descends upon him. The Dementors are illustrated as black silhouettes with purple accents, swirling and twisting ominously as they move toward him. In the background, a large, bright full moon illuminates the scene, casting a cold, ethereal light over the trees. The sky is filled with wispy clouds, adding to the foreboding ambiance. At the bottom of the poster, the title “Harry Potter and the Prisoner of Azkaban” is written in a stylized white font, with swirling, wave-like teal shapes beneath it, resembling mist or magical energy. The overall style is intense and mystical, capturing the dark magic of the Dementors in a foreboding forest setting.
\ No newline at end of file
diff --git a/dataset/poster_generation_0003/meta.json b/dataset/poster_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..28a0a63e71a790c07c1607ea07df6c9d2f1c313f
--- /dev/null
+++ b/dataset/poster_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "poster generation",
+    "num_of_cases": 5,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0026",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/poster_generation_0004/auto_eval.jsonl b/dataset/poster_generation_0004/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..efd676f14a9da86aafd630cdf05a4390b9db666c
--- /dev/null
+++ b/dataset/poster_generation_0004/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is designed in a vintage racing theme featuring vibrant colors and bold graphics. At the top, the words “PISTON CUP” are written in large, black, retro-styled letters, with a trophy icon replacing the letter “O” to emphasize the racing theme. Below it, in smaller italicized letters, reads “RACING SERIES.” The center of the poster highlights three racing cars in dynamic motion. The red car, “Lightning McQueen” with the number “95” prominently displayed on its side, takes the forefront, showcasing a smile and intense speed lines that enhance its fast movement. A turquoise car labeled “DINOC” races alongside, while a black car marked “The King” trails slightly behind. The background has stylized palm trees and an orange sky, adding a California racing vibe. On the left side, a “Goodyear” blimp floats above a checkered flag, reinforcing the racing atmosphere. At the bottom, a red flame background displays the name “LIGHTNING MCQUEEN” in bold yellow letters, along with an illustration of additional characters from the “Cars” series. The “CARS” logo appears in bold letters at the very bottom, along with Disney and Pixar logos.\"\nYour review question is:\nDoes the generated image clearly resemble a poster, with recognizable elements such as a focal design, layout structure, and text components? 0 points: The image lacks identifiable poster qualities, making it unclear as a promotional or informational design. 1 point: The image has clear poster characteristics, with a defined layout, focal design, and text elements typical of a poster.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is designed in a vintage racing theme featuring vibrant colors and bold graphics. At the top, the words “PISTON CUP” are written in large, black, retro-styled letters, with a trophy icon replacing the letter “O” to emphasize the racing theme. Below it, in smaller italicized letters, reads “RACING SERIES.” The center of the poster highlights three racing cars in dynamic motion. The red car, “Lightning McQueen” with the number “95” prominently displayed on its side, takes the forefront, showcasing a smile and intense speed lines that enhance its fast movement. A turquoise car labeled “DINOC” races alongside, while a black car marked “The King” trails slightly behind. The background has stylized palm trees and an orange sky, adding a California racing vibe. On the left side, a “Goodyear” blimp floats above a checkered flag, reinforcing the racing atmosphere. At the bottom, a red flame background displays the name “LIGHTNING MCQUEEN” in bold yellow letters, along with an illustration of additional characters from the “Cars” series. The “CARS” logo appears in bold letters at the very bottom, along with Disney and Pixar logos.\"\nYour review question is:\nIs the image visually complete, with a balanced composition that does not require additional elements to be perceived as a finished painting? 0 points: The image appears incomplete or lacks a balanced composition, giving the impression of an unfinished piece. 1 point: The image is visually complete and balanced, functioning well as a standalone painting.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is designed in a vintage racing theme featuring vibrant colors and bold graphics. At the top, the words “PISTON CUP” are written in large, black, retro-styled letters, with a trophy icon replacing the letter “O” to emphasize the racing theme. Below it, in smaller italicized letters, reads “RACING SERIES.” The center of the poster highlights three racing cars in dynamic motion. The red car, “Lightning McQueen” with the number “95” prominently displayed on its side, takes the forefront, showcasing a smile and intense speed lines that enhance its fast movement. A turquoise car labeled “DINOC” races alongside, while a black car marked “The King” trails slightly behind. The background has stylized palm trees and an orange sky, adding a California racing vibe. On the left side, a “Goodyear” blimp floats above a checkered flag, reinforcing the racing atmosphere. At the bottom, a red flame background displays the name “LIGHTNING MCQUEEN” in bold yellow letters, along with an illustration of additional characters from the “Cars” series. The “CARS” logo appears in bold letters at the very bottom, along with Disney and Pixar logos.\"\nYour review question is:\nDoes the painting accurately represent the specific subject, style, or elements described in the text prompt (e.g., a landscape, portrait, or surreal theme)? Read the text requirement sentence by sentence; if any element in a sentence is not reflected in the poster, the score is 0 points. 0 points: The painting does not align with the described subject, style, or elements, deviating from the text requirements. 1 point: The painting accurately represents the subject, style, and elements specified in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is designed in a vintage racing theme featuring vibrant colors and bold graphics. At the top, the words “PISTON CUP” are written in large, black, retro-styled letters, with a trophy icon replacing the letter “O” to emphasize the racing theme. Below it, in smaller italicized letters, reads “RACING SERIES.” The center of the poster highlights three racing cars in dynamic motion. The red car, “Lightning McQueen” with the number “95” prominently displayed on its side, takes the forefront, showcasing a smile and intense speed lines that enhance its fast movement. A turquoise car labeled “DINOC” races alongside, while a black car marked “The King” trails slightly behind. The background has stylized palm trees and an orange sky, adding a California racing vibe. On the left side, a “Goodyear” blimp floats above a checkered flag, reinforcing the racing atmosphere. At the bottom, a red flame background displays the name “LIGHTNING MCQUEEN” in bold yellow letters, along with an illustration of additional characters from the “Cars” series. The “CARS” logo appears in bold letters at the very bottom, along with Disney and Pixar logos.\"\nYour review question is:\nAre the text elements (e.g., title, tagline, body text) in the poster clear, readable, and appropriately placed to convey the intended message? 0 points: The text elements are unclear, difficult to read, or poorly positioned, affecting the communication of the message. 1 point: The text elements are clear, readable, and well-placed, effectively conveying the intended message.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is designed in a vintage racing theme featuring vibrant colors and bold graphics. At the top, the words “PISTON CUP” are written in large, black, retro-styled letters, with a trophy icon replacing the letter “O” to emphasize the racing theme. Below it, in smaller italicized letters, reads “RACING SERIES.” The center of the poster highlights three racing cars in dynamic motion. The red car, “Lightning McQueen” with the number “95” prominently displayed on its side, takes the forefront, showcasing a smile and intense speed lines that enhance its fast movement. A turquoise car labeled “DINOC” races alongside, while a black car marked “The King” trails slightly behind. The background has stylized palm trees and an orange sky, adding a California racing vibe. On the left side, a “Goodyear” blimp floats above a checkered flag, reinforcing the racing atmosphere. At the bottom, a red flame background displays the name “LIGHTNING MCQUEEN” in bold yellow letters, along with an illustration of additional characters from the “Cars” series. The “CARS” logo appears in bold letters at the very bottom, along with Disney and Pixar logos.\"\nYour review question is:\nDoes the poster utilize visual hierarchy effectively, with emphasis on key elements such as the main message, imagery, or call-to-action? 0 points: The poster lacks a clear visual hierarchy, making it difficult to distinguish important elements from supporting details. 1 point: The poster uses visual hierarchy effectively, with clear emphasis on key elements, making the design easy to follow.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster is designed in a vintage racing theme featuring vibrant colors and bold graphics. At the top, the words “PISTON CUP” are written in large, black, retro-styled letters, with a trophy icon replacing the letter “O” to emphasize the racing theme. Below it, in smaller italicized letters, reads “RACING SERIES.” The center of the poster highlights three racing cars in dynamic motion. The red car, “Lightning McQueen” with the number “95” prominently displayed on its side, takes the forefront, showcasing a smile and intense speed lines that enhance its fast movement. A turquoise car labeled “DINOC” races alongside, while a black car marked “The King” trails slightly behind. The background has stylized palm trees and an orange sky, adding a California racing vibe. On the left side, a “Goodyear” blimp floats above a checkered flag, reinforcing the racing atmosphere. At the bottom, a red flame background displays the name “LIGHTNING MCQUEEN” in bold yellow letters, along with an illustration of additional characters from the “Cars” series. The “CARS” logo appears in bold letters at the very bottom, along with Disney and Pixar logos.\"\nYour review question is:\nDoes the poster exhibit a high level of aesthetic quality, with a cohesive design, appealing color choices, and strong visual impact? 0 points: The poster lacks aesthetic appeal, with poor color choices, weak composition, or an unprofessional look. 1 point: The poster has strong aesthetic appeal, with cohesive design elements, attractive colors, and a visually impactful, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/poster_generation_0004/eval.json b/dataset/poster_generation_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..466bc2265efdf8e924236b0c82e6b2ead657d1fa
--- /dev/null
+++ b/dataset/poster_generation_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present itself as a poster with recognizable elements such as focal design, layout structure, and text components?",
+            "0_point_standard": "The image lacks recognizable poster features, making it difficult to identify as a promotional or informational design.",
+            "1_point_standard": "The image has clear poster characteristics with a defined layout, focal design, and text elements typically conforming to poster style."
+        },
+        {
+            "question": "Is the image visually complete with a balanced composition, requiring no additional elements to be considered a complete artwork?",
+            "0_point_standard": "The image appears incomplete or lacks a balanced composition, giving an impression of being unfinished.",
+            "1_point_standard": "The image is visually complete and balanced, functioning well as a standalone artwork."
+        },
+        {
+            "question": "Does the artwork accurately depict the specific theme, style, or elements described in the text prompt (e.g., landscape, portrait, or surreal themes)? Read the text requirements sentence by sentence, and if any element is not represented in the poster, score it 0 points.",
+            "0_point_standard": "The artwork fails to represent the described theme, style, or elements, deviating from the text requirements.",
+            "1_point_standard": "The artwork accurately depicts the specified theme, style, and elements in the text prompt."
+        },
+        {
+            "question": "Are the text elements in the poster (such as title, slogan, body text) clear, legible, and appropriately placed to convey the intended message?",
+            "0_point_standard": "Text elements are unclear, hard to read, or poorly placed, affecting the conveyance of information.",
+            "1_point_standard": "Text elements are clear, legible, and appropriately placed, effectively conveying the intended message."
+        },
+        {
+            "question": "Does the poster effectively utilize visual hierarchy to emphasize key elements such as the main message, image, or call to action?",
+            "0_point_standard": "The poster lacks a clear visual hierarchy, making it difficult to distinguish important elements from auxiliary details.",
+            "1_point_standard": "The poster effectively utilizes visual hierarchy, clearly emphasizing key elements, making the design easy to understand."
+        },
+        {
+            "question": "Does the poster demonstrate a high level of aesthetic quality, with cohesive design, appealing color scheme, and strong visual impact?",
+            "0_point_standard": "The poster lacks aesthetic appeal, has poor color scheme, weak composition, or appears unprofessional.",
+            "1_point_standard": "The poster has strong aesthetic appeal, cohesive design, appealing color scheme, and strong visual impact, presenting a professional appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/poster_generation_0004/images.txt b/dataset/poster_generation_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/poster_generation_0004/instruction.txt b/dataset/poster_generation_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f632b50b93d648c42c0edf8aaf2dad9aabe5c2ff
--- /dev/null
+++ b/dataset/poster_generation_0004/instruction.txt
@@ -0,0 +1 @@
+This poster is designed in a vintage racing theme featuring vibrant colors and bold graphics. At the top, the words “PISTON CUP” are written in large, black, retro-styled letters, with a trophy icon replacing the letter “O” to emphasize the racing theme. Below it, in smaller italicized letters, reads “RACING SERIES.” The center of the poster highlights three racing cars in dynamic motion. The red car, “Lightning McQueen” with the number “95” prominently displayed on its side, takes the forefront, showcasing a smile and intense speed lines that enhance its fast movement. A turquoise car labeled “DINOC” races alongside, while a black car marked “The King” trails slightly behind. The background has stylized palm trees and an orange sky, adding a California racing vibe. On the left side, a “Goodyear” blimp floats above a checkered flag, reinforcing the racing atmosphere. At the bottom, a red flame background displays the name “LIGHTNING MCQUEEN” in bold yellow letters, along with an illustration of additional characters from the “Cars” series. The “CARS” logo appears in bold letters at the very bottom, along with Disney and Pixar logos.
\ No newline at end of file
diff --git a/dataset/poster_generation_0004/meta.json b/dataset/poster_generation_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..5582c7ce6570d22acb353734ceaf0e04b238fa23
--- /dev/null
+++ b/dataset/poster_generation_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "poster generation",
+    "num_of_cases": 5,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0026",
+    "output_image_count": 1,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/poster_generation_0005/auto_eval.jsonl b/dataset/poster_generation_0005/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..fb46ded40972d136e227768e9136e025d1d1f98c
--- /dev/null
+++ b/dataset/poster_generation_0005/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster has a vintage-inspired space exploration theme with bold, stylized typography and minimalist graphics. At the top, the large white letters spell out “FLY ME TO THE MOON” in a vertically stacked format, with each word taking up one line. The background transitions from a dark blue starry night sky at the top, gradually blending into a warm gradient of orange and red hues at the bottom, resembling a landscape or the flame of a rocket launch. The center of the poster features a tall, white Saturn V rocket with black and gray details, standing upright and ready for launch. The rocket has the American flag and the letters “USA” printed on its lower section. To the right of the rocket, a white full moon is set against the dark blue sky, adding to the outer space ambiance. A thin, black launch tower is positioned behind the rocket, adding structure to the scene. The entire design has a stylized, retro feel, emphasizing the excitement of lunar exploration.\"\nYour review question is:\nDoes the generated image clearly resemble a poster, with recognizable elements such as a focal design, layout structure, and text components? 0 points: The image lacks identifiable poster qualities, making it unclear as a promotional or informational design. 1 point: The image has clear poster characteristics, with a defined layout, focal design, and text elements typical of a poster.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster has a vintage-inspired space exploration theme with bold, stylized typography and minimalist graphics. At the top, the large white letters spell out “FLY ME TO THE MOON” in a vertically stacked format, with each word taking up one line. The background transitions from a dark blue starry night sky at the top, gradually blending into a warm gradient of orange and red hues at the bottom, resembling a landscape or the flame of a rocket launch. The center of the poster features a tall, white Saturn V rocket with black and gray details, standing upright and ready for launch. The rocket has the American flag and the letters “USA” printed on its lower section. To the right of the rocket, a white full moon is set against the dark blue sky, adding to the outer space ambiance. A thin, black launch tower is positioned behind the rocket, adding structure to the scene. The entire design has a stylized, retro feel, emphasizing the excitement of lunar exploration.\"\nYour review question is:\nIs the image visually complete, with a balanced composition that does not require additional elements to be perceived as a finished painting? 0 points: The image appears incomplete or lacks a balanced composition, giving the impression of an unfinished piece. 1 point: The image is visually complete and balanced, functioning well as a standalone painting.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster has a vintage-inspired space exploration theme with bold, stylized typography and minimalist graphics. At the top, the large white letters spell out “FLY ME TO THE MOON” in a vertically stacked format, with each word taking up one line. The background transitions from a dark blue starry night sky at the top, gradually blending into a warm gradient of orange and red hues at the bottom, resembling a landscape or the flame of a rocket launch. The center of the poster features a tall, white Saturn V rocket with black and gray details, standing upright and ready for launch. The rocket has the American flag and the letters “USA” printed on its lower section. To the right of the rocket, a white full moon is set against the dark blue sky, adding to the outer space ambiance. A thin, black launch tower is positioned behind the rocket, adding structure to the scene. The entire design has a stylized, retro feel, emphasizing the excitement of lunar exploration.\"\nYour review question is:\nDoes the painting accurately represent the specific subject, style, or elements described in the text prompt (e.g., a landscape, portrait, or surreal theme)? Read the text requirement sentence by sentence; if any element in a sentence is not reflected in the poster, score 0 points. 0 points: The painting does not align with the described subject, style, or elements, deviating from the text requirements. 1 point: The painting accurately represents the subject, style, and elements specified in the text prompt.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster has a vintage-inspired space exploration theme with bold, stylized typography and minimalist graphics. At the top, the large white letters spell out “FLY ME TO THE MOON” in a vertically stacked format, with each word taking up one line. The background transitions from a dark blue starry night sky at the top, gradually blending into a warm gradient of orange and red hues at the bottom, resembling a landscape or the flame of a rocket launch. The center of the poster features a tall, white Saturn V rocket with black and gray details, standing upright and ready for launch. The rocket has the American flag and the letters “USA” printed on its lower section. To the right of the rocket, a white full moon is set against the dark blue sky, adding to the outer space ambiance. A thin, black launch tower is positioned behind the rocket, adding structure to the scene. The entire design has a stylized, retro feel, emphasizing the excitement of lunar exploration.\"\nYour review question is:\nAre the text elements (e.g., title, tagline, body text) in the poster clear, readable, and appropriately placed to convey the intended message? 0 points: The text elements are unclear, difficult to read, or poorly positioned, affecting the communication of the message. 1 point: The text elements are clear, readable, and well-placed, effectively conveying the intended message.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster has a vintage-inspired space exploration theme with bold, stylized typography and minimalist graphics. At the top, the large white letters spell out “FLY ME TO THE MOON” in a vertically stacked format, with each word taking up one line. The background transitions from a dark blue starry night sky at the top, gradually blending into a warm gradient of orange and red hues at the bottom, resembling a landscape or the flame of a rocket launch. The center of the poster features a tall, white Saturn V rocket with black and gray details, standing upright and ready for launch. The rocket has the American flag and the letters “USA” printed on its lower section. To the right of the rocket, a white full moon is set against the dark blue sky, adding to the outer space ambiance. A thin, black launch tower is positioned behind the rocket, adding structure to the scene. The entire design has a stylized, retro feel, emphasizing the excitement of lunar exploration.\"\nYour review question is:\nDoes the poster utilize visual hierarchy effectively, with emphasis on key elements such as the main message, imagery, or call-to-action? 0 points: The poster lacks a clear visual hierarchy, making it difficult to distinguish important elements from supporting details. 1 point: The poster uses visual hierarchy effectively, with clear emphasis on key elements, making the design easy to follow.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. The work consists of only one image as the response provided by a student. The task objective is to generate a poster based on the text requirements.\nThe text requirement is:\n\"This poster has a vintage-inspired space exploration theme with bold, stylized typography and minimalist graphics. At the top, the large white letters spell out “FLY ME TO THE MOON” in a vertically stacked format, with each word taking up one line. The background transitions from a dark blue starry night sky at the top, gradually blending into a warm gradient of orange and red hues at the bottom, resembling a landscape or the flame of a rocket launch. The center of the poster features a tall, white Saturn V rocket with black and gray details, standing upright and ready for launch. The rocket has the American flag and the letters “USA” printed on its lower section. To the right of the rocket, a white full moon is set against the dark blue sky, adding to the outer space ambiance. A thin, black launch tower is positioned behind the rocket, adding structure to the scene. The entire design has a stylized, retro feel, emphasizing the excitement of lunar exploration.\"\nYour review question is:\nDoes the poster exhibit a high level of aesthetic quality, with a cohesive design, appealing color choices, and strong visual impact? 0 points: The poster lacks aesthetic appeal, with poor color choices, weak composition, or an unprofessional look. 1 point: The poster has strong aesthetic appeal, with cohesive design elements, attractive colors, and a visually impactful, professional finish.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/poster_generation_0005/eval.json b/dataset/poster_generation_0005/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..0700830b1cb3d49e0af29ce9f16f8bedd4aebd6f
--- /dev/null
+++ b/dataset/poster_generation_0005/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image clearly present itself as a poster, with recognizable elements such as focal design, layout structure, and text components?",
+            "0_point_standard": "The image lacks recognizable poster features, making it difficult to identify as promotional or informational design.",
+            "1_point_standard": "The image has clear poster characteristics, with a definite layout, focal design, and text elements typical of poster style."
+        },
+        {
+            "question": "Is the image visually complete, with a balanced composition that requires no additional elements to be considered a complete artwork?",
+            "0_point_standard": "The image appears incomplete or lacks a balanced composition, giving an unfinished impression.",
+            "1_point_standard": "The image is visually complete and balanced, functioning well as a standalone artwork."
+        },
+        {
+            "question": "Does the artwork accurately depict the specific themes, styles, or elements described in the text prompt (e.g., landscape, portrait, or surreal themes)? Read through the text requirements sentence by sentence; if any element from a sentence is missing in the poster, score it 0 points.",
+            "0_point_standard": "The artwork fails to reflect the described themes, styles, or elements, deviating from the text requirements.",
+            "1_point_standard": "The artwork accurately depicts the specified themes, styles, and elements from the text prompt."
+        },
+        {
+            "question": "Are the text elements in the poster (such as titles, slogans, body text) clear, legible, and appropriately placed to convey the intended message?",
+            "0_point_standard": "Text elements are unclear, illegible, or poorly positioned, hindering the communication of information.",
+            "1_point_standard": "Text elements are clear, legible, and appropriately placed, effectively conveying the intended message."
+        },
+        {
+            "question": "Does the poster effectively use visual hierarchy to emphasize main elements like main message, images, or calls to action?",
+            "0_point_standard": "The poster lacks a clear visual hierarchy, making it difficult to distinguish important elements from supporting details.",
+            "1_point_standard": "The poster effectively uses visual hierarchy, clearly emphasizing main elements, making the design easy to understand."
+        },
+        {
+            "question": "Does the poster exhibit a high level of aesthetic quality, with cohesive design, appealing color schemes, and strong visual impact?",
+            "0_point_standard": "The poster lacks aesthetic appeal, with poor color schemes, weak composition, or appearing unprofessional.",
+            "1_point_standard": "The poster has strong aesthetic appeal, with cohesive design, appealing color schemes, strong visual impact, and presents a professional appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/poster_generation_0005/images.txt b/dataset/poster_generation_0005/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/poster_generation_0005/instruction.txt b/dataset/poster_generation_0005/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..41c205f1a4342c415a1d44c4fa824321fe002803
--- /dev/null
+++ b/dataset/poster_generation_0005/instruction.txt
@@ -0,0 +1 @@
+This poster has a vintage-inspired space exploration theme with bold, stylized typography and minimalist graphics. At the top, the large white letters spell out “FLY ME TO THE MOON” in a vertically stacked format, with each word taking up one line. The background transitions from a dark blue starry night sky at the top, gradually blending into a warm gradient of orange and red hues at the bottom, resembling a landscape or the flame of a rocket launch. The center of the poster features a tall, white Saturn V rocket with black and gray details, standing upright and ready for launch. The rocket has the American flag and the letters “USA” printed on its lower section. To the right of the rocket, a white full moon is set against the dark blue sky, adding to the outer space ambiance. A thin, black launch tower is positioned behind the rocket, adding structure to the scene. The entire design has a stylized, retro feel, emphasizing the excitement of lunar exploration.
\ No newline at end of file
diff --git a/dataset/poster_generation_0005/meta.json b/dataset/poster_generation_0005/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..48a3f649b629662b33aaebd93badb123f55343e0
--- /dev/null
+++ b/dataset/poster_generation_0005/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "poster generation",
+    "num_of_cases": 5,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0026",
+    "output_image_count": 1,
+    "case_id": "0005"
+}
\ No newline at end of file
diff --git a/dataset/product_usage_scenario_generation_0001/auto_eval.jsonl b/dataset/product_usage_scenario_generation_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..15e19289d89bd1d4072020e8deba178b4b942c52
--- /dev/null
+++ b/dataset/product_usage_scenario_generation_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "question": "Is the number in the image the digit 4? 0 points: The number in the image is not the digit 4; 1 point: The number in the image is the digit 4. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the original input image and first output image of the response provided by a student. The task objective is to generate different application scenario images of a specific product.\nThe text requirement is:\nPlease generate a series of images based on the following text prompt, showcasing the use of this speaker in different application scenarios. A total of four images need to be generated, each representing a different scene. The first image shows the speaker on a modern-style office desk indoors, with a computer in the background, sunlight streaming through the window illuminating the desk, and the black speaker contrasting with the white and wooden elements of the desk. The second image shows the speaker on the armrest of a soft sofa, with fine fabric textures on the sofa, a warm table lamp nearby, creating a cozy and relaxed atmosphere. The third image shows the speaker outdoors on a tree stump, with sunlight filtering through the leaves in the background, a backpack placed next to the speaker, creating a natural and lively atmosphere. The fourth image shows the speaker placed on an outdoor wooden table, with a smartphone next to it playing music, autumn leaves scattered on the table, blending nature with technology.\nYour review question is:\nDoes the speaker in the first output image match the design, shape, and color of the original speaker image, ensuring product consistency? 0 points: The speaker appears different from the original, with noticeable inconsistencies in design or color. 1 point: The speaker in the generated image matches the original design, shape, and color, ensuring it’s recognizable as the same product.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third and fourth output images of the response provided by a student. The task objective is to generate different application scenario images of a specific product.\nThe text requirement is:\nPlease generate a series of images based on the following text prompt, showcasing the use of this speaker in different application scenarios. A total of four images need to be generated, each representing a different scene. The first image shows the speaker on a modern-style office desk indoors, with a computer in the background, sunlight streaming through the window illuminating the desk, and the black speaker contrasting with the white and wooden elements of the desk. The second image shows the speaker on the armrest of a soft sofa, with fine fabric textures on the sofa, a warm table lamp nearby, creating a cozy and relaxed atmosphere. The third image shows the speaker outdoors on a tree stump, with sunlight filtering through the leaves in the background, a backpack placed next to the speaker, creating a natural and lively atmosphere. The fourth image shows the speaker placed on an outdoor wooden table, with a smartphone next to it playing music, autumn leaves scattered on the table, blending nature with technology.\nYour review question is:\nDo the third and fourth output images accurately depict the unique details described in the prompt, such as the tree stump, outdoor setting with a backpack, and autumn leaves? 0 points: The scenes lack the specific details or elements described, making it difficult to identify the setting as per the prompt. Check the prompts for the third and fourth generated images sentence by sentence, and if there are any discrepancies, score zero. 1 point: The scenes accurately incorporate the specified details, such as outdoor elements, making the setting clear and aligned with the description.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and fourth output images of the response provided by a student. The task objective is to generate different application scenario images of a specific product.\nThe text requirement is:\nPlease generate a series of images based on the following text prompt, showcasing the use of this speaker in different application scenarios. A total of four images need to be generated, each representing a different scene. The first image shows the speaker on a modern-style office desk indoors, with a computer in the background, sunlight streaming through the window illuminating the desk, and the black speaker contrasting with the white and wooden elements of the desk. The second image shows the speaker on the armrest of a soft sofa, with fine fabric textures on the sofa, a warm table lamp nearby, creating a cozy and relaxed atmosphere. The third image shows the speaker outdoors on a tree stump, with sunlight filtering through the leaves in the background, a backpack placed next to the speaker, creating a natural and lively atmosphere. The fourth image shows the speaker placed on an outdoor wooden table, with a smartphone next to it playing music, autumn leaves scattered on the table, blending nature with technology.\nYour review question is:\nDo the second and fourth output images maintain a consistent realistic photography style, with appropriate lighting, textures, and rendering quality? 0 points: The style between these images differs noticeably, reducing the cohesion of the series. 1 point: Both images exhibit a consistent realistic photography style, with coherent lighting, textures, and rendering that enhance continuity.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and second output images of the response provided by a student. The task objective is to generate different application scenario images of a specific product.\nThe text requirement is:\nPlease generate a series of images based on the following text prompt, showcasing the use of this speaker in different application scenarios. A total of four images need to be generated, each representing a different scene. The first image shows the speaker on a modern-style office desk indoors, with a computer in the background, sunlight streaming through the window illuminating the desk, and the black speaker contrasting with the white and wooden elements of the desk. The second image shows the speaker on the armrest of a soft sofa, with fine fabric textures on the sofa, a warm table lamp nearby, creating a cozy and relaxed atmosphere. The third image shows the speaker outdoors on a tree stump, with sunlight filtering through the leaves in the background, a backpack placed next to the speaker, creating a natural and lively atmosphere. The fourth image shows the speaker placed on an outdoor wooden table, with a smartphone next to it playing music, autumn leaves scattered on the table, blending nature with technology.\nYour review question is:\nIs the lighting and atmosphere in the first and second output images aligned with the description, such as sunlight streaming through a window for a bright office setting, and warm lamp lighting for a cozy living room ambiance? 0 points: The lighting and atmosphere do not match the prompt’s descriptions, creating a dissonant feel in each setting. 1 point: The lighting and atmosphere align well with the descriptions, with sunlight and warm lighting enhancing the ambiance in each respective setting.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0003.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third and fourth output images of the response provided by a student. The task objective is to generate different application scenario images of a specific product.\nThe text requirement is:\nPlease generate a series of images based on the following text prompt, showcasing the use of this speaker in different application scenarios. A total of four images need to be generated, each representing a different scene. The first image shows the speaker on a modern-style office desk indoors, with a computer in the background, sunlight streaming through the window illuminating the desk, and the black speaker contrasting with the white and wooden elements of the desk. The second image shows the speaker on the armrest of a soft sofa, with fine fabric textures on the sofa, a warm table lamp nearby, creating a cozy and relaxed atmosphere. The third image shows the speaker outdoors on a tree stump, with sunlight filtering through the leaves in the background, a backpack placed next to the speaker, creating a natural and lively atmosphere. The fourth image shows the speaker placed on an outdoor wooden table, with a smartphone next to it playing music, autumn leaves scattered on the table, blending nature with technology.\nYour review question is:\nDo the third and fourth output images convey a clear sense of the speaker being used in suitable environments (e.g., outdoor and nature-friendly setups), as intended in the prompt? 0 points: The context does not convincingly represent suitable use environments, making the scenes feel out of place. 1 point: The scenes appropriately match the outdoor and natural settings, making the speaker’s use feel relevant and purposeful in the context.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/product_usage_scenario_generation_0001/eval.json b/dataset/product_usage_scenario_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..a380cf3ff52e0b59cbf482001e0cbaf6d362c408
--- /dev/null
+++ b/dataset/product_usage_scenario_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Does the speaker in the first output image match the design, shape, and color of the original speaker image to ensure product consistency?",
+            "0_point_standard": "The speaker appears different from the original image, with noticeable inconsistencies in design or color.",
+            "1_point_standard": "The speaker in the generated image matches the original design, shape, and color, ensuring it can be recognized as the same product."
+        },
+        {
+            "question": "Do the third and fourth output images accurately depict the unique details described in the prompt, such as a tree stump, outdoor scene with a backpack, and autumn leaves?",
+            "0_point_standard": "The scene lacks specific details or elements, making it difficult to identify the setting as described in the prompt. Check each sentence of the prompt for the third and fourth generated images, and score zero if any discrepancies are found.",
+            "1_point_standard": "The scene accurately includes specified details, such as outdoor elements, making the setting clear and aligned with the description."
+        },
+        {
+            "question": "Do the second and fourth output images maintain a consistent realistic photographic style with appropriate lighting, texture, and rendering quality?",
+            "0_point_standard": "There are noticeable style differences between these images, reducing the coherence of the series.",
+            "1_point_standard": "Both images exhibit a consistent realistic photographic style, with coherent lighting, texture, and rendering that enhance continuity."
+        },
+        {
+            "question": "Do the lighting and atmosphere in the first and second output images align with the descriptions, such as a bright office scene with sunlight streaming through a window and a cozy living room ambiance created by warm lighting?",
+            "0_point_standard": "The lighting and atmosphere do not align with the prompt descriptions, leading to a discordant feel in each scene.",
+            "1_point_standard": "The lighting and atmosphere match the descriptions well, with sunlight and warm lighting enhancing the ambiance of each respective scene."
+        },
+        {
+            "question": "Do the third and fourth output images clearly convey the feeling of the speaker being used in appropriate environments (e.g., outdoor and nature-friendly scenes) in line with the prompt's setting?",
+            "0_point_standard": "The scenes fail to convincingly portray a suitable usage environment, making the setting appear inappropriate.",
+            "1_point_standard": "The scenes appropriately match outdoor and nature settings, making the speaker's use feel relevant and purposeful in context."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/product_usage_scenario_generation_0001/images.txt b/dataset/product_usage_scenario_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3a7e7fca86813e5101ad75fb6794ebe27b4eaa90
--- /dev/null
+++ b/dataset/product_usage_scenario_generation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01mtDB9v1BviC0OUCmF_!!6000000000008-0-tps-2800-1575.jpg
diff --git a/dataset/product_usage_scenario_generation_0001/instruction.txt b/dataset/product_usage_scenario_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c591a2273009fbf89232a9ef81e47fbd224d261a
--- /dev/null
+++ b/dataset/product_usage_scenario_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a series of images based on the following text prompt, showcasing the use of this speaker in different application scenarios. A total of four images need to be generated, each representing a different scene. The first image shows the speaker on a modern-style office desk indoors, with a computer in the background, sunlight streaming through the window illuminating the desk, and the black speaker contrasting with the white and wooden elements of the desk. The second image shows the speaker on the armrest of a soft sofa, with fine fabric textures on the sofa, a warm table lamp nearby, creating a cozy and relaxed atmosphere. The third image shows the speaker outdoors on a tree stump, with sunlight filtering through the leaves in the background, a backpack placed next to the speaker, creating a natural and lively atmosphere. The fourth image shows the speaker placed on an outdoor wooden table, with a smartphone next to it playing music, autumn leaves scattered on the table, blending nature with technology.
\ No newline at end of file
diff --git a/dataset/product_usage_scenario_generation_0001/meta.json b/dataset/product_usage_scenario_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..a47f03de4371b2d97b8dc46fc709ee404b984137
--- /dev/null
+++ b/dataset/product_usage_scenario_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "product usage scenario generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0050",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/eval.json b/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ce16761dd9bbbff007025e6895d4bdf08d0955c
--- /dev/null
+++ b/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image contain the anime character from the input image, retaining its character identity?",
+            "0_point_standard": "The anime character is missing, or the character identity has significantly changed, making it unrecognizable.",
+            "1_point_standard": "The anime character is present and retains its character identity without significant changes."
+        },
+        {
+            "question": "Is the real-world background from the input image completely retained without significant alteration or removal of key elements?",
+            "0_point_standard": "The real-world background has been altered, removed, or significantly modified, disrupting the original scene.",
+            "1_point_standard": "The real-world background remains unchanged, preserving all original elements and details."
+        },
+        {
+            "question": "Is the anime character naturally integrated into the specified position or context in the real-world photo (e.g., standing, sitting, interacting with specific elements)?",
+            "0_point_standard": "The anime character is not correctly positioned or does not interact as described, disrupting the task context.",
+            "1_point_standard": "The anime character is correctly positioned and interacts with specific elements in the real-world photo as expected."
+        },
+        {
+            "question": "Are the lighting and shadows of the anime character consistent with the real environment in the photo?",
+            "0_point_standard": "The lighting or shadows of the anime character are inconsistent with the direction, intensity, or tone of the real environment, making the integration appear unnatural.",
+            "1_point_standard": "The lighting and shadows of the anime character seamlessly match the real environment, presenting a natural appearance."
+        },
+        {
+            "question": "Are the resolution, texture, and details of the anime character visually consistent with the real-world photo, avoiding sharpness or integration issues?",
+            "0_point_standard": "The resolution, texture, or details of the anime character conflict with the real-world photo, resulting in inconsistent and unbalanced visuals.",
+            "1_point_standard": "The resolution, texture, and details of the anime character are visually consistent with the real-world photo, enhancing the realism of the integration."
+        },
+        {
+            "question": "Does the overall composition maintain a harmonious balance between the stylized appearance of the anime character and the real-world photo, avoiding any elements that are too jarring?",
+            "0_point_standard": "The stylized appearance of the anime character conflicts with the real-world photo, disrupting the overall harmony and making the integration appear awkward.",
+            "1_point_standard": "The stylized appearance of the anime character is balanced with the real-world photo, presenting a unified and visually harmonious composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/images.txt b/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9c2f6cb4d61a348b1956284c7d4ea620891605d2
--- /dev/null
+++ b/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i2/O1CN01IBRdlT1WjRVs8GCrD_!!6000000002824-0-tps-480-853.jpg
+https://img.alicdn.com/imgextra/i4/O1CN0102f1DG1PU8yOhVwMO_!!6000000001843-0-tps-736-1104.jpg
diff --git a/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/instruction.txt b/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e2643e13e582a9cab1bac5751ca322cff9febccc
--- /dev/null
+++ b/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate an image that naturally integrates the anime character from the second image into the real-world photo in the first image. The anime character should sit on the bench in the first image, maintaining their pose of drinking a beverage while retaining their anime style and distinctive visual features, such as hairstyle, accessories, outfit design, and line-drawn rendering style. The character’s colors and shading should remain unchanged and consistent with the original. The autumn setting with fallen leaves and the realistic texture of the bench in the background should remain intact, ensuring a natural and visually coherent integration.
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/meta.json b/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..aec8f410b7df071272da8b73c55d5668866d493f
--- /dev/null
+++ b/dataset/real_and_anime_interaction_anime_character_in_real_world_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "anime character in real world",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0098",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/eval.json b/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..8e15933a851883a09560ac10066f8ac9ceeae9e6
--- /dev/null
+++ b/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image contain the anime character from the input image, and retain its character identity?",
+            "0_point_standard": "The anime character is missing, or the character's identity has significantly changed, making it unrecognizable.",
+            "1_point_standard": "The anime character is present and its identity is retained without significant changes."
+        },
+        {
+            "question": "Is the real-world background in the input image completely preserved, without significant alterations or removal of key elements?",
+            "0_point_standard": "The real-world background has been altered, removed, or significantly modified, disrupting the original scene.",
+            "1_point_standard": "The real-world background remains completely unchanged, retaining all original elements and details."
+        },
+        {
+            "question": "Is the anime character naturally integrated into the specified location or context within the real-world photo (e.g., standing, sitting, interacting with specific elements)?",
+            "0_point_standard": "The anime character is not correctly positioned or does not interact as described, disrupting the task context.",
+            "1_point_standard": "The anime character is correctly positioned and interacts as expected with specific elements in the real-world photo."
+        },
+        {
+            "question": "Are the lighting and shadows of the anime character consistent with the real environment in the photo?",
+            "0_point_standard": "The lighting or shadows of the anime character are inconsistent with the direction, intensity, or tone of the real environment, making the integration appear unnatural.",
+            "1_point_standard": "The lighting and shadows of the anime character seamlessly match the real environment, presenting a natural appearance."
+        },
+        {
+            "question": "Are the resolution, texture, and detail of the anime character visually consistent with the real-world photo, avoiding sharpness or blending issues?",
+            "0_point_standard": "The resolution, texture, or detail of the anime character conflicts with the real-world photo, resulting in inconsistent and unbalanced visual effects.",
+            "1_point_standard": "The resolution, texture, and detail of the anime character are visually consistent with the real-world photo, enhancing the realism of the integration."
+        },
+        {
+            "question": "Does the overall composition maintain a harmonious balance between the stylized appearance of the anime character and the real-world photo, avoiding any element being too jarring?",
+            "0_point_standard": "The stylized appearance of the anime character conflicts with the real-world photo, disrupting overall harmony and making the integration appear awkward.",
+            "1_point_standard": "A balance is maintained between the stylized appearance of the anime character and the real-world photo, presenting a unified and visually harmonious composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/images.txt b/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..432068a598bc53a9b46e0e33afc2eaa039bf5df1
--- /dev/null
+++ b/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i4/O1CN01seiINs1vulhbjm4xd_!!6000000006233-0-tps-736-1102.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01pBy5Ke1J4bBhrYx7I_!!6000000000975-0-tps-626-626.jpg
diff --git a/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/instruction.txt b/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ca3dc8fcd2f1ecc7ef76e03354834c7bd6348831
--- /dev/null
+++ b/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate an image that naturally integrates the cartoon dog character from the second image into the real-world scene of the first photo. Keep the background of the first photo completely unchanged, while maintaining the cartoon dog’s original style and appearance, including sunglasses and its smiling expression. Specifically, the cartoon dog should be sliding down the slide in the photo, appearing joyful and full of energy. Ensure that the cartoon dog’s drawing style remains consistent, with unchanged colors and character identity. Make sure the final image presents a seamless blend, with subtle adjustments to enhance overall realism.
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/meta.json b/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..d1b5b48970574bed48714709b99ebc40ed5a2883
--- /dev/null
+++ b/dataset/real_and_anime_interaction_anime_character_in_real_world_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "anime character in real world",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0098",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_mixed_portrait_0001/eval.json b/dataset/real_and_anime_interaction_mixed_portrait_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..3fb78bc0e47cb7c8e56fefeba621974d146dec7d
--- /dev/null
+++ b/dataset/real_and_anime_interaction_mixed_portrait_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image accurately apply the specified anime-realism hybrid effect within the designated area (e.g., the left half of the character, clothing, or specific features), while preserving the other parts of the image as instructed?",
+            "0_point_standard": "The hybrid effect is not applied in the correct area, or other unspecified parts of the image have been altered.",
+            "1_point_standard": "The hybrid effect is accurately applied in the specified area, and other parts remain unchanged as instructed."
+        },
+        {
+            "question": "Is the anime style applied in the output image consistent with the reference anime style (e.g., line quality, shading, and color scheme)?",
+            "0_point_standard": "There are significant deviations in the anime style from the reference style, leading to inconsistency or mismatch of elements.",
+            "1_point_standard": "The anime style closely matches the reference style, maintaining consistency in lines, shading, and colors."
+        },
+        {
+            "question": "Does the boundary between the anime and realistic parts transition naturally, presenting a coherent visual effect?",
+            "0_point_standard": "The transition between the anime and realistic parts is harsh or poorly blended, making the transition appear unnatural.",
+            "1_point_standard": "The boundary transitions smoothly and blends well, ensuring a natural integration of anime and realistic parts."
+        },
+        {
+            "question": "Do the added anime-style decorative elements (e.g., stars, ribbons, light effects) harmoniously integrate into the image, enhancing aesthetic appeal without disrupting the overall realistic effect?",
+            "0_point_standard": "Decorative elements are missing, poorly blended, or disrupt the overall harmony of the image.",
+            "1_point_standard": "Decorative elements are well integrated, enhancing the anime-realism hybrid effect while maintaining the original aesthetics."
+        },
+        {
+            "question": "Do the transformed anime parts (e.g., clothing, hair, or specific features) remain consistent with the original pose, texture, and identity of the character in the image?",
+            "0_point_standard": "The transformed parts are disconnected from the character's original pose, texture, or identity, leading to a lack of coherence.",
+            "1_point_standard": "The transformed parts are consistent with the character's pose, texture, and identity, ensuring a coherent and authentic blending effect."
+        },
+        {
+            "question": "Does the overall composition achieve a visually compelling balance between anime and realistic styles while maintaining the artistic goals of the task?",
+            "0_point_standard": "The image appears visually unbalanced, with either anime or realistic style being overly dominant or conflicting.",
+            "1_point_standard": "The image achieves a balance between anime and realistic styles that is visually appealing and aligns with the artistic goals of the task."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_mixed_portrait_0001/images.txt b/dataset/real_and_anime_interaction_mixed_portrait_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3cfd4b08d3791aa692238871a08ab13c48272af4
--- /dev/null
+++ b/dataset/real_and_anime_interaction_mixed_portrait_0001/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i4/O1CN01wmTuME1MmyPPYaTY4_!!6000000001478-0-tps-736-981.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01D23qJC1UdN1rT4Ajz_!!6000000002540-0-tps-736-1308.jpg
diff --git a/dataset/real_and_anime_interaction_mixed_portrait_0001/instruction.txt b/dataset/real_and_anime_interaction_mixed_portrait_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..849da7b825feb215584a9489a03375769ca014bd
--- /dev/null
+++ b/dataset/real_and_anime_interaction_mixed_portrait_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate an image where the character in the second picture is a blend of anime and realistic styles. Specifically, retain the left half of the character in its normal realistic style, and transform the right half into an anime style. Additionally, add some soft anime decorative elements (such as small stars or color stripes) throughout the image to enhance its overall harmony. Ensure the boundary between the anime and realistic parts transitions naturally, and keep the anime section’s style consistent with the anime style in the first image.
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_mixed_portrait_0001/meta.json b/dataset/real_and_anime_interaction_mixed_portrait_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..a00584b65f51123e87a68ef84c2fda0dacc401d7
--- /dev/null
+++ b/dataset/real_and_anime_interaction_mixed_portrait_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "mixed portrait",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0099",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/eval.json b/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a706bd586b62676562058a7c3cd0370a47086b9
--- /dev/null
+++ b/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image accurately position the real-life character in the anime-style background location specified in the text prompt?",
+            "0_point_standard": "The real-life character is not positioned in the specified location within the anime-style background, or the positioning is inaccurate.",
+            "1_point_standard": "The real-life character is correctly positioned in the specified location within the anime-style background."
+        },
+        {
+            "question": "Is the anime-style background completely preserved without any unintended alterations to its elements or visual composition?",
+            "0_point_standard": "The anime-style background has noticeable alterations or disruptions, deviating from the original input image.",
+            "1_point_standard": "The anime-style background is completely preserved as specified, with no unintended changes to its elements or composition."
+        },
+        {
+            "question": "Does the real-life character seamlessly blend into the anime-style background in terms of lighting, shadow alignment, and spatial consistency?",
+            "0_point_standard": "The real-life character blends poorly with the anime-style background, with inconsistencies in lighting, shadow, or spatial alignment.",
+            "1_point_standard": "The real-life character naturally integrates into the anime-style background, with good alignment of lighting, shadows, and spatial consistency."
+        },
+        {
+            "question": "Does the real-life character retain its realistic photographic style while maintaining identity consistency (such as facial features, pose, and clothing) with the prompt description?",
+            "0_point_standard": "The realistic style of the real-life character is altered, or its identity features (such as facial features, pose, or clothing) are inconsistent with the original input image.",
+            "1_point_standard": "The real-life character retains its realistic style and identity features, with all defining characteristics consistent with the original image."
+        },
+        {
+            "question": "Is the overall composition visually harmonious, with the real-life character naturally balanced with the anime-style background, avoiding visual conflicts?",
+            "0_point_standard": "The overall composition is visually unbalanced, with the real-life character conflicting with or appearing out of place in the anime-style background.",
+            "1_point_standard": "The overall composition is harmonious, with the real-life character complementing the anime-style background, resulting in a visually pleasing effect."
+        },
+        {
+            "question": "Do adjustments to the real-life character's pose or expression enhance its natural integration into the anime-style scene?",
+            "0_point_standard": "Adjustments to the real-life character's pose or expression appear unnatural, failing to enhance its integration with the scene.",
+            "1_point_standard": "Adjustments to the real-life character's pose or expression enhance its natural fit with the anime-style background, improving the scene's overall coherence."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/images.txt b/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c5a247f060c093d527a8925a83d7ca36ae34cd3
--- /dev/null
+++ b/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i3/O1CN01c5IRHz1dPvx9WMrBh_!!6000000003729-0-tps-736-1308.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01zfwcrl1g2WYa1cwzl_!!6000000004084-0-tps-736-920.jpg
diff --git a/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/instruction.txt b/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1e60871c8157e8c85753bd07096cb11f975790fe
--- /dev/null
+++ b/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate an image that seamlessly integrates the realistic character from the second image into the anime-style background of the first image. The character should be standing on the grassy field in the first image, blending naturally with the scene. The anime-style background must remain unchanged, while the realistic features, clothing, and pose of the character must match the original photo. Only minor adjustments to the pose or expression are allowed to ensure a natural fit, but the character’s identity (ID) must remain consistent with the original.
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/meta.json b/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..2e65848b1170517afab994f5f00fafdce7448a70
--- /dev/null
+++ b/dataset/real_and_anime_interaction_real_person_in_anime_background_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "real person in anime background",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0100",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/eval.json b/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..2a826eb2551d32178043f68db4bdf6f2dfa154e1
--- /dev/null
+++ b/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image accurately position real-life characters in the anime-style background location specified in the text prompt?",
+            "0_point_standard": "The real-life characters are not positioned in the specified location within the anime-style background, or the positioning is inaccurate.",
+            "1_point_standard": "The real-life characters are correctly positioned in the specified location within the anime-style background."
+        },
+        {
+            "question": "Is the anime-style background completely preserved without unexpected changes to its elements or visual composition?",
+            "0_point_standard": "There are noticeable changes or disruptions to the anime-style background, deviating from the original input image.",
+            "1_point_standard": "The anime-style background is fully preserved as specified, without unexpected changes to its elements or composition."
+        },
+        {
+            "question": "Do the real-life characters seamlessly integrate with the anime-style background in terms of lighting, shadow alignment, and spatial consistency?",
+            "0_point_standard": "The integration of real-life characters with the anime-style background is poor, with inconsistent lighting, shadows, or spatial alignment.",
+            "1_point_standard": "The real-life characters are naturally integrated into the anime-style background with good alignment of lighting, shadows, and spatial consistency."
+        },
+        {
+            "question": "Do the real-life characters maintain their realistic photographic style while remaining consistent in identity (such as facial features, posture, and clothing) as described in the prompt?",
+            "0_point_standard": "The realistic style of the real-life characters has been altered, or their identity features (such as facial features, posture, or clothing) are inconsistent with the original input image.",
+            "1_point_standard": "The real-life characters retain their realistic style and identity features, with all defining characteristics consistent with the original image."
+        },
+        {
+            "question": "Is the overall composition visually harmonious, with real-life characters naturally balanced with the anime-style background, avoiding visual conflict?",
+            "0_point_standard": "The overall composition is visually unbalanced, with the real-life characters conflicting with or appearing incongruous with the anime-style background.",
+            "1_point_standard": "The overall composition is harmonious, with the real-life characters complementing the anime-style background, resulting in a pleasing visual effect."
+        },
+        {
+            "question": "Do adjustments to the real-life characters' posture or expression enhance their natural integration into the anime-style scene?",
+            "0_point_standard": "Adjustments to the real-life characters' posture or expression appear unnatural, failing to enhance their integration with the scene.",
+            "1_point_standard": "Adjustments to the real-life characters' posture or expression improve their natural fit with the anime-style background, enhancing the overall coherence of the scene."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/images.txt b/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c7714c8224ea5ea18fec0292b5204f1323847f8f
--- /dev/null
+++ b/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i1/O1CN012zebG823isLEUjtNc_!!6000000007290-0-tps-736-1472.jpg
+https://img.alicdn.com/imgextra/i4/O1CN01vUe3hk1suMVEtFx0e_!!6000000005826-0-tps-736-981.jpg
diff --git a/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/instruction.txt b/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4e023a3266c78c87d0dc01faa423bf06f1452366
--- /dev/null
+++ b/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/instruction.txt
@@ -0,0 +1 @@
+Generate an image that naturally integrates the real-life boy from the second image into the anime-style background of the first image. Keep the anime background of the first image unchanged and place the boy in the center of the basketball court, showing him preparing to shoot a basketball, holding the ball in his hands and focusing on the hoop. Ensure the boy retains the realistic photographic style and clear details, with his ID (e.g., body type, facial features) consistent with the original photo. The generated image should display a natural blend of the boy with the anime-style background, maintaining overall visual harmony.
\ No newline at end of file
diff --git a/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/meta.json b/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..74f1d011c2407c4c7a01890757b28c788b4cd429
--- /dev/null
+++ b/dataset/real_and_anime_interaction_real_person_in_anime_background_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "real person in anime background",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0100",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/same_pose_generation_0001/eval.json b/dataset/same_pose_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f67fcd11a5a6361f9cf17ea432e61685a09d219
--- /dev/null
+++ b/dataset/same_pose_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Do the generated images ensure that each character maintains a strong visual connection to the original character definition sheet?",
+            "0_point_standard": "The characters in the generated images do not resemble the original characters in terms of key features or identifying elements.",
+            "1_point_standard": "The characters in the generated images closely match the distinct features and style of the original character definition sheet."
+        },
+        {
+            "question": "Are the poses of the characters in all generated images consistent with those specified in the task?",
+            "0_point_standard": "Characters are depicted in different poses, failing to adhere to the consistent pose requirement.",
+            "1_point_standard": "All characters are depicted in the same specified pose, ensuring consistency across images."
+        },
+        {
+            "question": "Does the model accurately follow the specific instructions regarding character attributes (e.g., clothing or accessories) from the text description?",
+            "0_point_standard": "The model fails to incorporate the specified attributes from the text description, or the attributes are inaccurately represented.",
+            "1_point_standard": "The model accurately integrates the specified attributes from the text description into each character image."
+        },
+        {
+            "question": "Are any parts of the character images unnecessarily altered or distorted beyond the specified modifications?",
+            "0_point_standard": "Unnecessary changes or distortions exist, affecting parts of the characters that were not intended to be modified.",
+            "1_point_standard": "Only the specified modifications were made, with no unnecessary changes to other parts of the images."
+        },
+        {
+            "question": "Do the generated images maintain a high level of aesthetic quality and visual appeal?",
+            "0_point_standard": "The character images lack visual appeal and are of poor quality in detail and composition.",
+            "1_point_standard": "The character images are visually appealing, with high-quality details and pleasing composition."
+        },
+        {
+            "question": "Are the character images stylistically consistent with each other, forming a coherent and unified set?",
+            "0_point_standard": "There are noticeable stylistic inconsistencies between the character images, disrupting the visual harmony of the set.",
+            "1_point_standard": "The character images are stylistically consistent, creating a cohesive and unified visual presentation."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/same_pose_generation_0001/images.txt b/dataset/same_pose_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f4485cde396dba08d50231103569f694f9b5da2d
--- /dev/null
+++ b/dataset/same_pose_generation_0001/images.txt
@@ -0,0 +1,4 @@
+https://img.alicdn.com/imgextra/i2/O1CN018Iobi81dfVQ1i7UaO_!!6000000003763-0-tps-1920-1920.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01xjbPAu1iSkSws67Bi_!!6000000004412-0-tps-1920-1920.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01eihKzU275iwsHwnEj_!!6000000007746-0-tps-1920-1920.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01KBgcKA1yeEFCtsDjC_!!6000000006603-0-tps-1920-1920.jpg
diff --git a/dataset/same_pose_generation_0001/instruction.txt b/dataset/same_pose_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4793499869d22273f9117235c7d64dc4b7c4d052
--- /dev/null
+++ b/dataset/same_pose_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Using the four input images of the characters, generate 4 new images, where each image corresponds to one character from the input, with all characters performing the same specified action. The specified action is “waving.” In each image, the character should perform the waving gesture, and their appearance should remain consistent with the input image. Adjustments to items held by the characters can be made as needed, and the background can be modified, but this should not affect the overall composition. The final output should be 4 images, each featuring one of the original characters, all performing the waving gesture.
\ No newline at end of file
diff --git a/dataset/same_pose_generation_0001/meta.json b/dataset/same_pose_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ac588be0bc376a05948a57f9d03092f30860f5d2
--- /dev/null
+++ b/dataset/same_pose_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "dynamic characters with same pose",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": true,
+    "uid": "0034",
+    "output_image_count": 4,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/sculpture_generation_0001/eval.json b/dataset/sculpture_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..ad88837ff96f7bb9f3b10455919d84a40cf30cca
--- /dev/null
+++ b/dataset/sculpture_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sculpture form match the text description, and is the overall structure and posture accurate?",
+            "0_point_standard": "The sculpture form does not match the description, and there are significant deviations or deficiencies in the structure or posture.",
+            "1_point_standard": "The sculpture form matches the description, and the structure and posture are accurate."
+        },
+        {
+            "question": "Does the generated sculpture image have a clear 3D effect and meet the spatial requirements of a sculpture?",
+            "0_point_standard": "The image lacks a 3D effect, has insufficient spatial depth, and appears flat.",
+            "1_point_standard": "The image has a strong 3D effect, showing depth and meeting the spatial requirements of a sculpture."
+        },
+        {
+            "question": "Does the texture representation of the sculpture match the material requirements described in the text (e.g., smooth, rough, metallic)?",
+            "0_point_standard": "The texture does not match the text description, the detail is poor, and it lacks material realism.",
+            "1_point_standard": "The texture matches the text description, is rich in detail, and the material texture is realistic."
+        },
+        {
+            "question": "Has the model accurately implemented the specific details pointed out in the text (e.g., texture, ornaments, or specific decorative elements)?",
+            "0_point_standard": "The image lacks or misunderstands the specified details in the text, resulting in inaccurate presentation.",
+            "1_point_standard": "The image accurately presents all specified details, with fine and natural design."
+        },
+        {
+            "question": "Does the style and visual effect of the sculpture match the description in the text (e.g., modern, classical, abstract)?",
+            "0_point_standard": "The style significantly deviates from the text description and fails to convey the specified style.",
+            "1_point_standard": "The sculpture style matches the text description, and the visual effect meets expectations."
+        },
+        {
+            "question": "Does the overall aesthetic quality of the sculpture image reach a professional level of sculpture design with strong visual impact?",
+            "0_point_standard": "The sculpture image lacks aesthetic appeal, has insufficient visual impact, and lacks design sense.",
+            "1_point_standard": "The sculpture image has excellent aesthetic quality, strong visual impact, and reaches a professional design level."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/sculpture_generation_0001/images.txt b/dataset/sculpture_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/sculpture_generation_0001/instruction.txt b/dataset/sculpture_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5d9088e97a74edcde8d02eb4d0c0990d99e536b2
--- /dev/null
+++ b/dataset/sculpture_generation_0001/instruction.txt
@@ -0,0 +1 @@
+This image features an abstract sculpture with a figure playing the violin, characterized by a sense of fluidity and surreal distortion. The figure's head, shoulders, and arms retain relatively realistic details, while the torso and legs stretch and twist in an exaggerated, flowing manner, giving the impression of movement and transformation. The figure's right shoulder holds a violin, with both hands elegantly positioned in a playing posture, creating an artistic and focused expression. The legs are elongated and twisted in an extreme, irregular way, especially around the knees and ankles, adding a sense of weightlessness and floating. The figure stands on an irregular base that resembles a smooth rock or abstract wave-like structure, with a hollow section, enhancing the dynamic and mysterious nature of the piece. The overall tone is a gray-black color, with the surface appearing smooth like metal, reflecting subtle highlights. The background is pure black, emphasizing the sculpture's contours and three-dimensionality.
\ No newline at end of file
diff --git a/dataset/sculpture_generation_0001/meta.json b/dataset/sculpture_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..34b462e3d3dbf7e83551d50a11257bdd1281b1b5
--- /dev/null
+++ b/dataset/sculpture_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "sculpture generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0029",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/sculpture_generation_0003/eval.json b/dataset/sculpture_generation_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..0d7ee913c3e2578842e33b64b6d30adb217fbf45
--- /dev/null
+++ b/dataset/sculpture_generation_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the sculpture match the textual description, and is the overall structure and posture accurate?",
+            "0_point_standard": "The sculpture does not match the description, with obvious deviations or deficiencies in structure or posture.",
+            "1_point_standard": "The sculpture matches the description, with accurate structure and posture."
+        },
+        {
+            "question": "Does the generated sculpture image have a clear 3D effect that meets the spatial requirements of a sculpture?",
+            "0_point_standard": "The image lacks a 3D effect, has insufficient spatial depth, and appears flat.",
+            "1_point_standard": "The image has a strong 3D effect, showing depth and meeting the spatial requirements of a sculpture."
+        },
+        {
+            "question": "Does the texture representation of the sculpture match the material requirements described in the text (e.g., smooth, rough, metallic)?",
+            "0_point_standard": "The texture does not match the text description, with poor detail expression and a lack of material realism.",
+            "1_point_standard": "The texture matches the text description, with rich details and realistic material texture."
+        },
+        {
+            "question": "Does the model accurately implement the specific details pointed out in the text (e.g., texture, ornaments, or specific decorative elements)?",
+            "0_point_standard": "The image lacks or misunderstands the specified details in the text, with inaccurate presentation.",
+            "1_point_standard": "The image accurately presents all specified details, with intricate and natural design."
+        },
+        {
+            "question": "Does the style and visual effect of the sculpture match the description in the text (e.g., modern, classical, abstract)?",
+            "0_point_standard": "The style significantly deviates from the text description, failing to convey the specified style.",
+            "1_point_standard": "The sculpture's style matches the text description, achieving the expected visual effect."
+        },
+        {
+            "question": "Does the overall aesthetic quality of the sculpture image reach the professional standard of sculpture design, with strong visual impact?",
+            "0_point_standard": "The sculpture image lacks aesthetic appeal, has insufficient visual impact, and lacks design sense.",
+            "1_point_standard": "The sculpture image has excellent aesthetic quality and strong visual impact, reaching professional design standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/sculpture_generation_0003/images.txt b/dataset/sculpture_generation_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/sculpture_generation_0003/instruction.txt b/dataset/sculpture_generation_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5be2db004f67d33a5c9c9d3579f0fe9e6a35c80c
--- /dev/null
+++ b/dataset/sculpture_generation_0003/instruction.txt
@@ -0,0 +1 @@
+This image features a highly futuristic and mechanical-themed sculpture, depicting a knight and his mount, both exuding a strong sci-fi aesthetic. The knight is fully armored, with only his face exposed, sporting a long beard and a stern expression. His armor is adorned with intricate mechanical details, including bolts and seams across the shoulders, chest, and legs, all crafted from metal, emphasizing a sense of strength and durability. The knight grips a massive, curved blade in both hands; the blade is wide, and the hilt is equipped with mechanical components, blending traditional weaponry with futuristic design. The knight's mount is a mechanical creature, resembling a hybrid between a wolf and a horse. Its limbs are constructed with mechanical parts, with joints and feet displaying gears, screws, and metallic plates, showcasing a highly detailed technological design. The mount's head resembles that of a wolf, with an open mouth, ready to strike at any moment. Its tail is long and thin, hanging down like a whip. The entire sculpture is rendered in a dark metallic gray, reflecting subtle highlights that enhance the fusion of futuristic technology and mechanical elements. The sculpture is mounted on a simple rectangular platform, dark in color, providing a stark contrast to the intricacy and striking presence of the knight and his mechanical beast.
\ No newline at end of file
diff --git a/dataset/sculpture_generation_0003/meta.json b/dataset/sculpture_generation_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..3def2d6338ba98e917fbb0b61181867956ef056b
--- /dev/null
+++ b/dataset/sculpture_generation_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "sculpture generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0029",
+    "output_image_count": 1,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/special_effect_adding_0001/auto_eval.jsonl b/dataset/special_effect_adding_0001/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..c685ca0de41b90d07dba105547c9dd567cca1203
--- /dev/null
+++ b/dataset/special_effect_adding_0001/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": ["0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second input image and output image of the response provided by a student. The task objective is to add a special effect to the third input image, similar to the effect change from the first input image to the second input image.. \nThe text requirement is:\nThe second image applies an artistic effect to the original first image. Please apply this same effect to the third image. This effect should include a stylized, graphic rendering of the portrait with bold, simplified colors and a circular background element in a contrasting color behind the subject’s head. Ensure that all primary elements of the third image, including the subject’s facial features, expression, hairstyle, and clothing, remain unchanged. The added effect should enhance the image in a way that mirrors the transformation from the first to the second image, without altering the original composition or details.\nYour review question is:\nDoes the output image feature a circular background element similar to the second input image? Score 0: The output image does not feature a circular background element or has a background shape that significantly deviates from the circular structure in the second input image. Score 1: The output image includes a circular background element positioned behind the subject’s head, similar to the visual structure in the second input image. The circular element complements the subject’s position and enhances the composition.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second input image and output image of the response provided by a student. The task objective is to add a special effect to the third input image, similar to the effect change from the first input image to the second input image.. \nThe text requirement is:\nThe second image applies an artistic effect to the original first image. Please apply this same effect to the third image. This effect should include a stylized, graphic rendering of the portrait with bold, simplified colors and a circular background element in a contrasting color behind the subject’s head. Ensure that all primary elements of the third image, including the subject’s facial features, expression, hairstyle, and clothing, remain unchanged. The added effect should enhance the image in a way that mirrors the transformation from the first to the second image, without altering the original composition or details.\nYour review question is:\nIs the visual style of the output image consistent with the stylized effect in the second input image? Score 0: The output image lacks the stylized, graphic quality observed in the second input image, either retaining too much photographic detail or applying an inconsistent visual effect. Score 1: The output image adopts a graphic, simplified color style that resembles the stylized effect applied in the second input image, with bold, flat color areas and minimal shading.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0003.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third input image and output image of the response provided by a student. The task objective is to add a special effect to the third input image, similar to the effect change from the first input image to the second input image.. \nThe text requirement is:\nThe second image applies an artistic effect to the original first image. Please apply this same effect to the third image. This effect should include a stylized, graphic rendering of the portrait with bold, simplified colors and a circular background element in a contrasting color behind the subject’s head. Ensure that all primary elements of the third image, including the subject’s facial features, expression, hairstyle, and clothing, remain unchanged. The added effect should enhance the image in a way that mirrors the transformation from the first to the second image, without altering the original composition or details.\nYour review question is:\nDoes the output image accurately depict the same subject as the third input image, retaining key facial features, pose, and expression? Score 0: The subject in the output image does not resemble the subject in the third input image, with alterations in facial features, pose, or expression that compromise the subject’s identity. Score 1: The output image clearly represents the same subject as the third input image, with preserved facial features, pose, and expression. The stylized effect does not obscure the identity of the subject.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0003.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third input image and output image of the response provided by a student. The task objective is to add a special effect to the third input image, similar to the effect change from the first input image to the second input image.. \nThe text requirement is:\nThe second image applies an artistic effect to the original first image. Please apply this same effect to the third image. This effect should include a stylized, graphic rendering of the portrait with bold, simplified colors and a circular background element in a contrasting color behind the subject’s head. Ensure that all primary elements of the third image, including the subject’s facial features, expression, hairstyle, and clothing, remain unchanged. The added effect should enhance the image in a way that mirrors the transformation from the first to the second image, without altering the original composition or details.\nYour review question is:\nDoes the output image maintain the hair and clothing details consistent with the third input image? Score 0: The hairstyle or clothing in the output image differs noticeably from the third input image, resulting in a loss of detail or inconsistency with the subject’s original appearance. Score 1: The hairstyle and clothing details in the output image closely match those in the third input image, with no significant alterations due to the applied effect.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second input image and output image of the response provided by a student. The task objective is to add a special effect to the third input image, similar to the effect change from the first input image to the second input image.. \nThe text requirement is:\nThe second image applies an artistic effect to the original first image. Please apply this same effect to the third image. This effect should include a stylized, graphic rendering of the portrait with bold, simplified colors and a circular background element in a contrasting color behind the subject’s head. Ensure that all primary elements of the third image, including the subject’s facial features, expression, hairstyle, and clothing, remain unchanged. The added effect should enhance the image in a way that mirrors the transformation from the first to the second image, without altering the original composition or details.\nYour review question is:\nIs the overall color contrast between the subject and the background consistent with the second input image? Score 0: The color contrast between the subject and the background in the output image is too weak or too strong, failing to achieve the balanced separation observed in the second input image. Score 1: The color contrast in the output image between the subject and the background is similar to that in the second input image, allowing the subject to stand out effectively against the background.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0002.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second input image and output image of the response provided by a student. The task objective is to add a special effect to the third input image, similar to the effect change from the first input image to the second input image.. \nThe text requirement is:\nThe second image applies an artistic effect to the original first image. Please apply this same effect to the third image. This effect should include a stylized, graphic rendering of the portrait with bold, simplified colors and a circular background element in a contrasting color behind the subject’s head. Ensure that all primary elements of the third image, including the subject’s facial features, expression, hairstyle, and clothing, remain unchanged. The added effect should enhance the image in a way that mirrors the transformation from the first to the second image, without altering the original composition or details.\nYour review question is:\nAre the edge lines and contours around the subject consistent with the stylized effect in the second input image? Score 0: The output image lacks clear edge lines or contours, or they appear distorted, reducing the effectiveness of the stylized effect compared to the second input image. Score 1: The output image includes defined edge lines and contours around the subject that align with the graphic stylization in the second input image, enhancing the overall composition without distorting the subject.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/special_effect_adding_0001/eval.json b/dataset/special_effect_adding_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..a5f8e884c2aef8ee96eb8191d46a457a856599c1
--- /dev/null
+++ b/dataset/special_effect_adding_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output image contain circular background elements similar to the second input image?",
+            "0_point_standard": "The output image does not have circular background elements, or the background shape significantly deviates from the circular structure in the second input image.",
+            "1_point_standard": "The output image contains a circular background element located behind the main subject's head, similar to the visual structure of the second input image. This circular element complements the subject's position, enhancing the composition effect."
+        },
+        {
+            "question": "Is the visual style of the output image consistent with the stylized effect in the second input image?",
+            "0_point_standard": "The output image lacks the stylized graphic effects of the second input image, retaining too much photographic detail or having inconsistent visual effects.",
+            "1_point_standard": "The output image adopts a graphic, simplified color style similar to the second input image, featuring bold, flat color areas with minimal shadow effects."
+        },
+        {
+            "question": "Does the output image accurately depict the same subject as the third input image, retaining key facial features, posture, and expression?",
+            "0_point_standard": "The subject in the output image does not resemble the one in the third input image, with changes in facial features, posture, or expression affecting the subject's recognizability.",
+            "1_point_standard": "The output image clearly depicts the same subject as the third input image, retaining facial features, posture, and expression. The stylized effect does not obscure the subject's identity."
+        },
+        {
+            "question": "Are the hairstyle and clothing details in the output image consistent with the third input image?",
+            "0_point_standard": "The hairstyle or clothing in the output image is noticeably different from the third input image, resulting in detail loss or inconsistency with the subject's original appearance.",
+            "1_point_standard": "The hairstyle and clothing details in the output image closely match the third input image, with the applied effects not significantly altering these details."
+        },
+        {
+            "question": "Is the overall color contrast between the subject and the background in the output image consistent with the second input image?",
+            "0_point_standard": "The color contrast between the subject and the background in the output image is either too weak or too strong, failing to achieve the balanced separation effect seen in the second input image.",
+            "1_point_standard": "The color contrast between the subject and the background in the output image is similar to the second input image, effectively allowing the subject to stand out against the background."
+        },
+        {
+            "question": "Are the edge lines and contours of the subject in the output image consistent with the stylized effect in the second input image?",
+            "0_point_standard": "The output image lacks clear edge lines or contours, or they appear distorted, reducing the expressiveness of the stylized effect and making it inconsistent with the second input image.",
+            "1_point_standard": "The output image includes well-defined edge lines and contours consistent with the graphic stylized effect of the second input image, enhancing the overall composition without distorting the subject."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/special_effect_adding_0001/images.txt b/dataset/special_effect_adding_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..964e6eb075621eca8b121a44f2a31cef546855ca
--- /dev/null
+++ b/dataset/special_effect_adding_0001/images.txt
@@ -0,0 +1,3 @@
+https://img.alicdn.com/imgextra/i1/O1CN01hQbNul1iZcPIz92HR_!!6000000004427-0-tps-1428-848.jpg
+https://img.alicdn.com/imgextra/i4/O1CN01qBPQ04259GmfBimX3_!!6000000007483-0-tps-1430-821.jpg
+https://img.alicdn.com/imgextra/i1/O1CN01uYdvvs1L13LzsoZRd_!!6000000001238-0-tps-1428-847.jpg
diff --git a/dataset/special_effect_adding_0001/instruction.txt b/dataset/special_effect_adding_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3099f9d443f4c7cec4da238c136f69a5a66bfbac
--- /dev/null
+++ b/dataset/special_effect_adding_0001/instruction.txt
@@ -0,0 +1 @@
+The second image applies an artistic effect to the original first image. Please apply this same effect to the third image. This effect should include a stylized, graphic rendering of the portrait with bold, simplified colors and a circular background element in a contrasting color behind the subject’s head. Ensure that all primary elements of the third image, including the subject’s facial features, expression, hairstyle, and clothing, remain unchanged. The added effect should enhance the image in a way that mirrors the transformation from the first to the second image, without altering the original composition or details.
\ No newline at end of file
diff --git a/dataset/special_effect_adding_0001/meta.json b/dataset/special_effect_adding_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b591ae94101c7dbe96e7fbe483a218ec0cb773bb
--- /dev/null
+++ b/dataset/special_effect_adding_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "special effect adding",
+    "num_of_cases": 3,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0085",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/stop-motion_animation_generation_0002/auto_eval.jsonl b/dataset/stop-motion_animation_generation_0002/auto_eval.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..537575b43f482280e72f50aadaadc0b684b5d40b
--- /dev/null
+++ b/dataset/stop-motion_animation_generation_0002/auto_eval.jsonl
@@ -0,0 +1,6 @@
+{"input_images": [], "output_images": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "question": "Is the number in the image the digit 4? 0 points: The number in the image is not the digit 4; 1 point: The number in the image is the digit 4. \nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0001.jpg"], "output_images": ["0001.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first input image and first interpolated image of the response provided by a student. The task objective is to interpolated frames for the given key frames.\nThe text requirement is:\nInsert 4 frames between two given keyframes, generating one image for each frame. The goal is to maintain scene continuity and consistency. The first frame shows a warrior swinging a long spear, standing in the center of an indoor setting with traditional pillars and windows in the background, while his red cape flows behind him. The final frame shows the warrior at the end of his swing, facing a group of enemies who have fallen to the ground to avoid his attack, with the green lighting in the scene becoming brighter. The four interpolated frames should depict the transition from the initial spear swing to the final pose. The first interpolated frame shows the warrior slightly moving forward, with the tip of the spear beginning to accelerate, and the enemies starting to dodge. In the second interpolated frame, the spear moves faster, the warrior's cape flares out, and some enemies begin to kneel. The third interpolated frame shows the spear nearing its final position, with most enemies on the ground, and the lighting becoming more focused. The fourth interpolated frame shows the warrior nearly completing his move, with the spear almost in its final position, and the lighting in the scene becoming bright and concentrated as all enemies fall to the ground.\nYour review question is:\nDoes the transition from the initial spear swing in the first input image to the beginning of forward motion in the first interpolated frame show a logical progression in movement? 0 points: The transition lacks clear progression, making the movement appear sudden or disjointed. 
1 point: The movement is logically continuous, showing a smooth transition from the initial position to the beginning of the spear’s acceleration.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0002.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the second and fourth interpolated frames of the response provided by a student. The task objective is to interpolated frames for the given key frames.\nThe text requirement is:\nInsert 4 frames between two given keyframes, generating one image for each frame. The goal is to maintain scene continuity and consistency. The first frame shows a warrior swinging a long spear, standing in the center of an indoor setting with traditional pillars and windows in the background, while his red cape flows behind him. The final frame shows the warrior at the end of his swing, facing a group of enemies who have fallen to the ground to avoid his attack, with the green lighting in the scene becoming brighter. The four interpolated frames should depict the transition from the initial spear swing to the final pose. The first interpolated frame shows the warrior slightly moving forward, with the tip of the spear beginning to accelerate, and the enemies starting to dodge. In the second interpolated frame, the spear moves faster, the warrior's cape flares out, and some enemies begin to kneel. The third interpolated frame shows the spear nearing its final position, with most enemies on the ground, and the lighting becoming more focused. The fourth interpolated frame shows the warrior nearly completing his move, with the spear almost in its final position, and the lighting in the scene becoming bright and concentrated as all enemies fall to the ground.\nYour review question is:\nDo the second and fourth interpolated frames maintain a smooth flow in the action, with the warrior’s position, spear movement, and enemy reactions transitioning naturally? 0 points: The action flow is inconsistent, with abrupt changes in the warrior’s movement or enemy reactions that disrupt continuity. 
1 point: The action flow is smooth and consistent, showing a natural progression of the warrior’s swing and enemy responses.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0002.jpg"], "output_images": ["0003.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the third interpolated frame and final input image of the response provided by a student. The task objective is to interpolated frames for the given key frames.\nThe text requirement is:\nInsert 4 frames between two given keyframes, generating one image for each frame. The goal is to maintain scene continuity and consistency. The first frame shows a warrior swinging a long spear, standing in the center of an indoor setting with traditional pillars and windows in the background, while his red cape flows behind him. The final frame shows the warrior at the end of his swing, facing a group of enemies who have fallen to the ground to avoid his attack, with the green lighting in the scene becoming brighter. The four interpolated frames should depict the transition from the initial spear swing to the final pose. The first interpolated frame shows the warrior slightly moving forward, with the tip of the spear beginning to accelerate, and the enemies starting to dodge. In the second interpolated frame, the spear moves faster, the warrior's cape flares out, and some enemies begin to kneel. The third interpolated frame shows the spear nearing its final position, with most enemies on the ground, and the lighting becoming more focused. The fourth interpolated frame shows the warrior nearly completing his move, with the spear almost in its final position, and the lighting in the scene becoming bright and concentrated as all enemies fall to the ground.\nYour review question is:\nDoes the third interpolated frame maintain a consistent visual style with the final input image, including line quality, lighting, and character rendering? 0 points: The style differs noticeably between the frames, reducing the cohesion of the series. 
1 point: The style is consistent, with matching line quality, lighting, and rendering that enhance continuity.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": [], "output_images": ["0001.jpg", "0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first and fourth interpolated frames of the response provided by a student. The task objective is to interpolated frames for the given key frames.\nThe text requirement is:\nInsert 4 frames between two given keyframes, generating one image for each frame. The goal is to maintain scene continuity and consistency. The first frame shows a warrior swinging a long spear, standing in the center of an indoor setting with traditional pillars and windows in the background, while his red cape flows behind him. The final frame shows the warrior at the end of his swing, facing a group of enemies who have fallen to the ground to avoid his attack, with the green lighting in the scene becoming brighter. The four interpolated frames should depict the transition from the initial spear swing to the final pose. The first interpolated frame shows the warrior slightly moving forward, with the tip of the spear beginning to accelerate, and the enemies starting to dodge. In the second interpolated frame, the spear moves faster, the warrior's cape flares out, and some enemies begin to kneel. The third interpolated frame shows the spear nearing its final position, with most enemies on the ground, and the lighting becoming more focused. The fourth interpolated frame shows the warrior nearly completing his move, with the spear almost in its final position, and the lighting in the scene becoming bright and concentrated as all enemies fall to the ground.\nYour review question is:\nDo the lighting changes from the first to the fourth interpolated frames align with the description, showing a gradual increase in brightness and focus? 0 points: The lighting does not progress as described, either remaining static or changing abruptly without a clear transition. 
1 point: The lighting transition is smooth and follows the description, with the brightness and focus gradually intensifying.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
+{"input_images": ["0001.jpg"], "output_images": ["0004.jpg"], "question": "You are a professional image designer, and you are now required to conduct a strict evaluation of the following design work. This is the first input image and final interpolated frame of the response provided by a student. The task objective is to interpolated frames for the given key frames.\nThe text requirement is:\nInsert 4 frames between two given keyframes, generating one image for each frame. The goal is to maintain scene continuity and consistency. The first frame shows a warrior swinging a long spear, standing in the center of an indoor setting with traditional pillars and windows in the background, while his red cape flows behind him. The final frame shows the warrior at the end of his swing, facing a group of enemies who have fallen to the ground to avoid his attack, with the green lighting in the scene becoming brighter. The four interpolated frames should depict the transition from the initial spear swing to the final pose. The first interpolated frame shows the warrior slightly moving forward, with the tip of the spear beginning to accelerate, and the enemies starting to dodge. In the second interpolated frame, the spear moves faster, the warrior's cape flares out, and some enemies begin to kneel. The third interpolated frame shows the spear nearing its final position, with most enemies on the ground, and the lighting becoming more focused. The fourth interpolated frame shows the warrior nearly completing his move, with the spear almost in its final position, and the lighting in the scene becoming bright and concentrated as all enemies fall to the ground.\nYour review question is:\nDoes the change in enemy positions from the first input image to the final interpolated frame display a consistent reaction to the warrior’s attack, with enemies progressively dodging or falling? 
0 points: The enemy reactions are inconsistent or unclear, making the progression of their movements confusing. 1 point: The enemy reactions are consistent, with a clear progression showing them dodging or falling as the warrior’s attack nears completion.\nUse this JSON schema:\nEvaluation = {'score': int, 'reason': str}"}
diff --git a/dataset/stop-motion_animation_generation_0002/eval.json b/dataset/stop-motion_animation_generation_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..667389aee94de6237d408206eb8c229dc878dcae
--- /dev/null
+++ b/dataset/stop-motion_animation_generation_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the number of output images meet the requirements described in the text?",
+            "0_point_standard": "The number of output images does not meet the requirements.",
+            "1_point_standard": "The number of output images meets the requirements."
+        },
+        {
+            "question": "Does the transition from the initial spear swing in the first input image to the beginning of forward motion in the first interpolated frame show a logical progression of movement?",
+            "0_point_standard": "The motion lacks clear progression, making the action appear sudden or discontinuous.",
+            "1_point_standard": "The motion is logically continuous, with a smooth transition from the initial position to the acceleration of the spear."
+        },
+        {
+            "question": "Do the second and fourth interpolated frames maintain smooth motion flow, with the warrior's position, spear action, and enemy reaction transitioning naturally?",
+            "0_point_standard": "The motion flow is inconsistent, with abrupt changes in the warrior's actions or the enemy's reactions, disrupting continuity.",
+            "1_point_standard": "The motion flow is smooth and consistent, showcasing a natural progression of the warrior's spear swing and the enemies' reactions."
+        },
+        {
+            "question": "Does the third interpolated frame maintain visual style consistency with the final input image, including line quality, lighting, and character rendering?",
+            "0_point_standard": "There is a noticeable difference in style between frames, reducing the coherence of the series.",
+            "1_point_standard": "The style is consistent, with matching line quality, lighting, and rendering, enhancing overall coherence."
+        },
+        {
+            "question": "Does the lighting change from the first to the fourth interpolated frame align with the description, gradually increasing in brightness and focus?",
+            "0_point_standard": "The lighting change does not follow the description, remaining static or changing abruptly, lacking a clear transition.",
+            "1_point_standard": "The lighting transition is smooth and aligns with the description, with brightness and focus gradually increasing."
+        },
+        {
+            "question": "Does the change in the enemies' positions from the first input image to the final interpolated frame demonstrate a consistent reaction to the warrior's attack, with the enemies gradually dodging or falling?",
+            "0_point_standard": "The enemy's reaction is inconsistent or unclear, making the progression of their actions confusing.",
+            "1_point_standard": "The enemy's reaction is consistent, clearly showing the progression of dodging or falling as the warrior's attack approaches."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/stop-motion_animation_generation_0002/images.txt b/dataset/stop-motion_animation_generation_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6fafdfed324da73a00f6fbf66ed03ec716e63d92
--- /dev/null
+++ b/dataset/stop-motion_animation_generation_0002/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i1/O1CN01Dgvhi71ZOmUgJ1XMf_!!6000000003185-0-tps-1540-864.jpg
+https://img.alicdn.com/imgextra/i3/O1CN01lzArNV1giezifltI5_!!6000000004176-0-tps-1556-870.jpg
diff --git a/dataset/stop-motion_animation_generation_0002/instruction.txt b/dataset/stop-motion_animation_generation_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f6f2db0db904efa7313beabee9ad285ce8c0a8dd
--- /dev/null
+++ b/dataset/stop-motion_animation_generation_0002/instruction.txt
@@ -0,0 +1 @@
+Insert 4 frames between two given keyframes, generating one image for each frame. The goal is to maintain scene continuity and consistency. The first frame shows a warrior swinging a long spear, standing in the center of an indoor setting with traditional pillars and windows in the background, while his red cape flows behind him. The final frame shows the warrior at the end of his swing, facing a group of enemies who have fallen to the ground to avoid his attack, with the green lighting in the scene becoming brighter. The four interpolated frames should depict the transition from the initial spear swing to the final pose. The first interpolated frame shows the warrior slightly moving forward, with the tip of the spear beginning to accelerate, and the enemies starting to dodge. In the second interpolated frame, the spear moves faster, the warrior's cape flares out, and some enemies begin to kneel. The third interpolated frame shows the spear nearing its final position, with most enemies on the ground, and the lighting becoming more focused. The fourth interpolated frame shows the warrior nearly completing his move, with the spear almost in its final position, and the lighting in the scene becoming bright and concentrated as all enemies fall to the ground.
\ No newline at end of file
diff --git a/dataset/stop-motion_animation_generation_0002/meta.json b/dataset/stop-motion_animation_generation_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..5484825eb6ff4f9ed8af9767d3c3b5d3d8884052
--- /dev/null
+++ b/dataset/stop-motion_animation_generation_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "stop-motion animation generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": true,
+    "uid": "0049",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_editing_art_style_editing_0001/.DS_Store b/dataset/style_editing_art_style_editing_0001/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/dataset/style_editing_art_style_editing_0001/.DS_Store differ
diff --git a/dataset/style_editing_art_style_editing_0001/eval.json b/dataset/style_editing_art_style_editing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..96063ab5118e459782e1c70663285acaaf759d52
--- /dev/null
+++ b/dataset/style_editing_art_style_editing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image retain the main thematic elements of the original image, ensuring key content is still recognizable?",
+            "0_point_standard": "Key thematic elements have been altered or lost, making the main content difficult to recognize.",
+            "1_point_standard": "All main thematic elements are clearly retained, ensuring the content is still recognizable in the new style."
+        },
+        {
+            "question": "Are the proportions and spatial relationships of the main elements unchanged, maintaining their original position in the image?",
+            "0_point_standard": "The proportions or spatial relationships of the main elements have been altered, disrupting the original layout.",
+            "1_point_standard": "The main elements retain their original proportions and position, maintaining consistent spatial relationships."
+        },
+        {
+            "question": "Is the applied style transformation consistent with the specified artistic style, clearly conveying the intended visual effect?",
+            "0_point_standard": "The style transformation is unclear or does not match the specified artistic style, leading to inconsistent effects.",
+            "1_point_standard": "The style transformation is clear and accurately reflects the specified artistic style, achieving the intended visual effect."
+        },
+        {
+            "question": "Have colors and textures been adjusted according to the new style, creating a cohesive transformation without overshadowing the main elements?",
+            "0_point_standard": "Colors or textures are inconsistent or excessively obscure the main thematic elements, causing a disjointed appearance.",
+            "1_point_standard": "Colors and textures are well-adjusted according to the new style, enhancing the overall aesthetic while maintaining the clarity of the main elements."
+        },
+        {
+            "question": "Does the image maintain a high level of detail and clarity in the main elements, ensuring they are not compromised by the style change?",
+            "0_point_standard": "The main elements appear blurry, pixelated, or lack detail due to the style transformation.",
+            "1_point_standard": "The main elements maintain clear detail and clarity, unaffected by the style change, preserving image quality."
+        },
+        {
+            "question": "Does the final image achieve a visually cohesive and appealing blend of style and content, presenting a balanced and harmonious composition?",
+            "0_point_standard": "The image lacks cohesion, with style and content appearing disconnected or visually unappealing.",
+            "1_point_standard": "The image presents a harmonious blend of style and content, with a balanced and visually appealing composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_editing_art_style_editing_0001/images.txt b/dataset/style_editing_art_style_editing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..26b2e020ca7495efb0a1c66f40fc26520f887152
--- /dev/null
+++ b/dataset/style_editing_art_style_editing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN0180Ie2A1sGyRPsotjN_!!6000000005740-0-tps-4608-2592.jpg
diff --git a/dataset/style_editing_art_style_editing_0001/instruction.txt b/dataset/style_editing_art_style_editing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..65f1adbb03f2c800db1340096538a0105823c078
--- /dev/null
+++ b/dataset/style_editing_art_style_editing_0001/instruction.txt
@@ -0,0 +1 @@
+Transform this image of the dock and boat into an impressionist style, using soft tones and blurred brushstrokes to evoke the shimmering water and the vibrant feel of nature. Retain the overall structure of the scene but make the details of the water and boat more abstract, capturing the interplay of light and color typical of impressionist paintings.
\ No newline at end of file
diff --git a/dataset/style_editing_art_style_editing_0001/meta.json b/dataset/style_editing_art_style_editing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7fde2dd68b91a6d53c930b067ec7cb2f1f087095
--- /dev/null
+++ b/dataset/style_editing_art_style_editing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "art style editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0061",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_editing_art_style_editing_0002/eval.json b/dataset/style_editing_art_style_editing_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..3e442367a923d4a281b62c75785ab7e5d671a101
--- /dev/null
+++ b/dataset/style_editing_art_style_editing_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image retain the main thematic elements of the original image, ensuring that key content remains recognizable?",
+            "0_point_standard": "Key thematic elements have been altered or lost, making the main content difficult to recognize.",
+            "1_point_standard": "All major thematic elements are clearly preserved, ensuring the content remains recognizable in the new style."
+        },
+        {
+            "question": "Are the proportions and spatial relationships of the main elements maintained, preserving their original position in the image?",
+            "0_point_standard": "The proportions or spatial relationships of the main elements have been modified, disrupting the original layout.",
+            "1_point_standard": "The main elements retain their original proportions and positions, maintaining consistent spatial relationships."
+        },
+        {
+            "question": "Does the applied style transformation align with the specified artistic style, clearly conveying the intended visual effect?",
+            "0_point_standard": "The style transformation is unclear or does not match the specified artistic style, leading to inconsistent effects.",
+            "1_point_standard": "The style transformation is clear and accurately reflects the specified artistic style, achieving the intended visual effect."
+        },
+        {
+            "question": "Have colors and textures been adjusted according to the new style to create a cohesive transformation without obscuring major elements?",
+            "0_point_standard": "Colors or textures are inconsistent or excessively obscure the main thematic elements, resulting in a disjointed appearance.",
+            "1_point_standard": "Colors and textures are well-adjusted according to the new style, enhancing the overall aesthetic while maintaining the clarity of major elements."
+        },
+        {
+            "question": "Does the image maintain a high level of detail and clarity in the main elements, ensuring they are not compromised by style changes?",
+            "0_point_standard": "Due to style transformation, the main elements appear blurry, pixelated, or lack detail.",
+            "1_point_standard": "The main elements maintain clear detail and clarity, unaffected by style changes, preserving image quality."
+        },
+        {
+            "question": "Does the final image achieve a visually cohesive and appealing fusion of style and content, presenting a balanced and harmonious composition?",
+            "0_point_standard": "The image lacks cohesiveness, with style and content appearing disjointed or visually unappealing.",
+            "1_point_standard": "The image presents a harmonious fusion of style and content, with a balanced and visually appealing composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_editing_art_style_editing_0002/images.txt b/dataset/style_editing_art_style_editing_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1fa2047acc6a1e4445f627659b763e9f546ecc35
--- /dev/null
+++ b/dataset/style_editing_art_style_editing_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01pnZRqs1h4e0MpZUE7_!!6000000004224-0-tps-1280-800.jpg
diff --git a/dataset/style_editing_art_style_editing_0002/instruction.txt b/dataset/style_editing_art_style_editing_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..18f7bebf54d8f30364aa9238c4c7e47564233b7e
--- /dev/null
+++ b/dataset/style_editing_art_style_editing_0002/instruction.txt
@@ -0,0 +1 @@
+Transform this image of the snowy mountains into a surrealist style, adding fantastical elements like floating peaks or dreamlike skies. Use unusual colors and shapes to emphasize the mysterious and surreal quality of the mountains. The whole scene should evoke a sense of fantasy, pushing the boundaries of the real world.
\ No newline at end of file
diff --git a/dataset/style_editing_art_style_editing_0002/meta.json b/dataset/style_editing_art_style_editing_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9579a25d5ebe2471677de79c689405fe5e98eb2b
--- /dev/null
+++ b/dataset/style_editing_art_style_editing_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "art style editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0061",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_editing_era_editing_0001/eval.json b/dataset/style_editing_era_editing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..a25f7ac5b992041099f04d920d0f1c70c10b08a7
--- /dev/null
+++ b/dataset/style_editing_era_editing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the generated image retain the core theme and recognizable features of the original image, ensuring that the main content and identity are preserved?",
+            "0_point_standard": "The core theme or main features have changed, making the original content difficult to recognize.",
+            "1_point_standard": "The main content and identity of the original image are preserved, maintaining recognizable themes or scenes in the new era style."
+        },
+        {
+            "question": "Does the transformation of the era style accurately reflect the specified historical period with corresponding characteristics and details?",
+            "0_point_standard": "The style transformation does not match the specified era, showing inconsistencies or lacking key elements of that era.",
+            "1_point_standard": "The era style is applied accurately, with distinctive elements and characteristics clearly representing the specified historical period."
+        },
+        {
+            "question": "Does the added era style smoothly integrate with the original image composition, avoiding any abrupt transitions or mismatched areas?",
+            "0_point_standard": "The era style is unevenly applied or has abrupt transitions, making parts of the image appear inconsistent.",
+            "1_point_standard": "The era style smoothly integrates into the entire image without abrupt transitions, creating a cohesive and unified appearance."
+        },
+        {
+            "question": "Do the era-specific items or accessories that appear in the image, such as furniture, decorations, or clothing, match the designated era?",
+            "0_point_standard": "Specific era items or accessories appear inaccurate or misplaced, undermining the historical authenticity of the image.",
+            "1_point_standard": "Specific era items or accessories are appropriately chosen and match the designated era, enhancing the historical feel of the image."
+        },
+        {
+            "question": "Do the lighting and color adjustments align with the era style, creating an authentic atmosphere without compromising the original content?",
+            "0_point_standard": "The lighting and color adjustments do not match the era style, or they overpower the main content, making it appear inconsistent or unnatural.",
+            "1_point_standard": "The lighting and color are well-suited to the specified era style, enhancing the atmosphere while maintaining visual coherence of the main content."
+        },
+        {
+            "question": "Does the final image integrate the era style and content in a cohesive and aesthetically appealing way, achieving a balanced and harmonious appearance?",
+            "0_point_standard": "The image lacks aesthetic cohesion, with elements appearing disjointed or detracting from visual appeal.",
+            "1_point_standard": "The image is visually cohesive, with the era style and original content harmoniously blending into an aesthetically pleasing composition."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_editing_era_editing_0001/images.txt b/dataset/style_editing_era_editing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dea8b383b1ff9a5f6acb9821aaeaa314e2ec6cef
--- /dev/null
+++ b/dataset/style_editing_era_editing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01hyr7UC26vBq0yFlHe_!!6000000007723-0-tps-3000-2000.jpg
diff --git a/dataset/style_editing_era_editing_0001/instruction.txt b/dataset/style_editing_era_editing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2ea500e0a98faf9a3d635e9596d50396b837a824
--- /dev/null
+++ b/dataset/style_editing_era_editing_0001/instruction.txt
@@ -0,0 +1 @@
+Convert this image of Times Square into a 1980s retro style.
\ No newline at end of file
diff --git a/dataset/style_editing_era_editing_0001/meta.json b/dataset/style_editing_era_editing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..041950935420bce2f1f030c07e8a7e184ff41ba1
--- /dev/null
+++ b/dataset/style_editing_era_editing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "era editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0060",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_editing_material_editing_0001/eval.json b/dataset/style_editing_material_editing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..abb9c682f71b261a5c73d89961d9ccbacc8a924f
--- /dev/null
+++ b/dataset/style_editing_material_editing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified image accurately retain the essential content and features of the original image, except for the specified material change items?",
+            "0_point_standard": "The content and features of the original image have changed beyond the specified material change items, resulting in noticeable deviations or distortions.",
+            "1_point_standard": "The content and features of the original image are accurately retained, with no other unexpected changes except for the specified material change items."
+        },
+        {
+            "question": "Are the modifications limited only to the specified items, with the rest of the image remaining unchanged?",
+            "0_point_standard": "The modifications affect areas beyond the specified items, leading to unexpected changes in other parts of the image.",
+            "1_point_standard": "The modifications are strictly limited to the specified items, with the rest of the image unchanged."
+        },
+        {
+            "question": "Does the modified material style of the specified items accurately reflect the requirements of the text description?",
+            "0_point_standard": "The material style changes do not reflect the requirements or instructions detailed in the text description.",
+            "1_point_standard": "The material style changes accurately reflect the requirements and instructions detailed in the text description."
+        },
+        {
+            "question": "Do the texture and style of the modified material align with the description and blend consistently with the overall image?",
+            "0_point_standard": "The texture and style of the modified material do not align with the description or do not blend well with the overall image.",
+            "1_point_standard": "The texture and style of the modified material align with the description and blend perfectly with the overall image."
+        },
+        {
+            "question": "Does the modified image maintain a high quality of detail, ensuring the texture and finish look realistic and professional?",
+            "0_point_standard": "The modified image lacks detail quality, with texture and finish appearing unrealistic or poorly rendered.",
+            "1_point_standard": "The modified image maintains high quality of detail, with texture and finish looking realistic and professional."
+        },
+        {
+            "question": "Does the modified image exhibit overall aesthetic appeal, ensuring that material changes have enhanced the visual quality and appeal of the image?",
+            "0_point_standard": "The modified image lacks aesthetic appeal, with material changes reducing visual quality.",
+            "1_point_standard": "The modified image exhibits strong aesthetic appeal, with material changes enhancing visual quality and appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_editing_material_editing_0001/images.txt b/dataset/style_editing_material_editing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4e578bd2e93c1d77adf9ff7d323e6a27ee5bb838
--- /dev/null
+++ b/dataset/style_editing_material_editing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN0120sDta1kitf6IDmHR_!!6000000004718-0-tps-2000-2667.jpg
diff --git a/dataset/style_editing_material_editing_0001/instruction.txt b/dataset/style_editing_material_editing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c033cf402437c05ec73c3d8ebc976f8239273adf
--- /dev/null
+++ b/dataset/style_editing_material_editing_0001/instruction.txt
@@ -0,0 +1 @@
+Change the material of the blue dress in the image to silk, making the dress appear smooth, glossy, and soft, with the characteristic drape and light reflection of silk.
\ No newline at end of file
diff --git a/dataset/style_editing_material_editing_0001/meta.json b/dataset/style_editing_material_editing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..22ae5381a44977328b8b0d061c72bd1d16d093d7
--- /dev/null
+++ b/dataset/style_editing_material_editing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "material editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0062",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_editing_material_editing_0002/eval.json b/dataset/style_editing_material_editing_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e63c1fe07ed3505164792ad9be701188698c257e
--- /dev/null
+++ b/dataset/style_editing_material_editing_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified image accurately retain the essential content and features of the original image, except for the specified material change items?",
+            "0_point_standard": "The content and features of the original image have changed beyond the specified material change items, resulting in noticeable deviation or distortion.",
+            "1_point_standard": "The content and features of the original image are accurately retained, with no other unexpected changes except for the specified material change items."
+        },
+        {
+            "question": "Is the modification limited to the specified items, with the rest of the image remaining unchanged?",
+            "0_point_standard": "The modification has affected areas beyond the specified items, resulting in unexpected changes to other parts of the image.",
+            "1_point_standard": "The modification is strictly limited to the specified items, with no changes to the rest of the image."
+        },
+        {
+            "question": "Does the modified material style of the specified items accurately reflect the requirements described in the text?",
+            "0_point_standard": "The material style change does not reflect the requirements or descriptions detailed in the text.",
+            "1_point_standard": "The material style change accurately reflects the requirements and descriptions detailed in the text."
+        },
+        {
+            "question": "Do the texture and style of the modified material match the description and blend consistently with the overall image?",
+            "0_point_standard": "The texture and style of the modified material are inconsistent with the description or do not blend well with the overall image.",
+            "1_point_standard": "The texture and style of the modified material are consistent with the description and blend perfectly with the overall image."
+        },
+        {
+            "question": "Does the modified image maintain high-quality details, ensuring the texture and finish of the material look realistic and professional?",
+            "0_point_standard": "The modified image lacks quality in detail, with the texture and finish appearing unrealistic or poorly rendered.",
+            "1_point_standard": "The modified image maintains high-quality details, with the texture and finish looking realistic and professional."
+        },
+        {
+            "question": "Does the modified image exhibit an overall aesthetic appeal, ensuring the material changes enhance the visual quality and attractiveness of the image?",
+            "0_point_standard": "The modified image lacks aesthetic appeal, and the material changes have reduced the visual quality.",
+            "1_point_standard": "The modified image possesses strong aesthetic appeal, with the material changes enhancing visual quality and attractiveness."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_editing_material_editing_0002/images.txt b/dataset/style_editing_material_editing_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9634fc07edbacecc1af85ac4947855f252bf2350
--- /dev/null
+++ b/dataset/style_editing_material_editing_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN013IVCjY1UDGSFLWlNy_!!6000000002483-0-tps-2000-2000.jpg
diff --git a/dataset/style_editing_material_editing_0002/instruction.txt b/dataset/style_editing_material_editing_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..872c5adfc86d3602d21229b73a1896401ce81382
--- /dev/null
+++ b/dataset/style_editing_material_editing_0002/instruction.txt
@@ -0,0 +1 @@
+Change the marble tabletop material in the image to dark wood, giving the table a natural wood grain and a warm texture.
\ No newline at end of file
diff --git a/dataset/style_editing_material_editing_0002/meta.json b/dataset/style_editing_material_editing_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..83a85945ba1912b1f9a2fb244692ae6031908a04
--- /dev/null
+++ b/dataset/style_editing_material_editing_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "material editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0062",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_editing_time_editing_0001/eval.json b/dataset/style_editing_time_editing_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e74089b844e2a2bf302d1d93c17b78f914ed4691
--- /dev/null
+++ b/dataset/style_editing_time_editing_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified image accurately reflect the specified time changes described in the text input (e.g., from day to night, or vice versa)?",
+            "0_point_standard": "The time change in the image does not accurately reflect the specified modification, or it is unclear.",
+            "1_point_standard": "The image clearly reflects the specified time changes described in the text input."
+        },
+        {
+            "question": "In areas where modifications were not requested, does the rest of the image remain unchanged, maintaining the integrity of the original content?",
+            "0_point_standard": "There are noticeable changes or distortions in parts of the image that were not intended to be modified.",
+            "1_point_standard": "The unmodified parts of the image remain consistent with the original image, without unexpected changes."
+        },
+        {
+            "question": "Does the image maintain its original style and features, ensuring a seamless transition between the original and modified components?",
+            "0_point_standard": "There is a noticeable change in the style or features of the image, leading to a lack of cohesion between the original and modified areas.",
+            "1_point_standard": "The image retains its original style and features, achieving a seamless transition between modified and unmodified areas."
+        },
+        {
+            "question": "Do the time changes in the image match the specific instructions regarding atmosphere, lighting, and mood provided in the text input (e.g., a warm sunset or a bright afternoon)?",
+            "0_point_standard": "The atmosphere, lighting, or mood does not match the specific instructions provided in the text input.",
+            "1_point_standard": "The modifications accurately reflect the specified atmosphere, lighting, and mood described in the text input."
+        },
+        {
+            "question": "Are the adjustments to lighting and shadows consistent with the specified time changes, creating a natural and realistic effect?",
+            "0_point_standard": "Lighting and shadows do not accurately reflect the specified time changes, resulting in an unnatural or inconsistent appearance.",
+            "1_point_standard": "Lighting and shadows are well-adjusted to match the specified time changes, creating a natural and realistic effect."
+        },
+        {
+            "question": "Does the edited image have a high aesthetic appeal, with enhancements that professionally improve its overall visual attractiveness?",
+            "0_point_standard": "The edited image lacks aesthetic appeal, has poor visual quality, and the enhancements are ineffective.",
+            "1_point_standard": "The edited image exhibits strong aesthetic appeal, with high-quality enhancements positively contributing to its visual appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_editing_time_editing_0001/images.txt b/dataset/style_editing_time_editing_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..078e310770afcd1af704f3c510c289cdf7a3a510
--- /dev/null
+++ b/dataset/style_editing_time_editing_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i1/O1CN013p40a91gZ2g2Es4P9_!!6000000004155-0-tps-2880-1800.jpg
diff --git a/dataset/style_editing_time_editing_0001/instruction.txt b/dataset/style_editing_time_editing_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f09404b21bfe9c68f5ab1fee6ca030b7ce47c188
--- /dev/null
+++ b/dataset/style_editing_time_editing_0001/instruction.txt
@@ -0,0 +1 @@
+Change the time of this mountain sunrise scene to night, with a sky full of stars and the mountain outlines faintly visible under the moonlight, enhancing the peaceful nighttime atmosphere.
\ No newline at end of file
diff --git a/dataset/style_editing_time_editing_0001/meta.json b/dataset/style_editing_time_editing_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..a8e28fd5bbcce96906cb6c89e9a46c025566e028
--- /dev/null
+++ b/dataset/style_editing_time_editing_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "time editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0059",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_editing_time_editing_0002/eval.json b/dataset/style_editing_time_editing_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5eb7f6fe041bb453172cad75325a383243bf3d5b
--- /dev/null
+++ b/dataset/style_editing_time_editing_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified image accurately reflect the specified time change described in the text input (e.g., from day to night, or vice versa)?",
+            "0_point_standard": "The time change in the image does not accurately reflect the specified modification, or it is unclear.",
+            "1_point_standard": "The image clearly reflects the specified time change described in the text input."
+        },
+        {
+            "question": "In areas where modifications were not requested, does the rest of the image remain unchanged, maintaining the integrity of the original content?",
+            "0_point_standard": "There are noticeable changes or distortions in parts of the image that were not intended to be modified.",
+            "1_point_standard": "The parts of the image not targeted for modification remain consistent with the original image, with no unintended changes."
+        },
+        {
+            "question": "Has the image retained its original style and features, ensuring a seamless transition between the original components and the modified components?",
+            "0_point_standard": "There is a noticeable change in the style or features of the image, leading to a lack of cohesion between original and modified areas.",
+            "1_point_standard": "The image has retained its original style and features, achieving a seamless transition between modified and unmodified areas."
+        },
+        {
+            "question": "Does the time change in the image adhere to the specific instructions in the text input regarding atmosphere, lighting, and mood (e.g., a warm sunset or a bright afternoon)?",
+            "0_point_standard": "The atmosphere, lighting, or mood does not match the specific instructions provided in the text input.",
+            "1_point_standard": "The modification accurately reflects the specified atmosphere, lighting, and mood in the text input."
+        },
+        {
+            "question": "Are the adjustments in lighting and shadow consistent with the specified time change, creating a natural and realistic effect?",
+            "0_point_standard": "Lighting and shadow do not accurately reflect the specified time change, resulting in an unnatural or inconsistent appearance.",
+            "1_point_standard": "Lighting and shadow are well-adjusted to match the specified time change, creating a natural and realistic effect."
+        },
+        {
+            "question": "Does the edited image possess a high level of aesthetic appeal, with professional-quality enhancements that improve its overall visual appeal?",
+            "0_point_standard": "The edited image lacks aesthetic appeal, has poor visual quality, and the enhancement effects are inadequate.",
+            "1_point_standard": "The edited image displays strong aesthetic appeal, with high-quality enhancements positively contributing to its visual appearance."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_editing_time_editing_0002/images.txt b/dataset/style_editing_time_editing_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5faba370aa768dc72b696cb584a642376f48c8c2
--- /dev/null
+++ b/dataset/style_editing_time_editing_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01p6xtTB1afYcILvKML_!!6000000003357-0-tps-2400-1500.jpg
diff --git a/dataset/style_editing_time_editing_0002/instruction.txt b/dataset/style_editing_time_editing_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..176a9ae44b09de6d746d50383f832b28dd596da5
--- /dev/null
+++ b/dataset/style_editing_time_editing_0002/instruction.txt
@@ -0,0 +1 @@
+Change the time of this city night scene to early morning, with the sky starting to brighten, building lights gradually turning off, and the sky transitioning into soft shades of blue and pink as the city awakens.
\ No newline at end of file
diff --git a/dataset/style_editing_time_editing_0002/meta.json b/dataset/style_editing_time_editing_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..aeffedf742b23ae0e5e741585a86c7e5005e1002
--- /dev/null
+++ b/dataset/style_editing_time_editing_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "time editing",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0059",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0002/eval.json b/dataset/style_group_generation_abstract_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6a403670cea688ab95db4f9d4f30c11e2f908de2
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does each of the images follow the same abstract style in terms of abstract method, level of abstraction, and overall aesthetic approach?",
+            "0_point_standard": "The images display different abstract styles, resulting in inconsistent visual themes.",
+            "1_point_standard": "All images maintain a consistent abstract style, creating a unified and coherent aesthetic throughout the sequence."
+        },
+        {
+            "question": "Do the generated images align with the content and themes outlined in the text description?",
+            "0_point_standard": "The images deviate significantly from the instructions or themes specified in the text description.",
+            "1_point_standard": "The images accurately express the themes or instructions specified in the text description."
+        },
+        {
+            "question": "Is the theme or central concept consistent across all images, forming a recognizable narrative or thematic connection?",
+            "0_point_standard": "The theme or concept is unclear or inconsistent between images, disrupting the continuity of the narrative or theme.",
+            "1_point_standard": "The theme or concept is consistently represented in the images, forming a recognizable connection between them."
+        },
+        {
+            "question": "Are key objects, characters, or abstract forms recognizable throughout the series, even within an abstract context?",
+            "0_point_standard": "Key elements are difficult to consistently recognize between different images, losing the continuity of identity.",
+            "1_point_standard": "Key objects, characters, or forms are recognizable across the images, maintaining continuity of identity within the abstract style."
+        },
+        {
+            "question": "Is the level of abstraction and technique consistently applied across all images, ensuring a unified approach to the abstract style?",
+            "0_point_standard": "There is significant variation in the level of abstraction or technique between images, disrupting the visual cohesion of the series.",
+            "1_point_standard": "The level of abstraction and technique is consistent across all images, maintaining a unified approach to the abstract style."
+        },
+        {
+            "question": "Do the images exhibit high aesthetic quality, with detailed abstract elements, visual coherence, and a professional level of finish?",
+            "0_point_standard": "The images lack detail, visual coherence, or aesthetic appeal, failing to meet professional standards.",
+            "1_point_standard": "The images are rich in detail, visually appealing, and exhibit professional quality, contributing to a compelling abstract sequence."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0002/images.txt b/dataset/style_group_generation_abstract_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_abstract_0002/instruction.txt b/dataset/style_group_generation_abstract_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..47ff10c7632b2eb30179a7b456b9c929d7cc0404
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0002/instruction.txt
@@ -0,0 +1 @@
+Geometric abstract style, using basic geometric shapes such as circles, squares, and triangles to outline the subjects. The lines are clean and smooth. Generate 4 images with the subjects being a fish, a bird, a deer, and a lion. All geometric elements should maintain a consistent style with neat and proportional lines.
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0002/meta.json b/dataset/style_group_generation_abstract_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ac9f1e68814c48734f36116fe1c545cadd1bb2cf
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group abstract style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0002",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0003/eval.json b/dataset/style_group_generation_abstract_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d85b68000122e1079a0190fb9d07cbd9ea3dc41
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does each image follow the same abstract style in terms of abstract approach, level of abstraction, and overall aesthetic method?",
+            "0_point_standard": "The images display different abstract styles, leading to an inconsistent visual theme.",
+            "1_point_standard": "All images maintain a consistent abstract style, creating a unified and coherent aesthetic throughout the series."
+        },
+        {
+            "question": "Do the generated images align with the content and themes outlined in the text description?",
+            "0_point_standard": "The images deviate significantly from the instructions or themes specified in the text description.",
+            "1_point_standard": "The images accurately express the themes or instructions specified in the text description."
+        },
+        {
+            "question": "Is the theme or central concept maintained consistently across all images, forming a recognizable narrative or thematic link?",
+            "0_point_standard": "The theme or concept is unclear or inconsistent between images, disrupting the continuity of the narrative or theme.",
+            "1_point_standard": "The theme or concept is consistently reflected in the images, creating a recognizable link between them."
+        },
+        {
+            "question": "Are key objects, characters, or abstract forms recognizable throughout the series, even in an abstract context?",
+            "0_point_standard": "Key elements are difficult to consistently recognize across different images, losing the continuity of identity.",
+            "1_point_standard": "Key objects, characters, or forms are recognizable across images, maintaining continuity of identity within the abstract style."
+        },
+        {
+            "question": "Is the level and technique of abstraction applied consistently across all images, ensuring a unified approach to the abstract style?",
+            "0_point_standard": "There is a significant variation in the level or technique of abstraction between images, disrupting the visual cohesion of the series.",
+            "1_point_standard": "The level and technique of abstraction are consistent across all images, maintaining a unified approach to the abstract style."
+        },
+        {
+            "question": "Do the images exhibit high aesthetic quality, with detailed abstract elements, visual coherence, and professional completion?",
+            "0_point_standard": "The images lack detail, visual coherence, or aesthetic appeal, failing to meet professional standards.",
+            "1_point_standard": "The images are rich in detail, visually appealing, and exhibit professional quality, contributing to a compelling abstract sequence."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0003/images.txt b/dataset/style_group_generation_abstract_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_abstract_0003/instruction.txt b/dataset/style_group_generation_abstract_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0f3672169b75a3778e5a5fd62cc1dbcd186b1fdb
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0003/instruction.txt
@@ -0,0 +1 @@
+Flowing line style, where the lines are gentle and natural, like flowing water, and the subjects' outlines seem to sway with the wind. Generate 4 images with the subjects being a dragon, a snake, a horse, and an elephant. All images should have flowing and uniform line styles, conveying a sense of motion and life through simple lines.
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0003/meta.json b/dataset/style_group_generation_abstract_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..c5b92b178ee2de3c6328354b79686821b92aff21
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group abstract style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0002",
+    "output_image_count": 4,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0004/eval.json b/dataset/style_group_generation_abstract_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..497c6126f3635f444ae9f09b1bc27309d1b0aed5
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Do all images follow the same abstract style in terms of approach, level of abstraction, and overall aesthetic method?",
+            "0_point_standard": "The images display different abstract styles, leading to an inconsistent visual theme.",
+            "1_point_standard": "All images maintain a consistent abstract style, creating a unified and coherent aesthetic throughout the sequence."
+        },
+        {
+            "question": "Are the generated images consistent with the content and theme outlined in the textual description?",
+            "0_point_standard": "The images significantly deviate from the instructions or themes specified in the text description.",
+            "1_point_standard": "The images accurately express the theme or instructions specified in the text description."
+        },
+        {
+            "question": "Is the theme or central concept consistently maintained across all images, forming a recognizable narrative or thematic connection?",
+            "0_point_standard": "The theme or concept is unclear or inconsistent between images, disrupting the continuity of the narrative or theme.",
+            "1_point_standard": "The theme or concept is consistently reflected across the images, forming a recognizable connection between them."
+        },
+        {
+            "question": "Are key objects, characters, or abstract forms recognizable throughout the series, even in an abstract context?",
+            "0_point_standard": "Key elements are difficult to consistently recognize between different images, losing the continuity of identity.",
+            "1_point_standard": "Key objects, characters, or forms are recognizable between images, maintaining the continuity of identity in an abstract style."
+        },
+        {
+            "question": "Is the degree and technique of abstraction consistently applied across all images, ensuring a unified approach to the abstract style?",
+            "0_point_standard": "There is a significant variation in the degree or technique of abstraction between images, disrupting the visual cohesion of the series.",
+            "1_point_standard": "The degree and technique of abstraction are consistent across all images, maintaining a unified approach to the abstract style."
+        },
+        {
+            "question": "Do the images exhibit a high aesthetic quality with detailed abstract elements, visual coherence, and a professional level of completion?",
+            "0_point_standard": "The images lack detail, visual coherence, or aesthetic appeal, failing to meet professional standards.",
+            "1_point_standard": "The images are rich in detail, visually appealing, and exhibit professional quality, contributing to a compelling abstract sequence."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0004/images.txt b/dataset/style_group_generation_abstract_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_abstract_0004/instruction.txt b/dataset/style_group_generation_abstract_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bed2195d696932c51783753fbf97311d6738787e
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0004/instruction.txt
@@ -0,0 +1 @@
+Brush stroke style, imitating traditional Chinese ink painting, using black ink lines of varying thickness with slight smudging effects. Generate 4 images with the subjects being bamboo, plum blossom, pine tree, and lotus. Ensure the brushstroke variations are consistent across all images, emphasizing the elegance and natural flow of ink.
\ No newline at end of file
diff --git a/dataset/style_group_generation_abstract_0004/meta.json b/dataset/style_group_generation_abstract_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..f05814d1fa28efe7ab1d550c91df51b989d84a1c
--- /dev/null
+++ b/dataset/style_group_generation_abstract_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group abstract style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0002",
+    "output_image_count": 4,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_anime_0001/eval.json b/dataset/style_group_generation_anime_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..eae4f5f1c602dabd79e0b0487f003aeac8cdb6d7
--- /dev/null
+++ b/dataset/style_group_generation_anime_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the animation style consistent across all images, including lines, shading, palette, and overall aesthetic approach?",
+            "0_point_standard": "The images have noticeable style differences, disrupting a unified animation aesthetic.",
+            "1_point_standard": "The animation style is consistently applied, with cohesive lines, shading, palette, and overall aesthetics across all images."
+        },
+        {
+            "question": "Do the generated images align with the content and theme specified in the text description, accurately depicting the described characters, scenes, or objects?",
+            "0_point_standard": "The images deviate from the content or theme specified in the text description, lacking important details or interpretations.",
+            "1_point_standard": "The images accurately depict the themes and content described in the text, capturing the specified characters, scenes, or objects."
+        },
+        {
+            "question": "Are the characters or main themes visually consistent across images, such as recognizable features like hairstyle, expressions, and clothing?",
+            "0_point_standard": "Character features or main themes differ between images, making them hard to recognize as the same entity.",
+            "1_point_standard": "The characters or main themes are visually consistent, with recognizable features remaining unchanged across images."
+        },
+        {
+            "question": "Are animation-specific elements such as eye details, hair details, and expressions accurately rendered to fit the animation style?",
+            "0_point_standard": "Animation-specific elements fail to accurately reflect the style, diminishing the authenticity of the images.",
+            "1_point_standard": "Eyes, hair, expressions, and other animation-specific elements are accurately rendered, enhancing the authenticity of the animation style."
+        },
+        {
+            "question": "Do the background and environmental elements in each image match the animation style and coordinate with the character design?",
+            "0_point_standard": "The background or environment is inconsistent with the animation style or does not coordinate with the character design.",
+            "1_point_standard": "Backgrounds and environments are consistent with the animation style and coordinate with characters, creating cohesive scenes."
+        },
+        {
+            "question": "Do the images have high aesthetic quality, with clear details, appealing composition, and a polished, professional finish?",
+            "0_point_standard": "The images lack aesthetic appeal or have poor detail quality, reducing visual impact.",
+            "1_point_standard": "The images are visually appealing, with high-quality details, balanced composition, and a polished, professional finish."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_anime_0001/images.txt b/dataset/style_group_generation_anime_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_anime_0001/instruction.txt b/dataset/style_group_generation_anime_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6966a3c1a09dc7c63515278bc493441e8636f553
--- /dev/null
+++ b/dataset/style_group_generation_anime_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 5 images depicting summer campus scenes. All images must adhere to the same consistent anime style, ensuring they follow the Japanese 2D anime aesthetic. The first image shows a girl in a school uniform waiting for her friends at the school gate, with sunlight shining on the school buildings and cherry blossom trees in the background; the second image shows a group of students discussing their studies in a classroom, with the blue sky and swaying green trees visible outside the windows; the third image features boys playing soccer on the field, their movements dynamic under the bright blue sky; the fourth image shows a group of girls having a picnic on the lawn, with colorful food and the distant view of school buildings and the playground; the fifth image depicts a girl walking alone in the school corridor at sunset, her shadow stretched long by the setting sun.
\ No newline at end of file
diff --git a/dataset/style_group_generation_anime_0001/meta.json b/dataset/style_group_generation_anime_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..6f71464bbecff91324b1b6e7c9a5528fe9273068
--- /dev/null
+++ b/dataset/style_group_generation_anime_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group anime style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0006",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0002/eval.json b/dataset/style_group_generation_creative_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..c825d00311602caaf6e86f1767945b958e59d068
--- /dev/null
+++ b/dataset/style_group_generation_creative_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the creative style consistently applied across all images, including elements like color schemes, design methods, and stylistic techniques?",
+            "0_point_standard": "There are noticeable differences in style among the images, disrupting the visual consistency of the series.",
+            "1_point_standard": "The creative style is consistently applied, with cohesive colors, design, and stylistic techniques across all images."
+        },
+        {
+            "question": "Do the generated images align with the content and themes specified in the textual description, accurately depicting the intended concept or theme?",
+            "0_point_standard": "The images deviate from the theme or subject described in the text, lacking key details or creative interpretation.",
+            "1_point_standard": "The images accurately reflect the theme and content described in the text, capturing the specified concept or theme according to the creative style."
+        },
+        {
+            "question": "Are key elements (such as objects, characters, or patterns) visually consistent across images, making them easily recognizable as part of the same creative series?",
+            "0_point_standard": "Key elements vary significantly between images, making it difficult to recognize them as part of the same series.",
+            "1_point_standard": "Key elements are visually consistent, maintaining recognizable features across images to ensure continuity."
+        },
+        {
+            "question": "Does each image provide a unique interpretation of the creative theme while remaining true to the overall style?",
+            "0_point_standard": "The images lack diversity or appear repetitive, failing to explore unique aspects of the creative theme.",
+            "1_point_standard": "Each image offers a unique interpretation of the theme, adding diversity and interest within the specified creative style."
+        },
+        {
+            "question": "Are design elements (such as shapes, textures, and composition) harmoniously coordinated both within each image and throughout the series, enhancing the creative style?",
+            "0_point_standard": "Design elements appear inconsistent or clashing within and between images, weakening the cohesiveness of the creative style.",
+            "1_point_standard": "Design elements are effectively coordinated both within each image and across the series, enhancing the consistency of the creative style."
+        },
+        {
+            "question": "Do the images exhibit high aesthetic quality, with fine detail, balanced composition, and a polished, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or have poor detail quality, diminishing visual impact.",
+            "1_point_standard": "The images are visually appealing, with high-quality details, balanced composition, and a polished, professional finish, enhancing the creative style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0002/images.txt b/dataset/style_group_generation_creative_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_creative_0002/instruction.txt b/dataset/style_group_generation_creative_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f2f57f8703717a640ea488f7e00ac11fe70aa7b1
--- /dev/null
+++ b/dataset/style_group_generation_creative_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 5 creative-style images depicting the fusion of fruits and animals. The first image is a strawberry bird, with wings made of strawberry flesh; the second image shows a watermelon whale, with the whale's body made of watermelon rind and its tail sprouting green vines; the third image features an orange monkey, with its body covered in the texture of orange peel; the fourth image is an apple hedgehog, with small apples growing on its back instead of spines; the fifth image shows a banana parrot, with banana peels forming the bird's bright yellow feathers and wings spreading like peeled bananas. All images must follow the same consistent creative style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0002/meta.json b/dataset/style_group_generation_creative_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..00da7b4a7085cd91b538360b698408dcee75ee46
--- /dev/null
+++ b/dataset/style_group_generation_creative_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group creative images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0005",
+    "output_image_count": 5,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0003/eval.json b/dataset/style_group_generation_creative_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..2114f5234c40a8fd3d39e048f8e153cca09ecb91
--- /dev/null
+++ b/dataset/style_group_generation_creative_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the creative style consistently applied across all images, including elements such as color schemes, design approaches, and stylistic techniques?",
+            "0_point_standard": "The images show significant differences in style, disrupting the visual consistency of the series.",
+            "1_point_standard": "The creative style is consistently applied, with cohesive color, design, and stylistic techniques across all images."
+        },
+        {
+            "question": "Do the generated images match the content and themes specified in the text description, accurately depicting the intended concept or theme?",
+            "0_point_standard": "The images deviate from the theme or subject described in the text, lacking key details or creative interpretation.",
+            "1_point_standard": "The images accurately reflect the themes and content described in the text, capturing the specified concept or theme according to the creative style."
+        },
+        {
+            "question": "Are key elements (such as objects, characters, or patterns) visually consistent across the images, making them easily recognizable as part of the same creative series?",
+            "0_point_standard": "Key elements vary greatly between images, making it difficult to recognize them as part of the same series.",
+            "1_point_standard": "Key elements are visually consistent, maintaining recognizable features across images, ensuring continuity."
+        },
+        {
+            "question": "Does each image offer a unique interpretation of the creative theme while remaining faithful to the overall style?",
+            "0_point_standard": "The images lack diversity or appear repetitive, failing to explore unique aspects of the creative theme.",
+            "1_point_standard": "Each image provides a unique interpretation of the theme, adding diversity and interest within the specified creative style."
+        },
+        {
+            "question": "Are design elements (such as shapes, textures, and compositions) harmoniously coordinated within each image and throughout the series, enhancing the creative style?",
+            "0_point_standard": "Design elements appear inconsistent or conflicting within and between images, weakening the cohesion of the creative style.",
+            "1_point_standard": "Design elements are effectively coordinated within each image and throughout the series, enhancing the consistency of the creative style."
+        },
+        {
+            "question": "Do the images exhibit high aesthetic quality with fine details, balanced compositions, and a polished professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or have poor detail quality, reducing visual impact.",
+            "1_point_standard": "The images are visually appealing, with high-quality details, balanced compositions, and a polished professional finish, enhancing the creative style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0003/images.txt b/dataset/style_group_generation_creative_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_creative_0003/instruction.txt b/dataset/style_group_generation_creative_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8ae1e4aba66d4dd71e3173e0c1f0a69c1c2f2cd8
--- /dev/null
+++ b/dataset/style_group_generation_creative_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 6 creative-style images showcasing the fusion of plants and machinery. The first image is a mechanical flower, with metal petals and gears turning in the center; the second image depicts a tree combined with an engine, where the tree trunk reveals intricate mechanical parts inside; the third image shows a robot fused with a cactus, with spines growing out of its metallic surface; the fourth image features a bicycle made of tree branches, with wheels formed from woven twigs; the fifth image depicts a windmill combined with a sunflower, with the blades spinning like sunflower petals; the sixth image shows a mechanical insect, with circuit-like leaf veins on its wings. All images must maintain a consistent creative style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0003/meta.json b/dataset/style_group_generation_creative_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..6e221a17ab031ffecae374420d6b4a687182cf36
--- /dev/null
+++ b/dataset/style_group_generation_creative_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group creative images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0005",
+    "output_image_count": 6,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0004/eval.json b/dataset/style_group_generation_creative_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..63f064bdb0f34c32f75673b9fb95837bf2b68ca6
--- /dev/null
+++ b/dataset/style_group_generation_creative_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the creative style consistently applied across all images, including elements such as color schemes, design approaches, and stylistic techniques?",
+            "0_point_standard": "The images exhibit noticeable differences in style, disrupting the visual consistency of the series.",
+            "1_point_standard": "The creative style is applied consistently, with cohesive colors, design, and stylistic techniques across all images."
+        },
+        {
+            "question": "Do the generated images align with the content and themes specified in the text description, accurately depicting the intended concept or theme?",
+            "0_point_standard": "The images deviate from the themes or subjects described in the text, lacking key details or creative interpretations.",
+            "1_point_standard": "The images accurately reflect the themes and content described in the text, capturing the specified concept or theme according to the creative style."
+        },
+        {
+            "question": "Do key elements (such as objects, characters, or patterns) maintain visual consistency across images, making them easily recognizable as part of the same creative series?",
+            "0_point_standard": "Key elements vary significantly between images, making it difficult to recognize them as part of the same series.",
+            "1_point_standard": "Key elements maintain visual consistency, with recognizable features across images, ensuring continuity."
+        },
+        {
+            "question": "Does each image provide a unique interpretation of the creative theme while staying true to the overall style?",
+            "0_point_standard": "The images lack diversity or appear repetitive, failing to explore unique aspects of the creative theme.",
+            "1_point_standard": "Each image offers a unique interpretation of the theme, adding diversity and interest within the specified creative style."
+        },
+        {
+            "question": "Are design elements (such as shapes, textures, and composition) harmoniously consistent within each image and across the series, enhancing the creative style?",
+            "0_point_standard": "Design elements appear inconsistent or conflicting within and between images, undermining the cohesion of the creative style.",
+            "1_point_standard": "Design elements are effectively coordinated within each image and across the series, enhancing the consistency of the creative style."
+        },
+        {
+            "question": "Do the images exhibit a high aesthetic quality with well-executed details, balanced compositions, and a polished, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or have poor detail quality, diminishing their visual impact.",
+            "1_point_standard": "The images are visually appealing, with high-quality details, balanced compositions, and a polished professional finish, enhancing the creative style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0004/images.txt b/dataset/style_group_generation_creative_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_creative_0004/instruction.txt b/dataset/style_group_generation_creative_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0eb8d24249fbbe7a3003bbd8edabf41f5cdc10f6
--- /dev/null
+++ b/dataset/style_group_generation_creative_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 3 creative-style images combining elements of the sky and ground animals. The first image shows a kite butterfly, with wings shaped like a soaring kite; the second image depicts a cloud sheep, with a fluffy body made of clouds and raindrops falling from above; the third image is a rainbow fish, with scales arranged in rainbow colors, and a tail resembling clouds in the sky. All images must follow the same consistent creative style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_creative_0004/meta.json b/dataset/style_group_generation_creative_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a71e475e50678f4e713c05d3cb9905a65bf6879
--- /dev/null
+++ b/dataset/style_group_generation_creative_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group creative images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0005",
+    "output_image_count": 3,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0002/eval.json b/dataset/style_group_generation_cthulhu_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..f1626c61182686c556323a6d0187af297e819560
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the Cthulhu style consistently applied across all images, including elements such as dark tones, bizarre atmosphere, and surreal monster designs?",
+            "0_point_standard": "There are significant stylistic differences in the images, disrupting the visual and thematic consistency of the Cthulhu series.",
+            "1_point_standard": "The Cthulhu style is consistently applied, with all images cohesively using dark tones, bizarre atmosphere, and surreal design elements."
+        },
+        {
+            "question": "Do the generated images accurately express the themes and objects described in the text prompts, capturing the peculiar and mysterious qualities unique to the Cthulhu mythos?",
+            "0_point_standard": "The images deviate from the described themes or objects, failing to capture the expected dark or surreal qualities.",
+            "1_point_standard": "The images accurately reflect the described themes and objects, effectively conveying the bizarre and mysterious qualities of the Cthulhu mythos."
+        },
+        {
+            "question": "Are recurring elements, such as creatures, symbols, or landscapes, visually consistent across images, thereby creating a unified Cthulhu series?",
+            "0_point_standard": "Key elements differ greatly between images, disrupting continuity and making them feel disconnected from each other.",
+            "1_point_standard": "Key elements, such as creatures, symbols, or landscapes, are consistently represented across images, creating a cohesive visual narrative."
+        },
+        {
+            "question": "Does each image effectively evoke the typical horror and unease of the Cthulhu mythos through memorable details, shadowy shapes, and a sinister foreboding?",
+            "0_point_standard": "The images lack a cohesive atmosphere or fail to evoke the unsettling feeling typical of Cthulhu-style artworks.",
+            "1_point_standard": "Each image successfully creates a memorable atmosphere with unsettling details, shadowy shapes, and a sinister foreboding."
+        },
+        {
+            "question": "Are textures and details (such as tentacles, symbols, and decaying structures) rendered in high quality and consistent with the Cthulhu style?",
+            "0_point_standard": "Textures or details are poorly rendered or inconsistent, detracting from the typical dark, detailed appearance of Cthulhu artworks.",
+            "1_point_standard": "Textures and details are meticulously rendered, enhancing the dark and detailed appearance consistent with the Cthulhu theme."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, including balanced composition, refined details, and a cohesive, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or have low detail quality, reducing the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with balanced composition, high-quality details, and a refined, professional finish, enhancing the Cthulhu theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0002/images.txt b/dataset/style_group_generation_cthulhu_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_cthulhu_0002/instruction.txt b/dataset/style_group_generation_cthulhu_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..eb392bc7957719f9ccf87fb960b3139aa4a515bc
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate 5 Cthulhu-style images, depicting a cursed forest. The first image shows a vine-covered path, with thick mist in the air. The second image features a huge, twisted ancient tree, with faint faces emerging from its bark. The third image shows an abandoned stone altar, with ominous symbols carved into the surrounding stones. The fourth image depicts dolls hanging from the branches, each with a creepy smile on its face. The fifth image shows a dark lake, with strange glowing lights floating on the surface. Keep all images in the Cthulhu style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0002/meta.json b/dataset/style_group_generation_cthulhu_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..2305616b5a2913b7149e888b4e7f9e55d464b846
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group cthulhu style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0008",
+    "output_image_count": 5,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0003/eval.json b/dataset/style_group_generation_cthulhu_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d6e39bd2ec6e6f4ad6c03b16d4f5667b410f657
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the Cthulhu style consistently applied across all images, including elements such as dark tones, grotesque atmosphere, and surreal monster designs?",
+            "0_point_standard": "There are noticeable differences in style among the images, disrupting the visual and thematic consistency of the Cthulhu series.",
+            "1_point_standard": "The Cthulhu style is consistently applied, with all images cohesively using dark tones, grotesque atmosphere, and surreal design elements."
+        },
+        {
+            "question": "Do the generated images accurately express the themes and subjects described in the text prompt, capturing the unique grotesque and mysterious qualities of the Cthulhu mythos?",
+            "0_point_standard": "The images deviate from the described themes or subjects, failing to capture the intended dark or surreal qualities.",
+            "1_point_standard": "The images accurately reflect the described themes and subjects, effectively conveying the grotesque and mysterious qualities typical of the Cthulhu mythos."
+        },
+        {
+            "question": "Are recurring elements, such as creatures, symbols, or landscapes, visually consistent between images, thus creating a unified Cthulhu series?",
+            "0_point_standard": "Key elements vary greatly between images, disrupting continuity and making them feel disconnected.",
+            "1_point_standard": "Key elements, such as creatures, symbols, or landscapes, are consistently represented across the images, creating a cohesive visual narrative."
+        },
+        {
+            "question": "Does each image effectively evoke the typical horror and unease present in the Cthulhu mythos through memorable details, shadowy forms, and ominous foreboding?",
+            "0_point_standard": "The images lack a cohesive atmosphere or fail to evoke the unsettling feeling characteristic of Cthulhu-style artwork.",
+            "1_point_standard": "Each image successfully creates a memorable atmosphere with unsettling details, shadowy forms, and ominous foreboding."
+        },
+        {
+            "question": "Are the textures and details (such as tentacles, symbols, and decaying structures) rendered with high quality and aligned with the Cthulhu style?",
+            "0_point_standard": "The textures or details are poorly rendered or inconsistent, detracting from the dark, intricate appearance typical of Cthulhu artwork.",
+            "1_point_standard": "The textures and details are rendered meticulously, enhancing the dark and intricate appearance consistent with the Cthulhu theme."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, including balanced composition, refined details, and a cohesive, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or have low detail quality, reducing the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with balanced composition, high-quality details, and a refined, professional finish that enhances the Cthulhu theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0003/images.txt b/dataset/style_group_generation_cthulhu_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_cthulhu_0003/instruction.txt b/dataset/style_group_generation_cthulhu_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d559bf90efa3bc108c052c76c610e67d0c319d1
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate 3 Cthulhu-style images, depicting an ancient city submerged underwater. The first image shows a field of fallen stone pillars and statues, covered in seaweed and shells. The second image displays the ruins of a main hall, with faint ancient runes on the walls and strange glowing creatures floating in the water. The third image shows a towering stone spire, with its top torn off by some force, and shadows of unknown creatures surrounding it. Keep all images in the Cthulhu style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0003/meta.json b/dataset/style_group_generation_cthulhu_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..408c1f36d2927b935ffc40aa19346c754446ce0f
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group cthulhu style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0008",
+    "output_image_count": 3,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0004/eval.json b/dataset/style_group_generation_cthulhu_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d9c69ce4760ec5e92667d4604027f30b04733342
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the Cthulhu style consistently applied across all images, including elements like dark tones, grotesque atmosphere, and surreal monster designs?",
+            "0_point_standard": "There are significant stylistic differences between the images, disrupting the visual and thematic consistency of the Cthulhu series.",
+            "1_point_standard": "The Cthulhu style is consistently applied, with all images cohesively utilizing dark tones, grotesque atmosphere, and surreal design elements."
+        },
+        {
+            "question": "Do the generated images accurately express the themes and objects described in the text prompts, capturing the peculiar and mysterious qualities unique to the Cthulhu mythos?",
+            "0_point_standard": "The images deviate from the described themes or objects and fail to capture the intended dark or surreal qualities.",
+            "1_point_standard": "The images accurately reflect the described themes and objects, effectively conveying the grotesque and mysterious qualities of the Cthulhu mythos."
+        },
+        {
+            "question": "Do recurring elements, such as creatures, symbols, or landscapes, maintain visual consistency across images, thereby creating a unified Cthulhu series?",
+            "0_point_standard": "Key elements differ greatly between images, breaking continuity and making them feel disconnected from each other.",
+            "1_point_standard": "Key elements such as creatures, symbols, or landscapes are consistently represented across images, creating a cohesive visual narrative."
+        },
+        {
+            "question": "Does each image effectively evoke the typical horror and unease of the Cthulhu mythos through memorable details, shadow shapes, and a sense of foreboding?",
+            "0_point_standard": "The images lack a cohesive atmosphere or fail to evoke the unsettling feeling characteristic of Cthulhu-style artworks.",
+            "1_point_standard": "Each image successfully creates a memorable atmosphere with unsettling details, shadow shapes, and a sense of foreboding."
+        },
+        {
+            "question": "Are the textures and details (such as tentacles, symbols, and decaying structures) rendered with high quality and in keeping with the Cthulhu style?",
+            "0_point_standard": "The textures or details are poorly rendered or inconsistent, detracting from the typical dark, intricate look of Cthulhu artworks.",
+            "1_point_standard": "The textures and details are meticulously rendered, enhancing the dark and intricate appearance consistent with the Cthulhu theme."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, including balanced composition, refined details, and a cohesive, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or have low detail quality, reducing the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with balanced composition, high-quality details, and a refined, professional finish, enhancing the Cthulhu theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0004/images.txt b/dataset/style_group_generation_cthulhu_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_cthulhu_0004/instruction.txt b/dataset/style_group_generation_cthulhu_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1bcc7a9a4c92d420ab5ba9c9f3dde732743a37ae
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate 6 Cthulhu-style images, depicting an ancient underground cave sealed by a mysterious force. The first image shows the cave entrance, surrounded by strange symbols and bones. The second image displays the walls inside the cave, covered with dark murals depicting a massive creature. The third image shows an ancient well deep within the cave, with faint light flickering from within. The fourth image depicts a dark underground river, with unidentified glowing fragments floating on the surface. The fifth image shows a stone door at the end of the cave, carved with ancient spells. The sixth image reveals a dark abyss beyond the door, with massive shadows moving within. Keep all images in the Cthulhu style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_cthulhu_0004/meta.json b/dataset/style_group_generation_cthulhu_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..256ab26356f2a7f0ddb2421cccf61a0b416c9409
--- /dev/null
+++ b/dataset/style_group_generation_cthulhu_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group cthulhu style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0008",
+    "output_image_count": 6,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0002/eval.json b/dataset/style_group_generation_cyberpunk_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..bc8e6a3d9253ced132d376a195390b7a16c35703
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the cyberpunk style consistently applied across all images, incorporating elements such as neon lights, high-tech urban landscapes, and futuristic dystopian aesthetics?",
+            "0_point_standard": "The images exhibit noticeable style differences that disrupt the visual coherence of the cyberpunk theme.",
+            "1_point_standard": "The cyberpunk style is consistently applied across all images, using coherent neon colors, urban elements, and futuristic aesthetics."
+        },
+        {
+            "question": "Do the generated images accurately depict the themes and subjects described in the text prompts, capturing the core high-tech, low-life vibe of cyberpunk?",
+            "0_point_standard": "The images deviate from the described themes or subjects, failing to capture the expected futuristic and gritty feel.",
+            "1_point_standard": "The images accurately reflect the described themes and subjects, effectively conveying the high-tech, dystopian atmosphere unique to cyberpunk."
+        },
+        {
+            "question": "Are recurring elements, such as neon lights, futuristic architecture, or technological gadgets, visually consistent across the images, creating a unified cyberpunk-style series?",
+            "0_point_standard": "Key elements vary greatly between images, disrupting continuity and making them feel disjointed from each other.",
+            "1_point_standard": "Key elements, such as neon lights, skyscrapers, or technological gadgets, are consistently depicted, creating a coherent cyberpunk-themed series."
+        },
+        {
+            "question": "Does each image evoke a strong cyberpunk atmosphere through immersive lighting, shadows, and color schemes, conveying a futuristic and gritty vibe?",
+            "0_point_standard": "The images lack a coherent atmosphere or fail to evoke the neon-lit dystopian feel typical of cyberpunk.",
+            "1_point_standard": "Each image successfully creates an immersive cyberpunk atmosphere, with neon lights, shadow contrasts, and color schemes conveying a futuristic and gritty vibe."
+        },
+        {
+            "question": "Are the textures and details, such as metallic surfaces, neon lights, and holographic elements, rendered in high quality and aligned with the cyberpunk style?",
+            "0_point_standard": "The texture or detail quality is poor or inconsistent, detracting from the intricate, high-tech appearance typical of cyberpunk art.",
+            "1_point_standard": "The textures and details are meticulously rendered, enhancing the intricate, high-tech look consistent with the cyberpunk theme."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with balanced composition, refined details, and a coherent, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or have low detail quality, reducing the visual impact of the series.",
+            "1_point_standard": "The images are visually appealing, with balanced composition, high-quality details, and a refined, professional finish that enhances the cyberpunk theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0002/images.txt b/dataset/style_group_generation_cyberpunk_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_cyberpunk_0002/instruction.txt b/dataset/style_group_generation_cyberpunk_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1df98de09f3fe41133d1ab3f2d98fd5ac29545fd
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 6 images depicting a future society controlled by megacorporations, where the sky is filled with holographic ads and flying vehicles. The first image shows a massive corporate headquarters towering into the sky, exuding a cold, imposing presence; the second image depicts a corporate executive standing in a large office with floor-to-ceiling windows overlooking the cityscape; the third image shows pedestrians on the street surrounded by giant advertising screens and surveillance cameras, with flying vehicles crisscrossing overhead; the fourth image features the interior of a corporate laboratory, where scientists are conducting body modification experiments in a room filled with cold light and metallic surfaces; the fifth image shows a corporate mercenary standing in a neon-lit alleyway; the sixth image depicts the city at night, with holographic ads intertwining in the sky and corporate logos everywhere. All images must maintain a consistent cyberpunk style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0002/meta.json b/dataset/style_group_generation_cyberpunk_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b6063f4ce2fc9b3957615e2ff23aa50a25afd8a6
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group cyberpunk style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0007",
+    "output_image_count": 6,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0003/eval.json b/dataset/style_group_generation_cyberpunk_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..181d681ddb412ececf4507093040b65f188df7cb
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the cyberpunk style consistently applied across all images, featuring elements such as neon lights, high-tech urban landscapes, and a futuristic dystopian aesthetic?",
+            "0_point_standard": "The images show significant stylistic differences that disrupt the visual consistency of the cyberpunk theme.",
+            "1_point_standard": "The cyberpunk style is consistently applied across all images, utilizing coherent neon colors, urban elements, and futuristic aesthetics."
+        },
+        {
+            "question": "Do the generated images accurately represent the themes and subjects described in the text prompt, capturing the core cyberpunk atmosphere of high-tech, low-life?",
+            "0_point_standard": "The images deviate from the described themes or subjects, failing to capture the intended futuristic and gritty feel.",
+            "1_point_standard": "The images accurately reflect the described themes and subjects, effectively conveying the distinctive high-tech, dystopian atmosphere of cyberpunk."
+        },
+        {
+            "question": "Are recurring elements like neon lights, futuristic architecture, or technological devices visually consistent across the images, creating a unified cyberpunk style series?",
+            "0_point_standard": "Key elements vary significantly between images, disrupting continuity and making them feel disconnected from each other.",
+            "1_point_standard": "Key elements, such as neon lights, skyscrapers, or technological devices, are consistently depicted, creating a coherent cyberpunk-themed series."
+        },
+        {
+            "question": "Does each image evoke a strong cyberpunk atmosphere through immersive lighting, shadows, and color schemes, conveying a futuristic and gritty ambiance?",
+            "0_point_standard": "The images lack a coherent atmosphere or fail to evoke the neon-lit dystopian feel typical of cyberpunk.",
+            "1_point_standard": "Each image successfully creates an immersive cyberpunk atmosphere, with neon lighting, shadow contrasts, and color schemes conveying a futuristic and gritty ambiance."
+        },
+        {
+            "question": "Are textures and details like metallic surfaces, neon lights, and holographic elements rendered in high quality, adhering to the cyberpunk style?",
+            "0_point_standard": "Textures or details are poorly rendered or inconsistent, detracting from the refined, high-tech look typical of cyberpunk art.",
+            "1_point_standard": "Textures and details are meticulously rendered, enhancing the refined, high-tech appearance consistent with the cyberpunk theme."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with balanced composition, refined details, and a coherent, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or have low detail quality, diminishing the visual impact of the series.",
+            "1_point_standard": "The images are visually appealing, with balanced composition, high-quality details, and a refined, professional finish, enhancing the cyberpunk theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0003/images.txt b/dataset/style_group_generation_cyberpunk_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_cyberpunk_0003/instruction.txt b/dataset/style_group_generation_cyberpunk_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c123c1f06f4b6edb8916b53b750d8953da01a18a
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 4 images depicting an underground black market in the future, filled with mystery and danger. The first image shows a trader equipped with cyber implants standing in a dilapidated alleyway, with neon lights flickering in the background; the second image depicts an underground marketplace where sellers offer illegal cyber modifications and weapons; the third image shows a hidden black market lab where technicians are performing illicit body modifications for clients, with disorganized machinery and tools in the background; the fourth image features a cyber fugitive escaping from the black market, pursued by drones down neon-lit streets. All images must adhere to the same consistent cyberpunk style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0003/meta.json b/dataset/style_group_generation_cyberpunk_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..d78f3c1d05e6484cea2e947f53fd48a63edc7550
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group cyberpunk style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0007",
+    "output_image_count": 4,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0004/eval.json b/dataset/style_group_generation_cyberpunk_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..9c467c89467c1283d357a3351c952dfa5ac62e75
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the cyberpunk style consistently applied across all images, including elements like neon lights, high-tech cityscapes, and futuristic dystopian aesthetics?",
+            "0_point_standard": "The images display noticeable style differences, disrupting the visual consistency of the cyberpunk theme.",
+            "1_point_standard": "The cyberpunk style is consistently applied across all images, utilizing coherent neon colors, city elements, and futuristic aesthetics."
+        },
+        {
+            "question": "Do the generated images accurately represent the themes and subjects described in the text prompts, capturing the core high-tech, low-life atmosphere of cyberpunk?",
+            "0_point_standard": "The images deviate from the described theme or subject, failing to capture the expected futuristic and gritty feel.",
+            "1_point_standard": "The images accurately reflect the described themes and subjects, effectively conveying the high-tech, dystopian atmosphere characteristic of cyberpunk."
+        },
+        {
+            "question": "Are the recurring elements such as neon lights, futuristic architecture, or technological devices visually consistent across images, creating a unified cyberpunk style series?",
+            "0_point_standard": "Key elements vary significantly between images, breaking continuity and making them feel disconnected from each other.",
+            "1_point_standard": "Key elements such as neon lights, skyscrapers, or technological devices are consistently depicted, creating a coherent cyberpunk-themed series."
+        },
+        {
+            "question": "Does each image evoke a strong cyberpunk atmosphere with immersive lighting, shadows, and color schemes, conveying a futuristic and gritty vibe?",
+            "0_point_standard": "The images lack a coherent atmosphere or fail to evoke the neon-lit dystopian feel typical of cyberpunk.",
+            "1_point_standard": "Each image successfully creates an immersive cyberpunk atmosphere, with neon lighting, shadow contrasts, and color schemes conveying a futuristic and gritty vibe."
+        },
+        {
+            "question": "Are the textures and details like metallic surfaces, neon lights, and holographic elements rendered with high quality and in line with the cyberpunk style?",
+            "0_point_standard": "Textures or details are poorly rendered or inconsistent, diminishing the refined, high-tech appearance typical of cyberpunk art.",
+            "1_point_standard": "Textures and details are meticulously rendered, enhancing the refined, high-tech appearance consistent with the cyberpunk theme."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality with balanced composition, refined details, and a coherent, professional look?",
+            "0_point_standard": "The images lack aesthetic appeal or have low detail quality, reducing the visual impact of the series.",
+            "1_point_standard": "The images are visually appealing, with balanced composition, high-quality details, and a refined, professional finish that enhances the cyberpunk theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0004/images.txt b/dataset/style_group_generation_cyberpunk_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_cyberpunk_0004/instruction.txt b/dataset/style_group_generation_cyberpunk_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..07a8e9f08336fa8355de0500896c1fb25376d3ae
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 5 images depicting a cyberpunk mech battle scene in the future. The first image shows a warrior wearing cyber armor standing in the center of a deserted city square, surrounded by the wreckage of battle; the second image depicts massive mechs engaged in combat amid the ruins of the city, with lasers flashing in the sky; the third image shows the warrior being repaired in an underground base filled with neon lights, where technicians are busy adjusting the armor; the fourth image depicts the warrior piloting their mech, flying between skyscrapers in a futuristic city; the fifth image shows the city after the battle, with collapsed skyscrapers, smoke rising in the air, and mech wreckage scattered on the ground. All images must follow the same consistent cyberpunk style.
\ No newline at end of file
diff --git a/dataset/style_group_generation_cyberpunk_0004/meta.json b/dataset/style_group_generation_cyberpunk_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b1b750d950f1293bcbd06511822d88a2916c3840
--- /dev/null
+++ b/dataset/style_group_generation_cyberpunk_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group cyberpunk style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0007",
+    "output_image_count": 5,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_european_and_american_comic_0001/eval.json b/dataset/style_group_generation_european_and_american_comic_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..350af805b2479bb00be5b8055163452b44be867c
--- /dev/null
+++ b/dataset/style_group_generation_european_and_american_comic_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Do all the images consistently apply the Western comic style, including elements like bold outlines, vibrant colors, and dynamic composition?",
+            "0_point_standard": "The visual style of the images varies significantly, disrupting the visual consistency of the Western comic theme.",
+            "1_point_standard": "All images consistently apply the Western comic style, with unified use of bold outlines, vibrant colors, and stylized features."
+        },
+        {
+            "question": "Do the generated images accurately represent the themes, scenes, or character types described in the text prompts, capturing the distinctive qualities typical of Western comics?",
+            "0_point_standard": "The images deviate from the described themes or character types, lacking key stylistic or thematic elements.",
+            "1_point_standard": "The images accurately reflect the themes and elements described in the text, capturing the expected traits of the Western comic style."
+        },
+        {
+            "question": "Are recurring visual elements such as costumes, props, or background details consistent across all images to maintain a unified aesthetic?",
+            "0_point_standard": "Visual elements like costumes or props vary greatly between different images, making them feel disconnected.",
+            "1_point_standard": "Visual elements are presented consistently, ensuring a cohesive aesthetic throughout the series."
+        },
+        {
+            "question": "Are the characters' expressions and poses expressive and dynamic, enhancing the typical vibrancy and visual appeal of Western comics?",
+            "0_point_standard": "Expressions or poses appear stiff or lack dynamism, reducing the images' visual impact.",
+            "1_point_standard": "Characters' facial expressions are rich, and poses are dynamic, enhancing the typical vibrancy of Western comics."
+        },
+        {
+            "question": "Are the line work, coloring, and shading consistent and well executed, with bold outlines, vibrant colors, and stylized shading appropriate for Western comics?",
+            "0_point_standard": "Line work, coloring, or shading is poorly executed or inconsistent, reducing the comic's visual appeal.",
+            "1_point_standard": "Line work, coloring, and shading are well executed and consistent across all images, enhancing the authentic comic appearance."
+        },
+        {
+            "question": "Do the images demonstrate a high level of aesthetic quality, with detailed refinement, balanced composition, and a cohesive and professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal or appear unfinished, reducing the visual impact of the series.",
+            "1_point_standard": "The images are visually appealing, with balanced composition, high-quality detailing, and a refined, professional finish that enhances the Western comic theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_european_and_american_comic_0001/images.txt b/dataset/style_group_generation_european_and_american_comic_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_european_and_american_comic_0001/instruction.txt b/dataset/style_group_generation_european_and_american_comic_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d19092cfc9e875ee2543b54a1df2a1b25644007c
--- /dev/null
+++ b/dataset/style_group_generation_european_and_american_comic_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 5 images depicting an intense superhero battle, with all images following the same consistent Western comic book style. The first image shows a red-caped superhero speeding between skyscrapers, with a city destroyed by battle in the background; the second image depicts the superhero facing off against a giant robot in the city center, the robot spewing flames while the hero raises his fists; the third image shows the superhero saving a bus about to fall off a bridge, with panicked passengers inside; the fourth image depicts the climactic moment where the hero unleashes all their power, punching through the enemy's armor; the fifth image shows the aftermath of the battle, with the hero standing atop the city's rubble, sunlight streaming between the broken buildings.
\ No newline at end of file
diff --git a/dataset/style_group_generation_european_and_american_comic_0001/meta.json b/dataset/style_group_generation_european_and_american_comic_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..fdadd5f068f53ac5fce3ed77700bb6663c831d03
--- /dev/null
+++ b/dataset/style_group_generation_european_and_american_comic_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group european and american comic style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0001",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0001/eval.json b/dataset/style_group_generation_oil_painting_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..cae54542d6ef2730dcdc9030e9eefd7770a8e39e
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the oil painting style applied consistently across all images, with characteristics like typical brushstrokes, texture, and depth?",
+            "0_point_standard": "There are noticeable differences in style across the images, disrupting the visual consistency of the oil painting theme.",
+            "1_point_standard": "The oil painting style is applied consistently, with cohesive brushstrokes, texture, and depth across all images."
+        },
+        {
+            "question": "Do the generated images accurately represent the themes, subjects, or compositions specified in the text prompts and capture the expected qualities of an oil painting?",
+            "0_point_standard": "The images deviate from the described themes or subjects, lacking key stylistic or thematic elements.",
+            "1_point_standard": "The images accurately reflect the described themes and subjects, capturing the expected qualities of an oil painting."
+        },
+        {
+            "question": "Is there consistency in the color palette and lighting across all images, creating the typical unified aesthetic of an oil painting?",
+            "0_point_standard": "There are significant differences in the color palette or lighting effects, leading to a discordant appearance between images.",
+            "1_point_standard": "The color palette and lighting are applied consistently, enhancing the cohesion and overall aesthetic of the series."
+        },
+        {
+            "question": "Do the textures, such as brushstrokes and layering, realistically mimic the feel of an oil painting, adding depth and dimension to each image?",
+            "0_point_standard": "The textures lack detail or fail to convincingly replicate the typical layering and shading of oil paintings.",
+            "1_point_standard": "The textures are realistically detailed, with brushstrokes and layering adding depth, enhancing the oil painting effect."
+        },
+        {
+            "question": "Does each image exhibit a harmonious composition, with balanced arrangement of elements in line with traditional oil painting aesthetics?",
+            "0_point_standard": "The composition appears unbalanced or inconsistent with the aesthetics of oil paintings.",
+            "1_point_standard": "Each image features a harmonious composition, with a balanced arrangement of elements in line with oil painting aesthetic principles."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with refined details, rich textures, and professional polish, enhancing the oil painting style?",
+            "0_point_standard": "The images lack aesthetic appeal or appear unfinished, reducing the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with rich textures, refined details, and professional polish, enhancing the oil painting theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0001/images.txt b/dataset/style_group_generation_oil_painting_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_oil_painting_0001/instruction.txt b/dataset/style_group_generation_oil_painting_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b1dffefce6ddaf28459bfe0e12bcd4817d5cc8c1
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 5 oil painting-style images depicting a pastoral scene. The first image shows a flower-filled meadow with a few sheep grazing peacefully, with rolling hills in the background; the second image is of a small farmhouse nestled in a valley, with colorful curtains in the windows and a stream gently flowing nearby; the third image depicts a farmer working in the golden wheat fields, with a few white clouds drifting across the sky; the fourth image shows a group of children playing by the river, sunlight reflecting warmly off the water; the fifth image is set at sunset, with the sky painted in shades of orange and red, and the distant trees and mountains softly silhouetted.
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0001/meta.json b/dataset/style_group_generation_oil_painting_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..bd5bff7f0202c66c49ea6b0b54d0575bff7a3986
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group oil painting style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0003",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0002/eval.json b/dataset/style_group_generation_oil_painting_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0d4f6ad6e409838c2d2efcdd9788be005923a96
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the oil painting style consistently applied across all images, featuring typical characteristics such as brushstrokes, texture, and depth?",
+            "0_point_standard": "There are noticeable differences in style among the images, disrupting the visual consistency of the oil painting theme.",
+            "1_point_standard": "The oil painting style is consistently applied, with cohesive brushstrokes, texture, and depth across all images."
+        },
+        {
+            "question": "Do the generated images accurately represent the themes, subjects, or compositions specified in the text prompts and capture the expected qualities of an oil painting?",
+            "0_point_standard": "The images deviate from the described themes or subjects, lacking key stylistic or thematic elements.",
+            "1_point_standard": "The images accurately reflect the described themes and subjects, capturing the expected qualities of an oil painting."
+        },
+        {
+            "question": "Are the color palette and lighting consistent across all images, creating the typical unified aesthetic of an oil painting?",
+            "0_point_standard": "There are significant differences in the color palette or lighting effects, leading to a disjointed appearance among the images.",
+            "1_point_standard": "The color palette and lighting are consistently applied, enhancing the cohesion and overall aesthetic of the series."
+        },
+        {
+            "question": "Do the textures (such as brushstrokes and layering) convincingly mimic the texture of oil paintings, adding depth and dimension to each image?",
+            "0_point_standard": "The textures lack detail or fail to convincingly replicate the typical layering and shading of oil paintings.",
+            "1_point_standard": "The textures are detailed and realistic, with brushstrokes and layering adding depth and enhancing the oil painting effect."
+        },
+        {
+            "question": "Does each image display a harmonious composition, with balanced arrangement of elements, consistent with the aesthetics of traditional oil paintings?",
+            "0_point_standard": "The composition appears unbalanced or inconsistent with the aesthetics of oil paintings.",
+            "1_point_standard": "Each image has a harmonious composition, with a balanced arrangement of elements, adhering to the aesthetic principles of oil paintings."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with refined details, rich textures, and professional finishing that enhance the oil painting style?",
+            "0_point_standard": "The images lack aesthetic appeal or appear unfinished, diminishing the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with rich textures, refined details, and professional finishing that enhance the oil painting theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0002/images.txt b/dataset/style_group_generation_oil_painting_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_oil_painting_0002/instruction.txt b/dataset/style_group_generation_oil_painting_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4947b6e80153cd33aafee3611e3df2a1ddd643dd
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 4 oil painting-style images depicting a city's four seasons. The first image is of a spring street, with pink cherry blossoms blooming on both sides and people strolling in the warm sunshine; the second image shows a summer plaza, with water sparkling from a central fountain and the surrounding buildings framed by lush green trees; the third image depicts an autumn park, with golden leaves covering the ground and a few elderly people sitting on benches, enjoying the cool breeze; the fourth image shows a winter city square, covered in snow, with a Christmas tree adorned with lights in the center, and the distant streets transformed into a snowy wonderland.
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0002/meta.json b/dataset/style_group_generation_oil_painting_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..08ea3c6a0eb30bfecd3b7b8aababa1fcbbd26496
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group oil painting style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0003",
+    "output_image_count": 4,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0003/eval.json b/dataset/style_group_generation_oil_painting_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e6b3f266cecdabe4f45ca0a7091fd0bd18342e4e
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the oil painting style consistently applied across all images, displaying typical characteristics such as brush strokes, texture, and depth?",
+            "0_point_standard": "There are noticeable differences in style among the images, disrupting the visual consistency of the oil painting theme.",
+            "1_point_standard": "The oil painting style is consistently applied, with cohesive brush strokes, texture, and depth across all images."
+        },
+        {
+            "question": "Do the generated images accurately express the theme, subject, or composition specified in the text prompt, capturing the intended qualities of an oil painting?",
+            "0_point_standard": "The images deviate from the described theme or subject, lacking key stylistic or thematic elements.",
+            "1_point_standard": "The images accurately reflect the described theme and subject, capturing the intended qualities of an oil painting."
+        },
+        {
+            "question": "Are the color palette and lighting effects consistent across all images, creating the typical unified aesthetic of oil paintings?",
+            "0_point_standard": "There are significant variations in color palette or lighting effects, resulting in a lack of harmony between images.",
+            "1_point_standard": "The color palette and lighting are consistently applied, enhancing the cohesion and overall aesthetic of the series."
+        },
+        {
+            "question": "Does the texture, such as brush strokes and layering, realistically mimic the tactile quality of oil paintings, adding depth and dimension to each image?",
+            "0_point_standard": "The texture lacks detail or fails to convincingly replicate the typical layering and shading of oil paintings.",
+            "1_point_standard": "The texture details are realistic, with brush strokes and layering adding depth and enhancing the oil painting effect."
+        },
+        {
+            "question": "Does each image exhibit a harmonious composition with balanced arrangement of elements, consistent with the aesthetics of traditional oil paintings?",
+            "0_point_standard": "The composition appears unbalanced or inconsistent with the aesthetics of oil paintings.",
+            "1_point_standard": "Each image has a harmonious composition with balanced arrangement of elements, in line with the aesthetic principles of oil paintings."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with refined details, rich textures, and professional finishing, enhancing the oil painting style?",
+            "0_point_standard": "The images lack aesthetic appeal or appear unfinished, detracting from the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with rich textures, refined details, and professional finishing, enhancing the oil painting theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0003/images.txt b/dataset/style_group_generation_oil_painting_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_oil_painting_0003/instruction.txt b/dataset/style_group_generation_oil_painting_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ee242ebcc40d99ba06ac7dcdb6624d41964f975c
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 6 oil painting-style images depicting a tranquil lakeside landscape. The first image shows a fisherman sitting on the grassy shore, the water as calm as a mirror, reflecting the clouds in the sky; the second image depicts the forest by the lake, with sunlight filtering through the leaves and casting dappled shadows on the water; the third image shows a small boat moored quietly by the shore, with distant mountains reflected in the lake; the fourth image is set at sunset, with the sky ablaze in red and orange hues, and birds flying home in the distance; the fifth image shows the lake at night, with stars twinkling in the water, as if the lake has become a mirror of the sky; the sixth image is set in the early morning, with mist gently covering the lake and distant mountains barely visible.
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0003/meta.json b/dataset/style_group_generation_oil_painting_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..e0d1444106034a53e8a7e0f438f3333f11fabdab
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group oil painting style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0003",
+    "output_image_count": 6,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0004/eval.json b/dataset/style_group_generation_oil_painting_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..659dea37844b167a828c70ce551b24ce7b2daf8e
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the oil painting style consistently applied across all images, featuring typical characteristics such as brushstrokes, texture, and depth?",
+            "0_point_standard": "There are noticeable differences in style across the images, disrupting the visual consistency of the oil painting theme.",
+            "1_point_standard": "The oil painting style is consistently applied, with cohesive brushstrokes, texture, and depth across all images."
+        },
+        {
+            "question": "Do the generated images accurately express the themes, subjects, or compositions specified in the text prompts, capturing the intended qualities of oil paintings?",
+            "0_point_standard": "The images deviate from the described themes or subjects, lacking key stylistic or thematic elements.",
+            "1_point_standard": "The images accurately reflect the described themes and subjects, capturing the intended qualities of oil paintings."
+        },
+        {
+            "question": "Are the color palette and lighting effects consistent across all images, creating the typical unified aesthetic of oil paintings?",
+            "0_point_standard": "There are significant differences in the color palette or lighting effects, resulting in a disjointed appearance between the images.",
+            "1_point_standard": "The color palette and lighting are applied consistently, enhancing the cohesion and overall aesthetic of the series."
+        },
+        {
+            "question": "Do the textures (such as brushstrokes and layering) realistically mimic the feel of an oil painting, adding depth and dimension to each image?",
+            "0_point_standard": "The textures lack detail or fail to convincingly replicate the typical layering and shading of oil paintings.",
+            "1_point_standard": "The texture details are realistic, with brushstrokes and layering adding depth and enhancing the oil painting effect."
+        },
+        {
+            "question": "Does each image display a harmonious composition with balanced arrangement of elements in line with the aesthetics of traditional oil paintings?",
+            "0_point_standard": "The composition appears unbalanced or inconsistent with the aesthetics of oil paintings.",
+            "1_point_standard": "Each image has a harmonious composition with a balanced arrangement of elements, adhering to the aesthetic principles of oil paintings."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with refined details, rich textures, and professional finishing that enhance the oil painting style?",
+            "0_point_standard": "The images lack aesthetic appeal or appear unfinished, detracting from the overall series effect.",
+            "1_point_standard": "The images are visually appealing, with rich textures, refined details, and professional finishing that enhance the oil painting theme."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0004/images.txt b/dataset/style_group_generation_oil_painting_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_oil_painting_0004/instruction.txt b/dataset/style_group_generation_oil_painting_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b51dc04a195ca393687c2485a51e0b0fa88c1544
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 4 oil painting-style images depicting a bustling harbor. The first image shows the pier at dawn, with fishing boats setting out to sea and a light mist hanging over the water; the second image is set at noon, with ships coming and going, and workers busily loading cargo on the docks; the third image shows the harbor at sunset, with the sky painted in gold, and the silhouettes of ships standing out against the setting sun; the fourth image is set at night, with the lighthouse casting its beam over the sea, and distant ships slowly entering the harbor under the night sky.
\ No newline at end of file
diff --git a/dataset/style_group_generation_oil_painting_0004/meta.json b/dataset/style_group_generation_oil_painting_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..029e47f5c98452ed77a47b762e2f97a80da662c0
--- /dev/null
+++ b/dataset/style_group_generation_oil_painting_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group oil painting style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0003",
+    "output_image_count": 4,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0001/eval.json b/dataset/style_group_generation_pixel_art_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..e3c90da92ca3aa852f6c25a5aa20788187944164
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the pixel art style consistently applied across all images, including pixel size, resolution, and the typical simplicity of pixel art?",
+            "0_point_standard": "There are significant differences in pixel size or style among the images, disrupting the visual consistency of the pixel art theme.",
+            "1_point_standard": "The pixel art style is consistently applied; all images have cohesive pixel size, resolution, and stylistic simplicity."
+        },
+        {
+            "question": "Do the generated images accurately express the theme, objects, or characters described in the text prompts within the constraints of pixel art?",
+            "0_point_standard": "The images deviate from the described theme or objects, lacking key elements or stylistic details.",
+            "1_point_standard": "The images accurately reflect the described theme and objects, capturing the intended features in the pixel art style."
+        },
+        {
+            "question": "Are the color palette and tone consistent across all images, using the limited and harmonious colors typical of pixel art?",
+            "0_point_standard": "There are significant differences in palette or tone, leading to a discordant appearance between the images.",
+            "1_point_standard": "The color palette and tone are consistently applied, featuring a cohesive and limited color scheme that aligns with pixel art aesthetics."
+        },
+        {
+            "question": "Are the objects or subjects in each image clearly presented with sufficient detail and recognizable shapes within the constraints of pixel art?",
+            "0_point_standard": "Objects or subjects are unclear or difficult to recognize, lacking the necessary detail for identification.",
+            "1_point_standard": "Each object or subject is clearly presented with recognizable shapes and sufficient detail to be clearly visible within pixel constraints."
+        },
+        {
+            "question": "Does each image effectively use shading techniques like dithering or contrast to create depth and dimension without overcomplicating the pixel art style?",
+            "0_point_standard": "Shading and highlights are poorly executed or overly complex, compromising the simplicity and clarity of pixel art.",
+            "1_point_standard": "Shading and highlights are effectively applied, with techniques like dithering and contrast adding depth while maintaining pixel art aesthetics."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, featuring refined details, consistent pixel precision, and a cohesive, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal, show inconsistencies, or appear unfinished, diminishing the overall effect of the series.",
+            "1_point_standard": "The images are visually appealing, with refined details, consistent pixel precision, and a professional finish, enhancing the pixel art style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0001/images.txt b/dataset/style_group_generation_pixel_art_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_pixel_art_0001/instruction.txt b/dataset/style_group_generation_pixel_art_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dd398e54f50d035405f93ab11a39ada0cdd5da3a
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 5 pixel art-style images depicting a bustling pixel city. The first image shows a morning city street, with vendors setting up stalls and sunlight hitting pixelated buildings; the second image is set at noon in the city square, with people relaxing by a fountain and pixel skyscrapers towering in the background; the third image shows the entrance to the subway, with a pixelated train arriving as passengers wait on the platform; the fourth image depicts a shopping street filled with neon signs and crowds of shoppers; the fifth image shows the city at night, with windows glowing from skyscrapers and the streets bustling with life under the night sky.
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0001/meta.json b/dataset/style_group_generation_pixel_art_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..e8bc63a8e0d3198f8ac17ec982ad3a67fe795d50
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group pixel art style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0004",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0003/eval.json b/dataset/style_group_generation_pixel_art_0003/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..eb55fd871ab992cccb4d4ac435c9b4136e967986
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0003/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the pixel art style consistently applied across all images, including pixel size, resolution, and the typical simplicity of pixel art?",
+            "0_point_standard": "There are significant differences in pixel size or style among the images, disrupting the visual consistency of the pixel art theme.",
+            "1_point_standard": "The pixel art style is consistently applied, with cohesive pixel size, resolution, and stylistic simplicity across all images."
+        },
+        {
+            "question": "Do the generated images accurately express the theme, objects, or characters described in the text prompt within the constraints of pixel art?",
+            "0_point_standard": "The images deviate from the described theme or object, lacking key elements or stylistic details.",
+            "1_point_standard": "The images accurately reflect the described theme and objects, capturing the intended features within the pixel art style."
+        },
+        {
+            "question": "Are the palette and tone consistent across all images, using the limited and harmonious colors typical of pixel art?",
+            "0_point_standard": "There is significant variation in the palette or tone, resulting in a discordant appearance across images.",
+            "1_point_standard": "The palette and tone are consistently applied, with a cohesive and limited color scheme that aligns with pixel art aesthetics."
+        },
+        {
+            "question": "Are the objects or entities in each image clearly presented, with sufficient detail and recognizable shapes within the constraints of pixel art?",
+            "0_point_standard": "Objects or entities are unclear or difficult to recognize, lacking the necessary details for identification.",
+            "1_point_standard": "Each object or entity is clearly presented, with recognizable shapes and sufficient details to be clearly visible within pixel constraints."
+        },
+        {
+            "question": "Does each image effectively use shading techniques such as dithering or contrast to create depth and dimension without overcomplicating the pixel art style?",
+            "0_point_standard": "Shading and highlights are poorly executed or overly complex, compromising the simplicity and clarity of pixel art.",
+            "1_point_standard": "Shading and highlights are effectively applied, using techniques like dithering and contrast to add depth while maintaining the pixel art aesthetic."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with refined details, consistent pixel accuracy, and a cohesive, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal, with inconsistencies or an unfinished look that detracts from the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with refined details, consistent pixel accuracy, and a professional level of completion that enhances the pixel art style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0003/images.txt b/dataset/style_group_generation_pixel_art_0003/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_pixel_art_0003/instruction.txt b/dataset/style_group_generation_pixel_art_0003/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7c15b980461bde12f5307cdbef48a7b31de177b1
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0003/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 6 pixel art-style images depicting a futuristic cyberpunk city. The first image shows a character in cyber gear standing on a rooftop, looking out at the neon-lit city in the distance; the second image is set on a crowded street, with people in high-tech clothing and holographic billboards flashing overhead; the third image depicts the lower levels of the cyber city, filled with pipes and abandoned machinery; the fourth image shows a cyber bar, where customers sit in dim lighting while holographic screens display streams of data; the fifth image depicts a high-speed highway above the city, with flying vehicles darting through the skyline; the sixth image shows a nighttime cityscape, where neon lights and vehicle lights illuminate the dark sky.
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0003/meta.json b/dataset/style_group_generation_pixel_art_0003/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ae46c8e173a59f5920b512d2ffc8af4518d5b47a
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0003/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group pixel art style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0004",
+    "output_image_count": 6,
+    "case_id": "0003"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0004/eval.json b/dataset/style_group_generation_pixel_art_0004/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..f306d426aeb15c88309d746f1c385fd6de43e433
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0004/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the pixel art style consistently applied across all images, including pixel size, resolution, and the typical simplicity of pixel art?",
+            "0_point_standard": "There are significant differences in pixel size or style among images, disrupting the visual consistency of the pixel art theme.",
+            "1_point_standard": "The pixel art style is consistently applied, with cohesive pixel size, resolution, and simplicity of style across all images."
+        },
+        {
+            "question": "Do the generated images accurately convey the theme, objects, or characters described in the text prompt within the constraints of pixel art?",
+            "0_point_standard": "The images deviate from the described theme or objects, lacking key elements or stylistic details.",
+            "1_point_standard": "The images accurately reflect the described theme and objects, capturing the intended characteristics in the pixel art style."
+        },
+        {
+            "question": "Is the palette and tone consistent across all images, using a limited and harmonious color scheme typical of pixel art?",
+            "0_point_standard": "There is a significant difference in palette or tone, resulting in a discordant appearance between images.",
+            "1_point_standard": "The palette and tone are consistently applied, with a cohesive and limited color scheme in line with pixel art aesthetics."
+        },
+        {
+            "question": "Are the objects or figures in each image clearly presented, with sufficient detail and recognizable shapes within the constraints of pixel art?",
+            "0_point_standard": "Objects or figures are unclear or hard to recognize, lacking necessary details for identification.",
+            "1_point_standard": "Each object or figure is clearly presented, with recognizable shapes and sufficient details to be clearly visible within pixel constraints."
+        },
+        {
+            "question": "Does each image effectively use shading techniques, such as dithering or contrast, to create depth and dimension without overly complicating the pixel art style?",
+            "0_point_standard": "Shading and highlights are poorly executed or overly complex, compromising the clarity and simplicity of pixel art.",
+            "1_point_standard": "Shading and highlights are effectively applied, using techniques like dithering and contrast to add depth while maintaining the pixel art aesthetic."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with refined details, consistent pixel accuracy, and a cohesive, professional appearance?",
+            "0_point_standard": "The images lack aesthetic appeal, have inconsistencies, or appear unfinished, reducing the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with refined details, consistent pixel accuracy, and a professional finish that enhances the pixel art style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0004/images.txt b/dataset/style_group_generation_pixel_art_0004/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_pixel_art_0004/instruction.txt b/dataset/style_group_generation_pixel_art_0004/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..97d2d1ddc4d5010af714ba40a69b7c47fa77c849
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0004/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 3 pixel art-style images depicting a mountain and a temple at its peak. The first image shows a steep mountain path, with clouds swirling around the summit, and pixelated tree leaves swaying gently in the wind; the second image depicts a pixelated waterfall cascading down the mountainside, the water formed by strings of pixel blocks; the third image shows the temple at the mountain peak, its doors open to reveal a vast sky, with pixel birds circling overhead.
\ No newline at end of file
diff --git a/dataset/style_group_generation_pixel_art_0004/meta.json b/dataset/style_group_generation_pixel_art_0004/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..f128b0237fc4bbc494d67bb2b3f600b139687218
--- /dev/null
+++ b/dataset/style_group_generation_pixel_art_0004/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group pixel art style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0004",
+    "output_image_count": 3,
+    "case_id": "0004"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_sketch_0001/eval.json b/dataset/style_group_generation_sketch_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..264fa350c6b4a18b8f5a79f7b6cfa9b2308aa40a
--- /dev/null
+++ b/dataset/style_group_generation_sketch_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the sketch style consistent across all images, including elements such as line quality, shading techniques, and pencil texture?",
+            "0_point_standard": "There are noticeable differences in style across the images, disrupting the visual consistency of the sketch theme.",
+            "1_point_standard": "The sketch style is used consistently, with cohesive line quality, shading, and pencil texture across all images."
+        },
+        {
+            "question": "Do the generated images accurately represent the theme, object, or subject described in the text prompt, capturing the intended qualities of sketch art?",
+            "0_point_standard": "The images deviate from the described theme or subject, lacking key elements or stylistic details.",
+            "1_point_standard": "The images accurately reflect the described theme and subject, capturing the intended features in sketch style."
+        },
+        {
+            "question": "Is the consistency in the thickness of lines and brushstrokes maintained across all images, creating a unified aesthetic that aligns with traditional sketch techniques?",
+            "0_point_standard": "There is significant variation in line and brushstroke thickness, leading to a disjointed appearance between images.",
+            "1_point_standard": "The consistency in line and brushstroke thickness creates a cohesive visual effect across all images in the series."
+        },
+        {
+            "question": "Are shading techniques such as hatching, cross-hatching, or smudging realistically and effectively applied to create depth and dimensionality?",
+            "0_point_standard": "The shading is unrealistic or inconsistent, lacking depth and failing to replicate sketch techniques.",
+            "1_point_standard": "Shading is effectively applied, with realistic techniques enhancing depth and sketch quality."
+        },
+        {
+            "question": "Does each image maintain the typical paper-like texture and tactile feel of pencil sketches?",
+            "0_point_standard": "The images lack a paper-like or textured feel, reducing the authenticity of the sketch style.",
+            "1_point_standard": "The images present a paper-like texture, enhancing the tactile feel and authenticity of the sketch style."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with refined details, balanced composition, and a professional look that enhances the sketch style?",
+            "0_point_standard": "The images lack aesthetic appeal or appear unfinished, diminishing the visual impact of the series.",
+            "1_point_standard": "The images are visually appealing, with refined details, balanced composition, and a professional level of finish that enhances the sketch style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_sketch_0001/images.txt b/dataset/style_group_generation_sketch_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_sketch_0001/instruction.txt b/dataset/style_group_generation_sketch_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..74fb1f97ab081ecbd581d542f66862e067863287
--- /dev/null
+++ b/dataset/style_group_generation_sketch_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 5 sketch-style images depicting a peaceful countryside scene. The first image shows a stone bridge by a river, with the water gently flowing and tall grass growing on the banks; the second image is of a cottage in the countryside, with smoke rising from the chimney and rolling fields and distant hills in the background; the third image depicts a villager working in the fields, pushing a wooden cart, with vast farmland stretching behind; the fourth image shows the sunset, with the sky bathed in soft colors, and the silhouette of trees against the sun; the fifth image is of a starry night sky, with the village below softly illuminated in the calm of the night.
\ No newline at end of file
diff --git a/dataset/style_group_generation_sketch_0001/meta.json b/dataset/style_group_generation_sketch_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..6693c78175f84fd1ca8f5b21a84f998edc6f35f3
--- /dev/null
+++ b/dataset/style_group_generation_sketch_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group sketch style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0009",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_woodcut_0001/eval.json b/dataset/style_group_generation_woodcut_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..75bf9e21f14846be4b44ef153db2f154517ef961
--- /dev/null
+++ b/dataset/style_group_generation_woodcut_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Are all images consistently applying the woodcut style, including features like bold lines, carved textures, and the high contrast typical of woodblock prints?",
+            "0_point_standard": "The images exhibit noticeable stylistic variations, disrupting the visual consistency of the woodcut theme.",
+            "1_point_standard": "The woodcut style is consistently applied, with all images uniformly using bold lines, textures, and contrast."
+        },
+        {
+            "question": "Do the generated images accurately represent the themes, objects, or characters described in the text prompts, capturing the intended characteristics of woodcut art?",
+            "0_point_standard": "The images deviate from the described themes or objects, lacking key stylistic or thematic details.",
+            "1_point_standard": "The images accurately reflect the described themes and objects, capturing the intended characteristics of woodcut art."
+        },
+        {
+            "question": "Is the quality of lines and engraving patterns (e.g., bold outlines, cross-hatching, or parallel lines) consistent across all images, creating the typical unified aesthetic of woodcut art?",
+            "0_point_standard": "There is significant variation in line quality and engraving patterns, leading to a lack of cohesive appearance among the images.",
+            "1_point_standard": "Line quality and engraving patterns are consistent, enhancing the overall look and feel of the woodcut series."
+        },
+        {
+            "question": "Do the images effectively use high contrast and negative space to create depth and clarity, as seen in traditional woodblock prints?",
+            "0_point_standard": "The use of contrast and negative space is inconsistent or ineffective, resulting in a lack of depth or clarity.",
+            "1_point_standard": "The use of contrast and negative space is effective, adding depth, clarity, and dynamic visual quality to the images."
+        },
+        {
+            "question": "Does each image maintain a carved appearance similar to the texture and tactile quality of woodcut prints?",
+            "0_point_standard": "The images lack the textured quality similar to woodcuts, undermining the authenticity of the style.",
+            "1_point_standard": "Each image possesses a textured and carved appearance, adding tactile quality and enhancing the authenticity of the woodcut style."
+        },
+        {
+            "question": "Do the images exhibit a high level of aesthetic quality, with fine detail, balanced composition, and a professional finish that enhances the woodcut style?",
+            "0_point_standard": "The images lack aesthetic appeal or appear unfinished, diminishing the overall impact of the series.",
+            "1_point_standard": "The images are visually appealing, with fine detail, balanced composition, and a professional finish, enhancing the woodcut style."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/style_group_generation_woodcut_0001/images.txt b/dataset/style_group_generation_woodcut_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/style_group_generation_woodcut_0001/instruction.txt b/dataset/style_group_generation_woodcut_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e16030a93808b15c29e992253901156bafb9e972
--- /dev/null
+++ b/dataset/style_group_generation_woodcut_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a set of 5 woodcut-style images depicting an ancient mountain temple. The first image shows the temple entrance, with a large stone gate standing between towering mountains and bamboo groves in the background; the second image is of the temple courtyard, where monks are meditating, surrounded by trees and stone carvings that evoke a sense of serenity and reverence; the third image shows the temple's bell tower, where an ancient copper bell hangs inside a wooden tower, with low clouds in the distance; the fourth image depicts the temple's hallway, where wind chimes hang on both sides, softly ringing in the breeze; the fifth image shows the temple's prayer room, where monks are quietly praying by candlelight, with plain stone walls in the background.
\ No newline at end of file
diff --git a/dataset/style_group_generation_woodcut_0001/meta.json b/dataset/style_group_generation_woodcut_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b814d8a628070bff04e4835b2cb5b70df1d117c
--- /dev/null
+++ b/dataset/style_group_generation_woodcut_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "group woodcut style images generation",
+    "num_of_cases": 4,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0010",
+    "output_image_count": 5,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_insertion_0001/eval.json b/dataset/text_editing_text_insertion_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..39ec8caa3e47c577b198397b5f71824d08ebd743
--- /dev/null
+++ b/dataset/text_editing_text_insertion_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Is the inserted text positioned in the specified location as described in the text?",
+            "0_point_standard": "The insertion point of the text differs from the specified location in the description.",
+            "1_point_standard": "The text is precisely inserted at the location specified in the description."
+        },
+        {
+            "question": "Other than the specified text insertion, does the rest of the image remain unchanged and consistent with the original image?",
+            "0_point_standard": "There are noticeable changes or distortions in parts of the image that were not intended to be modified.",
+            "1_point_standard": "The rest of the image remains unchanged and consistent with the original."
+        },
+        {
+            "question": "Does the inserted text match the font, size, and style specified in the text description?",
+            "0_point_standard": "The inserted text does not match the specified font, size, or style.",
+            "1_point_standard": "The inserted text completely matches the font, size, and style specified in the description."
+        },
+        {
+            "question": "Is the content of the inserted text accurate and fully consistent with the given text description?",
+            "0_point_standard": "The text content is incorrect, with errors or omissions.",
+            "1_point_standard": "The text content is accurate and fully consistent with the given text description."
+        },
+        {
+            "question": "Does the inserted text blend seamlessly into the image, looking natural and maintaining the overall aesthetic of the image?",
+            "0_point_standard": "The text insertion looks fake or disjointed, disrupting the aesthetics of the image.",
+            "1_point_standard": "The text blends seamlessly, maintaining the overall aesthetic and visual harmony of the image."
+        },
+        {
+            "question": "Does the final image (including text insertion) exhibit high-quality editing without any visible flaws or errors?",
+            "0_point_standard": "The image shows visible editing errors or flaws, reducing its quality.",
+            "1_point_standard": "The image exhibits high-quality editing with no visible flaws or errors, maintaining a professional finish."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_insertion_0001/images.txt b/dataset/text_editing_text_insertion_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..488bd3c661a34b5a1268d12e4dd63baa9fd3cc94
--- /dev/null
+++ b/dataset/text_editing_text_insertion_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01BPGXNc1OmdLYuaV3i_!!6000000001748-0-tps-7718-5148.jpg
diff --git a/dataset/text_editing_text_insertion_0001/instruction.txt b/dataset/text_editing_text_insertion_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0c09a6b8b878fb286cde34e8beb11f91ab070468
--- /dev/null
+++ b/dataset/text_editing_text_insertion_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate an image by adding the following English text, “THE JOURNEY BEGINS”, in a perspective style on the road, making it look like the text is painted on the road. Ensure that the text aligns with the perspective of the road and that all other elements in the image, including lighting, shadows, and reflections, remain unchanged.
\ No newline at end of file
diff --git a/dataset/text_editing_text_insertion_0001/meta.json b/dataset/text_editing_text_insertion_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..08284a83b73573bda961b94fe79fe95e52882dde
--- /dev/null
+++ b/dataset/text_editing_text_insertion_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "text insertion",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0084",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_modification_0001/eval.json b/dataset/text_editing_text_modification_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..958b3bc3ee261ace3819463cde0295ec45e08e74
--- /dev/null
+++ b/dataset/text_editing_text_modification_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified text in the image accurately reflect the changes specified in the text description?",
+            "0_point_standard": "The modified text does not match the changes specified in the text description or contains errors.",
+            "1_point_standard": "The modified text accurately reflects the changes specified in the text description without errors."
+        },
+        {
+            "question": "Is the modification task executed correctly, ensuring only the specified text is changed while the rest of the image remains unchanged?",
+            "0_point_standard": "Elements other than the specified text have been altered, affecting the integrity of the rest of the image.",
+            "1_point_standard": "Only the specified text has been changed, with no alterations to other elements of the image."
+        },
+        {
+            "question": "Does the modified text maintain the original style and format of the text in the image?",
+            "0_point_standard": "The style and format of the modified text do not match the original text in the image.",
+            "1_point_standard": "The modified text maintains the original style and format, ensuring consistency with the rest of the image."
+        },
+        {
+            "question": "Does the modified text integrate correctly into the image, ensuring seamless blending with the surrounding content?",
+            "0_point_standard": "The modified text appears uncoordinated or poorly blended, disrupting the visual coherence of the image.",
+            "1_point_standard": "The modified text seamlessly integrates, maintaining the visual coherence and flow of the image."
+        },
+        {
+            "question": "Is the quality of text editing in the image professional, ensuring the text is clear and readable?",
+            "0_point_standard": "The text editing is poorly executed, resulting in text that is unclear or difficult to read.",
+            "1_point_standard": "The text editing is professionally executed, ensuring the text is clear and readable."
+        },
+        {
+            "question": "Does the overall image with modified text retain its aesthetic appeal and meet professional visual quality standards?",
+            "0_point_standard": "The image lacks aesthetic appeal or does not meet professional visual quality standards.",
+            "1_point_standard": "The image retains its aesthetic appeal and meets professional visual quality standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_modification_0001/images.txt b/dataset/text_editing_text_modification_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..626e6777a3a1f4b9317a977224d8f2c9b826176b
--- /dev/null
+++ b/dataset/text_editing_text_modification_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i4/O1CN01lOifEh1FgNOU2DdDK_!!6000000000516-0-tps-3691-5536.jpg
diff --git a/dataset/text_editing_text_modification_0001/instruction.txt b/dataset/text_editing_text_modification_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..83c3a6d2d9ea4bac83fbc7f148ea029ef6fc71ce
--- /dev/null
+++ b/dataset/text_editing_text_modification_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate an image where the texts on the street signs reading “São João” and “Ipiranga” are replaced with “Rua Flores” and “Avenida Luz”, respectively. All other elements, including font style, color, material, and the background buildings and image details, should remain unchanged. The final result should make it look like the original street signs always read “Rua Flores” and “Avenida Luz”.
\ No newline at end of file
diff --git a/dataset/text_editing_text_modification_0001/meta.json b/dataset/text_editing_text_modification_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..58fc6e766bf45d02667993854059bd3abc002de9
--- /dev/null
+++ b/dataset/text_editing_text_modification_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "text modification",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0081",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_modification_0002/eval.json b/dataset/text_editing_text_modification_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..89fb0ee44f273291e7df211eb4fa2e463c6e7b07
--- /dev/null
+++ b/dataset/text_editing_text_modification_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified text in the image accurately reflect the changes specified in the text description?",
+            "0_point_standard": "The modified text does not match the changes specified in the text description or contains errors.",
+            "1_point_standard": "The modified text accurately reflects the changes specified in the text description, with no errors."
+        },
+        {
+            "question": "Was the modification task executed correctly, ensuring that only the specified text was changed while the rest of the image remained unchanged?",
+            "0_point_standard": "Elements other than the specified text were also changed, affecting the integrity of the rest of the image.",
+            "1_point_standard": "Only the specified text was changed, with no alterations to other elements of the image."
+        },
+        {
+            "question": "Does the modified text retain the original style and format of the text in the image?",
+            "0_point_standard": "The style and format of the modified text do not match the original text in the image.",
+            "1_point_standard": "The modified text retains the original style and format, ensuring consistency with the rest of the image."
+        },
+        {
+            "question": "Does the modified text blend into the image correctly, ensuring seamless integration with the surrounding content?",
+            "0_point_standard": "The modified text looks out of place or poorly integrated, disrupting the visual coherence of the image.",
+            "1_point_standard": "The modified text is seamlessly integrated, maintaining the visual coherence and flow of the image."
+        },
+        {
+            "question": "Is the quality of text editing in the image professional, with clear and readable text?",
+            "0_point_standard": "The text editing is poorly executed, resulting in unclear or hard-to-read text.",
+            "1_point_standard": "The text editing is professionally executed, resulting in clear and readable text."
+        },
+        {
+            "question": "Does the overall image with modified text retain aesthetic appeal and meet professional visual quality standards?",
+            "0_point_standard": "The image lacks aesthetic appeal or fails to meet professional visual quality standards.",
+            "1_point_standard": "The image retains aesthetic appeal and meets professional visual quality standards."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_modification_0002/images.txt b/dataset/text_editing_text_modification_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1df4f3cdf23c3972fc4f4a8b2522a2cf669ea72c
--- /dev/null
+++ b/dataset/text_editing_text_modification_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN01fuExC41qmSR5nr8aP_!!6000000005538-0-tps-6000-4000.jpg
diff --git a/dataset/text_editing_text_modification_0002/instruction.txt b/dataset/text_editing_text_modification_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ea4b7419729156ce49a964ef0e411ee32234f572
--- /dev/null
+++ b/dataset/text_editing_text_modification_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate an image where the texts on the walls reading “AVENIDA VIRGEN DEL MAR” and “CALLE EL TRAPECIO” are replaced with “CALLE SOL” and “PLAZA LUNA”, respectively. All other elements, including font style, color, material, and the wall texture and lighting in the image, should remain unchanged. The final result should make it look like the original wall texts always read “CALLE SOL” and “PLAZA LUNA”.
\ No newline at end of file
diff --git a/dataset/text_editing_text_modification_0002/meta.json b/dataset/text_editing_text_modification_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed4eb4bfc76878350911dd20e85fb2c858522e1e
--- /dev/null
+++ b/dataset/text_editing_text_modification_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "text modification",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0081",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_removal_0002/eval.json b/dataset/text_editing_text_removal_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b34f875007709ee4bf44f0e47743009c19ff9b9
--- /dev/null
+++ b/dataset/text_editing_text_removal_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Was the text removal process effective in eliminating the specified text from the image?",
+            "0_point_standard": "The specified text is still partially or fully visible after the removal process.",
+            "1_point_standard": "The specified text has been completely removed from the image, with no residual or visible traces."
+        },
+        {
+            "question": "Does the area where the text was removed seamlessly blend with the rest of the image without noticeable artifacts or distortions?",
+            "0_point_standard": "The area where the text was removed shows noticeable artifacts, color mismatches, or distortions, disrupting the uniformity of the image.",
+            "1_point_standard": "The area looks natural and seamlessly blends with the surrounding parts of the image without any visible artifacts or distortions."
+        },
+        {
+            "question": "Did the text removal process preserve the original content, style, and features of the rest of the image?",
+            "0_point_standard": "There are noticeable changes or alterations to the original content, style, or features of the image apart from the text removal.",
+            "1_point_standard": "The rest of the image retains its original content, style, and features with no unexpected changes."
+        },
+        {
+            "question": "Does the text removal meet the specific requirements outlined in the text description, such as retaining certain elements or formats?",
+            "0_point_standard": "The text removal does not comply with the specific requirements or conditions outlined in the text description.",
+            "1_point_standard": "The text removal meets all the specified requirements and conditions described in the text input."
+        },
+        {
+            "question": "Is the quality of the image maintained, with no loss in resolution or visual quality due to the text removal process?",
+            "0_point_standard": "There is a degradation in the image's resolution or visual quality due to text removal, such as blurring or pixelation.",
+            "1_point_standard": "The image retains its original resolution and visual quality, with no quality loss due to text removal."
+        },
+        {
+            "question": "Does the edited image exhibit a high level of professional aesthetic quality, looking natural and pleasing to the eye?",
+            "0_point_standard": "The edited image lacks aesthetic quality, appearing unprofessional or unnatural.",
+            "1_point_standard": "The edited image exhibits a high level of professional aesthetic quality, looking natural and pleasing to the eye."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_removal_0002/images.txt b/dataset/text_editing_text_removal_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5b77b86fb2684cc1ac1a5ab09859f73b5789049c
--- /dev/null
+++ b/dataset/text_editing_text_removal_0002/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i3/O1CN01Bung1h218ZijC8YmS_!!6000000006940-0-tps-6000-4000.jpg
diff --git a/dataset/text_editing_text_removal_0002/instruction.txt b/dataset/text_editing_text_removal_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..91096c6e468bf371bb8e9cf1c44a33c803d06004
--- /dev/null
+++ b/dataset/text_editing_text_removal_0002/instruction.txt
@@ -0,0 +1 @@
+Please generate an image by removing all the text from the picture while keeping all other elements of the image unchanged. Ensure that the area where the text is removed blends naturally with the surrounding environment, maintaining the texture and lighting effects on the wall.
\ No newline at end of file
diff --git a/dataset/text_editing_text_removal_0002/meta.json b/dataset/text_editing_text_removal_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..70db18ee1ea62cff3e842e12ed98bfe3c1f000b1
--- /dev/null
+++ b/dataset/text_editing_text_removal_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "text removal",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0083",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_style_editing_0002/eval.json b/dataset/text_editing_text_style_editing_0002/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..813bff6b16ff6cedff2b45d2c5d044810bd521fa
--- /dev/null
+++ b/dataset/text_editing_text_style_editing_0002/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the modified text style in Image A match the style in Image B, accurately reflecting the features of the reference image?",
+            "0_point_standard": "The text style does not match the reference style in Image B, showing noticeable differences or missing style elements.",
+            "1_point_standard": "The text style in Image A closely matches the reference style in Image B, accurately capturing the desired style characteristics."
+        },
+        {
+            "question": "Were only the specified text styles modified, with all other elements in Image A remaining unchanged?",
+            "0_point_standard": "Unexpected changes are present in other parts of Image A, affecting areas that should have remained unchanged.",
+            "1_point_standard": "Only the specified text styles were modified, with all other elements in Image A completely unchanged."
+        },
+        {
+            "question": "Is the modified text in Image A logical within its original context, ensuring the new style integrates naturally with the surrounding content?",
+            "0_point_standard": "The modified text style appears out of place or inconsistent within the context of Image A, disrupting visual harmony.",
+            "1_point_standard": "The modified text style integrates naturally with the context of Image A, enhancing coherence and seamlessly fitting with the surrounding content."
+        },
+        {
+            "question": "Does the modified text completely retain the original content, changing only the style according to the reference?",
+            "0_point_standard": "The text content has changed or is unclear after the style modification, deviating from the original wording.",
+            "1_point_standard": "The text content is exactly the same as the original, with only the style modified according to the reference."
+        },
+        {
+            "question": "Do the lighting, shadow, and texture of the modified text match the style of Image B, contributing to a realistic and cohesive appearance?",
+            "0_point_standard": "There are inconsistencies in the lighting, shadow, or texture of the modified text, making it look unrealistic or disconnected from the reference style.",
+            "1_point_standard": "The lighting, shadow, and texture of the modified text match the style of Image B, creating a cohesive and realistic appearance."
+        },
+        {
+            "question": "Does the final image maintain a high aesthetic quality, with the modified text style enhancing the overall visual appeal?",
+            "0_point_standard": "The final image lacks aesthetic cohesion or appeal, with the modified text style reducing its visual quality.",
+            "1_point_standard": "The final image displays high aesthetic quality, with the modified text style seamlessly enhancing its overall visual appeal."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/text_editing_text_style_editing_0002/images.txt b/dataset/text_editing_text_style_editing_0002/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..36081ca1c57d1101dc5d7634650f7dc039374314
--- /dev/null
+++ b/dataset/text_editing_text_style_editing_0002/images.txt
@@ -0,0 +1,2 @@
+https://img.alicdn.com/imgextra/i3/O1CN01Za0wi11gAlgYxz8ex_!!6000000004102-0-tps-988-1500.jpg
+https://img.alicdn.com/imgextra/i2/O1CN01heMGuk1hiUSkJhf0G_!!6000000004311-0-tps-564-564.jpg
diff --git a/dataset/text_editing_text_style_editing_0002/instruction.txt b/dataset/text_editing_text_style_editing_0002/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..924540b444416594bb8a8dc6d3abb7c57bd104df
--- /dev/null
+++ b/dataset/text_editing_text_style_editing_0002/instruction.txt
@@ -0,0 +1 @@
+Please apply the text style from the second image to the “JAWS” text in the first image. Keep the rest of the content in the first image as unchanged as possible, making minor adjustments if necessary. Ensure that the “JAWS” text remains unchanged in content but adopts the bubbly style from the second image. The overall composition and atmosphere of the image should remain as consistent as possible.
\ No newline at end of file
diff --git a/dataset/text_editing_text_style_editing_0002/meta.json b/dataset/text_editing_text_style_editing_0002/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5e6e31b340479417e822317cf01432f1d2fb6f0
--- /dev/null
+++ b/dataset/text_editing_text_style_editing_0002/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "text style modification",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": true,
+    "multi_image_output": false,
+    "uid": "0082",
+    "output_image_count": 1,
+    "case_id": "0002"
+}
\ No newline at end of file
diff --git a/dataset/three-view_generation_0001/eval.json b/dataset/three-view_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d2ab25836aff2343ba6b4d3cb9681c86d2220c7
--- /dev/null
+++ b/dataset/three-view_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the output include all three required views (front, side, and back) to provide a complete representation of the object?",
+            "0_point_standard": "One or more views are missing, resulting in an incomplete representation.",
+            "1_point_standard": "All three views are present, providing a complete front, side, and back representation of the object."
+        },
+        {
+            "question": "Is the object's structure consistent across all three views, accurately reflecting the same shape and features in each view?",
+            "0_point_standard": "The structure or features of the object differ between views, resulting in inconsistency and suggesting different interpretations of the object.",
+            "1_point_standard": "The object's structure is consistent across all views, clearly presenting the same shape and features."
+        },
+        {
+            "question": "Are the proportions of the object accurately represented in each view, maintaining correct dimensions and scale?",
+            "0_point_standard": "In one or more views, the proportions are distorted or inaccurate, misrepresenting the dimensions of the object.",
+            "1_point_standard": "Proportions are accurately represented in each view, reflecting the correct dimensions and scale of the object."
+        },
+        {
+            "question": "Are key features of the object (such as edges, corners, and design details) correctly aligned across the three views?",
+            "0_point_standard": "Key features are misaligned or inconsistent between views, disrupting the visual coherence of the object's design.",
+            "1_point_standard": "Key features are correctly aligned across all views, maintaining a coherent and accurate representation."
+        },
+        {
+            "question": "Is the level of detail in each view high, ensuring important features are clear and easily distinguishable?",
+            "0_point_standard": "In one or more views, details are unclear or poorly defined, making it difficult to distinguish important features.",
+            "1_point_standard": "Each view is detailed and clear, with important features easily distinguishable and well-represented."
+        },
+        {
+            "question": "Does the final set of three views exhibit a high level of aesthetics and professional quality, providing a well-crafted and cohesive presentation?",
+            "0_point_standard": "The set of views lacks aesthetic cohesiveness or professional quality, with visual inconsistencies diminishing its presentation.",
+            "1_point_standard": "The set of three views is aesthetically pleasing and professionally rendered, providing a cohesive and well-crafted presentation."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/three-view_generation_0001/images.txt b/dataset/three-view_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..19da081804762da8e027d8c615dcb23155f62308
--- /dev/null
+++ b/dataset/three-view_generation_0001/images.txt
@@ -0,0 +1 @@
+https://img.alicdn.com/imgextra/i2/O1CN014KUh2C1cTlePscj0G_!!6000000003602-0-tps-424-300.jpg
diff --git a/dataset/three-view_generation_0001/instruction.txt b/dataset/three-view_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..afad5a072f29628d77f6528c7b2334e0857ba76e
--- /dev/null
+++ b/dataset/three-view_generation_0001/instruction.txt
@@ -0,0 +1 @@
+Please generate a three-view illustration of the object, including the front, side and back, based on the image provided. Ensure that each view keeps the key features of the object consistent, such as shape, colour, texture and scale. The front view should clearly show the main facial or frontal features of the object, the side view shows the side profile and details of the object, and the back view should show the rear structure and appearance of the object. The generated three views should accurately represent all angles of the object and be consistent with the description.
\ No newline at end of file
diff --git a/dataset/three-view_generation_0001/meta.json b/dataset/three-view_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..b5b8242a2710b17ea6d843f63bcc86108a7c4d1e
--- /dev/null
+++ b/dataset/three-view_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "three-view generation",
+    "num_of_cases": 2,
+    "image_reference": true,
+    "multi_image_reference": false,
+    "multi_image_output": true,
+    "uid": "0043",
+    "output_image_count": 3,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/dataset/ticket_generation_0001/eval.json b/dataset/ticket_generation_0001/eval.json
new file mode 100644
index 0000000000000000000000000000000000000000..9a16c24d1b24fb0a32aa26a63e92768027f365d9
--- /dev/null
+++ b/dataset/ticket_generation_0001/eval.json
@@ -0,0 +1,34 @@
+{
+    "questions": [
+        {
+            "question": "Does the ticket design match the text description and include all key information (e.g., date, location, event name)?",
+            "0_point_standard": "The ticket design does not match the description, key information is missing or displayed incorrectly.",
+            "1_point_standard": "The ticket design matches the description and accurately displays all key information."
+        },
+        {
+            "question": "Is the text on the ticket clear and easy to read, and does the font style and layout meet design requirements?",
+            "0_point_standard": "The text is unclear, the font style or layout does not meet requirements, affecting overall readability.",
+            "1_point_standard": "The text is clear and easy to read, and the font style and layout meet design requirements."
+        },
+        {
+            "question": "Do the overall color scheme and visual style of the ticket align with the style requirements in the text description (e.g., modern, vintage)?",
+            "0_point_standard": "The color scheme and visual style do not match the text description and fail to convey the intended style.",
+            "1_point_standard": "The color scheme and visual style align with the text description and convey the intended style effect."
+        },
+        {
+            "question": "Does the model accurately implement the special design requirements in the text (e.g., watermark, security marks)?",
+            "0_point_standard": "The special design requirements in the text are not accurately implemented, or details are poorly executed.",
+            "1_point_standard": "The special design requirements in the text are accurately implemented with precise detail."
+        },
+        {
+            "question": "Is the ticket layout clear and reasonable, is the information organized orderly, and are the visual hierarchies distinct?",
+            "0_point_standard": "The ticket layout is chaotic, information is disorganized, and the visual effect is confusing.",
+            "1_point_standard": "The ticket layout is clear and reasonable, information is organized orderly, and visual hierarchies are distinct."
+        },
+        {
+            "question": "Does the overall aesthetic and design appeal of the ticket meet professional standards, and is it highly attractive?",
+            "0_point_standard": "The ticket lacks overall aesthetic appeal, the design quality is poor, and it is not attractive.",
+            "1_point_standard": "The ticket has excellent aesthetic appeal, high design quality, and strong visual attraction."
+        }
+    ]
+}
\ No newline at end of file
diff --git a/dataset/ticket_generation_0001/images.txt b/dataset/ticket_generation_0001/images.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/ticket_generation_0001/instruction.txt b/dataset/ticket_generation_0001/instruction.txt
new file mode 100644
index 0000000000000000000000000000000000000000..53fe4eacf898b99dd70def476c37f2ae349434c3
--- /dev/null
+++ b/dataset/ticket_generation_0001/instruction.txt
@@ -0,0 +1 @@
+This single image shows two Halloween-themed event tickets, one positioned above the other, each with a vintage horror design featuring dark and spooky elements. Both tickets have a similar layout, divided into a left section with event details and a right section with admission information, separated by a perforated line. The upper ticket has a black background, while the lower ticket uses a light gray background, creating contrast between the two. Each ticket features jagged edges on both sides, resembling classic tear-away admission tickets. On the left section of both tickets, the word “HALLOWEEN” appears prominently at the top in a distressed, gothic font. The letters are uppercase, with “HALLO” in white and “WEEN” in red, creating a chilling effect. Below it, the words “HORROR PARTY” are written in a smaller, red, horror-themed font. Each ticket is adorned with Halloween illustrations: the upper ticket displays two white ghosts floating on either side of the text, while the lower ticket has a red disembodied hand and a white eyeball with red veins, adding a creepy touch to the design. Underneath these images, further details about the event are listed, including “LIVE MUSIC BOO! 31.10 2020” and “COSTUME CONTEST WITH A SECRET PRIZE,” both written in mixed fonts and sizes to create visual interest. Additionally, the phrase “COME IN… IF YOU DARE!” is printed just above the bottom line, daring the reader to attend. At the very bottom, the location is specified as “AT THE HAUNTED MANSION, 666 ENCHANTED FOREST, 123456,” followed by the website “WWW.HORRORPARTY.COM.” The right section of both tickets includes admission details, displayed in a vertical format. The text “ADMIT ONE” is printed in large, bold, uppercase letters running vertically along the right edge. Just below, the date “31 OCT” and time “10:00 PM” appear in red, making them stand out. Further down, there is the label “TICKET NO.” followed by a sample ticket number “0123456789” in smaller white font. 
Overall, the ticket design combines gothic typography with Halloween-themed illustrations, creating a spooky, atmospheric invitation perfect for a horror-themed Halloween party.
\ No newline at end of file
diff --git a/dataset/ticket_generation_0001/meta.json b/dataset/ticket_generation_0001/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..373c27af49dbb9ccb7d721f7fc642e5e00343cea
--- /dev/null
+++ b/dataset/ticket_generation_0001/meta.json
@@ -0,0 +1,10 @@
+{
+    "task_name": "ticket generation",
+    "num_of_cases": 3,
+    "image_reference": false,
+    "multi_image_reference": false,
+    "multi_image_output": false,
+    "uid": "0031",
+    "output_image_count": 1,
+    "case_id": "0001"
+}
\ No newline at end of file
diff --git a/logs/vote_log/gr_web_image_generation_multi.log b/logs/vote_log/gr_web_image_generation_multi.log
index 49d8aef93c0a90ca68d6077e18c316e59680dc83..9e0df1759db253d13c0a4cff6f4239802d8fa373 100644
--- a/logs/vote_log/gr_web_image_generation_multi.log
+++ b/logs/vote_log/gr_web_image_generation_multi.log
@@ -448,3 +448,52 @@
 2024-12-27 16:00:57 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/blocks.py", line 2619, in launch
 2024-12-27 16:00:57 | ERROR | stderr |     raise ValueError(
 2024-12-27 16:00:57 | ERROR | stderr | ValueError: When localhost is not accessible, a shareable link must be created. Please set share=True or check your proxy settings to allow access to localhost.
+2024-12-27 16:27:30 | INFO | stdout | * Running on local URL:  http://127.0.0.1:7860
+2024-12-27 16:27:30 | INFO | stdout | 
+2024-12-27 16:27:30 | INFO | stdout | To create a public link, set `share=True` in `launch()`.
+2024-12-27 16:27:39 | ERROR | stderr | Traceback (most recent call last):
+2024-12-27 16:27:39 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/queueing.py", line 625, in process_events
+2024-12-27 16:27:39 | ERROR | stderr |     response = await route_utils.call_process_api(
+2024-12-27 16:27:39 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/route_utils.py", line 322, in call_process_api
+2024-12-27 16:27:39 | ERROR | stderr |     output = await app.get_blocks().process_api(
+2024-12-27 16:27:39 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/blocks.py", line 2047, in process_api
+2024-12-27 16:27:39 | ERROR | stderr |     result = await self.call_function(
+2024-12-27 16:27:39 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/blocks.py", line 1594, in call_function
+2024-12-27 16:27:39 | ERROR | stderr |     prediction = await anyio.to_thread.run_sync(  # type: ignore
+2024-12-27 16:27:39 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
+2024-12-27 16:27:39 | ERROR | stderr |     return await get_async_backend().run_sync_in_worker_thread(
+2024-12-27 16:27:39 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2505, in run_sync_in_worker_thread
+2024-12-27 16:27:39 | ERROR | stderr |     return await future
+2024-12-27 16:27:39 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 1005, in run
+2024-12-27 16:27:39 | ERROR | stderr |     result = context.run(func, *args)
+2024-12-27 16:27:39 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/utils.py", line 869, in wrapper
+2024-12-27 16:27:39 | ERROR | stderr |     response = f(*args, **kwargs)
+2024-12-27 16:27:39 | ERROR | stderr |   File "/Users/jasi/Documents/ali-vilab/huggingface/IDEA-Bench-Arena/serve/utils.py", line 168, in refresh_side_by_side_anony
+2024-12-27 16:27:39 | ERROR | stderr |     state0, state1, prompt, input_images, output_images_A, output_images_B = models.get_result_of_random_case_anony()
+2024-12-27 16:27:39 | ERROR | stderr |   File "/Users/jasi/Documents/ali-vilab/huggingface/IDEA-Bench-Arena/model/model_manager.py", line 167, in get_result_of_random_case_anony
+2024-12-27 16:27:39 | ERROR | stderr |     case_name, case_info = self.choose_case_randomly()
+2024-12-27 16:27:39 | ERROR | stderr |   File "/Users/jasi/Documents/ali-vilab/huggingface/IDEA-Bench-Arena/model/model_manager.py", line 121, in choose_case_randomly
+2024-12-27 16:27:39 | ERROR | stderr |     with open(case_meta_path, 'r') as file:
+2024-12-27 16:27:39 | ERROR | stderr | FileNotFoundError: [Errno 2] No such file or directory: 'dataset/object_editing_background_changing_0001/meta.json'
+2024-12-27 16:31:12 | INFO | stdout | Keyboard interruption in main thread... closing server.
+2024-12-27 16:31:12 | ERROR | stderr | Traceback (most recent call last):
+2024-12-27 16:31:12 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/blocks.py", line 2869, in block_thread
+2024-12-27 16:31:12 | ERROR | stderr |     time.sleep(0.1)
+2024-12-27 16:31:12 | ERROR | stderr | KeyboardInterrupt
+2024-12-27 16:31:12 | ERROR | stderr | 
+2024-12-27 16:31:12 | ERROR | stderr | During handling of the above exception, another exception occurred:
+2024-12-27 16:31:12 | ERROR | stderr | 
+2024-12-27 16:31:12 | ERROR | stderr | Traceback (most recent call last):
+2024-12-27 16:31:12 | ERROR | stderr |   File "/Users/jasi/Documents/ali-vilab/huggingface/IDEA-Bench-Arena/app.py", line 64, in <module>
+2024-12-27 16:31:12 | ERROR | stderr |     demo.queue(max_size=20).launch(server_port=server_port, root_path=ROOT_PATH, show_error=True)
+2024-12-27 16:31:12 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/blocks.py", line 2774, in launch
+2024-12-27 16:31:12 | ERROR | stderr |     self.block_thread()
+2024-12-27 16:31:12 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/blocks.py", line 2873, in block_thread
+2024-12-27 16:31:12 | ERROR | stderr |     self.server.close()
+2024-12-27 16:31:12 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/site-packages/gradio/http_server.py", line 69, in close
+2024-12-27 16:31:12 | ERROR | stderr |     self.thread.join(timeout=5)
+2024-12-27 16:31:12 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/threading.py", line 1100, in join
+2024-12-27 16:31:12 | ERROR | stderr |     self._wait_for_tstate_lock(timeout=max(timeout, 0))
+2024-12-27 16:31:12 | ERROR | stderr |   File "/opt/anaconda3/envs/tiger/lib/python3.10/threading.py", line 1116, in _wait_for_tstate_lock
+2024-12-27 16:31:12 | ERROR | stderr |     if lock.acquire(block, timeout):
+2024-12-27 16:31:12 | ERROR | stderr | KeyboardInterrupt
diff --git a/model/__pycache__/__init__.cpython-310.pyc b/model/__pycache__/__init__.cpython-310.pyc
index 336a913cee56c7ea958c77cde3010e63d2cb9d8a..893f37e0804b179c9bf4f85678c0dd921303f4dd 100644
Binary files a/model/__pycache__/__init__.cpython-310.pyc and b/model/__pycache__/__init__.cpython-310.pyc differ
diff --git a/model/__pycache__/model_manager.cpython-310.pyc b/model/__pycache__/model_manager.cpython-310.pyc
index 6426466748f6071be5d2ac3a87dc72ec59de6597..43e8a38c79fcde3b31a88cee4cc2aec386140a99 100644
Binary files a/model/__pycache__/model_manager.cpython-310.pyc and b/model/__pycache__/model_manager.cpython-310.pyc differ
diff --git a/model/__pycache__/model_registry.cpython-310.pyc b/model/__pycache__/model_registry.cpython-310.pyc
index cb1b2c492a092df46db703c1776d074646575e62..6f16a791be1a7ec439eddcd5399c3c8bfd9de156 100644
Binary files a/model/__pycache__/model_registry.cpython-310.pyc and b/model/__pycache__/model_registry.cpython-310.pyc differ
diff --git a/serve/__pycache__/__init__.cpython-310.pyc b/serve/__pycache__/__init__.cpython-310.pyc
index cba025a1ccbe0d2219bcf8a16deec5f89bad4a24..c6cc33e1e734dc443007733593d1e645cb89f0aa 100644
Binary files a/serve/__pycache__/__init__.cpython-310.pyc and b/serve/__pycache__/__init__.cpython-310.pyc differ
diff --git a/serve/__pycache__/constants.cpython-310.pyc b/serve/__pycache__/constants.cpython-310.pyc
index a9cddbe71eeda5e828b6c27e3eaee9a29c2e35b5..56f8f6e093347433c43f7a457f68b30c5c4e127f 100644
Binary files a/serve/__pycache__/constants.cpython-310.pyc and b/serve/__pycache__/constants.cpython-310.pyc differ
diff --git a/serve/__pycache__/gradio_web.cpython-310.pyc b/serve/__pycache__/gradio_web.cpython-310.pyc
index e8d559f376cced39646b0268e9396cbc6959e44e..828b476127085c9914ffcb7551f7d368dd22c83b 100644
Binary files a/serve/__pycache__/gradio_web.cpython-310.pyc and b/serve/__pycache__/gradio_web.cpython-310.pyc differ
diff --git a/serve/__pycache__/leaderboard.cpython-310.pyc b/serve/__pycache__/leaderboard.cpython-310.pyc
index 8710f010964cf773bfe58e3409173a081f2c881f..039ceaf173657a773bab71e8e4ad0d4c5f9ec656 100644
Binary files a/serve/__pycache__/leaderboard.cpython-310.pyc and b/serve/__pycache__/leaderboard.cpython-310.pyc differ
diff --git a/serve/__pycache__/log_utils.cpython-310.pyc b/serve/__pycache__/log_utils.cpython-310.pyc
index 0f2600b6c1f28fe812999eddf678abdee925d344..ff15792dc2ab9a4c4bde4e5871cba81e57c8a329 100644
Binary files a/serve/__pycache__/log_utils.cpython-310.pyc and b/serve/__pycache__/log_utils.cpython-310.pyc differ
diff --git a/serve/__pycache__/utils.cpython-310.pyc b/serve/__pycache__/utils.cpython-310.pyc
index b9b33c6a7a46cd42d7537c920b0e3c8d1801d7fd..cb41564ac0077ba434adecbae41e04f73eb3e459 100644
Binary files a/serve/__pycache__/utils.cpython-310.pyc and b/serve/__pycache__/utils.cpython-310.pyc differ
diff --git a/serve/__pycache__/vote_utils.cpython-310.pyc b/serve/__pycache__/vote_utils.cpython-310.pyc
index 334fca33a83af6ec5f6db639afff531a0e5c7ed2..ddd759a37e4120d1329801c0a9bc2f26c76ac093 100644
Binary files a/serve/__pycache__/vote_utils.cpython-310.pyc and b/serve/__pycache__/vote_utils.cpython-310.pyc differ