Skip to content

Commit eb074de

Browse files
feat(image-generation): Image generation using Gemini Flash (#13334)
* feat(image-generation): Image generation using Gemini Flash * fix(image-generation): update model name * chore: code refactor * fix: reducing tests to only 3.12 due to frequent CICD `RESOURCE_EXHAUSTED` error
1 parent 062a5af commit eb074de

File tree

5 files changed

+189
-10
lines changed

5 files changed

+189
-10
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def generate_content() -> str:
    """Edit a local image with Gemini Flash and save the edited result.

    Opens ``example-image.png`` from the current working directory, sends it
    to the model together with a text instruction, prints every text part of
    the response and writes any returned image part to
    ``bw-example-image.png``.

    Returns:
        The output file name, ``"bw-example-image.png"``. The name is returned
        unconditionally, so callers should verify that the file exists.
    """
    # [START googlegenaisdk_imggen_mmflash_edit_img_with_txt_img]
    from google import genai
    from google.genai.types import GenerateContentConfig, Modality
    from PIL import Image
    from io import BytesIO

    client = genai.Client()

    # Using an image of Eiffel tower, with fireworks in the background.
    # The context manager releases the source file handle once the request
    # has been made, instead of leaving it open for the rest of the process.
    with Image.open("example-image.png") as image:
        response = client.models.generate_content(
            model="gemini-2.0-flash-exp",
            contents=[image, "Edit this image to make it look like a cartoon."],
            config=GenerateContentConfig(response_modalities=[Modality.TEXT, Modality.IMAGE]),
        )
    for part in response.candidates[0].content.parts:
        if part.text:
            print(part.text)
        elif part.inline_data:
            # Use a separate name so the input image is not shadowed.
            edited_image = Image.open(BytesIO(part.inline_data.data))
            edited_image.save("bw-example-image.png")
    # Example response:
    #  Here's the cartoon-style edit of the image:
    #  Cartoon-style edit:
    #  - Simplified the Eiffel Tower with bolder lines and slightly exaggerated proportions.
    #  - Brightened and saturated the colors of the sky, fireworks, and foliage for a more vibrant, cartoonish look.
    #  ....
    # [END googlegenaisdk_imggen_mmflash_edit_img_with_txt_img]
    return "bw-example-image.png"


if __name__ == "__main__":
    generate_content()
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def generate_content() -> int:
    """Generate an illustrated paella recipe as a markdown file plus PNG images.

    Asks the model for interleaved text and images. Text parts are appended to
    ``paella-recipe.md``; each image part is saved as
    ``example-image-<n>.png`` (``n`` is the part's one-based position in the
    response) and linked from the markdown file.

    Returns:
        The zero-based index of the last response part that was processed, or
        ``-1`` when the response contained no parts.
    """
    # Pre-bind the loop index so the final ``return`` is well-defined even
    # when the response has no parts (the loop body then never runs).
    i = -1
    # [START googlegenaisdk_imggen_mmflash_txt_and_img_with_txt]
    from google import genai
    from google.genai.types import GenerateContentConfig, Modality
    from PIL import Image
    from io import BytesIO

    client = genai.Client()

    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=(
            # Adjacent literals are concatenated, so the separating space
            # must be part of the first one.
            "Generate an illustrated recipe for a paella. "
            "Create images to go alongside the text as you generate the recipe"
        ),
        config=GenerateContentConfig(response_modalities=[Modality.TEXT, Modality.IMAGE]),
    )
    # Explicit UTF-8 keeps non-ASCII recipe text writable on every platform.
    with open("paella-recipe.md", "w", encoding="utf-8") as fp:
        # ``parts`` may be missing on an empty response; treat that as no parts.
        for i, part in enumerate(response.candidates[0].content.parts or []):
            if part.text is not None:
                fp.write(part.text)
            elif part.inline_data is not None:
                image = Image.open(BytesIO(part.inline_data.data))
                image.save(f"example-image-{i+1}.png")
                fp.write(f"![image](./example-image-{i+1}.png)")
    # Example response:
    #  A markdown page for a Paella recipe(`paella-recipe.md`) has been generated.
    #  It includes detailed steps and several images illustrating the cooking process.
    # [END googlegenaisdk_imggen_mmflash_txt_and_img_with_txt]
    return i


if __name__ == "__main__":
    generate_content()
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def generate_content() -> str:
    """Generate an image from a text prompt with Gemini Flash.

    Prints every text part of the response and writes any returned image part
    to ``example-image.png`` in the current working directory.

    Returns:
        The output file name, ``"example-image.png"`` (returned whether or not
        an image part was present in the response).
    """
    # [START googlegenaisdk_imggen_mmflash_with_txt]
    from io import BytesIO

    from google import genai
    from google.genai.types import GenerateContentConfig, Modality
    from PIL import Image

    client = genai.Client()

    prompt = "Generate an image of the Eiffel tower with fireworks in the background."
    config = GenerateContentConfig(response_modalities=[Modality.TEXT, Modality.IMAGE])
    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=prompt,
        config=config,
    )
    for part in response.candidates[0].content.parts:
        if part.text:
            print(part.text)
            continue
        if part.inline_data:
            Image.open(BytesIO(part.inline_data.data)).save("example-image.png")
    # Example response:
    #  A beautiful photograph captures the iconic Eiffel Tower in Paris, France,
    #  against a backdrop of a vibrant and dynamic fireworks display. The tower itself...
    # [END googlegenaisdk_imggen_mmflash_with_txt]
    return "example-image.png"


if __name__ == "__main__":
    generate_content()

genai/image_generation/noxfile_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
TEST_CONFIG_OVERRIDE = {
2424
# You can opt out from the test for specific Python versions.
25-
"ignored_versions": ["2.7", "3.7", "3.8", "3.10", "3.11", "3.13"],
25+
"ignored_versions": ["2.7", "3.7", "3.8", "3.9", "3.10", "3.11", "3.13"],
2626
# Old samples are opted out of enforcing Python type hints
2727
# All new samples should feature them
2828
"enforce_type_hints": True,

genai/image_generation/test_image_generation.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,12 @@
2525
import pytest
2626

2727
import imggen_canny_ctrl_type_with_txt_img
28-
28+
import imggen_mmflash_edit_img_with_txt_img
29+
import imggen_mmflash_txt_and_img_with_txt
30+
import imggen_mmflash_with_txt
2931
import imggen_raw_reference_with_txt_img
30-
3132
import imggen_scribble_ctrl_type_with_txt_img
32-
3333
import imggen_style_reference_with_txt_img
34-
3534
import imggen_subj_refer_ctrl_refer_with_txt_imgs
3635

3736

@@ -57,25 +56,60 @@ def output_gcs_uri() -> str:
5756

5857

5958
def test_img_customization_subject(output_gcs_uri: str) -> None:
    """The subject customization sample returns a truthy result."""
    result = imggen_subj_refer_ctrl_refer_with_txt_imgs.subject_customization(
        output_gcs_uri=output_gcs_uri,
    )
    assert result
6263

6364

6465
def test_img_customization_style(output_gcs_uri: str) -> None:
    """The style customization sample returns a truthy result."""
    result = imggen_style_reference_with_txt_img.style_customization(
        output_gcs_uri=output_gcs_uri,
    )
    assert result
6770

6871

6972
def test_img_customization_style_transfer(output_gcs_uri: str) -> None:
    """The style transfer customization sample returns a truthy result."""
    result = imggen_raw_reference_with_txt_img.style_transfer_customization(
        output_gcs_uri=output_gcs_uri,
    )
    assert result
7277

7378

7479
def test_img_customization_scribble(output_gcs_uri: str) -> None:
    """The scribble customization sample returns a truthy result."""
    result = imggen_scribble_ctrl_type_with_txt_img.scribble_customization(
        output_gcs_uri=output_gcs_uri,
    )
    assert result
7784

7885

7986
def test_img_customization_canny_edge(output_gcs_uri: str) -> None:
    """The canny edge customization sample returns a truthy result."""
    result = imggen_canny_ctrl_type_with_txt_img.canny_edge_customization(
        output_gcs_uri=output_gcs_uri,
    )
    assert result
91+
92+
93+
def test_imggen_mmflash_examples() -> None:
    """Run the generate-image sample, then the edit-image sample.

    The two samples run in this order within one test because the edit sample
    opens ``example-image.png``, the file name the generate sample saves to.
    Generated files are removed in ``finally`` so a failing assertion or a
    failing second sample does not leave artifacts in the working directory.
    """
    created = []
    try:
        # generate image
        fname = imggen_mmflash_with_txt.generate_content()
        created.append(fname)
        assert os.path.isfile(fname)
        # edit generated image
        new_fname = imggen_mmflash_edit_img_with_txt_img.generate_content()
        created.append(new_fname)
        assert os.path.isfile(new_fname)
    finally:
        # clean-up: a sample returns its file name even if nothing was written,
        # so only remove files that actually exist.
        for path in created:
            if os.path.isfile(path):
                os.remove(path)
104+
105+
106+
def test_imggen_mmflash_txt_and_img_with_txt() -> None:
    """Run the illustrated-recipe sample and check that it wrote the markdown file.

    The sample returns the zero-based index of the last response part; image
    parts are saved as ``example-image-<index + 1>.png``, so every candidate
    name up to that index is checked during clean-up.
    """
    last_image_id = imggen_mmflash_txt_and_img_with_txt.generate_content()
    fname = "paella-recipe.md"
    try:
        assert os.path.isfile(fname)
    finally:
        # clean-up runs even when the assertion above fails
        for i in range(last_image_id + 1):
            img_name = f"example-image-{i+1}.png"
            if os.path.isfile(img_name):
                os.remove(img_name)
        if os.path.isfile(fname):
            os.remove(fname)

0 commit comments

Comments
 (0)