|
| 1 | +""" |
| 2 | + Copyright (c) 2022, salesforce.com, inc. |
| 3 | + All rights reserved. |
| 4 | + SPDX-License-Identifier: BSD-3-Clause |
| 5 | + For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause |
| 6 | +""" |
| 7 | + |
| 8 | +import os |
| 9 | + |
| 10 | +from PIL import Image |
| 11 | +from torch.utils.data import Dataset |
| 12 | +from torch.utils.data.dataloader import default_collate |
| 13 | + |
| 14 | + |
class SubjectDrivenTextToImageDataset(Dataset):
    """Dataset over the images of one directory, all sharing one subject caption.

    Every image found in ``image_dir`` is served ``repetition`` times: the
    reported length is ``number_of_images * repetition`` and an index is
    mapped back to an image with a modulo, so the image list is simply
    cycled.

    Args:
        image_dir: Directory scanned (non-recursively) for image files with
            a jpg / jpeg / png / webp extension, matched case-insensitively.
        subject_text: Subject name; it is lower-cased and passed through
            ``txt_processor`` once at construction time.
        inp_image_processor: Callable applied to the RGB PIL image to build
            the ``"inp_image"`` entry of a sample.
        tgt_image_processor: Callable applied to the same RGB PIL image to
            build the ``"tgt_image"`` entry of a sample.
        txt_processor: Callable applied to the subject text and to every
            caption.
        repetition: How many times the image list is repeated.
    """

    # Lower-case extensions (with leading dot) that count as images.
    _IMAGE_EXTENSIONS = frozenset({".jpg", ".jpeg", ".png", ".webp"})

    def __init__(
        self,
        image_dir,
        subject_text,
        inp_image_processor,
        tgt_image_processor,
        txt_processor,
        repetition=100000,
    ):
        super().__init__()

        self.subject = txt_processor(subject_text.lower())
        self.image_dir = image_dir

        self.inp_image_transform = inp_image_processor
        self.tgt_image_transform = tgt_image_processor

        self.text_processor = txt_processor

        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> image mapping reproducible. Extensions are compared
        # lower-cased so mixed-case names such as "photo.Jpg" are kept.
        self.image_paths = [
            os.path.abspath(os.path.join(image_dir, name))
            for name in sorted(os.listdir(image_dir))
            if os.path.splitext(name)[1].lower() in self._IMAGE_EXTENSIONS
        ]
        self.repetition = repetition

    def __len__(self):
        """Return the number of samples, counting every repetition."""
        return len(self.image_paths) * self.repetition

    @property
    def len_without_repeat(self):
        """Number of distinct image files found in ``image_dir``."""
        return len(self.image_paths)

    def collater(self, samples):
        """Batch a list of samples with torch's ``default_collate``."""
        return default_collate(samples)

    def __getitem__(self, index):
        """Return the sample dict for ``index``.

        Returns:
            A dict with keys ``"inp_image"``, ``"tgt_image"``, ``"caption"``
            and ``"subject_text"``.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
                This covers the empty-directory case and lets plain
                ``for sample in dataset`` iteration terminate.
        """
        total = len(self)
        if not -total <= index < total:
            raise IndexError(
                f"index {index} is out of range for dataset of length {total}"
            )

        image_path = self.image_paths[index % len(self.image_paths)]
        # Close the underlying file as soon as the pixels are converted;
        # convert() returns a new, fully loaded image.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # For fine-tuning, the same caption is used for all images;
        # different captions per image may be worth trying.
        caption = f"a {self.subject}"
        caption = self.text_processor(caption)

        inp_image = self.inp_image_transform(image)
        tgt_image = self.tgt_image_transform(image)

        return {
            "inp_image": inp_image,
            "tgt_image": tgt_image,
            "caption": caption,
            "subject_text": self.subject,
        }
0 commit comments