import os, yaml, pickle, shutil, tarfile, glob
import cv2
import albumentations
import PIL
import numpy as np
import torchvision.transforms.functional as TF
from omegaconf import OmegaConf
from functools import partial
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset, Subset

import taming.data.utils as tdu
from taming.data.imagenet import (
    str_to_indices,
    give_synsets_from_indices,
    download,
    retrieve,
)
from taming.data.imagenet import ImagePaths

from ldm.modules.image_degradation import (
    degradation_fn_bsr,
    degradation_fn_bsr_light,
)


def synset2idx(path_to_yaml='data/index_synset.yaml'):
    # Invert the index -> synset mapping from the YAML file into synset -> index.
    with open(path_to_yaml) as f:
        di2s = yaml.safe_load(f)
    return dict((v, k) for k, v in di2s.items())
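# Usage sketch for synset2idx (hedged: assumes index_synset.yaml maps integer
# class indices to WordNet synset ids, e.g. "0: n01440764"):
#
#   s2i = synset2idx(path_to_yaml='data/index_synset.yaml')
#   s2i['n01440764']  # -> 0 under that assumption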


class ImageNetBase(Dataset):
    """Shared base for the ILSVRC2012 splits. Subclasses implement _prepare()
    to locate/download the data and write filelist.txt; this base class then
    downloads the synset/label metadata and builds the dataset from that list."""

    def __init__(self, config=None):
        self.config = config or OmegaConf.create()
        if not isinstance(self.config, dict):
            self.config = OmegaConf.to_container(self.config)
        self.keep_orig_class_label = self.config.get(
            'keep_orig_class_label', False
        )
        self.process_images = True  # if False we skip loading & processing images and self.data contains filepaths
        self._prepare()
        self._prepare_synset_to_human()
        self._prepare_idx_to_synset()
        self._prepare_human_to_integer_label()
        self._load()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        return self.data[i]

    def _prepare(self):
        raise NotImplementedError()

    def _filter_relpaths(self, relpaths):
        ignore = set(
            [
                'n06596364_9591.JPEG',
            ]
        )
        relpaths = [
            rpath for rpath in relpaths if rpath.split('/')[-1] not in ignore
        ]
        if 'sub_indices' in self.config:
            indices = str_to_indices(self.config['sub_indices'])
            synsets = give_synsets_from_indices(
                indices, path_to_yaml=self.idx2syn
            )  # returns a list of strings
            self.synset2idx = synset2idx(path_to_yaml=self.idx2syn)
            files = []
            for rpath in relpaths:
                syn = rpath.split('/')[0]
                if syn in synsets:
                    files.append(rpath)
            return files
        else:
            return relpaths

    def _prepare_synset_to_human(self):
        SIZE = 2655750
        URL = 'https://heibox.uni-heidelberg.de/f/9f28e956cd304264bb82/?dl=1'
        self.human_dict = os.path.join(self.root, 'synset_human.txt')
        if (
            not os.path.exists(self.human_dict)
            or not os.path.getsize(self.human_dict) == SIZE
        ):
            download(URL, self.human_dict)

    def _prepare_idx_to_synset(self):
        URL = 'https://heibox.uni-heidelberg.de/f/d835d5b6ceda4d3aa910/?dl=1'
        self.idx2syn = os.path.join(self.root, 'index_synset.yaml')
        if not os.path.exists(self.idx2syn):
            download(URL, self.idx2syn)

    def _prepare_human_to_integer_label(self):
        URL = 'https://heibox.uni-heidelberg.de/f/2362b797d5be43b883f6/?dl=1'
        self.human2integer = os.path.join(
            self.root, 'imagenet1000_clsidx_to_labels.txt'
        )
        if not os.path.exists(self.human2integer):
            download(URL, self.human2integer)
        with open(self.human2integer, 'r') as f:
            lines = f.read().splitlines()
            assert len(lines) == 1000
            self.human2integer_dict = dict()
            for line in lines:
                value, key = line.split(':')
                self.human2integer_dict[key] = int(value)

    def _load(self):
        with open(self.txt_filelist, 'r') as f:
            self.relpaths = f.read().splitlines()
            l1 = len(self.relpaths)
            self.relpaths = self._filter_relpaths(self.relpaths)
            print(
                'Removed {} files from filelist during filtering.'.format(
                    l1 - len(self.relpaths)
                )
            )

        self.synsets = [p.split('/')[0] for p in self.relpaths]
        self.abspaths = [os.path.join(self.datadir, p) for p in self.relpaths]

        unique_synsets = np.unique(self.synsets)
        class_dict = dict(
            (synset, i) for i, synset in enumerate(unique_synsets)
        )
        if not self.keep_orig_class_label:
            self.class_labels = [class_dict[s] for s in self.synsets]
        else:
            self.class_labels = [self.synset2idx[s] for s in self.synsets]

        with open(self.human_dict, 'r') as f:
            human_dict = f.read().splitlines()
            human_dict = dict(line.split(maxsplit=1) for line in human_dict)

        self.human_labels = [human_dict[s] for s in self.synsets]

        labels = {
            'relpath': np.array(self.relpaths),
            'synsets': np.array(self.synsets),
            'class_label': np.array(self.class_labels),
            'human_label': np.array(self.human_labels),
        }

        if self.process_images:
            self.size = retrieve(self.config, 'size', default=256)
            self.data = ImagePaths(
                self.abspaths,
                labels=labels,
                size=self.size,
                random_crop=self.random_crop,
            )
        else:
            self.data = self.abspaths
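# Config sketch (hedged, based on the keys read above; values are illustrative):
# ImageNetBase subclasses accept a dict or OmegaConf config such as
#
#   config = {
#       'size': 256,                     # passed to ImagePaths when process_images is True
#       'keep_orig_class_label': False,  # True: use original ImageNet indices instead of 0..N-1
#       'sub_indices': '0-9',            # optional; str_to_indices defines the accepted format
#   }
#
# The train/validation subclasses additionally read the nested keys
# 'ImageNetTrain/random_crop' and 'ImageNetValidation/random_crop' via retrieve().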


class ImageNetTrain(ImageNetBase):
    """ILSVRC2012 training split. If the tarball is not found locally, it is
    fetched via Academic Torrents and unpacked into per-synset folders."""

    NAME = 'ILSVRC2012_train'
    URL = 'http://www.image-net.org/challenges/LSVRC/2012/'
    AT_HASH = 'a306397ccf9c2ead27155983c254227c0fd938e2'
    FILES = [
        'ILSVRC2012_img_train.tar',
    ]
    SIZES = [
        147897477120,
    ]

    def __init__(self, process_images=True, data_root=None, **kwargs):
        self.process_images = process_images
        self.data_root = data_root
        super().__init__(**kwargs)

    def _prepare(self):
        if self.data_root:
            self.root = os.path.join(self.data_root, self.NAME)
        else:
            cachedir = os.environ.get(
                'XDG_CACHE_HOME', os.path.expanduser('~/.cache')
            )
            self.root = os.path.join(cachedir, 'autoencoders/data', self.NAME)

        self.datadir = os.path.join(self.root, 'data')
        self.txt_filelist = os.path.join(self.root, 'filelist.txt')
        self.expected_length = 1281167
        self.random_crop = retrieve(
            self.config, 'ImageNetTrain/random_crop', default=True
        )
        if not tdu.is_prepared(self.root):
            # prep
            print('Preparing dataset {} in {}'.format(self.NAME, self.root))

            datadir = self.datadir
            if not os.path.exists(datadir):
                path = os.path.join(self.root, self.FILES[0])
                if (
                    not os.path.exists(path)
                    or not os.path.getsize(path) == self.SIZES[0]
                ):
                    import academictorrents as at

                    atpath = at.get(self.AT_HASH, datastore=self.root)
                    assert atpath == path

                print('Extracting {} to {}'.format(path, datadir))
                os.makedirs(datadir, exist_ok=True)
                with tarfile.open(path, 'r:') as tar:
                    tar.extractall(path=datadir)

                print('Extracting sub-tars.')
                subpaths = sorted(glob.glob(os.path.join(datadir, '*.tar')))
                for subpath in tqdm(subpaths):
                    subdir = subpath[: -len('.tar')]
                    os.makedirs(subdir, exist_ok=True)
                    with tarfile.open(subpath, 'r:') as tar:
                        tar.extractall(path=subdir)

            filelist = glob.glob(os.path.join(datadir, '**', '*.JPEG'))
            filelist = [os.path.relpath(p, start=datadir) for p in filelist]
            filelist = sorted(filelist)
            filelist = '\n'.join(filelist) + '\n'
            with open(self.txt_filelist, 'w') as f:
                f.write(filelist)

            tdu.mark_prepared(self.root)
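# Note (mirrors the logic in _prepare above): if ILSVRC2012_img_train.tar is already
# on disk with the expected size, place it at
#   <data_root>/ILSVRC2012_train/ILSVRC2012_img_train.tar
# or, when no data_root is given, at
#   $XDG_CACHE_HOME/autoencoders/data/ILSVRC2012_train/ILSVRC2012_img_train.tar
# and the academictorrents download is skipped; the archive is then only extracted
# and indexed into filelist.txt.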


class ImageNetValidation(ImageNetBase):
    """ILSVRC2012 validation split. The flat validation images are reorganized
    into per-synset folders using the downloaded validation_synset.txt mapping."""

    NAME = 'ILSVRC2012_validation'
    URL = 'http://www.image-net.org/challenges/LSVRC/2012/'
    AT_HASH = '5d6d0df7ed81efd49ca99ea4737e0ae5e3a5f2e5'
    VS_URL = 'https://heibox.uni-heidelberg.de/f/3e0f6e9c624e45f2bd73/?dl=1'
    FILES = [
        'ILSVRC2012_img_val.tar',
        'validation_synset.txt',
    ]
    SIZES = [
        6744924160,
        1950000,
    ]

    def __init__(self, process_images=True, data_root=None, **kwargs):
        self.data_root = data_root
        self.process_images = process_images
        super().__init__(**kwargs)

    def _prepare(self):
        if self.data_root:
            self.root = os.path.join(self.data_root, self.NAME)
        else:
            cachedir = os.environ.get(
                'XDG_CACHE_HOME', os.path.expanduser('~/.cache')
            )
            self.root = os.path.join(cachedir, 'autoencoders/data', self.NAME)
        self.datadir = os.path.join(self.root, 'data')
        self.txt_filelist = os.path.join(self.root, 'filelist.txt')
        self.expected_length = 50000
        self.random_crop = retrieve(
            self.config, 'ImageNetValidation/random_crop', default=False
        )
        if not tdu.is_prepared(self.root):
            # prep
            print('Preparing dataset {} in {}'.format(self.NAME, self.root))

            datadir = self.datadir
            if not os.path.exists(datadir):
                path = os.path.join(self.root, self.FILES[0])
                if (
                    not os.path.exists(path)
                    or not os.path.getsize(path) == self.SIZES[0]
                ):
                    import academictorrents as at

                    atpath = at.get(self.AT_HASH, datastore=self.root)
                    assert atpath == path

                print('Extracting {} to {}'.format(path, datadir))
                os.makedirs(datadir, exist_ok=True)
                with tarfile.open(path, 'r:') as tar:
                    tar.extractall(path=datadir)

                vspath = os.path.join(self.root, self.FILES[1])
                if (
                    not os.path.exists(vspath)
                    or not os.path.getsize(vspath) == self.SIZES[1]
                ):
                    download(self.VS_URL, vspath)

                with open(vspath, 'r') as f:
                    synset_dict = f.read().splitlines()
                    synset_dict = dict(line.split() for line in synset_dict)

                print('Reorganizing into synset folders')
                synsets = np.unique(list(synset_dict.values()))
                for s in synsets:
                    os.makedirs(os.path.join(datadir, s), exist_ok=True)
                for k, v in synset_dict.items():
                    src = os.path.join(datadir, k)
                    dst = os.path.join(datadir, v)
                    shutil.move(src, dst)

            filelist = glob.glob(os.path.join(datadir, '**', '*.JPEG'))
            filelist = [os.path.relpath(p, start=datadir) for p in filelist]
            filelist = sorted(filelist)
            filelist = '\n'.join(filelist) + '\n'
            with open(self.txt_filelist, 'w') as f:
                f.write(filelist)

            tdu.mark_prepared(self.root)
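# Usage sketch (hedged: assumes the validation split has been prepared or can be
# downloaded; '/data/imagenet' is an illustrative path, and the example keys come
# from taming's ImagePaths plus the labels dict built in _load):
#
#   val = ImageNetValidation(data_root='/data/imagenet', config={'size': 256})
#   ex = val[0]
#   ex['image'].shape, ex['class_label'], ex['human_label']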


class ImageNetSR(Dataset):
    def __init__(
        self,
        size=None,
        degradation=None,
        downscale_f=4,
        min_crop_f=0.5,
        max_crop_f=1.0,
        random_crop=True,
    ):
        """
        ImageNet super-resolution dataloader.

        Performs the following ops in order:
        1. crops a square of side length s from the image, either as a random or a center crop
        2. resizes the crop to `size` with cv2 area interpolation
        3. degrades the resized crop with the chosen degradation function to get the LR image

        :param size: target side length after cropping and resizing
        :param degradation: degradation function, e.g. cv_bicubic or bsrgan_light
        :param downscale_f: low-resolution downsampling factor
        :param min_crop_f: determines the crop side length s,
            where s = c * min_img_side_len with c sampled uniformly from [min_crop_f, max_crop_f]
        :param max_crop_f: see min_crop_f
        :param random_crop: use random crops if True, center crops otherwise
        """
        self.base = self.get_base()
        assert size
        assert (size / downscale_f).is_integer()
        self.size = size
        self.LR_size = int(size / downscale_f)
        self.min_crop_f = min_crop_f
        self.max_crop_f = max_crop_f
        assert max_crop_f <= 1.0
        self.center_crop = not random_crop

        self.image_rescaler = albumentations.SmallestMaxSize(
            max_size=size, interpolation=cv2.INTER_AREA
        )

        self.pil_interpolation = (
            False  # set to True below if the interpolation op comes from Pillow
        )

        if degradation == 'bsrgan':
            self.degradation_process = partial(
                degradation_fn_bsr, sf=downscale_f
            )

        elif degradation == 'bsrgan_light':
            self.degradation_process = partial(
                degradation_fn_bsr_light, sf=downscale_f
            )

        else:
            interpolation_fn = {
                'cv_nearest': cv2.INTER_NEAREST,
                'cv_bilinear': cv2.INTER_LINEAR,
                'cv_bicubic': cv2.INTER_CUBIC,
                'cv_area': cv2.INTER_AREA,
                'cv_lanczos': cv2.INTER_LANCZOS4,
                'pil_nearest': PIL.Image.NEAREST,
                'pil_bilinear': PIL.Image.BILINEAR,
                'pil_bicubic': PIL.Image.BICUBIC,
                'pil_box': PIL.Image.BOX,
                'pil_hamming': PIL.Image.HAMMING,
                'pil_lanczos': PIL.Image.LANCZOS,
            }[degradation]

            self.pil_interpolation = degradation.startswith('pil_')

            if self.pil_interpolation:
                self.degradation_process = partial(
                    TF.resize,
                    size=self.LR_size,
                    interpolation=interpolation_fn,
                )

            else:
                self.degradation_process = albumentations.SmallestMaxSize(
                    max_size=self.LR_size, interpolation=interpolation_fn
                )

    def __len__(self):
        return len(self.base)

    def __getitem__(self, i):
        example = self.base[i]
        image = Image.open(example['file_path_'])

        if image.mode != 'RGB':
            image = image.convert('RGB')

        image = np.array(image).astype(np.uint8)

        min_side_len = min(image.shape[:2])
        crop_side_len = min_side_len * np.random.uniform(
            self.min_crop_f, self.max_crop_f, size=None
        )
        crop_side_len = int(crop_side_len)

        if self.center_crop:
            self.cropper = albumentations.CenterCrop(
                height=crop_side_len, width=crop_side_len
            )

        else:
            self.cropper = albumentations.RandomCrop(
                height=crop_side_len, width=crop_side_len
            )

        image = self.cropper(image=image)['image']
        image = self.image_rescaler(image=image)['image']

        if self.pil_interpolation:
            image_pil = PIL.Image.fromarray(image)
            LR_image = self.degradation_process(image_pil)
            LR_image = np.array(LR_image).astype(np.uint8)

        else:
            LR_image = self.degradation_process(image=image)['image']

        # scale uint8 images from [0, 255] to [-1, 1] floats
        example['image'] = (image / 127.5 - 1.0).astype(np.float32)
        example['LR_image'] = (LR_image / 127.5 - 1.0).astype(np.float32)

        return example
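# Worked example of the size bookkeeping above (numbers are illustrative): with
# size=256 and downscale_f=4, LR_size = 256 / 4 = 64; with min_crop_f=0.5 and
# max_crop_f=1.0, an image whose smaller side is 500 px gets a square crop with a
# side length drawn uniformly from [250, 500] px, which is rescaled to 256 px and
# then degraded down to 64 px for the LR image.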


class ImageNetSRTrain(ImageNetSR):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_base(self):
        # Pre-computed indices of a high-resolution subset of the train split.
        with open('data/imagenet_train_hr_indices.p', 'rb') as f:
            indices = pickle.load(f)
        dset = ImageNetTrain(
            process_images=False,
        )
        return Subset(dset, indices)


class ImageNetSRValidation(ImageNetSR):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_base(self):
        # Pre-computed indices of a high-resolution subset of the validation split.
        with open('data/imagenet_val_hr_indices.p', 'rb') as f:
            indices = pickle.load(f)
        dset = ImageNetValidation(
            process_images=False,
        )
        return Subset(dset, indices)
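# Composition note (hedged): get_base() wraps the full split in
# torch.utils.data.Subset, so the SR datasets only expose the items whose integer
# positions are stored in the pickled index lists, e.g.
#
#   with open('data/imagenet_val_hr_indices.p', 'rb') as f:
#       indices = pickle.load(f)
#   base = Subset(ImageNetValidation(process_images=False), indices)
#   assert len(base) == len(indices)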