Skip to content

Image Quality Metrics

LPIPSMetric

Bases: Scorer

LPIPS Metric to compute the Learned Perceptual Image Patch Similarity (LPIPS) score between two images. LPIPS essentially computes the similarity between the activations of two image patches for some pre-defined network. This measure has been shown to match human perception well. A low LPIPS score means that the image patches are perceptually similar.

Parameters:

Name Type Description Default
lpips_net_type str

The network type to use for computing LPIPS. One of "alex", "vgg", or "squeeze".

'alex'
image_height int

The height to which images will be resized before computing LPIPS.

512
image_width int

The width to which images will be resized before computing LPIPS.

512
Source code in hemm/metrics/image_quality/lpips.py
class LPIPSMetric(weave.Scorer):
    """LPIPS Metric to compute the Learned Perceptual Image Patch Similarity (LPIPS) score
    between two images. LPIPS essentially computes the similarity between the activations of
    two image patches for some pre-defined network. This measure has been shown to match
    human perception well. A low LPIPS score means that image patches are perceptually similar.

    Args:
        lpips_net_type (str): The network type to use for computing LPIPS. One of "alex", "vgg",
            or "squeeze".
        image_height (int): The height to which images will be resized before computing LPIPS.
        image_width (int): The width to which images will be resized before computing LPIPS.
    """

    lpips_net_type: Literal["alex", "vgg", "squeeze"]
    image_height: int
    image_width: int
    _lpips_metric: Callable

    def __init__(
        self,
        lpips_net_type: Literal["alex", "vgg", "squeeze"] = "alex",
        image_height: int = 512,
        image_width: int = 512,
    ) -> None:
        super().__init__(
            lpips_net_type=lpips_net_type,
            image_height=image_height,
            image_width=image_width,
        )
        # Bind the backbone choice once so compute_metric only passes the images.
        self._lpips_metric = partial(
            learned_perceptual_image_patch_similarity, net_type=self.lpips_net_type
        )

    def _preprocess_image(self, pil_image: Image.Image) -> "torch.Tensor":
        """Resize a PIL image and convert it to a normalized NCHW float tensor.

        Args:
            pil_image (Image.Image): The image to preprocess.

        Returns:
            torch.Tensor: A `(1, C, H, W)` float tensor scaled to `[-1, 1]`, as
                expected by torchmetrics' LPIPS with `normalize=False`.
        """
        # PIL's `Image.resize` expects `(width, height)`.
        resized = pil_image.resize((self.image_width, self.image_height))
        tensor = (
            torch.from_numpy(
                np.expand_dims(np.array(resized), axis=0).astype(np.uint8)
            )
            # NHWC -> NCHW. Note: `(0, 3, 1, 2)` preserves spatial orientation;
            # the previous `(0, 3, 2, 1)` also transposed H and W, which skews
            # the score for non-symmetric images.
            .permute(0, 3, 1, 2)
            .float()
        )
        # Map uint8 pixel values [0, 255] to the [-1, 1] range LPIPS expects.
        return (tensor / 127.5) - 1.0

    @weave.op()
    def compute_metric(
        self, ground_truth_pil_image: Image.Image, generated_pil_image: Image.Image
    ) -> Dict[str, Any]:
        """Compute the LPIPS score between a ground-truth and a generated image.

        Args:
            ground_truth_pil_image (Image.Image): The reference image.
            generated_pil_image (Image.Image): The generated image to evaluate.

        Returns:
            Dict[str, Any]: `"score"` (float LPIPS value, lower is more similar)
                and `"ground_truth_image"` (the original reference PIL image).
        """
        ground_truth_image = self._preprocess_image(ground_truth_pil_image)
        generated_image = self._preprocess_image(generated_pil_image)
        return {
            "score": float(
                self._lpips_metric(generated_image, ground_truth_image).detach()
            ),
            "ground_truth_image": ground_truth_pil_image,
        }

    @weave.op()
    def score(
        self, prompt: str, ground_truth_image: Image.Image, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score a model output against the ground-truth image.

        Args:
            prompt (str): The generation prompt (unused; accepted for the Scorer API).
            ground_truth_image (Image.Image): The reference image.
            model_output (Dict[str, Any]): Model output containing the generated
                image under the `"image"` key.

        Returns:
            Dict[str, float]: The LPIPS score under the `"score"` key.
        """
        _ = prompt
        metric_output = self.compute_metric(ground_truth_image, model_output["image"])
        return {"score": metric_output["score"]}

PSNRMetric

Bases: Scorer

PSNR Metric to compute the Peak Signal-to-Noise Ratio (PSNR) between two images.

Parameters:

Name Type Description Default
psnr_base float

The base of the logarithm in the PSNR formula.

10.0
psnr_data_range Optional[Union[float, Tuple[float, float]]]

The data range of the input image (min, max). If None, the data range is determined from the image data type.

None
image_height int

The height to which images will be resized before computing PSNR.

512
image_width int

The width to which images will be resized before computing PSNR.

512
Source code in hemm/metrics/image_quality/psnr.py
class PSNRMetric(weave.Scorer):
    """PSNR Metric to compute the Peak Signal-to-Noise Ratio (PSNR) between two images.

    Args:
        psnr_base (float): The base of the logarithm in the PSNR formula.
        psnr_data_range (Optional[Union[float, Tuple[float, float]]]): The data range of the input
            image (min, max). If None, the data range is determined from the image data type.
        image_height (int): The height to which images will be resized before computing PSNR.
        image_width (int): The width to which images will be resized before computing PSNR.
    """

    psnr_base: float
    psnr_data_range: Optional[Union[float, Tuple[float, float]]]
    image_height: int
    image_width: int
    _psnr_metric: Callable

    def __init__(
        self,
        psnr_data_range: Optional[Union[float, Tuple[float, float]]] = None,
        psnr_base: float = 10.0,
        image_height: int = 512,
        image_width: int = 512,
    ) -> None:
        super().__init__(
            psnr_data_range=psnr_data_range,
            psnr_base=psnr_base,
            image_height=image_height,
            image_width=image_width,
        )
        # Bind the configuration once so compute_metric only passes the images.
        self._psnr_metric = partial(
            peak_signal_noise_ratio,
            data_range=self.psnr_data_range,
            base=self.psnr_base,
        )

    def _preprocess_image(self, pil_image: Image.Image) -> "torch.Tensor":
        """Resize a PIL image and convert it to a batched float tensor.

        Args:
            pil_image (Image.Image): The image to preprocess.

        Returns:
            torch.Tensor: A `(1, H, W, C)` float tensor of pixel values in
                `[0, 255]`. PSNR is element-wise, so no channel reordering is
                required.
        """
        # PIL's `Image.resize` expects `(width, height)`.
        resized = pil_image.resize((self.image_width, self.image_height))
        return torch.from_numpy(
            np.expand_dims(np.array(resized), axis=0).astype(np.uint8)
        ).float()

    @weave.op()
    def compute_metric(
        self, ground_truth_pil_image: Image.Image, generated_pil_image: Image.Image
    ) -> Dict[str, Any]:
        """Compute the PSNR between a ground-truth and a generated image.

        Args:
            ground_truth_pil_image (Image.Image): The reference image.
            generated_pil_image (Image.Image): The generated image to evaluate.

        Returns:
            Dict[str, Any]: `"score"` (float PSNR value, higher is better) and
                `"ground_truth_image"` (the original reference PIL image).
        """
        ground_truth_image = self._preprocess_image(ground_truth_pil_image)
        generated_image = self._preprocess_image(generated_pil_image)
        return {
            "score": float(
                self._psnr_metric(generated_image, ground_truth_image).detach()
            ),
            "ground_truth_image": ground_truth_pil_image,
        }

    @weave.op()
    def score(
        self, prompt: str, ground_truth_image: Image.Image, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score a model output against the ground-truth image.

        Args:
            prompt (str): The generation prompt (unused; accepted for the Scorer API).
            ground_truth_image (Image.Image): The reference image.
            model_output (Dict[str, Any]): Model output containing the generated
                image under the `"image"` key.

        Returns:
            Dict[str, float]: The PSNR score under the `"score"` key.
        """
        _ = prompt
        metric_output = self.compute_metric(ground_truth_image, model_output["image"])
        return {"score": metric_output["score"]}

SSIMMetric

Bases: Scorer

SSIM Metric to compute the Structural Similarity Index Measure (SSIM) between two images.

Parameters:

Name Type Description Default
ssim_gaussian_kernel bool

Whether to use a Gaussian kernel for SSIM computation.

True
ssim_sigma float

The standard deviation of the Gaussian kernel.

1.5
ssim_kernel_size int

The size of the Gaussian kernel.

11
ssim_data_range Optional[Union[float, Tuple[float, float]]]

The data range of the input image (min, max). If None, the data range is determined from the image data type.

None
ssim_k1 float

The constant used to stabilize the SSIM numerator.

0.01
ssim_k2 float

The constant used to stabilize the SSIM denominator.

0.03
image_height int

The height to which images will be resized before computing SSIM.

512
image_width int

The width to which images will be resized before computing SSIM.

512
Source code in hemm/metrics/image_quality/ssim.py
class SSIMMetric(weave.Scorer):
    """SSIM Metric to compute the
    [Structural Similarity Index Measure (SSIM)](https://en.wikipedia.org/wiki/Structural_similarity)
    between two images.

    Args:
        ssim_gaussian_kernel (bool): Whether to use a Gaussian kernel for SSIM computation.
        ssim_sigma (float): The standard deviation of the Gaussian kernel.
        ssim_kernel_size (int): The size of the Gaussian kernel.
        ssim_data_range (Optional[Union[float, Tuple[float, float]]]): The data range of the input
            image (min, max). If None, the data range is determined from the image data type.
        ssim_k1 (float): The constant used to stabilize the SSIM numerator.
        ssim_k2 (float): The constant used to stabilize the SSIM denominator.
        image_height (int): The height to which images will be resized before computing SSIM.
        image_width (int): The width to which images will be resized before computing SSIM.
    """

    ssim_gaussian_kernel: bool
    ssim_sigma: float
    ssim_kernel_size: int
    ssim_data_range: Union[float, Tuple[float, float], None]
    ssim_k1: float
    ssim_k2: float
    image_height: int
    image_width: int
    _ssim_metric: Callable

    def __init__(
        self,
        ssim_gaussian_kernel: bool = True,
        ssim_sigma: float = 1.5,
        ssim_kernel_size: int = 11,
        ssim_data_range: Union[float, Tuple[float, float], None] = None,
        ssim_k1: float = 0.01,
        ssim_k2: float = 0.03,
        image_height: int = 512,
        image_width: int = 512,
    ) -> None:
        super().__init__(
            ssim_gaussian_kernel=ssim_gaussian_kernel,
            ssim_sigma=ssim_sigma,
            ssim_kernel_size=ssim_kernel_size,
            ssim_data_range=ssim_data_range,
            ssim_k1=ssim_k1,
            ssim_k2=ssim_k2,
            image_height=image_height,
            image_width=image_width,
        )
        # Bind the configuration once so compute_metric only passes the images.
        # Read from `self.*` (the validated fields) for consistency with the
        # other metrics in this module.
        self._ssim_metric = partial(
            structural_similarity_index_measure,
            gaussian_kernel=self.ssim_gaussian_kernel,
            sigma=self.ssim_sigma,
            kernel_size=self.ssim_kernel_size,
            data_range=self.ssim_data_range,
            k1=self.ssim_k1,
            k2=self.ssim_k2,
        )

    def _preprocess_image(self, pil_image: Image.Image) -> "torch.Tensor":
        """Resize a PIL image and convert it to an NCHW float tensor.

        Args:
            pil_image (Image.Image): The image to preprocess.

        Returns:
            torch.Tensor: A `(1, C, H, W)` float tensor of pixel values in
                `[0, 255]`, as expected by torchmetrics' SSIM.
        """
        # PIL's `Image.resize` expects `(width, height)`.
        resized = pil_image.resize((self.image_width, self.image_height))
        return (
            torch.from_numpy(
                np.expand_dims(np.array(resized), axis=0).astype(np.uint8)
            )
            # NHWC -> NCHW for the torchmetrics functional API.
            .permute(0, 3, 1, 2)
            .float()
        )

    @weave.op()
    def compute_metric(
        self, ground_truth_pil_image: Image.Image, generated_pil_image: Image.Image
    ) -> Dict[str, Any]:
        """Compute the SSIM between a ground-truth and a generated image.

        Args:
            ground_truth_pil_image (Image.Image): The reference image.
            generated_pil_image (Image.Image): The generated image to evaluate.

        Returns:
            Dict[str, Any]: `"score"` (float SSIM value in `[-1, 1]`, higher is
                more similar) and `"ground_truth_image"` (the original reference
                PIL image).
        """
        ground_truth_image = self._preprocess_image(ground_truth_pil_image)
        generated_image = self._preprocess_image(generated_pil_image)
        return {
            "score": float(self._ssim_metric(generated_image, ground_truth_image)),
            "ground_truth_image": ground_truth_pil_image,
        }

    @weave.op()
    def score(
        self, prompt: str, ground_truth_image: Image.Image, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score a model output against the ground-truth image.

        Args:
            prompt (str): The generation prompt (unused; accepted for the Scorer API).
            ground_truth_image (Image.Image): The reference image.
            model_output (Dict[str, Any]): Model output containing the generated
                image under the `"image"` key.

        Returns:
            Dict[str, float]: The SSIM score under the `"score"` key.
        """
        _ = prompt
        metric_output = self.compute_metric(ground_truth_image, model_output["image"])
        return {"score": metric_output["score"]}