Skip to content

Image Quality Metrics

LPIPSMetric

Bases: Scorer

LPIPS Metric to compute the Learned Perceptual Image Patch Similarity (LPIPS) score between two images. LPIPS essentially computes the similarity between the activations of two image patches for some pre-defined network. This measure has been shown to match human perception well. A low LPIPS score means that the image patches are perceptually similar.

Parameters:

Name Type Description Default
lpips_net_type str

The network type to use for computing LPIPS. One of "alex", "vgg", or "squeeze".

'alex'
image_height int

The height to which images will be resized before computing LPIPS.

512
image_width int

The width to which images will be resized before computing LPIPS.

512
Source code in hemm/metrics/image_quality/lpips.py
class LPIPSMetric(weave.Scorer):
    """LPIPS Metric to compute the Learned Perceptual Image Patch Similarity (LPIPS) score
    between two images. LPIPS essentially computes the similarity between the activations of
    two image patches for some pre-defined network. This measure has been shown to match
    human perception well. A low LPIPS score means that image patches are perceptually similar.

    Args:
        lpips_net_type (str): The network type to use for computing LPIPS. One of "alex", "vgg",
            or "squeeze".
        image_height (int): The height to which images will be resized before computing LPIPS.
        image_width (int): The width to which images will be resized before computing LPIPS.
    """

    lpips_net_type: Literal["alex", "vgg", "squeeze"]
    image_height: int
    image_width: int
    _lpips_metric: Callable

    def __init__(
        self,
        lpips_net_type: Literal["alex", "vgg", "squeeze"] = "alex",
        image_height: int = 512,
        image_width: int = 512,
    ) -> None:
        super().__init__(
            lpips_net_type=lpips_net_type,
            image_height=image_height,
            image_width=image_width,
        )
        # Bind the backbone choice once so compute_metric only passes the images.
        self._lpips_metric = partial(
            learned_perceptual_image_patch_similarity, net_type=self.lpips_net_type
        )

    def _preprocess_image(self, pil_image: Image.Image) -> "torch.Tensor":
        """Resize a PIL image and convert it to a normalized NCHW float tensor.

        Args:
            pil_image (Image.Image): The image to preprocess.

        Returns:
            torch.Tensor: A `(1, C, H, W)` float tensor scaled to `[-1, 1]`, as
                expected by torchmetrics' LPIPS with `normalize=False`.
        """
        # PIL's `Image.resize` expects `(width, height)`.
        resized = pil_image.resize((self.image_width, self.image_height))
        tensor = (
            torch.from_numpy(
                np.expand_dims(np.array(resized), axis=0).astype(np.uint8)
            )
            # NHWC -> NCHW. Note: `(0, 3, 1, 2)` preserves spatial orientation;
            # the previous `(0, 3, 2, 1)` also transposed H and W, which skews
            # the score for non-symmetric images.
            .permute(0, 3, 1, 2)
            .float()
        )
        # Map uint8 pixel values [0, 255] to the [-1, 1] range LPIPS expects.
        return (tensor / 127.5) - 1.0

    @weave.op()
    def compute_metric(
        self, ground_truth_pil_image: Image.Image, generated_pil_image: Image.Image
    ) -> Dict[str, Any]:
        """Compute the LPIPS score between a ground-truth and a generated image.

        Args:
            ground_truth_pil_image (Image.Image): The reference image.
            generated_pil_image (Image.Image): The generated image to evaluate.

        Returns:
            Dict[str, Any]: `"score"` (float LPIPS value, lower is more similar)
                and `"ground_truth_image"` (the original reference PIL image).
        """
        ground_truth_image = self._preprocess_image(ground_truth_pil_image)
        generated_image = self._preprocess_image(generated_pil_image)
        return {
            "score": float(
                self._lpips_metric(generated_image, ground_truth_image).detach()
            ),
            "ground_truth_image": ground_truth_pil_image,
        }

    @weave.op()
    def score(
        self, prompt: str, ground_truth_image: Image.Image, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score a model output against the ground-truth image.

        Args:
            prompt (str): The generation prompt (unused; accepted for the Scorer API).
            ground_truth_image (Image.Image): The reference image.
            model_output (Dict[str, Any]): Model output containing the generated
                image under the `"image"` key.

        Returns:
            Dict[str, float]: The LPIPS score under the `"score"` key.
        """
        _ = prompt
        metric_output = self.compute_metric(ground_truth_image, model_output["image"])
        return {"score": metric_output["score"]}

PSNRMetric

Bases: Scorer

PSNR Metric to compute the Peak Signal-to-Noise Ratio (PSNR) between two images.

Parameters:

Name Type Description Default
psnr_base float

The base of the logarithm in the PSNR formula.

10.0
psnr_data_range Optional[Union[float, Tuple[float, float]]]

The data range of the input image (min, max). If None, the data range is determined from the image data type.

None
image_height int

The height to which images will be resized before computing PSNR.

512
image_width int

The width to which images will be resized before computing PSNR.

512
Source code in hemm/metrics/image_quality/psnr.py
class PSNRMetric(weave.Scorer):
    """PSNR Metric to compute the Peak Signal-to-Noise Ratio (PSNR) between two images.

    Args:
        psnr_base (float): The base of the logarithm in the PSNR formula.
        psnr_data_range (Optional[Union[float, Tuple[float, float]]]): The data range of the input
            image (min, max). If None, the data range is determined from the image data type.
        image_height (int): The height to which images will be resized before computing PSNR.
        image_width (int): The width to which images will be resized before computing PSNR.
    """

    psnr_base: float
    psnr_data_range: Optional[Union[float, Tuple[float, float]]]
    image_height: int
    image_width: int
    _psnr_metric: Callable

    def __init__(
        self,
        psnr_data_range: Optional[Union[float, Tuple[float, float]]] = None,
        psnr_base: float = 10.0,
        image_height: int = 512,
        image_width: int = 512,
    ) -> None:
        super().__init__(
            psnr_data_range=psnr_data_range,
            psnr_base=psnr_base,
            image_height=image_height,
            image_width=image_width,
        )
        # Bind the configuration once so compute_metric only passes the images.
        self._psnr_metric = partial(
            peak_signal_noise_ratio,
            data_range=self.psnr_data_range,
            base=self.psnr_base,
        )

    def _preprocess_image(self, pil_image: Image.Image) -> "torch.Tensor":
        """Resize a PIL image and convert it to a batched float tensor.

        Args:
            pil_image (Image.Image): The image to preprocess.

        Returns:
            torch.Tensor: A `(1, H, W, C)` float tensor of pixel values in
                `[0, 255]`. PSNR is element-wise, so no channel reordering is
                required.
        """
        # PIL's `Image.resize` expects `(width, height)`.
        resized = pil_image.resize((self.image_width, self.image_height))
        return torch.from_numpy(
            np.expand_dims(np.array(resized), axis=0).astype(np.uint8)
        ).float()

    @weave.op()
    def compute_metric(
        self, ground_truth_pil_image: Image.Image, generated_pil_image: Image.Image
    ) -> Dict[str, Any]:
        """Compute the PSNR between a ground-truth and a generated image.

        Args:
            ground_truth_pil_image (Image.Image): The reference image.
            generated_pil_image (Image.Image): The generated image to evaluate.

        Returns:
            Dict[str, Any]: `"score"` (float PSNR value, higher is better) and
                `"ground_truth_image"` (the original reference PIL image).
        """
        ground_truth_image = self._preprocess_image(ground_truth_pil_image)
        generated_image = self._preprocess_image(generated_pil_image)
        return {
            "score": float(
                self._psnr_metric(generated_image, ground_truth_image).detach()
            ),
            "ground_truth_image": ground_truth_pil_image,
        }

    @weave.op()
    def score(
        self, prompt: str, ground_truth_image: Image.Image, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score a model output against the ground-truth image.

        Args:
            prompt (str): The generation prompt (unused; accepted for the Scorer API).
            ground_truth_image (Image.Image): The reference image.
            model_output (Dict[str, Any]): Model output containing the generated
                image under the `"image"` key.

        Returns:
            Dict[str, float]: The PSNR score under the `"score"` key.
        """
        _ = prompt
        metric_output = self.compute_metric(ground_truth_image, model_output["image"])
        return {"score": metric_output["score"]}

SSIMMetric

Bases: Scorer

SSIM Metric to compute the Structural Similarity Index Measure (SSIM) between two images.

Parameters:

Name Type Description Default
ssim_gaussian_kernel bool

Whether to use a Gaussian kernel for SSIM computation.

True
ssim_sigma float

The standard deviation of the Gaussian kernel.

1.5
ssim_kernel_size int

The size of the Gaussian kernel.

11
ssim_data_range Optional[Union[float, Tuple[float, float]]]

The data range of the input image (min, max). If None, the data range is determined from the image data type.

None
ssim_k1 float

The constant used to stabilize the SSIM numerator.

0.01
ssim_k2 float

The constant used to stabilize the SSIM denominator.

0.03
image_height int

The height to which images will be resized before computing SSIM.

512
image_width int

The width to which images will be resized before computing SSIM.

512
Source code in hemm/metrics/image_quality/ssim.py
class SSIMMetric(weave.Scorer):
    """SSIM Metric to compute the
    [Structural Similarity Index Measure (SSIM)](https://en.wikipedia.org/wiki/Structural_similarity)
    between two images.

    Args:
        ssim_gaussian_kernel (bool): Whether to use a Gaussian kernel for SSIM computation.
        ssim_sigma (float): The standard deviation of the Gaussian kernel.
        ssim_kernel_size (int): The size of the Gaussian kernel.
        ssim_data_range (Optional[Union[float, Tuple[float, float]]]): The data range of the input
            image (min, max). If None, the data range is determined from the image data type.
        ssim_k1 (float): The constant used to stabilize the SSIM numerator.
        ssim_k2 (float): The constant used to stabilize the SSIM denominator.
        image_height (int): The height to which images will be resized before computing SSIM.
        image_width (int): The width to which images will be resized before computing SSIM.
    """

    ssim_gaussian_kernel: bool
    ssim_sigma: float
    ssim_kernel_size: int
    ssim_data_range: Union[float, Tuple[float, float], None]
    ssim_k1: float
    ssim_k2: float
    image_height: int
    image_width: int
    _ssim_metric: Callable

    def __init__(
        self,
        ssim_gaussian_kernel: bool = True,
        ssim_sigma: float = 1.5,
        ssim_kernel_size: int = 11,
        ssim_data_range: Union[float, Tuple[float, float], None] = None,
        ssim_k1: float = 0.01,
        ssim_k2: float = 0.03,
        image_height: int = 512,
        image_width: int = 512,
    ) -> None:
        super().__init__(
            ssim_gaussian_kernel=ssim_gaussian_kernel,
            ssim_sigma=ssim_sigma,
            ssim_kernel_size=ssim_kernel_size,
            ssim_data_range=ssim_data_range,
            ssim_k1=ssim_k1,
            ssim_k2=ssim_k2,
            image_height=image_height,
            image_width=image_width,
        )
        # Bind the configuration once so compute_metric only passes the images.
        # Read from `self.*` (the validated fields) for consistency with the
        # other metrics in this module.
        self._ssim_metric = partial(
            structural_similarity_index_measure,
            gaussian_kernel=self.ssim_gaussian_kernel,
            sigma=self.ssim_sigma,
            kernel_size=self.ssim_kernel_size,
            data_range=self.ssim_data_range,
            k1=self.ssim_k1,
            k2=self.ssim_k2,
        )

    def _preprocess_image(self, pil_image: Image.Image) -> "torch.Tensor":
        """Resize a PIL image and convert it to an NCHW float tensor.

        Args:
            pil_image (Image.Image): The image to preprocess.

        Returns:
            torch.Tensor: A `(1, C, H, W)` float tensor of pixel values in
                `[0, 255]`, as expected by torchmetrics' SSIM.
        """
        # PIL's `Image.resize` expects `(width, height)`.
        resized = pil_image.resize((self.image_width, self.image_height))
        return (
            torch.from_numpy(
                np.expand_dims(np.array(resized), axis=0).astype(np.uint8)
            )
            # NHWC -> NCHW for the torchmetrics functional API.
            .permute(0, 3, 1, 2)
            .float()
        )

    @weave.op()
    def compute_metric(
        self, ground_truth_pil_image: Image.Image, generated_pil_image: Image.Image
    ) -> Dict[str, Any]:
        """Compute the SSIM between a ground-truth and a generated image.

        Args:
            ground_truth_pil_image (Image.Image): The reference image.
            generated_pil_image (Image.Image): The generated image to evaluate.

        Returns:
            Dict[str, Any]: `"score"` (float SSIM value in `[-1, 1]`, higher is
                more similar) and `"ground_truth_image"` (the original reference
                PIL image).
        """
        ground_truth_image = self._preprocess_image(ground_truth_pil_image)
        generated_image = self._preprocess_image(generated_pil_image)
        return {
            "score": float(self._ssim_metric(generated_image, ground_truth_image)),
            "ground_truth_image": ground_truth_pil_image,
        }

    @weave.op()
    def score(
        self, prompt: str, ground_truth_image: Image.Image, model_output: Dict[str, Any]
    ) -> Union[float, Dict[str, float]]:
        """Score a model output against the ground-truth image.

        Args:
            prompt (str): The generation prompt (unused; accepted for the Scorer API).
            ground_truth_image (Image.Image): The reference image.
            model_output (Dict[str, Any]): Model output containing the generated
                image under the `"image"` key.

        Returns:
            Dict[str, float]: The SSIM score under the `"score"` key.
        """
        _ = prompt
        metric_output = self.compute_metric(ground_truth_image, model_output["image"])
        return {"score": metric_output["score"]}