Open in Colab

Image anti-alias with local features#

In this example we will show the benefits of using anti-aliased patch extraction with kornia.

%%capture
!pip install kornia seaborn
%%capture
!wget https://github.com/kornia/data/raw/main/drslump.jpg

First, let's load an image.

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import PIL
import torch
import seaborn as sns
import kornia.feature as KF
import kornia as K
import cv2
/home/docs/checkouts/readthedocs.org/user_builds/kornia-tutorials/envs/latest/lib/python3.7/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
# Load the test image with OpenCV (which reads BGR) and convert to RGB
# so matplotlib displays the colors correctly.
img_original = cv2.cvtColor(cv2.imread('drslump.jpg'), cv2.COLOR_BGR2RGB)
plt.figure()
plt.imshow(img_original)
# Image dimensions: height, width, channel count.
H,W,CH = img_original.shape

# Factor by which the small version of the image is shrunk.
DOWNSAMPLE = 4
# INTER_AREA is OpenCV's recommended interpolation for downscaling.
img_small = cv2.resize(img_original, (W//DOWNSAMPLE, H//DOWNSAMPLE), interpolation = cv2.INTER_AREA)
plt.figure()
plt.imshow(img_small)
<matplotlib.image.AxesImage at 0x7fd110f49750>
_images/387d727c058311d7fe353ce0d9117feba915694d008d2fa53d87dcc3d24c624b.png _images/7ec88ac7cbaf239337e8192e5674e32de04e2911b9ecabbd891440378a1fad17.png

Now, let's define a keypoint with a large support region.

def show_lafs(img, lafs, idx=0, color='r', figsize = (10,7)):
    """Draw the LAF (local affine frame) outlines for batch element `idx` on top of `img`.

    Args:
        img: image to display — either a torch.Tensor (converted via
            ``K.tensor_to_image``) or anything ``plt.imshow`` accepts
            (e.g. an HxWxC numpy array).
        lafs: local affine frames as used by ``kornia.feature``
            (presumably shape B x N x 2 x 3 — matches `laf_orig` below).
        idx: batch index of the LAF set to draw.
        color: matplotlib color spec for the outline.
        figsize: size of the created figure.
    """
    x, y = KF.laf.get_laf_pts_to_draw(lafs, idx)
    plt.figure(figsize=figsize)
    # Bug fix: the original `type(img) is torch.tensor` was always False,
    # because `torch.tensor` is a factory function, not the tensor class.
    if isinstance(img, torch.Tensor):
        img_show = K.tensor_to_image(img)
    else:
        img_show = img
    plt.imshow(img_show)
    plt.plot(x, y, color)

device = torch.device('cpu')

# A single local affine frame in the original image: a 2x3 affine matrix
# with scale 150 px centered at (180, 280), reshaped to (B=1, N=1, 2, 3).
laf_orig  = torch.tensor([[150., 0, 180],
                     [0, 150, 280]]).float().view(1,1,2,3)
# LAF coordinates scale linearly with the image, so dividing by the
# downsample factor gives the same keypoint in the small image.
laf_small = laf_orig / float(DOWNSAMPLE)

show_lafs(img_original, laf_orig, figsize=(6,4))
show_lafs(img_small, laf_small, figsize=(6,4))
_images/7f766cb1630b740b968fe3fdee0bb28189a4e3d50956a772452665322788c929.png _images/649742b5f133b4ed6b1036566c1a424f449a85267cadd951dc5b30f948160ddb.png

Now let's compare how an extracted patch looks when extracted in a naive way versus from a scale pyramid.

# Side length of the extracted square patches (PS x PS pixels).
PS = 32
with torch.no_grad():
    # Convert the HxWxC uint8 image into a 1xCxHxW float tensor in [0, 1].
    timg_original = K.image_to_tensor(img_original, False).float().to(device) / 255.
    # Anti-aliased extraction: sample from a scale pyramid.
    patches_pyr_orig = KF.extract_patches_from_pyramid(timg_original,laf_orig.to(device), PS)
    # Naive extraction: sample directly from the full-resolution image.
    patches_simple_orig = KF.extract_patches_simple(timg_original, laf_orig.to(device), PS)
    
    # Repeat both extraction methods on the downsampled image.
    timg_small = K.image_to_tensor(img_small, False).float().to(device)/255.
    patches_pyr_small = KF.extract_patches_from_pyramid(timg_small, laf_small.to(device), PS)
    patches_simple_small = KF.extract_patches_simple(timg_small, laf_small.to(device), PS)
    
# Now we will glue all the patches together:

def vert_cat_with_margin(p1, p2, margin=3):
    """Concatenate two patch batches along width, separated by a white strip.

    NOTE(review): despite the name, this places p1 and p2 side by side
    (concatenation along the width dimension).

    Args:
        p1, p2: patch tensors of shape (B, N, CH, H, W); must agree in all
            dimensions except width.
        margin: width in pixels of the all-ones separator strip.

    Returns:
        Tensor of shape (B, N, CH, H, W1 + margin + W2).
    """
    b, n, ch, h, w = p1.size()
    # new_ones keeps the separator on the same device/dtype as the patches,
    # instead of relying on the notebook-global `device` and default dtype.
    sep = p1.new_ones(b, n, ch, h, margin)
    return torch.cat([p1, sep, p2], dim=4)

def horiz_cat_with_margin(p1, p2, margin=3):
    """Concatenate two patch batches along height, separated by a white strip.

    NOTE(review): despite the name, this stacks p1 above p2
    (concatenation along the height dimension).

    Args:
        p1, p2: patch tensors of shape (B, N, CH, H, W); must agree in all
            dimensions except height.
        margin: height in pixels of the all-ones separator strip.

    Returns:
        Tensor of shape (B, N, CH, H1 + margin + H2, W).
    """
    b, n, ch, h, w = p1.size()
    # new_ones keeps the separator on the same device/dtype as the patches,
    # instead of relying on the notebook-global `device` and default dtype.
    sep = p1.new_ones(b, n, ch, margin, w)
    return torch.cat([p1, sep, p2], dim=3)

# Pair the original-image patch with the downsampled-image patch for each
# method, then stack the naive pair above the pyramid pair into one grid.
patches_pyr = vert_cat_with_margin(patches_pyr_orig, patches_pyr_small)
patches_naive = vert_cat_with_margin(patches_simple_orig, patches_simple_small)

patches_all = horiz_cat_with_margin(patches_naive, patches_pyr)

Now let's show the result. The top row is what you get if you extract patches without any anti-aliasing — note how the patches extracted from images of different sizes differ.

The bottom row shows patches extracted from images of different sizes using a scale pyramid. They are still not exactly the same, but the difference is much smaller.

# Display the 2x2 comparison grid (naive on top, pyramid on bottom).
plt.figure(figsize=(10,10))
plt.imshow(K.tensor_to_image(patches_all[0,0]))
<matplotlib.image.AxesImage at 0x7fd10d19a090>
_images/208ee5e53e1cbd1ed32d7181129ef3f4f37f578107efea6bfbaec8437d35a8d2.png

Let's check how much it influences the performance of a local descriptor such as HardNet.

# HardNet descriptor in eval mode; `True` loads pretrained weights
# (downloads the checkpoint on first use).
hardnet = KF.HardNet(True).eval()
# Stack all four patch sets along the batch dimension, drop the N=1 axis,
# and average the color channels to get single-channel patches.
all_patches = torch.cat([patches_pyr_orig,
                         patches_pyr_small,
                         patches_simple_orig,
                         patches_simple_small], dim=0).squeeze(1).mean(dim=1,keepdim=True)
with torch.no_grad():
    descs = hardnet(all_patches)
    # Pairwise L2 distances between the four descriptors.
    distances = torch.cdist(descs, descs)
    print (distances.cpu().detach().numpy())
Downloading: "https://github.com/DagnyT/hardnet/raw/master/pretrained/train_liberty_with_aug/checkpoint_liberty_with_aug.pth" to /home/docs/.cache/torch/hub/checkpoints/checkpoint_liberty_with_aug.pth
  0%|          | 0.00/5.10M [00:00<?, ?B/s]
100%|██████████| 5.10M/5.10M [00:00<00:00, 62.6MB/s]
[[0.         0.0926461  0.8155749  0.5459373 ]
 [0.0926461  0.         0.7833667  0.5068493 ]
 [0.8155749  0.7833667  0.         0.44813344]
 [0.5459373  0.5068493  0.44813344 0.        ]]

So the descriptor distance between the anti-aliased patches is 0.09, while between the naively extracted ones it is 0.45.