Faster R-CNN Models

Construct Faster R-CNN model variants for object-detection tasks.

model_fasterrcnn_resnet50_fpn(
  pretrained = FALSE,
  progress = TRUE,
  num_classes = 90,
  score_thresh = 0.05,
  nms_thresh = 0.5,
  detections_per_img = 100,
  ...
)

model_fasterrcnn_resnet50_fpn_v2(
  pretrained = FALSE,
  progress = TRUE,
  num_classes = 90,
  score_thresh = 0.05,
  nms_thresh = 0.5,
  detections_per_img = 100,
  ...
)

model_fasterrcnn_mobilenet_v3_large_fpn(
  pretrained = FALSE,
  progress = TRUE,
  num_classes = 90,
  score_thresh = 0.05,
  nms_thresh = 0.5,
  detections_per_img = 100,
  ...
)

model_fasterrcnn_mobilenet_v3_large_320_fpn(
  pretrained = FALSE,
  progress = TRUE,
  num_classes = 90,
  score_thresh = 0.05,
  nms_thresh = 0.5,
  detections_per_img = 100,
  ...
)

Arguments

pretrained: Logical. If TRUE, loads pretrained weights from a local file.
progress: Logical. Show progress bar during download (unused).
num_classes: Number of output classes excluding background (default: 90 for COCO).
score_thresh: Numeric. Minimum score threshold for detections (default: 0.05).
nms_thresh: Numeric. Non-Maximum Suppression (NMS) IoU threshold for removing overlapping boxes (default: 0.5).
detections_per_img: Integer. Maximum number of detections per image (default: 100).
...: Other arguments (unused).

Value

A fasterrcnn_model nn_module.

Functions

model_fasterrcnn_resnet50_fpn(): Faster R-CNN with ResNet-50 FPN
model_fasterrcnn_resnet50_fpn_v2(): Faster R-CNN with ResNet-50 FPN V2
model_fasterrcnn_mobilenet_v3_large_fpn(): Faster R-CNN with MobileNet V3 Large FPN
model_fasterrcnn_mobilenet_v3_large_320_fpn(): Faster R-CNN with MobileNet V3 Large 320 FPN

Task

Object detection over images with bounding boxes and class labels.

Input Format

Input images should be torch_tensors of shape (batch_size, 3, H, W) where H and W are typically around 800.

Available Models

model_fasterrcnn_resnet50_fpn()
model_fasterrcnn_resnet50_fpn_v2()
model_fasterrcnn_mobilenet_v3_large_fpn()
model_fasterrcnn_mobilenet_v3_large_320_fpn()

Examples

if (FALSE) { # \dontrun{
library(magrittr)
# ImageNet normalization constants, see https://pytorch.org/vision/stable/models.html
norm_mean <- c(0.485, 0.456, 0.406)
norm_std  <- c(0.229, 0.224, 0.225)
# Use a publicly available image of an animal
url <- paste0(
  "https://upload.wikimedia.org/wikipedia/commons/thumb/",
  "e/ea/Morsan_Normande_vache.jpg/120px-Morsan_Normande_vache.jpg"
)
# Load the image, convert it to a tensor and resize it to 520 x 520
image <- magick_loader(url) %>%
  transform_to_tensor() %>%
  transform_resize(c(520, 520))
# ResNet backbone requires image normalization
input <- image %>%
  transform_normalize(norm_mean, norm_std)
batch_normalized <- input$unsqueeze(1)    # Add batch dimension (1, 3, H, W)

# ResNet-50 FPN V2
model <- model_fasterrcnn_resnet50_fpn_v2(
  pretrained = TRUE, detections_per_img = 5
)
model$eval()
# Run inference without gradient tracking; keep detections for the first image
torch::with_no_grad({
  pred <- model(batch_normalized)$detections[[1]]
})

# Visualize boxes
labels <- coco_classes(as.integer(pred$labels))
boxed <- draw_bounding_boxes(image, pred$boxes, labels = labels)
tensor_image_browse(boxed)

# MobileNet V3 Large 320 FPN
batch <- image$unsqueeze(1)    # Add batch dimension (1, 3, H, W)
model <- model_fasterrcnn_mobilenet_v3_large_320_fpn(
  pretrained = TRUE, score_thresh = 0.02, nms_thresh = 0.8, detections_per_img = 5
)
model$eval()
# Run inference without gradient tracking; keep detections for the first image
torch::with_no_grad({
  pred <- model(batch)$detections[[1]]
})

# Visualize boxes
labels <- coco_classes(as.integer(pred$labels))
boxed <- draw_bounding_boxes(image, pred$boxes, labels = labels)
tensor_image_browse(boxed)
} # }