*This is a mashup of IceVision's "Custom Parser" example and their "Getting Started (Object Detection)" notebook.*

Installing IceVision and IceData

If on Colab run the following cell, else check the installation instructions

# Warning: This takes a while!
! [ -e /content ] &&  wget https://raw.githubusercontent.com/airctic/icevision/master/install_colab.sh
! [ -e /content ] &&  chmod +x install_colab.sh && ./install_colab.sh 
! [ -e /content ] &&  pip install git+git://github.com/airctic/icevision.git --upgrade && kill -9 -1
 
# Quietly install/upgrade the espiownage package, then star-import its core helpers
# (get_data and sysinfo used in this notebook presumably come from here).
!pip install espiownage -Uqq
from espiownage.core import *
# Print a summary of the torch/CUDA versions, GPU and hostname.
sysinfo()
TORCH_VERSION=torch1.8.0; CUDA_VERSION=cu101
CUDA available = True, Device count = 1, Current device = 0
Device name = TITAN X (Pascal)
hostname: lecun

Run Parameters

# All knobs for one run live here; change k and re-run the notebook for each fold.
dataset_name = 'preclean'  # choose from: cleaner, preclean, spnet, cyclegan, fake
k = 1               # index of the held-out validation fold for k-fold cross-validation (0 .. nk-1)
model_choice = 0    # IceVision object detector backbone; see the "Model selection" section below
nk = 5              # number of k-folds (leave as 5)
use_wandb = True    # can set to False if no WandB login/tracking is desired
project = 'bbox_kfold' # project name for wandb

Imports

from icevision.all import *   # this one takes a while
import pandas as pd
from mrspuff.utils import on_colab
from icevision.models.checkpoint import *  # as a test to make sure IV is installed properly

Dataset

# Resolve the local directory of the chosen dataset and display it.
data_dir = get_data(dataset_name); data_dir
Path('/home/drscotthawley/.espiownage/data/espiownage-preclean')

Read in Training Data

# One row per annotated bounding box (filename, image size, label, box corners).
df = pd.read_csv(data_dir / "bboxes/annotations.csv")
# shuffle rows: if you don't do this next line then my manual/DIY k-folding will go badly.
# The seed is fixed because each fold is a separate run of this notebook with a different k:
# every run must see the same row order, otherwise the folds overlap instead of partitioning the data.
df = df.sample(frac=1, random_state=0).reset_index(drop=True)
df['label'] = 'AN'  # all objects as one class: "antinode"
df.head()
filename width height label xmin ymin xmax ymax
0 06240907_proc_01623.png 512 384 AN 0 106 182 329
1 06240907_proc_00592.png 512 384 AN 212 113 361 262
2 06241902_proc_01478.png 512 384 AN 136 72 285 219
3 06240907_proc_01681.png 512 384 AN 43 163 156 310
4 06240907_proc_01488.png 512 384 AN 221 123 366 266

Define IceVision Data Parser

# Empty object-detection record handed to the parser as its template.
template_record = ObjectDetectionRecord()

class BBoxParser(Parser):
    """IceVision parser for the bounding-box annotations in ``bboxes/annotations.csv``.

    Every CSV row describes one box. The image filename is used as the record
    id, so rows that share a filename are presumably merged into one record by
    the base ``Parser`` (the ``is_new`` flag in ``parse_fields`` suggests so).
    """

    def __init__(self, template_record, data_dir):
        """Read the annotation table under ``data_dir``, relabel and shuffle it.

        Args:
            template_record: record template forwarded to ``Parser.__init__``.
            data_dir: dataset root containing ``bboxes/annotations.csv`` and ``images/``.
        """
        super().__init__(template_record=template_record)

        self.data_dir = data_dir
        annotations = pd.read_csv(data_dir / "bboxes/annotations.csv")
        annotations['label'] = 'AN'  # make them all the same object
        # shuffle rows: if you don't do this then my manual/DIY k-folding will go badly
        self.df = annotations.sample(frac=1).reset_index(drop=True)
        self.class_map = ClassMap(list(self.df['label'].unique()))

    def __iter__(self) -> Any:
        """Yield one namedtuple per annotation row."""
        yield from self.df.itertuples()

    def __len__(self) -> int:
        """Number of annotation rows (boxes), not number of images."""
        return self.df.shape[0]

    def record_id(self, o) -> Hashable:
        """Rows are grouped by the image filename."""
        return o.filename

    def parse_fields(self, o, record, is_new):
        """Copy one annotation row ``o`` onto ``record``.

        Image-level fields are only written when the record is new; the box and
        its label are appended for every row.
        """
        if is_new:
            image_path = self.data_dir / 'images' / o.filename
            record.set_filepath(image_path)
            record.set_img_size(ImgSize(width=o.width, height=o.height))
            record.detection.set_class_map(self.class_map)

        box = BBox.from_xyxy(o.xmin, o.ymin, o.xmax, o.ymax)
        record.detection.add_bboxes([box])
        record.detection.add_labels([o.label])

# Instantiate the parser; its constructor reads bboxes/annotations.csv under data_dir.
parser = BBoxParser(template_record, data_dir)

Split the Dataset: Random or K-fold

kfold = True   # True: manual k-fold split (fold k of nk); False: plain random split


def kfold_filename_split(filenames, k, nk):
    """Split ``filenames`` into (train, valid) lists for fold ``k`` of ``nk``.

    The validation fold is the k-th contiguous chunk of ``filenames``; the last
    fold also takes whatever is left over after rounding the chunk size.

    Args:
        filenames: list of unique image ids, already in the (shuffled) order to fold over.
        k: index of the validation fold, 0 <= k < nk.
        nk: total number of folds.

    Returns:
        (train_ids, valid_ids), two disjoint lists that together cover ``filenames``.

    Raises:
        ValueError: if ``k`` is outside ``[0, nk-1]``.
    """
    if not 0 <= k < nk:
        raise ValueError(f"k must be in [0, {nk-1}], got {k}")
    n = len(filenames)
    val_size = int(round(n/nk))
    start = k*val_size
    stop = (k+1)*val_size if k < nk-1 else n   # last one might be a bit different
    return filenames[:start] + filenames[stop:], filenames[start:stop]


if not kfold:
    print("\n------\n Random splitting")
    parser = BBoxParser(template_record, data_dir)
    # Actually produce the records; without this the cells below have nothing to work on.
    train_records, valid_records = parser.parse(data_splitter=RandomSplitter([0.8, 0.2]))
else:
    # k and nk are defined above in 'Run Parameters'; change k yourself from 0 to (nk-1)
    print(f"\n-----\n K-fold splitting: {k+1}/{nk}")
    # Fold over unique images, not annotation rows: the CSV has one row per box, and
    # folding over rows can put the same image in both the training and validation sets.
    filenames = df['filename'].drop_duplicates().tolist()
    train_id_list, val_id_list = kfold_filename_split(filenames, k, nk)
    presplits = [train_id_list, val_id_list]
    train_records, valid_records = parser.parse(data_splitter=FixedSplitter(presplits))
-----
 K-fold splitting: 3/5
INFO     - Autofixing records | icevision.parsers.parser:parse:122

Quick check: Let's take a look at one record of target data:

# Sanity check: draw one parsed training record (index 5) with label text turned off.
show_record(train_records[5], display_label=False, figsize=(14, 10))

Define Transforms and DataSets

Following the Getting Started "refrigerator" notebook...

# EfficientDet requires its inputs to be divisible by 128, hence 384.
image_size = 384

# Training pipeline: IceVision's aug_tfms (presize 512, final size image_size), then normalisation.
train_pipeline = [*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()]
train_tfms = tfms.A.Adapter(train_pipeline)

# Validation pipeline: resize-and-pad to image_size, then the same normalisation.
valid_pipeline = [*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()]
valid_tfms = tfms.A.Adapter(valid_pipeline)

# Datasets: pair each set of records with its transform pipeline.
train_ds, valid_ds = Dataset(train_records, train_tfms), Dataset(valid_records, valid_tfms)

# Fetch the first training item three times and show the results side by side
# (presumably to eyeball how the training augmentations vary on one image).
samples = []
for _ in range(3):
    samples.append(train_ds[0])
show_samples(samples, ncols=3)

Model selection

##   the choice itself has been moved up to "Run Parameters" (model_choice) #
selection = model_choice

# selection -> (model path under icevision `models`, backbone attribute name,
#               whether the model constructor requires an explicit img_size)
model_table = {
    0: ('mmdet.retinanet', 'resnet50_fpn_1x', False),
    1: ('torchvision.retinanet', 'resnet50_fpn', False),  # Retinanet is also implemented in the torchvision library
    2: ('ross.efficientdet', 'tf_lite0', True),           # The efficientdet model requires an img_size parameter
    3: ('ultralytics.yolov5', 'small', True),             # The yolov5 model requires an img_size parameter
}

# Fail loudly on an unknown choice instead of leaving model_type undefined
# (or silently reusing whatever a previous run left in the kernel).
if selection not in model_table:
    raise ValueError(f"model_choice must be one of {sorted(model_table)}, got {selection!r}")

model_name, backbone_name, needs_img_size = model_table[selection]

# Resolve the module and backbone from the names, so that model_name/backbone_name
# (stored in the checkpoint later) can never disagree with model_type/backbone.
# Only the selected model's attributes are touched, as in an if/elif chain.
model_type = models
for attr in model_name.split('.'):
    model_type = getattr(model_type, attr)
backbone = getattr(model_type.backbones, backbone_name)

extra_args = {'img_size': image_size} if needs_img_size else {}

model_type, backbone, extra_args
(<module 'icevision.models.mmdet.models.retinanet' from '/home/drscotthawley/envs/iv2/lib/python3.8/site-packages/icevision/models/mmdet/models/retinanet/__init__.py'>,
 <icevision.models.mmdet.models.retinanet.backbones.resnet_fpn.MMDetRetinanetBackboneConfig at 0x7f7eedc751f0>,
 {})

Optional: WandB tracking

# Only install/import WandB and fastai's WandB callback when tracking was requested.
if use_wandb: 
    !pip install wandb -qqq
    import wandb
    from fastai.callback.wandb import *
    # NOTE(review): SaveModelCallback is imported but not used in any cell visible here.
    from fastai.callback.tracker import SaveModelCallback
    # Authenticate with WandB.
    wandb.login()
# Build the detector on a pretrained backbone; the class count comes from the parser's class map.
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args) 
Use load_from_local loader
The model and loaded state dict do not match exactly

size mismatch for bbox_head.retina_cls.weight: copying a param with shape torch.Size([720, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([9, 256, 3, 3]).
size mismatch for bbox_head.retina_cls.bias: copying a param with shape torch.Size([720]) from checkpoint, the shape in current model is torch.Size([9]).
/home/drscotthawley/envs/iv2/lib/python3.8/site-packages/mmcv/cnn/utils/weight_init.py:118: UserWarning: init_cfg without layer key, if you do not define override key either, this init_cfg will do nothing
  warnings.warn(

Define DataLoaders

(batch size & num workers can be decreased if CUDA OOM errors occur)

# Both loaders share batch size and worker count; only the training loader shuffles.
loader_opts = dict(batch_size=8, num_workers=4)
train_dl = model_type.train_dl(train_ds, shuffle=True, **loader_opts)
valid_dl = model_type.valid_dl(valid_ds, shuffle=False, **loader_opts)

# show batch - target data -- Ideally these won't all look the same! ;-) 
first_valid_batch = first(valid_dl)
model_type.show_batch(first_valid_batch, ncols=4)
# Score validation with the COCO bounding-box metric.
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]

# Callbacks: none by default, plus WandB logging when tracking is enabled.
cbs = []
if use_wandb:
    # Start the run first, then create the callback (same order as before).
    run_name = f'k={k},m={model_choice},{dataset_name}'
    wandb.init(project=project, name=run_name)
    cbs.append(WandbCallback())

learn = model_type.fastai.learner(
    dls=[train_dl, valid_dl],
    model=model,
    metrics=metrics,
    cbs=cbs,
)
Tracking run with wandb version 0.12.2
Syncing run k=2,m=0,preclean to Weights & Biases (Documentation).
Project page: https://wandb.ai/drscotthawley/bbox_kfold
Run page: https://wandb.ai/drscotthawley/bbox_kfold/runs/i6x1r5uq
Run data is saved locally in /home/drscotthawley/espi-work/icevision/wandb/run-20210927_203010-i6x1r5uq

# Learning-rate finder, with the sweep ending at 0.005.
learn.lr_find(end_lr=0.005)
/home/drscotthawley/envs/iv2/lib/python3.8/site-packages/fastai/callback/schedule.py:269: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "ro" (-> color='r'). The keyword argument will take precedence.
  ax.plot(val, idx, 'ro', label=nm, c=color)
SuggestedLRs(valley=7.351428212132305e-05)
epochs = 11 if kfold else 30   # really 10 or 11 is where things settle down
lr = 1e-4
freeze_epochs = 2
print(f"Training for {epochs} epochs, starting with {freeze_epochs} frozen epochs...")
# Pass the variable rather than a literal so the message above always matches what is run.
learn.fine_tune(epochs, lr, freeze_epochs=freeze_epochs)
Training for 11 epochs, starting with 2 frozen epochs...
Could not gather input dimensions
WandbCallback requires use of "SaveModelCallback" to log best model
WandbCallback was not able to prepare a DataLoader for logging prediction samples -> 'Dataset' object has no attribute 'items'
0.00% [0/2 00:00<00:00]
epoch train_loss valid_loss COCOMetric time

0.00% [0/220 00:00<00:00]
# Close the WandB run. Guarded because wandb is only imported when use_wandb is True,
# so an unconditional call raises NameError in an untracked run.
if use_wandb:
    wandb.finish()

Waiting for W&B process to finish, PID 31909
Program ended successfully.
Find user logs for this run at: /home/drscotthawley/grant/wandb/run-20210925_120229-32x09kwf/logs/debug.log
Find internal logs for this run at: /home/drscotthawley/grant/wandb/run-20210925_120229-32x09kwf/logs/debug-internal.log

Run summary:


COCOMetric0.66297
epoch9.33475
eps_01e-05
eps_11e-05
eps_21e-05
eps_31e-05
eps_41e-05
eps_51e-05
eps_61e-05
lr_00.0
lr_10.0
lr_20.0
lr_30.0
lr_40.0
lr_51e-05
lr_62e-05
mom_00.90368
mom_10.90368
mom_20.90368
mom_30.90368
mom_40.90368
mom_50.90368
mom_60.90368
raw_loss0.33765
sqr_mom_00.99
sqr_mom_10.99
sqr_mom_20.99
sqr_mom_30.99
sqr_mom_40.99
sqr_mom_50.99
sqr_mom_60.99
train_loss0.2849
valid_loss0.26979
wd_00.01
wd_10.01
wd_20.01
wd_30.01
wd_40.01
wd_50.01
wd_60.01

Run history:


COCOMetric▁▂▃▆▆▆▇██
epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
eps_0▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_1▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_2▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_3▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_4▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_5▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_6▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr_0▁▂▂▄▄▆▇██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr_1▁▁▂▄▄▆▇██▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
lr_2▁▁▂▃▄▆▇██▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
lr_3▁▁▂▃▄▆▇██▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂
lr_4▁▁▂▃▄▅▇▇▇▂▂▃▃▄▄▅▅▆▇▇████████▇▇▇▇▇▆▆▆▅▅▅▄
lr_5▁▁▁▂▂▃▃▄▄▂▃▃▃▄▄▅▆▆▇▇████████▇▇▇▇▇▆▆▆▅▅▅▄
lr_6▁▁▂▃▄▆▇██▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂
mom_0██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_1██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_2██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_3██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_4██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_5██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_6██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
raw_loss▆▄▇▇█▄▄▃▅▅▅▁▃▄▆▄▃▂▆▅▂▂▂▄▁▃▂▂▃▄▁▃▄▄▃▂▃▁▁▂
sqr_mom_0▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_1▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_2▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_3▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_4▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_5▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_6▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss█████▇▇▇▇▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁
valid_loss█▇▆▃▃▃▂▁▁
wd_0▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_1▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_2▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_3▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_4▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_5▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_6▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
# Visualise model output on validation samples using a 0.5 detection threshold.
model_type.show_results(model, valid_ds, detection_threshold=.5)
# One checkpoint file per (fold, model choice, dataset) combination.
checkpoint_path = f'bbox-k{k}-m{model_choice}-{dataset_name}.pth'

# Store the model together with what is needed to rebuild it at inference time.
save_icevision_checkpoint(model, 
                        model_name=model_name,
                        backbone_name=backbone_name,
                        classes =  parser.class_map.get_classes(), 
                        img_size=image_size,   # same value the transforms/model were built with
                        filename=checkpoint_path,
                        # NOTE(review): hand-written version string; confirm it matches the installed icevision.
                        meta={'icevision_version': '0.9.1'})

Inference

You might get a CUDA out-of-memory (OOM) error here. If so, restart the kernel and load the checkpoint we just saved. You will have to go back and re-define `learn`, `model`, `valid_ds`, etc., though.

# Rebuild the model from the checkpoint written above.
checkpoint_and_model = model_from_checkpoint(checkpoint_path, 
    model_name=model_name,
    backbone_name=backbone_name,
    img_size=image_size) 

# Actually use the reloaded model: model_from_checkpoint returns a dict, and without this
# line the cells below keep running on whatever `model` was already in memory
# (or fail after a kernel restart, when no `model` exists yet).
model = checkpoint_and_model["model"]

model.to('cuda')
device=next(model.parameters()).device
device
Use load_from_local loader
/home/drscotthawley/envs/iv2/lib/python3.8/site-packages/mmcv/cnn/utils/weight_init.py:118: UserWarning: init_cfg without layer key, if you do not define override key either, this init_cfg will do nothing
  warnings.warn(
device(type='cuda', index=0)
# Predict over the validation set, unshuffled so preds[i] lines up with valid_ds[i].
infer_ds = valid_ds
infer_dl = model_type.infer_dl(infer_ds, shuffle=False, batch_size=4)
# Go through the dataloader here. Do NOT use: model_type.predict(model, valid_ds, keep_images=True)
preds = model_type.predict_from_dl(model, infer_dl, keep_images=True)
show_preds(preds=preds[:5])
(len(train_ds), len(valid_ds), len(preds))
(1904, 970, 970)

Next, let's extract what we need from these predictions: the bounding-box coordinates and confidence scores for each image.

def get_bblist(pred):
    """Return the predicted boxes of one prediction as plain coordinate lists.

    Args:
        pred: a prediction object exposing ``pred.pred.detection.bboxes``, each
            box having ``xmin``, ``ymin``, ``xmax`` and ``ymax`` attributes.

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax]`` list per box, in the
        original box order (empty if there are no boxes).
    """
    return [
        [box.xmin, box.ymin, box.xmax, box.ymax]
        for box in pred.pred.detection.bboxes
    ]

# Spot-check the helper on the second prediction.
get_bblist(preds[1])      
[[104.32266, 100.92419, 218.78952, 210.76654],
 [113.43626, 226.69371, 191.9419, 303.52826],
 [1.1537857, 69.05007, 70.789154, 219.51254]]
# One row per prediction that has at least one box: file name, boxes, scores, lowest score, index.
results = []
for i, pred in enumerate(preds):
    scores = pred.pred.detection.scores
    if len(scores) == 0:
        continue   # sometimes you get a zero box/prediction. ??
    worst_score = np.min(np.array(scores))
    # Name the row after the image's stem with a .csv extension.
    csv_name = str(Path(valid_ds[i].common.filepath).stem)+'.csv'
    results.append([csv_name, get_bblist(pred), scores, worst_score, i])

# store as pandas dataframe
res_df = pd.DataFrame(results, columns=['filename', 'bblist','scores','worst_score','i'])
# order by worst score as a "top losses" kind of thing
res_df = res_df.sort_values('worst_score')
res_df.head() # take a look
filename bblist scores worst_score i
385 06240907_proc_00973.csv [[163.79924, 134.47836, 275.4303, 244.87671], [0.8026047, 126.37773, 139.32632, 292.40704], [190.97601, 264.7258, 258.71545, 333.8126]] [0.9729451, 0.94984376, 0.50001764] 0.500018 385
472 06241902_proc_01305.csv [[108.70906, 101.53219, 218.08482, 217.0892], [1.9381962, 84.965996, 59.986267, 231.3284], [123.28911, 228.08128, 180.5442, 284.22955]] [0.97011507, 0.5928285, 0.5002006] 0.500201 472
312 06240907_proc_00292.csv [[2.6407547, 122.064354, 147.58685, 295.88235], [201.03159, 185.91422, 252.44965, 245.18521], [224.0113, 154.67923, 284.4107, 211.29602], [150.4933, 238.28488, 204.63936, 293.23886]] [0.960453, 0.72788745, 0.6765359, 0.50031626] 0.500316 312
703 06241902_proc_01802.csv [[100.36381, 108.06838, 212.20728, 214.30856], [0.3653469, 75.12563, 58.0689, 230.51862]] [0.9819204, 0.5004542] 0.500454 703
542 06240907_proc_00580.csv [[166.64502, 139.25168, 272.06723, 240.16678], [199.38095, 262.43307, 269.6079, 333.44827], [209.20874, 55.649307, 256.18094, 127.14867], [1.7261848, 138.99355, 116.95537, 281.23218], [163.15091, 50.10095, 212.35336, 118.81209], [0.55441284, 184.34184, 75.460976, 283.9815], [38.83757, 141.32932, 130.9435, 252.63617]] [0.9669667, 0.9094971, 0.80517113, 0.75459, 0.7254705, 0.5280292, 0.5005095] 0.500510 542
# Write the "top losses" table; k-fold runs get the fold index in the file name.
out_csv = f'bboxes_top_losses_real_k{k}.csv' if kfold else 'bboxes_top_losses_real.csv'
res_df.to_csv(out_csv, index=False)