This is a mashup of IceVision's "Custom Parser" example and their "Getting Started (Object Detection)" notebook.

Note: The WandB links will 404, because there is no "drscotthawley" WandB account. We just used sed to replace the real username in the .ipynb files.

Installing IceVision and IceData

If on Colab run the following cell, else check the installation instructions

# Warning: This takes a while!
! [ -e /content ] &&  wget https://raw.githubusercontent.com/airctic/icevision/master/install_colab.sh
! [ -e /content ] &&  chmod +x install_colab.sh && ./install_colab.sh 
! [ -e /content ] &&  pip install git+git://github.com/airctic/icevision.git --upgrade && kill -9 -1

# Install/upgrade espiownage quietly, then import its core helpers into the notebook namespace.
!pip install espiownage -Uqq
from espiownage.core import *
sysinfo()  # prints the torch/CUDA/device/hostname summary shown in the output below

TORCH_VERSION=torch1.8.0; CUDA_VERSION=cu101
CUDA available = True, Device count = 1, Current device = 0
Device name = TITAN X (Pascal)
hostname: lecun

Run Parameters

# Per-run settings, read by the cells further down.
dataset_name = 'preclean'  # choose from: cleaner, preclean, spnet, cyclegan, fake
k = 1               # zero-based index of the fold held out for validation (0 .. nk-1)
model_choice = 0    # IceVision object detector backbone (0-3); see the 'Model selection' cell below
nk = 5              # number of k-folds (leave as 5)
use_wandb = True    # can set to false if no WandB login/tracking is desired
project = 'bbox_kfold' # project name for wandb

Imports

from icevision.all import *   # this one takes a while
import pandas as pd
from mrspuff.utils import on_colab
from icevision.models.checkpoint import *  # as a test to make sure IV is installed properly

Dataset

# Resolve the local directory of the chosen dataset; the trailing expression only displays it.
data_dir = get_data(dataset_name); data_dir

Path('/home/drscotthawley/.espiownage/data/espiownage-preclean')

Read in Training Data

# Load the bounding-box annotations table.
df = pd.read_csv(data_dir / "bboxes/annotations.csv")
# shuffle rows: if you don't do this next line then my manual/DIY k-folding will go badly.
# The seed is fixed because each fold is produced by a separate run of this notebook with a
# different k; an unseeded shuffle would give every run a different ordering, so the folds
# would overlap instead of partitioning the data.
df = df.sample(frac=1, random_state=0).reset_index(drop=True)
df['label'] = 'AN'  # all objects as one class: "antinode"
df.head()

Define IceVision Data Parser

# Empty object-detection record that is handed to the parser's constructor as its template.
template_record = ObjectDetectionRecord()

class BBoxParser(Parser):
    """Parser that turns rows of ``bboxes/annotations.csv`` into detection records.

    Each CSV row contributes one bounding box. Rows that share a ``filename``
    share a record id, and every box is given the single label ``'AN'``.
    """

    def __init__(self, template_record, data_dir):
        """Read the annotation table under ``data_dir`` and build the class map."""
        super().__init__(template_record=template_record)

        self.data_dir = data_dir
        annotations = pd.read_csv(data_dir / "bboxes/annotations.csv")
        annotations['label'] = 'AN'  # make them all the same object
        # Randomise the row order and renumber the index from zero
        # (the manual k-folding goes badly on unshuffled rows).
        shuffled = annotations.sample(frac=1)
        self.df = shuffled.reset_index(drop=True)
        distinct_labels = self.df['label'].unique()
        self.class_map = ClassMap(list(distinct_labels))

    def __iter__(self) -> Any:
        """Yield the annotation rows one at a time as named tuples."""
        yield from self.df.itertuples()

    def __len__(self) -> int:
        """Return the number of annotation rows."""
        return self.df.shape[0]

    def record_id(self, o) -> Hashable:
        """Use the image filename as the record id."""
        return o.filename

    def parse_fields(self, o, record, is_new):
        """Copy one annotation row ``o`` into ``record``.

        Image-level fields are only filled in when ``is_new`` is true; the
        row's box and label are appended on every call.
        """
        if is_new:
            image_path = self.data_dir / 'images' / o.filename
            record.set_filepath(image_path)
            image_size = ImgSize(width=o.width, height=o.height)
            record.set_img_size(image_size)
            record.detection.set_class_map(self.class_map)

        box = BBox.from_xyxy(o.xmin, o.ymin, o.xmax, o.ymax)
        record.detection.add_bboxes([box])
        record.detection.add_labels([o.label])

# Instantiate the parser; its constructor reads the annotations CSV under data_dir.
parser = BBoxParser(template_record, data_dir)

Split the Dataset: Random or K-fold

# Choose how the records are divided into training and validation sets.
kfold = True
if not kfold:
    print("\n------\n Random splitting")
    parser = BBoxParser(template_record, data_dir)
    # Actually produce the record lists; without this call the later cells fail with a
    # NameError. With no data_splitter given, IceVision falls back to its default random split.
    train_records, valid_records = parser.parse()
else:
    # k and nk come from 'Run Parameters'; k is the zero-based fold held out for validation.
    if not 0 <= k < nk:
        raise ValueError(f"k must be in [0, {nk-1}], got {k}")
    print(f"\n-----\n K-fold splitting: {k+1}/{nk}")

    # Fold over *images*, not annotation rows. df appears to hold one row per box, so slicing
    # rows would put the filename of a multi-box image into both lists and leak training
    # images into the validation set.
    # Sorting and then shuffling with a fixed seed gives the same image order on every run,
    # so the folds obtained by re-running with k = 0 .. nk-1 partition the data.
    filenames = pd.Series(sorted(df['filename'].unique()))
    filenames = filenames.sample(frac=1, random_state=0).tolist()
    n = len(filenames)
    val_size = int(round(n/nk))

    val_start = k*val_size
    # last one might be a bit different: it takes whatever is left after rounding
    val_stop = n if k == nk-1 else val_start + val_size
    val_id_list = filenames[val_start:val_stop]
    train_id_list = filenames[:val_start] + filenames[val_stop:]
    presplits = [train_id_list, val_id_list]
    train_records, valid_records = parser.parse(data_splitter=FixedSplitter(presplits))

-----
 K-fold splitting: 3/5

INFO     - Autofixing records | icevision.parsers.parser:parse:122

Quick check: Let's take a look at one record of target data:

# Visual sanity check of a single parsed training record (labels hidden).
show_record(train_records[5], display_label=False, figsize=(14, 10))

Define Transforms and DataSets

Following the Getting Started "refrigerator" notebook...

# 384 is used because EfficientDet requires its inputs to be divisible by 128 (384 = 3 * 128)
image_size = 384
# Training pipeline: augmentations followed by normalisation
train_augs = tfms.A.aug_tfms(size=image_size, presize=512)
train_tfms = tfms.A.Adapter([*train_augs, tfms.A.Normalize()])
# Validation pipeline: resize-and-pad only, followed by normalisation
valid_resize = tfms.A.resize_and_pad(image_size)
valid_tfms = tfms.A.Adapter([*valid_resize, tfms.A.Normalize()])

# Datasets pair each record list with its transform pipeline
train_ds, valid_ds = Dataset(train_records, train_tfms), Dataset(valid_records, valid_tfms)

# Fetch the first training item three times and show the results side by side
# (presumably to preview how the random augmentations vary -- confirm).
samples = [train_ds[0] for _ in range(3)]
show_samples(samples, ncols=3)

Model selection

# The choice itself is made in "Run Parameters" (model_choice); this cell maps it to an
# IceVision model type and backbone, plus the names stored alongside the saved checkpoint.
selection = model_choice

# Extra keyword arguments forwarded to model_type.model(); only some models need any.
extra_args = {}

# The branches are kept as an if/elif chain so that only the chosen library is touched.
if selection == 0:
    model_type = models.mmdet.retinanet
    backbone = model_type.backbones.resnet50_fpn_1x
    model_name = 'mmdet.retinanet'
    backbone_name = 'resnet50_fpn_1x'

elif selection == 1:
    # The Retinanet model is also implemented in the torchvision library
    model_type = models.torchvision.retinanet
    backbone = model_type.backbones.resnet50_fpn
    model_name = 'torchvision.retinanet'
    backbone_name = 'resnet50_fpn'

elif selection == 2:
    model_type = models.ross.efficientdet
    backbone = model_type.backbones.tf_lite0
    model_name = 'ross.efficientdet'
    backbone_name = 'tf_lite0'
    # The efficientdet model requires an img_size parameter
    extra_args['img_size'] = image_size

elif selection == 3:
    model_type = models.ultralytics.yolov5
    backbone = model_type.backbones.small
    model_name = 'ultralytics.yolov5'
    backbone_name = 'small'
    # The yolov5 model requires an img_size parameter
    extra_args['img_size'] = image_size

else:
    # Fail here with a clear message instead of a NameError on model_type further down.
    raise ValueError(f"Unknown model_choice {selection!r}; expected 0, 1, 2 or 3")

model_type, backbone, extra_args

(<module 'icevision.models.mmdet.models.retinanet' from '/home/drscotthawley/envs/iv2/lib/python3.8/site-packages/icevision/models/mmdet/models/retinanet/__init__.py'>,
 <icevision.models.mmdet.models.retinanet.backbones.resnet_fpn.MMDetRetinanetBackboneConfig at 0x7f7eedc751f0>,
 {})

Optional: WandB tracking

# Only install/import WandB and log in when tracking was requested in 'Run Parameters'.
# Note that `wandb` is therefore undefined in later cells when use_wandb is False.
if use_wandb: 
    !pip install wandb -qqq
    import wandb
    from fastai.callback.wandb import *
    from fastai.callback.tracker import SaveModelCallback
    wandb.login()

# Build the detector: pretrained backbone, class count taken from the parser's class map,
# plus any model-specific extra arguments chosen in the model-selection cell.
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args)

Use load_from_local loader
The model and loaded state dict do not match exactly

size mismatch for bbox_head.retina_cls.weight: copying a param with shape torch.Size([720, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([9, 256, 3, 3]).
size mismatch for bbox_head.retina_cls.bias: copying a param with shape torch.Size([720]) from checkpoint, the shape in current model is torch.Size([9]).

/home/drscotthawley/envs/iv2/lib/python3.8/site-packages/mmcv/cnn/utils/weight_init.py:118: UserWarning: init_cfg without layer key, if you do not define override key either, this init_cfg will do nothing
  warnings.warn(

Define DataLoaders

(batch size & num workers can be decreased if CUDA OOM errors occur)

# Both loaders share batch size and worker count; only the training loader shuffles.
loader_kwargs = dict(batch_size=8, num_workers=4)
train_dl = model_type.train_dl(train_ds, shuffle=True, **loader_kwargs)
valid_dl = model_type.valid_dl(valid_ds, shuffle=False, **loader_kwargs)

# show batch - target data -- Ideally these won't all look the same! ;-)
first_valid_batch = first(valid_dl)
model_type.show_batch(first_valid_batch, ncols=4)

metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]

# Start a WandB run and attach its callback only when tracking is enabled.
cbs = []
if use_wandb:
    wandb.init(project=project, name=f'k={k},m={model_choice},{dataset_name}')
    cbs.append(WandbCallback())

learn = model_type.fastai.learner(
    dls=[train_dl, valid_dl],
    model=model,
    metrics=metrics,
    cbs=cbs,
)

learn.lr_find(end_lr=0.005)

/home/drscotthawley/envs/iv2/lib/python3.8/site-packages/fastai/callback/schedule.py:269: UserWarning: color is redundantly defined by the 'color' keyword argument and the fmt string "ro" (-> color='r'). The keyword argument will take precedence.
  ax.plot(val, idx, 'ro', label=nm, c=color)

SuggestedLRs(valley=7.351428212132305e-05)

# Training schedule: fewer epochs per run when k-folding.
epochs = 11 if kfold else 30   # really 10 or 11 is where things settle down
lr = 1e-4
freeze_epochs = 2
print(f"Training for {epochs} epochs, starting with {freeze_epochs} frozen epochs...")
# Pass the variable (not a literal) so the printed message and the actual schedule cannot drift.
learn.fine_tune(epochs, lr, freeze_epochs=freeze_epochs)

Training for 11 epochs, starting with 2 frozen epochs...
Could not gather input dimensions
WandbCallback requires use of "SaveModelCallback" to log best model
WandbCallback was not able to prepare a DataLoader for logging prediction samples -> 'Dataset' object has no attribute 'items'

# wandb is only imported when tracking is enabled, so only then is there a run to close;
# calling it unconditionally raises NameError when use_wandb is False.
if use_wandb:
    wandb.finish()

# Show predictions next to the targets for a few validation items.
model_type.show_results(model, valid_ds, detection_threshold=.5)

# The file name encodes fold, model choice and dataset so runs do not overwrite each other.
checkpoint_path = f'bbox-k{k}-m{model_choice}-{dataset_name}.pth'

save_icevision_checkpoint(model,
                        model_name=model_name,
                        backbone_name=backbone_name,
                        classes=parser.class_map.get_classes(),
                        img_size=image_size,  # the size the transforms were built with
                        filename=checkpoint_path,
                        meta={'icevision_version': '0.9.1'})

Inference

Might get a CUDA OOM error here. If so, restart kernel and load what we just saved. You'll have to go back and re-define learn, model, valid_ds etc., though.

# Rebuild the model from the checkpoint file written by the save cell.
checkpoint_and_model = model_from_checkpoint(checkpoint_path,
    model_name=model_name,
    backbone_name=backbone_name,
    img_size=image_size)
# model_from_checkpoint returns a dict; take the restored network out of it so that the
# inference below really runs on the reloaded weights rather than on whatever `model`
# happened to be in memory (or nothing at all after a kernel restart).
model = checkpoint_and_model["model"]

model.to('cuda')
device=next(model.parameters()).device
device

Use load_from_local loader

/home/drscotthawley/envs/iv2/lib/python3.8/site-packages/mmcv/cnn/utils/weight_init.py:118: UserWarning: init_cfg without layer key, if you do not define override key either, this init_cfg will do nothing
  warnings.warn(

device(type='cuda', index=0)

# Run inference over the validation set in batches, keeping the images for display.
infer_ds = valid_ds
infer_dl = model_type.infer_dl(infer_ds, batch_size=4, shuffle=False)
preds = model_type.predict_from_dl(model, infer_dl, keep_images=True)
#Do NOT use: preds = model_type.predict(model, valid_ds, keep_images=True)

show_preds(preds=preds[0:5])  # display the first five predictions

# Display the dataset sizes next to the number of predictions produced.
len(train_ds), len(valid_ds), len(preds)

(1904, 970, 970)

Next, let's work out how to extract what we need (bounding boxes and scores) from these prediction objects.

def get_bblist(pred):
    """Return the predicted boxes of one prediction as plain coordinate lists.

    Args:
        pred: object exposing ``pred.detection.bboxes``, a sequence of boxes
            that each have ``xmin``, ``ymin``, ``xmax`` and ``ymax`` attributes.

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax]`` list per box, in the
        original box order; empty when there are no boxes.
    """
    return [[bb.xmin, bb.ymin, bb.xmax, bb.ymax] for bb in pred.pred.detection.bboxes]

# Try the helper on one prediction; the output below lists its boxes as [xmin, ymin, xmax, ymax].
get_bblist(preds[1])

[[104.32266, 100.92419, 218.78952, 210.76654],
 [113.43626, 226.69371, 191.9419, 303.52826],
 [1.1537857, 69.05007, 70.789154, 219.51254]]

# Build one table row per prediction that contains at least one detection.
results = []
for i, pred in enumerate(preds):
    scores = pred.pred.detection.scores
    if len(scores) == 0:   # sometimes you get a zero box/prediction. ??
        continue
    worst_score = np.min(np.array(scores))
    # Row key: the validation image's file stem with a '.csv' extension
    csv_name = str(Path(valid_ds[i].common.filepath).stem)+'.csv'
    results.append([csv_name, get_bblist(pred), scores, worst_score, i])

# store as pandas dataframe, ordered by worst score as a "top losses" kind of thing
column_names = ['filename', 'bblist','scores','worst_score','i']
res_df = pd.DataFrame(results, columns=column_names).sort_values('worst_score')
res_df.head() # take a look

# K-fold runs get the fold index in the file name so they do not overwrite each other.
out_csv = f'bboxes_top_losses_real_k{k}.csv' if kfold else 'bboxes_top_losses_real.csv'
res_df.to_csv(out_csv, index=False)

COCOMetric	0.66297
epoch	9.33475
eps_0	1e-05
eps_1	1e-05
eps_2	1e-05
eps_3	1e-05
eps_4	1e-05
eps_5	1e-05
eps_6	1e-05
lr_0	0.0
lr_1	0.0
lr_2	0.0
lr_3	0.0
lr_4	0.0
lr_5	1e-05
lr_6	2e-05
mom_0	0.90368
mom_1	0.90368
mom_2	0.90368
mom_3	0.90368
mom_4	0.90368
mom_5	0.90368
mom_6	0.90368
raw_loss	0.33765
sqr_mom_0	0.99
sqr_mom_1	0.99
sqr_mom_2	0.99
sqr_mom_3	0.99
sqr_mom_4	0.99
sqr_mom_5	0.99
sqr_mom_6	0.99
train_loss	0.2849
valid_loss	0.26979
wd_0	0.01
wd_1	0.01
wd_2	0.01
wd_3	0.01
wd_4	0.01
wd_5	0.01
wd_6	0.01

COCOMetric	▁▂▃▆▆▆▇██
epoch	▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
eps_0	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_1	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_2	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_3	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_4	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_5	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eps_6	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr_0	▁▂▂▄▄▆▇██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr_1	▁▁▂▄▄▆▇██▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
lr_2	▁▁▂▃▄▆▇██▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
lr_3	▁▁▂▃▄▆▇██▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂
lr_4	▁▁▂▃▄▅▇▇▇▂▂▃▃▄▄▅▅▆▇▇████████▇▇▇▇▇▆▆▆▅▅▅▄
lr_5	▁▁▁▂▂▃▃▄▄▂▃▃▃▄▄▅▆▆▇▇████████▇▇▇▇▇▆▆▆▅▅▅▄
lr_6	▁▁▂▃▄▆▇██▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂
mom_0	██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_1	██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_2	██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_3	██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_4	██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_5	██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
mom_6	██▇▆▅▃▂▁▁███▇▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅
raw_loss	▆▄▇▇█▄▄▃▅▅▅▁▃▄▆▄▃▂▆▅▂▂▂▄▁▃▂▂▃▄▁▃▄▄▃▂▃▁▁▂
sqr_mom_0	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_1	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_2	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_3	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_4	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_5	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
sqr_mom_6	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss	█████▇▇▇▇▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁
valid_loss	█▇▆▃▃▃▂▁▁
wd_0	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_1	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_2	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_3	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_4	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_5	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
wd_6	▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

	filename	bblist	scores	worst_score	i
385	06240907_proc_00973.csv	[[163.79924, 134.47836, 275.4303, 244.87671], [0.8026047, 126.37773, 139.32632, 292.40704], [190.97601, 264.7258, 258.71545, 333.8126]]	[0.9729451, 0.94984376, 0.50001764]	0.500018	385
472	06241902_proc_01305.csv	[[108.70906, 101.53219, 218.08482, 217.0892], [1.9381962, 84.965996, 59.986267, 231.3284], [123.28911, 228.08128, 180.5442, 284.22955]]	[0.97011507, 0.5928285, 0.5002006]	0.500201	472
312	06240907_proc_00292.csv	[[2.6407547, 122.064354, 147.58685, 295.88235], [201.03159, 185.91422, 252.44965, 245.18521], [224.0113, 154.67923, 284.4107, 211.29602], [150.4933, 238.28488, 204.63936, 293.23886]]	[0.960453, 0.72788745, 0.6765359, 0.50031626]	0.500316	312
703	06241902_proc_01802.csv	[[100.36381, 108.06838, 212.20728, 214.30856], [0.3653469, 75.12563, 58.0689, 230.51862]]	[0.9819204, 0.5004542]	0.500454	703
542	06240907_proc_00580.csv	[[166.64502, 139.25168, 272.06723, 240.16678], [199.38095, 262.43307, 269.6079, 333.44827], [209.20874, 55.649307, 256.18094, 127.14867], [1.7261848, 138.99355, 116.95537, 281.23218], [163.15091, 50.10095, 212.35336, 118.81209], [0.55441284, 184.34184, 75.460976, 283.9815], [38.83757, 141.32932, 130.9435, 252.63617]]	[0.9669667, 0.9094971, 0.80517113, 0.75459, 0.7254705, 0.5280292, 0.5005095]	0.500510	542

IceVision Bboxes - Real Data