This notebook uses the data from before we started cleaning. It covers only bounding boxes, not ring counts.

Installing IceVision and IceData

If on Colab run the following cell, else check the installation instructions

 
!if -e /content:
    try:
        !wget https://raw.githubusercontent.com/airctic/icevision/master/install_colab.sh
        !chmod +x install_colab.sh && ./install_colab.sh
    except:
        print("Ignore the error messages and just keep going")
    
!if !-e /content:
    # For Icevision Install of MMD.  cf. https://airctic.com/0.8.1/install/
    import torch, re 
    tv, cv = torch.__version__, torch.version.cuda
    tv = re.sub('\+cu.*','',tv)
    TORCH_VERSION = 'torch'+tv[0:-1]+'0'
    CUDA_VERSION = 'cu'+cv.replace('.','')

    print(f"TORCH_VERSION={TORCH_VERSION}; CUDA_VERSION={CUDA_VERSION}")

    !pip install -qq mmcv-full=="1.3.8" -f https://download.openmmlab.com/mmcv/dist/{CUDA_VERSION}/{TORCH_VERSION}/index.html --upgrade
    !pip install mmdet -qq

Imports

As always, let's import everything from icevision. Additionally, we will also need pandas (you might need to install it with pip install pandas).

from icevision.all import *
import pandas as pd
INFO     - The mmdet config folder already exists. No need to downloaded it. Path : /home/drscotthawley/.icevision/mmdetection_configs/mmdetection_configs-2.10.0/configs | icevision.models.mmdet.download_configs:download_mmdet_configs:17

Download dataset

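The IceVision tutorial this notebook follows uses a small sample of the chess dataset; the full dataset is offered by Roboflow. Here we instead load the espiownage "preclean" data from a local directory (see the cell below).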

#data_dir = icedata.load_data(data_url, 'chess_sample') / 'chess_sample-master'

# SPNET Real Dataset link (currently proprietary, thus link may not work)
#data_url = "https://anonymized.machine.com/~drscotthawley/spnet_sample-master.zip"
#data_dir = icedata.load_data(data_url, 'spnet_sample') / 'spnet_sample-master' 

# espiownage cyclegan dataset:
#data_url = 'https://anonymized.machine.com/~drscotthawley/espiownage-cyclegan.tgz'
#data_dir = icedata.load_data(data_url, 'espiownage-cyclegan') / 'espiownage-cyclegan'

from pathlib import Path
# pre-cleaned "annotations_15ormore"
# Root of the local dataset; the cells below read bboxes/annotations.csv and
# the images/ folder beneath this directory.
data_dir = Path('/home/drscotthawley/datasets/espiownage-preclean') 

Understand the data format

In this task we were given a .csv file with annotations, let's take a look at that.

# Load the bounding-box annotations into a DataFrame and preview the first rows.
df = pd.read_csv(data_dir / "bboxes/annotations.csv")
df.head()
filename width height label xmin ymin xmax ymax
0 06240907_proc_00254.png 512 384 1 31 135 184 290
1 06240907_proc_00256.png 512 384 0 0 0 48 24
2 06240907_proc_00270.png 512 384 1 51 152 170 283
3 06240907_proc_00281.png 512 384 6 0 104 194 333
4 06240907_proc_00282.png 512 384 9 0 103 190 328

At first glance, we can make the following assumptions:

  • Multiple rows with the same filename, width, height
  • A label for each row
  • A bbox [xmin, ymin, xmax, ymax] for each row

Once we know what our data provides we can create our custom Parser.

# Show the distinct values present in the label column.
set(np.array(df['label']).flatten())
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}

Label all as "AN" for antinodes:

# Overwrite every label with the single class name "AN" so all boxes are
# treated as the same kind of object, then preview the result.
df['label'] = "AN"
df.head()
filename width height label xmin ymin xmax ymax
0 06240907_proc_00254.png 512 384 AN 31 135 184 290
1 06240907_proc_00256.png 512 384 AN 0 0 48 24
2 06240907_proc_00270.png 512 384 AN 51 152 170 283
3 06240907_proc_00281.png 512 384 AN 0 104 194 333
4 06240907_proc_00282.png 512 384 AN 0 103 190 328

Create the Parser

The first step is to create a template record for our specific type of dataset, in this case we're doing standard object detection:

# Template record for standard object detection; passed to the parser below.
template_record = ObjectDetectionRecord()

Now use the method generate_template that will print out all the necessary steps we have to implement.

# Print the skeleton of the Parser subclass that has to be implemented for
# this kind of record.
Parser.generate_template(template_record)
class MyParser(Parser):
    def __init__(self, template_record):
        super().__init__(template_record=template_record)
    def __iter__(self) -> Any:
    def __len__(self) -> int:
    def record_id(self, o: Any) -> Hashable:
    def parse_fields(self, o: Any, record: BaseRecord, is_new: bool):
        record.set_filepath(<Union[str, Path]>)
        record.set_img_size(<ImgSize>)
        record.detection.set_class_map(<ClassMap>)
        record.detection.add_labels(<Sequence[Hashable]>)
        record.detection.add_bboxes(<Sequence[BBox]>)
# but currently not a priority!
class ChessParser(Parser):
    """Parser for single-class bounding boxes listed in a CSV file.

    The annotations are read from ``<data_dir>/bboxes/annotations.csv``. Each
    row supplies ``filename``, ``width``, ``height`` and the box corners
    ``xmin``, ``ymin``, ``xmax``, ``ymax``. Whatever the CSV holds in its
    ``label`` column is replaced with the single class name ``'AN'``.

    NOTE(review): the class name looks like a leftover from the chess tutorial
    this notebook follows; the data parsed here is not chess-related.

    Args:
        template_record: Record template forwarded to ``Parser.__init__``.
        data_dir: Dataset root; must support the ``/`` path operator and
            contain ``bboxes/annotations.csv`` and an ``images`` folder.
    """

    def __init__(self, template_record, data_dir):
        super().__init__(template_record=template_record)

        self.data_dir = data_dir

        annotations = pd.read_csv(data_dir / "bboxes/annotations.csv")
        # Collapse every label into one class so all boxes are the same object.
        annotations['label'] = 'AN'
        self.df = annotations

        self.class_map = ClassMap(annotations['label'].unique().tolist())

    def __iter__(self) -> Any:
        """Yield one named tuple per annotation row."""
        yield from self.df.itertuples()

    def __len__(self) -> int:
        """Return the number of annotation rows (boxes, not images)."""
        return self.df.shape[0]

    def record_id(self, o) -> Hashable:
        """Use the image filename as the record key, so rows that share a
        filename are parsed into the same record."""
        return o.filename

    def parse_fields(self, o, record, is_new):
        """Copy one annotation row ``o`` into ``record``.

        Image-level fields (file path, image size, class map) are set only
        when the record is new; the box and its label are added for every row.
        """
        if is_new:
            image_path = self.data_dir / 'images' / o.filename
            image_size = ImgSize(width=o.width, height=o.height)
            record.set_filepath(image_path)
            record.set_img_size(image_size)
            record.detection.set_class_map(self.class_map)

        box = BBox.from_xyxy(o.xmin, o.ymin, o.xmax, o.ymax)
        record.detection.add_bboxes([box])
        record.detection.add_labels([o.label])

Let's randomly split the data and parse it with Parser.parse:

# Build the parser over the local dataset and parse it; parse() is unpacked
# here into the training records and the validation records.
parser = ChessParser(template_record, data_dir)
train_records, valid_records = parser.parse()
INFO     - Autofixing records | icevision.parsers.parser:parse:136

Let's take a look at one record:

# Draw one training record with its boxes (labels hidden), then display the
# parsed contents of the first training record.
show_record(train_records[5], display_label=False, figsize=(14, 10))
train_records[0]
BaseRecord

common: 
	- Record ID: 1733
	- Filepath: /home/drscotthawley/datasets/espiownage-preclean/images/06241902_proc_01612.png
	- Img: None
	- Image size ImgSize(width=512, height=384)
detection: 
	- Class Map: <ClassMap: {'background': 0, 'AN': 1}>
	- Labels: [1]
	- BBoxes: [<BBox (xmin:123, ymin:78, xmax:278, ymax:199)>]

Moving On...

Following the Getting Started "refrigerator" notebook...

# size is set to 384 because EfficientDet requires its inputs to be divisible by 128
# (384 = 3 * 128); the same size is used for whichever model is selected below.
image_size = 384  
# Training: augmentation transforms (pre-sized to 512, output size image_size) plus normalization.
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])
# Validation: resize and pad to image_size plus normalization.
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])

# Datasets
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)

Look at the (augmented) target data:

# Fetch the same training item three times and show the results side by side;
# presumably each access re-applies the training transforms, so the three
# views differ -- confirm against the Dataset implementation.
samples = [train_ds[0] for _ in range(3)]
show_samples(samples, ncols=3)
# Initial choice: MMDetection RetinaNet with the resnet50_fpn_1x backbone.
model_type = models.mmdet.retinanet
backbone = model_type.backbones.resnet50_fpn_1x(pretrained=True)

# Which model/backbone pair to use; see the branches below for the options.
selection = 0

# Extra keyword arguments for the model constructor (only some models need any).
extra_args = {}

if selection == 0:
    # RetinaNet from MMDetection.
    model_type = models.mmdet.retinanet
    backbone = model_type.backbones.resnet50_fpn_1x
elif selection == 1:
    # The Retinanet model is also implemented in the torchvision library
    model_type = models.torchvision.retinanet
    backbone = model_type.backbones.resnet50_fpn
elif selection == 2:
    # EfficientDet.
    model_type = models.ross.efficientdet
    backbone = model_type.backbones.tf_lite0
elif selection == 3:
    # YOLOv5.
    model_type = models.ultralytics.yolov5
    backbone = model_type.backbones.small

# The efficientdet and yolov5 models require an img_size parameter
if selection in (2, 3):
    extra_args['img_size'] = image_size

model_type, backbone, extra_args
(<module 'icevision.models.mmdet.models.retinanet' from '/home/drscotthawley/envs/icevision/lib/python3.8/site-packages/icevision/models/mmdet/models/retinanet/__init__.py'>,
 <icevision.models.mmdet.models.retinanet.backbones.resnet_fpn.MMDetRetinanetBackboneConfig at 0x7fbd55fb4ac0>,
 {})
# Build the model on a pretrained backbone. num_classes is the size of the
# parser's class map, which counts the 'background' entry as well as 'AN'.
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args) 
/home/drscotthawley/envs/icevision/lib/python3.8/site-packages/mmdet/core/anchor/builder.py:16: UserWarning: ``build_anchor_generator`` would be deprecated soon, please use ``build_prior_generator`` 
  warnings.warn(
Use load_from_local loader
The model and loaded state dict do not match exactly

size mismatch for bbox_head.retina_cls.weight: copying a param with shape torch.Size([720, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([9, 256, 3, 3]).
size mismatch for bbox_head.retina_cls.bias: copying a param with shape torch.Size([720]) from checkpoint, the shape in current model is torch.Size([9]).
# Model-specific dataloaders: shuffle the training data, keep validation order fixed.
train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)
# Preview the first validation batch.
model_type.show_batch(first(valid_dl), ncols=4)
# Report the COCO metric computed on bounding boxes during validation.
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]
# fastai Learner wrapping the model and both dataloaders.
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)
# Run the learning-rate finder to help choose the rate passed to fine_tune.
learn.lr_find()

# For Sparse-RCNN, use lower `end_lr`
# learn.lr_find(end_lr=0.005)
/home/drscotthawley/envs/icevision/lib/python3.8/site-packages/mmdet/core/anchor/anchor_generator.py:324: UserWarning: ``grid_anchors`` would be deprecated soon. Please use ``grid_priors`` 
  warnings.warn('``grid_anchors`` would be deprecated soon. '
/home/drscotthawley/envs/icevision/lib/python3.8/site-packages/mmdet/core/anchor/anchor_generator.py:360: UserWarning: ``single_level_grid_anchors`` would be deprecated soon. Please use ``single_level_grid_priors`` 
  warnings.warn(
SuggestedLRs(lr_min=4.786300996784121e-05, lr_steep=0.00019054606673307717)
# Train for 2 frozen epochs, then 60 more epochs, with base learning rate 1e-4.
# NOTE(review): in the recorded run, valid_loss stops improving around epoch
# 28 while train_loss keeps falling; fewer epochs may be enough.
learn.fine_tune(60, 1e-4, freeze_epochs=2)
epoch train_loss valid_loss COCOMetric time
0 0.628738 0.437826 0.514727 00:48
1 0.412872 0.378050 0.567241 00:44
epoch train_loss valid_loss COCOMetric time
0 0.354089 0.334836 0.595713 00:50
1 0.335962 0.316987 0.622491 00:50
2 0.338899 0.326741 0.607302 00:50
3 0.322360 0.320515 0.599768 00:50
4 0.312069 0.298707 0.627741 00:50
5 0.308055 0.305053 0.611845 00:50
6 0.310953 0.302702 0.620569 00:50
7 0.290897 0.288313 0.632744 00:50
8 0.305819 0.286483 0.632389 00:51
9 0.296975 0.313745 0.605334 00:50
10 0.291933 0.322442 0.613203 00:50
11 0.300425 0.298569 0.622661 00:50
12 0.296336 0.279761 0.636645 00:50
13 0.289199 0.289476 0.625221 00:50
14 0.284827 0.290011 0.633490 00:50
15 0.293201 0.288848 0.631713 00:51
16 0.280764 0.277616 0.643085 00:51
17 0.285662 0.288259 0.630202 00:50
18 0.285365 0.284308 0.629831 00:50
19 0.279484 0.279261 0.645131 00:50
20 0.284591 0.290445 0.645740 00:51
21 0.264967 0.283705 0.629803 00:50
22 0.272232 0.298420 0.640418 00:50
23 0.273236 0.289712 0.623446 00:51
24 0.273841 0.281151 0.639068 00:50
25 0.264536 0.293069 0.630431 00:50
26 0.270271 0.278765 0.643298 00:51
27 0.270044 0.278168 0.642723 00:51
28 0.261548 0.276091 0.644049 00:50
29 0.263425 0.279138 0.646881 00:51
30 0.258017 0.295086 0.620357 00:50
31 0.256617 0.339622 0.630245 00:51
32 0.249056 0.281358 0.639875 00:50
33 0.245856 0.301372 0.621532 00:50
34 0.254324 0.285309 0.641008 00:51
35 0.240817 0.281161 0.631880 00:50
36 0.243032 0.286913 0.633281 00:51
37 0.235965 0.296027 0.636694 00:50
38 0.248673 0.282317 0.629260 00:51
39 0.232628 0.282413 0.634627 00:50
40 0.235770 0.283882 0.633989 00:51
41 0.236786 0.287039 0.629688 00:50
42 0.234161 0.292528 0.633725 00:50
43 0.226214 0.294780 0.630469 00:50
44 0.224719 0.288680 0.630724 00:51
45 0.222353 0.292924 0.632929 00:50
46 0.226082 0.297262 0.634819 00:51
47 0.223392 0.296832 0.630262 00:51
48 0.223149 0.299595 0.626511 00:50
49 0.226242 0.295939 0.622982 00:50
50 0.216078 0.304341 0.627145 00:51
51 0.216927 0.298405 0.628118 00:50
52 0.218880 0.298187 0.628449 00:51
53 0.210994 0.305599 0.626025 00:50
54 0.217220 0.302754 0.625990 00:51
55 0.222322 0.303560 0.627346 00:50
56 0.213083 0.303888 0.626371 00:51
57 0.205493 0.305007 0.626272 00:51
58 0.221445 0.304262 0.626704 00:50
59 0.218648 0.304007 0.626770 00:51
# Save the trained learner under the name 'iv_bbox_preclean', load it back
# (the trailing semicolon suppresses the notebook's echo of the return value),
# and show predictions on the validation set at a 0.5 detection threshold.
learn.save('iv_bbox_preclean')
learn.load('iv_bbox_preclean'); 
model_type.show_results(model, valid_ds, detection_threshold=.5)