Here we use the data from before we started cleaning. This notebook covers only bounding boxes, not ring counts.
Installing IceVision and IceData
If you're on Colab, run the following cell; otherwise check the installation instructions.
import os

if os.path.exists('/content'):   # we're on Colab
    try:
        !wget https://raw.githubusercontent.com/airctic/icevision/master/install_colab.sh
        !chmod +x install_colab.sh && ./install_colab.sh
    except:
        print("Ignore the error messages and just keep going")
if not os.path.exists('/content'):   # local (non-Colab) install
    # For the IceVision install of MMDetection, cf. https://airctic.com/0.8.1/install/
    import torch, re
    tv, cv = torch.__version__, torch.version.cuda
    tv = re.sub(r'\+cu.*', '', tv)
    TORCH_VERSION = 'torch' + tv[0:-1] + '0'
    CUDA_VERSION = 'cu' + cv.replace('.', '')
    print(f"TORCH_VERSION={TORCH_VERSION}; CUDA_VERSION={CUDA_VERSION}")
    !pip install -qq mmcv-full=="1.3.8" -f https://download.openmmlab.com/mmcv/dist/{CUDA_VERSION}/{TORCH_VERSION}/index.html --upgrade
    !pip install mmdet -qq
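A quick way to confirm the installs are mutually consistent (a minimal sketch; your version numbers will differ):
# Print the versions of the pieces that have to agree with each other.
import torch, mmcv, mmdet, icevision
print("torch", torch.__version__, "| mmcv", mmcv.__version__,
      "| mmdet", mmdet.__version__, "| icevision", icevision.__version__)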
from icevision.all import *
import pandas as pd
This notebook started from IceVision's custom-parser tutorial, which uses a small sample of the chess dataset (the full dataset is offered by Roboflow); the commented-out cells below keep a few alternative dataset URLs, and we point at our own data instead.
#data_dir = icedata.load_data(data_url, 'chess_sample') / 'chess_sample-master'
# SPNET Real Dataset link (currently proprietary, thus link may not work)
#data_url = "https://anonymized.machine.com/~drscotthawley/spnet_sample-master.zip"
#data_dir = icedata.load_data(data_url, 'spnet_sample') / 'spnet_sample-master'
# espiownage cyclegan dataset:
#data_url = 'https://anonymized.machine.com/~drscotthawley/espiownage-cyclegan.tgz'
#data_dir = icedata.load_data(data_url, 'espiownage-cyclegan') / 'espiownage-cyclegan'
from pathlib import Path
# pre-cleaned "annotations_15ormore"
data_dir = Path('/home/drscotthawley/datasets/espiownage-preclean')
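A cheap sanity check on the layout assumed by the cells below (an images/ directory plus bboxes/annotations.csv); this is just a sketch:
# Fail early if the dataset isn't laid out the way the rest of the notebook expects.
assert (data_dir / 'images').exists(), f"missing {data_dir/'images'}"
assert (data_dir / 'bboxes' / 'annotations.csv').exists(), "missing bboxes/annotations.csv"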
In this task we were given a .csv file with annotations; let's take a look at that.
df = pd.read_csv(data_dir / "bboxes/annotations.csv")
df.head()
At first glance, we can make the following assumptions, which we check in a quick sketch after this list:
- Multiple rows with the same filename, width, height
- A label for each row
- A bbox [xmin, ymin, xmax, ymax] for each row
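Here is a minimal sketch that turns those assumptions into checks (the column names are taken from the parser defined further below; adjust if your CSV differs):
# Check the expected columns and that every bbox lies inside its image.
expected_cols = {'filename', 'width', 'height', 'label', 'xmin', 'ymin', 'xmax', 'ymax'}
assert expected_cols.issubset(df.columns), f"missing columns: {expected_cols - set(df.columns)}"
assert ((df.xmin >= 0) & (df.ymin >= 0) & (df.xmax <= df.width) & (df.ymax <= df.height)).all()
print(df.groupby('filename').size().describe())   # how many boxes per image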
Once we know what our data provides, we can create our custom Parser.
set(np.array(df['label']).flatten())   # which distinct labels are present?
Label all as "AN" for antinodes:
df['label'] = "AN"
df.head()
The first step is to create a template record for our specific type of dataset, in this case we're doing standard object detection:
template_record = ObjectDetectionRecord()
Now use the method generate_template, which will print out all the necessary steps we have to implement.
Parser.generate_template(template_record)
# TODO: rename ChessParser (name left over from the chess tutorial), but currently not a priority!
class ChessParser(Parser):
    def __init__(self, template_record, data_dir):
        super().__init__(template_record=template_record)
        self.data_dir = data_dir
        self.df = pd.read_csv(data_dir / "bboxes/annotations.csv")
        self.df['label'] = 'AN'    # make them all the same object
        self.class_map = ClassMap(list(self.df['label'].unique()))

    def __iter__(self) -> Any:
        for o in self.df.itertuples():
            yield o

    def __len__(self) -> int:
        return len(self.df)

    def record_id(self, o) -> Hashable:
        return o.filename

    def parse_fields(self, o, record, is_new):
        if is_new:
            record.set_filepath(self.data_dir / 'images' / o.filename)
            record.set_img_size(ImgSize(width=o.width, height=o.height))
            record.detection.set_class_map(self.class_map)

        record.detection.add_bboxes([BBox.from_xyxy(o.xmin, o.ymin, o.xmax, o.ymax)])
        record.detection.add_labels([o.label])
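As a quick sanity check on the parser defined above (a minimal sketch that just instantiates it):
p = ChessParser(template_record, data_dir)
# len(p) counts annotation rows; rows sharing a filename get merged into one record at parse time
print(len(p), "rows across", p.df['filename'].nunique(), "images;", p.class_map)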
Let's randomly split the data and parse it with Parser.parse:
parser = ChessParser(template_record, data_dir)
train_records, valid_records = parser.parse()
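Parser.parse splits randomly by default; to make the split reproducible you can pass a data_splitter (a sketch assuming the RandomSplitter API of IceVision 0.8.x; check your installed version):
# Hedged sketch: fix the 80/20 split with a seed so runs are comparable.
train_records, valid_records = parser.parse(data_splitter=RandomSplitter([0.8, 0.2], seed=42))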
Let's take a look at one record:
show_record(train_records[5], display_label=False, figsize=(14, 10))
train_records[0]
# size is set to 384 because EfficientDet requires its inputs to be divisible by 128
image_size = 384
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])
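Since the EfficientDet option further below inherits this image_size, a one-line guard (a trivial sketch) keeps the divisibility constraint from the comment above honest:
assert image_size % 128 == 0, "EfficientDet wants input sizes divisible by 128"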
# Datasets
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)
samples = [train_ds[0] for _ in range(3)]
show_samples(samples, ncols=3)
model_type = models.mmdet.retinanet
backbone = model_type.backbones.resnet50_fpn_1x(pretrained=True)
selection = 0
extra_args = {}
if selection == 0:
    model_type = models.mmdet.retinanet
    backbone = model_type.backbones.resnet50_fpn_1x

elif selection == 1:
    # The Retinanet model is also implemented in the torchvision library
    model_type = models.torchvision.retinanet
    backbone = model_type.backbones.resnet50_fpn

elif selection == 2:
    model_type = models.ross.efficientdet
    backbone = model_type.backbones.tf_lite0
    # The efficientdet model requires an img_size parameter
    extra_args['img_size'] = image_size

elif selection == 3:
    model_type = models.ultralytics.yolov5
    backbone = model_type.backbones.small
    # The yolov5 model requires an img_size parameter
    extra_args['img_size'] = image_size
model_type, backbone, extra_args
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args)
train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)
model_type.show_batch(first(valid_dl), ncols=4)
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)
learn.lr_find()
# For Sparse-RCNN, use lower `end_lr`
# learn.lr_find(end_lr=0.005)
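If you'd rather capture the suggestion programmatically than read it off the plot, lr_find returns it (a sketch; fastai 2.x returns a SuggestedLRs namedtuple whose field names vary by version, e.g. valley or lr_min/lr_steep):
# Re-run the sweep and keep the returned suggestion(s).
suggested = learn.lr_find()
print(suggested)    # use this value to inform the lr passed to fine_tune below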
learn.fine_tune(60, 1e-4, freeze_epochs=2)
learn.save('iv_bbox_preclean')
learn.load('iv_bbox_preclean');
model_type.show_results(model, valid_ds, detection_threshold=.5)
A natural next step would be to vary detection_threshold=0.5 and plot an ROC curve.
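As a rough sketch of that threshold sweep (hedged: it assumes model_type.predict returns IceVision Prediction objects exposing pred.detection.scores and ground_truth.detection.bboxes, the 0.8.x layout; verify against your installed version), one can count the boxes kept at each threshold against the ground-truth count:
# Hedged sketch: attribute names assume IceVision 0.8.x Prediction objects.
import numpy as np
preds = model_type.predict(model, valid_ds)        # list of Prediction(pred, ground_truth)
n_gt = sum(len(p.ground_truth.detection.bboxes) for p in preds)
for thresh in np.linspace(0.1, 0.9, 9):
    n_kept = sum(int((np.array(p.pred.detection.scores) >= thresh).sum()) for p in preds)
    print(f"threshold={thresh:.1f}: kept {n_kept} boxes vs {n_gt} ground-truth boxes")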