This is a mashup of IceVision's "Custom Parser" example and their "Getting Started (Object Detection)" notebooks, used to analyze the SPNet Real dataset, for which I generated bounding boxes. -- shawley, July 1, 2021
Installing IceVision and IceData
If you're on Colab, run the following cell; otherwise check the installation instructions.
#try:
# !wget https://raw.githubusercontent.com/airctic/icevision/master/install_colab.sh
# !chmod +x install_colab.sh && ./install_colab.sh
#except:
# print("Ignore the error messages and just keep going")
!pip install espiownage -Uqq
from espiownage.core import *
sysinfo()
# FOR COLAB: Install IceVision, then grab latest and force a hard restart.
# Locally, you can & should still run (most of) the install_colab.sh script
! [ -e /content ] && wget https://raw.githubusercontent.com/airctic/icevision/master/install_colab.sh
! [ -e /content ] && chmod +x install_colab.sh && ./install_colab.sh
! [ -e /content ] && pip install git+git://github.com/airctic/icevision.git --upgrade && kill -9 -1
from icevision.all import *
import pandas as pd
The real test: if this next import doesn't work, then your IceVision install is old or messed up:
from icevision.models.checkpoint import *
The original IceVision example used a small sample of the chess dataset (the full dataset is offered by Roboflow); here we point at the espiownage data instead:
#data_dir = icedata.load_data(data_url, 'chess_sample') / 'chess_sample-master'
# SPNET Real Dataset link (currently proprietary, thus link may not work)
#data_url = "https://anonymized.machine.com/~drscotthawley/spnet_sample-master.zip"
#data_dir = icedata.load_data(data_url, 'spnet_sample') / 'spnet_sample-master'
# public espiownage cyclegan dataset:
#data_url = 'https://anonymized.machine.com/~drscotthawley/espiownage-cyclegan.tgz'
#data_dir = icedata.load_data(data_url, 'espiownage-cyclegan') / 'espiownage-cyclegan'
# real data, cleaned
#data_url = 'https://anonymized.machine.com/~drscotthawley/espiownage-cleaner.tgz'
#data_dir = icedata.load_data(data_url, 'espiownage-cleaner') / 'espiownage-cleaner'
# local data already there:
from pathlib import Path
data_dir = Path('/home/drscotthawley/datasets/espiownage-cleaner') # real data is local and private
In this task we were given a `.csv` file with annotations; let's take a look at that.
df = pd.read_csv(data_dir / "bboxes/annotations.csv")
# shuffle rows: if you don't do this next line then my manual/DIY k-folding will go badly
df = df.sample(frac=1).reset_index(drop=True)
df.head()
At first glance, we can make the following assumptions:
- Multiple rows with the same filename, width, height
- A label for each row
- A bbox [xmin, ymin, xmax, ymax] for each row
Once we know what our data provides, we can create our custom `Parser`.
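Before writing the parser, it's worth confirming those assumptions directly on the dataframe, e.g. that many rows share a filename (one row per bounding box). A quick check using only the `df` loaded above:
print(df.shape)                                   # total number of bbox rows
print(df['filename'].nunique(), "unique images")  # fewer unique images than rows => multiple boxes per image
print(df.groupby('filename').size().describe())   # distribution of boxes per image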
set(np.array(df['label']).flatten())
df['label'] = 'AN' # antinode
df.head()
The first step is to create a template record for our specific type of dataset, in this case we're doing standard object detection:
template_record = ObjectDetectionRecord()
Now use the method `generate_template`, which will print out all the necessary steps we have to implement.
Parser.generate_template(template_record)
We can copy the template and use it as our starting point. Let's go over each of the methods we have to define:
- `__init__`: What happens here is completely up to you; normally we have to pass some reference to our data, `data_dir` in our case.
- `__iter__`: This tells our parser how to iterate over our data; each item returned here will be passed to `parse_fields` as `o`. In our case we call `df.itertuples` to iterate over all `df` rows.
- `__len__`: How many items we will be iterating over.
- `record_id`: Should return a `Hashable` (`int`, `str`, etc.). In our case we want all the dataset items that have the same `filename` to be unified in the same record.
- `parse_fields`: Here is where the attributes of the record are collected; the template will suggest what methods we need to call on the record and what parameters it expects. The parameter `o` it receives is the item returned by `__iter__`.
!!! danger "Important"
Be sure to pass the correct type on all record methods!
class BBoxParser(Parser):
def __init__(self, template_record, data_dir):
super().__init__(template_record=template_record)
self.data_dir = data_dir
self.df = pd.read_csv(data_dir / "bboxes/annotations.csv")
self.df['label'] = 'AN' # make them all the same object
# shuffle rows: if you don't do this next line then my manual/DIY k-folding will go badly
self.df = self.df.sample(frac=1).reset_index(drop=True) # shuffle rows
self.class_map = ClassMap(list(self.df['label'].unique()))
def __iter__(self) -> Any:
for o in self.df.itertuples():
yield o
def __len__(self) -> int:
return len(self.df)
def record_id(self, o) -> Hashable:
return o.filename
def parse_fields(self, o, record, is_new):
if is_new:
record.set_filepath(self.data_dir / 'images' / o.filename)
record.set_img_size(ImgSize(width=o.width, height=o.height))
record.detection.set_class_map(self.class_map)
record.detection.add_bboxes([BBox.from_xyxy(o.xmin, o.ymin, o.xmax, o.ymax)])
record.detection.add_labels([o.label])
parser = BBoxParser(template_record, data_dir)
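To double-check that the parser picked up our single 'AN' class (plus the background class that ClassMap adds by default), you can print its class map:
print(parser.class_map)  # expect: background + 'AN'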
kfold = True
if not kfold:
print("\n------\n Random splitting")
parser = BBoxParser(template_record, data_dir)
else:
k = 0 # manual k-folding index -- change this yourself k = 0 to (nk-1)
nk = 5 # number of k-folds
print(f"\n-----\n K-fold splitting: {k+1}/{nk}")
n = len(df)
idmap, indlist = IDMap(list(df['filename'][0:n])), list(range(n))
val_size = int(round(n/nk))
if k < nk-1:
val_list = indlist[k*val_size:(k+1)*val_size]
train_list = indlist[0:k*val_size] + indlist[(k+1)*val_size:n]
else: # last one might be a bit different
val_list = indlist[k*val_size:]
train_list = indlist[0:-len(val_list)]
val_id_list = list([df['filename'][i] for i in val_list])
train_id_list = list([df['filename'][i] for i in train_list])
presplits = list([train_id_list,val_id_list])
if kfold:
    train_records, valid_records = parser.parse(data_splitter=FixedSplitter(presplits))
else:
    train_records, valid_records = parser.parse()  # default random 80/20 split
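Since record_id unifies all rows sharing a filename into one record, the number of train + valid records should be close to the number of unique filenames rather than the number of rows; and because the k-fold split above works on row indices, a filename can in principle straddle the train/validation boundary. A quick check of both, using only variables defined above:
print(len(train_records), len(valid_records), df['filename'].nunique())  # records vs. unique images
if kfold:
    overlap = set(train_id_list) & set(val_id_list)
    print(f"filenames appearing in both splits: {len(overlap)}")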
Let's take a look at one record:
show_record(train_records[5], display_label=False, figsize=(14, 10))
train_records[0]
# size is set to 384 because EfficientDet requires its inputs to be divisible by 128
image_size = 384
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=image_size, presize=512), tfms.A.Normalize()])
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(image_size), tfms.A.Normalize()])
# Datasets
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)
samples = [train_ds[0] for _ in range(3)]
show_samples(samples, ncols=3)
selection = 0
extra_args = {}
if selection == 0:
model_type = models.mmdet.retinanet
backbone = model_type.backbones.resnet50_fpn_1x
elif selection == 1:
# The Retinanet model is also implemented in the torchvision library
model_type = models.torchvision.retinanet
backbone = model_type.backbones.resnet50_fpn
elif selection == 2:
model_type = models.ross.efficientdet
backbone = model_type.backbones.tf_lite0
# The efficientdet model requires an img_size parameter
extra_args['img_size'] = image_size
elif selection == 3:
model_type = models.ultralytics.yolov5
backbone = model_type.backbones.small
# The yolov5 model requires an img_size parameter
extra_args['img_size'] = image_size
model_type, backbone, extra_args
model = model_type.model(backbone=backbone(pretrained=True), num_classes=len(parser.class_map), **extra_args)
train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)
model_type.show_batch(first(valid_dl), ncols=4)
metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)
learn.lr_find(end_lr=0.005)  # note: a lower end_lr than the default; also what's suggested for Sparse-RCNN
epochs = 11 if kfold else 30 # go faster for kfold; 10 is good enough ;-)
freeze_epochs=2
print(f"Training for {epochs} epochs, starting with {freeze_epochs} frozen epochs...")
learn.fine_tune(epochs, 1e-4, freeze_epochs=freeze_epochs)
model_type.show_results(model, valid_ds, detection_threshold=.5)
checkpoint_path = f'espi-retinanet-checkpoint-real-k{k}.pth'
save_icevision_checkpoint(model,
model_name='mmdet.retinanet',
backbone_name='resnet50_fpn_1x',
classes = parser.class_map.get_classes(),
img_size=384,
filename=checkpoint_path,
meta={'icevision_version': '0.9.1'})
checkpoint_and_model = model_from_checkpoint(checkpoint_path,
model_name='mmdet.retinanet',
backbone_name='resnet50_fpn_1x',
img_size=384)
model.to('cuda')
device=next(model.parameters()).device
device
infer_ds = valid_ds
infer_dl = model_type.infer_dl(infer_ds, batch_size=4, shuffle=False)
preds = model_type.predict_from_dl(model, infer_dl, keep_images=True)
#Do NOT use: preds = model_type.predict(model, valid_ds, keep_images=True)
show_preds(preds=preds[0:10])
len(train_ds), len(valid_ds), len(preds)
Let's try to figure out how to get what we want from these predictions. Hmmm.
preds[0].pred
preds[1].pred.detection.scores
preds[1].pred.detection.bboxes
preds[1].pred.detection.bboxes[0].xmin
def get_bblist(pred):
my_bblist = []
bblist = pred.pred.detection.bboxes
for i in range(len(bblist)):
my_bblist.append([bblist[i].xmin, bblist[i].ymin, bblist[i].xmax, bblist[i].ymax])
return my_bblist
get_bblist(preds[1])
preds[3].pred
results = []
for i in range(len(preds)):
if (len(preds[i].pred.detection.scores) == 0): continue # sometimes you get a zero box/prediction. ??
#print(f"i = {i}, file = {str(Path(valid_ds[i].common.filepath).stem)+'.csv'}, bboxes = {get_bblist(preds[i])}, scores={preds[i].pred.detection.scores}\n")
worst_score = np.min(np.array(preds[i].pred.detection.scores))
line_list = [str(Path(valid_ds[i].common.filepath).stem)+'.csv', get_bblist(preds[i]), preds[i].pred.detection.scores, worst_score, i]
results.append(line_list)
# store as pandas dataframe
res_df = pd.DataFrame(results, columns=['filename', 'bblist','scores','worst_score','i'])
res_df = res_df.sort_values('worst_score') # order by worst score as a "top losses" kind of thing
res_df.head() # take a look
if not kfold:
res_df.to_csv('bboxes_top_losses_real.csv', index=False)
else:
res_df.to_csv(f'bboxes_top_losses_real_k{k}.csv', index=False)
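After all nk folds have been run (by re-running this notebook with k set to 0 through nk-1), the per-fold CSVs can be combined into a single "top losses" file. A minimal sketch, not part of the original notebook, assuming all the per-fold files are in the working directory:
import glob
fold_csvs = sorted(glob.glob('bboxes_top_losses_real_k*.csv'))  # one file per fold
all_df = pd.concat([pd.read_csv(f) for f in fold_csvs], ignore_index=True)
all_df = all_df.sort_values('worst_score')  # worst-scoring predictions first, across all folds
all_df.to_csv('bboxes_top_losses_real_all.csv', index=False)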