Acknowledgement: I took Zach Mueller's Image Segmentation tutorial notebook (based on the main FastAI lesson notebook) and modified it to do regression (as per Zach's suggestions) and to work with my own data.

 
!pip install -Uqq fastai espiownage==0.0.45 mrspuff typing_extensions -q --upgrade
import espiownage
from espiownage.core import *
sysinfo()
print(f"espiownage version {espiownage.__version__}")
TORCH_VERSION=torch1.9.0; CUDA_VERSION=cu111
CUDA available = True, Device count = 1, Current device = 0
Device name = GeForce RTX 3080
hostname: bengio
espiownage version 0.0.44
from fastai.vision.all import *

from fastcore.xtras import Path

from fastai.callback.hook import summary
from fastai.callback.progress import ProgressCallback
from fastai.callback.schedule import lr_find, fit_flat_cos

from fastai.data.block import DataBlock
from fastai.data.external import untar_data, URLs
from fastai.data.transforms import get_image_files, FuncSplitter, Normalize

from fastai.layers import Mish   # MishJIT gives me trouble :-( 
from fastai.losses import BaseLoss, MSELossFlat, CrossEntropyLossFlat, BCEWithLogitsLossFlat
from fastai.optimizer import ranger

from fastai.torch_core import tensor

from fastai.vision.augment import aug_transforms
from fastai.vision.core import PILImage, PILMask
from fastai.vision.data import ImageBlock, MaskBlock, imagenet_stats
from fastai.vision.learner import unet_learner

from PIL import Image
import numpy as np
import random

from torch import nn
from torchvision.models.resnet import resnet34

import torch
import torch.nn.functional as F

import glob
from pathlib import Path

Run parameters

These will go in WandB automatically

# Run configuration: `dataset_name` selects the dataset to load and, together with
# `project`, is used further down to name the Weights & Biases run.
dataset_name = 'cleaner'  # choose from: cleaner, preclean, spnet, cyclegan, fake
project = 'segreg_kfold'

Setup data

# Root directory of the chosen dataset (`get_data` presumably comes from espiownage.core — TODO confirm)
path = get_data(dataset_name)
/home/drscotthawley/datasets/espiownage-cleaner
bin_size = 0.7   # step used when the masks were generated; it is part of the mask folder name
maskdir = path / ('masks_'+str(bin_size))
# We can also generate masks dynamically using `espiownage`'s `gen_masks` script:
#!gen_masks --quiet --step={bin_size} --maskdir={maskdir} --files={str(path/'annotations')+'/*.csv'}


path_im = path/'images'
# `maskdir` already starts with `path`; joining it onto `path` a second time only
# worked because `path` is absolute, so use it as-is.
path_mask = maskdir

# One annotation (.csv) file per image; sort for a deterministic starting order.
meta_names = sorted(glob.glob(str(path/'annotations')+'/*.csv'))
# Image path that corresponds to each annotation file, in the same order.
img_names = [meta_to_img_path(x, img_bank=path_im) for x in meta_names]
mask_names = sorted(get_image_files(path_mask))
print("lengths of input lists:",len(meta_names), len(img_names), len(mask_names))

# shuffle and check that things line up
# (precaution for DIY kfold split)

def shuffle_together(*ls):
    "Apply one and the same random permutation to every list passed in"
    rows = [*zip(*ls)]      # row i bundles the i-th entry of each input list
    random.shuffle(rows)    # shuffling whole rows keeps the entries aligned
    return zip(*rows)       # transpose back: yields one tuple per input list

random.seed(0) # fixed seed: restarting (here or elsewhere) reproduces the same 'shuffle'
img_names, meta_names, mask_names = shuffle_together(img_names, meta_names, mask_names)

#sanity checks: the three lists must still be the same length and refer to the same samples
assert len(img_names)==len(meta_names)
assert len(img_names)==len(mask_names)
for meta_f, img_f, mask_f in zip(meta_names, img_names, mask_names):
    # file name of the mask derived from the annotation must equal the mask we actually hold
    want_mask = os.path.basename(meta_to_mask_path(meta_f, mask_dir=str(path_mask)+'/'))
    assert want_mask == os.path.basename(mask_f), "mask and meta don't agree"
    # likewise for the image file name
    want_img, got_img = os.path.basename(meta_to_img_path(meta_f)), os.path.basename(img_f)
    assert want_img == got_img, f'{want_img} != {got_img}'

print("\nThe following should match up with each other and also be SAME THING each time you restart this notebook:")
for names in (meta_names, img_names, mask_names):
    print(os.path.basename(names[0]))
lengths of input lists: 1955 1955 1955

The following should match up with each other and also be SAME THING each time you restart this notebook:
06240907_proc_01617.csv
06240907_proc_01617.png
06240907_proc_01617_P.png

^expected output:

06240907_proc_01617.csv
06240907_proc_01617.png
06240907_proc_01617_P.png
def get_msk(o):
    "Mask file for image path `o`: the same stem with `_P` appended, inside `maskdir`"
    # `maskdir` is already rooted at `path`, so it is not joined onto `path` again
    return maskdir/f'{o.stem}_P{o.suffix}'

# One integer level per bin of width `bin_size`
# (11 is presumably the largest ring count in the annotations — TODO confirm)
colors = list(range(int(11/bin_size) + 1))
print("colors = ",colors)

# MaskBlock wants string codes: one per level
codes = [str(c) for c in colors]
print("codes = ",codes)

yrange = len(codes)
print("yrange = ",yrange)

sz = (384, 512)                   # full training size, passed to aug_transforms(size=...)
half = tuple(x // 2 for x in sz)  # half size in each dimension, used for the first training stage
print("half = ",half)
colors =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
codes =  ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15']
yrange =  16
half =  (192, 256)
def sr_acc_old(inp, targ):          # scores both voids and objects
    """Fraction of pixels whose rounded absolute error is zero (voids included).

    inp:  predictions, e.g. shape (B, 1, H, W)
    targ: targets, either (B, H, W) or (B, 1, H, W)
    Returns a 0-d tensor in [0, 1].
    """
    targ = targ.squeeze(1)
    # With a channel axis on only one side, `inp - targ` would broadcast to
    # (B, B, H, W) and compare every prediction with every target in the batch.
    # When both hold the same number of elements, compare them pixel by pixel.
    if inp.shape != targ.shape and inp.numel() == targ.numel():
        inp, targ = inp.reshape(-1), targ.reshape(-1)
    # round() turns the error into whole units; clamp makes each pixel count as 0 (ok) or 1 (wrong)
    return 1 - (inp-targ).abs().round().clamp(max=1).mean()

def sr_acc(inp, targ, bin_size=1):
    "segmentation regression accuracy: share of predictions within +/- bin_size of the target; void pixels are left out unless the target is all void"
    inp, targ = flatten_check(inp, targ.squeeze(1))  # https://docs.fast.ai/metrics.html#flatten_check
    err = (inp - targ).abs()
    keep = targ != void_code        # True on object (non-void) pixels
    if keep.any():
        err = err[keep]             # score objects only
    # otherwise the image is empty (all void): score every pixel, gonna be ~100%!
    return (err < bin_size).float().mean()

# Cell
def sr_acc05(inp, targ):
    "sr_acc with a tolerance of +/- 0.5"
    return sr_acc(inp, targ, bin_size=0.5)

def sr_acc07(inp, targ):
    "sr_acc with a tolerance of +/- 0.7"
    return sr_acc(inp, targ, bin_size=0.7)

def sr_acc1(inp, targ):
    "sr_acc with a tolerance of +/- 1"
    return sr_acc(inp, targ, bin_size=1)

def sr_acc15(inp, targ):
    "sr_acc with a tolerance of +/- 1.5"
    return sr_acc(inp, targ, bin_size=1.5)

def sr_acc2(inp, targ):
    "sr_acc with a tolerance of +/- 2"
    return sr_acc(inp, targ, bin_size=2)
!pip install wandb -qqq
import wandb
from fastai.callback.wandb import *
wandb.login()
wandb: Currently logged in as: drscotthawley (use `wandb login --relogin` to force relogin)
True

K-fold splitting

k = 3    # fold held out for validation: choose 0 to 4
nk = 5   # number of folds
nv = len(img_names) // nk            # size of val set (leftover items, if any, always stay in training)
bgn = k * nv                         # ind to start val set
inds = list(range(bgn, bgn + nv))    # indices into the shuffled `img_names` for this val set


def get_fold_items(_source=None):
    """Return the image paths in the seeded, shuffled order of `img_names`.

    `DataBlock.dataloaders` has no `fnames` argument (an extra keyword is silently
    dropped), so with `get_items=get_image_files` the `IndexSplitter` indices would
    refer to directory-listing order instead of the reproducible shuffle.  Handing
    the DataBlock this list makes `inds` mean what the fold arithmetic above intends.
    Assumes every entry of `img_names` is a usable path to the image — TODO confirm.
    """
    return [Path(f) for f in img_names]


def make_dls(size, bs):
    "DataLoaders for fold `k`: batches augmented/resized to `size`, batch size `bs`"
    db = DataBlock(blocks=(ImageBlock, MaskBlock(codes)),
        get_items=get_fold_items,
        splitter=IndexSplitter(inds),
        get_y=get_msk,
        batch_tfms=[*aug_transforms(size=size, flip_vert=True), Normalize.from_stats(*imagenet_stats)])
    dls = db.dataloaders(path/'images', bs=bs)
    dls.vocab = codes
    return dls


dls = make_dls(half, bs=4)
name2id = {code: idx for idx, code in enumerate(codes)}
void_code = name2id['0']   # read by sr_acc to leave void pixels out of the metric

# ----- Do the training -----

opt = ranger

hrfac = 1.2  # 'headroom factor'
# balance between "clamping" to range of real data vs too much "compression" from the sigmoid nonlinearity
y_range = (0, int(len(codes)*hrfac))

metrics = [mae, sr_acc_old, sr_acc05, sr_acc07, sr_acc1, sr_acc15, sr_acc2]

# run parameters
epochs, lr = 12*4, 1e-3


def make_learner(dls):
    "Single-output (regression) U-Net on `dls`, identical settings for both training stages"
    return unet_learner(dls, resnet34, n_out=1, y_range=y_range, loss_func=MSELossFlat(),
                        metrics=metrics, self_attention=True, act_cls=Mish, opt_func=opt,
                        cbs=WandbCallback())


wandb.init(project=project, name=f'k={k} {dataset_name}')
learn = make_learner(dls)

#lr = learn.lr_find().valley
#print("Suggested Learning Rate =",lr)

print("----- HALF SIZE TRAINING")

print("Training: frozen epochs...")
learn.fit_flat_cos(12, slice(lr))  # these frozen epochs don't yield much improvement btw

print("unfreezing model, lowering lr by 4")
learn.unfreeze()
lrs = slice(lr/400, lr/4)

print("Training: unfrozen epochs...")
learn.fit_flat_cos(12, lrs)

halfweights = 'seg_reg_real_half'
print(f"Saving model: {halfweights}")
learn.save(halfweights)
#  Nope we're not finished! Save wandb.finish() until after Full size training.

print("\n----- FULL SIZE TRAINING -----")

# Same fold and same items as above, now at full resolution;
# smaller batch size because we're now full size
dls = make_dls(sz, bs=2)

learn = make_learner(dls)
learn.load(halfweights)   # continue from the half-size weights

#learn.lr_find(end_lr=5e-3)

lr = 3e-4
print("Training: frozen epochs...")
learn.fit_flat_cos(10, slice(lr))

print("unfreezing model, lowering lr by...stuff")
learn.unfreeze()
lrs = slice(1e-6, lr/10)

print("Training: unfrozen epochs...")
learn.fit_flat_cos(10, lrs)

print("Finishing WandB")
wandb.finish()

fullweights = 'seg_reg_real_full'
print(f"Saving model: {fullweights}")
learn.save(fullweights)
Tracking run with wandb version 0.12.2
Syncing run k=3 cleaner to Weights & Biases (Documentation).
Project page: https://wandb.ai/drscotthawley/segreg_kfold
Run page: https://wandb.ai/drscotthawley/segreg_kfold/runs/5ms8f1s2
Run data is saved locally in /home/drscotthawley/espi-work/wandb/run-20210924_201146-5ms8f1s2

/home/drscotthawley/.local/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
----- HALF SIZE TRAINING
Training: frozen epochs...
WandbCallback requires use of "SaveModelCallback" to log best model
epoch train_loss valid_loss mae sr_acc_old sr_acc05 sr_acc07 sr_acc1 sr_acc15 sr_acc2 time
0 6.534555 6.061792 1.310463 0.488215 0.131544 0.186860 0.276798 0.392246 0.493538 00:32
1 6.070246 5.238260 0.956390 0.665987 0.107606 0.152313 0.279984 0.388711 0.490437 00:30
2 5.666637 5.225455 0.935112 0.663938 0.106694 0.147394 0.296349 0.408412 0.510755 00:30
3 5.214216 5.459582 1.004720 0.631669 0.170046 0.242577 0.347754 0.451726 0.548873 00:31
4 5.111706 5.160557 0.911066 0.664337 0.113878 0.191666 0.321296 0.419707 0.509259 00:30
5 4.982654 4.861915 0.885254 0.668803 0.132225 0.182782 0.312289 0.441875 0.531061 00:30
6 4.860633 4.789404 0.864155 0.673723 0.152340 0.196764 0.328114 0.431040 0.530194 00:30
7 5.012252 4.734384 0.868836 0.661927 0.151500 0.231060 0.355324 0.461666 0.554321 00:30
8 4.760670 4.789464 0.860030 0.667044 0.135768 0.205961 0.332530 0.437689 0.537599 00:30
9 4.479697 4.774634 0.908895 0.647915 0.178456 0.239167 0.350655 0.464236 0.569530 00:30
10 4.445107 4.559572 0.840050 0.670682 0.169299 0.237522 0.345230 0.456197 0.549110 00:30
11 4.194604 4.514355 0.838738 0.662236 0.194273 0.261217 0.371452 0.483761 0.573466 00:30
WandbCallback was not able to get prediction samples -> mask_data must be a 2D array
unfreezing model, lowering lr by 4
Training: unfrozen epochs...
33.33% [4/12 02:12<04:25]
epoch train_loss valid_loss mae sr_acc_old sr_acc05 sr_acc07 sr_acc1 sr_acc15 sr_acc2 time
0 4.225204 4.586683 0.850552 0.655045 0.205017 0.269548 0.379746 0.489467 0.586611 00:32
1 4.179651 4.601808 0.825264 0.668183 0.191751 0.250017 0.363917 0.475513 0.574736 00:33
2 4.407197 4.414093 0.807017 0.671599 0.188859 0.248738 0.356572 0.469019 0.573667 00:32
3 3.903284 4.384595 0.803973 0.666001 0.198684 0.257297 0.363221 0.468512 0.582685 00:33

99.23% [388/391 00:28<00:00 4.2899]

Inference

This will generate an image of the predicted segmentation mask for every validation item, plus a CSV file listing the filenames sorted by loss (top losses first).

# Reload the weights that were saved under `fullweights` after full-size training
learn.load(fullweights)

# Predictions, targets and losses in one pass
preds, targs, losses = learn.get_preds(with_loss=True) # validation set only
print(preds.shape, targs.shape)
len(preds)

def save_tmask(tmask, fname='', norm=False): # save tensor mask
    """Turn a mask tensor into an 8-bit grayscale PIL image and optionally save it.

    tmask: tensor whose first entry along dim 0 is the mask (only `tmask[0]` is used)
    fname: file to write; nothing is written when it is ''
    norm:  False -> map 0..max(colors) onto 0..255 (use scale of max ring count);
           True  -> stretch this image's own min..max onto 0..255
    Returns the PIL image.
    """
    arr = tmask[0].squeeze().cpu().numpy()
    if norm:
        lo, hi = arr.min(), arr.max()          # auto scale for just this image
    else:
        lo, hi = 0, np.max(np.array(colors))   # fixed scale shared by all images
    span = hi - lo   # divide by the width of the range, not by its upper end
    if span > 0:
        scaled = 255.0 / span * (arr - lo)
    else:
        scaled = np.zeros(arr.shape)           # constant image: nothing to stretch
    # Values outside [lo, hi] (e.g. predictions above max(colors)) would wrap around
    # when cast to uint8, so clip them first.
    rescaled = np.clip(scaled, 0, 255).astype(np.uint8)
    im = Image.fromarray(rescaled)
    if fname != '': im.save(fname)
    return im

seg_img_dir = 'seg_reg_images'
# Images from earlier runs are left in place. Creating the folder from Python (instead of
# the `! mkdir` shell magic) needs no shell and does not complain when it already exists.
Path(seg_img_dir).mkdir(parents=True, exist_ok=True)

# One row per validation item: file stem, its loss, and its index in the validation set
results = []
for i, (item, pred, loss) in enumerate(zip(dls.valid.items, preds, losses)):
    filestem = item.stem
    save_tmask(pred, seg_img_dir+'/'+filestem+'_pred.png')
    # store a plain float so the 'loss' column is numeric rather than 0-d arrays
    results.append([filestem, loss.item(), i])

# store as pandas dataframe
res_df = pd.DataFrame(results, columns=['filename', 'loss','i'])

res_df = res_df.sort_values('loss', ascending=False) # top loss order
res_df.to_csv(f'segreg_top_losses_real_k{k}.csv', index=False)