docs/vision/semantic_segmentation.ipynb
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This tutorial trains a DeepLabV3 with Mobilenet V2 as backbone model from the TensorFlow Model Garden package (tensorflow-models).
Model Garden contains a collection of state-of-the-art models, implemented with TensorFlow's high-level APIs. The implementations demonstrate the best practices for modeling, letting users take full advantage of TensorFlow for their research and product development.
Dataset: Oxford-IIIT Pets
This tutorial demonstrates how to:
# Install (or upgrade, -U) the TensorFlow Model Garden package quietly (-q).
!pip install -U -q "tf-models-official"
import os
import pprint
import numpy as np
import matplotlib.pyplot as plt
from IPython import display
import tensorflow as tf
import tensorflow_datasets as tfds
import orbit
import tensorflow_models as tfm
from official.vision.data import tfrecord_lib
from official.vision.utils import summary_manager
from official.vision.serving import export_saved_model_lib
from official.vision.utils.object_detection import visualization_utils
# Pretty-printer used further down to dump the experiment configuration.
pp = pprint.PrettyPrinter(indent=4) # Set Pretty Print Indentation
print(tf.__version__) # Check the version of tensorflow used
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
Models in the official repository (of Model Garden) require the data to be in the TFRecords data format.
Please check this resource to learn more about TFRecords data format.
The oxford_iiit_pet dataset (version 4) is taken from TensorFlow Datasets.
# Load three splits plus the dataset metadata:
#   train_ds: all of 'train' plus the first half of 'test'
#   val_ds:   the 50%-80% slice of 'test'
#   test_ds:  the last 20% of 'test'
(train_ds, val_ds, test_ds), info = tfds.data_source(
    'oxford_iiit_pet:4.*.*',
    split=['train+test[:50%]', 'test[50%:80%]', 'test[80%:100%]'],
    with_info=True)
# Bare expression: shows the dataset metadata as the cell output.
info
def process_record(record):
    """Convert one dataset record into a `tf.train.Example`.

    Args:
      record: Mapping with an 'image' entry and a 'segmentation_mask' entry.
        The image must expose `.shape`; its first two dimensions are stored
        as height and width respectively.

    Returns:
      A `tf.train.Example` holding the JPEG-encoded image, its height and
      width, and the PNG-encoded mask with every value decremented by one.
    """
    image = record['image']
    # Every mask value is shifted down by one before it is encoded.
    shifted_mask = record['segmentation_mask'] - 1

    encoded_image = tf.io.encode_jpeg(image).numpy()
    encoded_mask = tf.io.encode_png(shifted_mask).numpy()

    to_feature = tfrecord_lib.convert_to_feature
    feature_map = {
        'image/encoded': to_feature(encoded_image),
        'image/height': to_feature(image.shape[0]),
        'image/width': to_feature(image.shape[1]),
        'image/segmentation/class/encoded': to_feature(encoded_mask),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
# Directory that receives the generated TFRecord shards.
output_dir = './oxford_iiit_pet_tfrecords/'
# Progress is reported once every LOG_EVERY records while writing shards.
LOG_EVERY = 100

# makedirs(exist_ok=True) replaces the separate exists-check + mkdir pair:
# it has no check-then-create race and also creates missing parent folders.
os.makedirs(output_dir, exist_ok=True)
def write_tfrecords(dataset, output_path, num_shards=1):
    """Serialize `dataset` into `num_shards` TFRecord files.

    Records are distributed round-robin: record `idx` goes to shard
    `idx % num_shards`. Shard files are named
    `<output_path>-<shard index>-of-<num_shards>.tfrecord`, with both numbers
    zero-padded to five digits.

    Args:
      dataset: Iterable of records accepted by `process_record`.
      output_path: Path prefix shared by all shard files.
      num_shards: Number of shard files to create.
    """
    shard_paths = [
        output_path + '-%05d-of-%05d.tfrecord' % (i, num_shards)
        for i in range(num_shards)
    ]
    writers = [tf.io.TFRecordWriter(path) for path in shard_paths]
    try:
        for idx, record in enumerate(dataset):
            if idx % LOG_EVERY == 0:
                # Apply the format; passing idx as a second print() argument
                # would emit the literal '%d' instead of the index.
                print('On image %d' % idx)
            tf_example = process_record(record)
            writers[idx % num_shards].write(tf_example.SerializeToString())
    finally:
        # Always close the writers so buffered records reach disk, even when
        # processing a record raises.
        for writer in writers:
            writer.close()
# Path prefixes of the shard files for each split.
output_train_tfrecs = f'{output_dir}train'
output_val_tfrecs = f'{output_dir}val'
output_test_tfrecs = f'{output_dir}test'

# Write the splits in order: train (10 shards), val (5 shards), test (5 shards).
for split_records, shard_prefix, shard_count in (
        (train_ds, output_train_tfrecs, 10),
        (val_ds, output_val_tfrecs, 5),
        (test_ds, output_test_tfrecs, 5),
):
    write_tfrecords(split_records, shard_prefix, num_shards=shard_count)

# Glob patterns matching the shard files written above.
train_data_tfrecords = './oxford_iiit_pet_tfrecords/train*'
val_data_tfrecords = './oxford_iiit_pet_tfrecords/val*'
test_data_tfrecords = './oxford_iiit_pet_tfrecords/test*'

# Output locations for the training run and the exported model.
trained_model = './trained_model/'
export_dir = './exported_model/'
In Model Garden, the collections of parameters that define a model are called configs. Model Garden can create a config based on a known set of parameters via a factory.
Use the mnv2_deeplabv3_pascal experiment configuration, as defined by tfm.vision.configs.semantic_segmentation.mnv2_deeplabv3_pascal.
Please find all the registered experiments here
The configuration defines an experiment to train a DeepLabV3 model with MobilenetV2 as backbone and ASPP as decoder.
There are also other alternative experiments available, such as
seg_deeplabv3_pascal, seg_deeplabv3plus_pascal, seg_resnetfpn_pascal, mnv2_deeplabv3plus_cityscapes, and more. One can switch to them by changing the experiment name argument to the get_exp_config function.
exp_config = tfm.core.exp_factory.get_exp_config('mnv2_deeplabv3_pascal')
model_ckpt_path = './model_ckpt/'
if not os.path.exists(model_ckpt_path):
os.mkdir(model_ckpt_path)
!gcloud storage cp gs://tf_model_garden/cloud/vision-2.0/deeplab/deeplabv3_mobilenetv2_coco/best_ckpt-63.data-00000-of-00001 './model_ckpt/'
!gcloud storage cp gs://tf_model_garden/cloud/vision-2.0/deeplab/deeplabv3_mobilenetv2_coco/best_ckpt-63.index './model_ckpt/'
num_classes = 3
HEIGHT = 128
WIDTH = 128
input_size = [HEIGHT, WIDTH, 3]
BATCH_SIZE = 16

# Short aliases for the sub-configs that are edited below.
task_config = exp_config.task
train_data_config = task_config.train_data
validation_data_config = task_config.validation_data

# Backbone Config
task_config.init_checkpoint = model_ckpt_path + 'best_ckpt-63'
task_config.freeze_backbone = True

# Model Config
task_config.model.num_classes = num_classes
task_config.model.input_size = input_size

# Training Data Config
train_data_config.aug_scale_min = 1.0
train_data_config.aug_scale_max = 1.0
train_data_config.input_path = train_data_tfrecords
train_data_config.global_batch_size = BATCH_SIZE
train_data_config.dtype = 'float32'
train_data_config.output_size = [HEIGHT, WIDTH]
train_data_config.preserve_aspect_ratio = False
train_data_config.seed = 21  # Reproducible training data

# Validation Data Config
validation_data_config.input_path = val_data_tfrecords
validation_data_config.global_batch_size = BATCH_SIZE
validation_data_config.dtype = 'float32'
validation_data_config.output_size = [HEIGHT, WIDTH]
validation_data_config.preserve_aspect_ratio = False
validation_data_config.groundtruth_padded_size = [HEIGHT, WIDTH]
validation_data_config.seed = 21  # Reproducible validation data
validation_data_config.resize_eval_groundtruth = True  # To enable validation loss
# Names of every logical device TensorFlow can see.
logical_device_names = [
    dev.name for dev in tf.config.list_logical_devices()
]
# Concatenate once so the accelerator checks are simple substring tests.
all_device_names = ''.join(logical_device_names)

if 'GPU' in all_device_names:
    print('This may be broken in Colab.')
    device = 'GPU'
elif 'TPU' in all_device_names:
    print('This may be broken in Colab.')
    device = 'TPU'
else:
    print('Running on CPU is slow, so only train for a few steps.')
    device = 'CPU'

# NOTE(review): the step count is the same for every device, including CPU,
# despite the message printed above -- confirm whether that is intended.
train_steps = 2000
# steps_per_loop = num_of_training_examples // train_batch_size
steps_per_loop = len(train_ds) // BATCH_SIZE
exp_config.trainer.steps_per_loop = steps_per_loop

# Write summaries, save checkpoints and run validation once per loop.
exp_config.trainer.summary_interval = steps_per_loop
exp_config.trainer.checkpoint_interval = steps_per_loop
exp_config.trainer.validation_interval = steps_per_loop

# validation_steps = num_of_validation_examples // eval_batch_size
# Derived from val_ds (the split written to the validation TFRecords), not
# from train_ds, so it does not exceed the available validation batches.
exp_config.trainer.validation_steps = len(val_ds) // BATCH_SIZE

exp_config.trainer.train_steps = train_steps

# Learning-rate schedule: linear warmup for one loop, then cosine decay over
# the whole training run.
exp_config.trainer.optimizer_config.warmup.linear.warmup_steps = steps_per_loop
exp_config.trainer.optimizer_config.learning_rate.type = 'cosine'
exp_config.trainer.optimizer_config.learning_rate.cosine.decay_steps = train_steps
exp_config.trainer.optimizer_config.learning_rate.cosine.initial_learning_rate = 0.1
exp_config.trainer.optimizer_config.warmup.linear.warmup_learning_rate = 0.05

pp.pprint(exp_config.as_dict())
# Last expression of the cell: limits the height of the Colab output frame.
display.Javascript('google.colab.output.setIframeHeight("500px");')
# Setting up the Strategy
if exp_config.runtime.mixed_precision_dtype == tf.float16:
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Pick a distribution strategy based on which accelerator names are visible.
device_names_joined = ''.join(logical_device_names)
has_gpu = 'GPU' in device_names_joined
has_tpu = 'TPU' in device_names_joined

if has_gpu:
    distribution_strategy = tf.distribute.MirroredStrategy()
elif has_tpu:
    tf.tpu.experimental.initialize_tpu_system()
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver(
        tpu='/device:TPU_SYSTEM:0')
    # NOTE(review): tf.distribute.experimental.TPUStrategy looks like the
    # older alias of tf.distribute.TPUStrategy -- confirm before migrating.
    distribution_strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    print('Warning: this will be really slow.')
    # Fall back to the first logical device.
    distribution_strategy = tf.distribute.OneDeviceStrategy(
        logical_device_names[0])

print("Done")
Create the Task object (tfm.core.base_task.Task) from the config_definitions.TaskConfig. The Task object has all the methods necessary for building the dataset, building the model, and running training & evaluation. These methods are driven by tfm.core.train_lib.run_experiment.
# Directory used for training logs and checkpoints.
model_dir = './trained_model/'

# Build the task inside the strategy scope.
with distribution_strategy.scope():
    task = tfm.core.task_factory.get_task(exp_config.task, logging_dir=model_dir)

# Inspect a single training batch: shape and dtype of the images and masks.
for images, masks in task.build_inputs(exp_config.task.train_data).take(1):
    print()
    print(f'images.shape: {str(images.shape):16} images.dtype: {images.dtype!r}')
    # The label now says masks.dtype; it used to read images.dtype although
    # the value printed is the dtype of the mask tensor.
    print(f'masks.shape: {str(masks["masks"].shape):16} masks.dtype: {masks["masks"].dtype!r}')
def plot_masks(display_list):
    """Show the given images side by side in a single row.

    Panels are titled, in order, 'Input Image', 'True Mask' and
    'Predicted Mask', so at most three entries are supported.

    Args:
      display_list: Sequence of image-like arrays accepted by
        `tf.keras.utils.array_to_img`.
    """
    panel_titles = ['Input Image', 'True Mask', 'Predicted Mask']
    panel_count = len(display_list)

    plt.figure(figsize=(15, 15))
    for position, item in enumerate(display_list):
        # Subplot indices are 1-based.
        plt.subplot(1, panel_count, position + 1)
        plt.title(panel_titles[position])
        plt.imshow(tf.keras.utils.array_to_img(item))
        plt.axis('off')
    plt.show()
Image Title represents what is depicted from the image.
The same helper function can be used to visualize the predicted mask.
# Number of training batches to preview.
num_examples = 3

preview_batches = task.build_inputs(exp_config.task.train_data).take(num_examples)
for images, masks in preview_batches:
    # Show only the first image of each batch next to its mask.
    first_image = images[0]
    first_mask = masks['masks'][0]
    plot_masks([first_image, first_mask])
IoU is defined as the area of the intersection divided by the area of the union of a predicted mask and the ground truth mask.
# Run the experiment in 'train_and_eval' mode under the chosen distribution
# strategy; the call's two results are kept as `model` and `eval_logs`.
model, eval_logs = tfm.core.train_lib.run_experiment(
    distribution_strategy=distribution_strategy,
    task=task,
    mode='train_and_eval',
    params=exp_config,
    model_dir=model_dir,
    eval_summary_manager=summary_manager.maybe_build_eval_summary_manager(
        params=exp_config, model_dir=model_dir),
    run_post_eval=True)
# IPython magics: load the TensorBoard extension and point it at the
# training output directory.
%load_ext tensorboard
%tensorboard --logdir './trained_model'
The keras.Model object returned by train_lib.run_experiment expects the data to be normalized by the dataset loader using the same mean and variance statistics in preprocess_ops.normalize_image(image, offset=MEAN_RGB, scale=STDDEV_RGB). This export function handles those details, so you can pass tf.uint8 images and get the correct results.
# Export the most recent checkpoint found in the training directory.
latest_checkpoint = tf.train.latest_checkpoint(model_dir)
export_saved_model_lib.export_inference_graph(
    input_type='image_tensor',
    batch_size=1,
    input_image_size=[HEIGHT, WIDTH],
    params=exp_config,
    checkpoint_path=latest_checkpoint,
    export_dir=export_dir,
)

# Reload the exported SavedModel and grab its default serving signature.
imported = tf.saved_model.load(export_dir)
serving_signatures = imported.signatures
model_fn = serving_signatures['serving_default']
def create_mask(pred_mask):
    """Turn per-class scores into a class-index mask for the first example.

    Args:
      pred_mask: Tensor of scores whose last axis enumerates the classes.

    Returns:
      The arg-max over the last axis, with a trailing singleton axis added,
      indexed at position 0 along the leading axis.
    """
    class_ids = tf.math.argmax(pred_mask, axis=-1)
    # Re-add a trailing axis first, then pick the first entry of the
    # leading axis.
    with_channel_axis = class_ids[..., tf.newaxis]
    return with_channel_axis[0]
# Visualize predictions for the first 15 test records.
# zip(range(15), ...) caps the iteration for any iterable, so it works both
# for tf.data datasets and for the random-access source returned by
# tfds.data_source, which is not expected to offer a `.take` method.
for _, record in zip(range(15), test_ds):
    image = tf.image.resize(record['image'], size=[HEIGHT, WIDTH])
    image = tf.cast(image, dtype=tf.uint8)
    # Nearest-neighbour keeps the mask values discrete; the default bilinear
    # interpolation would blend neighbouring class ids along the borders.
    mask = tf.image.resize(
        record['segmentation_mask'], size=[HEIGHT, WIDTH], method='nearest')
    # The full-size image is passed with a leading batch axis.
    # NOTE(review): presumably the exported serving signature resizes its
    # input to the exported size -- confirm.
    predicted_mask = model_fn(tf.expand_dims(record['image'], axis=0))
    plot_masks([image, mask, create_mask(predicted_mask['logits'])])