Back to Models

you may not use this file except in compliance with the License.

official/projects/yolo/darknet_image_calssification.ipynb

2.20.012.1 KB
Original Source

Copyright 2023 The TensorFlow Authors.

python
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

YOLO Image classification

This tutorial trains Darknet from the TensorFlow Model Garden package (tf-models-official) to classify images in the cats_vs_dogs dataset.

Model Garden contains a collection of state-of-the-art vision models, implemented with TensorFlow's high-level APIs. The implementations demonstrate the best practices for modeling, letting users take full advantage of TensorFlow for their research and product development.

Dataset: cats_vs_dogs

  • A large set of images of cats and dogs.

This tutorial demonstrates how to:

  • Use models from the TensorFlow Models package
  • Train/Fine-tune pre-built Darknet variants for Image Classification
  • Export the trained/tuned darknet model

Clone the model-garden repository

python
# Clone the Model Garden repository (provides the official.* packages used below).
! git clone -q https://github.com/tensorflow/models.git
python
# Install TensorFlow Datasets and the Model Garden's Python requirements.
! pip install -q -U tensorflow_datasets
! pip install -q --user -r models/official/requirements.txt

Note: Please restart runtime and continue with running the notebook

python
import os
import sys

# Make the cloned Model Garden repo importable both in this process and in
# any subprocess spawned later (e.g. the export script run via `!python`).
# os.environ.get avoids a KeyError when PYTHONPATH is not already set.
os.environ['PYTHONPATH'] = os.environ.get('PYTHONPATH', '') + ":/content/models"

sys.path.append("/content/models")

Import necessary libraries

python
import pprint
import logging
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds

from official import core
from official.vision.data import tfrecord_lib
from official.vision import configs
from official.vision.configs import common
from official.projects.yolo.common import registry_imports
from official.projects.yolo.serving import export_saved_model
from official.projects.yolo.serving import export_module_factory
from official.vision.serving import export_saved_model_lib

logging.disable(logging.WARNING)
pp = pprint.PrettyPrinter(indent=4)
%matplotlib inline

Load dataset from TensorFlow Datasets (tfds)

python
# Load cats_vs_dogs from TFDS with a 70/20/10 train/validation/test split.
(train_ds, validation_ds, test_ds), ds_info = tfds.load(
    name='cats_vs_dogs',
    split=['train[:70%]', 'train[70%:90%]', 'train[90%:100%]'],
    with_info=True)
label_info = ds_info.features['label']  # class-id <-> class-name lookup helper
ds_info

Write data to TFRecords

Helper functions to preprocess the data

python
def process_record(record):
    """Convert one dataset record into a `tf.train.Example`.

    See https://www.tensorflow.org/api_docs/python/tf/train/Example for a
    detailed explanation of the Example proto.

    Args:
        record (dict): A record with two keys:
            - 'image': tensor holding the image data.
            - 'label': tensor holding the image's class label.

    Returns:
        tf.train.Example: An example with two features:
            - 'image/encoded': the JPEG-encoded image bytes.
            - 'image/class/label': the integer class label.
    """
    encoded_image = tf.io.encode_jpeg(record['image']).numpy()
    label = record['label'].numpy()
    features = tf.train.Features(feature={
        'image/encoded': tfrecord_lib.convert_to_feature(encoded_image),
        'image/class/label': tfrecord_lib.convert_to_feature(label),
    })
    return tf.train.Example(features=features)

python
def write_tfrecords(dataset, output_path, num_shards=1):
    """
    Write a dataset to TFRecords files.

    This function takes a dataset and writes it to one or more TFRecords files,
    splitting the data into shards if specified. Records are distributed over
    the shards round-robin. Progress is printed every LOG_EVERY records.

    Args:
        dataset (iterable): An iterable containing the data records to be written
            to TFRecords. Each record should be in a format suitable for processing
            with the 'process_record' function.
        output_path (str): The base path where the TFRecords files will be saved.
            Each shard gets a '-%05d-of-%05d.tfrecord' suffix appended to the
            base path.
        num_shards (int, optional): The number of TFRecords files to split the data
            into. Defaults to 1, indicating no sharding.

    Returns:
        None
    """
    writers = [
        tf.io.TFRecordWriter(
            output_path + '-%05d-of-%05d.tfrecord' % (i, num_shards))
        for i in range(num_shards)
    ]
    try:
        for idx, record in enumerate(dataset):
            if idx % LOG_EVERY == 0:
                print('On image %d' % idx)
            tf_example = process_record(record)
            writers[idx % num_shards].write(tf_example.SerializeToString())
    finally:
        # Close (and thereby flush) every shard writer even if iteration
        # fails; otherwise buffered records may never reach disk.
        for writer in writers:
            writer.close()

LOG_EVERY = 1000  # progress-print interval used by write_tfrecords
output_dir = './cat_vs_dogs_tfrecords/'
# makedirs with exist_ok makes re-running this cell a no-op instead of a
# check-then-create pair, and also creates any missing parent directories.
os.makedirs(output_dir, exist_ok=True)

Writing training data to TFRecords

python
output_train_tfrecs = output_dir + 'train'
# num_shards is 10% of the example count, i.e. ~10 records per shard.
write_tfrecords(train_ds, output_train_tfrecs,
                num_shards=int(train_ds.cardinality().numpy() * 0.1))

Writing validation data to TFRecords

python
output_validation_tfrecs = output_dir + 'validation'
# Same sharding scheme as the training split: ~10 records per shard.
write_tfrecords(validation_ds, output_validation_tfrecs,
                num_shards=int(validation_ds.cardinality().numpy() *0.1))

Writing testing data to TFRecords

python
output_test_tfrecs = output_dir + 'test'
# Same sharding scheme as the other splits: ~10 records per shard.
write_tfrecords(test_ds, output_test_tfrecs,
                num_shards=int(test_ds.cardinality().numpy() *0.1))

Experiment Configuration

Load the existing configuration

python
# Start from the Model Garden's registered 'darknet_classification' experiment.
exp_config = core.exp_factory.get_exp_config('darknet_classification')

Change the configuration parameters for custom dataset

python
BATCH_SIZE = 16
IMG_SIZE = 224

# Derive the step counts from the dataset sizes so the schedule below
# corresponds to exactly `epochs` passes over the training split.
epochs = 10
steps_per_epoch = int(train_ds.cardinality().numpy() / BATCH_SIZE)
validation_steps = int(validation_ds.cardinality().numpy() / BATCH_SIZE)
num_steps = epochs * steps_per_epoch

lr = 0.012
warmup_lr = 0.1 * lr  # warm up from 10% of the base learning rate

# Model: input resolution and number of target classes.
exp_config.task.model.input_size = [IMG_SIZE, IMG_SIZE, 3]
exp_config.task.model.num_classes = ds_info.features['label'].num_classes

# Data: point both pipelines at the TFRecord shards written above.
exp_config.task.train_data.input_path = f'{output_train_tfrecs}*'
exp_config.task.train_data.global_batch_size = BATCH_SIZE
exp_config.task.validation_data.input_path = f'{output_validation_tfrecs}*'
exp_config.task.validation_data.global_batch_size = BATCH_SIZE

# Optimizer: SGD with momentum, cosine decay, and a short linear warmup.
exp_config.trainer.checkpoint_interval = steps_per_epoch
exp_config.trainer.best_checkpoint_export_subdir = 'best_ckpt'
exp_config.trainer.optimizer_config.optimizer.type = 'sgd'
exp_config.trainer.optimizer_config.optimizer.sgd.momentum = 0.9
exp_config.trainer.optimizer_config.learning_rate.type = 'cosine'
exp_config.trainer.optimizer_config.learning_rate.cosine.decay_steps = num_steps
exp_config.trainer.optimizer_config.learning_rate.cosine.initial_learning_rate = lr
exp_config.trainer.optimizer_config.warmup.type = 'linear'
exp_config.trainer.optimizer_config.warmup.linear.warmup_learning_rate = warmup_lr
exp_config.trainer.optimizer_config.warmup.linear.warmup_steps = int(0.1 * steps_per_epoch)

# Schedule: one validation/summary/checkpoint pass per epoch.
exp_config.trainer.train_steps = num_steps
exp_config.trainer.steps_per_loop = steps_per_epoch
exp_config.trainer.validation_steps = validation_steps
exp_config.trainer.validation_interval = steps_per_epoch
exp_config.trainer.summary_interval = steps_per_epoch

Set up the distribution strategy

python
# Detect hardware: prefer TPU, then multi-GPU, then single GPU/CPU.
try:
  tpu_resolver = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection
except ValueError:
  # No TPU available; fall back to enumerating GPUs instead.
  tpu_resolver = None
  gpus = tf.config.experimental.list_logical_devices("GPU")

# Select appropriate distribution strategy
if tpu_resolver:
  tf.config.experimental_connect_to_cluster(tpu_resolver)
  tf.tpu.experimental.initialize_tpu_system(tpu_resolver)
  distribution_strategy = tf.distribute.experimental.TPUStrategy(tpu_resolver)
  print('Running on TPU ', tpu_resolver.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
  distribution_strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
  print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
  distribution_strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
  print('Running on single GPU ', gpus[0].name)
else:
  distribution_strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
  print('Running on CPU')

print("Number of accelerators: ", distribution_strategy.num_replicas_in_sync)

Check the new configuration

python
# Dump the full, merged experiment configuration for inspection.
pprint.pprint(exp_config.as_dict())

Create the Task object (tfm.core.base_task.Task) from the config_definitions.TaskConfig.

The Task object has all the methods necessary for building the dataset, building the model, and running training & evaluation. These methods are driven by tfm.core.train_lib.run_experiment.

python
model_dir = './trained_model/'
# Build the Task (dataset/model/train-eval hooks) under the chosen strategy.
with distribution_strategy.scope():
  task = core.task_factory.get_task(exp_config.task, logging_dir=model_dir)
python
# Sanity-check the input pipeline: print the shape/dtype of one train batch.
for images, labels in task.build_inputs(exp_config.task.train_data).take(1):
  print(f'images.shape: {str(images.shape):16}  images.dtype: {images.dtype!r}')
  print(f'labels.shape: {str(labels.shape):16}  labels.dtype: {labels.dtype!r}')

Save the configuration

python
# Write params.yaml into model_dir; the export step below reads it back.
core.train_utils.serialize_config(exp_config, model_dir)

Visualize the training data

Use ds_info (which is an instance of tfds.core.DatasetInfo) to lookup the text descriptions of each class ID.

python
# NOTE(review): label_info was already assigned right after tfds.load;
# this re-assignment is redundant but harmless.
label_info = ds_info.features['label']

Visualize a batch of the data.

python
def show_batch(images, labels, predictions=None):
  """Plot a 4x4 grid of images from a batch with their labels.

  Args:
    images: Batch of image tensors; expects at least BATCH_SIZE images
      (the 4x4 grid assumes BATCH_SIZE == 16).
    labels: Ground-truth integer class labels for the batch.
    predictions: Optional predicted class labels. When given, each title
      shows the prediction, colored green if it matches the ground truth
      and red otherwise.
  """
  plt.figure(figsize=(10, 10))
  # Normalize pixel values into [0, 1] for imshow. Use `lo`/`hi` instead of
  # shadowing the builtins `min`/`max`, and guard against a zero range
  # (constant-valued batch) which would otherwise divide by zero.
  lo = images.numpy().min()
  hi = images.numpy().max()
  scale = (hi - lo) or 1.0

  for i in range(BATCH_SIZE):
    plt.subplot(4, 4, i + 1)
    plt.imshow((images[i] - lo) / scale)
    if predictions is None:
      plt.title(label_info.int2str(labels[i]))
    else:
      color = 'g' if labels[i] == predictions[i] else 'r'
      plt.title(label_info.int2str(predictions[i]), color=color)
    plt.axis("off")
  plt.show()
python
# Preview one validation batch with its ground-truth labels.
for images, labels in task.build_inputs(exp_config.task.validation_data).take(1):
  show_batch(images, labels)

Train and Evaluate

python
# Train and evaluate; run_post_eval=True also returns the final eval logs.
model, eval_logs = core.train_lib.run_experiment(
    distribution_strategy=distribution_strategy,
    task=task,
    mode='train_and_eval',
    params=exp_config,
    model_dir=model_dir,
    run_post_eval=True)

Export the trained model

python
EXPORT_DIR_PATH = "./exported_model/"
# Export a SavedModel from the latest checkpoint in model_dir, using the
# params.yaml serialized there earlier. Runs in a subprocess, which is why
# PYTHONPATH was extended at the top of the notebook.
!python -m official.projects.yolo.serving.export_saved_model \
  --experiment="darknet_classification" \
  --export_dir=$EXPORT_DIR_PATH/ \
  --checkpoint_path=$model_dir \
  --config_file=$model_dir/params.yaml \
  --batch_size=$BATCH_SIZE \
  --input_type="image_tensor" \
  --input_image_size=$IMG_SIZE,$IMG_SIZE

Test the exported model.

Importing SavedModel

python
# Load the exported SavedModel and grab its default serving signature.
imported = tf.saved_model.load('/content/exported_model/saved_model')
model_fn = imported.signatures['serving_default']

Visualize the test predictions.

python
def resize_image(record):
  """Resize a record's image to the model input size.

  Returns:
    A (image, label) tuple where the image is resized to
    (IMG_SIZE, IMG_SIZE) and cast back to uint8.
  """
  resized = tf.cast(
      tf.image.resize(record['image'], size=(IMG_SIZE, IMG_SIZE)),
      tf.uint8)
  return resized, record['label']
python
# Resize and shuffle the test split, then batch it to match the batch size
# the model was exported with.
test_ds_resized = test_ds.map(resize_image).shuffle(100)
test_ds_batched = test_ds_resized.batch(BATCH_SIZE)
python
# Run the serving signature on one test batch; argmax over the logits
# gives the predicted class ids.
for images, labels in test_ds_batched.take(1):
  predictions = model_fn(inputs=images)['logits']
  predictions = tf.argmax(predictions, axis=-1)

# Uses the loop variables left over from the single batch taken above.
show_batch(images, labels, predictions)