Dunfey · Hotel WWDC as data, est. 1983
Front desk everything
Years
Topics

2020 AI & Machine Learning

WWDC20 · 31 min · AI & Machine Learning

Get models on device using Core ML Converters

With Core ML you can bring incredible machine learning models to your app and run them entirely on-device. And when you use Core ML Converters, you can incorporate almost any trained model from TensorFlow or PyTorch and take full advantage of the GPU, CPU, and Neural Engine. Discover everything you need to begin converting existing models from other ML platforms and explore how to create custom operations that extend the capabilities of your models. Once you’ve converted a model to Core ML, learn more about deployment strategy for those models by watching “Use model deployment and security with Core ML.”

Watch at developer.apple.com ↗

Transcript all transcripts

Code shown on screen · 15 snippets

TensorFlow conversion using tfcoreml python · at 2:58 ↗
# pip install tfcoreml
# pip install coremltools

# Convert a TensorFlow model with the standalone tfcoreml package.
# `tf_model` is not defined in this snippet and must already exist.
import tfcoreml
# NOTE(review): mlmodel_path presumably names where the .mlmodel is saved — confirm.
mlmodel = tfcoreml.convert(tf_model, mlmodel_path="/tmp/model.mlmodel")
New TensorFlow model conversion python · at 3:16 ↗
# pip install coremltools

# Convert a TensorFlow model through coremltools' own convert() entry point.
# `tf_model` is not defined in this snippet and must already exist.
import coremltools as ct
mlmodel = ct.convert(tf_model)
ONNX conversion to Core ML python · at 3:57 ↗
# pip install onnx-coreml
# pip install coremltools

# Two-step route: export the PyTorch model to ONNX, then convert the ONNX
# model with the onnx_coreml package. `torch_model` must already exist.
import onnx_coreml
# NOTE(review): `torch` is never imported in this snippet, and the call is
# written as shown on screen; PyTorch's ONNX exporter is normally reached via
# `torch.onnx` — confirm the exact call before running.
onnx_model = torch.export(torch_model)
mlmodel = onnx_coreml.convert(onnx_model)
New PyTorch model conversion python · at 4:28 ↗
# pip install coremltools

# Convert a PyTorch model directly with coremltools, without an ONNX step.
# `torch_script_model` is not defined in this snippet; judging by its name it
# is presumably a TorchScript (traced or scripted) module — confirm.
import coremltools as ct 
mlmodel = ct.convert(torch_script_model)
Unified conversion API python · at 4:52 ↗
import coremltools as ct

# One convert() call is used for every supported source framework; only the
# object passed as the first argument changes. `source_model` is a placeholder.
model = ct.convert(
  source_model # TF1, TF2, or PyTorch model
)
Demo 1: TF2 conversion python · at 6:42 ↗
import coremltools as ct 
import tensorflow as tf

# Build Keras' MobileNet with its default arguments and hand the in-memory
# model object straight to the converter.
tf_model = tf.keras.applications.MobileNet()
mlmodel = ct.convert(tf_model)
Demo 1: PyTorch conversion python · at 7:41 ↗
import coremltools as ct 
import torch
import torchvision 

# Build torchvision's MobileNetV2 with its default arguments.
torch_model = torchvision.models.mobilenet_v2()

# Switch to inference mode, then trace the model with a random example input
# of shape (1, 3, 256, 256) to obtain the traced module passed to convert().
torch_model.eval()
example_input = torch.rand(1, 3, 256, 256)
traced_model = torch.jit.trace(torch_model, example_input)

# The converter is told the input shape explicitly via ct.TensorType; the
# shape is taken from the same example tensor used for tracing.
mlmodel = ct.convert(traced_model,
                    inputs=[ct.TensorType(shape=example_input.shape)])

print(mlmodel)

# Rename the model's input and output features by editing its spec, then
# rebuild an MLModel from the edited spec.
# NOTE(review): "input.1" and "1648" are presumably the auto-generated names
# printed by the print() above — they may differ for another model/version.
spec = mlmodel.get_spec()
ct.utils.rename_feature(spec, "input.1", "myInputName")
ct.utils.rename_feature(spec, "1648", "myOutputName")
mlmodel = ct.models.MLModel(spec)

print(mlmodel)
Demo 1: TF1 conversion python · at 10:37 ↗
import coremltools as ct 
import tensorflow as tf

# Convert a frozen TF1 graph given by file path. The input is declared as an
# image with a per-channel bias and a scale factor, and the model is
# configured as a classifier whose labels come from "labels.txt".
# NOTE(review): bias/scale values are reproduced as shown on screen — confirm
# they match the preprocessing the original network was trained with.
mlmodel = ct.convert("mobilenet_frozen_graph.pb",
                    inputs=[ct.ImageType(bias=[-1,-1,-1], scale=1/127.0)],
                    classifier_config=ct.ClassifierConfig("labels.txt"))

# Attach descriptive metadata to the converted model before saving it.
mlmodel.short_description = 'An image classifier'
mlmodel.license = 'Apache 2.0'
mlmodel.author = "Original Paper: A. Howard, M. Zhu, B. Chen, D. Kalenichenko, W. Wang, "\
                 "T. Weyand, M. Andreetto, H. Adam"

# Write the converted model to disk.
mlmodel.save("mobilenet.mlmodel")
Demo 1 Recap: Using coremltools convert python · at 13:33 ↗
import coremltools as ct
# Recap: the same convert() call accepts a TF1 .pb path, a TF2 .h5 path, or an
# in-memory PyTorch model (the latter together with an explicit input shape).
# Each line overwrites `mlmodel`; `torch_model` and `example_input` are not
# defined in this snippet.
mlmodel = ct.convert("./tf1_inception_model.pb")
mlmodel = ct.convert("./tf2_inception_model.h5")
mlmodel = ct.convert(torch_model, inputs=[ct.TensorType(shape=example_input.shape)])
Converting a Deep Speech model python · at 15:45 ↗
import numpy as np
import IPython.display as ipd

import coremltools as ct

### Pretrained models and checkpoints are available on the repository: 
https://github.com/mozilla/DeepSpeech

# Notebook shell cell: run DeepSpeech.py with an export directory of /tmp and
# the 0.7.1 checkpoint; all output is discarded.
!python DeepSpeech.py --export_dir /tmp --checkpoint_dir ./deepspeech-0.7.1-checkpoint --alphabet_config_path=alphabet.txt --scorer_path=kenlm.scorer >/dev/null 2>&1

# Notebook shell cell: list the .pb files now present in /tmp.
ls /tmp/*.pb

# Path of the frozen graph that is converted below.
tf_model = "/tmp/output_graph.pb"

from demo_utils import inspect_tf_outputs

# Notebook cell: display the graph's output node names.
inspect_tf_outputs(tf_model)

# Output nodes requested from the converter: the logits plus the two
# recurrent state outputs that are fed back in during streaming.
outputs = ["logits", "new_state_c", "new_state_h"]

mlmodel = ct.convert(tf_model, outputs=outputs)

audiofile = "./audio_sample_16bit_mono_16khz.wav"

# Notebook cell: render an audio player for the sample.
ipd.Audio(audiofile) 

from demo_utils import preprocessing, postprocessing

# Turn the wav file into the feature array the model consumes.
mfccs = preprocessing(audiofile)

# Notebook cell: display the feature array's shape.
mfccs.shape

from demo_utils import inspect_inputs

# Notebook cell: tabulate each model input's Core ML shape next to its TF shape.
inspect_inputs(mlmodel, tf_model)

# Streaming transcription: feed the features to the Core ML model in fixed
# windows of `step` time steps, carrying the recurrent state from one window
# to the next and re-decoding the accumulated logits after every window.
start = 0
step = 16

# Axis 1 of `mfccs` is the time axis (it is the axis sliced in the loop below).
max_time_steps = mfccs.shape[1]

logits_sequence = []

input_dict = {}

# Every window has exactly `step` time steps.
input_dict["input_lengths"] = np.array([step]).astype(np.float32)

input_dict["previous_state_c"] = np.zeros([1, 2048]).astype(np.float32)  # Initializing cell state
input_dict["previous_state_h"] = np.zeros([1, 2048]).astype(np.float32)  # Initializing hidden state


print("Transcription: \n")

# `<=` (not `<`) so that the last full window is still processed when the
# number of time steps is an exact multiple of `step`; a trailing partial
# window (fewer than `step` frames) is still skipped.
while (start + step) <= max_time_steps:
    input_dict["input_node"] = mfccs[:, start:(start + step), :, :]

    # Evaluation
    preds = mlmodel.predict(input_dict)

    start += step
    logits_sequence.append(preds["logits"])

    # Updating states: this window's output state is the next window's input.
    input_dict["previous_state_c"] = preds["new_state_c"]
    input_dict["previous_state_h"] = preds["new_state_h"]

    # Decoding: re-run the decoder on everything seen so far and overwrite the
    # current console line ("\r") so the transcript appears to grow in place.
    probs = np.concatenate(logits_sequence)
    transcription = postprocessing(probs)
    print(transcription[0][1], end="\r", flush=True)

# Notebook shell cell: re-run the export, this time with `--n_steps -1`.
# NOTE(review): presumably this exports a graph that accepts a variable number
# of time steps instead of a fixed window — confirm against DeepSpeech docs.
!python DeepSpeech.py --n_steps -1 --export_dir /tmp --checkpoint_dir ./deepspeech-0.7.1-checkpoint --alphabet_config_path=alphabet.txt --scorer_path=kenlm.scorer >/dev/null 2>&1

# Convert the re-exported graph (same path, same requested outputs).
mlmodel = ct.convert(tf_model, outputs=outputs)

# Notebook cell: tabulate the new model's input shapes.
inspect_inputs(mlmodel,tf_model)

input_dict = {}

# Feed the whole feature array in a single call instead of windowing it.
input_dict["input_node"] = mfccs

# The sequence length is the full size of the time axis (axis 1).
input_dict["input_lengths"] = np.array([mfccs.shape[1]]).astype(np.float32)
input_dict["previous_state_c"] = np.zeros([1, 2048]).astype(np.float32) # Initializing cell state 
input_dict["previous_state_h"] = np.zeros([1, 2048]).astype(np.float32) # Initializing hidden state 

probs = mlmodel.predict(input_dict)["logits"]

transcription = postprocessing(probs)

# Print element [1] of the first decoder result.
print(transcription[0][1])
Deep Speech Demo Recap: Convert with input type python · at 21:52 ↗
import coremltools as ct

# Give the converter an explicit type for the graph input named "input_node",
# fixing its shape to (1, 16, 19, 26). `tf_model` and `outputs` are not
# defined in this snippet and must already exist.
# The variable is called `input_type` rather than `input` so that it does not
# shadow Python's builtin input().
input_type = ct.TensorType(name="input_node", shape=(1, 16, 19, 26))
model = ct.convert(tf_model, outputs=outputs, inputs=[input_type])
MIL Builder API sample python · at 26:26 ↗
from coremltools.converters.mil import Builder as mb

@mb.program(input_specs=[mb.TensorSpec(shape=(1, 100, 100, 3))])
def prog(x):
    """Sample MIL program built with the Builder API.

    Takes one input of shape (1, 100, 100, 3) and applies, in order:
    relu, a transpose with perm [0, 3, 1, 2], a mean over axes 2 and 3
    (dropping those axes), and an element-wise log. Returns the final value.
    """
    rectified = mb.relu(x=x)
    # perm=[0, 3, 1, 2] moves the last axis to position 1.
    permuted = mb.transpose(x=rectified, perm=[0, 3, 1, 2])
    averaged = mb.reduce_mean(x=permuted, axes=[2, 3], keep_dims=False)
    return mb.log(x=averaged)
Converting with composite ops python · at 28:20 ↗
import coremltools as ct

from transformers import TFT5Model

# Load the pretrained 't5-small' TensorFlow T5 model.
model = TFT5Model.from_pretrained('t5-small')

# First conversion attempt, made before any custom op has been registered.
# NOTE(review): in the session this call presumably fails on the Einsum op,
# which motivates the registration that follows — confirm.
mlmodel = ct.convert(model)

# Einsum Notation

 $$ \Large "bnqd,bnkd \rightarrow bnqk" $$

$$ \large C(b, n, q, k) = \sum_d A(b, n, q, d) \times  B(b, n, k, d) $$

$$ \Large C = AB^{T}$$

from coremltools.converters.mil import Builder as mb

from coremltools.converters.mil import register_tf_op

@register_tf_op
def Einsum(context, node):
    """Translate a TensorFlow Einsum node into a single MIL matmul.

    Only the equation 'bnqd,bnkd->bnqk' is handled. That equation contracts
    the last axis of both operands, i.e. C = A · Bᵀ, so it maps onto a matmul
    with the second operand transposed.

    Args:
        context: converter context; indexed by input name to look up the
            already-translated operands, and updated with the result.
        node: the TensorFlow node being translated (its `attr`, `inputs` and
            `name` are read).

    Raises:
        AssertionError: if the node's equation is not 'bnqd,bnkd->bnqk'.
    """
    # Body is indented with spaces throughout: the original mixed tabs and
    # spaces here, which Python 3 rejects at compile time.
    assert node.attr['equation'] == 'bnqd,bnkd->bnqk'

    a = context[node.inputs[0]]
    b = context[node.inputs[1]]

    # transpose_y=True implements the Bᵀ in C = A · Bᵀ.
    x = mb.matmul(x=a, y=b, transpose_x=False, transpose_y=True, name=node.name)

    # Register the result under the node's name so later nodes can look it up.
    context.add(node.name, x)

# Convert the same model again, now that an Einsum translation function has
# been registered with @register_tf_op, and print the resulting model.
mlmodel = ct.convert(model)

print(mlmodel)
Recap: Custom operation python · at 29:50 ↗
@register_tf_op
def Einsum(context, node):
    """Custom translation of a TensorFlow Einsum node to a MIL matmul.

    Handles only the equation 'bnqd,bnkd->bnqk' (asserted), looks up the two
    operands in `context`, and stores the product of the first operand with
    the transposed second operand back into `context` under the node's name.
    """
    equation = node.attr['equation']
    assert equation == 'bnqd,bnkd->bnqk'

    lhs = context[node.inputs[0]]
    rhs = context[node.inputs[1]]

    product = mb.matmul(
        x=lhs,
        y=rhs,
        transpose_x=False,
        transpose_y=True,
        name=node.name,
    )
    context.add(node.name, product)
Deep Speech demo utilities python · at 29:50 ↗
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.ops import gen_audio_ops as contrib_audio


from deepspeech_training.util.text import Alphabet
from ds_ctcdecoder import ctc_beam_search_decoder, Scorer


## Preprocessing + Postprocessing functions are constructed using code in the DeepSpeech repository: https://github.com/mozilla/DeepSpeech

# Feature-extraction settings used by audiofile_to_features() and
# create_overlapping_windows() below.
audio_window_samples = 512  # spectrogram window_size
audio_step_samples = 320  # spectrogram stride
n_input  = 26  # MFCC coefficient count; also the per-frame channel count
audio_sample_rate = 16000  # halved to give the MFCC upper frequency limit
context = 9  # each window spans 2 * context + 1 frames

# Values passed to the Scorer constructor below.
lm_alpha = 0.931289039105002
lm_beta  = 1.1834137581510284
scorer_path = "./kenlm.scorer"

# Values passed to ctc_beam_search_decoder() in postprocessing().
beam_width = 1024
cutoff_prob = 1.0
cutoff_top_n = 300

# Both objects are constructed at import time from files in the working
# directory, so importing this module requires those files to exist.
alphabet = Alphabet("./alphabet.txt")

scorer = Scorer(lm_alpha, lm_beta, scorer_path, alphabet)


def audiofile_to_features(wav_filename):
    """Build the graph ops that turn a wav file into MFCC features.

    The file is read and decoded as single-channel audio, converted to a
    squared-magnitude spectrogram, reduced to `n_input` MFCC coefficients and
    reshaped to two dimensions with `n_input` columns.

    Args:
        wav_filename: path of the wav file (a string or string tensor).

    Returns:
        A pair ``(features, count)``: the reshaped MFCC tensor and a tensor
        holding the size of its first dimension.
    """
    wav = contrib_audio.decode_wav(tf.io.read_file(wav_filename), desired_channels=1)

    power_spectrogram = contrib_audio.audio_spectrogram(
        wav.audio,
        window_size=audio_window_samples,
        stride=audio_step_samples,
        magnitude_squared=True,
    )

    coefficients = contrib_audio.mfcc(
        spectrogram=power_spectrogram,
        sample_rate=wav.sample_rate,
        dct_coefficient_count=n_input,
        upper_frequency_limit=audio_sample_rate / 2,
    )

    # Collapse every leading dimension so each row is one frame of n_input values.
    features = tf.reshape(coefficients, [-1, n_input])
    frame_count = tf.shape(input=features)[0]

    return features, frame_count



def create_overlapping_windows(batch_x):
    """Expand each frame into a window of itself plus surrounding frames.

    A 1-D convolution with an identity-matrix kernel (stride 1, 'SAME'
    padding) copies, for every time step, the ``2 * context + 1`` neighbouring
    frames into the channel axis; the result is then reshaped so that the
    window and the per-frame channels are separate axes.

    Args:
        batch_x: feature tensor whose first dimension is the batch and whose
            last dimension has `n_input` channels.

    Returns:
        Tensor of shape ``[batch, -1, 2 * context + 1, n_input]``.
    """
    n_batch = tf.shape(input=batch_x)[0]
    frames_per_window = 2 * context + 1
    flat_width = frames_per_window * n_input

    # Identity kernel: output channel i simply copies one (frame, channel) slot.
    identity_kernel = np.eye(flat_width).reshape(frames_per_window, n_input, flat_width)
    kernel = tf.constant(identity_kernel, tf.float32)

    windows = tf.nn.conv1d(input=batch_x, filters=kernel, stride=1, padding='SAME')

    return tf.reshape(windows, [n_batch, -1, frames_per_window, n_input])


# Module-level session with its own private graph; preprocessing() evaluates
# `_features` in this session.
# NOTE(review): tf.Session / tf.placeholder are TensorFlow 1.x-style graph
# APIs — confirm the installed TensorFlow version exposes them at top level.
sess = tf.Session(graph=tf.Graph())

# Build the feature-extraction graph once, at import time.
with sess.graph.as_default() as g:
    # String placeholder fed with the wav file path on every call.
    path = tf.placeholder(tf.string)
    # The second return value of audiofile_to_features is not needed here.
    _features, _ = audiofile_to_features(path)
    # Add a leading batch axis before windowing.
    _features = tf.expand_dims(_features, 0)
    _features = create_overlapping_windows(_features)

    

def preprocessing(input_file_path):
    """Evaluate the module-level feature graph for one wav file.

    Args:
        input_file_path: path of the wav file; fed to the `path` placeholder.

    Returns:
        The evaluated value of the module-level `_features` tensor.
    """
    feed = {path: input_file_path}
    return _features.eval(session=sess, feed_dict=feed)




def postprocessing(logits):
    """Decode model logits with the CTC beam-search decoder.

    Size-1 axes are squeezed out of `logits` before decoding. The decoder is
    called with the module-level alphabet, beam width, scorer and cutoff
    settings.

    Args:
        logits: array of model outputs.

    Returns:
        Whatever ``ctc_beam_search_decoder`` returns, unchanged.
    """
    squeezed = np.squeeze(logits)

    return ctc_beam_search_decoder(
        squeezed,
        alphabet,
        beam_width,
        scorer=scorer,
        cutoff_prob=cutoff_prob,
        cutoff_top_n=cutoff_top_n,
    )



def inspect_tf_outputs(path):
    """List the names of a frozen graph's output operations.

    An operation is reported when it is not of type "Const" and none of its
    output tensors is consumed by another operation.

    Args:
        path: path of the serialized GraphDef (.pb) file.

    Returns:
        List of operation names, in graph order.
    """
    with open(path, 'rb') as pb_file:
        raw_bytes = pb_file.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(raw_bytes)

    # Import into a fresh graph so the default graph is left untouched.
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name="")

    def _has_no_consumers(op):
        # True when every output tensor of `op` has an empty consumer list.
        return all(
            not graph.get_tensor_by_name(tensor.name).consumers()
            for tensor in op.outputs
        )

    return [
        op.name
        for op in graph.get_operations()
        if op.type != "Const" and _has_no_consumers(op)
    ]


def inspect_inputs(mlmodel, tfmodel):
    """Tabulate each model input's Core ML shape next to its TensorFlow shape.

    For every input in the Core ML spec, the multi-array shape is read and any
    dimension whose size range is unbounded or spans more than one value is
    reported as ``None``. The TensorFlow shape is read from the tensor
    ``"<input name>:0"`` in the frozen graph.

    Args:
        mlmodel: converted Core ML model (its ``get_spec()`` is used).
        tfmodel: path of the serialized TensorFlow GraphDef (.pb) file.

    Returns:
        pandas.DataFrame indexed by input name with the columns
        "Core ML shape" and "TF shape", ordered by descending rank.

    Raises:
        TypeError: if a size range has a lower bound above its upper bound.
        AssertionError: if a fixed-size range disagrees with the stored shape.
    """
    spec = mlmodel.get_spec()

    with open(tfmodel, 'rb') as pb_file:
        raw_bytes = pb_file.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(raw_bytes)

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name="")

    input_names = []
    input_ranks = []
    shape_rows = []

    for feature in spec.description.input:
        array_type = feature.type.multiArrayType
        dims = array_type.shape

        if array_type.shapeRange:
            for axis, bounds in enumerate(array_type.shapeRange.sizeRanges):
                if bounds.upperBound == -1 or bounds.lowerBound < bounds.upperBound:
                    # Unbounded or genuinely variable: mark as flexible.
                    dims[axis] = -1
                elif bounds.lowerBound == bounds.upperBound:
                    assert dims[axis] == bounds.lowerBound
                else:
                    raise TypeError("Invalid shape range")

        coreml_shape = tuple(None if d == -1 else d for d in dims)
        tf_tensor = graph.get_tensor_by_name(feature.name + ":0")
        tf_shape = tuple(tf_tensor.shape.as_list())

        shape_rows.append({"Core ML shape": coreml_shape, "TF shape": tf_shape})
        input_names.append(feature.name)
        input_ranks.append(len(coreml_shape))

    # Highest-rank inputs first.
    order = np.argsort(input_ranks)[::-1]
    sorted_rows = [shape_rows[i] for i in order]
    sorted_names = [input_names[i] for i in order]

    return pd.DataFrame(sorted_rows, index=sorted_names)

Resources