WWDC26 · 21 min · AI & Machine Learning

Meet Core AI

Discover Core AI, Apple’s new framework for on-device AI model deployment. Tour the ecosystem, from Python libraries for converting, authoring, and optimizing models, to a Swift API for simple plug-and-play inference and advanced use cases with strict latency and memory requirements. Explore the new Core AI models repository with ready-to-run examples for popular architectures. See how deep Xcode integration, including ahead-of-time model compilation, streamlines the workflow so you can deliver smarter, more responsive app experiences.

Watch at developer.apple.com ↗

Transcript all transcripts

Chapters

0:00 — Introduction
0:33 — What is Core AI
4:57 — Model conversion
6:16 — App integration
10:48 — Profiling with Instruments
11:15 — Optimizing performance
14:13 — Additional features
15:34 — Specialization
20:07 — Next steps

Code shown on screen · 13 snippets

Convert a PyTorch model to Core AI python · at 5:08 ↗

import torch
import coreai_torch
# Load trained snake model and sample input for tracing
# NOTE(review): `SnakeTransformer` is not imported in this snippet — presumably the project's own model class.
pt_model = SnakeTransformer().load_checkpoint("snake.pt")
example  = torch.randn(1, 5, 16)

# Export the torch program including dynamic shape for input sequence
# Axis 1 of `features` (size 5 in `example`) is the axis declared dynamic, bounded to 1..256.
seq_len  = torch.export.Dim("seq_len", min=1, max=256)
exported = torch.export.export(
    pt_model, args=(example,), 
    dynamic_shapes={"features": {1: seq_len}},
)
# Apply the decomposition table supplied by coreai_torch before converting.
exported = exported.run_decompositions(coreai_torch.get_decomp_table())

# Convert torch graph → Core AI graph
# "features" / "logits" are the names later used to pass the input and read the output.
ai_program = coreai_torch.TorchConverter().add_exported_program(
    exported, input_names=["features"], output_names=["logits"],
).to_coreai()

# Save as a .aimodel asset the runtime can load
ai_program.save_asset("SnakeTransformer.aimodel")

Verify converted model numerics python · at 5:44 ↗

import torch
import numpy as np
from coreai. runtime import AIModel, NDArray
# Load models
pt_model = SnakeTransformer().load_checkpoint("snake.pt")
ai_model = await AIModel.load("SnakeTransformer.aimodel")
function = ai_model.load_function("main")
# Assemble input sample - 10 frames of 16-dim game features, shape (1, 10, 16)
features = np.array(lextract_features(game) for - in range (10)],
dtype=np.float32)[np.newaxis]
# PyTorch reference
with torch.no_grad():
	pytorch_logits = pt_model(torch.from_numpy(features)) . numpy )[0, -1]
# Core AI inference
result = await function({ "features": NDArray(data=features)} )
coreai_logits = result["logits"]. numpy()[0, -1]
# Validate
max_diff = np.max(np.abs(pytorch_logits - coreai_logits))
	assert max_diff < 0.01

Core AI framework core types swift · at 7:41 ↗

// Core types within Core AI
import CoreAI

// Load the '.aimodel' file
let model = try await AIModel(contentsOf: modelURL)

// Load the main inference function
// NOTE(review): the trailing `!` force-unwraps the result; if `loadFunction` yields nil for "main", this line traps.
let mainFunction: InferenceFunction = try model.loadFunction(named: "main")!

// Construct the n-dimensional input data
let inputNDArray: NDArray = nextInput()

// Run inference
// `outputs` is a `var`, presumably because `remove` below takes the entry out of it — confirm.
var outputs = try await mainFunction.run(inputs: ["input": inputNDArray])

guard let outputNDArray = outputs.remove("output")?.ndArray else {
  // Handle unexpected missing output
  // NOTE: Swift requires a `guard` else-branch to leave the enclosing scope (return, throw, etc.);
  // this placeholder must be replaced with such an exit before the snippet compiles.
}

Initialize ModelPlayer with AIModel swift · at 8:33 ↗

// Initialize the player by loading the AIModel and InferenceFunction
/// Holds the inference function loaded from a Core AI model file.
struct ModelPlayer {
  /// The model's "main" function, loaded once in `init(modelURL:)`.
  let nextActionFunction: InferenceFunction

  /// Loads the model at `modelURL` and looks up its "main" function.
  /// - Parameter modelURL: Location of the model to load.
  /// - Throws: Whatever `AIModel(contentsOf:)` or `loadFunction(named:)` throw.
  init(modelURL: URL) async throws {
    let model = try await AIModel(contentsOf: modelURL)
    // NOTE(review): the trailing `!` force-unwraps; if `loadFunction` yields nil for "main", this traps rather than throws.
    self.nextActionFunction = try model.loadFunction(named: "main")!
  }
}

Run inference with NDArray inputs swift · at 8:49 ↗

extension ModelPlayer: SnakePlayer {

  /// Picks the next direction by running the model on features written from `game`.
  /// - Parameter game: The game whose state is encoded as model input.
  /// - Returns: The direction derived from the model's "logits" output.
  /// - Throws: `ModelError.missingOutput` when no "logits" output is returned, plus any error from `run(inputs:)`.
  mutating func chooseAction(game: SnakeGame) async throws -> Direction {

    // Create an NDArray for the next input and write board features into it
    // NOTE(review): `hiddenDim` is not defined in this snippet — presumably the per-step feature count; confirm.
    var inputFeatures = NDArray(shape: [game.stepCount, hiddenDim], scalarType: .float32)
    writeFeatures(of: game, into: inputFeatures.mutableView())

    // Run inference and extract the expected logits output NDArray
    // The "features" / "logits" keys are the input and output names given at conversion time.
    var outputs = try await nextActionFunction.run(inputs: ["features": inputFeatures])
    guard let logits = outputs.remove("logits")?.ndArray else {
      throw ModelError.missingOutput
    }

    return predictedDirection(from: logits.view())
  }

  // Helper bodies are elided in the source snippet.
  /// Writes the features of `game` into the supplied mutable view.
  func writeFeatures(of game: SnakeGame, into view: consuming NDArray.MutableView<Float>) { … }
  /// Maps a view over the logits to a `Direction`.
  func predictedDirection(from logits: NDArray.View<Float>) -> Direction { … }
}

Input features for the snake model swift · at 10:10 ↗

// Features at each time step
// Total per step: 4 + 2 + 4 + 2 + 4 = 16 values, assuming both one-hot encodings have 4 entries —
// this matches the 16-dim features used in the conversion and verification snippets.
var features = [Float]()

// Distance to wall in all directions, normalized between [0, 1]
features += [dWallUp, dWallDown, dWallLeft, dWallRight]

// Distance to nearest food, normalized between [-1, 1]
features += [dFoodX, dFoodY]

// Direction encoded as one-hot: [1,0,0,0]=up, [0,1,0,0]=down, etc.
features += dir.oneHotEncoding

// Distance to the other snake, normalized to [-1, 1]
features += [dUserX, dUserY]

// Direction of the opponent snake
// NOTE(review): presumably uses the same one-hot layout as `dir` above — confirm.
features += dirU.oneHotEncoding

Add KV cache buffers to PyTorch module python · at 12:18 ↗

# Update torch module to include key and value caches
# Use register_buffer to later make the exported torch program treat them as mutable

class SnakeTransformerStateful(nn.Module):
    # The `...` in the parameter list is a placeholder for the model's real constructor arguments.
    def __init__(self, ...):
        super().__init__()
        # Both caches are zero-initialised with the same shape; the first axis is indexed per block in forward().
        # NOTE(review): N_LAYERS, MAX_SEQ_LEN and D_MODEL are not defined in this snippet; the size-1 axis is
        # presumably the batch dimension — confirm.
        self.register_buffer(
            "k_cache", torch.zeros(N_LAYERS, 1, MAX_SEQ_LEN, D_MODEL))
        self.register_buffer(
            "v_cache", torch.zeros(N_LAYERS, 1, MAX_SEQ_LEN, D_MODEL))
        # …

Update forward pass to read/write KV caches python · at 12:50 ↗

# During forward pass, read/write KV caches

class SnakeTransformerStateful(nn.Module):

    # NOTE(review): `k_updated`, `v_updated` and `x` come from the computation elided below.
    def forward(self, features, position_ids):
        # Collect one updated key tensor and one updated value tensor per block.
        new_k, new_v = [], []
        for i, block in enumerate(self.blocks):
            # read previous keys/values from caches
            k_prev = self.k_cache[i]
            v_prev = self.v_cache[i]
            # ... compute q/k/v for the new token, attend over valid prefix ...
            new_k.append(k_updated)
            new_v.append(v_updated)

        # Update key/value caches
        # `copy_` writes in place into the registered buffers; the stacked per-block tensors
        # get a new leading axis, so they line up with the buffers' first (per-block) axis.
        self.k_cache.copy_(torch.stack(new_k))
        self.v_cache.copy_(torch.stack(new_v))

        return self.action_head(self.ln_final(x))

Re-convert model with state names python · at 12:59 ↗

# Updated coreai-torch conversion code using key/value cache states
import torch
import coreai_torch

# NOTE(review): `stateful_model`, `example_features`, `example_position_ids` and `seq_len` are not
# defined in this snippet; `seq_len` is presumably the `torch.export.Dim` from the original conversion.
# Here only axis 1 of `position_ids` is declared dynamic.
exported = torch.export.export(
    stateful_model,
    args=(example_features, example_position_ids),
    dynamic_shapes={"position_ids": {1: seq_len}},
)
exported = exported.run_decompositions(coreai_torch.get_decomp_table())

# "keyCache" / "valueCache" are the state names the Swift side uses when inserting its state views.
# NOTE(review): presumably these map, in order, to the registered buffers `k_cache` and `v_cache` — confirm.
ai_program = coreai_torch.TorchConverter().add_exported_program(
    exported,
    input_names=["features", "position_ids"],
    state_names=["keyCache", "valueCache"],
    output_names=["logits"],
).to_coreai()

ai_program.save_asset("SnakeTransformer.aimodel")

Store KV cache NDArrays in ModelPlayer swift · at 13:17 ↗

// Add stored properties for the key and value caches
/// Holds the loaded inference function together with the key/value cache arrays.
struct ModelPlayer {
    /// The model's "main" function, loaded once in `init(modelURL:)`.
    let nextActionFunction: InferenceFunction

    /// Key cache; declared `var` so it can be passed with `&` when building state views.
    var keyCache: NDArray
    /// Value cache; same shape and scalar type as `keyCache`.
    var valueCache: NDArray

    /// Loads the model at `modelURL`, looks up its "main" function and allocates both caches.
    /// - Parameter modelURL: Location of the model to load.
    /// - Throws: Whatever `AIModel(contentsOf:)` or `loadFunction(named:)` throw.
    init(modelURL: URL) async throws {
        let model = try await AIModel(contentsOf: modelURL)
        // NOTE(review): the trailing `!` force-unwraps; if `loadFunction` yields nil for "main", this traps rather than throws.
        self.nextActionFunction = try model.loadFunction(named: "main")!

        // NOTE(review): `layers`, `maxContext` and `hiddenDim` are not defined in this snippet. These caches
        // have three dimensions, while the PyTorch buffers are created with four
        // (N_LAYERS, 1, MAX_SEQ_LEN, D_MODEL) — confirm the size-1 axis is intentionally omitted here.
        self.keyCache = NDArray(shape: [layers, maxContext, hiddenDim], scalarType: .float32)
        self.valueCache = NDArray(shape: [layers, maxContext, hiddenDim], scalarType: .float32)
    }
}

Pass state views to inference function swift · at 13:45 ↗

extension ModelPlayer: SnakePlayer {
    // NOTE(review): this signature adds `snakeID: Int`, which the earlier `chooseAction(game:)` version did not have.
    mutating func chooseAction(game: SnakeGame, snakeID: Int) async throws -> Direction {
        // …

        // Register the stored cache arrays under the state names used at conversion time.
        // They are passed with `&`, which is why this method has to be `mutating`.
        var stateViews = InferenceFunction.MutableViews()
        stateViews.insert(&keyCache, for: "keyCache")
        stateViews.insert(&valueCache, for: "valueCache")

        // Run inference and extract the expected logits output NDArray
        // NOTE(review): `inputFeatures` is built in the elided code above. Only "features" is passed here,
        // while the re-converted model also lists "position_ids" as an input — confirm how it is supplied.
        var outputs = try await nextActionFunction.run(
            inputs: ["features": inputFeatures],
            states: stateViews)
        // …
    }
}

Check model cache before loading swift · at 16:22 ↗

// Check if your model can be loaded from the cache
let cache = AIModelCache.default

// A nil result takes the else-branch, which schedules a user-facing notice on the main actor.
// NOTE: as written the else-branch falls through; Swift requires a `guard` else-branch to exit
// the enclosing scope, so real code needs a return/throw (or similar) after the Task.
guard let model = try cache.model(for: modelURL, options: .default) else {
    Task { @MainActor in
        informUser("Preparing AI features. This may take a while…")
    }
}

Request model specialization swift · at 16:42 ↗

// Explicitly request specialization
// Asynchronous and throwing, so it must be called with `try await` from an async context.
// NOTE(review): presumably this is the slow step the "This may take a while…" notice refers to — confirm.
try await AIModel.specialize(contentsOf: modelURL)

Resources

[documentation] Core AI PyTorch Extensions
[documentation] Core AI Python
[documentation] Core AI Optimization
[documentation] Core AI
[documentation] Compiling Core AI models ahead of time
[documentation] Managing model specialization and caching

Integrate on-device AI models into your app using Core AI

WWDC26 · 4 snippets

24 min
Dive into Core AI model authoring and optimization

WWDC26 · 4 snippets

29 min
Optimize custom machine learning operations with Metal tensors

WWDC26 · 9 snippets

16 min

Chapters

Code shown on screen · 13 snippets

Resources

Related sessions

Integrate on-device AI models into your app using Core AI

Dive into Core AI model authoring and optimization

Optimize custom machine learning operations with Metal tensors