WWDC26 · 21 min · AI & Machine Learning

Bring an LLM provider to the Foundation Models framework

Extend the Foundation Models framework by implementing a LanguageModelExecutor for new models. Explore how to interface with the LanguageModelSession’s transcript, manage session state effectively, and optimize KV cache utilization. Find out how to support custom segment types and unlock advanced capabilities for your generative AI features.

Watch at developer.apple.com ↗

Transcript · All transcripts

Chapters

0:00 — Introduction
3:37 — Packaging
4:48 — Protocol
14:50 — Authentication
15:51 — Customization
19:47 — Next steps

Code shown on screen · 14 snippets

Choose a language model swift · at 2:00 ↗

import FoundationModels
import MLXFoundationModels

// Pick one model backend; every alternative below binds the same `model` constant,
// so the session code at the bottom does not change.
// On-device Apple Foundation Model
let model = SystemLanguageModel()

// Private Cloud Compute model
// let model = PrivateCloudComputeLanguageModel()

// Custom Core AI model
// let model = try await CoreAILanguageModel(resourcesAt: modelURL)

// Open-source MLX model from HuggingFace
// let model = MLXLanguageModel(modelID: "mlx-community/my-model")

// Create a session over the chosen model, send a single prompt, and print the reply's content.
let session = LanguageModelSession(model: model)
let response = try await session.respond(to: "...")
print(response.content)

Configure Package.swift for your model package swift · at 3:46 ↗

// Package.swift
// NOTE(review): this excerpt omits the `// swift-tools-version:` header and the
// `import PackageDescription` line a standalone manifest needs — confirm both exist in the real file.

// Declares the "MyModel" package: one library product backed by a runtime target,
// a public-facing target, and a test target.
let package = Package(
    name: "MyModel",
    platforms: [
        .macOS(.v27), .iOS(.v27), .visionOS(.v27), .watchOS(.v27)
    ],
    products: [
        // Only the "MyModel" target is exposed to clients as a library.
        .library(name: "MyModel", targets: ["MyModel"])
    ],
    dependencies: [
        // URL elided in this excerpt; the requirement accepts 1.0.x releases only.
        .package(url: "...", .upToNextMinor(from: "1.0.0"))
    ],
    targets: [
        // Runtime target; no dependencies are declared for it here.
        .target(name: "MyModelRuntime"),
        // public: LanguageModel conformance
        .target(name: "MyModel", dependencies: ["MyModelRuntime"]),
        .testTarget(name: "MyModelTests", dependencies: ["MyModel"])
    ]
)

LanguageModel and LanguageModelExecutor protocols swift · at 4:56 ↗

// LanguageModel protocol

/// Describes a model: the capabilities it advertises and the configuration for its executor.
// NOTE(review): `Executor` is not declared in this excerpt — presumably an associated
// type of the protocol; confirm against the framework reference.
public protocol LanguageModel: Sendable {
    /// The capabilities this model reports.
    var capabilities: LanguageModelCapabilities { get }
    /// A value of the executor's `Configuration` type.
    var executorConfiguration: Executor.Configuration { get }
}

// LanguageModelExecutor protocol

/// The requirements for an executor: construction from a configuration, an optional
/// warm-up hook, and a streaming `respond` entry point.
// NOTE(review): `Configuration` and `Model` are not declared in this excerpt —
// presumably associated types of the protocol; confirm against the framework reference.
public protocol LanguageModelExecutor: Sendable {
    /// Creates an executor from `configuration`; may throw.
    init(configuration: Configuration) throws
    /// Synchronous, non-throwing hook that receives the model and the current transcript.
    func prewarm(model: Model, transcript: Transcript)
    /// Handles one generation request, writing output into `channel`.
    /// - Parameters:
    ///   - request: The generation request to serve.
    ///   - model: The model the request targets.
    ///   - channel: The channel the executor streams its output into.
    /// - Throws: Any error raised while serving the request.
    func respond(
        to request: LanguageModelExecutorGenerationRequest,
        model: Model,
        streamingInto channel: LanguageModelExecutorGenerationChannel
    ) async throws
}

Implement LanguageModel and Executor conformances swift · at 6:25 ↗

// LanguageModel conformance
/// A `LanguageModel` conformance that pairs this model with `MyLanguageModelExecutor`.
public struct MyLanguageModel: LanguageModel {
    /// The executor type that serves requests for this model.
    ///
    /// Must be `public`: it appears in the type of the public
    /// `executorConfiguration` property, and a public type cannot expose an
    /// internal alias in its public API.
    public typealias Executor = MyLanguageModelExecutor

    /// Advertises tool calling, guided generation, and reasoning.
    public var capabilities: LanguageModelCapabilities {
        LanguageModelCapabilities(capabilities: [
            .toolCalling, .guidedGeneration, .reasoning
        ])
    }

    /// The configuration used to construct the executor (arguments elided in this excerpt).
    public var executorConfiguration: Executor.Configuration {
        Executor.Configuration(/* ... */)
    }
}

// Executor conformance
/// The executor paired with `MyLanguageModel`; member bodies are elided in this excerpt.
public struct MyLanguageModelExecutor: LanguageModelExecutor {
    /// The model type this executor serves.
    public typealias Model = MyLanguageModel

    /// Executor settings; hashable and sendable. Fields are elided here.
    public struct Configuration: Hashable, Sendable { /* ... */ }

    /// Creates an executor from `configuration`; may throw.
    public init(configuration: Configuration) throws { /* ... */ }

    /// Serves one generation request, streaming output into `channel`.
    /// - Parameters:
    ///   - request: The generation request to serve.
    ///   - model: The model the request targets.
    ///   - channel: The channel output is streamed into.
    /// - Throws: Any error raised while serving the request.
    public func respond(
        to request: LanguageModelExecutorGenerationRequest,
        model: MyLanguageModel,
        streamingInto channel: LanguageModelExecutorGenerationChannel
    ) async throws { /* ... */ }
}

Manage model resources with prewarm and respond swift · at 7:28 ↗

// One approach to managing resources

/// Excerpt showing one way to load model weights lazily and reuse them from both
/// `prewarm` and `respond`.
// NOTE(review): `loadedModel`, `loadWeights()`, and `LoadedWeights` are not declared
// in this excerpt — confirm how the cached weights are stored.
struct MyLanguageModelExecutor: LanguageModelExecutor {

    /// Returns the cached weights, calling `loadWeights()` only when nothing is cached,
    /// and stores the result in `loadedModel`.
    /// - Throws: Whatever `loadWeights()` throws when it has to run.
    // NOTE(review): this helper is `mutating`, but the non-mutating `prewarm` and
    // `respond` below call it — on a struct that should not compile as written; confirm.
    private mutating func loadModelIfNeeded() throws -> LoadedWeights {
        let weights = try loadedModel ?? loadWeights()
        loadedModel = weights
        return weights
    }

    /// Best-effort warm-up: a load failure is discarded by `try?`.
    // NOTE(review): the signature differs from `prewarm(model:transcript:)` in the
    // protocol listing earlier in this file — confirm which form is required.
    // NOTE(review): the assignment repeats the store already done inside
    // `loadModelIfNeeded()`; looks redundant — confirm.
    func prewarm(transcript: Transcript) {
        loadedModel = try? loadModelIfNeeded()
    }

    /// Ensures the weights are loaded before generating; load errors propagate to the caller.
    func respond( ... ) async throws {
        let weights = try loadModelIfNeeded()
        // ...generate with 'weights'...
    }
}

Map Transcript entries to model messages swift · at 9:00 ↗

// Transcript entries

// Example transcript: one instructions entry, then two prompt turns. The first turn
// includes a tool call and its output before the response; the second is answered directly.
// Entry arguments are elided (`...`); the trailing comments show the illustrative content.
let transcript = Transcript(entries: [
    .instructions( ... ),  // "You are a helpful assistant"

    .prompt( ... ),        // "What's the weather in Pittsburgh?"
    .toolCalls( ... ),     // getWeather(location: "Pittsburgh")
    .toolOutput( ... ),    // 65°F, sunny
    .response( ... ),      // "It's 65°F and sunny in Pittsburgh"

    .prompt( ... ),        // "What's the address of Apple Park?"
    .response( ... ),      // "One Apple Park Way, Cupertino, CA 95014"
])

Read generation and context options from the request swift · at 10:42 ↗

// Parse generation and context options

/// Reads the per-request options an executor needs before generating.
///
/// - Parameters:
///   - request: The generation request carrying context and generation options.
///   - model: The model the request targets.
///   - channel: The channel output would be streamed into (unused in this excerpt).
/// - Throws: Declared `throws` to match the executor entry point; this excerpt throws nothing.
func respond(
    to request: LanguageModelExecutorGenerationRequest,
    model: MyLanguageModel,
    streamingInto channel: LanguageModelExecutorGenerationChannel
) async throws {
    // Context options: how much reasoning was requested.
    let contextOptions = request.contextOptions
    let reasoningLevel = contextOptions.reasoningLevel

    // Generation options: sampling temperature and the response token cap.
    let generationOptions = request.generationOptions
    let temperature = generationOptions.temperature
    let maxTokens = generationOptions.maximumResponseTokens
}

Stream tokens and metadata through the channel swift · at 11:47 ↗

// Streaming text tokens

/// Streams a response in three steps: response metadata, prompt token usage, then text deltas.
// NOTE(review): the parameter list is elided (`...`), and `promptTokens`, `cachedTokens`,
// and `tokens` are not defined in this excerpt — confirm where they come from.
func respond( ... ) async throws {
    // 1. Report metadata
    await channel.send(.response(action: .updateMetadata([
        "modelID": "my-model-2026-06-08",
        "requestID": request.id.uuidString
    ])))
    // 2. Report prompt token usage before generating
    // Output counts are reported as zero here because nothing has been generated yet.
    await channel.send(.response(action: .updateUsage(
        input: .init(totalTokenCount: promptTokens, cachedTokenCount: cachedTokens),
        output: .init(totalTokenCount: 0, reasoningTokenCount: 0)
    )))
    // 3. Stream text deltas as the model generates
    // An error thrown by the token sequence ends the loop and propagates to the caller.
    for try await token in tokens {
        await channel.send(.response(action: .appendText(token)))
    }
}

Honor the developer's intent or throw swift · at 13:33 ↗

// Honor the developer's intention where possible

// The developer asked for greedy sampling, but our service only accepts a
// temperature, so translate greedy into temperature 0.
let samplingKind = request.generationOptions.sampling?.kind
if samplingKind == .greedy {
    serviceRequest.temperature = 0
}

// Otherwise, throw an error

// When both a schema and a token budget are present, reject a budget that is too
// small to satisfy the schema.
if let schema = request.schema,
   let budget = request.generationOptions.maximumResponseTokens {
    let requiredTokens = minimumTokens(for: schema)
    if budget < requiredTokens {
        throw LanguageModelError.unsupportedCapability(
            .init(
                capability: .guidedGeneration,
                debugDescription: "Token budget too small to satisfy this schema."
            )
        )
    }
}

Built-in errors that any model can throw swift · at 13:57 ↗

// Built-in errors that any model can throw

/// The framework's built-in error cases, each annotated with when it applies.
// NOTE(review): associated-value payloads are elided in this excerpt (shown as empty
// parentheses) — see the framework reference for the real payload types.
public enum LanguageModelError: LocalizedError, CustomDebugStringConvertible {
    // Transcript grew past the model's context window. Trim entries and retry.
    case contextSizeExceeded(     )
    // Too many requests in a short window. Space them out or reduce load.
    case rateLimited(     )
    // Model declined to answer. Fall back to a message of your choosing.
    case refusal(     )
    // Safety guardrails tripped on the prompt or the response.
    case guardrailViolation(     )
    // Model lacks a feature you used, such as guided generation or tools.
    case unsupportedCapability(     )
    // Prompt contains content the model can't process (bad files, unknown formats).
    case unsupportedTranscriptContent(     )
    // A generation guide (e.g., a regex pattern) isn't supported by this model.
    case unsupportedGenerationGuide(     )
    // Prompt asked for output in a language or locale the model doesn't support.
    case unsupportedLanguageOrLocale(     )
    // Request timed out before the model produced a response.
    case timeout(     )
}

Handle errors from your model executor swift · at 14:14 ↗

// Custom errors

/// Service-specific errors a model executor can throw in addition to the
/// framework's built-in errors.
public enum MyModelError: Error, LocalizedError {
    /// User hit monthly token limit. Prompt upgrade or wait for reset.
    case exceededSubscriptionTierLimit
    /// Model variant isn't enabled on this account.
    case modelNotProvisioned
    /// Billing or policy review locked this account.
    case accountSuspended

    /// A localized, user-facing message for the error.
    ///
    /// The switch covers every case and has no `default`, so adding a new case
    /// becomes a compile error here until it gets its own message.
    public var errorDescription: String? {
        switch self {
        case .exceededSubscriptionTierLimit:
            String(localized: "Your plan limit has been reached.")
        case .modelNotProvisioned:
            String(localized: "This model isn't enabled on your account.")
        case .accountSuspended:
            String(localized: "Your account has been suspended.")
        }
    }
}

Attach custom metadata to responses swift · at 16:08 ↗

// Attach service-specific performance metadata

// Measure wall-clock throughput and latency for this request.
let elapsed = Date().timeIntervalSince(startTime)
// `Date` is not monotonic, so `elapsed` can be zero or negative (very fast response
// or a clock adjustment). Report 0 rather than an infinite, NaN, or negative rate.
let tokensPerSecond = elapsed > 0 ? Double(tokenCount) / elapsed : 0
// Falls back to 0 when no first-token timestamp was recorded.
let timeToFirstToken = firstTokenTime?.timeIntervalSince(startTime) ?? 0

// NOTE(review): the streaming example in this file reports metadata with
// `.response(action: .updateMetadata(...))`; confirm `.metadataUpdate` is the intended event here.
await channel.send(.metadataUpdate([
    "tokensPerSecond": tokensPerSecond,
    "timeToFirstToken": timeToFirstToken
]))

Define and use custom Transcript segments swift · at 17:05 ↗

// Define a custom segment
/// A custom transcript segment that refers to an audio file by URL.
public struct AudioSegment: Transcript.CustomSegment {
    /// Identifier for this segment.
    public var id: String
    /// URL of the audio file this segment carries.
    public var content: URL

    /// Creates an audio segment.
    ///
    /// Declared explicitly because the memberwise initializer Swift synthesizes
    /// for a struct is at most `internal`; without a `public` initializer, code
    /// outside the defining module could not construct the segment.
    ///
    /// - Parameters:
    ///   - id: Identifier for this segment.
    ///   - content: URL of the audio file.
    public init(id: String, content: URL) {
        self.id = id
        self.content = content
    }
}

// Pass it in a prompt
// Build a segment with a fresh UUID string as its id and a file URL as its content.
let recording = AudioSegment(id: UUID().uuidString, content: URL(filePath: "/path/to/recording.m4a"))
// The trailing closure lists the prompt's parts: a text question followed by the custom segment.
let response = try await session.respond {
    "Where was Frank Lloyd Wright's original architecture school located?"
    recording
}

// Emit a custom segment from the executor
// Forward each generated audio file to the session as an `AudioSegment`.
for try await event in stream {
    switch event {
    case let .audioFileGenerated(file):
        let segment = AudioSegment(id: file.id, content: file.url)
        await channel.send(.response(action: .updateCustomSegment(segment)))
    }
}

Implement server-side tools in your model swift · at 18:09 ↗

// Configure server-side tools
/// A model that can be configured with tools the service runs on its own side.
public struct MyLanguageModel: LanguageModel {
    /// A tool executed by the service rather than by the app.
    public struct ServerTool: Sendable {
        /// The web search tool (definition elided in this excerpt).
        public static let webSearch: ServerTool = ...
    }

    /// The server-side tools enabled for this model instance.
    ///
    /// Stored so the executor can read them back (for example to configure its
    /// service client); previously the initializer discarded the argument.
    public let serverTools: [ServerTool]

    /// Creates a model with the given server-side tools.
    /// - Parameter serverTools: Tools to enable on the service; defaults to none.
    public init(serverTools: [ServerTool] = []) {
        self.serverTools = serverTools
    }
}

// Surface tool results through the channel
// Hand the model's configured server-side tools to the service client and start the request.
let client = MyServerClient(serverTools: model.serverTools)
let response = try await client.send(prompt: .init(request))

// Translate each service chunk into the matching channel action.
for try await chunk in response {
    switch chunk {
    case let .webSearch(result):
        // Web search results are surfaced as a custom transcript segment.
        let segment = WebSearchSegment(url: result.url, content: result.html)
        await channel.send(.response(action: .updateCustomSegment(segment)))
    case let .textDelta(delta):
        // Text deltas are appended together with the token count the service reported.
        let append = LanguageModelExecutorGenerationChannel
        await channel.send(.response(action: .appendText(
            delta.text, tokenCount: delta.tokenCount
        )))
    }
}

Resources

[documentation] Foundation Models
[documentation] Core AI Models
[documentation] MLX Swift LM on GitHub

Build AI-powered scripts with the fm CLI and Python SDK

WWDC26 · 6 snippets

17 min
Build with the new Apple Foundation Model on Private Cloud Compute

WWDC26 · 7 snippets

11 min
What’s new in the Foundation Models framework

WWDC26 · 7 snippets

21 min
Build agentic app experiences with the Foundation Models framework

WWDC26 · 17 snippets

22 min

Chapters

Code shown on screen · 14 snippets

Resources

Related sessions

Build AI-powered scripts with the fm CLI and Python SDK

Build with the new Apple Foundation Model on Private Cloud Compute

What’s new in the Foundation Models framework

Build agentic app experiences with the Foundation Models framework