Dunfey · Hotel WWDC as data, est. 1983
Front desk everything
Years
Topics

2022 Swift

WWDC22 · 22 min · Swift

Swift Regex: Beyond the basics

Go beyond the basics of string processing with Swift Regex. We’ll share an overview of Regex and how it works, explore Foundation’s rich data parsers and discover how to integrate your own, and delve into captures. We’ll also provide best practices for matching strings and wielding Regex-powered algorithms with ease.

Watch at developer.apple.com ↗

Transcript all transcripts

Code shown on screen · 20 snippets

Regex matching "Hi, WWDC22!" swift · at 0:39 ↗
// Matches the literal text "Hi, WWDC", then exactly two digits, then "!".
Regex {
    "Hi, WWDC"
    // `count: 2` requires exactly two digit characters (e.g. "22").
    Repeat(.digit, count: 2)
    "!"
}
Simple Regex from a string swift · at 1:06 ↗
// Sample record containing the `user_id` field to extract.
let input = "name:  John Appleseed,  user_id:  100"

// The pattern is compiled from a string at run time, so construction can throw.
let regex = try Regex(#"user_id:\s*(\d+)"#)

if let found = input.firstMatch(of: regex) {
    // Index 0 is the whole match; index 1 is the first capture group.
    let whole = found[0]
    let userID = found[1]
    print("Matched: \(whole)")
    print("User ID: \(userID)")
}
Simple Regex from a literal swift · at 1:56 ↗
// Sample record containing the `user_id` field to extract.
let input = "name:  John Appleseed,  user_id:  100"

// Regex literal with one capture group for the digits after "user_id:".
let regex = /user_id:\s*(\d+)/

// Destructure the output tuple: whole match first, then the captured digits.
if let (whole, userID) = input.firstMatch(of: regex)?.output {
    print("Matched: \(whole)")
    print("User ID: \(userID)")
}
Simple regex builder swift · at 2:08 ↗
import Foundation
import RegexBuilder

// Sample record containing the `user_id` field to extract.
let input = "name:  John Appleseed,  user_id:  100"

// Matches "user_id:", at least one whitespace character, then an integer that
// is parsed into an `Int` by Foundation's integer parser.
let regex = Regex {
    "user_id:"
    OneOrMore(.whitespace)
    // `localizedInteger(locale:)` is provided by Foundation and needs an
    // explicit locale; a fixed locale keeps parsing independent of the
    // settings of the machine running the code.
    Capture(.localizedInteger(locale: Locale(identifier: "en_US")))
}

if let match = input.firstMatch(of: regex) {
    // `.0` is the whole matched text, `.1` the parsed integer.
    print("Matched: \(match.0)")
    print("User ID: \(match.1)")
}
A trivial Regex interpreted by the Regex engine swift · at 2:38 ↗
// One or more "a" characters followed by one or more digits.
let regex = Regex {
    OneOrMore("a")
    OneOrMore(.digit)
}

// Attempts to match the regex against the entire string "aaa12".
let match = "aaa12".wholeMatch(of: regex)
Regex-powered algorithms swift · at 3:49 ↗
// Sample record used to demonstrate the regex-powered string algorithms.
let input = "name:  John Appleseed,  user_id:  100"

// Captures the digits that follow "user_id:".
let regex = /user_id:\s*(\d+)/

// Matching: anywhere in the string, the whole string, or only at the start.
input.firstMatch(of: regex)           // Regex.Match<(Substring, Substring)>
input.wholeMatch(of: regex)           // nil
input.prefixMatch(of: regex)          // nil

// Collection-style algorithms that accept a regex.
input.starts(with: regex)             // false
input.replacing(regex, with: "456")   // "name:  John Appleseed,  456"
input.trimmingPrefix(regex)           // "name:  John Appleseed,  user_id:  100"
input.split(separator: /\s*,\s*/)     // ["name:  John Appleseed", "user_id:  100"]

// A regex can be used as a `case` pattern.
switch "abc" {
case /\w+/:
    print("It's a word!")
default:
    // A regex case is an expression pattern and can never make a switch over
    // `String` exhaustive, so a `default` is required for this to compile.
    break
}
Use Foundation parsers in regex builder swift · at 5:14 ↗
// Sample bank statement: kind, date (MM/dd/yy), payee, and a US-dollar amount
// where parentheses denote a debit (accounting notation).
let statement = """
    DSLIP    04/06/20 Paypal  $3,020.85
    CREDIT   04/03/20 Payroll $69.73
    DEBIT    04/02/20 Rent    ($38.25)
    DEBIT    03/31/20 Grocery ($27.44)
    DEBIT    03/24/20 IRS     ($52,249.98)
    """

// The statement is machine-generated, US-formatted text, so the parsers are
// pinned to a fixed locale instead of depending on the current user's settings.
let statementLocale = Locale(identifier: "en_US")

// Captures the transaction date as a `Date` and the amount as a `Decimal`.
let regex = Regex {
    // The regex date parser requires an explicit locale and time zone.
    Capture(.date(
        format: "\(month: .twoDigits)/\(day: .twoDigits)/\(year: .twoDigits)",
        locale: statementLocale,
        timeZone: .gmt))
    OneOrMore(.whitespace)
    OneOrMore(.word)
    OneOrMore(.whitespace)
    // The currency format style is itself a regex component; the accounting
    // sign strategy is what lets "($38.25)" parse as a negative amount.
    Capture(Decimal.FormatStyle.Currency(code: "USD", locale: statementLocale).sign(strategy: .accounting))
}
XCTest log regex (version 1) swift · at 6:24 ↗
import RegexBuilder

// Matches an XCTest "Test Suite" log line. This version declares no captures.
let regex = Regex {
    "Test Suite '"
    // Suite name: an ASCII letter followed by any number of letters or digits.
    /[a-zA-Z][a-zA-Z0-9]*/
    "' "
    // Exactly one of the three status keywords.
    ChoiceOf {
        "started"
        "passed"
        "failed"
    }
    " at "
    // Everything after " at " (the timestamp in the sample log lines).
    OneOrMore(.any)
    // A trailing period may or may not be present.
    Optionally(".")
}
Test our Regex against some inputs swift · at 6:25 ↗
// Sample XCTest log lines: one without and two with a trailing period.
let testSuiteTestInputs = [
    "Test Suite 'RegexDSLTests' started at 2022-06-06 09:41:00.001",
    "Test Suite 'RegexDSLTests' failed at 2022-06-06 09:41:00.001.",
    "Test Suite 'RegexDSLTests' passed at 2022-06-06 09:41:00.001."
]

// Keep only the lines the regex matches in full and print each match output,
// preserving the order of the inputs.
testSuiteTestInputs
    .compactMap { $0.wholeMatch(of: regex) }
    .forEach { print("Matched: \($0.output)") }
Example of capture swift · at 10:28 ↗
// Builder regex with two captures: the explicit `Capture("b")` and the `(e)`
// group inside the embedded regex literal.
let regex = Regex {
   "a"
   Capture("b")
   "c"
   /d(e)f/
}

if let match = "abcdef".wholeMatch(of: regex) {
    // The output tuple is destructured as: whole match, then each capture in
    // the order it appears in the regex.
    let (wholeMatch, b, e) = match.output
}
XCTest log regex (version 2, with captures) swift · at 11:10 ↗
import RegexBuilder

// Matches an XCTest "Test Suite" log line and captures three pieces:
// the suite name, the status keyword, and the text after " at ".
let regex = Regex {
    "Test Suite '"
    // Capture 1: suite name (a letter followed by letters or digits).
    Capture(/[a-zA-Z][a-zA-Z0-9]*/)
    "' "
    // Capture 2: whichever status keyword matched.
    Capture {
        ChoiceOf {
            "started"
            "passed"
            "failed"
        }
    }
    " at "
    // Capture 3: everything after " at "; no repetition behavior is specified
    // here, so the default applies.
    Capture(OneOrMore(.any))
    Optionally(".")
}
Test our Regex (version 2) against some inputs swift · at 11:21 ↗
// Sample XCTest log lines: one without and two with a trailing period.
let testSuiteTestInputs = [
    "Test Suite 'RegexDSLTests' started at 2022-06-06 09:41:00.001",
    "Test Suite 'RegexDSLTests' failed at 2022-06-06 09:41:00.001.",
    "Test Suite 'RegexDSLTests' passed at 2022-06-06 09:41:00.001."
]

for line in testSuiteTestInputs {
    // Skip lines that do not match in full; the whole-match element of the
    // output tuple is not needed, only the three captures.
    guard let (_, name, status, dateTime) = line.wholeMatch(of: regex)?.output else {
        continue
    }
    print("Matched: \"\(name)\", \"\(status)\", \"\(dateTime)\"")
}
XCTest log regex (version 3, with reluctant repetition) swift · at 11:51 ↗
import RegexBuilder

// Matches an XCTest "Test Suite" log line and captures the suite name, the
// status keyword, and the text after " at ".
let regex = Regex {
    "Test Suite '"
    // Capture 1: suite name (a letter followed by letters or digits).
    Capture(/[a-zA-Z][a-zA-Z0-9]*/)
    "' "
    // Capture 2: whichever status keyword matched.
    Capture {
        ChoiceOf {
            "started"
            "passed"
            "failed"
        }
    }
    " at "
    // Capture 3: `.reluctant` makes the repetition take as few characters as
    // possible, leaving a trailing period for `Optionally(".")` below.
    Capture(OneOrMore(.any, .reluctant))
    Optionally(".")
}
Example of transforming capture swift · at 15:20 ↗
// Captures a run of digits and passes the captured text through `Int.init`.
// That initializer is failable, so the capture is typed `Int?` (see the
// resulting regex type noted after the closing brace).
Regex {
    Capture {
        OneOrMore(.digit)
    } transform: {
        Int($0)     // Int.init?(_: some StringProtocol)
    }
} // Regex<(Substring, Int?)>
Example of transforming capture and removing optionality swift · at 15:55 ↗
// Same digit capture as with `Capture`, but using `TryCapture`: the transform
// still returns an optional, yet the capture is typed as non-optional `Int`
// (see the resulting regex type noted after the closing brace).
Regex {
    TryCapture {
        OneOrMore(.digit)
    } transform: {
        Int($0)     // Int.init?(_: some StringProtocol)
    }
} // Regex<(Substring, Int)>
XCTest log regex (version 4, with transforming capture) swift · at 16:21 ↗
/// Status keyword that appears in an XCTest "Test Suite" log line.
/// Each case's raw value is its own name ("started", "passed", "failed").
enum TestStatus: String {
    case started, passed, failed
}

// Matches an XCTest "Test Suite" log line, capturing the suite name, the status
// converted to `TestStatus`, and the text after " at ".
let regex = Regex {
    "Test Suite '"
    Capture(/[a-zA-Z][a-zA-Z0-9]*/)
    "' "
    TryCapture {
        ChoiceOf {
            "started"
            "passed"
            "failed"
        }
    } transform: { keyword in
        // The captured text is a `Substring`; `init(rawValue:)` needs a `String`.
        TestStatus(rawValue: String(keyword))
    }
    " at "
    Capture(OneOrMore(.any, .reluctant))
    Optionally(".")
} // Regex<(Substring, Substring, TestStatus, Substring)>
XCTest log regex (version 5, with Foundation ISO 8601 date parser) swift · at 17:23 ↗
// Matches an XCTest "Test Suite" log line, capturing the suite name, the status
// as a `TestStatus`, and the timestamp as a `Date` (see the resulting regex type
// noted after the closing brace). `TestStatus` is declared outside this snippet.
let regex = Regex {
    "Test Suite '"
    // Capture 1: suite name (a letter followed by letters or digits).
    Capture(/[a-zA-Z][a-zA-Z0-9]*/)
    "' "
    // Capture 2: status keyword converted via `TestStatus(rawValue:)`.
    TryCapture {
        ChoiceOf {
            "started"
            "passed"
            "failed"
        }
    } transform: {
        TestStatus(rawValue: String($0))
    }
    " at "
    // Capture 3: ISO 8601 timestamp with fractional seconds, using a space
    // between the date and the time, read in the current time zone.
    Capture(.iso8601(
        timeZone: .current, includingFractionalSeconds: true, dateTimeSeparator: .space))
    Optionally(".")
} // Regex<(Substring, Substring, TestStatus, Date)>
XCTest log duration parser swift · at 18:19 ↗
// Sample XCTest "Test Case" log line ending with the duration in seconds.
let input = "Test Case '-[RegexDSLTests testCharacterClass]' passed (0.001 seconds)."

// Skips ahead to the opening parenthesis and captures the duration as a `Double`.
let regex = Regex {
    "Test Case "
    // Reluctant: consume as little as possible before the "(".
    OneOrMore(.any, .reluctant)
    "("
    // `localizedDouble(locale:)` needs an explicit locale. A fixed locale keeps
    // "0.001" parsing with "." as the decimal separator whatever the current
    // user's settings are.
    Capture(.localizedDouble(locale: Locale(identifier: "en_US")))
    " seconds)."
}

if let match = input.wholeMatch(of: regex) {
    // `output` is the whole tuple: (matched text, parsed duration).
    print("Time: \(match.output)")
}
CDoubleParser definition swift · at 19:16 ↗
import Darwin

/// A regex component that parses a floating-point number using C's `strtod`.
struct CDoubleParser: CustomConsumingRegexComponent {
    typealias RegexOutput = Double

    /// Attempts to parse a `Double` from `input`, beginning at `index`.
    ///
    /// - Parameters:
    ///   - input: The string being matched.
    ///   - index: The position at which parsing starts.
    ///   - bounds: The range of `input` the match is allowed to use.
    /// - Returns: The position just past the parsed number together with the
    ///   parsed value, or `nil` if no number could be parsed at `index`.
    func consuming(
        _ input: String, startingAt index: String.Index, in bounds: Range<String.Index>
    ) throws -> (upperBound: String.Index, output: Double)? {
        // Nothing to parse when there is no text left inside `bounds`.
        guard index < bounds.upperBound else { return nil }

        // Hand `strtod` only the text inside `bounds`, so it can never consume
        // characters beyond the range the match is allowed to use.
        return input[index..<bounds.upperBound].withCString { startAddress in
            var endAddress: UnsafeMutablePointer<CChar>?
            let output = strtod(startAddress, &endAddress)
            guard let endAddress else { return nil }

            // `strtod` leaves the end pointer at the start when nothing was parsed.
            let parsedLength = startAddress.distance(to: UnsafePointer(endAddress))
            guard parsedLength > 0 else { return nil }

            // The C string is UTF-8, so the byte count is an offset in the UTF-8 view.
            let upperBound = input.utf8.index(index, offsetBy: parsedLength)
            return (upperBound, output)
        }
    }
}
Use CDoubleParser in regex builder swift · at 20:13 ↗
// Sample XCTest "Test Case" log line ending with the duration in seconds.
let input = "Test Case '-[RegexDSLTests testCharacterClass]' passed (0.001 seconds)."

// Skips ahead to the opening parenthesis and captures the duration using the
// custom `CDoubleParser` component.
let regex = Regex {
    "Test Case "
    OneOrMore(.any, .reluctant)
    "("
    Capture(CDoubleParser())
    " seconds)."
} // Regex<(Substring, Double)>

// Only the captured duration is needed, so bind it directly.
if let seconds = input.wholeMatch(of: regex)?.1 {
    print("Time: \(seconds)")
}

Resources