2022 Swift
Swift Regex: Beyond the basics
Go beyond the basics of string processing with Swift Regex. We’ll share an overview of Regex and how it works, explore Foundation’s rich data parsers and discover how to integrate your own, and delve into captures. We’ll also provide best practices for matching strings and wielding Regex-powered algorithms with ease.
Watch at developer.apple.com ↗

Code shown on screen · 20 snippets
Regex matching "Hi, WWDC22!"
Regex {
"Hi, WWDC"
Repeat(.digit, count: 2)
"!"
}

Simple Regex from a string
let input = "name: John Appleseed, user_id: 100"
let regex = try Regex(#"user_id:\s*(\d+)"#)
if let match = input.firstMatch(of: regex) {
print("Matched: \(match[0])")
print("User ID: \(match[1])")
}

Simple Regex from a literal
let input = "name: John Appleseed, user_id: 100"
let regex = /user_id:\s*(\d+)/
if let match = input.firstMatch(of: regex) {
print("Matched: \(match.0)")
print("User ID: \(match.1)")
}

Simple regex builder
import Foundation
import RegexBuilder

// Sample input containing the `user_id` field to extract.
let input = "name: John Appleseed, user_id: 100"

// Matches "user_id:", the whitespace after it, and captures the number as an `Int`.
// Foundation's integer parser is created with `localizedInteger(locale:)`, so a
// locale is passed explicitly; pinning it keeps digit parsing independent of the
// device's current locale.
let regex = Regex {
    "user_id:"
    OneOrMore(.whitespace)
    Capture(.localizedInteger(locale: Locale(identifier: "en_US")))
}
if let match = input.firstMatch(of: regex) {
print("Matched: \(match.0)")
print("User ID: \(match.1)")
}

A trivial Regex interpreted by the Regex engine
let regex = Regex {
OneOrMore("a")
OneOrMore(.digit)
}
let match = "aaa12".wholeMatch(of: regex)

Regex-powered algorithms
let input = "name: John Appleseed, user_id: 100"
let regex = /user_id:\s*(\d+)/
input.firstMatch(of: regex) // Regex.Match<(Substring, Substring)>
input.wholeMatch(of: regex) // nil
input.prefixMatch(of: regex) // nil
input.starts(with: regex) // false
input.replacing(regex, with: "456") // "name: John Appleseed, 456"
input.trimmingPrefix(regex) // "name: John Appleseed, user_id: 100"
input.split(separator: /\s*,\s*/) // ["name: John Appleseed", "user_id: 100"]
switch "abc" {
case /\w+/:
print("It's a word!")
}

Use Foundation parsers in regex builder
let statement = """
DSLIP 04/06/20 Paypal $3,020.85
CREDIT 04/03/20 Payroll $69.73
DEBIT 04/02/20 Rent ($38.25)
DEBIT 03/31/20 Grocery ($27.44)
DEBIT 03/24/20 IRS ($52,249.98)
"""
let regex = Regex {
Capture(.date(format: "\(month: .twoDigits)/\(day: .twoDigits)/\(year: .twoDigits)"))
OneOrMore(.whitespace)
OneOrMore(.word)
OneOrMore(.whitespace)
Capture(.currency(code: "USD").sign(strategy: .accounting))
}

XCTest log regex (version 1)
import RegexBuilder
let regex = Regex {
"Test Suite '"
/[a-zA-Z][a-zA-Z0-9]*/
"' "
ChoiceOf {
"started"
"passed"
"failed"
}
" at "
OneOrMore(.any)
Optionally(".")
}

Test our Regex against some inputs
let testSuiteTestInputs = [
"Test Suite 'RegexDSLTests' started at 2022-06-06 09:41:00.001",
"Test Suite 'RegexDSLTests' failed at 2022-06-06 09:41:00.001.",
"Test Suite 'RegexDSLTests' passed at 2022-06-06 09:41:00.001."
]
for line in testSuiteTestInputs {
if let match = line.wholeMatch(of: regex) {
print("Matched: \(match.output)")
}
}

Example of capture
let regex = Regex {
"a"
Capture("b")
"c"
/d(e)f/
}
if let match = "abcdef".wholeMatch(of: regex) {
let (wholeMatch, b, e) = match.output
}

XCTest log regex (version 2, with captures)
import RegexBuilder
let regex = Regex {
"Test Suite '"
Capture(/[a-zA-Z][a-zA-Z0-9]*/)
"' "
Capture {
ChoiceOf {
"started"
"passed"
"failed"
}
}
" at "
Capture(OneOrMore(.any))
Optionally(".")
}

Test our Regex (version 2) against some inputs
let testSuiteTestInputs = [
"Test Suite 'RegexDSLTests' started at 2022-06-06 09:41:00.001",
"Test Suite 'RegexDSLTests' failed at 2022-06-06 09:41:00.001.",
"Test Suite 'RegexDSLTests' passed at 2022-06-06 09:41:00.001."
]
for line in testSuiteTestInputs {
if let (whole, name, status, dateTime) = line.wholeMatch(of: regex)?.output {
print("Matched: \"\(name)\", \"\(status)\", \"\(dateTime)\"")
}
}

XCTest log regex (version 3, with reluctant repetition)
import RegexBuilder
let regex = Regex {
"Test Suite '"
Capture(/[a-zA-Z][a-zA-Z0-9]*/)
"' "
Capture {
ChoiceOf {
"started"
"passed"
"failed"
}
}
" at "
Capture(OneOrMore(.any, .reluctant))
Optionally(".")
}

Example of transforming capture
Regex {
Capture {
OneOrMore(.digit)
} transform: {
Int($0) // Int.init?(_: some StringProtocol)
}
} // Regex<(Substring, Int?)>

Example of transforming capture and removing optionality
Regex {
TryCapture {
OneOrMore(.digit)
} transform: {
Int($0) // Int.init?(_: some StringProtocol)
}
} // Regex<(Substring, Int)>

XCTest log regex (version 4, with transforming capture)
/// Status keyword that can appear in a log line.
///
/// Every case's raw value is the identically spelled string, so
/// `TestStatus(rawValue:)` accepts exactly "started", "passed" or "failed".
enum TestStatus: String {
    case started, passed, failed
}
let regex = Regex {
"Test Suite '"
Capture(/[a-zA-Z][a-zA-Z0-9]*/)
"' "
TryCapture {
ChoiceOf {
"started"
"passed"
"failed"
}
} transform: {
TestStatus(rawValue: String($0))
}
" at "
Capture(OneOrMore(.any, .reluctant))
Optionally(".")
} // Regex<(Substring, Substring, TestStatus, Substring)>

XCTest log regex (version 5, with Foundation ISO 8601 date parser)
let regex = Regex {
"Test Suite '"
Capture(/[a-zA-Z][a-zA-Z0-9]*/)
"' "
TryCapture {
ChoiceOf {
"started"
"passed"
"failed"
}
} transform: {
TestStatus(rawValue: String($0))
}
" at "
Capture(.iso8601(
timeZone: .current, includingFractionalSeconds: true, dateTimeSeparator: .space))
Optionally(".")
} // Regex<(Substring, Substring, TestStatus, Date)>

XCTest log duration parser
import Foundation

// Sample XCTest log line that ends with the test duration in parentheses.
let input = "Test Case '-[RegexDSLTests testCharacterClass]' passed (0.001 seconds)."

// Lazily skips everything up to "(" and captures the duration as a `Double`.
// Foundation's floating-point parser is created with `localizedDouble(locale:)`,
// so a locale is passed explicitly; "en_US" matches the "." decimal separator
// used in the sample line.
let regex = Regex {
    "Test Case "
    OneOrMore(.any, .reluctant)
    "("
    Capture(.localizedDouble(locale: Locale(identifier: "en_US")))
    " seconds)."
}
if let match = input.wholeMatch(of: regex) {
print("Time: \(match.output)")
}

CDoubleParser definition
import Darwin
struct CDoubleParser: CustomConsumingRegexComponent {
typealias RegexOutput = Double
/// Parses a floating-point number with C's `strtod`, beginning at `index`.
///
/// - Parameters:
///   - input: The string being searched.
///   - index: Position in `input` where parsing starts.
///   - bounds: Range of `input` the match is confined to. Only text before
///     `bounds.upperBound` is handed to `strtod`.
/// - Returns: The index just past the parsed text together with the parsed
///   value, or `nil` when `strtod` consumes no characters.
func consuming(
    _ input: String, startingAt index: String.Index, in bounds: Range<String.Index>
) throws -> (upperBound: String.Index, output: Double)? {
    // Nothing can be consumed when the start already lies past the permitted range.
    guard index <= bounds.upperBound else { return nil }
    // Slice to `bounds.upperBound` rather than to the end of `input`, so the
    // parser never reads characters outside the range being matched.
    return input[index..<bounds.upperBound].withCString { startAddress in
        var endAddress: UnsafeMutablePointer<CChar>!
        let output = strtod(startAddress, &endAddress)
        // An end pointer that has not advanced means no number was parsed.
        guard endAddress > startAddress else { return nil }
        // The pointer distance is applied as an offset in the UTF-8 view.
        let parsedLength = startAddress.distance(to: endAddress)
        let upperBound = input.utf8.index(index, offsetBy: parsedLength)
        return (upperBound, output)
    }
}
}

Use CDoubleParser in regex builder
// Sample XCTest log line that ends with the test duration in parentheses.
let input = "Test Case '-[RegexDSLTests testCharacterClass]' passed (0.001 seconds)."

// Lazily skips to the opening parenthesis, then captures whatever
// `CDoubleParser` consumes as the `Double` output.
let regex = Regex {
    "Test Case "
    OneOrMore(.any, .reluctant)
    "("
    Capture(CDoubleParser())
    " seconds)."
} // Regex<(Substring, Double)>
if let match = input.wholeMatch(of: regex) {
print("Time: \(match.1)")
}

Resources
Related sessions
-
38 min -
23 min -
38 min