2022 · Essentials · Swift
WWDC22 · 23 min · Essentials / Swift
Meet Swift Regex
Learn how you can process strings more effectively when you take advantage of Swift Regex. Come for concise literals but stay for Regex builders — a new, declarative approach to string processing. We’ll also explore the Unicode models in String and share how Swift Regex can make Unicode-correct processing easy.
Watch at developer.apple.com ↗
Code shown on screen · 24 snippets
Processing collections
// One statement line. Fields are separated by runs of two or more spaces; the single
// spaces inside "Doug's Dugout Dogs" belong to the field itself.
let transaction = "DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27"
// Splitting on every whitespace character also breaks the institution name apart.
let fragments = transaction.split(whereSeparator: \.isWhitespace)
// ["DEBIT", "03/05/2022", "Doug\'s", "Dugout", "Dogs", "$33.27"]
Low-level index manipulation
var slice = transaction[...]
/// Extracts the next field from `slice` and advances `slice` to the start of the
/// following field.
///
/// A field ends at a tab, at a whitespace character that is immediately followed by
/// another whitespace character, at a whitespace character that is the last character
/// of `slice`, or at the end of `slice`. A single whitespace character followed by
/// non-whitespace is kept as part of the field.
///
/// - Returns: The field, without the separator that terminated it.
func extractField() -> Substring {
  // Default: no separator found, so the field runs to the end of the slice.
  var fieldEnd = slice.endIndex
  var cursor = slice.startIndex

  // Walk from one whitespace character (tabs included) to the next.
  while let whitespaceIdx = slice[cursor...].firstIndex(where: \.isWhitespace) {
    let nextIdx = slice.index(after: whitespaceIdx)
    // A tab is a separator on its own; any other whitespace needs a second
    // whitespace character (or the end of the slice) right after it.
    let endsField = slice[whitespaceIdx] == "\t"
      || nextIdx == slice.endIndex
      || slice[nextIdx].isWhitespace
    if endsField {
      fieldEnd = whitespaceIdx
      break
    }
    // A lone space inside the field: keep scanning after it.
    cursor = nextIdx
  }

  let field = slice[..<fieldEnd]
  // Drop the separator so the next call starts on the next field.
  slice = slice[fieldEnd...].drop(while: \.isWhitespace)
  return field
}
let kind = extractField()
let date = try Date(String(extractField()), strategy: Date.FormatStyle(date: .numeric))
let account = extractField()
let amount = try Decimal(String(extractField()), format: .currency(code: "USD"))
Regex literals
// Regex literals
let digits = /\d+/
// digits: Regex<Substring>
Regex created at run-time
// Run-time construction
let runtimeString = #"\d+"#
let digits = try Regex(runtimeString)
// digits: Regex<AnyRegexOutput>
Regex builder
// Regex builders
let digits = OneOrMore(.digit)
// digits: Regex<Substring>
Split approach with a regex literal
// A field separator is a tab or a run of two or more whitespace characters, so the
// single spaces inside "Doug's Dugout Dogs" stay within one fragment.
let transaction = "DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27"
let fragments = transaction.split(separator: /\s{2,}|\t/)
// ["DEBIT", "03/05/2022", "Doug's Dugout Dogs", "$33.27"]
Normalize field separators
// Every field separator (a tab, or a run of two or more whitespace characters) is
// rewritten to a single tab; single spaces inside a field are left alone.
let transaction = "DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27"
let normalized = transaction.replacing(/\s{2,}|\t/, with: "\t")
// DEBIT»03/05/2022»Doug's Dugout Dogs»$33.27
Create a Regex builder
// CREDIT 03/02/2022 Payroll from employer $200.23
// CREDIT 03/03/2022 Suspect A $2,000,000.00
// DEBIT 03/03/2022 Ted's Pet Rock Sanctuary $2,000,000.00
// DEBIT 03/05/2022 Doug's Dugout Dogs $33.27
import RegexBuilder
let fieldSeparator = /\s{2,}|\t/
let transactionMatcher = Regex {
/CREDIT|DEBIT/
fieldSeparator
One(.date(.numeric, locale: Locale(identifier: "en_US"), timeZone: .gmt))
fieldSeparator
OneOrMore {
NegativeLookahead { fieldSeparator }
CharacterClass.any
}
fieldSeparator
One(.localizedCurrency(code: "USD").locale(Locale(identifier: "en_US")))
}
Use Captures to extract portions of input
let fieldSeparator = /\s{2,}|\t/
let transactionMatcher = Regex {
Capture { /CREDIT|DEBIT/ }
fieldSeparator
Capture { One(.date(.numeric, locale: Locale(identifier: "en_US"), timeZone: .gmt)) }
fieldSeparator
Capture {
OneOrMore {
NegativeLookahead { fieldSeparator }
CharacterClass.any
}
}
fieldSeparator
Capture { One(.localizedCurrency(code: "USD").locale(Locale(identifier: "en_US"))) }
}
// transactionMatcher: Regex<(Substring, Substring, Date, Substring, Decimal)>
Plot twist!
// The statement being processed: a header row, a 64-character rule, then one
// transaction per line. Columns are separated by runs of two or more spaces, so
// single spaces inside an institution name stay part of that field.
// Declared `var` because the find-and-replace step rewrites the dates in place with
// the mutating `replace(_:with:)`.
private var ledger = """
KIND      DATE          INSTITUTION                AMOUNT
----------------------------------------------------------------
CREDIT    03/01/2022    Payroll from employer      $200.23
CREDIT    03/03/2022    Suspect A                  $2,000,000.00
DEBIT     03/03/2022    Ted's Pet Rock Sanctuary   $2,000,000.00
DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27
DEBIT     06/03/2022    Oxford Comma Supply Ltd.   £57.33
"""
// 😱
Use named captures
let regex = #/
(?<date> \d{2} / \d{2} / \d{4})
(?<middle> \P{currencySymbol}+)
(?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>
Use Foundation's date parser
let regex = #/
(?<date> \d{2} / \d{2} / \d{4})
(?<middle> \P{currencySymbol}+)
(?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>
/// Selects the date parse strategy to use, keyed by a currency symbol.
///
/// - Parameter currency: The currency symbol to dispatch on.
/// - Returns: A numeric date strategy in GMT with the `en_US` locale for `"$"`, or
///   with the `en_GB` locale for `"£"`.
func pickStrategy(_ currency: Substring) -> Date.ParseStrategy {
switch currency {
case "$": return .date(.numeric, locale: Locale(identifier: "en_US"), timeZone: .gmt)
case "£": return .date(.numeric, locale: Locale(identifier: "en_GB"), timeZone: .gmt)
// Any other symbol is treated as unrecoverable and stops the program.
default: fatalError("We found another one!")
}
} Find and replace
let regex = #/
(?<date> \d{2} / \d{2} / \d{4})
(?<middle> \P{currencySymbol}+)
(?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>
func pickStrategy(_ currency: Substring) -> Date.ParseStrategy { … }
ledger.replace(regex) { match -> String in
let date = try! Date(String(match.date), strategy: pickStrategy(match.currency))
// ISO 8601, it's the only way to be sure
let newDate = date.formatted(.iso8601.year().month().day())
return newDate + match.middle + match.currency
}
A zombie love story
// Three Characters: woman zombie, sparkling heart, brain.
// The woman zombie is a ZWJ sequence (zombie + U+200D + female sign + U+FE0F). Its
// invisible scalars are written as escapes so they cannot be lost when the text is
// copied, which would split the zombie into two separate Characters.
let aZombieLoveStory = "🧟\u{200D}\u{2640}\u{FE0F}💖🧠"
// Characters: 🧟♀️, 💖, 🧠
A zombie love story in unicode scalars
aZombieLoveStory.unicodeScalars
// Unicode scalar values: U+1F9DF, U+200D, U+2640, U+FE0F, U+1F496, U+1F9E0
A zombie love story in UTF8
aZombieLoveStory.utf8
// UTF-8 code units: F0 9F A7 9F E2 80 8D E2 99 80 EF B8 8F F0 9F 92 96 F0 9F A7 A0
Unicode canonical equivalence
"café".elementsEqual("cafe\u{301}")
// true
String's views are compared at binary level
"café".elementsEqual("cafe\u{301}")
// true
"café".unicodeScalars.elementsEqual("cafe\u{301}".unicodeScalars)
// false
"café".utf8.elementsEqual("cafe\u{301}".utf8)
// false
Unicode processing
switch ("🧟♀️💖🧠", "The Brain Cafe\u{301}") {
case (/.\N{SPARKLING HEART}./, /.*café/.ignoresCase()):
print("Oh no! 🧟♀️💖🧠, but 🧠💖☕️!")
default:
print("No conflicts found")
}
Complex scalar processing
// The woman zombie is a ZWJ sequence (zombie + U+200D + female sign + U+FE0F). Its
// invisible scalars are written as escapes so the sequence stays one Character.
let input = "Oh no! 🧟\u{200D}\u{2640}\u{FE0F}💖🧠, but 🧠💖☕️!"
// Default (grapheme cluster) semantics: `.` matches a whole Character, so the match
// is the complete woman zombie, the sparkling heart, and the brain.
input.firstMatch(of: /.\N{SPARKLING HEART}./)
// Unicode scalar semantics: `.` matches a single scalar, so the match starts at
// U+FE0F (the last scalar of the zombie sequence), followed by the heart and brain.
input.firstMatch(of: /.\N{SPARKLING HEART}./.matchingSemantics(.unicodeScalar))
// ️💖🧠
Live transaction matcher
let timestamp = Regex { ... } // proprietary
let details = try Regex(inputString)
let amountMatcher = /[\d.]+/
// CREDIT <proprietary> <redacted> 200.23 A1B34EFF ...
let fieldSeparator = /\s{2,}|\t/
let transactionMatcher = Regex {
Capture { /CREDIT|DEBIT/ }
fieldSeparator
Capture { timestamp }
fieldSeparator
Capture { details }
fieldSeparator
// ...
}
Replace field separator
let field = OneOrMore {
NegativeLookahead { fieldSeparator }
CharacterClass.any
}
Use TryCapture
// CREDIT <proprietary> <redacted> 200.23 A1B34EFF ...
let fieldSeparator = /\s{2,}|\t/
let field = OneOrMore {
NegativeLookahead { fieldSeparator }
CharacterClass.any
}
let transactionMatcher = Regex {
Capture { /CREDIT|DEBIT/ }
fieldSeparator
TryCapture(field) { timestamp ~= $0 ? $0 : nil }
fieldSeparator
TryCapture(field) { details ~= $0 ? $0 : nil }
fieldSeparator
// ...
}
Fixing the scaling issues
// CREDIT <proprietary> <redacted> 200.23 A1B34EFF ...
let fieldSeparator = Local { /\s{2,}|\t/ }
let field = OneOrMore {
NegativeLookahead { fieldSeparator }
CharacterClass.any
}
let transactionMatcher = Regex {
Capture { /CREDIT|DEBIT/ }
fieldSeparator
TryCapture(field) { timestamp ~= $0 ? $0 : nil }
fieldSeparator
TryCapture(field) { details ~= $0 ? $0 : nil }
fieldSeparator
// ...
}
Related sessions
-
38 min -
22 min -
21 min -
3 min