Dunfey · Hotel WWDC as data, est. 1983
Front desk everything
Years
Topics

2022 · Essentials / Swift

WWDC22 · 23 min · Essentials / Swift

Meet Swift Regex

Learn how you can process strings more effectively when you take advantage of Swift Regex. Come for concise literals but stay for Regex builders — a new, declarative approach to string processing. We’ll also explore the Unicode models in String and share how Swift Regex can make Unicode-correct processing easy.

Watch at developer.apple.com ↗

Transcript all transcripts

Code shown on screen · 24 snippets

Processing collections swift · at 1:35 ↗
// Sample ledger line. Fields are separated by runs of spaces, while the
// institution name itself contains single spaces.
let transaction = "DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27"

// Splitting on every whitespace character also breaks the institution name
// apart, so the result has six fragments rather than four fields.
let fragments = transaction.split(whereSeparator: \.isWhitespace)
// ["DEBIT", "03/05/2022", "Doug's", "Dugout", "Dogs", "$33.27"]
Low-level index manipulation swift · at 1:49 ↗
// Mutable window over the whole of `transaction`; `extractField()` consumes it
// from the front, one field per call.
var slice = transaction[...]

// Returns the next field from the front of `slice` and advances `slice` past
// the field and any whitespace that follows it.
//
// A field ends at the first tab, at the first whitespace character that is
// immediately followed by more whitespace, or at a whitespace character that
// is the last character of `slice`. A lone interior space is treated as part
// of the field. If no such boundary exists, the rest of `slice` is the field.
func extractField() -> Substring {
  var searchFrom = slice.startIndex
  var fieldEnd = slice.endIndex

  while let whitespaceIdx = slice[searchFrom...].firstIndex(where: \.isWhitespace) {
    let nextIdx = slice.index(after: whitespaceIdx)

    // `||` short-circuits, so `slice[nextIdx]` is only read when `nextIdx`
    // is a valid position.
    let endsField = slice[whitespaceIdx] == "\t"
      || nextIdx == slice.endIndex
      || slice[nextIdx].isWhitespace

    if endsField {
      fieldEnd = whitespaceIdx
      break
    }

    // A single space inside the field: keep scanning after it.
    searchFrom = nextIdx
  }

  // Take the field first, then move `slice` to the start of the next field.
  let field = slice[..<fieldEnd]
  slice = slice[fieldEnd...].drop(while: \.isWhitespace)
  return field
}

// Consume the four fields from the front of `slice`, in the order they appear
// in the ledger line. Each `extractField()` call advances `slice`.
let kind = extractField()
// The date field is copied into a String and parsed with a numeric date style.
let date = try Date(String(extractField()), strategy:  Date.FormatStyle(date: .numeric))
let account = extractField()
// The amount field is parsed as a currency value with code "USD".
let amount = try Decimal(String(extractField()), format: .currency(code: "USD"))
Regex literals swift · at 2:47 ↗
// Regex literals
// Pattern `\d+`: one or more digits, written directly between slashes.
let digits = /\d+/
// digits: Regex<Substring>
Regex created at run-time swift · at 3:20 ↗
// Run-time construction
// The pattern lives in a raw string (`#"..."#`), so the backslash is not
// treated as a string escape.
let runtimeString = #"\d+"#
// Building a regex from a string can fail, hence `try`.
let digits = try Regex(runtimeString)
// digits: Regex<AnyRegexOutput>
Regex builder swift · at 3:44 ↗
// Regex builders
// Declarative spelling of "one or more digits".
let digits = OneOrMore(.digit)
// digits: Regex<Substring>
Split approach with a regex literal swift · at 3:56 ↗
let transaction = "DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27"

// Separator pattern: two or more whitespace characters, or a single tab.
// Single spaces inside the institution name no longer split the field.
let fragments = transaction.split(separator: /\s{2,}|\t/)
// ["DEBIT", "03/05/2022", "Doug's Dugout Dogs", "$33.27"]
Normalize field separators swift · at 4:36 ↗
let transaction = "DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27"

// Replace every field separator (two or more whitespace characters, or a tab)
// with exactly one tab.
let normalized = transaction.replacing(/\s{2,}|\t/, with: "\t")
// NOTE(review): `»` in the line below presumably stands for the tab character.
// DEBIT»03/05/2022»Doug's Dugout Dogs»$33.27
Create a Regex builder swift · at 6:55 ↗
// CREDIT    03/02/2022    Payroll from employer         $200.23
// CREDIT    03/03/2022    Suspect A                     $2,000,000.00
// DEBIT     03/03/2022    Ted's Pet Rock Sanctuary      $2,000,000.00
// DEBIT     03/05/2022    Doug's Dugout Dogs            $33.27

import RegexBuilder
// A field separator is two or more whitespace characters, or a single tab.
let fieldSeparator = /\s{2,}|\t/
// Matches one ledger line: kind, date, institution, amount.
let transactionMatcher = Regex {
  // Transaction kind.
  /CREDIT|DEBIT/
  fieldSeparator
  // Date, matched by Foundation's numeric date parser (en_US locale, GMT).
  One(.date(.numeric, locale: Locale(identifier: "en_US"), timeZone: .gmt))
  fieldSeparator
  // Institution: take one character at a time, as long as a field separator
  // does not begin at the current position.
  OneOrMore {
    NegativeLookahead { fieldSeparator }
    CharacterClass.any
  }
  fieldSeparator
  // Amount, matched by Foundation's USD currency parser in the en_US locale.
  One(.localizedCurrency(code: "USD").locale(Locale(identifier: "en_US")))
}
Use Captures to extract portions of input swift · at 9:04 ↗
// A field separator is two or more whitespace characters, or a single tab.
let fieldSeparator = /\s{2,}|\t/
// Same shape as the uncaptured matcher, with each of the four fields wrapped
// in `Capture` so it appears in the match output. Separators are not captured.
let transactionMatcher = Regex {
  // Capture 1: transaction kind.
  Capture { /CREDIT|DEBIT/ }
  fieldSeparator

  // Capture 2: date, produced by the Foundation date parser.
  Capture { One(.date(.numeric, locale: Locale(identifier: "en_US"), timeZone: .gmt)) }
  fieldSeparator

  // Capture 3: institution, i.e. everything up to the next field separator.
  Capture {
    OneOrMore {
      NegativeLookahead { fieldSeparator }
      CharacterClass.any
    }
  }
  fieldSeparator
  // Capture 4: amount, produced by the Foundation currency parser.
  Capture { One(.localizedCurrency(code: "USD").locale(Locale(identifier: "en_US"))) }
}
// The first tuple element is the whole match, followed by the four captures.
// transactionMatcher: Regex<(Substring, Substring, Date, Substring, Decimal)>
Plot twist! swift · at 10:31 ↗
// Sample ledger text: a header row, a rule, and one transaction per line.
// The last entry is in pounds (£), and its date is written differently from
// the dollar entries.
//
// Declared `var` rather than `let`: the find-and-replace step calls the
// mutating `ledger.replace(regex)`, which cannot be used on a constant.
private var ledger = """
KIND      DATE          INSTITUTION                AMOUNT
----------------------------------------------------------------
CREDIT    03/01/2022    Payroll from employer      $200.23
CREDIT    03/03/2022    Suspect A                  $2,000,000.00
DEBIT     03/03/2022    Ted's Pet Rock Sanctuary   $2,000,000.00
DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27
DEBIT     06/03/2022    Oxford Comma Supply Ltd.   £57.33
"""
// 😱
Use named captures swift · at 10:53 ↗
// Extended regex literal (`#/ ... /#` spanning several lines): the spaces and
// line breaks inside the pattern are layout only, and `/` needs no escaping.
//   date     – two digits, "/", two digits, "/", four digits
//   middle   – one or more characters that are not currency symbols
//   currency – a single currency symbol
let regex = #/
  (?<date>     \d{2} / \d{2} / \d{4})
  (?<middle>   \P{currencySymbol}+)
  (?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>
Use Foundation's date parser swift · at 11:33 ↗
// Extended regex literal: whitespace inside the pattern is layout only.
// Named captures: `date` (dd/dd/dddd digits), `middle` (everything up to the
// next currency symbol), `currency` (the currency symbol itself).
let regex = #/
  (?<date>     \d{2} / \d{2} / \d{4})
  (?<middle>   \P{currencySymbol}+)
  (?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>

// Chooses the date-parsing strategy for a transaction from its currency
// symbol: "$" dates are parsed with the en_US locale and "£" dates with the
// en_GB locale, both as numeric dates in GMT.
// Traps on any other symbol.
func pickStrategy(_ currency: Substring) -> Date.ParseStrategy {
  let localeIdentifier: String
  switch currency {
  case "$": localeIdentifier = "en_US"
  case "£": localeIdentifier = "en_GB"
  default: fatalError("We found another one!")
  }
  return .date(.numeric, locale: Locale(identifier: localeIdentifier), timeZone: .gmt)
}
Find and replace swift · at 11:48 ↗
// Extended regex literal: whitespace inside the pattern is layout only.
// The three named captures together cover a transaction from its date up to
// and including its currency symbol.
let regex = #/
  (?<date>     \d{2} / \d{2} / \d{4})
  (?<middle>   \P{currencySymbol}+)
  (?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>

// Chooses the date-parsing strategy for a transaction from its currency
// symbol: "$" dates are parsed with the en_US locale and "£" dates with the
// en_GB locale, both as numeric dates in GMT.
// Traps on any other symbol.
func pickStrategy(_ currency: Substring) -> Date.ParseStrategy {
  let localeIdentifier: String
  switch currency {
  case "$": localeIdentifier = "en_US"
  case "£": localeIdentifier = "en_GB"
  default: fatalError("We found another one!")
  }
  return .date(.numeric, locale: Locale(identifier: localeIdentifier), timeZone: .gmt)
}

// Rewrite every match of `regex` inside `ledger` in place. `replace` mutates
// its receiver, so `ledger` has to be a mutable variable.
ledger.replace(regex) { match -> String in
  // Parse the date text with the strategy chosen from the currency symbol.
  // `try!` traps if the text does not parse.
  let date = try! Date(String(match.date), strategy: pickStrategy(match.currency))

  // ISO 8601, it's the only way to be sure
  let newDate = date.formatted(.iso8601.year().month().day())

  // Only the date changes; the middle text and currency symbol are kept.
  return newDate + match.middle + match.currency
}
A zombie love story swift · at 12:45 ↗
// Three user-perceived characters; the first one is built from several
// Unicode scalars.
let aZombieLoveStory = "🧟‍♀️💖🧠"
// Characters: 🧟‍♀️, 💖, 🧠
A zombie love story in unicode scalars swift · at 13:01 ↗
// The same string viewed as Unicode scalars: six scalars for three characters.
aZombieLoveStory.unicodeScalars
// Unicode scalar values: U+1F9DF, U+200D, U+2640, U+FE0F, U+1F496, U+1F9E0
A zombie love story in UTF8 swift · at 13:44 ↗
// The same string viewed as UTF-8 code units (bytes, shown in hexadecimal).
aZombieLoveStory.utf8
// UTF-8 code units: F0 9F A7 9F E2 80 8D E2 99 80 EF B8 8F F0 9F 92 96 F0 9F A7 A0
Unicode canonical equivalence swift · at 14:12 ↗
// Compares the two strings character by character. The right-hand string
// spells its last character as "e" followed by U+0301.
"café".elementsEqual("cafe\u{301}")
// true
String's views are compared at binary level swift · at 14:49 ↗
// Character-level comparison: the two spellings compare equal.
"café".elementsEqual("cafe\u{301}")
// true

// Scalar-level comparison: the two spellings are different scalar sequences.
"café".unicodeScalars.elementsEqual("cafe\u{301}".unicodeScalars)
// false

// Byte-level comparison: the UTF-8 encodings differ as well.
"café".utf8.elementsEqual("cafe\u{301}".utf8)
// false
Unicode processing swift · at 15:14 ↗
// Regexes used directly as `case` patterns against a pair of strings.
//   First pattern:  any character, SPARKLING HEART (written by its Unicode
//                   name), any character.
//   Second pattern: anything followed by "café", ignoring case. The input
//                   spells the last letter as "e" + U+0301.
switch ("🧟‍♀️💖🧠", "The Brain Cafe\u{301}") {
case (/.\N{SPARKLING HEART}./, /.*café/.ignoresCase()):
  print("Oh no! 🧟‍♀️💖🧠, but 🧠💖☕️!")
default:
  print("No conflicts found")
}
Complex scalar processing swift · at 15:54 ↗
let input = "Oh no! 🧟‍♀️💖🧠, but 🧠💖☕️!"

// With the default matching semantics, each `.` consumes a whole character,
// so the match begins with the complete zombie emoji.
input.firstMatch(of: /.\N{SPARKLING HEART}./)
// 🧟‍♀️💖🧠

// With Unicode-scalar semantics the reported match begins at the scalar just
// before the heart — the zombie emoji's final scalar, U+FE0F — so the result
// below starts with an invisible variation selector.
input.firstMatch(of: /.\N{SPARKLING HEART}./.matchingSemantics(.unicodeScalar))
// ️💖🧠
Live transaction matcher swift · at 17:56 ↗
// `{ ... }` is a placeholder: the real timestamp pattern is not shown.
let timestamp = Regex { ... } // proprietary
// Built at run time from `inputString` (defined elsewhere); construction can
// fail, hence `try`.
let details = try Regex(inputString)
// One or more digits or dots.
let amountMatcher = /[\d.]+/

// CREDIT    <proprietary>      <redacted>        200.23        A1B34EFF     ...
// A field separator is two or more whitespace characters, or a single tab.
let fieldSeparator = /\s{2,}|\t/
// Embeds the `timestamp` and `details` regexes directly as captured fields.
let transactionMatcher = Regex {
  Capture { /CREDIT|DEBIT/ }
  fieldSeparator

  Capture { timestamp }
  fieldSeparator

  Capture { details }
  fieldSeparator

  // ...
}
Replace field separator swift · at 18:26 ↗
// A generic field: one or more characters, taken one at a time for as long as
// a field separator does not begin at the current position.
// Relies on `fieldSeparator` being defined elsewhere.
let field = OneOrMore {
  NegativeLookahead { fieldSeparator }
  CharacterClass.any
}
Use TryCapture swift · at 18:55 ↗
// CREDIT    <proprietary>      <redacted>        200.23        A1B34EFF     ...
// A field separator is two or more whitespace characters, or a single tab.
let fieldSeparator = /\s{2,}|\t/
// A generic field: everything up to the next field separator.
let field = OneOrMore {
  NegativeLookahead { fieldSeparator }
  CharacterClass.any
}
// Each field is first delimited by `field`; the externally supplied regex is
// then applied to just that field's text inside the `TryCapture` closure.
let transactionMatcher = Regex {
  Capture { /CREDIT|DEBIT/ }
  fieldSeparator

  // Keep the field only if `timestamp` accepts it; the closure returns nil
  // otherwise, which rejects the capture.
  TryCapture(field) { timestamp ~= $0 ? $0 : nil }
  fieldSeparator

  // Same check against `details`.
  TryCapture(field) { details ~= $0 ? $0 : nil }
  fieldSeparator

  // ...
}
Fixing the scaling issues swift · at 21:45 ↗
// CREDIT    <proprietary>      <redacted>        200.23        A1B34EFF     ...
// The separator pattern is wrapped in `Local`, RegexBuilder's atomic group:
// once the separator has matched, the engine does not backtrack into it to
// try a different separator length.
let fieldSeparator = Local { /\s{2,}|\t/ } 
// A generic field: everything up to the next field separator.
let field = OneOrMore {
  NegativeLookahead { fieldSeparator }
  CharacterClass.any
}
let transactionMatcher = Regex {
  Capture { /CREDIT|DEBIT/ }
  fieldSeparator

  // Keep the field only if `timestamp` accepts it; nil rejects the capture.
  TryCapture(field) { timestamp ~= $0 ? $0 : nil }
  fieldSeparator

  // Same check against `details`.
  TryCapture(field) { details ~= $0 ? $0 : nil }
  fieldSeparator

  // ...
}