diff --git a/.gitignore b/.gitignore index a7e7e4d09..ff85b9fa3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ .DS_Store +# The current toolchain is dumping files in the package root, rude +*.emit-module.* + # Xcode # # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore diff --git a/Documentation/Evolution/ProposalOverview.md b/Documentation/Evolution/ProposalOverview.md index 7656526a6..5f526f963 100644 --- a/Documentation/Evolution/ProposalOverview.md +++ b/Documentation/Evolution/ProposalOverview.md @@ -3,6 +3,7 @@ ## Regex Type and Overview +- [Second review](https://forums.swift.org/t/se-0350-second-review-regex-type-and-overview/56886) - [Proposal](https://github.com/apple/swift-evolution/blob/main/proposals/0350-regex-type-overview.md), [Thread](https://forums.swift.org/t/se-0350-regex-type-and-overview/56530) - [Pitch thread](https://forums.swift.org/t/pitch-regex-type-and-overview/56029) diff --git a/Package.swift b/Package.swift index f8162e762..f9eb95e8e 100644 --- a/Package.swift +++ b/Package.swift @@ -10,6 +10,13 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([ #"SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999"#, ]) +let stdlibSettings: [PackageDescription.SwiftSetting] = [ + .unsafeFlags(["-enable-library-evolution"]), + .unsafeFlags(["-Xfrontend", "-disable-implicit-concurrency-module-import"]), + .unsafeFlags(["-Xfrontend", "-disable-implicit-string-processing-module-import"]), + availabilityDefinition +] + let package = Package( name: "swift-experimental-string-processing", products: [ @@ -36,10 +43,7 @@ let package = Package( .target( name: "_RegexParser", dependencies: [], - swiftSettings: [ - .unsafeFlags(["-enable-library-evolution"]), - availabilityDefinition - ]), + swiftSettings: stdlibSettings), .testTarget( name: "MatchingEngineTests", dependencies: [ @@ -51,29 +55,21 @@ let package = Package( .target( name: 
"_StringProcessing", dependencies: ["_RegexParser", "_CUnicode"], - swiftSettings: [ - .unsafeFlags(["-enable-library-evolution"]), - availabilityDefinition - ]), + swiftSettings: stdlibSettings), .target( name: "RegexBuilder", dependencies: ["_StringProcessing", "_RegexParser"], - swiftSettings: [ - .unsafeFlags(["-enable-library-evolution"]), - .unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"]), - availabilityDefinition - ]), + swiftSettings: stdlibSettings), .testTarget( name: "RegexTests", dependencies: ["_StringProcessing"], swiftSettings: [ - .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]) + .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]), ]), .testTarget( name: "RegexBuilderTests", dependencies: ["_StringProcessing", "RegexBuilder"], swiftSettings: [ - .unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"]), .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]) ]), .testTarget( @@ -102,7 +98,6 @@ let package = Package( name: "Exercises", dependencies: ["_RegexParser", "_StringProcessing", "RegexBuilder"], swiftSettings: [ - .unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"]), .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]) ]), .testTarget( diff --git a/README.md b/README.md index 42586ad2b..67c708a75 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ See [Declarative String Processing Overview][decl-string] ## Requirements -- [Swift Trunk Development Snapshot](https://www.swift.org/download/#snapshots) DEVELOPMENT-SNAPSHOT-2022-03-09 or later. +- [Swift Trunk Development Snapshot](https://www.swift.org/download/#snapshots) DEVELOPMENT-SNAPSHOT-2022-04-20 or later. 
## Trying it out diff --git a/Sources/PatternConverter/PatternConverter.swift b/Sources/PatternConverter/PatternConverter.swift index a10698526..497d54506 100644 --- a/Sources/PatternConverter/PatternConverter.swift +++ b/Sources/PatternConverter/PatternConverter.swift @@ -50,7 +50,7 @@ struct PatternConverter: ParsableCommand { print("Converting '\(delim)\(regex)\(delim)'") let ast = try _RegexParser.parse( - regex, + regex, .semantic, experimentalSyntax ? .experimental : .traditional) // Show rendered source ranges diff --git a/Sources/RegexBuilder/Anchor.swift b/Sources/RegexBuilder/Anchor.swift index e8cd4ac54..ae66310af 100644 --- a/Sources/RegexBuilder/Anchor.swift +++ b/Sources/RegexBuilder/Anchor.swift @@ -12,6 +12,12 @@ @_implementationOnly import _RegexParser @_spi(RegexBuilder) import _StringProcessing +/// A regex component that matches a specific condition at a particular position +/// in an input string. +/// +/// You can use anchors to guarantee that a match only occurs at certain points +/// in an input string, such as at the beginning of the string or at the end of +/// a line. @available(SwiftStdlib 5.7, *) public struct Anchor { internal enum Kind { @@ -53,14 +59,24 @@ extension Anchor: RegexComponent { @available(SwiftStdlib 5.7, *) extension Anchor { + /// An anchor that matches at the start of the input string. + /// + /// This anchor is equivalent to `\A` in regex syntax. public static var startOfSubject: Anchor { Anchor(kind: .startOfSubject) } - + + /// An anchor that matches at the end of the input string or at the end of + /// the line immediately before the end of the string. + /// + /// This anchor is equivalent to `\Z` in regex syntax. public static var endOfSubjectBeforeNewline: Anchor { Anchor(kind: .endOfSubjectBeforeNewline) } - + + /// An anchor that matches at the end of the input string. + /// + /// This anchor is equivalent to `\z` in regex syntax. 
public static var endOfSubject: Anchor { Anchor(kind: .endOfSubject) } @@ -70,26 +86,53 @@ extension Anchor { // Anchor(kind: resetStartOfMatch) // } + /// An anchor that matches at the first position of a match in the input + /// string. public static var firstMatchingPositionInSubject: Anchor { Anchor(kind: .firstMatchingPositionInSubject) } + /// An anchor that matches at a grapheme cluster boundary. + /// + /// This anchor is equivalent to `\y` in regex syntax. public static var textSegmentBoundary: Anchor { Anchor(kind: .textSegmentBoundary) } + /// An anchor that matches at the start of a line, including the start of + /// the input string. + /// + /// This anchor is equivalent to `^` in regex syntax when the `m` option + /// has been enabled or `anchorsMatchLineEndings(true)` has been called. public static var startOfLine: Anchor { Anchor(kind: .startOfLine) } + /// An anchor that matches at the end of a line, including at the end of + /// the input string. + /// + /// This anchor is equivalent to `$` in regex syntax when the `m` option + /// has been enabled or `anchorsMatchLineEndings(true)` has been called. public static var endOfLine: Anchor { Anchor(kind: .endOfLine) } + /// An anchor that matches at a word boundary. + /// + /// Word boundaries are identified using the Unicode default word boundary + /// algorithm by default. To specify a different word boundary algorithm, + /// see the `RegexComponent.wordBoundaryKind(_:)` method. + /// + /// This anchor is equivalent to `\b` in regex syntax. public static var wordBoundary: Anchor { Anchor(kind: .wordBoundary) } + /// The inverse of this anchor, which matches at every position that this + /// anchor does not. + /// + /// For the `wordBoundary` and `textSegmentBoundary` anchors, the inverted + /// version corresponds to `\B` and `\Y`, respectively. 
public var inverted: Anchor { var result = self result.isInverted.toggle() @@ -97,6 +140,13 @@ extension Anchor { } } +/// A regex component that allows a match to continue only if its contents +/// match at the given location. +/// +/// A lookahead is a zero-length assertion that its included regex matches at +/// a particular position. Lookaheads do not advance the overall matching +/// position in the input string — once a lookahead succeeds, matching continues +/// in the regex from the same position. @available(SwiftStdlib 5.7, *) public struct Lookahead: _BuiltinRegexComponent { public var regex: Regex @@ -105,19 +155,48 @@ public struct Lookahead: _BuiltinRegexComponent { self.regex = regex } + /// Creates a lookahead from the given regex component. public init( - _ component: R, - negative: Bool = false + _ component: R ) where R.RegexOutput == Output { - self.init(node: .nonCapturingGroup( - negative ? .negativeLookahead : .lookahead, component.regex.root)) + self.init(node: .nonCapturingGroup(.lookahead, component.regex.root)) } + + /// Creates a lookahead from the regex generated by the given builder closure. + public init( + @RegexComponentBuilder _ component: () -> R + ) where R.RegexOutput == Output { + self.init(node: .nonCapturingGroup(.lookahead, component().regex.root)) + } +} +/// A regex component that allows a match to continue only if its contents +/// do not match at the given location. +/// +/// A negative lookahead is a zero-length assertion that its included regex +/// does not match at a particular position. Lookaheads do not advance the +/// overall matching position in the input string — once a lookahead succeeds, +/// matching continues in the regex from the same position. +@available(SwiftStdlib 5.7, *) +public struct NegativeLookahead: _BuiltinRegexComponent { + public var regex: Regex + + init(_ regex: Regex) { + self.regex = regex + } + + /// Creates a negative lookahead from the given regex component. 
+ public init( + _ component: R + ) where R.RegexOutput == Output { + self.init(node: .nonCapturingGroup(.negativeLookahead, component.regex.root)) + } + + /// Creates a negative lookahead from the regex generated by the given builder + /// closure. public init( - negative: Bool = false, @RegexComponentBuilder _ component: () -> R ) where R.RegexOutput == Output { - self.init(node: .nonCapturingGroup( - negative ? .negativeLookahead : .lookahead, component().regex.root)) + self.init(node: .nonCapturingGroup(.negativeLookahead, component().regex.root)) } } diff --git a/Sources/_RegexParser/Regex/AST/AST.swift b/Sources/_RegexParser/Regex/AST/AST.swift index a7dcd2015..be1548b72 100644 --- a/Sources/_RegexParser/Regex/AST/AST.swift +++ b/Sources/_RegexParser/Regex/AST/AST.swift @@ -125,7 +125,9 @@ extension AST.Node { switch self { case .atom(let a): return a.isQuantifiable - case .group, .conditional, .customCharacterClass, .absentFunction: + case .group(let g): + return g.isQuantifiable + case .conditional, .customCharacterClass, .absentFunction: return true case .alternation, .concatenation, .quantification, .quote, .trivia, .empty: diff --git a/Sources/_RegexParser/Regex/AST/Atom.swift b/Sources/_RegexParser/Regex/AST/Atom.swift index e17ce68bb..19e2fb498 100644 --- a/Sources/_RegexParser/Regex/AST/Atom.swift +++ b/Sources/_RegexParser/Regex/AST/Atom.swift @@ -29,7 +29,13 @@ extension AST { /// A Unicode scalar value written as a literal /// /// \u{...}, \0dd, \x{...}, ... - case scalar(Unicode.Scalar) + case scalar(Scalar) + + /// A whitespace-separated sequence of Unicode scalar values which are + /// implicitly splatted out. + /// + /// `\u{A B C}` -> `\u{A}\u{B}\u{C}` + case scalarSequence(ScalarSequence) /// A Unicode property, category, or script, including those written using /// POSIX syntax. 
@@ -84,6 +90,7 @@ extension AST.Atom { switch kind { case .char(let v): return v case .scalar(let v): return v + case .scalarSequence(let v): return v case .property(let v): return v case .escaped(let v): return v case .keyboardControl(let v): return v @@ -106,6 +113,30 @@ extension AST.Atom { } } +extension AST.Atom { + public struct Scalar: Hashable { + public var value: UnicodeScalar + public var location: SourceLocation + + public init(_ value: UnicodeScalar, _ location: SourceLocation) { + self.value = value + self.location = location + } + } + + public struct ScalarSequence: Hashable { + public var scalars: [Scalar] + public var trivia: [AST.Trivia] + + public init(_ scalars: [Scalar], trivia: [AST.Trivia]) { + precondition(scalars.count > 1, "Expected multiple scalars") + self.scalars = scalars + self.trivia = trivia + } + public var scalarValues: [Unicode.Scalar] { scalars.map(\.value) } + } +} + extension AST.Atom { // TODO: We might scrap this and break out a few categories so @@ -396,6 +427,9 @@ extension AST.Atom.CharacterProperty { case script(Unicode.Script) case scriptExtension(Unicode.Script) + /// Character name in the form `\p{name=...}` + case named(String) + case posix(Unicode.POSIXProperty) /// Some special properties implemented by PCRE and Oniguruma. 
@@ -665,6 +699,23 @@ extension AST.Atom.EscapedBuiltin { return nil } } + + public var isQuantifiable: Bool { + switch self { + case .alarm, .escape, .formfeed, .newline, .carriageReturn, .tab, + .singleDataUnit, .decimalDigit, .notDecimalDigit, .horizontalWhitespace, + .notHorizontalWhitespace, .notNewline, .newlineSequence, .whitespace, + .notWhitespace, .verticalTab, .notVerticalTab, .wordCharacter, + .notWordCharacter, .backspace, .graphemeCluster, .trueAnychar: + return true + + case .wordBoundary, .notWordBoundary, .startOfSubject, + .endOfSubjectBeforeNewline, .endOfSubject, + .firstMatchingPositionInSubject, .resetStartOfMatch, .textSegment, + .notTextSegment: + return false + } + } } extension AST.Atom { @@ -677,7 +728,7 @@ extension AST.Atom { case .char(let c): return c case .scalar(let s): - return Character(s) + return Character(s.value) case .escaped(let c): return c.scalarValue.map(Character.init) @@ -693,8 +744,9 @@ extension AST.Atom { // the AST? Or defer for the matching engine? return nil - case .property, .any, .startOfLine, .endOfLine, .backreference, .subpattern, - .callout, .backtrackingDirective, .changeMatchingOptions: + case .scalarSequence, .property, .any, .startOfLine, .endOfLine, + .backreference, .subpattern, .callout, .backtrackingDirective, + .changeMatchingOptions: return nil } } @@ -716,13 +768,21 @@ extension AST.Atom { /// A string literal representation of the atom, if possible. /// /// Individual characters are returned as-is, and Unicode scalars are - /// presented using "\u{nnnn}" syntax. + /// presented using "\u{nn nn ...}" syntax. public var literalStringValue: String? 
{ + func scalarLiteral(_ u: [UnicodeScalar]) -> String { + let digits = u.map { String($0.value, radix: 16, uppercase: true) } + .joined(separator: " ") + return "\\u{\(digits)}" + } switch kind { case .char(let c): return String(c) case .scalar(let s): - return "\\u{\(String(s.value, radix: 16, uppercase: true))}" + return scalarLiteral([s.value]) + + case .scalarSequence(let s): + return scalarLiteral(s.scalarValues) case .keyboardControl(let x): return "\\C-\(x)" @@ -746,6 +806,10 @@ extension AST.Atom { case .changeMatchingOptions: return false // TODO: Are callouts quantifiable? + case .escaped(let esc): + return esc.isQuantifiable + case .startOfLine, .endOfLine: + return false default: return true } diff --git a/Sources/_RegexParser/Regex/AST/Group.swift b/Sources/_RegexParser/Regex/AST/Group.swift index 8ecaadeda..6fd46abe7 100644 --- a/Sources/_RegexParser/Regex/AST/Group.swift +++ b/Sources/_RegexParser/Regex/AST/Group.swift @@ -136,3 +136,18 @@ extension AST.Group { } } } + +extension AST.Group { + var isQuantifiable: Bool { + switch kind.value { + case .capture, .namedCapture, .balancedCapture, .nonCapture, + .nonCaptureReset, .atomicNonCapturing, .scriptRun, .atomicScriptRun, + .changeMatchingOptions: + return true + + case .lookahead, .negativeLookahead, .nonAtomicLookahead, + .lookbehind, .negativeLookbehind, .nonAtomicLookbehind: + return false + } + } +} diff --git a/Sources/_RegexParser/Regex/AST/MatchingOptions.swift b/Sources/_RegexParser/Regex/AST/MatchingOptions.swift index e779c39fb..d3dbc1666 100644 --- a/Sources/_RegexParser/Regex/AST/MatchingOptions.swift +++ b/Sources/_RegexParser/Regex/AST/MatchingOptions.swift @@ -17,7 +17,7 @@ extension AST { case caseInsensitive // i case allowDuplicateGroupNames // J case multiline // m - case noAutoCapture // n + case namedCapturesOnly // n case singleLine // s case reluctantByDefault // U case extended // x diff --git a/Sources/_RegexParser/Regex/Parse/CaptureList.swift 
b/Sources/_RegexParser/Regex/Parse/CaptureList.swift index d112b2010..0287e7337 100644 --- a/Sources/_RegexParser/Regex/Parse/CaptureList.swift +++ b/Sources/_RegexParser/Regex/Parse/CaptureList.swift @@ -26,15 +26,18 @@ extension CaptureList { public var name: String? public var type: Any.Type? public var optionalDepth: Int + public var location: SourceLocation public init( name: String? = nil, type: Any.Type? = nil, - optionalDepth: Int + optionalDepth: Int, + _ location: SourceLocation ) { self.name = name self.type = type self.optionalDepth = optionalDepth + self.location = location } } } @@ -61,13 +64,14 @@ extension AST.Node { case let .group(g): switch g.kind.value { case .capture: - list.append(.init(optionalDepth: nesting)) + list.append(.init(optionalDepth: nesting, g.location)) case .namedCapture(let name): - list.append(.init(name: name.value, optionalDepth: nesting)) + list.append(.init(name: name.value, optionalDepth: nesting, g.location)) case .balancedCapture(let b): - list.append(.init(name: b.name?.value, optionalDepth: nesting)) + list.append(.init(name: b.name?.value, optionalDepth: nesting, + g.location)) default: break } @@ -124,7 +128,8 @@ extension CaptureList.Capture: Equatable { public static func == (lhs: Self, rhs: Self) -> Bool { lhs.name == rhs.name && lhs.optionalDepth == rhs.optionalDepth && - lhs.type == rhs.type + lhs.type == rhs.type && + lhs.location == rhs.location } } extension CaptureList: Equatable {} diff --git a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift index 911312121..c0ece78ff 100644 --- a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift +++ b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift @@ -18,7 +18,7 @@ extension Source { // This follows the rules provided by UAX44-LM3, including trying to drop an // "is" prefix, which isn't required by UTS#18 RL1.2, but is nice for // 
consistency with other engines and the Unicode.Scalar.Properties names. - let str = str.filter { !$0.isWhitespace && $0 != "_" && $0 != "-" } + let str = str.filter { !$0.isPatternWhitespace && $0 != "_" && $0 != "-" } .lowercased() if let m = match(str) { return m @@ -32,8 +32,8 @@ extension Source { static private func classifyGeneralCategory( _ str: String ) -> Unicode.ExtendedGeneralCategory? { - // This uses the aliases defined in - // https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt. + // This uses the aliases defined in https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt. + // Additionally, uses the `L& = Lc` alias defined by PCRE. withNormalizedForms(str) { str in switch str { case "c", "other": return .other @@ -43,7 +43,7 @@ extension Source { case "co", "privateuse": return .privateUse case "cs", "surrogate": return .surrogate case "l", "letter": return .letter - case "lc", "casedletter": return .casedLetter + case "lc", "l&", "casedletter": return .casedLetter case "ll", "lowercaseletter": return .lowercaseLetter case "lm", "modifierletter": return .modifierLetter case "lo", "otherletter": return .otherLetter @@ -428,6 +428,8 @@ extension Source { if let cat = classifyGeneralCategory(value) { return .generalCategory(cat) } + case "name", "na": + return .named(value) default: break } diff --git a/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift b/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift index 0856361d8..4ae518dcd 100644 --- a/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift +++ b/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift @@ -96,7 +96,7 @@ public func swiftCompilerParseRegexLiteral( _ input: String, captureBufferOut: UnsafeMutableRawBufferPointer ) throws -> (regexToEmit: String, version: Int) { do { - let ast = try parseWithDelimiters(input) + let ast = try parseWithDelimiters(input, .semantic) // Serialize the capture structure for later type inference. 
assert(captureBufferOut.count >= input.utf8.count) ast.captureStructure.encode(to: captureBufferOut) diff --git a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift index c3d74c30b..d87fba918 100644 --- a/Sources/_RegexParser/Regex/Parse/Diagnostics.swift +++ b/Sources/_RegexParser/Regex/Parse/Diagnostics.swift @@ -15,6 +15,8 @@ enum ParseError: Error, Hashable { // TODO: I wonder if it makes sense to store the string. // This can make equality weird. + // MARK: Syntactic Errors + case numberOverflow(String) case expectedNumDigits(String, Int) case expectedNumber(String, kind: RadixKind) @@ -43,7 +45,6 @@ enum ParseError: Error, Hashable { case cannotReferToWholePattern - case notQuantifiable case quantifierRequiresOperand(String) case backtrackingDirectiveMustHaveName(String) @@ -55,7 +56,6 @@ enum ParseError: Error, Hashable { case cannotRemoveMatchingOptionsAfterCaret case expectedCustomCharacterClassMembers - case invalidCharacterClassRangeOperand case emptyProperty case unknownProperty(key: String?, value: String) @@ -73,6 +73,17 @@ enum ParseError: Error, Hashable { case cannotRemoveExtendedSyntaxInMultilineMode case expectedCalloutArgument + + // MARK: Semantic Errors + + case unsupported(String) + case deprecatedUnicode(String) + case invalidReference(Int) + case duplicateNamedCapture(String) + case invalidCharacterClassRangeOperand + case invalidQuantifierRange(Int, Int) + case invalidCharacterRange(from: Character, to: Character) + case notQuantifiable } extension IdentifierKind { @@ -88,18 +99,23 @@ extension IdentifierKind { extension ParseError: CustomStringConvertible { var description: String { switch self { + // MARK: Syntactic Errors case let .numberOverflow(s): return "number overflow: \(s)" case let .expectedNumDigits(s, i): return "expected \(i) digits in '\(s)'" case let .expectedNumber(s, kind: kind): - let radix: String - if kind == .decimal { - radix = "" - } else { - radix = " of radix 
\(kind.radix)" + let number: String + switch kind { + case .octal: + number = "octal number" + case .decimal: + number = "number" + case .hex: + number = "hexadecimal number" } - return "expected a numbers in '\(s)'\(radix)" + let suffix = s.isEmpty ? "" : " in '\(s)'" + return "expected \(number)\(suffix)" case let .expected(s): return "expected '\(s)'" case .unexpectedEndOfInput: @@ -114,8 +130,6 @@ extension ParseError: CustomStringConvertible { return "invalid escape sequence '\\\(c)'" case .cannotReferToWholePattern: return "cannot refer to whole pattern here" - case .notQuantifiable: - return "expression is not quantifiable" case .quantifierRequiresOperand(let q): return "quantifier '\(q)' must appear after expression" case .backtrackingDirectiveMustHaveName(let b): @@ -167,6 +181,23 @@ extension ParseError: CustomStringConvertible { return "extended syntax may not be disabled in multi-line mode" case .expectedCalloutArgument: return "expected argument to callout" + + // MARK: Semantic Errors + + case let .unsupported(kind): + return "\(kind) is not currently supported" + case let .deprecatedUnicode(kind): + return "\(kind) is a deprecated Unicode property, and is not supported" + case let .invalidReference(i): + return "no capture numbered \(i)" + case let .duplicateNamedCapture(str): + return "group named '\(str)' already exists" + case let .invalidQuantifierRange(lhs, rhs): + return "range lower bound '\(lhs)' must be less than or equal to upper bound '\(rhs)'" + case let .invalidCharacterRange(from: lhs, to: rhs): + return "character '\(lhs)' must compare less than or equal to '\(rhs)'" + case .notQuantifiable: + return "expression is not quantifiable" } } } diff --git a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift index 9633b607e..e8783dc86 100644 --- a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift +++ b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift @@ -157,6 +157,19 @@ 
extension Source { return .init(start ..< currentPosition) } + /// Attempt to eat a given prefix that satisfies a given predicate, with the + /// source location recorded. + mutating func tryEatLocatedPrefix( + maxLength: Int? = nil, + _ f: (Char) -> Bool + ) -> Located? { + let result = recordLoc { src in + src.tryEatPrefix(maxLength: maxLength, f) + } + guard let result = result else { return nil } + return result.map(\.string) + } + /// Throws an expected ASCII character error if not matched mutating func expectASCII() throws -> Located { try recordLoc { src in @@ -217,13 +230,13 @@ extension Source { /// return the scalar value, or throw an error if the string is malformed or /// would overflow the scalar. private static func validateUnicodeScalar( - _ str: String, _ kind: RadixKind - ) throws -> Unicode.Scalar { - let num = try validateNumber(str, UInt32.self, kind) + _ str: Source.Located, _ kind: RadixKind + ) throws -> AST.Atom.Scalar { + let num = try validateNumber(str.value, UInt32.self, kind) guard let scalar = Unicode.Scalar(num) else { throw ParseError.misc("Invalid scalar value U+\(num.hexStr)") } - return scalar + return .init(scalar, str.location) } /// Try to eat a number of a particular type and radix off the front. @@ -266,20 +279,65 @@ extension Source { /// Eat a scalar value from hexadecimal notation off the front private mutating func expectUnicodeScalar( numDigits: Int - ) throws -> Located { - try recordLoc { src in + ) throws -> AST.Atom.Scalar { + let str = try recordLoc { src -> String in let str = src.eat(upToCount: numDigits).string guard str.count == numDigits else { throw ParseError.expectedNumDigits(str, numDigits) } - return try Source.validateUnicodeScalar(str, .hex) + return str } + return try Source.validateUnicodeScalar(str, .hex) + } + + /// Try to lex a sequence of hex digit unicode scalars. + /// + /// UniScalarSequence -> Whitespace? UniScalarSequenceElt+ + /// UniScalarSequenceElt -> HexDigit{1...} Whitespace? 
+ /// + mutating func expectUnicodeScalarSequence( + eating ending: Character + ) throws -> AST.Atom.Kind { + try recordLoc { src in + var scalars = [AST.Atom.Scalar]() + var trivia = [AST.Trivia]() + + // Eat up any leading whitespace. + if let t = src.lexWhitespace() { trivia.append(t) } + + while true { + let str = src.lexUntil { src in + // Hit the ending, stop lexing. + if src.isEmpty || src.peek() == ending { + return true + } + // Eat up trailing whitespace, and stop lexing to record the scalar. + if let t = src.lexWhitespace() { + trivia.append(t) + return true + } + // Not the ending or trivia, must be a digit of the scalar. + return false + } + guard !str.value.isEmpty else { break } + scalars.append(try Source.validateUnicodeScalar(str, .hex)) + } + guard !scalars.isEmpty else { + throw ParseError.expectedNumber("", kind: .hex) + } + try src.expect(ending) + + if scalars.count == 1 { + return .scalar(scalars[0]) + } + return .scalarSequence(.init(scalars, trivia: trivia)) + }.value } /// Eat a scalar off the front, starting from after the /// backslash and base character (e.g. `\u` or `\x`). /// - /// UniScalar -> 'u{' HexDigit{1...} '}' + /// UniScalar -> 'u{' UniScalarSequence '}' /// | 'u' HexDigit{4} /// | 'x{' HexDigit{1...} '}' /// | 'x' HexDigit{0...2} @@ -289,49 +347,60 @@ extension Source { /// mutating func expectUnicodeScalar( escapedCharacter base: Character - ) throws -> Located { + ) throws -> AST.Atom.Kind { try recordLoc { src in + + func nullScalar() -> AST.Atom.Kind { + let pos = src.currentPosition + return .scalar(.init(UnicodeScalar(0), SourceLocation(pos ..< pos))) + } + // TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set. switch base { // Hex numbers. 
- case "u" where src.tryEat("{"), "x" where src.tryEat("{"): - let str = try src.lexUntil(eating: "}").value - return try Source.validateUnicodeScalar(str, .hex) + case "u" where src.tryEat("{"): + return try src.expectUnicodeScalarSequence(eating: "}") + + case "x" where src.tryEat("{"): + let str = try src.lexUntil(eating: "}") + return .scalar(try Source.validateUnicodeScalar(str, .hex)) case "x": // \x expects *up to* 2 digits. - guard let digits = src.tryEatPrefix(maxLength: 2, \.isHexDigit) else { + guard let digits = src.tryEatLocatedPrefix(maxLength: 2, \.isHexDigit) + else { // In PCRE, \x without any valid hex digits is \u{0}. // TODO: This doesn't appear to be followed by ICU or Oniguruma, so // could be changed to throw an error if we had a parsing mode for // them. - return Unicode.Scalar(0) + return nullScalar() } - return try Source.validateUnicodeScalar(digits.string, .hex) + return .scalar(try Source.validateUnicodeScalar(digits, .hex)) case "u": - return try src.expectUnicodeScalar(numDigits: 4).value + return .scalar(try src.expectUnicodeScalar(numDigits: 4)) case "U": - return try src.expectUnicodeScalar(numDigits: 8).value + return .scalar(try src.expectUnicodeScalar(numDigits: 8)) // Octal numbers. case "o" where src.tryEat("{"): - let str = try src.lexUntil(eating: "}").value - return try Source.validateUnicodeScalar(str, .octal) + let str = try src.lexUntil(eating: "}") + return .scalar(try Source.validateUnicodeScalar(str, .octal)) case "0": // We can read *up to* 3 more octal digits. // FIXME: PCRE can only read up to 2 octal digits, if we get a strict // PCRE mode, we should limit it here. 
- guard let digits = src.tryEatPrefix(maxLength: 3, \.isOctalDigit) else { - return Unicode.Scalar(0) + guard let digits = src.tryEatLocatedPrefix(maxLength: 3, \.isOctalDigit) + else { + return nullScalar() } - return try Source.validateUnicodeScalar(digits.string, .octal) + return .scalar(try Source.validateUnicodeScalar(digits, .octal)) default: fatalError("Unexpected scalar start") } - } + }.value } /// Try to consume a quantifier @@ -434,13 +503,22 @@ extension Source { private mutating func lexUntil( _ predicate: (inout Source) throws -> Bool ) rethrows -> Located { + // We track locations outside of recordLoc, as the predicate may advance the + // input when we hit the end, and we don't want that to affect the location + // of what was lexed in the `result`. We still want the recordLoc call to + // attach locations to any thrown errors though. + // TODO: We should find a better way of doing this, `lexUntil` seems full + // of footguns. + let start = currentPosition + var end = currentPosition + var result = "" try recordLoc { src in - var result = "" while try !predicate(&src) { result.append(src.eat()) + end = src.currentPosition } - return result } + return .init(result, start ..< end) } private mutating func lexUntil(eating end: String) throws -> Located { @@ -576,6 +654,16 @@ extension Source { // inside a custom character class (and only treats whitespace as // non-semantic there for the extra-extended `(?xx)` mode). If we get a // strict-PCRE mode, we'll need to add a case for that. + return lexWhitespace() + } + + /// Try to consume whitespace as trivia + /// + /// Whitespace -> WhitespaceChar+ + /// + /// Unlike `lexNonSemanticWhitespace`, this will always attempt to lex + /// whitespace. + mutating func lexWhitespace() -> AST.Trivia? { let trivia: Located? 
= recordLoc { src in src.tryEatPrefix(\.isPatternWhitespace)?.string } @@ -616,7 +704,7 @@ extension Source { case "i": return advanceAndReturn(.caseInsensitive) case "J": return advanceAndReturn(.allowDuplicateGroupNames) case "m": return advanceAndReturn(.multiline) - case "n": return advanceAndReturn(.noAutoCapture) + case "n": return advanceAndReturn(.namedCapturesOnly) case "s": return advanceAndReturn(.singleLine) case "U": return advanceAndReturn(.reluctantByDefault) case "x": @@ -914,6 +1002,10 @@ extension Source { } // TODO: (name:) + // If (?n) is set, a bare (...) group is non-capturing. + if context.syntax.contains(.namedCapturesOnly) { + return .nonCapture + } return .capture } } @@ -1149,7 +1241,7 @@ extension Source { // We should either have a unicode scalar. if src.tryEat(sequence: "U+") { - let str = try src.lexUntil(eating: "}").value + let str = try src.lexUntil(eating: "}") return .scalar(try Source.validateUnicodeScalar(str, .hex)) } @@ -1577,8 +1669,7 @@ extension Source { switch char { // Hexadecimal and octal unicode scalars. 
case "u", "x", "U", "o", "0": - return try .scalar( - src.expectUnicodeScalar(escapedCharacter: char).value) + return try src.expectUnicodeScalar(escapedCharacter: char) default: break } diff --git a/Sources/_RegexParser/Regex/Parse/Parse.swift b/Sources/_RegexParser/Regex/Parse/Parse.swift index ec6e1c26c..112f32358 100644 --- a/Sources/_RegexParser/Regex/Parse/Parse.swift +++ b/Sources/_RegexParser/Regex/Parse/Parse.swift @@ -227,9 +227,6 @@ extension Parser { if let (amt, kind, trivia) = try source.lexQuantifier(context: context) { let location = loc(_start) - guard operand.isQuantifiable else { - throw Source.LocatedError(ParseError.notQuantifiable, location) - } result.append(.quantification( .init(amt, kind, operand, location, trivia: trivia))) } else { @@ -287,23 +284,34 @@ extension Parser { private mutating func applySyntaxOptions( of opts: AST.MatchingOptionSequence ) { - // We skip this for multi-line, as extended syntax is always enabled there. - if context.syntax.contains(.multilineExtendedSyntax) { return } + func mapOption(_ option: SyntaxOptions, + _ pred: (AST.MatchingOption) -> Bool) { + if opts.resetsCurrentOptions { + context.syntax.remove(option) + } + if opts.adding.contains(where: pred) { + context.syntax.insert(option) + } + if opts.removing.contains(where: pred) { + context.syntax.remove(option) + } + } + func mapOption(_ option: SyntaxOptions, _ kind: AST.MatchingOption.Kind) { + mapOption(option, { $0.kind == kind }) + } - // Check if we're introducing or removing extended syntax. + // (?n) + mapOption(.namedCapturesOnly, .namedCapturesOnly) + + // (?x), (?xx) + // We skip this for multi-line, as extended syntax is always enabled there. // TODO: PCRE differentiates between (?x) and (?xx) where only the latter // handles non-semantic whitespace in a custom character class. Other // engines such as Oniguruma, Java, and ICU do this under (?x). Therefore, // treat (?x) and (?xx) as the same option here. 
If we ever get a strict // PCRE mode, we will need to change this to handle that. - if opts.resetsCurrentOptions { - context.syntax.remove(.extendedSyntax) - } - if opts.adding.contains(where: \.isAnyExtended) { - context.syntax.insert(.extendedSyntax) - } - if opts.removing.contains(where: \.isAnyExtended) { - context.syntax.remove(.extendedSyntax) + if !context.syntax.contains(.multilineExtendedSyntax) { + mapOption(.extendedSyntax, \.isAnyExtended) } } @@ -532,11 +540,6 @@ extension Parser { // Range between atoms. if let (dashLoc, rhs) = try source.lexCustomCharClassRangeEnd(context: context) { - guard atom.isValidCharacterClassRangeBound && - rhs.isValidCharacterClassRangeBound else { - throw ParseError.invalidCharacterClassRangeOperand - } - // TODO: Validate lower <= upper? members.append(.range(.init(atom, dashLoc, rhs))) continue } @@ -547,13 +550,31 @@ extension Parser { } } +public enum ASTStage { + /// The regex is parsed, and a syntactically valid AST is returned. Otherwise + /// an error is thrown. This is useful for e.g syntax coloring. + case syntactic + + /// The regex is parsed, and a syntactically and semantically valid AST is + /// returned. Otherwise an error is thrown. A semantically valid AST has been + /// checked for e.g unsupported constructs and invalid backreferences. + case semantic +} + public func parse( - _ regex: S, _ syntax: SyntaxOptions + _ regex: S, _ stage: ASTStage, _ syntax: SyntaxOptions ) throws -> AST where S.SubSequence == Substring { let source = Source(String(regex)) var parser = Parser(source, syntax: syntax) - return try parser.parse() + let ast = try parser.parse() + switch stage { + case .syntactic: + break + case .semantic: + try validate(ast) + } + return ast } /// Retrieve the default set of syntax options that a delimiter and literal @@ -580,11 +601,12 @@ fileprivate func defaultSyntaxOptions( /// Parses a given regex string with delimiters, inferring the syntax options /// from the delimiters used. 
public func parseWithDelimiters( - _ regex: S + _ regex: S, _ stage: ASTStage ) throws -> AST where S.SubSequence == Substring { let (contents, delim) = droppingRegexDelimiters(String(regex)) do { - return try parse(contents, defaultSyntaxOptions(delim, contents: contents)) + let syntax = defaultSyntaxOptions(delim, contents: contents) + return try parse(contents, stage, syntax) } catch let error as LocatedErrorProtocol { // Convert the range in 'contents' to the range in 'regex'. let delimCount = delim.opening.count diff --git a/Sources/_RegexParser/Regex/Parse/Sema.swift b/Sources/_RegexParser/Regex/Parse/Sema.swift new file mode 100644 index 000000000..9d5ae4576 --- /dev/null +++ b/Sources/_RegexParser/Regex/Parse/Sema.swift @@ -0,0 +1,407 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +/// Validate a regex AST for semantic validity. Once bytecode is emitted at +/// compile time, this could potentially be subsumed by the bytecode generator. +fileprivate struct RegexValidator { + let ast: AST + let captures: CaptureList + + init(_ ast: AST) { + self.ast = ast + self.captures = ast.captureList + } + + func error(_ kind: ParseError, at loc: SourceLocation) -> Error { + Source.LocatedError(kind, loc) + } +} + +extension String { + fileprivate var quoted: String { "'\(self)'" } +} + +extension RegexValidator { + func validate() throws { + for opt in ast.globalOptions?.options ?? 
[] { + try validateGlobalMatchingOption(opt) + } + try validateCaptures() + try validateNode(ast.root) + } + + func validateGlobalMatchingOption(_ opt: AST.GlobalMatchingOption) throws { + switch opt.kind { + case .limitDepth, .limitHeap, .limitMatch, .notEmpty, .notEmptyAtStart, + .noAutoPossess, .noDotStarAnchor, .noJIT, .noStartOpt, .utfMode, + .unicodeProperties: + // These are PCRE specific, and not something we're likely to ever + // support. + throw error(.unsupported("global matching option"), at: opt.location) + + case .newlineMatching: + // We have implemented the correct behavior for multi-line literals, but + // these should also affect '.' and '\N' matching, which we haven't + // implemented. + throw error(.unsupported("newline matching mode"), at: opt.location) + + case .newlineSequenceMatching: + // We haven't yet implemented the '\R' matching specifics of these. + throw error( + .unsupported("newline sequence matching mode"), at: opt.location) + } + } + + func validateCaptures() throws { + // TODO: Should this be validated when creating the capture list? + var usedNames = Set() + for capture in captures.captures { + guard let name = capture.name else { continue } + guard usedNames.insert(name).inserted else { + throw error(.duplicateNamedCapture(name), at: capture.location) + } + } + } + + func validateReference(_ ref: AST.Reference) throws { + switch ref.kind { + case .absolute(let i): + guard i <= captures.captures.count else { + throw error(.invalidReference(i), at: ref.innerLoc) + } + case .relative: + throw error(.unsupported("relative capture reference"), at: ref.innerLoc) + case .named: + // TODO: This could be implemented by querying the capture list for an + // index. 
+ throw error(.unsupported("named capture reference"), at: ref.innerLoc) + } + if let recLevel = ref.recursionLevel { + throw error(.unsupported("recursion level"), at: recLevel.location) + } + } + + func validateMatchingOption(_ opt: AST.MatchingOption) throws { + let loc = opt.location + switch opt.kind { + case .allowDuplicateGroupNames: + // Not currently supported as we need to figure out what to do with + // the capture type. + throw error(.unsupported("duplicate group naming"), at: loc) + + case .unicodeWordBoundaries: + throw error(.unsupported("unicode word boundary mode"), at: loc) + + case .textSegmentWordMode, .textSegmentGraphemeMode: + throw error(.unsupported("text segment mode"), at: loc) + + case .byteSemantics: + throw error(.unsupported("byte semantic mode"), at: loc) + + case .caseInsensitive, .possessiveByDefault, .reluctantByDefault, + .unicodeScalarSemantics, .graphemeClusterSemantics, + .singleLine, .multiline, .namedCapturesOnly, .extended, .extraExtended, + .asciiOnlyDigit, .asciiOnlyWord, .asciiOnlySpace, .asciiOnlyPOSIXProps: + break + } + } + + func validateMatchingOptions(_ opts: AST.MatchingOptionSequence) throws { + for opt in opts.adding { + try validateMatchingOption(opt) + } + for opt in opts.removing { + try validateMatchingOption(opt) + } + } + + func validateBinaryProperty( + _ prop: Unicode.BinaryProperty, at loc: SourceLocation + ) throws { + switch prop { + case .asciiHexDigit, .alphabetic, .bidiMirrored, .cased, .caseIgnorable, + .changesWhenCasefolded, .changesWhenCasemapped, + .changesWhenNFKCCasefolded, .changesWhenLowercased, + .changesWhenTitlecased, .changesWhenUppercased, .dash, .deprecated, + .defaultIgnorableCodePoint, .diacratic, .extender, + .fullCompositionExclusion, .graphemeBase, .graphemeExtended, .hexDigit, + .idContinue, .ideographic, .idStart, .idsBinaryOperator, + .idsTrinaryOperator, .joinControl, .logicalOrderException, .lowercase, + .math, .noncharacterCodePoint, .patternSyntax, .patternWhitespace, + 
.quotationMark, .radical, .regionalIndicator, .softDotted, + .sentenceTerminal, .terminalPunctuation, .unifiedIdiograph, .uppercase, + .variationSelector, .whitespace, .xidContinue, .xidStart: + break + + case .emojiModifierBase, .emojiModifier, .emoji, .emojiPresentation: + // These are available on macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1. + // TODO: We should ideally check deployment target for such conditionally + // available properties. + break + + case .expandsOnNFC, .expandsOnNFD, .expandsOnNFKD, .expandsOnNFKC: + throw error(.deprecatedUnicode(prop.rawValue.quoted), at: loc) + + case .bidiControl, .compositionExclusion, .emojiComponent, + .extendedPictographic, .graphemeLink, .hyphen, .otherAlphabetic, + .otherDefaultIgnorableCodePoint, .otherGraphemeExtended, + .otherIDContinue, .otherIDStart, .otherLowercase, .otherMath, + .otherUppercase, .prependedConcatenationMark: + throw error(.unsupported(prop.rawValue.quoted), at: loc) + } + } + + func validateCharacterProperty( + _ prop: AST.Atom.CharacterProperty, at loc: SourceLocation + ) throws { + // TODO: We could re-add the .other case to diagnose unknown properties + // here instead of in the parser. + // TODO: Should we store an 'inner location' for the contents of `\p{...}`? + switch prop.kind { + case .binary(let b, _): + try validateBinaryProperty(b, at: loc) + case .any, .assigned, .ascii, .generalCategory, .posix, .named, .script, + .scriptExtension: + break + case .pcreSpecial: + throw error(.unsupported("PCRE property"), at: loc) + case .onigurumaSpecial: + throw error(.unsupported("Unicode block property"), at: loc) + } + } + + func validateEscaped( + _ esc: AST.Atom.EscapedBuiltin, at loc: SourceLocation + ) throws { + switch esc { + case .resetStartOfMatch, .singleDataUnit, + // '\N' needs to be emitted using 'emitAny'. + .notNewline: + throw error(.unsupported("'\\\(esc.character)'"), at: loc) + + // Character classes. 
+ case .decimalDigit, .notDecimalDigit, .whitespace, .notWhitespace, + .wordCharacter, .notWordCharacter, .graphemeCluster, .trueAnychar, + .horizontalWhitespace, .notHorizontalWhitespace, + .verticalTab, .notVerticalTab: + break + + case .newlineSequence: + break + + // Assertions. + case .wordBoundary, .notWordBoundary, .startOfSubject, + .endOfSubjectBeforeNewline, .endOfSubject, .textSegment, + .notTextSegment, .firstMatchingPositionInSubject: + break + + // Literal escapes. + case .alarm, .backspace, .escape, .formfeed, .newline, .carriageReturn, + .tab: + break + } + } + + func validateAtom(_ atom: AST.Atom, inCustomCharacterClass: Bool) throws { + switch atom.kind { + case .escaped(let esc): + try validateEscaped(esc, at: atom.location) + + case .keyboardControl, .keyboardMeta, .keyboardMetaControl: + // We need to implement the scalar computations for these. + throw error(.unsupported("control sequence"), at: atom.location) + + case .property(let p): + try validateCharacterProperty(p, at: atom.location) + + case .backreference(let r): + try validateReference(r) + + case .subpattern: + throw error(.unsupported("subpattern"), at: atom.location) + + case .callout: + // These are PCRE and Oniguruma specific, supporting them is future work. + throw error(.unsupported("callout"), at: atom.location) + + case .backtrackingDirective: + // These are PCRE-specific, and are unlikely to be fully supported. + throw error(.unsupported("backtracking directive"), at: atom.location) + + case .changeMatchingOptions(let opts): + try validateMatchingOptions(opts) + + case .namedCharacter: + // TODO: We should error on unknown Unicode scalar names. + break + + case .scalarSequence: + // Not currently supported in a custom character class. 
+ if inCustomCharacterClass { + throw error(.unsupported("scalar sequence in custom character class"), + at: atom.location) + } + + case .char, .scalar, .startOfLine, .endOfLine, .any: + break + } + } + + func validateCustomCharacterClass(_ c: AST.CustomCharacterClass) throws { + for member in c.members { + try validateCharacterClassMember(member) + } + } + + func validateCharacterClassRange( + _ range: AST.CustomCharacterClass.Range + ) throws { + let lhs = range.lhs + let rhs = range.rhs + + try validateAtom(lhs, inCustomCharacterClass: true) + try validateAtom(rhs, inCustomCharacterClass: true) + + guard lhs.isValidCharacterClassRangeBound else { + throw error(.invalidCharacterClassRangeOperand, at: lhs.location) + } + guard rhs.isValidCharacterClassRangeBound else { + throw error(.invalidCharacterClassRangeOperand, at: rhs.location) + } + + guard let lhsChar = lhs.literalCharacterValue else { + throw error( + .unsupported("character class range operand"), at: lhs.location) + } + + guard let rhsChar = rhs.literalCharacterValue else { + throw error( + .unsupported("character class range operand"), at: rhs.location) + } + + guard lhsChar <= rhsChar else { + throw error( + .invalidCharacterRange(from: lhsChar, to: rhsChar), at: range.dashLoc) + } + } + + func validateCharacterClassMember( + _ member: AST.CustomCharacterClass.Member + ) throws { + switch member { + case .custom(let c): + try validateCustomCharacterClass(c) + + case .range(let r): + try validateCharacterClassRange(r) + + case .atom(let a): + try validateAtom(a, inCustomCharacterClass: true) + + case .setOperation(let lhs, _, let rhs): + for lh in lhs { try validateCharacterClassMember(lh) } + for rh in rhs { try validateCharacterClassMember(rh) } + + case .quote, .trivia: + break + } + } + + func validateGroup(_ group: AST.Group) throws { + let kind = group.kind + switch kind.value { + case .capture, .namedCapture, .nonCapture, .lookahead, .negativeLookahead: + break + + case .balancedCapture: + // 
These are .NET specific, and kinda niche. + throw error(.unsupported("balanced capture"), at: kind.location) + + case .nonCaptureReset: + // We need to figure out how these interact with typed captures. + throw error(.unsupported("branch reset group"), at: kind.location) + + case .atomicNonCapturing: + throw error(.unsupported("atomic group"), at: kind.location) + + case .nonAtomicLookahead: + throw error(.unsupported("non-atomic lookahead"), at: kind.location) + + case .lookbehind, .negativeLookbehind, .nonAtomicLookbehind: + throw error(.unsupported("lookbehind"), at: kind.location) + + case .scriptRun, .atomicScriptRun: + throw error(.unsupported("script run"), at: kind.location) + + case .changeMatchingOptions(let opts): + try validateMatchingOptions(opts) + } + try validateNode(group.child) + } + + func validateQuantification(_ quant: AST.Quantification) throws { + try validateNode(quant.child) + guard quant.child.isQuantifiable else { + throw error(.notQuantifiable, at: quant.child.location) + } + switch quant.amount.value { + case .range(let lhs, let rhs): + guard lhs.value <= rhs.value else { + throw error( + .invalidQuantifierRange(lhs.value, rhs.value), at: quant.location) + } + case .zeroOrMore, .oneOrMore, .zeroOrOne, .exactly, .nOrMore, .upToN: + break + } + } + + func validateNode(_ node: AST.Node) throws { + switch node { + case .alternation(let a): + for branch in a.children { + try validateNode(branch) + } + case .concatenation(let c): + for child in c.children { + try validateNode(child) + } + + case .group(let g): + try validateGroup(g) + + case .conditional(let c): + // Note even once we get runtime support for this, we need to change the + // parsing to incorporate what is specified in the syntax proposal. 
+ throw error(.unsupported("conditional"), at: c.location) + + case .quantification(let q): + try validateQuantification(q) + + case .atom(let a): + try validateAtom(a, inCustomCharacterClass: false) + + case .customCharacterClass(let c): + try validateCustomCharacterClass(c) + + case .absentFunction(let a): + // These are Oniguruma specific. + throw error(.unsupported("absent function"), at: a.location) + + case .quote, .trivia, .empty: + break + } + } +} + +/// Check a regex AST for semantic validity. +public func validate(_ ast: AST) throws { + try RegexValidator(ast).validate() +} diff --git a/Sources/_RegexParser/Regex/Parse/SyntaxOptions.swift b/Sources/_RegexParser/Regex/Parse/SyntaxOptions.swift index 0a6270f1b..dbfe5f2d6 100644 --- a/Sources/_RegexParser/Regex/Parse/SyntaxOptions.swift +++ b/Sources/_RegexParser/Regex/Parse/SyntaxOptions.swift @@ -63,6 +63,9 @@ public struct SyntaxOptions: OptionSet { return [Self(1 << 6), .extendedSyntax] } + /// `(?n)` + public static var namedCapturesOnly: Self { Self(1 << 7) } + /* /// `*` == `[[:digit:]]*` == `\d*` diff --git a/Sources/_RegexParser/Regex/Printing/DumpAST.swift b/Sources/_RegexParser/Regex/Printing/DumpAST.swift index a9cf6b424..b8937d518 100644 --- a/Sources/_RegexParser/Regex/Printing/DumpAST.swift +++ b/Sources/_RegexParser/Regex/Printing/DumpAST.swift @@ -138,6 +138,9 @@ extension AST.Atom { switch kind { case .escaped(let c): return "\\\(c.character)" + case .scalarSequence(let s): + return s.scalars.map(\.value.halfWidthCornerQuoted).joined() + case .namedCharacter(let charName): return "\\N{\(charName)}" diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 21fcfa703..d30cab209 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -80,10 +80,16 @@ extension Compiler.ByteCodeGen { } case .endOfSubjectBeforeNewline: - builder.buildAssert { (input, pos, bounds) in + builder.buildAssert { 
[semanticLevel = options.semanticLevel] (input, pos, bounds) in if pos == input.endIndex { return true } - return input.index(after: pos) == input.endIndex - && input[pos].isNewline + switch semanticLevel { + case .graphemeCluster: + return input.index(after: pos) == input.endIndex + && input[pos].isNewline + case .unicodeScalar: + return input.unicodeScalars.index(after: pos) == input.endIndex + && input.unicodeScalars[pos].isNewline + } } case .endOfSubject: @@ -115,8 +121,14 @@ extension Compiler.ByteCodeGen { case .startOfLine: if options.anchorsMatchNewlines { - builder.buildAssert { (input, pos, bounds) in - pos == input.startIndex || input[input.index(before: pos)].isNewline + builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in + if pos == input.startIndex { return true } + switch semanticLevel { + case .graphemeCluster: + return input[input.index(before: pos)].isNewline + case .unicodeScalar: + return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline + } } } else { builder.buildAssert { (input, pos, bounds) in @@ -126,8 +138,14 @@ extension Compiler.ByteCodeGen { case .endOfLine: if options.anchorsMatchNewlines { - builder.buildAssert { (input, pos, bounds) in - pos == input.endIndex || input[pos].isNewline + builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in + if pos == input.endIndex { return true } + switch semanticLevel { + case .graphemeCluster: + return input[pos].isNewline + case .unicodeScalar: + return input.unicodeScalars[pos].isNewline + } } } else { builder.buildAssert { (input, pos, bounds) in @@ -168,7 +186,15 @@ extension Compiler.ByteCodeGen { } mutating func emitCharacter(_ c: Character) throws { - // FIXME: Does semantic level matter? 
+ // Unicode scalar matches the specific scalars that comprise a character + if options.semanticLevel == .unicodeScalar { + print("emitting '\(c)' as a sequence of \(c.unicodeScalars.count) scalars") + for scalar in c.unicodeScalars { + try emitScalar(scalar) + } + return + } + if options.isCaseInsensitive && c.isCased { // TODO: buildCaseInsensitiveMatch(c) or buildMatch(c, caseInsensitive: true) builder.buildConsume { input, bounds in @@ -625,22 +651,44 @@ extension Compiler.ByteCodeGen { try emitAtom(a) case let .quotedLiteral(s): - // TODO: Should this incorporate options? - if options.isCaseInsensitive { - // TODO: buildCaseInsensitiveMatchSequence(c) or alternative - builder.buildConsume { input, bounds in - var iterator = s.makeIterator() + if options.semanticLevel == .graphemeCluster { + if options.isCaseInsensitive { + // TODO: buildCaseInsensitiveMatchSequence(c) or alternative + builder.buildConsume { input, bounds in + var iterator = s.makeIterator() + var currentIndex = bounds.lowerBound + while let ch = iterator.next() { + guard currentIndex < bounds.upperBound, + ch.lowercased() == input[currentIndex].lowercased() + else { return nil } + input.formIndex(after: ¤tIndex) + } + return currentIndex + } + } else { + builder.buildMatchSequence(s) + } + } else { + builder.buildConsume { + [caseInsensitive = options.isCaseInsensitive] input, bounds in + // TODO: Case folding + var iterator = s.unicodeScalars.makeIterator() var currentIndex = bounds.lowerBound - while let ch = iterator.next() { - guard currentIndex < bounds.upperBound, - ch.lowercased() == input[currentIndex].lowercased() - else { return nil } - input.formIndex(after: ¤tIndex) + while let scalar = iterator.next() { + guard currentIndex < bounds.upperBound else { return nil } + if caseInsensitive { + if scalar.properties.lowercaseMapping != input.unicodeScalars[currentIndex].properties.lowercaseMapping { + return nil + } + } else { + if scalar != input.unicodeScalars[currentIndex] { + return 
nil + } + } + input.unicodeScalars.formIndex(after: ¤tIndex) } return currentIndex } - } else { - builder.buildMatchSequence(s) } case let .regexLiteral(l): diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index 47faa23ed..1c20761c8 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -38,7 +38,7 @@ class Compiler { func _compileRegex( _ regex: String, _ syntax: SyntaxOptions = .traditional ) throws -> Executor { - let ast = try parse(regex, syntax) + let ast = try parse(regex, .semantic, syntax) let program = try Compiler(ast: ast).emit() return Executor(program: program) } diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 356b7cc4b..48f353e52 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -111,6 +111,51 @@ extension DSLTree.Atom { } } +extension String { + /// Compares this string to `other` using the loose matching rule UAX44-LM2, + /// which ignores case, whitespace, underscores, and nearly all medial + /// hyphens. 
+ /// + /// FIXME: Only ignore medial hyphens + /// FIXME: Special case for U+1180 HANGUL JUNGSEONG O-E + /// See https://www.unicode.org/reports/tr44/#Matching_Rules + fileprivate func isEqualByUAX44LM2(to other: String) -> Bool { + var index = startIndex + var otherIndex = other.startIndex + + while index < endIndex && otherIndex < other.endIndex { + if self[index].isWhitespace || self[index] == "-" || self[index] == "_" { + formIndex(after: &index) + continue + } + if other[otherIndex].isWhitespace || other[otherIndex] == "-" || other[otherIndex] == "_" { + other.formIndex(after: &otherIndex) + continue + } + + if self[index] != other[otherIndex] && self[index].lowercased() != other[otherIndex].lowercased() { + return false + } + + formIndex(after: &index) + other.formIndex(after: &otherIndex) + } + return index == endIndex && otherIndex == other.endIndex + } +} + +func consumeName(_ name: String, opts: MatchingOptions) -> MEProgram.ConsumeFunction { + let consume = opts.semanticLevel == .graphemeCluster + ? consumeCharacterWithSingleScalar + : consumeScalar + + return consume(propertyScalarPredicate { + // FIXME: name aliases not covered by $0.nameAlias are missed + // e.g. U+FEFF has both 'BYTE ORDER MARK' and 'BOM' as aliases + $0.name?.isEqualByUAX44LM2(to: name) == true + || $0.nameAlias?.isEqualByUAX44LM2(to: name) == true + }) +} // TODO: This is basically an AST interpreter, which would // be good or interesting to build regardless, and serves @@ -131,6 +176,13 @@ extension AST.Atom { } } + var singleScalar: UnicodeScalar? { + switch kind { + case .scalar(let s): return s.value + default: return nil + } + } + func generateConsumer( _ opts: MatchingOptions ) throws -> MEProgram.ConsumeFunction? 
{ @@ -148,7 +200,7 @@ extension AST.Atom { case let .scalar(s): assertionFailure( "Should have been handled by tree conversion") - return consumeScalar { $0 == s } + return consumeScalar { $0 == s.value } case let .char(c): assertionFailure( @@ -167,10 +219,7 @@ extension AST.Atom { return try p.generateConsumer(opts) case let .namedCharacter(name): - return consumeScalarProp { - // TODO: alias? casing? - $0.name == name || $0.nameAlias == name - } + return consumeName(name, opts: opts) case .any: assertionFailure( @@ -181,9 +230,9 @@ extension AST.Atom { // handled in emitAssertion return nil - case .escaped, .keyboardControl, .keyboardMeta, .keyboardMetaControl, - .backreference, .subpattern, .callout, .backtrackingDirective, - .changeMatchingOptions: + case .scalarSequence, .escaped, .keyboardControl, .keyboardMeta, + .keyboardMetaControl, .backreference, .subpattern, .callout, + .backtrackingDirective, .changeMatchingOptions: // FIXME: implement return nil } @@ -312,8 +361,9 @@ extension DSLTree.CustomCharacterClass { } } if isInverted { - // FIXME: semantic level - return input.index(after: bounds.lowerBound) + return opts.semanticLevel == .graphemeCluster + ? 
input.index(after: bounds.lowerBound) + : input.unicodeScalars.index(after: bounds.lowerBound) } return nil } @@ -321,38 +371,26 @@ extension DSLTree.CustomCharacterClass { } // NOTE: Conveniences, though not most performant -private func consumeScalarScript( - _ s: Unicode.Script -) -> MEProgram.ConsumeFunction { - consumeScalar { - Unicode.Script($0) == s - } +typealias ScalarPredicate = (UnicodeScalar) -> Bool + +private func scriptScalarPredicate(_ s: Unicode.Script) -> ScalarPredicate { + { Unicode.Script($0) == s } } -private func consumeScalarScriptExtension( - _ s: Unicode.Script -) -> MEProgram.ConsumeFunction { - consumeScalar { - let extensions = Unicode.Script.extensions(for: $0) - return extensions.contains(s) - } +private func scriptExtensionScalarPredicate(_ s: Unicode.Script) -> ScalarPredicate { + { Unicode.Script.extensions(for: $0).contains(s) } } -private func consumeScalarGC( - _ gc: Unicode.GeneralCategory -) -> MEProgram.ConsumeFunction { - consumeScalar { gc == $0.properties.generalCategory } +private func categoryScalarPredicate(_ gc: Unicode.GeneralCategory) -> ScalarPredicate { + { gc == $0.properties.generalCategory } } -private func consumeScalarGCs( - _ gcs: [Unicode.GeneralCategory] -) -> MEProgram.ConsumeFunction { - consumeScalar { gcs.contains($0.properties.generalCategory) } +private func categoriesScalarPredicate(_ gcs: [Unicode.GeneralCategory]) -> ScalarPredicate { + { gcs.contains($0.properties.generalCategory) } } -private func consumeScalarProp( - _ p: @escaping (Unicode.Scalar.Properties) -> Bool -) -> MEProgram.ConsumeFunction { - consumeScalar { p($0.properties) } +private func propertyScalarPredicate(_ p: @escaping (Unicode.Scalar.Properties) -> Bool) -> ScalarPredicate { + { p($0.properties) } } + func consumeScalar( - _ p: @escaping (Unicode.Scalar) -> Bool + _ p: @escaping ScalarPredicate ) -> MEProgram.ConsumeFunction { { input, bounds in // TODO: bounds check? 
@@ -364,6 +402,37 @@ func consumeScalar( return nil } } +func consumeCharacterWithLeadingScalar( + _ p: @escaping ScalarPredicate +) -> MEProgram.ConsumeFunction { + { input, bounds in + let curIdx = bounds.lowerBound + if p(input[curIdx].unicodeScalars.first!) { + return input.index(after: curIdx) + } + return nil + } +} +func consumeCharacterWithSingleScalar( + _ p: @escaping ScalarPredicate +) -> MEProgram.ConsumeFunction { + { input, bounds in + let curIdx = bounds.lowerBound + + if input[curIdx].hasExactlyOneScalar && p(input[curIdx].unicodeScalars.first!) { + return input.index(after: curIdx) + } + return nil + } +} + +func consumeFunction( + for opts: MatchingOptions +) -> (@escaping ScalarPredicate) -> MEProgram.ConsumeFunction { + opts.semanticLevel == .graphemeCluster + ? consumeCharacterWithLeadingScalar + : consumeScalar +} extension AST.Atom.CharacterProperty { func generateConsumer( @@ -375,16 +444,15 @@ extension AST.Atom.CharacterProperty { ) -> MEProgram.ConsumeFunction { return { input, bounds in if p(input, bounds) != nil { return nil } - // TODO: semantic level + // TODO: bounds check - return input.unicodeScalars.index( - after: bounds.lowerBound) + return opts.semanticLevel == .graphemeCluster + ? input.index(after: bounds.lowerBound) + : input.unicodeScalars.index(after: bounds.lowerBound) } } - // FIXME: Below is largely scalar based, for convenience, - // but we want a comprehensive treatment to semantic mode - // switching. + let consume = consumeFunction(for: opts) let preInversion: MEProgram.ConsumeFunction = try { switch kind { @@ -395,11 +463,16 @@ extension AST.Atom.CharacterProperty { return input.index(after: bounds.lowerBound) } case .assigned: - return consumeScalar { + return consume { $0.properties.generalCategory != .unassigned } case .ascii: - return consumeScalar(\.isASCII) + // Note: ASCII must look at the whole character, not just the first + // scalar. 
That is, "e\u{301}" is not an ASCII character, even though + // the first scalar is. + return opts.semanticLevel == .graphemeCluster + ? consumeCharacterWithSingleScalar(\.isASCII) + : consumeScalar(\.isASCII) case .generalCategory(let p): return try p.generateConsumer(opts) @@ -410,10 +483,13 @@ extension AST.Atom.CharacterProperty { return value ? cons : invert(cons) case .script(let s): - return consumeScalarScript(s) + return consume(scriptScalarPredicate(s)) case .scriptExtension(let s): - return consumeScalarScriptExtension(s) + return consume(scriptExtensionScalarPredicate(s)) + + case .named(let n): + return consumeName(n, opts: opts) case .posix(let p): return p.generateConsumer(opts) @@ -436,49 +512,51 @@ extension Unicode.BinaryProperty { func generateConsumer( _ opts: MatchingOptions ) throws -> MEProgram.ConsumeFunction { - switch self { + let consume = consumeFunction(for: opts) + // Note if you implement support for any of the below, you need to adjust + // the switch in Sema.swift to not have it be diagnosed as unsupported + // (potentially guarded on deployment version). 
+ switch self { case .asciiHexDigit: - return consumeScalarProp { + return consume(propertyScalarPredicate { $0.isHexDigit && $0.isASCIIHexDigit - } + }) case .alphabetic: - return consumeScalarProp(\.isAlphabetic) + return consume(propertyScalarPredicate(\.isAlphabetic)) case .bidiControl: break - - - case .bidiMirrored: - return consumeScalarProp(\.isBidiMirrored) + case .bidiMirrored: + return consume(propertyScalarPredicate(\.isBidiMirrored)) case .cased: - return consumeScalarProp(\.isCased) + return consume(propertyScalarPredicate(\.isCased)) case .compositionExclusion: break case .caseIgnorable: - return consumeScalarProp(\.isCaseIgnorable) + return consume(propertyScalarPredicate(\.isCaseIgnorable)) case .changesWhenCasefolded: - return consumeScalarProp(\.changesWhenCaseFolded) + return consume(propertyScalarPredicate(\.changesWhenCaseFolded)) case .changesWhenCasemapped: - return consumeScalarProp(\.changesWhenCaseMapped) + return consume(propertyScalarPredicate(\.changesWhenCaseMapped)) case .changesWhenNFKCCasefolded: - return consumeScalarProp(\.changesWhenNFKCCaseFolded) + return consume(propertyScalarPredicate(\.changesWhenNFKCCaseFolded)) case .changesWhenLowercased: - return consumeScalarProp(\.changesWhenLowercased) + return consume(propertyScalarPredicate(\.changesWhenLowercased)) case .changesWhenTitlecased: - return consumeScalarProp(\.changesWhenTitlecased) + return consume(propertyScalarPredicate(\.changesWhenTitlecased)) case .changesWhenUppercased: - return consumeScalarProp(\.changesWhenUppercased) + return consume(propertyScalarPredicate(\.changesWhenUppercased)) case .dash: - return consumeScalarProp(\.isDash) + return consume(propertyScalarPredicate(\.isDash)) case .deprecated: - return consumeScalarProp(\.isDeprecated) + return consume(propertyScalarPredicate(\.isDeprecated)) case .defaultIgnorableCodePoint: - return consumeScalarProp(\.isDefaultIgnorableCodePoint) + return 
consume(propertyScalarPredicate(\.isDefaultIgnorableCodePoint)) case .diacratic: // spelling? - return consumeScalarProp(\.isDiacritic) + return consume(propertyScalarPredicate(\.isDiacritic)) case .emojiModifierBase: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { - return consumeScalarProp(\.isEmojiModifierBase) + return consume(propertyScalarPredicate(\.isEmojiModifierBase)) } else { throw Unsupported( "isEmojiModifierBase on old OSes") @@ -487,59 +565,59 @@ extension Unicode.BinaryProperty { break case .emojiModifier: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { - return consumeScalarProp(\.isEmojiModifier) + return consume(propertyScalarPredicate(\.isEmojiModifier)) } else { throw Unsupported("isEmojiModifier on old OSes") } case .emoji: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { - return consumeScalarProp(\.isEmoji) + return consume(propertyScalarPredicate(\.isEmoji)) } else { throw Unsupported("isEmoji on old OSes") } case .emojiPresentation: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { - return consumeScalarProp(\.isEmojiPresentation) + return consume(propertyScalarPredicate(\.isEmojiPresentation)) } else { throw Unsupported( "isEmojiPresentation on old OSes") } case .extender: - return consumeScalarProp(\.isExtender) + return consume(propertyScalarPredicate(\.isExtender)) case .extendedPictographic: break // NOTE: Stdlib has this data internally case .fullCompositionExclusion: - return consumeScalarProp(\.isFullCompositionExclusion) + return consume(propertyScalarPredicate(\.isFullCompositionExclusion)) case .graphemeBase: - return consumeScalarProp(\.isGraphemeBase) + return consume(propertyScalarPredicate(\.isGraphemeBase)) case .graphemeExtended: - return consumeScalarProp(\.isGraphemeExtend) + return consume(propertyScalarPredicate(\.isGraphemeExtend)) case .graphemeLink: break case .hexDigit: - return consumeScalarProp(\.isHexDigit) + return 
consume(propertyScalarPredicate(\.isHexDigit)) case .hyphen: break case .idContinue: - return consumeScalarProp(\.isIDContinue) + return consume(propertyScalarPredicate(\.isIDContinue)) case .ideographic: - return consumeScalarProp(\.isIdeographic) + return consume(propertyScalarPredicate(\.isIdeographic)) case .idStart: - return consumeScalarProp(\.isIDStart) + return consume(propertyScalarPredicate(\.isIDStart)) case .idsBinaryOperator: - return consumeScalarProp(\.isIDSBinaryOperator) + return consume(propertyScalarPredicate(\.isIDSBinaryOperator)) case .idsTrinaryOperator: - return consumeScalarProp(\.isIDSTrinaryOperator) + return consume(propertyScalarPredicate(\.isIDSTrinaryOperator)) case .joinControl: - return consumeScalarProp(\.isJoinControl) + return consume(propertyScalarPredicate(\.isJoinControl)) case .logicalOrderException: - return consumeScalarProp(\.isLogicalOrderException) + return consume(propertyScalarPredicate(\.isLogicalOrderException)) case .lowercase: - return consumeScalarProp(\.isLowercase) + return consume(propertyScalarPredicate(\.isLowercase)) case .math: - return consumeScalarProp(\.isMath) + return consume(propertyScalarPredicate(\.isMath)) case .noncharacterCodePoint: - return consumeScalarProp(\.isNoncharacterCodePoint) + return consume(propertyScalarPredicate(\.isNoncharacterCodePoint)) case .otherAlphabetic: break case .otherDefaultIgnorableCodePoint: @@ -557,37 +635,37 @@ extension Unicode.BinaryProperty { case .otherUppercase: break case .patternSyntax: - return consumeScalarProp(\.isPatternSyntax) + return consume(propertyScalarPredicate(\.isPatternSyntax)) case .patternWhitespace: - return consumeScalarProp(\.isPatternWhitespace) + return consume(propertyScalarPredicate(\.isPatternWhitespace)) case .prependedConcatenationMark: break case .quotationMark: - return consumeScalarProp(\.isQuotationMark) + return consume(propertyScalarPredicate(\.isQuotationMark)) case .radical: - return consumeScalarProp(\.isRadical) + return 
consume(propertyScalarPredicate(\.isRadical)) case .regionalIndicator: - return consumeScalar { s in + return consume { s in (0x1F1E6...0x1F1FF).contains(s.value) } case .softDotted: - return consumeScalarProp(\.isSoftDotted) + return consume(propertyScalarPredicate(\.isSoftDotted)) case .sentenceTerminal: - return consumeScalarProp(\.isSentenceTerminal) + return consume(propertyScalarPredicate(\.isSentenceTerminal)) case .terminalPunctuation: - return consumeScalarProp(\.isTerminalPunctuation) + return consume(propertyScalarPredicate(\.isTerminalPunctuation)) case .unifiedIdiograph: // spelling? - return consumeScalarProp(\.isUnifiedIdeograph) + return consume(propertyScalarPredicate(\.isUnifiedIdeograph)) case .uppercase: - return consumeScalarProp(\.isUppercase) + return consume(propertyScalarPredicate(\.isUppercase)) case .variationSelector: - return consumeScalarProp(\.isVariationSelector) + return consume(propertyScalarPredicate(\.isVariationSelector)) case .whitespace: - return consumeScalarProp(\.isWhitespace) + return consume(propertyScalarPredicate(\.isWhitespace)) case .xidContinue: - return consumeScalarProp(\.isXIDContinue) + return consume(propertyScalarPredicate(\.isXIDContinue)) case .xidStart: - return consumeScalarProp(\.isXIDStart) + return consume(propertyScalarPredicate(\.isXIDStart)) case .expandsOnNFC, .expandsOnNFD, .expandsOnNFKD, .expandsOnNFKC: throw Unsupported("Unicode-deprecated: \(self)") @@ -602,42 +680,44 @@ extension Unicode.POSIXProperty { func generateConsumer( _ opts: MatchingOptions ) -> MEProgram.ConsumeFunction { - // FIXME: semantic levels, modes, etc + let consume = consumeFunction(for: opts) + + // FIXME: modes, etc switch self { case .alnum: - return consumeScalarProp { + return consume(propertyScalarPredicate { $0.isAlphabetic || $0.numericType != nil - } + }) case .blank: - return consumeScalar { s in + return consume { s in s.properties.generalCategory == .spaceSeparator || s == "\t" } case .graph: - return 
consumeScalarProp { p in + return consume(propertyScalarPredicate { p in !( p.isWhitespace || p.generalCategory == .control || p.generalCategory == .surrogate || p.generalCategory == .unassigned ) - } + }) case .print: - return consumeScalarProp { p in + return consume(propertyScalarPredicate { p in // FIXME: better def p.generalCategory != .control - } + }) case .word: - return consumeScalarProp { p in + return consume(propertyScalarPredicate { p in // FIXME: better def p.isAlphabetic || p.numericType != nil || p.isJoinControl || p.isDash// marks and connectors... - } + }) case .xdigit: - return consumeScalarProp(\.isHexDigit) // or number + return consume(propertyScalarPredicate(\.isHexDigit)) // or number } } @@ -648,112 +728,115 @@ extension Unicode.ExtendedGeneralCategory { func generateConsumer( _ opts: MatchingOptions ) throws -> MEProgram.ConsumeFunction { + let consume = consumeFunction(for: opts) + switch self { case .letter: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .uppercaseLetter, .lowercaseLetter, .titlecaseLetter, .modifierLetter, .otherLetter - ]) + ])) case .mark: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .nonspacingMark, .spacingMark, .enclosingMark - ]) + ])) case .number: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .decimalNumber, .letterNumber, .otherNumber - ]) + ])) case .symbol: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .mathSymbol, .currencySymbol, .modifierSymbol, .otherSymbol - ]) + ])) case .punctuation: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .connectorPunctuation, .dashPunctuation, .openPunctuation, .closePunctuation, .initialPunctuation, .finalPunctuation, .otherPunctuation - ]) + ])) case .separator: - return consumeScalarGCs([ + return consume(categoriesScalarPredicate([ .spaceSeparator, .lineSeparator, .paragraphSeparator - ]) + ])) case .other: - return consumeScalarGCs([ + 
return consume(categoriesScalarPredicate([ .control, .format, .surrogate, .privateUse, .unassigned - ]) + ])) case .casedLetter: - throw Unsupported( - "TODO: cased letter? not the property?") + return consume(categoriesScalarPredicate([ + .uppercaseLetter, .lowercaseLetter, .titlecaseLetter + ])) case .control: - return consumeScalarGC(.control) + return consume(categoryScalarPredicate(.control)) case .format: - return consumeScalarGC(.format) + return consume(categoryScalarPredicate(.format)) case .unassigned: - return consumeScalarGC(.unassigned) + return consume(categoryScalarPredicate(.unassigned)) case .privateUse: - return consumeScalarGC(.privateUse) + return consume(categoryScalarPredicate(.privateUse)) case .surrogate: - return consumeScalarGC(.surrogate) + return consume(categoryScalarPredicate(.surrogate)) case .lowercaseLetter: - return consumeScalarGC(.lowercaseLetter) + return consume(categoryScalarPredicate(.lowercaseLetter)) case .modifierLetter: - return consumeScalarGC(.modifierLetter) + return consume(categoryScalarPredicate(.modifierLetter)) case .otherLetter: - return consumeScalarGC(.otherLetter) + return consume(categoryScalarPredicate(.otherLetter)) case .titlecaseLetter: - return consumeScalarGC(.titlecaseLetter) + return consume(categoryScalarPredicate(.titlecaseLetter)) case .uppercaseLetter: - return consumeScalarGC(.uppercaseLetter) + return consume(categoryScalarPredicate(.uppercaseLetter)) case .spacingMark: - return consumeScalarGC(.spacingMark) + return consume(categoryScalarPredicate(.spacingMark)) case .enclosingMark: - return consumeScalarGC(.enclosingMark) + return consume(categoryScalarPredicate(.enclosingMark)) case .nonspacingMark: - return consumeScalarGC(.nonspacingMark) + return consume(categoryScalarPredicate(.nonspacingMark)) case .decimalNumber: - return consumeScalarGC(.decimalNumber) + return consume(categoryScalarPredicate(.decimalNumber)) case .letterNumber: - return consumeScalarGC(.letterNumber) + return 
consume(categoryScalarPredicate(.letterNumber)) case .otherNumber: - return consumeScalarGC(.otherNumber) + return consume(categoryScalarPredicate(.otherNumber)) case .connectorPunctuation: - return consumeScalarGC(.connectorPunctuation) + return consume(categoryScalarPredicate(.connectorPunctuation)) case .dashPunctuation: - return consumeScalarGC(.dashPunctuation) + return consume(categoryScalarPredicate(.dashPunctuation)) case .closePunctuation: - return consumeScalarGC(.closePunctuation) + return consume(categoryScalarPredicate(.closePunctuation)) case .finalPunctuation: - return consumeScalarGC(.finalPunctuation) + return consume(categoryScalarPredicate(.finalPunctuation)) case .initialPunctuation: - return consumeScalarGC(.initialPunctuation) + return consume(categoryScalarPredicate(.initialPunctuation)) case .otherPunctuation: - return consumeScalarGC(.otherPunctuation) + return consume(categoryScalarPredicate(.otherPunctuation)) case .openPunctuation: - return consumeScalarGC(.openPunctuation) + return consume(categoryScalarPredicate(.openPunctuation)) case .currencySymbol: - return consumeScalarGC(.currencySymbol) + return consume(categoryScalarPredicate(.currencySymbol)) case .modifierSymbol: - return consumeScalarGC(.modifierSymbol) + return consume(categoryScalarPredicate(.modifierSymbol)) case .mathSymbol: - return consumeScalarGC(.mathSymbol) + return consume(categoryScalarPredicate(.mathSymbol)) case .otherSymbol: - return consumeScalarGC(.otherSymbol) + return consume(categoryScalarPredicate(.otherSymbol)) case .lineSeparator: - return consumeScalarGC(.lineSeparator) + return consume(categoryScalarPredicate(.lineSeparator)) case .paragraphSeparator: - return consumeScalarGC(.paragraphSeparator) + return consume(categoryScalarPredicate(.paragraphSeparator)) case .spaceSeparator: - return consumeScalarGC(.spaceSeparator) + return consume(categoryScalarPredicate(.spaceSeparator)) } } } diff --git a/Sources/_StringProcessing/MatchingOptions.swift 
b/Sources/_StringProcessing/MatchingOptions.swift index 665715a60..f5c554bdc 100644 --- a/Sources/_StringProcessing/MatchingOptions.swift +++ b/Sources/_StringProcessing/MatchingOptions.swift @@ -117,7 +117,6 @@ extension MatchingOptions { // Deprecated CharacterClass.MatchLevel API extension MatchingOptions { - @available(*, deprecated) var matchLevel: _CharacterClassModel.MatchLevel { switch semanticLevel { case .graphemeCluster: @@ -135,7 +134,7 @@ extension MatchingOptions { case caseInsensitive case allowDuplicateGroupNames case multiline - case noAutoCapture + case namedCapturesOnly case singleLine case reluctantByDefault @@ -174,8 +173,8 @@ extension MatchingOptions { self = .allowDuplicateGroupNames case .multiline: self = .multiline - case .noAutoCapture: - self = .noAutoCapture + case .namedCapturesOnly: + self = .namedCapturesOnly case .singleLine: self = .singleLine case .reluctantByDefault: diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 1b5c2a4c5..601447968 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -671,13 +671,19 @@ extension AST.Atom { } var _dslBase: String { + func scalarLiteral(_ s: UnicodeScalar) -> String { + let hex = String(s.value, radix: 16, uppercase: true) + return "\\u{\(hex)}" + } switch kind { case let .char(c): return String(c) case let .scalar(s): - let hex = String(s.value, radix: 16, uppercase: true) - return "\\u{\(hex)}" + return scalarLiteral(s.value) + + case let .scalarSequence(seq): + return seq.scalarValues.map(scalarLiteral).joined() case let .property(p): return p._dslBase @@ -769,13 +775,9 @@ extension AST.Atom { var _regexBase: String { switch kind { - case let .char(c): - return String(c) - - case let .scalar(s): - let hex = String(s.value, radix: 16, uppercase: true) - return "\\u{\(hex)}" - + case .char, .scalar, .scalarSequence: + return literalStringValue! 
+ case let .property(p): return p._regexBase diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index ef98a7b8f..79a515033 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -60,18 +60,24 @@ extension AST.Node { var result = "" var idx = idx while idx < astChildren.endIndex { - let atom: AST.Atom? = astChildren[idx].as() + guard let atom: AST.Atom = astChildren[idx].as() else { break } // TODO: For printing, nice to coalesce // scalars literals too. We likely need a different // approach even before we have a better IR. - guard let char = atom?.singleCharacter else { + if let char = atom.singleCharacter { + result.append(char) + } else if let scalar = atom.singleScalar { + result.append(Character(scalar)) + } else if case .scalarSequence(let seq) = atom.kind { + result += seq.scalarValues.map(Character.init) + } else { break } - result.append(char) + astChildren.formIndex(after: &idx) } - return result.count <= 1 ? nil : (idx, result) + return result.isEmpty ? nil : (idx, result) } // No need to nest single children concatenations @@ -96,7 +102,7 @@ extension AST.Node { curIdx = nextIdx } else { children.append(astChildren[curIdx].dslTreeNode) - children.formIndex(after: &curIdx) + astChildren.formIndex(after: &curIdx) } } return .concatenation(children) @@ -132,7 +138,15 @@ extension AST.Node { return .trivia(v.contents) case let .atom(v): - return .atom(v.dslTreeAtom) + switch v.kind { + case .scalarSequence(let seq): + // Scalar sequences are splatted into concatenated scalars, which + // becomes a quoted literal. Sequences nested in concatenations have + // already been coalesced, this just handles the lone atom case. 
+ return .quotedLiteral(String(seq.scalarValues.map(Character.init))) + default: + return .atom(v.dslTreeAtom) + } case let .customCharacterClass(ccc): return .customCharacterClass(ccc.dslTreeClass) @@ -207,7 +221,7 @@ extension AST.Atom { switch self.kind { case let .char(c): return .char(c) - case let .scalar(s): return .scalar(s) + case let .scalar(s): return .char(Character(s.value)) case .any: return .any case let .backreference(r): return .backreference(.init(ast: r)) case let .changeMatchingOptions(seq): return .changeMatchingOptions(.init(ast: seq)) diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift index 23222da00..6dd8e17b6 100644 --- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift +++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift @@ -17,7 +17,7 @@ extension Regex where Output == AnyRegexOutput { /// /// - Parameter pattern: The regular expression. public init(_ pattern: String) throws { - self.init(ast: try parse(pattern, .traditional)) + self.init(ast: try parse(pattern, .semantic, .traditional)) } } @@ -31,7 +31,7 @@ extension Regex { _ pattern: String, as: Output.Type = Output.self ) throws { - self.init(ast: try parse(pattern, .traditional)) + self.init(ast: try parse(pattern, .semantic, .traditional)) } } @@ -62,6 +62,7 @@ public struct AnyRegexOutput { /// The depth of `Optioals`s wrapping the underlying value. For example, /// `Substring` has optional depth `0`, and `Int??` has optional depth `2`. let optionalDepth: Int + /// The bounds of the output element. let bounds: Range? } @@ -90,7 +91,7 @@ extension AnyRegexOutput { /// - Parameter type: The expected output type. /// - Returns: The output, if the underlying value can be converted to the /// output type; otherwise `nil`. - public func `as`(_ type: Output.Type) -> Output? { + public func `as`(_ type: Output.Type = Output.self) -> Output? 
{ let elements = _elements.map { StructuredCapture( optionalCount: $0.optionalDepth, @@ -206,23 +207,30 @@ extension Regex.Match where Output == AnyRegexOutput { /// - Parameter type: The expected output type. /// - Returns: A match generic over the output type, if the underlying values /// can be converted to the output type; otherwise, `nil`. - public func `as`(_ type: Output.Type) -> Regex.Match? { + public func `as`( + _ type: Output.Type = Output.self + ) -> Regex.Match? { fatalError("FIXME: Not implemented") } } @available(SwiftStdlib 5.7, *) -extension Regex where Output == AnyRegexOutput { +extension Regex { /// Returns whether a named-capture with `name` exists public func contains(captureNamed name: String) -> Bool { - fatalError("FIXME: not implemented") + program.tree.root._captureList.captures.contains(where: { + $0.name == name + }) } +} +@available(SwiftStdlib 5.7, *) +extension Regex where Output == AnyRegexOutput { /// Creates a type-erased regex from an existing regex. /// /// Use this initializer to fit a regex with strongly typed captures into the /// use site of a dynamic regex, i.e. one that was created from a string. - public init(_ match: Regex) { + public init(_ regex: Regex) { fatalError("FIXME: Not implemented") } @@ -231,7 +239,9 @@ extension Regex where Output == AnyRegexOutput { /// - Parameter type: The expected output type. /// - Returns: A regex generic over the output type if the underlying types can be converted. /// Returns `nil` otherwise. - public func `as`(_ type: Output.Type) -> Regex? { + public func `as`( + _ type: Output.Type = Output.self + ) -> Regex? { fatalError("FIXME: Not implemented") } } diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 1f9a35dad..29d2267b2 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -44,7 +44,7 @@ public struct Regex: RegexComponent { // Compiler interface. 
Do not change independently. @usableFromInline init(_regexString pattern: String) { - self.init(ast: try! parse(pattern, .traditional)) + self.init(ast: try! parse(pattern, .semantic, .traditional)) } // Compiler interface. Do not change independently. @@ -53,7 +53,7 @@ public struct Regex: RegexComponent { assert(version == currentRegexLiteralFormatVersion) // The version argument is passed by the compiler using the value defined // in libswiftParseRegexLiteral. - self.init(ast: try! parseWithDelimiters(pattern)) + self.init(ast: try! parseWithDelimiters(pattern, .semantic)) } public var regex: Regex { diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index b279c08e4..ff057f2ee 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -472,7 +472,7 @@ extension DSLTree.Node { list.append(.init( name: name, type: child.valueCaptureType?.base, - optionalDepth: nesting)) + optionalDepth: nesting, .fake)) child._addCaptures(to: &list, optionalNesting: nesting) case let .nonCapturingGroup(kind, child): diff --git a/Sources/_StringProcessing/Unicode/CharacterProps.swift b/Sources/_StringProcessing/Unicode/CharacterProps.swift index cfa68c425..80f6819a6 100644 --- a/Sources/_StringProcessing/Unicode/CharacterProps.swift +++ b/Sources/_StringProcessing/Unicode/CharacterProps.swift @@ -12,3 +12,9 @@ // TODO +extension Character { + /// Whether this character is made up of exactly one Unicode scalar value. 
+ var hasExactlyOneScalar: Bool { + unicodeScalars.index(after: unicodeScalars.startIndex) == unicodeScalars.endIndex + } +} diff --git a/Sources/_StringProcessing/Unicode/ScalarProps.swift b/Sources/_StringProcessing/Unicode/ScalarProps.swift index 52a870357..0894fa572 100644 --- a/Sources/_StringProcessing/Unicode/ScalarProps.swift +++ b/Sources/_StringProcessing/Unicode/ScalarProps.swift @@ -46,3 +46,19 @@ extension Unicode.Script { return result } } + +extension UnicodeScalar { + var isHorizontalWhitespace: Bool { + value == 0x09 || properties.generalCategory == .spaceSeparator + } + + var isNewline: Bool { + switch value { + case 0x000A...0x000D /* LF ... CR */: return true + case 0x0085 /* NEXT LINE (NEL) */: return true + case 0x2028 /* LINE SEPARATOR */: return true + case 0x2029 /* PARAGRAPH SEPARATOR */: return true + default: return false + } + } +} diff --git a/Sources/_StringProcessing/Utility/ASTBuilder.swift b/Sources/_StringProcessing/Utility/ASTBuilder.swift index 51d4f8bfc..78477e2b5 100644 --- a/Sources/_StringProcessing/Utility/ASTBuilder.swift +++ b/Sources/_StringProcessing/Utility/ASTBuilder.swift @@ -338,10 +338,26 @@ func escaped( atom(.escaped(e)) } func scalar(_ s: Unicode.Scalar) -> AST.Node { - atom(.scalar(s)) + .atom(scalar_a(s)) +} +func scalar_a(_ s: Unicode.Scalar) -> AST.Atom { + atom_a(.scalar(.init(s, .fake))) } func scalar_m(_ s: Unicode.Scalar) -> AST.CustomCharacterClass.Member { - atom_m(.scalar(s)) + .atom(scalar_a(s)) +} + +func scalarSeq(_ s: Unicode.Scalar...) -> AST.Node { + .atom(scalarSeq_a(s)) +} +func scalarSeq_a(_ s: Unicode.Scalar...) -> AST.Atom { + scalarSeq_a(s) +} +func scalarSeq_a(_ s: [Unicode.Scalar]) -> AST.Atom { + atom_a(.scalarSequence(.init(s.map { .init($0, .fake) }, trivia: []))) +} +func scalarSeq_m(_ s: Unicode.Scalar...) -> AST.CustomCharacterClass.Member { + .atom(scalarSeq_a(s)) } func backreference(_ r: AST.Reference.Kind, recursionLevel: Int? 
= nil) -> AST.Node { diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index 4d0c12c1f..85dd1ca37 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -178,15 +178,18 @@ public struct _CharacterClassModel: Hashable { matched = c.isNumber && (c.isASCII || !options.usesASCIIDigits) case .hexDigit: matched = c.isHexDigit && (c.isASCII || !options.usesASCIIDigits) - case .horizontalWhitespace: fatalError("Not implemented") - case .newlineSequence: - matched = c.isNewline && (c.isASCII || !options.usesASCIISpaces) - case .verticalWhitespace: fatalError("Not implemented") + case .horizontalWhitespace: + matched = c.unicodeScalars.first?.isHorizontalWhitespace == true + && (c.isASCII || !options.usesASCIISpaces) + case .newlineSequence, .verticalWhitespace: + matched = c.unicodeScalars.first?.isNewline == true + && (c.isASCII || !options.usesASCIISpaces) case .whitespace: matched = c.isWhitespace && (c.isASCII || !options.usesASCIISpaces) case .word: matched = c.isWordCharacter && (c.isASCII || !options.usesASCIIWord) - case .custom(let set): matched = set.any { $0.matches(c, with: options) } + case .custom(let set): + matched = set.any { $0.matches(c, with: options) } } if isInverted { matched.toggle() @@ -194,28 +197,38 @@ public struct _CharacterClassModel: Hashable { return matched ? 
next : nil case .unicodeScalar: let c = str.unicodeScalars[i] + var nextIndex = str.unicodeScalars.index(after: i) var matched: Bool switch cc { case .any: matched = true case .anyScalar: matched = true - case .anyGrapheme: fatalError("Not matched in this mode") + case .anyGrapheme: + matched = true + nextIndex = str.index(after: i) case .digit: matched = c.properties.numericType != nil && (c.isASCII || !options.usesASCIIDigits) case .hexDigit: matched = Character(c).isHexDigit && (c.isASCII || !options.usesASCIIDigits) - case .horizontalWhitespace: fatalError("Not implemented") - case .newlineSequence: fatalError("Not implemented") - case .verticalWhitespace: fatalError("Not implemented") + case .horizontalWhitespace: + matched = c.isHorizontalWhitespace && (c.isASCII || !options.usesASCIISpaces) + case .verticalWhitespace: + matched = c.isNewline && (c.isASCII || !options.usesASCIISpaces) + case .newlineSequence: + matched = c.isNewline && (c.isASCII || !options.usesASCIISpaces) + if c == "\r" && nextIndex != str.endIndex && str.unicodeScalars[nextIndex] == "\n" { + str.unicodeScalars.formIndex(after: &nextIndex) + } case .whitespace: matched = c.properties.isWhitespace && (c.isASCII || !options.usesASCIISpaces) case .word: matched = (c.properties.isAlphabetic || c == "_") && (c.isASCII || !options.usesASCIIWord) - case .custom: fatalError("Not supported") + case .custom(let set): + matched = set.any { $0.matches(Character(c), with: options) } } if isInverted { matched.toggle() } - return matched ? str.unicodeScalars.index(after: i) : nil + return matched ? nextIndex : nil } } } @@ -451,9 +464,13 @@ extension AST.Atom.EscapedBuiltin { case .notHorizontalWhitespace: return .horizontalWhitespace.inverted - case .notNewline: return .newlineSequence.inverted case .newlineSequence: return .newlineSequence + // FIXME: This is more like '.' than inverted '\R', as it is affected + // by e.g (*CR). We should therefore really be emitting it through + // emitAny(). 
For now we treat it as semantically invalid. + case .notNewline: return .newlineSequence.inverted + case .whitespace: return .whitespace case .notWhitespace: return .whitespace.inverted diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 4e08ea103..c0c6491ac 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -115,7 +115,7 @@ class RegexDSLTests: XCTestCase { { let disallowedChars = CharacterClass.hexDigit .symmetricDifference("a"..."z") - Lookahead(disallowedChars, negative: true) // No: 0-9 + g-z + NegativeLookahead(disallowedChars) // No: 0-9 + g-z OneOrMore(("b"..."g").union("d"..."n")) // b-n @@ -487,7 +487,7 @@ class RegexDSLTests: XCTestCase { { OneOrMore("a") Lookahead(CharacterClass.digit) - Lookahead("2", negative: true) + NegativeLookahead { "2" } CharacterClass.word } } @@ -742,43 +742,6 @@ class RegexDSLTests: XCTestCase { } } - func testDynamicCaptures() throws { - do { - let regex = try Regex("aabcc.") - let line = "aabccd" - let match = try XCTUnwrap(line.wholeMatch(of: regex)) - XCTAssertEqual(match.0, line[...]) - let output = match.output - XCTAssertEqual(output[0].substring, line[...]) - } - do { - let regex = try Regex( - #""" - (?[0-9A-F]+)(?:\.\.(?[0-9A-F]+))?\s+;\s+(?\w+).* - """#) - let line = """ - A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM \ - COMBINING MARK TUKWENTIS - """ - let match = try XCTUnwrap(line.wholeMatch(of: regex)) - XCTAssertEqual(match.0, line[...]) - let output = match.output - XCTAssertEqual(output[0].substring, line[...]) - XCTAssertTrue(output[1].substring == "A6F0") - XCTAssertTrue(output["lower"]?.substring == "A6F0") - XCTAssertTrue(output[2].substring == "A6F1") - XCTAssertTrue(output["upper"]?.substring == "A6F1") - XCTAssertTrue(output[3].substring == "Extend") - XCTAssertTrue(output["desc"]?.substring == "Extend") - let typedOutput = try XCTUnwrap(output.as( - (Substring, 
lower: Substring, upper: Substring?, Substring).self)) - XCTAssertEqual(typedOutput.0, line[...]) - XCTAssertTrue(typedOutput.lower == "A6F0") - XCTAssertTrue(typedOutput.upper == "A6F1") - XCTAssertTrue(typedOutput.3 == "Extend") - } - } - func testBackreference() throws { try _testDSLCaptures( ("abc#41#42abcabcabc", ("abc#41#42abcabcabc", "abc", 42, "abc", nil)), @@ -889,6 +852,54 @@ class RegexDSLTests: XCTestCase { } } } + + // Post-hoc captured reference w/ attempted match before capture + // #"(?:\w\1|(\w):)+"# + // + // This tests that the reference `a` simply fails to match instead of + // erroring when encountered before a match is captured into `a`. The + // matching process here goes like this: + // - the first time through, the first alternation is taken + // - `.word` matches on "a" + // - the `a` backreference fails on ":", because `a` hasn't matched yet + // - backtrack to the beginning of the input + // - now the second alternation is taken + // - `.word` matches on "a" and is captured as `a` + // - the literal ":" matches + // - proceeding from the position of the first "b" in the first alternation + // - `.word` matches on "b" + // - the `a` backreference now contains "a", and matches on "a" + // - proceeding from the position of the first "c" in the first alternation + // - `.word` matches on "c" + // - the `a` backreference still contains "a", and matches on "a" + // - proceeding from the position of the first "o" in the first alternation + // - `.word` matches on "o" + // - the `a` backreference still contains "a", so it fails on ":" + // - now the second alternation is taken + // - `.word` matches on "o" and is captured as `a` + // - the literal ":" matches + // - continuing as above from the second "b"... + try _testDSLCaptures( + ("a:bacao:boco", ("a:bacao:boco", "o")), + matchType: (Substring, Substring?).self, + == + ) { + // NOTE: "expression too complex to type check" when inferring the generic + // parameter. 
+ OneOrMore { + let a = Reference(Substring.self) + ChoiceOf<(Substring, Substring?)> { + Regex { + .word + a + } + Regex { + Capture(.word, as: a) + ":" + } + } + } + } } func testSemanticVersionExample() { diff --git a/Tests/RegexTests/AnyRegexOutputTests.swift b/Tests/RegexTests/AnyRegexOutputTests.swift new file mode 100644 index 000000000..8d91c0ec8 --- /dev/null +++ b/Tests/RegexTests/AnyRegexOutputTests.swift @@ -0,0 +1,157 @@ + +import _StringProcessing +import XCTest + +// Test that our existential capture and concrete captures are +// the same +private func checkSame( + _ aro: AnyRegexOutput, + _ concrete: (Substring, fieldA: Substring, fieldB: Substring) +) { + XCTAssertEqual(aro[0].substring, concrete.0) + + XCTAssertEqual(aro["fieldA"]!.substring, concrete.1) + XCTAssertEqual(aro["fieldA"]!.substring, concrete.fieldA) + + XCTAssertEqual(aro[1].substring, concrete.1) + + XCTAssertEqual(aro["fieldB"]!.substring, concrete.2) + XCTAssertEqual(aro["fieldB"]!.substring, concrete.fieldB) + + XCTAssertEqual(aro[2].substring, concrete.2) + +} +private func checkSame( + _ aro: Regex.Match, + _ concrete: Regex<(Substring, fieldA: Substring, fieldB: Substring)>.Match +) { + checkSame(aro.output, concrete.output) + + XCTAssertEqual(aro.0, concrete.0) + XCTAssertEqual(aro[0].substring, concrete.0) + + XCTAssertEqual(aro["fieldA"]!.substring, concrete.1) + XCTAssertEqual(aro["fieldA"]!.substring, concrete.fieldA) + XCTAssertEqual(aro[1].substring, concrete.1) + + XCTAssertEqual(aro["fieldB"]!.substring, concrete.2) + XCTAssertEqual(aro["fieldB"]!.substring, concrete.fieldB) + XCTAssertEqual(aro[2].substring, concrete.2) +} +private func checkSame( + _ aro: Regex, + _ concrete: Regex<(Substring, fieldA: Substring, fieldB: Substring)> +) { + XCTAssertEqual( + aro.contains(captureNamed: "fieldA"), + concrete.contains(captureNamed: "fieldA")) + XCTAssertEqual( + aro.contains(captureNamed: "fieldB"), + concrete.contains(captureNamed: "fieldB")) + XCTAssertEqual( + 
aro.contains(captureNamed: "notAField"), + concrete.contains(captureNamed: "notAField")) +} + +extension RegexTests { + func testAnyRegexOutput() { + let regex = try! Regex(#""" + (?x) + (? [^,]*) + , + (? [^,]*) + """#) + + let match = "abc,def".wholeMatch(of: regex)! + XCTAssertEqual(match.0, "abc,def") + XCTAssertEqual(match[0].substring, "abc,def") + + XCTAssertEqual(match["fieldA"]!.substring, "abc") + XCTAssertEqual(match.output["fieldA"]!.substring, "abc") + XCTAssertEqual(match[1].substring, "abc") + + XCTAssertEqual(match["fieldB"]!.substring, "def") + XCTAssertEqual(match.output["fieldB"]!.substring, "def") + XCTAssertEqual(match[2].substring, "def") + + XCTAssertNil(match["notACapture"]) + XCTAssertNil(match.output["notACapture"]) + XCTAssertEqual(match.count, 3) + + XCTAssert(regex.contains(captureNamed: "fieldA")) + XCTAssert(regex.contains(captureNamed: "fieldB")) + XCTAssertFalse(regex.contains(captureNamed: "notAField")) + + // MARK: Check equivalence with concrete + + let regexConcrete: + Regex<(Substring, fieldA: Substring, fieldB: Substring)> + = try! Regex(#""" + (?x) + (? [^,]*) + , + (? [^,]*) + """#) + checkSame(regex, regexConcrete) + + let matchConcrete = "abc,def".wholeMatch(of: regexConcrete)! + checkSame(match, matchConcrete) + + let output = match.output + let concreteOutput = matchConcrete.output + checkSame(output, concreteOutput) + + // TODO: ARO init from concrete match tuple + + let concreteOutputCasted = output.as( + (Substring, fieldA: Substring, fieldB: Substring).self + )! + checkSame(output, concreteOutputCasted) + + var concreteOutputCopy = concreteOutput + concreteOutputCopy = output.as()! 
+ checkSame(output, concreteOutputCopy) + + // TODO: Regex.Match: init from tuple match and as to tuple match + + // TODO: Regex: init from tuple regex and as cast to tuple regex + + } + + func testDynamicCaptures() throws { + do { + let regex = try Regex("aabcc.") + let line = "aabccd" + let match = try XCTUnwrap(line.wholeMatch(of: regex)) + XCTAssertEqual(match.0, line[...]) + let output = match.output + XCTAssertEqual(output[0].substring, line[...]) + } + do { + let regex = try Regex( + #""" + (?[0-9A-F]+)(?:\.\.(?[0-9A-F]+))?\s+;\s+(?\w+).* + """#) + let line = """ + A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM \ + COMBINING MARK TUKWENTIS + """ + let match = try XCTUnwrap(line.wholeMatch(of: regex)) + XCTAssertEqual(match.0, line[...]) + let output = match.output + XCTAssertEqual(output[0].substring, line[...]) + XCTAssertTrue(output[1].substring == "A6F0") + XCTAssertTrue(output["lower"]?.substring == "A6F0") + XCTAssertTrue(output[2].substring == "A6F1") + XCTAssertTrue(output["upper"]?.substring == "A6F1") + XCTAssertTrue(output[3].substring == "Extend") + XCTAssertTrue(output["desc"]?.substring == "Extend") + let typedOutput = try XCTUnwrap(output.as( + (Substring, lower: Substring, upper: Substring?, Substring).self)) + XCTAssertEqual(typedOutput.0, line[...]) + XCTAssertTrue(typedOutput.lower == "A6F0") + XCTAssertTrue(typedOutput.upper == "A6F1") + XCTAssertTrue(typedOutput.3 == "Extend") + } + } +} diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index b48e1f0a5..9efbf2f76 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -16,36 +16,44 @@ import XCTest extension CaptureList.Capture { static var cap: Self { - return Self(optionalDepth: 0) + return Self(optionalDepth: 0, .fake) } static var opt: Self { - return Self(optionalDepth: 1) + return Self(optionalDepth: 1, .fake) } static var opt_opt: Self { - return Self(optionalDepth: 2) + return 
Self(optionalDepth: 2, .fake) } static var opt_opt_opt: Self { - return Self(optionalDepth: 3) + return Self(optionalDepth: 3, .fake) } static var opt_opt_opt_opt: Self { - return Self(optionalDepth: 4) + return Self(optionalDepth: 4, .fake) } static var opt_opt_opt_opt_opt: Self { - return Self(optionalDepth: 5) + return Self(optionalDepth: 5, .fake) } static var opt_opt_opt_opt_opt_opt: Self { - return Self(optionalDepth: 6) + return Self(optionalDepth: 6, .fake) } - static func named(_ name: String) -> Self { - return Self(name: name, optionalDepth: 0) + static func named(_ name: String, opt: Int = 0) -> Self { + return Self(name: name, optionalDepth: opt, .fake) } } extension CaptureList { static func caps(count: Int) -> Self { Self(Array(repeating: .cap, count: count)) } + + var withoutLocs: Self { + var copy = self + for idx in copy.captures.indices { + copy.captures[idx].location = .fake + } + return copy + } } extension StructuredCapture { @@ -150,8 +158,8 @@ func captureTest( file: StaticString = #file, line: UInt = #line ) { - let ast = try! parse(regex, .traditional) - let capList = ast.root._captureList + let ast = try! parse(regex, .semantic, .traditional) + let capList = ast.root._captureList.withoutLocs guard capList == expected else { XCTFail(""" Expected: diff --git a/Tests/RegexTests/DiagnosticTests.swift b/Tests/RegexTests/DiagnosticTests.swift index 428020b80..0100a3a86 100644 --- a/Tests/RegexTests/DiagnosticTests.swift +++ b/Tests/RegexTests/DiagnosticTests.swift @@ -20,7 +20,7 @@ extension RegexTests { XCTAssert(SourceLocation.fake.isFake) XCTAssert(group(.capture, "a").location.isFake) - let ast = try! parse("(a)", .traditional).root + let ast = try! parse("(a)", .semantic, .traditional).root XCTAssert(ast.location.isReal) } @@ -31,7 +31,7 @@ extension RegexTests { // // Input should be a concatenation or alternation func flatTest(_ str: String, _ expected: [String]) { - guard let ast = try? 
parse(str, .traditional).root else { + guard let ast = try? parse(str, .semantic, .traditional).root else { XCTFail("Fail to parse: \(str)") return } @@ -54,7 +54,7 @@ extension RegexTests { func renderTest(_ str: String, _ expected: [String]) { let lines = try! parse( - str, .traditional + str, .semantic, .traditional )._render(in: str) func fail() { XCTFail(""" diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 345e80e22..36056e85a 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -169,6 +169,8 @@ func firstMatchTest( XCTAssertEqual(found, match, file: file, line: line) } } catch { + // FIXME: This allows non-matches to succeed even when xfail'd + // When xfail == true, this should report failure for match == nil if !xfail && match != nil { XCTFail("\(error)", file: file, line: line) } @@ -182,7 +184,9 @@ func firstMatchTests( syntax: SyntaxOptions = .traditional, enableTracing: Bool = false, dumpAST: Bool = false, - xfail: Bool = false + xfail: Bool = false, + file: StaticString = #filePath, + line: UInt = #line ) { for (input, match) in tests { firstMatchTest( @@ -192,7 +196,9 @@ func firstMatchTests( syntax: syntax, enableTracing: enableTracing, dumpAST: dumpAST, - xfail: xfail) + xfail: xfail, + file: file, + line: line) } } @@ -279,7 +285,20 @@ extension RegexTests { firstMatchTest(#"\0707"#, input: "12387\u{1C7}xyz", match: "\u{1C7}") // code point sequence - firstMatchTest(#"\u{61 62 63}"#, input: "123abcxyz", match: "abc", xfail: true) + firstMatchTest(#"\u{61 62 63}"#, input: "123abcxyz", match: "abc") + firstMatchTest(#"3\u{ 61 62 63 }"#, input: "123abcxyz", match: "3abc") + firstMatchTest(#"\u{61 62}\u{63}"#, input: "123abcxyz", match: "abc") + firstMatchTest(#"\u{61}\u{62 63}"#, input: "123abcxyz", match: "abc") + firstMatchTest(#"9|\u{61 62 63}"#, input: "123abcxyz", match: "abc") + firstMatchTest(#"(?:\u{61 62 63})"#, input: "123abcxyz", match: "abc") + 
firstMatchTest(#"23\u{61 62 63}xy"#, input: "123abcxyz", match: "23abcxy") + + // o + horn + dot_below + firstMatchTest( + #"\u{006f 031b 0323}"#, + input: "\u{006f}\u{031b}\u{0323}", + match: "\u{006f}\u{031b}\u{0323}" + ) // Escape sequences that represent scalar values. firstMatchTest(#"\a[\b]\e\f\n\r\t"#, @@ -400,7 +419,8 @@ extension RegexTests { "a++a", ("babc", nil), ("baaabc", nil), - ("bb", nil)) + ("bb", nil), + xfail: true) firstMatchTests( "a+?a", ("babc", nil), @@ -462,15 +482,11 @@ extension RegexTests { "a{2,4}+a", ("babc", nil), ("baabc", nil), - ("baaabc", nil), ("baaaaabc", "aaaaa"), ("baaaaaaaabc", "aaaaa"), ("bb", nil)) firstMatchTests( "a{,4}+a", - ("babc", nil), - ("baabc", nil), - ("baaabc", nil), ("baaaaabc", "aaaaa"), ("baaaaaaaabc", "aaaaa"), ("bb", nil)) @@ -478,11 +494,44 @@ extension RegexTests { "a{2,}+a", ("babc", nil), ("baabc", nil), + ("bb", nil)) + + // XFAIL'd versions of the above + firstMatchTests( + "a{2,4}+a", + ("baaabc", nil), + xfail: true) + firstMatchTests( + "a{,4}+a", + ("babc", nil), + ("baabc", nil), + ("baaabc", nil), + xfail: true) + firstMatchTests( + "a{2,}+a", ("baaabc", nil), ("baaaaabc", nil), ("baaaaaaaabc", nil), - ("bb", nil)) + xfail: true) + // XFAIL'd possessive tests + firstMatchTests( + "a?+a", + ("a", nil), + xfail: true) + firstMatchTests( + "(a|a)?+a", + ("a", nil), + xfail: true) + firstMatchTests( + "(a|a){2,4}+a", + ("a", nil), + ("aa", nil)) + firstMatchTests( + "(a|a){2,4}+a", + ("aaa", nil), + ("aaaa", nil), + xfail: true) firstMatchTests( "(?:a{2,4}?b)+", @@ -681,7 +730,7 @@ extension RegexTests { firstMatchTest( #"\N{ASTERISK}+"#, input: "123***xyz", match: "***") firstMatchTest( - #"\N {2}"#, input: "123 xyz", match: "3 ") + #"\N {2}"#, input: "123 xyz", match: "3 ", xfail: true) firstMatchTest(#"\N{U+2C}"#, input: "123,xyz", match: ",") firstMatchTest(#"\N{U+1F4BF}"#, input: "123💿xyz", match: "💿") @@ -693,6 +742,14 @@ extension RegexTests { firstMatchTest(#"\p{gc=L}"#, input: "123abcXYZ", 
match: "a") firstMatchTest(#"\p{Lu}"#, input: "123abcXYZ", match: "X") + // U+0374 GREEK NUMERAL SIGN (Lm) + // U+00AA FEMININE ORDINAL INDICATOR (Lo) + firstMatchTest(#"\p{L}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "\u{0374}") + firstMatchTest(#"\p{Lc}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "a") + firstMatchTest(#"\p{Lc}"#, input: "\u{0374}\u{00AA}123XYZ", match: "X") + firstMatchTest(#"\p{L&}"#, input: "\u{0374}\u{00AA}123abcXYZ", match: "a") + firstMatchTest(#"\p{L&}"#, input: "\u{0374}\u{00AA}123XYZ", match: "X") + firstMatchTest( #"\P{Cc}"#, input: "\n\n\nXYZ", match: "X") firstMatchTest( @@ -938,15 +995,19 @@ extension RegexTests { // TODO: Oniguruma \y and \Y firstMatchTests( - #"\u{65}"#, // Scalar 'e' is present in both: - ("Cafe\u{301}", "e"), // composed and - ("Sol Cafe", "e")) // standalone + #"\u{65}"#, // Scalar 'e' is present in both + ("Cafe\u{301}", nil), // but scalar mode requires boundary at end of match + xfail: true) + firstMatchTests( + #"\u{65}"#, // Scalar 'e' is present in both + ("Sol Cafe", "e")) // standalone is okay + firstMatchTests( #"\u{65}\y"#, // Grapheme boundary assertion ("Cafe\u{301}", nil), ("Sol Cafe", "e")) firstMatchTests( - #"\u{65}\Y"#, // Grapheme non-boundary assertion + #"(?u)\u{65}\Y"#, // Grapheme non-boundary assertion ("Cafe\u{301}", "e"), ("Sol Cafe", nil)) } @@ -966,7 +1027,7 @@ extension RegexTests { firstMatchTest( #"a(?:b)c"#, input: "123abcxyz", match: "abc") firstMatchTest( - "(?|(a)|(b)|(c))", input: "123abcxyz", match: "a") + "(?|(a)|(b)|(c))", input: "123abcxyz", match: "a", xfail: true) firstMatchTest( #"(?:a|.b)c"#, input: "123abcacxyz", match: "abc") @@ -1082,6 +1143,8 @@ extension RegexTests { firstMatchTest(#"(.)(.)\g-02"#, input: "abac", match: "aba", xfail: true) firstMatchTest(#"(?.)(.)\k"#, input: "abac", match: "aba", xfail: true) firstMatchTest(#"\g'+2'(.)(.)"#, input: "abac", match: "aba", xfail: true) + + firstMatchTest(#"\1(.)"#, input: "112", match: nil) } func 
testMatchExamples() { @@ -1353,11 +1416,14 @@ extension RegexTests { // as a character. firstMatchTest(#"\u{65}\u{301}$"#, input: eDecomposed, match: eDecomposed) - // FIXME: Decomposed character in regex literal doesn't match an equivalent character - firstMatchTest(#"\u{65}\u{301}$"#, input: eComposed, match: eComposed, - xfail: true) + firstMatchTest(#"\u{65}\u{301}$"#, input: eComposed, match: eComposed) - firstMatchTest(#"\u{65}"#, input: eDecomposed, match: "e") + firstMatchTest(#"\u{65 301}$"#, input: eDecomposed, match: eDecomposed) + firstMatchTest(#"\u{65 301}$"#, input: eComposed, match: eComposed) + + // FIXME: Implicit \y at end of match + firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil, + xfail: true) firstMatchTest(#"\u{65}$"#, input: eDecomposed, match: nil) // FIXME: \y is unsupported firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil, @@ -1381,12 +1447,10 @@ extension RegexTests { (eComposed, true), (eDecomposed, true)) - // FIXME: Decomposed character in regex literal doesn't match an equivalent character matchTest( #"e\u{301}$"#, (eComposed, true), - (eDecomposed, true), - xfail: true) + (eDecomposed, true)) matchTest( #"e$"#, @@ -1407,9 +1471,7 @@ extension RegexTests { (eDecomposed, true)) // \p{Letter} firstMatchTest(#"\p{Letter}$"#, input: eComposed, match: eComposed) - // FIXME: \p{Letter} doesn't match a decomposed character - firstMatchTest(#"\p{Letter}$"#, input: eDecomposed, match: eDecomposed, - xfail: true) + firstMatchTest(#"\p{Letter}$"#, input: eDecomposed, match: eDecomposed) // \d firstMatchTest(#"\d"#, input: "5", match: "5") @@ -1470,9 +1532,11 @@ extension RegexTests { firstMatchTest(#"🇰🇷"#, input: flag, match: flag) firstMatchTest(#"[🇰🇷]"#, input: flag, match: flag) firstMatchTest(#"\u{1F1F0}\u{1F1F7}"#, input: flag, match: flag) - + firstMatchTest(#"\u{1F1F0 1F1F7}"#, input: flag, match: flag) + // First Unicode scalar followed by CCC of regional indicators - 
firstMatchTest(#"\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]"#, input: flag, match: flag) + firstMatchTest(#"\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]"#, input: flag, match: flag, + xfail: true) // FIXME: CCC of Regional Indicator doesn't match with both parts of a flag character // A CCC of regional indicators x 2 @@ -1513,8 +1577,7 @@ extension RegexTests { // FIXME: \O is unsupported firstMatchTest(#"(?u)\O\u{301}"#, input: eDecomposed, match: eDecomposed) - firstMatchTest(#"(?u)e\O"#, input: eDecomposed, match: eDecomposed, - xfail: true) + firstMatchTest(#"(?u)e\O"#, input: eDecomposed, match: eDecomposed) firstMatchTest(#"\O"#, input: eComposed, match: eComposed) firstMatchTest(#"\O"#, input: eDecomposed, match: nil, xfail: true) diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index aeefe6477..ed930b0fe 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -33,30 +33,56 @@ extension AST.CustomCharacterClass.Member: ExpressibleByExtendedGraphemeClusterL } } +enum SemanticErrorKind { + case unsupported, invalid +} class RegexTests: XCTestCase {} func parseTest( _ input: String, _ expectedAST: AST.Node, + throwsError errorKind: SemanticErrorKind? = nil, syntax: SyntaxOptions = .traditional, captures expectedCaptures: CaptureList = [], file: StaticString = #file, line: UInt = #line ) { parseTest( - input, .init(expectedAST, globalOptions: nil), syntax: syntax, - captures: expectedCaptures, file: file, line: line + input, .init(expectedAST, globalOptions: nil), throwsError: errorKind, + syntax: syntax, captures: expectedCaptures, file: file, line: line ) } func parseTest( _ input: String, _ expectedAST: AST, + throwsError errorKind: SemanticErrorKind? = nil, syntax: SyntaxOptions = .traditional, captures expectedCaptures: CaptureList = [], file: StaticString = #file, line: UInt = #line ) { - let ast = try! parse(input, syntax) + let ast: AST + do { + ast = try parse(input, errorKind != nil ? 
.syntactic : .semantic, syntax) + } catch { + XCTFail("unexpected error: \(error)", file: file, line: line) + return + } + if let errorKind = errorKind { + do { + _ = try parse(input, .semantic, syntax) + XCTFail("expected semantically invalid AST", file: file, line: line) + } catch let e as Source.LocatedError { + switch e.error { + case .unsupported: + XCTAssertEqual(errorKind, .unsupported, "\(e)", file: file, line: line) + default: + XCTAssertEqual(errorKind, .invalid, "\(e)", file: file, line: line) + } + } catch { + XCTFail("Error without source location: \(error)", file: file, line: line) + } + } guard ast == expectedAST || ast._dump() == expectedAST._dump() // EQ workaround else { @@ -68,7 +94,7 @@ func parseTest( file: file, line: line) return } - let captures = ast.captureList + let captures = ast.captureList.withoutLocs guard captures == expectedCaptures else { XCTFail(""" @@ -143,15 +169,37 @@ func delimiterLexingTest( /// true, there may be additional characters that follow the literal that are /// not considered part of it. func parseWithDelimitersTest( - _ input: String, _ expecting: AST.Node, ignoreTrailing: Bool = false, - file: StaticString = #file, line: UInt = #line + _ input: String, _ expecting: AST.Node, + throwsError errorKind: SemanticErrorKind? = nil, + ignoreTrailing: Bool = false, file: StaticString = #file, line: UInt = #line ) { // First try lexing. let literal = delimiterLexingTest( input, ignoreTrailing: ignoreTrailing, file: file, line: line) - let orig = try! parseWithDelimiters(literal) - let ast = orig.root + let ast: AST.Node + do { + ast = try parseWithDelimiters( + literal, errorKind != nil ? 
.syntactic : .semantic).root + } catch { + XCTFail("unexpected error: \(error)", file: file, line: line) + return + } + if let errorKind = errorKind { + do { + _ = try parseWithDelimiters(input, .semantic) + XCTFail("expected semantically invalid AST", file: file, line: line) + } catch let e as Source.LocatedError { + switch e.error { + case .unsupported: + XCTAssertEqual(errorKind, .unsupported, "\(e)", file: file, line: line) + default: + XCTAssertEqual(errorKind, .invalid, "\(e)", file: file, line: line) + } + } catch { + XCTFail("Error without source location: \(error)", file: file, line: line) + } + } guard ast == expecting || ast._dump() == expecting._dump() // EQ workaround else { @@ -170,8 +218,8 @@ func parseNotEqualTest( syntax: SyntaxOptions = .traditional, file: StaticString = #file, line: UInt = #line ) { - let lhsAST = try! parse(lhs, syntax) - let rhsAST = try! parse(rhs, syntax) + let lhsAST = try! parse(lhs, .syntactic, syntax) + let rhsAST = try! parse(rhs, .syntactic, syntax) if lhsAST == rhsAST || lhsAST._dump() == rhsAST._dump() { XCTFail(""" AST: \(lhsAST._dump()) @@ -187,7 +235,7 @@ func rangeTest( at locFn: (AST.Node) -> SourceLocation = \.location, file: StaticString = #file, line: UInt = #line ) { - let ast = try! parse(input, syntax).root + let ast = try! 
parse(input, .syntactic, syntax).root let range = input.offsets(of: locFn(ast).range) let expected = expectedRange(input) @@ -207,7 +255,7 @@ func diagnosticTest( file: StaticString = #file, line: UInt = #line ) { do { - let ast = try parse(input, syntax) + let ast = try parse(input, .semantic, syntax) XCTFail(""" Passed \(ast) @@ -236,7 +284,7 @@ func diagnosticWithDelimitersTest( input, ignoreTrailing: ignoreTrailing, file: file, line: line) do { - let orig = try parseWithDelimiters(literal) + let orig = try parseWithDelimiters(literal, .semantic) let ast = orig.root XCTFail(""" @@ -433,10 +481,32 @@ extension RegexTests { parseTest(#"\x5X"#, concat(scalar("\u{5}"), "X")) parseTest(#"\x12ab"#, concat(scalar("\u{12}"), "a", "b")) + parseTest(#"\u{ a }"#, scalar("\u{A}")) + parseTest(#"\u{ a }\u{ B }"#, concat(scalar("\u{A}"), scalar("\u{B}"))) + + // MARK: Scalar sequences + + parseTest(#"\u{A bC}"#, scalarSeq("\u{A}", "\u{BC}")) + parseTest(#"\u{ A bC }"#, scalarSeq("\u{A}", "\u{BC}")) + parseTest(#"\u{A bC }"#, scalarSeq("\u{A}", "\u{BC}")) + parseTest(#"\u{ A bC}"#, scalarSeq("\u{A}", "\u{BC}")) + parseTest(#"\u{ A b C }"#, scalarSeq("\u{A}", "\u{B}", "\u{C}")) + + parseTest( + #"\u{3b1 3b3 3b5 3b9}"#, + scalarSeq("\u{3b1}", "\u{3b3}", "\u{3b5}", "\u{3b9}") + ) + // MARK: Character classes parseTest(#"abc\d"#, concat("a", "b", "c", escaped(.decimalDigit))) + // FIXME: '\N' should be emitted through 'emitAny', not through the + // _CharacterClassModel model. 
+ parseTest(#"\N"#, escaped(.notNewline), throwsError: .unsupported) + + parseTest(#"\R"#, escaped(.newlineSequence)) + parseTest( "[-|$^:?+*())(*-+-]", charClass( @@ -449,6 +519,8 @@ extension RegexTests { parseTest("[-a-]", charClass("-", "a", "-")) parseTest("[a-z]", charClass(range_m("a", "z"))) + parseTest("[a-a]", charClass(range_m("a", "a"))) + parseTest("[B-a]", charClass(range_m("B", "a"))) // FIXME: AST builder helpers for custom char class types parseTest("[a-d--a-c]", charClass( @@ -595,10 +667,34 @@ extension RegexTests { range_m(.keyboardControl("A"), .keyboardControl("B")), range_m(.keyboardMetaControl("A"), .keyboardMetaControl("B")), range_m(.keyboardMeta("A"), .keyboardMeta("B")) - )) + ), throwsError: .unsupported) + + parseTest( + #"[\N{DOLLAR SIGN}-\N{APOSTROPHE}]"#, charClass( + range_m(.namedCharacter("DOLLAR SIGN"), .namedCharacter("APOSTROPHE"))), + throwsError: .unsupported) + + parseTest( + #"[\u{AA}-\u{BB}]"#, + charClass(range_m(scalar_a("\u{AA}"), scalar_a("\u{BB}"))) + ) - parseTest(#"[\N{DOLLAR SIGN}-\N{APOSTROPHE}]"#, charClass( - range_m(.namedCharacter("DOLLAR SIGN"), .namedCharacter("APOSTROPHE")))) + // Not currently supported, we need to figure out what their semantics are. + parseTest( + #"[\u{AA BB}-\u{CC}]"#, + charClass(range_m(scalarSeq_a("\u{AA}", "\u{BB}"), scalar_a("\u{CC}"))), + throwsError: .unsupported + ) + parseTest( + #"[\u{CC}-\u{AA BB}]"#, + charClass(range_m(scalar_a("\u{CC}"), scalarSeq_a("\u{AA}", "\u{BB}"))), + throwsError: .unsupported + ) + parseTest( + #"[\u{a b c}]"#, + charClass(scalarSeq_m("\u{A}", "\u{B}", "\u{C}")), + throwsError: .unsupported + ) // MARK: Operators @@ -691,13 +787,13 @@ extension RegexTests { parseTest(#"\\#u{3000}"#, "\u{3000}") // Control and meta controls. 
- parseTest(#"\c "#, atom(.keyboardControl(" "))) - parseTest(#"\c!"#, atom(.keyboardControl("!"))) - parseTest(#"\c~"#, atom(.keyboardControl("~"))) - parseTest(#"\C--"#, atom(.keyboardControl("-"))) - parseTest(#"\M-\C-a"#, atom(.keyboardMetaControl("a"))) - parseTest(#"\M-\C--"#, atom(.keyboardMetaControl("-"))) - parseTest(#"\M-a"#, atom(.keyboardMeta("a"))) + parseTest(#"\c "#, atom(.keyboardControl(" ")), throwsError: .unsupported) + parseTest(#"\c!"#, atom(.keyboardControl("!")), throwsError: .unsupported) + parseTest(#"\c~"#, atom(.keyboardControl("~")), throwsError: .unsupported) + parseTest(#"\C--"#, atom(.keyboardControl("-")), throwsError: .unsupported) + parseTest(#"\M-\C-a"#, atom(.keyboardMetaControl("a")), throwsError: .unsupported) + parseTest(#"\M-\C--"#, atom(.keyboardMetaControl("-")), throwsError: .unsupported) + parseTest(#"\M-a"#, atom(.keyboardMeta("a")), throwsError: .unsupported) // MARK: Comments @@ -734,6 +830,9 @@ extension RegexTests { parseTest( #"a{0,0}"#, quantRange(0...0, of: "a")) + parseTest( + #"a{1,1}"#, + quantRange(1...1, of: "a")) // Make sure ranges get treated as literal if invalid. parseTest("{", "{") @@ -786,11 +885,42 @@ extension RegexTests { // Balanced captures parseTest(#"(?)"#, balancedCapture(name: "a", priorName: "c", empty()), - captures: [.named("a")]) + throwsError: .unsupported, captures: [.named("a")]) parseTest(#"(?<-c>)"#, balancedCapture(name: nil, priorName: "c", empty()), - captures: [.cap]) + throwsError: .unsupported, captures: [.cap]) parseTest(#"(?'a-b'c)"#, balancedCapture(name: "a", priorName: "b", "c"), - captures: [.named("a")]) + throwsError: .unsupported, captures: [.named("a")]) + + // Capture resets. + // FIXME: The captures in each branch should be unified. For now, we don't + // treat any capture reset as semantically valid. 
+ parseTest( + "(?|(a)|(b))", + nonCaptureReset(alt(capture("a"), capture("b"))), + throwsError: .unsupported, captures: [.opt, .opt] + ) + parseTest( + "(?|(?a)|(b))", + nonCaptureReset(alt(namedCapture("x", "a"), capture("b"))), + throwsError: .unsupported, captures: [.named("x", opt: 1), .opt] + ) + parseTest( + "(?|(a)|(?b))", + nonCaptureReset(alt(capture("a"), namedCapture("x", "b"))), + throwsError: .unsupported, captures: [.opt, .named("x", opt: 1)] + ) + parseTest( + "(?|(?a)|(?b))", + nonCaptureReset(alt(namedCapture("x", "a"), namedCapture("x", "b"))), + throwsError: .invalid, captures: [.named("x", opt: 1), .named("x", opt: 1)] + ) + + // TODO: Reject mismatched names? + parseTest( + "(?|(?a)|(?b))", + nonCaptureReset(alt(namedCapture("x", "a"), namedCapture("y", "b"))), + throwsError: .unsupported, captures: [.named("x", opt: 1), .named("y", opt: 1)] + ) // Other groups parseTest( @@ -798,13 +928,13 @@ extension RegexTests { concat("a", nonCapture("b"), "c")) parseTest( #"a(?|b)c"#, - concat("a", nonCaptureReset("b"), "c")) + concat("a", nonCaptureReset("b"), "c"), throwsError: .unsupported) parseTest( #"a(?>b)c"#, - concat("a", atomicNonCapturing("b"), "c")) + concat("a", atomicNonCapturing("b"), "c"), throwsError: .unsupported) parseTest( "a(*atomic:b)c", - concat("a", atomicNonCapturing("b"), "c")) + concat("a", atomicNonCapturing("b"), "c"), throwsError: .unsupported) parseTest("a(?=b)c", concat("a", lookahead("b"), "c")) parseTest("a(*pla:b)c", concat("a", lookahead("b"), "c")) @@ -815,31 +945,42 @@ extension RegexTests { parseTest("a(*negative_lookahead:b)c", concat("a", negativeLookahead("b"), "c")) - parseTest("a(?<=b)c", concat("a", lookbehind("b"), "c")) - parseTest("a(*plb:b)c", concat("a", lookbehind("b"), "c")) - parseTest("a(*positive_lookbehind:b)c", concat("a", lookbehind("b"), "c")) - - parseTest("a(?)()", concat( + changeMatchingOptions(matchingOptions(adding: .namedCapturesOnly)), + changeMatchingOptions(unsetMatchingOptions(), 
capture(empty())), + namedCapture("x", empty()), + nonCapture(empty()) + ), captures: [.cap, .named("x")]) + // MARK: References // \1 ... \9 are always backreferences. for i in 1 ... 9 { - parseTest("\\\(i)", backreference(.absolute(i))) + parseTest("\\\(i)", backreference(.absolute(i)), throwsError: .invalid) parseTest( "()()()()()()()()()\\\(i)", concat(Array(repeating: capture(empty()), count: 9) @@ -986,10 +1134,10 @@ extension RegexTests { ) } - parseTest(#"\10"#, backreference(.absolute(10))) - parseTest(#"\18"#, backreference(.absolute(18))) - parseTest(#"\7777"#, backreference(.absolute(7777))) - parseTest(#"\91"#, backreference(.absolute(91))) + parseTest(#"\10"#, backreference(.absolute(10)), throwsError: .invalid) + parseTest(#"\18"#, backreference(.absolute(18)), throwsError: .invalid) + parseTest(#"\7777"#, backreference(.absolute(7777)), throwsError: .invalid) + parseTest(#"\91"#, backreference(.absolute(91)), throwsError: .invalid) parseTest( #"()()()()()()()()()()\10"#, @@ -1005,7 +1153,7 @@ extension RegexTests { ) parseTest(#"()()\10"#, concat( capture(empty()), capture(empty()), backreference(.absolute(10))), - captures: [.cap, .cap] + throwsError: .invalid, captures: [.cap, .cap] ) // A capture of three empty captures. @@ -1016,7 +1164,7 @@ extension RegexTests { // There are 9 capture groups in total here. #"((()()())(()()()))\10"#, concat(capture(concat( fourCaptures, fourCaptures)), backreference(.absolute(10))), - captures: .caps(count: 9) + throwsError: .invalid, captures: .caps(count: 9) ) parseTest( // There are 10 capture groups in total here. 
@@ -1040,7 +1188,7 @@ extension RegexTests { concat(Array(repeating: capture(empty()), count: 40) + [scalar(" ")]), captures: .caps(count: 40) ) - parseTest(#"\40"#, backreference(.absolute(40))) + parseTest(#"\40"#, backreference(.absolute(40)), throwsError: .invalid) parseTest( String(repeating: "()", count: 40) + #"\40"#, concat(Array(repeating: capture(empty()), count: 40) @@ -1048,14 +1196,14 @@ extension RegexTests { captures: .caps(count: 40) ) - parseTest(#"\7"#, backreference(.absolute(7))) + parseTest(#"\7"#, backreference(.absolute(7)), throwsError: .invalid) - parseTest(#"\11"#, backreference(.absolute(11))) + parseTest(#"\11"#, backreference(.absolute(11)), throwsError: .invalid) parseTest( - String(repeating: "()", count: 11) + #"\11"#, - concat(Array(repeating: capture(empty()), count: 11) + String(repeating: "()", count: 12) + #"\11"#, + concat(Array(repeating: capture(empty()), count: 12) + [backreference(.absolute(11))]), - captures: .caps(count: 11) + captures: .caps(count: 12) ) parseTest(#"\011"#, scalar("\u{9}")) parseTest( @@ -1065,64 +1213,78 @@ extension RegexTests { ) parseTest(#"\0113"#, scalar("\u{4B}")) - parseTest(#"\113"#, backreference(.absolute(113))) - parseTest(#"\377"#, backreference(.absolute(377))) - parseTest(#"\81"#, backreference(.absolute(81))) - - parseTest(#"\g1"#, backreference(.absolute(1))) - parseTest(#"\g001"#, backreference(.absolute(1))) - parseTest(#"\g52"#, backreference(.absolute(52))) - parseTest(#"\g-01"#, backreference(.relative(-1))) - parseTest(#"\g+30"#, backreference(.relative(30))) - - parseTest(#"\g{1}"#, backreference(.absolute(1))) - parseTest(#"\g{001}"#, backreference(.absolute(1))) - parseTest(#"\g{52}"#, backreference(.absolute(52))) - parseTest(#"\g{-01}"#, backreference(.relative(-1))) - parseTest(#"\g{+30}"#, backreference(.relative(30))) - parseTest(#"\k<+4>"#, backreference(.relative(4))) - parseTest(#"\k<2>"#, backreference(.absolute(2))) - parseTest(#"\k'-3'"#, backreference(.relative(-3))) 
- parseTest(#"\k'1'"#, backreference(.absolute(1))) - - parseTest(#"\k{a0}"#, backreference(.named("a0"))) - parseTest(#"\k"#, backreference(.named("bc"))) - parseTest(#"\g{abc}"#, backreference(.named("abc"))) - parseTest(#"(?P=abc)"#, backreference(.named("abc"))) + parseTest(#"\113"#, backreference(.absolute(113)), throwsError: .invalid) + parseTest(#"\377"#, backreference(.absolute(377)), throwsError: .invalid) + parseTest(#"\81"#, backreference(.absolute(81)), throwsError: .invalid) + + parseTest(#"\g1"#, backreference(.absolute(1)), throwsError: .invalid) + parseTest(#"\g001"#, backreference(.absolute(1)), throwsError: .invalid) + parseTest(#"\g52"#, backreference(.absolute(52)), throwsError: .invalid) + parseTest(#"\g-01"#, backreference(.relative(-1)), throwsError: .unsupported) + parseTest(#"\g+30"#, backreference(.relative(30)), throwsError: .unsupported) + + parseTest(#"\g{1}"#, backreference(.absolute(1)), throwsError: .invalid) + parseTest(#"\g{001}"#, backreference(.absolute(1)), throwsError: .invalid) + parseTest(#"\g{52}"#, backreference(.absolute(52)), throwsError: .invalid) + parseTest(#"\g{-01}"#, backreference(.relative(-1)), throwsError: .unsupported) + parseTest(#"\g{+30}"#, backreference(.relative(30)), throwsError: .unsupported) + parseTest(#"\k<+4>"#, backreference(.relative(4)), throwsError: .unsupported) + parseTest(#"\k<2>"#, backreference(.absolute(2)), throwsError: .invalid) + parseTest(#"\k'-3'"#, backreference(.relative(-3)), throwsError: .unsupported) + parseTest(#"\k'1'"#, backreference(.absolute(1)), throwsError: .invalid) + + parseTest(#"\k{a0}"#, backreference(.named("a0")), throwsError: .unsupported) + parseTest(#"\k"#, backreference(.named("bc")), throwsError: .unsupported) + parseTest(#"\g{abc}"#, backreference(.named("abc")), throwsError: .unsupported) + parseTest(#"(?P=abc)"#, backreference(.named("abc")), throwsError: .unsupported) // Oniguruma recursion levels. 
- parseTest(#"\k"#, backreference(.named("bc"), recursionLevel: 0)) - parseTest(#"\k"#, backreference(.named("a"), recursionLevel: 0)) - parseTest(#"\k<1+1>"#, backreference(.absolute(1), recursionLevel: 1)) - parseTest(#"\k<3-8>"#, backreference(.absolute(3), recursionLevel: -8)) - parseTest(#"\k'-3-8'"#, backreference(.relative(-3), recursionLevel: -8)) - parseTest(#"\k'bc-8'"#, backreference(.named("bc"), recursionLevel: -8)) - parseTest(#"\k'+3-8'"#, backreference(.relative(3), recursionLevel: -8)) - parseTest(#"\k'+3+8'"#, backreference(.relative(3), recursionLevel: 8)) - - parseTest(#"(?R)"#, subpattern(.recurseWholePattern)) - parseTest(#"(?0)"#, subpattern(.recurseWholePattern)) - parseTest(#"(?1)"#, subpattern(.absolute(1))) - parseTest(#"(?+12)"#, subpattern(.relative(12))) - parseTest(#"(?-2)"#, subpattern(.relative(-2))) - parseTest(#"(?&hello)"#, subpattern(.named("hello"))) - parseTest(#"(?P>P)"#, subpattern(.named("P"))) + parseTest(#"\k"#, backreference(.named("bc"), recursionLevel: 0), throwsError: .unsupported) + parseTest(#"\k"#, backreference(.named("a"), recursionLevel: 0), throwsError: .unsupported) + parseTest(#"\k<1+1>"#, backreference(.absolute(1), recursionLevel: 1), throwsError: .invalid) + parseTest(#"\k<3-8>"#, backreference(.absolute(3), recursionLevel: -8), throwsError: .invalid) + parseTest(#"\k'-3-8'"#, backreference(.relative(-3), recursionLevel: -8), throwsError: .unsupported) + parseTest(#"\k'bc-8'"#, backreference(.named("bc"), recursionLevel: -8), throwsError: .unsupported) + parseTest(#"\k'+3-8'"#, backreference(.relative(3), recursionLevel: -8), throwsError: .unsupported) + parseTest(#"\k'+3+8'"#, backreference(.relative(3), recursionLevel: 8), throwsError: .unsupported) + + parseTest(#"(?R)"#, subpattern(.recurseWholePattern), throwsError: .unsupported) + parseTest(#"(?0)"#, subpattern(.recurseWholePattern), throwsError: .unsupported) + parseTest(#"(?1)"#, subpattern(.absolute(1)), throwsError: .unsupported) + 
parseTest(#"(?+12)"#, subpattern(.relative(12)), throwsError: .unsupported) + parseTest(#"(?-2)"#, subpattern(.relative(-2)), throwsError: .unsupported) + parseTest(#"(?&hello)"#, subpattern(.named("hello")), throwsError: .unsupported) + parseTest(#"(?P>P)"#, subpattern(.named("P")), throwsError: .unsupported) parseTest(#"[(?R)]"#, charClass("(", "?", "R", ")")) parseTest(#"[(?&a)]"#, charClass("(", "?", "&", "a", ")")) parseTest(#"[(?1)]"#, charClass("(", "?", "1", ")")) - parseTest(#"\g<1>"#, subpattern(.absolute(1))) - parseTest(#"\g<001>"#, subpattern(.absolute(1))) - parseTest(#"\g'52'"#, subpattern(.absolute(52))) - parseTest(#"\g'-01'"#, subpattern(.relative(-1))) - parseTest(#"\g'+30'"#, subpattern(.relative(30))) - parseTest(#"\g'abc'"#, subpattern(.named("abc"))) + parseTest(#"\g<1>"#, subpattern(.absolute(1)), throwsError: .unsupported) + parseTest(#"\g<001>"#, subpattern(.absolute(1)), throwsError: .unsupported) + parseTest(#"\g'52'"#, subpattern(.absolute(52)), throwsError: .unsupported) + parseTest(#"\g'-01'"#, subpattern(.relative(-1)), throwsError: .unsupported) + parseTest(#"\g'+30'"#, subpattern(.relative(30)), throwsError: .unsupported) + parseTest(#"\g'abc'"#, subpattern(.named("abc")), throwsError: .unsupported) // Backreferences are not valid in custom character classes. parseTest(#"[\8]"#, charClass("8")) parseTest(#"[\9]"#, charClass("9")) + // These are valid references. + parseTest(#"()\1"#, concat( + capture(empty()), backreference(.absolute(1)) + ), captures: [.cap]) + parseTest(#"\1()"#, concat( + backreference(.absolute(1)), capture(empty()) + ), captures: [.cap]) + parseTest(#"()()\2"#, concat( + capture(empty()), capture(empty()), backreference(.absolute(2)) + ), captures: [.cap, .cap]) + parseTest(#"()\2()"#, concat( + capture(empty()), backreference(.absolute(2)), capture(empty()) + ), captures: [.cap, .cap]) + // MARK: Character names. 
parseTest(#"\N{abc}"#, atom(.namedCharacter("abc"))) @@ -1130,7 +1292,7 @@ extension RegexTests { parseTest(#"\N{abc}+"#, oneOrMore(of: atom(.namedCharacter("abc")))) parseTest( #"\N {2}"#, - concat(atom(.escaped(.notNewline)), exactly(2, of: " ")) + concat(atom(.escaped(.notNewline)), exactly(2, of: " ")), throwsError: .unsupported ) parseTest(#"\N{AA}"#, atom(.namedCharacter("AA"))) @@ -1156,6 +1318,9 @@ extension RegexTests { #"\p{C}+"#, oneOrMore(of: prop(.generalCategory(.other)))) + // L& defined by PCRE. + parseTest(#"\p{L&}"#, prop(.generalCategory(.casedLetter))) + // UAX44-LM3 means all of the below are equivalent. let lowercaseLetter = prop(.generalCategory(.lowercaseLetter)) parseTest(#"\p{ll}"#, lowercaseLetter) @@ -1193,13 +1358,13 @@ extension RegexTests { parseTest(#"\p{isAlphabetic}"#, prop(.binary(.alphabetic))) parseTest(#"\p{isAlpha=isFalse}"#, prop(.binary(.alphabetic, value: false))) - parseTest(#"\p{In_Runic}"#, prop(.onigurumaSpecial(.inRunic))) + parseTest(#"\p{In_Runic}"#, prop(.onigurumaSpecial(.inRunic)), throwsError: .unsupported) - parseTest(#"\p{Xan}"#, prop(.pcreSpecial(.alphanumeric))) - parseTest(#"\p{Xps}"#, prop(.pcreSpecial(.posixSpace))) - parseTest(#"\p{Xsp}"#, prop(.pcreSpecial(.perlSpace))) - parseTest(#"\p{Xuc}"#, prop(.pcreSpecial(.universallyNamed))) - parseTest(#"\p{Xwd}"#, prop(.pcreSpecial(.perlWord))) + parseTest(#"\p{Xan}"#, prop(.pcreSpecial(.alphanumeric)), throwsError: .unsupported) + parseTest(#"\p{Xps}"#, prop(.pcreSpecial(.posixSpace)), throwsError: .unsupported) + parseTest(#"\p{Xsp}"#, prop(.pcreSpecial(.perlSpace)), throwsError: .unsupported) + parseTest(#"\p{Xuc}"#, prop(.pcreSpecial(.universallyNamed)), throwsError: .unsupported) + parseTest(#"\p{Xwd}"#, prop(.pcreSpecial(.perlWord)), throwsError: .unsupported) parseTest(#"\p{alnum}"#, prop(.posix(.alnum))) parseTest(#"\p{is_alnum}"#, prop(.posix(.alnum))) @@ -1209,48 +1374,55 @@ extension RegexTests { parseTest(#"\p{word}"#, prop(.posix(.word))) 
parseTest(#"\p{xdigit}"#, prop(.posix(.xdigit))) + parseTest(#"\p{name=A}"#, prop(.named("A"))) + parseTest(#"\p{Name=B}"#, prop(.named("B"))) + parseTest(#"\p{isName=C}"#, prop(.named("C"))) + parseTest(#"\p{na=D}"#, prop(.named("D"))) + parseTest(#"\p{NA=E}"#, prop(.named("E"))) + parseTest(#"\p{na=isI}"#, prop(.named("isI"))) + // MARK: Conditionals parseTest(#"(?(1))"#, conditional( - .groupMatched(ref(1)), trueBranch: empty(), falseBranch: empty())) + .groupMatched(ref(1)), trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported) parseTest(#"(?(1)|)"#, conditional( - .groupMatched(ref(1)), trueBranch: empty(), falseBranch: empty())) + .groupMatched(ref(1)), trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported) parseTest(#"(?(1)a)"#, conditional( - .groupMatched(ref(1)), trueBranch: "a", falseBranch: empty())) + .groupMatched(ref(1)), trueBranch: "a", falseBranch: empty()), throwsError: .unsupported) parseTest(#"(?(1)a|)"#, conditional( - .groupMatched(ref(1)), trueBranch: "a", falseBranch: empty())) + .groupMatched(ref(1)), trueBranch: "a", falseBranch: empty()), throwsError: .unsupported) parseTest(#"(?(1)|b)"#, conditional( - .groupMatched(ref(1)), trueBranch: empty(), falseBranch: "b")) + .groupMatched(ref(1)), trueBranch: empty(), falseBranch: "b"), throwsError: .unsupported) parseTest(#"(?(1)a|b)"#, conditional( - .groupMatched(ref(1)), trueBranch: "a", falseBranch: "b")) + .groupMatched(ref(1)), trueBranch: "a", falseBranch: "b"), throwsError: .unsupported) parseTest(#"(?(1)(a|b|c)|d)"#, conditional( .groupMatched(ref(1)), trueBranch: capture(alt("a", "b", "c")), falseBranch: "d" - ), captures: [.opt]) + ), throwsError: .unsupported, captures: [.opt]) parseTest(#"(?(+3))"#, conditional( - .groupMatched(ref(plus: 3)), trueBranch: empty(), falseBranch: empty())) + .groupMatched(ref(plus: 3)), trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported) parseTest(#"(?(-21))"#, conditional( - .groupMatched(ref(minus: 
21)), trueBranch: empty(), falseBranch: empty())) + .groupMatched(ref(minus: 21)), trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported) // Oniguruma recursion levels. parseTest(#"(?(1+1))"#, conditional( .groupMatched(ref(1, recursionLevel: 1)), - trueBranch: empty(), falseBranch: empty()) + trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported ) parseTest(#"(?(-1+1))"#, conditional( .groupMatched(ref(minus: 1, recursionLevel: 1)), - trueBranch: empty(), falseBranch: empty()) + trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported ) parseTest(#"(?(1-3))"#, conditional( .groupMatched(ref(1, recursionLevel: -3)), - trueBranch: empty(), falseBranch: empty()) + trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported ) parseTest(#"(?(+1-3))"#, conditional( .groupMatched(ref(plus: 1, recursionLevel: -3)), - trueBranch: empty(), falseBranch: empty()) + trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported ) parseTest( #"(?)(?(a+5))"#, @@ -1258,7 +1430,7 @@ extension RegexTests { .groupMatched(ref("a", recursionLevel: 5)), trueBranch: empty(), falseBranch: empty() )), - captures: [.named("a")] + throwsError: .unsupported, captures: [.named("a")] ) parseTest( #"(?)(?(a1-5))"#, @@ -1266,50 +1438,50 @@ extension RegexTests { .groupMatched(ref("a1", recursionLevel: -5)), trueBranch: empty(), falseBranch: empty() )), - captures: [.named("a1")] + throwsError: .unsupported, captures: [.named("a1")] ) parseTest(#"(?(1))?"#, zeroOrOne(of: conditional( - .groupMatched(ref(1)), trueBranch: empty(), falseBranch: empty()))) + .groupMatched(ref(1)), trueBranch: empty(), falseBranch: empty())), throwsError: .unsupported) parseTest(#"(?(R)a|b)"#, conditional( - .recursionCheck, trueBranch: "a", falseBranch: "b")) + .recursionCheck, trueBranch: "a", falseBranch: "b"), throwsError: .unsupported) parseTest(#"(?(R1))"#, conditional( - .groupRecursionCheck(ref(1)), trueBranch: empty(), falseBranch: empty())) + 
.groupRecursionCheck(ref(1)), trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported) parseTest(#"(?(R&abc)a|b)"#, conditional( - .groupRecursionCheck(ref("abc")), trueBranch: "a", falseBranch: "b")) + .groupRecursionCheck(ref("abc")), trueBranch: "a", falseBranch: "b"), throwsError: .unsupported) parseTest(#"(?()a|b)"#, conditional( - .groupMatched(ref("abc")), trueBranch: "a", falseBranch: "b")) + .groupMatched(ref("abc")), trueBranch: "a", falseBranch: "b"), throwsError: .unsupported) parseTest(#"(?('abc')a|b)"#, conditional( - .groupMatched(ref("abc")), trueBranch: "a", falseBranch: "b")) + .groupMatched(ref("abc")), trueBranch: "a", falseBranch: "b"), throwsError: .unsupported) parseTest(#"(?(abc)a|b)"#, conditional( groupCondition(.capture, concat("a", "b", "c")), trueBranch: "a", falseBranch: "b" - ), captures: [.cap]) + ), throwsError: .unsupported, captures: [.cap]) parseTest(#"(?(?:abc)a|b)"#, conditional( groupCondition(.nonCapture, concat("a", "b", "c")), trueBranch: "a", falseBranch: "b" - )) + ), throwsError: .unsupported) parseTest(#"(?(?=abc)a|b)"#, conditional( groupCondition(.lookahead, concat("a", "b", "c")), trueBranch: "a", falseBranch: "b" - )) + ), throwsError: .unsupported) parseTest(#"(?(?!abc)a|b)"#, conditional( groupCondition(.negativeLookahead, concat("a", "b", "c")), trueBranch: "a", falseBranch: "b" - )) + ), throwsError: .unsupported) parseTest(#"(?(?<=abc)a|b)"#, conditional( groupCondition(.lookbehind, concat("a", "b", "c")), trueBranch: "a", falseBranch: "b" - )) + ), throwsError: .unsupported) parseTest(#"(?(?y)(?(xxx)a|b)"#, concat( namedCapture("xxx", "y"), conditional(.groupMatched(ref("xxx")), trueBranch: "a", falseBranch: "b") - ), captures: [.named("xxx")]) + ), throwsError: .unsupported, captures: [.named("xxx")]) parseTest(#"(?(1)(?(2)(?(3)))|a)"#, conditional( .groupMatched(ref(1)), @@ -1339,115 +1511,119 @@ extension RegexTests { trueBranch: empty(), falseBranch: empty()), falseBranch: empty()), - 
falseBranch: "a")) + falseBranch: "a"), throwsError: .unsupported) parseTest(#"(?(DEFINE))"#, conditional( - .defineGroup, trueBranch: empty(), falseBranch: empty())) + .defineGroup, trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported) parseTest(#"(?(VERSION>=3.1))"#, conditional( pcreVersionCheck(.greaterThanOrEqual, 3, 1), - trueBranch: empty(), falseBranch: empty()) + trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported ) parseTest(#"(?(VERSION=0.1))"#, conditional( pcreVersionCheck(.equal, 0, 1), - trueBranch: empty(), falseBranch: empty()) + trueBranch: empty(), falseBranch: empty()), throwsError: .unsupported ) // MARK: Callouts // PCRE callouts - parseTest(#"(?C)"#, pcreCallout(.number(0))) - parseTest(#"(?C0)"#, pcreCallout(.number(0))) - parseTest(#"(?C20)"#, pcreCallout(.number(20))) - parseTest("(?C{abc})", pcreCallout(.string("abc"))) + parseTest(#"(?C)"#, pcreCallout(.number(0)), throwsError: .unsupported) + parseTest(#"(?C0)"#, pcreCallout(.number(0)), throwsError: .unsupported) + parseTest(#"(?C20)"#, pcreCallout(.number(20)), throwsError: .unsupported) + parseTest("(?C{abc})", pcreCallout(.string("abc")), throwsError: .unsupported) for delim in ["`", "'", "\"", "^", "%", "#", "$"] { - parseTest("(?C\(delim)hello\(delim))", pcreCallout(.string("hello"))) + parseTest("(?C\(delim)hello\(delim))", pcreCallout(.string("hello")), + throwsError: .unsupported) } // Oniguruma named callouts - parseTest("(*X)", onigurumaNamedCallout("X")) - parseTest("(*foo[t])", onigurumaNamedCallout("foo", tag: "t")) - parseTest("(*foo[a0]{b})", onigurumaNamedCallout("foo", tag: "a0", args: "b")) - parseTest("(*foo{b})", onigurumaNamedCallout("foo", args: "b")) - parseTest("(*foo[a]{a,b,c})", onigurumaNamedCallout("foo", tag: "a", args: "a", "b", "c")) - parseTest("(*foo{a,b,c})", onigurumaNamedCallout("foo", args: "a", "b", "c")) - parseTest("(*foo{%%$,!!,>>})", onigurumaNamedCallout("foo", args: "%%$", "!!", ">>")) - parseTest("(*foo{a, 
b, c})", onigurumaNamedCallout("foo", args: "a", " b", " c")) + parseTest("(*X)", onigurumaNamedCallout("X"), throwsError: .unsupported) + parseTest("(*foo[t])", onigurumaNamedCallout("foo", tag: "t"), throwsError: .unsupported) + parseTest("(*foo[a0]{b})", onigurumaNamedCallout("foo", tag: "a0", args: "b"), throwsError: .unsupported) + parseTest("(*foo{b})", onigurumaNamedCallout("foo", args: "b"), throwsError: .unsupported) + parseTest("(*foo[a]{a,b,c})", onigurumaNamedCallout("foo", tag: "a", args: "a", "b", "c"), throwsError: .unsupported) + parseTest("(*foo{a,b,c})", onigurumaNamedCallout("foo", args: "a", "b", "c"), throwsError: .unsupported) + parseTest("(*foo{%%$,!!,>>})", onigurumaNamedCallout("foo", args: "%%$", "!!", ">>"), throwsError: .unsupported) + parseTest("(*foo{a, b, c})", onigurumaNamedCallout("foo", args: "a", " b", " c"), throwsError: .unsupported) // Oniguruma 'of contents' callouts - parseTest("(?{x})", onigurumaCalloutOfContents("x")) - parseTest("(?{{{x}}y}}})", onigurumaCalloutOfContents("x}}y")) - parseTest("(?{{{x}}})", onigurumaCalloutOfContents("x")) - parseTest("(?{x}[tag])", onigurumaCalloutOfContents("x", tag: "tag")) - parseTest("(?{x}[tag]<)", onigurumaCalloutOfContents("x", tag: "tag", direction: .inRetraction)) - parseTest("(?{x}X)", onigurumaCalloutOfContents("x", direction: .both)) - parseTest("(?{x}>)", onigurumaCalloutOfContents("x")) - parseTest("(?{\\x})", onigurumaCalloutOfContents("\\x")) - parseTest("(?{\\})", onigurumaCalloutOfContents("\\")) + parseTest("(?{x})", onigurumaCalloutOfContents("x"), throwsError: .unsupported) + parseTest("(?{{{x}}y}}})", onigurumaCalloutOfContents("x}}y"), throwsError: .unsupported) + parseTest("(?{{{x}}})", onigurumaCalloutOfContents("x"), throwsError: .unsupported) + parseTest("(?{x}[tag])", onigurumaCalloutOfContents("x", tag: "tag"), throwsError: .unsupported) + parseTest("(?{x}[tag]<)", onigurumaCalloutOfContents("x", tag: "tag", direction: .inRetraction), throwsError: .unsupported) 
+ parseTest("(?{x}X)", onigurumaCalloutOfContents("x", direction: .both), throwsError: .unsupported) + parseTest("(?{x}>)", onigurumaCalloutOfContents("x"), throwsError: .unsupported) + parseTest("(?{\\x})", onigurumaCalloutOfContents("\\x"), throwsError: .unsupported) + parseTest("(?{\\})", onigurumaCalloutOfContents("\\"), throwsError: .unsupported) // MARK: Backtracking directives - parseTest("(*ACCEPT)?", zeroOrOne(of: backtrackingDirective(.accept))) + parseTest("(*ACCEPT)?", zeroOrOne(of: backtrackingDirective(.accept)), throwsError: .unsupported) parseTest( "(*ACCEPT:a)??", - zeroOrOne(.reluctant, of: backtrackingDirective(.accept, name: "a")) + zeroOrOne(.reluctant, of: backtrackingDirective(.accept, name: "a")), + throwsError: .unsupported ) - parseTest("(*:a)", backtrackingDirective(.mark, name: "a")) - parseTest("(*MARK:a)", backtrackingDirective(.mark, name: "a")) - parseTest("(*F)", backtrackingDirective(.fail)) - parseTest("(*COMMIT)", backtrackingDirective(.commit)) - parseTest("(*SKIP)", backtrackingDirective(.skip)) - parseTest("(*SKIP:SKIP)", backtrackingDirective(.skip, name: "SKIP")) - parseTest("(*PRUNE)", backtrackingDirective(.prune)) - parseTest("(*THEN)", backtrackingDirective(.then)) + parseTest("(*:a)", backtrackingDirective(.mark, name: "a"), throwsError: .unsupported) + parseTest("(*MARK:a)", backtrackingDirective(.mark, name: "a"), throwsError: .unsupported) + parseTest("(*F)", backtrackingDirective(.fail), throwsError: .unsupported) + parseTest("(*COMMIT)", backtrackingDirective(.commit), throwsError: .unsupported) + parseTest("(*SKIP)", backtrackingDirective(.skip), throwsError: .unsupported) + parseTest("(*SKIP:SKIP)", backtrackingDirective(.skip, name: "SKIP"), throwsError: .unsupported) + parseTest("(*PRUNE)", backtrackingDirective(.prune), throwsError: .unsupported) + parseTest("(*THEN)", backtrackingDirective(.then), throwsError: .unsupported) // MARK: Oniguruma absent functions - parseTest("(?~)", absentRepeater(empty())) - 
parseTest("(?~abc)", absentRepeater(concat("a", "b", "c"))) - parseTest("(?~a+)", absentRepeater(oneOrMore(of: "a"))) - parseTest("(?~~)", absentRepeater("~")) - parseTest("(?~a|b|c)", absentRepeater(alt("a", "b", "c"))) - parseTest("(?~(a))", absentRepeater(capture("a")), captures: []) - parseTest("(?~)*", zeroOrMore(of: absentRepeater(empty()))) - - parseTest("(?~|abc)", absentStopper(concat("a", "b", "c"))) - parseTest("(?~|a+)", absentStopper(oneOrMore(of: "a"))) - parseTest("(?~|~)", absentStopper("~")) - parseTest("(?~|(a))", absentStopper(capture("a")), captures: []) - parseTest("(?~|a){2}", exactly(2, of: absentStopper("a"))) - - parseTest("(?~|a|b)", absentExpression("a", "b")) - parseTest("(?~|~|~)", absentExpression("~", "~")) + parseTest("(?~)", absentRepeater(empty()), throwsError: .unsupported) + parseTest("(?~abc)", absentRepeater(concat("a", "b", "c")), throwsError: .unsupported) + parseTest("(?~a+)", absentRepeater(oneOrMore(of: "a")), throwsError: .unsupported) + parseTest("(?~~)", absentRepeater("~"), throwsError: .unsupported) + parseTest("(?~a|b|c)", absentRepeater(alt("a", "b", "c")), throwsError: .unsupported) + parseTest("(?~(a))", absentRepeater(capture("a")), throwsError: .unsupported, captures: []) + parseTest("(?~)*", zeroOrMore(of: absentRepeater(empty())), throwsError: .unsupported) + + parseTest("(?~|abc)", absentStopper(concat("a", "b", "c")), throwsError: .unsupported) + parseTest("(?~|a+)", absentStopper(oneOrMore(of: "a")), throwsError: .unsupported) + parseTest("(?~|~)", absentStopper("~"), throwsError: .unsupported) + parseTest("(?~|(a))", absentStopper(capture("a")), throwsError: .unsupported, captures: []) + parseTest("(?~|a){2}", exactly(2, of: absentStopper("a")), throwsError: .unsupported) + + parseTest("(?~|a|b)", absentExpression("a", "b"), throwsError: .unsupported) + parseTest("(?~|~|~)", absentExpression("~", "~"), throwsError: .unsupported) parseTest("(?~|(a)|(?:b))", absentExpression(capture("a"), nonCapture("b")), - 
captures: []) + throwsError: .unsupported, captures: []) parseTest("(?~|(a)|(?:(b)|c))", absentExpression( capture("a"), nonCapture(alt(capture("b"), "c")) - ), captures: [.opt]) - parseTest("(?~|a|b)?", zeroOrOne(of: absentExpression("a", "b"))) + ), throwsError: .unsupported, captures: [.opt]) + parseTest("(?~|a|b)?", zeroOrOne(of: absentExpression("a", "b")), throwsError: .unsupported) - parseTest("(?~|)", absentRangeClear()) + parseTest("(?~|)", absentRangeClear(), throwsError: .unsupported) // TODO: It's not really clear what this means, but Oniguruma parses it... // Maybe we should diagnose it? - parseTest("(?~|)+", oneOrMore(of: absentRangeClear())) + parseTest("(?~|)+", oneOrMore(of: absentRangeClear()), throwsError: .unsupported) // MARK: Global matching options parseTest("(*CR)(*UTF)(*LIMIT_DEPTH=3)", ast( empty(), opts: .newlineMatching(.carriageReturnOnly), .utfMode, .limitDepth(.init(faking: 3)) - )) + ), throwsError: .unsupported) parseTest( - "(*BSR_UNICODE)3", ast("3", opts: .newlineSequenceMatching(.anyUnicode))) + "(*BSR_UNICODE)3", ast("3", opts: .newlineSequenceMatching(.anyUnicode)), + throwsError: .unsupported) parseTest( "(*BSR_ANYCRLF)", ast( - empty(), opts: .newlineSequenceMatching(.anyCarriageReturnOrLinefeed))) + empty(), opts: .newlineSequenceMatching(.anyCarriageReturnOrLinefeed)), + throwsError: .unsupported) // TODO: Diagnose on multiple line matching modes? 
parseTest( @@ -1455,7 +1631,7 @@ extension RegexTests { ast(empty(), opts: [ .carriageReturnOnly, .linefeedOnly, .carriageAndLinefeedOnly, .anyCarriageReturnOrLinefeed, .anyUnicode, .nulCharacter - ].map { .newlineMatching($0) })) + ].map { .newlineMatching($0) }), throwsError: .unsupported) parseTest( """ @@ -1468,7 +1644,7 @@ extension RegexTests { .limitMatch(.init(faking: 2)), .notEmpty, .notEmptyAtStart, .noAutoPossess, .noDotStarAnchor, .noJIT, .noStartOpt, .utfMode, .unicodeProperties - ) + ), throwsError: .unsupported ) parseTest("[(*CR)]", charClass("(", "*", "C", "R", ")")) @@ -1682,7 +1858,7 @@ extension RegexTests { # h """, ast(empty(), opts: .newlineMatching(.carriageReturnOnly)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1693,7 +1869,7 @@ extension RegexTests { # h """, ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.carriageReturnOnly)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1704,7 +1880,7 @@ extension RegexTests { # h """, ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.linefeedOnly)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1715,7 +1891,7 @@ extension RegexTests { # h """, ast(empty(), opts: .newlineMatching(.carriageAndLinefeedOnly)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1726,7 +1902,7 @@ extension RegexTests { # h """, ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.carriageAndLinefeedOnly)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1737,7 +1913,7 @@ extension RegexTests { # h """, ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.anyCarriageReturnOrLinefeed)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1748,7 +1924,7 @@ extension RegexTests { # h """, 
ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.anyCarriageReturnOrLinefeed)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1759,7 +1935,7 @@ extension RegexTests { # h """, ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.anyCarriageReturnOrLinefeed)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1770,7 +1946,7 @@ extension RegexTests { # h """, ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.anyUnicode)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1791,7 +1967,7 @@ extension RegexTests { # h """, ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.anyUnicode)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1802,7 +1978,7 @@ extension RegexTests { # h """, ast(concat("e", "f"), opts: .newlineMatching(.nulCharacter)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1813,7 +1989,7 @@ extension RegexTests { # h """, ast(concat("b", "c", "e", "f"), opts: .newlineMatching(.nulCharacter)), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) parseTest( """ @@ -1827,7 +2003,7 @@ extension RegexTests { opts: .newlineMatching(.carriageReturnOnly), .newlineMatching(.nulCharacter) ), - syntax: .extendedSyntax + throwsError: .unsupported, syntax: .extendedSyntax ) // MARK: Parse with delimiters @@ -1923,6 +2099,26 @@ extension RegexTests { """, changeMatchingOptions(matchingOptions(adding: .extended)) ) + parseWithDelimitersTest(#""" + #/ + \p{ + gc + = + digit + } + /# + """#, prop(.generalCategory(.decimalNumber))) + + parseWithDelimitersTest(#""" + #/ + \u{ + aB + B + c + } + /# + """#, scalarSeq("\u{AB}", "\u{B}", "\u{C}")) + // MARK: Delimiter skipping: Make sure we can skip over the ending delimiter // if it's clear that it's part of the regex 
syntax. @@ -1930,30 +2126,37 @@ extension RegexTests { #"re'(?'a_bcA0'\')'"#, namedCapture("a_bcA0", "'")) parseWithDelimitersTest( #"re'(?'a_bcA0-c1A'x*)'"#, - balancedCapture(name: "a_bcA0", priorName: "c1A", zeroOrMore(of: "x"))) + balancedCapture(name: "a_bcA0", priorName: "c1A", zeroOrMore(of: "x")), + throwsError: .unsupported) parseWithDelimitersTest( #"rx' (?'a_bcA0' a b)'"#, concat(namedCapture("a_bcA0", concat("a", "b")))) parseWithDelimitersTest( #"re'(?('a_bcA0')x|y)'"#, conditional( - .groupMatched(ref("a_bcA0")), trueBranch: "x", falseBranch: "y")) + .groupMatched(ref("a_bcA0")), trueBranch: "x", falseBranch: "y"), + throwsError: .unsupported + ) parseWithDelimitersTest( #"re'(?('+20')\')'"#, conditional( - .groupMatched(ref(plus: 20)), trueBranch: "'", falseBranch: empty())) - + .groupMatched(ref(plus: 20)), trueBranch: "'", falseBranch: empty()), + throwsError: .unsupported + ) parseWithDelimitersTest( - #"re'a\k'b0A''"#, concat("a", backreference(.named("b0A")))) + #"re'a\k'b0A''"#, concat("a", backreference(.named("b0A"))), throwsError: .unsupported) parseWithDelimitersTest( - #"re'\k'+2-1''"#, backreference(.relative(2), recursionLevel: -1)) + #"re'\k'+2-1''"#, backreference(.relative(2), recursionLevel: -1), + throwsError: .unsupported + ) parseWithDelimitersTest( - #"re'a\g'b0A''"#, concat("a", subpattern(.named("b0A")))) + #"re'a\g'b0A''"#, concat("a", subpattern(.named("b0A"))), throwsError: .unsupported) parseWithDelimitersTest( - #"re'\g'-1'\''"#, concat(subpattern(.relative(-1)), "'")) + #"re'\g'-1'\''"#, concat(subpattern(.relative(-1)), "'"), throwsError: .unsupported) parseWithDelimitersTest( - #"re'(?C'a*b\c 🔥_ ;')'"#, pcreCallout(.string(#"a*b\c 🔥_ ;"#))) + #"re'(?C'a*b\c 🔥_ ;')'"#, pcreCallout(.string(#"a*b\c 🔥_ ;"#)), + throwsError: .unsupported) // Fine, because we don't end up skipping. 
delimiterLexingTest(#"re'(?'"#) @@ -1990,6 +2193,12 @@ extension RegexTests { parseNotEqualTest(#"[\p{Any}]"#, #"[[:Any:]]"#) + parseNotEqualTest(#"\u{A}"#, #"\u{B}"#) + parseNotEqualTest(#"\u{A B}"#, #"\u{B A}"#) + parseNotEqualTest(#"\u{AB}"#, #"\u{A B}"#) + parseNotEqualTest(#"[\u{AA BB}-\u{CC}]"#, #"[\u{AA DD}-\u{CC}]"#) + parseNotEqualTest(#"[\u{AA BB}-\u{DD}]"#, #"[\u{AA BB}-\u{CC}]"#) + parseNotEqualTest(#"[abc[:space:]\d]+"#, #"[abc[:upper:]\d]+"#) @@ -2117,6 +2326,20 @@ extension RegexTests { $0.as(CustomCC.self)!.members[0].as(CustomCC.Range.self)!.dashLoc }) + // MARK: Unicode scalars + + rangeTest(#"\u{65}"#, range(3 ..< 5), at: { + $0.as(AST.Atom.self)!.as(AST.Atom.Scalar.self)!.location + }) + + rangeTest(#"\u{ 65 58 }"#, range(5 ..< 7), at: { + $0.as(AST.Atom.self)!.as(AST.Atom.ScalarSequence.self)!.scalars[0].location + }) + + rangeTest(#"\u{ 65 58 }"#, range(8 ..< 10), at: { + $0.as(AST.Atom.self)!.as(AST.Atom.ScalarSequence.self)!.scalars[1].location + }) + // MARK: References rangeTest(#"\k"#, range(3 ..< 6), at: { @@ -2297,6 +2520,13 @@ extension RegexTests { diagnosticTest("[[::]]", .emptyProperty) diagnosticTest("[[:=:]]", .emptyProperty) + diagnosticTest(#"|([\d-c])?"#, .invalidCharacterClassRangeOperand) + + diagnosticTest(#"[_-A]"#, .invalidCharacterRange(from: "_", to: "A")) + diagnosticTest(#"(?i)[_-A]"#, .invalidCharacterRange(from: "_", to: "A")) + diagnosticTest(#"[c-b]"#, .invalidCharacterRange(from: "c", to: "b")) + diagnosticTest(#"[\u{66}-\u{65}]"#, .invalidCharacterRange(from: "\u{66}", to: "\u{65}")) + // MARK: Bad escapes diagnosticTest("\\", .expectedEscape) @@ -2323,6 +2553,7 @@ extension RegexTests { diagnosticTest(#"\e\#u{301}"#, .invalidEscape("e\u{301}")) diagnosticTest(#"\\#u{E9}"#, .invalidEscape("é")) diagnosticTest(#"\˂"#, .invalidEscape("˂")) + diagnosticTest(#"\d\#u{301}"#, .invalidEscape("d\u{301}")) // MARK: Character properties @@ -2334,6 +2565,10 @@ extension RegexTests { diagnosticTest(#"\p{aaa\p{b}}"#, 
.unknownProperty(key: nil, value: "aaa")) diagnosticTest(#"[[:{:]]"#, .unknownProperty(key: nil, value: "{")) + // We only filter pattern whitespace, which doesn't include things like + // non-breaking spaces. + diagnosticTest(#"\p{L\#u{A0}l}"#, .unknownProperty(key: nil, value: "L\u{A0}l")) + // MARK: Matching options diagnosticTest("(?-y{g})", .cannotRemoveTextSegmentOptions) @@ -2394,6 +2629,12 @@ extension RegexTests { diagnosticTest("(?x)(? : )", .unknownGroupKind("? ")) + diagnosticTest("(?)(?)", .duplicateNamedCapture("x")) + diagnosticTest("(?)|(?)", .duplicateNamedCapture("x")) + diagnosticTest("((?))(?)", .duplicateNamedCapture("x")) + diagnosticTest("(|(?))(?)", .duplicateNamedCapture("x")) + diagnosticTest("(?)(?)(?)", .duplicateNamedCapture("x")) + // MARK: Quantifiers diagnosticTest("*", .quantifierRequiresOperand("*")) @@ -2402,11 +2643,43 @@ extension RegexTests { diagnosticTest("*?", .quantifierRequiresOperand("*?")) diagnosticTest("{5}", .quantifierRequiresOperand("{5}")) diagnosticTest("{1,3}", .quantifierRequiresOperand("{1,3}")) + diagnosticTest("a{3,2}", .invalidQuantifierRange(3, 2)) + + // These are not quantifiable. 
+ diagnosticTest(#"\b?"#, .notQuantifiable) + diagnosticTest(#"\B*"#, .notQuantifiable) + diagnosticTest(#"\A+"#, .notQuantifiable) + diagnosticTest(#"\Z??"#, .notQuantifiable) + diagnosticTest(#"\G*?"#, .notQuantifiable) + diagnosticTest(#"\z+?"#, .notQuantifiable) + diagnosticTest(#"^*"#, .notQuantifiable) + diagnosticTest(#"$?"#, .notQuantifiable) + diagnosticTest(#"(?=a)+"#, .notQuantifiable) + diagnosticTest(#"(?i)*"#, .notQuantifiable) + diagnosticTest(#"\K{1}"#, .unsupported(#"'\K'"#)) + diagnosticTest(#"\y{2,5}"#, .notQuantifiable) + diagnosticTest(#"\Y{3,}"#, .notQuantifiable) // MARK: Unicode scalars diagnosticTest(#"\u{G}"#, .expectedNumber("G", kind: .hex)) + diagnosticTest(#"\u{"#, .expectedNumber("", kind: .hex)) + diagnosticTest(#"\u{ "#, .expectedNumber("", kind: .hex)) + diagnosticTest(#"\u{}"#, .expectedNumber("", kind: .hex)) + diagnosticTest(#"\u{ }"#, .expectedNumber("", kind: .hex)) + diagnosticTest(#"\u{ }"#, .expectedNumber("", kind: .hex)) + diagnosticTest(#"\u{ G}"#, .expectedNumber("G", kind: .hex)) + diagnosticTest(#"\u{G }"#, .expectedNumber("G", kind: .hex)) + diagnosticTest(#"\u{ G }"#, .expectedNumber("G", kind: .hex)) + diagnosticTest(#"\u{ GH }"#, .expectedNumber("GH", kind: .hex)) + diagnosticTest(#"\u{ G H }"#, .expectedNumber("G", kind: .hex)) + diagnosticTest(#"\u{ ABC G }"#, .expectedNumber("G", kind: .hex)) + diagnosticTest(#"\u{ FFFFFFFFF A }"#, .numberOverflow("FFFFFFFFF")) + + diagnosticTest(#"[\d--\u{a b}]"#, .unsupported("scalar sequence in custom character class")) + diagnosticTest(#"[\d--[\u{a b}]]"#, .unsupported("scalar sequence in custom character class")) + // MARK: Matching options diagnosticTest(#"(?^-"#, .cannotRemoveMatchingOptionsAfterCaret) @@ -2441,6 +2714,16 @@ extension RegexTests { diagnosticTest(#"\k"#, .expectedNumber("", kind: .decimal)) diagnosticTest(#"\k<1+>"#, .expectedNumber("", kind: .decimal)) + diagnosticTest(#"()\k<1+1>"#, .unsupported("recursion level")) + diagnosticTest(#"()\k<1-1>"#, 
.unsupported("recursion level")) + + diagnosticTest(#"\k<0>"#, .cannotReferToWholePattern) + diagnosticTest(#"\1"#, .invalidReference(1)) + diagnosticTest(#"(?:)\1"#, .invalidReference(1)) + diagnosticTest(#"()\2"#, .invalidReference(2)) + diagnosticTest(#"\2()"#, .invalidReference(2)) + diagnosticTest(#"(?:)()\2"#, .invalidReference(2)) + diagnosticTest(#"(?:)(?:)\2"#, .invalidReference(2)) // MARK: Conditionals @@ -2479,13 +2762,13 @@ extension RegexTests { diagnosticTest("(*MARK)", .backtrackingDirectiveMustHaveName("MARK")) diagnosticTest("(*:)", .expectedNonEmptyContents) - diagnosticTest("(*MARK:a)?", .notQuantifiable) - diagnosticTest("(*FAIL)+", .notQuantifiable) - diagnosticTest("(*COMMIT:b)*", .notQuantifiable) - diagnosticTest("(*PRUNE:a)??", .notQuantifiable) - diagnosticTest("(*SKIP:a)*?", .notQuantifiable) - diagnosticTest("(*F)+?", .notQuantifiable) - diagnosticTest("(*:a){2}", .notQuantifiable) + diagnosticTest("(*MARK:a)?", .unsupported("backtracking directive")) + diagnosticTest("(*FAIL)+", .unsupported("backtracking directive")) + diagnosticTest("(*COMMIT:b)*", .unsupported("backtracking directive")) + diagnosticTest("(*PRUNE:a)??", .unsupported("backtracking directive")) + diagnosticTest("(*SKIP:a)*?", .unsupported("backtracking directive")) + diagnosticTest("(*F)+?", .unsupported("backtracking directive")) + diagnosticTest("(*:a){2}", .unsupported("backtracking directive")) // MARK: Oniguruma absent functions @@ -2543,5 +2826,9 @@ extension RegexTests { func testCompilerInterfaceDiagnostics() { compilerInterfaceDiagnosticMessageTest( "#/[x*/#", "cannot parse regular expression: expected ']'") + compilerInterfaceDiagnosticMessageTest( + "/a{3,2}/", "cannot parse regular expression: range lower bound '3' must be less than or equal to upper bound '2'") + compilerInterfaceDiagnosticMessageTest( + #"#/\u{}/#"#, "cannot parse regular expression: expected hexadecimal number") } } diff --git a/Tests/RegexTests/UTS18Tests.swift 
b/Tests/RegexTests/UTS18Tests.swift new file mode 100644 index 000000000..d13b47b8d --- /dev/null +++ b/Tests/RegexTests/UTS18Tests.swift @@ -0,0 +1,618 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +// This test suite includes tests that verify the behavior of `Regex` as it +// relates to Unicode Technical Standard #18: Unicode Regular Expressions. +// +// Please note: Quotations of UTS18 in this file mostly use 'Character' to mean +// Unicode code point, and 'String' to mean 'sequence of code points' — they +// are not the Swift meanings of those terms. +// +// See https://unicode.org/reports/tr18/ for more. + +import XCTest +@testable // for internal `matches(of:)` +import _StringProcessing + +extension UnicodeScalar { + var value4Digits: String { + let valueString = String(value, radix: 16, uppercase: true) + if valueString.count >= 4 { return valueString } + return String(repeating: "0", count: 4 - valueString.count) + valueString + } +} + +class UTS18Tests: XCTestCase { + var input: String { + "ABCdefghîøu\u{308}\u{FFF0} -–—[]123" + // 01234567890 1 234567890 + // 0 10 20 + } +} + +fileprivate func regex(_ pattern: String) -> Regex { + try! 
Regex(pattern, as: Substring.self) +} + +fileprivate extension String { + subscript(pos bounds: R) -> Substring + where R.Bound == Int + { + let bounds = bounds.relative(to: 0..( + _ input: String, + _ r: Regex, + _ output: Output, + file: StaticString = #file, + line: UInt = #line) +{ + XCTAssertEqual(input.firstMatch(of: r)?.output, output, file: file, line: line) +} + +#if os(Linux) +func XCTExpectFailure(_ message: String? = nil, body: () -> Void) {} +#endif + +// MARK: - Basic Unicode Support: Level 1 + +// C1. An implementation claiming conformance to Level 1 of this specification +// shall meet the requirements described in the following sections: +extension UTS18Tests { + // RL1.1 Hex Notation + // + // To meet this requirement, an implementation shall supply a mechanism for + // specifying any Unicode code point (from U+0000 to U+10FFFF), using the + // hexadecimal code point representation. + func testHexNotation() { + expectFirstMatch("ab", regex(#"\u{61}\u{62}"#), "ab") + expectFirstMatch("𝄞", regex(#"\u{1D11E}"#), "𝄞") + } + + // 1.1.1 Hex Notation and Normalization + // + // TODO: Does this section make a recommendation? + + // RL1.2 Properties + // To meet this requirement, an implementation shall provide at least a + // minimal list of properties, consisting of the following: + // - General_Category + // - Script and Script_Extensions + // - Alphabetic + // - Uppercase + // - Lowercase + // - White_Space + // - Noncharacter_Code_Point + // - Default_Ignorable_Code_Point + // - ANY, ASCII, ASSIGNED + // The values for these properties must follow the Unicode definitions, and + // include the property and property value aliases from the UCD. Matching of + // Binary, Enumerated, Catalog, and Name values must follow the Matching + // Rules from [UAX44] with one exception: implementations are not required + // to ignore an initial prefix string of "is" in property values. 
+ func testProperties() { + // General_Category + expectFirstMatch(input, regex(#"\p{Lu}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{lu}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{uppercase letter}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{Uppercase Letter}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{Uppercase_Letter}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{uppercaseletter}+"#), input[pos: ..<3]) + + expectFirstMatch(input, regex(#"\p{P}+"#), "-–—[]") + expectFirstMatch(input, regex(#"\p{Pd}+"#), "-–—") + + expectFirstMatch(input, regex(#"\p{Any}+"#), input[...]) + expectFirstMatch(input, regex(#"\p{Assigned}+"#), input[pos: ..<11]) + expectFirstMatch(input, regex(#"\p{ASCII}+"#), input[pos: ..<8]) + + // Script and Script_Extensions + // U+3042 あ HIRAGANA LETTER A Hira {Hira} + XCTAssertTrue("\u{3042}".contains(regex(#"\p{Hira}"#))) + XCTAssertTrue("\u{3042}".contains(regex(#"\p{sc=Hira}"#))) + XCTAssertTrue("\u{3042}".contains(regex(#"\p{scx=Hira}"#))) + // U+30FC ー KATAKANA-HIRAGANA PROLONGED SOUND MARK Zyyy = Common {Hira, Kana} + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{Hira}"#))) // Implicit = Script_Extensions + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{Kana}"#))) + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{sc=Zyyy}"#))) // Explicit = Script + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{scx=Hira}"#))) + XCTAssertTrue("\u{30FC}".contains(regex(#"\p{scx=Kana}"#))) + XCTAssertFalse("\u{30FC}".contains(regex(#"\p{sc=Hira}"#))) + XCTAssertFalse("\u{30FC}".contains(regex(#"\p{sc=Kana}"#))) + + // Uppercase, etc + expectFirstMatch(input, regex(#"\p{Uppercase}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{isUppercase}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{Uppercase=true}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{is Uppercase}+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{is uppercase = true}+"#), 
input[pos: ..<3]) + expectFirstMatch(input, regex(#"\p{lowercase}+"#), input[pos: 3..<11]) + expectFirstMatch(input, regex(#"\p{whitespace}+"#), input[pos: 12..<13]) + + // Block vs Writing System + let greekScalar = "Θ" // U+0398 + let greekExtendedScalar = "ἀ" // U+1F00 + XCTAssertTrue(greekScalar.contains(regex(#"\p{Greek}"#))) + XCTAssertTrue(greekExtendedScalar.contains(regex(#"\p{Greek}"#))) + } + + func testProperties_XFail() { + XCTExpectFailure("Need to support 'age' and 'block' properties") { + // XCTAssertFalse("z".contains(#/\p{age=3.1}/#)) + XCTFail(#"\(#/\p{age=3.1}/#)"#) + // XCTAssertTrue("\u{1F00}".contains(#/\p{Block=Greek}/#)) + XCTFail(#"\(#/\p{Block=Greek}/#)"#) + } + } + + // RL1.2a Compatibility Properties + // To meet this requirement, an implementation shall provide the properties + // listed in Annex C: Compatibility Properties, with the property values as + // listed there. Such an implementation shall document whether it is using + // the Standard Recommendation or POSIX-compatible properties. 
+ func testCompatibilityProperties() throws { + // FIXME: These tests seem insufficient + expectFirstMatch(input, regex(#"[[:alpha:]]+"#), input[pos: ..<11]) + expectFirstMatch(input, regex(#"[[:upper:]]+"#), input[pos: ..<3]) + expectFirstMatch(input, regex(#"[[:lower:]]+"#), input[pos: 3..<11]) + expectFirstMatch(input, regex(#"[[:punct:]]+"#), input[pos: 13..<18]) + expectFirstMatch(input, regex(#"[[:digit:]]+"#), input[pos: 18..<21]) + expectFirstMatch(input, regex(#"[[:xdigit:]]+"#), input[pos: ..<6]) + expectFirstMatch(input, regex(#"[[:alnum:]]+"#), input[pos: ..<11]) + expectFirstMatch(input, regex(#"[[:space:]]+"#), input[pos: 12..<13]) + // TODO: blank + // TODO: cntrl + expectFirstMatch(input, regex(#"[[:graph:]]+"#), input[pos: ..<11]) + expectFirstMatch(input, regex(#"[[:print:]]+"#), input[...]) + expectFirstMatch(input, regex(#"[[:word:]]+"#), input[pos: ..<11]) + } + + //RL1.3 Subtraction and Intersection + // + // To meet this requirement, an implementation shall supply mechanisms for + // union, intersection and set-difference of sets of characters within + // regular expression character class expressions. 
+ func testSubtractionAndIntersection() throws { + // Non-ASCII letters + expectFirstMatch(input, regex(#"[\p{Letter}--\p{ASCII}]+"#), input[pos: 8..<11]) + // Digits that aren't 1 or 2 + expectFirstMatch(input, regex(#"[\p{digit}--[12]]+"#), input[pos: 20..<21]) + + // ASCII-only letters + expectFirstMatch(input, regex(#"[\p{Letter}&&\p{ASCII}]+"#), input[pos: ..<8]) + // Digits that are 2 or 3 + expectFirstMatch(input, regex(#"[\p{digit}&&[23]]+"#), input[pos: 19..<21]) + + // Non-ASCII lowercase + non-lowercase ASCII + expectFirstMatch(input, regex(#"[\p{lowercase}~~\p{ascii}]+"#), input[pos: ..<3]) + XCTAssertTrue("123%&^ABC".contains(regex(#"^[\p{lowercase}~~\p{ascii}]+$"#))) + } + + func testSubtractionAndIntersectionPrecedence() { + expectFirstMatch("ABC123-", regex(#"[[:alnum:]]*-"#), "ABC123-") + expectFirstMatch("ABC123-", regex(#"[[:alnum:]--\p{Uppercase}]*-"#), "123-") + // Union binds more closely than difference + expectFirstMatch("ABC123-", regex(#"[[:alnum:]--\p{Uppercase}[:digit:]]*-"#), "-") + // TODO: Test for intersection precedence + } + + // RL1.4 Simple Word Boundaries + // To meet this requirement, an implementation shall extend the word boundary + // mechanism so that: + // - The class of includes all the Alphabetic values from the + // Unicode character database, from UnicodeData.txt, plus the decimals + // (General_Category=Decimal_Number, or equivalently Numeric_Type=Decimal), + // and the U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER + // (Join_Control=True). See also Annex C: Compatibility Properties. + // - Nonspacing marks are never divided from their base characters, and + // otherwise ignored in locating boundaries. 
+ func testSimpleWordBoundaries() { + let simpleWordRegex = regex(#".+?\b"#).wordBoundaryKind(.unicodeLevel1) + expectFirstMatch(input, simpleWordRegex, input[pos: ..<11]) + expectFirstMatch("don't", simpleWordRegex, "don") + expectFirstMatch("Cafe\u{301}", simpleWordRegex, "Café") + } + + // RL1.5 Simple Loose Matches + // + // To meet this requirement, if an implementation provides for case- + // insensitive matching, then it shall provide at least the simple, default + // Unicode case-insensitive matching, and specify which properties are closed + // and which are not. + // + // To meet this requirement, if an implementation provides for case + // conversions, then it shall provide at least the simple, default Unicode + // case folding. + func testSimpleLooseMatches() { + expectFirstMatch("Dåb", regex(#"Dåb"#).ignoresCase(), "Dåb") + expectFirstMatch("dÅB", regex(#"Dåb"#).ignoresCase(), "dÅB") + expectFirstMatch("D\u{212B}B", regex(#"Dåb"#).ignoresCase(), "D\u{212B}B") + } + + func testSimpleLooseMatches_XFail() { + XCTExpectFailure("Need case folding support") { + let sigmas = "σΣς" + expectFirstMatch(sigmas, regex(#"σ+"#).ignoresCase(), sigmas[...]) + expectFirstMatch(sigmas, regex(#"Σ+"#).ignoresCase(), sigmas[...]) + expectFirstMatch(sigmas, regex(#"ς+"#).ignoresCase(), sigmas[...]) + + // TODO: Test German sharp S + // TODO: Test char classes, e.g. [\p{Block=Phonetic_Extensions} [A-E]] + } + } + + // RL1.6 Line Boundaries + // + // To meet this requirement, if an implementation provides for line-boundary + // testing, it shall recognize not only CRLF, LF, CR, but also NEL (U+0085), + // PARAGRAPH SEPARATOR (U+2029) and LINE SEPARATOR (U+2028). 
+ func testLineBoundaries() { + let lineInput = """ + 01 + 02\r\ + 03\n\ + 04\u{a}\ + 05\u{b}\ + 06\u{c}\ + 07\u{d}\ + 08\u{d}\u{a}\ + 09\u{85}\ + 10\u{2028}\ + 11\u{2029}\ + 12 + """ + // Check the input counts + var lines = lineInput.matches(of: regex(#"\d{2}"#)) + XCTAssertEqual(lines.count, 12) + // Test \R - newline sequence + lines = lineInput.matches(of: regex(#"\d{2}\R^"#).anchorsMatchLineEndings()) + XCTAssertEqual(lines.count, 11) + // Test \v - vertical space + lines = lineInput.matches(of: regex(#"\d{2}\v^"#).anchorsMatchLineEndings()) + XCTAssertEqual(lines.count, 11) + // Test anchors as line boundaries + lines = lineInput.matches(of: regex(#"^\d{2}$"#).anchorsMatchLineEndings()) + XCTAssertEqual(lines.count, 12) + // Test that dot does not match line endings + lines = lineInput.matches(of: regex(#".+"#)) + XCTAssertEqual(lines.count, 12) + + // Unicode scalar semantics - \R still matches all, including \r\n sequence + lines = lineInput.matches( + of: regex(#"\d{2}\R(?=\d)"#).matchingSemantics(.unicodeScalar).anchorsMatchLineEndings()) + XCTAssertEqual(lines.count, 11) + // Unicode scalar semantics - \v matches all except for \r\n sequence + lines = lineInput.matches( + of: regex(#"\d{2}\v(?=\d)"#).matchingSemantics(.unicodeScalar).anchorsMatchLineEndings()) + XCTAssertEqual(lines.count, 10) + + // Does not contain an empty line + XCTAssertFalse(lineInput.contains(regex(#"^$"#))) + // Does contain an empty line (between \n and \r, which are reversed here) + let empty = "\n\r" + XCTAssertTrue(empty.contains(regex(#"^$"#).anchorsMatchLineEndings())) + } + + // RL1.7 Supplementary Code Points + // + // To meet this requirement, an implementation shall handle the full range of + // Unicode code points, including values from U+FFFF to U+10FFFF. In + // particular, where UTF-16 is used, a sequence consisting of a leading + // surrogate followed by a trailing surrogate shall be handled as a single + // code point in matching. 
+ func testSupplementaryCodePoints() { + XCTAssertTrue("👍".contains(regex(#"\u{1F44D}"#))) + XCTAssertTrue("👍".contains(regex(#"[\u{1F440}-\u{1F44F}]"#))) + XCTAssertTrue("👍👎".contains(regex(#"^[\u{1F440}-\u{1F44F}]+$"#))) + } +} + +// MARK: - Extended Unicode Support: Level 2 + +// C2. An implementation claiming conformance to Level 2 of this specification +// shall satisfy C1, and meet the requirements described in the following +// sections: +extension UTS18Tests { + // RL2.1 Canonical Equivalents + // + // Specific recommendation? + func testCanonicalEquivalents() { + let equivalents = [ + "\u{006f}\u{031b}\u{0323}", // o + horn + dot_below + "\u{006f}\u{0323}\u{031b}", // o + dot_below + horn + "\u{01a1}\u{0323}", // o-horn + dot_below + "\u{1ecd}\u{031b}", // o-dot_below + horn + "\u{1ee3}", // o-horn-dot_below + ] + + let regexes = [ + regex(#"\u{006f}\u{031b}\u{0323}"#), // o + horn + dot_below + regex(#"\u{006f}\u{0323}\u{031b}"#), // o + dot_below + horn + regex(#"\u{01a1}\u{0323}"#), // o-horn + dot_below + regex(#"\u{1ecd}\u{031b}"#), // o-dot_below + horn + regex(#"\u{1ee3}"#), // o-horn-dot_below + ] + + // Default: Grapheme cluster semantics + for (regexNum, regex) in regexes.enumerated() { + for (equivNum, equiv) in equivalents.enumerated() { + XCTAssertTrue( + equiv.contains(regex), + "Grapheme cluster semantics: Regex \(regexNum) didn't match with string \(equivNum)") + } + } + + // Unicode scalar semantics + for (regexNum, regex) in regexes.enumerated() { + for (equivNum, equiv) in equivalents.enumerated() { + let regex = regex.matchingSemantics(.unicodeScalar) + if regexNum == equivNum { + XCTAssertTrue( + equiv.contains(regex), + "Unicode scalar semantics: Regex \(regexNum) didn't match with string \(equivNum)") + } else { + XCTAssertFalse( + equiv.contains(regex), + "Unicode scalar semantics: Regex \(regexNum) incorrectly matched with string \(equivNum)") + } + } + } + } + + // RL2.2 Extended Grapheme Clusters and Character Classes with 
Strings + // + // To meet this requirement, an implementation shall provide a mechanism for + // matching against an arbitrary extended grapheme cluster, Character Classes + // with Strings, and extended grapheme cluster boundaries. + func testExtendedGraphemeClusters() { + XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef.$"#))) + XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef\X$"#))) + XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef\X$"#).matchingSemantics(.unicodeScalar))) + XCTAssertTrue("abcdef🇬🇭".contains(regex(#"abcdef.+\y"#).matchingSemantics(.unicodeScalar))) + } + + func testCharacterClassesWithStrings() { + let regex = regex(#"[a-z🧐🇧🇪🇧🇫🇧🇬]"#) + XCTAssertTrue("🧐".contains(regex)) + XCTAssertTrue("🇧🇫".contains(regex)) + } + + // RL2.3 Default Word Boundaries + // + // To meet this requirement, an implementation shall provide a mechanism for + // matching Unicode default word boundaries. + func testDefaultWordBoundaries() { + XCTExpectFailure { XCTFail("Implement tests") } + } + + // RL2.4 Default Case Conversion + // + // To meet this requirement, if an implementation provides for case + // conversions, then it shall provide at least the full, default Unicode case + // folding. + func testDefaultCaseConversion() { + XCTExpectFailure { XCTFail("Implement tests") } + } + + // RL2.5 Name Properties + // + // To meet this requirement, an implementation shall support individually + // named characters. 
+ func testNameProperty() throws { + // Name property + XCTAssertTrue("\u{FEFF}".contains(regex(#"\p{name=ZERO WIDTH NO-BREAK SPACE}"#))) + // Name property and Matching Rules + XCTAssertTrue("\u{FEFF}".contains(regex(#"\p{name=zerowidthno breakspace}"#))) + + // Computed name + XCTAssertTrue("강".contains(regex(#"\p{name=HANGUL SYLLABLE GANG}"#))) + + // Graphic symbol + XCTAssertTrue("\u{1F514}".contains(regex(#"\p{name=BELL}"#))) + + // Name match failures + XCTAssertFalse("\u{FEFF}".contains(regex(#"\p{name=ZERO WIDTH NO-BRAKE SPACE}"#))) + XCTAssertFalse("\u{FEFF}".contains(regex(#"\p{name=ZERO WIDTH NO-BREAK SPACE ZZZZ}"#))) + XCTAssertFalse("\u{FEFF}".contains(regex(#"\p{name=ZERO WIDTH NO-BREAK}"#))) + XCTAssertFalse("\u{FEFF}".contains(regex(#"\p{name=z}"#))) + } + + func testNameProperty_XFail() throws { + XCTExpectFailure("Need more expansive name alias matching") { + // Name_Alias property + XCTAssertTrue("\u{FEFF}".contains(regex(#"\p{name=BYTE ORDER MARK}"#))) + // Name_Alias property (again) + XCTAssertTrue("\u{FEFF}".contains(regex(#"\p{name=BOM}"#))) + + // Control character + XCTAssertTrue("\u{7}".contains(regex(#"\p{name=BEL}"#))) + } + } + + func testIndividuallyNamedCharacters() { + XCTAssertTrue("\u{263A}".contains(regex(#"\N{WHITE SMILING FACE}"#))) + XCTAssertTrue("\u{3B1}".contains(regex(#"\N{GREEK SMALL LETTER ALPHA}"#))) + XCTAssertTrue("\u{10450}".contains(regex(#"\N{SHAVIAN LETTER PEEP}"#))) + + XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{ZERO WIDTH NO-BREAK SPACE}"#))) + XCTAssertTrue("강".contains(regex(#"\N{HANGUL SYLLABLE GANG}"#))) + XCTAssertTrue("\u{1F514}".contains(regex(#"\N{BELL}"#))) + XCTAssertTrue("🐯".contains(regex(#"\N{TIGER FACE}"#))) + XCTAssertFalse("🐯".contains(regex(#"\N{TIEGR FACE}"#))) + + // Loose matching + XCTAssertTrue("\u{263A}".contains(regex(#"\N{whitesmilingface}"#))) + XCTAssertTrue("\u{263A}".contains(regex(#"\N{wHiTe_sMiLiNg_fAcE}"#))) + XCTAssertTrue("\u{263A}".contains(regex(#"\N{White 
Smiling-Face}"#))) + XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{zerowidthno breakspace}"#))) + + // Matching semantic level + XCTAssertFalse("👩‍👩‍👧‍👦".contains(regex(#".\N{ZERO WIDTH JOINER}"#))) + XCTAssertTrue("👩‍👩‍👧‍👦".contains(regex(#"(?u).\N{ZERO WIDTH JOINER}"#))) + } + + func testIndividuallyNamedCharacters_XFail() { + XCTExpectFailure("Need to support named chars in custom character classes") { + XCTFail(#"[\N{GREEK SMALL LETTER ALPHA}-\N{GREEK SMALL LETTER BETA}]+"#) + // XCTAssertTrue("^\u{3B1}\u{3B2}$".contains(#/[\N{GREEK SMALL LETTER ALPHA}-\N{GREEK SMALL LETTER BETA}]+/#)) + } + + XCTExpectFailure("Other named char failures -- investigate") { + XCTAssertTrue("\u{C}".contains(regex(#"\N{FORM FEED}"#))) + XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{BYTE ORDER MARK}"#))) + XCTAssertTrue("\u{FEFF}".contains(regex(#"\N{BOM}"#))) + XCTAssertTrue("\u{7}".contains(regex(#"\N{BEL}"#))) + } + + XCTExpectFailure("Need to recognize invalid names at compile time") { + XCTFail("This should be a compilation error, not a match failure:") + XCTAssertFalse("abc".contains(regex(#"\N{NOT AN ACTUAL CHARACTER NAME}"#))) + } + } + + // RL2.6 Wildcards in Property Values + // + // To meet this requirement, an implementation shall support wildcards in + // Unicode property values. + func testWildcardsInPropertyValues() { + XCTExpectFailure { XCTFail("Implement tests") } + } + + // RL2.7 Full Properties + // + // To meet this requirement, an implementation shall support all of the + // properties listed below that are in the supported version of the Unicode + // Standard (or Unicode Technical Standard, respectively), with values that + // match the Unicode definitions for that version. 
+ func testFullProperties() { + // MARK: General + // Name (Name_Alias) + // Block + // Age + // General_Category + // Script (Script_Extensions) + // White_Space + // Alphabetic + // Hangul_Syllable_Type + // Noncharacter_Code_Point + // Default_Ignorable_Code_Point + // Deprecated + // Logical_Order_Exception + // Variation_Selector + + // MARK: Numeric + // Numeric_Value + // Numeric_Type + // Hex_Digit + // ASCII_Hex_Digit + + // MARK: Identifiers + // ID_Continue + // ID_Start + // XID_Continue + // XID_Start + // Pattern_Syntax + // Pattern_White_Space + // Identifier_Status + // Identifier_Type + + // MARK: CJK + // Ideographic + // Unified_Ideograph + // Radical + // IDS_Binary_Operator + // IDS_Trinary_Operator + // Equivalent_Unified_Ideograph + XCTExpectFailure { + XCTFail(#"Unsupported: \(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#)"#) + // XCTAssertTrue("⼚⺁厂".contains(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#)) + } + + // MARK: Case + // Uppercase + // Lowercase + // Simple_Lowercase_Mapping + // Simple_Titlecase_Mapping + // Simple_Uppercase_Mapping + // Simple_Case_Folding + // Soft_Dotted + // Cased + // Case_Ignorable + // Changes_When_Lowercased + // Changes_When_Uppercased + XCTAssertTrue("a".contains(regex(#"\p{Changes_When_Uppercased}"#))) + XCTAssertTrue("a".contains(regex(#"\p{Changes_When_Uppercased=true}"#))) + XCTAssertFalse("A".contains(regex(#"\p{Changes_When_Uppercased}"#))) + // Changes_When_Titlecased + // Changes_When_Casefolded + // Changes_When_Casemapped + + // MARK: Normalization + // Canonical_Combining_Class + // Decomposition_Type + // NFC_Quick_Check + // NFKC_Quick_Check + // NFD_Quick_Check + // NFKD_Quick_Check + // NFKC_Casefold + // Changes_When_NFKC_Casefolded + + // MARK: Emoji + // Emoji + // Emoji_Presentation + // Emoji_Modifier + // Emoji_Modifier_Base + // Emoji_Component + // Extended_Pictographic + // Basic_Emoji* + // Emoji_Keycap_Sequence* + // RGI_Emoji_Modifier_Sequence* + // RGI_Emoji_Flag_Sequence* + // 
RGI_Emoji_Tag_Sequence* + // RGI_Emoji_ZWJ_Sequence* + // RGI_Emoji* + + // MARK: Shaping and Rendering + // Join_Control + // Joining_Group + // Joining_Type + // Vertical_Orientation + // Line_Break + // Grapheme_Cluster_Break + // Sentence_Break + // Word_Break + // East_Asian_Width + // Prepended_Concatenation_Mark + + // MARK: Bidirectional + // Bidi_Class + // Bidi_Control + // Bidi_Mirrored + // Bidi_Mirroring_Glyph + // Bidi_Paired_Bracket + // Bidi_Paired_Bracket_Type + + // MARK: Miscellaneous + // Math + // Quotation_Mark + // Dash + // Sentence_Terminal + // Terminal_Punctuation + // Diacritic + // Extender + // Grapheme_Base + // Grapheme_Extend + // Regional_Indicator + } +}