Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressi
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions", "src\System.Text.RegularExpressions.csproj", "{0409C086-D7CC-43F8-9762-C94FB1E47F5B}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Generators.Tests", "tests\System.Text.RegularExpressions.Generators.Tests\System.Text.RegularExpressions.Generators.Tests.csproj", "{32ABFCDA-10FD-4A98-A429-145C28021EBE}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Tests", "tests\FunctionalTests\System.Text.RegularExpressions.Tests.csproj", "{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Tests", "tests\System.Text.RegularExpressions.Tests.csproj", "{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.UnitTests", "tests\UnitTests\System.Text.RegularExpressions.UnitTests.csproj", "{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{2ACCCAAB-F0CE-4839-82BD-F174861DEA78}"
EndProject
Expand Down Expand Up @@ -53,22 +53,22 @@ Global
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Release|Any CPU.Build.0 = Release|Any CPU
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Release|Any CPU.Build.0 = Release|Any CPU
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Release|Any CPU.Build.0 = Release|Any CPU
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{63551298-BFD4-43FC-8465-AC454228B83C} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
{32ABFCDA-10FD-4A98-A429-145C28021EBE} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
{08F0E5F4-BBD5-45CC-BB12-BA37A83AD7B6} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}
{77CDA838-6489-4816-8847-DE2C7F5E1DCE} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}
{3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,25 +127,17 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
// The set contains one and only one character, meaning every match starts
// with the same literal value (potentially case-insensitive). Search for that.
FixedDistanceLiteral = (chars[0], 0);
FindMode = (_rightToLeft, set.CaseInsensitive) switch
{
(false, false) => FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseSensitive,
(false, true) => FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive,
(true, false) => FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseSensitive,
(true, true) => FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive,
};
FindMode = set.CaseInsensitive ?
FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive :
FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseSensitive;
}
else
{
// The set may match multiple characters. Search for that.
FixedDistanceSets = new() { (chars, set.CharClass, 0, set.CaseInsensitive) };
FindMode = (_rightToLeft, set.CaseInsensitive) switch
{
(false, false) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive,
(false, true) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive,
(true, false) => FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive,
(true, true) => FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive,
};
FindMode = set.CaseInsensitive ?
FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive :
FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive;
_asciiLookups = new uint[1][];
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ internal static class RegexTreeAnalyzer
public static AnalysisResults Analyze(RegexCode code)
{
var results = new AnalysisResults(code);
results._complete = TryAnalyze(code.Tree.Root, results, isAtomicBySelfOrParent: true);
results._complete = TryAnalyze(code.Tree.Root, results, isAtomicByAncestor: true);
return results;

static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicBySelfOrParent)
static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByAncestor)
{
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
return false;
}

if (isAtomicBySelfOrParent)
if (isAtomicByAncestor)
{
// We've been told by our parent that we should be considered atomic, so add ourselves
// to the atomic collection.
Expand All @@ -45,14 +45,15 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
}

// Update state for certain node types.
bool isAtomicBySelf = false;
switch (node.Kind)
{
// Some node types add atomicity around what they wrap. Set isAtomicBySelfOrParent to true for such nodes
// even if it was false upon entering the method.
case RegexNodeKind.Atomic:
case RegexNodeKind.NegativeLookaround:
case RegexNodeKind.PositiveLookaround:
isAtomicBySelfOrParent = true;
isAtomicBySelf = true;
break;

// Track any nodes that are themselves captures.
Expand All @@ -70,7 +71,7 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
// Determine whether the child should be treated as atomic (whether anything
// can backtrack into it), which is influenced by whether this node (the child's
// parent) is considered atomic by itself or by its parent.
bool treatChildAsAtomic = isAtomicBySelfOrParent && node.Kind switch
bool treatChildAsAtomic = (isAtomicByAncestor | isAtomicBySelf) && node.Kind switch
{
// If the parent is atomic, so is the child. That's the whole purpose
// of the Atomic node, and lookarounds are also implicitly atomic.
Expand Down Expand Up @@ -104,14 +105,16 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
}

// If the child contains captures, so too does this parent.
if (results.MayContainCapture(child))
if (results._containsCapture.Contains(child))
{
results._containsCapture.Add(node);
}

// If the child might require backtracking into it, so too might the parent,
// unless the parent is itself considered atomic.
if (!isAtomicBySelfOrParent && results.MayBacktrack(child))
// unless the parent is itself considered atomic. Here we don't consider parental
// atomicity, as we need to surface upwards to the parent whether any backtracking
// will be visible from this node to it.
if (!isAtomicBySelf && (results._mayBacktrack?.Contains(child) == true))
{
(results._mayBacktrack ??= new()).Add(node);
}
Expand Down Expand Up @@ -149,7 +152,7 @@ internal sealed class AnalysisResults
/// <summary>Gets the code that was analyzed.</summary>
public RegexCode Code { get; }

/// <summary>Gets whether a node is considered atomic based on itself or its ancestry.</summary>
/// <summary>Gets whether a node is considered atomic based on its ancestry.</summary>
public bool IsAtomicByAncestor(RegexNode node) => _isAtomicByAncestor.Contains(node);

/// <summary>Gets whether a node directly or indirectly contains captures.</summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ namespace System.Text.RegularExpressions.Tests
public static class RegexGeneratorHelper
{
private static readonly CSharpParseOptions s_previewParseOptions = CSharpParseOptions.Default.WithLanguageVersion(LanguageVersion.Preview);
private static readonly MetadataReference[] s_refs = CreateReferences();
private static readonly EmitOptions s_emitOptions = new EmitOptions(debugInformationFormat: DebugInformationFormat.Embedded);
private static readonly CSharpGeneratorDriver s_generatorDriver = CSharpGeneratorDriver.Create(new[] { new RegexGenerator().AsSourceGenerator() }, parseOptions: s_previewParseOptions);
private static Compilation? s_compilation;

internal static MetadataReference[] References { get; } = CreateReferences();

private static MetadataReference[] CreateReferences()
{
if (PlatformDetection.IsBrowser)
Expand All @@ -49,6 +50,61 @@ private static MetadataReference[] CreateReferences()
};
}

internal static byte[] CreateAssemblyImage(string source, string assemblyName)
{
CSharpCompilation compilation = CSharpCompilation.Create(
assemblyName,
new[] { CSharpSyntaxTree.ParseText(source, CSharpParseOptions.Default.WithLanguageVersion(LanguageVersion.Preview)) },
References,
new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary));

var ms = new MemoryStream();
if (compilation.Emit(ms).Success)
{
return ms.ToArray();
}

throw new InvalidOperationException();
}

internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
{
var proj = new AdhocWorkspace()
.AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create()))
.AddProject("RegexGeneratorTest", "RegexGeneratorTest.dll", "C#")
.WithMetadataReferences(additionalRefs is not null ? References.Concat(additionalRefs) : References)
.WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary, allowUnsafe: allowUnsafe)
.WithNullableContextOptions(NullableContextOptions.Enable))
.WithParseOptions(new CSharpParseOptions(langVersion))
.AddDocument("RegexGenerator.g.cs", SourceText.From(code, Encoding.UTF8)).Project;

Assert.True(proj.Solution.Workspace.TryApplyChanges(proj.Solution));

Compilation? comp = await proj!.GetCompilationAsync(CancellationToken.None).ConfigureAwait(false);
Debug.Assert(comp is not null);

var generator = new RegexGenerator();
CSharpGeneratorDriver cgd = CSharpGeneratorDriver.Create(new[] { generator.AsSourceGenerator() }, parseOptions: CSharpParseOptions.Default.WithLanguageVersion(langVersion));
GeneratorDriver gd = cgd.RunGenerators(comp!, cancellationToken);
GeneratorDriverRunResult generatorResults = gd.GetRunResult();
if (!compile)
{
return generatorResults.Diagnostics;
}

comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray());
EmitResult results = comp.Emit(Stream.Null, cancellationToken: cancellationToken);
if (!results.Success || results.Diagnostics.Length != 0 || generatorResults.Diagnostics.Length != 0)
{
throw new ArgumentException(
string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorResults.Diagnostics)) + Environment.NewLine +
string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => t.ToString())));
}

return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray();
}

internal static async Task<Regex> SourceGenRegexAsync(
string pattern, RegexOptions? options = null, TimeSpan? matchTimeout = null, CancellationToken cancellationToken = default)
{
Expand Down Expand Up @@ -109,7 +165,7 @@ internal static async Task<Regex[]> SourceGenRegexAsync(
var proj = new AdhocWorkspace()
.AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create()))
.AddProject("Test", "test.dll", "C#")
.WithMetadataReferences(s_refs)
.WithMetadataReferences(References)
.WithCompilationOptions(
new CSharpCompilationOptions(
OutputKind.DynamicallyLinkedLibrary,
Expand Down
Loading