Skip to content

Commit 8ca8947

Browse files
Update IdentifierUtils (#394)
* initial commit * incrementing version * adjusting name to be explicit * Update src/powerquery-parser/parser/parser/parserUtils.ts Co-authored-by: Copilot <[email protected]> * ran 'npm audit fix' * fixing lint issue from copilot * removing ':' * about to work on IdentifierUtilsOptions * perhaps complete * some touch ups * adding 'allowRecursive' to getAllowedIdentifiers * couple fixes * simplifying logic and adding tests * renamed insertQuotes * tweaking TQuotedAndUnquoted type --------- Co-authored-by: Copilot <[email protected]>
1 parent 98cbf98 commit 8ca8947

File tree

9 files changed

+698
-90
lines changed

9 files changed

+698
-90
lines changed

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@microsoft/powerquery-parser",
3-
"version": "0.17.0",
3+
"version": "0.18.0",
44
"description": "A parser for the Power Query/M formula language.",
55
"author": "Microsoft",
66
"license": "MIT",

src/powerquery-parser/language/identifierUtils.ts

Lines changed: 255 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5,39 +5,114 @@ import { Assert, Pattern, StringUtils } from "../common";
55

66
export enum IdentifierKind {
77
Generalized = "Generalized",
8+
GeneralizedWithQuotes = "GeneralizedWithQuotes",
89
Invalid = "Invalid",
9-
Quote = "Quote",
10-
QuoteRequired = "QuoteRequired",
1110
Regular = "Regular",
11+
RegularWithQuotes = "RegularWithQuotes",
12+
RegularWithRequiredQuotes = "RegularWithRequiredQuotes",
1213
}
1314

14-
// Assuming the text is a quoted identifier, finds the quotes that enclose the identifier.
15-
// Otherwise returns undefined.
16-
export function findQuotedIdentifierQuotes(text: string, index: number): StringUtils.FoundQuotes | undefined {
17-
if (text[index] !== "#") {
18-
return undefined;
15+
export interface CommonIdentifierUtilsOptions {
16+
readonly allowGeneralizedIdentifier?: boolean;
17+
readonly allowTrailingPeriod?: boolean;
18+
}
19+
20+
export interface GetAllowedIdentifiersOptions extends CommonIdentifierUtilsOptions {
21+
readonly allowRecursive?: boolean;
22+
}
23+
24+
// Identifiers have multiple forms that can be used interchangeably.
25+
// For example, if you have `[key = 1]`, you can use `key` or `#"key"`.
26+
// The `getAllowedIdentifiers` function returns all the forms of the identifier that are allowed in the current context.
27+
export function getAllowedIdentifiers(text: string, options?: GetAllowedIdentifiersOptions): ReadonlyArray<string> {
28+
const allowGeneralizedIdentifier: boolean =
29+
options?.allowGeneralizedIdentifier ?? DefaultAllowGeneralizedIdentifier;
30+
31+
const quotedAndUnquoted: TQuotedAndUnquoted | undefined = getQuotedAndUnquoted(text, options);
32+
33+
if (quotedAndUnquoted === undefined) {
34+
return [];
1935
}
2036

21-
return StringUtils.findQuotes(text, index + 1);
37+
let result: string[];
38+
39+
switch (quotedAndUnquoted.identifierKind) {
40+
case IdentifierKind.Generalized:
41+
case IdentifierKind.GeneralizedWithQuotes:
42+
result = allowGeneralizedIdentifier ? [quotedAndUnquoted.withQuotes, quotedAndUnquoted.withoutQuotes] : [];
43+
break;
44+
45+
case IdentifierKind.Invalid:
46+
result = [];
47+
break;
48+
49+
case IdentifierKind.RegularWithQuotes:
50+
result = [quotedAndUnquoted.withQuotes, quotedAndUnquoted.withoutQuotes];
51+
break;
52+
53+
case IdentifierKind.RegularWithRequiredQuotes:
54+
result = [quotedAndUnquoted.withQuotes];
55+
break;
56+
57+
case IdentifierKind.Regular:
58+
result = [quotedAndUnquoted.withoutQuotes, quotedAndUnquoted.withQuotes];
59+
break;
60+
61+
default:
62+
throw Assert.isNever(quotedAndUnquoted);
63+
}
64+
65+
if (options?.allowRecursive) {
66+
result = result.concat(result.map((value: string) => prefixInclusiveConstant(value)));
67+
}
68+
69+
return result;
2270
}
2371

24-
// Determines what kind of identifier the text is.
25-
// It's possible that the text is a partially completed identifier,
26-
// which is why we have the `allowTrailingPeriod` parameter.
27-
export function getIdentifierKind(text: string, allowTrailingPeriod: boolean): IdentifierKind {
28-
if (isRegularIdentifier(text, allowTrailingPeriod)) {
72+
// An identifier can have multiple forms:
73+
// - Regular: `foo`
74+
// - Regular with quotes: `#"foo"`
75+
// - Regular with required quotes: `#"foo bar"`
76+
// - Regular with required quotes is used when the identifier has spaces or special characters,
77+
// and when generalized identifiers are not allowed.
78+
// - Generalized: `foo bar`
79+
// - Generalized with quotes: `#"foo bar"`
80+
// - Invalid: `foo..bar`
81+
export function getIdentifierKind(text: string, options?: CommonIdentifierUtilsOptions): IdentifierKind {
82+
const allowGeneralizedIdentifier: boolean =
83+
options?.allowGeneralizedIdentifier ?? DefaultAllowGeneralizedIdentifier;
84+
85+
if (isRegularIdentifier(text, options)) {
2986
return IdentifierKind.Regular;
30-
} else if (isQuotedIdentifier(text)) {
31-
return isRegularIdentifier(text.slice(2, -1), false) ? IdentifierKind.Quote : IdentifierKind.QuoteRequired;
32-
} else if (isGeneralizedIdentifier(text)) {
87+
} else if (allowGeneralizedIdentifier && isGeneralizedIdentifier(text)) {
3388
return IdentifierKind.Generalized;
89+
}
90+
// If the identifier is quoted it's either:
91+
// - a regular identifier with quotes,
92+
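// - a generalized identifier with quotes (only when generalized identifiers are allowed), or otherwise a regular identifier with required quotes.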
93+
else if (isQuotedIdentifier(text)) {
94+
const stripped: string = stripQuotes(text);
95+
96+
if (isRegularIdentifier(stripped, options)) {
97+
return IdentifierKind.RegularWithQuotes;
98+
} else if (isGeneralizedIdentifier(stripped) && allowGeneralizedIdentifier) {
99+
return IdentifierKind.GeneralizedWithQuotes;
100+
} else {
101+
return IdentifierKind.RegularWithRequiredQuotes;
102+
}
34103
} else {
35104
return IdentifierKind.Invalid;
36105
}
37106
}
38107

39-
// Assuming the text is an identifier, returns the length of the identifier.
40-
export function getIdentifierLength(text: string, index: number, allowTrailingPeriod: boolean): number | undefined {
108+
// I'd prefer if this was internal, but it's used by the lexer so it's marked as public.
109+
// Returns the length of the identifier starting at the given index.
110+
export function getIdentifierLength(
111+
text: string,
112+
index: number,
113+
options?: CommonIdentifierUtilsOptions,
114+
): number | undefined {
115+
const allowTrailingPeriod: boolean = options?.allowTrailingPeriod ?? DefaultAllowTrailingPeriod;
41116
const startingIndex: number = index;
42117
const textLength: number = text.length;
43118

@@ -62,26 +137,37 @@ export function getIdentifierLength(text: string, index: number, allowTrailingPe
62137

63138
break;
64139

65-
case IdentifierRegexpState.RegularIdentifier:
66-
// Don't consider `..` or `...` part of an identifier.
67-
if (allowTrailingPeriod && text[index] === "." && text[index + 1] !== ".") {
68-
index += 1;
69-
}
140+
// We should allow a single period as part of the identifier,
141+
// but only if it's not the last character and not followed by another period.
142+
// Allow an exception for when it's the last character and allowTrailingPeriod is true.
143+
case IdentifierRegexpState.RegularIdentifier: {
144+
const currentChr: string | undefined = text[index];
70145

71-
matchLength = StringUtils.regexMatchLength(Pattern.IdentifierPartCharacters, text, index);
72-
73-
if (matchLength === undefined) {
146+
if (currentChr === undefined) {
74147
state = IdentifierRegexpState.Done;
75-
} else {
76-
index += matchLength;
148+
} else if (currentChr === ".") {
149+
const nextChr: string | undefined = text[index + 1];
77150

78-
// Don't consider `..` or `...` part of an identifier.
79-
if (allowTrailingPeriod && text[index] === "." && text[index + 1] !== ".") {
151+
// If we have a single period we might include it as part of the identifier when:
152+
// 1. It's not the last character and not followed by another period
153+
// 2. It's the last character and allowTrailingPeriod is true
154+
if ((nextChr && nextChr !== ".") || (nextChr === undefined && allowTrailingPeriod)) {
80155
index += 1;
156+
} else {
157+
state = IdentifierRegexpState.Done;
158+
}
159+
} else {
160+
matchLength = StringUtils.regexMatchLength(Pattern.IdentifierPartCharacters, text, index);
161+
162+
if (matchLength === undefined) {
163+
state = IdentifierRegexpState.Done;
164+
} else {
165+
index += matchLength;
81166
}
82167
}
83168

84169
break;
170+
}
85171

86172
default:
87173
throw Assert.isNever(state);
@@ -91,8 +177,81 @@ export function getIdentifierLength(text: string, index: number, allowTrailingPe
91177
return index !== startingIndex ? index - startingIndex : undefined;
92178
}
93179

180+
// Removes the quotes from a quoted identifier if possible.
181+
// When given an invalid identifier, returns undefined.
182+
export function getNormalizedIdentifier(text: string, options?: CommonIdentifierUtilsOptions): string | undefined {
183+
const allowGeneralizedIdentifier: boolean =
184+
options?.allowGeneralizedIdentifier ?? DefaultAllowGeneralizedIdentifier;
185+
186+
const quotedAndUnquoted: TQuotedAndUnquoted = getQuotedAndUnquoted(text, options);
187+
188+
switch (quotedAndUnquoted.identifierKind) {
189+
case IdentifierKind.Regular:
190+
case IdentifierKind.RegularWithQuotes:
191+
return quotedAndUnquoted.withoutQuotes;
192+
193+
case IdentifierKind.GeneralizedWithQuotes:
194+
case IdentifierKind.Generalized:
195+
return allowGeneralizedIdentifier ? quotedAndUnquoted.withoutQuotes : undefined;
196+
197+
case IdentifierKind.Invalid:
198+
return undefined;
199+
200+
case IdentifierKind.RegularWithRequiredQuotes:
201+
return quotedAndUnquoted.withQuotes;
202+
203+
default:
204+
throw Assert.isNever(quotedAndUnquoted);
205+
}
206+
}
207+
208+
type TQuotedAndUnquoted =
209+
| {
210+
readonly identifierKind: IdentifierKind.Generalized;
211+
readonly withQuotes: string;
212+
readonly withoutQuotes: string;
213+
}
214+
| {
215+
readonly identifierKind: IdentifierKind.GeneralizedWithQuotes;
216+
readonly withQuotes: string;
217+
readonly withoutQuotes: string;
218+
}
219+
| {
220+
readonly identifierKind: IdentifierKind.Invalid;
221+
}
222+
| {
223+
readonly identifierKind: IdentifierKind.Regular;
224+
readonly withQuotes: string;
225+
readonly withoutQuotes: string;
226+
}
227+
| {
228+
readonly identifierKind: IdentifierKind.RegularWithQuotes;
229+
readonly withQuotes: string;
230+
readonly withoutQuotes: string;
231+
}
232+
| {
233+
readonly identifierKind: IdentifierKind.RegularWithRequiredQuotes;
234+
readonly withQuotes: string;
235+
};
236+
237+
const enum IdentifierRegexpState {
238+
Done = "Done",
239+
RegularIdentifier = "RegularIdentifier",
240+
Start = "Start",
241+
}
242+
243+
// Finds the locations of quotes in a quoted identifier.
244+
// Returns undefined if the identifier is not quoted.
245+
function findQuotedIdentifierQuotes(text: string, index: number): StringUtils.FoundQuotes | undefined {
246+
if (text[index] !== "#") {
247+
return undefined;
248+
}
249+
250+
return StringUtils.findQuotes(text, index + 1);
251+
}
252+
94253
// Assuming the text is a generalized identifier, returns the length of the identifier.
95-
export function getGeneralizedIdentifierLength(text: string, index: number): number | undefined {
254+
function getGeneralizedIdentifierLength(text: string, index: number): number | undefined {
96255
const startingIndex: number = index;
97256
const textLength: number = text.length;
98257

@@ -133,31 +292,78 @@ export function getGeneralizedIdentifierLength(text: string, index: number): num
133292
return index !== startingIndex ? index - startingIndex : undefined;
134293
}
135294

136-
export function isGeneralizedIdentifier(text: string): boolean {
137-
return getGeneralizedIdentifierLength(text, 0) === text.length;
295+
// Returns the quoted and unquoted versions of the identifier (if applicable).
296+
function getQuotedAndUnquoted(text: string, options?: CommonIdentifierUtilsOptions): TQuotedAndUnquoted {
297+
const identifierKind: IdentifierKind = getIdentifierKind(text, options);
298+
299+
switch (identifierKind) {
300+
case IdentifierKind.Generalized:
301+
return {
302+
identifierKind,
303+
withoutQuotes: text,
304+
withQuotes: makeQuoted(text),
305+
};
306+
307+
case IdentifierKind.GeneralizedWithQuotes:
308+
return {
309+
identifierKind,
310+
withoutQuotes: stripQuotes(text),
311+
withQuotes: text,
312+
};
313+
314+
case IdentifierKind.Invalid:
315+
return {
316+
identifierKind,
317+
};
318+
319+
case IdentifierKind.RegularWithQuotes:
320+
return {
321+
identifierKind,
322+
withoutQuotes: stripQuotes(text),
323+
withQuotes: text,
324+
};
325+
326+
case IdentifierKind.RegularWithRequiredQuotes:
327+
return {
328+
identifierKind,
329+
withQuotes: text,
330+
};
331+
332+
case IdentifierKind.Regular:
333+
return {
334+
identifierKind,
335+
withoutQuotes: text,
336+
withQuotes: makeQuoted(text),
337+
};
338+
339+
default:
340+
throw Assert.isNever(identifierKind);
341+
}
138342
}
139343

140-
export function isRegularIdentifier(text: string, allowTrailingPeriod: boolean): boolean {
141-
return getIdentifierLength(text, 0, allowTrailingPeriod) === text.length;
344+
function makeQuoted(text: string): string {
345+
return `#"${text}"`;
142346
}
143347

144-
export function isQuotedIdentifier(text: string): boolean {
145-
return findQuotedIdentifierQuotes(text, 0) !== undefined;
348+
function prefixInclusiveConstant(text: string): string {
349+
return `@${text}`;
146350
}
147351

148-
// Removes the quotes from a quoted identifier if possible.
149-
export function normalizeIdentifier(text: string): string {
150-
if (isQuotedIdentifier(text)) {
151-
const stripped: string = text.slice(2, -1);
352+
function isGeneralizedIdentifier(text: string): boolean {
353+
return text.length > 0 && getGeneralizedIdentifierLength(text, 0) === text.length;
354+
}
152355

153-
return isRegularIdentifier(stripped, false) ? stripped : text;
154-
} else {
155-
return text;
156-
}
356+
function isRegularIdentifier(text: string, options?: CommonIdentifierUtilsOptions): boolean {
357+
return text.length > 0 && getIdentifierLength(text, 0, options) === text.length;
157358
}
158359

159-
const enum IdentifierRegexpState {
160-
Done = "Done",
161-
RegularIdentifier = "RegularIdentifier",
162-
Start = "Start",
360+
function isQuotedIdentifier(text: string): boolean {
361+
return findQuotedIdentifierQuotes(text, 0) !== undefined;
362+
}
363+
364+
function stripQuotes(text: string): string {
365+
return text.slice(2, -1);
163366
}
367+
368+
const DefaultAllowTrailingPeriod: boolean = false;
369+
const DefaultAllowGeneralizedIdentifier: boolean = false;

src/powerquery-parser/language/type/typeUtils/isEqualType.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,12 @@ export function isEqualType(left: Type.TPowerQueryType, right: Type.TPowerQueryT
2222
}
2323

2424
export function isEqualFunctionParameter(left: Type.FunctionParameter, right: Type.FunctionParameter): boolean {
25-
return left.isNullable !== right.isNullable || left.isOptional !== right.isOptional || left.type !== right.type;
25+
return (
26+
left.nameLiteral === right.nameLiteral &&
27+
left.isNullable === right.isNullable &&
28+
left.isOptional === right.isOptional &&
29+
left.type === right.type
30+
);
2631
}
2732

2833
export function isEqualFunctionSignature(

0 commit comments

Comments
 (0)