Skip to content

Commit 416542e

Browse files
authored
Merge pull request #41 from CVEProject/hk/006a_exact_phrase_search
work-in-progress for #6 (exact phrase search)
2 parents 92aab82 + 49dc344 commit 416542e

File tree

3 files changed

+1126
-17
lines changed

3 files changed

+1126
-17
lines changed

src/search/SearchRequest.ts

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -87,18 +87,23 @@ export class SearchRequest {
8787
*/
8888
static tokenizeSearchText(searchText: string): string[] {
8989
// based on code generated by gemini
90-
const regex = /"([^"]*)"|\S+/g; // tokenizes all words between double quotes as well as every word outside of quotes
91-
let tokens = [];
92-
let match;
93-
94-
while ((match = regex.exec(searchText)) !== null) {
95-
if (match[1]) {
96-
tokens.push(match[1]); // Add the content within quotes
97-
} else {
98-
tokens.push(match[0]); // Add the non-quoted token
90+
let matches = SearchRequest.extractQuotedStrings(searchText);
91+
// filter out ',' tokens
92+
matches = matches.flatMap(item => item === ',' ? [] : item);
93+
94+
// Process each match to unescape characters in quoted strings.
95+
return matches.map(match => {
96+
// Check if the match is a quoted string.
97+
if (match.startsWith('"') && match.endsWith('"')) {
98+
// Remove leading/trailing quotes and then unescape the characters.
99+
let unescaped = match.slice(1, -1);
100+
unescaped = unescaped.replace(/\\"/g, '"');
101+
unescaped = unescaped.replace(/\\\\/g, '\\');
102+
return unescaped;
99103
}
100-
}
101-
return tokens;
104+
// For unquoted words, return as-is.
105+
return match;
106+
});
102107
}
103108

104109

@@ -403,6 +408,25 @@ export class SearchRequest {
403408
}
404409

405410

411+
/**
 * Reports whether `searchText` contains at least one double-quoted segment.
 *
 * The match is not anchored: a quoted segment anywhere in the text (for
 * example `foo "bar baz"`) yields true, as does an empty pair of quotes (`""`).
 * A lone, unbalanced double quote yields false.
 *
 * @param searchText the raw search text to inspect
 * @returns true if a `"..."` segment occurs anywhere in searchText
 */
static isQuotedString = (searchText: string): boolean => {
  // A plain literal without the `g` flag keeps the check stateless: a global
  // regex advances lastIndex between test() calls whenever it is reused.
  return /"[^"]*"/.test(searchText);
};
417+
418+
419+
/**
 * Splits `searchText` into raw tokens: double-quoted strings (returned with
 * their surrounding quotes and any backslash escapes left intact) and runs of
 * non-whitespace characters.
 *
 * Inside a quoted string a backslash escapes the character that follows it,
 * so `"say \"hi\""` comes back as a single token. A quote that is never
 * closed falls back to plain whitespace-delimited tokens.
 *
 * @param searchText the raw search text to tokenize
 * @returns the tokens in order of appearance, or an empty array when there are none
 */
static extractQuotedStrings = (searchText: string): string[] => {
  // based on code generated by Gemini
  // First alternative: a quoted string whose body consists of ordinary
  // characters (neither quote nor backslash) or backslash-escaped characters.
  // Second alternative: any run of non-whitespace characters.
  const tokenPattern = /"(?:[^"\\]|\\.)*"|\S+/g;
  return searchText.match(tokenPattern) ?? [];
};
427+
428+
429+
406430
/**
407431
* determine the SearchRequestTypeId based on searchText
408432
* @param searchText the search text
@@ -414,7 +438,12 @@ export class SearchRequest {
414438
if (searchText.includes('{')) {
415439
return 'SEARCH_STRING_CANNOT_CONTAIN_RESERVED_CHARACTERS'
416440
}
417-
// disallow wildcards
441+
// else if (searchText[0] == '"' && searchText[searchText.length - 1] == '"') {
442+
// else if (new RegExp(/"([^"]*)"/g).test(searchText)) {
443+
// else if (new RegExp(/"(?:[^"\\]|\\.)+"|[^\s,]+/g).test(searchText)) {
444+
else if (SearchRequest.isQuotedString(searchText)) {
445+
return 'SEARCH_PHRASE';
446+
}
418447
else if (searchText.includes('*')) {
419448
return 'SEARCH_AS_WILDCARD_ASTERISK'
420449
}
@@ -429,11 +458,6 @@ export class SearchRequest {
429458
else if (searchText.includes('%')) {
430459
return 'WILDCARD_PERCENT_SEARCH_NOT_SUPPORTED'
431460
}
432-
// else if (searchText[0] == '"' && searchText[searchText.length - 1] == '"') {
433-
else if (new RegExp(/"([^"]*)"/g).test(searchText)) {
434-
return 'SEARCH_PHRASE'
435-
}
436-
437461
// process urls
438462
else if (SearchRequest.isUrl(searchText)) {
439463
// original from https://jsfiddle.net/DanielD/8S4nq/

0 commit comments

Comments
 (0)