@@ -26,6 +26,7 @@ limitations under the License.
2626#include " json.hpp"
2727#include " md5.h"
2828#include " parser.h"
29+ #include " re2/re2.h"
2930#include " state.h"
3031#include " static_analysis.h"
3132#include " string_utils.h"
@@ -35,6 +36,10 @@ using json = nlohmann::json;
3536
3637namespace {
3738
39+ static const Fodder EF; // Empty (default-constructed) fodder, passed as the fodder argument when builtins build AST nodes.
40+
41+ static const LocationRange E; // Empty (default-constructed) location range, passed as the location of AST nodes built by builtins.
42+
3843/* * Turn a path e.g. "/a/b/c" into a dir, e.g. "/a/b/". If there is no path returns "".
3944 */
4045std::string dir_name (const std::string &path)
@@ -881,6 +886,11 @@ class Interpreter {
881886 builtins[" parseJson" ] = &Interpreter::builtinParseJson;
882887 builtins[" encodeUTF8" ] = &Interpreter::builtinEncodeUTF8;
883888 builtins[" decodeUTF8" ] = &Interpreter::builtinDecodeUTF8;
889+ builtins[" regexFullMatch" ] = &Interpreter::builtinRegexFullMatch;
890+ builtins[" regexPartialMatch" ] = &Interpreter::builtinRegexPartialMatch;
891+ builtins[" regexQuoteMeta" ] = &Interpreter::builtinRegexQuoteMeta;
892+ builtins[" regexReplace" ] = &Interpreter::builtinRegexReplace;
893+ builtins[" regexGlobalReplace" ] = &Interpreter::builtinRegexGlobalReplace;
884894 }
885895
886896 /* * Clean up the heap, stack, stash, and builtin function ASTs. */
@@ -1373,6 +1383,129 @@ class Interpreter {
13731383 return decodeUTF8 ();
13741384 }
13751385
1386+ const AST *regexMatch (const std::string &pattern, const std::string &string, bool full)
1387+ {
1388+ RE2 re (pattern, RE2::CannedOptions::Quiet);
1389+ if (!re.ok ()) {
1390+ std::stringstream ss;
1391+ ss << " Invalid regex '" << re.pattern () << " ': " << re.error ();
1392+ throw makeError (stack.top ().location , ss.str ());
1393+ }
1394+
1395+ int num_groups = re.NumberOfCapturingGroups ();
1396+
1397+ std::vector<std::string> rcaptures (num_groups);
1398+ std::vector<RE2::Arg> rargv (num_groups);
1399+ std::vector<const RE2::Arg*> rargs (num_groups);
1400+ for (int i = 0 ; i < num_groups; ++i) {
1401+ rargs[i] = &rargv[i];
1402+ rargv[i] = &rcaptures[i];
1403+ }
1404+
1405+ if (full ? RE2::FullMatchN (string, re, rargs.data (), num_groups)
1406+ : RE2::PartialMatchN (string, re, rargs.data (), num_groups)) {
1407+ std::map<const Identifier *, HeapSimpleObject::Field> fields;
1408+
1409+ const Identifier *fid = alloc->makeIdentifier (U" string" );
1410+ fields[fid].hide = ObjectField::VISIBLE;
1411+ fields[fid].body = alloc->make <LiteralString>(E, EF, decode_utf8 (string), LiteralString::DOUBLE, " " , " " );
1412+
1413+ fid = alloc->makeIdentifier (U" captures" );
1414+ fields[fid].hide = ObjectField::VISIBLE;
1415+ std::vector<Array::Element> captures;
1416+ for (int i = 0 ; i < num_groups; ++i) {
1417+ captures.push_back (Array::Element (
1418+ alloc->make <LiteralString>(E, EF, decode_utf8 (rcaptures[i]), LiteralString::DOUBLE, " " , " " ),
1419+ EF));
1420+ }
1421+ fields[fid].body = alloc->make <Array>(E, EF, captures, false , EF);
1422+
1423+ fid = alloc->makeIdentifier (U" namedCaptures" );
1424+ fields[fid].hide = ObjectField::VISIBLE;
1425+ DesugaredObject::Fields named_captures;
1426+ const std::map<std::string, int > &named_groups = re.NamedCapturingGroups ();
1427+ for (auto it = named_groups.cbegin (); it != named_groups.cend (); ++it) {
1428+ named_captures.push_back (DesugaredObject::Field (
1429+ ObjectField::VISIBLE,
1430+ alloc->make <LiteralString>(E, EF, decode_utf8 (it->first ), LiteralString::DOUBLE, " " , " " ),
1431+ alloc->make <LiteralString>(E, EF, decode_utf8 (rcaptures[it->second -1 ]), LiteralString::DOUBLE, " " , " " )));
1432+ }
1433+ fields[fid].body = alloc->make <DesugaredObject>(E, ASTs{}, named_captures);
1434+
1435+ scratch = makeObject<HeapSimpleObject>(BindingFrame{}, fields, ASTs{});
1436+ } else {
1437+ scratch = makeNull ();
1438+ }
1439+ return nullptr ;
1440+ }
1441+
1442+ const AST *builtinRegexFullMatch (const LocationRange &loc, const std::vector<Value> &args)
1443+ {
1444+ validateBuiltinArgs (loc, " regexFullMatch" , args, {Value::STRING, Value::STRING});
1445+
1446+ std::string pattern = encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value );
1447+ std::string string = encode_utf8 (static_cast <HeapString *>(args[1 ].v .h )->value );
1448+
1449+ return regexMatch (pattern, string, true );
1450+ }
1451+
1452+ const AST *builtinRegexPartialMatch (const LocationRange &loc, const std::vector<Value> &args)
1453+ {
1454+ validateBuiltinArgs (loc, " regexPartialMatch" , args, {Value::STRING, Value::STRING});
1455+
1456+ std::string pattern = encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value );
1457+ std::string string = encode_utf8 (static_cast <HeapString *>(args[1 ].v .h )->value );
1458+
1459+ return regexMatch (pattern, string, false );
1460+ }
1461+
1462+ const AST *builtinRegexQuoteMeta (const LocationRange &loc, const std::vector<Value> &args)
1463+ {
1464+ validateBuiltinArgs (loc, " regexQuoteMeta" , args, {Value::STRING});
1465+ scratch = makeString (decode_utf8 (RE2::QuoteMeta (encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value ))));
1466+ return nullptr ;
1467+ }
1468+
1469+ const AST *builtinRegexReplace (const LocationRange &loc, const std::vector<Value> &args)
1470+ {
1471+ validateBuiltinArgs (loc, " regexReplace" , args, {Value::STRING, Value::STRING, Value::STRING});
1472+
1473+ std::string string = encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value );
1474+ std::string pattern = encode_utf8 (static_cast <HeapString *>(args[1 ].v .h )->value );
1475+ std::string replace = encode_utf8 (static_cast <HeapString *>(args[2 ].v .h )->value );
1476+
1477+ RE2 re (pattern, RE2::CannedOptions::Quiet);
1478+ if (!re.ok ()) {
1479+ std::stringstream ss;
1480+ ss << " Invalid regex '" << re.pattern () << " ': " << re.error ();
1481+ throw makeError (stack.top ().location , ss.str ());
1482+ }
1483+
1484+ RE2::Replace (&string, re, replace);
1485+ scratch = makeString (decode_utf8 (string));
1486+ return nullptr ;
1487+ }
1488+
1489+ const AST *builtinRegexGlobalReplace (const LocationRange &loc, const std::vector<Value> &args)
1490+ {
1491+ validateBuiltinArgs (loc, " regexGlobalReplace" , args, {Value::STRING, Value::STRING, Value::STRING});
1492+
1493+ std::string string = encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value );
1494+ std::string pattern = encode_utf8 (static_cast <HeapString *>(args[1 ].v .h )->value );
1495+ std::string replace = encode_utf8 (static_cast <HeapString *>(args[2 ].v .h )->value );
1496+
1497+ RE2 re (pattern, RE2::CannedOptions::Quiet);
1498+ if (!re.ok ()) {
1499+ std::stringstream ss;
1500+ ss << " Invalid regex '" << re.pattern () << " ': " << re.error ();
1501+ throw makeError (stack.top ().location , ss.str ());
1502+ }
1503+
1504+ RE2::GlobalReplace (&string, re, replace);
1505+ scratch = makeString (decode_utf8 (string));
1506+ return nullptr ;
1507+ }
1508+
13761509 const AST *builtinTrace (const LocationRange &loc, const std::vector<Value> &args)
13771510 {
13781511 if (args[0 ].t != Value::STRING) {
0 commit comments