diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index cda2481a..20e8cfcb 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -21,6 +21,7 @@ import static org.hypertrace.core.documentstore.expression.operators.FunctionOperator.MULTIPLY; import static org.hypertrace.core.documentstore.expression.operators.FunctionOperator.SUBTRACT; import static org.hypertrace.core.documentstore.expression.operators.LogicalOperator.AND; +import static org.hypertrace.core.documentstore.expression.operators.LogicalOperator.NOT; import static org.hypertrace.core.documentstore.expression.operators.LogicalOperator.OR; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.CONTAINS; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.EQ; @@ -28,6 +29,7 @@ import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.GT; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.GTE; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.IN; +import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.LIKE; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.LT; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.LTE; import static org.hypertrace.core.documentstore.expression.operators.RelationalOperator.NEQ; @@ -83,6 +85,7 @@ import org.hypertrace.core.documentstore.commons.DocStoreConstants; import org.hypertrace.core.documentstore.expression.impl.AggregateExpression; import 
org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression; +import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression; import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; import org.hypertrace.core.documentstore.expression.impl.FunctionExpression; import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression; @@ -92,7 +95,9 @@ import org.hypertrace.core.documentstore.expression.impl.SubQueryJoinExpression; import org.hypertrace.core.documentstore.expression.impl.UnnestExpression; import org.hypertrace.core.documentstore.expression.operators.AggregationOperator; +import org.hypertrace.core.documentstore.expression.operators.ArrayOperator; import org.hypertrace.core.documentstore.expression.operators.FunctionOperator; +import org.hypertrace.core.documentstore.expression.operators.LogicalOperator; import org.hypertrace.core.documentstore.expression.operators.RelationalOperator; import org.hypertrace.core.documentstore.expression.type.FilterTypeExpression; import org.hypertrace.core.documentstore.model.options.UpdateOptions; @@ -197,7 +202,10 @@ private static void createFlatCollectionSchema( + "\"date\" TIMESTAMPTZ," + "\"tags\" TEXT[]," + "\"props\" JSONB," - + "\"sales\" JSONB" + + "\"sales\" JSONB," + + "\"numbers\" INTEGER[]," + + "\"scores\" DOUBLE PRECISION[]," + + "\"flags\" BOOLEAN[]" + ");", collectionName); @@ -3178,6 +3186,7 @@ private void assertExceptionForNonNumericValues( @Nested class FlatPostgresCollectionTest { + @ParameterizedTest @ArgumentsSource(PostgresProvider.class) void testFlatPostgresCollectionFindAll(String dataStoreName) throws IOException { @@ -3203,7 +3212,7 @@ void testFlatPostgresCollectionFindAll(String dataStoreName) throws IOException iterator.close(); - // Should have 8 documents from the INSERT statements - assertEquals(8, count); + // Should have 10 documents from the INSERT statements + assertEquals(10, count); } @ParameterizedTest @@ -3237,14 +3246,14 @@ void
testFlatPostgresCollectionFilterByItem(String dataStoreName) throws IOExcep @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionCount(String dataStoreName) throws IOException { + void testFlatPostgresCollectionCount(String dataStoreName) { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); // Test count method - all documents long totalCount = flatCollection.count(Query.builder().build()); - assertEquals(8, totalCount); + assertEquals(10, totalCount); // Test count with filter - soap documents only Query soapQuery = @@ -3335,12 +3344,14 @@ void testFlatVsNestedCollectionConsistency(String dataStoreName) throws IOExcept Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test 1: Count all documents - should be equal + // Test 1: Count all documents + // Flat collection has 10 docs (8 matching nested + 2 for NULL/empty array testing) + // Nested collection has 8 docs Query countAllQuery = Query.builder().build(); long nestedCount = nestedCollection.count(countAllQuery); long flatCount = flatCollection.count(countAllQuery); - assertEquals( - nestedCount, flatCount, "Total document count should be equal in both collections"); + assertEquals(8, nestedCount, "Nested collection should have 8 documents"); + assertEquals(10, flatCount, "Flat collection should have 10 documents"); // Test 2: Filter by top-level field - item Query itemFilterQuery = @@ -3356,6 +3367,8 @@ void testFlatVsNestedCollectionConsistency(String dataStoreName) throws IOExcept nestedSoapCount, flatSoapCount, "Soap count should be equal in both collections"); // Test 3: Filter by numeric field - price + // Nested has 2 docs with price > 10 (Mirror=20, Soap=20) + // Flat has 3 docs with price > 10 (Mirror=20, Soap=20, Bottle=15) Query priceFilterQuery = Query.builder() .setFilter( @@ -3365,8 +3378,8 @@ void 
testFlatVsNestedCollectionConsistency(String dataStoreName) throws IOExcept long nestedPriceCount = nestedCollection.count(priceFilterQuery); long flatPriceCount = flatCollection.count(priceFilterQuery); - assertEquals( - nestedPriceCount, flatPriceCount, "Price > 10 count should be equal in both collections"); + assertEquals(2, nestedPriceCount, "Nested should have 2 docs with price > 10"); + assertEquals(3, flatPriceCount, "Flat should have 3 docs with price > 10"); // Test 4: Compare actual document content for same filter CloseableIterator nestedIterator = nestedCollection.find(itemFilterQuery); @@ -3461,16 +3474,18 @@ void testFlatVsNestedCollectionConsistency(String dataStoreName) throws IOExcept flatDocIterator.close(); } - // Disabling this test as unnest of top-level json fields is not supported right now + /** + * Tests basic UNNEST operation on flat PostgreSQL collection with native TEXT[] arrays. + * Validates that PostgresFromTypeExpressionVisitor correctly uses unnest() instead of + * jsonb_array_elements() for native PostgreSQL arrays. Groups by tags and counts occurrences. 
+ */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - @Disabled void testFlatPostgresCollectionUnnestTags(String dataStoreName) throws IOException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Query to unnest tags and group by them to get counts Query unnestQuery = Query.builder() .addSelection(IdentifierExpression.of("tags")) @@ -3479,37 +3494,328 @@ void testFlatPostgresCollectionUnnestTags(String dataStoreName) throws IOExcepti .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), false)) .build(); - CloseableIterator iterator = flatCollection.aggregate(unnestQuery); + Iterator resultIterator = flatCollection.aggregate(unnestQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_unnest_tags_response.json", 17); + } - // Collect results - Map tagCounts = new HashMap<>(); - while (iterator.hasNext()) { - Document doc = iterator.next(); - JsonNode json = new ObjectMapper().readTree(doc.toJson()); - String tag = json.get("tags").asText(); - int count = json.get("count").asInt(); - tagCounts.put(tag, count); - } - iterator.close(); + /** + * Tests complex UNNEST operation on flat PostgreSQL collection with native TEXT[] arrays. + * Combines multiple filters (WHERE, unnest filter), aggregations (COUNT, AVG), HAVING clause, + * and ORDER BY sorting. Validates integration of PostgresFromTypeExpressionVisitor, + * PostgresUnnestFilterTypeExpressionVisitor, and PostgresFilterTypeExpressionVisitor. 
+ */ + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionUnnestWithComplexQuery(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Tests UNNEST with WHERE filter (price >= 5), unnest filter (NOT LIKE 'home-%'), + // GROUP BY tags, HAVING (count > 1), ORDER BY count DESC + Query complexQuery = + Query.builder() + .addSelection(IdentifierExpression.of("tags")) + .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of("*")), "tag_count") + .addSelection( + AggregateExpression.of(AVG, IdentifierExpression.of("price")), "avg_price") + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("price"), GTE, ConstantExpression.of(5))) + .addFromClause( + UnnestExpression.builder() + .identifierExpression(IdentifierExpression.of("tags")) + .preserveNullAndEmptyArrays(false) + .filterTypeExpression( + LogicalExpression.builder() + .operator(NOT) + .operand( + RelationalExpression.of( + IdentifierExpression.of("tags"), + LIKE, + ConstantExpression.of("home-%"))) + .build()) + .build()) + .addAggregation(IdentifierExpression.of("tags")) + .setAggregationFilter( + RelationalExpression.of( + IdentifierExpression.of("tag_count"), GT, ConstantExpression.of(1))) + .addSort(SortingSpec.of(IdentifierExpression.of("tag_count"), DESC)) + .build(); + + Iterator resultIterator = flatCollection.aggregate(complexQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_unnest_complex_query_response.json", 7); + } + + /** + * Tests UNNEST with preserveNullAndEmptyArrays=true on flat collection. Counts rows after + * unnesting. Expects a single result row whose count is 27: 25 from docs with tags (one per tag) + 2 from docs with + * NULL/empty tags. This demonstrates LEFT JOIN behavior.
+ */ + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionUnnestWithPreserveEmptyTrue(String dataStoreName) + throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Include all documents in result irrespective of tags field (LEFT JOIN) + // Counts rows after unnest: 25 (from 8 docs with tags) + 2 (from docs with NULL/empty) + Query unnestPreserveTrueQuery = + Query.builder() + .addSelection(AggregateExpression.of(COUNT, IdentifierExpression.of("item")), "count") + .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), true)) + .build(); + + Iterator resultIterator = flatCollection.aggregate(unnestPreserveTrueQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, + resultIterator, + "query/flat_unnest_preserving_empty_array_response.json", + 1); + } + + /** + * Tests UNNEST with filters on flat collection. Combines main WHERE filter on quantity field + * with unnest filter on tags, and preserveEmpty=false to exclude documents without tags. 
+ */ + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionUnnestWithFilters(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query unnestWithFiltersQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("tags")) + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("quantity"), GT, ConstantExpression.of(2))) + .addFromClause( + UnnestExpression.builder() + .identifierExpression(IdentifierExpression.of("tags")) + .preserveNullAndEmptyArrays(false) + .filterTypeExpression( + RelationalExpression.of( + IdentifierExpression.of("tags"), + EQ, + ConstantExpression.of("grooming"))) + .build()) + .build(); + + Iterator resultIterator = flatCollection.aggregate(unnestWithFiltersQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_unnest_with_filters_response.json", 2); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionUnnestWithPreserveEmptyFalse(String dataStoreName) + throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test UNNEST on native TEXT[] array with preserveEmptyArrays = false (INNER JOIN) + // This counts all individual tag values after unnesting + // Expected: 25 total tags (3+3+4+3+3+3+3+3 from 8 documents with non-empty tags) + // Excludes 2 documents with NULL/empty tags + Query unnestPreserveFalseQuery = + Query.builder() + .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count") + .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), false)) + .build(); + + Iterator resultIterator = 
flatCollection.aggregate(unnestPreserveFalseQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, + resultIterator, + "query/flat_unnest_not_preserving_empty_array_response.json", + 1); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionUnnestWithOnlyUnnestFilter(String dataStoreName) + throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Query with unnest filter but NO main WHERE filter + Query unnestFilterOnlyQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.builder() + .identifierExpression(IdentifierExpression.of("tags")) + .preserveNullAndEmptyArrays(false) + .filterTypeExpression( + RelationalExpression.of( + IdentifierExpression.of("tags"), + EQ, + ConstantExpression.of("premium"))) + .build()) + .build(); + + Iterator resultIterator = flatCollection.aggregate(unnestFilterOnlyQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_unnest_only_unnest_filter_response.json", 2); + } + + /** + * Tests UNNEST with ONLY main filter (no unnest filter). Covers the + * PostgresUnnestFilterTypeExpressionVisitor branch where only the main filter exists.
+ */ + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionUnnestWithOnlyMainFilter(String dataStoreName) + throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Query with main WHERE filter but NO unnest filter + Query mainFilterOnlyQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("tags")) + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("price"), GT, ConstantExpression.of(10))) + .addFromClause( + UnnestExpression.builder() + .identifierExpression(IdentifierExpression.of("tags")) + .preserveNullAndEmptyArrays(false) + .build()) + .build(); - // Verify we have results - assertFalse(tagCounts.isEmpty(), "Should have tag counts"); + Iterator resultIterator = flatCollection.aggregate(mainFilterOnlyQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_unnest_only_main_filter_response.json", 6); + } - // Verify some expected tag counts based on our test data - // From collection_data.json, we can verify specific tags appear expected number of times - assertTrue(tagCounts.containsKey("hygiene"), "Should contain 'hygiene' tag"); - assertTrue(tagCounts.containsKey("personal-care"), "Should contain 'personal-care' tag"); - assertTrue(tagCounts.containsKey("grooming"), "Should contain 'grooming' tag"); + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionArrayRelationalFilter(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Verify total count matches expected (each document contributes its tag count) - int totalTags = 
tagCounts.values().stream().mapToInt(Integer::intValue).sum(); - assertTrue(totalTags > 0, "Total tag count should be greater than 0"); + // Filter: ANY tag in tags equals "hygiene" AND _id <= 8 + // Exclude docs 9-10 (NULL/empty arrays) to avoid ARRAY[] type error + Query arrayRelationalQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("price")) + .setFilter( + LogicalExpression.builder() + .operator(LogicalOperator.AND) + .operand( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) + .filter( + RelationalExpression.of( + IdentifierExpression.of("tags"), + EQ, + ConstantExpression.of("hygiene"))) + .build()) + .operand( + RelationalExpression.of( + IdentifierExpression.of("_id"), LTE, ConstantExpression.of(8))) + .build()) + .build(); - // Print results for debugging - System.out.println("Tag counts from unnest operation:"); - tagCounts.entrySet().stream() - .sorted(Map.Entry.comparingByValue().reversed()) - .forEach(entry -> System.out.println(entry.getKey() + ": " + entry.getValue())); + Iterator resultIterator = flatCollection.find(arrayRelationalQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_array_relational_filter_response.json", 3); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionIntegerArrayFilter(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Filter: ANY number in numbers equals 10 (Integer constant) + // This tests the Integer/Long constant branch: Integer/Long → ::bigint[] + Query integerArrayQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("price")) + .setFilter( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) + .filter( +
RelationalExpression.of( + IdentifierExpression.of("numbers"), EQ, ConstantExpression.of(10))) + .build()) + .build(); + + Iterator resultIterator = flatCollection.find(integerArrayQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_integer_array_filter_response.json", 1); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionDoubleArrayFilter(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Filter: ANY score in scores equals 3.14 (Double constant) + // This tests the Double/Float constant branch: Double/Float → ::double precision[] + Query doubleArrayQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("price")) + .setFilter( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) + .filter( + RelationalExpression.of( + IdentifierExpression.of("scores"), EQ, ConstantExpression.of(3.14))) + .build()) + .build(); + + Iterator resultIterator = flatCollection.find(doubleArrayQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_double_array_filter_response.json", 1); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionBooleanArrayFilter(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Filter: ANY flag in flags equals true (Boolean constant) + // This tests the Boolean constant branch: Boolean → ::boolean[] + Query booleanArrayQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("price")) + .setFilter( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) +
.filter( + RelationalExpression.of( + IdentifierExpression.of("flags"), EQ, ConstantExpression.of(true))) + .build()) + .build(); + + Iterator resultIterator = flatCollection.find(booleanArrayQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_boolean_array_filter_response.json", 2); } } @@ -4186,63 +4492,4 @@ private static void testCountApi( final long expectedSize = convertJsonToMap(fileContent).size(); assertEquals(expectedSize, actualSize); } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - @Disabled - void testNestedPostgresCollectionUnnestTags(String dataStoreName) throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection nestedCollection = - datastore.getCollection(COLLECTION_NAME); // Default nested collection - - // Query to unnest tags and group by them to get counts - Query unnestQuery = - Query.builder() - .addSelection(IdentifierExpression.of("tags")) - .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of("*")), "count") - .addAggregation(IdentifierExpression.of("tags")) - .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), false)) - .build(); - - CloseableIterator iterator = nestedCollection.aggregate(unnestQuery); - - // Collect results - Map tagCounts = new HashMap<>(); - while (iterator.hasNext()) { - Document doc = iterator.next(); - JsonNode json = new ObjectMapper().readTree(doc.toJson()); - String tag = json.get("tags").asText(); - int count = json.get("count").asInt(); - tagCounts.put(tag, count); - } - iterator.close(); - - // Verify we have results - assertFalse(tagCounts.isEmpty(), "Should have tag counts from nested collection"); - - // Verify some expected tag counts based on our test data - // From collection_data.json, we can verify specific tags appear expected number of times - assertTrue(tagCounts.containsKey("hygiene"), "Should contain 'hygiene' tag"); - assertTrue(tagCounts.containsKey("personal-care"), "Should 
contain 'personal-care' tag"); - assertTrue(tagCounts.containsKey("grooming"), "Should contain 'grooming' tag"); - - // Verify total count matches expected (each document contributes its tag count) - int totalTags = tagCounts.values().stream().mapToInt(Integer::intValue).sum(); - assertTrue(totalTags > 0, "Total tag count should be greater than 0"); - - // Print results for debugging - System.out.println("Nested collection tag counts from unnest operation:"); - tagCounts.entrySet().stream() - .sorted(Map.Entry.comparingByValue().reversed()) - .forEach(entry -> System.out.println(entry.getKey() + ": " + entry.getValue())); - - // Verify some specific expected counts based on collection_data.json - // From looking at the data: - // - "hygiene" appears in docs 1, 5, 8 = 3 times - // - "personal-care" appears in docs 1, 3 = 2 times - // - "grooming" appears in docs 6, 7 = 2 times - assertEquals(3, tagCounts.get("hygiene"), "hygiene should appear 3 times"); - assertEquals(2, tagCounts.get("personal-care"), "personal-care should appear 2 times"); - assertEquals(2, tagCounts.get("grooming"), "grooming should appear 2 times"); - } } diff --git a/document-store/src/integrationTest/resources/query/flat_array_relational_filter_response.json b/document-store/src/integrationTest/resources/query/flat_array_relational_filter_response.json new file mode 100644 index 00000000..63655a4b --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_array_relational_filter_response.json @@ -0,0 +1,14 @@ +[ + { + "item": "Soap", + "price": 10 + }, + { + "item": "Soap", + "price": 20 + }, + { + "item": "Soap", + "price": 10 + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_boolean_array_filter_response.json b/document-store/src/integrationTest/resources/query/flat_boolean_array_filter_response.json new file mode 100644 index 00000000..25c65fcc --- /dev/null +++ 
b/document-store/src/integrationTest/resources/query/flat_boolean_array_filter_response.json @@ -0,0 +1,10 @@ +[ + { + "item": "Soap", + "price": 10 + }, + { + "item": "Shampoo", + "price": 5 + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_double_array_filter_response.json b/document-store/src/integrationTest/resources/query/flat_double_array_filter_response.json new file mode 100644 index 00000000..b7aecab1 --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_double_array_filter_response.json @@ -0,0 +1,6 @@ +[ + { + "item": "Shampoo", + "price": 5 + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_integer_array_filter_response.json b/document-store/src/integrationTest/resources/query/flat_integer_array_filter_response.json new file mode 100644 index 00000000..9eab7526 --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_integer_array_filter_response.json @@ -0,0 +1,6 @@ +[ + { + "item": "Mirror", + "price": 20 + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_unnest_complex_query_response.json b/document-store/src/integrationTest/resources/query/flat_unnest_complex_query_response.json new file mode 100644 index 00000000..384b661b --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_unnest_complex_query_response.json @@ -0,0 +1,37 @@ +[ + { + "avg_price": "13.3333333333333333", + "tag_count": 3, + "tags": "hygiene" + }, + { + "avg_price": "7.5000000000000000", + "tag_count": 2, + "tags": "premium" + }, + { + "avg_price": "7.5000000000000000", + "tag_count": 2, + "tags": "personal-care" + }, + { + "avg_price": "8.0000000000000000", + "tag_count": 2, + "tags": "grooming" + }, + { + "avg_price": "7.5000000000000000", + "tag_count": 2, + "tags": "budget" + }, + { + "avg_price": "6.5000000000000000", + "tag_count": 2, + "tags": "bulk" + }, + { + "avg_price": "5.0000000000000000", + "tag_count": 2, + "tags": "hair-care" + } 
+] diff --git a/document-store/src/integrationTest/resources/query/flat_unnest_not_preserving_empty_array_response.json b/document-store/src/integrationTest/resources/query/flat_unnest_not_preserving_empty_array_response.json new file mode 100644 index 00000000..832f51e4 --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_unnest_not_preserving_empty_array_response.json @@ -0,0 +1,5 @@ +[ + { + "count": 25 + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_unnest_only_main_filter_response.json b/document-store/src/integrationTest/resources/query/flat_unnest_only_main_filter_response.json new file mode 100644 index 00000000..6ccf04aa --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_unnest_only_main_filter_response.json @@ -0,0 +1,26 @@ +[ + { + "item": "Mirror", + "tags": "home-decor" + }, + { + "item": "Mirror", + "tags": "reflective" + }, + { + "item": "Mirror", + "tags": "glass" + }, + { + "item": "Soap", + "tags": "hygiene" + }, + { + "item": "Soap", + "tags": "antibacterial" + }, + { + "item": "Soap", + "tags": "family-pack" + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_unnest_only_unnest_filter_response.json b/document-store/src/integrationTest/resources/query/flat_unnest_only_unnest_filter_response.json new file mode 100644 index 00000000..620d068f --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_unnest_only_unnest_filter_response.json @@ -0,0 +1,10 @@ +[ + { + "item": "Soap", + "tags": "premium" + }, + { + "item": "Shampoo", + "tags": "premium" + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_unnest_preserving_empty_array_response.json b/document-store/src/integrationTest/resources/query/flat_unnest_preserving_empty_array_response.json new file mode 100644 index 00000000..3a53e637 --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_unnest_preserving_empty_array_response.json @@ 
-0,0 +1,5 @@ +[ + { + "count": 27 + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_unnest_tags_response.json b/document-store/src/integrationTest/resources/query/flat_unnest_tags_response.json new file mode 100644 index 00000000..5aac4063 --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_unnest_tags_response.json @@ -0,0 +1,70 @@ +[ + { + "tags": "hygiene", + "count": 3 + }, + { + "tags": "personal-care", + "count": 2 + }, + { + "tags": "premium", + "count": 2 + }, + { + "tags": "home-decor", + "count": 1 + }, + { + "tags": "reflective", + "count": 1 + }, + { + "tags": "glass", + "count": 1 + }, + { + "tags": "hair-care", + "count": 2 + }, + { + "tags": "herbal", + "count": 1 + }, + { + "tags": "budget", + "count": 2 + }, + { + "tags": "bulk", + "count": 2 + }, + { + "tags": "antibacterial", + "count": 1 + }, + { + "tags": "family-pack", + "count": 1 + }, + { + "tags": "grooming", + "count": 2 + }, + { + "tags": "plastic", + "count": 1 + }, + { + "tags": "essential", + "count": 1 + }, + { + "tags": "wholesale", + "count": 1 + }, + { + "tags": "basic", + "count": 1 + } +] diff --git a/document-store/src/integrationTest/resources/query/flat_unnest_with_filters_response.json b/document-store/src/integrationTest/resources/query/flat_unnest_with_filters_response.json new file mode 100644 index 00000000..94a77fa0 --- /dev/null +++ b/document-store/src/integrationTest/resources/query/flat_unnest_with_filters_response.json @@ -0,0 +1,10 @@ +[ + { + "item": "Comb", + "tags": "grooming" + }, + { + "item": "Comb", + "tags": "grooming" + } +] diff --git a/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json b/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json index d3dc9354..5a5a9432 100644 --- a/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json +++ b/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json @@ 
-1,12 +1,14 @@ { "statements": [ - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n1, 'Soap', 10, 2, '2014-03-01T08:00:00Z',\n'{\"hygiene\", \"personal-care\", \"premium\"}',\n'{\"colors\": [\"Blue\", \"Green\"], \"brand\": \"Dettol\", \"size\": \"M\", \"seller\": {\"name\": \"Metro Chemicals Pvt. Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n2, 'Mirror', 20, 1, '2014-03-01T09:00:00Z',\n'{\"home-decor\", \"reflective\", \"glass\"}',\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n3, 'Shampoo', 5, 10, '2014-03-15T09:00:00Z',\n'{\"hair-care\", \"personal-care\", \"premium\", \"herbal\"}',\n'{\"colors\": [\"Black\"], \"brand\": \"Sunsilk\", \"size\": \"L\", \"seller\": {\"name\": \"Metro Chemicals Pvt. 
Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n4, 'Shampoo', 5, 20, '2014-04-04T11:21:39.736Z',\n'{\"hair-care\", \"budget\", \"bulk\"}',\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n5, 'Soap', 20, 5, '2014-04-04T21:23:13.331Z',\n'{\"hygiene\", \"antibacterial\", \"family-pack\"}',\n'{\"colors\": [\"Orange\", \"Blue\"], \"brand\": \"Lifebuoy\", \"size\": \"S\", \"seller\": {\"name\": \"Hans and Co.\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n6, 'Comb', 7.5, 5, '2015-06-04T05:08:13Z',\n'{\"grooming\", \"plastic\", \"essential\"}',\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n7, 'Comb', 7.5, 10, '2015-09-10T08:43:00Z',\n'{\"grooming\", \"bulk\", \"wholesale\"}',\n'{\"colors\": [], \"seller\": {\"name\": \"Go Go Plastics\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\"\n) VALUES (\n8, 'Soap', 10, 5, '2016-02-06T20:20:13Z',\n'{\"hygiene\", \"budget\", \"basic\"}',\nNULL,\nNULL\n)" + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n1, 'Soap', 10, 2, '2014-03-01T08:00:00Z',\n'{\"hygiene\", \"personal-care\", \"premium\"}',\n'{\"colors\": [\"Blue\", \"Green\"], \"brand\": \"Dettol\", \"size\": \"M\", \"seller\": {\"name\": \"Metro Chemicals Pvt. 
Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\n'{1, 2, 3}',\n'{4.5, 9.2}',\n'{true, false}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n2, 'Mirror', 20, 1, '2014-03-01T09:00:00Z',\n'{\"home-decor\", \"reflective\", \"glass\"}',\nNULL,\nNULL,\n'{10, 20}',\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n3, 'Shampoo', 5, 10, '2014-03-15T09:00:00Z',\n'{\"hair-care\", \"personal-care\", \"premium\", \"herbal\"}',\n'{\"colors\": [\"Black\"], \"brand\": \"Sunsilk\", \"size\": \"L\", \"seller\": {\"name\": \"Metro Chemicals Pvt. Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\nNULL,\n'{3.14, 2.71}',\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n4, 'Shampoo', 5, 20, '2014-04-04T11:21:39.736Z',\n'{\"hair-care\", \"budget\", \"bulk\"}',\nNULL,\nNULL,\nNULL,\nNULL,\n'{true, true}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n5, 'Soap', 20, 5, '2014-04-04T21:23:13.331Z',\n'{\"hygiene\", \"antibacterial\", \"family-pack\"}',\n'{\"colors\": [\"Orange\", \"Blue\"], \"brand\": \"Lifebuoy\", \"size\": \"S\", \"seller\": {\"name\": \"Hans and Co.\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\nNULL,\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n6, 'Comb', 7.5, 5, '2015-06-04T05:08:13Z',\n'{\"grooming\", \"plastic\", 
\"essential\"}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n7, 'Comb', 7.5, 10, '2015-09-10T08:43:00Z',\n'{\"grooming\", \"bulk\", \"wholesale\"}',\n'{\"colors\": [], \"seller\": {\"name\": \"Go Go Plastics\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\nNULL,\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n8, 'Soap', 10, 5, '2016-02-06T20:20:13Z',\n'{\"hygiene\", \"budget\", \"basic\"}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n9, 'Bottle', 15, 3, '2016-03-01T10:00:00Z',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n10, 'Cup', 8, 2, '2016-04-01T10:00:00Z',\n'{}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)" ] } diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java index 7249a314..22a11a27 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java @@ -13,8 +13,10 @@ public class FlatPostgresFieldTransformer implements PostgresColTransformer { @Override public FieldToPgColumn transform(String orgFieldName, 
Map pgColMapping) { - // In flat structure mode, all fields are direct PostgreSQL columns as-is - return new FieldToPgColumn(null, PostgresUtils.wrapFieldNamesWithDoubleQuotes(orgFieldName)); + // Check if this field has been unnested (e.g., "tags" -> "tags_unnested") + String pgColumnName = pgColMapping.getOrDefault(orgFieldName, orgFieldName); + // In flat structure mode, all fields are direct PostgreSQL columns + return new FieldToPgColumn(null, PostgresUtils.wrapFieldNamesWithDoubleQuotes(pgColumnName)); } @Override diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java index 2d9896a1..a47fa262 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java @@ -13,8 +13,10 @@ import java.util.stream.Collectors; import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; +import org.hypertrace.core.documentstore.DocumentType; import org.hypertrace.core.documentstore.Key; import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression; +import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; import org.hypertrace.core.documentstore.expression.impl.DocumentArrayFilterExpression; import org.hypertrace.core.documentstore.expression.impl.KeyExpression; import org.hypertrace.core.documentstore.expression.impl.LogicalExpression; @@ -29,6 +31,7 @@ import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresRelationalFilterParserFactoryImpl; public class PostgresFilterTypeExpressionVisitor implements FilterTypeExpressionVisitor { + protected 
PostgresQueryParser postgresQueryParser; @Nullable private final PostgresWrappingFilterVisitorProvider wrappingVisitorProvider; @@ -162,17 +165,9 @@ private Collector getCollectorForLogicalOperator(LogicalOperator operator) { } private String getFilterStringForAnyOperator(final ArrayRelationalFilterExpression expression) { - // Convert 'elements' to planets->'elements' where planets could be an alias for an upper - // level array filter - // For the first time (if 'elements' was not under any nested array, say a top-level field), - // use the field identifier visitor to make it document->'elements' - final PostgresIdentifierExpressionVisitor identifierVisitor = - new PostgresIdentifierExpressionVisitor(postgresQueryParser); - final PostgresSelectTypeExpressionVisitor arrayPathVisitor = - wrappingVisitorProvider == null - ? new PostgresFieldIdentifierExpressionVisitor(identifierVisitor) - : wrappingVisitorProvider.getForNonRelational(identifierVisitor); - final String parsedLhs = expression.getArraySource().accept(arrayPathVisitor); + // Check if this is a flat collection (native PostgreSQL columns) or nested (JSONB) + boolean isFlatCollection = + postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; // Extract the field name final String identifierName = @@ -180,6 +175,25 @@ private String getFilterStringForAnyOperator(final ArrayRelationalFilterExpressi .getArraySource() .accept(new PostgresIdentifierExpressionVisitor(postgresQueryParser)); + final String parsedLhs; + if (isFlatCollection) { + // For flat collections, assume all arrays are native PostgreSQL arrays + parsedLhs = postgresQueryParser.transformField(identifierName).getPgColumn(); + } else { + // For nested collections, use JSONB path accessor + // Convert 'elements' to planets->'elements' where planets could be an alias for an upper + // level array filter + // For the first time (if 'elements' was not under any nested array, say a top-level field), + // use the field 
identifier visitor to make it document->'elements' + final PostgresIdentifierExpressionVisitor identifierVisitor = + new PostgresIdentifierExpressionVisitor(postgresQueryParser); + final PostgresSelectTypeExpressionVisitor arrayPathVisitor = + wrappingVisitorProvider == null + ? new PostgresFieldIdentifierExpressionVisitor(identifierVisitor) + : wrappingVisitorProvider.getForNonRelational(identifierVisitor); + parsedLhs = expression.getArraySource().accept(arrayPathVisitor); + } + // If the field name is 'elements.inner', alias becomes 'elements_dot_inner' final String alias = encodeAliasForNestedField(identifierName).toLowerCase(); @@ -192,23 +206,83 @@ private String getFilterStringForAnyOperator(final ArrayRelationalFilterExpressi .getFilter() .accept(new PostgresFilterTypeExpressionVisitor(postgresQueryParser, visitorProvider)); - return String.format( - "EXISTS (SELECT 1 FROM jsonb_array_elements(COALESCE(%s, '[]'::jsonb)) AS \"%s\" WHERE %s)", - parsedLhs, alias, parsedFilter); + if (isFlatCollection) { + // todo: For array filters, UNNEST is not the most optimal way as it won't use the index. + // Perhaps, we should use ANY or @> ARRAY operator + + // For flat collections, assume all arrays are native and use unnest() + // Infer array type from filter to properly cast empty array + String arrayTypeCast = inferArrayTypeCastFromFilter(expression.getFilter()); + return String.format( + "EXISTS (SELECT 1 FROM unnest(COALESCE(%s, ARRAY[]%s)) AS \"%s\" WHERE %s)", + parsedLhs, arrayTypeCast, alias, parsedFilter); + } else { + // For nested collections with JSONB arrays, use jsonb_array_elements() + return String.format( + "EXISTS (SELECT 1 FROM jsonb_array_elements(COALESCE(%s, '[]'::jsonb)) AS \"%s\" WHERE %s)", + parsedLhs, alias, parsedFilter); + } + } + + /** + * Infers the PostgreSQL array type cast from the filter expression by examining the constant + * value being compared. + * + *

<p>This method addresses PostgreSQL's requirement that empty array literals (ARRAY[]) must have + * an explicit type cast. It infers the array element type by inspecting the filter's constant + * value. + * + *

<p>Supported cases: + * + * <ul> + *
<li>String constants → ::text[] (e.g., "hygiene" → TEXT[]) + *
<li>Integer/Long constants → ::bigint[] (e.g., 42 → BIGINT[]) + *
<li>Double/Float constants → ::double precision[] (e.g., 3.14 → DOUBLE PRECISION[]) + *
<li>Boolean constants → ::boolean[] (e.g., true → BOOLEAN[]) + * </ul>
+ * + *

<p>Limitations: Type inference fails and defaults to ::text[] when: + * + * <ul> + *
<li>Filter is a LogicalExpression (AND/OR) rather than RelationalExpression + *
<li>Filter compares against an IdentifierExpression instead of a ConstantExpression + *
<li>Constant value type is not recognized (e.g., custom types) + * </ul>
+ * + * @param filter The filter expression to analyze + * @return PostgreSQL array type cast string (e.g., "::text[]", "::bigint[]") + */ + private String inferArrayTypeCastFromFilter(FilterTypeExpression filter) { + // If the filter is a RelationalExpression, check the RHS for a constant value + if (filter instanceof RelationalExpression) { + RelationalExpression relExpr = (RelationalExpression) filter; + + // The visitor returns a string representation, but we need the actual value + // Try to get the constant value directly if it's a ConstantExpression + if (relExpr.getRhs() instanceof ConstantExpression) { + ConstantExpression constExpr = (ConstantExpression) relExpr.getRhs(); + Object value = constExpr.getValue(); + + if (value instanceof String) { + return "::text[]"; + } else if (value instanceof Integer || value instanceof Long) { + return "::bigint[]"; + } else if (value instanceof Double || value instanceof Float) { + return "::double precision[]"; + } else if (value instanceof Boolean) { + return "::boolean[]"; + } + } + } + + // Default to text[] if we can't infer the type + return "::text[]"; } private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression expression) { - // Convert 'elements' to planets->'elements' where planets could be an alias for an upper - // level array filter - // For the first time (if 'elements' was not under any nested array, say a top-level field), - // use the field identifier visitor to make it document->'elements' - final PostgresIdentifierExpressionVisitor identifierVisitor = - new PostgresIdentifierExpressionVisitor(postgresQueryParser); - final PostgresSelectTypeExpressionVisitor arrayPathVisitor = - wrappingVisitorProvider == null - ? 
new PostgresFieldIdentifierExpressionVisitor(identifierVisitor) - : wrappingVisitorProvider.getForNonRelational(identifierVisitor); - final String parsedLhs = expression.getArraySource().accept(arrayPathVisitor); + // Check if this is a flat collection (native PostgreSQL columns) or nested (JSONB) + boolean isFlatCollection = + postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; // Extract the field name final String identifierName = @@ -216,6 +290,21 @@ private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression .getArraySource() .accept(new PostgresIdentifierExpressionVisitor(postgresQueryParser)); + final String parsedLhs; + if (isFlatCollection) { + // For flat collections, assume all arrays are native PostgreSQL arrays + // Use direct column reference with double quotes + parsedLhs = postgresQueryParser.transformField(identifierName).getPgColumn(); + } else { + final PostgresIdentifierExpressionVisitor identifierVisitor = + new PostgresIdentifierExpressionVisitor(postgresQueryParser); + final PostgresSelectTypeExpressionVisitor arrayPathVisitor = + wrappingVisitorProvider == null + ? 
new PostgresFieldIdentifierExpressionVisitor(identifierVisitor) + : wrappingVisitorProvider.getForNonRelational(identifierVisitor); + parsedLhs = expression.getArraySource().accept(arrayPathVisitor); + } + // If the field name is 'elements.inner', alias becomes 'elements_dot_inner' final String alias = encodeAliasForNestedField(identifierName); @@ -227,8 +316,19 @@ private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression .getFilter() .accept(new PostgresFilterTypeExpressionVisitor(postgresQueryParser, wrapper)); - return String.format( - "EXISTS (SELECT 1 FROM jsonb_array_elements(COALESCE(%s, '[]'::jsonb)) AS \"%s\" WHERE %s)", - parsedLhs, alias, parsedFilter); + if (isFlatCollection) { + // For flat collections, assume all arrays are native and use unnest() + // Note: DocumentArrayFilterExpression typically works with JSONB arrays containing objects + // For simplicity, we default to text[] type cast, though this may need refinement + String arrayTypeCast = "::text[]"; + return String.format( + "EXISTS (SELECT 1 FROM unnest(COALESCE(%s, ARRAY[]%s)) AS \"%s\" WHERE %s)", + parsedLhs, arrayTypeCast, alias, parsedFilter); + } else { + // For nested collections with JSONB arrays, use jsonb_array_elements() + return String.format( + "EXISTS (SELECT 1 FROM jsonb_array_elements(COALESCE(%s, '[]'::jsonb)) AS \"%s\" WHERE %s)", + parsedLhs, alias, parsedFilter); + } } } diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java index db5a09c6..b045e9ca 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java @@ -3,7 +3,7 @@ 
import java.util.Optional; import java.util.stream.Collectors; import lombok.Getter; -import org.apache.commons.lang3.StringUtils; +import org.hypertrace.core.documentstore.DocumentType; import org.hypertrace.core.documentstore.expression.impl.SubQueryJoinExpression; import org.hypertrace.core.documentstore.expression.impl.UnnestExpression; import org.hypertrace.core.documentstore.parser.FromTypeExpressionVisitor; @@ -23,7 +23,8 @@ public class PostgresFromTypeExpressionVisitor implements FromTypeExpressionVisi "%s as (SELECT * from %s %s, %s %s)"; private static final String PRESERVE_NULL_AND_EMPTY_TABLE_QUERY_FMT = "%s as (SELECT * from %s %s LEFT JOIN LATERAL %s %s on TRUE)"; - private static final String UNWIND_EXP_FMT = "jsonb_array_elements(%s)"; + private static final String JSONB_UNWIND_EXP_FMT = "jsonb_array_elements(%s)"; + private static final String NATIVE_UNWIND_EXP_FMT = "unnest(%s)"; private static final String UNWIND_EXP_ALIAS_FMT = "p%s(%s)"; private PostgresQueryParser postgresQueryParser; @@ -42,8 +43,31 @@ public String visit(UnnestExpression unnestExpression) { String orgFieldName = unnestExpression.getIdentifierExpression().getName(); String pgColumnName = PostgresUtils.encodeAliasForNestedField(orgFieldName); - String transformedFieldName = - unnestExpression.getIdentifierExpression().accept(postgresFieldIdentifierExpressionVisitor); + // Check if this is a flat collection (native PostgreSQL columns) or nested (JSONB) + boolean isFlatCollection = + postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; + + String transformedFieldName; + String unnestFunction; + + if (isFlatCollection) { + // For flat collections, assume all unnested fields are native PostgreSQL arrays + // Use the transformer to get the proper column name (handles quotes and naming) + transformedFieldName = postgresQueryParser.transformField(orgFieldName).getPgColumn(); + // Use native unnest() for PostgreSQL array columns + unnestFunction = 
NATIVE_UNWIND_EXP_FMT; + // Append "_unnested" suffix to avoid column name conflicts with the original array column + // e.g., unnest("tags") p1(tags_unnested) instead of p1(tags) + pgColumnName = pgColumnName + "_unnested"; + } else { + // For nested collections, use JSONB path accessor + transformedFieldName = + unnestExpression + .getIdentifierExpression() + .accept(postgresFieldIdentifierExpressionVisitor); + // Use jsonb_array_elements() for JSONB arrays + unnestFunction = JSONB_UNWIND_EXP_FMT; + } postgresQueryParser.getPgColumnNames().put(orgFieldName, pgColumnName); int nextIndex = postgresQueryParser.getPgColumnNames().size(); @@ -52,7 +76,7 @@ public String visit(UnnestExpression unnestExpression) { String preTable = "table" + preIndex; String newTable = "table" + nextIndex; String tableAlias = "t" + preIndex; - String unwindExpr = String.format(UNWIND_EXP_FMT, transformedFieldName); + String unwindExpr = String.format(unnestFunction, transformedFieldName); String unwindExprAlias = String.format(UNWIND_EXP_ALIAS_FMT, nextIndex, pgColumnName); String fmt = @@ -70,6 +94,16 @@ public String visit(SubQueryJoinExpression subQueryJoinExpression) { public static Optional getFromClause(PostgresQueryParser postgresQueryParser) { + // Check if there are any unnest operations + if (postgresQueryParser.getQuery().getFromTypeExpressions().isEmpty()) { + return Optional.empty(); + } + + // IMPORTANT: Build table0 query BEFORE processing unnest expressions + // This ensures filters use original field names, not unnested aliases + String table0Query = prepareTable0Query(postgresQueryParser); + + // Now process unnest expressions, which will populate pgColumnNames map PostgresFromTypeExpressionVisitor postgresFromTypeExpressionVisitor = new PostgresFromTypeExpressionVisitor(postgresQueryParser); String childList = @@ -78,23 +112,30 @@ public static Optional getFromClause(PostgresQueryParser postgresQueryPa .map(Object::toString) .collect(Collectors.joining(",\n")); - if 
(StringUtils.isEmpty(childList)) { - return Optional.empty(); - } - - String table0Query = prepareTable0Query(postgresQueryParser); - postgresQueryParser.setFinalTableName("table" + postgresQueryParser.getPgColumnNames().size()); return Optional.of(String.format(QUERY_FMT, table0Query, childList)); } private static String prepareTable0Query(PostgresQueryParser postgresQueryParser) { - Optional whereFilter = - PostgresFilterTypeExpressionVisitor.getFilterClause(postgresQueryParser); - - return whereFilter.isPresent() - ? String.format( - TABLE0_QUERY_FMT_WHERE, postgresQueryParser.getTableIdentifier(), whereFilter.get()) - : String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier()); + // For flat collections with unnest operations, we cannot apply filters in table0 because: + // 1. Filters on unnested fields reference scalar values that don't exist yet in table0 + // 2. Filters on array fields might use operators that don't work on arrays (like LIKE) + // For nested collections, filters work fine because they reference JSONB paths in 'document' + boolean isFlatCollection = + postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; + + if (isFlatCollection) { + // For flat collections with unnest, skip filters in table0 + return String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier()); + } else { + // For nested collections, apply filters in table0 as usual (preserves existing behavior) + Optional whereFilter = + PostgresFilterTypeExpressionVisitor.getFilterClause(postgresQueryParser); + + return whereFilter.isPresent() + ? 
String.format( + TABLE0_QUERY_FMT_WHERE, postgresQueryParser.getTableIdentifier(), whereFilter.get()) + : String.format(TABLE0_QUERY_FMT, postgresQueryParser.getTableIdentifier()); + } } } diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresUnnestFilterTypeExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresUnnestFilterTypeExpressionVisitor.java index 34724321..716433b7 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresUnnestFilterTypeExpressionVisitor.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresUnnestFilterTypeExpressionVisitor.java @@ -3,6 +3,7 @@ import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import org.hypertrace.core.documentstore.DocumentType; import org.hypertrace.core.documentstore.expression.impl.SubQueryJoinExpression; import org.hypertrace.core.documentstore.expression.impl.UnnestExpression; import org.hypertrace.core.documentstore.parser.FromTypeExpressionVisitor; @@ -32,7 +33,9 @@ public String visit(SubQueryJoinExpression subQueryJoinExpression) { public static Optional getFilterClause(PostgresQueryParser postgresQueryParser) { PostgresUnnestFilterTypeExpressionVisitor postgresUnnestFilterTypeExpressionVisitor = new PostgresUnnestFilterTypeExpressionVisitor(postgresQueryParser); - String childList = + + // Get filters from unnest expressions (if any) + String unnestFilters = postgresQueryParser.getQuery().getFromTypeExpressions().stream() .map( fromTypeExpression -> @@ -40,6 +43,30 @@ public static Optional getFilterClause(PostgresQueryParser postgresQuery .map(Object::toString) .filter(StringUtils::isNotEmpty) .collect(Collectors.joining(" AND ")); - return StringUtils.isNotEmpty(childList) ? 
Optional.of(childList) : Optional.empty(); + + // For flat collections, we need to include the main query filter here + // because it was skipped in table0 (to avoid type mismatches on array columns) + // For nested collections, the main filter is already applied in table0, + // so we should NOT duplicate it here + boolean isFlatCollection = + postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; + + if (isFlatCollection) { + // Get main query filter and combine with unnest filters + Optional mainFilter = + PostgresFilterTypeExpressionVisitor.prepareFilterClause( + postgresQueryParser.getQuery().getFilter(), postgresQueryParser); + + if (StringUtils.isNotEmpty(unnestFilters) && mainFilter.isPresent()) { + return Optional.of(unnestFilters + " AND " + mainFilter.get()); + } else if (StringUtils.isNotEmpty(unnestFilters)) { + return Optional.of(unnestFilters); + } else { + return mainFilter; + } + } else { + // For nested collections, only return unnest filters (main filter already in table0) + return StringUtils.isNotEmpty(unnestFilters) ? Optional.of(unnestFilters) : Optional.empty(); + } } }