From 5ad32b02520c1a51e23903f1c442e549275b237d Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Mon, 12 Dec 2022 21:50:33 -0800 Subject: [PATCH 01/20] Initial read-only Snowflake Catalog implementation by @sfc-gh-mparmar (#1) Initial read-only Snowflake Catalog implementation built on top of the Snowflake JDBC driver, providing support for basic listing of namespaces, listing of tables, and loading/reads of tables. Auth options are passthrough to the JDBC driver. Co-authored-by: Maninder Parmar Co-authored-by: Maninder Parmar Co-authored-by: Dennis Huo --- .github/labeler.yml | 4 +- build.gradle | 16 ++ .../apache/iceberg/jdbc/JdbcClientPool.java | 6 +- settings.gradle | 2 + .../snowflake/JdbcSnowflakeClient.java | 158 ++++++++++++ .../iceberg/snowflake/SnowflakeCatalog.java | 220 +++++++++++++++++ .../iceberg/snowflake/SnowflakeClient.java | 44 ++++ .../iceberg/snowflake/SnowflakeResources.java | 29 +++ .../snowflake/SnowflakeTableOperations.java | 93 +++++++ .../snowflake/entities/SnowflakeSchema.java | 53 ++++ .../snowflake/entities/SnowflakeTable.java | 60 +++++ .../entities/SnowflakeTableMetadata.java | 126 ++++++++++ .../snowflake/FakeSnowflakeClient.java | 160 ++++++++++++ .../iceberg/snowflake/InMemoryFileIO.java | 52 ++++ .../snowflake/SnowflakeCatalogTest.java | 231 ++++++++++++++++++ spark/v3.3/build.gradle | 3 + 16 files changed, 1253 insertions(+), 4 deletions(-) create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java create mode 100644 snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java create mode 100644 snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java create mode 100644 snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java diff --git a/.github/labeler.yml b/.github/labeler.yml index 521e1a42aaae..c623fbc6dd4c 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -81,4 +81,6 @@ ALIYUN: GCP: - gcp/**/* DELL: - - dell/**/* \ No newline at end of file + - dell/**/* +SNOWFLAKE: + - snowflake/**/* diff --git a/build.gradle b/build.gradle index 026bc3ddd0af..dd8980590128 100644 --- a/build.gradle +++ b/build.gradle @@ -682,6 +682,22 @@ project(':iceberg-dell') { } } +project(':iceberg-snowflake') { + dependencies { + implementation project(':iceberg-core') + implementation project(':iceberg-common') + implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow') + implementation project(':iceberg-aws') + implementation "com.fasterxml.jackson.core:jackson-databind" + implementation "com.fasterxml.jackson.core:jackson-core" + implementation "commons-dbutils:commons-dbutils:1.7" + + runtimeOnly("net.snowflake:snowflake-jdbc:3.13.22") + + testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts') + } +} + @Memoized boolean 
versionFileExists() { return file('version.txt').exists() diff --git a/core/src/main/java/org/apache/iceberg/jdbc/JdbcClientPool.java b/core/src/main/java/org/apache/iceberg/jdbc/JdbcClientPool.java index daa04908f41e..60e5eb49a4f8 100644 --- a/core/src/main/java/org/apache/iceberg/jdbc/JdbcClientPool.java +++ b/core/src/main/java/org/apache/iceberg/jdbc/JdbcClientPool.java @@ -27,12 +27,12 @@ import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.ClientPoolImpl; -class JdbcClientPool extends ClientPoolImpl { +public class JdbcClientPool extends ClientPoolImpl { private final String dbUrl; private final Map properties; - JdbcClientPool(String dbUrl, Map props) { + public JdbcClientPool(String dbUrl, Map props) { this( Integer.parseInt( props.getOrDefault( @@ -42,7 +42,7 @@ class JdbcClientPool extends ClientPoolImpl { props); } - JdbcClientPool(int poolSize, String dbUrl, Map props) { + public JdbcClientPool(int poolSize, String dbUrl, Map props) { super(poolSize, SQLNonTransientConnectionException.class, true); properties = props; this.dbUrl = dbUrl; diff --git a/settings.gradle b/settings.gradle index 7a9e4c49633d..5b48518a0182 100644 --- a/settings.gradle +++ b/settings.gradle @@ -34,6 +34,7 @@ include 'hive-metastore' include 'nessie' include 'gcp' include 'dell' +include 'snowflake' project(':api').name = 'iceberg-api' project(':common').name = 'iceberg-common' @@ -51,6 +52,7 @@ project(':hive-metastore').name = 'iceberg-hive-metastore' project(':nessie').name = 'iceberg-nessie' project(':gcp').name = 'iceberg-gcp' project(':dell').name = 'iceberg-dell' +project(':snowflake').name = 'iceberg-snowflake' List knownFlinkVersions = System.getProperty("knownFlinkVersions").split(",") String flinkVersionsString = System.getProperty("flinkVersions") != null ? System.getProperty("flinkVersions") : System.getProperty("defaultFlinkVersions") diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java new file mode 100644 index 000000000000..9730a5f3724b --- /dev/null +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.snowflake; + +import java.sql.SQLException; +import java.util.List; +import org.apache.commons.dbutils.QueryRunner; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.jdbc.JdbcClientPool; +import org.apache.iceberg.jdbc.UncheckedInterruptedException; +import org.apache.iceberg.jdbc.UncheckedSQLException; +import org.apache.iceberg.snowflake.entities.SnowflakeSchema; +import org.apache.iceberg.snowflake.entities.SnowflakeTable; +import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This implementation of SnowflakeClient builds on top of Snowflake's JDBC driver to interact with + * Snowflake's Iceberg-aware resource model. Despite using JDBC libraries, the resource model is + * derived from Snowflake's own first-class support for Iceberg tables as opposed to using an opaque + * JDBC layer to store Iceberg metadata itself in an Iceberg-agnostic database. + * + *
+ * <p>
This thus differs from the JdbcCatalog in that Snowflake's service provides the source of
+ * truth of Iceberg metadata, rather than serving as a storage layer for a client-defined Iceberg
+ * resource model.
+ */
+public class JdbcSnowflakeClient implements SnowflakeClient {
+  public static final String EXPECTED_JDBC_IMPL = "net.snowflake.client.jdbc.SnowflakeDriver";
+
+  private static final Logger LOG = LoggerFactory.getLogger(JdbcSnowflakeClient.class);
+  private final JdbcClientPool connectionPool;
+
+  JdbcSnowflakeClient(JdbcClientPool conn) {
+    connectionPool = conn;
+  }
+
+  @Override
+  public List<SnowflakeSchema> listSchemas(Namespace namespace) {
+    StringBuilder baseQuery = new StringBuilder("SHOW SCHEMAS");
+    Object[] queryParams = null;
+    if (namespace == null || namespace.isEmpty()) {
+      // for an empty or null namespace, search for all schemas at account level that the user
+      // has access to list.
+      baseQuery.append(" IN ACCOUNT");
+    } else {
+      // otherwise restrict the listing of schemas to within the database.
+      baseQuery.append(" IN DATABASE IDENTIFIER(?)");
+      queryParams = new Object[] {namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1)};
+    }
+
+    final String finalQuery = baseQuery.toString();
+    final Object[] finalQueryParams = queryParams;
+    QueryRunner run = new QueryRunner(true);
+    List<SnowflakeSchema> schemas;
+    try {
+      schemas =
+          connectionPool.run(
+              conn ->
+                  run.query(conn, finalQuery, SnowflakeSchema.createHandler(), finalQueryParams));
+    } catch (SQLException e) {
+      throw new UncheckedSQLException(
+          e,
+          "Failed to list schemas for namespace %s",
+          namespace != null ? namespace.toString() : "");
+    } catch (InterruptedException e) {
+      throw new UncheckedInterruptedException(e, "Interrupted while listing schemas");
+    }
+    return schemas;
+  }
+
+  @Override
+  public List<SnowflakeTable> listIcebergTables(Namespace namespace) {
+    StringBuilder baseQuery = new StringBuilder("SHOW ICEBERG TABLES");
+    Object[] queryParams = null;
+    if (namespace.length() == SnowflakeResources.MAX_NAMESPACE_DEPTH) {
+      // For a two-level namespace, search for Iceberg tables within the given schema.
+      baseQuery.append(" IN SCHEMA IDENTIFIER(?)");
+      queryParams =
+          new Object[] {
+            String.format(
+                "%s.%s",
+                namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1),
+                namespace.level(SnowflakeResources.NAMESPACE_SCHEMA_LEVEL - 1))
+          };
+    } else if (namespace.length() == SnowflakeResources.NAMESPACE_DB_LEVEL) {
+      // For a one-level namespace, search for Iceberg tables within the given database.
+      baseQuery.append(" IN DATABASE IDENTIFIER(?)");
+      queryParams = new Object[] {namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1)};
+    } else {
+      // For an empty namespace, search at account level.
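+      // Taken together, the three branches scope the SHOW command to the namespace depth: a
+      // two-level namespace searches one SCHEMA, a one-level namespace searches one DATABASE,
+      // and an empty namespace searches the entire ACCOUNT, with the database or
+      // database.schema name bound as the sole query parameter where applicable.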
+      baseQuery.append(" IN ACCOUNT");
+    }
+
+    final String finalQuery = baseQuery.toString();
+    final Object[] finalQueryParams = queryParams;
+    QueryRunner run = new QueryRunner(true);
+    List<SnowflakeTable> tables;
+    try {
+      tables =
+          connectionPool.run(
+              conn ->
+                  run.query(conn, finalQuery, SnowflakeTable.createHandler(), finalQueryParams));
+    } catch (SQLException e) {
+      throw new UncheckedSQLException(
+          e, "Failed to list tables for namespace %s", namespace.toString());
+    } catch (InterruptedException e) {
+      throw new UncheckedInterruptedException(e, "Interrupted while listing tables");
+    }
+    return tables;
+  }
+
+  @Override
+  public SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier) {
+    QueryRunner run = new QueryRunner(true);
+
+    SnowflakeTableMetadata tableMeta;
+    try {
+      final String finalQuery = "SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA";
+      tableMeta =
+          connectionPool.run(
+              conn ->
+                  run.query(
+                      conn,
+                      finalQuery,
+                      SnowflakeTableMetadata.createHandler(),
+                      tableIdentifier.toString()));
+    } catch (SQLException e) {
+      throw new UncheckedSQLException(
+          e, "Failed to get table metadata for %s", tableIdentifier.toString());
+    } catch (InterruptedException e) {
+      throw new UncheckedInterruptedException(e, "Interrupted while getting table metadata");
+    }
+    return tableMeta;
+  }
+
+  @Override
+  public void close() {
+    connectionPool.close();
+  }
+}
diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
new file mode 100644
index 000000000000..40259b54ff21
--- /dev/null
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.snowflake;
+
+import java.io.Closeable;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.iceberg.BaseMetastoreCatalog;
+import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.TableOperations;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.SupportsNamespaces;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NoSuchNamespaceException;
+import org.apache.iceberg.hadoop.Configurable;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.jdbc.JdbcClientPool;
+import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.snowflake.entities.SnowflakeSchema;
+import org.apache.iceberg.snowflake.entities.SnowflakeTable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class SnowflakeCatalog extends BaseMetastoreCatalog
+    implements Closeable, SupportsNamespaces, Configurable<Object> {
+
+  private static final Logger LOG = LoggerFactory.getLogger(SnowflakeCatalog.class);
+
+  private Object conf;
+  private String catalogName = SnowflakeResources.DEFAULT_CATALOG_NAME;
+  private Map<String, String> catalogProperties = null;
+  private FileIO fileIO;
+  private SnowflakeClient snowflakeClient;
+
+  public SnowflakeCatalog() {}
+
+  @VisibleForTesting
+  void setSnowflakeClient(SnowflakeClient snowflakeClient) {
+    this.snowflakeClient = snowflakeClient;
+  }
+
+  @VisibleForTesting
+  void setFileIO(FileIO fileIO) {
+    this.fileIO = fileIO;
+  }
+
+  @Override
+  public List<TableIdentifier> listTables(Namespace namespace) {
+    LOG.debug("listTables with namespace: {}", namespace);
+    Preconditions.checkArgument(
+        namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH,
+        "Snowflake doesn't support more than %s levels of namespace, got %s",
+        SnowflakeResources.MAX_NAMESPACE_DEPTH,
+        namespace);
+
+    List<SnowflakeTable> sfTables = snowflakeClient.listIcebergTables(namespace);
+
+    return sfTables.stream()
+        .map(
+            table ->
+                TableIdentifier.of(table.getDatabase(), table.getSchemaName(), table.getName()))
+        .collect(Collectors.toList());
+  }
+
+  @Override
+  public boolean dropTable(TableIdentifier identifier, boolean purge) {
+    throw new UnsupportedOperationException(
+        String.format("dropTable not supported; attempted for table '%s'", identifier));
+  }
+
+  @Override
+  public void renameTable(TableIdentifier from, TableIdentifier to) {
+    throw new UnsupportedOperationException(
+        String.format("renameTable not supported; attempted from '%s' to '%s'", from, to));
+  }
+
+  @Override
+  public void initialize(String name, Map<String, String> properties) {
+    catalogProperties = properties;
+
+    if (name != null) {
+      this.catalogName = name;
+    }
+
+    if (snowflakeClient == null) {
+      String uri = properties.get(CatalogProperties.URI);
+      Preconditions.checkNotNull(uri, "JDBC connection URI is required");
+
+      try {
+        // We'll ensure the expected JDBC driver implementation class is initialized through
+        // reflection regardless of which classloader ends up using this JdbcSnowflakeClient, but
+        // we'll only warn if the expected driver fails to load, since users may use repackaged or
+        // custom JDBC drivers for Snowflake communication.
+        Class.forName(JdbcSnowflakeClient.EXPECTED_JDBC_IMPL);
+      } catch (ClassNotFoundException cnfe) {
+        LOG.warn(
+            "Failed to load expected JDBC SnowflakeDriver - if queries fail by failing"
+                + " to find a suitable driver for jdbc:snowflake:// URIs, you must add the"
+                + " Snowflake JDBC driver to your jars/packages",
+            cnfe);
+      }
+
+      JdbcClientPool connectionPool = new JdbcClientPool(uri, properties);
+      snowflakeClient = new JdbcSnowflakeClient(connectionPool);
+    }
+
+    if (fileIO == null) {
+      String fileIOImpl = SnowflakeResources.DEFAULT_FILE_IO_IMPL;
+
+      if (catalogProperties.containsKey(CatalogProperties.FILE_IO_IMPL)) {
+        fileIOImpl = catalogProperties.get(CatalogProperties.FILE_IO_IMPL);
+      }
+
+      fileIO = CatalogUtil.loadFileIO(fileIOImpl, catalogProperties, conf);
+    }
+  }
+
+  @Override
+  public void close() {
+    snowflakeClient.close();
+  }
+
+  @Override
+  public void createNamespace(Namespace namespace, Map<String, String> metadata) {
+    throw new UnsupportedOperationException(
+        String.format("createNamespace not supported; attempted for namespace '%s'", namespace));
+  }
+
+  @Override
+  public List<Namespace> listNamespaces(Namespace namespace) {
+    LOG.debug("listNamespaces with namespace: {}", namespace);
+    Preconditions.checkArgument(
+        namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH - 1,
+        "Snowflake doesn't support more than %s levels of namespace, tried to list under %s",
+        SnowflakeResources.MAX_NAMESPACE_DEPTH,
+        namespace);
+    List<SnowflakeSchema> sfSchemas = snowflakeClient.listSchemas(namespace);
+
+    List<Namespace> namespaceList =
+        sfSchemas.stream()
+            .map(schema -> Namespace.of(schema.getDatabase(), schema.getName()))
+            .collect(Collectors.toList());
+    return namespaceList;
+  }
+
+  @Override
+  public Map<String, String> loadNamespaceMetadata(Namespace namespace)
+      throws NoSuchNamespaceException {
+    LOG.debug("loadNamespaceMetadata with namespace: {}", namespace);
+    Map<String, String> nameSpaceMetadata = Maps.newHashMap();
+    nameSpaceMetadata.put("name", namespace.toString());
+    return nameSpaceMetadata;
+  }
+
+  @Override
+  public boolean dropNamespace(Namespace namespace) {
+    throw new UnsupportedOperationException(
+        String.format("dropNamespace not supported; attempted for namespace '%s'", namespace));
+  }
+
+  @Override
+  public boolean setProperties(Namespace namespace, Map<String, String> properties) {
+    throw new UnsupportedOperationException(
+        String.format("setProperties not supported; attempted for namespace '%s'", namespace));
+  }
+
+  @Override
+  public boolean removeProperties(Namespace namespace, Set<String> properties) {
+    throw new UnsupportedOperationException(
+        String.format("removeProperties not supported; attempted for namespace '%s'", namespace));
+  }
+
+  @Override
+  protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
+    Preconditions.checkArgument(
+        tableIdentifier.namespace().length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH,
+        "Snowflake doesn't support more than %s levels of namespace, got %s",
+        SnowflakeResources.MAX_NAMESPACE_DEPTH,
+        tableIdentifier);
+
+    return new SnowflakeTableOperations(
+        snowflakeClient, fileIO, catalogProperties, catalogName, tableIdentifier);
+  }
+
+  @Override
+  protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
+    return null;
+  }
+
+  @Override
+  public void setConf(Object conf) {
+    this.conf = conf;
+  }
+
+  public Object getConf() {
+    return conf;
+  }
+}
diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java
new file mode 100644
index
000000000000..560acedbd45e
--- /dev/null
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.snowflake;
+
+import java.io.Closeable;
+import java.util.List;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.snowflake.entities.SnowflakeSchema;
+import org.apache.iceberg.snowflake.entities.SnowflakeTable;
+import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata;
+
+/**
+ * This interface abstracts out the underlying communication protocols for contacting Snowflake to
+ * obtain the various resource representations defined under "entities". Classes using this
+ * interface should minimize assumptions about whether an underlying client uses e.g. REST, JDBC,
+ * or other libraries/protocols.
+ */
+public interface SnowflakeClient extends Closeable {
+  List<SnowflakeSchema> listSchemas(Namespace namespace);
+
+  List<SnowflakeTable> listIcebergTables(Namespace namespace);
+
+  SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier);
+
+  @Override
+  void close();
+}
diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java
new file mode 100644
index 000000000000..16669253939b
--- /dev/null
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.iceberg.snowflake; + +final class SnowflakeResources { + static final String DEFAULT_CATALOG_NAME = "snowlog"; + static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.hadoop.HadoopFileIO"; + static final int MAX_NAMESPACE_DEPTH = 2; + static final int NAMESPACE_DB_LEVEL = 1; + static final int NAMESPACE_SCHEMA_LEVEL = 2; + + private SnowflakeResources() {} +} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java new file mode 100644 index 000000000000..0da132331a61 --- /dev/null +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.snowflake; + +import java.util.Map; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class SnowflakeTableOperations extends BaseMetastoreTableOperations { + + private static final Logger LOG = LoggerFactory.getLogger(SnowflakeTableOperations.class); + private final String catalogName; + + private final FileIO fileIO; + private final TableIdentifier tableIdentifier; + + private final SnowflakeClient snowflakeClient; + + private final Map catalogProperties; + + protected SnowflakeTableOperations( + SnowflakeClient snowflakeClient, + FileIO fileIO, + Map properties, + String catalogName, + TableIdentifier tableIdentifier) { + this.snowflakeClient = snowflakeClient; + this.fileIO = fileIO; + this.catalogProperties = properties; + this.catalogName = catalogName; + this.tableIdentifier = tableIdentifier; + } + + @Override + public void doRefresh() { + LOG.debug("Getting metadata location for table {}", tableIdentifier); + String location = getTableMetadataLocation(); + Preconditions.checkState( + location != null && !location.isEmpty(), + "Got null or empty location %s for table %s", + location, + tableIdentifier); + refreshFromMetadataLocation(location); + } + + @Override + public FileIO io() { + return fileIO; + } + + @Override + protected String tableName() { + return tableIdentifier.toString(); + } + + private String getTableMetadataLocation() { + SnowflakeTableMetadata metadata = snowflakeClient.getTableMetadata(tableIdentifier); + + if (metadata == null) { + throw new NoSuchTableException("Cannot find table %s", tableIdentifier); + } + if 
(!metadata.getStatus().equals("success")) { + LOG.warn( + "Got non-successful table metadata: {} with metadataLocation {} for table {}", + metadata.getStatus(), + metadata.getIcebergMetadataLocation(), + tableIdentifier); + } + return metadata.getIcebergMetadataLocation(); + } +} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java new file mode 100644 index 000000000000..50410555ad48 --- /dev/null +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.snowflake.entities; + +import java.util.List; +import org.apache.commons.dbutils.ResultSetHandler; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; + +public class SnowflakeSchema { + private String name; + private String databaseName; + + public SnowflakeSchema(String databaseName, String name) { + this.databaseName = databaseName; + this.name = name; + } + + public String getName() { + return name; + } + + public String getDatabase() { + return databaseName; + } + + public static ResultSetHandler> createHandler() { + return rs -> { + List schemas = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("database_name"); + String name = rs.getString("name"); + schemas.add(new SnowflakeSchema(databaseName, name)); + } + return schemas; + }; + } +} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java new file mode 100644 index 000000000000..f619ed0ca7fa --- /dev/null +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.snowflake.entities; + +import java.util.List; +import org.apache.commons.dbutils.ResultSetHandler; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; + +public class SnowflakeTable { + private String databaseName; + private String schemaName; + private String name; + + public SnowflakeTable(String databaseName, String schemaName, String name) { + this.databaseName = databaseName; + this.schemaName = schemaName; + this.name = name; + } + + public String getName() { + return name; + } + + public String getDatabase() { + return databaseName; + } + + public String getSchemaName() { + return schemaName; + } + + public static ResultSetHandler> createHandler() { + return rs -> { + List tables = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("database_name"); + String schemaName = rs.getString("schema_name"); + String name = rs.getString("name"); + tables.add(new SnowflakeTable(databaseName, schemaName, name)); + } + return tables; + }; + } +} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java new file mode 100644 index 000000000000..554b7db3bab4 --- /dev/null +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.snowflake.entities; + +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.dbutils.ResultSetHandler; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +public class SnowflakeTableMetadata { + public static final Pattern SNOWFLAKE_AZURE_PATTERN = + Pattern.compile("azure://([^/]+)/([^/]+)/(.*)"); + + private String snowflakeMetadataLocation; + private String status; + private String icebergMetadataLocation; + + private String rawJsonVal; + + public SnowflakeTableMetadata( + String snowflakeMetadataLocation, + String icebergMetadataLocation, + String status, + String rawJsonVal) { + this.snowflakeMetadataLocation = snowflakeMetadataLocation; + this.icebergMetadataLocation = icebergMetadataLocation; + this.status = status; + this.rawJsonVal = rawJsonVal; + } + + /** Storage location of table metadata in Snowflake's path syntax. */ + public String getSnowflakeMetadataLocation() { + return snowflakeMetadataLocation; + } + + /** Storage location of table metadata in Iceberg's path syntax. 
*/ + public String getIcebergMetadataLocation() { + return icebergMetadataLocation; + } + + public String getStatus() { + return status; + } + + /** + * Translates from Snowflake's path syntax to Iceberg's path syntax for paths matching known + * non-compatible Snowflake paths. Throws IllegalArgumentException if the prefix of the + * snowflakeLocation is a known non-compatible path syntax but fails to match the expected path + * components for a successful translation. + */ + public static String getIcebergLocationFromSnowflakeLocation(String snowflakeLocation) { + if (snowflakeLocation.startsWith("azure://")) { + // Convert from expected path of the form: + // azure://account.blob.core.windows.net/container/volumepath + // to: + // wasbs://container@account.blob.core.windows.net/volumepath + Matcher matcher = SNOWFLAKE_AZURE_PATTERN.matcher(snowflakeLocation); + Preconditions.checkArgument( + matcher.matches(), + "Location '%s' failed to match pattern '%s'", + snowflakeLocation, + SNOWFLAKE_AZURE_PATTERN); + return String.format( + "wasbs://%s@%s/%s", matcher.group(2), matcher.group(1), matcher.group(3)); + } else if (snowflakeLocation.startsWith("gcs://")) { + // Convert from expected path of the form: + // gcs://bucket/path + // to: + // gs://bucket/path + return "gs" + snowflakeLocation.substring(3); + } + return snowflakeLocation; + } + + /** + * Factory method for parsing a JSON string containing expected Snowflake table metadata into a + * SnowflakeTableMetadata object. + */ + public static SnowflakeTableMetadata parseJson(String json) { + JsonNode parsedVal; + try { + parsedVal = JsonUtil.mapper().readValue(json, JsonNode.class); + } catch (IOException ioe) { + throw new IllegalArgumentException(String.format("Malformed JSON: %s", json), ioe); + } + + String snowflakeMetadataLocation = JsonUtil.getString("metadataLocation", parsedVal); + String status = JsonUtil.getStringOrNull("status", parsedVal); + + String icebergMetadataLocation = + getIcebergLocationFromSnowflakeLocation(snowflakeMetadataLocation); + + return new SnowflakeTableMetadata( + snowflakeMetadataLocation, icebergMetadataLocation, status, json); + } + + public static ResultSetHandler createHandler() { + return rs -> { + if (!rs.next()) { + return null; + } + + String rawJsonVal = rs.getString("METADATA"); + return SnowflakeTableMetadata.parseJson(rawJsonVal); + }; + } +} diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java new file mode 100644 index 000000000000..1e5b723061e1 --- /dev/null +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.iceberg.snowflake;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.jdbc.UncheckedSQLException;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.snowflake.entities.SnowflakeSchema;
+import org.apache.iceberg.snowflake.entities.SnowflakeTable;
+import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata;
+
+public class FakeSnowflakeClient implements SnowflakeClient {
+  // In-memory lookup by database/schema/tableName to table metadata.
+  private Map<String, Map<String, Map<String, SnowflakeTableMetadata>>> databases =
+      Maps.newTreeMap();
+
+  public FakeSnowflakeClient() {}
+
+  /**
+   * Also adds parent database/schema if they don't already exist. If the tableName already exists
+   * under the given database/schema, the value is replaced with the provided metadata.
+   */
+  public void addTable(
+      String database, String schema, String tableName, SnowflakeTableMetadata metadata) {
+    if (!databases.containsKey(database)) {
+      databases.put(database, Maps.newTreeMap());
+    }
+    Map<String, Map<String, SnowflakeTableMetadata>> schemas = databases.get(database);
+    if (!schemas.containsKey(schema)) {
+      schemas.put(schema, Maps.newTreeMap());
+    }
+    Map<String, SnowflakeTableMetadata> tables = schemas.get(schema);
+    tables.put(tableName, metadata);
+  }
+
+  @Override
+  public List<SnowflakeSchema> listSchemas(Namespace namespace) {
+    Preconditions.checkArgument(
+        namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH,
+        "Namespace %s must have length <= %s",
+        namespace,
+        SnowflakeResources.MAX_NAMESPACE_DEPTH);
+    List<SnowflakeSchema> schemas = Lists.newArrayList();
+    if (namespace.length() == 0) {
+      // "account-level" listing.
+      for (Map.Entry<String, Map<String, Map<String, SnowflakeTableMetadata>>> db :
+          databases.entrySet()) {
+        for (String schema : db.getValue().keySet()) {
+          schemas.add(new SnowflakeSchema(db.getKey(), schema));
+        }
+      }
+    } else if (namespace.length() == SnowflakeResources.NAMESPACE_DB_LEVEL) {
+      String dbName = namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1);
+      if (databases.containsKey(dbName)) {
+        for (String schema : databases.get(dbName).keySet()) {
+          schemas.add(new SnowflakeSchema(dbName, schema));
+        }
+      } else {
+        throw new UncheckedSQLException("Nonexistent database: '%s'", dbName);
+      }
+    } else {
+      throw new IllegalArgumentException(
+          String.format(
+              "Tried to listSchemas using a namespace with too many levels: '%s'", namespace));
+    }
+    return schemas;
+  }
+
+  @Override
+  public List<SnowflakeTable> listIcebergTables(Namespace namespace) {
+    Preconditions.checkArgument(
+        namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH,
+        "Namespace %s must have length <= %s",
+        namespace,
+        SnowflakeResources.MAX_NAMESPACE_DEPTH);
+    List<SnowflakeTable> tables = Lists.newArrayList();
+    if (namespace.length() == 0) {
+      // "account-level" listing.
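+      // Flattening the three-level TreeMap yields tables in sorted
+      // (database, schema, table) order, mimicking SHOW ICEBERG TABLES IN ACCOUNT.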
+      for (Map.Entry<String, Map<String, Map<String, SnowflakeTableMetadata>>> db :
+          databases.entrySet()) {
+        for (Map.Entry<String, Map<String, SnowflakeTableMetadata>> schema :
+            db.getValue().entrySet()) {
+          for (String tableName : schema.getValue().keySet()) {
+            tables.add(new SnowflakeTable(db.getKey(), schema.getKey(), tableName));
+          }
+        }
+      }
+    } else if (namespace.length() == SnowflakeResources.NAMESPACE_DB_LEVEL) {
+      String dbName = namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1);
+      if (databases.containsKey(dbName)) {
+        for (Map.Entry<String, Map<String, SnowflakeTableMetadata>> schema :
+            databases.get(dbName).entrySet()) {
+          for (String tableName : schema.getValue().keySet()) {
+            tables.add(new SnowflakeTable(dbName, schema.getKey(), tableName));
+          }
+        }
+      } else {
+        throw new UncheckedSQLException("Nonexistent database: '%s'", dbName);
+      }
+    } else {
+      String dbName = namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1);
+      if (databases.containsKey(dbName)) {
+        String schemaName = namespace.level(SnowflakeResources.NAMESPACE_SCHEMA_LEVEL - 1);
+        if (databases.get(dbName).containsKey(schemaName)) {
+          for (String tableName : databases.get(dbName).get(schemaName).keySet()) {
+            tables.add(new SnowflakeTable(dbName, schemaName, tableName));
+          }
+        } else {
+          throw new UncheckedSQLException(
+              "Nonexistent database.schema: '%s.%s'", dbName, schemaName);
+        }
+      } else {
+        throw new UncheckedSQLException("Nonexistent database: '%s'", dbName);
+      }
+    }
+    return tables;
+  }
+
+  @Override
+  public SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier) {
+    Namespace ns = tableIdentifier.namespace();
+    Preconditions.checkArgument(
+        ns.length() == SnowflakeResources.MAX_NAMESPACE_DEPTH,
+        "TableIdentifier %s must have namespace of length %s",
+        tableIdentifier,
+        SnowflakeResources.MAX_NAMESPACE_DEPTH);
+    String dbName = ns.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1);
+    String schemaName = ns.level(SnowflakeResources.NAMESPACE_SCHEMA_LEVEL - 1);
+    if (!databases.containsKey(dbName)
+        || !databases.get(dbName).containsKey(schemaName)
+        || !databases.get(dbName).get(schemaName).containsKey(tableIdentifier.name())) {
+      throw new UncheckedSQLException("Nonexistent object: '%s'", tableIdentifier);
+    }
+    return databases.get(dbName).get(schemaName).get(tableIdentifier.name());
+  }
+
+  @Override
+  public void close() {}
+}
diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java
new file mode 100644
index 000000000000..3873375f8e89
--- /dev/null
+++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.iceberg.snowflake; + +import java.util.Map; +import org.apache.iceberg.exceptions.NotFoundException; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.InMemoryInputFile; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; + +public class InMemoryFileIO implements FileIO { + + private Map inMemoryFiles = Maps.newHashMap(); + + public void addFile(String path, byte[] contents) { + inMemoryFiles.put(path, new InMemoryInputFile(path, contents)); + } + + @Override + public InputFile newInputFile(String path) { + if (!inMemoryFiles.containsKey(path)) { + throw new NotFoundException("No in-memory file found for path: %s", path); + } + return inMemoryFiles.get(path); + } + + @Override + public OutputFile newOutputFile(String path) { + return null; + } + + @Override + public void deleteFile(String path) {} +} diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java new file mode 100644 index 000000000000..f8f82efb60eb --- /dev/null +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.snowflake; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableMetadataParser; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; +import org.apache.iceberg.types.Types; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class SnowflakeCatalogTest { + + static final String TEST_CATALOG_NAME = "slushLog"; + private SnowflakeCatalog catalog; + + @Before + public void before() { + catalog = new SnowflakeCatalog(); + + FakeSnowflakeClient client = new FakeSnowflakeClient(); + client.addTable( + "DB_1", + "SCHEMA_1", + "TAB_1", + SnowflakeTableMetadata.parseJson( + "{\"metadataLocation\":\"s3://tab1/metadata/v3.metadata.json\",\"status\":\"success\"}")); + client.addTable( + "DB_1", + "SCHEMA_1", + "TAB_2", + SnowflakeTableMetadata.parseJson( + "{\"metadataLocation\":\"s3://tab2/metadata/v1.metadata.json\",\"status\":\"success\"}")); + client.addTable( + "DB_2", + "SCHEMA_2", + "TAB_3", + SnowflakeTableMetadata.parseJson( + "{\"metadataLocation\":\"azure://myaccount.blob.core.windows.net/mycontainer/tab3/metadata/v334.metadata.json\",\"status\":\"success\"}")); + client.addTable( + "DB_2", + "SCHEMA_2", + "TAB_4", + SnowflakeTableMetadata.parseJson( + "{\"metadataLocation\":\"azure://myaccount.blob.core.windows.net/mycontainer/tab4/metadata/v323.metadata.json\",\"status\":\"success\"}")); + client.addTable( + "DB_3", + "SCHEMA_3", + "TAB_5", + SnowflakeTableMetadata.parseJson( + "{\"metadataLocation\":\"gcs://tab5/metadata/v793.metadata.json\",\"status\":\"success\"}")); + client.addTable( + "DB_3", + "SCHEMA_4", + "TAB_6", + SnowflakeTableMetadata.parseJson( + "{\"metadataLocation\":\"gcs://tab6/metadata/v123.metadata.json\",\"status\":\"success\"}")); + + catalog.setSnowflakeClient(client); + + InMemoryFileIO fakeFileIO = new InMemoryFileIO(); + + Schema schema = + new Schema( + Types.NestedField.required(1, "x", Types.StringType.get(), "comment1"), + Types.NestedField.required(2, "y", Types.StringType.get(), "comment2")); + PartitionSpec partitionSpec = + PartitionSpec.builderFor(schema).identity("x").withSpecId(1000).build(); + fakeFileIO.addFile( + "s3://tab1/metadata/v3.metadata.json", + TableMetadataParser.toJson( + TableMetadata.newTableMetadata( + schema, partitionSpec, "s3://tab1/", ImmutableMap.of())) + .getBytes()); + fakeFileIO.addFile( + "wasbs://mycontainer@myaccount.blob.core.windows.net/tab3/metadata/v334.metadata.json", + TableMetadataParser.toJson( + TableMetadata.newTableMetadata( + schema, + partitionSpec, + "wasbs://mycontainer@myaccount.blob.core.windows.net/tab1/", + ImmutableMap.of())) + .getBytes()); + fakeFileIO.addFile( + "gs://tab5/metadata/v793.metadata.json", + TableMetadataParser.toJson( + TableMetadata.newTableMetadata( + schema, partitionSpec, "gs://tab5/", ImmutableMap.of())) + .getBytes()); + + catalog.setFileIO(fakeFileIO); + + Map properties = Maps.newHashMap(); + catalog.initialize(TEST_CATALOG_NAME, properties); + } + + @Test + public void testListNamespace() { + List namespaces = 
catalog.listNamespaces(); + Assert.assertEquals( + Lists.newArrayList( + Namespace.of("DB_1", "SCHEMA_1"), + Namespace.of("DB_2", "SCHEMA_2"), + Namespace.of("DB_3", "SCHEMA_3"), + Namespace.of("DB_3", "SCHEMA_4")), + namespaces); + } + + @Test + public void testListNamespaceWithinDB() { + String dbName = "DB_1"; + List namespaces = catalog.listNamespaces(Namespace.of(dbName)); + Assert.assertEquals(Lists.newArrayList(Namespace.of(dbName, "SCHEMA_1")), namespaces); + } + + @Test + public void testListNamespaceWithinNonExistentDB() { + // Existence check for nonexistent parent namespaces is optional in the SupportsNamespaces + // interface. + String dbName = "NONEXISTENT_DB"; + Assert.assertThrows(RuntimeException.class, () -> catalog.listNamespaces(Namespace.of(dbName))); + } + + @Test + public void testListNamespaceWithinSchema() { + // No "sub-namespaces" beyond database.schema; invalid to try to list namespaces given + // a database.schema. + String dbName = "DB_3"; + String schemaName = "SCHEMA_4"; + Assert.assertThrows( + IllegalArgumentException.class, + () -> catalog.listNamespaces(Namespace.of(dbName, schemaName))); + } + + @Test + public void testListTables() { + List tables = catalog.listTables(Namespace.empty()); + Assert.assertEquals( + Lists.newArrayList( + TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1"), + TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_2"), + TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_3"), + TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_4"), + TableIdentifier.of("DB_3", "SCHEMA_3", "TAB_5"), + TableIdentifier.of("DB_3", "SCHEMA_4", "TAB_6")), + tables); + } + + @Test + public void testListTablesWithinDB() { + String dbName = "DB_1"; + List tables = catalog.listTables(Namespace.of(dbName)); + Assert.assertEquals( + Lists.newArrayList( + TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1"), + TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_2")), + tables); + } + + @Test + public void testListTablesWithinNonexistentDB() { + String dbName = "NONEXISTENT_DB"; + Assert.assertThrows(RuntimeException.class, () -> catalog.listTables(Namespace.of(dbName))); + } + + @Test + public void testListTablesWithinSchema() { + String dbName = "DB_2"; + String schemaName = "SCHEMA_2"; + List tables = catalog.listTables(Namespace.of(dbName, schemaName)); + Assert.assertEquals( + Lists.newArrayList( + TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_3"), + TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_4")), + tables); + } + + @Test + public void testListTablesWithinNonexistentSchema() { + String dbName = "DB_2"; + String schemaName = "NONEXISTENT_DB"; + Assert.assertThrows( + RuntimeException.class, () -> catalog.listTables(Namespace.of(dbName, schemaName))); + } + + @Test + public void testLoadS3Table() { + Table table = catalog.loadTable(TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TAB_1")); + Assert.assertEquals(table.location(), "s3://tab1/"); + } + + @Test + public void testLoadAzureTable() { + Table table = catalog.loadTable(TableIdentifier.of(Namespace.of("DB_2", "SCHEMA_2"), "TAB_3")); + Assert.assertEquals( + table.location(), "wasbs://mycontainer@myaccount.blob.core.windows.net/tab1/"); + } + + @Test + public void testLoadGcsTable() { + Table table = catalog.loadTable(TableIdentifier.of(Namespace.of("DB_3", "SCHEMA_3"), "TAB_5")); + Assert.assertEquals(table.location(), "gs://tab5/"); + } +} diff --git a/spark/v3.3/build.gradle b/spark/v3.3/build.gradle index 577700787e69..30f3eb02bb5f 100644 --- a/spark/v3.3/build.gradle +++ b/spark/v3.3/build.gradle @@ -221,6 +221,9 @@ 
project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio implementation(project(':iceberg-nessie')) { exclude group: 'com.google.code.findbugs', module: 'jsr305' } + implementation (project(':iceberg-snowflake')) { + exclude group: 'net.snowflake' , module: 'snowflake-jdbc' + } integrationImplementation "org.scala-lang.modules:scala-collection-compat_${scalaVersion}" integrationImplementation "org.apache.spark:spark-hive_${scalaVersion}:${sparkVersion}" From 930a3f06ddaa0926d3654004cf841405ba979680 Mon Sep 17 00:00:00 2001 From: Dennis Huo <7410123+dennishuo@users.noreply.github.com> Date: Wed, 14 Dec 2022 14:33:16 -0800 Subject: [PATCH 02/20] Add JdbcSnowflakeClientTest using mocks (#2) Add JdbcSnowflakeClientTest using mocks; provides full coverage of JdbcSnowflakeClient and entities' ResultSetHandler logic. Also update target Spark runtime versions to be included. --- .../snowflake/JdbcSnowflakeClient.java | 19 +- .../snowflake/entities/SnowflakeSchema.java | 24 + .../snowflake/entities/SnowflakeTable.java | 25 + .../entities/SnowflakeTableMetadata.java | 34 +- .../snowflake/JdbcSnowflakeClientTest.java | 453 ++++++++++++++++++ spark/v3.1/build.gradle | 3 + spark/v3.2/build.gradle | 3 + 7 files changed, 553 insertions(+), 8 deletions(-) create mode 100644 snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java index 9730a5f3724b..d2870a94df18 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java @@ -26,6 +26,7 @@ import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.jdbc.UncheckedInterruptedException; import org.apache.iceberg.jdbc.UncheckedSQLException; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.snowflake.entities.SnowflakeSchema; import org.apache.iceberg.snowflake.entities.SnowflakeTable; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; @@ -47,11 +48,17 @@ public class JdbcSnowflakeClient implements SnowflakeClient { private static final Logger LOG = LoggerFactory.getLogger(JdbcSnowflakeClient.class); private final JdbcClientPool connectionPool; + private QueryRunner queryRunner = new QueryRunner(true); JdbcSnowflakeClient(JdbcClientPool conn) { connectionPool = conn; } + @VisibleForTesting + void setQueryRunner(QueryRunner queryRunner) { + this.queryRunner = queryRunner; + } + @Override public List listSchemas(Namespace namespace) { StringBuilder baseQuery = new StringBuilder("SHOW SCHEMAS"); @@ -68,13 +75,13 @@ public List listSchemas(Namespace namespace) { final String finalQuery = baseQuery.toString(); final Object[] finalQueryParams = queryParams; - QueryRunner run = new QueryRunner(true); List schemas; try { schemas = connectionPool.run( conn -> - run.query(conn, finalQuery, SnowflakeSchema.createHandler(), finalQueryParams)); + queryRunner.query( + conn, finalQuery, SnowflakeSchema.createHandler(), finalQueryParams)); } catch (SQLException e) { throw new UncheckedSQLException( e, @@ -111,13 +118,13 @@ public List listIcebergTables(Namespace namespace) { final String finalQuery = baseQuery.toString(); final Object[] finalQueryParams = queryParams; - QueryRunner run = new QueryRunner(true); List tables; try { tables = connectionPool.run( 
conn -> - run.query(conn, finalQuery, SnowflakeTable.createHandler(), finalQueryParams)); + queryRunner.query( + conn, finalQuery, SnowflakeTable.createHandler(), finalQueryParams)); } catch (SQLException e) { throw new UncheckedSQLException( e, "Failed to list tables for namespace %s", namespace.toString()); @@ -129,15 +136,13 @@ public List listIcebergTables(Namespace namespace) { @Override public SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier) { - QueryRunner run = new QueryRunner(true); - SnowflakeTableMetadata tableMeta; try { final String finalQuery = "SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"; tableMeta = connectionPool.run( conn -> - run.query( + queryRunner.query( conn, finalQuery, SnowflakeTableMetadata.createHandler(), diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java index 50410555ad48..b8acccff6f54 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java @@ -20,6 +20,7 @@ import java.util.List; import org.apache.commons.dbutils.ResultSetHandler; +import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.collect.Lists; public class SnowflakeSchema { @@ -39,6 +40,29 @@ public String getDatabase() { return databaseName; } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof SnowflakeSchema)) { + return false; + } + + SnowflakeSchema that = (SnowflakeSchema) o; + return Objects.equal(this.databaseName, that.databaseName) + && Objects.equal(this.name, that.name); + } + + @Override + public int hashCode() { + return Objects.hashCode(databaseName, name); + } + + @Override + public String toString() { + return String.format("%s.%s", databaseName, name); + } + public static ResultSetHandler> createHandler() { return rs -> { List schemas = Lists.newArrayList(); diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java index f619ed0ca7fa..fbb8ecd5cac5 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java @@ -20,6 +20,7 @@ import java.util.List; import org.apache.commons.dbutils.ResultSetHandler; +import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.collect.Lists; public class SnowflakeTable { @@ -45,6 +46,30 @@ public String getSchemaName() { return schemaName; } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof SnowflakeTable)) { + return false; + } + + SnowflakeTable that = (SnowflakeTable) o; + return Objects.equal(this.databaseName, that.databaseName) + && Objects.equal(this.schemaName, that.schemaName) + && Objects.equal(this.name, that.name); + } + + @Override + public int hashCode() { + return Objects.hashCode(databaseName, schemaName, name); + } + + @Override + public String toString() { + return String.format("%s.%s.%s", databaseName, schemaName, name); + } + public static ResultSetHandler> createHandler() { return rs -> { List tables = Lists.newArrayList(); diff --git 
a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java index 554b7db3bab4..d58bc81e3f73 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java @@ -23,6 +23,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.dbutils.ResultSetHandler; +import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; @@ -31,9 +32,12 @@ public class SnowflakeTableMetadata { Pattern.compile("azure://([^/]+)/([^/]+)/(.*)"); private String snowflakeMetadataLocation; - private String status; private String icebergMetadataLocation; + private String status; + // Note: Since not all sources will necessarily come from a raw JSON representation, this raw + // JSON should only be considered a convenient debugging field. Equality of two + // SnowflakeTableMetadata instances should not depend on equality of this field. private String rawJsonVal; public SnowflakeTableMetadata( @@ -61,6 +65,34 @@ public String getStatus() { return status; } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof SnowflakeTableMetadata)) { + return false; + } + + // Only consider parsed fields, not the raw JSON that may or may not be the original source of + // this instance. + SnowflakeTableMetadata that = (SnowflakeTableMetadata) o; + return Objects.equal(this.snowflakeMetadataLocation, that.snowflakeMetadataLocation) + && Objects.equal(this.icebergMetadataLocation, that.icebergMetadataLocation) + && Objects.equal(this.status, that.status); + } + + @Override + public int hashCode() { + return Objects.hashCode(snowflakeMetadataLocation, icebergMetadataLocation, status); + } + + @Override + public String toString() { + return String.format( + "snowflakeMetadataLocation: '%s', icebergMetadataLocation: '%s', status: '%s'", + snowflakeMetadataLocation, icebergMetadataLocation, status); + } + /** * Translates from Snowflake's path syntax to Iceberg's path syntax for paths matching known * non-compatible Snowflake paths. Throws IllegalArgumentException if the prefix of the diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java new file mode 100644 index 000000000000..0e5b881b1c29 --- /dev/null +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java @@ -0,0 +1,453 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.snowflake; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; +import org.apache.commons.dbutils.QueryRunner; +import org.apache.commons.dbutils.ResultSetHandler; +import org.apache.iceberg.ClientPool; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.jdbc.JdbcClientPool; +import org.apache.iceberg.jdbc.UncheckedInterruptedException; +import org.apache.iceberg.jdbc.UncheckedSQLException; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.snowflake.entities.SnowflakeSchema; +import org.apache.iceberg.snowflake.entities.SnowflakeTable; +import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; +import org.assertj.core.api.Assertions; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentMatchers; +import org.mockito.Mock; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.junit.MockitoJUnitRunner; +import org.mockito.stubbing.Answer; + +@RunWith(MockitoJUnitRunner.class) +public class JdbcSnowflakeClientTest { + @Mock private Connection mockConnection; + @Mock private JdbcClientPool mockClientPool; + @Mock private QueryRunner mockQueryRunner; + @Mock private ResultSet mockResultSet; + + private JdbcSnowflakeClient snowflakeClient; + + @Before + public void before() throws SQLException, InterruptedException { + snowflakeClient = new JdbcSnowflakeClient(mockClientPool); + snowflakeClient.setQueryRunner(mockQueryRunner); + + doAnswer( + new Answer() { + @Override + public Object answer(InvocationOnMock invocation) throws Throwable { + return ((ClientPool.Action) invocation.getArguments()[0]).run(mockConnection); + } + }) + .when(mockClientPool) + .run(any(ClientPool.Action.class)); + doAnswer( + new Answer() { + @Override + public Object answer(InvocationOnMock invocation) throws Throwable { + return ((ResultSetHandler) invocation.getArguments()[2]).handle(mockResultSet); + } + }) + .when(mockQueryRunner) + .query( + any(Connection.class), + any(String.class), + any(ResultSetHandler.class), + ArgumentMatchers.any()); + } + + /** + * For the root/empty Namespace, expect an underlying query to list schemas at the ACCOUNT level + * with no query parameters. 
+ */ + @Test + public void testListSchemasInAccount() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(true).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("database_name")) + .thenReturn("DB_1") + .thenReturn("DB_1") + .thenReturn("DB_2"); + when(mockResultSet.getString("name")) + .thenReturn("SCHEMA_1") + .thenReturn("SCHEMA_2") + .thenReturn("SCHEMA_3"); + + List actualList = snowflakeClient.listSchemas(Namespace.of()); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW SCHEMAS IN ACCOUNT"), + any(ResultSetHandler.class), + eq((Object[]) null)); + + List expectedList = + Lists.newArrayList( + new SnowflakeSchema("DB_1", "SCHEMA_1"), + new SnowflakeSchema("DB_1", "SCHEMA_2"), + new SnowflakeSchema("DB_2", "SCHEMA_3")); + Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + } + + /** + * For a 1-level Namespace, expect an underlying query to list schemas at the DATABASE level and + * supply the Namespace as a query param in an IDENTIFIER. + */ + @Test + public void testListSchemasInDatabase() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("database_name")).thenReturn("DB_1").thenReturn("DB_1"); + when(mockResultSet.getString("name")).thenReturn("SCHEMA_1").thenReturn("SCHEMA_2"); + + List actualList = snowflakeClient.listSchemas(Namespace.of("DB_1")); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW SCHEMAS IN DATABASE IDENTIFIER(?)"), + any(ResultSetHandler.class), + eq("DB_1")); + + List expectedList = + Lists.newArrayList( + new SnowflakeSchema("DB_1", "SCHEMA_1"), new SnowflakeSchema("DB_1", "SCHEMA_2")); + Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + } + + /** + * Any unexpected SQLException from the underlying connection will propagate out as an + * UncheckedSQLException when listing schemas. + */ + @Test + public void testListSchemasSQLException() throws SQLException, InterruptedException { + when(mockClientPool.run(any(ClientPool.Action.class))) + .thenThrow(new SQLException("Fake SQL exception")); + Assert.assertThrows( + UncheckedSQLException.class, () -> snowflakeClient.listSchemas(Namespace.of("DB_1"))); + } + + /** + * Any unexpected InterruptedException from the underlying connection will propagate out as an + * UncheckedInterruptedException when listing schemas. + */ + @Test + public void testListSchemasInterruptedException() throws SQLException, InterruptedException { + when(mockClientPool.run(any(ClientPool.Action.class))) + .thenThrow(new InterruptedException("Fake interrupted exception")); + Assert.assertThrows( + UncheckedInterruptedException.class, + () -> snowflakeClient.listSchemas(Namespace.of("DB_1"))); + } + + /** + * For the root/empty Namespace, expect an underlying query to list tables at the ACCOUNT level + * with no query parameters. 
+ */ + @Test + public void testListIcebergTablesInAccount() throws SQLException { + when(mockResultSet.next()) + .thenReturn(true) + .thenReturn(true) + .thenReturn(true) + .thenReturn(true) + .thenReturn(false); + when(mockResultSet.getString("database_name")) + .thenReturn("DB_1") + .thenReturn("DB_1") + .thenReturn("DB_1") + .thenReturn("DB_2"); + when(mockResultSet.getString("schema_name")) + .thenReturn("SCHEMA_1") + .thenReturn("SCHEMA_1") + .thenReturn("SCHEMA_2") + .thenReturn("SCHEMA_3"); + when(mockResultSet.getString("name")) + .thenReturn("TABLE_1") + .thenReturn("TABLE_2") + .thenReturn("TABLE_3") + .thenReturn("TABLE_4"); + + List actualList = snowflakeClient.listIcebergTables(Namespace.of()); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW ICEBERG TABLES IN ACCOUNT"), + any(ResultSetHandler.class), + eq((Object[]) null)); + + List expectedList = + Lists.newArrayList( + new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_1"), + new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_2"), + new SnowflakeTable("DB_1", "SCHEMA_2", "TABLE_3"), + new SnowflakeTable("DB_2", "SCHEMA_3", "TABLE_4")); + Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + } + + /** + * For a 1-level Namespace, expect an underlying query to list tables at the DATABASE level and + * supply the Namespace as a query param in an IDENTIFIER. + */ + @Test + public void testListIcebergTablesInDatabase() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(true).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("database_name")) + .thenReturn("DB_1") + .thenReturn("DB_1") + .thenReturn("DB_1"); + when(mockResultSet.getString("schema_name")) + .thenReturn("SCHEMA_1") + .thenReturn("SCHEMA_1") + .thenReturn("SCHEMA_2"); + when(mockResultSet.getString("name")) + .thenReturn("TABLE_1") + .thenReturn("TABLE_2") + .thenReturn("TABLE_3"); + + List actualList = snowflakeClient.listIcebergTables(Namespace.of("DB_1")); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW ICEBERG TABLES IN DATABASE IDENTIFIER(?)"), + any(ResultSetHandler.class), + eq("DB_1")); + + List expectedList = + Lists.newArrayList( + new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_1"), + new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_2"), + new SnowflakeTable("DB_1", "SCHEMA_2", "TABLE_3")); + Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + } + + /** + * For a 2-level Namespace, expect an underlying query to list tables at the SCHEMA level and + * supply the Namespace as a query param in an IDENTIFIER. 
+ */ + @Test + public void testListIcebergTablesInSchema() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("database_name")).thenReturn("DB_1").thenReturn("DB_1"); + when(mockResultSet.getString("schema_name")).thenReturn("SCHEMA_1").thenReturn("SCHEMA_1"); + when(mockResultSet.getString("name")).thenReturn("TABLE_1").thenReturn("TABLE_2"); + + List actualList = + snowflakeClient.listIcebergTables(Namespace.of("DB_1", "SCHEMA_1")); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW ICEBERG TABLES IN SCHEMA IDENTIFIER(?)"), + any(ResultSetHandler.class), + eq("DB_1.SCHEMA_1")); + + List expectedList = + Lists.newArrayList( + new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_1"), + new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_2")); + Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + } + + /** + * Any unexpected SQLException from the underlying connection will propagate out as an + * UncheckedSQLException when listing tables. + */ + @Test + public void testListIcebergTablesSQLException() throws SQLException, InterruptedException { + when(mockClientPool.run(any(ClientPool.Action.class))) + .thenThrow(new SQLException("Fake SQL exception")); + Assert.assertThrows( + UncheckedSQLException.class, () -> snowflakeClient.listIcebergTables(Namespace.of("DB_1"))); + } + + /** + * Any unexpected InterruptedException from the underlying connection will propagate out as an + * UncheckedInterruptedException when listing tables. + */ + @Test + public void testListIcebergTablesInterruptedException() + throws SQLException, InterruptedException { + when(mockClientPool.run(any(ClientPool.Action.class))) + .thenThrow(new InterruptedException("Fake interrupted exception")); + Assert.assertThrows( + UncheckedInterruptedException.class, + () -> snowflakeClient.listIcebergTables(Namespace.of("DB_1"))); + } + + /** + * Test parsing of table metadata JSON from a GET_ICEBERG_TABLE_INFORMATION call, with the S3 path + * unaltered between snowflake/iceberg path representations. + */ + @Test + public void testGetS3TableMetadata() throws SQLException { + when(mockResultSet.next()).thenReturn(true); + when(mockResultSet.getString("METADATA")) + .thenReturn( + "{\"metadataLocation\":\"s3://tab1/metadata/v3.metadata.json\",\"status\":\"success\"}"); + + SnowflakeTableMetadata actualMetadata = + snowflakeClient.getTableMetadata( + TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1")); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"), + any(ResultSetHandler.class), + eq("DB_1.SCHEMA_1.TABLE_1")); + + SnowflakeTableMetadata expectedMetadata = + new SnowflakeTableMetadata( + "s3://tab1/metadata/v3.metadata.json", + "s3://tab1/metadata/v3.metadata.json", + "success", + null); + Assert.assertEquals(expectedMetadata, actualMetadata); + } + + /** + * Test parsing of table metadata JSON from a GET_ICEBERG_TABLE_INFORMATION call, with the Azure + * path translated from an azure:// format to a wasbs:// format. 
+ */ + @Test + public void testGetAzureTableMetadata() throws SQLException { + when(mockResultSet.next()).thenReturn(true); + when(mockResultSet.getString("METADATA")) + .thenReturn( + "{\"metadataLocation\":\"azure://myaccount.blob.core.windows.net/mycontainer/tab3/metadata/v334.metadata.json\",\"status\":\"success\"}"); + + SnowflakeTableMetadata actualMetadata = + snowflakeClient.getTableMetadata( + TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1")); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"), + any(ResultSetHandler.class), + eq("DB_1.SCHEMA_1.TABLE_1")); + + SnowflakeTableMetadata expectedMetadata = + new SnowflakeTableMetadata( + "azure://myaccount.blob.core.windows.net/mycontainer/tab3/metadata/v334.metadata.json", + "wasbs://mycontainer@myaccount.blob.core.windows.net/tab3/metadata/v334.metadata.json", + "success", + null); + Assert.assertEquals(expectedMetadata, actualMetadata); + } + + /** + * Test parsing of table metadata JSON from a GET_ICEBERG_TABLE_INFORMATION call, with the GCS + * path translated from a gcs:// format to a gs:// format. + */ + @Test + public void testGetGcsTableMetadata() throws SQLException { + when(mockResultSet.next()).thenReturn(true); + when(mockResultSet.getString("METADATA")) + .thenReturn( + "{\"metadataLocation\":\"gcs://tab5/metadata/v793.metadata.json\",\"status\":\"success\"}"); + + SnowflakeTableMetadata actualMetadata = + snowflakeClient.getTableMetadata( + TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1")); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"), + any(ResultSetHandler.class), + eq("DB_1.SCHEMA_1.TABLE_1")); + + SnowflakeTableMetadata expectedMetadata = + new SnowflakeTableMetadata( + "gcs://tab5/metadata/v793.metadata.json", + "gs://tab5/metadata/v793.metadata.json", + "success", + null); + Assert.assertEquals(expectedMetadata, actualMetadata); + } + + /** Malformed JSON from a ResultSet should propagate as an IllegalArgumentException. */ + @Test + public void testGetTableMetadataMalformedJson() throws SQLException { + when(mockResultSet.next()).thenReturn(true); + when(mockResultSet.getString("METADATA")).thenReturn("{\"malformed_no_closing_bracket"); + Assert.assertThrows( + IllegalArgumentException.class, + () -> + snowflakeClient.getTableMetadata( + TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1"))); + } + + /** + * Any unexpected SQLException from the underlying connection will propagate out as an + * UncheckedSQLException when getting table metadata. + */ + @Test + public void testGetTableMetadataSQLException() throws SQLException, InterruptedException { + when(mockClientPool.run(any(ClientPool.Action.class))) + .thenThrow(new SQLException("Fake SQL exception")); + Assert.assertThrows( + UncheckedSQLException.class, + () -> + snowflakeClient.getTableMetadata( + TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1"))); + } + + /** + * Any unexpected InterruptedException from the underlying connection will propagate out as an + * UncheckedInterruptedException when getting table metadata. 
+ */ + @Test + public void testGetTableMetadataInterruptedException() throws SQLException, InterruptedException { + when(mockClientPool.run(any(ClientPool.Action.class))) + .thenThrow(new InterruptedException("Fake interrupted exception")); + Assert.assertThrows( + UncheckedInterruptedException.class, + () -> + snowflakeClient.getTableMetadata( + TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1"))); + } + + /** Calling close() propagates to closing underlying client pool. */ + @Test + public void testClose() { + snowflakeClient.close(); + verify(mockClientPool).close(); + } +} diff --git a/spark/v3.1/build.gradle b/spark/v3.1/build.gradle index eca34afcbd02..c7861d36e555 100644 --- a/spark/v3.1/build.gradle +++ b/spark/v3.1/build.gradle @@ -213,6 +213,9 @@ project(':iceberg-spark:iceberg-spark-runtime-3.1_2.12') { implementation(project(':iceberg-nessie')) { exclude group: 'com.google.code.findbugs', module: 'jsr305' } + implementation (project(':iceberg-snowflake')) { + exclude group: 'net.snowflake' , module: 'snowflake-jdbc' + } integrationImplementation "org.apache.spark:spark-hive_2.12:${sparkVersion}" integrationImplementation 'org.junit.vintage:junit-vintage-engine' diff --git a/spark/v3.2/build.gradle b/spark/v3.2/build.gradle index 8de93d0df8ac..971decde100e 100644 --- a/spark/v3.2/build.gradle +++ b/spark/v3.2/build.gradle @@ -222,6 +222,9 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio implementation(project(':iceberg-nessie')) { exclude group: 'com.google.code.findbugs', module: 'jsr305' } + implementation (project(':iceberg-snowflake')) { + exclude group: 'net.snowflake' , module: 'snowflake-jdbc' + } integrationImplementation "org.scala-lang.modules:scala-collection-compat_${scalaVersion}" integrationImplementation "org.apache.spark:spark-hive_${scalaVersion}:${sparkVersion}" From 076a14abc7ccf1d74e610d2c296082662ec0c3c8 Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Thu, 15 Dec 2022 16:19:06 -0800 Subject: [PATCH 03/20] Add test { useJUnitPlatform() } block to iceberg-snowflake for consistency and future interoperability with inheriting from abstract unit test base classes.
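As context for this change: without useJUnitPlatform(), Gradle's test task defaults to the legacy JUnit 4 runner and will not discover JUnit 5 (Jupiter) tests. A minimal sketch of the kind of inherited test this setting enables; the class names here are hypothetical and not part of the patch:

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

// Shared cases live in an abstract base; JUnit 5 runs inherited @Test methods in each
// concrete subclass, but Gradle only discovers them when running on the JUnit Platform.
abstract class ExampleCatalogTestBase {
  protected abstract String catalogName();

  @Test
  void catalogNameIsNonEmpty() {
    Assertions.assertFalse(catalogName().isEmpty());
  }
}

class ExampleSnowflakeCatalogTest extends ExampleCatalogTestBase {
  @Override
  protected String catalogName() {
    return "snowflake_catalog";
  }
}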
--- build.gradle | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build.gradle b/build.gradle index 58a291ea7a00..a861494623d9 100644 --- a/build.gradle +++ b/build.gradle @@ -697,6 +697,10 @@ project(':iceberg-dell') { } project(':iceberg-snowflake') { + test { + useJUnitPlatform() + } + dependencies { implementation project(':iceberg-core') implementation project(':iceberg-common') From a7b5aa7841e4daad234d87b34443edac6da86768 Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Fri, 16 Dec 2022 16:14:03 -0800 Subject: [PATCH 04/20] Extract versions into versions.props per PR review --- build.gradle | 4 ++-- versions.props | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index a861494623d9..1ba89e465f9a 100644 --- a/build.gradle +++ b/build.gradle @@ -708,9 +708,9 @@ project(':iceberg-snowflake') { implementation project(':iceberg-aws') implementation "com.fasterxml.jackson.core:jackson-databind" implementation "com.fasterxml.jackson.core:jackson-core" - implementation "commons-dbutils:commons-dbutils:1.7" + implementation "commons-dbutils:commons-dbutils" - runtimeOnly("net.snowflake:snowflake-jdbc:3.13.22") + runtimeOnly("net.snowflake:snowflake-jdbc") testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts') } diff --git a/versions.props b/versions.props index 60e4035b50fe..df11d937fbcc 100644 --- a/versions.props +++ b/versions.props @@ -28,6 +28,8 @@ org.scala-lang.modules:scala-collection-compat_2.12 = 2.6.0 org.scala-lang.modules:scala-collection-compat_2.13 = 2.6.0 com.emc.ecs:object-client-bundle = 3.3.2 org.immutables:value = 2.9.2 +commons-dbutils:commons-dbutils = 1.7 +net.snowflake:snowflake-jdbc = 3.13.22 # test deps org.junit.vintage:junit-vintage-engine = 5.8.2 From dd5255c06f35e5007938f6d1b119a33e64e59f9c Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Fri, 16 Dec 2022 16:21:24 -0800 Subject: [PATCH 05/20] Misc test-related refactors per review suggestions -Convert unittests to all use assertj/Assertions for "fluent assertions" -Refactor test injection into overloaded initialize() method -Add test cases for close() propagation -Use CloseableGroup. 
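To sketch the refactored injection flow, a minimal example assumed to run inside a test method; FakeSnowflakeClient and InMemoryFileIO are the test doubles touched by this patch:

// The overloaded initialize() accepts caller-supplied dependencies directly, replacing
// the package-private setSnowflakeClient()/setFileIO() setters that this patch removes.
SnowflakeCatalog catalog = new SnowflakeCatalog();
FakeSnowflakeClient fakeClient = new FakeSnowflakeClient();
InMemoryFileIO fakeFileIO = new InMemoryFileIO();
catalog.initialize("test_catalog", fakeClient, fakeFileIO, Maps.newHashMap());

// close() now delegates to a CloseableGroup, so both injected dependencies get closed
// even if one close fails; note that close() itself may throw IOException.
catalog.close();
Assertions.assertThat(fakeClient.isClosed()).isTrue();
Assertions.assertThat(fakeFileIO.isClosed()).isTrue();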
--- .../snowflake/JdbcSnowflakeClient.java | 5 +- .../iceberg/snowflake/SnowflakeCatalog.java | 105 +++++++------- .../iceberg/snowflake/SnowflakeResources.java | 2 +- .../snowflake/FakeSnowflakeClient.java | 24 +++- .../iceberg/snowflake/InMemoryFileIO.java | 21 ++- .../snowflake/JdbcSnowflakeClientTest.java | 7 + .../snowflake/SnowflakeCatalogTest.java | 130 +++++++++++------- 7 files changed, 185 insertions(+), 109 deletions(-) diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java index d2870a94df18..b670e2330222 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java @@ -27,6 +27,7 @@ import org.apache.iceberg.jdbc.UncheckedInterruptedException; import org.apache.iceberg.jdbc.UncheckedSQLException; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.snowflake.entities.SnowflakeSchema; import org.apache.iceberg.snowflake.entities.SnowflakeTable; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; @@ -48,10 +49,12 @@ public class JdbcSnowflakeClient implements SnowflakeClient { private static final Logger LOG = LoggerFactory.getLogger(JdbcSnowflakeClient.class); private final JdbcClientPool connectionPool; - private QueryRunner queryRunner = new QueryRunner(true); + private QueryRunner queryRunner; JdbcSnowflakeClient(JdbcClientPool conn) { + Preconditions.checkArgument(null != conn, "JdbcClientPool must be non-null"); connectionPool = conn; + queryRunner = new QueryRunner(true); } @VisibleForTesting diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java index 40259b54ff21..3b1122bd2c7a 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java @@ -19,6 +19,7 @@ package org.apache.iceberg.snowflake; import java.io.Closeable; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; @@ -32,9 +33,9 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.hadoop.Configurable; +import org.apache.iceberg.io.CloseableGroup; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.jdbc.JdbcClientPool; -import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.snowflake.entities.SnowflakeSchema; @@ -47,24 +48,15 @@ public class SnowflakeCatalog extends BaseMetastoreCatalog private static final Logger LOG = LoggerFactory.getLogger(SnowflakeCatalog.class); + private CloseableGroup closeableGroup; private Object conf; - private String catalogName = SnowflakeResources.DEFAULT_CATALOG_NAME; - private Map catalogProperties = null; + private String catalogName; + private Map catalogProperties; private FileIO fileIO; private SnowflakeClient snowflakeClient; public SnowflakeCatalog() {} - @VisibleForTesting - void setSnowflakeClient(SnowflakeClient snowflakeClient) { - this.snowflakeClient 
= snowflakeClient; - } - - @VisibleForTesting - void setFileIO(FileIO fileIO) { - this.fileIO = fileIO; - } - @Override public List listTables(Namespace namespace) { LOG.debug("listTables with namespace: {}", namespace); @@ -97,49 +89,64 @@ public void renameTable(TableIdentifier from, TableIdentifier to) { @Override public void initialize(String name, Map properties) { - catalogProperties = properties; - - if (name != null) { - this.catalogName = name; + String uri = properties.get(CatalogProperties.URI); + Preconditions.checkNotNull(uri, "JDBC connection URI is required"); + try { + // We'll ensure the expected JDBC driver implementation class is initialized through + // reflection + // regardless of which classloader ends up using this JdbcSnowflakeClient, but we'll only + // warn if the expected driver fails to load, since users may use repackaged or custom + // JDBC drivers for Snowflake communication. + Class.forName(JdbcSnowflakeClient.EXPECTED_JDBC_IMPL); + } catch (ClassNotFoundException cnfe) { + LOG.warn( + "Failed to load expected JDBC SnowflakeDriver - if queries fail by failing" + + " to find a suitable driver for jdbc:snowflake:// URIs, you must add the Snowflake " + + " JDBC driver to your jars/packages", + cnfe); } + JdbcClientPool connectionPool = new JdbcClientPool(uri, properties); - if (snowflakeClient == null) { - String uri = properties.get(CatalogProperties.URI); - Preconditions.checkNotNull(uri, "JDBC connection URI is required"); - - try { - // We'll ensure the expected JDBC driver implementation class is initialized through - // reflection - // regardless of which classloader ends up using this JdbcSnowflakeClient, but we'll only - // warn if the expected driver fails to load, since users may use repackaged or custom - // JDBC drivers for Snowflake communcation. - Class.forName(JdbcSnowflakeClient.EXPECTED_JDBC_IMPL); - } catch (ClassNotFoundException cnfe) { - LOG.warn( - "Failed to load expected JDBC SnowflakeDriver - if queries fail by failing" - + " to find a suitable driver for jdbc:snowflake:// URIs, you must add the Snowflake " - + " JDBC driver to your jars/packages", - cnfe); - } - - JdbcClientPool connectionPool = new JdbcClientPool(uri, properties); - snowflakeClient = new JdbcSnowflakeClient(connectionPool); + String fileIOImpl = SnowflakeResources.DEFAULT_FILE_IO_IMPL; + if (properties.containsKey(CatalogProperties.FILE_IO_IMPL)) { + fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL); } - if (fileIO == null) { - String fileIOImpl = SnowflakeResources.DEFAULT_FILE_IO_IMPL; - - if (catalogProperties.containsKey(CatalogProperties.FILE_IO_IMPL)) { - fileIOImpl = catalogProperties.get(CatalogProperties.FILE_IO_IMPL); - } - - fileIO = CatalogUtil.loadFileIO(fileIOImpl, catalogProperties, conf); - } + initialize( + name, + new JdbcSnowflakeClient(connectionPool), + CatalogUtil.loadFileIO(fileIOImpl, properties, conf), + properties); + } + + /** + * Initialize using caller-supplied SnowflakeClient and FileIO.
+ * + * @param name The name of the catalog, defaults to "snowflake_catalog" + * @param snowflakeClient The client encapsulating network communication with Snowflake + * @param fileIO The {@link FileIO} to use for table operations + * @param properties The catalog options to use and propagate to dependencies + */ + @SuppressWarnings("checkstyle:HiddenField") + public void initialize( + String name, SnowflakeClient snowflakeClient, FileIO fileIO, Map properties) { + Preconditions.checkArgument(null != snowflakeClient, "snowflakeClient must be non-null"); + Preconditions.checkArgument(null != fileIO, "fileIO must be non-null"); + this.catalogName = name == null ? SnowflakeResources.DEFAULT_CATALOG_NAME : name; + this.snowflakeClient = snowflakeClient; + this.fileIO = fileIO; + this.catalogProperties = properties; + this.closeableGroup = new CloseableGroup(); + closeableGroup.addCloseable(snowflakeClient); + closeableGroup.addCloseable(fileIO); + closeableGroup.setSuppressCloseFailure(true); } @Override - public void close() { - snowflakeClient.close(); + public void close() throws IOException { + if (null != closeableGroup) { + closeableGroup.close(); + } } @Override diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java index 16669253939b..7e58b6a2301d 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java @@ -19,7 +19,7 @@ package org.apache.iceberg.snowflake; final class SnowflakeResources { - static final String DEFAULT_CATALOG_NAME = "snowlog"; + static final String DEFAULT_CATALOG_NAME = "snowflake_catalog"; static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.hadoop.HadoopFileIO"; static final int MAX_NAMESPACE_DEPTH = 2; static final int NAMESPACE_DB_LEVEL = 1; diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java index 1e5b723061e1..ff49b74458cb 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java @@ -34,6 +34,7 @@ public class FakeSnowflakeClient implements SnowflakeClient { // In-memory lookup by database/schema/tableName to table metadata. 
private Map>> databases = Maps.newTreeMap(); + private boolean closed = false; public FakeSnowflakeClient() {} @@ -43,6 +44,7 @@ public FakeSnowflakeClient() {} */ public void addTable( String database, String schema, String tableName, SnowflakeTableMetadata metadata) { + Preconditions.checkState(!closed, "Cannot call addTable after calling close()"); if (!databases.containsKey(database)) { databases.put(database, Maps.newTreeMap()); } @@ -56,6 +58,7 @@ public void addTable( @Override public List listSchemas(Namespace namespace) { + Preconditions.checkState(!closed, "Cannot call listSchemas after calling close()"); Preconditions.checkArgument( namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH, "Namespace {} must have namespace of length <= {}", @@ -77,7 +80,7 @@ public List listSchemas(Namespace namespace) { schemas.add(new SnowflakeSchema(dbName, schema)); } } else { - throw new UncheckedSQLException("Nonexistent database: '%s'", dbName); + throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); } } else { throw new IllegalArgumentException( @@ -89,6 +92,7 @@ public List listSchemas(Namespace namespace) { @Override public List listIcebergTables(Namespace namespace) { + Preconditions.checkState(!closed, "Cannot call listIcebergTables after calling close()"); Preconditions.checkArgument( namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH, "Namespace {} must have namespace of length <= {}", @@ -116,7 +120,7 @@ public List listIcebergTables(Namespace namespace) { } } } else { - throw new UncheckedSQLException("Nonexistent database: '%s'", dbName); + throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); } } else { String dbName = namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1); @@ -128,10 +132,10 @@ public List listIcebergTables(Namespace namespace) { } } else { throw new UncheckedSQLException( - "Nonexistent datbase.schema: '%s.%s'", dbName, schemaName); + "Object does not exist: database.schema: '%s.%s'", dbName, schemaName); } } else { - throw new UncheckedSQLException("Nonexistent database: '%s'", dbName); + throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); } } return tables; @@ -139,6 +143,8 @@ public List listIcebergTables(Namespace namespace) { @Override public SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier) { + Preconditions.checkState(!closed, "Cannot call getTableMetadata after calling close()"); + Namespace ns = tableIdentifier.namespace(); Preconditions.checkArgument( ns.length() == SnowflakeResources.MAX_NAMESPACE_DEPTH, @@ -150,11 +156,17 @@ public SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier) if (!databases.containsKey(dbName) || !databases.get(dbName).containsKey(schemaName) || !databases.get(dbName).get(schemaName).containsKey(tableIdentifier.name())) { - throw new UncheckedSQLException("Nonexistent object: '%s'", tableIdentifier); + throw new UncheckedSQLException("Object does not exist: object: '%s'", tableIdentifier); } return databases.get(dbName).get(schemaName).get(tableIdentifier.name()); } + public boolean isClosed() { + return closed; + } + @Override - public void close() {} + public void close() { + closed = true; + } } diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java index 3873375f8e89..813ae49b43de 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java +++ 
b/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java @@ -24,18 +24,22 @@ import org.apache.iceberg.io.InMemoryInputFile; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Maps; public class InMemoryFileIO implements FileIO { private Map inMemoryFiles = Maps.newHashMap(); + private boolean closed = false; public void addFile(String path, byte[] contents) { + Preconditions.checkState(!closed, "Cannot call addFile after calling close()"); inMemoryFiles.put(path, new InMemoryInputFile(path, contents)); } @Override public InputFile newInputFile(String path) { + Preconditions.checkState(!closed, "Cannot call newInputFile after calling close()"); if (!inMemoryFiles.containsKey(path)) { throw new NotFoundException("No in-memory file found for path: %s", path); } @@ -44,9 +48,22 @@ public InputFile newInputFile(String path) { @Override public OutputFile newOutputFile(String path) { - return null; + throw new UnsupportedOperationException( + String.format("newOutputFile not supported; attempted for path '%s'", path)); } @Override - public void deleteFile(String path) {} + public void deleteFile(String path) { + throw new UnsupportedOperationException( + String.format("deleteFile not supported; attempted for path '%s'", path)); + } + + public boolean isClosed() { + return closed; + } + + @Override + public void close() { + closed = true; + } } diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java index 0e5b881b1c29..4f6f9d32e4d1 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java @@ -89,6 +89,13 @@ public Object answer(InvocationOnMock invocation) throws Throwable { ArgumentMatchers.any()); } + @Test + public void testNullClientPoolInConstructor() { + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> new JdbcSnowflakeClient(null)) + .withMessageContaining("JdbcClientPool must be non-null"); + } + /** * For the root/empty Namespace, expect an underlying query to list schemas at the ACCOUNT level * with no query parameters. 
diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java index f8f82efb60eb..e66155e4e886 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java @@ -18,7 +18,7 @@ */ package org.apache.iceberg.snowflake; -import java.util.List; +import java.io.IOException; import java.util.Map; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; @@ -28,11 +28,10 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; import org.apache.iceberg.types.Types; -import org.junit.Assert; +import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Test; @@ -40,52 +39,53 @@ public class SnowflakeCatalogTest { static final String TEST_CATALOG_NAME = "slushLog"; private SnowflakeCatalog catalog; + private FakeSnowflakeClient fakeClient; + private InMemoryFileIO fakeFileIO; + private Map properties; @Before public void before() { catalog = new SnowflakeCatalog(); - FakeSnowflakeClient client = new FakeSnowflakeClient(); - client.addTable( + fakeClient = new FakeSnowflakeClient(); + fakeClient.addTable( "DB_1", "SCHEMA_1", "TAB_1", SnowflakeTableMetadata.parseJson( "{\"metadataLocation\":\"s3://tab1/metadata/v3.metadata.json\",\"status\":\"success\"}")); - client.addTable( + fakeClient.addTable( "DB_1", "SCHEMA_1", "TAB_2", SnowflakeTableMetadata.parseJson( "{\"metadataLocation\":\"s3://tab2/metadata/v1.metadata.json\",\"status\":\"success\"}")); - client.addTable( + fakeClient.addTable( "DB_2", "SCHEMA_2", "TAB_3", SnowflakeTableMetadata.parseJson( "{\"metadataLocation\":\"azure://myaccount.blob.core.windows.net/mycontainer/tab3/metadata/v334.metadata.json\",\"status\":\"success\"}")); - client.addTable( + fakeClient.addTable( "DB_2", "SCHEMA_2", "TAB_4", SnowflakeTableMetadata.parseJson( "{\"metadataLocation\":\"azure://myaccount.blob.core.windows.net/mycontainer/tab4/metadata/v323.metadata.json\",\"status\":\"success\"}")); - client.addTable( + fakeClient.addTable( "DB_3", "SCHEMA_3", "TAB_5", SnowflakeTableMetadata.parseJson( "{\"metadataLocation\":\"gcs://tab5/metadata/v793.metadata.json\",\"status\":\"success\"}")); - client.addTable( + fakeClient.addTable( "DB_3", "SCHEMA_4", "TAB_6", SnowflakeTableMetadata.parseJson( "{\"metadataLocation\":\"gcs://tab6/metadata/v123.metadata.json\",\"status\":\"success\"}")); - catalog.setSnowflakeClient(client); - - InMemoryFileIO fakeFileIO = new InMemoryFileIO(); + fakeFileIO = new InMemoryFileIO(); Schema schema = new Schema( @@ -115,29 +115,39 @@ public void before() { schema, partitionSpec, "gs://tab5/", ImmutableMap.of())) .getBytes()); - catalog.setFileIO(fakeFileIO); + properties = Maps.newHashMap(); + catalog.initialize(TEST_CATALOG_NAME, fakeClient, fakeFileIO, properties); + } - Map properties = Maps.newHashMap(); - catalog.initialize(TEST_CATALOG_NAME, properties); + @Test + public void testInitializeNullClient() { + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> catalog.initialize(TEST_CATALOG_NAME, null, fakeFileIO, 
properties)) + .withMessageContaining("snowflakeClient must be non-null"); + } + + @Test + public void testInitializeNullFileIO() { + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> catalog.initialize(TEST_CATALOG_NAME, fakeClient, null, properties)) + .withMessageContaining("fileIO must be non-null"); } @Test public void testListNamespace() { - List namespaces = catalog.listNamespaces(); - Assert.assertEquals( - Lists.newArrayList( + Assertions.assertThat(catalog.listNamespaces()) + .containsExactly( Namespace.of("DB_1", "SCHEMA_1"), Namespace.of("DB_2", "SCHEMA_2"), Namespace.of("DB_3", "SCHEMA_3"), - Namespace.of("DB_3", "SCHEMA_4")), - namespaces); + Namespace.of("DB_3", "SCHEMA_4")); } @Test public void testListNamespaceWithinDB() { String dbName = "DB_1"; - List namespaces = catalog.listNamespaces(Namespace.of(dbName)); - Assert.assertEquals(Lists.newArrayList(Namespace.of(dbName, "SCHEMA_1")), namespaces); + Assertions.assertThat(catalog.listNamespaces(Namespace.of(dbName))) + .containsExactly(Namespace.of(dbName, "SCHEMA_1")); } @Test @@ -145,7 +155,10 @@ public void testListNamespaceWithinNonExistentDB() { // Existence check for nonexistent parent namespaces is optional in the SupportsNamespaces // interface. String dbName = "NONEXISTENT_DB"; - Assert.assertThrows(RuntimeException.class, () -> catalog.listNamespaces(Namespace.of(dbName))); + Assertions.assertThatExceptionOfType(RuntimeException.class) + .isThrownBy(() -> catalog.listNamespaces(Namespace.of(dbName))) + .withMessageContaining("does not exist") + .withMessageContaining(dbName); } @Test @@ -154,78 +167,95 @@ public void testListNamespaceWithinSchema() { // a database.schema. String dbName = "DB_3"; String schemaName = "SCHEMA_4"; - Assert.assertThrows( - IllegalArgumentException.class, - () -> catalog.listNamespaces(Namespace.of(dbName, schemaName))); + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> catalog.listNamespaces(Namespace.of(dbName, schemaName))) + .withMessageContaining("more than 2 levels of namespace") + .withMessageContaining("DB_3.SCHEMA_4"); } @Test public void testListTables() { - List tables = catalog.listTables(Namespace.empty()); - Assert.assertEquals( - Lists.newArrayList( + Assertions.assertThat(catalog.listTables(Namespace.empty())) + .containsExactly( TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1"), TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_2"), TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_3"), TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_4"), TableIdentifier.of("DB_3", "SCHEMA_3", "TAB_5"), - TableIdentifier.of("DB_3", "SCHEMA_4", "TAB_6")), - tables); + TableIdentifier.of("DB_3", "SCHEMA_4", "TAB_6")); } @Test public void testListTablesWithinDB() { String dbName = "DB_1"; - List tables = catalog.listTables(Namespace.of(dbName)); - Assert.assertEquals( - Lists.newArrayList( + Assertions.assertThat(catalog.listTables(Namespace.of(dbName))) + .containsExactly( TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1"), - TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_2")), - tables); + TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_2")); } @Test public void testListTablesWithinNonexistentDB() { String dbName = "NONEXISTENT_DB"; - Assert.assertThrows(RuntimeException.class, () -> catalog.listTables(Namespace.of(dbName))); + Assertions.assertThatExceptionOfType(RuntimeException.class) + .isThrownBy(() -> catalog.listTables(Namespace.of(dbName))) + .withMessageContaining("does not exist") + .withMessageContaining(dbName); } @Test 
public void testListTablesWithinSchema() { String dbName = "DB_2"; String schemaName = "SCHEMA_2"; - List tables = catalog.listTables(Namespace.of(dbName, schemaName)); - Assert.assertEquals( - Lists.newArrayList( + Assertions.assertThat(catalog.listTables(Namespace.of(dbName, schemaName))) + .containsExactly( TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_3"), - TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_4")), - tables); + TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_4")); } @Test public void testListTablesWithinNonexistentSchema() { String dbName = "DB_2"; - String schemaName = "NONEXISTENT_DB"; - Assert.assertThrows( - RuntimeException.class, () -> catalog.listTables(Namespace.of(dbName, schemaName))); + String schemaName = "NONEXISTENT_SCHEMA"; + Assertions.assertThatExceptionOfType(RuntimeException.class) + .isThrownBy(() -> catalog.listTables(Namespace.of(dbName, schemaName))) + .withMessageContaining("does not exist") + .withMessageContaining("DB_2.NONEXISTENT_SCHEMA"); } @Test public void testLoadS3Table() { Table table = catalog.loadTable(TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TAB_1")); - Assert.assertEquals(table.location(), "s3://tab1/"); + Assertions.assertThat(table.location()).isEqualTo("s3://tab1/"); } @Test public void testLoadAzureTable() { Table table = catalog.loadTable(TableIdentifier.of(Namespace.of("DB_2", "SCHEMA_2"), "TAB_3")); - Assert.assertEquals( - table.location(), "wasbs://mycontainer@myaccount.blob.core.windows.net/tab1/"); + Assertions.assertThat(table.location()) + .isEqualTo("wasbs://mycontainer@myaccount.blob.core.windows.net/tab1/"); } @Test public void testLoadGcsTable() { Table table = catalog.loadTable(TableIdentifier.of(Namespace.of("DB_3", "SCHEMA_3"), "TAB_5")); - Assert.assertEquals(table.location(), "gs://tab5/"); + Assertions.assertThat(table.location()).isEqualTo("gs://tab5/"); + } + + @Test + public void testCloseBeforeInitialize() throws IOException { + catalog = new SnowflakeCatalog(); + catalog.close(); + } + + @Test + public void testClose() throws IOException { + catalog.close(); + Assertions.assertThat(fakeClient.isClosed()) + .overridingErrorMessage("expected close() to propagate to snowflakeClient") + .isTrue(); + Assertions.assertThat(fakeFileIO.isClosed()) + .overridingErrorMessage("expected close() to propagate to fileIO") + .isTrue(); } } From 500b36b90b6c59085dd61d24f8f2e56568fc9eef Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Fri, 16 Dec 2022 16:45:30 -0800 Subject: [PATCH 06/20] Fix unsupported behaviors of loadNamespaceMetadata and defaultWarehouseLocation --- .../apache/iceberg/snowflake/SnowflakeCatalog.java | 11 ++++++----- .../org/apache/iceberg/snowflake/SnowflakeClient.java | 3 --- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java index 3b1122bd2c7a..dd9174b72f28 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java @@ -37,7 +37,7 @@ import org.apache.iceberg.io.FileIO; import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.snowflake.entities.SnowflakeSchema; import
org.apache.iceberg.snowflake.entities.SnowflakeTable; import org.slf4j.Logger; @@ -176,9 +176,7 @@ public List listNamespaces(Namespace namespace) { public Map loadNamespaceMetadata(Namespace namespace) throws NoSuchNamespaceException { LOG.debug("loadNamespaceMetadata with namespace: {}", namespace); - Map nameSpaceMetadata = Maps.newHashMap(); - nameSpaceMetadata.put("name", namespace.toString()); - return nameSpaceMetadata; + return ImmutableMap.of(); } @Override @@ -213,7 +211,10 @@ protected TableOperations newTableOps(TableIdentifier tableIdentifier) { @Override protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { - return null; + throw new UnsupportedOperationException( + String.format( + "defaultWarehouseLocation not supported; attempted for tableIdentifier '%s'", + tableIdentifier)); } @Override diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java index 560acedbd45e..7456ca3771b7 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java @@ -38,7 +38,4 @@ public interface SnowflakeClient extends Closeable { List listIcebergTables(Namespace namespace); SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier); - - @Override - void close(); } From ad2c55f668aa9fee6a03d4cfa17c8292765a81f6 Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Fri, 16 Dec 2022 17:11:51 -0800 Subject: [PATCH 07/20] Move TableIdentifier checks out of newTableOps into the SnowflakeTableOperations class itself, add test case. --- .../iceberg/snowflake/SnowflakeCatalog.java | 6 ------ .../snowflake/SnowflakeTableOperations.java | 5 +++++ .../iceberg/snowflake/SnowflakeCatalogTest.java | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java index dd9174b72f28..da7b6699dbda 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java @@ -199,12 +199,6 @@ public boolean removeProperties(Namespace namespace, Set properties) { @Override protected TableOperations newTableOps(TableIdentifier tableIdentifier) { - Preconditions.checkArgument( - tableIdentifier.namespace().length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH, - "Snowflake doesn't support more than %s levels of namespace, got %s", - SnowflakeResources.MAX_NAMESPACE_DEPTH, - tableIdentifier); - return new SnowflakeTableOperations( snowflakeClient, fileIO, catalogProperties, catalogName, tableIdentifier); } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java index 0da132331a61..49ca3af2eebd 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java @@ -46,6 +46,11 @@ protected SnowflakeTableOperations( Map properties, String catalogName, TableIdentifier tableIdentifier) { + Preconditions.checkArgument( + tableIdentifier.namespace().length() == SnowflakeResources.MAX_NAMESPACE_DEPTH, + "tableIdentifier must be exactly %s levels of namespace, got %s",
SnowflakeResources.MAX_NAMESPACE_DEPTH, + tableIdentifier); this.snowflakeClient = snowflakeClient; this.fileIO = fileIO; this.catalogProperties = properties; diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java index e66155e4e886..ef5a37067de6 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java @@ -242,6 +242,22 @@ public void testLoadGcsTable() { Assertions.assertThat(table.location()).isEqualTo("gs://tab5/"); } + @Test + public void testLoadTableWithMalformedTableIdentifier() { + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy( + () -> + catalog.loadTable( + TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1", "BAD_NS_LEVEL"), "TAB_1"))) + .withMessageContaining("levels of namespace") + .withMessageContaining("DB_1.SCHEMA_1.BAD_NS_LEVEL.TAB_1"); + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy( + () -> catalog.loadTable(TableIdentifier.of(Namespace.of("DB_WITHOUT_SCHEMA"), "TAB_1"))) + .withMessageContaining("levels of namespace") + .withMessageContaining("DB_WITHOUT_SCHEMA.TAB_1"); + } + @Test public void testCloseBeforeInitialize() throws IOException { catalog = new SnowflakeCatalog(); From 7f13674449d4ce483670670b9d27bb8da7881bf9 Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Fri, 16 Dec 2022 19:34:22 -0800 Subject: [PATCH 08/20] Refactor out any Namespace-related business logic from the lower SnowflakeClient/JdbcSnowflakeClient layers and merge SnowflakeTable and SnowflakeSchema into a single SnowflakeIdentifier that also encompasses ROOT and DATABASE level identifiers. A SnowflakeIdentifier thus functions like a type-checked/constrained Iceberg TableIdentifier, and eliminates any tight coupling between a SnowflakeClient and Catalog business logic. Parsing of Namespace numerical levels into a SnowflakeIdentifier is now fully encapsulated in NamespaceHelpers so that callsites don't duplicate namespace-handling/validation logic. 
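To make the new mapping concrete, a short sketch of how Namespace depth resolves to a SnowflakeIdentifier via the NamespaceHelpers method introduced below, assuming the factory methods shown later in this patch:

// Namespace depth alone determines the identifier type.
SnowflakeIdentifier root = NamespaceHelpers.getSnowflakeIdentifierForNamespace(Namespace.empty()); // type ROOT
SnowflakeIdentifier db = NamespaceHelpers.getSnowflakeIdentifierForNamespace(Namespace.of("DB_1")); // type DATABASE
SnowflakeIdentifier schema = NamespaceHelpers.getSnowflakeIdentifierForNamespace(Namespace.of("DB_1", "SCHEMA_1")); // type SCHEMA

// Anything deeper than database.schema is rejected up front:
NamespaceHelpers.getSnowflakeIdentifierForNamespace(Namespace.of("DB_1", "SCHEMA_1", "EXTRA")); // IllegalArgumentException

This keeps the client layer free of namespace arithmetic: listSchemas and listIcebergTables simply switch on the identifier's type (ROOT, DATABASE, SCHEMA) to choose the matching SHOW ... IN scope.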
--- .../snowflake/JdbcSnowflakeClient.java | 91 +++++----- .../iceberg/snowflake/NamespaceHelpers.java | 86 +++++++++ .../iceberg/snowflake/SnowflakeCatalog.java | 43 +++-- .../iceberg/snowflake/SnowflakeClient.java | 29 ++- .../iceberg/snowflake/SnowflakeResources.java | 29 --- .../snowflake/SnowflakeTableOperations.java | 15 +- .../entities/SnowflakeIdentifier.java | 167 ++++++++++++++++++ .../snowflake/entities/SnowflakeSchema.java | 77 -------- .../snowflake/entities/SnowflakeTable.java | 85 --------- .../snowflake/FakeSnowflakeClient.java | 158 ++++++++--------- .../snowflake/JdbcSnowflakeClientTest.java | 116 ++++++------ .../snowflake/SnowflakeCatalogTest.java | 8 +- 12 files changed, 499 insertions(+), 405 deletions(-) create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java delete mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java create mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeIdentifier.java delete mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java delete mode 100644 snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java index b670e2330222..1e492f5c819f 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java @@ -21,15 +21,12 @@ import java.sql.SQLException; import java.util.List; import org.apache.commons.dbutils.QueryRunner; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.jdbc.UncheckedInterruptedException; import org.apache.iceberg.jdbc.UncheckedSQLException; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.snowflake.entities.SnowflakeSchema; -import org.apache.iceberg.snowflake.entities.SnowflakeTable; +import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,33 +60,38 @@ void setQueryRunner(QueryRunner queryRunner) { } @Override - public List listSchemas(Namespace namespace) { + public List listSchemas(SnowflakeIdentifier scope) { StringBuilder baseQuery = new StringBuilder("SHOW SCHEMAS"); Object[] queryParams = null; - if (namespace == null || namespace.isEmpty()) { - // for empty or null namespace search for all schemas at account level where the user - // has access to list. - baseQuery.append(" IN ACCOUNT"); - } else { - // otherwise restrict listing of schema within the database. 
- baseQuery.append(" IN DATABASE IDENTIFIER(?)"); - queryParams = new Object[] {namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1)}; + switch (scope.getType()) { + case ROOT: + // account-level listing + baseQuery.append(" IN ACCOUNT"); + break; + case DATABASE: + // database-level listing + baseQuery.append(" IN DATABASE IDENTIFIER(?)"); + queryParams = new Object[] {scope.toIdentifierString()}; + break; + default: + throw new IllegalArgumentException( + String.format("Unsupported scope type for listSchemas: %s", scope)); } final String finalQuery = baseQuery.toString(); final Object[] finalQueryParams = queryParams; - List schemas; + List schemas; try { schemas = connectionPool.run( conn -> queryRunner.query( - conn, finalQuery, SnowflakeSchema.createHandler(), finalQueryParams)); + conn, + finalQuery, + SnowflakeIdentifier.createSchemaHandler(), + finalQueryParams)); } catch (SQLException e) { - throw new UncheckedSQLException( - e, - "Failed to list schemas for namespace %s", - namespace != null ? namespace.toString() : ""); + throw new UncheckedSQLException(e, "Failed to list schemas for scope %s", scope); } catch (InterruptedException e) { throw new UncheckedInterruptedException(e, "Interrupted while listing schemas"); } @@ -97,40 +99,43 @@ public List listSchemas(Namespace namespace) { } @Override - public List listIcebergTables(Namespace namespace) { + public List listIcebergTables(SnowflakeIdentifier scope) { StringBuilder baseQuery = new StringBuilder("SHOW ICEBERG TABLES"); Object[] queryParams = null; - if (namespace.length() == SnowflakeResources.MAX_NAMESPACE_DEPTH) { - // For two level namespace, search for iceberg tables within the given schema. - baseQuery.append(" IN SCHEMA IDENTIFIER(?)"); - queryParams = - new Object[] { - String.format( - "%s.%s", - namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1), - namespace.level(SnowflakeResources.NAMESPACE_SCHEMA_LEVEL - 1)) - }; - } else if (namespace.length() == SnowflakeResources.NAMESPACE_DB_LEVEL) { - // For one level namespace, search for iceberg tables within the given database. - baseQuery.append(" IN DATABASE IDENTIFIER(?)"); - queryParams = new Object[] {namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1)}; - } else { - // For empty or db level namespace, search at account level. 
- baseQuery.append(" IN ACCOUNT"); + switch (scope.getType()) { + case ROOT: + // account-level listing + baseQuery.append(" IN ACCOUNT"); + break; + case DATABASE: + // database-level listing + baseQuery.append(" IN DATABASE IDENTIFIER(?)"); + queryParams = new Object[] {scope.toIdentifierString()}; + break; + case SCHEMA: + // schema-level listing + baseQuery.append(" IN SCHEMA IDENTIFIER(?)"); + queryParams = new Object[] {scope.toIdentifierString()}; + break; + default: + throw new IllegalArgumentException( + String.format("Unsupported scope type for listIcebergTables: %s", scope)); } final String finalQuery = baseQuery.toString(); final Object[] finalQueryParams = queryParams; - List tables; + List tables; try { tables = connectionPool.run( conn -> queryRunner.query( - conn, finalQuery, SnowflakeTable.createHandler(), finalQueryParams)); + conn, + finalQuery, + SnowflakeIdentifier.createTableHandler(), + finalQueryParams)); } catch (SQLException e) { - throw new UncheckedSQLException( - e, "Failed to list tables for namespace %s", namespace.toString()); + throw new UncheckedSQLException(e, "Failed to list tables for scope %s", scope.toString()); } catch (InterruptedException e) { throw new UncheckedInterruptedException(e, "Interrupted while listing tables"); } @@ -138,7 +143,7 @@ public List listIcebergTables(Namespace namespace) { } @Override - public SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier) { + public SnowflakeTableMetadata getTableMetadata(SnowflakeIdentifier tableIdentifier) { SnowflakeTableMetadata tableMeta; try { final String finalQuery = "SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"; @@ -149,7 +154,7 @@ public SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier) conn, finalQuery, SnowflakeTableMetadata.createHandler(), - tableIdentifier.toString())); + tableIdentifier.toIdentifierString())); } catch (SQLException e) { throw new UncheckedSQLException( e, "Failed to get table metadata for %s", tableIdentifier.toString()); diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java new file mode 100644 index 000000000000..a8a9fa4de3df --- /dev/null +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.snowflake; + +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class NamespaceHelpers { + private static final int MAX_NAMESPACE_DEPTH = 2; + private static final int NAMESPACE_ROOT_LEVEL = 0; + private static final int NAMESPACE_DB_LEVEL = 1; + private static final int NAMESPACE_SCHEMA_LEVEL = 2; + + private static final Logger LOG = LoggerFactory.getLogger(NamespaceHelpers.class); + + /** + * Converts a Namespace into a SnowflakeIdentifier representing ROOT, a DATABASE, or a SCHEMA. + * + * @throws IllegalArgumentException if the namespace is not a supported depth. + */ + public static SnowflakeIdentifier getSnowflakeIdentifierForNamespace(Namespace namespace) { + SnowflakeIdentifier identifier = null; + switch (namespace.length()) { + case NAMESPACE_ROOT_LEVEL: + identifier = SnowflakeIdentifier.ofRoot(); + break; + case NAMESPACE_DB_LEVEL: + identifier = SnowflakeIdentifier.ofDatabase(namespace.level(NAMESPACE_DB_LEVEL - 1)); + break; + case NAMESPACE_SCHEMA_LEVEL: + identifier = + SnowflakeIdentifier.ofSchema( + namespace.level(NAMESPACE_DB_LEVEL - 1), + namespace.level(NAMESPACE_SCHEMA_LEVEL - 1)); + break; + default: + throw new IllegalArgumentException( + String.format( + "Snowflake max namespace level is %d, got namespace '%s'", + MAX_NAMESPACE_DEPTH, namespace)); + } + LOG.debug("getSnowflakeIdentifierForNamespace({}) -> {}", namespace, identifier); + return identifier; + } + + /** + * Converts a TableIdentifier into a SnowflakeIdentifier of type TABLE; the identifier must have + * exactly the right namespace depth to represent a fully-qualified Snowflake table identifier. 
+ */ + public static SnowflakeIdentifier getSnowflakeIdentifierForTableIdentifier( + TableIdentifier identifier) { + SnowflakeIdentifier namespaceScope = getSnowflakeIdentifierForNamespace(identifier.namespace()); + Preconditions.checkArgument( + namespaceScope.getType() == SnowflakeIdentifier.Type.SCHEMA, + "Namespace portion of '%s' must be at the SCHEMA level, got namespaceScope '%s'", + identifier, + namespaceScope); + SnowflakeIdentifier ret = + SnowflakeIdentifier.ofTable( + namespaceScope.getDatabaseName(), namespaceScope.getSchemaName(), identifier.name()); + LOG.debug("getSnowflakeIdentifierForTableIdentifier({}) -> {}", identifier, ret); + return ret; + } + + private NamespaceHelpers() {} +} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java index da7b6699dbda..4a68e14ee203 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java @@ -38,13 +38,14 @@ import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.snowflake.entities.SnowflakeSchema; -import org.apache.iceberg.snowflake.entities.SnowflakeTable; +import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class SnowflakeCatalog extends BaseMetastoreCatalog implements Closeable, SupportsNamespaces, Configurable { + public static final String DEFAULT_CATALOG_NAME = "snowflake_catalog"; + public static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.hadoop.HadoopFileIO"; private static final Logger LOG = LoggerFactory.getLogger(SnowflakeCatalog.class); @@ -60,18 +61,22 @@ public SnowflakeCatalog() {} @Override public List listTables(Namespace namespace) { LOG.debug("listTables with namespace: {}", namespace); + SnowflakeIdentifier scope = NamespaceHelpers.getSnowflakeIdentifierForNamespace(namespace); Preconditions.checkArgument( - namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH, - "Snowflake doesn't support more than %s levels of namespace, got %s", - SnowflakeResources.MAX_NAMESPACE_DEPTH, + scope.getType() == SnowflakeIdentifier.Type.ROOT + || scope.getType() == SnowflakeIdentifier.Type.DATABASE + || scope.getType() == SnowflakeIdentifier.Type.SCHEMA, + "listTables must be at ROOT, DATABASE, or SCHEMA level; got %s from namespace %s", + scope, namespace); - List sfTables = snowflakeClient.listIcebergTables(namespace); + List sfTables = snowflakeClient.listIcebergTables(scope); return sfTables.stream() .map( table -> - TableIdentifier.of(table.getDatabase(), table.getSchemaName(), table.getName())) + TableIdentifier.of( + table.getDatabaseName(), table.getSchemaName(), table.getTableName())) .collect(Collectors.toList()); } @@ -107,7 +112,7 @@ public void initialize(String name, Map properties) { } JdbcClientPool connectionPool = new JdbcClientPool(uri, properties); - String fileIOImpl = SnowflakeResources.DEFAULT_FILE_IO_IMPL; + String fileIOImpl = DEFAULT_FILE_IO_IMPL; if (properties.containsKey(CatalogProperties.FILE_IO_IMPL)) { fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL); } @@ -132,7 +137,7 @@ public void initialize( String name, SnowflakeClient snowflakeClient, FileIO fileIO, Map properties) { Preconditions.checkArgument(null != 
snowflakeClient, "snowflakeClient must be non-null"); Preconditions.checkArgument(null != fileIO, "fileIO must be non-null"); - this.catalogName = name == null ? SnowflakeResources.DEFAULT_CATALOG_NAME : name; + this.catalogName = name == null ? DEFAULT_CATALOG_NAME : name; this.snowflakeClient = snowflakeClient; this.fileIO = fileIO; this.catalogProperties = properties; @@ -158,16 +163,26 @@ public void createNamespace(Namespace namespace, Map metadata) { @Override public List listNamespaces(Namespace namespace) { LOG.debug("listNamespaces with namespace: {}", namespace); + SnowflakeIdentifier scope = NamespaceHelpers.getSnowflakeIdentifierForNamespace(namespace); Preconditions.checkArgument( - namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH - 1, - "Snowflake doesn't support more than %s levels of namespace, tried to list under %s", - SnowflakeResources.MAX_NAMESPACE_DEPTH, + scope.getType() == SnowflakeIdentifier.Type.ROOT + || scope.getType() == SnowflakeIdentifier.Type.DATABASE, + "listNamespaces must be at either ROOT or DATABASE level; got %s from namespace %s", + scope, namespace); - List sfSchemas = snowflakeClient.listSchemas(namespace); + List sfSchemas = snowflakeClient.listSchemas(scope); List namespaceList = sfSchemas.stream() - .map(schema -> Namespace.of(schema.getDatabase(), schema.getName())) + .map( + schema -> { + Preconditions.checkState( + schema.getType() == SnowflakeIdentifier.Type.SCHEMA, + "Got identifier of type %s from listSchemas for %s", + schema.getType(), + namespace); + return Namespace.of(schema.getDatabaseName(), schema.getSchemaName()); + }) .collect(Collectors.toList()); return namespaceList; } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java index 7456ca3771b7..f2212c59d95e 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java @@ -20,10 +20,7 @@ import java.io.Closeable; import java.util.List; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.snowflake.entities.SnowflakeSchema; -import org.apache.iceberg.snowflake.entities.SnowflakeTable; +import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; /** @@ -33,9 +30,27 @@ * other underlying libraries/protocols. */ public interface SnowflakeClient extends Closeable { - List listSchemas(Namespace namespace); + /** + * Lists all Snowflake schemas within a given scope. Returned SnowflakeIdentifiers must have + * getType() == SnowflakeIdentifier.Type.SCHEMA. + * + * @param scope The scope in which to list, which may be ROOT or a single DATABASE. + */ + List listSchemas(SnowflakeIdentifier scope); - List listIcebergTables(Namespace namespace); + /** + * Lists all Snowflake Iceberg tables within a given scope. Returned SnowflakeIdentifiers must + * have getType() == SnowflakeIdentifier.Type.TABLE. + * + * @param scope The scope in which to list, which may be ROOT, a DATABASE, or a SCHEMA. + */ + List listIcebergTables(SnowflakeIdentifier scope); - SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier); + /** + * Returns Snowflake-level metadata containing locations to more detailed metadata. + * + * @param tableIdentifier The fully-qualified identifier that must be of type + * SnowflakeIdentifier.Type.TABLE. 
+ */ + SnowflakeTableMetadata getTableMetadata(SnowflakeIdentifier tableIdentifier); } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java deleted file mode 100644 index 7e58b6a2301d..000000000000 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeResources.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.snowflake; - -final class SnowflakeResources { - static final String DEFAULT_CATALOG_NAME = "snowflake_catalog"; - static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.hadoop.HadoopFileIO"; - static final int MAX_NAMESPACE_DEPTH = 2; - static final int NAMESPACE_DB_LEVEL = 1; - static final int NAMESPACE_SCHEMA_LEVEL = 2; - - private SnowflakeResources() {} -} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java index 49ca3af2eebd..20ab4fbd28d2 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java @@ -24,6 +24,7 @@ import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,6 +36,7 @@ class SnowflakeTableOperations extends BaseMetastoreTableOperations { private final FileIO fileIO; private final TableIdentifier tableIdentifier; + private final SnowflakeIdentifier snowflakeIdentifierForTable; private final SnowflakeClient snowflakeClient; @@ -46,16 +48,13 @@ protected SnowflakeTableOperations( Map properties, String catalogName, TableIdentifier tableIdentifier) { - Preconditions.checkArgument( - tableIdentifier.namespace().length() == SnowflakeResources.MAX_NAMESPACE_DEPTH, - "tableIdentifier must be exactly %s levels of namespace, got %s", - SnowflakeResources.MAX_NAMESPACE_DEPTH, - tableIdentifier); this.snowflakeClient = snowflakeClient; this.fileIO = fileIO; this.catalogProperties = properties; this.catalogName = catalogName; this.tableIdentifier = tableIdentifier; + this.snowflakeIdentifierForTable = + NamespaceHelpers.getSnowflakeIdentifierForTableIdentifier(tableIdentifier); } @Override @@ -81,17 +80,17 @@ protected String tableName() { } private String getTableMetadataLocation() { - SnowflakeTableMetadata metadata = 
snowflakeClient.getTableMetadata(tableIdentifier); + SnowflakeTableMetadata metadata = snowflakeClient.getTableMetadata(snowflakeIdentifierForTable); if (metadata == null) { - throw new NoSuchTableException("Cannot find table %s", tableIdentifier); + throw new NoSuchTableException("Cannot find table %s", snowflakeIdentifierForTable); } if (!metadata.getStatus().equals("success")) { LOG.warn( "Got non-successful table metadata: {} with metadataLocation {} for table {}", metadata.getStatus(), metadata.getIcebergMetadataLocation(), - tableIdentifier); + snowflakeIdentifierForTable); } return metadata.getIcebergMetadataLocation(); } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeIdentifier.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeIdentifier.java new file mode 100644 index 000000000000..ff65d40cffe0 --- /dev/null +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeIdentifier.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.snowflake.entities; + +import java.util.List; +import org.apache.commons.dbutils.ResultSetHandler; +import org.apache.iceberg.relocated.com.google.common.base.Objects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; + +public class SnowflakeIdentifier { + public enum Type { + ROOT, + DATABASE, + SCHEMA, + TABLE + } + + private String databaseName; + private String schemaName; + private String tableName; + + protected SnowflakeIdentifier(String databaseName, String schemaName, String tableName) { + this.databaseName = databaseName; + this.schemaName = schemaName; + this.tableName = tableName; + } + + public static SnowflakeIdentifier ofRoot() { + return new SnowflakeIdentifier(null, null, null); + } + + public static SnowflakeIdentifier ofDatabase(String databaseName) { + Preconditions.checkArgument(null != databaseName, "databaseName must be non-null"); + return new SnowflakeIdentifier(databaseName, null, null); + } + + public static SnowflakeIdentifier ofSchema(String databaseName, String schemaName) { + Preconditions.checkArgument(null != databaseName, "databaseName must be non-null"); + Preconditions.checkArgument(null != schemaName, "schemaName must be non-null"); + return new SnowflakeIdentifier(databaseName, schemaName, null); + } + + public static SnowflakeIdentifier ofTable( + String databaseName, String schemaName, String tableName) { + Preconditions.checkArgument(null != databaseName, "databaseName must be non-null"); + Preconditions.checkArgument(null != schemaName, "schemaName must be non-null"); + Preconditions.checkArgument(null != tableName, "tableName must be non-null"); + return new SnowflakeIdentifier(databaseName, schemaName, tableName); + } + + /** + * If type is TABLE, expect non-null databaseName, schemaName, and tableName. If type is SCHEMA, + * expect non-null databaseName and schemaName. If type is DATABASE, expect non-null databaseName. + * If type is ROOT, expect all of databaseName, schemaName, and tableName to be null. + */ + public Type getType() { + if (null != tableName) { + return Type.TABLE; + } else if (null != schemaName) { + return Type.SCHEMA; + } else if (null != databaseName) { + return Type.DATABASE; + } else { + return Type.ROOT; + } + } + + public String getTableName() { + return tableName; + } + + public String getDatabaseName() { + return databaseName; + } + + public String getSchemaName() { + return schemaName; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof SnowflakeIdentifier)) { + return false; + } + + SnowflakeIdentifier that = (SnowflakeIdentifier) o; + return Objects.equal(this.databaseName, that.databaseName) + && Objects.equal(this.schemaName, that.schemaName) + && Objects.equal(this.tableName, that.tableName); + } + + @Override + public int hashCode() { + return Objects.hashCode(databaseName, schemaName, tableName); + } + + /** Returns this identifier as a String suitable for use in a Snowflake IDENTIFIER param. 
*/ + public String toIdentifierString() { + switch (getType()) { + case TABLE: + return String.format("%s.%s.%s", databaseName, schemaName, tableName); + case SCHEMA: + return String.format("%s.%s", databaseName, schemaName); + case DATABASE: + return databaseName; + default: + return ""; + } + } + + @Override + public String toString() { + return String.format("%s: '%s'", getType(), toIdentifierString()); + } + + /** + * Expects to handle ResultSets representing fully-qualified Snowflake Schema identifiers, + * containing "database_name" and "name" (representing schemaName). + */ + public static ResultSetHandler> createSchemaHandler() { + return rs -> { + List schemas = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("database_name"); + String schemaName = rs.getString("name"); + schemas.add(SnowflakeIdentifier.ofSchema(databaseName, schemaName)); + } + return schemas; + }; + } + + /** + * Expects to handle ResultSets representing fully-qualified Snowflake Table identifiers, + * containing "database_name", "schema_name", and "name" (representing tableName). + */ + public static ResultSetHandler> createTableHandler() { + return rs -> { + List tables = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("database_name"); + String schemaName = rs.getString("schema_name"); + String tableName = rs.getString("name"); + tables.add(SnowflakeIdentifier.ofTable(databaseName, schemaName, tableName)); + } + return tables; + }; + } +} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java deleted file mode 100644 index b8acccff6f54..000000000000 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeSchema.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.snowflake.entities; - -import java.util.List; -import org.apache.commons.dbutils.ResultSetHandler; -import org.apache.iceberg.relocated.com.google.common.base.Objects; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; - -public class SnowflakeSchema { - private String name; - private String databaseName; - - public SnowflakeSchema(String databaseName, String name) { - this.databaseName = databaseName; - this.name = name; - } - - public String getName() { - return name; - } - - public String getDatabase() { - return databaseName; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } else if (!(o instanceof SnowflakeSchema)) { - return false; - } - - SnowflakeSchema that = (SnowflakeSchema) o; - return Objects.equal(this.databaseName, that.databaseName) - && Objects.equal(this.name, that.name); - } - - @Override - public int hashCode() { - return Objects.hashCode(databaseName, name); - } - - @Override - public String toString() { - return String.format("%s.%s", databaseName, name); - } - - public static ResultSetHandler> createHandler() { - return rs -> { - List schemas = Lists.newArrayList(); - while (rs.next()) { - String databaseName = rs.getString("database_name"); - String name = rs.getString("name"); - schemas.add(new SnowflakeSchema(databaseName, name)); - } - return schemas; - }; - } -} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java deleted file mode 100644 index fbb8ecd5cac5..000000000000 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTable.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.snowflake.entities; - -import java.util.List; -import org.apache.commons.dbutils.ResultSetHandler; -import org.apache.iceberg.relocated.com.google.common.base.Objects; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; - -public class SnowflakeTable { - private String databaseName; - private String schemaName; - private String name; - - public SnowflakeTable(String databaseName, String schemaName, String name) { - this.databaseName = databaseName; - this.schemaName = schemaName; - this.name = name; - } - - public String getName() { - return name; - } - - public String getDatabase() { - return databaseName; - } - - public String getSchemaName() { - return schemaName; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } else if (!(o instanceof SnowflakeTable)) { - return false; - } - - SnowflakeTable that = (SnowflakeTable) o; - return Objects.equal(this.databaseName, that.databaseName) - && Objects.equal(this.schemaName, that.schemaName) - && Objects.equal(this.name, that.name); - } - - @Override - public int hashCode() { - return Objects.hashCode(databaseName, schemaName, name); - } - - @Override - public String toString() { - return String.format("%s.%s.%s", databaseName, schemaName, name); - } - - public static ResultSetHandler> createHandler() { - return rs -> { - List tables = Lists.newArrayList(); - while (rs.next()) { - String databaseName = rs.getString("database_name"); - String schemaName = rs.getString("schema_name"); - String name = rs.getString("name"); - tables.add(new SnowflakeTable(databaseName, schemaName, name)); - } - return tables; - }; - } -} diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java index ff49b74458cb..50b8d5e3b388 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java @@ -20,14 +20,11 @@ import java.util.List; import java.util.Map; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.jdbc.UncheckedSQLException; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.snowflake.entities.SnowflakeSchema; -import org.apache.iceberg.snowflake.entities.SnowflakeTable; +import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; public class FakeSnowflakeClient implements SnowflakeClient { @@ -57,108 +54,111 @@ public void addTable( } @Override - public List listSchemas(Namespace namespace) { + public List listSchemas(SnowflakeIdentifier scope) { Preconditions.checkState(!closed, "Cannot call listSchemas after calling close()"); - Preconditions.checkArgument( - namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH, - "Namespace {} must have namespace of length <= {}", - namespace, - SnowflakeResources.MAX_NAMESPACE_DEPTH); - List schemas = Lists.newArrayList(); - if (namespace.length() == 0) { - // "account-level" listing. 
- for (Map.Entry>> db : - databases.entrySet()) { - for (String schema : db.getValue().keySet()) { - schemas.add(new SnowflakeSchema(db.getKey(), schema)); + List schemas = Lists.newArrayList(); + switch (scope.getType()) { + case ROOT: + // "account-level" listing. + for (Map.Entry>> db : + databases.entrySet()) { + for (String schema : db.getValue().keySet()) { + schemas.add(SnowflakeIdentifier.ofSchema(db.getKey(), schema)); + } } - } - } else if (namespace.length() == SnowflakeResources.NAMESPACE_DB_LEVEL) { - String dbName = namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1); - if (databases.containsKey(dbName)) { - for (String schema : databases.get(dbName).keySet()) { - schemas.add(new SnowflakeSchema(dbName, schema)); + break; + case DATABASE: + String dbName = scope.getDatabaseName(); + if (databases.containsKey(dbName)) { + for (String schema : databases.get(dbName).keySet()) { + schemas.add(SnowflakeIdentifier.ofSchema(dbName, schema)); + } + } else { + throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); } - } else { - throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); - } - } else { - throw new IllegalArgumentException( - String.format( - "Tried to listSchemas using a namespace with too many levels: '%s'", namespace)); + break; + default: + throw new IllegalArgumentException( + String.format("Unsupported scope type for listSchemas: '%s'", scope)); } return schemas; } @Override - public List listIcebergTables(Namespace namespace) { + public List listIcebergTables(SnowflakeIdentifier scope) { Preconditions.checkState(!closed, "Cannot call listIcebergTables after calling close()"); - Preconditions.checkArgument( - namespace.length() <= SnowflakeResources.MAX_NAMESPACE_DEPTH, - "Namespace {} must have namespace of length <= {}", - namespace, - SnowflakeResources.MAX_NAMESPACE_DEPTH); - List tables = Lists.newArrayList(); - if (namespace.length() == 0) { - // "account-level" listing. - for (Map.Entry>> db : - databases.entrySet()) { - for (Map.Entry> schema : - db.getValue().entrySet()) { - for (String tableName : schema.getValue().keySet()) { - tables.add(new SnowflakeTable(db.getKey(), schema.getKey(), tableName)); + List tables = Lists.newArrayList(); + switch (scope.getType()) { + case ROOT: + { + // "account-level" listing. 
+ for (Map.Entry>> db : + databases.entrySet()) { + for (Map.Entry> schema : + db.getValue().entrySet()) { + for (String tableName : schema.getValue().keySet()) { + tables.add(SnowflakeIdentifier.ofTable(db.getKey(), schema.getKey(), tableName)); + } + } + } + break; } - } - } else if (namespace.length() == SnowflakeResources.NAMESPACE_DB_LEVEL) { - String dbName = namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1); - if (databases.containsKey(dbName)) { - for (Map.Entry> schema : - databases.get(dbName).entrySet()) { - for (String tableName : schema.getValue().keySet()) { - tables.add(new SnowflakeTable(dbName, schema.getKey(), tableName)); + case DATABASE: + { + String dbName = scope.getDatabaseName(); + if (databases.containsKey(dbName)) { + for (Map.Entry> schema : + databases.get(dbName).entrySet()) { + for (String tableName : schema.getValue().keySet()) { + tables.add(SnowflakeIdentifier.ofTable(dbName, schema.getKey(), tableName)); + } + } + } else { + throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); } + break; } - } else { - throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); - } - } else { - String dbName = namespace.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1); - if (databases.containsKey(dbName)) { - String schemaName = namespace.level(SnowflakeResources.NAMESPACE_SCHEMA_LEVEL - 1); - if (databases.get(dbName).containsKey(schemaName)) { - for (String tableName : databases.get(dbName).get(schemaName).keySet()) { - tables.add(new SnowflakeTable(dbName, schemaName, tableName)); + case SCHEMA: + { + String dbName = scope.getDatabaseName(); + if (databases.containsKey(dbName)) { + String schemaName = scope.getSchemaName(); + if (databases.get(dbName).containsKey(schemaName)) { + for (String tableName : databases.get(dbName).get(schemaName).keySet()) { + tables.add(SnowflakeIdentifier.ofTable(dbName, schemaName, tableName)); + } + } else { + throw new UncheckedSQLException( + "Object does not exist: database.schema: '%s.%s'", dbName, schemaName); + } + } else { + throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); } + break; } - } else { - throw new UncheckedSQLException( - "Object does not exist: database.schema: '%s.%s'", dbName, schemaName); + default: + throw new IllegalArgumentException( + String.format("Unsupported scope type for listing tables: %s", scope)); } - } else { - throw new UncheckedSQLException("Object does not exist: database: '%s'", dbName); - } return tables; } @Override - public SnowflakeTableMetadata getTableMetadata(TableIdentifier tableIdentifier) { + public SnowflakeTableMetadata getTableMetadata(SnowflakeIdentifier tableIdentifier) { Preconditions.checkState(!closed, "Cannot call getTableMetadata after calling close()"); - Namespace ns = tableIdentifier.namespace(); Preconditions.checkArgument( - ns.length() == SnowflakeResources.MAX_NAMESPACE_DEPTH, - "TableIdentifier {} must have namespace of length {}", - tableIdentifier, - SnowflakeResources.MAX_NAMESPACE_DEPTH); - String dbName = ns.level(SnowflakeResources.NAMESPACE_DB_LEVEL - 1); - String schemaName = ns.level(SnowflakeResources.NAMESPACE_SCHEMA_LEVEL - 1); + tableIdentifier.getType() == SnowflakeIdentifier.Type.TABLE, + "tableIdentifier must be type TABLE, got: %s", + tableIdentifier); + String dbName = tableIdentifier.getDatabaseName(); + String schemaName = tableIdentifier.getSchemaName(); if (!databases.containsKey(dbName) || !databases.get(dbName).containsKey(schemaName) - ||
!databases.get(dbName).get(schemaName).containsKey(tableIdentifier.name())) { + || !databases.get(dbName).get(schemaName).containsKey(tableIdentifier.getTableName())) { throw new UncheckedSQLException("Object does not exist: object: '%s'", tableIdentifier); } - return databases.get(dbName).get(schemaName).get(tableIdentifier.name()); + return databases.get(dbName).get(schemaName).get(tableIdentifier.getTableName()); } public boolean isClosed() { diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java index 4f6f9d32e4d1..f69ea407c0fc 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java @@ -31,14 +31,10 @@ import org.apache.commons.dbutils.QueryRunner; import org.apache.commons.dbutils.ResultSetHandler; import org.apache.iceberg.ClientPool; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.jdbc.UncheckedInterruptedException; import org.apache.iceberg.jdbc.UncheckedSQLException; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.snowflake.entities.SnowflakeSchema; -import org.apache.iceberg.snowflake.entities.SnowflakeTable; +import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; import org.assertj.core.api.Assertions; import org.junit.Assert; @@ -97,8 +93,8 @@ public void testNullClientPoolInConstructor() { } /** - * For the root/empty Namespace, expect an underlying query to list schemas at the ACCOUNT level - * with no query parameters. + * For the root scope, expect an underlying query to list schemas at the ACCOUNT level with no + * query parameters. */ @Test public void testListSchemasInAccount() throws SQLException { @@ -112,7 +108,8 @@ public void testListSchemasInAccount() throws SQLException { .thenReturn("SCHEMA_2") .thenReturn("SCHEMA_3"); - List actualList = snowflakeClient.listSchemas(Namespace.of()); + List actualList = + snowflakeClient.listSchemas(SnowflakeIdentifier.ofRoot()); verify(mockQueryRunner) .query( @@ -121,17 +118,16 @@ public void testListSchemasInAccount() throws SQLException { any(ResultSetHandler.class), eq((Object[]) null)); - List expectedList = - Lists.newArrayList( - new SnowflakeSchema("DB_1", "SCHEMA_1"), - new SnowflakeSchema("DB_1", "SCHEMA_2"), - new SnowflakeSchema("DB_2", "SCHEMA_3")); - Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + Assertions.assertThat(actualList) + .containsExactly( + SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1"), + SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_2"), + SnowflakeIdentifier.ofSchema("DB_2", "SCHEMA_3")); } /** - * For a 1-level Namespace, expect an underlying query to list schemas at the DATABASE level and - * supply the Namespace as a query param in an IDENTIFIER. + * For a DATABASE scope, expect an underlying query to list schemas at the DATABASE level and + * supply the database as a query param in an IDENTIFIER. 
*/ @Test public void testListSchemasInDatabase() throws SQLException { @@ -139,7 +135,8 @@ public void testListSchemasInDatabase() throws SQLException { when(mockResultSet.getString("database_name")).thenReturn("DB_1").thenReturn("DB_1"); when(mockResultSet.getString("name")).thenReturn("SCHEMA_1").thenReturn("SCHEMA_2"); - List actualList = snowflakeClient.listSchemas(Namespace.of("DB_1")); + List actualList = + snowflakeClient.listSchemas(SnowflakeIdentifier.ofDatabase("DB_1")); verify(mockQueryRunner) .query( @@ -148,10 +145,10 @@ public void testListSchemasInDatabase() throws SQLException { any(ResultSetHandler.class), eq("DB_1")); - List expectedList = - Lists.newArrayList( - new SnowflakeSchema("DB_1", "SCHEMA_1"), new SnowflakeSchema("DB_1", "SCHEMA_2")); - Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + Assertions.assertThat(actualList) + .containsExactly( + SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1"), + SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_2")); } /** @@ -163,7 +160,8 @@ public void testListSchemasSQLException() throws SQLException, InterruptedExcept when(mockClientPool.run(any(ClientPool.Action.class))) .thenThrow(new SQLException("Fake SQL exception")); Assert.assertThrows( - UncheckedSQLException.class, () -> snowflakeClient.listSchemas(Namespace.of("DB_1"))); + UncheckedSQLException.class, + () -> snowflakeClient.listSchemas(SnowflakeIdentifier.ofDatabase("DB_1"))); } /** @@ -176,12 +174,12 @@ public void testListSchemasInterruptedException() throws SQLException, Interrupt .thenThrow(new InterruptedException("Fake interrupted exception")); Assert.assertThrows( UncheckedInterruptedException.class, - () -> snowflakeClient.listSchemas(Namespace.of("DB_1"))); + () -> snowflakeClient.listSchemas(SnowflakeIdentifier.ofDatabase("DB_1"))); } /** - * For the root/empty Namespace, expect an underlying query to list tables at the ACCOUNT level - * with no query parameters. + * For the root/empty scope, expect an underlying query to list tables at the ACCOUNT level with + * no query parameters. */ @Test public void testListIcebergTablesInAccount() throws SQLException { @@ -207,7 +205,8 @@ public void testListIcebergTablesInAccount() throws SQLException { .thenReturn("TABLE_3") .thenReturn("TABLE_4"); - List actualList = snowflakeClient.listIcebergTables(Namespace.of()); + List actualList = + snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofRoot()); verify(mockQueryRunner) .query( @@ -216,18 +215,17 @@ public void testListIcebergTablesInAccount() throws SQLException { any(ResultSetHandler.class), eq((Object[]) null)); - List expectedList = - Lists.newArrayList( - new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_1"), - new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_2"), - new SnowflakeTable("DB_1", "SCHEMA_2", "TABLE_3"), - new SnowflakeTable("DB_2", "SCHEMA_3", "TABLE_4")); - Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + Assertions.assertThat(actualList) + .containsExactly( + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"), + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_2"), + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_2", "TABLE_3"), + SnowflakeIdentifier.ofTable("DB_2", "SCHEMA_3", "TABLE_4")); } /** - * For a 1-level Namespace, expect an underlying query to list tables at the DATABASE level and - * supply the Namespace as a query param in an IDENTIFIER. + * For a DATABASE scope, expect an underlying query to list tables at the DATABASE level and + * supply the database as a query param in an IDENTIFIER. 
*/ @Test public void testListIcebergTablesInDatabase() throws SQLException { @@ -245,7 +243,8 @@ public void testListIcebergTablesInDatabase() throws SQLException { .thenReturn("TABLE_2") .thenReturn("TABLE_3"); - List actualList = snowflakeClient.listIcebergTables(Namespace.of("DB_1")); + List actualList = + snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofDatabase("DB_1")); verify(mockQueryRunner) .query( @@ -254,17 +253,16 @@ public void testListIcebergTablesInDatabase() throws SQLException { any(ResultSetHandler.class), eq("DB_1")); - List expectedList = - Lists.newArrayList( - new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_1"), - new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_2"), - new SnowflakeTable("DB_1", "SCHEMA_2", "TABLE_3")); - Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + Assertions.assertThat(actualList) + .containsExactly( + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"), + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_2"), + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_2", "TABLE_3")); } /** - * For a 2-level Namespace, expect an underlying query to list tables at the SCHEMA level and - * supply the Namespace as a query param in an IDENTIFIER. + * For a SCHEMA scope, expect an underlying query to list tables at the SCHEMA level and supply + * the schema as a query param in an IDENTIFIER. */ @Test public void testListIcebergTablesInSchema() throws SQLException { @@ -273,8 +271,8 @@ public void testListIcebergTablesInSchema() throws SQLException { when(mockResultSet.getString("schema_name")).thenReturn("SCHEMA_1").thenReturn("SCHEMA_1"); when(mockResultSet.getString("name")).thenReturn("TABLE_1").thenReturn("TABLE_2"); - List actualList = - snowflakeClient.listIcebergTables(Namespace.of("DB_1", "SCHEMA_1")); + List actualList = + snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1")); verify(mockQueryRunner) .query( @@ -283,11 +281,10 @@ public void testListIcebergTablesInSchema() throws SQLException { any(ResultSetHandler.class), eq("DB_1.SCHEMA_1")); - List expectedList = - Lists.newArrayList( - new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_1"), - new SnowflakeTable("DB_1", "SCHEMA_1", "TABLE_2")); - Assertions.assertThat(actualList).hasSameElementsAs(expectedList); + Assertions.assertThat(actualList) + .containsExactly( + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"), + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_2")); } /** @@ -299,7 +296,8 @@ public void testListIcebergTablesSQLException() throws SQLException, Interrupted when(mockClientPool.run(any(ClientPool.Action.class))) .thenThrow(new SQLException("Fake SQL exception")); Assert.assertThrows( - UncheckedSQLException.class, () -> snowflakeClient.listIcebergTables(Namespace.of("DB_1"))); + UncheckedSQLException.class, + () -> snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofDatabase("DB_1"))); } /** @@ -313,7 +311,7 @@ public void testListIcebergTablesInterruptedException() .thenThrow(new InterruptedException("Fake interrupted exception")); Assert.assertThrows( UncheckedInterruptedException.class, - () -> snowflakeClient.listIcebergTables(Namespace.of("DB_1"))); + () -> snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofDatabase("DB_1"))); } /** @@ -329,7 +327,7 @@ public void testGetS3TableMetadata() throws SQLException { SnowflakeTableMetadata actualMetadata = snowflakeClient.getTableMetadata( - TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1")); + SnowflakeIdentifier.ofTable("DB_1", 
"SCHEMA_1", "TABLE_1")); verify(mockQueryRunner) .query( @@ -360,7 +358,7 @@ public void testGetAzureTableMetadata() throws SQLException { SnowflakeTableMetadata actualMetadata = snowflakeClient.getTableMetadata( - TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1")); + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1")); verify(mockQueryRunner) .query( @@ -391,7 +389,7 @@ public void testGetGcsTableMetadata() throws SQLException { SnowflakeTableMetadata actualMetadata = snowflakeClient.getTableMetadata( - TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1")); + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1")); verify(mockQueryRunner) .query( @@ -418,7 +416,7 @@ public void testGetTableMetadataMalformedJson() throws SQLException { IllegalArgumentException.class, () -> snowflakeClient.getTableMetadata( - TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1"))); + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))); } /** @@ -433,7 +431,7 @@ public void testGetTableMetadataSQLException() throws SQLException, InterruptedE UncheckedSQLException.class, () -> snowflakeClient.getTableMetadata( - TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1"))); + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))); } /** @@ -448,7 +446,7 @@ public void testGetTableMetadataInterruptedException() throws SQLException, Inte UncheckedInterruptedException.class, () -> snowflakeClient.getTableMetadata( - TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1"), "TABLE_1"))); + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))); } /** Calling close() propagates to closing underlying client pool. */ diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java index ef5a37067de6..0541123c922e 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java @@ -169,7 +169,7 @@ public void testListNamespaceWithinSchema() { String schemaName = "SCHEMA_4"; Assertions.assertThatExceptionOfType(IllegalArgumentException.class) .isThrownBy(() -> catalog.listNamespaces(Namespace.of(dbName, schemaName))) - .withMessageContaining("more than 2 levels of namespace") + .withMessageContaining("level") .withMessageContaining("DB_3.SCHEMA_4"); } @@ -249,12 +249,12 @@ public void testLoadTableWithMalformedTableIdentifier() { () -> catalog.loadTable( TableIdentifier.of(Namespace.of("DB_1", "SCHEMA_1", "BAD_NS_LEVEL"), "TAB_1"))) - .withMessageContaining("levels of namespace") - .withMessageContaining("DB_1.SCHEMA_1.BAD_NS_LEVEL.TAB_1"); + .withMessageContaining("level") + .withMessageContaining("DB_1.SCHEMA_1.BAD_NS_LEVEL"); Assertions.assertThatExceptionOfType(IllegalArgumentException.class) .isThrownBy( () -> catalog.loadTable(TableIdentifier.of(Namespace.of("DB_WITHOUT_SCHEMA"), "TAB_1"))) - .withMessageContaining("levels of namespace") + .withMessageContaining("level") .withMessageContaining("DB_WITHOUT_SCHEMA.TAB_1"); } From 58d258e3aa150317ca8a16c7b395c82908eb04ea Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Fri, 16 Dec 2022 19:50:28 -0800 Subject: [PATCH 09/20] Finish migrating JdbcSnowflakeClientTest off any usage of org.junit.Assert in favor of assertj's Assertions. 
--- .../snowflake/JdbcSnowflakeClientTest.java | 64 ++++++++++--------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java index f69ea407c0fc..b04c7f7e8d49 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java @@ -37,7 +37,6 @@ import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; import org.assertj.core.api.Assertions; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -159,9 +158,9 @@ public void testListSchemasInDatabase() throws SQLException { public void testListSchemasSQLException() throws SQLException, InterruptedException { when(mockClientPool.run(any(ClientPool.Action.class))) .thenThrow(new SQLException("Fake SQL exception")); - Assert.assertThrows( - UncheckedSQLException.class, - () -> snowflakeClient.listSchemas(SnowflakeIdentifier.ofDatabase("DB_1"))); + Assertions.assertThatExceptionOfType(UncheckedSQLException.class) + .isThrownBy(() -> snowflakeClient.listSchemas(SnowflakeIdentifier.ofDatabase("DB_1"))) + .withStackTraceContaining("Fake SQL exception"); } /** @@ -172,9 +171,9 @@ public void testListSchemasSQLException() throws SQLException, InterruptedExcept public void testListSchemasInterruptedException() throws SQLException, InterruptedException { when(mockClientPool.run(any(ClientPool.Action.class))) .thenThrow(new InterruptedException("Fake interrupted exception")); - Assert.assertThrows( - UncheckedInterruptedException.class, - () -> snowflakeClient.listSchemas(SnowflakeIdentifier.ofDatabase("DB_1"))); + Assertions.assertThatExceptionOfType(UncheckedInterruptedException.class) + .isThrownBy(() -> snowflakeClient.listSchemas(SnowflakeIdentifier.ofDatabase("DB_1"))) + .withStackTraceContaining("Fake interrupted exception"); } /** @@ -295,9 +294,9 @@ public void testListIcebergTablesInSchema() throws SQLException { public void testListIcebergTablesSQLException() throws SQLException, InterruptedException { when(mockClientPool.run(any(ClientPool.Action.class))) .thenThrow(new SQLException("Fake SQL exception")); - Assert.assertThrows( - UncheckedSQLException.class, - () -> snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofDatabase("DB_1"))); + Assertions.assertThatExceptionOfType(UncheckedSQLException.class) + .isThrownBy(() -> snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofDatabase("DB_1"))) + .withStackTraceContaining("Fake SQL exception"); } /** @@ -309,9 +308,9 @@ public void testListIcebergTablesInterruptedException() throws SQLException, InterruptedException { when(mockClientPool.run(any(ClientPool.Action.class))) .thenThrow(new InterruptedException("Fake interrupted exception")); - Assert.assertThrows( - UncheckedInterruptedException.class, - () -> snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofDatabase("DB_1"))); + Assertions.assertThatExceptionOfType(UncheckedInterruptedException.class) + .isThrownBy(() -> snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofDatabase("DB_1"))) + .withStackTraceContaining("Fake interrupted exception"); } /** @@ -342,7 +341,7 @@ public void testGetS3TableMetadata() throws SQLException { "s3://tab1/metadata/v3.metadata.json", "success", null); - 
Assert.assertEquals(expectedMetadata, actualMetadata); + Assertions.assertThat(actualMetadata).isEqualTo(expectedMetadata); } /** @@ -373,7 +372,7 @@ public void testGetAzureTableMetadata() throws SQLException { "wasbs://mycontainer@myaccount.blob.core.windows.net/tab3/metadata/v334.metadata.json", "success", null); - Assert.assertEquals(expectedMetadata, actualMetadata); + Assertions.assertThat(actualMetadata).isEqualTo(expectedMetadata); } /** @@ -404,7 +403,7 @@ public void testGetGcsTableMetadata() throws SQLException { "gs://tab5/metadata/v793.metadata.json", "success", null); - Assert.assertEquals(expectedMetadata, actualMetadata); + Assertions.assertThat(actualMetadata).isEqualTo(expectedMetadata); } /** Malformed JSON from a ResultSet should propagate as an IllegalArgumentException. */ @@ -412,11 +411,12 @@ public void testGetGcsTableMetadata() throws SQLException { public void testGetTableMetadataMalformedJson() throws SQLException { when(mockResultSet.next()).thenReturn(true); when(mockResultSet.getString("METADATA")).thenReturn("{\"malformed_no_closing_bracket"); - Assert.assertThrows( - IllegalArgumentException.class, - () -> - snowflakeClient.getTableMetadata( - SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))); + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy( + () -> + snowflakeClient.getTableMetadata( + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))) + .withMessageContaining("{\"malformed_no_closing_bracket"); } /** @@ -427,11 +427,12 @@ public void testGetTableMetadataMalformedJson() throws SQLException { public void testGetTableMetadataSQLException() throws SQLException, InterruptedException { when(mockClientPool.run(any(ClientPool.Action.class))) .thenThrow(new SQLException("Fake SQL exception")); - Assert.assertThrows( - UncheckedSQLException.class, - () -> - snowflakeClient.getTableMetadata( - SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))); + Assertions.assertThatExceptionOfType(UncheckedSQLException.class) + .isThrownBy( + () -> + snowflakeClient.getTableMetadata( + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))) + .withStackTraceContaining("Fake SQL exception"); } /** @@ -442,11 +443,12 @@ public void testGetTableMetadataSQLException() throws SQLException, InterruptedE public void testGetTableMetadataInterruptedException() throws SQLException, InterruptedException { when(mockClientPool.run(any(ClientPool.Action.class))) .thenThrow(new InterruptedException("Fake interrupted exception")); - Assert.assertThrows( - UncheckedInterruptedException.class, - () -> - snowflakeClient.getTableMetadata( - SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))); + Assertions.assertThatExceptionOfType(UncheckedInterruptedException.class) + .isThrownBy( + () -> + snowflakeClient.getTableMetadata( + SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))) + .withStackTraceContaining("Fake interrupted exception"); } /** Calling close() propagates to closing underlying client pool. */ From 0183129ec5822f307bbbe30c0d313750df862d78 Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Mon, 19 Dec 2022 20:53:22 -0800 Subject: [PATCH 10/20] Style refactorings from review comments, expanded and moved InMemoryFileIO into core with its own unittest. 
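
A rough usage sketch of the relocated helper as it behaves after this change (the paths and
byte contents below are illustrative only, not taken from the tests):

    InMemoryFileIO fileIO = new InMemoryFileIO();
    fileIO.addFile("memory://a/v1.metadata.json", new byte[] {1, 2, 3});

    // Reads resolve against the in-memory map; unknown paths throw NotFoundException.
    InputFile input = fileIO.newInputFile("memory://a/v1.metadata.json");

    // A write only becomes visible to the FileIO once its stream is closed, mirroring
    // the S3FileIO-style commit-on-close semantics described in InMemoryOutputFile's
    // new constructor javadoc.
    try (PositionOutputStream out =
        fileIO.newOutputFile("memory://a/v2.metadata.json").create()) {
      out.write(new byte[] {4, 5, 6});
    }
    // fileIO.fileExists("memory://a/v2.metadata.json") now returns true.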
--- build.gradle | 1 - .../apache/iceberg/io}/InMemoryFileIO.java | 27 +++-- .../apache/iceberg/io/InMemoryOutputFile.java | 26 +++- .../apache/iceberg/io/TestInMemoryFileIO.java | 111 ++++++++++++++++++ .../snowflake/JdbcSnowflakeClient.java | 25 ++-- .../iceberg/snowflake/NamespaceHelpers.java | 18 ++- .../iceberg/snowflake/SnowflakeCatalog.java | 54 ++++----- .../iceberg/snowflake/SnowflakeClient.java | 10 +- .../{entities => }/SnowflakeIdentifier.java | 110 ++++++++--------- .../SnowflakeTableMetadata.java | 7 +- .../snowflake/SnowflakeTableOperations.java | 20 +++- .../snowflake/FakeSnowflakeClient.java | 26 ++-- .../snowflake/JdbcSnowflakeClientTest.java | 14 +-- .../snowflake/SnowflakeCatalogTest.java | 15 ++- 14 files changed, 296 insertions(+), 168 deletions(-) rename {snowflake/src/test/java/org/apache/iceberg/snowflake => core/src/test/java/org/apache/iceberg/io}/InMemoryFileIO.java (72%) create mode 100644 core/src/test/java/org/apache/iceberg/io/TestInMemoryFileIO.java rename snowflake/src/main/java/org/apache/iceberg/snowflake/{entities => }/SnowflakeIdentifier.java (65%) rename snowflake/src/main/java/org/apache/iceberg/snowflake/{entities => }/SnowflakeTableMetadata.java (97%) diff --git a/build.gradle b/build.gradle index 1ba89e465f9a..8f8e23d7c9f1 100644 --- a/build.gradle +++ b/build.gradle @@ -705,7 +705,6 @@ project(':iceberg-snowflake') { implementation project(':iceberg-core') implementation project(':iceberg-common') implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow') - implementation project(':iceberg-aws') implementation "com.fasterxml.jackson.core:jackson-databind" implementation "com.fasterxml.jackson.core:jackson-core" implementation "commons-dbutils:commons-dbutils" diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java b/core/src/test/java/org/apache/iceberg/io/InMemoryFileIO.java similarity index 72% rename from snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java rename to core/src/test/java/org/apache/iceberg/io/InMemoryFileIO.java index 813ae49b43de..3dde2a8ce553 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/InMemoryFileIO.java +++ b/core/src/test/java/org/apache/iceberg/io/InMemoryFileIO.java @@ -16,25 +16,25 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.apache.iceberg.snowflake; +package org.apache.iceberg.io; import java.util.Map; import org.apache.iceberg.exceptions.NotFoundException; -import org.apache.iceberg.io.FileIO; -import org.apache.iceberg.io.InMemoryInputFile; -import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Maps; public class InMemoryFileIO implements FileIO { - private Map inMemoryFiles = Maps.newHashMap(); + private Map inMemoryFiles = Maps.newHashMap(); private boolean closed = false; public void addFile(String path, byte[] contents) { Preconditions.checkState(!closed, "Cannot call addFile after calling close()"); - inMemoryFiles.put(path, new InMemoryInputFile(path, contents)); + inMemoryFiles.put(path, contents); + } + + public boolean fileExists(String path) { + return inMemoryFiles.containsKey(path); } @Override @@ -43,19 +43,22 @@ public InputFile newInputFile(String path) { if (!inMemoryFiles.containsKey(path)) { throw new NotFoundException("No in-memory file found for path: %s", path); } - return inMemoryFiles.get(path); + return new InMemoryInputFile(path, inMemoryFiles.get(path)); } @Override public OutputFile newOutputFile(String path) { - throw new UnsupportedOperationException( - String.format("newOutputFile not supported; attempted for path '%s'", path)); + Preconditions.checkState(!closed, "Cannot call newOutputFile after calling close()"); + return new InMemoryOutputFile(path, this); } @Override public void deleteFile(String path) { - throw new UnsupportedOperationException( - String.format("deleteFile not supported; attempted for path '%s'", path)); + Preconditions.checkState(!closed, "Cannot call deleteFile after calling close()"); + if (!inMemoryFiles.containsKey(path)) { + throw new NotFoundException("No in-memory file found for path: %s", path); + } + inMemoryFiles.remove(path); } public boolean isClosed() { diff --git a/core/src/test/java/org/apache/iceberg/io/InMemoryOutputFile.java b/core/src/test/java/org/apache/iceberg/io/InMemoryOutputFile.java index e8740b125fa3..5d72cef622f0 100644 --- a/core/src/test/java/org/apache/iceberg/io/InMemoryOutputFile.java +++ b/core/src/test/java/org/apache/iceberg/io/InMemoryOutputFile.java @@ -30,19 +30,38 @@ public class InMemoryOutputFile implements OutputFile { private boolean exists = false; private ByteArrayOutputStream contents; + private InMemoryFileIO parentFileIO; public InMemoryOutputFile() { this("memory:" + UUID.randomUUID()); } public InMemoryOutputFile(String location) { + this(location, null); + } + + /** + * If the optional parentFileIO is provided, file-existence behaves similarly to S3FileIO; + * existence checks are performed up-front if creating without overwrite, but files only exist in + * the parentFileIO if close() has been called on the associated output streams (or pre-existing + * files are populated into the parentFileIO through other means). + * + * @param location the location returned by location() of this OutputFile, the InputFile obtained + * from calling toInputFile(), and the location for looking up the associated InputFile from a + * parentFileIO, if non-null. + * @param parentFileIO if non-null, commits an associated InMemoryInputFile on close() into the + * parentFileIO, and uses the parentFileIO for "already exists" checks if creating without + * overwriting.
+ */ + public InMemoryOutputFile(String location, InMemoryFileIO parentFileIO) { Preconditions.checkNotNull(location, "location is null"); this.location = location; + this.parentFileIO = parentFileIO; } @Override public PositionOutputStream create() { - if (exists) { + if (exists || (parentFileIO != null && parentFileIO.fileExists(location))) { throw new AlreadyExistsException("Already exists"); } return createOrOverwrite(); @@ -70,7 +89,7 @@ public byte[] toByteArray() { return contents.toByteArray(); } - private static class InMemoryPositionOutputStream extends PositionOutputStream { + private class InMemoryPositionOutputStream extends PositionOutputStream { private final ByteArrayOutputStream delegate; private boolean closed = false; @@ -112,6 +131,9 @@ public void flush() throws IOException { public void close() throws IOException { delegate.close(); closed = true; + if (parentFileIO != null) { + parentFileIO.addFile(location(), toByteArray()); + } } private void checkOpen() { diff --git a/core/src/test/java/org/apache/iceberg/io/TestInMemoryFileIO.java b/core/src/test/java/org/apache/iceberg/io/TestInMemoryFileIO.java new file mode 100644 index 000000000000..95118ec7d18e --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/io/TestInMemoryFileIO.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.io; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NotFoundException; +import org.assertj.core.api.Assertions; +import org.junit.Test; + +public class TestInMemoryFileIO { + String location = "s3://foo/bar.txt"; + + @Test + public void testBasicEndToEnd() throws IOException { + InMemoryFileIO fileIO = new InMemoryFileIO(); + Assertions.assertThat(fileIO.fileExists(location)).isFalse(); + + OutputStream outputStream = fileIO.newOutputFile(location).create(); + byte[] data = "hello world".getBytes(); + outputStream.write(data); + outputStream.close(); + Assertions.assertThat(fileIO.fileExists(location)).isTrue(); + + InputStream inputStream = fileIO.newInputFile(location).newStream(); + byte[] buf = new byte[data.length]; + inputStream.read(buf); + inputStream.close(); + Assertions.assertThat(new String(buf)).isEqualTo("hello world"); + + fileIO.deleteFile(location); + Assertions.assertThat(fileIO.fileExists(location)).isFalse(); + } + + @Test + public void testNewInputFileNotFound() throws IOException { + InMemoryFileIO fileIO = new InMemoryFileIO(); + Assertions.assertThatExceptionOfType(NotFoundException.class) + .isThrownBy(() -> fileIO.newInputFile("s3://nonexistent/file")); + } + + @Test + public void testDeleteFileNotFound() throws IOException { + InMemoryFileIO fileIO = new InMemoryFileIO(); + Assertions.assertThatExceptionOfType(NotFoundException.class) + .isThrownBy(() -> fileIO.deleteFile("s3://nonexistent/file")); + } + + @Test + public void testCreateNoOverwrite() throws IOException { + InMemoryFileIO fileIO = new InMemoryFileIO(); + fileIO.addFile(location, "hello world".getBytes()); + Assertions.assertThatExceptionOfType(AlreadyExistsException.class) + .isThrownBy(() -> fileIO.newOutputFile(location).create()); + } + + @Test + public void testOverwriteBeforeAndAfterClose() throws IOException { + byte[] oldData = "old data".getBytes(); + byte[] newData = "new data".getBytes(); + + InMemoryFileIO fileIO = new InMemoryFileIO(); + OutputStream outputStream = fileIO.newOutputFile(location).create(); + outputStream.write(oldData); + + // Even though we've called create() and started writing data, this file won't yet exist + // in the parentFileIO before we've closed it. + Assertions.assertThat(fileIO.fileExists(location)).isFalse(); + + // File appears after closing it. + outputStream.close(); + Assertions.assertThat(fileIO.fileExists(location)).isTrue(); + + // Start a new OutputFile and write new data but don't close() it yet. + outputStream = fileIO.newOutputFile(location).createOrOverwrite(); + outputStream.write(newData); + + // We'll still read old data. + InputStream inputStream = fileIO.newInputFile(location).newStream(); + byte[] buf = new byte[oldData.length]; + inputStream.read(buf); + inputStream.close(); + Assertions.assertThat(new String(buf)).isEqualTo("old data"); + + // Finally, close the new output stream; data should be overwritten with new data now. 
+ outputStream.close(); + inputStream = fileIO.newInputFile(location).newStream(); + buf = new byte[newData.length]; + inputStream.read(buf); + inputStream.close(); + Assertions.assertThat(new String(buf)).isEqualTo("new data"); + } +} diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java index 1e492f5c819f..a8b06e42f02d 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java @@ -26,25 +26,14 @@ import org.apache.iceberg.jdbc.UncheckedSQLException; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; -import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * This implementation of SnowflakeClient builds on top of Snowflake's JDBC driver to interact with - * Snowflake's Iceberg-aware resource model. Despite using JDBC libraries, the resource model is - * derived from Snowflake's own first-class support for Iceberg tables as opposed to using an opaque - * JDBC layer to store Iceberg metadata itself in an Iceberg-agnostic database. - * - *

This thus differs from the JdbcCatalog in that Snowflake's service provides the source of - * truth of Iceberg metadata, rather than serving as a storage layer for a client-defined Iceberg - * resource model. + * Snowflake's Iceberg-aware resource model. */ -public class JdbcSnowflakeClient implements SnowflakeClient { +class JdbcSnowflakeClient implements SnowflakeClient { public static final String EXPECTED_JDBC_IMPL = "net.snowflake.client.jdbc.SnowflakeDriver"; - private static final Logger LOG = LoggerFactory.getLogger(JdbcSnowflakeClient.class); private final JdbcClientPool connectionPool; private QueryRunner queryRunner; @@ -63,7 +52,7 @@ void setQueryRunner(QueryRunner queryRunner) { public List listSchemas(SnowflakeIdentifier scope) { StringBuilder baseQuery = new StringBuilder("SHOW SCHEMAS"); Object[] queryParams = null; - switch (scope.getType()) { + switch (scope.type()) { case ROOT: // account-level listing baseQuery.append(" IN ACCOUNT"); @@ -88,7 +77,7 @@ public List listSchemas(SnowflakeIdentifier scope) { queryRunner.query( conn, finalQuery, - SnowflakeIdentifier.createSchemaHandler(), + SnowflakeIdentifier.SCHEMA_RESULT_SET_HANDLER, finalQueryParams)); } catch (SQLException e) { throw new UncheckedSQLException(e, "Failed to list schemas for scope %s", scope); @@ -102,7 +91,7 @@ public List listSchemas(SnowflakeIdentifier scope) { public List listIcebergTables(SnowflakeIdentifier scope) { StringBuilder baseQuery = new StringBuilder("SHOW ICEBERG TABLES"); Object[] queryParams = null; - switch (scope.getType()) { + switch (scope.type()) { case ROOT: // account-level listing baseQuery.append(" IN ACCOUNT"); @@ -132,7 +121,7 @@ public List listIcebergTables(SnowflakeIdentifier scope) { queryRunner.query( conn, finalQuery, - SnowflakeIdentifier.createTableHandler(), + SnowflakeIdentifier.TABLE_RESULT_SET_HANDLER, finalQueryParams)); } catch (SQLException e) { throw new UncheckedSQLException(e, "Failed to list tables for scope %s", scope.toString()); @@ -143,7 +132,7 @@ public List listIcebergTables(SnowflakeIdentifier scope) { } @Override - public SnowflakeTableMetadata getTableMetadata(SnowflakeIdentifier tableIdentifier) { + public SnowflakeTableMetadata loadTableMetadata(SnowflakeIdentifier tableIdentifier) { SnowflakeTableMetadata tableMeta; try { final String finalQuery = "SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"; diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java index a8a9fa4de3df..6c6db739737d 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java @@ -21,11 +21,10 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class NamespaceHelpers { +class NamespaceHelpers { private static final int MAX_NAMESPACE_DEPTH = 2; private static final int NAMESPACE_ROOT_LEVEL = 0; private static final int NAMESPACE_DB_LEVEL = 1; @@ -38,7 +37,7 @@ public class NamespaceHelpers { * * @throws IllegalArgumentException if the namespace is not a supported depth. 
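* For example (illustrative values), Namespace.of() maps to a ROOT identifier,
* Namespace.of("DB_1") to a DATABASE identifier, and Namespace.of("DB_1", "SCHEMA_1") to a
* SCHEMA identifier.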
*/ - public static SnowflakeIdentifier getSnowflakeIdentifierForNamespace(Namespace namespace) { + public static SnowflakeIdentifier toSnowflakeIdentifier(Namespace namespace) { SnowflakeIdentifier identifier = null; switch (namespace.length()) { case NAMESPACE_ROOT_LEVEL: @@ -59,7 +58,7 @@ public static SnowflakeIdentifier getSnowflakeIdentifierForNamespace(Namespace n "Snowflake max namespace level is %d, got namespace '%s'", MAX_NAMESPACE_DEPTH, namespace)); } - LOG.debug("getSnowflakeIdentifierForNamespace({}) -> {}", namespace, identifier); + LOG.debug("toSnowflakeIdentifier({}) -> {}", namespace, identifier); return identifier; } @@ -67,18 +66,17 @@ public static SnowflakeIdentifier getSnowflakeIdentifierForNamespace(Namespace n * Converts a TableIdentifier into a SnowflakeIdentifier of type TABLE; the identifier must have * exactly the right namespace depth to represent a fully-qualified Snowflake table identifier. */ - public static SnowflakeIdentifier getSnowflakeIdentifierForTableIdentifier( - TableIdentifier identifier) { - SnowflakeIdentifier namespaceScope = getSnowflakeIdentifierForNamespace(identifier.namespace()); + public static SnowflakeIdentifier toSnowflakeIdentifier(TableIdentifier identifier) { + SnowflakeIdentifier namespaceScope = toSnowflakeIdentifier(identifier.namespace()); Preconditions.checkArgument( - namespaceScope.getType() == SnowflakeIdentifier.Type.SCHEMA, + namespaceScope.type() == SnowflakeIdentifier.Type.SCHEMA, "Namespace portion of '%s' must be at the SCHEMA level, got namespaceScope '%s'", identifier, namespaceScope); SnowflakeIdentifier ret = SnowflakeIdentifier.ofTable( - namespaceScope.getDatabaseName(), namespaceScope.getSchemaName(), identifier.name()); - LOG.debug("getSnowflakeIdentifierForTableIdentifier({}) -> {}", identifier, ret); + namespaceScope.databaseName(), namespaceScope.schemaName(), identifier.name()); + LOG.debug("toSnowflakeIdentifier({}) -> {}", identifier, ret); return ret; } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java index 4a68e14ee203..3a76eb259e31 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java @@ -38,14 +38,13 @@ import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class SnowflakeCatalog extends BaseMetastoreCatalog implements Closeable, SupportsNamespaces, Configurable { public static final String DEFAULT_CATALOG_NAME = "snowflake_catalog"; - public static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.hadoop.HadoopFileIO"; + public static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.io.ResolvingFileIO"; private static final Logger LOG = LoggerFactory.getLogger(SnowflakeCatalog.class); @@ -60,12 +59,11 @@ public SnowflakeCatalog() {} @Override public List listTables(Namespace namespace) { - LOG.debug("listTables with namespace: {}", namespace); - SnowflakeIdentifier scope = NamespaceHelpers.getSnowflakeIdentifierForNamespace(namespace); + SnowflakeIdentifier scope = NamespaceHelpers.toSnowflakeIdentifier(namespace); Preconditions.checkArgument( - scope.getType() == SnowflakeIdentifier.Type.ROOT 
- || scope.getType() == SnowflakeIdentifier.Type.DATABASE - || scope.getType() == SnowflakeIdentifier.Type.SCHEMA, + scope.type() == SnowflakeIdentifier.Type.ROOT + || scope.type() == SnowflakeIdentifier.Type.DATABASE + || scope.type() == SnowflakeIdentifier.Type.SCHEMA, "listTables must be at ROOT, DATABASE, or SCHEMA level; got %s from namespace %s", scope, namespace); @@ -75,21 +73,20 @@ public List listTables(Namespace namespace) { return sfTables.stream() .map( table -> - TableIdentifier.of( - table.getDatabaseName(), table.getSchemaName(), table.getTableName())) + TableIdentifier.of(table.databaseName(), table.schemaName(), table.tableName())) .collect(Collectors.toList()); } @Override public boolean dropTable(TableIdentifier identifier, boolean purge) { throw new UnsupportedOperationException( - String.format("dropTable not supported; attempted for table '%s'", identifier)); + "SnowflakeCatalog does not currently support dropTable"); } @Override public void renameTable(TableIdentifier from, TableIdentifier to) { throw new UnsupportedOperationException( - String.format("renameTable not supported; attempted from '%s' to '%s'", from, to)); + "SnowflakeCatalog does not currently support renameTable"); } @Override @@ -98,10 +95,9 @@ public void initialize(String name, Map properties) { Preconditions.checkNotNull(uri, "JDBC connection URI is required"); try { // We'll ensure the expected JDBC driver implementation class is initialized through - // reflection - // regardless of which classloader ends up using this JdbcSnowflakeClient, but we'll only - // warn if the expected driver fails to load, since users may use repackaged or custom - // JDBC drivers for Snowflake communcation. + // reflection regardless of which classloader ends up using this JdbcSnowflakeClient, but + // we'll only warn if the expected driver fails to load, since users may use repackaged or + // custom JDBC drivers for Snowflake communication. 
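+ // (Illustrative, hypothetical class name: a shaded bundle might relocate the driver to
+ // "shaded.net.snowflake.client.jdbc.SnowflakeDriver", which can still register itself with
+ // java.sql.DriverManager even though this Class.forName lookup fails.)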
Class.forName(JdbcSnowflakeClient.EXPECTED_JDBC_IMPL); } catch (ClassNotFoundException cnfe) { LOG.warn( @@ -133,7 +129,7 @@ public void initialize(String name, Map properties) { * @param properties The catalog options to use and propagate to dependencies */ @SuppressWarnings("checkstyle:HiddenField") - public void initialize( + void initialize( String name, SnowflakeClient snowflakeClient, FileIO fileIO, Map properties) { Preconditions.checkArgument(null != snowflakeClient, "snowflakeClient must be non-null"); Preconditions.checkArgument(null != fileIO, "fileIO must be non-null"); @@ -157,16 +153,15 @@ public void close() throws IOException { @Override public void createNamespace(Namespace namespace, Map metadata) { throw new UnsupportedOperationException( - String.format("createNamespace not supported; attempted for namespace '%s'", namespace)); + "SnowflakeCatalog does not currently support createNamespace"); } @Override public List listNamespaces(Namespace namespace) { - LOG.debug("listNamespaces with namespace: {}", namespace); - SnowflakeIdentifier scope = NamespaceHelpers.getSnowflakeIdentifierForNamespace(namespace); + SnowflakeIdentifier scope = NamespaceHelpers.toSnowflakeIdentifier(namespace); Preconditions.checkArgument( - scope.getType() == SnowflakeIdentifier.Type.ROOT - || scope.getType() == SnowflakeIdentifier.Type.DATABASE, + scope.type() == SnowflakeIdentifier.Type.ROOT + || scope.type() == SnowflakeIdentifier.Type.DATABASE, "listNamespaces must be at either ROOT or DATABASE level; got %s from namespace %s", scope, namespace); @@ -177,11 +172,11 @@ public List listNamespaces(Namespace namespace) { .map( schema -> { Preconditions.checkState( - schema.getType() == SnowflakeIdentifier.Type.SCHEMA, + schema.type() == SnowflakeIdentifier.Type.SCHEMA, "Got identifier of type %s from listSchemas for %s", - schema.getType(), + schema.type(), namespace); - return Namespace.of(schema.getDatabaseName(), schema.getSchemaName()); + return Namespace.of(schema.databaseName(), schema.schemaName()); }) .collect(Collectors.toList()); return namespaceList; @@ -190,26 +185,25 @@ public List listNamespaces(Namespace namespace) { @Override public Map loadNamespaceMetadata(Namespace namespace) throws NoSuchNamespaceException { - LOG.debug("loadNamespaceMetadata with namespace: {}", namespace); return ImmutableMap.of(); } @Override public boolean dropNamespace(Namespace namespace) { throw new UnsupportedOperationException( - String.format("dropNamespace not supported; attempted for namespace '%s'", namespace)); + "SnowflakeCatalog does not currently support dropNamespace"); } @Override public boolean setProperties(Namespace namespace, Map properties) { throw new UnsupportedOperationException( - String.format("setProperties not supported; attempted for namespace '%s'", namespace)); + "SnowflakeCatalog does not currently support setProperties"); } @Override public boolean removeProperties(Namespace namespace, Set properties) { throw new UnsupportedOperationException( - String.format("removeProperties not supported; attempted for namespace '%s'", namespace)); + "SnowflakeCatalog does not currently support removeProperties"); } @Override @@ -221,9 +215,7 @@ protected TableOperations newTableOps(TableIdentifier tableIdentifier) { @Override protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { throw new UnsupportedOperationException( - String.format( - "defaultWarehouseLocation not supported; attempted for tableIdentifier '%s'", - tableIdentifier)); + "SnowflakeCatalog does not 
currently support defaultWarehouseLocation"); } @Override diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java index f2212c59d95e..674ed10e6c50 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java @@ -20,8 +20,6 @@ import java.io.Closeable; import java.util.List; -import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; -import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; /** * This interface abstracts out the underlying communication protocols for contacting Snowflake to @@ -29,10 +27,10 @@ * interface should minimize assumptions about whether an underlying client uses e.g. REST, JDBC or * other underlying libraries/protocols. */ -public interface SnowflakeClient extends Closeable { +interface SnowflakeClient extends Closeable { /** * Lists all Snowflake schemas within a given scope. Returned SnowflakeIdentifiers must have - * getType() == SnowflakeIdentifier.Type.SCHEMA. + * type() == SnowflakeIdentifier.Type.SCHEMA. * * @param scope The scope in which to list, which may be ROOT or a single DATABASE. */ @@ -40,7 +38,7 @@ public interface SnowflakeClient extends Closeable { /** * Lists all Snowflake Iceberg tables within a given scope. Returned SnowflakeIdentifiers must - * have getType() == SnowflakeIdentifier.Type.TABLE. + * have type() == SnowflakeIdentifier.Type.TABLE. * * @param scope The scope in which to list, which may be ROOT, a DATABASE, or a SCHEMA. */ @@ -52,5 +50,5 @@ public interface SnowflakeClient extends Closeable { * @param tableIdentifier The fully-qualified identifier that must be of type * SnowflakeIdentifier.Type.TABLE. */ - SnowflakeTableMetadata getTableMetadata(SnowflakeIdentifier tableIdentifier); + SnowflakeTableMetadata loadTableMetadata(SnowflakeIdentifier tableIdentifier); } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeIdentifier.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java similarity index 65% rename from snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeIdentifier.java rename to snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java index ff65d40cffe0..bc2db979c292 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeIdentifier.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.iceberg.snowflake.entities; +package org.apache.iceberg.snowflake; import java.util.List; import org.apache.commons.dbutils.ResultSetHandler; @@ -24,7 +24,15 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -public class SnowflakeIdentifier { +/** + * Since the SnowflakeCatalog supports exactly two levels of Iceberg Namespaces, corresponding + * directly to the "database" and "schema" portions of Snowflake's resource model, this class + * represents a pre-validated and structured representation of a fully-qualified Snowflake resource + * identifier. 
Snowflake-specific helper libraries should operate on this representation instead of + * directly operating on TableIdentifiers or Namespaces wherever possible to avoid duplication of + * parsing/validation logic for Iceberg TableIdentifier/Namespace levels. + */ +class SnowflakeIdentifier { public enum Type { ROOT, DATABASE, @@ -32,29 +40,62 @@ public enum Type { TABLE } + /** + * Expects to handle ResultSets representing fully-qualified Snowflake Schema identifiers, + * containing "database_name" and "name" (representing schemaName). + */ + public static final ResultSetHandler> SCHEMA_RESULT_SET_HANDLER = + rs -> { + List schemas = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("database_name"); + String schemaName = rs.getString("name"); + schemas.add(SnowflakeIdentifier.ofSchema(databaseName, schemaName)); + } + return schemas; + }; + + /** + * Expects to handle ResultSets representing fully-qualified Snowflake Table identifiers, + * containing "database_name", "schema_name", and "name" (representing tableName). + */ + public static final ResultSetHandler> TABLE_RESULT_SET_HANDLER = + rs -> { + List tables = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("database_name"); + String schemaName = rs.getString("schema_name"); + String tableName = rs.getString("name"); + tables.add(SnowflakeIdentifier.ofTable(databaseName, schemaName, tableName)); + } + return tables; + }; + private String databaseName; private String schemaName; private String tableName; + private Type type; - protected SnowflakeIdentifier(String databaseName, String schemaName, String tableName) { + private SnowflakeIdentifier(String databaseName, String schemaName, String tableName, Type type) { this.databaseName = databaseName; this.schemaName = schemaName; this.tableName = tableName; + this.type = type; } public static SnowflakeIdentifier ofRoot() { - return new SnowflakeIdentifier(null, null, null); + return new SnowflakeIdentifier(null, null, null, Type.ROOT); } public static SnowflakeIdentifier ofDatabase(String databaseName) { Preconditions.checkArgument(null != databaseName, "databaseName must be non-null"); - return new SnowflakeIdentifier(databaseName, null, null); + return new SnowflakeIdentifier(databaseName, null, null, Type.DATABASE); } public static SnowflakeIdentifier ofSchema(String databaseName, String schemaName) { Preconditions.checkArgument(null != databaseName, "databaseName must be non-null"); Preconditions.checkArgument(null != schemaName, "schemaName must be non-null"); - return new SnowflakeIdentifier(databaseName, schemaName, null); + return new SnowflakeIdentifier(databaseName, schemaName, null, Type.SCHEMA); } public static SnowflakeIdentifier ofTable( @@ -62,7 +103,7 @@ public static SnowflakeIdentifier ofTable( Preconditions.checkArgument(null != databaseName, "databaseName must be non-null"); Preconditions.checkArgument(null != schemaName, "schemaName must be non-null"); Preconditions.checkArgument(null != tableName, "tableName must be non-null"); - return new SnowflakeIdentifier(databaseName, schemaName, tableName); + return new SnowflakeIdentifier(databaseName, schemaName, tableName, Type.TABLE); } /** @@ -70,27 +111,19 @@ public static SnowflakeIdentifier ofTable( * expect non-null databaseName and schemaName. If type is DATABASE, expect non-null databaseName. * If type is ROOT, expect all of databaseName, schemaName, and tableName to be null. 
*/ - public Type getType() { - if (null != tableName) { - return Type.TABLE; - } else if (null != schemaName) { - return Type.SCHEMA; - } else if (null != databaseName) { - return Type.DATABASE; - } else { - return Type.ROOT; - } + public Type type() { + return type; } - public String getTableName() { + public String tableName() { return tableName; } - public String getDatabaseName() { + public String databaseName() { return databaseName; } - public String getSchemaName() { + public String schemaName() { return schemaName; } @@ -115,7 +148,7 @@ public int hashCode() { /** Returns this identifier as a String suitable for use in a Snowflake IDENTIFIER param. */ public String toIdentifierString() { - switch (getType()) { + switch (type()) { case TABLE: return String.format("%s.%s.%s", databaseName, schemaName, tableName); case SCHEMA: @@ -129,39 +162,6 @@ public String toIdentifierString() { @Override public String toString() { - return String.format("%s: '%s'", getType(), toIdentifierString()); - } - - /** - * Expects to handle ResultSets representing fully-qualified Snowflake Schema identifiers, - * containing "database_name" and "name" (representing schemaName). - */ - public static ResultSetHandler> createSchemaHandler() { - return rs -> { - List schemas = Lists.newArrayList(); - while (rs.next()) { - String databaseName = rs.getString("database_name"); - String schemaName = rs.getString("name"); - schemas.add(SnowflakeIdentifier.ofSchema(databaseName, schemaName)); - } - return schemas; - }; - } - - /** - * Expects to handle ResultSets representing fully-qualified Snowflake Table identifiers, - * containing "database_name", "schema_name", and "name" (representing tableName). - */ - public static ResultSetHandler> createTableHandler() { - return rs -> { - List tables = Lists.newArrayList(); - while (rs.next()) { - String databaseName = rs.getString("database_name"); - String schemaName = rs.getString("schema_name"); - String tableName = rs.getString("name"); - tables.add(SnowflakeIdentifier.ofTable(databaseName, schemaName, tableName)); - } - return tables; - }; + return String.format("%s: '%s'", type(), toIdentifierString()); } } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java similarity index 97% rename from snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java rename to snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java index d58bc81e3f73..e4c8bddf440d 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/entities/SnowflakeTableMetadata.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.iceberg.snowflake.entities; +package org.apache.iceberg.snowflake; import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; @@ -27,7 +27,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; -public class SnowflakeTableMetadata { +class SnowflakeTableMetadata { public static final Pattern SNOWFLAKE_AZURE_PATTERN = Pattern.compile("azure://([^/]+)/([^/]+)/(.*)"); @@ -40,7 +40,7 @@ public class SnowflakeTableMetadata { // SnowflakeTableMetadata instances should not depend on equality of this field. 
private String rawJsonVal; - public SnowflakeTableMetadata( + SnowflakeTableMetadata( String snowflakeMetadataLocation, String icebergMetadataLocation, String status, @@ -120,6 +120,7 @@ public static String getIcebergLocationFromSnowflakeLocation(String snowflakeLoc // gs://bucket/path return "gs" + snowflakeLocation.substring(3); } + return snowflakeLocation; } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java index 20ab4fbd28d2..2539768b266f 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java @@ -23,9 +23,8 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; -import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,6 +36,7 @@ class SnowflakeTableOperations extends BaseMetastoreTableOperations { private final FileIO fileIO; private final TableIdentifier tableIdentifier; private final SnowflakeIdentifier snowflakeIdentifierForTable; + private final String fullTableName; private final SnowflakeClient snowflakeClient; @@ -53,8 +53,8 @@ protected SnowflakeTableOperations( this.catalogProperties = properties; this.catalogName = catalogName; this.tableIdentifier = tableIdentifier; - this.snowflakeIdentifierForTable = - NamespaceHelpers.getSnowflakeIdentifierForTableIdentifier(tableIdentifier); + this.snowflakeIdentifierForTable = NamespaceHelpers.toSnowflakeIdentifier(tableIdentifier); + this.fullTableName = String.format("%s.%s", catalogName, tableIdentifier.toString()); } @Override @@ -76,15 +76,22 @@ public FileIO io() { @Override protected String tableName() { - return tableIdentifier.toString(); + return fullTableName; + } + + @VisibleForTesting + String fullTableName() { + return tableName(); } private String getTableMetadataLocation() { - SnowflakeTableMetadata metadata = snowflakeClient.getTableMetadata(snowflakeIdentifierForTable); + SnowflakeTableMetadata metadata = + snowflakeClient.loadTableMetadata(snowflakeIdentifierForTable); if (metadata == null) { throw new NoSuchTableException("Cannot find table %s", snowflakeIdentifierForTable); } + if (!metadata.getStatus().equals("success")) { LOG.warn( "Got non-successful table metadata: {} with metadataLocation {} for table {}", @@ -92,6 +99,7 @@ private String getTableMetadataLocation() { metadata.getIcebergMetadataLocation(), snowflakeIdentifierForTable); } + return metadata.getIcebergMetadataLocation(); } } diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java index 50b8d5e3b388..5e0588ce78c6 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java @@ -24,8 +24,6 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import 
org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; -import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; public class FakeSnowflakeClient implements SnowflakeClient { // In-memory lookup by database/schema/tableName to table metadata. @@ -57,7 +55,7 @@ public void addTable( public List listSchemas(SnowflakeIdentifier scope) { Preconditions.checkState(!closed, "Cannot call listSchemas after calling close()"); List schemas = Lists.newArrayList(); - switch (scope.getType()) { + switch (scope.type()) { case ROOT: // "account-level" listing. for (Map.Entry>> db : @@ -68,7 +66,7 @@ public List listSchemas(SnowflakeIdentifier scope) { } break; case DATABASE: - String dbName = scope.getDatabaseName(); + String dbName = scope.databaseName(); if (databases.containsKey(dbName)) { for (String schema : databases.get(dbName).keySet()) { schemas.add(SnowflakeIdentifier.ofSchema(dbName, schema)); @@ -88,7 +86,7 @@ public List listSchemas(SnowflakeIdentifier scope) { public List listIcebergTables(SnowflakeIdentifier scope) { Preconditions.checkState(!closed, "Cannot call listIcebergTables after calling close()"); List tables = Lists.newArrayList(); - switch (scope.getType()) { + switch (scope.type()) { case ROOT: { // "account-level" listing. @@ -105,7 +103,7 @@ public List listIcebergTables(SnowflakeIdentifier scope) { } case DATABASE: { - String dbName = scope.getDatabaseName(); + String dbName = scope.databaseName(); if (databases.containsKey(dbName)) { for (Map.Entry> schema : databases.get(dbName).entrySet()) { @@ -120,9 +118,9 @@ public List listIcebergTables(SnowflakeIdentifier scope) { } case SCHEMA: { - String dbName = scope.getDatabaseName(); + String dbName = scope.databaseName(); if (databases.containsKey(dbName)) { - String schemaName = scope.getSchemaName(); + String schemaName = scope.schemaName(); if (databases.get(dbName).containsKey(schemaName)) { for (String tableName : databases.get(dbName).get(schemaName).keySet()) { tables.add(SnowflakeIdentifier.ofTable(dbName, schemaName, tableName)); @@ -144,21 +142,21 @@ public List listIcebergTables(SnowflakeIdentifier scope) { } @Override - public SnowflakeTableMetadata getTableMetadata(SnowflakeIdentifier tableIdentifier) { + public SnowflakeTableMetadata loadTableMetadata(SnowflakeIdentifier tableIdentifier) { Preconditions.checkState(!closed, "Cannot call getTableMetadata after calling close()"); Preconditions.checkArgument( - tableIdentifier.getType() == SnowflakeIdentifier.Type.TABLE, + tableIdentifier.type() == SnowflakeIdentifier.Type.TABLE, "tableIdentifier must be type TABLE, get: %s", tableIdentifier); - String dbName = tableIdentifier.getDatabaseName(); - String schemaName = tableIdentifier.getSchemaName(); + String dbName = tableIdentifier.databaseName(); + String schemaName = tableIdentifier.schemaName(); if (!databases.containsKey(dbName) || !databases.get(dbName).containsKey(schemaName) - || !databases.get(dbName).get(schemaName).containsKey(tableIdentifier.getTableName())) { + || !databases.get(dbName).get(schemaName).containsKey(tableIdentifier.tableName())) { throw new UncheckedSQLException("Object does not exist: object: '%s'", tableIdentifier); } - return databases.get(dbName).get(schemaName).get(tableIdentifier.getTableName()); + return databases.get(dbName).get(schemaName).get(tableIdentifier.tableName()); } public boolean isClosed() { diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java 
b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java index b04c7f7e8d49..86c0478a7cc2 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java @@ -34,8 +34,6 @@ import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.jdbc.UncheckedInterruptedException; import org.apache.iceberg.jdbc.UncheckedSQLException; -import org.apache.iceberg.snowflake.entities.SnowflakeIdentifier; -import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Test; @@ -325,7 +323,7 @@ public void testGetS3TableMetadata() throws SQLException { "{\"metadataLocation\":\"s3://tab1/metadata/v3.metadata.json\",\"status\":\"success\"}"); SnowflakeTableMetadata actualMetadata = - snowflakeClient.getTableMetadata( + snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1")); verify(mockQueryRunner) @@ -356,7 +354,7 @@ public void testGetAzureTableMetadata() throws SQLException { "{\"metadataLocation\":\"azure://myaccount.blob.core.windows.net/mycontainer/tab3/metadata/v334.metadata.json\",\"status\":\"success\"}"); SnowflakeTableMetadata actualMetadata = - snowflakeClient.getTableMetadata( + snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1")); verify(mockQueryRunner) @@ -387,7 +385,7 @@ public void testGetGcsTableMetadata() throws SQLException { "{\"metadataLocation\":\"gcs://tab5/metadata/v793.metadata.json\",\"status\":\"success\"}"); SnowflakeTableMetadata actualMetadata = - snowflakeClient.getTableMetadata( + snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1")); verify(mockQueryRunner) @@ -414,7 +412,7 @@ public void testGetTableMetadataMalformedJson() throws SQLException { Assertions.assertThatExceptionOfType(IllegalArgumentException.class) .isThrownBy( () -> - snowflakeClient.getTableMetadata( + snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))) .withMessageContaining("{\"malformed_no_closing_bracket"); } @@ -430,7 +428,7 @@ public void testGetTableMetadataSQLException() throws SQLException, InterruptedE Assertions.assertThatExceptionOfType(UncheckedSQLException.class) .isThrownBy( () -> - snowflakeClient.getTableMetadata( + snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))) .withStackTraceContaining("Fake SQL exception"); } @@ -446,7 +444,7 @@ public void testGetTableMetadataInterruptedException() throws SQLException, Inte Assertions.assertThatExceptionOfType(UncheckedInterruptedException.class) .isThrownBy( () -> - snowflakeClient.getTableMetadata( + snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1"))) .withStackTraceContaining("Fake interrupted exception"); } diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java index 0541123c922e..678bb02db7fd 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java @@ -27,9 +27,9 @@ import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import 
org.apache.iceberg.io.InMemoryFileIO; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.snowflake.entities.SnowflakeTableMetadata; import org.apache.iceberg.types.Types; import org.assertj.core.api.Assertions; import org.junit.Before; @@ -259,8 +259,11 @@ public void testLoadTableWithMalformedTableIdentifier() { } @Test - public void testCloseBeforeInitialize() throws IOException { + public void testCloseBeforeInitializeDoesntThrow() throws IOException { catalog = new SnowflakeCatalog(); + + // Make sure no exception is thrown if we call close() before initialize(), in case callers + // add a catalog to auto-close() helpers but end up never using/initializing a catalog. catalog.close(); } @@ -274,4 +277,12 @@ public void testClose() throws IOException { .overridingErrorMessage("expected close() to propagate to fileIO") .isTrue(); } + + @Test + public void testTableNameFromTableOperations() { + SnowflakeTableOperations castedTableOps = + (SnowflakeTableOperations) + catalog.newTableOps(TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1")); + Assertions.assertThat(castedTableOps.fullTableName()).isEqualTo("slushLog.DB_1.SCHEMA_1.TAB_1"); + } } From ca6deab8c2e110cd064ab47120705b865bb14c15 Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Tue, 20 Dec 2022 22:53:29 -0800 Subject: [PATCH 11/20] Fix behavior of loadNamespaceMetadata to throw when the namespace doesn't exist. Refactor for naming conventions and consolidate identifier handling into NamespaceHelpers. Instantiate a fresh FileIO for each newTableOps call. --- .../snowflake/JdbcSnowflakeClient.java | 124 +++++++++++++ .../iceberg/snowflake/NamespaceHelpers.java | 40 +++++ .../iceberg/snowflake/SnowflakeCatalog.java | 107 +++++++----- .../iceberg/snowflake/SnowflakeClient.java | 10 ++ .../snowflake/SnowflakeIdentifier.java | 14 ++ .../snowflake/SnowflakeTableMetadata.java | 9 +- .../snowflake/SnowflakeTableOperations.java | 15 +- .../snowflake/FakeSnowflakeClient.java | 21 +++ .../snowflake/JdbcSnowflakeClientTest.java | 163 ++++++++++++++++++ .../snowflake/SnowflakeCatalogTest.java | 62 ++++--- 10 files changed, 488 insertions(+), 77 deletions(-) diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java index a8b06e42f02d..998c20fe4887 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java @@ -48,6 +48,126 @@ void setQueryRunner(QueryRunner queryRunner) { this.queryRunner = queryRunner; } + /** + * For rare cases where PreparedStatements aren't supported for user-supplied identifiers intended + * for use in special LIKE clauses, we can sanitize by "broadening" the identifier with + * single-character wildcards and manually post-filtering client-side. + * + *

Note: This sanitization approach intentionally "broadens" the scope of matching results; + * callers must be able to handle this method returning an all-wildcard expression; i.e. the + * caller must treat the usage of the LIKE clause as only an optional optimization, and should + * post-filter for correctness as if the LIKE clause wasn't present in the query at all. + */ + @VisibleForTesting + String sanitizeIdentifierWithWildcardForLikeClause(String identifier) { + // Restrict identifiers to the "Unquoted object identifiers" syntax documented at + // https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html + // + // Use a strict allowlist of characters, replace everything *not* matching the character set + // with "_", which is used as a single-character wildcard in Snowflake. + String sanitized = identifier.replaceAll("[^a-zA-Z0-9_$]", "_"); + if (sanitized.startsWith("$")) { + sanitized = "_" + sanitized.substring(1); + } + return sanitized; + } + + @Override + public boolean databaseExists(SnowflakeIdentifier database) { + Preconditions.checkArgument( + database.type() == SnowflakeIdentifier.Type.DATABASE, + "databaseExists requires a DATABASE identifier, got '%s'", + database); + + // Due to current limitations in PreparedStatement parameters for the LIKE clause in + // SHOW DATABASES queries, we'll use a fairly limited allowlist for identifier characters, + // using wildcards for non-allowed characters, and post-filter for matching. + final String finalQuery = + String.format( + "SHOW DATABASES LIKE '%s' IN ACCOUNT", + sanitizeIdentifierWithWildcardForLikeClause(database.databaseName())); + List databases; + try { + databases = + connectionPool.run( + conn -> + queryRunner.query( + conn, + finalQuery, + SnowflakeIdentifier.DATABASE_RESULT_SET_HANDLER, + (Object[]) null)); + } catch (SQLException e) { + throw new UncheckedSQLException(e, "Failed to check if database exists"); + } catch (InterruptedException e) { + throw new UncheckedInterruptedException(e, "Interrupted while checking if database exists"); + } + + // Filter to handle the edge case of '_' appearing as a wildcard that can't be remapped the way + // it can for predicates in SELECT statements. + databases.removeIf(db -> !database.databaseName().equalsIgnoreCase(db.databaseName())); + return !databases.isEmpty(); + } + + @Override + public boolean schemaExists(SnowflakeIdentifier schema) { + Preconditions.checkArgument( + schema.type() == SnowflakeIdentifier.Type.SCHEMA, + "schemaExists requires a SCHEMA identifier, got '%s'", + schema); + + if (!databaseExists(SnowflakeIdentifier.ofDatabase(schema.databaseName()))) { + return false; + } + + // Due to current limitations in PreparedStatement parameters for the LIKE clause in + // SHOW SCHEMAS queries, we'll use a fairly limited allowlist for identifier characters, + // using wildcards for non-allowed characters, and post-filter for matching.
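+ // Illustration with a hypothetical input: a schema named "my-schema" sanitizes to
+ // "my_schema"; since '_' is itself a single-character LIKE wildcard, the query below would
+ // also match e.g. a schema named "myXschema", and the equalsIgnoreCase post-filter afterwards
+ // restores exact-match semantics.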
+ final String finalQuery = + String.format( + "SHOW SCHEMAS LIKE '%s' IN DATABASE IDENTIFIER(?)", + sanitizeIdentifierWithWildcardForLikeClause(schema.schemaName())); + List schemas; + try { + schemas = + connectionPool.run( + conn -> + queryRunner.query( + conn, + finalQuery, + SnowflakeIdentifier.SCHEMA_RESULT_SET_HANDLER, + new Object[] {schema.databaseName()})); + } catch (SQLException e) { + throw new UncheckedSQLException(e, "Failed to check if schema exists"); + } catch (InterruptedException e) { + throw new UncheckedInterruptedException(e, "Interrupted while checking if schema exists"); + } + + // Filter to handle the edge case of '_' appearing as a wildcard that can't be remapped the way + // it can for predicates in SELECT statements. + schemas.removeIf(sc -> !schema.schemaName().equalsIgnoreCase(sc.schemaName())); + return !schemas.isEmpty(); + } + + @Override + public List listDatabases() { + List databases; + try { + databases = + connectionPool.run( + conn -> + queryRunner.query( + conn, + "SHOW DATABASES IN ACCOUNT", + SnowflakeIdentifier.DATABASE_RESULT_SET_HANDLER, + (Object[]) null)); + } catch (SQLException e) { + throw new UncheckedSQLException(e, "Failed to list databases"); + } catch (InterruptedException e) { + throw new UncheckedInterruptedException(e, "Interrupted while listing databases"); + } + return databases; + } + @Override public List listSchemas(SnowflakeIdentifier scope) { StringBuilder baseQuery = new StringBuilder("SHOW SCHEMAS"); @@ -133,6 +253,10 @@ public List listIcebergTables(SnowflakeIdentifier scope) { @Override public SnowflakeTableMetadata loadTableMetadata(SnowflakeIdentifier tableIdentifier) { + Preconditions.checkArgument( + tableIdentifier.type() == SnowflakeIdentifier.Type.TABLE, + "loadTableMetadata requires a TABLE identifier, got '%s'", + tableIdentifier); SnowflakeTableMetadata tableMeta; try { final String finalQuery = "SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"; diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java index 6c6db739737d..b35bf9829d25 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java @@ -80,5 +80,45 @@ public static SnowflakeIdentifier toSnowflakeIdentifier(TableIdentifier identifi return ret; } + /** + * Converts a SnowflakeIdentifier of type ROOT, DATABASE, or SCHEMA into an equivalent Iceberg + * Namespace; throws IllegalArgumentException if not an appropriate type. + */ + public static Namespace toIcebergNamespace(SnowflakeIdentifier identifier) { + Namespace namespace = null; + switch (identifier.type()) { + case ROOT: + namespace = Namespace.of(); + break; + case DATABASE: + namespace = Namespace.of(identifier.databaseName()); + break; + case SCHEMA: + namespace = Namespace.of(identifier.databaseName(), identifier.schemaName()); + break; + default: + throw new IllegalArgumentException( + String.format("Cannot convert identifier '%s' to Namespace", identifier)); + } + LOG.debug("toIcebergNamespace({}) -> {}", identifier, namespace); + return namespace; + } + + /** + * Converts a SnowflakeIdentifier to an equivalent Iceberg TableIdentifier; the identifier must be + * of type TABLE. 
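+ * For example (illustrative values), a TABLE identifier for DB_1.SCHEMA_1.TABLE_1 converts to
+ * TableIdentifier.of("DB_1", "SCHEMA_1", "TABLE_1").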
+ */ + public static TableIdentifier toIcebergTableIdentifier(SnowflakeIdentifier identifier) { + Preconditions.checkArgument( + identifier.type() == SnowflakeIdentifier.Type.TABLE, + "SnowflakeIdentifier must be type TABLE, got '%s'", + identifier); + TableIdentifier ret = + TableIdentifier.of( + identifier.databaseName(), identifier.schemaName(), identifier.tableName()); + LOG.debug("toIcebergTableIdentifier({}) -> {}", identifier, ret); + return ret; + } + private NamespaceHelpers() {} } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java index 3a76eb259e31..0667625f0c9e 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java @@ -46,13 +46,19 @@ public class SnowflakeCatalog extends BaseMetastoreCatalog public static final String DEFAULT_CATALOG_NAME = "snowflake_catalog"; public static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.io.ResolvingFileIO"; + static class FileIOFactory { + public FileIO newFileIO(String impl, Map properties, Object hadoopConf) { + return CatalogUtil.loadFileIO(impl, properties, hadoopConf); + } + } + private static final Logger LOG = LoggerFactory.getLogger(SnowflakeCatalog.class); private CloseableGroup closeableGroup; private Object conf; private String catalogName; private Map catalogProperties; - private FileIO fileIO; + private FileIOFactory fileIOFactory; private SnowflakeClient snowflakeClient; public SnowflakeCatalog() {} @@ -61,19 +67,15 @@ public SnowflakeCatalog() {} public List listTables(Namespace namespace) { SnowflakeIdentifier scope = NamespaceHelpers.toSnowflakeIdentifier(namespace); Preconditions.checkArgument( - scope.type() == SnowflakeIdentifier.Type.ROOT - || scope.type() == SnowflakeIdentifier.Type.DATABASE - || scope.type() == SnowflakeIdentifier.Type.SCHEMA, - "listTables must be at ROOT, DATABASE, or SCHEMA level; got %s from namespace %s", + scope.type() == SnowflakeIdentifier.Type.SCHEMA, + "listTables must be at SCHEMA level; got %s from namespace %s", scope, namespace); List sfTables = snowflakeClient.listIcebergTables(scope); return sfTables.stream() - .map( - table -> - TableIdentifier.of(table.databaseName(), table.schemaName(), table.tableName())) + .map(NamespaceHelpers::toIcebergTableIdentifier) .collect(Collectors.toList()); } @@ -108,16 +110,7 @@ public void initialize(String name, Map properties) { } JdbcClientPool connectionPool = new JdbcClientPool(uri, properties); - String fileIOImpl = DEFAULT_FILE_IO_IMPL; - if (properties.containsKey(CatalogProperties.FILE_IO_IMPL)) { - fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL); - } - - initialize( - name, - new JdbcSnowflakeClient(connectionPool), - CatalogUtil.loadFileIO(fileIOImpl, properties, conf), - properties); + initialize(name, new JdbcSnowflakeClient(connectionPool), new FileIOFactory(), properties); } /** @@ -125,21 +118,24 @@ public void initialize(String name, Map properties) { * * @param name The name of the catalog, defaults to "snowflake_catalog" * @param snowflakeClient The client encapsulating network communication with Snowflake - * @param fileIO The {@link FileIO} to use for table operations + * @param fileIOFactory The {@link FileIOFactory} to use to instantiate a new FileIO for each new + * table operation * @param properties The catalog options to use and propagate to dependencies */
@SuppressWarnings("checkstyle:HiddenField") void initialize( - String name, SnowflakeClient snowflakeClient, FileIO fileIO, Map properties) { + String name, + SnowflakeClient snowflakeClient, + FileIOFactory fileIOFactory, + Map properties) { Preconditions.checkArgument(null != snowflakeClient, "snowflakeClient must be non-null"); - Preconditions.checkArgument(null != fileIO, "fileIO must be non-null"); + Preconditions.checkArgument(null != fileIOFactory, "fileIOFactory must be non-null"); this.catalogName = name == null ? DEFAULT_CATALOG_NAME : name; this.snowflakeClient = snowflakeClient; - this.fileIO = fileIO; + this.fileIOFactory = fileIOFactory; this.catalogProperties = properties; this.closeableGroup = new CloseableGroup(); closeableGroup.addCloseable(snowflakeClient); - closeableGroup.addCloseable(fileIO); closeableGroup.setSuppressCloseFailure(true); } @@ -159,24 +155,32 @@ public void createNamespace(Namespace namespace, Map metadata) { @Override public List listNamespaces(Namespace namespace) { SnowflakeIdentifier scope = NamespaceHelpers.toSnowflakeIdentifier(namespace); - Preconditions.checkArgument( - scope.type() == SnowflakeIdentifier.Type.ROOT - || scope.type() == SnowflakeIdentifier.Type.DATABASE, - "listNamespaces must be at either ROOT or DATABASE level; got %s from namespace %s", - scope, - namespace); - List sfSchemas = snowflakeClient.listSchemas(scope); + List results = null; + switch (scope.type()) { + case ROOT: + results = snowflakeClient.listDatabases(); + break; + case DATABASE: + results = snowflakeClient.listSchemas(scope); + break; + default: + throw new IllegalArgumentException( + String.format( + "listNamespaces must be at either ROOT or DATABASE level; got %s from namespace %s", + scope, namespace)); + } List namespaceList = - sfSchemas.stream() + results.stream() .map( - schema -> { + result -> { Preconditions.checkState( - schema.type() == SnowflakeIdentifier.Type.SCHEMA, - "Got identifier of type %s from listSchemas for %s", - schema.type(), + result.type() == SnowflakeIdentifier.Type.SCHEMA + || result.type() == SnowflakeIdentifier.Type.DATABASE, + "Got identifier of type %s from listNamespaces for %s", + result.type(), namespace); - return Namespace.of(schema.databaseName(), schema.schemaName()); + return NamespaceHelpers.toIcebergNamespace(result); }) .collect(Collectors.toList()); return namespaceList; @@ -185,7 +189,27 @@ public List listNamespaces(Namespace namespace) { @Override public Map loadNamespaceMetadata(Namespace namespace) throws NoSuchNamespaceException { - return ImmutableMap.of(); + SnowflakeIdentifier id = NamespaceHelpers.toSnowflakeIdentifier(namespace); + boolean namespaceExists; + switch (id.type()) { + case DATABASE: + namespaceExists = snowflakeClient.databaseExists(id); + break; + case SCHEMA: + namespaceExists = snowflakeClient.schemaExists(id); + break; + default: + throw new IllegalArgumentException( + String.format( + "loadNamespaceMetadata must be at either DATABASE or SCHEMA level; got %s from namespace %s", + id, namespace)); + } + if (namespaceExists) { + return ImmutableMap.of(); + } else { + throw new NoSuchNamespaceException( + "Namespace '%s' with snowflake identifier '%s' doesn't exist", namespace, id); + } } @Override @@ -208,8 +232,13 @@ public boolean removeProperties(Namespace namespace, Set properties) { @Override protected TableOperations newTableOps(TableIdentifier tableIdentifier) { - return new SnowflakeTableOperations( - snowflakeClient, fileIO, catalogProperties, catalogName, tableIdentifier); + String
@Override @@ -208,8 +232,13 @@ public boolean removeProperties(Namespace namespace, Set<String> properties) { @Override protected TableOperations newTableOps(TableIdentifier tableIdentifier) { - return new SnowflakeTableOperations( - snowflakeClient, fileIO, catalogProperties, catalogName, tableIdentifier); + String fileIOImpl = DEFAULT_FILE_IO_IMPL; + if (catalogProperties.containsKey(CatalogProperties.FILE_IO_IMPL)) { + fileIOImpl = catalogProperties.get(CatalogProperties.FILE_IO_IMPL); + } + FileIO fileIO = fileIOFactory.newFileIO(fileIOImpl, catalogProperties, conf); + closeableGroup.addCloseable(fileIO); + return new SnowflakeTableOperations(snowflakeClient, fileIO, catalogName, tableIdentifier); } @Override diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java index 674ed10e6c50..2dfadb9a65b4 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeClient.java @@ -28,6 +28,16 @@ * other underlying libraries/protocols. */ interface SnowflakeClient extends Closeable { + + /** Returns true if the database exists, false otherwise. */ + boolean databaseExists(SnowflakeIdentifier database); + + /** Returns true if the schema and its parent database exist, false otherwise. */ + boolean schemaExists(SnowflakeIdentifier schema); + + /** Lists all Snowflake databases within the currently configured account. */ + List<SnowflakeIdentifier> listDatabases(); + /** * Lists all Snowflake schemas within a given scope. Returned SnowflakeIdentifiers must have * type() == SnowflakeIdentifier.Type.SCHEMA. diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java index bc2db979c292..f989d002ac90 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java @@ -40,6 +40,20 @@ public enum Type { TABLE } + /** + * Expects to handle ResultSets representing fully-qualified Snowflake Database identifiers, + * containing "name" (representing databaseName). + */ + public static final ResultSetHandler<List<SnowflakeIdentifier>> DATABASE_RESULT_SET_HANDLER = + rs -> { + List<SnowflakeIdentifier> databases = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("name"); + databases.add(SnowflakeIdentifier.ofDatabase(databaseName)); + } + return databases; + }; + /** * Expects to handle ResultSets representing fully-qualified Snowflake Schema identifiers, + * containing "database_name" and "name" (representing schemaName). diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java index e4c8bddf440d..e46f0c2457f4 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java @@ -52,12 +52,12 @@ class SnowflakeTableMetadata { } /** Storage location of table metadata in Snowflake's path syntax. */ - public String getSnowflakeMetadataLocation() { + public String snowflakeMetadataLocation() { return snowflakeMetadataLocation; } /** Storage location of table metadata in Iceberg's path syntax. */ - public String getIcebergMetadataLocation() { + public String icebergMetadataLocation() { return icebergMetadataLocation; } @@ -99,7 +99,7 @@ public String toString() { * snowflakeLocation is a known non-compatible path syntax but fails to match the expected path * components for a successful translation.
*/ - public static String getIcebergLocationFromSnowflakeLocation(String snowflakeLocation) { + public static String snowflakeLocationToIcebergLocation(String snowflakeLocation) { if (snowflakeLocation.startsWith("azure://")) { // Convert from expected path of the form: // azure://account.blob.core.windows.net/container/volumepath @@ -139,8 +139,7 @@ public static SnowflakeTableMetadata parseJson(String json) { String snowflakeMetadataLocation = JsonUtil.getString("metadataLocation", parsedVal); String status = JsonUtil.getStringOrNull("status", parsedVal); - String icebergMetadataLocation = - getIcebergLocationFromSnowflakeLocation(snowflakeMetadataLocation); + String icebergMetadataLocation = snowflakeLocationToIcebergLocation(snowflakeMetadataLocation); return new SnowflakeTableMetadata( snowflakeMetadataLocation, icebergMetadataLocation, status, json); diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java index 2539768b266f..dcb361580acc 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java @@ -18,7 +18,6 @@ */ package org.apache.iceberg.snowflake; -import java.util.Map; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchTableException; @@ -31,7 +30,6 @@ class SnowflakeTableOperations extends BaseMetastoreTableOperations { private static final Logger LOG = LoggerFactory.getLogger(SnowflakeTableOperations.class); - private final String catalogName; private final FileIO fileIO; private final TableIdentifier tableIdentifier; @@ -40,18 +38,13 @@ class SnowflakeTableOperations extends BaseMetastoreTableOperations { private final SnowflakeClient snowflakeClient; - private final Map catalogProperties; - protected SnowflakeTableOperations( SnowflakeClient snowflakeClient, FileIO fileIO, - Map properties, String catalogName, TableIdentifier tableIdentifier) { this.snowflakeClient = snowflakeClient; this.fileIO = fileIO; - this.catalogProperties = properties; - this.catalogName = catalogName; this.tableIdentifier = tableIdentifier; this.snowflakeIdentifierForTable = NamespaceHelpers.toSnowflakeIdentifier(tableIdentifier); this.fullTableName = String.format("%s.%s", catalogName, tableIdentifier.toString()); @@ -60,7 +53,7 @@ protected SnowflakeTableOperations( @Override public void doRefresh() { LOG.debug("Getting metadata location for table {}", tableIdentifier); - String location = getTableMetadataLocation(); + String location = loadTableMetadataLocation(); Preconditions.checkState( location != null && !location.isEmpty(), "Got null or empty location %s for table %s", @@ -84,7 +77,7 @@ String fullTableName() { return tableName(); } - private String getTableMetadataLocation() { + private String loadTableMetadataLocation() { SnowflakeTableMetadata metadata = snowflakeClient.loadTableMetadata(snowflakeIdentifierForTable); @@ -96,10 +89,10 @@ private String getTableMetadataLocation() { LOG.warn( "Got non-successful table metadata: {} with metadataLocation {} for table {}", metadata.getStatus(), - metadata.getIcebergMetadataLocation(), + metadata.icebergMetadataLocation(), snowflakeIdentifierForTable); } - return metadata.getIcebergMetadataLocation(); + return metadata.icebergMetadataLocation(); } } diff --git 
a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java index 5e0588ce78c6..f4732fb6ca76 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java @@ -51,6 +51,27 @@ public void addTable( tables.put(tableName, metadata); } + @Override + public boolean databaseExists(SnowflakeIdentifier database) { + return databases.containsKey(database.databaseName()); + } + + @Override + public boolean schemaExists(SnowflakeIdentifier schema) { + return databases.containsKey(schema.databaseName()) + && databases.get(schema.databaseName()).containsKey(schema.schemaName()); + } + + @Override + public List listDatabases() { + Preconditions.checkState(!closed, "Cannot call listDatabases after calling close()"); + List databaseIdentifiers = Lists.newArrayList(); + for (String databaseName : databases.keySet()) { + databaseIdentifiers.add(SnowflakeIdentifier.ofDatabase(databaseName)); + } + return databaseIdentifiers; + } + @Override public List listSchemas(SnowflakeIdentifier scope) { Preconditions.checkState(!closed, "Cannot call listSchemas after calling close()"); diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java index 86c0478a7cc2..fbfc075979e6 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java @@ -89,6 +89,169 @@ public void testNullClientPoolInConstructor() { .withMessageContaining("JdbcClientPool must be non-null"); } + @Test + public void testDatabaseExists() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("name")).thenReturn("DB_1"); + + Assertions.assertThat(snowflakeClient.databaseExists(SnowflakeIdentifier.ofDatabase("DB_1"))) + .isTrue(); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"), + any(ResultSetHandler.class), + eq((Object[]) null)); + } + + @Test + public void testDatabaseExistsSpecialCharacters() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("name")).thenReturn("$DB_1$.'!@#%^&*"); + + Assertions.assertThat( + snowflakeClient.databaseExists(SnowflakeIdentifier.ofDatabase("$DB_1$.'!@#%^&*"))) + .isTrue(); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW DATABASES LIKE '_DB_1$_________' IN ACCOUNT"), + any(ResultSetHandler.class), + eq((Object[]) null)); + } + + @Test + public void testDatabaseDoesntExistNoResults() throws SQLException { + when(mockResultSet.next()).thenReturn(false); + + Assertions.assertThat(snowflakeClient.databaseExists(SnowflakeIdentifier.ofDatabase("DB_1"))) + .isFalse(); + } + + @Test + public void testDatabaseDoesntExistMismatchedResults() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("name")).thenReturn("DBZ1"); + + Assertions.assertThat(snowflakeClient.databaseExists(SnowflakeIdentifier.ofDatabase("DB_1"))) + .isFalse(); + } + + @Test + public void testSchemaExists() throws SQLException { + when(mockResultSet.next()) + .thenReturn(true) + .thenReturn(false) + .thenReturn(true) + .thenReturn(false); + 
when(mockResultSet.getString("name")).thenReturn("DB_1").thenReturn("SCHEMA_1"); + when(mockResultSet.getString("database_name")).thenReturn("DB_1"); + + Assertions.assertThat( + snowflakeClient.schemaExists(SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1"))) + .isTrue(); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"), + any(ResultSetHandler.class), + eq((Object[]) null)); + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW SCHEMAS LIKE 'SCHEMA_1' IN DATABASE IDENTIFIER(?)"), + any(ResultSetHandler.class), + eq("DB_1")); + } + + @Test + public void testSchemaExistsSpecialCharacters() throws SQLException { + when(mockResultSet.next()) + .thenReturn(true) + .thenReturn(false) + .thenReturn(true) + .thenReturn(false); + when(mockResultSet.getString("name")).thenReturn("DB_1").thenReturn("$SCHEMA_1$.'!@#%^&*"); + when(mockResultSet.getString("database_name")).thenReturn("DB_1"); + + Assertions.assertThat( + snowflakeClient.schemaExists( + SnowflakeIdentifier.ofSchema("DB_1", "$SCHEMA_1$.'!@#%^&*"))) + .isTrue(); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"), + any(ResultSetHandler.class), + eq((Object[]) null)); + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW SCHEMAS LIKE '_SCHEMA_1$_________' IN DATABASE IDENTIFIER(?)"), + any(ResultSetHandler.class), + eq("DB_1")); + } + + @Test + public void testSchemaDoesntExistMismatchDatabase() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("name")).thenReturn("DBZ1"); + + Assertions.assertThat( + snowflakeClient.schemaExists(SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1"))) + .isFalse(); + } + + @Test + public void testSchemaDoesntExistNoSchemaFound() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(false).thenReturn(false); + when(mockResultSet.getString("name")).thenReturn("DB_1"); + + Assertions.assertThat( + snowflakeClient.schemaExists(SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1"))) + .isFalse(); + } + + @Test + public void testSchemaDoesntExistSchemaMismatch() throws SQLException { + when(mockResultSet.next()) + .thenReturn(true) + .thenReturn(false) + .thenReturn(true) + .thenReturn(false); + when(mockResultSet.getString("name")).thenReturn("DB_1").thenReturn("SCHEMAZ1"); + when(mockResultSet.getString("database_name")).thenReturn("DB_1"); + + Assertions.assertThat( + snowflakeClient.schemaExists(SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1"))) + .isFalse(); + } + + @Test + public void testListDatabasesInAccount() throws SQLException { + when(mockResultSet.next()).thenReturn(true).thenReturn(true).thenReturn(true).thenReturn(false); + when(mockResultSet.getString("name")).thenReturn("DB_1").thenReturn("DB_2").thenReturn("DB_3"); + + List actualList = snowflakeClient.listDatabases(); + + verify(mockQueryRunner) + .query( + eq(mockConnection), + eq("SHOW DATABASES IN ACCOUNT"), + any(ResultSetHandler.class), + eq((Object[]) null)); + + Assertions.assertThat(actualList) + .containsExactly( + SnowflakeIdentifier.ofDatabase("DB_1"), + SnowflakeIdentifier.ofDatabase("DB_2"), + SnowflakeIdentifier.ofDatabase("DB_3")); + } + /** * For the root scope, expect an underlying query to list schemas at the ACCOUNT level with no * query parameters. 
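The special-character tests above pin down the query-side behavior: characters outside Snowflake's unquoted-identifier alphabet are remapped to the single-character LIKE wildcard '_' before issuing SHOW ... LIKE, and because '_' over-matches (see testDatabaseDoesntExistMismatchedResults, where 'DBZ1' comes back for pattern 'DB_1'), the client must re-verify exact names on the returned rows. A minimal sketch consistent with the expected queries; the helper name and exact filtering here are assumptions, since the implementation itself is not visible in these hunks:

    // Hypothetical sketch of the sanitize-then-verify scheme the tests exercise.
    static String sanitizeIdentifierWithWildcardForLikeClause(String identifier) {
      // Anything outside the unquoted-identifier alphabet becomes the '_' wildcard;
      // "$DB_1$.'!@#%^&*" -> "$DB_1$_________", which matches the expected LIKE
      // pattern once the leading '$' is also remapped.
      String sanitized = identifier.replaceAll("[^a-zA-Z0-9_$]", "_");
      if (sanitized.startsWith("$")) {
        sanitized = "_" + sanitized.substring(1);
      }
      return sanitized;
    }

    // The LIKE result set is then re-filtered for an exact match, so a returned row
    // like 'DBZ1' does not satisfy databaseExists(DB_1) even though it matches 'DB_1'.
    boolean exists =
        databases.stream().anyMatch(db -> db.databaseName().equalsIgnoreCase("DB_1"));
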
diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java index 678bb02db7fd..afa79768e1c2 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java @@ -27,6 +27,7 @@ import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.InMemoryFileIO; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; @@ -41,6 +42,7 @@ public class SnowflakeCatalogTest { private SnowflakeCatalog catalog; private FakeSnowflakeClient fakeClient; private InMemoryFileIO fakeFileIO; + private SnowflakeCatalog.FileIOFactory fakeFileIOFactory; private Map properties; @Before @@ -115,14 +117,23 @@ public void before() { schema, partitionSpec, "gs://tab5/", ImmutableMap.of())) .getBytes()); + fakeFileIOFactory = + new SnowflakeCatalog.FileIOFactory() { + @Override + public FileIO newFileIO(String impl, Map prop, Object hadoopConf) { + return fakeFileIO; + } + }; + properties = Maps.newHashMap(); - catalog.initialize(TEST_CATALOG_NAME, fakeClient, fakeFileIO, properties); + catalog.initialize(TEST_CATALOG_NAME, fakeClient, fakeFileIOFactory, properties); } @Test public void testInitializeNullClient() { Assertions.assertThatExceptionOfType(IllegalArgumentException.class) - .isThrownBy(() -> catalog.initialize(TEST_CATALOG_NAME, null, fakeFileIO, properties)) + .isThrownBy( + () -> catalog.initialize(TEST_CATALOG_NAME, null, fakeFileIOFactory, properties)) .withMessageContaining("snowflakeClient must be non-null"); } @@ -130,17 +141,13 @@ public void testInitializeNullClient() { public void testInitializeNullFileIO() { Assertions.assertThatExceptionOfType(IllegalArgumentException.class) .isThrownBy(() -> catalog.initialize(TEST_CATALOG_NAME, fakeClient, null, properties)) - .withMessageContaining("fileIO must be non-null"); + .withMessageContaining("fileIOFactory must be non-null"); } @Test - public void testListNamespace() { + public void testListNamespaceInRoot() { Assertions.assertThat(catalog.listNamespaces()) - .containsExactly( - Namespace.of("DB_1", "SCHEMA_1"), - Namespace.of("DB_2", "SCHEMA_2"), - Namespace.of("DB_3", "SCHEMA_3"), - Namespace.of("DB_3", "SCHEMA_4")); + .containsExactly(Namespace.of("DB_1"), Namespace.of("DB_2"), Namespace.of("DB_3")); } @Test @@ -175,30 +182,25 @@ public void testListNamespaceWithinSchema() { @Test public void testListTables() { - Assertions.assertThat(catalog.listTables(Namespace.empty())) - .containsExactly( - TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1"), - TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_2"), - TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_3"), - TableIdentifier.of("DB_2", "SCHEMA_2", "TAB_4"), - TableIdentifier.of("DB_3", "SCHEMA_3", "TAB_5"), - TableIdentifier.of("DB_3", "SCHEMA_4", "TAB_6")); + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> catalog.listTables(Namespace.empty())) + .withMessageContaining("listTables must be at SCHEMA level"); } @Test public void testListTablesWithinDB() { String dbName = "DB_1"; - Assertions.assertThat(catalog.listTables(Namespace.of(dbName))) - .containsExactly( - TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1"), - 
TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_2")); + Assertions.assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> catalog.listTables(Namespace.of(dbName))) + .withMessageContaining("listTables must be at SCHEMA level"); } @Test public void testListTablesWithinNonexistentDB() { String dbName = "NONEXISTENT_DB"; + String schemaName = "NONEXISTENT_SCHEMA"; Assertions.assertThatExceptionOfType(RuntimeException.class) - .isThrownBy(() -> catalog.listTables(Namespace.of(dbName))) + .isThrownBy(() -> catalog.listTables(Namespace.of(dbName, schemaName))) .withMessageContaining("does not exist") .withMessageContaining(dbName); } @@ -269,6 +271,7 @@ public void testCloseBeforeInitializeDoesntThrow() throws IOException { @Test public void testClose() throws IOException { + catalog.newTableOps(TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1")); catalog.close(); Assertions.assertThat(fakeClient.isClosed()) .overridingErrorMessage("expected close() to propagate to snowflakeClient") @@ -285,4 +288,19 @@ public void testTableNameFromTableOperations() { catalog.newTableOps(TableIdentifier.of("DB_1", "SCHEMA_1", "TAB_1")); Assertions.assertThat(castedTableOps.fullTableName()).isEqualTo("slushLog.DB_1.SCHEMA_1.TAB_1"); } + + @Test + public void testDatabaseExists() { + Assertions.assertThat(catalog.namespaceExists(Namespace.of("DB_1"))).isTrue(); + Assertions.assertThat(catalog.namespaceExists(Namespace.of("NONEXISTENT_DB"))).isFalse(); + } + + @Test + public void testSchemaExists() { + Assertions.assertThat(catalog.namespaceExists(Namespace.of("DB_1", "SCHEMA_1"))).isTrue(); + Assertions.assertThat(catalog.namespaceExists(Namespace.of("DB_1", "NONEXISTENT_SCHEMA"))) + .isFalse(); + Assertions.assertThat(catalog.namespaceExists(Namespace.of("NONEXISTENT_DB", "SCHEMA_1"))) + .isFalse(); + } } From b3a284247141c531837325a5b739f6d4b855380b Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Tue, 20 Dec 2022 23:03:32 -0800 Subject: [PATCH 12/20] Move private constructor to top, add assertion to test case. --- .../java/org/apache/iceberg/snowflake/NamespaceHelpers.java | 4 ++-- .../org/apache/iceberg/snowflake/SnowflakeCatalogTest.java | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java index b35bf9829d25..f3067bb280c0 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java @@ -32,6 +32,8 @@ class NamespaceHelpers { private static final Logger LOG = LoggerFactory.getLogger(NamespaceHelpers.class); + private NamespaceHelpers() {} + /** * Converts a Namespace into a SnowflakeIdentifier representing ROOT, a DATABASE, or a SCHEMA. 
* @@ -119,6 +121,4 @@ public static TableIdentifier toIcebergTableIdentifier(SnowflakeIdentifier ident LOG.debug("toIcebergTableIdentifier({}) -> {}", identifier, ret); return ret; } - - private NamespaceHelpers() {} } diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java index afa79768e1c2..e08b71dba046 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java @@ -267,6 +267,10 @@ public void testCloseBeforeInitializeDoesntThrow() throws IOException { // Make sure no exception is thrown if we call close() before initialize(), in case callers // add a catalog to auto-close() helpers but end up never using/initializing a catalog. catalog.close(); + + Assertions.assertThat(fakeClient.isClosed()) + .overridingErrorMessage("expected not to have called close() on snowflakeClient") + .isFalse(); } @Test From 676d024e131729a9042588fc218d10a39b962d83 Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Tue, 20 Dec 2022 23:57:40 -0800 Subject: [PATCH 13/20] Define minimal ResultSetParser/QueryHarness classes to fully replace any use of commons-dbutils; refactor ResultSet handling fully into JdbcSnowflakeClient.java. --- build.gradle | 1 - .../snowflake/JdbcSnowflakeClient.java | 148 +++++++++++++----- .../snowflake/SnowflakeIdentifier.java | 48 ------ .../snowflake/SnowflakeTableMetadata.java | 12 -- .../snowflake/JdbcSnowflakeClientTest.java | 89 ++++++----- versions.props | 1 - 6 files changed, 157 insertions(+), 142 deletions(-) diff --git a/build.gradle b/build.gradle index 8f8e23d7c9f1..65041197a9ff 100644 --- a/build.gradle +++ b/build.gradle @@ -707,7 +707,6 @@ project(':iceberg-snowflake') { implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow') implementation "com.fasterxml.jackson.core:jackson-databind" implementation "com.fasterxml.jackson.core:jackson-core" - implementation "commons-dbutils:commons-dbutils" runtimeOnly("net.snowflake:snowflake-jdbc") diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java index 998c20fe4887..8ca32c449949 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java @@ -18,14 +18,17 @@ */ package org.apache.iceberg.snowflake; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.sql.SQLException; import java.util.List; -import org.apache.commons.dbutils.QueryRunner; import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.jdbc.UncheckedInterruptedException; import org.apache.iceberg.jdbc.UncheckedSQLException; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; /** * This implementation of SnowflakeClient builds on top of Snowflake's JDBC driver to interact with @@ -34,18 +37,103 @@ class JdbcSnowflakeClient implements SnowflakeClient { public static final String EXPECTED_JDBC_IMPL = "net.snowflake.client.jdbc.SnowflakeDriver"; + @FunctionalInterface + interface ResultSetParser { + T parse(ResultSet rs) throws SQLException; + } + + /** + 
* This class wraps the basic boilerplate of setting up PreparedStatements and applying a + * ResultSetParser to translate a ResultSet into parsed objects. Allows easily injecting + * subclasses for debugging/testing purposes. + */ + static class QueryHarness { + public <T> T query(Connection conn, String sql, ResultSetParser<T> parser, String... args) + throws SQLException { + try (PreparedStatement statement = conn.prepareStatement(sql)) { + if (args != null) { + for (int i = 0; i < args.length; ++i) { + statement.setString(i + 1, args[i]); + } + } + + try (ResultSet rs = statement.executeQuery()) { + return parser.parse(rs); + } + } + } + } + + /** + * Expects to handle ResultSets representing fully-qualified Snowflake Database identifiers, + * containing "name" (representing databaseName). + */ + public static final ResultSetParser<List<SnowflakeIdentifier>> DATABASE_RESULT_SET_HANDLER = + rs -> { + List<SnowflakeIdentifier> databases = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("name"); + databases.add(SnowflakeIdentifier.ofDatabase(databaseName)); + } + return databases; + }; + + /** + * Expects to handle ResultSets representing fully-qualified Snowflake Schema identifiers, + * containing "database_name" and "name" (representing schemaName). + */ + public static final ResultSetParser<List<SnowflakeIdentifier>> SCHEMA_RESULT_SET_HANDLER = + rs -> { + List<SnowflakeIdentifier> schemas = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("database_name"); + String schemaName = rs.getString("name"); + schemas.add(SnowflakeIdentifier.ofSchema(databaseName, schemaName)); + } + return schemas; + }; + + /** + * Expects to handle ResultSets representing fully-qualified Snowflake Table identifiers, + * containing "database_name", "schema_name", and "name" (representing tableName). + */ + public static final ResultSetParser<List<SnowflakeIdentifier>> TABLE_RESULT_SET_HANDLER = + rs -> { + List<SnowflakeIdentifier> tables = Lists.newArrayList(); + while (rs.next()) { + String databaseName = rs.getString("database_name"); + String schemaName = rs.getString("schema_name"); + String tableName = rs.getString("name"); + tables.add(SnowflakeIdentifier.ofTable(databaseName, schemaName, tableName)); + } + return tables; + };
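A brief usage sketch of the harness/parser pairing (illustrative only: conn here stands in for a Connection leased from the JdbcClientPool, which the surrounding client code obtains via connectionPool.run rather than holding directly):

    // Running a parameterized SHOW query through the harness with a canned parser.
    JdbcSnowflakeClient.QueryHarness harness = new JdbcSnowflakeClient.QueryHarness();
    List<SnowflakeIdentifier> schemas =
        harness.query(
            conn, "SHOW SCHEMAS IN DATABASE IDENTIFIER(?)", SCHEMA_RESULT_SET_HANDLER, "DB_1");

    // Any ResultSetParser<T> is just a lambda over the whole ResultSet, e.g. a row counter:
    JdbcSnowflakeClient.ResultSetParser<Integer> rowCounter =
        rs -> {
          int rows = 0;
          while (rs.next()) {
            rows++;
          }
          return rows;
        };
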
+ + /** + * Expects to handle ResultSets representing a single record holding Snowflake Iceberg metadata. + */ + public static final ResultSetParser<SnowflakeTableMetadata> TABLE_METADATA_RESULT_SET_HANDLER = + rs -> { + if (!rs.next()) { + return null; + } + + String rawJsonVal = rs.getString("METADATA"); + return SnowflakeTableMetadata.parseJson(rawJsonVal); + }; + private final JdbcClientPool connectionPool; - private QueryRunner queryRunner; + private QueryHarness queryHarness; JdbcSnowflakeClient(JdbcClientPool conn) { Preconditions.checkArgument(null != conn, "JdbcClientPool must be non-null"); connectionPool = conn; - queryRunner = new QueryRunner(true); + queryHarness = new QueryHarness(); } @VisibleForTesting - void setQueryRunner(QueryRunner queryRunner) { - this.queryRunner = queryRunner; + void setQueryHarness(QueryHarness queryHarness) { + this.queryHarness = queryHarness; } /** @@ -91,11 +179,8 @@ public boolean databaseExists(SnowflakeIdentifier database) { databases = connectionPool.run( conn -> - queryRunner.query( - conn, - finalQuery, - SnowflakeIdentifier.DATABASE_RESULT_SET_HANDLER, - (Object[]) null)); + queryHarness.query( + conn, finalQuery, DATABASE_RESULT_SET_HANDLER, (String[]) null)); } catch (SQLException e) { throw new UncheckedSQLException(e, "Failed to check if database exists"); } catch (InterruptedException e) { throw new UncheckedInterruptedException(e, "Interrupted while checking if database exists"); } @@ -131,11 +216,11 @@ public boolean schemaExists(SnowflakeIdentifier schema) { schemas = connectionPool.run( conn -> - queryRunner.query( + queryHarness.query( conn, finalQuery, - SnowflakeIdentifier.SCHEMA_RESULT_SET_HANDLER, - new Object[] {schema.databaseName()})); + SCHEMA_RESULT_SET_HANDLER, + new String[] {schema.databaseName()})); } catch (SQLException e) { throw new UncheckedSQLException(e, "Failed to check if schema exists"); } catch (InterruptedException e) { throw new UncheckedInterruptedException(e, "Interrupted while checking if schema exists"); } @@ -155,11 +240,11 @@ public List<SnowflakeIdentifier> listDatabases() { databases = connectionPool.run( conn -> - queryRunner.query( + queryHarness.query( conn, "SHOW DATABASES IN ACCOUNT", - SnowflakeIdentifier.DATABASE_RESULT_SET_HANDLER, - (Object[]) null)); + DATABASE_RESULT_SET_HANDLER, + (String[]) null)); } catch (SQLException e) { throw new UncheckedSQLException(e, "Failed to list databases"); } catch (InterruptedException e) { throw new UncheckedInterruptedException(e, "Interrupted while listing databases"); } @@ -171,7 +256,7 @@ public List<SnowflakeIdentifier> listSchemas(SnowflakeIdentifier scope) { StringBuilder baseQuery = new StringBuilder("SHOW SCHEMAS"); - Object[] queryParams = null; + String[] queryParams = null; switch (scope.type()) { case ROOT: // account-level listing @@ -180,7 +265,7 @@ public List<SnowflakeIdentifier> listSchemas(SnowflakeIdentifier scope) { case DATABASE: // database-level listing baseQuery.append(" IN DATABASE IDENTIFIER(?)"); - queryParams = new Object[] {scope.toIdentifierString()}; + queryParams = new String[] {scope.toIdentifierString()}; break; default: throw new IllegalArgumentException( @@ -188,17 +273,14 @@ } final String finalQuery = baseQuery.toString(); - final Object[] finalQueryParams = queryParams; + final String[] finalQueryParams = queryParams; List<SnowflakeIdentifier> schemas; try { schemas = connectionPool.run( conn -> - queryRunner.query( - conn, - finalQuery, - SnowflakeIdentifier.SCHEMA_RESULT_SET_HANDLER, - finalQueryParams)); + queryHarness.query( + conn, finalQuery, SCHEMA_RESULT_SET_HANDLER, finalQueryParams)); } catch (SQLException e) { throw new UncheckedSQLException(e, "Failed to list schemas for scope %s", scope); } catch (InterruptedException e) { throw new UncheckedInterruptedException(e, "Interrupted while listing schemas"); } return schemas; } @@ -210,7 +292,7 @@ public List<SnowflakeIdentifier> listIcebergTables(SnowflakeIdentifier scope) { StringBuilder baseQuery
= new StringBuilder("SHOW ICEBERG TABLES"); - Object[] queryParams = null; + String[] queryParams = null; switch (scope.type()) { case ROOT: // account-level listing @@ -219,12 +301,12 @@ public List listIcebergTables(SnowflakeIdentifier scope) { case DATABASE: // database-level listing baseQuery.append(" IN DATABASE IDENTIFIER(?)"); - queryParams = new Object[] {scope.toIdentifierString()}; + queryParams = new String[] {scope.toIdentifierString()}; break; case SCHEMA: // schema-level listing baseQuery.append(" IN SCHEMA IDENTIFIER(?)"); - queryParams = new Object[] {scope.toIdentifierString()}; + queryParams = new String[] {scope.toIdentifierString()}; break; default: throw new IllegalArgumentException( @@ -232,17 +314,13 @@ public List listIcebergTables(SnowflakeIdentifier scope) { } final String finalQuery = baseQuery.toString(); - final Object[] finalQueryParams = queryParams; + final String[] finalQueryParams = queryParams; List tables; try { tables = connectionPool.run( conn -> - queryRunner.query( - conn, - finalQuery, - SnowflakeIdentifier.TABLE_RESULT_SET_HANDLER, - finalQueryParams)); + queryHarness.query(conn, finalQuery, TABLE_RESULT_SET_HANDLER, finalQueryParams)); } catch (SQLException e) { throw new UncheckedSQLException(e, "Failed to list tables for scope %s", scope.toString()); } catch (InterruptedException e) { @@ -263,10 +341,10 @@ public SnowflakeTableMetadata loadTableMetadata(SnowflakeIdentifier tableIdentif tableMeta = connectionPool.run( conn -> - queryRunner.query( + queryHarness.query( conn, finalQuery, - SnowflakeTableMetadata.createHandler(), + TABLE_METADATA_RESULT_SET_HANDLER, tableIdentifier.toIdentifierString())); } catch (SQLException e) { throw new UncheckedSQLException( diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java index f989d002ac90..f06926ce44ae 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java @@ -18,11 +18,8 @@ */ package org.apache.iceberg.snowflake; -import java.util.List; -import org.apache.commons.dbutils.ResultSetHandler; import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; /** * Since the SnowflakeCatalog supports exactly two levels of Iceberg Namespaces, corresponding @@ -40,51 +37,6 @@ public enum Type { TABLE } - /** - * Expects to handle ResultSets representing fully-qualified Snowflake Database identifiers, - * containing "name" (representing databaseName). - */ - public static final ResultSetHandler> DATABASE_RESULT_SET_HANDLER = - rs -> { - List databases = Lists.newArrayList(); - while (rs.next()) { - String databaseName = rs.getString("name"); - databases.add(SnowflakeIdentifier.ofDatabase(databaseName)); - } - return databases; - }; - - /** - * Expects to handle ResultSets representing fully-qualified Snowflake Schema identifiers, - * containing "database_name" and "name" (representing schemaName). 
- */ - public static final ResultSetHandler> SCHEMA_RESULT_SET_HANDLER = - rs -> { - List schemas = Lists.newArrayList(); - while (rs.next()) { - String databaseName = rs.getString("database_name"); - String schemaName = rs.getString("name"); - schemas.add(SnowflakeIdentifier.ofSchema(databaseName, schemaName)); - } - return schemas; - }; - - /** - * Expects to handle ResultSets representing fully-qualified Snowflake Table identifiers, - * containing "database_name", "schema_name", and "name" (representing tableName). - */ - public static final ResultSetHandler> TABLE_RESULT_SET_HANDLER = - rs -> { - List tables = Lists.newArrayList(); - while (rs.next()) { - String databaseName = rs.getString("database_name"); - String schemaName = rs.getString("schema_name"); - String tableName = rs.getString("name"); - tables.add(SnowflakeIdentifier.ofTable(databaseName, schemaName, tableName)); - } - return tables; - }; - private String databaseName; private String schemaName; private String tableName; diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java index e46f0c2457f4..7d11cccc02d1 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.dbutils.ResultSetHandler; import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; @@ -144,15 +143,4 @@ public static SnowflakeTableMetadata parseJson(String json) { return new SnowflakeTableMetadata( snowflakeMetadataLocation, icebergMetadataLocation, status, json); } - - public static ResultSetHandler createHandler() { - return rs -> { - if (!rs.next()) { - return null; - } - - String rawJsonVal = rs.getString("METADATA"); - return SnowflakeTableMetadata.parseJson(rawJsonVal); - }; - } } diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java index fbfc075979e6..849460688de4 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java @@ -28,8 +28,6 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.List; -import org.apache.commons.dbutils.QueryRunner; -import org.apache.commons.dbutils.ResultSetHandler; import org.apache.iceberg.ClientPool; import org.apache.iceberg.jdbc.JdbcClientPool; import org.apache.iceberg.jdbc.UncheckedInterruptedException; @@ -48,7 +46,7 @@ public class JdbcSnowflakeClientTest { @Mock private Connection mockConnection; @Mock private JdbcClientPool mockClientPool; - @Mock private QueryRunner mockQueryRunner; + @Mock private JdbcSnowflakeClient.QueryHarness mockQueryHarness; @Mock private ResultSet mockResultSet; private JdbcSnowflakeClient snowflakeClient; @@ -56,7 +54,7 @@ public class JdbcSnowflakeClientTest { @Before public void before() throws SQLException, InterruptedException { snowflakeClient = new JdbcSnowflakeClient(mockClientPool); - snowflakeClient.setQueryRunner(mockQueryRunner); + snowflakeClient.setQueryHarness(mockQueryHarness); doAnswer( new Answer() { @@ 
-71,15 +69,16 @@ public Object answer(InvocationOnMock invocation) throws Throwable { new Answer() { @Override public Object answer(InvocationOnMock invocation) throws Throwable { - return ((ResultSetHandler) invocation.getArguments()[2]).handle(mockResultSet); + return ((JdbcSnowflakeClient.ResultSetParser) invocation.getArguments()[2]) + .parse(mockResultSet); } }) - .when(mockQueryRunner) + .when(mockQueryHarness) .query( any(Connection.class), any(String.class), - any(ResultSetHandler.class), - ArgumentMatchers.any()); + any(JdbcSnowflakeClient.ResultSetParser.class), + ArgumentMatchers.any()); } @Test @@ -97,12 +96,12 @@ public void testDatabaseExists() throws SQLException { Assertions.assertThat(snowflakeClient.databaseExists(SnowflakeIdentifier.ofDatabase("DB_1"))) .isTrue(); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"), - any(ResultSetHandler.class), - eq((Object[]) null)); + any(JdbcSnowflakeClient.ResultSetParser.class), + eq((String[]) null)); } @Test @@ -114,12 +113,12 @@ public void testDatabaseExistsSpecialCharacters() throws SQLException { snowflakeClient.databaseExists(SnowflakeIdentifier.ofDatabase("$DB_1$.'!@#%^&*"))) .isTrue(); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW DATABASES LIKE '_DB_1$_________' IN ACCOUNT"), - any(ResultSetHandler.class), - eq((Object[]) null)); + any(JdbcSnowflakeClient.ResultSetParser.class), + eq((String[]) null)); } @Test @@ -153,17 +152,17 @@ public void testSchemaExists() throws SQLException { snowflakeClient.schemaExists(SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1"))) .isTrue(); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"), - any(ResultSetHandler.class), - eq((Object[]) null)); - verify(mockQueryRunner) + any(JdbcSnowflakeClient.ResultSetParser.class), + eq((String[]) null)); + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW SCHEMAS LIKE 'SCHEMA_1' IN DATABASE IDENTIFIER(?)"), - any(ResultSetHandler.class), + any(JdbcSnowflakeClient.ResultSetParser.class), eq("DB_1")); } @@ -182,17 +181,17 @@ public void testSchemaExistsSpecialCharacters() throws SQLException { SnowflakeIdentifier.ofSchema("DB_1", "$SCHEMA_1$.'!@#%^&*"))) .isTrue(); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"), - any(ResultSetHandler.class), - eq((Object[]) null)); - verify(mockQueryRunner) + any(JdbcSnowflakeClient.ResultSetParser.class), + eq((String[]) null)); + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW SCHEMAS LIKE '_SCHEMA_1$_________' IN DATABASE IDENTIFIER(?)"), - any(ResultSetHandler.class), + any(JdbcSnowflakeClient.ResultSetParser.class), eq("DB_1")); } @@ -238,12 +237,12 @@ public void testListDatabasesInAccount() throws SQLException { List actualList = snowflakeClient.listDatabases(); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW DATABASES IN ACCOUNT"), - any(ResultSetHandler.class), - eq((Object[]) null)); + any(JdbcSnowflakeClient.ResultSetParser.class), + eq((String[]) null)); Assertions.assertThat(actualList) .containsExactly( @@ -271,12 +270,12 @@ public void testListSchemasInAccount() throws SQLException { List actualList = snowflakeClient.listSchemas(SnowflakeIdentifier.ofRoot()); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW SCHEMAS IN ACCOUNT"), - 
any(ResultSetHandler.class), - eq((Object[]) null)); + any(JdbcSnowflakeClient.ResultSetParser.class), + eq((String[]) null)); Assertions.assertThat(actualList) .containsExactly( @@ -298,11 +297,11 @@ public void testListSchemasInDatabase() throws SQLException { List actualList = snowflakeClient.listSchemas(SnowflakeIdentifier.ofDatabase("DB_1")); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW SCHEMAS IN DATABASE IDENTIFIER(?)"), - any(ResultSetHandler.class), + any(JdbcSnowflakeClient.ResultSetParser.class), eq("DB_1")); Assertions.assertThat(actualList) @@ -368,12 +367,12 @@ public void testListIcebergTablesInAccount() throws SQLException { List actualList = snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofRoot()); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW ICEBERG TABLES IN ACCOUNT"), - any(ResultSetHandler.class), - eq((Object[]) null)); + any(JdbcSnowflakeClient.ResultSetParser.class), + eq((String[]) null)); Assertions.assertThat(actualList) .containsExactly( @@ -406,11 +405,11 @@ public void testListIcebergTablesInDatabase() throws SQLException { List actualList = snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofDatabase("DB_1")); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW ICEBERG TABLES IN DATABASE IDENTIFIER(?)"), - any(ResultSetHandler.class), + any(JdbcSnowflakeClient.ResultSetParser.class), eq("DB_1")); Assertions.assertThat(actualList) @@ -434,11 +433,11 @@ public void testListIcebergTablesInSchema() throws SQLException { List actualList = snowflakeClient.listIcebergTables(SnowflakeIdentifier.ofSchema("DB_1", "SCHEMA_1")); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SHOW ICEBERG TABLES IN SCHEMA IDENTIFIER(?)"), - any(ResultSetHandler.class), + any(JdbcSnowflakeClient.ResultSetParser.class), eq("DB_1.SCHEMA_1")); Assertions.assertThat(actualList) @@ -489,11 +488,11 @@ public void testGetS3TableMetadata() throws SQLException { snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1")); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"), - any(ResultSetHandler.class), + any(JdbcSnowflakeClient.ResultSetParser.class), eq("DB_1.SCHEMA_1.TABLE_1")); SnowflakeTableMetadata expectedMetadata = @@ -520,11 +519,11 @@ public void testGetAzureTableMetadata() throws SQLException { snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1")); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) AS METADATA"), - any(ResultSetHandler.class), + any(JdbcSnowflakeClient.ResultSetParser.class), eq("DB_1.SCHEMA_1.TABLE_1")); SnowflakeTableMetadata expectedMetadata = @@ -551,11 +550,11 @@ public void testGetGcsTableMetadata() throws SQLException { snowflakeClient.loadTableMetadata( SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TABLE_1")); - verify(mockQueryRunner) + verify(mockQueryHarness) .query( eq(mockConnection), eq("SELECT SYSTEM$GET_ICEBERG_TABLE_INFORMATION(?) 
AS METADATA"), - any(ResultSetHandler.class), + any(JdbcSnowflakeClient.ResultSetParser.class), eq("DB_1.SCHEMA_1.TABLE_1")); SnowflakeTableMetadata expectedMetadata = diff --git a/versions.props b/versions.props index df11d937fbcc..356dfff264d7 100644 --- a/versions.props +++ b/versions.props @@ -28,7 +28,6 @@ org.scala-lang.modules:scala-collection-compat_2.12 = 2.6.0 org.scala-lang.modules:scala-collection-compat_2.13 = 2.6.0 com.emc.ecs:object-client-bundle = 3.3.2 org.immutables:value = 2.9.2 -commons-dbutils:commons-dbutils = 1.7 net.snowflake:snowflake-jdbc = 3.13.22 # test deps From cc493d0e1351261307c153a81c660e821e138ad9 Mon Sep 17 00:00:00 2001 From: Dennis Huo <7410123+dennishuo@users.noreply.github.com> Date: Fri, 23 Dec 2022 09:56:31 -0800 Subject: [PATCH 14/20] Update snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Eduard Tudenhöfner --- .../org/apache/iceberg/snowflake/SnowflakeTableOperations.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java index dcb361580acc..1fe90d7eff42 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableOperations.java @@ -47,7 +47,7 @@ protected SnowflakeTableOperations( this.fileIO = fileIO; this.tableIdentifier = tableIdentifier; this.snowflakeIdentifierForTable = NamespaceHelpers.toSnowflakeIdentifier(tableIdentifier); - this.fullTableName = String.format("%s.%s", catalogName, tableIdentifier.toString()); + this.fullTableName = String.format("%s.%s", catalogName, tableIdentifier); } @Override From ce7e28cd197f4de957957052703d2780dc7f2a5f Mon Sep 17 00:00:00 2001 From: Dennis Huo Date: Fri, 23 Dec 2022 12:00:27 -0800 Subject: [PATCH 15/20] Refactor style suggestions; remove debug-level logging, arguments in exceptions, private members if not accessed outside, move precondition checks, add test for NamespaceHelpers. --- .../snowflake/JdbcSnowflakeClient.java | 48 +++++-- .../iceberg/snowflake/NamespaceHelpers.java | 48 ++----- .../iceberg/snowflake/SnowflakeCatalog.java | 26 ++-- .../snowflake/FakeSnowflakeClient.java | 20 +-- .../snowflake/JdbcSnowflakeClientTest.java | 34 ++--- .../snowflake/NamespaceHelpersTest.java | 119 ++++++++++++++++++ .../snowflake/SnowflakeCatalogTest.java | 26 ++-- 7 files changed, 206 insertions(+), 115 deletions(-) create mode 100644 snowflake/src/test/java/org/apache/iceberg/snowflake/NamespaceHelpersTest.java diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java index 8ca32c449949..c5cf8cecae17 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java @@ -35,7 +35,7 @@ * Snowflake's Iceberg-aware resource model. 
*/ class JdbcSnowflakeClient implements SnowflakeClient { - public static final String EXPECTED_JDBC_IMPL = "net.snowflake.client.jdbc.SnowflakeDriver"; + static final String EXPECTED_JDBC_IMPL = "net.snowflake.client.jdbc.SnowflakeDriver"; @FunctionalInterface interface ResultSetParser { @@ -182,9 +182,10 @@ public boolean databaseExists(SnowflakeIdentifier database) { queryHarness.query( conn, finalQuery, DATABASE_RESULT_SET_HANDLER, (String[]) null)); } catch (SQLException e) { - throw new UncheckedSQLException(e, "Failed to check if database exists"); + throw new UncheckedSQLException(e, "Failed to check if database '%s' exists", database); } catch (InterruptedException e) { - throw new UncheckedInterruptedException(e, "Interrupted while checking if database exists"); + throw new UncheckedInterruptedException( + e, "Interrupted while checking if database '%s' exists", database); } // Filter to handle the edge case of '_' appearing as a wildcard that can't be remapped the way @@ -222,9 +223,10 @@ public boolean schemaExists(SnowflakeIdentifier schema) { SCHEMA_RESULT_SET_HANDLER, new String[] {schema.databaseName()})); } catch (SQLException e) { - throw new UncheckedSQLException(e, "Failed to check if schema exists"); + throw new UncheckedSQLException(e, "Failed to check if schema '%s' exists", schema); } catch (InterruptedException e) { - throw new UncheckedInterruptedException(e, "Interrupted while checking if schema exists"); + throw new UncheckedInterruptedException( + e, "Interrupted while checking if schema '%s' exists", schema); } // Filter to handle the edge case of '_' appearing as a wildcard that can't be remapped the way @@ -250,6 +252,12 @@ public List listDatabases() { } catch (InterruptedException e) { throw new UncheckedInterruptedException(e, "Interrupted while listing databases"); } + databases.forEach( + db -> + Preconditions.checkState( + db.type() == SnowflakeIdentifier.Type.DATABASE, + "Expected DATABASE, got identifier '%s'", + db)); return databases; } @@ -282,10 +290,18 @@ public List listSchemas(SnowflakeIdentifier scope) { queryHarness.query( conn, finalQuery, SCHEMA_RESULT_SET_HANDLER, finalQueryParams)); } catch (SQLException e) { - throw new UncheckedSQLException(e, "Failed to list schemas for scope %s", scope); + throw new UncheckedSQLException(e, "Failed to list schemas for scope '%s'", scope); } catch (InterruptedException e) { - throw new UncheckedInterruptedException(e, "Interrupted while listing schemas"); + throw new UncheckedInterruptedException( + e, "Interrupted while listing schemas for scope '%s'", scope); } + schemas.forEach( + schema -> + Preconditions.checkState( + schema.type() == SnowflakeIdentifier.Type.SCHEMA, + "Expected SCHEMA, got identifier '%s' for scope '%s'", + schema, + scope)); return schemas; } @@ -322,10 +338,18 @@ public List listIcebergTables(SnowflakeIdentifier scope) { conn -> queryHarness.query(conn, finalQuery, TABLE_RESULT_SET_HANDLER, finalQueryParams)); } catch (SQLException e) { - throw new UncheckedSQLException(e, "Failed to list tables for scope %s", scope.toString()); + throw new UncheckedSQLException(e, "Failed to list tables for scope '%s'", scope); } catch (InterruptedException e) { - throw new UncheckedInterruptedException(e, "Interrupted while listing tables"); + throw new UncheckedInterruptedException( + e, "Interrupted while listing tables for scope '%s'", scope); } + tables.forEach( + table -> + Preconditions.checkState( + table.type() == SnowflakeIdentifier.Type.TABLE, + "Expected TABLE, got identifier '%s' 
for scope '%s'", + table, + scope)); return tables; } @@ -347,10 +371,10 @@ public SnowflakeTableMetadata loadTableMetadata(SnowflakeIdentifier tableIdentif TABLE_METADATA_RESULT_SET_HANDLER, tableIdentifier.toIdentifierString())); } catch (SQLException e) { - throw new UncheckedSQLException( - e, "Failed to get table metadata for %s", tableIdentifier.toString()); + throw new UncheckedSQLException(e, "Failed to get table metadata for '%s'", tableIdentifier); } catch (InterruptedException e) { - throw new UncheckedInterruptedException(e, "Interrupted while getting table metadata"); + throw new UncheckedInterruptedException( + e, "Interrupted while getting table metadata for '%s'", tableIdentifier); } return tableMeta; } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java index f3067bb280c0..28dacbca9817 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java +++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/NamespaceHelpers.java @@ -21,8 +21,6 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; class NamespaceHelpers { private static final int MAX_NAMESPACE_DEPTH = 2; @@ -30,8 +28,6 @@ class NamespaceHelpers { private static final int NAMESPACE_DB_LEVEL = 1; private static final int NAMESPACE_SCHEMA_LEVEL = 2; - private static final Logger LOG = LoggerFactory.getLogger(NamespaceHelpers.class); - private NamespaceHelpers() {} /** @@ -40,28 +36,20 @@ private NamespaceHelpers() {} * @throws IllegalArgumentException if the namespace is not a supported depth. 
*/ public static SnowflakeIdentifier toSnowflakeIdentifier(Namespace namespace) { - SnowflakeIdentifier identifier = null; switch (namespace.length()) { case NAMESPACE_ROOT_LEVEL: - identifier = SnowflakeIdentifier.ofRoot(); - break; + return SnowflakeIdentifier.ofRoot(); case NAMESPACE_DB_LEVEL: - identifier = SnowflakeIdentifier.ofDatabase(namespace.level(NAMESPACE_DB_LEVEL - 1)); - break; + return SnowflakeIdentifier.ofDatabase(namespace.level(NAMESPACE_DB_LEVEL - 1)); case NAMESPACE_SCHEMA_LEVEL: - identifier = - SnowflakeIdentifier.ofSchema( - namespace.level(NAMESPACE_DB_LEVEL - 1), - namespace.level(NAMESPACE_SCHEMA_LEVEL - 1)); - break; + return SnowflakeIdentifier.ofSchema( + namespace.level(NAMESPACE_DB_LEVEL - 1), namespace.level(NAMESPACE_SCHEMA_LEVEL - 1)); default: throw new IllegalArgumentException( String.format( "Snowflake max namespace level is %d, got namespace '%s'", MAX_NAMESPACE_DEPTH, namespace)); } - LOG.debug("toSnowflakeIdentifier({}) -> {}", namespace, identifier); - return identifier; } /** @@ -75,11 +63,8 @@ public static SnowflakeIdentifier toSnowflakeIdentifier(TableIdentifier identifi "Namespace portion of '%s' must be at the SCHEMA level, got namespaceScope '%s'", identifier, namespaceScope); - SnowflakeIdentifier ret = - SnowflakeIdentifier.ofTable( - namespaceScope.databaseName(), namespaceScope.schemaName(), identifier.name()); - LOG.debug("toSnowflakeIdentifier({}) -> {}", identifier, ret); - return ret; + return SnowflakeIdentifier.ofTable( + namespaceScope.databaseName(), namespaceScope.schemaName(), identifier.name()); } /** @@ -87,23 +72,17 @@ public static SnowflakeIdentifier toSnowflakeIdentifier(TableIdentifier identifi * Namespace; throws IllegalArgumentException if not an appropriate type. */ public static Namespace toIcebergNamespace(SnowflakeIdentifier identifier) { - Namespace namespace = null; switch (identifier.type()) { case ROOT: - namespace = Namespace.of(); - break; + return Namespace.empty(); case DATABASE: - namespace = Namespace.of(identifier.databaseName()); - break; + return Namespace.of(identifier.databaseName()); case SCHEMA: - namespace = Namespace.of(identifier.databaseName(), identifier.schemaName()); - break; + return Namespace.of(identifier.databaseName(), identifier.schemaName()); default: throw new IllegalArgumentException( String.format("Cannot convert identifier '%s' to Namespace", identifier)); } - LOG.debug("toIcebergNamespace({}) -> {}", identifier, namespace); - return namespace; } /** @@ -113,12 +92,9 @@ public static Namespace toIcebergNamespace(SnowflakeIdentifier identifier) { public static TableIdentifier toIcebergTableIdentifier(SnowflakeIdentifier identifier) { Preconditions.checkArgument( identifier.type() == SnowflakeIdentifier.Type.TABLE, - "SnowflakeIdentifier must be type TABLE, get '%s'", + "SnowflakeIdentifier must be type TABLE, got '%s'", identifier); - TableIdentifier ret = - TableIdentifier.of( - identifier.databaseName(), identifier.schemaName(), identifier.tableName()); - LOG.debug("toIcebergTableIdentifier({}) -> {}", identifier, ret); - return ret; + return TableIdentifier.of( + identifier.databaseName(), identifier.schemaName(), identifier.tableName()); } } diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java index 0667625f0c9e..2fc5473f706e 100644 --- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java +++ 
b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java @@ -43,8 +43,8 @@ public class SnowflakeCatalog extends BaseMetastoreCatalog implements Closeable, SupportsNamespaces, Configurable { - public static final String DEFAULT_CATALOG_NAME = "snowflake_catalog"; - public static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.io.ResolvingFileIO"; + private static final String DEFAULT_CATALOG_NAME = "snowflake_catalog"; + private static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.io.ResolvingFileIO"; static class FileIOFactory { public FileIO newFileIO(String impl, Map properties, Object hadoopConf) { @@ -94,7 +94,7 @@ public void renameTable(TableIdentifier from, TableIdentifier to) { @Override public void initialize(String name, Map properties) { String uri = properties.get(CatalogProperties.URI); - Preconditions.checkNotNull(uri, "JDBC connection URI is required"); + Preconditions.checkArgument(null != uri, "JDBC connection URI is required"); try { // We'll ensure the expected JDBC driver implementation class is initialized through // reflection regardless of which classloader ends up using this JdbcSnowflakeClient, but @@ -171,18 +171,7 @@ public List listNamespaces(Namespace namespace) { } List namespaceList = - results.stream() - .map( - result -> { - Preconditions.checkState( - result.type() == SnowflakeIdentifier.Type.SCHEMA - || result.type() == SnowflakeIdentifier.Type.DATABASE, - "Got identifier of type %s from listNamespaces for %s", - result.type(), - namespace); - return NamespaceHelpers.toIcebergNamespace(result); - }) - .collect(Collectors.toList()); + results.stream().map(NamespaceHelpers::toIcebergNamespace).collect(Collectors.toList()); return namespaceList; } @@ -236,6 +225,13 @@ protected TableOperations newTableOps(TableIdentifier tableIdentifier) { if (catalogProperties.containsKey(CatalogProperties.FILE_IO_IMPL)) { fileIOImpl = catalogProperties.get(CatalogProperties.FILE_IO_IMPL); } + + // Initialize a fresh FileIO for each TableOperations created, because some FileIO + // implementations such as S3FileIO can become bound to a single S3 bucket. Additionally, + // FileIO implementations often support only a finite set of one or more URI schemes (i.e. + // S3FileIO only supports s3/s3a/s3n, and even ResolvingFileIO only supports the combination + // of schemes registered for S3FileIO and HadoopFileIO). Individual catalogs may need to + // support tables across different cloud/storage providers with disjoint FileIO implementations. FileIO fileIO = fileIOFactory.newFileIO(fileIOImpl, catalogProperties, conf); closeableGroup.addCloseable(fileIO); return new SnowflakeTableOperations(snowflakeClient, fileIO, catalogName, tableIdentifier); diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java index f4732fb6ca76..834dc3c6c4db 100644 --- a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java +++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java @@ -27,7 +27,7 @@ public class FakeSnowflakeClient implements SnowflakeClient { // In-memory lookup by database/schema/tableName to table metadata. - private Map>> databases = + private final Map>> databases = Maps.newTreeMap(); private boolean closed = false; @@ -37,18 +37,18 @@ public FakeSnowflakeClient() {} * Also adds parent database/schema if they don't already exist. 
diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java
index f4732fb6ca76..834dc3c6c4db 100644
--- a/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java
+++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/FakeSnowflakeClient.java
@@ -27,7 +27,7 @@ public class FakeSnowflakeClient implements SnowflakeClient {
 
   // In-memory lookup by database/schema/tableName to table metadata.
-  private Map<String, Map<String, Map<String, SnowflakeTableMetadata>>> databases =
+  private final Map<String, Map<String, Map<String, SnowflakeTableMetadata>>> databases =
       Maps.newTreeMap();
   private boolean closed = false;
 
@@ -37,18 +37,18 @@ public FakeSnowflakeClient() {}
    * Also adds parent database/schema if they don't already exist. If the tableName already exists
    * under the given database/schema, the value is replaced with the provided metadata.
    */
-  public void addTable(
-      String database, String schema, String tableName, SnowflakeTableMetadata metadata) {
+  public void addTable(SnowflakeIdentifier tableIdentifier, SnowflakeTableMetadata metadata) {
     Preconditions.checkState(!closed, "Cannot call addTable after calling close()");
-    if (!databases.containsKey(database)) {
-      databases.put(database, Maps.newTreeMap());
+    if (!databases.containsKey(tableIdentifier.databaseName())) {
+      databases.put(tableIdentifier.databaseName(), Maps.newTreeMap());
     }
-    Map<String, Map<String, SnowflakeTableMetadata>> schemas = databases.get(database);
-    if (!schemas.containsKey(schema)) {
-      schemas.put(schema, Maps.newTreeMap());
+    Map<String, Map<String, SnowflakeTableMetadata>> schemas =
+        databases.get(tableIdentifier.databaseName());
+    if (!schemas.containsKey(tableIdentifier.schemaName())) {
+      schemas.put(tableIdentifier.schemaName(), Maps.newTreeMap());
     }
-    Map<String, SnowflakeTableMetadata> tables = schemas.get(schema);
-    tables.put(tableName, metadata);
+    Map<String, SnowflakeTableMetadata> tables = schemas.get(tableIdentifier.schemaName());
+    tables.put(tableIdentifier.tableName(), metadata);
   }
 
   @Override
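The javadoc above describes upsert semantics: parent database/schema entries are created on demand, and re-adding an existing table replaces its metadata. An illustrative call sequence against the fake (the metadata JSON mirrors the test fixtures; the v1/v2 locations are examples):

    FakeSnowflakeClient client = new FakeSnowflakeClient();
    // Creates DB_1 and SCHEMA_1 implicitly on first use.
    client.addTable(
        SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TAB_1"),
        SnowflakeTableMetadata.parseJson(
            "{\"metadataLocation\":\"s3://tab1/metadata/v1.metadata.json\",\"status\":\"success\"}"));
    // Same identifier again: the stored metadata is replaced, now pointing at v2.
    client.addTable(
        SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TAB_1"),
        SnowflakeTableMetadata.parseJson(
            "{\"metadataLocation\":\"s3://tab1/metadata/v2.metadata.json\",\"status\":\"success\"}"));

diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java
index 849460688de4..af7a3631499f 100644
--- a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java
+++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java
@@ -38,9 +38,7 @@
 import org.junit.runner.RunWith;
 import org.mockito.ArgumentMatchers;
 import org.mockito.Mock;
-import org.mockito.invocation.InvocationOnMock;
 import org.mockito.junit.MockitoJUnitRunner;
-import org.mockito.stubbing.Answer;
 
 @RunWith(MockitoJUnitRunner.class)
 public class JdbcSnowflakeClientTest {
@@ -56,23 +54,13 @@ public void before() throws SQLException, InterruptedException {
     snowflakeClient = new JdbcSnowflakeClient(mockClientPool);
     snowflakeClient.setQueryHarness(mockQueryHarness);
 
-    doAnswer(
-            new Answer() {
-              @Override
-              public Object answer(InvocationOnMock invocation) throws Throwable {
-                return ((ClientPool.Action) invocation.getArguments()[0]).run(mockConnection);
-              }
-            })
+    doAnswer(invocation -> ((ClientPool.Action) invocation.getArguments()[0]).run(mockConnection))
         .when(mockClientPool)
         .run(any(ClientPool.Action.class));
 
     doAnswer(
-            new Answer() {
-              @Override
-              public Object answer(InvocationOnMock invocation) throws Throwable {
-                return ((JdbcSnowflakeClient.ResultSetParser) invocation.getArguments()[2])
-                    .parse(mockResultSet);
-              }
-            })
+        invocation ->
+            ((JdbcSnowflakeClient.ResultSetParser) invocation.getArguments()[2])
+                .parse(mockResultSet))
         .when(mockQueryHarness)
         .query(
            any(Connection.class),
@@ -101,7 +89,7 @@ public void testDatabaseExists() throws SQLException {
             eq(mockConnection),
             eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"),
             any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq((String[]) null));
+            eq(null));
@@ -118,7 +106,7 @@ public void testDatabaseExistsSpecialCharacters() throws SQLException {
             eq(mockConnection),
             eq("SHOW DATABASES LIKE '_DB_1$_________' IN ACCOUNT"),
             any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq((String[]) null));
+            eq(null));
@@ -157,7 +145,7 @@ public void testSchemaExists() throws SQLException {
             eq(mockConnection),
             eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"),
             any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq((String[]) null));
+            eq(null));
     verify(mockQueryHarness)
         .query(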
            eq(mockConnection),
@@ -186,7 +174,7 @@ public void testSchemaExistsSpecialCharacters() throws SQLException {
             eq(mockConnection),
             eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"),
             any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq((String[]) null));
+            eq(null));
     verify(mockQueryHarness)
         .query(
             eq(mockConnection),
@@ -242,7 +230,7 @@ public void testListDatabasesInAccount() throws SQLException {
             eq(mockConnection),
             eq("SHOW DATABASES IN ACCOUNT"),
             any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq((String[]) null));
+            eq(null));
 
     Assertions.assertThat(actualList)
         .containsExactly(
@@ -275,7 +263,7 @@ public void testListSchemasInAccount() throws SQLException {
             eq(mockConnection),
             eq("SHOW SCHEMAS IN ACCOUNT"),
             any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq((String[]) null));
+            eq(null));
 
     Assertions.assertThat(actualList)
         .containsExactly(
@@ -372,7 +360,7 @@ public void testListIcebergTablesInAccount() throws SQLException {
             eq(mockConnection),
             eq("SHOW ICEBERG TABLES IN ACCOUNT"),
             any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq((String[]) null));
+            eq(null));
 
     Assertions.assertThat(actualList)
         .containsExactly(
diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/NamespaceHelpersTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/NamespaceHelpersTest.java
new file mode 100644
index 000000000000..fdddb73f8198
--- /dev/null
+++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/NamespaceHelpersTest.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.snowflake;
+
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+public class NamespaceHelpersTest {
+  @Test
+  public void testToSnowflakeIdentifierRoot() {
+    Assertions.assertThat(NamespaceHelpers.toSnowflakeIdentifier(Namespace.empty()))
+        .isEqualTo(SnowflakeIdentifier.ofRoot());
+  }
+
+  @Test
+  public void testToSnowflakeIdentifierDatabase() {
+    Assertions.assertThat(NamespaceHelpers.toSnowflakeIdentifier(Namespace.of("DB1")))
+        .isEqualTo(SnowflakeIdentifier.ofDatabase("DB1"));
+  }
+
+  @Test
+  public void testToSnowflakeIdentifierSchema() {
+    Assertions.assertThat(NamespaceHelpers.toSnowflakeIdentifier(Namespace.of("DB1", "SCHEMA1")))
+        .isEqualTo(SnowflakeIdentifier.ofSchema("DB1", "SCHEMA1"));
+  }
+
+  @Test
+  public void testToSnowflakeIdentifierMaxNamespaceLevel() {
+    Assertions.assertThatExceptionOfType(IllegalArgumentException.class)
+        .isThrownBy(
+            () ->
+                NamespaceHelpers.toSnowflakeIdentifier(
+                    Namespace.of("DB1", "SCHEMA1", "THIRD_NS_LVL")))
+        .withMessageContaining("max namespace level");
+  }
+
+  @Test
+  public void testToSnowflakeIdentifierTable() {
+    Assertions.assertThat(
+            NamespaceHelpers.toSnowflakeIdentifier(TableIdentifier.of("DB1", "SCHEMA1", "TABLE1")))
+        .isEqualTo(SnowflakeIdentifier.ofTable("DB1", "SCHEMA1", "TABLE1"));
+  }
+
+  @Test
+  public void testToSnowflakeIdentifierTableBadNamespace() {
+    Assertions.assertThatExceptionOfType(IllegalArgumentException.class)
+        .isThrownBy(
+            () ->
+                NamespaceHelpers.toSnowflakeIdentifier(
+                    TableIdentifier.of(Namespace.of("DB1_WITHOUT_SCHEMA"), "TABLE1")))
+        .withMessageContaining("must be at the SCHEMA level");
+  }
+
+  @Test
+  public void testToIcebergNamespaceRoot() {
+    Assertions.assertThat(NamespaceHelpers.toIcebergNamespace(SnowflakeIdentifier.ofRoot()))
+        .isEqualTo(Namespace.empty());
+  }
+
+  @Test
+  public void testToIcebergNamespaceDatabase() {
+    Assertions.assertThat(
+            NamespaceHelpers.toIcebergNamespace(SnowflakeIdentifier.ofDatabase("DB1")))
+        .isEqualTo(Namespace.of("DB1"));
+  }
+
+  @Test
+  public void testToIcebergNamespaceSchema() {
+    Assertions.assertThat(
+            NamespaceHelpers.toIcebergNamespace(SnowflakeIdentifier.ofSchema("DB1", "SCHEMA1")))
+        .isEqualTo(Namespace.of("DB1", "SCHEMA1"));
+  }
+
+  @Test
+  public void testToIcebergNamespaceTableFails() {
+    Assertions.assertThatExceptionOfType(IllegalArgumentException.class)
+        .isThrownBy(
+            () ->
+                NamespaceHelpers.toIcebergNamespace(
+                    SnowflakeIdentifier.ofTable("DB1", "SCHEMA1", "TABLE1")))
+        .withMessageContaining("Cannot convert identifier");
+  }
+
+  @Test
+  public void testToIcebergTableIdentifier() {
+    Assertions.assertThat(
+            NamespaceHelpers.toIcebergTableIdentifier(
+                SnowflakeIdentifier.ofTable("DB1", "SCHEMA1", "TABLE1")))
+        .isEqualTo(TableIdentifier.of("DB1", "SCHEMA1", "TABLE1"));
+  }
+
+  @Test
+  public void testToIcebergTableIdentifierWrongType() {
+    Assertions.assertThatExceptionOfType(IllegalArgumentException.class)
+        .isThrownBy(
+            () ->
+                NamespaceHelpers.toIcebergTableIdentifier(
+                    SnowflakeIdentifier.ofSchema("DB1", "SCHEMA1")))
+        .withMessageContaining("must be type TABLE");
+  }
+}
diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java
index e08b71dba046..9f66f352e8c1 100644
--- a/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java
+++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/SnowflakeCatalogTest.java
@@ -38,7 +38,7 @@
 public class SnowflakeCatalogTest {
 
-  static final String TEST_CATALOG_NAME = "slushLog";
+  private static final String TEST_CATALOG_NAME = "slushLog";
   private SnowflakeCatalog catalog;
   private FakeSnowflakeClient fakeClient;
   private InMemoryFileIO fakeFileIO;
@@ -51,39 +51,27 @@ public void before() {
     fakeClient = new FakeSnowflakeClient();
     fakeClient.addTable(
-        "DB_1",
-        "SCHEMA_1",
-        "TAB_1",
+        SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TAB_1"),
         SnowflakeTableMetadata.parseJson(
             "{\"metadataLocation\":\"s3://tab1/metadata/v3.metadata.json\",\"status\":\"success\"}"));
     fakeClient.addTable(
-        "DB_1",
-        "SCHEMA_1",
-        "TAB_2",
+        SnowflakeIdentifier.ofTable("DB_1", "SCHEMA_1", "TAB_2"),
         SnowflakeTableMetadata.parseJson(
             "{\"metadataLocation\":\"s3://tab2/metadata/v1.metadata.json\",\"status\":\"success\"}"));
     fakeClient.addTable(
-        "DB_2",
-        "SCHEMA_2",
-        "TAB_3",
+        SnowflakeIdentifier.ofTable("DB_2", "SCHEMA_2", "TAB_3"),
         SnowflakeTableMetadata.parseJson(
            "{\"metadataLocation\":\"azure://myaccount.blob.core.windows.net/mycontainer/tab3/metadata/v334.metadata.json\",\"status\":\"success\"}"));
     fakeClient.addTable(
-        "DB_2",
-        "SCHEMA_2",
-        "TAB_4",
+        SnowflakeIdentifier.ofTable("DB_2", "SCHEMA_2", "TAB_4"),
         SnowflakeTableMetadata.parseJson(
            "{\"metadataLocation\":\"azure://myaccount.blob.core.windows.net/mycontainer/tab4/metadata/v323.metadata.json\",\"status\":\"success\"}"));
     fakeClient.addTable(
-        "DB_3",
-        "SCHEMA_3",
-        "TAB_5",
+        SnowflakeIdentifier.ofTable("DB_3", "SCHEMA_3", "TAB_5"),
         SnowflakeTableMetadata.parseJson(
            "{\"metadataLocation\":\"gcs://tab5/metadata/v793.metadata.json\",\"status\":\"success\"}"));
     fakeClient.addTable(
-        "DB_3",
-        "SCHEMA_4",
-        "TAB_6",
+        SnowflakeIdentifier.ofTable("DB_3", "SCHEMA_4", "TAB_6"),
         SnowflakeTableMetadata.parseJson(
            "{\"metadataLocation\":\"gcs://tab6/metadata/v123.metadata.json\",\"status\":\"success\"}"));

From 2729e64d0138b8e5b0ade1c0f16d39fa0ea50133 Mon Sep 17 00:00:00 2001
From: Dennis Huo
Date: Fri, 6 Jan 2023 14:36:13 -0800
Subject: [PATCH 16/20] Fix precondition messages, remove getConf()

---
 core/src/test/java/org/apache/iceberg/io/InMemoryFileIO.java | 4 ++--
 .../java/org/apache/iceberg/snowflake/SnowflakeCatalog.java  | 4 ----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/core/src/test/java/org/apache/iceberg/io/InMemoryFileIO.java b/core/src/test/java/org/apache/iceberg/io/InMemoryFileIO.java
index 3dde2a8ce553..3ea2c3198fe7 100644
--- a/core/src/test/java/org/apache/iceberg/io/InMemoryFileIO.java
+++ b/core/src/test/java/org/apache/iceberg/io/InMemoryFileIO.java
@@ -48,13 +48,13 @@ public InputFile newInputFile(String path) {
 
   @Override
   public OutputFile newOutputFile(String path) {
-    Preconditions.checkState(!closed, "Cannot call newInputFile after calling close()");
+    Preconditions.checkState(!closed, "Cannot call newOutputFile after calling close()");
     return new InMemoryOutputFile(path, this);
   }
 
   @Override
   public void deleteFile(String path) {
-    Preconditions.checkState(!closed, "Cannot call newInputFile after calling close()");
+    Preconditions.checkState(!closed, "Cannot call deleteFile after calling close()");
     if (!inMemoryFiles.containsKey(path)) {
       throw new NotFoundException("No in-memory file found for path: %s", path);
     }
diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
index 2fc5473f706e..01cb93f3fac8 100644
--- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
@@ -247,8 +247,4 @@ protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
   public void setConf(Object conf) {
     this.conf = conf;
   }
-
-  public Object getConf() {
-    return conf;
-  }
 }

From 4c1e79f0acf5b65986e0cccdccda2f09d6be27ce Mon Sep 17 00:00:00 2001
From: Dennis Huo
Date: Fri, 6 Jan 2023 16:12:54 -0800
Subject: [PATCH 17/20] Clean up varargs.

---
 .../iceberg/snowflake/JdbcSnowflakeClient.java     | 14 +++-----------
 .../snowflake/JdbcSnowflakeClientTest.java         | 15 +++++----------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java
index c5cf8cecae17..1618f76c10e5 100644
--- a/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/JdbcSnowflakeClient.java
@@ -178,9 +178,7 @@ public boolean databaseExists(SnowflakeIdentifier database) {
     try {
       databases =
           connectionPool.run(
-              conn ->
-                  queryHarness.query(
-                      conn, finalQuery, DATABASE_RESULT_SET_HANDLER, (String[]) null));
+              conn -> queryHarness.query(conn, finalQuery, DATABASE_RESULT_SET_HANDLER));
     } catch (SQLException e) {
       throw new UncheckedSQLException(e, "Failed to check if database '%s' exists", database);
     } catch (InterruptedException e) {
@@ -218,10 +216,7 @@ public boolean schemaExists(SnowflakeIdentifier schema) {
           connectionPool.run(
               conn ->
                   queryHarness.query(
-                      conn,
-                      finalQuery,
-                      SCHEMA_RESULT_SET_HANDLER,
-                      new String[] {schema.databaseName()}));
+                      conn, finalQuery, SCHEMA_RESULT_SET_HANDLER, schema.databaseName()));
     } catch (SQLException e) {
       throw new UncheckedSQLException(e, "Failed to check if schema '%s' exists", schema);
     } catch (InterruptedException e) {
@@ -243,10 +238,7 @@ public List<SnowflakeIdentifier> listDatabases() {
           connectionPool.run(
               conn ->
                   queryHarness.query(
-                      conn,
-                      "SHOW DATABASES IN ACCOUNT",
-                      DATABASE_RESULT_SET_HANDLER,
-                      (String[]) null));
+                      conn, "SHOW DATABASES IN ACCOUNT", DATABASE_RESULT_SET_HANDLER));
     } catch (SQLException e) {
       throw new UncheckedSQLException(e, "Failed to list databases");
     } catch (InterruptedException e) {
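This cleanup leans on standard Java varargs resolution: omitting the trailing argument passes a zero-length array, whereas the old call sites had to spell out (String[]) null because a bare null binds to the array itself (with a compiler warning). A self-contained illustration, where VarargsDemo and its describe method are written for this example and are not part of the patch:

    class VarargsDemo {
      // Trailing String... parameter: call sites may omit it entirely.
      static String describe(String query, String... args) {
        if (args == null) {
          return "null array";
        }
        return args.length + " bound parameter(s)";
      }

      public static void main(String[] unused) {
        System.out.println(describe("SHOW DATABASES IN ACCOUNT"));                  // 0 bound parameter(s)
        System.out.println(describe("SHOW SCHEMAS IN DATABASE ?", "DB_1"));         // 1 bound parameter(s)
        System.out.println(describe("SHOW DATABASES IN ACCOUNT", (String[]) null)); // null array
      }
    }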
diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java
index af7a3631499f..1374ad8ac283 100644
--- a/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java
+++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/JdbcSnowflakeClientTest.java
@@ -88,8 +88,7 @@ public void testDatabaseExists() throws SQLException {
         .query(
             eq(mockConnection),
             eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"),
-            any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq(null));
+            any(JdbcSnowflakeClient.ResultSetParser.class));
@@ -105,8 +104,7 @@ public void testDatabaseExistsSpecialCharacters() throws SQLException {
         .query(
             eq(mockConnection),
             eq("SHOW DATABASES LIKE '_DB_1$_________' IN ACCOUNT"),
-            any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq(null));
+            any(JdbcSnowflakeClient.ResultSetParser.class));
@@ -144,8 +142,7 @@ public void testSchemaExists() throws SQLException {
         .query(
             eq(mockConnection),
             eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"),
-            any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq(null));
+            any(JdbcSnowflakeClient.ResultSetParser.class));
     verify(mockQueryHarness)
         .query(
             eq(mockConnection),
@@ -173,8 +170,7 @@ public void testSchemaExistsSpecialCharacters() throws SQLException {
         .query(
             eq(mockConnection),
             eq("SHOW DATABASES LIKE 'DB_1' IN ACCOUNT"),
-            any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq(null));
+            any(JdbcSnowflakeClient.ResultSetParser.class));
     verify(mockQueryHarness)
         .query(
             eq(mockConnection),
@@ -229,8 +225,7 @@ public void testListDatabasesInAccount() throws SQLException {
         .query(
             eq(mockConnection),
             eq("SHOW DATABASES IN ACCOUNT"),
-            any(JdbcSnowflakeClient.ResultSetParser.class),
-            eq(null));
+            any(JdbcSnowflakeClient.ResultSetParser.class));
 
     Assertions.assertThat(actualList)
         .containsExactly(

From bc0c6eea801266efec9c77c566bf25485ae12c2e Mon Sep 17 00:00:00 2001
From: Dennis Huo
Date: Fri, 6 Jan 2023 16:24:40 -0800
Subject: [PATCH 18/20] Make data members final, include rawJsonVal in toString
 for debuggability.

---
 .../iceberg/snowflake/SnowflakeIdentifier.java     |  8 ++++----
 .../iceberg/snowflake/SnowflakeTableMetadata.java  | 12 ++++++------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java
index f06926ce44ae..3082b1d8e58a 100644
--- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeIdentifier.java
@@ -37,10 +37,10 @@ public enum Type {
     TABLE
   }
 
-  private String databaseName;
-  private String schemaName;
-  private String tableName;
-  private Type type;
+  private final String databaseName;
+  private final String schemaName;
+  private final String tableName;
+  private final Type type;
 
   private SnowflakeIdentifier(String databaseName, String schemaName, String tableName, Type type) {
     this.databaseName = databaseName;
diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java
index 7d11cccc02d1..d9c29ad26b7d 100644
--- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java
@@ -30,14 +30,14 @@ class SnowflakeTableMetadata {
   public static final Pattern SNOWFLAKE_AZURE_PATTERN =
       Pattern.compile("azure://([^/]+)/([^/]+)/(.*)");
 
-  private String snowflakeMetadataLocation;
-  private String icebergMetadataLocation;
-  private String status;
+  private final String snowflakeMetadataLocation;
+  private final String icebergMetadataLocation;
+  private final String status;
 
   // Note: Since not all sources will necessarily come from a raw JSON representation, this raw
   // JSON should only be considered a convenient debugging field. Equality of two
   // SnowflakeTableMetadata instances should not depend on equality of this field.
-  private String rawJsonVal;
+  private final String rawJsonVal;
 
   SnowflakeTableMetadata(
       String snowflakeMetadataLocation,
@@ -88,8 +88,8 @@ public int hashCode() {
   @Override
   public String toString() {
     return String.format(
-        "snowflakeMetadataLocation: '%s', icebergMetadataLocation: '%s', status: '%s",
-        snowflakeMetadataLocation, icebergMetadataLocation, status);
+        "snowflakeMetadataLocation: '%s', icebergMetadataLocation: '%s', status: '%s', rawJsonVal: %s",
+        snowflakeMetadataLocation, icebergMetadataLocation, status, rawJsonVal);
   }
 
   /**

From ebe5dd6ab42e05c19f3d86223e8bf8a095eb320c Mon Sep 17 00:00:00 2001
From: Dennis Huo
Date: Tue, 10 Jan 2023 15:57:17 -0800
Subject: [PATCH 19/20] Combine some small test cases into roundtrip test
 cases, misc cleanup

---
 .../iceberg/snowflake/SnowflakeCatalog.java        |  4 +-
 .../snowflake/SnowflakeTableMetadata.java          |  8 ++-
 .../snowflake/NamespaceHelpersTest.java            | 66 +++++++++----------
 3 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
index 01cb93f3fac8..703b5e199adf 100644
--- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
@@ -170,9 +170,7 @@ public List<Namespace> listNamespaces(Namespace namespace) {
               scope, namespace));
     }
 
-    List<Namespace> namespaceList =
-        results.stream().map(NamespaceHelpers::toIcebergNamespace).collect(Collectors.toList());
-    return namespaceList;
+    return results.stream().map(NamespaceHelpers::toIcebergNamespace).collect(Collectors.toList());
   }
 
   @Override
diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java
index d9c29ad26b7d..c550b3e13a3a 100644
--- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeTableMetadata.java
@@ -88,8 +88,12 @@ public int hashCode() {
   @Override
   public String toString() {
     return String.format(
-        "snowflakeMetadataLocation: '%s', icebergMetadataLocation: '%s', status: '%s', rawJsonVal: %s",
-        snowflakeMetadataLocation, icebergMetadataLocation, status, rawJsonVal);
+        "snowflakeMetadataLocation: '%s', icebergMetadataLocation: '%s', status: '%s'",
+        snowflakeMetadataLocation, icebergMetadataLocation, status);
+  }
+
+  public String toDebugString() {
+    return String.format("%s, rawJsonVal: %s", toString(), rawJsonVal);
   }
 
   /**
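Keeping rawJsonVal out of toString(), and per the earlier comment out of equals()/hashCode(), means two metadata instances parsed from differently formatted JSON still compare equal; toDebugString() exists purely for troubleshooting. An illustrative consequence, assuming the four-argument constructor order shown earlier in this patch series (location, location, status, raw JSON):

    // Same logical content, different raw JSON formatting: equal by equals()/hashCode(),
    // identical toString(), distinguishable only via toDebugString().
    SnowflakeTableMetadata a =
        new SnowflakeTableMetadata(
            "s3://tab1/metadata/v3.metadata.json",
            "s3://tab1/metadata/v3.metadata.json",
            "success",
            "{\"metadataLocation\":\"s3://tab1/metadata/v3.metadata.json\",\"status\":\"success\"}");
    SnowflakeTableMetadata b =
        new SnowflakeTableMetadata(
            "s3://tab1/metadata/v3.metadata.json",
            "s3://tab1/metadata/v3.metadata.json",
            "success",
            "{ \"status\": \"success\", \"metadataLocation\": \"s3://tab1/metadata/v3.metadata.json\" }");
    assert a.equals(b);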
diff --git a/snowflake/src/test/java/org/apache/iceberg/snowflake/NamespaceHelpersTest.java b/snowflake/src/test/java/org/apache/iceberg/snowflake/NamespaceHelpersTest.java
index fdddb73f8198..2dd7fb6ec9af 100644
--- a/snowflake/src/test/java/org/apache/iceberg/snowflake/NamespaceHelpersTest.java
+++ b/snowflake/src/test/java/org/apache/iceberg/snowflake/NamespaceHelpersTest.java
@@ -25,21 +25,44 @@
 public class NamespaceHelpersTest {
   @Test
-  public void testToSnowflakeIdentifierRoot() {
-    Assertions.assertThat(NamespaceHelpers.toSnowflakeIdentifier(Namespace.empty()))
-        .isEqualTo(SnowflakeIdentifier.ofRoot());
+  public void testRoundTripRoot() {
+    Namespace icebergNamespace = Namespace.empty();
+    SnowflakeIdentifier snowflakeIdentifier =
+        NamespaceHelpers.toSnowflakeIdentifier(icebergNamespace);
+    Assertions.assertThat(snowflakeIdentifier).isEqualTo(SnowflakeIdentifier.ofRoot());
+    Assertions.assertThat(NamespaceHelpers.toIcebergNamespace(snowflakeIdentifier))
+        .isEqualTo(icebergNamespace);
   }
 
   @Test
-  public void testToSnowflakeIdentifierDatabase() {
-    Assertions.assertThat(NamespaceHelpers.toSnowflakeIdentifier(Namespace.of("DB1")))
-        .isEqualTo(SnowflakeIdentifier.ofDatabase("DB1"));
+  public void testRoundTripDatabase() {
+    Namespace icebergNamespace = Namespace.of("DB1");
+    SnowflakeIdentifier snowflakeIdentifier =
+        NamespaceHelpers.toSnowflakeIdentifier(icebergNamespace);
+    Assertions.assertThat(snowflakeIdentifier).isEqualTo(SnowflakeIdentifier.ofDatabase("DB1"));
+    Assertions.assertThat(NamespaceHelpers.toIcebergNamespace(snowflakeIdentifier))
+        .isEqualTo(icebergNamespace);
   }
 
   @Test
-  public void testToSnowflakeIdentifierSchema() {
-    Assertions.assertThat(NamespaceHelpers.toSnowflakeIdentifier(Namespace.of("DB1", "SCHEMA1")))
+  public void testRoundTripSchema() {
+    Namespace icebergNamespace = Namespace.of("DB1", "SCHEMA1");
+    SnowflakeIdentifier snowflakeIdentifier =
+        NamespaceHelpers.toSnowflakeIdentifier(icebergNamespace);
+    Assertions.assertThat(snowflakeIdentifier)
         .isEqualTo(SnowflakeIdentifier.ofSchema("DB1", "SCHEMA1"));
+    Assertions.assertThat(NamespaceHelpers.toIcebergNamespace(snowflakeIdentifier))
+        .isEqualTo(icebergNamespace);
+  }
+
+  @Test
+  public void testRoundTripTable() {
+    TableIdentifier icebergTable = TableIdentifier.of("DB1", "SCHEMA1", "TABLE1");
+    SnowflakeIdentifier snowflakeIdentifier = NamespaceHelpers.toSnowflakeIdentifier(icebergTable);
+    Assertions.assertThat(snowflakeIdentifier)
+        .isEqualTo(SnowflakeIdentifier.ofTable("DB1", "SCHEMA1", "TABLE1"));
+    Assertions.assertThat(NamespaceHelpers.toIcebergTableIdentifier(snowflakeIdentifier))
+        .isEqualTo(icebergTable);
   }
 
   @Test
@@ -52,13 +75,6 @@ public void testToSnowflakeIdentifierMaxNamespaceLevel() {
         .withMessageContaining("max namespace level");
   }
 
-  @Test
-  public void testToSnowflakeIdentifierTable() {
-    Assertions.assertThat(
-            NamespaceHelpers.toSnowflakeIdentifier(TableIdentifier.of("DB1", "SCHEMA1", "TABLE1")))
-        .isEqualTo(SnowflakeIdentifier.ofTable("DB1", "SCHEMA1", "TABLE1"));
-  }
-
   @Test
@@ -69,26 +85,6 @@ public void testToSnowflakeIdentifierTableBadNamespace() {
         .withMessageContaining("must be at the SCHEMA level");
   }
 
-  @Test
-  public void testToIcebergNamespaceRoot() {
-    Assertions.assertThat(NamespaceHelpers.toIcebergNamespace(SnowflakeIdentifier.ofRoot()))
-        .isEqualTo(Namespace.empty());
-  }
-
-  @Test
-  public void testToIcebergNamespaceDatabase() {
-    Assertions.assertThat(
-            NamespaceHelpers.toIcebergNamespace(SnowflakeIdentifier.ofDatabase("DB1")))
-        .isEqualTo(Namespace.of("DB1"));
-  }
-
-  @Test
-  public void testToIcebergNamespaceSchema() {
-    Assertions.assertThat(
-            NamespaceHelpers.toIcebergNamespace(SnowflakeIdentifier.ofSchema("DB1", "SCHEMA1")))
-        .isEqualTo(Namespace.of("DB1", "SCHEMA1"));
-  }
-
   @Test
   public void testToIcebergNamespaceTableFails() {
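Folding the one-way assertions into round-trip tests checks the inverse mapping at the same time, so the deleted testToIcebergNamespace* cases are subsumed rather than lost. The same idea expressed as a tiny reusable helper (illustrative; the production tests above inline it per case instead):

    // Illustrative helper: assert a Namespace survives the there-and-back conversion.
    private static void assertRoundTrip(Namespace namespace) {
      SnowflakeIdentifier id = NamespaceHelpers.toSnowflakeIdentifier(namespace);
      Assertions.assertThat(NamespaceHelpers.toIcebergNamespace(id)).isEqualTo(namespace);
    }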
From 9e9b9e66034eb1cc241abafc953c6ff6c3d74680 Mon Sep 17 00:00:00 2001
From: Dennis Huo
Date: Fri, 13 Jan 2023 15:49:21 -0800
Subject: [PATCH 20/20] Add comment for why a factory class is exposed for
 testing purposes.

---
 .../main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
index 703b5e199adf..19302d578497 100644
--- a/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
+++ b/snowflake/src/main/java/org/apache/iceberg/snowflake/SnowflakeCatalog.java
@@ -46,6 +46,7 @@ public class SnowflakeCatalog extends BaseMetastoreCatalog
   private static final String DEFAULT_CATALOG_NAME = "snowflake_catalog";
   private static final String DEFAULT_FILE_IO_IMPL = "org.apache.iceberg.io.ResolvingFileIO";
 
+  // Injectable factory for testing purposes.
   static class FileIOFactory {
     public FileIO newFileIO(String impl, Map<String, String> properties, Object hadoopConf) {
       return CatalogUtil.loadFileIO(impl, properties, hadoopConf);
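The added comment makes the intent of the nested class explicit: FileIOFactory is a test seam, letting unit tests substitute a factory so that newTableOps() receives an in-memory FileIO instead of one loaded reflectively through CatalogUtil.loadFileIO. A hypothetical sketch of such an override; the wiring hook is illustrative and the actual test setup in this series may differ:

    // Hypothetical test stub: ignore impl/properties and always hand back a fake.
    SnowflakeCatalog.FileIOFactory fakeFactory =
        new SnowflakeCatalog.FileIOFactory() {
          @Override
          public FileIO newFileIO(String impl, Map<String, String> properties, Object hadoopConf) {
            return new InMemoryFileIO(); // never touches real storage
          }
        };
    // A package-private constructor or setter (not shown in this hunk) would install it
    // before the catalog under test is exercised.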