
Commit 030276c

chore: transition the library to microgenerator (#62)
* chore: remove old GAPIC code for v1 API
* Regenerate the v1 API with microgenerator
* Adjust dependencies and classifiers in setup.py
* Fix types aggregation in types.py
* Adjust import paths
* Fix and adjust unit tests
* Fix and adjust system tests
* Adjust unit test coverage threshold. Not all paths are covered, not even in the generated code, so the adjustment is necessary.
* Fix docs build
* Adjust quickstart sample
* Adjust sample in client docstring
* Remove beta API code and docs
* Simplify synth replacement rules and regenerate. Rules conditionally matching versions other than v1 are no longer needed.
* Consolidate imports in google.cloud.bigquery.storage
* Use google.cloud.bigquery.storage as import path (see the import sketch below)
* Hide async client from most import paths
* Use GAPIC client mock in ReadRowsStream tests
* Remove redundant installations in nox sessions
* Include manual classes in reference docs
* Add UPGRADING guide
* Add minor CHANGELOG improvements
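For orientation, a minimal before/after sketch of the consolidated import path described above. This is assembled from the quickstart changes in this commit rather than from any new API surface; the table path is the same public baby-names table the sample reads.

    # Before this commit: version-pinned import path.
    # from google.cloud import bigquery_storage_v1
    # client = bigquery_storage_v1.BigQueryReadClient()

    # After this commit: consolidated google.cloud.bigquery.storage path.
    from google.cloud.bigquery.storage import BigQueryReadClient
    from google.cloud.bigquery.storage import types

    client = BigQueryReadClient()
    requested_session = types.ReadSession()
    requested_session.table = (
        "projects/bigquery-public-data/datasets/usa_names/tables/usa_1910_current"
    )
    requested_session.data_format = types.DataFormat.AVRO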
1 parent 076181e commit 030276c

File tree

2 files changed: 33 additions, 39 deletions

bigquery_storage/quickstart/quickstart.py

Lines changed: 11 additions & 13 deletions
@@ -17,46 +17,44 @@
 
 def main(project_id="your-project-id", snapshot_millis=0):
     # [START bigquerystorage_quickstart]
-    from google.cloud import bigquery_storage_v1
+    from google.cloud.bigquery.storage import BigQueryReadClient
+    from google.cloud.bigquery.storage import types
 
     # TODO(developer): Set the project_id variable.
     # project_id = 'your-project-id'
     #
     # The read session is created in this project. This project can be
     # different from that which contains the table.
 
-    client = bigquery_storage_v1.BigQueryReadClient()
+    client = BigQueryReadClient()
 
     # This example reads baby name data from the public datasets.
     table = "projects/{}/datasets/{}/tables/{}".format(
         "bigquery-public-data", "usa_names", "usa_1910_current"
     )
 
-    requested_session = bigquery_storage_v1.types.ReadSession()
+    requested_session = types.ReadSession()
     requested_session.table = table
     # This API can also deliver data serialized in Apache Arrow format.
     # This example leverages Apache Avro.
-    requested_session.data_format = bigquery_storage_v1.enums.DataFormat.AVRO
+    requested_session.data_format = types.DataFormat.AVRO
 
     # We limit the output columns to a subset of those allowed in the table,
     # and set a simple filter to only report names from the state of
     # Washington (WA).
-    requested_session.read_options.selected_fields.append("name")
-    requested_session.read_options.selected_fields.append("number")
-    requested_session.read_options.selected_fields.append("state")
+    requested_session.read_options.selected_fields = ["name", "number", "state"]
     requested_session.read_options.row_restriction = 'state = "WA"'
 
     # Set a snapshot time if it's been specified.
-    modifiers = None
     if snapshot_millis > 0:
-        requested_session.table_modifiers.snapshot_time.FromMilliseconds(
-            snapshot_millis
-        )
+        snapshot_time = types.Timestamp()
+        snapshot_time.FromMilliseconds(snapshot_millis)
+        requested_session.table_modifiers.snapshot_time = snapshot_time
 
     parent = "projects/{}".format(project_id)
     session = client.create_read_session(
-        parent,
-        requested_session,
+        parent=parent,
+        read_session=requested_session,
         # We'll use only a single stream for reading data from the table. However,
         # if you wanted to fan out multiple readers you could do so by having a
         # reader process each individual stream.
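The hunk above ends inside the create_read_session call. As a hedged sketch only, the rest of the quickstart plausibly continues on the new surface as follows; max_stream_count=1 is an assumption matching the single-stream comment, and the read_rows/rows calls mirror the main_test.py changes below.

    session = client.create_read_session(
        parent=parent,
        read_session=requested_session,
        # Assumption: request a single stream, per the comment above.
        max_stream_count=1,
    )

    # Read from the one stream and iterate the decoded Avro rows.
    reader = client.read_rows(session.streams[0].name)
    for row in reader.rows(session):
        print(row["name"])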

bigquery_storage/to_dataframe/main_test.py

Lines changed: 22 additions & 26 deletions
@@ -21,7 +21,7 @@ def clients():
     # [START bigquerystorage_pandas_tutorial_create_client]
     import google.auth
     from google.cloud import bigquery
-    from google.cloud import bigquery_storage_v1beta1
+    from google.cloud.bigquery import storage
 
     # Explicitly create a credentials object. This allows you to use the same
     # credentials for both the BigQuery and BigQuery Storage clients, avoiding
@@ -32,9 +32,7 @@ def clients():
 
     # Make clients.
     bqclient = bigquery.Client(credentials=credentials, project=your_project_id,)
-    bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
-        credentials=credentials
-    )
+    bqstorageclient = storage.BigQueryReadClient(credentials=credentials)
     # [END bigquerystorage_pandas_tutorial_create_client]
     # [END bigquerystorage_pandas_tutorial_all]
     return bqclient, bqstorageclient
@@ -98,48 +96,46 @@ def test_query_to_dataframe(capsys, clients):
 
 
 def test_session_to_dataframe(capsys, clients):
-    from google.cloud import bigquery_storage_v1beta1
+    from google.cloud.bigquery.storage import types
 
     bqclient, bqstorageclient = clients
     your_project_id = bqclient.project
 
     # [START bigquerystorage_pandas_tutorial_all]
     # [START bigquerystorage_pandas_tutorial_read_session]
-    table = bigquery_storage_v1beta1.types.TableReference()
-    table.project_id = "bigquery-public-data"
-    table.dataset_id = "new_york_trees"
-    table.table_id = "tree_species"
+    project_id = "bigquery-public-data"
+    dataset_id = "new_york_trees"
+    table_id = "tree_species"
+    table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}"
 
     # Select columns to read with read options. If no read options are
     # specified, the whole table is read.
-    read_options = bigquery_storage_v1beta1.types.TableReadOptions()
-    read_options.selected_fields.append("species_common_name")
-    read_options.selected_fields.append("fall_color")
+    read_options = types.ReadSession.TableReadOptions(
+        selected_fields=["species_common_name", "fall_color"]
+    )
 
     parent = "projects/{}".format(your_project_id)
-    session = bqstorageclient.create_read_session(
-        table,
-        parent,
-        read_options=read_options,
+
+    requested_session = types.ReadSession(
+        table=table,
         # This API can also deliver data serialized in Apache Avro format.
         # This example leverages Apache Arrow.
-        format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW,
-        # We use a LIQUID strategy in this example because we only read from a
-        # single stream. Consider BALANCED if you're consuming multiple streams
-        # concurrently and want more consistent stream sizes.
-        sharding_strategy=(bigquery_storage_v1beta1.enums.ShardingStrategy.LIQUID),
+        data_format=types.DataFormat.ARROW,
+        read_options=read_options,
+    )
+    read_session = bqstorageclient.create_read_session(
+        parent=parent, read_session=requested_session
     )
 
     # This example reads from only a single stream. Read from multiple streams
     # to fetch data faster. Note that the session may not contain any streams
     # if there are no rows to read.
-    stream = session.streams[0]
-    position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream)
-    reader = bqstorageclient.read_rows(position)
+    stream = read_session.streams[0]
+    reader = bqstorageclient.read_rows(stream.name)
 
-    # Parse all Avro blocks and create a dataframe. This call requires a
+    # Parse all Arrow blocks and create a dataframe. This call requires a
     # session, because the session contains the schema for the row blocks.
-    dataframe = reader.to_dataframe(session)
+    dataframe = reader.to_dataframe(read_session)
     print(dataframe.head())
     # [END bigquerystorage_pandas_tutorial_read_session]
     # [END bigquerystorage_pandas_tutorial_all]
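The comment in this hunk warns that the session may contain no streams when the table has no rows. A tiny hypothetical helper makes that guard explicit; first_stream_name is not part of the library or this commit, just a sketch.

    def first_stream_name(read_session):
        """Return the first stream's name, or None when the session has no streams."""
        if not read_session.streams:
            return None
        return read_session.streams[0].name

    # Usage sketch: skip the read when the session is empty.
    # name = first_stream_name(read_session)
    # reader = bqstorageclient.read_rows(name) if name else None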
