
Commit 030276c

chore: transition the library to microgenerator (#62)
* chore: remove old GAPIC code for v1 API
* Regenerate the v1 API with microgenerator
* Adjust dependencies and classifiers in setup.py
* Fix types aggregation in types.py
* Adjust import paths
* Fix and adjust unit tests
* Fix and adjust system tests
* Adjust unit test coverage threshold. Not all paths are covered, not even in the generated code, so the adjustment is necessary.
* Fix docs build
* Adjust quickstart sample
* Adjust sample in client docstring
* Remove beta API code and docs
* Simplify synth replacement rules and regenerate. Rules conditionally matching versions other than v1 are no longer needed.
* Consolidate imports in google.cloud.bigquery.storage
* Use google.cloud.bigquery.storage as import path (see the import sketch below)
* Hide async client from most import paths
* Use GAPIC client mock in ReadRowsStream tests
* Remove redundant installations in nox sessions
* Include manual classes in reference docs
* Add UPGRADING guide
* Add minor CHANGELOG improvements
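For orientation, a minimal before/after sketch of the consolidated import path described above. This is assembled from the quickstart changes in this commit rather than from any new API surface; the table path is the same public baby-names table the sample reads.

    # Before this commit: version-pinned import path.
    # from google.cloud import bigquery_storage_v1
    # client = bigquery_storage_v1.BigQueryReadClient()

    # After this commit: consolidated google.cloud.bigquery.storage path.
    from google.cloud.bigquery.storage import BigQueryReadClient
    from google.cloud.bigquery.storage import types

    client = BigQueryReadClient()
    requested_session = types.ReadSession()
    requested_session.table = (
        "projects/bigquery-public-data/datasets/usa_names/tables/usa_1910_current"
    )
    requested_session.data_format = types.DataFormat.AVRO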
1 parent 076181e commit 030276c

File tree

2 files changed: 33 additions, 39 deletions

bigquery_storage/quickstart/quickstart.py

Lines changed: 11 additions & 13 deletions
@@ -17,46 +17,44 @@
 
 def main(project_id="your-project-id", snapshot_millis=0):
     # [START bigquerystorage_quickstart]
-    from google.cloud import bigquery_storage_v1
+    from google.cloud.bigquery.storage import BigQueryReadClient
+    from google.cloud.bigquery.storage import types
 
     # TODO(developer): Set the project_id variable.
     # project_id = 'your-project-id'
     #
     # The read session is created in this project. This project can be
     # different from that which contains the table.
 
-    client = bigquery_storage_v1.BigQueryReadClient()
+    client = BigQueryReadClient()
 
     # This example reads baby name data from the public datasets.
     table = "projects/{}/datasets/{}/tables/{}".format(
         "bigquery-public-data", "usa_names", "usa_1910_current"
     )
 
-    requested_session = bigquery_storage_v1.types.ReadSession()
+    requested_session = types.ReadSession()
     requested_session.table = table
     # This API can also deliver data serialized in Apache Arrow format.
     # This example leverages Apache Avro.
-    requested_session.data_format = bigquery_storage_v1.enums.DataFormat.AVRO
+    requested_session.data_format = types.DataFormat.AVRO
 
     # We limit the output columns to a subset of those allowed in the table,
     # and set a simple filter to only report names from the state of
     # Washington (WA).
-    requested_session.read_options.selected_fields.append("name")
-    requested_session.read_options.selected_fields.append("number")
-    requested_session.read_options.selected_fields.append("state")
+    requested_session.read_options.selected_fields = ["name", "number", "state"]
     requested_session.read_options.row_restriction = 'state = "WA"'
 
     # Set a snapshot time if it's been specified.
-    modifiers = None
     if snapshot_millis > 0:
-        requested_session.table_modifiers.snapshot_time.FromMilliseconds(
-            snapshot_millis
-        )
+        snapshot_time = types.Timestamp()
+        snapshot_time.FromMilliseconds(snapshot_millis)
+        requested_session.table_modifiers.snapshot_time = snapshot_time
 
     parent = "projects/{}".format(project_id)
     session = client.create_read_session(
-        parent,
-        requested_session,
+        parent=parent,
+        read_session=requested_session,
         # We'll use only a single stream for reading data from the table. However,
         # if you wanted to fan out multiple readers you could do so by having a
         # reader process each individual stream.
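The hunk above ends inside the create_read_session call. As a hedged sketch only, the rest of the quickstart plausibly continues on the new surface as follows; max_stream_count=1 is an assumption matching the single-stream comment, and the read_rows/rows calls mirror the main_test.py changes below.

    session = client.create_read_session(
        parent=parent,
        read_session=requested_session,
        # Assumption: request a single stream, per the comment above.
        max_stream_count=1,
    )

    # Read from the one stream and iterate the decoded Avro rows.
    reader = client.read_rows(session.streams[0].name)
    for row in reader.rows(session):
        print(row["name"])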

bigquery_storage/to_dataframe/main_test.py

Lines changed: 22 additions & 26 deletions
@@ -21,7 +21,7 @@ def clients():
     # [START bigquerystorage_pandas_tutorial_create_client]
     import google.auth
     from google.cloud import bigquery
-    from google.cloud import bigquery_storage_v1beta1
+    from google.cloud.bigquery import storage
 
     # Explicitly create a credentials object. This allows you to use the same
     # credentials for both the BigQuery and BigQuery Storage clients, avoiding
@@ -32,9 +32,7 @@ def clients():
 
     # Make clients.
     bqclient = bigquery.Client(credentials=credentials, project=your_project_id,)
-    bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
-        credentials=credentials
-    )
+    bqstorageclient = storage.BigQueryReadClient(credentials=credentials)
     # [END bigquerystorage_pandas_tutorial_create_client]
     # [END bigquerystorage_pandas_tutorial_all]
     return bqclient, bqstorageclient
@@ -98,48 +96,46 @@ def test_query_to_dataframe(capsys, clients):
 
 
 def test_session_to_dataframe(capsys, clients):
-    from google.cloud import bigquery_storage_v1beta1
+    from google.cloud.bigquery.storage import types
 
     bqclient, bqstorageclient = clients
     your_project_id = bqclient.project
 
     # [START bigquerystorage_pandas_tutorial_all]
     # [START bigquerystorage_pandas_tutorial_read_session]
-    table = bigquery_storage_v1beta1.types.TableReference()
-    table.project_id = "bigquery-public-data"
-    table.dataset_id = "new_york_trees"
-    table.table_id = "tree_species"
+    project_id = "bigquery-public-data"
+    dataset_id = "new_york_trees"
+    table_id = "tree_species"
+    table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}"
 
     # Select columns to read with read options. If no read options are
     # specified, the whole table is read.
-    read_options = bigquery_storage_v1beta1.types.TableReadOptions()
-    read_options.selected_fields.append("species_common_name")
-    read_options.selected_fields.append("fall_color")
+    read_options = types.ReadSession.TableReadOptions(
+        selected_fields=["species_common_name", "fall_color"]
+    )
 
     parent = "projects/{}".format(your_project_id)
-    session = bqstorageclient.create_read_session(
-        table,
-        parent,
-        read_options=read_options,
+
+    requested_session = types.ReadSession(
+        table=table,
         # This API can also deliver data serialized in Apache Avro format.
         # This example leverages Apache Arrow.
-        format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW,
-        # We use a LIQUID strategy in this example because we only read from a
-        # single stream. Consider BALANCED if you're consuming multiple streams
-        # concurrently and want more consistent stream sizes.
-        sharding_strategy=(bigquery_storage_v1beta1.enums.ShardingStrategy.LIQUID),
+        data_format=types.DataFormat.ARROW,
+        read_options=read_options,
+    )
+    read_session = bqstorageclient.create_read_session(
+        parent=parent, read_session=requested_session
     )
 
     # This example reads from only a single stream. Read from multiple streams
     # to fetch data faster. Note that the session may not contain any streams
     # if there are no rows to read.
-    stream = session.streams[0]
-    position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream)
-    reader = bqstorageclient.read_rows(position)
+    stream = read_session.streams[0]
+    reader = bqstorageclient.read_rows(stream.name)
 
-    # Parse all Avro blocks and create a dataframe. This call requires a
+    # Parse all Arrow blocks and create a dataframe. This call requires a
     # session, because the session contains the schema for the row blocks.
-    dataframe = reader.to_dataframe(session)
+    dataframe = reader.to_dataframe(read_session)
     print(dataframe.head())
     # [END bigquerystorage_pandas_tutorial_read_session]
     # [END bigquerystorage_pandas_tutorial_all]
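The comment in this hunk warns that the session may contain no streams when the table has no rows. A tiny hypothetical helper makes that guard explicit; first_stream_name is not part of the library or this commit, just a sketch.

    def first_stream_name(read_session):
        """Return the first stream's name, or None when the session has no streams."""
        if not read_session.streams:
            return None
        return read_session.streams[0].name

    # Usage sketch: skip the read when the session is empty.
    # name = first_stream_name(read_session)
    # reader = bqstorageclient.read_rows(name) if name else None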
