|
36 | 36 | from google.cloud import bigquery_storage_v1beta1 |
37 | 37 | except ImportError: # pragma: NO COVER |
38 | 38 | bigquery_storage_v1beta1 = None |
| 39 | + |
| 40 | +try: |
| 41 | + import fastavro # to parse BQ storage client results |
| 42 | +except ImportError: # pragma: NO COVER |
| 43 | + fastavro = None |
| 44 | + |
39 | 45 | try: |
40 | 46 | import pandas |
41 | 47 | except ImportError: # pragma: NO COVER |
@@ -1543,6 +1549,100 @@ def test_dbapi_fetchall(self): |
1543 | 1549 | row_tuples = [r.values() for r in rows] |
1544 | 1550 | self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) |
1545 | 1551 |
|
| 1552 | + @unittest.skipIf( |
| 1553 | + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" |
| 1554 | + ) |
| 1555 | + def test_dbapi_fetch_w_bqstorage_client_small_result_set(self): |
| 1556 | + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( |
| 1557 | + credentials=Config.CLIENT._credentials |
| 1558 | + ) |
| 1559 | + cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() |
| 1560 | + |
| 1561 | +        # Reading small result sets causes an issue with the BQ storage client, |
| 1562 | + # and the DB API should transparently fall back to the default client. |
| 1563 | + cursor.execute( |
| 1564 | + """ |
| 1565 | + SELECT id, `by`, time_ts |
| 1566 | + FROM `bigquery-public-data.hacker_news.comments` |
| 1567 | + ORDER BY `id` ASC |
| 1568 | + LIMIT 10 |
| 1569 | + """ |
| 1570 | + ) |
| 1571 | + |
| 1572 | + result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] |
| 1573 | + |
| 1574 | + field_name = operator.itemgetter(0) |
| 1575 | + fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] |
| 1576 | + |
| 1577 | + expected_data = [ |
| 1578 | + [ |
| 1579 | + ("by", "sama"), |
| 1580 | + ("id", 15), |
| 1581 | + ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), |
| 1582 | + ], |
| 1583 | + [ |
| 1584 | + ("by", "pg"), |
| 1585 | + ("id", 17), |
| 1586 | + ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), |
| 1587 | + ], |
| 1588 | + [ |
| 1589 | + ("by", "pg"), |
| 1590 | + ("id", 22), |
| 1591 | + ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), |
| 1592 | + ], |
| 1593 | + ] |
| 1594 | + self.assertEqual(fetched_data, expected_data) |
| 1595 | + |
| 1596 | + @unittest.skipIf( |
| 1597 | + bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" |
| 1598 | + ) |
| 1599 | + @unittest.skipIf(fastavro is None, "Requires `fastavro`") |
| 1600 | + def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): |
| 1601 | + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( |
| 1602 | + credentials=Config.CLIENT._credentials |
| 1603 | + ) |
| 1604 | + cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() |
| 1605 | + |
| 1606 | +        # Pick a large enough LIMIT value to ensure that the fallback to the |
| 1607 | +        # default client is not needed due to the result set being too small |
| 1608 | +        # (a known issue that causes problems when reading such result sets with |
| 1609 | +        # the BQ storage client). |
| 1610 | + cursor.execute( |
| 1611 | + """ |
| 1612 | + SELECT id, `by`, time_ts |
| 1613 | + FROM `bigquery-public-data.hacker_news.comments` |
| 1614 | + ORDER BY `id` ASC |
| 1615 | + LIMIT 100000 |
| 1616 | + """ |
| 1617 | + ) |
| 1618 | + |
| 1619 | + result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] |
| 1620 | + |
| 1621 | + field_name = operator.itemgetter(0) |
| 1622 | + fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] |
| 1623 | + |
| 1624 | +        # Since the DB API is not thread-safe, only a single result stream |
| 1625 | +        # should be requested by the BQ storage client, meaning that results |
| 1626 | +        # should arrive in sorted order. |
| 1627 | + expected_data = [ |
| 1628 | + [ |
| 1629 | + ("by", "sama"), |
| 1630 | + ("id", 15), |
| 1631 | + ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), |
| 1632 | + ], |
| 1633 | + [ |
| 1634 | + ("by", "pg"), |
| 1635 | + ("id", 17), |
| 1636 | + ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), |
| 1637 | + ], |
| 1638 | + [ |
| 1639 | + ("by", "pg"), |
| 1640 | + ("id", 22), |
| 1641 | + ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), |
| 1642 | + ], |
| 1643 | + ] |
| 1644 | + self.assertEqual(fetched_data, expected_data) |
| 1645 | + |
1546 | 1646 | def _load_table_for_dml(self, rows, dataset_id, table_id): |
1547 | 1647 | from google.cloud._testing import _NamedTemporaryFile |
1548 | 1648 | from google.cloud.bigquery.job import CreateDisposition |
|
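For readers unfamiliar with the pattern these new tests exercise, below is a minimal sketch of pairing the DB API with a BigQuery Storage client. It assumes default application credentials and an installed google-cloud-bigquery-storage package; the query string is illustrative only.

from google.cloud import bigquery
from google.cloud import bigquery_storage_v1beta1
from google.cloud.bigquery import dbapi

# Regular BigQuery client: runs the query job.
bq_client = bigquery.Client()

# BigQuery Storage client: streams rows of large result sets.
bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient()

# Passing both clients lets the cursor fetch rows through the storage API
# when possible and fall back to the default client for small result sets.
connection = dbapi.connect(bq_client, bqstorage_client)
cursor = connection.cursor()

cursor.execute("SELECT 1 AS x")  # illustrative query
print(cursor.fetchall())

Reading through the storage API mainly pays off for large result sets; for small ones, the first test above verifies that the cursor transparently falls back to the default client.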