planetlabs · jreiberkyle · Jul 29, 2023 · Jul 18, 2023 · Jul 18, 2023 · Jul 20, 2023
diff --git a/planet/clients/data.py b/planet/clients/data.py
@@ -24,7 +24,7 @@
 from .. import exceptions
 from ..constants import PLANET_BASE_URL
 from ..http import Session
-from ..models import Paged, StreamingBody
+from ..models import Paged
 from ..specs import validate_data_item_type
 
 BASE_URL = f'{PLANET_BASE_URL}/data/v1/'
@@ -586,23 +586,20 @@ async def download_asset(self,
 
         Raises:
             planet.exceptions.APIError: On API error.
-            planet.exceptions.ClientError: If asset is not active or asset
-            description is not valid.
+            planet.exceptions.ClientError: If asset is not active, asset
+            description is not valid, or retry limit is exceeded.
         """
         try:
             location = asset['location']
         except KeyError:
             raise exceptions.ClientError(
                 'asset missing ["location"] entry. Is asset active?')
 
-        async with self._session.stream(method='GET', url=location) as resp:
-            body = StreamingBody(resp)
-            dl_path = Path(directory, filename or body.name)
-            dl_path.parent.mkdir(exist_ok=True, parents=True)
-            await body.write(dl_path,
-                             overwrite=overwrite,
-                             progress_bar=progress_bar)
-        return dl_path
+        return await self._session.write(location,
+                                         filename=filename,
+                                         directory=directory,
+                                         overwrite=overwrite,
+                                         progress_bar=progress_bar)
 
     @staticmethod
     def validate_checksum(asset: dict, filename: Path):

diff --git a/planet/clients/orders.py b/planet/clients/orders.py
@@ -25,7 +25,7 @@
 from .. import exceptions
 from ..constants import PLANET_BASE_URL
 from ..http import Session
-from ..models import Paged, StreamingBody
+from ..models import Paged
 
 BASE_URL = f'{PLANET_BASE_URL}/compute/ops'
 STATS_PATH = '/stats/orders/v2'
@@ -251,15 +251,15 @@ async def download_asset(self,
 
         Raises:
             planet.exceptions.APIError: On API error.
+            planet.exceptions.ClientError: If location is not valid or retry
+                limit is exceeded.
+
         """
-        async with self._session.stream(method='GET', url=location) as resp:
-            body = StreamingBody(resp)
-            dl_path = Path(directory, filename or body.name)
-            dl_path.parent.mkdir(exist_ok=True, parents=True)
-            await body.write(dl_path,
-                             overwrite=overwrite,
-                             progress_bar=progress_bar)
-        return dl_path
+        return await self._session.write(location,
+                                         filename=filename,
+                                         directory=directory,
+                                         overwrite=overwrite,
+                                         progress_bar=progress_bar)
 
     async def download_order(self,
                              order_id: str,

diff --git a/planet/http.py b/planet/http.py
@@ -16,14 +16,19 @@
 from __future__ import annotations  # https://stackoverflow.com/a/33533514
 import asyncio
 from collections import Counter
-from contextlib import asynccontextmanager
 from http import HTTPStatus
 import logging
+import mimetypes
+from pathlib import Path
 import random
+import re
+import string
 import time
-from typing import AsyncGenerator, Optional
+from typing import Optional
+from urllib.parse import urlparse
 
 import httpx
+from tqdm.asyncio import tqdm
 from typing_extensions import Literal
 
 from .auth import Auth, AuthType
@@ -42,7 +47,7 @@
     httpx.ReadTimeout,
     httpx.RemoteProtocolError,
     exceptions.BadGateway,
-    exceptions.TooManyRequests
+    exceptions.TooManyRequests,
 ]
 MAX_RETRIES = 5
 MAX_RETRY_BACKOFF = 64  # seconds
@@ -394,26 +399,80 @@ async def _send(self, request, stream=False) -> httpx.Response:
 
         return http_resp
 
-    @asynccontextmanager
-    async def stream(
-            self, method: str,
-            url: str) -> AsyncGenerator[models.StreamingResponse, None]:
-        """Submit a request and get the response as a stream context manager.
+    async def write(self,
+                    url: str,
+                    filename: Optional[str] = None,
+                    directory: Path = Path('.'),
+                    overwrite: bool = False,
+                    progress_bar: bool = False) -> Path:
+        """Download url to a local file with limiting and retries.
 
         Parameters:
-            method: HTTP request method.
-            url: Location of the API endpoint.
+            url: Remote location url.
+            filename: Custom name to assign to downloaded file. If not given,
+                a name is derived from the response.
+            directory: Base directory for file download. This directory will be
+                created if it does not already exist.
+            overwrite: Overwrite any existing files.
+            progress_bar: Show progress bar during download.
 
         Returns:
-            Context manager providing the streaming response.
+            Path to downloaded file.
+
+        Raises:
+            planet.exceptions.APIError: On API error.
+            planet.exceptions.ClientError: When retry limit is exceeded.
         """
-        request = self._client.build_request(method=method, url=url)
-        http_response = await self._retry(self._send, request, stream=True)
-        response = models.StreamingResponse(http_response)
+        async def _write():
+            async with self._client.stream('GET', url) as response:
+                # Fallback name comes from the response, hence built here.
+                dl_path = Path(
+                    directory,
+                    filename or _get_filename_from_response(response))
+                dl_path.parent.mkdir(exist_ok=True, parents=True)
+
+                await self._write_response(response,
+                                           dl_path,
+                                           overwrite=overwrite,
+                                           progress_bar=progress_bar)
+                return dl_path
+
+        async def _limited_write():
+            async with self._limiter:
+                dl_path = await _write()
+            return dl_path
+
+        # NOTE(review): each retry re-runs _write; with overwrite=False a
+        # partial file from a failed attempt seems to be kept - confirm.
+        return await self._retry(_limited_write)
+
+    async def _write_response(self,
+                              response,
+                              filename,
+                              overwrite,
+                              progress_bar):
+        """Stream the response body to filename, with optional progress."""
+        # tqdm accepts total=None when the server sends no Content-Length.
+        total = int(response.headers.get('Content-Length', 0)) or None
         try:
-            yield response
-        finally:
-            await response.aclose()
+            # 'xb' makes open() raise FileExistsError rather than clobber.
+            with open(filename, 'wb' if overwrite else 'xb') as fp:
+                with tqdm(total=total,
+                          unit_scale=True,
+                          unit_divisor=1024 * 1024,
+                          unit='B',
+                          desc=str(filename),
+                          disable=not progress_bar) as progress:
+                    previous = response.num_bytes_downloaded
+                    async for chunk in response.aiter_bytes():
+                        fp.write(chunk)
+                        # num_bytes_downloaded is cumulative; report deltas.
+                        new = response.num_bytes_downloaded
+                        progress.update(new - previous)
+                        previous = new
+                    progress.update()
+        except FileExistsError:
+            LOGGER.info(f'File {filename} exists, not overwriting')
 
     def client(self,
                name: Literal['data', 'orders', 'subscriptions'],
@@ -439,6 +498,51 @@ def client(self,
             raise exceptions.ClientError("No such client.")
 
 
+def _get_filename_from_response(response) -> str:
+    """Get the name of the response resource.
+
+    Uses the filename from the content-disposition header if present,
+    else the last segment of the url path, else a random name with an
+    extension guessed from the content-type header.
+    """
+    # response.url may be a URL object (e.g. httpx.URL); urlparse needs str.
+    return (_get_filename_from_headers(response.headers)
+            or _get_filename_from_url(str(response.url))
+            or _get_random_filename(response.headers.get('content-type')))
+
+
+def _get_filename_from_headers(headers):
+    """Get a filename from the Content-Disposition header, if available.
+
+    Only the 'content-disposition' entry of the headers mapping is
+    read. Returns the filename it names, or None when the header is
+    absent or carries no filename.
+    """
+    found = re.search('filename="?([^"]+)"?',
+                      headers.get('content-disposition', ''))
+    return None if found is None else found.group(1)
+
+
+def _get_filename_from_url(url: str) -> Optional[str]:
+    """Get a filename from the last path segment of a url.
+
+    Returns None if the path is empty or ends in '/'. Getting a name
+    for Landsat imagery uses this function.
+    """
+    _, _, basename = urlparse(url).path.rpartition('/')
+    return basename or None
+
+
+def _get_random_filename(content_type=None) -> str:
+    """Get a pseudo-random, Planet-looking filename.
+
+    Extension is guessed from content_type; omitted if unknown.
+    """
+    suffix = mimetypes.guess_extension(content_type or '') or ''
+    token = ''.join(random.sample(string.ascii_letters + '0123456789', 8))
+    return 'planet-{}{}'.format(token, suffix)
+
+
 class AuthSession(BaseSession):
     """Synchronous connection to the Planet Auth service."""