From 70ddd89d82b858ff545d87b2a58e48271da8a0c9 Mon Sep 17 00:00:00 2001 From: Julien Woillez Date: Tue, 23 Sep 2014 22:06:55 +0200 Subject: [PATCH 01/12] setup.py egg_info fix --- astroquery/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/astroquery/__init__.py b/astroquery/__init__.py index ee03665772..0f005b4b40 100644 --- a/astroquery/__init__.py +++ b/astroquery/__init__.py @@ -12,6 +12,7 @@ from ._astropy_init import __version__, __githash__, test # ---------------------------------------------------------------------------- + import os import logging From c3c55a3f2ed2e984389eb5daf157306a67cc6ce2 Mon Sep 17 00:00:00 2001 From: Clara Brasseur Date: Tue, 4 Feb 2020 15:28:16 -0500 Subject: [PATCH 02/12] additional caching functionality --- astroquery/__init__.py | 1 - astroquery/query.py | 3 ++- astroquery/setup_package.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/astroquery/__init__.py b/astroquery/__init__.py index 0f005b4b40..ee03665772 100644 --- a/astroquery/__init__.py +++ b/astroquery/__init__.py @@ -12,7 +12,6 @@ from ._astropy_init import __version__, __githash__, test # ---------------------------------------------------------------------------- - import os import logging diff --git a/astroquery/query.py b/astroquery/query.py index 998d1a1cdd..57c8077981 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -119,6 +119,7 @@ def from_cache(self, cache_location, cache_timeout): current_time = datetime.utcnow() cache_time = datetime.utcfromtimestamp(request_file.stat().st_mtime) expired = current_time-cache_time > timedelta(seconds=cache_timeout) + if not expired: with open(request_file, "rb") as f: response = pickle.load(f) @@ -344,7 +345,7 @@ def _request(self, method, url, response = query.from_cache(self.cache_location, cache_conf.cache_timeout) if not response: response = query.request(self._session, - self.cache_location, + cache_location, stream=stream, auth=auth, allow_redirects=allow_redirects, diff 
--git a/astroquery/setup_package.py b/astroquery/setup_package.py index 8a297455bd..e62302cef3 100644 --- a/astroquery/setup_package.py +++ b/astroquery/setup_package.py @@ -3,3 +3,4 @@ def get_package_data(): return {'astroquery': ['CITATION']} + From 33840de60c24d2158f3ae2324db23f953701bac6 Mon Sep 17 00:00:00 2001 From: Clara Brasseur Date: Tue, 4 Feb 2020 18:27:22 -0500 Subject: [PATCH 03/12] rethinking some of the caching --- astroquery/query.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/astroquery/query.py b/astroquery/query.py index 57c8077981..08c2affcb7 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -229,6 +229,23 @@ def _response_hook(self, response, *args, **kwargs): f"{response.text}\n" f"-----------------------------------------", '\t') log.log(5, f"HTTP response\n{response_log}") + + def clear_cache(): + """Removes all cache files.""" + + cache_files = [x for x in os.listdir(self.cache_location) if x.endswidth("pickle")] + for fle in cache_files: + os.remove(fle) + + def reset_cache_preferences(): + """Resets cache preferences to default values""" + + self.cache_location = os.path.join( + conf.cache_location, + self.__class__.__name__.split("Class")[0]) + + self.use_cache = conf.use_cache + self.cache_timeout = conf.default_cache_timeout @property def cache_location(self): @@ -345,7 +362,7 @@ def _request(self, method, url, response = query.from_cache(self.cache_location, cache_conf.cache_timeout) if not response: response = query.request(self._session, - cache_location, + self.cache_location, stream=stream, auth=auth, allow_redirects=allow_redirects, From 1e31d3955426f19e9ddae2078e76ac67b4ab7f55 Mon Sep 17 00:00:00 2001 From: Clara Brasseur Date: Wed, 5 Feb 2020 10:10:45 -0500 Subject: [PATCH 04/12] pep8 --- astroquery/__init__.py | 1 + astroquery/query.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/astroquery/__init__.py b/astroquery/__init__.py index 
ee03665772..968b94be38 100644 --- a/astroquery/__init__.py +++ b/astroquery/__init__.py @@ -60,3 +60,4 @@ class Cache_Conf(_config.ConfigNamespace): cache_conf = Cache_Conf() + diff --git a/astroquery/query.py b/astroquery/query.py index 08c2affcb7..a37948aab0 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -243,7 +243,7 @@ def reset_cache_preferences(): self.cache_location = os.path.join( conf.cache_location, self.__class__.__name__.split("Class")[0]) - + self.use_cache = conf.use_cache self.cache_timeout = conf.default_cache_timeout From 5442b5470429bf944f249969f53aa23267804319 Mon Sep 17 00:00:00 2001 From: "C.E. Brasseur" Date: Tue, 22 Mar 2022 22:14:32 +0000 Subject: [PATCH 05/12] cleanup after rebase --- astroquery/query.py | 2 +- astroquery/setup_package.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/astroquery/query.py b/astroquery/query.py index a37948aab0..5ca3418bfa 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -244,7 +244,7 @@ def reset_cache_preferences(): conf.cache_location, self.__class__.__name__.split("Class")[0]) - self.use_cache = conf.use_cache + self._cache_active = conf.use_cache self.cache_timeout = conf.default_cache_timeout @property diff --git a/astroquery/setup_package.py b/astroquery/setup_package.py index e62302cef3..8a297455bd 100644 --- a/astroquery/setup_package.py +++ b/astroquery/setup_package.py @@ -3,4 +3,3 @@ def get_package_data(): return {'astroquery': ['CITATION']} - From 2589b8a6ab02f6ec13ed9849a902980c4ce4a182 Mon Sep 17 00:00:00 2001 From: "C. E. 
Brasseur" Date: Thu, 4 Aug 2022 17:37:20 +0100 Subject: [PATCH 06/12] regularizing conf usage and fixing suspend cache function --- astroquery/query.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/astroquery/query.py b/astroquery/query.py index 5ca3418bfa..f069c82252 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -241,10 +241,10 @@ def reset_cache_preferences(): """Resets cache preferences to default values""" self.cache_location = os.path.join( - conf.cache_location, + conf.default_cache_location, self.__class__.__name__.split("Class")[0]) - self._cache_active = conf.use_cache + self.cache_active = conf.default_cache_active self.cache_timeout = conf.default_cache_timeout @property @@ -354,6 +354,7 @@ def _request(self, method, url, files=files, timeout=timeout, json=json) if not cache: with cache_conf.set_temp("cache_active", False): + response = query.request(self._session, stream=stream, auth=auth, verify=verify, allow_redirects=allow_redirects, From b55eb433cec829341ae7c27ff7f868f57186c894 Mon Sep 17 00:00:00 2001 From: "C. E. 
Brasseur" Date: Mon, 12 Sep 2022 16:30:07 +0100 Subject: [PATCH 07/12] integrating better with astropy config framework --- astroquery/query.py | 8 +++++--- astroquery/tests/test_cache.py | 21 +++++++++++---------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/astroquery/query.py b/astroquery/query.py index f069c82252..ef86446227 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -338,7 +338,7 @@ def _request(self, method, url, # ":" so replace them with an underscore local_filename = local_filename.replace(':', '_') - local_filepath = os.path.join(savedir or self.cache_location or '.', local_filename) + local_filepath = os.path.join(savedir or self._cache_location or '.', local_filename) response = self._download_file(url, local_filepath, cache=cache, timeout=timeout, continuation=continuation, method=method, @@ -361,15 +361,16 @@ def _request(self, method, url, json=json) else: response = query.from_cache(self.cache_location, cache_conf.cache_timeout) + if not response: response = query.request(self._session, - self.cache_location, + self._cache_location, stream=stream, auth=auth, allow_redirects=allow_redirects, verify=verify, json=json) - to_cache(response, query.request_file(self.cache_location)) + to_cache(response, query.request_file(self._cache_location)) self._last_query = query return response @@ -514,6 +515,7 @@ def __exit__(self, exc_type, exc_value, traceback): return False + class QueryWithLogin(BaseQuery): """ This is the base class for all the query classes which are required to diff --git a/astroquery/tests/test_cache.py b/astroquery/tests/test_cache.py index c74110eb51..82fb0bd7f1 100644 --- a/astroquery/tests/test_cache.py +++ b/astroquery/tests/test_cache.py @@ -10,6 +10,7 @@ from astroquery.query import QueryWithLogin from astroquery import cache_conf + URL1 = "http://fakeurl.edu" URL2 = "http://fakeurl.ac.uk" @@ -82,22 +83,22 @@ def test_basic_caching(changing_mocked_response): assert cache_conf.cache_active 
mytest.clear_cache() - assert len(os.listdir(mytest.cache_location)) == 0 + assert len(os.listdir(mytest.get_cache_location())) == 0 resp = mytest.test_func(URL1) assert resp.content == TEXT1 - assert len(os.listdir(mytest.cache_location)) == 1 + assert len(os.listdir(mytest.get_cache_location())) == 1 resp = mytest.test_func(URL2) # query that has not been cached assert resp.content == TEXT2 - assert len(os.listdir(mytest.cache_location)) == 2 + assert len(os.listdir(mytest.get_cache_location())) == 2 resp = mytest.test_func(URL1) assert resp.content == TEXT1 # query that was cached - assert len(os.listdir(mytest.cache_location)) == 2 # no new cache file + assert len(os.listdir(mytest.get_cache_location())) == 2 # no new cache file mytest.clear_cache() - assert len(os.listdir(mytest.cache_location)) == 0 + assert len(os.listdir(mytest.get_cache_location())) == 0 resp = mytest.test_func(URL1) assert resp.content == TEXT2 # Now get new response @@ -134,11 +135,11 @@ def test_login(changing_mocked_response): assert cache_conf.cache_active mytest.clear_cache() - assert len(os.listdir(mytest.cache_location)) == 0 + assert len(os.listdir(mytest.get_cache_location())) == 0 mytest.login("ceb") assert mytest.authenticated() - assert len(os.listdir(mytest.cache_location)) == 0 # request should not be cached + assert len(os.listdir(mytest.get_cache_location())) == 0 # request should not be cached mytest.login("ceb") assert not mytest.authenticated() # Should not be accessing cache @@ -151,7 +152,7 @@ def test_timeout(changing_mocked_response, monkeypatch): assert cache_conf.cache_active mytest.clear_cache() - assert len(os.listdir(mytest.cache_location)) == 0 + assert len(os.listdir(mytest.get_cache_location())) == 0 resp = mytest.test_func(URL1) # should be cached assert resp.content == TEXT1 @@ -183,11 +184,11 @@ def test_deactivate_directly(changing_mocked_response): cache_conf.cache_active = False mytest.clear_cache() - assert len(os.listdir(mytest.cache_location)) == 0 
+ assert len(os.listdir(mytest.get_cache_location())) == 0 resp = mytest.test_func(URL1) assert resp.content == TEXT1 - assert len(os.listdir(mytest.cache_location)) == 0 + assert len(os.listdir(mytest.get_cache_location())) == 0 resp = mytest.test_func(URL1) assert resp.content == TEXT2 From 7ab4992a0a645b22de722860999cec4bd09124d9 Mon Sep 17 00:00:00 2001 From: "C. E. Brasseur" Date: Mon, 19 Sep 2022 15:01:30 +0100 Subject: [PATCH 08/12] making new caching backwards compatible --- astroquery/query.py | 6 +++--- astroquery/tests/test_cache.py | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/astroquery/query.py b/astroquery/query.py index ef86446227..39743f35c7 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -338,7 +338,7 @@ def _request(self, method, url, # ":" so replace them with an underscore local_filename = local_filename.replace(':', '_') - local_filepath = os.path.join(savedir or self._cache_location or '.', local_filename) + local_filepath = os.path.join(savedir or self.cache_location or '.', local_filename) response = self._download_file(url, local_filepath, cache=cache, timeout=timeout, continuation=continuation, method=method, @@ -364,13 +364,13 @@ def _request(self, method, url, if not response: response = query.request(self._session, - self._cache_location, + self.cache_location, stream=stream, auth=auth, allow_redirects=allow_redirects, verify=verify, json=json) - to_cache(response, query.request_file(self._cache_location)) + to_cache(response, query.request_file(self.cache_location)) self._last_query = query return response diff --git a/astroquery/tests/test_cache.py b/astroquery/tests/test_cache.py index 82fb0bd7f1..29ad675ebe 100644 --- a/astroquery/tests/test_cache.py +++ b/astroquery/tests/test_cache.py @@ -83,22 +83,22 @@ def test_basic_caching(changing_mocked_response): assert cache_conf.cache_active mytest.clear_cache() - assert len(os.listdir(mytest.get_cache_location())) == 0 + assert 
len(os.listdir(mytest.cache_location)) == 0 resp = mytest.test_func(URL1) assert resp.content == TEXT1 - assert len(os.listdir(mytest.get_cache_location())) == 1 + assert len(os.listdir(mytest.cache_location)) == 1 resp = mytest.test_func(URL2) # query that has not been cached assert resp.content == TEXT2 - assert len(os.listdir(mytest.get_cache_location())) == 2 + assert len(os.listdir(mytest.cache_location)) == 2 resp = mytest.test_func(URL1) assert resp.content == TEXT1 # query that was cached - assert len(os.listdir(mytest.get_cache_location())) == 2 # no new cache file + assert len(os.listdir(mytest.cache_location)) == 2 # no new cache file mytest.clear_cache() - assert len(os.listdir(mytest.get_cache_location())) == 0 + assert len(os.listdir(mytest.cache_location)) == 0 resp = mytest.test_func(URL1) assert resp.content == TEXT2 # Now get new response @@ -135,11 +135,11 @@ def test_login(changing_mocked_response): assert cache_conf.cache_active mytest.clear_cache() - assert len(os.listdir(mytest.get_cache_location())) == 0 + assert len(os.listdir(mytest.cache_location)) == 0 mytest.login("ceb") assert mytest.authenticated() - assert len(os.listdir(mytest.get_cache_location())) == 0 # request should not be cached + assert len(os.listdir(mytest.cache_location)) == 0 # request should not be cached mytest.login("ceb") assert not mytest.authenticated() # Should not be accessing cache @@ -152,7 +152,7 @@ def test_timeout(changing_mocked_response, monkeypatch): assert cache_conf.cache_active mytest.clear_cache() - assert len(os.listdir(mytest.get_cache_location())) == 0 + assert len(os.listdir(mytest.cache_location)) == 0 resp = mytest.test_func(URL1) # should be cached assert resp.content == TEXT1 @@ -184,11 +184,11 @@ def test_deactivate_directly(changing_mocked_response): cache_conf.cache_active = False mytest.clear_cache() - assert len(os.listdir(mytest.get_cache_location())) == 0 + assert len(os.listdir(mytest.cache_location)) == 0 resp = mytest.test_func(URL1) 
assert resp.content == TEXT1 - assert len(os.listdir(mytest.get_cache_location())) == 0 + assert len(os.listdir(mytest.cache_location)) == 0 resp = mytest.test_func(URL1) assert resp.content == TEXT2 From 9cb1b8438841fbcfd858d8222f1c665cdcd99603 Mon Sep 17 00:00:00 2001 From: orionlee Date: Mon, 4 Nov 2019 16:24:08 -0800 Subject: [PATCH 09/12] MAST query result cache: Observations.query_criteria() --- astroquery/mast/discovery_portal.py | 53 ++++++++++++++++++++++++----- astroquery/mast/observations.py | 4 +-- astroquery/mast/tests/test_mast.py | 50 +++++++++++++++++++++++++++ astroquery/query.py | 3 +- 4 files changed, 99 insertions(+), 11 deletions(-) diff --git a/astroquery/mast/discovery_portal.py b/astroquery/mast/discovery_portal.py index 024d17cc96..2e2b140187 100644 --- a/astroquery/mast/discovery_portal.py +++ b/astroquery/mast/discovery_portal.py @@ -10,6 +10,7 @@ import uuid import json import time +import re import numpy as np @@ -210,7 +211,39 @@ def _request(self, method, url, params=None, data=None, headers=None, return all_responses - def _get_col_config(self, service, fetch_name=None): + def _request_w_cache(self, method, url, data=None, headers=None, retrieve_all=True, + cache=False, cache_opts=None): + # Note: the method only exposes 4 parameters of the underlying _request() function + # to play nice with existing mocks + # Caching: follow BaseQuery._request()'s pattern, which uses an AstroQuery object + if not cache: + response = self._request(method, url, data=data, headers=headers, retrieve_all=retrieve_all) + else: + cacher = self._get_cacher(method, url, data, headers, retrieve_all) + response = cacher.from_cache(self.cache_location) + if not response: + response = self._request(method, url, data=data, headers=headers, retrieve_all=retrieve_all) + to_cache(response, cacher.request_file(self.cache_location)) + return response + + def _get_cacher(self, method, url, data, headers, retrieve_all): + """ + Return an object that can cache the HTTP 
request based on the supplied arguments + """ + + # cacheBreaker parameter (to underlying MAST service) is not relevant (and breaks) local caching + # remove it from part of the cache key + data_no_cache_breaker = re.sub(r'^(.+)cacheBreaker%22%3A%20%22.+%22', r'\1', data) + # include retrieve_all as part of the cache key by appending it to data + # it cannot be added as part of req_kwargs dict, as it will be rejected by AstroQuery + data_w_retrieve_all = data_no_cache_breaker + " retrieve_all={}".format(retrieve_all) + req_kwargs = dict( + data=data_no_cache_breaker, + headers=headers + ) + return AstroQuery(method, url, **req_kwargs) + + def _get_col_config(self, service, fetch_name=None, cache=False): """ Gets the columnsConfig entry for given service and stores it in `self._column_configs`. @@ -246,7 +279,7 @@ def _get_col_config(self, service, fetch_name=None): if more: mashup_request = {'service': all_name, 'params': {}, 'format': 'extjs'} req_string = _prepare_service_request_string(mashup_request) - response = self._request("POST", self.MAST_REQUEST_URL, data=req_string, headers=headers) + response = self._request_w_cache("POST", self.MAST_REQUEST_URL, data=req_string, headers=headers, cache=cache) json_response = response[0].json() self._column_configs[service].update(json_response['data']['Tables'][0] @@ -300,7 +333,7 @@ def _parse_result(self, responses, verbose=False): return all_results @class_or_instance - def service_request_async(self, service, params, pagesize=None, page=None, **kwargs): + def service_request_async(self, service, params, pagesize=None, page=None, cache=False, cache_opts=None, **kwargs): """ Given a Mashup service and parameters, builds and excecutes a Mashup query. See documentation `here `__ @@ -320,6 +353,10 @@ def service_request_async(self, service, params, pagesize=None, page=None, **kwa Default None. Can be used to override the default behavior of all results being returned to obtain a specific page of results. 
+ cache : Boolean, optional + try to use cached the query result if set to True + cache_opts : dict, optional + cache options, details TBD, e.g., cache expiration policy, etc. **kwargs : See MashupRequest properties `here `__ @@ -333,7 +370,7 @@ def service_request_async(self, service, params, pagesize=None, page=None, **kwa # setting self._current_service if service not in self._column_configs.keys(): fetch_name = kwargs.pop('fetch_name', None) - self._get_col_config(service, fetch_name) + self._get_col_config(service, fetch_name, cache) self._current_service = service # setting up pagination @@ -359,12 +396,12 @@ def service_request_async(self, service, params, pagesize=None, page=None, **kwa mashup_request[prop] = value req_string = _prepare_service_request_string(mashup_request) - response = self._request("POST", self.MAST_REQUEST_URL, data=req_string, headers=headers, - retrieve_all=retrieve_all) + response = self._request_w_cache("POST", self.MAST_REQUEST_URL, data=req_string, headers=headers, + retrieve_all=retrieve_all, cache=cache, cache_opts=cache_opts) return response - def build_filter_set(self, column_config_name, service_name=None, **filters): + def build_filter_set(self, column_config_name, service_name=None, cache=False, **filters): """ Takes user input dictionary of filters and returns a filterlist that the Mashup can understand. 
@@ -392,7 +429,7 @@ def build_filter_set(self, column_config_name, service_name=None, **filters): service_name = column_config_name if not self._column_configs.get(service_name): - self._get_col_config(service_name, fetch_name=column_config_name) + self._get_col_config(service_name, fetch_name=column_config_name, cache=cache) caom_col_config = self._column_configs[service_name] diff --git a/astroquery/mast/observations.py b/astroquery/mast/observations.py index 65d2186727..f593558681 100644 --- a/astroquery/mast/observations.py +++ b/astroquery/mast/observations.py @@ -246,7 +246,7 @@ def query_object_async(self, objectname, *, radius=0.2*u.deg, pagesize=None, pag return self.query_region_async(coordinates, radius=radius, pagesize=pagesize, page=page) @class_or_instance - def query_criteria_async(self, *, pagesize=None, page=None, **criteria): + def query_criteria_async(self, *, pagesize=None, page=None, cache=False, cache_opts=None, **criteria): """ Given an set of criteria, returns a list of MAST observations. 
Valid criteria are returned by ``get_metadata("observations")`` @@ -291,7 +291,7 @@ def query_criteria_async(self, *, pagesize=None, page=None, **criteria): params = {"columns": "*", "filters": mashup_filters} - return self._portal_api_connection.service_request_async(service, params) + return self._portal_api_connection.service_request_async(service, params, cache=cache, cache_opts=cache_opts) def query_region_count(self, coordinates, *, radius=0.2*u.deg, pagesize=None, page=None): """ diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index 39d962ced5..2dbb845b31 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -76,6 +76,25 @@ def patch_post(request): return mp +_num_mockreturn = 0 + + +def _get_num_mockreturn(): + global _num_mockreturn + return _num_mockreturn + + +def _reset_mockreturn_counter(): + global _num_mockreturn + _num_mockreturn = 0 + + +def _inc_num_mockreturn(): + global _num_mockreturn + _num_mockreturn += 1 + return _num_mockreturn + + def post_mockreturn(self, method="POST", url=None, data=None, timeout=10, **kwargs): if "columnsconfig" in url: if "Mast.Catalogs.Tess.Cone" in data: @@ -102,6 +121,9 @@ def post_mockreturn(self, method="POST", url=None, data=None, timeout=10, **kwar with open(filename, 'rb') as infile: content = infile.read() + # For cache tests + _inc_num_mockreturn() + # returning as list because this is what the mast _request function does return [MockResponse(content)] @@ -365,6 +387,34 @@ def test_query_observations_criteria_async(patch_post): assert isinstance(responses, list) +def test_query_observations_criteria_async_cache(patch_post): + _reset_mockreturn_counter() + assert 0 == _get_num_mockreturn(), "Mock HTTP call counter reset to 0" + + responses_cache_miss = mast.Observations.query_criteria_async(dataproduct_type=["image"], + proposal_pi="Ost*", + s_dec=[43.5, 45.5], cache=True) + assert isinstance(responses_cache_miss, list) + 
num_mockreturn_after_first_call = _get_num_mockreturn() + assert num_mockreturn_after_first_call > 0, "Cache miss, some underlying HTTP call" + + responses_cache_hit = mast.Observations.query_criteria_async(dataproduct_type=["image"], + proposal_pi="Ost*", + s_dec=[43.5, 45.5], cache=True) + # assert the cached response is the same + assert len(responses_cache_hit) == len(responses_cache_miss) + assert responses_cache_hit[0].text == responses_cache_miss[0].text + # ensure the response really comes from the cache + assert num_mockreturn_after_first_call == _get_num_mockreturn(), \ + 'Cache hit: should reach cache only, i.e., no HTTP call' + + responses_no_cache = mast.Observations.query_criteria_async(dataproduct_type=["image"], + proposal_pi="Ost*", + s_dec=[43.5, 45.5], cache=False) + assert isinstance(responses_no_cache, list) + assert _get_num_mockreturn() > num_mockreturn_after_first_call, "Cache off , some underlying HTTP call" + + def test_observations_query_criteria(patch_post): # without position result = mast.Observations.query_criteria(dataproduct_type=["image"], diff --git a/astroquery/query.py b/astroquery/query.py index 39743f35c7..4e28bd8d94 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -123,7 +123,8 @@ def from_cache(self, cache_location, cache_timeout): if not expired: with open(request_file, "rb") as f: response = pickle.load(f) - if not isinstance(response, requests.Response): + if not isinstance(response, requests.Response)and not isinstance(response, list): + # MAST query response is a list of Response response = None else: log.debug(f"Cache expired for {request_file}...") From 23affe0565d5104faa70377ac4c767f6b482ccbb Mon Sep 17 00:00:00 2001 From: "C. E. 
Brasseur" Date: Thu, 29 Sep 2022 17:44:13 +0100 Subject: [PATCH 10/12] adjust for new cache work --- astroquery/mast/discovery_portal.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/astroquery/mast/discovery_portal.py b/astroquery/mast/discovery_portal.py index 2e2b140187..724e21ad76 100644 --- a/astroquery/mast/discovery_portal.py +++ b/astroquery/mast/discovery_portal.py @@ -18,7 +18,8 @@ from astropy.table import Table, vstack, MaskedColumn -from ..query import BaseQuery +from astroquery import conf as asq_conf +from ..query import BaseQuery, QueryWithLogin, AstroQuery, to_cache from ..utils import async_to_sync from ..utils.class_or_instance import class_or_instance from ..exceptions import InputWarning, NoResultsWarning, RemoteServiceError @@ -216,11 +217,12 @@ def _request_w_cache(self, method, url, data=None, headers=None, retrieve_all=Tr # Note: the method only exposes 4 parameters of the underlying _request() function # to play nice with existing mocks # Caching: follow BaseQuery._request()'s pattern, which uses an AstroQuery object + if not cache: response = self._request(method, url, data=data, headers=headers, retrieve_all=retrieve_all) else: cacher = self._get_cacher(method, url, data, headers, retrieve_all) - response = cacher.from_cache(self.cache_location) + response = cacher.from_cache(self.cache_location, asq_conf.cache_timeout) if not response: response = self._request(method, url, data=data, headers=headers, retrieve_all=retrieve_all) to_cache(response, cacher.request_file(self.cache_location)) return response From 460fd24307747680920b1618ae5aee0d0a32e994 Mon Sep 17 00:00:00 2001 From: "C. E. 
Brasseur" Date: Mon, 3 Oct 2022 17:53:01 +0100 Subject: [PATCH 11/12] make path location get/set work properly --- astroquery/mast/core.py | 21 +++++++++++++++++++++ astroquery/mast/discovery_portal.py | 2 ++ 2 files changed, 23 insertions(+) diff --git a/astroquery/mast/core.py b/astroquery/mast/core.py index 712579a383..6d09c38489 100644 --- a/astroquery/mast/core.py +++ b/astroquery/mast/core.py @@ -59,6 +59,27 @@ def _login(self, token=None, store_token=False, reenter_token=False): return self._auth_obj.login(token, store_token, reenter_token) + @property + def cache_location(self): + cl = self._portal_api_connection.cache_location + cl_2 = self._service_api_connection.cache_location + + if cl != cl_2: + print("What should we do in this case? (currently setting it by force)") + self._service_api_connection.cache_location = cl + + return cl + + @cache_location.setter + def cache_location(self, loc): + self._portal_api_connection.cache_location = loc + self._service_api_connection.cache_location = loc + + def reset_cache_location(self): + """Resets the cache location to the default astropy cache""" + self._portal_api_connection.reset_cache_location() + self._service_api_connection.reset_cache_location() + def session_info(self, verbose=True): """ Displays information about current MAST user, and returns user info dictionary. diff --git a/astroquery/mast/discovery_portal.py b/astroquery/mast/discovery_portal.py index 724e21ad76..10bd975d14 100644 --- a/astroquery/mast/discovery_portal.py +++ b/astroquery/mast/discovery_portal.py @@ -134,6 +134,8 @@ def __init__(self, session=None): if session: self._session = session + self.name = "Mast" + def _request(self, method, url, params=None, data=None, headers=None, files=None, stream=False, auth=None, retrieve_all=True): """ From 1aa707871512eb6dc96bc236ed409eb368c449cf Mon Sep 17 00:00:00 2001 From: "C. E. 
Brasseur" Date: Thu, 16 Mar 2023 18:11:10 +0000 Subject: [PATCH 12/12] making caching default and controllable with existing infrastructure --- astroquery/__init__.py | 1 - astroquery/mast/core.py | 20 +++++------ astroquery/mast/discovery_portal.py | 54 ++++++++++++++++------------- astroquery/mast/missions.py | 2 +- astroquery/mast/services.py | 9 +++-- astroquery/query.py | 7 ++-- 6 files changed, 47 insertions(+), 46 deletions(-) diff --git a/astroquery/__init__.py b/astroquery/__init__.py index 968b94be38..ee03665772 100644 --- a/astroquery/__init__.py +++ b/astroquery/__init__.py @@ -60,4 +60,3 @@ class Cache_Conf(_config.ConfigNamespace): cache_conf = Cache_Conf() - diff --git a/astroquery/mast/core.py b/astroquery/mast/core.py index 6d09c38489..9fe63909cf 100644 --- a/astroquery/mast/core.py +++ b/astroquery/mast/core.py @@ -25,9 +25,11 @@ def __init__(self, mast_token=None): super().__init__() + self.name = "Mast" + # Initializing API connections - self._portal_api_connection = PortalAPI(self._session) - self._service_api_connection = ServiceAPI(self._session) + self._portal_api_connection = PortalAPI(self._session, self.name) + self._service_api_connection = ServiceAPI(self._session, self.name) if mast_token: self._authenticated = self._auth_obj = MastAuth(self._session, mast_token) @@ -59,24 +61,20 @@ def _login(self, token=None, store_token=False, reenter_token=False): return self._auth_obj.login(token, store_token, reenter_token) + @property def cache_location(self): - cl = self._portal_api_connection.cache_location - cl_2 = self._service_api_connection.cache_location - - if cl != cl_2: - print("What should we do in this case? 
(currently setting it by force)") - self._service_api_connection.cache_location = cl - - return cl - + return super().cache_location + @cache_location.setter def cache_location(self, loc): + self._cache_location = Path(loc) self._portal_api_connection.cache_location = loc self._service_api_connection.cache_location = loc def reset_cache_location(self): """Resets the cache location to the default astropy cache""" + self._cache_location = None self._portal_api_connection.reset_cache_location() self._service_api_connection.reset_cache_location() diff --git a/astroquery/mast/discovery_portal.py b/astroquery/mast/discovery_portal.py index 10bd975d14..512bdb3080 100644 --- a/astroquery/mast/discovery_portal.py +++ b/astroquery/mast/discovery_portal.py @@ -18,8 +18,8 @@ from astropy.table import Table, vstack, MaskedColumn -from astroquery import conf as asq_conf -from ..query import BaseQuery, QueryWithLogin, AstroQuery, to_cache +from astroquery import cache_conf +from ..query import BaseQuery, AstroQuery, to_cache from ..utils import async_to_sync from ..utils.class_or_instance import class_or_instance from ..exceptions import InputWarning, NoResultsWarning, RemoteServiceError @@ -128,16 +128,17 @@ class PortalAPI(BaseQuery): _column_configs = dict() _current_service = None - def __init__(self, session=None): + def __init__(self, session=None, name=None): super().__init__() if session: self._session = session - self.name = "Mast" + if name: + self.name = name def _request(self, method, url, params=None, data=None, headers=None, - files=None, stream=False, auth=None, retrieve_all=True): + files=None, cache=None, stream=False, auth=None, retrieve_all=True): """ Override of the parent method: A generic HTTP request method, similar to `~requests.Session.request` @@ -164,6 +165,8 @@ def _request(self, method, url, params=None, data=None, headers=None, files : None or dict stream : bool See `~requests.request` + cache : bool + Optional, if specified, overrides global cache 
settings. retrieve_all : bool Default True. Retrieve all pages of data or just the one indicated in the params value. @@ -173,6 +176,18 @@ def _request(self, method, url, params=None, data=None, headers=None, The response from the server. """ + if cache is None: # Global caching not overridden + cache = cache_conf.cache_active + + + if cache: # cache active, look for cached file + cacher = self._get_cacher(method, url, data, headers, retrieve_all) + response = cacher.from_cache(self.cache_location, cache_conf.cache_timeout) + if response: + return response + + + # Either cache is not active or a cached file was not found, proceed with query start_time = time.time() all_responses = [] total_pages = 1 @@ -212,23 +227,11 @@ def _request(self, method, url, params=None, data=None, headers=None, data = data.replace("page%22%3A%20"+str(cur_page)+"%2C", "page%22%3A%20"+str(cur_page+1)+"%2C") - return all_responses + if cache: # cache is active, so cache response before returning + to_cache(all_responses, cacher.request_file(self.cache_location)) - def _request_w_cache(self, method, url, data=None, headers=None, retrieve_all=True, - cache=False, cache_opts=None): - # Note: the method only exposes 4 parameters of the underlying _request() function - # to play nice with existing mocks - # Caching: follow BaseQuery._request()'s pattern, which uses an AstroQuery object + return all_responses - if not cache: - response = self._request(method, url, data=data, headers=headers, retrieve_all=retrieve_all) - else: - cacher = self._get_cacher(method, url, data, headers, retrieve_all) - response = cacher.from_cache(self.cache_location, asq_conf.cache_timeout) - if not response: - response = self._request(method, url, data=data, headers=headers, retrieve_all=retrieve_all) - to_cache(response, cacher.request_file(self.cache_location)) - return response def _get_cacher(self, method, url, data, headers, retrieve_all): """ @@ -238,6 +241,7 @@ def _get_cacher(self, method, url, data, 
headers, retrieve_all): # cacheBreaker parameter (to underlying MAST service) is not relevant (and breaks) local caching # remove it from part of the cache key data_no_cache_breaker = re.sub(r'^(.+)cacheBreaker%22%3A%20%22.+%22', r'\1', data) + # include retrieve_all as part of the cache key by appending it to data # it cannot be added as part of req_kwargs dict, as it will be rejected by AstroQuery data_w_retrieve_all = data_no_cache_breaker + " retrieve_all={}".format(retrieve_all) @@ -247,7 +251,7 @@ def _get_cacher(self, method, url, data, headers, retrieve_all): ) return AstroQuery(method, url, **req_kwargs) - def _get_col_config(self, service, fetch_name=None, cache=False): + def _get_col_config(self, service, fetch_name=None): """ Gets the columnsConfig entry for given service and stores it in `self._column_configs`. @@ -283,7 +287,7 @@ def _get_col_config(self, service, fetch_name=None, cache=False): if more: mashup_request = {'service': all_name, 'params': {}, 'format': 'extjs'} req_string = _prepare_service_request_string(mashup_request) - response = self._request_w_cache("POST", self.MAST_REQUEST_URL, data=req_string, headers=headers, cache=cache) + response = self._request("POST", self.MAST_REQUEST_URL, data=req_string, headers=headers) json_response = response[0].json() self._column_configs[service].update(json_response['data']['Tables'][0] @@ -337,7 +341,7 @@ def _parse_result(self, responses, verbose=False): return all_results @class_or_instance - def service_request_async(self, service, params, pagesize=None, page=None, cache=False, cache_opts=None, **kwargs): + def service_request_async(self, service, params, pagesize=None, page=None, **kwargs): """ Given a Mashup service and parameters, builds and excecutes a Mashup query. 
See documentation `here `__ @@ -400,8 +404,8 @@ def service_request_async(self, service, params, pagesize=None, page=None, cache mashup_request[prop] = value req_string = _prepare_service_request_string(mashup_request) - response = self._request_w_cache("POST", self.MAST_REQUEST_URL, data=req_string, headers=headers, - retrieve_all=retrieve_all, cache=cache, cache_opts=cache_opts) + response = self._request("POST", self.MAST_REQUEST_URL, data=req_string, headers=headers, + retrieve_all=retrieve_all) return response diff --git a/astroquery/mast/missions.py b/astroquery/mast/missions.py index caa264aca7..4dba038eec 100644 --- a/astroquery/mast/missions.py +++ b/astroquery/mast/missions.py @@ -40,7 +40,7 @@ def __init__(self, *, mission='hst', service='search'): self.service = service self.mission = mission self.limit = 5000 - + service_dict = {self.service: {'path': self.service, 'args': {}}} self._service_api_connection.set_service_params(service_dict, f"{self.service}/{self.mission}") diff --git a/astroquery/mast/services.py b/astroquery/mast/services.py index b82bc36cba..44ea048871 100644 --- a/astroquery/mast/services.py +++ b/astroquery/mast/services.py @@ -109,12 +109,15 @@ class ServiceAPI(BaseQuery): REQUEST_URL = conf.server + "/api/v0.1/" SERVICES = {} - def __init__(self, session=None): + def __init__(self, session=None, name=None): super().__init__() if session: self._session = session + if name: + self.name = name + self.TIMEOUT = conf.timeout def set_service_params(self, service_dict, service_name="", server_prefix=False): @@ -143,7 +146,7 @@ def set_service_params(self, service_dict, service_name="", server_prefix=False) self.SERVICES = service_dict def _request(self, method, url, params=None, data=None, headers=None, - files=None, stream=False, auth=None, cache=False, use_json=False): + files=None, stream=False, auth=None, cache=None, use_json=False): """ Override of the parent method: A generic HTTP request method, similar to 
`~requests.Session.request` @@ -168,7 +171,7 @@ def _request(self, method, url, params=None, data=None, headers=None, stream : bool See `~requests.request` cache : bool - Default False. Use of built in caching + Optional, if specified, overrides global cache settings. use_json: bool Default False. if True then data is already in json format. diff --git a/astroquery/query.py b/astroquery/query.py index 4e28bd8d94..772067e819 100644 --- a/astroquery/query.py +++ b/astroquery/query.py @@ -231,7 +231,7 @@ def _response_hook(self, response, *args, **kwargs): f"-----------------------------------------", '\t') log.log(5, f"HTTP response\n{response_log}") - def clear_cache(): + def clear_cache(): """Removes all cache files.""" cache_files = [x for x in os.listdir(self.cache_location) if x.endswidth("pickle")] @@ -241,10 +241,7 @@ def clear_cache(): def reset_cache_preferences(): """Resets cache preferences to default values""" - self.cache_location = os.path.join( - conf.default_cache_location, - self.__class__.__name__.split("Class")[0]) - + self.reset_cache_location() self.cache_active = conf.default_cache_active self.cache_timeout = conf.default_cache_timeout