33import contextlib
44import io
55import threading
6+ import uuid
67import warnings
7- from typing import Any
8+ from typing import Any , Hashable
89
910from ..core import utils
1011from ..core .options import OPTIONS
1112from .locks import acquire
1213from .lru_cache import LRUCache
1314
# Global cache for storing open files.
# Keys are annotated as ``Any`` (file managers build their own hashable keys);
# values are open file objects. The ``on_evict`` callback closes a file when
# its entry is evicted from the cache.
FILE_CACHE: LRUCache[Any, io.IOBase] = LRUCache(
    maxsize=OPTIONS["file_cache_maxsize"], on_evict=lambda k, v: v.close()
)
assert FILE_CACHE.maxsize, "file cache must be at least size one"

# Global mapping from a key to an integer count.
# NOTE(review): presumably the default ``ref_counts`` mapping used by
# CachingFileManager -- confirm against the part of __init__ not shown here.
REF_COUNTS: dict[Any, int] = {}

# Sentinel meaning "no mode was given"; compared by identity (``is not``)
# in ``__repr__``.
_DEFAULT_MODE = utils.ReprObject("<unused>")
@@ -85,12 +85,13 @@ def __init__(
8585 kwargs = None ,
8686 lock = None ,
8787 cache = None ,
88+ manager_id : Hashable | None = None ,
8889 ref_counts = None ,
8990 ):
90- """Initialize a FileManager .
91+ """Initialize a CachingFileManager .
9192
92- The cache and ref_counts arguments exist solely to facilitate
93- dependency injection, and should only be set for tests.
93+ The cache, manager_id and ref_counts arguments exist solely to
94+ facilitate dependency injection, and should only be set for tests.
9495
9596 Parameters
9697 ----------
@@ -120,6 +121,8 @@ def __init__(
120121 global variable and contains non-picklable file objects, an
121122 unpickled FileManager object will be restored with the default
122123 cache.
124+ manager_id : hashable, optional
125+ Identifier for this CachingFileManager.
123126 ref_counts : dict, optional
124127 Optional dict to use for keeping track of the number of references to
125128 the same file.
@@ -129,13 +132,17 @@ def __init__(
129132 self ._mode = mode
130133 self ._kwargs = {} if kwargs is None else dict (kwargs )
131134
132- self ._default_lock = lock is None or lock is False
133- self ._lock = threading .Lock () if self ._default_lock else lock
135+ self ._use_default_lock = lock is None or lock is False
136+ self ._lock = threading .Lock () if self ._use_default_lock else lock
134137
135138 # cache[self._key] stores the file associated with this object.
136139 if cache is None :
137140 cache = FILE_CACHE
138141 self ._cache = cache
142+ if manager_id is None :
143+ # Each call to CachingFileManager should separately open files.
144+ manager_id = str (uuid .uuid4 ())
145+ self ._manager_id = manager_id
139146 self ._key = self ._make_key ()
140147
141148 # ref_counts[self._key] stores the number of CachingFileManager objects
@@ -153,6 +160,7 @@ def _make_key(self):
153160 self ._args ,
154161 "a" if self ._mode == "w" else self ._mode ,
155162 tuple (sorted (self ._kwargs .items ())),
163+ self ._manager_id ,
156164 )
157165 return _HashedSequence (value )
158166
@@ -223,20 +231,14 @@ def close(self, needs_lock=True):
223231 if file is not None :
224232 file .close ()
225233
226- def __del__ (self ):
227- # If we're the only CachingFileManger referencing a unclosed file, we
228- # should remove it from the cache upon garbage collection.
234+ def __del__ (self ) -> None :
235+ # If we're the only CachingFileManager referencing an unclosed file,
236+ # remove it from the cache upon garbage collection.
229237 #
230- # Keeping our own count of file references might seem like overkill,
231- # but it's actually pretty common to reopen files with the same
232- # variable name in a notebook or command line environment, e.g., to
233- # fix the parameters used when opening a file:
234- # >>> ds = xarray.open_dataset('myfile.nc')
235- # >>> ds = xarray.open_dataset('myfile.nc', decode_times=False)
236- # This second assignment to "ds" drops CPython's ref-count on the first
237- # "ds" argument to zero, which can trigger garbage collections. So if
238- # we didn't check whether another object is referencing 'myfile.nc',
239- # the newly opened file would actually be immediately closed!
238+ # We keep track of our own reference count because we don't want to
239+ # close files if another identical file manager needs it. This can
240+ # happen if a CachingFileManager is pickled and unpickled without
241+ # closing the original file.
240242 ref_count = self ._ref_counter .decrement (self ._key )
241243
242244 if not ref_count and self ._key in self ._cache :
@@ -249,30 +251,40 @@ def __del__(self):
249251
250252 if OPTIONS ["warn_for_unclosed_files" ]:
251253 warnings .warn (
252- "deallocating {}, but file is not already closed. "
253- "This may indicate a bug." . format ( self ) ,
254+ f "deallocating { self } , but file is not already closed. "
255+ "This may indicate a bug." ,
254256 RuntimeWarning ,
255257 stacklevel = 2 ,
256258 )
257259
def __getstate__(self) -> tuple:
    """Return the state used for pickling.

    The cache is intentionally omitted: it is a global object holding
    open files, and we don't want to try to serialize it.

    Returns
    -------
    tuple
        ``(opener, args, mode, kwargs, lock, manager_id)``. ``lock`` is
        ``None`` when this manager uses its default lock, so that the
        restored object creates a fresh lock instead of pickling one.
    """
    lock = None if self._use_default_lock else self._lock
    return (
        self._opener,
        self._args,
        self._mode,
        self._kwargs,
        lock,
        self._manager_id,
    )
264273
def __setstate__(self, state) -> None:
    """Restore from a pickle.

    Parameters
    ----------
    state : tuple
        The 6-tuple ``(opener, args, mode, kwargs, lock, manager_id)``
        produced by ``__getstate__``.

    Raises
    ------
    ValueError
        If ``state`` does not contain exactly six items.
    """
    opener, args, mode, kwargs, lock, manager_id = state
    # Re-run __init__ so everything not stored in the pickle is rebuilt from
    # its defaults; manager_id is passed through unchanged.
    self.__init__(  # type: ignore
        opener, *args, mode=mode, kwargs=kwargs, lock=lock, manager_id=manager_id
    )
269280
def __repr__(self) -> str:
    """Return a string showing the opener, arguments, kwargs and manager id.

    ``mode`` is only included when it differs (by identity) from the
    ``_DEFAULT_MODE`` sentinel.
    """
    args_string = ", ".join(map(repr, self._args))
    if self._mode is not _DEFAULT_MODE:
        args_string += f", mode={self._mode!r}"
    return (
        f"{type(self).__name__}({self._opener!r}, {args_string}, "
        f"kwargs={self._kwargs}, manager_id={self._manager_id!r})"
    )
277289
278290
0 commit comments