Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Unreleased

* Respect `AzureBlobFileSystem.protocol` tuple when removing protocols from fully-qualified
paths provided to `AzureBlobFileSystem` methods.

* Added `AzureBlobFileSystem.rm_file()`

2025.8.0
--------
Expand Down
21 changes: 21 additions & 0 deletions adlfs/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,27 @@ async def _rm_files(

sync_wrapper(_rm_files)

async def _rm_file(self, path: str, **kwargs):
    """Remove a single blob.

    Parameters
    ----------
    path: str
        File to delete.

    Raises
    ------
    FileNotFoundError
        If the blob (or its container) does not exist.
    """
    container_name, blob_path, version_id = self.split_path(path)
    try:
        # NOTE: the container client is an async context manager; entering it
        # and issuing the delete both stay inside the try so a missing
        # container surfaces the same way as a missing blob.
        cc = self.service_client.get_container_client(container=container_name)
        async with cc as container_client:
            await container_client.delete_blob(blob_path, version_id=version_id)
    except ResourceNotFoundError as exc:
        raise FileNotFoundError(
            errno.ENOENT, os.strerror(errno.ENOENT), path
        ) from exc
    # Drop both the entry itself and the parent listing so a subsequent
    # ls()/exists() does not serve the deleted blob from the dircache.
    self.invalidate_cache(path)
    self.invalidate_cache(self._parent(path))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Poking more through the adlfs codebase, I'm thinking we will also need to invalidate the path as well. While it does not seem like some of the other methods handle this (and arguably something we should circle back to addressing eventually), we can run into inconsistencies where the path can still reside in the dircache. For example, if ls() is run on the file, it still shows up as present:

fs = get_fs()
upload(fs)
print(fs.ls(f"{CONTAINER_NAME}/small.bin"))
fs.rm_file(f"{CONTAINER_NAME}/small.bin")
print("Still cached:", fs.ls(f"{CONTAINER_NAME}/small.bin"))  # Should throw FileNotFoundError

We should make sure to add a test case for this as well.


async def _separate_directory_markers_for_non_empty_directories(
self, file_paths: typing.Iterable[str]
) -> typing.Tuple[typing.List[str], typing.List[str]]:
Expand Down
45 changes: 45 additions & 0 deletions adlfs/tests/test_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -2210,3 +2210,48 @@ def test_write_max_concurrency(storage, max_concurrency, blob_size, blocksize):
with fs.open(path, "rb") as f:
assert f.read() == data
fs.rm(container_name, recursive=True)


def test_rm_file(storage):
    """A deleted blob disappears from storage and from the dircache."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )
    target = "data/test_file.txt"
    with fs.open(target, "wb") as stream:
        stream.write(b"test content")
    assert fs.exists(target)

    fs.rm_file(target)

    # Both the live listing and the cached state must reflect the removal.
    with pytest.raises(FileNotFoundError):
        fs.ls(target)
    assert not fs.exists(target)
    assert target not in fs.dircache


def test_rm_file_versioned_blob(storage, mocker):
    """rm_file forwards the version id parsed from the path to delete_blob."""
    from azure.storage.blob.aio import ContainerClient

    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
        version_aware=True,
    )
    delete_blob_spy = mocker.patch.object(
        ContainerClient, "delete_blob", return_value=None
    )

    fs.rm_file(f"data/test_file.txt?versionid={DEFAULT_VERSION_ID}")

    delete_blob_spy.assert_called_once_with(
        "test_file.txt", version_id=DEFAULT_VERSION_ID
    )


def test_rm_file_does_not_exist(storage):
    """rm_file on a missing blob raises FileNotFoundError.

    Pins the behavior where the backend's ResourceNotFoundError is
    translated into a standard FileNotFoundError for callers.
    """
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )
    path = "data/non_existent_file.txt"
    with pytest.raises(FileNotFoundError):
        fs.rm_file(path)