Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions gnomad/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,25 @@ def select_primitives_from_ht(ht: hl.Table) -> hl.Table:
)


def get_file_stats(url: str) -> Tuple[int, str, str]:
def get_file_stats(url: str, project_id: Optional[str] = None) -> Tuple[int, str, str]:
"""
Get size (as both int and str) and md5 for file at specified URL.

Typically used to get stats on VCFs.

:param url: Path to file of interest.
:param project_id: Google project ID. Specify if URL points to a requester-pays bucket.
:return: Tuple of file size and md5.
"""
one_gibibyte = 2 ** 30
one_mebibyte = 2 ** 20

output = subprocess.check_output(["gsutil", "stat", url]).decode("utf8")
if project_id:
output = subprocess.check_output(
["gsutil", "-u", project_id, "stat", url]
).decode("utf8")
else:
output = subprocess.check_output(["gsutil", "stat", url]).decode("utf8")
lines = output.split("\n")

info = {}
Expand Down