Skip to content

Commit ee3a4fb

Browse files
authored
Merge pull request #414 from broadinstitute/requester_pays
Added option to get file stats for requester-pays files
2 parents 0b660da + 8cda7ce commit ee3a4fb

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

gnomad/utils/file_utils.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -154,19 +154,25 @@ def select_primitives_from_ht(ht: hl.Table) -> hl.Table:
154154
)
155155

156156

157-
def get_file_stats(url: str) -> Tuple[int, str, str]:
157+
def get_file_stats(url: str, project_id: Optional[str] = None) -> Tuple[int, str, str]:
158158
"""
159159
Get size (as both int and str) and md5 for file at specified URL.
160160
161161
Typically used to get stats on VCFs.
162162
163163
:param url: Path to file of interest.
164+
:param project_id: Google project ID. Specify if URL points to a requester-pays bucket.
164165
:return: Tuple of file size and md5.
165166
"""
166167
one_gibibyte = 2 ** 30
167168
one_mebibyte = 2 ** 20
168169

169-
output = subprocess.check_output(["gsutil", "stat", url]).decode("utf8")
170+
if project_id:
171+
output = subprocess.check_output(
172+
["gsutil", "-u", project_id, "stat", url]
173+
).decode("utf8")
174+
else:
175+
output = subprocess.check_output(["gsutil", "stat", url]).decode("utf8")
170176
lines = output.split("\n")
171177

172178
info = {}

0 commit comments

Comments
 (0)