Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 76 additions & 41 deletions gnomad/resources/grch38/gnomad.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,20 @@
DATA_TYPES = ["genomes"]

GENOME_POPS = ["AFR", "AMI", "AMR", "ASJ", "EAS", "FIN", "NFE", "SAS", "OTH"]
SUBSETS = [
"non_v2",
"non_topmed",
"non_cancer",
"controls_and_biobanks",
"non_neuro",
"tgp",
"hgdp",
]
SUBSETS = {
"v3": [
"non_v2",
"non_topmed",
"non_cancer",
"controls_and_biobanks",
"non_neuro",
"tgp",
"hgdp",
],
"v4": ["ukb", "non_ukb", "non_topmed"],
}
"""
Order to sort subgroupings during VCF export.
Order to sort subgroupings during VCF export by version.

Ensures that INFO labels in VCF are in desired order (e.g., tgp_raw_AC_esn_XX).
"""
Expand All @@ -48,9 +51,21 @@
Used to stratify frequency annotations (AC, AN, AF) for each sex.
"""

POPS = ["afr", "ami", "amr", "asj", "eas", "fin", "nfe", "oth", "sas", "mid"]
POPS = {
"v3": ["afr", "ami", "amr", "asj", "eas", "fin", "nfe", "oth", "sas", "mid"],
"v4": [
"afr",
"amr",
"asj",
"eas",
"fin",
"mid",
"remaining",
"sas",
],
}
"""
Global populations in gnomAD v3.
Global ancestry groups in gnomAD by version.
"""

COHORTS_WITH_POP_STORED_AS_SUBPOP = ["tgp", "hgdp"]
Expand Down Expand Up @@ -186,36 +201,56 @@
Populations that are removed before popmax calculations.
"""

DOWNSAMPLINGS = [
10,
20,
50,
100,
200,
500,
1000,
2000,
5000,
10000,
15000,
20000,
25000,
30000,
40000,
50000,
60000,
70000,
75000,
80000,
85000,
90000,
95000,
100000,
110000,
120000,
]
DOWNSAMPLINGS = {
"v3": [
10,
20,
50,
100,
200,
500,
1000,
2000,
5000,
10000,
15000,
20000,
25000,
30000,
40000,
50000,
60000,
70000,
75000,
80000,
85000,
90000,
95000,
100000,
110000,
120000,
],
"v4": [
10,
100,
1000,
2000,
5000,
10000,
15000,
20000,
25000,
50000,
100000,
150000,
200000,
300000,
400000,
500000,
],
}
"""
List of the downsampling numbers to use for frequency calculations.
List of the downsampling numbers to use for frequency calculations by version.
"""

gnomad_syndip = VersionedMatrixTableResource(
Expand Down