8 changes: 5 additions & 3 deletions 1.chunkedgraph.md
@@ -48,7 +48,7 @@ terraform plan
terraform apply
```

-This will output some variables useful for next steps:
+This will output some variables useful for next steps.

```
kubernetes_cluster_context = "gcloud container clusters get-credentials chunkedgraph-ingest --zone us-east1-b --project neuromancer-seung-import"
@@ -59,8 +59,9 @@ region = "us-east1"
zone = "us-east1-b"
```

+It will also pregenerate a YAML file as the starting point for your values file; it is placed in `helm/config/{common_name}.yaml`, based on the `common_name` you configured in your `terraform.tfvars` file.
+
Use the value of `kubernetes_cluster_context` to connect to your cluster.
-Use the value of `redis_host` in `helm/pychunkedgraph/values.yaml` (more info in the Helm section).

You can also look these up again with `terraform show` from within the `terraform/` directory.
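To print a single output by name (here `redis_host`, one of the outputs referenced above), use `terraform output`:

```
terraform output redis_host
```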

@@ -126,14 +127,15 @@ image:
## Helm ([docs](https://helm.sh/docs/))

`helm` is used to run the ingest. The provided chart installs the Kubernetes resources needed to run the ingest, such as ConfigMaps, Secrets, and Deployments. Refer to the example `helm/pychunkedgraph/example_values.yaml` file for more information.
+Terraform also generates a values file for you at `helm/config/{common_name}.yaml`, reflecting the values you configured in your `terraform.tfvars` file.

IMPORTANT: If you have a large dataset to ingest, it is recommended to do this layer by layer. See [scaling](#scaling).

> NOTE: Depending on your dataset, you will need to figure out the optimal CPU and memory limits for your worker deployments. To do that, adjust the `count` and `machine` variables in `terraform.tfvars`. The optimum can vary with chunk size, supervoxel size (atomic segments in layer 1), the number of edges per chunk, and so on.

### Chart Installation

-When all variables are ready, rename your values file to `values.yaml` (ignored by git because it can contain sensitive information). If a different name is preferred (for different datasets/projects), use the format `values*.[yml|yaml]`, which is also ignored by git. The file name will then need to be passed explicitly to `helm install` with `-f <values_file.yml>`.
+When all variables are ready, rename your values file to `values.yaml` (ignored by git because it can contain sensitive information). If a different name is preferred (for different datasets/projects), use the format `values*.[yml|yaml]`, which is also ignored by git. The file name will then need to be passed explicitly to `helm install` with `-f <values_file.yml>`. Note: if you rerun Terraform without changing `common_name`, your old values file will be overwritten.
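For example, installing with an explicitly named values file might look like this (the release name `pcg-ingest` is illustrative, and the chart is assumed to live in `helm/pychunkedgraph/`):

```
helm install pcg-ingest ./helm/pychunkedgraph -f helm/config/my_dataset.yaml
```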

Then run:

8 changes: 8 additions & 0 deletions terraform/helm_config.tf
@@ -0,0 +1,8 @@
resource "local_file" "helm_values" {
filename = "${path.module}/../helm/config/${var.common_name}.yaml"
content = templatefile("${path.module}/helm_values.tpl", {
redis_host = google_redis_instance.redis.host
google_project = var.project_id
bigtable_instance = google_bigtable_instance.instance.id
})
}
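If only the values file needs to be regenerated (say, after editing the template), a targeted apply of this one resource is enough; this sketch assumes no other resources changed:

```
terraform apply -target=local_file.helm_values
```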
168 changes: 168 additions & 0 deletions terraform/helm_values.tpl
@@ -0,0 +1,168 @@
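# Rendered by terraform/helm_config.tf via templatefile(); the redis_host,
# google_project, and bigtable_instance placeholders below are filled in by Terraform.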
env:
  - name: &commonEnvVars "pychunkedgraph"
    vars:
      REDIS_HOST: "${redis_host}"
      REDIS_PORT: 6379
      REDIS_PASSWORD: ""
      BIGTABLE_PROJECT: &bt_project "${google_project}"
      BIGTABLE_INSTANCE: &bt_instance "${bigtable_instance}"
      GOOGLE_APPLICATION_CREDENTIALS: /root/.cloudvolume/secrets/google-secret.json
      SHELL: /bin/bash
      FLASK_APP: run_dev.py
      APP_SETTINGS: pychunkedgraph.app.config.DeploymentWithRedisConfig

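# Shell conveniences written to each container's /root/.bashrc via the bashrc volume mount below.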
configfiles:
  - name: &bashrc "bashrc"
    files:
      ".bashrc": |-
        alias watch='watch '
        alias ingest='flask ingest'
        alias rqx='flask rq'

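# Per-dataset ingest configs; mounted into the pods at /app/datasets (see volumeMounts below).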
configyamls:
  - name: &dataset test
    files:
      - name: test.yml
        content:
          data_source:
            EDGES: "<path_to_edges>"
            COMPONENTS: "<path_to_components>"
            WATERSHED: "<path_to_segmentation>"

          graph_config:
            CHUNK_SIZE: [] # [X, Y, Z]
            FANOUT: 2
            SPATIAL_BITS: 2
            LAYER_ID_BITS: 8

          backend_client:
            TYPE: "bigtable"
            CONFIG:
              ADMIN: true
              READ_ONLY: false
              PROJECT: *bt_project
              INSTANCE: *bt_instance

secrets:
  - name: &cloudVolumeSecrets cloud-volume-secrets
    files:
      # these are used by the python bigtable client and cloud-files
      # the service account must have the following permissions:
      #   * read GCS objects, if the edges/component files are stored in Google Cloud buckets
      #     (if they are stored elsewhere, use secrets with the appropriate permissions instead)
      #   * bigtable - create and read tables
      google-secret.json: |-
        {
          <contents_of_service_account_secret>
        }

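# Master deployment; assumption: this pod is where the ingest is started, using the flask aliases defined above.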
deployments:
  - enabled: true
    name: &name master
    nodeSelector:
      cloud.google.com/gke-nodepool: master
    hpa:
      enabled: false
    volumes: &commonVolumes
      - name: *cloudVolumeSecrets
        secret:
          secretName: *cloudVolumeSecrets
      - name: &datasetsVolume datasets-volume
        configMap:
          name: *dataset
      - name: &bashrcVolume bashrc-volume
        configMap:
          name: *bashrc
    containers:
      - name: *name
        image: &image
          repository: <image_repo>
          tag: "<image_tag>"
        volumeMounts: &commonVolumeMounts
          - name: *cloudVolumeSecrets
            mountPath: /root/.cloudvolume/secrets
            readOnly: true
          - name: *datasetsVolume
            mountPath: /app/datasets
          - name: *bashrcVolume
            mountPath: /root/
        envFromConfigMap:
          - *commonEnvVars
        env:
          - name: MY_POD_IP
            valueFrom:
              fieldRef:
                fieldPath: status.podIP
        resources:
          requests:
            memory: 500M

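# One RQ worker deployment per layer; each deployment name doubles as its queue name (command: [rq, worker, *name]).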
workerDeployments:
  - enabled: true
    name: &name l2
    nodeSelector:
      cloud.google.com/gke-nodepool: low
    hpa:
      enabled: false
    volumes: *commonVolumes
    containers:
      - name: *name
        command: [rq, worker, *name]
        image: *image
        volumeMounts: *commonVolumeMounts
        envFromConfigMap:
          - *commonEnvVars
        resources:
          requests:
            memory: 1G

  - enabled: true
    name: &name l3
    nodeSelector:
      cloud.google.com/gke-nodepool: low
    hpa:
      enabled: false
    volumes: *commonVolumes
    containers:
      - name: *name
        command: [rq, worker, *name]
        image: *image
        volumeMounts: *commonVolumeMounts
        envFromConfigMap:
          - *commonEnvVars
        resources:
          requests:
            memory: 1.5G

  - enabled: true
    name: &name l4
    nodeSelector:
      cloud.google.com/gke-nodepool: low
    hpa:
      enabled: false
    volumes: *commonVolumes
    containers:
      - name: *name
        command: [rq, worker, *name]
        image: *image
        volumeMounts: *commonVolumeMounts
        envFromConfigMap:
          - *commonEnvVars
        resources:
          requests:
            memory: 2G

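# Disabled by default; count is expected to match the number of layers in the chunkedgraph.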
trackerDeployments:
  count: 4 # number of layers in the chunkedgraph
  enabled: false
  volumes: *commonVolumes
  hpa:
    enabled: false
  containers:
    - image: *image
      volumeMounts: *commonVolumeMounts
      env:
        - name: *commonEnvVars
      resources:
        requests:
          memory: 100M