8 changes: 5 additions & 3 deletions 1.chunkedgraph.md
@@ -48,7 +48,7 @@ terraform plan
terraform apply
```

-This will output some variables useful for next steps:
+This will output some variables useful for next steps.

```
kubernetes_cluster_context = "gcloud container clusters get-credentials chunkedgraph-ingest --zone us-east1-b --project neuromancer-seung-import"
@@ -59,8 +59,9 @@ region = "us-east1"
zone = "us-east1-b"
```

+It will also pregenerate a YAML file as the starting point for your values file; it is placed in `helm/config/{common_name}.yaml`, based on the `common_name` you configured in your `terraform.tfvars` file.
+
Use the value of `kubernetes_cluster_context` to connect to your cluster.
-Use the value of `redis_host` in `helm/pychunkedgraph/values.yaml` (more info in the Helm section).

You can also look these up again with `terraform show` from within the `terraform/` directory.
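To print a single output by name (here `redis_host`, one of the outputs referenced above), use `terraform output`:

```
terraform output redis_host
```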

@@ -126,14 +127,15 @@ image:
## Helm ([docs](https://helm.sh/docs/))

`helm` is used to run the ingest. The provided chart installs the Kubernetes resources needed to run the ingest, such as ConfigMaps, Secrets, and Deployments. Refer to the example `helm/pychunkedgraph/example_values.yaml` file for more information.
+Terraform also generates a values file for you at `helm/config/{common_name}.yaml`, reflecting the values you configured in your `terraform.tfvars` file.

IMPORTANT: If you have a large dataset to ingest, it is recommended to do this layer by layer. See [scaling](#scaling).

> NOTE: Depending on your dataset, you will need to figure out the optimal CPU and memory limits for your worker deployments. To do that, adjust the `count` and `machine` variables in `terraform.tfvars`. The optimum can vary with chunk size, supervoxel size (atomic segments in layer 1), the number of edges per chunk, and so on.

### Chart Installation

-When all variables are ready, rename your values file to `values.yaml` (ignored by git because it can contain sensitive information). If a different name is preferred (for different datasets/projects), use the format `values*.[yml|yaml]`, which is also ignored by git. The file name will then need to be passed explicitly to `helm install` with `-f <values_file.yml>`.
+When all variables are ready, rename your values file to `values.yaml` (ignored by git because it can contain sensitive information). If a different name is preferred (for different datasets/projects), use the format `values*.[yml|yaml]`, which is also ignored by git. The file name will then need to be passed explicitly to `helm install` with `-f <values_file.yml>`. Note: if you rerun Terraform without changing `common_name`, your old values file will be overwritten.
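For example, installing with an explicitly named values file might look like this (the release name `pcg-ingest` is illustrative, and the chart is assumed to live in `helm/pychunkedgraph/`):

```
helm install pcg-ingest ./helm/pychunkedgraph -f helm/config/my_dataset.yaml
```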

Then run:

8 changes: 8 additions & 0 deletions terraform/helm_config.tf
@@ -0,0 +1,8 @@
resource "local_file" "helm_values" {
filename = "${path.module}/../helm/config/${var.common_name}.yaml"
content = templatefile("${path.module}/helm_values.tpl", {
redis_host = google_redis_instance.redis.host
google_project = var.project_id
bigtable_instance = google_bigtable_instance.instance.id
})
}
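If only the values file needs to be regenerated (say, after editing the template), a targeted apply of this one resource is enough; this sketch assumes no other resources changed:

```
terraform apply -target=local_file.helm_values
```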
168 changes: 168 additions & 0 deletions terraform/helm_values.tpl
@@ -0,0 +1,168 @@
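# Rendered by terraform/helm_config.tf via templatefile(); the redis_host,
# google_project, and bigtable_instance placeholders below are filled in by Terraform.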
env:
  - name: &commonEnvVars "pychunkedgraph"
    vars:
      REDIS_HOST: "${redis_host}"
      REDIS_PORT: 6379
      REDIS_PASSWORD: ""
      BIGTABLE_PROJECT: &bt_project "${google_project}"
      BIGTABLE_INSTANCE: &bt_instance "${bigtable_instance}"
      GOOGLE_APPLICATION_CREDENTIALS: /root/.cloudvolume/secrets/google-secret.json
      SHELL: /bin/bash
      FLASK_APP: run_dev.py
      APP_SETTINGS: pychunkedgraph.app.config.DeploymentWithRedisConfig

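# Shell conveniences written to each container's /root/.bashrc via the bashrc volume mount below.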
configfiles:
  - name: &bashrc "bashrc"
    files:
      ".bashrc": |-
        alias watch='watch '
        alias ingest='flask ingest'
        alias rqx='flask rq'

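# Per-dataset ingest configs; mounted into the pods at /app/datasets (see volumeMounts below).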
configyamls:
  - name: &dataset test
    files:
      - name: test.yml
        content:
          data_source:
            EDGES: "<path_to_edges>"
            COMPONENTS: "<path_to_components>"
            WATERSHED: "<path_to_segmentation>"

          graph_config:
            CHUNK_SIZE: [] # [X, Y, Z]
            FANOUT: 2
            SPATIAL_BITS: 2
            LAYER_ID_BITS: 8

          backend_client:
            TYPE: "bigtable"
            CONFIG:
              ADMIN: true
              READ_ONLY: false
              PROJECT: *bt_project
              INSTANCE: *bt_instance

secrets:
  - name: &cloudVolumeSecrets cloud-volume-secrets
    files:
      # these are used by the python bigtable client and cloud-files
      # the service account must have the following permissions:
      #   * read GCS objects, if the edges/component files are stored in Google Cloud buckets
      #     (if they are stored elsewhere, use secrets with the appropriate permissions instead)
      #   * bigtable - create and read tables
      google-secret.json: |-
        {
          <contents_of_service_account_secret>
        }

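# Master deployment; assumption: this pod is where the ingest is started, using the flask aliases defined above.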
deployments:
  - enabled: true
    name: &name master
    nodeSelector:
      cloud.google.com/gke-nodepool: master
    hpa:
      enabled: false
    volumes: &commonVolumes
      - name: *cloudVolumeSecrets
        secret:
          secretName: *cloudVolumeSecrets
      - name: &datasetsVolume datasets-volume
        configMap:
          name: *dataset
      - name: &bashrcVolume bashrc-volume
        configMap:
          name: *bashrc
    containers:
      - name: *name
        image: &image
          repository: <image_repo>
          tag: "<image_tag>"
        volumeMounts: &commonVolumeMounts
          - name: *cloudVolumeSecrets
            mountPath: /root/.cloudvolume/secrets
            readOnly: true
          - name: *datasetsVolume
            mountPath: /app/datasets
          - name: *bashrcVolume
            mountPath: /root/
        envFromConfigMap:
          - *commonEnvVars
        env:
          - name: MY_POD_IP
            valueFrom:
              fieldRef:
                fieldPath: status.podIP
        resources:
          requests:
            memory: 500M

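# One RQ worker deployment per layer; each deployment name doubles as its queue name (command: [rq, worker, *name]).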
workerDeployments:
  - enabled: true
    name: &name l2
    nodeSelector:
      cloud.google.com/gke-nodepool: low
    hpa:
      enabled: false
    volumes: *commonVolumes
    containers:
      - name: *name
        command: [rq, worker, *name]
        image: *image
        volumeMounts: *commonVolumeMounts
        envFromConfigMap:
          - *commonEnvVars
        resources:
          requests:
            memory: 1G

  - enabled: true
    name: &name l3
    nodeSelector:
      cloud.google.com/gke-nodepool: low
    hpa:
      enabled: false
    volumes: *commonVolumes
    containers:
      - name: *name
        command: [rq, worker, *name]
        image: *image
        volumeMounts: *commonVolumeMounts
        envFromConfigMap:
          - *commonEnvVars
        resources:
          requests:
            memory: 1.5G

  - enabled: true
    name: &name l4
    nodeSelector:
      cloud.google.com/gke-nodepool: low
    hpa:
      enabled: false
    volumes: *commonVolumes
    containers:
      - name: *name
        command: [rq, worker, *name]
        image: *image
        volumeMounts: *commonVolumeMounts
        envFromConfigMap:
          - *commonEnvVars
        resources:
          requests:
            memory: 2G

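# Disabled by default; count is expected to match the number of layers in the chunkedgraph.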
trackerDeployments:
  count: 4 # number of layers in the chunkedgraph
  enabled: false
  volumes: *commonVolumes
  hpa:
    enabled: false
  containers:
    - image: *image
      volumeMounts: *commonVolumeMounts
      env:
        - name: *commonEnvVars
      resources:
        requests:
          memory: 100M