From b31e1ee94fc37bec5b5e2db0320f50d39e712e35 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 18 Nov 2025 07:23:07 -0500 Subject: [PATCH 1/4] feat: iterate on ami, packer, ansible, ssh to investigate --- .gitignore | 2 + ansible/tasks/setup-system.yml | 2 +- docs/ami-local-development.md | 272 ++++++++++++ nix/apps.nix | 1 + nix/devShells.nix | 11 + nix/fmt.nix | 4 + nix/packages/default.nix | 7 +- nix/packages/pg-ami-builder.nix | 53 +++ .../pg-ami-builder/cmd/ansible_rerun.go | 235 ++++++++++ .../pg-ami-builder/cmd/ansible_rerun_test.go | 30 ++ nix/packages/pg-ami-builder/cmd/build.go | 402 ++++++++++++++++++ nix/packages/pg-ami-builder/cmd/build_test.go | 60 +++ nix/packages/pg-ami-builder/cmd/cleanup.go | 144 +++++++ .../pg-ami-builder/cmd/cleanup_test.go | 15 + nix/packages/pg-ami-builder/cmd/create_ami.go | 122 ++++++ nix/packages/pg-ami-builder/cmd/root.go | 29 ++ nix/packages/pg-ami-builder/cmd/ssh.go | 152 +++++++ nix/packages/pg-ami-builder/cmd/ssh_test.go | 15 + nix/packages/pg-ami-builder/cmd/version.go | 21 + nix/packages/pg-ami-builder/go.mod | 28 ++ nix/packages/pg-ami-builder/go.sum | 43 ++ .../pg-ami-builder/internal/ansible/runner.go | 58 +++ .../internal/ansible/runner_test.go | 90 ++++ .../pg-ami-builder/internal/aws/ec2.go | 352 +++++++++++++++ .../pg-ami-builder/internal/aws/ec2_test.go | 46 ++ .../pg-ami-builder/internal/aws/iam.go | 96 +++++ .../pg-ami-builder/internal/aws/ssm.go | 178 ++++++++ .../pg-ami-builder/internal/aws/ssm_test.go | 93 ++++ .../pg-ami-builder/internal/git/repo.go | 37 ++ .../pg-ami-builder/internal/git/repo_test.go | 27 ++ .../pg-ami-builder/internal/packer/runner.go | 84 ++++ .../internal/packer/runner_test.go | 75 ++++ .../pg-ami-builder/internal/state/manager.go | 125 ++++++ .../internal/state/manager_test.go | 78 ++++ nix/packages/pg-ami-builder/main.go | 12 + stage2-nix-psql.pkr.hcl | 26 +- 36 files changed, 3015 insertions(+), 10 deletions(-) create mode 100644 docs/ami-local-development.md create mode 100644 
nix/packages/pg-ami-builder.nix create mode 100644 nix/packages/pg-ami-builder/cmd/ansible_rerun.go create mode 100644 nix/packages/pg-ami-builder/cmd/ansible_rerun_test.go create mode 100644 nix/packages/pg-ami-builder/cmd/build.go create mode 100644 nix/packages/pg-ami-builder/cmd/build_test.go create mode 100644 nix/packages/pg-ami-builder/cmd/cleanup.go create mode 100644 nix/packages/pg-ami-builder/cmd/cleanup_test.go create mode 100644 nix/packages/pg-ami-builder/cmd/create_ami.go create mode 100644 nix/packages/pg-ami-builder/cmd/root.go create mode 100644 nix/packages/pg-ami-builder/cmd/ssh.go create mode 100644 nix/packages/pg-ami-builder/cmd/ssh_test.go create mode 100644 nix/packages/pg-ami-builder/cmd/version.go create mode 100644 nix/packages/pg-ami-builder/go.mod create mode 100644 nix/packages/pg-ami-builder/go.sum create mode 100644 nix/packages/pg-ami-builder/internal/ansible/runner.go create mode 100644 nix/packages/pg-ami-builder/internal/ansible/runner_test.go create mode 100644 nix/packages/pg-ami-builder/internal/aws/ec2.go create mode 100644 nix/packages/pg-ami-builder/internal/aws/ec2_test.go create mode 100644 nix/packages/pg-ami-builder/internal/aws/iam.go create mode 100644 nix/packages/pg-ami-builder/internal/aws/ssm.go create mode 100644 nix/packages/pg-ami-builder/internal/aws/ssm_test.go create mode 100644 nix/packages/pg-ami-builder/internal/git/repo.go create mode 100644 nix/packages/pg-ami-builder/internal/git/repo_test.go create mode 100644 nix/packages/pg-ami-builder/internal/packer/runner.go create mode 100644 nix/packages/pg-ami-builder/internal/packer/runner_test.go create mode 100644 nix/packages/pg-ami-builder/internal/state/manager.go create mode 100644 nix/packages/pg-ami-builder/internal/state/manager_test.go create mode 100644 nix/packages/pg-ami-builder/main.go diff --git a/.gitignore b/.gitignore index f5b1a40fc..97d9b119d 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ common-nix.vars.pkr.hcl # pre-commit 
config is managed in nix .pre-commit-config.yaml nixos.qcow2 +nix/packages/pg-ami-builder/pg-ami-builder +nix/packages/pg-ami-builder/vendor/ diff --git a/ansible/tasks/setup-system.yml b/ansible/tasks/setup-system.yml index afd5a64fc..4218d18a2 100644 --- a/ansible/tasks/setup-system.yml +++ b/ansible/tasks/setup-system.yml @@ -12,7 +12,7 @@ ansible.builtin.apt: cache_valid_time: 3600 pkg: - - acl + - acl - bwm-ng - fail2ban - htop diff --git a/docs/ami-local-development.md b/docs/ami-local-development.md new file mode 100644 index 000000000..6cde482b2 --- /dev/null +++ b/docs/ami-local-development.md @@ -0,0 +1,272 @@ +# Local AMI Development with pg-ami-builder + +This guide explains how to use `pg-ami-builder` for local AMI development and iteration. + +## Prerequisites + +### Required Tools + +- AWS CLI v2 +- aws-vault (for credential management) +- SSM Session Manager plugin +- Git +- Nix + +### Installing SSM Session Manager Plugin + +**macOS:** +```bash +brew install --cask session-manager-plugin +``` + +**Linux:** +See [AWS documentation](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-working-with-install-plugin.html) + +### AWS Permissions + +Your AWS user/role needs these permissions: +- EC2: RunInstances, TerminateInstances, DescribeInstances, CreateTags +- EC2: CreateSecurityGroup, DeleteSecurityGroup, AuthorizeSecurityGroupIngress +- SSM: StartSession, DescribeSessions +- EC2: CreateImage, DescribeImages (if using --create-ami) + +## Quick Start + +### Building Phase 1 + +```bash +# Run phase 1 build (launches instance and runs packer build) +aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 --postgres-version 15 + +# If packer build fails, instance stays alive for debugging +# SSH to investigate +aws-vault exec dev -- nix run .#pg-ami-builder -- ssh + +# Make local changes and re-run with file sync +vim ansible/playbook.yml +aws-vault exec dev -- nix run .#pg-ami-builder -- ansible-rerun phase1 --sync-files
+ +# Cleanup when done +aws-vault exec dev -- nix run .#pg-ami-builder -- cleanup +``` + +### Building Phase 2 + +```bash +# Run phase 2 with existing stage-1 AMI +aws-vault exec dev -- nix run .#pg-ami-builder -- build phase2 \ + --source-ami ami-stage1-xyz \ + --postgres-version 15 +``` + +## Commands + +### build phase1 + +Launch EC2 instance and run phase 1 ansible playbook. + +```bash +nix run .#pg-ami-builder -- build phase1 --postgres-version 15 [flags] +``` + +**Flags:** +- `--postgres-version` (required) - PostgreSQL major version (15, 16, 17) +- `--region` - AWS region (default: us-east-1) +- `--create-ami` - Create AMI on success (default: false) +- `--ansible-args` - Additional ansible arguments +- `--instance-type` - EC2 instance type (default: c6g.4xlarge) +- `--state-file` - Custom state file path + +### build phase2 + +Launch EC2 instance from stage-1 AMI and run phase 2 ansible playbook. + +```bash +nix run .#pg-ami-builder -- build phase2 --source-ami ami-xyz --postgres-version 15 [flags] +``` + +**Flags:** +- `--source-ami` - Stage-1 AMI ID (default: the phase 1 AMI recorded in the state file) +- `--postgres-version` (required) - PostgreSQL major version +- `--git-sha` - Git SHA for nix packages (default: current HEAD) +- Plus all flags from phase1 + +### ansible-rerun + +Re-run ansible playbook on existing instance. Optionally sync local file changes first.
+ +```bash +nix run .#pg-ami-builder -- ansible-rerun phase1 [flags] +``` + +**Flags:** +- `--instance-id` - Target specific instance (default: from state file) +- `--sync-files` - Sync local ansible/, scripts/, and migrations/ files before running (default: false) +- `--ansible-args` - Additional ansible arguments +- `--skip-tags` - Ansible tags to skip +- `--region` - AWS region (default: us-east-1) + +**Examples:** + +```bash +# Re-run without syncing files (use existing files on instance) +nix run .#pg-ami-builder -- ansible-rerun phase1 + +# Re-run with local file changes +nix run .#pg-ami-builder -- ansible-rerun phase1 --sync-files + +# Re-run with skip tags +nix run .#pg-ami-builder -- ansible-rerun phase1 --skip-tags migrations +``` + +### ssh + +Connect to instance via AWS SSM Session Manager (default) or EC2 Instance Connect. + +```bash +nix run .#pg-ami-builder -- ssh [flags] +``` + +**Flags:** +- `--instance-id` - Target specific instance for SSM (default: from state file) +- `--region` - AWS region for SSM (default: us-east-1) +- `--aws-ec2-connect-cmd` - Full AWS EC2 Instance Connect command string + +**Examples:** + +```bash +# Connect via SSM (default) +nix run .#pg-ami-builder -- ssh + +# Connect via EC2 Instance Connect +nix run .#pg-ami-builder -- ssh \ + --aws-ec2-connect-cmd "aws ec2-instance-connect ssh --instance-id i-024bba2db43e4b41f --region us-east-1" +``` + +### cleanup + +Terminate instance and remove associated resources. 
+ +```bash +nix run .#pg-ami-builder -- cleanup [flags] +``` + +**Flags:** +- `--instance-id` - Target specific instance (default: from state file) +- `--force` - Skip confirmation prompt + +## Workflows + +### Workflow 1: Develop and test phase 1 changes + +```bash +# Run phase 1 build (launches instance and runs packer build) +aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 --postgres-version 15 + +# If packer fails, instance stays up for debugging +# SSH to investigate +aws-vault exec dev -- nix run .#pg-ami-builder -- ssh + +# Make local changes to ansible files +vim ansible/playbook.yml + +# Re-run with your local changes +aws-vault exec dev -- nix run .#pg-ami-builder -- ansible-rerun phase1 --sync-files + +# Repeat until working, then create AMI +aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 --postgres-version 15 --create-ami + +# Cleanup +aws-vault exec dev -- nix run .#pg-ami-builder -- cleanup +``` + +### Workflow 2: Parallel builds for multiple postgres versions + +```bash +# Build PG 15 +aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 \ + --postgres-version 15 \ + --state-file ~/.pg-ami-build/pg15.json + +# Build PG 16 in parallel +aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 \ + --postgres-version 16 \ + --state-file ~/.pg-ami-build/pg16.json + +# SSH into PG 15 instance +aws-vault exec dev -- nix run .#pg-ami-builder -- ssh \ + --state-file ~/.pg-ami-build/pg15.json +``` + +## Troubleshooting + +### SSM Connection Fails + +1. Check SSM agent status on the instance +2. Verify instance profile has SSM permissions +3. Ensure session-manager-plugin is installed + +### Ansible Fails + +The instance is kept running on failure. 
Check logs: + +```bash +# SSH into instance +nix run .#pg-ami-builder -- ssh + +# Check ansible logs +sudo journalctl -u ansible-provisioner +``` + +### State File Issues + +If state file references non-existent instance: + +```bash +# Override with specific instance +nix run .#pg-ami-builder -- ssh --instance-id i-xxxxx + +# Or clear state and start fresh +rm ~/.pg-ami-build/state.json +``` + +## Advanced Usage + +### Custom State Files for Parallel Builds + +Use `--state-file` to manage multiple builds: + +```bash +nix run .#pg-ami-builder -- build phase1 \ + --postgres-version 15 \ + --state-file ~/.pg-ami-build/custom.json +``` + +### Additional Ansible Arguments + +Pass custom arguments to ansible: + +```bash +nix run .#pg-ami-builder -- build phase1 \ + --postgres-version 15 \ + --ansible-args="--skip-tags=migrations" +``` + +## State File + +Location: `~/.pg-ami-build/state.json` + +The state file tracks the current build instance, allowing subsequent commands to operate on the same instance without specifying `--instance-id`. 
+ +Example state: +```json +{ + "instance_id": "i-1234567890abcdef0", + "phase": "phase1", + "execution_id": "1731672000-15", + "region": "us-east-1", + "postgres_version": "15", + "timestamp": "2025-11-15T10:30:00Z", + "git_sha": "abc123def456" +} +``` diff --git a/nix/apps.nix b/nix/apps.nix index 75eaae49e..561e770e4 100644 --- a/nix/apps.nix +++ b/nix/apps.nix @@ -24,6 +24,7 @@ update-readme = mkApp "update-readme" "update-readme"; show-commands = mkApp "show-commands" "show-commands"; build-test-ami = mkApp "build-test-ami" "build-test-ami"; + pg-ami-builder = mkApp "pg-ami-builder" "pg-ami-builder"; run-testinfra = mkApp "run-testinfra" "run-testinfra"; cleanup-ami = mkApp "cleanup-ami" "cleanup-ami"; trigger-nix-build = mkApp "trigger-nix-build" "trigger-nix-build"; diff --git a/nix/devShells.nix b/nix/devShells.nix index 03768a770..b39b4b772 100644 --- a/nix/devShells.nix +++ b/nix/devShells.nix @@ -47,6 +47,16 @@ ansible-lint self'.packages.packer + # Go development tools + go + gopls + gotools + go-tools + delve + + # AWS tools + awscli2 + self'.packages.start-server self'.packages.start-client self'.packages.start-replica @@ -55,6 +65,7 @@ self'.packages.build-test-ami self'.packages.run-testinfra self'.packages.cleanup-ami + self'.packages.pg-ami-builder dbmate nushell pythonEnv diff --git a/nix/fmt.nix b/nix/fmt.nix index 562c3b3c5..c17c26edd 100644 --- a/nix/fmt.nix +++ b/nix/fmt.nix @@ -11,6 +11,10 @@ package = pkgs.nixfmt-rfc-style; }; ruff-format.enable = true; + gofumpt = { + enable = true; + package = pkgs.gofumpt; + }; }; }; } diff --git a/nix/packages/default.nix b/nix/packages/default.nix index c8eb02ef0..97a94d96a 100644 --- a/nix/packages/default.nix +++ b/nix/packages/default.nix @@ -28,8 +28,12 @@ in { packages = ( - { + rec { build-test-ami = pkgs.callPackage ./build-test-ami.nix { }; + packer = pkgs.callPackage ./packer.nix { inherit inputs; }; + pg-ami-builder = inputs'.nixpkgs-go124.legacyPackages.callPackage ./pg-ami-builder.nix { + 
inherit packer; + }; cleanup-ami = pkgs.callPackage ./cleanup-ami.nix { }; dbmate-tool = pkgs.callPackage ./dbmate-tool.nix { inherit (self.supabase) defaults; }; docs = pkgs.callPackage ./docs.nix { }; @@ -39,7 +43,6 @@ mecab-naist-jdic = pkgs.callPackage ./mecab-naist-jdic.nix { }; migrate-tool = pkgs.callPackage ./migrate-tool.nix { psql_15 = self'.packages."psql_15/bin"; }; overlayfs-on-package = pkgs.callPackage ./overlayfs-on-package.nix { }; - packer = pkgs.callPackage ./packer.nix { inherit inputs; }; pg-backrest = inputs.nixpkgs-pgbackrest.legacyPackages.${pkgs.system}.pgbackrest; pg-restore = pkgs.callPackage ./pg-restore.nix { psql_15 = self'.packages."psql_15/bin"; }; pg_prove = pkgs.perlPackages.TAPParserSourceHandlerpgTAP; diff --git a/nix/packages/pg-ami-builder.nix b/nix/packages/pg-ami-builder.nix new file mode 100644 index 000000000..a4a3c33fc --- /dev/null +++ b/nix/packages/pg-ami-builder.nix @@ -0,0 +1,53 @@ +{ + pkgs, + lib, + buildGoModule, + makeWrapper, + packer, +}: + +buildGoModule { + pname = "pg-ami-builder"; + version = "0.1.0"; + + src = ./pg-ami-builder; + + vendorHash = "sha256-6zdLFpc9TvX5OqoFuL4d39MfO6Bk0GGuXdJwrsRkYwc="; + + nativeBuildInputs = with pkgs; [ + makeWrapper + awscli2 + git + ]; + + # Disable CGO for static binary + env.CGO_ENABLED = 0; + + # Build flags for reduced binary size and version info + ldflags = [ + "-s" + "-w" + "-X main.Version=0.1.0" + ]; + + # Run tests during build + checkPhase = '' + runHook preCheck + go test ./... 
-short + runHook postCheck + ''; + + # Wrap binary with runtime dependencies + postInstall = '' + wrapProgram $out/bin/pg-ami-builder \ + --prefix PATH : ${lib.makeBinPath [ packer ]} + ''; + + meta = with lib; { + description = "Local AMI development tool for Supabase Postgres"; + homepage = "https://github.com/supabase/postgres"; + license = licenses.asl20; + maintainers = [ ]; + mainProgram = "pg-ami-builder"; + }; +} diff --git a/nix/packages/pg-ami-builder/cmd/ansible_rerun.go b/nix/packages/pg-ami-builder/cmd/ansible_rerun.go new file mode 100644 index 000000000..fca9c0f6a --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/ansible_rerun.go @@ -0,0 +1,235 @@ +package cmd + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/spf13/cobra" + "github.com/supabase/postgres/pg-ami-builder/internal/ansible" + "github.com/supabase/postgres/pg-ami-builder/internal/git" + "github.com/supabase/postgres/pg-ami-builder/internal/state" +) + +var ( + rerunInstanceID string + skipTags []string + syncFiles bool + sshPublicKey string + rerunGitSHA string +) + +var ansibleRerunCmd = &cobra.Command{ + Use: "ansible-rerun [phase1|phase2]", + Short: "Re-run ansible playbook from current branch on existing instance", + Long: `Re-run the ansible playbook from your current branch on an existing instance. 
+ +This is useful for iterating on ansible changes without relaunching the instance.`, + Args: cobra.ExactArgs(1), + ValidArgs: []string{"phase1", "phase2"}, + RunE: runAnsibleRerun, +} + +func runAnsibleRerun(cmd *cobra.Command, args []string) error { + phase := args[0] + if phase != "phase1" && phase != "phase2" { + return fmt.Errorf("phase must be 'phase1' or 'phase2'") + } + + ctx := context.Background() + + // Get instance ID from state or flag + instanceID := rerunInstanceID + if instanceID == "" { + stateFilePath := stateFile + if stateFilePath == "" { + var err error + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + return fmt.Errorf("failed to get default state file: %w", err) + } + } + + buildState, err := state.LoadState(stateFilePath) + if err != nil { + return fmt.Errorf("failed to load state: %w", err) + } + + // Get phase-specific state + phaseState := buildState.GetPhaseState(phase) + if phaseState == nil { + return fmt.Errorf("no state found for %s. 
Run build first", phase) + } + + instanceID = phaseState.InstanceID + region = buildState.Region + postgresVersion = buildState.PostgresVersion + if rerunGitSHA == "" { + rerunGitSHA = buildState.GitSHA + } + } + + if instanceID == "" { + return fmt.Errorf("no instance ID available (use --instance-id or run build command first)") + } + + // Get git SHA if still not set + if rerunGitSHA == "" { + var err error + rerunGitSHA, err = git.GetCurrentSHA() + if err != nil { + fmt.Printf("⚠ Could not get git SHA: %v, using 'unknown'\n", err) + rerunGitSHA = "unknown" + } + } + + fmt.Printf("✓ Found instance: %s (region: %s)\n", instanceID, region) + fmt.Printf("✓ Git SHA: %s\n", rerunGitSHA) + + // Get instance availability zone and public IP for SSH + describeCmd := exec.CommandContext(ctx, "aws", "ec2", "describe-instances", + "--instance-ids", instanceID, + "--region", region, + "--query", "Reservations[0].Instances[0].[Placement.AvailabilityZone,PublicIpAddress]", + "--output", "text") + + output, err := describeCmd.Output() + if err != nil { + return fmt.Errorf("failed to describe instance: %w", err) + } + + parts := strings.Fields(string(output)) + if len(parts) < 2 { + return fmt.Errorf("failed to get instance details") + } + + availabilityZone := parts[0] + publicIP := parts[1] + + fmt.Printf("✓ Instance AZ: %s, IP: %s\n", availabilityZone, publicIP) + + // Find SSH public key if not specified + if sshPublicKey == "" { + homeDir := os.Getenv("HOME") + // Try common key types in order + keyPaths := []string{ + filepath.Join(homeDir, ".ssh", "id_ed25519.pub"), + filepath.Join(homeDir, ".ssh", "id_rsa.pub"), + filepath.Join(homeDir, ".ssh", "id_ecdsa.pub"), + } + + for _, keyPath := range keyPaths { + if _, err := os.Stat(keyPath); err == nil { + sshPublicKey = keyPath + break + } + } + + if sshPublicKey == "" { + return fmt.Errorf("no SSH public key found in ~/.ssh/ (tried id_ed25519.pub, id_rsa.pub, id_ecdsa.pub). 
Use --ssh-public-key to specify one") + } + } + + fmt.Printf("✓ Using SSH key: %s\n", sshPublicKey) + + // Send SSH public key to instance + fmt.Printf("✓ Authorizing SSH key...\n") + + sendKeyCmd := exec.CommandContext(ctx, "aws", "ec2-instance-connect", "send-ssh-public-key", + "--instance-id", instanceID, + "--region", region, + "--availability-zone", availabilityZone, + "--instance-os-user", "ubuntu", + "--ssh-public-key", "file://"+sshPublicKey) + + sendKeyCmd.Stderr = os.Stderr + sendKeyCmd.Stdout = os.Stdout + + if err := sendKeyCmd.Run(); err != nil { + return fmt.Errorf("failed to send SSH public key: %w", err) + } + + fmt.Printf("✓ SSH key authorized for 60 seconds\n") + + // Optionally sync files using rsync over regular SSH + if syncFiles { + fmt.Printf("✓ Syncing files to instance...\n") + + fileMappings := ansible.BuildFileList(phase) + + for _, mapping := range fileMappings { + fmt.Printf(" - Syncing %s -> %s\n", mapping.Src, mapping.Dst) + + // Use rsync over regular SSH (key is already authorized) + rsyncCmd := exec.CommandContext(ctx, "rsync", "-avz", + "-e", "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null", + mapping.Src, + fmt.Sprintf("ubuntu@%s:%s", publicIP, mapping.Dst)) + + rsyncCmd.Stdout = os.Stdout + rsyncCmd.Stderr = os.Stderr + + if err := rsyncCmd.Run(); err != nil { + return fmt.Errorf("failed to sync %s: %w", mapping.Src, err) + } + } + + fmt.Printf("✓ Files synced successfully\n") + } else { + fmt.Printf("✓ Using existing files on instance\n") + } + + // Re-execute ansible via regular SSH + var additionalArgs []string + if len(skipTags) > 0 { + skipTagsStr := strings.Join(skipTags, ",") + additionalArgs = append(additionalArgs, "--skip-tags", skipTagsStr) + } + additionalArgs = append(additionalArgs, ansibleArgs...) 
+ + executeCmd := ansible.BuildAnsiblePlaybookCommand(phase, postgresVersion, rerunGitSHA, additionalArgs) + + fmt.Printf("\n✓ Re-running %s ansible...\n", phase) + fmt.Printf("✓ Executing: %s\n\n", executeCmd) + + // Execute command via regular SSH (key is already authorized) + sshCmd := exec.CommandContext(ctx, "ssh", + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + fmt.Sprintf("ubuntu@%s", publicIP), + executeCmd) + + sshCmd.Stdin = os.Stdin + sshCmd.Stdout = os.Stdout + sshCmd.Stderr = os.Stderr + + if err := sshCmd.Run(); err != nil { + fmt.Printf("\n✗ Ansible execution failed\n") + fmt.Printf("\nInstance still running. Debug with:\n") + fmt.Printf(" - SSH: pg-ami-builder ssh\n") + fmt.Printf(" - Cleanup: pg-ami-builder cleanup\n") + return fmt.Errorf("ansible execution failed: %w", err) + } + + fmt.Printf("\n✓ Ansible %s completed successfully\n", phase) + fmt.Printf("\nNext steps:\n") + fmt.Printf(" - Test changes: pg-ami-builder ssh\n") + fmt.Printf(" - Cleanup when done: pg-ami-builder cleanup\n") + + return nil +} + +func init() { + rootCmd.AddCommand(ansibleRerunCmd) + + ansibleRerunCmd.Flags().StringVar(&rerunInstanceID, "instance-id", "", "Target specific instance (default: from state file)") + ansibleRerunCmd.Flags().StringSliceVar(&ansibleArgs, "ansible-args", []string{}, "Additional ansible arguments") + ansibleRerunCmd.Flags().StringSliceVar(&skipTags, "skip-tags", []string{}, "Ansible tags to skip") + ansibleRerunCmd.Flags().StringVar(&region, "region", "us-east-1", "AWS region") + ansibleRerunCmd.Flags().BoolVar(&syncFiles, "sync-files", false, "Sync local files to instance before re-running ansible") + ansibleRerunCmd.Flags().StringVar(&sshPublicKey, "ssh-public-key", "", "SSH public key path (default: auto-detect from ~/.ssh/)") + ansibleRerunCmd.Flags().StringVar(&rerunGitSHA, "git-sha", "", "Git SHA for nix packages (default: from state file or current HEAD)") +} diff --git
a/nix/packages/pg-ami-builder/cmd/ansible_rerun_test.go b/nix/packages/pg-ami-builder/cmd/ansible_rerun_test.go new file mode 100644 index 000000000..9a4e90465 --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/ansible_rerun_test.go @@ -0,0 +1,30 @@ +package cmd + +import ( + "strings" + "testing" +) + +func TestAnsibleRerunCommandExists(t *testing.T) { + if ansibleRerunCmd == nil { + t.Fatal("ansibleRerunCmd is nil") + } + + if !strings.HasPrefix(ansibleRerunCmd.Use, "ansible-rerun") { + t.Errorf("Expected Use to start with 'ansible-rerun', got %s", ansibleRerunCmd.Use) + } +} + +func TestAnsibleRerunRequiresPhase(t *testing.T) { + // Test that ValidateArgs requires exactly 1 argument + err := ansibleRerunCmd.Args(ansibleRerunCmd, []string{}) + if err == nil { + t.Error("Expected error for missing phase argument") + } + + // Test with valid phase + err = ansibleRerunCmd.Args(ansibleRerunCmd, []string{"phase1"}) + if err != nil { + t.Errorf("Expected no error for valid phase, got: %v", err) + } +} diff --git a/nix/packages/pg-ami-builder/cmd/build.go b/nix/packages/pg-ami-builder/cmd/build.go new file mode 100644 index 000000000..881a937f7 --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/build.go @@ -0,0 +1,402 @@ +package cmd + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/spf13/cobra" + "github.com/supabase/postgres/pg-ami-builder/internal/aws" + "github.com/supabase/postgres/pg-ami-builder/internal/git" + "github.com/supabase/postgres/pg-ami-builder/internal/packer" + "github.com/supabase/postgres/pg-ami-builder/internal/state" +) + +var ( + postgresVersion string + region string + createAMI bool + ansibleArgs []string + instanceType string + stateFile string + sourceAMI string + gitSHA string +) + +var buildCmd = &cobra.Command{ + Use: "build", + Short: "Build AMI phases", + Long: `Build phase1 or phase2 of the AMI`, +} + +var buildPhase1Cmd = &cobra.Command{ + Use: "phase1", + Short: "Launch 
instance and run phase 1 ansible", + RunE: runBuildPhase1, +} + +var buildPhase2Cmd = &cobra.Command{ + Use: "phase2", + Short: "Launch instance from stage-1 AMI and run phase 2 ansible", + RunE: runBuildPhase2, +} + +func validatePostgresVersion(version string) error { + validVersions := map[string]bool{ + "15": true, + "16": true, + "17": true, + } + + if !validVersions[version] { + return fmt.Errorf("invalid postgres version: %s (must be 15, 16, or 17)", version) + } + return nil +} + +func runBuildPhase1(cmd *cobra.Command, args []string) error { + if err := validatePostgresVersion(postgresVersion); err != nil { + return err + } + + ctx := context.Background() + + // Get git information + sha, err := git.GetCurrentSHA() + if err != nil { + return fmt.Errorf("failed to get git SHA: %w", err) + } + + // Generate execution ID + executionID := aws.GenerateExecutionID(postgresVersion) + + fmt.Printf("✓ Execution ID: %s\n", executionID) + fmt.Printf("✓ Git SHA: %s\n", sha) + + // Get repo root for packer execution + repoRoot, err := git.GetRepoRoot() + if err != nil { + return fmt.Errorf("failed to get repo root: %w", err) + } + + // Rewrite template with unique AMI name + templatePath := filepath.Join(repoRoot, packer.GetPackerTemplate("phase1")) + tempTemplate, cleanup, err := packer.RewriteTemplateWithUniqueAMI(templatePath, executionID, "") + if err != nil { + return fmt.Errorf("failed to rewrite template: %w", err) + } + defer cleanup() + + // Build packer command with temp template + // Match CI behavior: pass postgresql_major to ansible instead of skipping tags + packerVars := map[string]string{ + "region": region, + "git-head-version": sha, + "ansible_arguments": fmt.Sprintf("-e postgresql_major=%s", postgresVersion), + } + packerCmd := packer.BuildPackerCommand("phase1", postgresVersion, executionID, packerVars) + + // Replace template path with temp template + packerCmd[len(packerCmd)-1] = tempTemplate + + fmt.Printf("\n✓ Running packer build with unique 
AMI name...\n") + fmt.Printf(" Command: %s\n\n", strings.Join(packerCmd, " ")) + + // Execute packer build + packerExec := exec.CommandContext(ctx, packerCmd[0], packerCmd[1:]...) + packerExec.Dir = repoRoot // Run from repo root + packerExec.Stdout = os.Stdout + packerExec.Stderr = os.Stderr + + packerErr := packerExec.Run() + + // Initialize EC2 client for instance discovery if packer failed + if packerErr != nil { + fmt.Printf("\n✗ Packer build failed: %v\n", packerErr) + + // Try to find the packer instance by tag + ec2Client, err := aws.NewEC2Client(ctx, region) + if err != nil { + fmt.Printf("⚠ Could not create EC2 client to find instance: %v\n", err) + return fmt.Errorf("packer build failed: %w", packerErr) + } + + fmt.Println("\n✓ Looking for packer instance...") + instanceID, err := ec2Client.FindInstanceByTag(ctx, "packerExecutionId", executionID) + if err != nil { + fmt.Printf("⚠ Could not find packer instance: %v\n", err) + fmt.Println("\nPacker may have cleaned up the instance already.") + return fmt.Errorf("packer build failed: %w", packerErr) + } + + fmt.Printf("✓ Found packer instance: %s\n", instanceID) + + // Save state for debugging + stateFilePath := stateFile + if stateFilePath == "" { + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + return fmt.Errorf("failed to get default state file: %w", err) + } + } + + buildState := &state.State{ + Region: region, + PostgresVersion: postgresVersion, + GitSHA: sha, + } + buildState.SetPhaseState("phase1", &state.PhaseState{ + InstanceID: instanceID, + ExecutionID: executionID, + Timestamp: time.Now().Format(time.RFC3339), + }) + + if err := state.SaveState(stateFilePath, buildState); err != nil { + return fmt.Errorf("failed to save state: %w", err) + } + + fmt.Printf("\nInstance kept running for debugging:\n") + fmt.Printf(" Instance ID: %s\n", instanceID) + fmt.Printf(" State saved to: %s\n", stateFilePath) + fmt.Printf("\nNext steps:\n") + fmt.Printf(" - SSH into instance: 
pg-ami-builder ssh\n") + fmt.Printf(" - Re-run ansible: pg-ami-builder ansible-rerun phase1 --sync-files\n") + fmt.Printf(" - Cleanup: pg-ami-builder cleanup\n") + return fmt.Errorf("packer build failed: %w", packerErr) + } + + fmt.Printf("\n✓ Packer build completed successfully!\n") + fmt.Println("✓ AMI created by packer") + + // Parse AMI ID from packer output (it's already in stdout above) + // For now, use AWS API to find the AMI by tags + ec2Client, err := aws.NewEC2Client(ctx, region) + if err != nil { + fmt.Printf("⚠ Could not create EC2 client to find AMI: %v\n", err) + fmt.Println("\n✓ Build phase 1 complete!") + return nil + } + + // Find AMI by execution ID tag + amiID, err := ec2Client.FindAMIByTag(ctx, "packerExecutionId", executionID) + if err != nil { + fmt.Printf("⚠ Could not find created AMI: %v\n", err) + fmt.Println("\n✓ Build phase 1 complete!") + return nil + } + + fmt.Printf("✓ AMI ID: %s\n", amiID) + + // Save AMI ID to state for phase2 + stateFilePath := stateFile + if stateFilePath == "" { + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + fmt.Printf("⚠ Could not get state file path: %v\n", err) + fmt.Println("\n✓ Build phase 1 complete!") + return nil + } + } + + buildState := &state.State{ + Region: region, + PostgresVersion: postgresVersion, + GitSHA: sha, + } + buildState.SetPhaseState("phase1", &state.PhaseState{ + ExecutionID: executionID, + AMIID: amiID, + Timestamp: time.Now().Format(time.RFC3339), + }) + + if err := state.SaveState(stateFilePath, buildState); err != nil { + fmt.Printf("⚠ Could not save state: %v\n", err) + } else { + fmt.Printf("✓ State saved to: %s\n", stateFilePath) + } + + fmt.Println("\n✓ Build phase 1 complete!") + fmt.Printf("\nNext: Run phase 2 with:\n") + fmt.Printf(" pg-ami-builder build phase2 --postgres-version %s\n", postgresVersion) + return nil +} + +func runBuildPhase2(cmd *cobra.Command, args []string) error { + if err := validatePostgresVersion(postgresVersion); err != nil { + 
return err + } + + ctx := context.Background() + + // If --source-ami not provided, read from state + if sourceAMI == "" { + stateFilePath := stateFile + if stateFilePath == "" { + var err error + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + return fmt.Errorf("failed to get state file path: %w", err) + } + } + + buildState, err := state.LoadState(stateFilePath) + if err != nil { + return fmt.Errorf("no --source-ami provided and failed to load from state: %w\nRun phase1 first or provide --source-ami", err) + } + + // Get phase1 AMI + phase1State := buildState.GetPhaseState("phase1") + if phase1State == nil || phase1State.AMIID == "" { + return fmt.Errorf("no AMI ID found in state file. Run phase1 first or provide --source-ami") + } + + sourceAMI = phase1State.AMIID + fmt.Printf("✓ Using AMI from phase1 state: %s\n", sourceAMI) + } + + // Get git SHA + sha := gitSHA + if sha == "" { + var err error + sha, err = git.GetCurrentSHA() + if err != nil { + return fmt.Errorf("failed to get git SHA: %w", err) + } + } + + // Generate execution ID + executionID := aws.GenerateExecutionID(postgresVersion) + + fmt.Printf("✓ Phase 2 Build\n") + fmt.Printf("✓ Source AMI: %s\n", sourceAMI) + fmt.Printf("✓ Postgres Version: %s\n", postgresVersion) + fmt.Printf("✓ Execution ID: %s\n", executionID) + fmt.Printf("✓ Git SHA: %s\n", sha) + + // Get repo root for packer execution + repoRoot, err := git.GetRepoRoot() + if err != nil { + return fmt.Errorf("failed to get repo root: %w", err) + } + + // Rewrite template with unique AMI name + templatePath := filepath.Join(repoRoot, packer.GetPackerTemplate("phase2")) + tempTemplate, cleanup, err := packer.RewriteTemplateWithUniqueAMI(templatePath, executionID, "") + if err != nil { + return fmt.Errorf("failed to rewrite template: %w", err) + } + defer cleanup() + + // Build packer command with temp template + packerVars := map[string]string{ + "region": region, + "git-head-version": sha, + "source-ami": sourceAMI, // 
Pass source AMI as variable + "git_sha": sha, + "postgres_major_version": postgresVersion, // Needed by ansible scripts + } + packerCmd := packer.BuildPackerCommand("phase2", postgresVersion, executionID, packerVars) + + // Replace template path with temp template + packerCmd[len(packerCmd)-1] = tempTemplate + + fmt.Printf("\n✓ Running packer build with unique AMI name...\n") + fmt.Printf(" Command: %s\n\n", strings.Join(packerCmd, " ")) + + // Execute packer build + packerExec := exec.CommandContext(ctx, packerCmd[0], packerCmd[1:]...) + packerExec.Dir = repoRoot + packerExec.Stdout = os.Stdout + packerExec.Stderr = os.Stderr + + packerErr := packerExec.Run() + + // Initialize EC2 client for instance discovery if packer failed + if packerErr != nil { + fmt.Printf("\n✗ Packer build failed: %v\n", packerErr) + + // Try to find the packer instance by tag + ec2Client, err := aws.NewEC2Client(ctx, region) + if err != nil { + fmt.Printf("⚠ Could not create EC2 client to find instance: %v\n", err) + return fmt.Errorf("packer build failed: %w", packerErr) + } + + fmt.Println("\n✓ Looking for packer instance...") + instanceID, err := ec2Client.FindInstanceByTag(ctx, "packerExecutionId", executionID) + if err != nil { + fmt.Printf("⚠ Could not find packer instance: %v\n", err) + fmt.Println("\nPacker may have cleaned up the instance already.") + return fmt.Errorf("packer build failed: %w", packerErr) + } + + fmt.Printf("✓ Found packer instance: %s\n", instanceID) + + // Save state for debugging + stateFilePath := stateFile + if stateFilePath == "" { + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + return fmt.Errorf("failed to get default state file: %w", err) + } + } + + buildState := &state.State{ + Region: region, + PostgresVersion: postgresVersion, + GitSHA: sha, + } + buildState.SetPhaseState("phase2", &state.PhaseState{ + InstanceID: instanceID, + ExecutionID: executionID, + Timestamp: time.Now().Format(time.RFC3339), + }) + + if err := 
state.SaveState(stateFilePath, buildState); err != nil { + return fmt.Errorf("failed to save state: %w", err) + } + + fmt.Printf("\nInstance kept running for debugging:\n") + fmt.Printf(" Instance ID: %s\n", instanceID) + fmt.Printf(" State saved to: %s\n", stateFilePath) + fmt.Printf("\nNext steps:\n") + fmt.Printf(" - SSH into instance: pg-ami-builder ssh\n") + fmt.Printf(" - Re-run ansible: pg-ami-builder ansible-rerun phase2 --sync-files\n") + fmt.Printf(" - Cleanup: pg-ami-builder cleanup\n") + return fmt.Errorf("packer build failed: %w", packerErr) + } + + fmt.Printf("\n✓ Packer build completed successfully!\n") + fmt.Println("✓ Final production AMI created by packer") + fmt.Println("\n✓ Build phase 2 complete!") + return nil +} + +func init() { + rootCmd.AddCommand(buildCmd) + buildCmd.AddCommand(buildPhase1Cmd) + buildCmd.AddCommand(buildPhase2Cmd) + + // Phase 1 flags + buildPhase1Cmd.Flags().StringVar(&postgresVersion, "postgres-version", "", "PostgreSQL major version (required)") + buildPhase1Cmd.MarkFlagRequired("postgres-version") + buildPhase1Cmd.Flags().StringVar(®ion, "region", "us-east-1", "AWS region") + buildPhase1Cmd.Flags().BoolVar(&createAMI, "create-ami", false, "Create AMI on success") + buildPhase1Cmd.Flags().StringSliceVar(&ansibleArgs, "ansible-args", []string{}, "Additional ansible arguments") + buildPhase1Cmd.Flags().StringVar(&instanceType, "instance-type", "c6g.4xlarge", "EC2 instance type") + buildPhase1Cmd.Flags().StringVar(&stateFile, "state-file", "", "Custom state file path") + + // Phase 2 flags + buildPhase2Cmd.Flags().StringVar(&sourceAMI, "source-ami", "", "Stage-1 AMI ID (optional, reads from state if not provided)") + buildPhase2Cmd.Flags().StringVar(&postgresVersion, "postgres-version", "", "PostgreSQL major version (required)") + buildPhase2Cmd.MarkFlagRequired("postgres-version") + buildPhase2Cmd.Flags().StringVar(®ion, "region", "us-east-1", "AWS region") + buildPhase2Cmd.Flags().BoolVar(&createAMI, "create-ami", false, 
"Create AMI on success") + buildPhase2Cmd.Flags().StringVar(&instanceType, "instance-type", "c6g.4xlarge", "EC2 instance type") + buildPhase2Cmd.Flags().StringVar(&stateFile, "state-file", "", "Custom state file path") + buildPhase2Cmd.Flags().StringVar(&gitSHA, "git-sha", "", "Git SHA for nix packages") +} diff --git a/nix/packages/pg-ami-builder/cmd/build_test.go b/nix/packages/pg-ami-builder/cmd/build_test.go new file mode 100644 index 000000000..b3e6ef79d --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/build_test.go @@ -0,0 +1,60 @@ +package cmd + +import ( + "testing" + + "github.com/spf13/pflag" +) + +func TestBuildPhase1Flags(t *testing.T) { + // Test that postgres-version flag is required + // Reset the flag value + postgresVersion = "" + + // Call the validation directly since Cobra's required flag check + // happens during parse, and we want to test the validation logic + err := validatePostgresVersion(postgresVersion) + if err == nil { + t.Error("Expected error for empty --postgres-version flag") + } + + // Test that validation is called in the command + // This verifies the command structure accepts the flag + if buildPhase1Cmd.Flags().Lookup("postgres-version") == nil { + t.Error("Expected --postgres-version flag to exist") + } + + // Verify the flag is marked as required + required := false + buildPhase1Cmd.Flags().VisitAll(func(f *pflag.Flag) { + if f.Name == "postgres-version" { + required = true + } + }) + if !required { + t.Error("Expected --postgres-version to be defined") + } +} + +func TestValidatePostgresVersion(t *testing.T) { + tests := []struct { + version string + wantErr bool + }{ + {"15", false}, + {"16", false}, + {"17", false}, + {"14", true}, + {"abc", true}, + {"", true}, + } + + for _, tt := range tests { + t.Run(tt.version, func(t *testing.T) { + err := validatePostgresVersion(tt.version) + if (err != nil) != tt.wantErr { + t.Errorf("validatePostgresVersion(%s) error = %v, wantErr %v", tt.version, err, tt.wantErr) + } + }) + } 
+} diff --git a/nix/packages/pg-ami-builder/cmd/cleanup.go b/nix/packages/pg-ami-builder/cmd/cleanup.go new file mode 100644 index 000000000..dbeba7103 --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/cleanup.go @@ -0,0 +1,144 @@ +package cmd + +import ( + "bufio" + "context" + "fmt" + "os" + "strings" + + "github.com/spf13/cobra" + "github.com/supabase/postgres/pg-ami-builder/internal/aws" + "github.com/supabase/postgres/pg-ami-builder/internal/state" +) + +var ( + cleanupInstanceID string + cleanupForce bool + cleanupPhase string +) + +var cleanupCmd = &cobra.Command{ + Use: "cleanup", + Short: "Terminate instance and remove associated resources", + Long: `Terminate the EC2 instance and cleanup associated resources. + +By default, uses the instance from the state file. Override with --instance-id or --phase. +Will prompt for confirmation unless --force is used.`, + RunE: runCleanup, +} + +func runCleanup(cmd *cobra.Command, args []string) error { + ctx := context.Background() + + // Get instance ID and state + instanceID := cleanupInstanceID + var buildState *state.State + stateFilePath := stateFile + + if instanceID == "" { + if stateFilePath == "" { + var err error + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + return fmt.Errorf("failed to get default state file: %w", err) + } + } + + var err error + buildState, err = state.LoadState(stateFilePath) + if err != nil { + return fmt.Errorf("failed to load state: %w", err) + } + + region = buildState.Region + + // Get instance ID from phase-specific state + if cleanupPhase != "" { + phaseState := buildState.GetPhaseState(cleanupPhase) + if phaseState == nil || phaseState.InstanceID == "" { + return fmt.Errorf("no instance found for %s in state file", cleanupPhase) + } + instanceID = phaseState.InstanceID + } else { + // Try to find any instance (prefer phase2, then phase1, then legacy) + if buildState.Phase2 != nil && buildState.Phase2.InstanceID != "" { + instanceID = 
buildState.Phase2.InstanceID + cleanupPhase = "phase2" + } else if buildState.Phase1 != nil && buildState.Phase1.InstanceID != "" { + instanceID = buildState.Phase1.InstanceID + cleanupPhase = "phase1" + } else if buildState.InstanceID != "" { + instanceID = buildState.InstanceID + cleanupPhase = buildState.Phase + } + } + } + + if instanceID == "" { + return fmt.Errorf("no instance ID available (use --instance-id or check state file)") + } + + // Display instance information + fmt.Printf("Instance to terminate: %s\n", instanceID) + if cleanupPhase != "" { + fmt.Printf("Phase: %s\n", cleanupPhase) + } + if buildState != nil { + phaseState := buildState.GetPhaseState(cleanupPhase) + if phaseState != nil && phaseState.ExecutionID != "" { + fmt.Printf("Execution ID: %s\n", phaseState.ExecutionID) + } + } + fmt.Println() + + // Confirm termination + if !cleanupForce { + fmt.Print("Terminate this instance? [y/N]: ") + reader := bufio.NewReader(os.Stdin) + response, err := reader.ReadString('\n') + if err != nil { + return fmt.Errorf("failed to read input: %w", err) + } + + response = strings.ToLower(strings.TrimSpace(response)) + if response != "y" && response != "yes" { + fmt.Println("Cleanup cancelled") + return nil + } + } + + // Create EC2 client + ec2Client, err := aws.NewEC2Client(ctx, region) + if err != nil { + return fmt.Errorf("failed to create EC2 client: %w", err) + } + + // Terminate instance + fmt.Println("✓ Terminating instance...") + if err := ec2Client.TerminateInstance(ctx, instanceID); err != nil { + return fmt.Errorf("failed to terminate instance: %w", err) + } + + fmt.Println("✓ Instance terminated") + + // Clear state file + if stateFilePath != "" { + if err := state.ClearState(stateFilePath); err != nil { + fmt.Printf("Warning: failed to clear state file: %v\n", err) + } else { + fmt.Println("✓ State file cleared") + } + } + + return nil +} + +func init() { + rootCmd.AddCommand(cleanupCmd) + + cleanupCmd.Flags().StringVar(&cleanupInstanceID, 
"instance-id", "", "Target specific instance (default: from state file)") + cleanupCmd.Flags().StringVar(&cleanupPhase, "phase", "", "Clean up specific phase (phase1 or phase2)") + cleanupCmd.Flags().BoolVar(&cleanupForce, "force", false, "Skip confirmation prompt") + cleanupCmd.Flags().StringVar(®ion, "region", "us-east-1", "AWS region") +} diff --git a/nix/packages/pg-ami-builder/cmd/cleanup_test.go b/nix/packages/pg-ami-builder/cmd/cleanup_test.go new file mode 100644 index 000000000..55ce9b3f2 --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/cleanup_test.go @@ -0,0 +1,15 @@ +package cmd + +import ( + "testing" +) + +func TestCleanupCommandExists(t *testing.T) { + if cleanupCmd == nil { + t.Fatal("cleanupCmd is nil") + } + + if cleanupCmd.Use != "cleanup" { + t.Errorf("Expected Use='cleanup', got %s", cleanupCmd.Use) + } +} diff --git a/nix/packages/pg-ami-builder/cmd/create_ami.go b/nix/packages/pg-ami-builder/cmd/create_ami.go new file mode 100644 index 000000000..b3f6af09e --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/create_ami.go @@ -0,0 +1,122 @@ +package cmd + +import ( + "context" + "fmt" + "time" + + "github.com/spf13/cobra" + "github.com/supabase/postgres/pg-ami-builder/internal/aws" + "github.com/supabase/postgres/pg-ami-builder/internal/state" +) + +var createAMICmd = &cobra.Command{ + Use: "create-ami [phase1|phase2]", + Short: "Create AMI from instance", + Long: `Create an AMI from the instance used during ansible-rerun for the specified phase.`, + Args: cobra.ExactArgs(1), + ValidArgs: []string{"phase1", "phase2"}, + RunE: runCreateAMI, +} + +func runCreateAMI(cmd *cobra.Command, args []string) error { + phase := args[0] + if phase != "phase1" && phase != "phase2" { + return fmt.Errorf("phase must be 'phase1' or 'phase2'") + } + + ctx := context.Background() + + // Load state + stateFilePath := stateFile + if stateFilePath == "" { + var err error + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + return 
fmt.Errorf("failed to get default state file: %w", err) + } + } + + buildState, err := state.LoadState(stateFilePath) + if err != nil { + return fmt.Errorf("failed to load state: %w", err) + } + + // Get phase-specific state + phaseState := buildState.GetPhaseState(phase) + if phaseState == nil || phaseState.InstanceID == "" { + return fmt.Errorf("no instance ID found for %s. Run ansible-rerun first", phase) + } + + instanceID := phaseState.InstanceID + executionID := phaseState.ExecutionID + + fmt.Printf("✓ Found %s instance: %s\n", phase, instanceID) + fmt.Printf("✓ Execution ID: %s\n", executionID) + + // Create EC2 client + ec2Client, err := aws.NewEC2Client(ctx, buildState.Region) + if err != nil { + return fmt.Errorf("failed to create EC2 client: %w", err) + } + + // Generate AMI name + timestamp := time.Now().Unix() + var amiName string + if phase == "phase1" { + amiName = fmt.Sprintf("supabase-postgres-%s-stage-1-%d", buildState.PostgresVersion, timestamp) + } else { + amiName = fmt.Sprintf("supabase-postgres-%s-%d", buildState.PostgresVersion, timestamp) + } + + fmt.Printf("✓ Creating AMI: %s\n", amiName) + + // Create the AMI + amiInput := &aws.CreateAMIInput{ + InstanceID: instanceID, + ExecutionID: executionID, + Phase: phase, + PostgresVersion: buildState.PostgresVersion, + GitSHA: buildState.GitSHA, + } + + amiID, err := ec2Client.CreateAMI(ctx, amiInput) + if err != nil { + return fmt.Errorf("failed to create AMI: %w", err) + } + + fmt.Printf("✓ AMI created: %s\n", amiID) + fmt.Printf("✓ Waiting for AMI to be available...\n") + + // Wait for AMI to be available + if err := ec2Client.WaitForAMIAvailable(ctx, amiID); err != nil { + return fmt.Errorf("failed waiting for AMI: %w", err) + } + + fmt.Printf("✓ AMI is now available\n") + + // Update state with AMI ID + phaseState.AMIID = amiID + phaseState.Timestamp = time.Now().Format(time.RFC3339) + buildState.SetPhaseState(phase, phaseState) + + if err := state.SaveState(stateFilePath, buildState); err != 
nil { + return fmt.Errorf("failed to save state: %w", err) + } + + fmt.Printf("✓ State updated: %s\n", stateFilePath) + + if phase == "phase1" { + fmt.Printf("\nNext: Run phase 2 with:\n") + fmt.Printf(" pg-ami-builder build phase2 --postgres-version %s\n", buildState.PostgresVersion) + } else { + fmt.Printf("\n✓ Final production AMI ready: %s\n", amiID) + } + + return nil +} + +func init() { + rootCmd.AddCommand(createAMICmd) + createAMICmd.Flags().StringVar(&stateFile, "state-file", "", "Custom state file path") +} diff --git a/nix/packages/pg-ami-builder/cmd/root.go b/nix/packages/pg-ami-builder/cmd/root.go new file mode 100644 index 000000000..8e2dccca1 --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/root.go @@ -0,0 +1,29 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "pg-ami-builder", + Short: "Local AMI development tool for Supabase Postgres", + Long: `pg-ami-builder is a CLI tool for iterating on AMI builds locally. 
+ +It allows developers to run individual build phases, debug ansible failures, +and quickly iterate without running full CI/CD pipelines.`, +} + +// Execute runs the root command +func Execute() { + if err := rootCmd.Execute(); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func init() { + rootCmd.CompletionOptions.DisableDefaultCmd = true +} diff --git a/nix/packages/pg-ami-builder/cmd/ssh.go b/nix/packages/pg-ami-builder/cmd/ssh.go new file mode 100644 index 000000000..6806aed19 --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/ssh.go @@ -0,0 +1,152 @@ +package cmd + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/spf13/cobra" + "github.com/supabase/postgres/pg-ami-builder/internal/state" +) + +var ( + sshInstanceID string + awsEC2ConnectCmd string +) + +var sshCmd = &cobra.Command{ + Use: "ssh", + Short: "Connect to instance via EC2 Instance Connect", + Long: `Connect to the instance using AWS EC2 Instance Connect. + +By default, uses the instance ID from the state file. Override with --instance-id. 
+Use --aws-ec2-connect-cmd to provide a full custom AWS EC2 Instance Connect command string.`, + RunE: runSSH, +} + +func runSSH(cmd *cobra.Command, args []string) error { + ctx := context.Background() + + // Use EC2 Instance Connect if AWS CLI command is provided + if awsEC2ConnectCmd != "" { + return connectViaEC2InstanceConnect(ctx) + } + + // Get instance ID from state or flag + instanceID := sshInstanceID + if instanceID == "" { + stateFilePath := stateFile + if stateFilePath == "" { + var err error + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + return fmt.Errorf("failed to get default state file: %w", err) + } + } + + buildState, err := state.LoadState(stateFilePath) + if err != nil { + return fmt.Errorf("failed to load state: %w", err) + } + + instanceID = buildState.InstanceID + region = buildState.Region + } + + if instanceID == "" { + return fmt.Errorf("no instance ID available (use --instance-id or run build command first)") + } + + // Connect via EC2 Instance Connect + return connectViaEC2InstanceConnectDirect(ctx, instanceID, region) +} + +func connectViaEC2InstanceConnect(ctx context.Context) error { + fmt.Printf("✓ Connecting via EC2 Instance Connect...\n") + + // Create temporary SSH config file + tempSSHConfig, err := os.CreateTemp("", "ssh-config-*") + if err != nil { + return fmt.Errorf("failed to create temp SSH config: %w", err) + } + defer os.Remove(tempSSHConfig.Name()) + + // Write SSH config with StrictHostKeyChecking disabled + sshConfig := `Host * + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + LogLevel ERROR +` + if _, err := tempSSHConfig.WriteString(sshConfig); err != nil { + return fmt.Errorf("failed to write SSH config: %w", err) + } + tempSSHConfig.Close() + + // Create temporary directory for SSH wrapper + tempBinDir, err := os.MkdirTemp("", "ssh-wrapper-*") + if err != nil { + return fmt.Errorf("failed to create temp bin directory: %w", err) + } + defer os.RemoveAll(tempBinDir) + + // Create SSH 
// connectViaEC2InstanceConnectDirect runs `aws ec2-instance-connect ssh` for
// the given instance and region as OS user "ubuntu", attaching the child
// process to this process's stdin, stdout and stderr. A failing command is
// returned as a wrapped error.
func connectViaEC2InstanceConnectDirect(ctx context.Context, instanceID, region string) error {
	fmt.Printf("✓ Connecting via EC2 Instance Connect...\n")
	fmt.Printf("  Instance: %s\n", instanceID)
	fmt.Printf("  Region: %s\n\n", region)

	// Arguments of the AWS CLI invocation
	cliArgs := []string{
		"ec2-instance-connect", "ssh",
		"--instance-id", instanceID,
		"--region", region,
		"--os-user", "ubuntu",
	}

	session := exec.CommandContext(ctx, "aws", cliArgs...)
	session.Stdin, session.Stdout, session.Stderr = os.Stdin, os.Stdout, os.Stderr

	runErr := session.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("EC2 Instance Connect failed: %w", runErr)
}
a/nix/packages/pg-ami-builder/cmd/ssh_test.go b/nix/packages/pg-ami-builder/cmd/ssh_test.go new file mode 100644 index 000000000..b5dd6dc57 --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/ssh_test.go @@ -0,0 +1,15 @@ +package cmd + +import ( + "testing" +) + +func TestSSHCommandExists(t *testing.T) { + if sshCmd == nil { + t.Fatal("sshCmd is nil") + } + + if sshCmd.Use != "ssh" { + t.Errorf("Expected Use='ssh', got %s", sshCmd.Use) + } +} diff --git a/nix/packages/pg-ami-builder/cmd/version.go b/nix/packages/pg-ami-builder/cmd/version.go new file mode 100644 index 000000000..8795c089f --- /dev/null +++ b/nix/packages/pg-ami-builder/cmd/version.go @@ -0,0 +1,21 @@ +package cmd + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +var Version = "dev" + +var versionCmd = &cobra.Command{ + Use: "version", + Short: "Print version information", + Run: func(cmd *cobra.Command, args []string) { + fmt.Printf("pg-ami-builder version %s\n", Version) + }, +} + +func init() { + rootCmd.AddCommand(versionCmd) +} diff --git a/nix/packages/pg-ami-builder/go.mod b/nix/packages/pg-ami-builder/go.mod new file mode 100644 index 000000000..51bb588e8 --- /dev/null +++ b/nix/packages/pg-ami-builder/go.mod @@ -0,0 +1,28 @@ +module github.com/supabase/postgres/pg-ami-builder + +go 1.23.2 + +require ( + github.com/aws/aws-sdk-go-v2 v1.39.6 + github.com/aws/aws-sdk-go-v2/config v1.31.20 + github.com/aws/aws-sdk-go-v2/service/ec2 v1.269.0 + github.com/aws/aws-sdk-go-v2/service/iam v1.50.2 + github.com/aws/aws-sdk-go-v2/service/ssm v1.67.2 + github.com/spf13/cobra v1.10.1 + github.com/spf13/pflag v1.0.10 +) + +require ( + github.com/aws/aws-sdk-go-v2/credentials v1.18.24 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect + 
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 // indirect + github.com/aws/smithy-go v1.23.2 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect +) diff --git a/nix/packages/pg-ami-builder/go.sum b/nix/packages/pg-ami-builder/go.sum new file mode 100644 index 000000000..73f866a24 --- /dev/null +++ b/nix/packages/pg-ami-builder/go.sum @@ -0,0 +1,43 @@ +github.com/aws/aws-sdk-go-v2 v1.39.6 h1:2JrPCVgWJm7bm83BDwY5z8ietmeJUbh3O2ACnn+Xsqk= +github.com/aws/aws-sdk-go-v2 v1.39.6/go.mod h1:c9pm7VwuW0UPxAEYGyTmyurVcNrbF6Rt/wixFqDhcjE= +github.com/aws/aws-sdk-go-v2/config v1.31.20 h1:/jWF4Wu90EhKCgjTdy1DGxcbcbNrjfBHvksEL79tfQc= +github.com/aws/aws-sdk-go-v2/config v1.31.20/go.mod h1:95Hh1Tc5VYKL9NJ7tAkDcqeKt+MCXQB1hQZaRdJIZE0= +github.com/aws/aws-sdk-go-v2/credentials v1.18.24 h1:iJ2FmPT35EaIB0+kMa6TnQ+PwG5A1prEdAw+PsMzfHg= +github.com/aws/aws-sdk-go-v2/credentials v1.18.24/go.mod h1:U91+DrfjAiXPDEGYhh/x29o4p0qHX5HDqG7y5VViv64= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 h1:T1brd5dR3/fzNFAQch/iBKeX07/ffu/cLu+q+RuzEWk= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13/go.mod h1:Peg/GBAQ6JDt+RoBf4meB1wylmAipb7Kg2ZFakZTlwk= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 h1:a+8/MLcWlIxo1lF9xaGt3J/u3yOZx+CdSveSNwjhD40= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13/go.mod h1:oGnKwIYZ4XttyU2JWxFrwvhF6YKiK/9/wmE3v3Iu9K8= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 h1:HBSI2kDkMdWz4ZM7FjwE7e/pWDEZ+nR95x8Ztet1ooY= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13/go.mod h1:YE94ZoDArI7awZqJzBAZ3PDD2zSfuP7w6P2knOzIn8M= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 
h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.269.0 h1:JMU0UqLvPUKovF/kXcIQf1ZVyv3+BwVW5O3bZrXDqBo= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.269.0/go.mod h1:NDdDLLW5PtLLXN661gKcvJvqAH5OBXsfhMlmKVu1/pY= +github.com/aws/aws-sdk-go-v2/service/iam v1.50.2 h1:A03KM3Mo3IitRdM6dg1x5P+/POvDwAYD02YfoYkDgok= +github.com/aws/aws-sdk-go-v2/service/iam v1.50.2/go.mod h1:cuEMbL1mNtO1sUyT+DYDNIA8Y7aJG1oIdgHqUk29Uzk= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 h1:x2Ibm/Af8Fi+BH+Hsn9TXGdT+hKbDd5XOTZxTMxDk7o= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3/go.mod h1:IW1jwyrQgMdhisceG8fQLmQIydcT/jWY21rFhzgaKwo= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 h1:kDqdFvMY4AtKoACfzIGD8A0+hbT41KTKF//gq7jITfM= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13/go.mod h1:lmKuogqSU3HzQCwZ9ZtcqOc5XGMqtDK7OIc2+DxiUEg= +github.com/aws/aws-sdk-go-v2/service/ssm v1.67.2 h1:ybM2UK1Fx4AeurfSGzLKdnjw5j6g6mwVI0Lsr7ZnuEc= +github.com/aws/aws-sdk-go-v2/service/ssm v1.67.2/go.mod h1:uNHuYAQazkHqpD+hVomA2+eDSuKJzerno7Fnha6N6/Y= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 h1:NjShtS1t8r5LUfFVtFeI8xLAHQNTa7UI0VawXlrBMFQ= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.3/go.mod h1:fKvyjJcz63iL/ftA6RaM8sRCtN4r4zl4tjL3qw5ec7k= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 h1:gTsnx0xXNQ6SBbymoDvcoRHL+q4l/dAFsQuKfDWSaGc= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7/go.mod h1:klO+ejMvYsB4QATfEOIXk8WAEwN4N0aBfJpvC+5SZBo= +github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 h1:HK5ON3KmQV2HcAunnx4sKLB9aPf3gKGwVAf7xnx0QT0= +github.com/aws/aws-sdk-go-v2/service/sts v1.40.2/go.mod h1:E19xDjpzPZC7LS2knI9E6BaRFDK43Eul7vd6rSq2HWk= +github.com/aws/smithy-go v1.23.2 h1:Crv0eatJUQhaManss33hS5r40CG3ZFH+21XSkqMrIUM= +github.com/aws/smithy-go v1.23.2/go.mod 
h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/nix/packages/pg-ami-builder/internal/ansible/runner.go b/nix/packages/pg-ami-builder/internal/ansible/runner.go new file mode 100644 index 000000000..c65b7af54 --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/ansible/runner.go @@ -0,0 +1,58 @@ +package ansible + +import ( + "fmt" + "strings" +) + +// FileMapping represents a source to destination file mapping +type FileMapping struct { + Src string + Dst string +} + +// BuildFileList returns the list of files to sync based on phase +func BuildFileList(phase string) []FileMapping { + // Base files synced for both phases (matching packer templates) + baseMappings := []FileMapping{ + {Src: "ansible/", Dst: "/tmp/ansible-playbook/ansible/"}, + {Src: "scripts/", Dst: "/tmp/ansible-playbook/scripts/"}, + {Src: "ansible/vars.yml", Dst: "/tmp/ansible-playbook/vars.yml"}, + } + + if phase == "phase2" { + baseMappings = append(baseMappings, FileMapping{ + Src: "migrations/", + 
// BuildAnsiblePlaybookCommand returns the shell command line used to re-run
// provisioning on the instance.
//
// For "phase1" it is an ansible-playbook invocation against localhost with
// the postgres version passed as extra vars. For any other phase it exports
// GIT_SHA and POSTGRES_MAJOR_VERSION and runs nix-provision.sh. Entries of
// additionalArgs are appended to the command, separated by single spaces.
func BuildAnsiblePlaybookCommand(phase, postgresVersion, gitSHA string, additionalArgs []string) string {
	pieces := make([]string, 0, 1+len(additionalArgs))

	switch phase {
	case "phase1":
		// Phase1: ansible-playbook runs locally (not in a chroot like packer does)
		const playbookTemplate = "ansible-playbook -c local -i 'localhost,' /tmp/ansible-playbook/ansible/playbook.yml " +
			"--extra-vars '{\"nixpkg_mode\": true, \"debpkg_mode\": false, \"stage2_nix\": false}' " +
			"--extra-vars \"psql_version=psql_%[1]s\" " +
			"-e postgresql_major=%[1]s"
		pieces = append(pieces, fmt.Sprintf(playbookTemplate, postgresVersion))
	default:
		// Phase2: the nix provision script with its required env vars
		const provisionTemplate = "export GIT_SHA=%s && export POSTGRES_MAJOR_VERSION=%s && bash /tmp/ansible-playbook/scripts/nix-provision.sh"
		pieces = append(pieces, fmt.Sprintf(provisionTemplate, gitSHA, postgresVersion))
	}

	// Extra arguments follow the base command
	pieces = append(pieces, additionalArgs...)
	return strings.Join(pieces, " ")
}
"ansible/vars.yml", Dst: "/tmp/ansible-playbook/vars.yml"}, + {Src: "migrations/", Dst: "/tmp/migrations/"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := BuildFileList(tt.phase) + if len(got) != len(tt.want) { + t.Errorf("BuildFileList() returned %d mappings, want %d", len(got), len(tt.want)) + } + }) + } +} + +func TestBuildAnsiblePlaybookCommand(t *testing.T) { + tests := []struct { + name string + phase string + postgresVer string + gitSHA string + additionalArgs []string + wantContains []string + }{ + { + name: "phase1 basic", + phase: "phase1", + postgresVer: "17", + gitSHA: "abc123", + additionalArgs: []string{}, + wantContains: []string{"ansible-playbook", "localhost", "postgresql_major=17", "psql_17"}, + }, + { + name: "phase1 with skip tags", + phase: "phase1", + postgresVer: "16", + gitSHA: "def456", + additionalArgs: []string{"--skip-tags", "migrations"}, + wantContains: []string{"ansible-playbook", "postgresql_major=16", "--skip-tags", "migrations"}, + }, + { + name: "phase2 basic", + phase: "phase2", + postgresVer: "15", + gitSHA: "xyz789", + additionalArgs: []string{}, + wantContains: []string{"nix-provision.sh", "GIT_SHA=xyz789", "POSTGRES_MAJOR_VERSION=15"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := BuildAnsiblePlaybookCommand(tt.phase, tt.postgresVer, tt.gitSHA, tt.additionalArgs) + for _, want := range tt.wantContains { + if !strings.Contains(got, want) { + t.Errorf("BuildAnsiblePlaybookCommand() = %s, want to contain %s", got, want) + } + } + }) + } +} diff --git a/nix/packages/pg-ami-builder/internal/aws/ec2.go b/nix/packages/pg-ami-builder/internal/aws/ec2.go new file mode 100644 index 000000000..20c87fb4f --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/aws/ec2.go @@ -0,0 +1,352 @@ +package aws + +import ( + "context" + "fmt" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + 
// GenerateExecutionID returns an execution ID of the form
// "<unix seconds>-<postgresVersion>", using the current time.
func GenerateExecutionID(postgresVersion string) string {
	return fmt.Sprintf("%d-%s", time.Now().Unix(), postgresVersion)
}
&ec2.RunInstancesInput{ + ImageId: aws.String(input.AMI), + InstanceType: types.InstanceType(input.InstanceType), + MinCount: aws.Int32(1), + MaxCount: aws.Int32(1), + SecurityGroupIds: []string{sgID}, + TagSpecifications: []types.TagSpecification{tagSpec}, + } + + // Add IAM instance profile if provided + if input.InstanceProfile != "" { + runInput.IamInstanceProfile = &types.IamInstanceProfileSpecification{ + Name: aws.String(input.InstanceProfile), + } + } + + result, err := c.client.RunInstances(ctx, runInput) + if err != nil { + return "", fmt.Errorf("failed to launch instance: %w", err) + } + + if len(result.Instances) == 0 { + return "", fmt.Errorf("no instances were launched") + } + + return *result.Instances[0].InstanceId, nil +} + +// ensureSecurityGroup creates or retrieves the security group +func (c *EC2Client) ensureSecurityGroup(ctx context.Context) (string, error) { + groupName := "pg-ami-builder-sg" + + // Try to find existing group + describeInput := &ec2.DescribeSecurityGroupsInput{ + Filters: []types.Filter{ + { + Name: aws.String("group-name"), + Values: []string{groupName}, + }, + }, + } + + result, err := c.client.DescribeSecurityGroups(ctx, describeInput) + if err == nil && len(result.SecurityGroups) > 0 { + return *result.SecurityGroups[0].GroupId, nil + } + + // Create new security group + createInput := &ec2.CreateSecurityGroupInput{ + GroupName: aws.String(groupName), + Description: aws.String("Security group for pg-ami-builder instances"), + } + + createResult, err := c.client.CreateSecurityGroup(ctx, createInput) + if err != nil { + return "", fmt.Errorf("failed to create security group: %w", err) + } + + return *createResult.GroupId, nil +} + +// WaitForInstanceRunning waits for an instance to be in running state +func (c *EC2Client) WaitForInstanceRunning(ctx context.Context, instanceID string) error { + waiter := ec2.NewInstanceRunningWaiter(c.client) + maxWaitTime := 10 * time.Minute + + return waiter.Wait(ctx, 
&ec2.DescribeInstancesInput{ + InstanceIds: []string{instanceID}, + }, maxWaitTime) +} + +// TerminateInstance terminates an EC2 instance +func (c *EC2Client) TerminateInstance(ctx context.Context, instanceID string) error { + _, err := c.client.TerminateInstances(ctx, &ec2.TerminateInstancesInput{ + InstanceIds: []string{instanceID}, + }) + if err != nil { + return fmt.Errorf("failed to terminate instance: %w", err) + } + return nil +} + +// BuildAMIName creates a name for the AMI +func BuildAMIName(phase, executionID string) string { + return fmt.Sprintf("pg-ami-builder-%s-%s", phase, executionID) +} + +// BuildAMITags creates tags for the AMI +func BuildAMITags(executionID, phase, postgresVersion, gitSHA string) []types.Tag { + tags := []types.Tag{ + {Key: aws.String("creator"), Value: aws.String("pg-ami-builder")}, + {Key: aws.String("packerExecutionId"), Value: aws.String(executionID)}, + {Key: aws.String("phase"), Value: aws.String(phase)}, + {Key: aws.String("postgresVersion"), Value: aws.String(postgresVersion)}, + {Key: aws.String("managedBy"), Value: aws.String("pg-ami-builder")}, + } + + if gitSHA != "" { + tags = append(tags, types.Tag{ + Key: aws.String("gitSHA"), + Value: aws.String(gitSHA), + }) + } + + return tags +} + +// CreateAMIInput contains parameters for creating an AMI +type CreateAMIInput struct { + InstanceID string + ExecutionID string + Phase string + PostgresVersion string + GitSHA string +} + +// CreateAMI creates an AMI from an instance +func (c *EC2Client) CreateAMI(ctx context.Context, input *CreateAMIInput) (string, error) { + amiName := BuildAMIName(input.Phase, input.ExecutionID) + tags := BuildAMITags(input.ExecutionID, input.Phase, input.PostgresVersion, input.GitSHA) + + // Create the AMI + createInput := &ec2.CreateImageInput{ + InstanceId: aws.String(input.InstanceID), + Name: aws.String(amiName), + Description: aws.String(fmt.Sprintf("PostgreSQL %s AMI - %s", input.PostgresVersion, input.Phase)), + TagSpecifications: 
[]types.TagSpecification{ + { + ResourceType: types.ResourceTypeImage, + Tags: tags, + }, + }, + } + + result, err := c.client.CreateImage(ctx, createInput) + if err != nil { + return "", fmt.Errorf("failed to create AMI: %w", err) + } + + return *result.ImageId, nil +} + +// WaitForAMIAvailable waits for an AMI to be available +func (c *EC2Client) WaitForAMIAvailable(ctx context.Context, amiID string) error { + waiter := ec2.NewImageAvailableWaiter(c.client) + maxWaitTime := 20 * time.Minute + + return waiter.Wait(ctx, &ec2.DescribeImagesInput{ + ImageIds: []string{amiID}, + }, maxWaitTime) +} + +// FindInstanceByTag finds an instance by a specific tag key-value pair +func (c *EC2Client) FindInstanceByTag(ctx context.Context, tagKey string, tagValue string) (string, error) { + input := &ec2.DescribeInstancesInput{ + Filters: []types.Filter{ + { + Name: aws.String(fmt.Sprintf("tag:%s", tagKey)), + Values: []string{tagValue}, + }, + { + Name: aws.String("instance-state-name"), + Values: []string{"pending", "running", "stopping", "stopped"}, + }, + }, + } + + result, err := c.client.DescribeInstances(ctx, input) + if err != nil { + return "", fmt.Errorf("failed to describe instances: %w", err) + } + + // Find the most recent instance + var instanceID string + for _, reservation := range result.Reservations { + for _, instance := range reservation.Instances { + if instance.InstanceId != nil { + instanceID = *instance.InstanceId + break + } + } + if instanceID != "" { + break + } + } + + if instanceID == "" { + return "", fmt.Errorf("no instance found with tag %s=%s", tagKey, tagValue) + } + + return instanceID, nil +} + +// FindLatestAMI finds the latest AMI matching the given filters +// This mimics packer's source_ami_filter behavior for Ubuntu 24.04 ARM64 +func (c *EC2Client) FindLatestAMI(ctx context.Context, namePattern string, owner string) (string, error) { + input := &ec2.DescribeImagesInput{ + Filters: []types.Filter{ + { + Name: aws.String("name"), + Values: 
[]string{namePattern}, + }, + { + Name: aws.String("state"), + Values: []string{"available"}, + }, + { + Name: aws.String("virtualization-type"), + Values: []string{"hvm"}, + }, + { + Name: aws.String("root-device-type"), + Values: []string{"ebs"}, + }, + }, + Owners: []string{owner}, + } + + result, err := c.client.DescribeImages(ctx, input) + if err != nil { + return "", fmt.Errorf("failed to describe images: %w", err) + } + + if len(result.Images) == 0 { + return "", fmt.Errorf("no AMI found matching pattern %q from owner %q", namePattern, owner) + } + + // Find the most recent image by CreationDate + var latest *types.Image + for i := range result.Images { + img := &result.Images[i] + if latest == nil || (img.CreationDate != nil && latest.CreationDate != nil && *img.CreationDate > *latest.CreationDate) { + latest = img + } + } + + if latest == nil || latest.ImageId == nil { + return "", fmt.Errorf("no valid AMI found") + } + + return *latest.ImageId, nil +} + +// FindAMIByTag finds an AMI by a specific tag key-value pair +func (c *EC2Client) FindAMIByTag(ctx context.Context, tagKey string, tagValue string) (string, error) { + input := &ec2.DescribeImagesInput{ + Filters: []types.Filter{ + { + Name: aws.String(fmt.Sprintf("tag:%s", tagKey)), + Values: []string{tagValue}, + }, + }, + Owners: []string{"self"}, + } + + result, err := c.client.DescribeImages(ctx, input) + if err != nil { + return "", fmt.Errorf("failed to describe images: %w", err) + } + + if len(result.Images) == 0 { + return "", fmt.Errorf("no AMI found with tag %s=%s", tagKey, tagValue) + } + + // Return the most recent one if multiple exist + var latest *types.Image + for i := range result.Images { + img := &result.Images[i] + if latest == nil || (img.CreationDate != nil && latest.CreationDate != nil && *img.CreationDate > *latest.CreationDate) { + latest = img + } + } + + if latest == nil || latest.ImageId == nil { + return "", fmt.Errorf("no valid AMI found") + } + + return *latest.ImageId, 
nil +} diff --git a/nix/packages/pg-ami-builder/internal/aws/ec2_test.go b/nix/packages/pg-ami-builder/internal/aws/ec2_test.go new file mode 100644 index 000000000..d19d925b4 --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/aws/ec2_test.go @@ -0,0 +1,46 @@ +package aws + +import ( + "testing" +) + +func TestGenerateExecutionID(t *testing.T) { + execID := GenerateExecutionID("15") + if execID == "" { + t.Fatal("GenerateExecutionID returned empty string") + } + + // Should contain timestamp and version + if len(execID) < 10 { + t.Errorf("Execution ID too short: %s", execID) + } +} + +func TestBuildInstanceTags(t *testing.T) { + tags := BuildInstanceTags("exec-123", "phase1") + + expectedTags := map[string]string{ + "creator": "pg-ami-builder", + "packerExecutionId": "exec-123", + "appType": "postgres", + "phase": "phase1", + "managedBy": "pg-ami-builder", + } + + if len(tags) != len(expectedTags) { + t.Fatalf("Expected %d tags, got %d", len(expectedTags), len(tags)) + } + + for k, expectedV := range expectedTags { + found := false + for _, tag := range tags { + if *tag.Key == k && *tag.Value == expectedV { + found = true + break + } + } + if !found { + t.Errorf("Missing or incorrect tag %s=%s", k, expectedV) + } + } +} diff --git a/nix/packages/pg-ami-builder/internal/aws/iam.go b/nix/packages/pg-ami-builder/internal/aws/iam.go new file mode 100644 index 000000000..637c8f566 --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/aws/iam.go @@ -0,0 +1,96 @@ +package aws + +import ( + "context" + "fmt" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/iam" +) + +// IAMClient wraps AWS IAM operations +type IAMClient struct { + client *iam.Client +} + +// NewIAMClient creates a new IAM client +func NewIAMClient(ctx context.Context) (*IAMClient, error) { + cfg, err := config.LoadDefaultConfig(ctx) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + return 
&IAMClient{ + client: iam.NewFromConfig(cfg), + }, nil +} + +// EnsureInstanceProfile creates or retrieves an IAM instance profile for SSM access +func (c *IAMClient) EnsureInstanceProfile(ctx context.Context) (string, error) { + roleName := "pg-ami-builder-ssm-role" + profileName := "pg-ami-builder-ssm-profile" + + // Try to get existing role + _, err := c.client.GetRole(ctx, &iam.GetRoleInput{ + RoleName: aws.String(roleName), + }) + if err != nil { + // Role doesn't exist, create it + trustPolicy := `{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Service": "ec2.amazonaws.com" + }, + "Action": "sts:AssumeRole" + } + ] +}` + + _, err = c.client.CreateRole(ctx, &iam.CreateRoleInput{ + RoleName: aws.String(roleName), + AssumeRolePolicyDocument: aws.String(trustPolicy), + Description: aws.String("IAM role for pg-ami-builder instances to use SSM"), + }) + if err != nil { + return "", fmt.Errorf("failed to create IAM role: %w", err) + } + + // Attach SSM managed policy + _, err = c.client.AttachRolePolicy(ctx, &iam.AttachRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyArn: aws.String("arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"), + }) + if err != nil { + return "", fmt.Errorf("failed to attach SSM policy: %w", err) + } + } + + // Try to get existing instance profile + _, err = c.client.GetInstanceProfile(ctx, &iam.GetInstanceProfileInput{ + InstanceProfileName: aws.String(profileName), + }) + if err != nil { + // Instance profile doesn't exist, create it + _, err = c.client.CreateInstanceProfile(ctx, &iam.CreateInstanceProfileInput{ + InstanceProfileName: aws.String(profileName), + }) + if err != nil { + return "", fmt.Errorf("failed to create instance profile: %w", err) + } + + // Add role to instance profile + _, err = c.client.AddRoleToInstanceProfile(ctx, &iam.AddRoleToInstanceProfileInput{ + InstanceProfileName: aws.String(profileName), + RoleName: aws.String(roleName), + }) + if err != nil { + 
return "", fmt.Errorf("failed to add role to instance profile: %w", err) + } + } + + return profileName, nil +} diff --git a/nix/packages/pg-ami-builder/internal/aws/ssm.go b/nix/packages/pg-ami-builder/internal/aws/ssm.go new file mode 100644 index 000000000..589a5cd77 --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/aws/ssm.go @@ -0,0 +1,178 @@ +package aws + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/ssm" + "github.com/aws/aws-sdk-go-v2/service/ssm/types" +) + +// SSMClient wraps AWS SSM operations +type SSMClient struct { + client *ssm.Client + region string +} + +// NewSSMClient creates a new SSM client +func NewSSMClient(ctx context.Context, region string) (*SSMClient, error) { + cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion(region)) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + return &SSMClient{ + client: ssm.NewFromConfig(cfg), + region: region, + }, nil +} + +// BuildSSMCommand creates a shell command string +func BuildSSMCommand(script string) string { + return script +} + +// WaitForSSMReady waits for SSM agent to be ready on the instance +func (c *SSMClient) WaitForSSMReady(ctx context.Context, instanceID string) error { + maxAttempts := 60 + for i := 0; i < maxAttempts; i++ { + input := &ssm.DescribeInstanceInformationInput{ + Filters: []types.InstanceInformationStringFilter{ + { + Key: aws.String("InstanceIds"), + Values: []string{instanceID}, + }, + }, + } + + result, err := c.client.DescribeInstanceInformation(ctx, input) + if err == nil && len(result.InstanceInformationList) > 0 { + status := result.InstanceInformationList[0].PingStatus + if status == types.PingStatusOnline { + return nil + } + } + + time.Sleep(5 * time.Second) + } + + return fmt.Errorf("timeout waiting for SSM agent to be ready") +} + +// SendCommand sends a command 
to an instance via SSM +func (c *SSMClient) SendCommand(ctx context.Context, instanceID, command string) (string, error) { + input := &ssm.SendCommandInput{ + InstanceIds: []string{instanceID}, + DocumentName: aws.String("AWS-RunShellScript"), + Parameters: map[string][]string{ + "commands": {command}, + }, + } + + result, err := c.client.SendCommand(ctx, input) + if err != nil { + return "", fmt.Errorf("failed to send command: %w", err) + } + + return *result.Command.CommandId, nil +} + +// WaitForCommandComplete waits for a command to complete +func (c *SSMClient) WaitForCommandComplete(ctx context.Context, commandID, instanceID string) error { + maxAttempts := 600 // 50 minutes + for i := 0; i < maxAttempts; i++ { + input := &ssm.GetCommandInvocationInput{ + CommandId: aws.String(commandID), + InstanceId: aws.String(instanceID), + } + + result, err := c.client.GetCommandInvocation(ctx, input) + if err != nil { + time.Sleep(5 * time.Second) + continue + } + + switch result.Status { + case types.CommandInvocationStatusSuccess: + return nil + case types.CommandInvocationStatusFailed, types.CommandInvocationStatusCancelled, types.CommandInvocationStatusTimedOut: + return fmt.Errorf("command failed with status: %s", result.Status) + } + + time.Sleep(5 * time.Second) + } + + return fmt.Errorf("timeout waiting for command to complete") +} + +// StartSSHSession starts an interactive SSM session +func (c *SSMClient) StartSSHSession(ctx context.Context, instanceID string) error { + cmd := exec.CommandContext(ctx, "aws", "ssm", "start-session", + "--target", instanceID, + "--region", c.region) + + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + return cmd.Run() +} + +// BuildTarCommand creates a command to tar and base64 encode a file/directory +func BuildTarCommand(src string) string { + // Get parent directory for mkdir + dir := filepath.Dir(src) + if dir == "." 
{ + dir = "" + } + + mkdirPart := "" + if dir != "" { + mkdirPart = fmt.Sprintf("mkdir -p %s && ", dir) + } + + return fmt.Sprintf("%star -czf - %s | base64", mkdirPart, src) +} + +// BuildUntarCommand creates a command to base64 decode and untar +func BuildUntarCommand(dst string) string { + return fmt.Sprintf("mkdir -p %s && base64 -d | tar -xzf - -C %s", dst, dst) +} + +// BuildSyncFileCommand creates a command to sync a file to the instance +func BuildSyncFileCommand(dst, tarBase64Data string) string { + parentDir := filepath.Dir(dst) + return fmt.Sprintf("mkdir -p %s && echo '%s' | base64 -d | tar -xzf - -C /", parentDir, tarBase64Data) +} + +// SyncFile syncs a local file/directory to the instance +func (c *SSMClient) SyncFile(ctx context.Context, instanceID, src, dst string) error { + // Read and tar the source file/directory + tarCmd := exec.Command("bash", "-c", BuildTarCommand(src)) + tarOutput, err := tarCmd.Output() + if err != nil { + return fmt.Errorf("failed to tar source: %w", err) + } + + tarBase64 := string(tarOutput) + + // Build and send the untar command + syncCmd := BuildSyncFileCommand(dst, tarBase64) + commandID, err := c.SendCommand(ctx, instanceID, syncCmd) + if err != nil { + return fmt.Errorf("failed to send sync command: %w", err) + } + + // Wait for command to complete + if err := c.WaitForCommandComplete(ctx, commandID, instanceID); err != nil { + return fmt.Errorf("sync command failed: %w", err) + } + + return nil +} diff --git a/nix/packages/pg-ami-builder/internal/aws/ssm_test.go b/nix/packages/pg-ami-builder/internal/aws/ssm_test.go new file mode 100644 index 000000000..2afe97d73 --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/aws/ssm_test.go @@ -0,0 +1,93 @@ +package aws + +import ( + "strings" + "testing" +) + +func TestBuildSSMCommand(t *testing.T) { + tests := []struct { + name string + script string + wantCmd bool + }{ + { + name: "simple script", + script: "echo hello", + wantCmd: true, + }, + { + name: "multiline 
script", + script: "#!/bin/bash\necho hello\necho world", + wantCmd: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cmd := BuildSSMCommand(tt.script) + if (cmd != "") != tt.wantCmd { + t.Errorf("BuildSSMCommand() = %v, wantCmd %v", cmd != "", tt.wantCmd) + } + }) + } +} + +func TestBuildTarCommand(t *testing.T) { + tests := []struct { + name string + src string + expected string + }{ + { + name: "directory sync", + src: "ansible/", + expected: "mkdir -p ansible && tar -czf - ansible/ | base64", + }, + { + name: "file sync", + src: "ansible/vars.yml", + expected: "mkdir -p ansible && tar -czf - ansible/vars.yml | base64", + }, + { + name: "top level file", + src: "README.md", + expected: "tar -czf - README.md | base64", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := BuildTarCommand(tt.src) + if result != tt.expected { + t.Errorf("BuildTarCommand(%q) = %q, want %q", tt.src, result, tt.expected) + } + }) + } +} + +func TestBuildUntarCommand(t *testing.T) { + dst := "/tmp/ansible-playbook/ansible/" + expected := "mkdir -p /tmp/ansible-playbook/ansible/ && base64 -d | tar -xzf - -C /tmp/ansible-playbook/ansible/" + + result := BuildUntarCommand(dst) + if result != expected { + t.Errorf("BuildUntarCommand(%q) = %q, want %q", dst, result, expected) + } +} + +func TestSyncFileCommand(t *testing.T) { + dst := "/tmp/ansible-playbook/ansible/" + tarData := "base64encodeddata" + + result := BuildSyncFileCommand(dst, tarData) + if !strings.Contains(result, "base64 -d") { + t.Errorf("BuildSyncFileCommand should contain base64 decode") + } + if !strings.Contains(result, "tar -xzf") { + t.Errorf("BuildSyncFileCommand should contain tar extract") + } + if !strings.Contains(result, tarData) { + t.Errorf("BuildSyncFileCommand should contain the tar data") + } +} diff --git a/nix/packages/pg-ami-builder/internal/git/repo.go b/nix/packages/pg-ami-builder/internal/git/repo.go new file mode 100644 index 
000000000..a876affdd --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/git/repo.go @@ -0,0 +1,37 @@ +package git + +import ( + "fmt" + "os/exec" + "strings" +) + +// GetCurrentSHA returns the current git commit SHA +func GetCurrentSHA() (string, error) { + cmd := exec.Command("git", "rev-parse", "HEAD") + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get git SHA: %w", err) + } + return strings.TrimSpace(string(output)), nil +} + +// GetCurrentBranch returns the current git branch +func GetCurrentBranch() (string, error) { + cmd := exec.Command("git", "branch", "--show-current") + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get git branch: %w", err) + } + return strings.TrimSpace(string(output)), nil +} + +// GetRepoRoot returns the git repository root directory +func GetRepoRoot() (string, error) { + cmd := exec.Command("git", "rev-parse", "--show-toplevel") + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get repo root: %w", err) + } + return strings.TrimSpace(string(output)), nil +} diff --git a/nix/packages/pg-ami-builder/internal/git/repo_test.go b/nix/packages/pg-ami-builder/internal/git/repo_test.go new file mode 100644 index 000000000..3fd468803 --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/git/repo_test.go @@ -0,0 +1,27 @@ +package git + +import ( + "testing" +) + +func TestGetCurrentSHA(t *testing.T) { + sha, err := GetCurrentSHA() + if err != nil { + t.Skipf("Not in a git repository: %v", err) + } + + if len(sha) != 40 { + t.Errorf("Expected 40 character SHA, got %d: %s", len(sha), sha) + } +} + +func TestGetCurrentBranch(t *testing.T) { + branch, err := GetCurrentBranch() + if err != nil { + t.Skipf("Not in a git repository: %v", err) + } + + if branch == "" { + t.Error("Expected non-empty branch name") + } +} diff --git a/nix/packages/pg-ami-builder/internal/packer/runner.go b/nix/packages/pg-ami-builder/internal/packer/runner.go 
new file mode 100644 index 000000000..6a6d5fabb --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/packer/runner.go @@ -0,0 +1,84 @@ +package packer + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" +) + +// GetPackerTemplate returns the packer template path for a phase +func GetPackerTemplate(phase string) string { + if phase == "phase1" { + return "amazon-arm64-nix.pkr.hcl" + } + return "stage2-nix-psql.pkr.hcl" +} + +// RewriteTemplateWithUniqueAMI creates a temporary copy of the template with unique AMI name +// For phase2, also replaces source_ami_filter with direct source_ami if sourceAMI is provided +func RewriteTemplateWithUniqueAMI(templatePath, executionID, sourceAMI string) (string, func(), error) { + // Read the original template + content, err := os.ReadFile(templatePath) + if err != nil { + return "", nil, fmt.Errorf("failed to read template: %w", err) + } + + modified := string(content) + + // For phase2: replace source_ami_filter with direct source_ami FIRST + // Do this before ami_name modification to avoid conflicts + if sourceAMI != "" { + // Remove the entire source_ami_filter block (with nested braces) + // Match from source_ami_filter to a closing brace at line start (same indentation) + filterPattern := regexp.MustCompile(`(?m)source_ami_filter\s*\{[^}]*\{[^}]*\}[^}]*\}`) + modified = filterPattern.ReplaceAllString(modified, fmt.Sprintf(`source_ami = "%s"`, sourceAMI)) + } + + // Modify ami_name to append execution ID + // Pattern matches: ami_name = "something-${var.postgres-version}-stage-1" + // or: ami_name = "something-${var.postgres-version}" + pattern := regexp.MustCompile(`(ami_name\s*=\s*"[^"]+")`) + modified = pattern.ReplaceAllStringFunc(modified, func(match string) string { + // Insert execution ID before the closing quote + return strings.TrimSuffix(match, `"`) + `-${var.packer-execution-id}"` + }) + + // Create temp file + tempDir := os.TempDir() + tempFile := filepath.Join(tempDir, 
fmt.Sprintf("packer-%s-%s", executionID, filepath.Base(templatePath))) + + if err := os.WriteFile(tempFile, []byte(modified), 0o644); err != nil { + return "", nil, fmt.Errorf("failed to write temp template: %w", err) + } + + // Return cleanup function + cleanup := func() { + os.Remove(tempFile) + } + + return tempFile, cleanup, nil +} + +// BuildPackerCommand constructs the packer build command +func BuildPackerCommand(phase, postgresVersion, executionID string, extraVars map[string]string) []string { + template := GetPackerTemplate(phase) + + args := []string{ + "packer", + "build", + "-on-error=abort", // Keep instance running on failure for debugging + "-var", fmt.Sprintf("postgres-version=%s", postgresVersion), + "-var", fmt.Sprintf("packer-execution-id=%s", executionID), + } + + // Add any extra variables + for key, value := range extraVars { + args = append(args, "-var", fmt.Sprintf("%s=%s", key, value)) + } + + args = append(args, template) + + return args +} diff --git a/nix/packages/pg-ami-builder/internal/packer/runner_test.go b/nix/packages/pg-ami-builder/internal/packer/runner_test.go new file mode 100644 index 000000000..f40eba468 --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/packer/runner_test.go @@ -0,0 +1,75 @@ +package packer + +import ( + "strings" + "testing" +) + +func TestBuildPackerCommand(t *testing.T) { + tests := []struct { + name string + phase string + postgresVersion string + executionID string + expectedContains []string + }{ + { + name: "phase1 build", + phase: "phase1", + postgresVersion: "15", + executionID: "1234567890-15", + expectedContains: []string{ + "packer", + "build", + "amazon-arm64-nix.pkr.hcl", + "-var", "postgres-version=15", + "-var", "packer-execution-id=1234567890-15", + }, + }, + { + name: "phase2 build", + phase: "phase2", + postgresVersion: "16", + executionID: "1234567890-16", + expectedContains: []string{ + "packer", + "build", + "stage2-nix-psql.pkr.hcl", + "-var", "postgres-version=16", + "-var", 
"packer-execution-id=1234567890-16", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cmd := BuildPackerCommand(tt.phase, tt.postgresVersion, tt.executionID, map[string]string{}) + cmdStr := strings.Join(cmd, " ") + + for _, expected := range tt.expectedContains { + if !strings.Contains(cmdStr, expected) { + t.Errorf("BuildPackerCommand missing %q in: %s", expected, cmdStr) + } + } + }) + } +} + +func TestGetPackerTemplate(t *testing.T) { + tests := []struct { + phase string + expected string + }{ + {"phase1", "amazon-arm64-nix.pkr.hcl"}, + {"phase2", "stage2-nix-psql.pkr.hcl"}, + } + + for _, tt := range tests { + t.Run(tt.phase, func(t *testing.T) { + result := GetPackerTemplate(tt.phase) + if result != tt.expected { + t.Errorf("GetPackerTemplate(%q) = %q, want %q", tt.phase, result, tt.expected) + } + }) + } +} diff --git a/nix/packages/pg-ami-builder/internal/state/manager.go b/nix/packages/pg-ami-builder/internal/state/manager.go new file mode 100644 index 000000000..e19b5714a --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/state/manager.go @@ -0,0 +1,125 @@ +package state + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" +) + +// PhaseState represents the state for a specific phase +type PhaseState struct { + InstanceID string `json:"instance_id,omitempty"` + ExecutionID string `json:"execution_id,omitempty"` + AMIID string `json:"ami_id,omitempty"` + Timestamp string `json:"timestamp,omitempty"` +} + +// State represents the build state +type State struct { + Phase1 *PhaseState `json:"phase1,omitempty"` + Phase2 *PhaseState `json:"phase2,omitempty"` + Region string `json:"region"` + PostgresVersion string `json:"postgres_version"` + GitSHA string `json:"git_sha"` + + // Legacy fields for backward compatibility + InstanceID string `json:"instance_id,omitempty"` + Phase string `json:"phase,omitempty"` + ExecutionID string `json:"execution_id,omitempty"` + AMIID string `json:"ami_id,omitempty"` + 
Timestamp string `json:"timestamp,omitempty"` +} + +// GetDefaultStateFile returns the default state file path +func GetDefaultStateFile() (string, error) { + homeDir, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("failed to get home directory: %w", err) + } + + stateDir := filepath.Join(homeDir, ".pg-ami-build") + if err := os.MkdirAll(stateDir, 0o755); err != nil { + return "", fmt.Errorf("failed to create state directory: %w", err) + } + + return filepath.Join(stateDir, "state.json"), nil +} + +// SaveState saves the state to a file +func SaveState(filePath string, state *State) error { + // Ensure directory exists + dir := filepath.Dir(filePath) + if err := os.MkdirAll(dir, 0o755); err != nil { + return fmt.Errorf("failed to create state directory: %w", err) + } + + data, err := json.MarshalIndent(state, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal state: %w", err) + } + + if err := os.WriteFile(filePath, data, 0o644); err != nil { + return fmt.Errorf("failed to write state file: %w", err) + } + + return nil +} + +// LoadState loads the state from a file +func LoadState(filePath string) (*State, error) { + data, err := os.ReadFile(filePath) + if err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("state file does not exist: %s", filePath) + } + return nil, fmt.Errorf("failed to read state file: %w", err) + } + + var state State + if err := json.Unmarshal(data, &state); err != nil { + return nil, fmt.Errorf("failed to unmarshal state: %w", err) + } + + return &state, nil +} + +// ClearState removes the state file +func ClearState(filePath string) error { + if err := os.Remove(filePath); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to remove state file: %w", err) + } + return nil +} + +// GetPhaseState returns the state for a specific phase +func (s *State) GetPhaseState(phase string) *PhaseState { + // Try new structure first + if phase == "phase1" && s.Phase1 != nil { + return s.Phase1 + } 
+ if phase == "phase2" && s.Phase2 != nil { + return s.Phase2 + } + + // Fall back to legacy fields + if s.Phase == phase { + return &PhaseState{ + InstanceID: s.InstanceID, + ExecutionID: s.ExecutionID, + AMIID: s.AMIID, + Timestamp: s.Timestamp, + } + } + + return nil +} + +// SetPhaseState sets the state for a specific phase +func (s *State) SetPhaseState(phase string, ps *PhaseState) { + if phase == "phase1" { + s.Phase1 = ps + } else if phase == "phase2" { + s.Phase2 = ps + } +} diff --git a/nix/packages/pg-ami-builder/internal/state/manager_test.go b/nix/packages/pg-ami-builder/internal/state/manager_test.go new file mode 100644 index 000000000..ec6cefd06 --- /dev/null +++ b/nix/packages/pg-ami-builder/internal/state/manager_test.go @@ -0,0 +1,78 @@ +package state + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +func TestSaveState(t *testing.T) { + tmpDir := t.TempDir() + stateFile := filepath.Join(tmpDir, "state.json") + + state := &State{ + InstanceID: "i-1234567890abcdef0", + Phase: "phase1", + ExecutionID: "1731672000-15", + Region: "us-east-1", + PostgresVersion: "15", + Timestamp: time.Now().Format(time.RFC3339), + GitSHA: "abc123", + } + + err := SaveState(stateFile, state) + if err != nil { + t.Fatalf("SaveState failed: %v", err) + } + + if _, err := os.Stat(stateFile); os.IsNotExist(err) { + t.Fatal("State file was not created") + } +} + +func TestLoadState(t *testing.T) { + tmpDir := t.TempDir() + stateFile := filepath.Join(tmpDir, "state.json") + + original := &State{ + InstanceID: "i-test123", + Phase: "phase2", + ExecutionID: "exec-1", + Region: "us-west-2", + PostgresVersion: "16", + GitSHA: "def456", + } + + if err := SaveState(stateFile, original); err != nil { + t.Fatalf("SaveState failed: %v", err) + } + + loaded, err := LoadState(stateFile) + if err != nil { + t.Fatalf("LoadState failed: %v", err) + } + + if loaded.InstanceID != original.InstanceID { + t.Errorf("InstanceID mismatch: got %s, want %s", loaded.InstanceID, 
original.InstanceID) + } + if loaded.Phase != original.Phase { + t.Errorf("Phase mismatch: got %s, want %s", loaded.Phase, original.Phase) + } +} + +func TestGetDefaultStateFile(t *testing.T) { + path, err := GetDefaultStateFile() + if err != nil { + // Skip if we can't create the state directory (e.g., in Nix build sandbox) + t.Skipf("GetDefaultStateFile failed (expected in sandboxed environments): %v", err) + } + + if path == "" { + t.Fatal("GetDefaultStateFile returned empty path") + } + + if !filepath.IsAbs(path) { + t.Errorf("Expected absolute path, got %s", path) + } +} diff --git a/nix/packages/pg-ami-builder/main.go b/nix/packages/pg-ami-builder/main.go new file mode 100644 index 000000000..4bb2fa6b9 --- /dev/null +++ b/nix/packages/pg-ami-builder/main.go @@ -0,0 +1,12 @@ +package main + +import ( + "github.com/supabase/postgres/pg-ami-builder/cmd" +) + +var Version = "dev" + +func main() { + cmd.Version = Version + cmd.Execute() +} diff --git a/stage2-nix-psql.pkr.hcl b/stage2-nix-psql.pkr.hcl index 344ced288..407ff723a 100644 --- a/stage2-nix-psql.pkr.hcl +++ b/stage2-nix-psql.pkr.hcl @@ -51,6 +51,11 @@ variable "postgres_major_version" { default = "" } +variable "source-ami" { + type = string + default = "" +} + packer { required_plugins { amazon = { @@ -64,14 +69,21 @@ source "amazon-ebs" "ubuntu" { ami_name = "${var.ami_name}-${var.postgres-version}" instance_type = "c6g.4xlarge" region = "${var.region}" - source_ami_filter { - filters = { - name = "${var.ami_name}-${var.postgres-version}-stage-1" - root-device-type = "ebs" - virtualization-type = "hvm" + + # Use source-ami variable if provided, otherwise use filter + source_ami = var.source-ami != "" ? var.source-ami : null + + dynamic "source_ami_filter" { + for_each = var.source-ami == "" ? 
[1] : [] + content { + filters = { + name = "${var.ami_name}-${var.postgres-version}-stage-1" + root-device-type = "ebs" + virtualization-type = "hvm" + } + most_recent = true + owners = ["amazon", "self"] } - most_recent = true - owners = ["amazon", "self"] } communicator = "ssh" From 3f63cbb62a0392796af952aa41d8c94d08e8a979 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Tue, 18 Nov 2025 10:07:27 -0500 Subject: [PATCH 2/4] chore: remove stray char that I had put in to force failure for testing --- ansible/tasks/setup-system.yml | 2 +- nix/packages/pg-ami-builder/cmd/build.go | 150 +++++++++++++---------- 2 files changed, 89 insertions(+), 63 deletions(-) diff --git a/ansible/tasks/setup-system.yml b/ansible/tasks/setup-system.yml index 4218d18a2..afd5a64fc 100644 --- a/ansible/tasks/setup-system.yml +++ b/ansible/tasks/setup-system.yml @@ -12,7 +12,7 @@ ansible.builtin.apt: cache_valid_time: 3600 pkg: - - acll + - acl - bwm-ng - fail2ban - htop diff --git a/nix/packages/pg-ami-builder/cmd/build.go b/nix/packages/pg-ami-builder/cmd/build.go index 881a937f7..d9f982ed6 100644 --- a/nix/packages/pg-ami-builder/cmd/build.go +++ b/nix/packages/pg-ami-builder/cmd/build.go @@ -118,54 +118,67 @@ func runBuildPhase1(cmd *cobra.Command, args []string) error { if packerErr != nil { fmt.Printf("\n✗ Packer build failed: %v\n", packerErr) - // Try to find the packer instance by tag - ec2Client, err := aws.NewEC2Client(ctx, region) - if err != nil { - fmt.Printf("⚠ Could not create EC2 client to find instance: %v\n", err) - return fmt.Errorf("packer build failed: %w", packerErr) + // Get state file path first + stateFilePath := stateFile + if stateFilePath == "" { + var err error + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + fmt.Printf("⚠ Could not get state file path: %v\n", err) + return fmt.Errorf("packer build failed: %w", packerErr) + } } - fmt.Println("\n✓ Looking for packer instance...") - instanceID, err := ec2Client.FindInstanceByTag(ctx, 
"packerExecutionId", executionID) + // Load existing state or create new + buildState, err := state.LoadState(stateFilePath) if err != nil { - fmt.Printf("⚠ Could not find packer instance: %v\n", err) - fmt.Println("\nPacker may have cleaned up the instance already.") - return fmt.Errorf("packer build failed: %w", packerErr) + // Create new state if none exists + buildState = &state.State{ + Region: region, + PostgresVersion: postgresVersion, + GitSHA: sha, + } } - fmt.Printf("✓ Found packer instance: %s\n", instanceID) - - // Save state for debugging - stateFilePath := stateFile - if stateFilePath == "" { - stateFilePath, err = state.GetDefaultStateFile() + // Try to find the packer instance by tag + var instanceID string + ec2Client, err := aws.NewEC2Client(ctx, region) + if err != nil { + fmt.Printf("⚠ Could not create EC2 client to find instance: %v\n", err) + } else { + fmt.Println("\n✓ Looking for packer instance...") + instanceID, err = ec2Client.FindInstanceByTag(ctx, "packerExecutionId", executionID) if err != nil { - return fmt.Errorf("failed to get default state file: %w", err) + fmt.Printf("⚠ Could not find packer instance: %v\n", err) + fmt.Println(" Packer may have cleaned up the instance already.") + } else { + fmt.Printf("✓ Found packer instance: %s\n", instanceID) } } - buildState := &state.State{ - Region: region, - PostgresVersion: postgresVersion, - GitSHA: sha, - } + // Save state with execution ID (and instance ID if found) buildState.SetPhaseState("phase1", &state.PhaseState{ - InstanceID: instanceID, + InstanceID: instanceID, // Will be empty string if not found ExecutionID: executionID, Timestamp: time.Now().Format(time.RFC3339), }) if err := state.SaveState(stateFilePath, buildState); err != nil { - return fmt.Errorf("failed to save state: %w", err) + fmt.Printf("⚠ Could not save state: %v\n", err) + } else { + fmt.Printf("\n✓ State saved to: %s\n", stateFilePath) + fmt.Printf(" Execution ID: %s\n", executionID) + if instanceID != "" { + 
fmt.Printf(" Instance ID: %s\n", instanceID) + fmt.Printf("\nNext steps:\n") + fmt.Printf(" - SSH into instance: pg-ami-builder ssh\n") + fmt.Printf(" - Re-run ansible: pg-ami-builder ansible-rerun phase1 --sync-files\n") + fmt.Printf(" - Cleanup: pg-ami-builder cleanup\n") + } else { + fmt.Printf("\nNo instance available for debugging (already cleaned up by packer)\n") + } } - fmt.Printf("\nInstance kept running for debugging:\n") - fmt.Printf(" Instance ID: %s\n", instanceID) - fmt.Printf(" State saved to: %s\n", stateFilePath) - fmt.Printf("\nNext steps:\n") - fmt.Printf(" - SSH into instance: pg-ami-builder ssh\n") - fmt.Printf(" - Re-run ansible: pg-ami-builder ansible-rerun phase1 --sync-files\n") - fmt.Printf(" - Cleanup: pg-ami-builder cleanup\n") return fmt.Errorf("packer build failed: %w", packerErr) } @@ -319,54 +332,67 @@ func runBuildPhase2(cmd *cobra.Command, args []string) error { if packerErr != nil { fmt.Printf("\n✗ Packer build failed: %v\n", packerErr) - // Try to find the packer instance by tag - ec2Client, err := aws.NewEC2Client(ctx, region) - if err != nil { - fmt.Printf("⚠ Could not create EC2 client to find instance: %v\n", err) - return fmt.Errorf("packer build failed: %w", packerErr) + // Get state file path first + stateFilePath := stateFile + if stateFilePath == "" { + var err error + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + fmt.Printf("⚠ Could not get state file path: %v\n", err) + return fmt.Errorf("packer build failed: %w", packerErr) + } } - fmt.Println("\n✓ Looking for packer instance...") - instanceID, err := ec2Client.FindInstanceByTag(ctx, "packerExecutionId", executionID) + // Load existing state or create new + buildState, err := state.LoadState(stateFilePath) if err != nil { - fmt.Printf("⚠ Could not find packer instance: %v\n", err) - fmt.Println("\nPacker may have cleaned up the instance already.") - return fmt.Errorf("packer build failed: %w", packerErr) + // Create new state if none exists + 
buildState = &state.State{ + Region: region, + PostgresVersion: postgresVersion, + GitSHA: sha, + } } - fmt.Printf("✓ Found packer instance: %s\n", instanceID) - - // Save state for debugging - stateFilePath := stateFile - if stateFilePath == "" { - stateFilePath, err = state.GetDefaultStateFile() + // Try to find the packer instance by tag + var instanceID string + ec2Client, err := aws.NewEC2Client(ctx, region) + if err != nil { + fmt.Printf("⚠ Could not create EC2 client to find instance: %v\n", err) + } else { + fmt.Println("\n✓ Looking for packer instance...") + instanceID, err = ec2Client.FindInstanceByTag(ctx, "packerExecutionId", executionID) if err != nil { - return fmt.Errorf("failed to get default state file: %w", err) + fmt.Printf("⚠ Could not find packer instance: %v\n", err) + fmt.Println(" Packer may have cleaned up the instance already.") + } else { + fmt.Printf("✓ Found packer instance: %s\n", instanceID) } } - buildState := &state.State{ - Region: region, - PostgresVersion: postgresVersion, - GitSHA: sha, - } + // Save state with execution ID (and instance ID if found) buildState.SetPhaseState("phase2", &state.PhaseState{ - InstanceID: instanceID, + InstanceID: instanceID, // Will be empty string if not found ExecutionID: executionID, Timestamp: time.Now().Format(time.RFC3339), }) if err := state.SaveState(stateFilePath, buildState); err != nil { - return fmt.Errorf("failed to save state: %w", err) + fmt.Printf("⚠ Could not save state: %v\n", err) + } else { + fmt.Printf("\n✓ State saved to: %s\n", stateFilePath) + fmt.Printf(" Execution ID: %s\n", executionID) + if instanceID != "" { + fmt.Printf(" Instance ID: %s\n", instanceID) + fmt.Printf("\nNext steps:\n") + fmt.Printf(" - SSH into instance: pg-ami-builder ssh\n") + fmt.Printf(" - Re-run ansible: pg-ami-builder ansible-rerun phase2 --sync-files\n") + fmt.Printf(" - Cleanup: pg-ami-builder cleanup\n") + } else { + fmt.Printf("\nNo instance available for debugging (already cleaned up by 
packer)\n") + } } - fmt.Printf("\nInstance kept running for debugging:\n") - fmt.Printf(" Instance ID: %s\n", instanceID) - fmt.Printf(" State saved to: %s\n", stateFilePath) - fmt.Printf("\nNext steps:\n") - fmt.Printf(" - SSH into instance: pg-ami-builder ssh\n") - fmt.Printf(" - Re-run ansible: pg-ami-builder ansible-rerun phase2 --sync-files\n") - fmt.Printf(" - Cleanup: pg-ami-builder cleanup\n") return fmt.Errorf("packer build failed: %w", packerErr) } From 03fa3e9edd2782d63cf604a8e2d46b7d14a43c9c Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Wed, 19 Nov 2025 11:24:57 -0500 Subject: [PATCH 3/4] fix: cleaning up function of running phases etc --- nix/packages/pg-ami-builder/cmd/build.go | 103 ++++++++++++++---- nix/packages/pg-ami-builder/cmd/cleanup.go | 33 +++++- nix/packages/pg-ami-builder/cmd/ssh.go | 25 ++++- .../pg-ami-builder/internal/packer/runner.go | 16 +++ 4 files changed, 147 insertions(+), 30 deletions(-) diff --git a/nix/packages/pg-ami-builder/cmd/build.go b/nix/packages/pg-ami-builder/cmd/build.go index d9f982ed6..0303cea71 100644 --- a/nix/packages/pg-ami-builder/cmd/build.go +++ b/nix/packages/pg-ami-builder/cmd/build.go @@ -185,26 +185,21 @@ func runBuildPhase1(cmd *cobra.Command, args []string) error { fmt.Printf("\n✓ Packer build completed successfully!\n") fmt.Println("✓ AMI created by packer") - // Parse AMI ID from packer output (it's already in stdout above) - // For now, use AWS API to find the AMI by tags + // Find AMI by execution ID tag (injected by our template rewriter) + var amiID string ec2Client, err := aws.NewEC2Client(ctx, region) if err != nil { fmt.Printf("⚠ Could not create EC2 client to find AMI: %v\n", err) - fmt.Println("\n✓ Build phase 1 complete!") - return nil - } - - // Find AMI by execution ID tag - amiID, err := ec2Client.FindAMIByTag(ctx, "packerExecutionId", executionID) - if err != nil { - fmt.Printf("⚠ Could not find created AMI: %v\n", err) - fmt.Println("\n✓ Build phase 1 complete!") - return nil + } else { 
+ amiID, err = ec2Client.FindAMIByTag(ctx, "packerExecutionId", executionID) + if err != nil { + fmt.Printf("⚠ Could not find created AMI: %v\n", err) + } else { + fmt.Printf("✓ AMI ID: %s\n", amiID) + } } - fmt.Printf("✓ AMI ID: %s\n", amiID) - - // Save AMI ID to state for phase2 + // Save state even if we couldn't find the AMI stateFilePath := stateFile if stateFilePath == "" { stateFilePath, err = state.GetDefaultStateFile() @@ -215,14 +210,19 @@ func runBuildPhase1(cmd *cobra.Command, args []string) error { } } - buildState := &state.State{ - Region: region, - PostgresVersion: postgresVersion, - GitSHA: sha, + // Load existing state or create new + buildState, err := state.LoadState(stateFilePath) + if err != nil { + buildState = &state.State{ + Region: region, + PostgresVersion: postgresVersion, + GitSHA: sha, + } } + buildState.SetPhaseState("phase1", &state.PhaseState{ ExecutionID: executionID, - AMIID: amiID, + AMIID: amiID, // Will be empty if not found Timestamp: time.Now().Format(time.RFC3339), }) @@ -233,8 +233,13 @@ func runBuildPhase1(cmd *cobra.Command, args []string) error { } fmt.Println("\n✓ Build phase 1 complete!") - fmt.Printf("\nNext: Run phase 2 with:\n") - fmt.Printf(" pg-ami-builder build phase2 --postgres-version %s\n", postgresVersion) + if amiID != "" { + fmt.Printf("\nNext: Run phase 2 with:\n") + fmt.Printf(" pg-ami-builder build phase2 --postgres-version %s\n", postgresVersion) + } else { + fmt.Printf("\nNote: AMI ID not automatically detected. 
You can manually add it to:\n") + fmt.Printf(" %s\n", stateFilePath) + } return nil } @@ -398,7 +403,61 @@ func runBuildPhase2(cmd *cobra.Command, args []string) error { fmt.Printf("\n✓ Packer build completed successfully!\n") fmt.Println("✓ Final production AMI created by packer") + + // Find AMI by execution ID tag (injected by our template rewriter) + var amiID string + ec2Client, err := aws.NewEC2Client(ctx, region) + if err != nil { + fmt.Printf("⚠ Could not create EC2 client to find AMI: %v\n", err) + } else { + amiID, err = ec2Client.FindAMIByTag(ctx, "packerExecutionId", executionID) + if err != nil { + fmt.Printf("⚠ Could not find created AMI: %v\n", err) + } else { + fmt.Printf("✓ AMI ID: %s\n", amiID) + } + } + + // Save state even if we couldn't find the AMI + stateFilePath := stateFile + if stateFilePath == "" { + stateFilePath, err = state.GetDefaultStateFile() + if err != nil { + fmt.Printf("⚠ Could not get state file path: %v\n", err) + fmt.Println("\n✓ Build phase 2 complete!") + return nil + } + } + + // Load existing state or create new + buildState, err := state.LoadState(stateFilePath) + if err != nil { + buildState = &state.State{ + Region: region, + PostgresVersion: postgresVersion, + GitSHA: sha, + } + } + + buildState.SetPhaseState("phase2", &state.PhaseState{ + ExecutionID: executionID, + AMIID: amiID, // Will be empty if not found + Timestamp: time.Now().Format(time.RFC3339), + }) + + if err := state.SaveState(stateFilePath, buildState); err != nil { + fmt.Printf("⚠ Could not save state: %v\n", err) + } else { + fmt.Printf("✓ State saved to: %s\n", stateFilePath) + } + fmt.Println("\n✓ Build phase 2 complete!") + if amiID != "" { + fmt.Printf("\nProduction AMI ready: %s\n", amiID) + } else { + fmt.Printf("\nNote: AMI ID not automatically detected. 
You can manually add it to:\n") + fmt.Printf(" %s\n", stateFilePath) + } return nil } diff --git a/nix/packages/pg-ami-builder/cmd/cleanup.go b/nix/packages/pg-ami-builder/cmd/cleanup.go index dbeba7103..98e055f5f 100644 --- a/nix/packages/pg-ami-builder/cmd/cleanup.go +++ b/nix/packages/pg-ami-builder/cmd/cleanup.go @@ -122,12 +122,33 @@ func runCleanup(cmd *cobra.Command, args []string) error { fmt.Println("✓ Instance terminated") - // Clear state file - if stateFilePath != "" { - if err := state.ClearState(stateFilePath); err != nil { - fmt.Printf("Warning: failed to clear state file: %v\n", err) - } else { - fmt.Println("✓ State file cleared") + // Clear phase-specific state + if stateFilePath != "" && cleanupPhase != "" { + // Load state, clear just this phase, save back + buildState, err := state.LoadState(stateFilePath) + if err == nil { + // Clear this specific phase + buildState.SetPhaseState(cleanupPhase, nil) + + // Check if both phases are now empty + phase1Empty := buildState.Phase1 == nil || (buildState.Phase1.InstanceID == "" && buildState.Phase1.AMIID == "") + phase2Empty := buildState.Phase2 == nil || (buildState.Phase2.InstanceID == "" && buildState.Phase2.AMIID == "") + + if phase1Empty && phase2Empty { + // Both phases empty, delete entire file + if err := state.ClearState(stateFilePath); err != nil { + fmt.Printf("⚠ Failed to clear state file: %v\n", err) + } else { + fmt.Println("✓ State file cleared (all phases cleaned)") + } + } else { + // Save updated state with this phase cleared + if err := state.SaveState(stateFilePath, buildState); err != nil { + fmt.Printf("⚠ Failed to update state file: %v\n", err) + } else { + fmt.Printf("✓ State file updated (%s cleared)\n", cleanupPhase) + } + } } } diff --git a/nix/packages/pg-ami-builder/cmd/ssh.go b/nix/packages/pg-ami-builder/cmd/ssh.go index 6806aed19..976d16c23 100644 --- a/nix/packages/pg-ami-builder/cmd/ssh.go +++ b/nix/packages/pg-ami-builder/cmd/ssh.go @@ -13,6 +13,7 @@ import ( var ( 
+ sshInstanceID string + sshPhase string awsEC2ConnectCmd string ) @@ -51,12 +52,31 @@ func runSSH(cmd *cobra.Command, args []string) error { return fmt.Errorf("failed to load state: %w", err) } - instanceID = buildState.InstanceID region = buildState.Region + + // Get instance ID from phase-specific state + if sshPhase != "" { + phaseState := buildState.GetPhaseState(sshPhase) + if phaseState == nil || phaseState.InstanceID == "" { + return fmt.Errorf("no instance found for %s in state file", sshPhase) + } + instanceID = phaseState.InstanceID + } else { + // Auto-detect: prefer phase2, then phase1, then legacy + if buildState.Phase2 != nil && buildState.Phase2.InstanceID != "" { + instanceID = buildState.Phase2.InstanceID + fmt.Println("✓ Auto-detected phase2 instance") + } else if buildState.Phase1 != nil && buildState.Phase1.InstanceID != "" { + instanceID = buildState.Phase1.InstanceID + fmt.Println("✓ Auto-detected phase1 instance") + } else if buildState.InstanceID != "" { + instanceID = buildState.InstanceID + } + } } if instanceID == "" { - return fmt.Errorf("no instance ID available (use --instance-id or run build command first)") + return fmt.Errorf("no instance ID available (use --instance-id or --phase, or run build command first)") } // Connect via EC2 Instance Connect @@ -147,6 +167,7 @@ func init() { rootCmd.AddCommand(sshCmd) sshCmd.Flags().StringVar(&sshInstanceID, "instance-id", "", "Target specific instance (default: from state file)") + sshCmd.Flags().StringVar(&sshPhase, "phase", "", "Connect to specific phase instance (phase1 or phase2)") sshCmd.Flags().StringVar(&region, "region", "us-east-1", "AWS region") sshCmd.Flags().StringVar(&awsEC2ConnectCmd, "aws-ec2-connect-cmd", "", "Custom AWS EC2 Instance Connect command (e.g., 'aws ec2-instance-connect ssh --instance-id i-xxx ...')") } diff --git a/nix/packages/pg-ami-builder/internal/packer/runner.go b/nix/packages/pg-ami-builder/internal/packer/runner.go index 6a6d5fabb..16c2ef58a 100644 --- 
a/nix/packages/pg-ami-builder/internal/packer/runner.go +++ b/nix/packages/pg-ami-builder/internal/packer/runner.go @@ -45,6 +45,22 @@ func RewriteTemplateWithUniqueAMI(templatePath, executionID, sourceAMI string) ( return strings.TrimSuffix(match, `"`) + `-${var.packer-execution-id}"` }) + // Add packerExecutionId to AMI tags (not run_tags) + // Find the tags = { block (after run_tags) and inject packerExecutionId + tagsPattern := regexp.MustCompile(`(?s)(tags\s*=\s*\{)([^}]*creator\s*=\s*"packer"[^}]*)(\})`) + modified = tagsPattern.ReplaceAllStringFunc(modified, func(match string) string { + // Check if packerExecutionId already exists in this tags block + if strings.Contains(match, "packerExecutionId") { + return match // Already has it + } + // Insert packerExecutionId after the opening brace + parts := tagsPattern.FindStringSubmatch(match) + if len(parts) == 4 { + return parts[1] + "\n packerExecutionId = \"${var.packer-execution-id}\"" + parts[2] + parts[3] + } + return match + }) + // Create temp file tempDir := os.TempDir() tempFile := filepath.Join(tempDir, fmt.Sprintf("packer-%s-%s", executionID, filepath.Base(templatePath))) From 95f4e5b34aae480129cae5c816d09474bcf23f48 Mon Sep 17 00:00:00 2001 From: Sam Rose Date: Wed, 19 Nov 2025 13:24:14 -0500 Subject: [PATCH 4/4] docs: update docs --- docs/ami-local-development.md | 38 +++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/docs/ami-local-development.md b/docs/ami-local-development.md index 6cde482b2..c6b819bf8 100644 --- a/docs/ami-local-development.md +++ b/docs/ami-local-development.md @@ -2,6 +2,18 @@ This guide explains how to use `pg-ami-builder` for local AMI development and iteration. 
+ Summary + + | Aspect | CI/CD Workflows | pg-ami-builder | + |--------------------|---------------------------------|-----------------------------------------| + | AMI Creation | Packer auto-creates only | Packer auto-creates + manual create-ami | + | Workflow | Linear, automated | Iterative, debuggable | + | State | Stateless, ephemeral | Stateful, persistent | + | Error Handling | Terminate and restart | Preserve, debug, fix, continue | + | Use Case | Production releases, CI testing | Local development, iteration | + | Instance Lifecycle | Always terminated | Preserved for debugging | + + ## Prerequisites ### Required Tools @@ -36,25 +48,25 @@ Your AWS user/role needs these permissions: ```bash # Run phase 1 build (launches instance and runs packer build) -aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 --postgres-version 15 +aws-vault exec -- nix run .#pg-ami-builder -- build phase1 --postgres-version 15 # If packer build fails, instance stays alive for debugging # SSH to investigate -aws-vault exec dev -- nix run .#pg-ami-builder -- ssh +aws-vault exec -- nix run .#pg-ami-builder -- ssh # Make local changes and re-run with file sync vim ansible/playbook.yml -aws-vault exec dev -- nix run .#pg-ami-builder -- ansible-rerun phase1 --sync-files +aws-vault exec -- nix run .#pg-ami-builder -- ansible-rerun phase1 --sync-files # Cleanup when done -aws-vault exec dev -- nix run .#pg-ami-builder -- cleanup +aws-vault exec -- nix run .#pg-ami-builder -- cleanup ``` ### Building Phase 2 ```bash # Run phase 2 with existing stage-1 AMI -aws-vault exec dev -- nix run .#pg-ami-builder -- build phase2 \ +aws-vault exec -- nix run .#pg-ami-builder -- build phase2 \ --source-ami ami-stage1-xyz \ --postgres-version 15 ``` @@ -161,40 +173,40 @@ nix run .#pg-ami-builder -- cleanup [flags] ```bash # Run phase 1 build (launches instance and runs packer build) -aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 --postgres-version 15 +aws-vault exec -- nix 
run .#pg-ami-builder -- build phase1 --postgres-version 15 # If packer fails, instance stays up for debugging # SSH to investigate -aws-vault exec dev -- nix run .#pg-ami-builder -- ssh +aws-vault exec -- nix run .#pg-ami-builder -- ssh # Make local changes to ansible files vim ansible/playbook.yml # Re-run with your local changes -aws-vault exec dev -- nix run .#pg-ami-builder -- ansible-rerun phase1 --sync-files +aws-vault exec -- nix run .#pg-ami-builder -- ansible-rerun phase1 --sync-files # Repeat until working, then create AMI -aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 --postgres-version 15 --create-ami +aws-vault exec -- nix run .#pg-ami-builder -- build phase1 --postgres-version 15 --create-ami # Cleanup -aws-vault exec dev -- nix run .#pg-ami-builder -- cleanup +aws-vault exec -- nix run .#pg-ami-builder -- cleanup ``` ### Workflow 2: Parallel builds for multiple postgres versions ```bash # Build PG 15 -aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 \ +aws-vault exec -- nix run .#pg-ami-builder -- build phase1 \ --postgres-version 15 \ --state-file ~/.pg-ami-build/pg15.json # Build PG 16 in parallel -aws-vault exec dev -- nix run .#pg-ami-builder -- build phase1 \ +aws-vault exec -- nix run .#pg-ami-builder -- build phase1 \ --postgres-version 16 \ --state-file ~/.pg-ami-build/pg16.json # SSH into PG 15 instance -aws-vault exec dev -- nix run .#pg-ami-builder -- ssh \ +aws-vault exec -- nix run .#pg-ami-builder -- ssh \ --state-file ~/.pg-ami-build/pg15.json ```