diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..9f0a1ec --- /dev/null +++ b/.editorconfig @@ -0,0 +1,32 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +# All files +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +# TOML files +[*.toml] +indent_style = space +indent_size = 2 +max_line_length = 100 + +# JSON files +[*.json] +indent_style = space +indent_size = 2 + +# Markdown files +[*.md] +trim_trailing_whitespace = false +max_line_length = 80 + +# YAML files +[*.{yml,yaml}] +indent_style = space +indent_size = 2 diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index bf7643e..d1ffbbc 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,20 +1,404 @@ -This repository contains all the configuration needed to run the live Torrust Tracker demo. - -The main goal is to provide a simple and easy-to-use setup for the Torrust Tracker, which can be deployed on a single server. - -The current major initiative is to migrate the tracker to a new infrastructure on Hetzner. This involves: -- Running the tracker binary directly on the host for performance. -- Using Docker for supporting services like Nginx, Prometheus, Grafana and MySQL. -- Migrating the database from SQLite to MySQL. - -When providing assistance, please act as an experienced open-source developer and system administrator. - -Follow these conventions: -- Use Conventional Commits for commit messages. Include the issue number in this format `#1` in the commit message if applicable, e.g., `feat: [#1] add new feature`. - - The issue number should be the branch prefix, e.g., `feat: [#1] add new feature` for branch `1-add-new-feature`. -- We use the proposed GitHub branch naming convention: - - Starts with a number indicating the issue number. - - Followed by a hyphen and a short description of the feature or fix. 
- - Uses hyphens to separate words, e.g., `1-add-new-feature`. -- Ensure that shell scripts are POSIX-compliant. -- Provide clear and concise documentation for any new features or changes. +# Torrust Tracker Demo - Contributor Guide + +## 🎯 Project Overview + +**Torrust Tracker Demo** is the complete production deployment configuration for running a live [Torrust Tracker](https://github.com/torrust/torrust-tracker) instance. This repository provides: + +- **Production deployment** configurations for Hetzner cloud infrastructure +- **Local testing environment** using KVM/libvirt virtualization +- **Infrastructure as Code** approach using OpenTofu/Terraform and cloud-init +- **Monitoring setup** with Grafana dashboards and Prometheus metrics +- **Automated deployment** scripts and Docker Compose configurations + +### Current Major Initiative + +We are migrating the tracker to a new infrastructure on Hetzner, involving: + +- Running the tracker binary directly on the host for performance +- Using Docker for supporting services (Nginx, Prometheus, Grafana, MySQL) +- Migrating the database from SQLite to MySQL +- Implementing Infrastructure as Code for reproducible deployments + +## 📁 Repository Structure + +```text +torrust-tracker-demo/ +├── .github/ +│ ├── workflows/ # GitHub Actions CI/CD pipelines +│ └── copilot-instructions.md # This contributor guide +├── docs/ +│ ├── adr/ # Architecture Decision Records +│ │ └── 001-makefile-location.md # Makefile location decision +│ └── README.md # Cross-cutting documentation index +├── infrastructure/ # Infrastructure as Code +│ ├── terraform/ # OpenTofu/Terraform configurations +│ │ ├── main.tf # VM and infrastructure definition +│ │ └── terraform.tfvars.example # Example configuration +│ ├── cloud-init/ # VM provisioning templates +│ │ ├── user-data.yaml.tpl # Main system configuration +│ │ ├── user-data-minimal.yaml.tpl # Debug configuration +│ │ ├── meta-data.yaml # VM metadata +│ │ └── network-config.yaml # Network setup 
+│ ├── scripts/ # Infrastructure automation scripts +│ ├── tests/ # Infrastructure validation tests +│ ├── docs/ # Infrastructure documentation +│ │ ├── quick-start.md # Fast setup guide +│ │ ├── local-testing-setup.md # Detailed setup +│ │ ├── infrastructure-overview.md # Architecture overview +│ │ ├── testing/ # Testing documentation +│ │ └── third-party/ # Third-party setup guides +│ ├── .gitignore # Infrastructure-specific ignores +│ └── README.md # Infrastructure overview +├── application/ # Application deployment and services +│ ├── share/ +│ │ ├── bin/ # Deployment and utility scripts +│ │ ├── container/ # Docker service configurations +│ │ ├── dev/ # Development configs +│ │ └── grafana/ # Grafana dashboards +│ ├── docs/ # Application documentation +│ │ ├── production-setup.md # Production deployment docs +│ │ ├── deployment.md # Deployment procedures +│ │ ├── firewall-requirements.md # Application firewall requirements +│ │ ├── useful-commands.md # Operational commands +│ │ └── media/ # Screenshots and diagrams +│ ├── compose.yaml # Docker Compose for services +│ ├── .env.production # Production environment template +│ ├── .gitignore # Application-specific ignores +│ └── README.md # Application overview +├── Makefile # Main automation interface +└── *.md # Project root documentation +``` + +### Key Components + +#### 🏗️ Infrastructure (`infrastructure/`) + +- **OpenTofu/Terraform**: Declarative infrastructure configuration +- **Cloud-init**: Automated VM provisioning and setup +- **Scripts**: Automation helpers for libvirt, monitoring, etc. 
+- **Tests**: Infrastructure validation and integration tests +- **Documentation**: Infrastructure-specific guides and references + +#### 🐳 Application Services (`application/`) + +- **Docker Compose**: Service orchestration configuration +- **Service Configs**: Nginx, Grafana, Prometheus configurations +- **Deployment Scripts**: Application deployment and utility scripts +- **Documentation**: Production setup, deployment, and operational guides + +#### 📚 Documentation (`docs/`) + +- **Cross-cutting**: Project-wide documentation and ADRs +- **Architecture Decisions**: Documented design choices and rationale + +## 🛠️ Development Workflow + +### Quick Start for Contributors + +```bash +# 1. Clone and setup +git clone https://github.com/torrust/torrust-tracker-demo.git +cd torrust-tracker-demo + +# 2. Install dependencies (Ubuntu/Debian) +make install-deps + +# 3. Setup SSH key for VMs +make setup-ssh-key + +# 4. Test infrastructure locally +make apply # Deploy test VM +make ssh # Connect to VM +make destroy # Cleanup + +# 5. 
Run tests +make test # Full infrastructure test +make test-syntax # Syntax validation only +``` + +### Main Commands + +| Command | Purpose | +| ------------------------- | ------------------------------------------- | +| `make help` | Show all available commands | +| `make install-deps` | Install OpenTofu, libvirt, KVM, virt-viewer | +| `make test` | Run complete infrastructure tests | +| `make apply` | Deploy VM with full configuration | +| `make apply-minimal` | Deploy VM with minimal config | +| `make ssh` | Connect to deployed VM | +| `make console` | Access VM console (text-based) | +| `make vm-console` | Access VM graphical console (GUI) | +| `make destroy` | Remove deployed VM | +| `make monitor-cloud-init` | Watch VM provisioning progress | + +## 📋 Conventions and Standards + +### Git Workflow + +#### Branch Naming + +- **Format**: `{issue-number}-{short-description}` +- **Examples**: `42-add-mysql-support`, `15-fix-ssl-renewal` +- Always start with the GitHub issue number + +#### Commit Messages + +- **Format**: Conventional Commits with issue references +- **Structure**: `{type}: [#{issue}] {description}` +- **Examples**: + ``` + feat: [#42] add MySQL database support + fix: [#15] resolve SSL certificate renewal issue + docs: [#8] update deployment guide + ci: [#23] add infrastructure validation tests + ``` + +#### Commit Types + +- `feat`: New features +- `fix`: Bug fixes +- `docs`: Documentation changes +- `ci`: CI/CD pipeline changes +- `refactor`: Code refactoring +- `test`: Test additions/changes +- `chore`: Maintenance tasks + +### Code Quality Standards + +#### Shell Scripts + +- **POSIX Compliance**: All shell scripts must be POSIX-compliant +- **Linting**: Use [ShellCheck](https://github.com/koalaman/shellcheck) +- **Error Handling**: Use `set -euo pipefail` for strict error handling +- **Documentation**: Include help functions and clear comments + +#### YAML Files + +- **Linting**: Use [yamllint](https://yamllint.readthedocs.io/en/stable/) +- 
**Configuration**: Follow `.yamllint-ci.yml` rules +- **Formatting**: 2-space indentation, 120-character line limit +- **Comments**: Use `# ` (space after hash) for comments + +#### Markdown Documentation + +- **Linting**: Follow [markdownlint](https://github.com/DavidAnson/markdownlint) conventions +- **Structure**: Use consistent heading hierarchy +- **Links**: Prefer relative links for internal documentation +- **Code blocks**: Always specify language for syntax highlighting + +#### Automated Linting + +The project includes a comprehensive linting script that validates all file types: + +```bash +./scripts/lint.sh # Run all linters +./scripts/lint.sh --yaml # Run only yamllint +./scripts/lint.sh --shell # Run only ShellCheck +./scripts/lint.sh --markdown # Run only markdownlint +``` + +**IMPORTANT**: Always run `./scripts/lint.sh` before committing to ensure code quality standards are met. + +#### TOML Configuration Files + +- **Formatting**: Use [Even Better TOML](https://marketplace.visualstudio.com/items?itemName=tamasfe.even-better-toml) extension for VS Code +- **Style**: Blank lines between sections, 2-space indentation, preserve comments +- **Configuration**: Project includes `.taplo.toml` and `.vscode/settings.json` for consistent formatting +- **Key conventions**: + - Blank line before each `[section]` and `[[array]]` + - Detailed comments for port configurations + - Preserve logical grouping and order + - No automatic key reordering + +#### Infrastructure as Code + +- **Validation**: All Terraform/OpenTofu must pass `tofu validate` +- **Planning**: Test with `tofu plan` before applying +- **Variables**: Use `terraform.tfvars` for sensitive/local config (git-ignored) +- **Templates**: Use `.tpl` extension for templated files + +### Testing Requirements + +#### Infrastructure Tests + +- **Syntax validation**: All configurations must pass linting +- **Local deployment**: Must successfully deploy and provision VMs +- **Service validation**: All services 
must start and be accessible +- **Network testing**: Ports and firewall rules must be correct + +#### CI/CD Requirements + +- **GitHub Actions**: All PRs must pass CI validation +- **No secrets**: Never commit SSH keys, passwords, or tokens +- **Documentation**: Update docs for any infrastructure changes + +### Security Guidelines + +#### Secrets Management + +- **SSH Keys**: Use template variables, store in `terraform.tfvars` +- **Git Ignore**: Distributed `.gitignore` files in each component (root, infrastructure, application) +- **Environment Variables**: Use environment variables for CI/CD secrets +- **Review**: All security-related changes require review + +#### Infrastructure Security + +- **UFW Firewall**: Only open required ports +- **SSH Access**: Key-based authentication only +- **Updates**: Enable automatic security updates +- **Monitoring**: Log security events and access + +## 🚀 Getting Started + +### For New Contributors + +1. **Read the documentation**: + + - [Quick Start Guide](../infrastructure/docs/quick-start.md) + - [Complete Setup Guide](../infrastructure/docs/local-testing-setup.md) + - [Production Setup Guide](../application/docs/production-setup.md) + +2. **Set up your development environment**: + + ```bash + make install-deps # Install dependencies + make setup-ssh-key # Configure SSH access + make test-prereq # Verify setup + ``` + +3. 
**Install recommended VS Code extensions**: + + - **[Even Better TOML](https://marketplace.visualstudio.com/items?itemName=tamasfe.even-better-toml)** - TOML syntax highlighting and formatting + - **[ShellCheck](https://marketplace.visualstudio.com/items?itemName=timonwong.shellcheck)** - Shell script linting + - **[YAML](https://marketplace.visualstudio.com/items?itemName=redhat.vscode-yaml)** - YAML support with schema validation + - **[markdownlint](https://marketplace.visualstudio.com/items?itemName=DavidAnson.vscode-markdownlint)** - Markdown linting + - **[HashiCorp Terraform](https://marketplace.visualstudio.com/items?itemName=HashiCorp.terraform)** - Terraform/OpenTofu support + +4. **Configure VS Code workspace**: + + - Project includes `.vscode/settings.json` with TOML formatting configuration + - Extensions will use project-specific settings automatically + - Reload VS Code after installing extensions for settings to take effect + +5. **TOML Formatting Setup**: + + - **Configuration files**: `.taplo.toml` and `.vscode/settings.json` control formatting + - **Format on save**: TOML files auto-format when saved (`Ctrl+S`) + - **Manual format**: Use `Shift+Alt+F` (Windows/Linux) or `Shift+Option+F` (Mac) + - **Style**: Blank lines between sections, 2-space indentation, preserved comments + - **Reload required**: After changing settings, reload VS Code window (`Ctrl+Shift+P` → "Developer: Reload Window") + +6. **Test a simple change**: + + ```bash + make apply # Deploy test VM + make ssh # Verify access + make destroy # Clean up + ``` + +7. **Review existing issues**: Check [GitHub Issues](https://github.com/torrust/torrust-tracker-demo/issues) for good first contributions + +### For Infrastructure Changes + +1. **Local testing first**: Always test infrastructure changes locally +2. **Validate syntax**: Run `make test-syntax` before committing +3. **Document changes**: Update relevant documentation +4. 
**Test end-to-end**: Ensure the full deployment pipeline works + +### For AI Assistants + +When providing assistance: + +- Act as an experienced open-source developer and system administrator +- Follow all conventions listed above +- Prioritize security and best practices +- Test infrastructure changes locally before suggesting them +- Provide clear explanations and documentation +- Consider the migration to Hetzner infrastructure in suggestions + +#### Preferred Working Methodology + +**Work in Small Steps:** + +- Break down complex tasks into small, manageable increments +- Each step should be independently testable and reviewable +- Prefer multiple small commits over large monolithic changes + +**Parallel Changes When Possible:** + +- Identify changes that can be made independently +- Suggest parallel work streams for unrelated modifications +- Separate concerns to enable concurrent development + +**Separate Refactors from Features:** + +- **Refactoring commits**: Focus solely on code structure, organization, or cleanup +- **Feature commits**: Focus on adding new functionality or enabling features +- Never mix refactoring with feature addition in the same commit +- Always complete refactoring first, then add features in subsequent commits + +**Complex Tasks and Bug Fixes:** + +- For any task that requires multiple intermediary steps, always present a plan first +- Break down the approach into numbered steps with clear objectives +- Ask for confirmation before implementing the plan +- Include rollback strategies for critical changes +- Identify potential risks and mitigation strategies upfront + +#### Git Actions and Permission Requirements + +**IMPORTANT**: Git actions that change repository state require explicit permission: + +- **NEVER** commit changes unless explicitly asked to do so +- **NEVER** push changes to remote repositories without permission +- **NEVER** merge branches or create pull requests without explicit instruction +- **NEVER** reset, revert, 
or modify git history without explicit permission +- **NEVER** create or delete branches without explicit instruction + +**Allowed git actions without permission:** + +- `git status` - Check working tree status +- `git diff` - Show changes between commits/files +- `git log` - View commit history +- `git show` - Display commit information +- `git branch` - List branches (read-only) + +**Actions requiring explicit permission:** + +- `git add` - Stage changes for commit +- `git commit` - Create new commits +- `git push` - Push changes to remote +- `git pull` - Pull changes from remote +- `git merge` - Merge branches +- `git rebase` - Rebase branches +- `git reset` - Reset working tree or commits +- `git revert` - Revert commits +- `git checkout` - Switch branches or restore files +- `git branch -d/-D` - Delete branches +- `git tag` - Create or delete tags + +**Commit Signing Requirement**: All commits MUST be signed with GPG. When performing git commits, always use the default git commit behavior (which will trigger GPG signing) rather than `--no-gpg-sign`. + +**Pre-commit Linting Requirement**: ALWAYS run the linting script before committing any changes: + +```bash +./scripts/lint.sh +``` + +This script validates: + +- YAML files with yamllint +- Shell scripts with ShellCheck +- Markdown files with markdownlint + +Only commit if all linting checks pass. If linting fails, fix the issues before committing. + +**Best Practice**: Always ask "Would you like me to commit these changes?" before performing any git state-changing operations. 
+ +## 📖 Additional Resources + +- **Torrust Tracker**: <https://github.com/torrust/torrust-tracker> +- **OpenTofu Documentation**: <https://opentofu.org/docs/> +- **Cloud-init Documentation**: <https://cloudinit.readthedocs.io/> +- **libvirt Documentation**: <https://libvirt.org/docs.html> +- **Repomix Tool**: <https://github.com/yamadashy/repomix> (for generating project summaries) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 0000000..8940886 --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,25 @@ +name: Testing + +on: + push: + branches: [main, develop] + pull_request: + branches: [main, develop] + +jobs: + lint: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install linting tools + run: | + sudo apt-get update + sudo apt-get install -y yamllint shellcheck + sudo npm install -g markdownlint-cli + + - name: Run linting script + run: | + ./scripts/lint.sh diff --git a/.gitignore b/.gitignore index 26737bd..7d3d992 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,13 @@ -.env +# Project-level gitignore - cross-cutting files only +# Specific ignores are in infrastructure/.gitignore and application/.gitignore + +# Repository organization tool output repomix-output.xml -storage/ + +# Testing dependencies (when cloned locally for smoke testing) +torrust-tracker/ + +# Note: This repository uses distributed .gitignore files: +# - infrastructure/.gitignore: Infrastructure-specific files (Terraform, VMs, etc.) +# - application/.gitignore: Application-specific files (Docker, logs, data, etc.)
+# - .gitignore (this file): Project-wide cross-cutting files only diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 0000000..d3e2b98 --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,19 @@ +{ + "default": true, + "MD013": { + "line_length": 100 + }, + "MD031": true, + "MD032": true, + "MD040": true, + "MD022": true, + "MD009": true, + "MD007": { + "indent": 2 + }, + "MD026": false, + "MD041": false, + "MD034": false, + "MD024": false, + "MD033": false +} \ No newline at end of file diff --git a/.taplo.toml b/.taplo.toml new file mode 100644 index 0000000..8e7be21 --- /dev/null +++ b/.taplo.toml @@ -0,0 +1,31 @@ +# Taplo (Even Better TOML) configuration +# This configures the TOML formatter for this project +[formatting] +# Column width for wrapping +column_width = 100 +# Indentation (2 spaces) +indent_string = " " +# Alignment and spacing +align_entries = false +align_comments = true +indent_tables = false +indent_entries = false +# Compact settings - set to false to preserve spacing +compact_arrays = false +compact_inline_tables = false +compact_entries = false +# Array formatting +array_trailing_comma = true +array_auto_expand = true +array_auto_collapse = false +# Inline table settings +inline_table_expand = true +# Blank lines - allow up to 2 consecutive blank lines +allowed_blank_lines = 2 +# Preserve key/array order (don't reorder across blank lines) +reorder_keys = false +reorder_arrays = false +reorder_inline_tables = false +# Line endings and trailing newline +trailing_newline = true +crlf = false diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..7af45ba --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,27 @@ +{ + // Even Better TOML formatter configuration (camelCase for VS Code) + "evenBetterToml.formatter.columnWidth": 100, + "evenBetterToml.formatter.indentString": " ", + "evenBetterToml.formatter.alignEntries": false, + "evenBetterToml.formatter.alignComments": true, + 
"evenBetterToml.formatter.indentTables": false, + "evenBetterToml.formatter.indentEntries": false, + "evenBetterToml.formatter.compactArrays": false, + "evenBetterToml.formatter.compactInlineTables": false, + "evenBetterToml.formatter.compactEntries": false, + "evenBetterToml.formatter.arrayTrailingComma": true, + "evenBetterToml.formatter.arrayAutoExpand": true, + "evenBetterToml.formatter.arrayAutoCollapse": false, + "evenBetterToml.formatter.inlineTableExpand": true, + "evenBetterToml.formatter.allowedBlankLines": 2, + "evenBetterToml.formatter.reorderKeys": false, + "evenBetterToml.formatter.reorderArrays": false, + "evenBetterToml.formatter.reorderInlineTables": false, + "evenBetterToml.formatter.trailingNewline": true, + "evenBetterToml.formatter.crlf": false, + // Format on save for TOML files + "[toml]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "tamasfe.even-better-toml" + } +} \ No newline at end of file diff --git a/.yamllint-ci.yml b/.yamllint-ci.yml new file mode 100644 index 0000000..c700212 --- /dev/null +++ b/.yamllint-ci.yml @@ -0,0 +1,10 @@ +extends: default + +rules: + line-length: + max: 120 # More reasonable for infrastructure code + comments: + min-spaces-from-content: 1 # Allow single space before comments + document-start: disable # Cloud-init files don't need --- start + truthy: + allowed-values: ["true", "false", "yes", "no", "on", "off"] # Allow cloud-init and GitHub Actions common values diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e6cd2e3 --- /dev/null +++ b/Makefile @@ -0,0 +1,344 @@ +# Makefile for Torrust Tracker Local Testing Infrastructure +.PHONY: help init plan apply destroy test clean status refresh-state ssh install-deps console vm-console lint lint-yaml lint-shell lint-markdown + +# Default variables +VM_NAME ?= torrust-tracker-demo +TERRAFORM_DIR = infrastructure/terraform +TESTS_DIR = infrastructure/tests + +# Help target +help: ## Show this help message + @echo "Torrust Tracker 
Local Testing Infrastructure" + @echo "" + @echo "Available targets:" + @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " %-20s %s\n", $$1, $$2}' $(MAKEFILE_LIST) + +install-deps: ## Install required dependencies (Ubuntu/Debian) + @echo "Installing dependencies..." + sudo apt update + sudo apt install -y qemu-kvm libvirt-daemon-system libvirt-clients bridge-utils virt-manager virt-viewer genisoimage + sudo usermod -aG libvirt $$USER + sudo usermod -aG kvm $$USER + sudo systemctl enable libvirtd + sudo systemctl start libvirtd + @echo "Setting up libvirt storage and permissions..." + @sudo virsh pool-define-as default dir --target /var/lib/libvirt/images || true + @sudo virsh pool-autostart default || true + @sudo virsh pool-start default || true + @sudo chown -R libvirt-qemu:libvirt /var/lib/libvirt/images/ || true + @sudo chmod -R 755 /var/lib/libvirt/images/ || true + @echo "Installing OpenTofu..." + curl -fsSL https://get.opentofu.org/install-opentofu.sh -o install-opentofu.sh + chmod +x install-opentofu.sh + sudo ./install-opentofu.sh --install-method deb + rm install-opentofu.sh + @echo "Dependencies installed. Please log out and log back in for group changes to take effect." + +init: ## Initialize OpenTofu + @echo "Initializing OpenTofu..." + cd $(TERRAFORM_DIR) && tofu init + +plan: ## Show what OpenTofu will do + @echo "Planning infrastructure changes..." + @if [ -f $(TERRAFORM_DIR)/local.tfvars ]; then \ + cd $(TERRAFORM_DIR) && tofu plan -var-file="local.tfvars"; \ + else \ + echo "WARNING: No local.tfvars found. Please create it first with 'make setup-ssh-key'"; \ + exit 1; \ + fi + +apply-minimal: ## Deploy VM with minimal cloud-init configuration + @echo "Ensuring libvirt permissions are correct..." + @$(MAKE) fix-libvirt + @echo "Deploying VM with minimal configuration..." + cd $(TERRAFORM_DIR) && tofu apply -var-file="local.tfvars" -var="use_minimal_config=true" -parallelism=1 -auto-approve + @echo "Fixing permissions after deployment..." 
+ @$(MAKE) fix-libvirt + +apply: ## Deploy the VM + @echo "Ensuring libvirt permissions are correct..." + @$(MAKE) fix-libvirt + @echo "Deploying VM..." + @if [ -f $(TERRAFORM_DIR)/local.tfvars ]; then \ + echo "Using local SSH key configuration..."; \ + cd $(TERRAFORM_DIR) && tofu apply -var-file="local.tfvars" -parallelism=1 -auto-approve; \ + else \ + echo "WARNING: No local.tfvars found. Creating with placeholder..."; \ + echo 'ssh_public_key = "REPLACE_WITH_YOUR_SSH_PUBLIC_KEY"' > $(TERRAFORM_DIR)/local.tfvars; \ + echo "Please edit $(TERRAFORM_DIR)/local.tfvars with your SSH public key and run 'make apply' again"; \ + exit 1; \ + fi + @echo "Fixing permissions after deployment..." + @$(MAKE) fix-libvirt + +destroy: ## Destroy the VM + @echo "Destroying VM..." + cd $(TERRAFORM_DIR) && tofu destroy -auto-approve + +status: ## Show current infrastructure status + @echo "Infrastructure status:" + cd $(TERRAFORM_DIR) && tofu show + +refresh-state: ## Refresh Terraform state to detect IP changes + @echo "Refreshing Terraform state..." + cd $(TERRAFORM_DIR) && tofu refresh + @echo "Updated outputs:" + cd $(TERRAFORM_DIR) && tofu output + +ssh: ## SSH into the VM + @echo "Connecting to VM..." + @VM_IP=$$(virsh domifaddr $(VM_NAME) | grep ipv4 | awk '{print $$4}' | cut -d'/' -f1); \ + if [ -n "$$VM_IP" ]; then \ + echo "Connecting to $$VM_IP..."; \ + ssh torrust@$$VM_IP; \ + else \ + echo "Could not get VM IP. Is the VM deployed?"; \ + exit 1; \ + fi + +test: ## Run all tests + @echo "Running infrastructure tests..." + $(TESTS_DIR)/test-local-setup.sh full-test + +test-prereq: ## Test prerequisites only + @echo "Testing prerequisites..." + $(TESTS_DIR)/test-local-setup.sh prerequisites + +check-libvirt: ## Check libvirt installation and permissions + @echo "Checking libvirt setup..." + @echo "1. Checking if libvirt service is running:" + @sudo systemctl status libvirtd --no-pager -l || echo "libvirtd not running" + @echo "" + @echo "2. 
Checking user groups:" + @groups | grep -q libvirt && echo "✓ User is in libvirt group" || echo "✗ User is NOT in libvirt group" + @groups | grep -q kvm && echo "✓ User is in kvm group" || echo "✗ User is NOT in kvm group" + @echo "" + @echo "3. Testing libvirt access:" + @virsh list --all >/dev/null 2>&1 && echo "✓ User can access libvirt" || echo "✗ User cannot access libvirt (try 'sudo virsh list')" + @echo "" + @echo "4. Checking default network:" + @virsh net-list --all 2>/dev/null | grep -q default && echo "✓ Default network exists" || echo "✗ Default network missing" + @echo "" + @echo "5. Checking KVM support:" + @test -r /dev/kvm && echo "✓ KVM device accessible" || echo "✗ KVM device not accessible" + @echo "" + @echo "If you see any ✗ marks, run 'make fix-libvirt' to attempt fixes" + +fix-libvirt: ## Fix common libvirt permission issues + @echo "Setting up user-friendly libvirt configuration..." + @infrastructure/scripts/setup-user-libvirt.sh + @echo "Attempting to fix libvirt permissions..." + @echo "Adding user to required groups..." + sudo usermod -aG libvirt $$USER + sudo usermod -aG kvm $$USER + @echo "Starting libvirt service..." + sudo systemctl enable libvirtd + sudo systemctl start libvirtd + @echo "Checking if default network needs to be started..." + @sudo virsh net-list --all | grep -q "default.*inactive" && sudo virsh net-start default || true + @sudo virsh net-autostart default 2>/dev/null || true + @echo "" + @echo "✓ Fix attempt completed!" + @echo "IMPORTANT: You need to log out and log back in (or run 'newgrp libvirt') for group changes to take effect" + @echo "Then run 'make check-libvirt' to verify the fixes worked" + +test-syntax: ## Test configuration syntax only + @echo "Testing configuration syntax..." + $(TESTS_DIR)/test-local-setup.sh syntax + +lint: ## Run all linting checks (yamllint, shellcheck, markdownlint) + @echo "Running linting checks..." 
+ ./scripts/lint.sh + +lint-yaml: ## Run only yamllint + @echo "Running yamllint..." + ./scripts/lint.sh --yaml + +lint-shell: ## Run only shellcheck + @echo "Running shellcheck..." + ./scripts/lint.sh --shell + +lint-markdown: ## Run only markdownlint + @echo "Running markdownlint..." + ./scripts/lint.sh --markdown + +test-integration: ## Run integration tests (requires deployed VM) + @echo "Running integration tests..." + $(TESTS_DIR)/test-integration.sh full-test + +deploy-test: ## Deploy VM for testing (without cleanup) + @echo "Deploying test VM..." + $(TESTS_DIR)/test-local-setup.sh deploy + +clean: ## Clean up temporary files + @echo "Cleaning up..." + rm -f $(TERRAFORM_DIR)/.terraform.lock.hcl + rm -f $(TERRAFORM_DIR)/terraform.tfstate.backup + rm -f install-opentofu.sh + rm -f /tmp/torrust-infrastructure-test.log + +clean-and-fix: ## Clean up all VMs and fix libvirt permissions + @echo "Cleaning up VMs and fixing permissions..." + @echo "1. Stopping and undefining any existing VMs:" + @for vm in $$(virsh list --all --name 2>/dev/null | grep -v '^$$'); do \ + echo " Cleaning up VM: $$vm"; \ + virsh destroy $$vm 2>/dev/null || true; \ + virsh undefine $$vm 2>/dev/null || true; \ + done + @echo "2. Removing OpenTofu state:" + @cd $(TERRAFORM_DIR) && rm -f terraform.tfstate terraform.tfstate.backup .terraform.lock.hcl 2>/dev/null || true + @echo "3. Cleaning libvirt images:" + @sudo rm -f /var/lib/libvirt/images/torrust-tracker-demo* /var/lib/libvirt/images/ubuntu-24.04-base.qcow2 2>/dev/null || true + @echo "4. Fixing libvirt setup:" + @$(MAKE) fix-libvirt + @echo "✓ Clean up complete. You can now run 'make apply' safely." 
+ +# New target for setting up SSH key +setup-ssh-key: ## Setup local SSH key configuration + @if [ -f $(TERRAFORM_DIR)/local.tfvars ]; then \ + echo "Local SSH configuration already exists at $(TERRAFORM_DIR)/local.tfvars"; \ + echo "Current configuration:"; \ + cat $(TERRAFORM_DIR)/local.tfvars; \ + else \ + echo "Creating local SSH key configuration..."; \ + echo 'ssh_public_key = "REPLACE_WITH_YOUR_SSH_PUBLIC_KEY"' > $(TERRAFORM_DIR)/local.tfvars; \ + echo ""; \ + echo "✓ Created $(TERRAFORM_DIR)/local.tfvars"; \ + echo ""; \ + echo "Next steps:"; \ + echo "1. Get your SSH public key:"; \ + echo " cat ~/.ssh/id_rsa.pub"; \ + echo " # or cat ~/.ssh/id_ed25519.pub"; \ + echo ""; \ + echo "2. Edit the file and replace the placeholder:"; \ + echo " vim $(TERRAFORM_DIR)/local.tfvars"; \ + echo ""; \ + echo "3. Deploy the VM:"; \ + echo " make apply"; \ + fi + +restart-and-monitor: ## Destroy, deploy fresh, and monitor cloud-init + @echo "🔄 Complete restart: destroying existing VM..." + @$(MAKE) destroy || true + @echo "🚀 Deploying fresh VM..." + @$(MAKE) apply & + @echo "⏳ Waiting 10 seconds for VM to start..." + @sleep 10 + @echo "📡 Starting cloud-init monitoring..." + @$(MAKE) monitor-cloud-init + +fresh-start: restart-and-monitor ## Alias for restart-and-monitor + +# Development targets +dev-setup: install-deps init fix-libvirt setup-ssh-key ## Complete development setup + @echo "Development environment setup complete!" + @echo "Next steps:" + @echo "1. Log out and log back in for group changes" + @echo "2. Edit $(TERRAFORM_DIR)/local.tfvars with your SSH public key" + @echo "3. Run 'make test-prereq' to verify setup" + @echo "4. Run 'make apply' to deploy a VM" + +quick-test: test-prereq test-syntax ## Quick test without VM deployment + @echo "Quick tests completed!" + +# Help for specific workflows +workflow-help: ## Show common workflows + @echo "Common workflows:" + @echo "" + @echo "1. 
First-time setup:" + @echo " make dev-setup" + @echo " # Log out and log back in" + @echo " # Edit infrastructure/cloud-init/user-data.yaml to add your SSH key" + @echo " make test-prereq" + @echo "" + @echo "2. Deploy and test:" + @echo " make apply" + @echo " make ssh" + @echo " make destroy" + @echo "" + @echo "3. Run full test suite:" + @echo " make test" + @echo "" + @echo "4. Run integration tests:" + @echo " make apply" + @echo " make test-integration" + @echo " make destroy" + @echo "" + @echo "5. Development cycle:" + @echo " make plan # Review changes" + @echo " make apply # Deploy" + @echo " make ssh # Test manually" + @echo " make destroy # Clean up" + +monitor-cloud-init: ## Monitor cloud-init progress in real-time + @echo "Monitoring cloud-init progress..." + @./infrastructure/scripts/monitor-cloud-init.sh + +vm-restart: ## Restart the VM + @echo "Restarting VM..." + virsh shutdown $(VM_NAME) + @echo "Waiting for shutdown..." + @sleep 5 + virsh start $(VM_NAME) + @echo "VM restarted" + +# CI/CD specific targets +ci-test-syntax: ## Test syntax for CI (with dummy values) + @echo "Testing syntax for CI environment..." + @echo "Creating temporary config with dummy values..." + @cd $(TERRAFORM_DIR) && \ + echo 'ssh_public_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC dummy-key-for-ci-testing"' > ci-test.tfvars && \ + tofu init && \ + tofu validate && \ + rm ci-test.tfvars + @echo "Testing cloud-init templates..." + @CI=true $(TESTS_DIR)/test-local-setup.sh syntax + @echo "Testing cloud-init YAML syntax with yamllint..." 
+ @if command -v yamllint >/dev/null 2>&1; then \ + yamllint -c .yamllint-ci.yml infrastructure/cloud-init/network-config.yaml && \ + yamllint -c .yamllint-ci.yml infrastructure/cloud-init/meta-data.yaml && \ + cd infrastructure/cloud-init && \ + sed 's/$${ssh_public_key}/ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC/' user-data.yaml.tpl > /tmp/user-data-test.yaml && \ + sed 's/$${ssh_public_key}/ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC/' user-data-minimal.yaml.tpl > /tmp/user-data-minimal-test.yaml && \ + yamllint -c ../../.yamllint-ci.yml /tmp/user-data-test.yaml && \ + yamllint -c ../../.yamllint-ci.yml /tmp/user-data-minimal-test.yaml && \ + rm -f /tmp/user-data-test.yaml /tmp/user-data-minimal-test.yaml; \ + else \ + echo "yamllint not available, skipping additional YAML validation"; \ + fi + +vm-ip: ## Show VM IP address + @echo "Getting VM IP address..." + @VM_IP=$$(virsh domifaddr $(VM_NAME) | grep ipv4 | awk '{print $$4}' | cut -d'/' -f1); \ + if [ -n "$$VM_IP" ]; then \ + echo "VM IP: $$VM_IP"; \ + else \ + echo "VM IP not assigned yet or VM not running"; \ + echo "VM status:"; \ + virsh list --all | grep $(VM_NAME) || echo "VM not found"; \ + fi + +vm-info: ## Show detailed VM network information + @echo "VM Network Information:" + @echo "======================" + @virsh list --all | grep $(VM_NAME) | head -1 || echo "VM not found" + @echo "" + @echo "Network interfaces:" + @virsh domifaddr $(VM_NAME) 2>/dev/null || echo "No network information available" + @echo "" + @echo "DHCP leases:" + @virsh net-dhcp-leases default 2>/dev/null | grep $(VM_NAME) || echo "No DHCP lease found" + +console: ## Access VM console (text-based) + @echo "Connecting to VM console..." + @echo "Use Ctrl+] to exit console" + @virsh console $(VM_NAME) + +vm-console: ## Access VM graphical console (GUI) + @echo "Opening VM graphical console..." 
+ @if command -v virt-viewer >/dev/null 2>&1; then \ + virt-viewer $(VM_NAME) || virt-viewer spice://127.0.0.1:5900; \ + else \ + echo "virt-viewer not found. Please install it:"; \ + echo " sudo apt install virt-viewer"; \ + fi diff --git a/README.md b/README.md index e90b0f8..531ff2c 100644 --- a/README.md +++ b/README.md @@ -4,24 +4,157 @@ This repo contains all the configuration needed to run the live Torrust Tracker It's also used to track issues in production. -> IMPORTANT: We are in the process of [splitting the Torrust Demo repo into two repos](https://github.com/torrust/torrust-demo/issues/79). This will allow us to deploy both services independently and it would make easier for users who only want to setup the tracker to re-use this setup. The content of this repo may change drastically in the future. +> IMPORTANT: We are in the process of [splitting the Torrust Demo repo into +> two repos](https://github.com/torrust/torrust-demo/issues/79). This will +> allow us to deploy both services independently and it would make easier for +> users who only want to setup the tracker to re-use this setup. The content +> of this repo may change drastically in the future. -## Demo tracker +## 🏗️ Repository Structure -- HTTP Tracker: -- UDP Tracker: udp://tracker.torrust-demo.com:6969/announce +This repository is organized into two main concerns: -### Monitoring +### 📦 [`infrastructure/`](infrastructure/) -The demo includes monitoring with Grafana dashboards for the tracker to track performance metrics, peer connections, and system health. 
+**Server and system setup** - VMs, networking, operating system configuration -![Grafana Dashboard](docs/media/torrust-tracker-grafana-dashboard.png) +- OpenTofu/Terraform for VM provisioning +- cloud-init templates for system setup +- libvirt/KVM for local testing +- Infrastructure testing and validation -## Documentation +### 🚀 [`application/`](application/) -- [Setup](docs/setup.md) -- [Deployment](docs/deployment.md) -- [Firewall](docs/firewall.md) -- [Sample Commands](docs/sample_commands.md) -- [Rollbacks](docs/rollbacks.md) -- [Backups](docs/backups.md) +**Application deployment and configuration** - Docker services, app config + +- Docker Compose for service orchestration +- Torrust Tracker configuration +- Nginx, Prometheus, Grafana setup +- Application scripts and utilities + +### 📚 [`docs/`](docs/) + +**Project documentation** - Guides, security, and reference materials + +- General project documentation +- Security and auditing information +- Cross-cutting concerns + +## Demo Tracker + +- **HTTP Tracker**: +- **UDP Tracker**: udp://tracker.torrust-demo.com:6969/announce + +For detailed information about all tracker ports and their specific purposes, +see [Application Port Documentation](application/docs/firewall-requirements.md#torrust-tracker-ports). + +The demo includes monitoring with Grafana dashboards for performance metrics, +peer connections, and system health. + +![Grafana Dashboard](application/docs/media/torrust-tracker-grafana-dashboard.png) + +## 🚀 Quick Start + +For detailed setup instructions, see the specific documentation: + +- **Infrastructure**: [Infrastructure Quick Start](infrastructure/docs/quick-start.md) +- **Application**: [Application README](application/README.md) + +### Complete Development Setup + +```bash +# 1. Setup infrastructure dependencies +make dev-setup +# Log out and log back in for group permissions + +# 2. 
Configure SSH key +make setup-ssh-key +# Edit infrastructure/terraform/local.tfvars with your SSH public key + +# 3. Deploy VM and application +make apply # Deploy VM +make ssh # Access VM +docker compose -f application/compose.yaml up -d # Deploy application +make destroy # Clean up +``` + +## 📚 Documentation + +### Infrastructure Documentation + +- [Infrastructure Quick Start](infrastructure/docs/quick-start.md) - Get started + in 5 minutes +- [Complete Infrastructure Setup](infrastructure/docs/local-testing-setup.md) - + Detailed guide +- [Infrastructure Overview](infrastructure/docs/infrastructure-overview.md) - + Complete capabilities +- [libvirt Troubleshooting](infrastructure/docs/third-party/libvirt-setup.md) - + Fix common issues + +### Application Documentation + +- [Application Overview](application/README.md) - Application components and + deployment +- [Production Setup](application/docs/production-setup.md) - Production + deployment +- [Deployment Guide](application/docs/deployment.md) - Deployment procedures +- [Backup Procedures](application/docs/backups.md) - Data backup and recovery +- [Rollback Guide](application/docs/rollbacks.md) - Application rollbacks +- [Useful Commands](application/docs/useful-commands.md) - Common operations +- [Firewall Requirements](application/docs/firewall-requirements.md) - Network + access requirements + +### General Documentation + +- [Documentation Structure](docs/README.md) - Cross-cutting documentation +- [Architecture Decisions](docs/adr/) - Design decisions and rationale + - [ADR-001: Makefile Location](docs/adr/001-makefile-location.md) - Why the + main Makefile is at repository root + - [ADR-002: Docker for All Services](docs/adr/002-docker-for-all-services.md) - + Why we use Docker for all services including UDP tracker + +## 🛠️ Development + +For contributors and developers: + +- **Infrastructure development**: See [`infrastructure/README.md`](infrastructure/README.md) +- **Application development**: See 
[`application/README.md`](application/README.md) +- **General contributing guidelines**: See [`.github/copilot-instructions.md`](.github/copilot-instructions.md) +- **Architecture decisions**: See [`docs/adr/`](docs/adr/) for design rationale + +### Code Quality and Linting + +The project uses automated linting to ensure code quality and consistency: + +```bash +# Run all linting checks +make lint + +# Run individual linters +make lint-yaml # YAML files (yamllint) +make lint-shell # Shell scripts (shellcheck) +make lint-markdown # Markdown files (markdownlint) +``` + +**Required tools:** + +- `yamllint` - YAML syntax and style checking +- `shellcheck` - Shell script analysis +- `markdownlint` - Markdown style and structure checking + +**Installation:** + +```bash +# Ubuntu/Debian +sudo apt-get install yamllint shellcheck +sudo npm install -g markdownlint-cli + +# Or install all project dependencies +make install-deps +``` + +Linting is automatically run in CI/CD pipelines to validate all contributions. + +> **Note**: The main `Makefile` is kept at the root level to serve as the +> project's primary interface. See [ADR-001](docs/adr/001-makefile-location.md) +> for the full rationale. 
diff --git a/.env.production b/application/.env.production similarity index 100% rename from .env.production rename to application/.env.production diff --git a/application/.gitignore b/application/.gitignore new file mode 100644 index 0000000..4280242 --- /dev/null +++ b/application/.gitignore @@ -0,0 +1,81 @@ +# Application-specific gitignore + +# Environment files +.env +.env.local +.env.development +.env.production.local +!.env.production + +# Docker and container files +docker-compose.override.yml +.dockerignore + +# Application data and storage +/storage/ +/data/ +/database/ +/uploads/ +/logs/ +*.db +*.sqlite +*.sqlite3 + +# SSL certificates and keys +/ssl/ +/certs/ +*.pem +*.key +*.crt +*.csr +!*.example.* + +# Application logs +*.log +/logs/ +tracker.log +nginx.log +access.log +error.log + +# Backup files +*.backup +*.bak +*.dump +*.sql.gz + +# Grafana data +/grafana/data/ +/grafana/plugins/ +/grafana/provisioning/datasources/*.yml +!grafana/provisioning/datasources/*.yml.example + +# Prometheus data +/prometheus/data/ + +# Temporary files +/tmp/ +/temp/ +*.tmp +*~ + +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Application secrets +secrets.yml +secrets.yaml +passwords.txt +tokens.txt + +# Development artifacts +.vscode/settings.json +.idea/ +*.swp +*.swo diff --git a/application/README.md b/application/README.md new file mode 100644 index 0000000..cb2e39f --- /dev/null +++ b/application/README.md @@ -0,0 +1,248 @@ +# Application + +This directory contains all application-related components for the Torrust +Tracker Demo project - everything needed to deploy, configure, and manage the +Torrust Tracker application itself. 
+ +## 🚀 Structure + +```text +application/ +├── docs/ # Application documentation +│ ├── production-setup.md # Production deployment guide +│ ├── deployment.md # Deployment procedures +│ ├── backups.md # Application backup procedures +│ ├── rollbacks.md # Application rollback procedures +│ ├── useful-commands.md # Common application commands +│ ├── firewall-requirements.md # Network access requirements +│ └── media/ # Application-specific images and diagrams +│ ├── torrust-tracker-grafana-dashboard.png +│ └── do-firewall-configuration.png +├── share/ # Application resources +│ ├── bin/ # Deployment and utility scripts +│ │ ├── deploy-torrust-tracker-demo.com.sh +│ │ ├── install.sh +│ │ ├── ssl_renew.sh +│ │ ├── time-running.sh +│ │ ├── tracker-db-backup.sh +│ │ └── tracker-filtered-logs.sh +│ ├── container/default/config/ # Container configurations +│ │ ├── crontab.conf +│ │ ├── nginx.conf +│ │ ├── prometheus.yml +│ │ └── tracker.prod.container.sqlite3.toml +│ ├── dev/home/ # Development configurations +│ └── grafana/dashboards/ # Grafana dashboard configurations +│ ├── metrics.json +│ ├── stats.json +│ └── README.md +├── compose.yaml # Docker Compose configuration +├── .env.production # Production environment variables +└── README.md # This file +``` + +## 🎯 Purpose + +### Application Concerns + +- **Service Deployment**: Torrust Tracker, Nginx, Prometheus, Grafana +- **Application Configuration**: Tracker settings, database connections +- **Service Orchestration**: Docker Compose service management +- **Application Data**: Database, logs, metrics, dashboards +- **Application Security**: SSL certificates, service authentication +- **Application Monitoring**: Metrics collection, alerting, dashboards + +### Technologies Used + +- **Docker & Docker Compose**: Container orchestration +- **Torrust Tracker**: The main BitTorrent tracker application +- **Nginx**: Reverse proxy and SSL termination +- **Prometheus**: Metrics collection and storage +- **Grafana**: Metrics 
visualization and dashboards +- **MySQL**: Database (migrating from SQLite) +- **Certbot**: SSL certificate management + +## 🚀 Quick Start + +### Application Deployment + +```bash +# Deploy application services +docker compose -f application/compose.yaml up -d + +# Check service status +docker compose -f application/compose.yaml ps + +# View logs +docker compose -f application/compose.yaml logs -f +``` + +### Full Infrastructure + Application Testing + +```bash +# 1. Setup infrastructure (from repository root) +make dev-setup +# Log out and log back in for permissions + +# 2. Configure SSH key +make setup-ssh-key +# Edit infrastructure/terraform/local.tfvars with your SSH public key + +# 3. Deploy VM and application +make apply # Deploy VM +make ssh # Access VM +docker compose -f application/compose.yaml up -d # Deploy application +make destroy # Clean up +``` + +### Demo Tracker URLs + +Once deployed, the tracker is available at: + +- **HTTP Tracker**: +- **UDP Tracker**: udp://tracker.torrust-demo.com:6969/announce + +For detailed information about all tracker ports and their purposes, +see [Port Documentation](docs/firewall-requirements.md#torrust-tracker-ports). 
+ +### Monitoring Dashboard + +The demo includes comprehensive monitoring with Grafana dashboards: + +![Grafana Dashboard](docs/media/torrust-tracker-grafana-dashboard.png) + +## 📋 What Application Provides + +### Core Services + +- **Torrust Tracker**: BitTorrent tracker with HTTP and UDP support +- **Web Interface**: Management and monitoring interface +- **API Endpoints**: REST API for tracker management +- **Metrics Collection**: Prometheus metrics for monitoring +- **Visualization**: Grafana dashboards for analytics + +### Support Services + +- **Reverse Proxy**: Nginx for SSL termination and routing +- **SSL Certificates**: Automated certificate management +- **Log Management**: Centralized logging and filtering +- **Backup System**: Database and configuration backups +- **Health Monitoring**: Service health checks and alerting + +## 🔧 Application vs Infrastructure + +This directory focuses on **application** concerns. For **infrastructure** +concerns (VMs, networking, system setup), see the +[`../infrastructure/`](../infrastructure/) directory. + +**Application = "What runs and how it's configured"** +**Infrastructure = "Where and how the application runs"** + +## 🔄 Deployment Workflow + +### Development + +1. **Infrastructure**: Use `make apply` to provision VM +2. **Application**: Deploy services with Docker Compose +3. **Testing**: Run integration tests +4. **Iteration**: Make changes and repeat + +### Production + +1. **Infrastructure**: Provision Hetzner servers +2. **Application**: Deploy using production configuration +3. **Monitoring**: Enable metrics and alerting +4. 
**Maintenance**: Automated backups and updates + +## 📊 Monitoring and Observability + +- **Metrics**: Prometheus scrapes application metrics +- **Dashboards**: Grafana provides visualization +- **Logs**: Centralized logging with filtering +- **Health Checks**: Service availability monitoring +- **Alerts**: Notification system for issues + +## 🔒 Security + +- **SSL/TLS**: Automatic certificate management +- **Service Isolation**: Container-based security +- **Access Control**: Authentication and authorization +- **Data Protection**: Encrypted data at rest and in transit + +## 📚 Documentation + +### Application Guides + +- [Production Setup](docs/production-setup.md) - Production deployment +- [Deployment Procedures](docs/deployment.md) - Step-by-step deployment +- [Backup Procedures](docs/backups.md) - Data backup and recovery +- [Rollback Procedures](docs/rollbacks.md) - Application rollback procedures +- [Useful Commands](docs/useful-commands.md) - Common operations and commands +- [Firewall Requirements](docs/firewall-requirements.md) - Network access needs + +### Contributing to Application Documentation + +When adding application documentation: + +- **Application docs**: Docker, services, deployment, operations, configuration +- **Keep it practical**: Focus on deployment, configuration, and operations +- **Include examples**: Provide working command examples +- **Test procedures**: Document testing and validation steps +- **Cross-reference**: Link to related application documentation + +### Application vs Infrastructure Separation + +Application = "What runs and how it's configured" + +Application documentation should cover: + +- Docker Compose service configuration +- Application deployment procedures +- Service-level monitoring and logging +- Application backup and recovery +- SSL certificate management +- Application-specific troubleshooting + +See [`../infrastructure/`](../infrastructure/) for infrastructure-specific documentation. 
+ +## 🐳 Docker Design Decision + +This demo repository uses **Docker containers for all services**, including the +Torrust Tracker UDP component, even though this may not provide optimal +performance for high-throughput UDP tracking operations. + +### Why Docker for Everything? + +The decision to use Docker for all services, including the performance-critical +UDP tracker, prioritizes: + +1. **Simplicity**: Single orchestration method (Docker Compose) for all services +2. **Consistency**: Identical deployment process across environments +3. **Maintainability**: Easier updates and dependency management +4. **Documentation**: Clear, reusable examples for users +5. **Demo Focus**: Emphasizes functionality demonstration over peak performance + +### Performance Considerations + +While Docker networking may introduce some overhead for UDP operations compared +to running the tracker binary directly on the host, this trade-off aligns with +the repository's primary goals: + +- **Demo Environment**: Showcasing Torrust Tracker functionality +- **Frequent Updates**: Easy deployment of new tracker versions +- **User-Friendly**: Simple setup process for evaluation and testing + +### Production Performance Optimization + +For production deployments requiring maximum UDP performance, consider: + +- Running the tracker binary directly on the host +- Using host networking mode for containers +- Implementing kernel-level network optimizations +- Disabling connection tracking for UDP traffic + +These optimizations will be covered in dedicated performance documentation +outside this demo repository. + +> **Reference**: See [ADR-002](../docs/adr/002-docker-for-all-services.md) for +> the complete rationale behind this design decision. 
diff --git a/compose.yaml b/application/compose.yaml similarity index 98% rename from compose.yaml rename to application/compose.yaml index b8d7d9b..8d5d6fe 100644 --- a/compose.yaml +++ b/application/compose.yaml @@ -36,8 +36,6 @@ services: max-size: "10m" max-file: "10" depends_on: - - index-gui - - index - tracker - grafana @@ -108,4 +106,3 @@ networks: volumes: mysql_data: {} grafana_data: {} - diff --git a/docs/backups.md b/application/docs/backups.md similarity index 83% rename from docs/backups.md rename to application/docs/backups.md index 014a8f1..031e4e8 100644 --- a/docs/backups.md +++ b/application/docs/backups.md @@ -4,7 +4,7 @@ ```bash cd /home/torrust/github/torrust/torrust-tracker-demo/ -./share/bin/tracker-db-backup.sh +./share/bin/tracker-db-backup.sh ``` ## Check Backups Crontab Configuration @@ -13,7 +13,9 @@ cd /home/torrust/github/torrust/torrust-tracker-demo/ sudo crontab -e ``` -You should see the [crontab.conf](../share/container/default/config/crontab.conf) configuration file. +You should see the +[crontab.conf](../share/container/default/config/crontab.conf) configuration +file. ## Check Backups diff --git a/application/docs/deployment.md b/application/docs/deployment.md new file mode 100644 index 0000000..3b33670 --- /dev/null +++ b/application/docs/deployment.md @@ -0,0 +1,46 @@ +# Deployment + +1. SSH into the server. +2. Execute the deployment script: `./share/bin/deploy-torrust-tracker-demo.com.sh`. +3. Execute the smoke tests: + + ```console + # Clone Torrust Tracker + git clone git@github.com:torrust/torrust-tracker.git + cd torrust-tracker + ``` + + Execute the following commands to run the tracker client and checker.
+ + Simulate a torrent announce to the tracker using UDP: + + ```console + cargo run -p torrust-tracker-client --bin udp_tracker_client announce \ + udp://tracker.torrust-demo.com:6969/announce \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq + ``` + + Simulate a torrent announce to the tracker using HTTP: + + ```console + cargo run -p torrust-tracker-client --bin http_tracker_client announce \ + https://tracker.torrust-demo.com \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq + ``` + + Make a request to the health check endpoint: + + ```console + TORRUST_CHECKER_CONFIG='{ + "udp_trackers": ["udp://tracker.torrust-demo.com:6969/announce"], + "http_trackers": ["https://tracker.torrust-demo.com"], + "health_checks": ["https://tracker.torrust-demo.com/api/health_check"] + }' cargo run -p torrust-tracker-client --bin tracker_checker + + ``` + +4. Check the logs of the tracker container to see if everything is working: + + ```console + ./share/bin/tracker-filtered-logs.sh + ``` diff --git a/application/docs/firewall-requirements.md b/application/docs/firewall-requirements.md new file mode 100644 index 0000000..0bd20ff --- /dev/null +++ b/application/docs/firewall-requirements.md @@ -0,0 +1,138 @@ +# Firewall Requirements + +This document specifies the network access requirements for the Torrust Tracker +application components. These requirements should be implemented at the +infrastructure level (cloud firewall, UFW, iptables, etc.).
+ +## Required Ports + +### Public Access (Internet-facing) + +| Port | Protocol | Service | Purpose | Required | +| ---- | -------- | ------- | --------------------------------- | -------- | +| 22 | TCP | SSH | Server administration | Yes | +| 80 | TCP | HTTP | Let's Encrypt challenges | Temp\* | +| 443 | TCP | HTTPS | Web interface & API | Yes | +| 6868 | UDP | Tracker | Internal testing UDP tracker | Yes | +| 6969 | UDP | Tracker | Official public UDP tracker | Yes | +| 7070 | TCP | Tracker | Internal HTTP tracker (via Nginx) | Yes | + +\*Port 80 only needs to be temporarily enabled during SSL certificate generation. + +### Internal Access (Server-local only) + +| Port | Protocol | Service | Purpose | Access | +| ---- | -------- | ---------- | ---------------------- | ------ | +| 1212 | TCP | Tracker | API & metrics endpoint | Local | +| 3100 | TCP | Grafana | Dashboard interface | Local | +| 9090 | TCP | Prometheus | Metrics collection | Local | + +## Security Requirements + +### Critical: Internal Services Must Not Be Exposed + +- **Prometheus (port 9090)**: No authentication - must be internal only +- **Grafana (port 3100)**: Contains sensitive metrics - internal only +- **Tracker metrics (port 1212)**: Internal monitoring data + +### SSL/TLS Requirements + +- **HTTPS (port 443)**: Must use valid SSL certificates +- **HTTP (port 80)**: Only for Let's Encrypt ACME challenges +- **Tracker UDP**: No encryption (BitTorrent protocol limitation) +- **Tracker HTTP**: Should use HTTPS in production + +## Implementation Examples + +The infrastructure should implement these requirements. Common implementations: + +### Cloud Firewalls (Hetzner, AWS, etc.) 
+ +```yaml +# Allow only required external ports +# Block all internal ports from external access +``` + +### UFW (Ubuntu Firewall) + +```bash +# Example commands (infrastructure will implement) +ufw allow 22/tcp +ufw allow 443/tcp +ufw allow 6868/udp +ufw allow 6969/udp +ufw allow 7070/tcp +# Internal ports: no external access +``` + +![Current Firewall Configuration](media/do-firewall-configuration.png) + +## Application Service Mapping + +### Torrust Tracker Ports + +The tracker uses four main ports, each serving a specific purpose: + +#### UDP Tracker Ports + +- **Port 6868 (UDP)**: Internal testing UDP tracker + + - Not listed on public tracker lists (like [newtrackon.com](https://newtrackon.com/)) + - Used for internal testing and development + - Provides an alternative UDP endpoint when port 6969 is under heavy load + - Guarantees developers can make requests without timeouts due to socket saturation + +- **Port 6969 (UDP)**: Official public UDP tracker + - The primary UDP tracker port listed on public tracker lists + - Always under heavy usage in production + - Standard BitTorrent UDP announce endpoint + - Used by torrent clients for tracker communication + +#### HTTP/HTTPS Tracker Ports + +- **Port 7070 (TCP)**: Internal HTTP tracker + - HTTP-only tracker endpoint (no HTTPS) + - Not directly accessible from the internet (internal/private network only) + - Accessed internally through Nginx reverse proxy + - Nginx provides HTTPS termination and certificate management + - Used for HTTP-based tracker announces + +#### API and Metrics + +- **Port 1212 (TCP)**: Tracker API and metrics + - Used internally (private network) between Nginx proxy and tracker service + - Exposed publicly through Nginx proxy at `https://tracker.torrust-demo.com/api/...` + - Main endpoint: `/api/v1/stats` for Prometheus metrics (with token authentication) + - Additional endpoints available for manual requests: + - `/api/v1/torrents` - List of torrents + - `/api/v1/torrent/{info_hash}` -
Torrent details + - `/api/v1/metrics` - Prometheus metrics + - Used by Prometheus for monitoring and Grafana dashboards + +### Application Service Mapping + +### Torrust Tracker + +- **UDP ports 6868, 6969**: BitTorrent announce endpoints +- **TCP port 7070**: HTTP announce (internal, accessed via Nginx proxy) +- **TCP port 1212**: API and metrics (internal, accessed via Nginx proxy) + +### Nginx Reverse Proxy + +- **TCP port 443**: HTTPS termination and routing +- **TCP port 80**: Let's Encrypt challenges only + +### Monitoring Stack + +- **Prometheus (9090)**: Metrics collection (internal only) +- **Grafana (3100)**: Dashboards (internal only) + +## Security Notes + +1. **Never expose Prometheus** - it has no authentication +2. **Grafana access** - should be accessed via SSH tunnel or VPN +3. **Regular certificate renewal** - may require temporary port 80 access +4. **Monitor access logs** - watch for unauthorized access attempts + +For infrastructure-specific firewall implementation guides, see the +infrastructure documentation. diff --git a/docs/do-firewall-configuration.png b/application/docs/media/do-firewall-configuration.png similarity index 100% rename from docs/do-firewall-configuration.png rename to application/docs/media/do-firewall-configuration.png diff --git a/docs/media/torrust-tracker-grafana-dashboard.png b/application/docs/media/torrust-tracker-grafana-dashboard.png similarity index 100% rename from docs/media/torrust-tracker-grafana-dashboard.png rename to application/docs/media/torrust-tracker-grafana-dashboard.png diff --git a/docs/setup.md b/application/docs/production-setup.md similarity index 84% rename from docs/setup.md rename to application/docs/production-setup.md index 799a5ce..d27f88c 100644 --- a/docs/setup.md +++ b/application/docs/production-setup.md @@ -2,7 +2,7 @@ Follow instructions on [Deploying Torrust To Production](https://torrust.com/blog/deploying-torrust-to-production). 
-You need to also enable a [firewall](./docs/firewall.md). +You need to also enable a [firewall](./firewall-requirements.md). The application is located in the directory: `/home/torrust/github/torrust/torrust-tracker-demo`. diff --git a/application/docs/rollbacks.md b/application/docs/rollbacks.md new file mode 100644 index 0000000..edb4839 --- /dev/null +++ b/application/docs/rollbacks.md @@ -0,0 +1,58 @@ +# Rollbacks + +If you have problems after a [deployment](deployment.md), you can roll back +to the previous version of the app. + +1. SSH into the server. + +2. Check the docker images to see the previous version of the app: + + ```console + docker images torrust/tracker + REPOSITORY TAG IMAGE ID CREATED SIZE + torrust/tracker develop b081a7499542 19 minutes ago 133MB + torrust/tracker 7dbdad453cf3 6 hours ago 133MB + ``` + +3. Tag the previous version of the app with a new name (e.g. `rollback`): + + ```console + docker tag 7dbdad453cf3 torrust/tracker:rollback + ``` + + This command tags the image with ID `7dbdad453cf3` as `torrust/tracker:rollback`. + + ```console + docker images torrust/tracker + REPOSITORY TAG IMAGE ID CREATED SIZE + torrust/tracker develop b081a7499542 21 minutes ago 133MB + torrust/tracker rollback 7dbdad453cf3 6 hours ago 133MB + ``` + + The `rollback` tag now points to the previous version of the app. + +4. Edit the `compose.yaml` file to use the new tag: + + Change the line: + + ```yaml + image: torrust/tracker:develop + ``` + + to: + + ```yaml + image: torrust/tracker:rollback + ``` + +5. Run the following command to start the previous version of the app: + + ```console + docker compose up --build --detach + ``` + +6.
Check the logs of the tracker container to see if everything is working: + + ```console + ./share/bin/tracker-filtered-logs.sh + ``` diff --git a/docs/sample_commands.md b/application/docs/useful-commands.md similarity index 87% rename from docs/sample_commands.md rename to application/docs/useful-commands.md index 463c086..38314af 100644 --- a/docs/sample_commands.md +++ b/application/docs/useful-commands.md @@ -3,5 +3,6 @@ - `docker ps`: list containers. - `docker compose logs -f`: print all containers' logs. - `docker compose logs -f tracker`: print tracker container' logs. -- `docker compose logs -f tracker | head -n100`: print the first 100 lines in the tracker container log. +- `docker compose logs -f tracker | head -n100`: print the first 100 lines + in the tracker container log. - `docker compose logs -f | grep "ERROR"`: print logs showing only errors. diff --git a/share/bin/deploy-torrust-tracker-demo.com.sh b/application/share/bin/deploy-torrust-tracker-demo.com.sh similarity index 100% rename from share/bin/deploy-torrust-tracker-demo.com.sh rename to application/share/bin/deploy-torrust-tracker-demo.com.sh diff --git a/share/bin/install.sh b/application/share/bin/install.sh similarity index 94% rename from share/bin/install.sh rename to application/share/bin/install.sh index b21c8d4..2df8a6a 100755 --- a/share/bin/install.sh +++ b/application/share/bin/install.sh @@ -34,7 +34,7 @@ fi mkdir -p ./storage/tracker/etc if ! [ -f "./storage/tracker/etc/tracker.prod.container.sqlite3.toml" ]; then - echo "Crating tracker configuration: './storage/tracker/etc/tracker.toml'" + echo "Creating tracker configuration: './storage/tracker/etc/tracker.toml'" cp ./share/container/default/config/tracker.prod.container.sqlite3.toml ./storage/tracker/etc/tracker.toml fi @@ -45,4 +45,4 @@ mkdir -p ./storage/prometheus/etc if ! 
[ -f "./storage/prometheus/etc/prometheus.yml" ]; then echo "Creating prometheus config file: './storage/prometheus/etc/prometheus.yml'" cp ./share/container/default/config/prometheus.yml ./storage/prometheus/etc/prometheus.yml -fi \ No newline at end of file +fi diff --git a/share/bin/ssl_renew.sh b/application/share/bin/ssl_renew.sh similarity index 100% rename from share/bin/ssl_renew.sh rename to application/share/bin/ssl_renew.sh diff --git a/share/bin/time-running.sh b/application/share/bin/time-running.sh similarity index 100% rename from share/bin/time-running.sh rename to application/share/bin/time-running.sh diff --git a/share/bin/tracker-db-backup.sh b/application/share/bin/tracker-db-backup.sh similarity index 100% rename from share/bin/tracker-db-backup.sh rename to application/share/bin/tracker-db-backup.sh diff --git a/share/bin/tracker-filtered-logs.sh b/application/share/bin/tracker-filtered-logs.sh similarity index 100% rename from share/bin/tracker-filtered-logs.sh rename to application/share/bin/tracker-filtered-logs.sh diff --git a/share/container/default/config/crontab.conf b/application/share/container/default/config/crontab.conf similarity index 100% rename from share/container/default/config/crontab.conf rename to application/share/container/default/config/crontab.conf diff --git a/share/container/default/config/nginx.conf b/application/share/container/default/config/nginx.conf similarity index 96% rename from share/container/default/config/nginx.conf rename to application/share/container/default/config/nginx.conf index be7bf76..96d9cd2 100644 --- a/share/container/default/config/nginx.conf +++ b/application/share/container/default/config/nginx.conf @@ -11,11 +11,13 @@ server location /api/ { proxy_pass http://tracker:1212/api/; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; } location / { proxy_pass http://tracker:7070; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; } location ~ /.well-known/acme-challenge 
diff --git a/share/container/default/config/prometheus.yml b/application/share/container/default/config/prometheus.yml similarity index 100% rename from share/container/default/config/prometheus.yml rename to application/share/container/default/config/prometheus.yml diff --git a/application/share/container/default/config/tracker.prod.container.sqlite3.toml b/application/share/container/default/config/tracker.prod.container.sqlite3.toml new file mode 100644 index 0000000..d91af30 --- /dev/null +++ b/application/share/container/default/config/tracker.prod.container.sqlite3.toml @@ -0,0 +1,62 @@ +[metadata] +app = "torrust-tracker" +purpose = "configuration" +schema_version = "2.0.0" + +[logging] +#threshold = "trace" +threshold = "info" + +[core] +listed = false +private = false + +[core.tracker_policy] +persistent_torrent_completed_stat = true + +[core.announce_policy] +interval = 300 +interval_min = 300 + +[core.net] +on_reverse_proxy = true + +[core.database] +driver = "sqlite3" +path = "/var/lib/torrust/tracker/database/sqlite3.db" + +# UDP Tracker Configuration +# Two UDP ports are configured for different purposes: + +# Port 6868: Internal testing UDP tracker +# - Not listed on public tracker lists (like newtrackon.org) +# - Used for internal testing and development +# - Provides alternative endpoint when port 6969 is under heavy load +# - Guarantees developers can make requests without timeouts +[[udp_trackers]] +bind_address = "0.0.0.0:6868" + +# Port 6969: Official public UDP tracker +# - Primary UDP tracker port listed on public tracker lists +# - Always under heavy usage in production +# - Standard BitTorrent UDP announce endpoint +[[udp_trackers]] +bind_address = "0.0.0.0:6969" + +# HTTP Tracker Configuration +# Port 7070: Internal HTTP tracker (HTTP-only, no HTTPS) +# - Not directly accessible from internet (internal/private network only) +# - Accessed through Nginx reverse proxy which provides HTTPS termination +# - Used for HTTP-based tracker announces 
+[[http_trackers]] +bind_address = "0.0.0.0:7070" + +# API and Metrics Configuration +# Port 1212: Tracker API and metrics endpoint +# - Used internally (private network) between Nginx proxy and tracker service +# - Exposed publicly through Nginx proxy at https://tracker.torrust-demo.com/api/... +# - Main endpoints: /api/v1/stats, /api/v1/metrics, /api/v1/torrents +# - Used by Prometheus for monitoring and Grafana dashboards +# - See application/docs/firewall-requirements.md for complete port documentation +[http_api] +bind_address = "0.0.0.0:1212" diff --git a/share/dev/home/.ssh/config b/application/share/dev/home/.ssh/config similarity index 100% rename from share/dev/home/.ssh/config rename to application/share/dev/home/.ssh/config diff --git a/share/grafana/dashboards/README.md b/application/share/grafana/dashboards/README.md similarity index 100% rename from share/grafana/dashboards/README.md rename to application/share/grafana/dashboards/README.md diff --git a/share/grafana/dashboards/metrics.json b/application/share/grafana/dashboards/metrics.json similarity index 100% rename from share/grafana/dashboards/metrics.json rename to application/share/grafana/dashboards/metrics.json diff --git a/share/grafana/dashboards/stats.json b/application/share/grafana/dashboards/stats.json similarity index 100% rename from share/grafana/dashboards/stats.json rename to application/share/grafana/dashboards/stats.json diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..c2024b5 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,76 @@ +# Documentation Structure + +This directory contains general cross-cutting documentation for the Torrust +Tracker Demo project. 
+ +For specific documentation: + +- **Application documentation**: [`../application/docs/`](../application/docs/) +- **Infrastructure documentation**: [`../infrastructure/docs/`](../infrastructure/docs/) + +## Current Structure + +This directory currently contains cross-cutting documentation: + +### 📋 [`adr/`](adr/) (Architecture Decision Records) + +**Current ADRs:** + +- [ADR-001: Makefile Location](adr/001-makefile-location.md) - Decision to keep + Makefile at repository root level + +### Future Categories + +The following directories can be created as needed: + +### 🔬 `research/` (Research and Investigations) + +**Findings, explorations, and technical investigations** - For documenting +research findings, performance analysis, and technical explorations that +span multiple concerns. + +### 📊 `benchmarking/` (Performance Testing) + +**Performance testing and benchmarks** - For performance analysis, +optimization data, and benchmark results that evaluate the complete system. + +### 🧮 `theory/` (Theoretical Documentation) + +**Mathematical and theoretical concepts** - For algorithms, protocols, +and theoretical documentation related to BitTorrent and distributed systems. + +## Contributing to Documentation + +When adding new documentation: + +1. **Check if it belongs in application or infrastructure docs first** + + - See [`../application/README.md`](../application/README.md) for application + documentation guidelines + - See [`../infrastructure/README.md`](../infrastructure/README.md) for + infrastructure documentation guidelines + +2. **Use this directory for cross-cutting concerns only** + + - Architecture decisions affecting multiple layers + - Research spanning infrastructure and application + - Theoretical concepts and protocols + - Performance analysis of the complete system + +3. **Create appropriate directories** only when you have content to add + +4. **Use descriptive filenames** that clearly indicate the content + +5. 
**Follow markdown best practices** and maintain consistency + +6. **Update README files** when adding new categories or significant content + +7. **Cross-reference** related documentation when appropriate + +## Documentation Guidelines + +- **Cross-cutting vs Specific**: Keep layer-specific docs in their respective directories +- **Research**: Should document findings, methodology, and conclusions +- **ADRs**: Should follow standard ADR template format and affect multiple layers +- **Theory**: Should explain concepts clearly with examples when possible +- **Benchmarks**: Should include methodology, environment, and reproducible results diff --git a/docs/adr/001-makefile-location.md b/docs/adr/001-makefile-location.md new file mode 100644 index 0000000..6a01274 --- /dev/null +++ b/docs/adr/001-makefile-location.md @@ -0,0 +1,145 @@ +# ADR-001: Keep Makefile at Repository Root Level + +## Status + +Accepted + +## Date + +2025-07-01 + +## Context + +During the repository reorganization to separate infrastructure and application +concerns, we considered whether the main `Makefile` should be moved to the +`infrastructure/` directory since it contains primarily infrastructure-related +commands (95% of commands are for VM management, OpenTofu operations, libvirt +setup, etc.). 
+ +The repository was reorganized into: + +- `infrastructure/` - VMs, cloud-init, system setup, networking +- `application/` - Docker services, app deployment, configuration +- `docs/` - Cross-cutting documentation + +### Makefile Content Analysis + +The current Makefile contains: + +**Infrastructure Commands (95%):** + +- `install-deps` - Installs KVM, libvirt, OpenTofu +- `init`, `plan`, `apply`, `destroy` - OpenTofu/Terraform operations +- `ssh`, `status` - VM management +- `test-prereq`, `test-syntax`, `test-integration` - Infrastructure testing +- `fix-libvirt`, `check-libvirt` - libvirt troubleshooting +- `monitor-cloud-init` - VM provisioning monitoring +- `setup-ssh-key` - SSH configuration for VMs + +**Cross-cutting Commands (5%):** + +- `help`, `workflow-help` - General project help +- `clean` - Cleanup operations +- `dev-setup` - Complete environment setup + +**Application Commands:** + +- None directly (no Docker Compose, service management commands) + +## Decision + +**Keep the Makefile at the repository root level.** + +## Rationale + +### Arguments for keeping at root: + +1. **Project Entry Point**: The Makefile serves as the main interface for the + entire project. Users expect to run `make help` from the project root to + understand available operations. + +2. **Cross-cutting Nature**: While most commands are infrastructure-focused, + key commands like `dev-setup`, `help`, and `workflow-help` span both + infrastructure and application concerns. + +3. **User Experience**: Moving the Makefile would break the common expectation + that `make help` works from the project root directory. + +4. **Documentation Consistency**: All current documentation (README files, + quick-start guides, GitHub Actions) references root-level `make` commands. + Moving would require extensive documentation updates. + +5. **CI/CD Integration**: GitHub Actions workflows reference the Makefile from + the root. Moving it would require updating CI/CD configurations. + +6. 
**Discoverability**: Users cloning the repository expect to find the main + build/deployment interface at the root level. + +### Arguments against moving to infrastructure/: + +1. **Breaks Existing Workflows**: All documentation and established user + workflows would need updating. + +2. **Reduces Discoverability**: The Makefile would be less obvious to new + contributors. + +3. **Path Complexity**: Users would need to run `make -C infrastructure help` + or `cd infrastructure && make help` instead of simply `make help`. + +4. **Convention Breaking**: Most projects keep their main Makefile at the root, + even when it primarily serves one subsystem. + +## Consequences + +### Positive: + +- Maintains familiar user experience and established workflows +- Keeps documentation and CI/CD configurations unchanged +- Preserves the Makefile as the main project interface +- Allows for future expansion with application-specific commands + +### Negative: + +- The root Makefile contains primarily infrastructure commands, which may seem + inconsistent with the infrastructure/application separation +- Could be confusing for contributors who expect clear separation of concerns + +### Mitigating Actions: + +1. **Clear Documentation**: Document in the Makefile header that it primarily + contains infrastructure commands but serves as the project-wide interface. + +2. **Future Enhancement**: Consider adding application-specific commands to the + root Makefile or creating a delegation system to application-specific + Makefiles as needed. + +3. **Consistent Commenting**: Use clear section comments in the Makefile to + group related commands and explain their purpose. 
+ +## Alternatives Considered + +### Alternative 1: Move to infrastructure/ + +- **Pros**: Better alignment with infrastructure focus +- **Cons**: Breaks user experience, requires extensive documentation updates + +### Alternative 2: Split Makefile + +- Create `infrastructure/Makefile` for infrastructure-specific commands +- Create `application/Makefile` for application-specific commands +- Keep root Makefile with high-level commands that delegate to specific ones +- **Pros**: Clear separation of concerns +- **Cons**: Added complexity, potential for confusion about which Makefile to use + +### Alternative 3: Rename and Move + +- Move to `infrastructure/Makefile` and create root-level convenience script +- **Pros**: Clear location for infrastructure commands +- **Cons**: Non-standard approach, added maintenance burden + +## References + +- Repository reorganization discussion +- Analysis of Makefile content and usage patterns +- User workflow documentation in `infrastructure/docs/quick-start.md` +- CI/CD configuration in `.github/workflows/infrastructure.yml` diff --git a/docs/adr/002-docker-for-all-services.md b/docs/adr/002-docker-for-all-services.md new file mode 100644 index 0000000..c6ff427 --- /dev/null +++ b/docs/adr/002-docker-for-all-services.md @@ -0,0 +1,218 @@ +# ADR-002: Use Docker for All Services Including UDP Tracker + +## Status + +Accepted + +## Date + +2025-01-07 + +## Context + +The Torrust Tracker Demo repository provides a complete deployment environment for +the Torrust Tracker, including the UDP tracker component, HTTP tracker, REST API, +and supporting services (Prometheus, Grafana, MySQL/SQLite). + +### Performance Considerations + +UDP tracker performance is critical for BitTorrent operations, and several +performance optimization approaches were considered: + +1. **Host Network Mode**: Running UDP tracker containers with `--network=host` + to avoid Docker networking overhead +2. 
**Connection Tracking Disable**: Disabling `nf_conntrack` to reduce kernel + overhead for UDP packet processing +3. **Source Compilation**: Running the tracker binary directly on the host + instead of using Docker containers + +### Technical Challenges Identified + +During investigation of performance optimizations, several issues were encountered: + +1. **Connection Tracking vs Docker**: Docker networking appears to rely on + connection tracking (`nf_conntrack`) for proper packet routing. Disabling + connection tracking while using Docker containers resulted in networking + issues. + +2. **Host Mode Limitations**: While host networking mode worked, it created + complications with service orchestration and port management in the demo + environment. + +3. **Complexity vs Benefit**: Performance optimizations added significant + complexity to the deployment process and infrastructure management. + +### Related Issues + +This decision addresses problems documented in previous GitHub issues: + +- [torrust/torrust-demo#27](https://github.com/torrust/torrust-demo/issues/27): + Improve tracker performance by adjusting docker network configuration +- [torrust/torrust-demo#72](https://github.com/torrust/torrust-demo/issues/72): + Fix nf_conntrack table overflow causing UDP packet drops + +## Decision + +**Use Docker containers for all services in the Torrust Tracker Demo, including +the UDP tracker, without host networking mode or connection tracking modifications.** + +## Rationale + +### Primary Goals Alignment + +The Torrust Tracker Demo repository has specific primary objectives: + +1. **Demo Environment Setup**: Provide a complete, working demonstration of + Torrust Tracker functionality +2. **Frequent Updates**: Update the demo environment regularly, ideally with + every tracker release +3. **Declarative Infrastructure**: Maintain Infrastructure as Code approach + for reproducible deployments +4. 
**Documentation Generation**: Serve as a reference implementation for + deployment procedures + +### Performance vs Simplicity Trade-off + +While Docker networking may introduce some performance overhead compared to +native host networking, the benefits outweigh the costs for this use case: + +**Benefits of Docker Approach:** + +- **Consistency**: All services use the same orchestration method +- **Simplicity**: Single Docker Compose configuration manages all services +- **Reproducibility**: Identical behavior across different environments +- **Maintenance**: Easier updates and dependency management +- **Documentation**: Clearer examples for users to follow +- **Testing**: Simplified CI/CD and local testing procedures + +**Performance Considerations:** + +- The demo environment prioritizes functionality demonstration over peak performance +- Users requiring maximum performance can reference this implementation and + optimize for their specific production needs +- Performance optimizations can be documented separately without complicating + the base demo + +### Future Performance Documentation + +Performance optimization will be addressed through: + +1. **Dedicated Documentation**: Separate guides for production performance tuning +2. **Configuration Examples**: Performance-focused configuration templates +3. **Best Practices**: Documentation of optimization techniques and trade-offs +4. 
**Potential Repositories**: Specialized repositories focused on high-performance + deployments + +## Consequences + +### Positive Consequences + +- **Simplified Deployment**: Single orchestration method for all services +- **Better Documentation**: Clear, consistent examples for users +- **Easier Maintenance**: Streamlined update procedures +- **Improved Testing**: Consistent test environments +- **Faster Development**: Reduced complexity in infrastructure management + +### Negative Consequences + +- **Performance Overhead**: Some UDP tracker performance impact from Docker networking +- **Resource Usage**: Additional container overhead compared to native binaries +- **Networking Complexity**: Docker networking abstractions may obscure network issues + +### Mitigation Strategies + +1. **Clear Documentation**: Document the performance trade-offs explicitly +2. **Performance Guidelines**: Provide separate documentation for production + performance optimization +3. **Configuration Examples**: Include performance-tuned configuration examples +4. 
**Monitoring**: Include comprehensive monitoring to identify performance issues + +### Future Considerations + +- Monitor for significant performance issues in demo environment +- Re-evaluate if Docker networking becomes a major limitation +- Consider hybrid approaches for specific production use cases +- Provide migration guides for users who need maximum performance + +## Alternatives Considered + +### Alternative 1: Host Network Mode + +**Description**: Run UDP tracker with `--network=host` + +**Pros**: + +- Better network performance +- Reduced networking overhead +- Direct access to host network interfaces + +**Cons**: + +- Port conflicts with host services +- Reduced container isolation +- Complications with service discovery +- More complex firewall configuration + +**Decision**: Rejected due to increased complexity and orchestration challenges + +### Alternative 2: Native Binary Deployment + +**Description**: Compile and run tracker binary directly on host + +**Pros**: + +- Maximum performance +- No container overhead +- Direct kernel network access + +**Cons**: + +- Complex dependency management +- Platform-specific build requirements +- Reduced deployment consistency +- More complex update procedures +- Breaking changes to current infrastructure + +**Decision**: Rejected due to complexity and maintenance burden + +### Alternative 3: Hybrid Approach + +**Description**: Use Docker for supporting services, native binary for tracker + +**Pros**: + +- Performance optimization for critical component +- Maintained orchestration for supporting services + +**Cons**: + +- Increased complexity +- Mixed deployment methods +- More complex CI/CD pipelines +- Inconsistent documentation examples + +**Decision**: Rejected due to increased complexity and mixed approaches + +### Alternative 4: Conditional Deployment + +**Description**: Support both Docker and native deployment modes + +**Pros**: + +- User choice for performance vs simplicity +- Flexibility for different use 
cases + +**Cons**: + +- Significant maintenance burden +- Complex documentation +- Multiple testing matrices +- Potential for configuration drift + +**Decision**: Rejected due to maintenance complexity + +## References + +- [Torrust Tracker Documentation](https://docs.rs/torrust-tracker/) +- [GitHub Issue #27: Docker Network Configuration](https://github.com/torrust/torrust-demo/issues/27) +- [GitHub Issue #72: nf_conntrack Overflow](https://github.com/torrust/torrust-demo/issues/72) diff --git a/docs/deployment.md b/docs/deployment.md deleted file mode 100644 index f7d2848..0000000 --- a/docs/deployment.md +++ /dev/null @@ -1,42 +0,0 @@ -# Deployment - -1. SSH into the server. -2. Execute the deployment script: `./bin/deploy-torrust-tracker-demo.com.sh`. -3. Execute the smoke tests: - - ```console - # Clone Torrust Tracker - git@github.com:torrust/torrust-tracker.git - cd torrust-tracker - ``` - - Execute the following commands to run the tracker client and checker. - - Simulate a torrent announce to the tracker using UDP: - - ```console - cargo run -p torrust-tracker-client --bin udp_tracker_client announce udp://tracker.torrust-demo.com:6969/announce 9c38422213e30bff212b30c360d26f9a02136422 | jq - ``` - - Simulate a torrent scrape to the tracker using HTTP: - - ```console - cargo run -p torrust-tracker-client --bin http_tracker_client announce https://tracker.torrust-demo.com 9c38422213e30bff212b30c360d26f9a02136422 | jq - ``` - - Make a request to the health check endpoint: - - ```console - TORRUST_CHECKER_CONFIG='{ - "udp_trackers": ["udp://tracker.torrust-demo.com:6969/announce"], - "http_trackers": ["https://tracker.torrust-demo.com"], - "health_checks": ["https://tracker.torrust-demo.com/api/health_check"] - }' cargo run -p torrust-tracker-client --bin tracker_checker - - ``` - -4. 
Check the logs of the tracker container to see if everything is working: - - ```console - ./share/bin/tracker-filtered-logs.sh - ``` diff --git a/docs/firewall.md b/docs/firewall.md deleted file mode 100644 index 80749be..0000000 --- a/docs/firewall.md +++ /dev/null @@ -1,11 +0,0 @@ -# Firewall - -We are using a Hetzner Firewall. - -![Firewall Rules](./do-firewall-configuration.png) - -This is especially important for Prometheus service because it does not have authentication. This should not be exposed: - - - -The port 80 is not enabled but you need to temporarily enable it to generate new Let's Encrypt certificates. diff --git a/docs/guides/integration-testing-guide.md b/docs/guides/integration-testing-guide.md new file mode 100644 index 0000000..1f904ab --- /dev/null +++ b/docs/guides/integration-testing-guide.md @@ -0,0 +1,908 @@ +# Integration Testing Guide + +This guide provides step-by-step instructions for running complete integration +tests on a fresh virtual machine. All commands are ready to copy and paste. + +## Overview + +This guide will walk you through: + +1. Creating a fresh VM by cleaning up any existing infrastructure +2. Deploying the VM with full Torrust Tracker configuration +3. Waiting for cloud-init to complete (critical step!) +4. Running comprehensive integration tests +5. Verifying all services work correctly +6. Cleaning up resources + +**Total Time**: ~8-12 minutes (improved from previous connectivity issues) + +--- + +## Prerequisites + +Ensure you have completed the initial setup: + +```bash +# Verify prerequisites are met +make test-prereq +``` + +**Expected Output**: All checks should pass with ✅ marks. + +--- + +## Step 1: Clean Up and Prepare Fresh Environment + +### 1.1 Navigate to Project Directory + +For example: + +```bash +cd /home/yourname/Documents/git/committer/me/github/torrust/torrust-tracker-demo +``` + +**⚠️ Important**: All commands in this guide assume you are running from the +project root directory. 
If you see "command not found" errors, verify you are +in the correct directory. + +### 1.2 Check for Existing Resources + +⚠️ **WARNING**: The following commands will destroy existing VMs and remove +data. Only proceed if you want to start with a completely clean environment. + +```bash +# Check for existing VMs that might conflict +virsh list --all | grep torrust-tracker-demo || echo "✅ No conflicting VM found" + +# Check for existing libvirt volumes +virsh vol-list user-default 2>/dev/null | grep torrust-tracker-demo || \ + echo "✅ No conflicting volumes found" + +# Check for existing OpenTofu state +ls -la infrastructure/terraform/terraform.tfstate* 2>/dev/null || \ + echo "✅ No existing state files" +``` + +**Expected Output**: Should show "✅" messages if no conflicts exist. + +### 1.3 Clean Up Any Existing Infrastructure + +⚠️ **DESTRUCTIVE OPERATION**: This will permanently delete VMs, volumes, +and state files. + +```bash +# Complete cleanup - removes VMs, state files, and fixes permissions +time make clean-and-fix +``` + +**Expected Output**: + +- VMs destroyed and undefined +- OpenTofu state files removed +- libvirt images cleaned +- Permissions fixed +- **Time**: ~5 seconds (actual: 5.02s) + +**What This Creates**: Clean slate with no VMs or state files. + +### 1.4 Verify Clean State + +```bash +# Verify no conflicting resources remain +echo "=== Verifying Clean State ===" + +# Check VMs +virsh list --all | grep torrust-tracker-demo && \ + echo "❌ VM still exists!" || echo "✅ No VM conflicts" + +# Check volumes in user-default pool +virsh vol-list user-default 2>/dev/null | grep torrust-tracker-demo && \ + echo "❌ Volumes still exist!" || echo "✅ No volume conflicts" + +# Check OpenTofu state +ls infrastructure/terraform/terraform.tfstate* 2>/dev/null && \ + echo "❌ State files still exist!" || echo "✅ No state file conflicts" +``` + +**Expected Output**: All checks should show "✅" (no conflicts). 
+ +### 1.4.1 Manual Cleanup (if needed) + +If the verification step shows "❌ Volumes still exist!" then manually clean them: + +```bash +# List conflicting volumes +virsh vol-list user-default 2>/dev/null | grep torrust-tracker-demo + +# Delete each volume manually +virsh vol-delete torrust-tracker-demo-cloudinit.iso user-default +virsh vol-delete torrust-tracker-demo.qcow2 user-default + +# Verify cleanup +virsh vol-list user-default 2>/dev/null | grep torrust-tracker-demo && \ + echo "❌ Volumes still exist!" || echo "✅ No volume conflicts" +``` + +**Expected Output**: Should show "✅ No volume conflicts" after manual cleanup. + +**What This Fixes**: Removes leftover volumes that `make clean-and-fix` +sometimes misses. + +### 1.5 Set Up SSH Key Configuration + +⚠️ **CRITICAL STEP**: This step was **missing** from our initial testing and +caused SSH connection failures! + +#### For Default SSH Keys (id_rsa) + +```bash +# Set up SSH key configuration for VM access +time make setup-ssh-key +``` + +#### For Non-Default SSH Keys (e.g., torrust_rsa) + +⚠️ **IMPORTANT**: If you're using a non-default SSH key file (e.g., +`~/.ssh/torrust_rsa` instead of `~/.ssh/id_rsa`), you need to: + +1. **Configure the public key in terraform**: + +```bash +# Get your non-default public key +cat ~/.ssh/torrust_rsa.pub + +# Manually edit the terraform configuration +vim infrastructure/terraform/local.tfvars + +# Add your public key content: +ssh_public_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQC... your-key-here" +``` + +2. **Configure SSH client to use the correct private key**: + +```bash +# Option 1: Create/edit SSH config +echo "Host 192.168.122.* + IdentityFile ~/.ssh/torrust_rsa + IdentitiesOnly yes" >> ~/.ssh/config + +# Option 2: Always specify key explicitly when connecting +# ssh -i ~/.ssh/torrust_rsa torrust@VM_IP +``` + +**Expected Output** (for both methods): + +```console +Creating local SSH key configuration... 
+ +✓ Created infrastructure/terraform/local.tfvars + +Next steps: +1. Get your SSH public key: + cat ~/.ssh/id_rsa.pub + # or cat ~/.ssh/torrust_rsa.pub + +2. Edit the file and replace the placeholder: + vim infrastructure/terraform/local.tfvars + +3. Deploy the VM: + make apply +``` + +**What This Creates**: `infrastructure/terraform/local.tfvars` with SSH key +configuration. + +**Verify Configuration**: + +```bash +# Ensure the file contains your actual public key (not placeholder) +cat infrastructure/terraform/local.tfvars | grep ssh_public_key + +# Should show your full public key, not "REPLACE_WITH_YOUR_SSH_PUBLIC_KEY" +``` + +### 1.6 Initialize OpenTofu + +```bash +# Initialize OpenTofu providers +time make init +``` + +**Expected Output**: + +- Provider plugins downloaded +- Lock file created +- "OpenTofu has been successfully initialized!" message +- **Time**: ~3 seconds (actual: 3.11s) + +**What This Creates**: `.terraform.lock.hcl` file in `infrastructure/terraform/` + +--- + +## Step 2: Deploy Fresh Virtual Machine + +### 2.1 Plan the Deployment + +```bash +# Review what will be created +time make plan +``` + +**Expected Output**: + +- Plan to create 4 resources: + - `libvirt_volume.base_image` (Ubuntu cloud image) + - `libvirt_volume.vm_disk` (VM disk) + - `libvirt_cloudinit_disk.commoninit` (cloud-init configuration) + - `libvirt_domain.vm` (the actual VM) +- **Time**: ~1 second (actual: 0.63s) + +**What This Shows**: Infrastructure plan without making changes. + +### 2.2 Deploy the VM + +```bash +# Deploy VM with full configuration (this takes time!) +time make apply +``` + +**Expected During Deployment**: + +1. Libvirt permissions check and fixes +2. Download of Ubuntu 24.04 cloud image (~600MB) +3. VM disk creation +4. Cloud-init ISO creation +5. 
VM startup + +**Expected Output**: + +- Resources created successfully +- VM IP address in outputs (may show "No IP assigned yet" initially) +- **Time**: ~5 seconds (actual: 4.92s for VM creation after image cached) + +**Common Errors and Solutions**: + +- "storage volume 'torrust-tracker-demo-cloudinit.iso' exists already" + → Run: `virsh vol-delete torrust-tracker-demo-cloudinit.iso user-default` +- "Inconsistent dependency lock file" → Run: `make init` to reinitialize + +**What This Creates**: + +- Running VM named `torrust-tracker-demo` +- VM disk and cloud-init ISO in libvirt storage +- OpenTofu state file with VM information + +### 2.3 Verify VM is Running + +```bash +# Check VM status +virsh list --all +``` + +**Expected Output**: + +```console + Id Name State +-------------------------------------- + 1 torrust-tracker-demo running +``` + +--- + +## Step 3: Wait for Cloud-Init Completion (Critical!) + +**⏱️ Timing Update**: Based on recent testing, cloud-init completes much faster +than originally estimated. The VM is typically ready for SSH connections within +2-3 minutes. Previous issues were caused by firewall configuration blocking SSH +connections during cloud-init, preventing proper completion. The firewall setup +has been improved to allow SSH access throughout the process. 
+ +### 3.1 Get VM IP Address + +```bash +# Get IP from libvirt (more reliable during cloud-init) +VM_IP=$(virsh domifaddr torrust-tracker-demo | grep ipv4 | \ + awk '{print $4}' | cut -d'/' -f1) +echo "VM IP: $VM_IP" +``` + +**Expected Output**: IP address like `192.168.122.XXX` + +### 3.2 Debug Cloud-Init Issues (When SSH Fails) + +If SSH connections are failing after 5+ minutes, use these debugging +techniques based on [cloud-init debugging documentation](https://cloudinit.readthedocs.io/en/latest/howto/debugging.html): + +#### Access VM Console + +```bash +# Method 1: Connect to VM console via virsh (text-based) +virsh console torrust-tracker-demo + +# Login as 'ubuntu' (default user) with no password, then: +sudo cloud-init status --long +sudo cat /var/log/cloud-init.log | tail -20 +sudo cat /var/log/cloud-init-output.log | tail -20 +sudo systemctl status cloud-init-local cloud-init cloud-config cloud-final + +# Exit console: Ctrl+] +``` + +#### Method 2: Use virt-viewer for graphical console access + +```bash +# Connect to VM graphical console (shows login prompt) +virt-viewer spice://127.0.0.1:5900 + +# Alternative using VM name +virt-viewer torrust-tracker-demo +``` + +**Note**: The virt-viewer method provides a graphical console where you should +see a login prompt. This is particularly useful when the text-based virsh +console doesn't work or when you need to see the full boot process. 
+ +#### Check from Host System + +```bash +# Check if SSH port is responding +timeout 5 nc -zv $VM_IP 22 + +# Check VM system status +virsh dominfo torrust-tracker-demo + +# Check VM console output +virsh console torrust-tracker-demo --force +``` + +#### Minimal Configuration Testing + +If cloud-init takes too long, test with minimal configuration: + +```bash +# Backup original configuration +cp infrastructure/cloud-init/user-data.yaml.tpl infrastructure/cloud-init/user-data.yaml.tpl.backup + +# Use minimal configuration (edit manually or restore from backup) +# Then redeploy: +make destroy && make apply +``` + +### 3.3 Monitor Cloud-Init Progress + +Cloud-init typically takes 2-3 minutes to complete because it: + +- Downloads and installs 15+ packages (Docker, git, htop, ufw, fail2ban, etc.) +- Configures firewall with multiple rules +- Sets up system optimizations +- Creates directory structures +- May reboot the system for a clean state + +**Note**: Previous versions of this guide estimated 5-10 minutes, but that delay +was caused by firewall configuration blocking SSH access during cloud-init. The improved +firewall setup now allows SSH connections throughout the process, enabling +faster and more reliable completion. + +```bash +# Monitor cloud-init in real-time (opens in separate terminal) +make monitor-cloud-init & + +# OR manually check SSH connectivity every 30 seconds +while true; do + echo "$(date): Testing SSH to $VM_IP..." + if timeout 10 ssh -o StrictHostKeyChecking=no \ + -o ConnectTimeout=10 torrust@$VM_IP \ + "echo 'SSH works!'" 2>/dev/null; then + echo "✅ SSH connection successful!" + break + fi + echo "⏳ Cloud-init still running... 
waiting 30 seconds" + sleep 30 +done +``` + +**Expected Behavior**: + +- SSH connections will fail initially with "Connection refused" or + "Permission denied" +- After 2-3 minutes, SSH will start working (faster if no reboot is required) +- **Time**: ~2-3 minutes for full cloud-init completion + +### 3.4 Verify Cloud-Init Completion + +```bash +# Check cloud-init final status +ssh -o StrictHostKeyChecking=no torrust@$VM_IP "cloud-init status --long" +``` + +**Expected Output**: + +```console +status: done +time: [timestamp] +detail: DataSource DataSourceNoCloud [seed=/dev/sr0][dsmode=net] +``` + +### 3.5 Verify VM Configuration + +```bash +# Test basic VM readiness +echo "=== Testing VM Configuration ===" + +# Check Docker installation +ssh torrust@$VM_IP "docker --version" +ssh torrust@$VM_IP "docker compose version || docker-compose --version" + +# Check firewall status +ssh torrust@$VM_IP "sudo ufw status" + +# Check if directories are created +ssh torrust@$VM_IP "ls -la /home/torrust/github/" + +# Check system packages +ssh torrust@$VM_IP "which git curl wget htop" +``` + +**Expected Output**: + +- Docker version information +- Docker Compose version information (V2 plugin preferred, standalone + version also supported) +- UFW firewall showing "Status: active" with configured rules +- `/home/torrust/github/torrust` directory exists +- All system packages available + +**What This Verifies**: VM is fully configured and ready for integration tests. + +**Note**: The cloud-init configuration now installs Docker Compose V2 plugin +for better compatibility with modern compose.yaml files. 
+ +--- + +## Step 4: Run Integration Tests + +### 4.1 Test VM Access + +```bash +# Test basic VM connectivity +time ./infrastructure/tests/test-integration.sh access +``` + +**Expected Output**: + +- SSH connectivity test passes +- VM accessible message +- **Time**: ~5 seconds + +### 4.2 Test Docker Installation + +```bash +# Test Docker functionality +time ./infrastructure/tests/test-integration.sh docker +``` + +**Expected Output**: + +- Docker version check passes +- Docker Compose version check passes (automatically detects V2 plugin or + standalone version) +- **Time**: ~10 seconds + +**Note**: The test script automatically detects whether Docker Compose V2 +plugin (`docker compose`) or standalone version (`docker-compose`) is +available and uses the appropriate command. + +### 4.3 Setup Torrust Tracker Demo + +```bash +# Clone and setup the Torrust Tracker repository +time ./infrastructure/tests/test-integration.sh setup +``` + +**Expected Output**: + +- Repository cloned to `/home/torrust/github/torrust/torrust-tracker-demo` +- Environment file `.env` created from `.env.production` +- **Time**: ~30 seconds + +**What This Creates**: Torrust Tracker Demo repository with environment +configuration. 
+ +### 4.4 Start Torrust Tracker Services + +```bash +# Pull images and start all services +time ./infrastructure/tests/test-integration.sh start +``` + +**Expected Output**: + +- Docker images pulled successfully +- All services started in background +- Service status showing all containers running +- **Time**: ~2-3 minutes (pulling images) + +**What This Creates**: Running Docker stack with: + +- Torrust Tracker (HTTP and UDP) +- Prometheus (metrics collection) +- Grafana (monitoring dashboard) +- Nginx (reverse proxy) + +### 4.5 Test Service Endpoints + +```bash +# Test all API endpoints +time ./infrastructure/tests/test-integration.sh endpoints +``` + +**Expected Output**: + +- HTTP API responding on port 7070 +- Metrics endpoint responding on port 1212 +- UDP ports listening (6868, 6969) +- **Time**: ~15 seconds + +### 4.6 Test Monitoring Services + +```bash +# Test Prometheus and Grafana +time ./infrastructure/tests/test-integration.sh monitoring +``` + +**Expected Output**: + +- Prometheus health check passes +- Grafana health check passes +- **Time**: ~10 seconds + +### 4.7 Run Complete Integration Test Suite + +```bash +# Run all tests in sequence +time ./infrastructure/tests/test-integration.sh full-test +``` + +**Expected Output**: + +- All individual tests pass in sequence +- Services stopped cleanly at the end +- "All integration tests passed!" message +- **Time**: ~3-5 minutes total + +**What This Verifies**: Complete end-to-end functionality of the Torrust +Tracker deployment. 
+ +--- + +## Step 5: Manual Verification (Optional) + +### 5.1 SSH Into VM and Explore + +```bash +# Connect to VM for manual inspection +make ssh +``` + +**Inside the VM, you can run**: + +```bash +# Check cloud-init logs +sudo cat /var/log/cloud-init-output.log | tail -20 + +# Check running services +docker compose ps + +# Check service logs +docker compose logs --tail=20 + +# Check system status +sudo systemctl status docker +sudo ufw status verbose + +# Check Torrust Tracker logs +docker compose logs torrust-tracker --tail=20 + +# Exit the VM +exit +``` + +### 5.2 Test External Access (from Host) + +```bash +# Get VM IP for external testing +VM_IP=$(cd infrastructure/terraform && tofu output -raw vm_ip) +echo "VM IP: $VM_IP" + +# Test HTTP API from host +curl -s http://$VM_IP:7070/api/v1/stats | jq . || echo "API test failed" + +# Test metrics endpoint from host +curl -s http://$VM_IP:1212/metrics | head -10 +``` + +**Expected Output**: + +- JSON response from stats API +- Prometheus metrics data + +--- + +## Step 6: Performance and Load Testing (Optional) + +### Alternative: External Smoke Testing + +For quick external validation without infrastructure complexity, consider using +the dedicated [Smoke Testing Guide](smoke-testing-guide.md). This approach +uses the Torrust Tracker Client tools to test your deployment from an external +perspective: + +- ✅ **Quick validation** (~5 minutes vs full integration testing) +- ✅ **External black-box testing** using official client tools +- ✅ **Protocol-level verification** (UDP, HTTP, API endpoints) +- ✅ **No infrastructure knowledge required** - just test the deployed services +- ✅ **Perfect for post-deployment validation** and sanity checks + +The smoke testing approach complements this integration guide by providing a +simpler alternative when you only need to verify that the deployed tracker +is working correctly. 
+ +### 6.1 Measure Service Response Times + +```bash +# Test API response time +ssh torrust@$VM_IP \ + "time curl -s http://localhost:7070/api/v1/stats >/dev/null" + +# Test metrics response time +ssh torrust@$VM_IP \ + "time curl -s http://localhost:1212/metrics >/dev/null" + +# Test multiple concurrent requests +ssh torrust@$VM_IP \ + "for i in {1..10}; do \ + curl -s http://localhost:7070/api/v1/stats >/dev/null & \ + done; wait" +``` + +### 6.2 Check Resource Usage + +```bash +# Monitor system resources +ssh torrust@$VM_IP "top -b -n1 | head -20" +ssh torrust@$VM_IP "df -h" +ssh torrust@$VM_IP "free -h" +ssh torrust@$VM_IP "docker stats --no-stream" +``` + +--- + +## Step 7: Cleanup + +### 7.1 Stop Services (if needed) + +```bash +# Stop all services cleanly +./infrastructure/tests/test-integration.sh stop +``` + +### 7.2 Destroy VM and Clean Up + +```bash +# Destroy the VM and clean up resources +time make destroy +``` + +**Expected Output**: + +- All resources destroyed +- State files cleaned +- **Time**: ~30 seconds + +### 7.3 Final Cleanup + +```bash +# Complete cleanup +make clean +``` + +**Expected Output**: + +- Temporary files removed +- Lock files cleaned + +--- + +## Troubleshooting + +### Resource Conflicts During Deployment + +#### Cloud-init ISO Already Exists + +```bash +# Check if cloud-init ISO exists +virsh vol-list user-default | grep cloudinit + +# Remove the conflicting cloud-init ISO +virsh vol-delete torrust-tracker-demo-cloudinit.iso user-default + +# Then retry: make apply +``` + +#### OpenTofu State Conflicts + +```bash +# If you get "Inconsistent dependency lock file" +make init + +# If you get state conflicts, clean and restart +make clean-and-fix +make init +make apply +``` + +#### VM Already Exists + +```bash +# Check existing VMs +virsh list --all | grep torrust-tracker-demo + +# Force cleanup if VM exists but not in OpenTofu state +virsh destroy torrust-tracker-demo +virsh undefine torrust-tracker-demo +virsh vol-delete 
torrust-tracker-demo.qcow2 user-default +``` + +### Common Issues and Solutions + +#### SSH Connection Fails + +**MOST COMMON CAUSES**: + +1. **Missing SSH key configuration**: + +```bash +# Check if SSH key was configured +cat infrastructure/terraform/local.tfvars + +# If file doesn't exist or contains "REPLACE_WITH_YOUR_SSH_PUBLIC_KEY": +make setup-ssh-key +# Then redeploy: make destroy && make apply +``` + +1. **Using non-default SSH key** (e.g., `torrust_rsa` instead of `id_rsa`): + +```bash +# Check which keys exist +ls -la ~/.ssh/ + +# Check which key is configured in VM +grep ssh_public_key infrastructure/terraform/local.tfvars + +# Test with explicit key specification +ssh -i ~/.ssh/torrust_rsa -o StrictHostKeyChecking=no torrust@$VM_IP "echo 'Test'" + +# Configure SSH client permanently +echo "Host 192.168.122.* + IdentityFile ~/.ssh/torrust_rsa + IdentitiesOnly yes" >> ~/.ssh/config +``` + +1. **Cloud-init still running**: + +```bash +# Check if cloud-init is still running +virsh console torrust-tracker-demo --force +# Press Ctrl+] to exit console + +# Check VM IP again +virsh domifaddr torrust-tracker-demo + +# Test SSH port availability +timeout 5 nc -zv $VM_IP 22 + +# Wait longer - cloud-init typically completes in 2-3 minutes +# but may take up to 5 minutes in some cases +``` + +#### Services Don't Start + +```bash +# SSH into VM and check Docker +ssh torrust@$VM_IP "docker ps -a" + +# Check Docker Compose logs (try both commands) +ssh torrust@$VM_IP "cd /home/torrust/github/torrust/torrust-tracker-demo && \ + docker compose logs || docker-compose logs" + +# Check if Docker daemon is running +ssh torrust@$VM_IP "sudo systemctl status docker" + +# Verify Docker Compose version compatibility +ssh torrust@$VM_IP "docker compose version || docker-compose --version" +``` + +**Common Docker Compose Issues**: + +- **"compose.yaml format not supported"**: This indicates an older docker-compose + version. 
The integration tests automatically detect and use the correct command. +- **"docker: 'compose' is not a docker command"**: VM has standalone docker-compose + instead of Docker Compose V2 plugin. Both are supported. + +#### Integration Tests Fail + +```bash +# Check test logs +cat /tmp/torrust-integration-test.log + +# Collect system logs +ssh torrust@$VM_IP \ + "sudo journalctl --since='1 hour ago' --no-pager | tail -50" + +# Check VM resources +ssh torrust@$VM_IP "free -h && df -h" +``` + +#### Cloud-Init Issues + +```bash +# Check cloud-init status and logs +ssh torrust@$VM_IP "cloud-init status --long" +ssh torrust@$VM_IP "sudo cat /var/log/cloud-init-output.log | tail -50" +ssh torrust@$VM_IP "sudo cloud-init analyze show" +``` + +--- + +## Summary + +This guide provides a complete integration testing workflow that: + +1. **Creates fresh infrastructure** in ~3-5 minutes +2. **Waits for cloud-init** to complete (~2-3 minutes) +3. **Runs comprehensive tests** covering all services (~3-5 minutes) +4. **Verifies end-to-end functionality** of the Torrust Tracker +5. **Cleans up resources** when complete + +**Total Time**: ~8-12 minutes for complete cycle + +### Key Lessons Learned + +During the development of this guide, we identified several critical issues: + +1. **SSH Key Configuration**: The most common failure is missing or incorrect SSH + key setup. The `make setup-ssh-key` step is **mandatory**. + +2. **Non-Default SSH Keys**: If using custom SSH keys (like `torrust_rsa` + instead of `id_rsa`), you must: + + - Configure the public key in `infrastructure/terraform/local.tfvars` + - Set up SSH client configuration or use `-i` flag explicitly + +3. **Docker Compose Compatibility**: Cloud-init now installs Docker Compose V2 + plugin for better compatibility with modern compose.yaml files. Integration + tests automatically detect and use the appropriate command (`docker compose` + or `docker-compose`). + +4. 
**Cloud-Init Timing**: Cloud-init performs many operations including: + + - Package downloads and installations + - System configuration + - **System reboot** (in full configuration) + - Service startup after reboot + + The main improvement was fixing firewall configuration to allow SSH access + during cloud-init, preventing connectivity blocks that caused completion + delays. Actual completion time is typically 2-3 minutes. + +5. **Debugging Techniques**: Use `virsh console` and cloud-init logs to debug + issues when SSH fails. + +### Success Factors + +The key to success is **proper SSH key configuration** and **allowing cloud-init +to complete** - it installs many packages and configures the system, which +typically takes 2-3 minutes but ensures a production-ready environment. + +All commands are designed to be copy-pasteable and include realistic timing +information to set proper expectations. diff --git a/docs/guides/quick-start.md b/docs/guides/quick-start.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/guides/smoke-testing-guide.md b/docs/guides/smoke-testing-guide.md new file mode 100644 index 0000000..d1abaab --- /dev/null +++ b/docs/guides/smoke-testing-guide.md @@ -0,0 +1,498 @@ +# Smoke Testing Guide + +This guide explains how to run end-to-end smoke tests against a deployed +Torrust Tracker using official client tools. This is perfect for quick +validation after deployment or when you want to verify functionality without +needing to understand the infrastructure internals. 
+ +## Overview + +Smoke testing provides: + +- ✅ **Quick validation** (~5 minutes) +- ✅ **External black-box testing** using official Torrust client tools +- ✅ **Protocol-level verification** (UDP, HTTP, API endpoints) +- ✅ **No infrastructure knowledge required** +- ✅ **Perfect for post-deployment validation** + +This approach complements the [Integration Testing Guide](integration-testing-guide.md) +by providing a simpler alternative when you only need to verify that the +deployed tracker is working correctly. + +## Prerequisites + +### System Requirements + +- Git installed +- Rust toolchain (cargo) installed +- Network access to the deployed Torrust Tracker + +### Target Environment + +This guide covers testing against: + +- **Local/Demo Environment**: HTTP without certificates (development) +- **Future Scope**: Production environments with Let's Encrypt certificates + +> **Note**: Certificate generation with Let's Encrypt for HTTP services +> (Tracker API on port 1212, HTTP tracker on port 7070, Grafana, etc.) +> is not fully automated yet. This guide currently focuses on local +> testing environments. 
+ +## Step 1: Setup Torrust Tracker Client + +### 1.1 Get the Torrust Tracker Repository + +You have two options for accessing the Torrust Tracker client tools: + +#### Option A: Use Existing Installation + +If you already have the Torrust Tracker repository cloned: + +```bash +# Navigate to your existing torrust-tracker directory +cd /path/to/your/torrust-tracker +# Example: cd /home/josecelano/Documents/git/committer/me/github/torrust/torrust-tracker + +# Verify you have the client tools +ls -la src/bin/ | grep -E "(udp_tracker_client|http_tracker_client|tracker_checker)" +``` + +#### Option B: Clone Fresh Copy + +If you don't have the repository, clone it locally: + +```bash +# Clone the official Torrust Tracker repository +git clone https://github.com/torrust/torrust-tracker +cd torrust-tracker +``` + +> **Note**: If cloning locally, the `torrust-tracker/` directory is already +> added to `.gitignore` to avoid conflicts with the demo repository. + +### 1.2 Verify Rust Installation + +```bash +# Check Rust version (required for compiling client tools) +cargo --version +rustc --version + +# If Rust is not installed, install it: +# curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +# source ~/.cargo/env +``` + +### 1.3 Understanding Client Tools + +> **Important**: The Torrust Tracker client tools are **not published on +> crates.io** yet. They must be compiled from source using the tracker +> repository. You cannot install them with `cargo install` - you must +> run them using `cargo run` from the tracker root directory. 
+ +The available client tools are: + +- `udp_tracker_client` - Tests UDP tracker protocol +- `http_tracker_client` - Tests HTTP tracker protocol +- `tracker_checker` - Comprehensive health checker + +### 1.4 Verify Client Tools + +```bash +# Verify you're in the tracker root directory +pwd +ls Cargo.toml + +# Check available client binaries +ls -la src/bin/ | grep -E "client|checker" + +# Test that client tools can be run (will show help/usage) +cargo run -p torrust-tracker-client --bin udp_tracker_client -- --help +cargo run -p torrust-tracker-client --bin http_tracker_client -- --help +cargo run -p torrust-tracker-client --bin tracker_checker -- --help +``` + +## Step 2: Identify Target Server + +### 2.1 For Local VM Testing + +If you're testing against a local VM deployed with the integration guide: + +```bash +# Get VM IP address (if using local VM) +VM_IP=$(cd infrastructure/terraform && tofu output -raw vm_ip 2>/dev/null) || \ +VM_IP=$(virsh domifaddr torrust-tracker-demo | grep ipv4 | awk '{print $4}' | cut -d'/' -f1) + +echo "Testing against VM: $VM_IP" +``` + +### 2.2 For Remote Server Testing + +```bash +# Set your target server IP or domain +TARGET_SERVER="your-server.example.com" +# or +TARGET_SERVER="192.168.1.100" + +echo "Testing against server: $TARGET_SERVER" +``` + +### 2.3 Verify Server Accessibility + +```bash +# Test basic connectivity +ping -c 3 $TARGET_SERVER + +# Test if tracker ports are open +nc -zv $TARGET_SERVER 6868 # UDP tracker port 1 +nc -zv $TARGET_SERVER 6969 # UDP tracker port 2 +nc -zv $TARGET_SERVER 7070 # HTTP tracker port +nc -zv $TARGET_SERVER 1212 # API/metrics port +``` + +## Step 3: Run Smoke Tests + +### 3.1 Test UDP Trackers + +#### UDP Tracker on Port 6868 + +```bash +# Test UDP tracker on port 6868 +echo "=== Testing UDP Tracker (6868) ===" +cargo run -p torrust-tracker-client --bin udp_tracker_client announce \ + udp://$TARGET_SERVER:6868/announce \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq +``` + +**Expected 
Output:** + +```json +{ + "transaction_id": 2425393296, + "announce_response": { + "interval": 120, + "leechers": 0, + "seeders": 0, + "peers": [] + } +} +``` + +#### UDP Tracker on Port 6969 + +```bash +# Test UDP tracker on port 6969 +echo "=== Testing UDP Tracker (6969) ===" +cargo run -p torrust-tracker-client --bin udp_tracker_client announce \ + udp://$TARGET_SERVER:6969/announce \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq +``` + +**Expected Output:** Similar JSON response with tracker statistics. + +### 3.2 Test HTTP Tracker + +#### Through Nginx Proxy (Port 80) - ✅ Working + +The HTTP tracker is configured to run behind an nginx reverse proxy. The nginx +configuration now properly passes the `X-Forwarded-For` header, enabling HTTP +tracker functionality through the proxy: + +```bash +# Test HTTP tracker through nginx proxy on port 80 +echo "=== Testing HTTP Tracker through Nginx Proxy (80) ===" +cargo run -p torrust-tracker-client --bin http_tracker_client announce \ + http://$TARGET_SERVER:80 \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq +``` + +**Expected Output:** + +```json +{ + "complete": 1, + "incomplete": 0, + "interval": 300, + "min interval": 300, + "peers": [ + { + "ip": "192.168.122.1", + "peer id": [ + 45, 113, 66, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 49 + ], + "port": 47401 + } + ] +} +``` + +#### Direct Access (Port 7070) - Expected to Fail + +Direct access to port 7070 will fail because the tracker is configured for reverse proxy mode: + +```bash +# Test HTTP tracker directly on port 7070 (expected to fail) +echo "=== Testing HTTP Tracker Direct (7070) - Expected to fail ===" +cargo run -p torrust-tracker-client --bin http_tracker_client announce \ + http://$TARGET_SERVER:7070 \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq +``` + +**Expected Behavior**: Should fail with an error about missing `X-Forwarded-For` +header, confirming the tracker is correctly configured for reverse proxy mode. 
+ +### 3.3 Test API Endpoints + +#### Health Check Endpoint - ✅ Working + +```bash +# Test health check API through nginx proxy +echo "=== Testing Health Check API ===" +curl -s http://$TARGET_SERVER:80/api/health_check | jq +``` + +**Expected Output:** + +```json +{ + "status": "Ok" +} +``` + +#### Statistics Endpoint - ✅ Working + +The statistics API is available through the nginx proxy on port 80: + +```bash +# Test statistics API through nginx proxy +echo "=== Testing Statistics API ===" +curl -s http://$TARGET_SERVER:80/api/v1/stats | jq +``` + +**Expected Output:** + +```json +{ + "torrents": 0, + "seeders": 0, + "completed": 0, + "leechers": 0, + "tcp4_connections_handled": 0, + "tcp4_announces_handled": 0, + "tcp4_scrapes_handled": 0, + "tcp6_connections_handled": 0, + "tcp6_announces_handled": 0, + "tcp6_scrapes_handled": 0, + "udp4_connections_handled": 0, + "udp4_announces_handled": 0, + "udp4_scrapes_handled": 0, + "udp6_connections_handled": 0, + "udp6_announces_handled": 0, + "udp6_scrapes_handled": 0 +} +``` + +#### Metrics Endpoint + +```bash +# Test Prometheus metrics +echo "=== Testing Metrics Endpoint ===" +curl -s http://$TARGET_SERVER:1212/metrics | head -20 +``` + +**Expected Output:** Prometheus-formatted metrics data. + +### 3.4 Comprehensive Tracker Checker + +> **Note**: The tracker checker is designed for production environments with +> HTTPS. For local testing without certificates, individual endpoint tests +> (above) are more reliable. 
+ +For completeness, here's how to use the tracker checker tool: + +```bash +# Configure tracker checker for your environment +export TORRUST_CHECKER_CONFIG='{ + "udp_trackers": ["udp://'$TARGET_SERVER':6969/announce"], + "http_trackers": ["http://'$TARGET_SERVER':80"], + "health_checks": ["http://'$TARGET_SERVER':80/api/health_check"] +}' + +# Run comprehensive checker +echo "=== Running Comprehensive Tracker Checker ===" +cargo run -p torrust-tracker-client --bin tracker_checker +``` + +**Expected Output:** Status report for all configured endpoints. + +## Step 4: Interpret Results + +### 4.1 Success Indicators + +All tests should show: + +- ✅ **UDP Trackers**: JSON responses with interval/peer data +- ✅ **HTTP Tracker** (via proxy): JSON response with tracker statistics +- ✅ **Health Check**: `{"status": "Ok"}` response +- ✅ **Statistics API** (via proxy): JSON with current tracker metrics +- ✅ **Metrics**: Prometheus-formatted data + +### 4.2 Common Issues and Solutions + +#### Connection Refused + +```bash +# Check if services are running (must run from the application directory) +ssh torrust@$TARGET_SERVER \ + "cd /home/torrust/github/torrust/torrust-tracker-demo/application && docker compose ps" + +# Check firewall rules +ssh torrust@$TARGET_SERVER "sudo ufw status" + +# Restart services if needed (must run from the application directory) +ssh torrust@$TARGET_SERVER \ + "cd /home/torrust/github/torrust/torrust-tracker-demo/application && docker compose restart" +``` + +#### DNS Resolution Issues + +```bash +# Test with IP address instead of hostname +TARGET_SERVER="192.168.1.100" # Replace with actual IP + +# Or add to /etc/hosts temporarily +echo "$TARGET_SERVER your-server.example.com" | sudo tee -a /etc/hosts +``` + +#### Certificate Issues (Future Production Testing) + +> **Note**: This section will be expanded when Let's Encrypt automation +> is implemented. 
+ +For production environments with HTTPS certificates: + +```bash +# Test HTTPS endpoints (future) +curl -s https://$TARGET_SERVER/api/health_check | jq + +# Configure tracker checker for HTTPS (future) +export TORRUST_CHECKER_CONFIG='{ + "udp_trackers": ["udp://'$TARGET_SERVER':6969/announce"], + "http_trackers": ["https://'$TARGET_SERVER'"], + "health_checks": ["https://'$TARGET_SERVER'/api/health_check"] +}' +``` + +## Step 5: Automated Smoke Test Script (Optional) + +For repeated testing, create an automated script: + +```bash +# Create smoke test script +cat > smoke_test.sh << 'EOF' +#!/bin/bash +set -euo pipefail + +TARGET_SERVER="${1:-localhost}" +INFOHASH="9c38422213e30bff212b30c360d26f9a02136422" + +echo "=== Torrust Tracker Smoke Tests ===" +echo "Target: $TARGET_SERVER" +echo + +# Test UDP Tracker 6868 +echo "Testing UDP Tracker (6868)..." +if cargo run -p torrust-tracker-client --bin udp_tracker_client announce \ + udp://$TARGET_SERVER:6868/announce $INFOHASH >/dev/null 2>&1; then + echo "✅ UDP 6868: PASS" +else + echo "❌ UDP 6868: FAIL" +fi + +# Test UDP Tracker 6969 +echo "Testing UDP Tracker (6969)..." +if cargo run -p torrust-tracker-client --bin udp_tracker_client announce \ + udp://$TARGET_SERVER:6969/announce $INFOHASH >/dev/null 2>&1; then + echo "✅ UDP 6969: PASS" +else + echo "❌ UDP 6969: FAIL" +fi + +# Test HTTP Tracker +echo "Testing HTTP Tracker (7070)..." +if cargo run -p torrust-tracker-client --bin http_tracker_client announce \ + http://$TARGET_SERVER:7070 $INFOHASH >/dev/null 2>&1; then + echo "✅ HTTP 7070: PASS" +else + echo "❌ HTTP 7070: FAIL" +fi + +# Test Health Check +echo "Testing Health Check API..." +if curl -s http://$TARGET_SERVER:1212/api/health_check | grep -qi "ok"; then + echo "✅ Health Check: PASS" +else + echo "❌ Health Check: FAIL" +fi + +# Test Statistics +echo "Testing Statistics API..." 
+if curl -s http://$TARGET_SERVER:7070/api/v1/stats | grep -q "torrents"; then + echo "✅ Statistics: PASS" +else + echo "❌ Statistics: FAIL (expected due to proxy configuration)" +fi + +echo +echo "=== Smoke Tests Complete ===" +echo "Note: HTTP tracker and statistics tests may fail due to reverse proxy configuration" +EOF + +chmod +x smoke_test.sh + +# Run smoke tests +./smoke_test.sh $TARGET_SERVER +``` + +## Step 6: Cleanup + +```bash +# Return to original directory +cd .. + +# Optional: Remove cloned repository if no longer needed +# rm -rf torrust-tracker +``` + +## Summary + +This smoke testing guide provides a quick way to verify Torrust Tracker +functionality using official client tools. It's perfect for: + +- **Post-deployment validation** +- **Quick health checks** +- **External testing perspective** +- **Protocol-level verification** + +The tests cover all major Torrust Tracker components: + +- UDP trackers (ports 6868, 6969) +- HTTP tracker (port 7070) +- REST API endpoints (health, statistics) +- Metrics collection (Prometheus format) + +For more comprehensive testing including infrastructure validation, see the +[Integration Testing Guide](integration-testing-guide.md). + +## Future Enhancements + +This guide will be expanded to include: + +- ✅ **HTTPS testing** with Let's Encrypt certificates +- ✅ **Performance benchmarking** with load testing +- ✅ **Multi-peer simulation** for realistic scenarios +- ✅ **Grafana dashboard validation** +- ✅ **Database consistency checks** + +Stay tuned for updates as the Torrust Tracker Demo evolves! 
diff --git a/docs/infrastructure/libvirt-setup.md b/docs/infrastructure/libvirt-setup.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/infrastructure/local-testing-setup.md b/docs/infrastructure/local-testing-setup.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/infrastructure/quick-start.md b/docs/infrastructure/quick-start.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/rollbacks.md b/docs/rollbacks.md deleted file mode 100644 index 06550ef..0000000 --- a/docs/rollbacks.md +++ /dev/null @@ -1,57 +0,0 @@ -# Rollbacks - -If you have a problems after a [deployment](deployment.md), you can rollback to the previous version of the app. - -1. SSH into the server. - -2. Check the docker images to see the previous version of the app: - - ```console - docker images torrust/tracker - REPOSITORY TAG IMAGE ID CREATED SIZE - torrust/tracker develop b081a7499542 19 minutes ago 133MB - torrust/tracker 7dbdad453cf3 6 hours ago 133MB - ``` - -3. Tag the previous version of the app with a new name (e.g. `rollback`): - - ```console - docker tag 7dbdad453cf3 torrust/tracker:rollback - ``` - - This command tags the image with ID `7dbdad453cf3` as `torrust/tracker:rollback`. - - ```console - docker images torrust/tracker - REPOSITORY TAG IMAGE ID CREATED SIZE - torrust/tracker develop b081a7499542 21 minutes ago 133MB - torrust/tracker rollback 7dbdad453cf3 6 hours ago 133MB - ``` - - The `rollback` tag now points to the previous version of the app. - -4. Edit the `compose.yaml` file to use the new tag: - - Change the line: - - ```yaml - image: torrust/tracker:develop - ``` - - to: - - ```yaml - image: torrust/tracker:rollback - ``` - -5. Run the following command to start the previous version of the app: - - ```console - docker compose up --build --detach - ``` - -6. 
Check the logs of the tracker container to see if everything is working: - - ```console - ./share/bin/tracker-filtered-logs.sh - ``` diff --git a/docs/testing/makefile-validation-checklist.md b/docs/testing/makefile-validation-checklist.md new file mode 100644 index 0000000..e69de29 diff --git a/infrastructure/.gitignore b/infrastructure/.gitignore new file mode 100644 index 0000000..b093859 --- /dev/null +++ b/infrastructure/.gitignore @@ -0,0 +1,58 @@ +# Infrastructure-specific gitignore + +# OpenTofu/Terraform files +*.tfstate +*.tfstate.* +*.tfvars +!terraform.tfvars.example +.terraform/ +.terraform.lock.hcl +terraform.tfplan +terraform.tfplan.* + +# Cloud-init generated files +user-data.yaml +user-data-minimal.yaml + +# libvirt/KVM generated files +*.qcow2 +*.img +libvirt_cloudinit_*.iso + +# Infrastructure logs +*.log +/logs/ +infrastructure-test.log + +# SSH keys and secrets +*.pem +*.key +!*.pub +id_* +!id_*.pub + +# Local configuration files +local.tfvars +.env.local +.env.infrastructure + +# VM and storage artifacts +/storage/ +/images/ +/disks/ + +# Test artifacts +test-results/ +test-output/ +/tmp/ + +# Cloud provider CLI configs +.aws/ +.azure/ +.gcloud/ +.hcloud/ + +# Backup files +*.backup +*.bak +*~ diff --git a/infrastructure/README.md b/infrastructure/README.md new file mode 100644 index 0000000..1c77d2b --- /dev/null +++ b/infrastructure/README.md @@ -0,0 +1,251 @@ +# Infrastructure + +This directory contains the infrastructure-as-code configuration for the +Torrust Tracker Demo project. + +## Directory Structure + +```text +infrastructure/ +├── terraform/ # OpenTofu/Terraform configuration +│ ├── main.tf # Main configuration +│ └── terraform.tfvars.example # Example variables +└── cloud-init/ # Cloud-init configuration + ├── user-data.yaml # Main cloud-init config + ├── meta-data.yaml # VM metadata + └── network-config.yaml # Network configuration +``` + +## Purpose + +This infrastructure setup provides: + +1. 
**Local Testing Environment** - Test deployments locally using KVM/libvirt +2. **Hetzner Preparation** - Validate configurations before cloud deployment +3. **Consistent Environments** - Reproducible infrastructure across environments +4. **Automated Testing** - Validate changes through automated tests + +## Quick Start + +See the [Quick Start Guide](docs/quick-start.md) for the fastest way to get started. + +### Infrastructure Testing Commands + +```bash +# Setup infrastructure dependencies +make dev-setup +# Log out and log back in for group permissions + +# Configure SSH key +make setup-ssh-key +# Edit terraform/local.tfvars with your SSH public key + +# Test infrastructure layer +make test-prereq # Check prerequisites +make test-syntax # Validate configurations +make apply # Deploy VM +make ssh # Test access +make destroy # Clean up + +# Run full infrastructure test suite +make test +``` + +### Infrastructure Only Workflow + +```bash +# 1. Install dependencies +make dev-setup + +# 2. Configure SSH access +make setup-ssh-key + +# 3. Test everything +make test + +# 4. Deploy for development +make apply +make ssh + +# 5. 
Clean up +make destroy +``` + +## Components + +### OpenTofu Configuration (`terraform/`) + +- **main.tf** - Defines the VM, storage, and networking configuration +- **terraform.tfvars.example** - Template for customizing VM specifications + +Key features: + +- Uses KVM/libvirt provider for local virtualization +- Downloads Ubuntu 24.04 cloud image automatically +- Configures VM with appropriate resources +- Applies cloud-init configuration during boot + +### Cloud-Init Configuration (`cloud-init/`) + +- **user-data.yaml.tpl** - System configuration, packages, users, and setup scripts +- **meta-data.yaml** - VM metadata (hostname, instance ID) +- **network-config.yaml** - Network configuration (DHCP by default) + +The cloud-init configuration: + +- Creates `torrust` user with sudo privileges +- Installs Docker and development tools +- Configures UFW firewall with tracker ports +- Applies network performance optimizations +- Sets up automatic security updates + +## Usage + +### Basic Operations + +```bash +# Initialize (first time only) +make init + +# Deploy VM +make apply + +# Connect to VM +make ssh + +# Clean up +make destroy +``` + +### Testing + +```bash +# Run all tests +make test + +# Test prerequisites only +make test-prereq + +# Test configuration syntax +make test-syntax +``` + +### Customization + +1. Copy the example variables file: + + ```bash + cp infrastructure/terraform/terraform.tfvars.example infrastructure/terraform/terraform.tfvars + ``` + +2. Edit to customize VM specifications: + + ```hcl + vm_memory = 4096 # 4GB RAM + vm_vcpus = 4 # 4 CPU cores + vm_disk_size = 30 # 30GB disk + ``` + +3. 
Add your SSH public key to `infrastructure/terraform/local.tfvars` (it is injected into `infrastructure/cloud-init/user-data.yaml.tpl` as `${ssh_public_key}`) + +## VM Specifications + +### Default Configuration + +- **OS**: Ubuntu 24.04 LTS +- **RAM**: 2GB +- **CPU**: 2 cores +- **Disk**: 20GB +- **Network**: DHCP on default libvirt network + +### Installed Software + +- Docker and Docker Compose +- Git, curl, vim, htop +- UFW firewall +- Fail2ban for SSH protection +- Automatic security updates + +### Network Ports + +- 22/tcp - SSH +- 80/tcp, 443/tcp - HTTP/HTTPS +- 6868/udp, 6969/udp - Tracker UDP (see [Port Documentation](../application/docs/firewall-requirements.md#torrust-tracker-ports)) +- 7070/tcp - Tracker HTTP API (see [Port Documentation](../application/docs/firewall-requirements.md#torrust-tracker-ports)) +- 1212/tcp - Metrics (see [Port Documentation](../application/docs/firewall-requirements.md#torrust-tracker-ports)) + +## Security + +The VM is configured with security best practices: + +- SSH key authentication only (no passwords) +- UFW firewall with minimal required ports +- Automatic security updates +- Fail2ban protection against brute force attacks +- Docker daemon with log rotation + +## Troubleshooting + +### Common Issues + +1. **libvirt permissions**: Ensure you're in the `libvirt` and `kvm` groups +2. **VM boot issues**: Check `make vm-console` for boot messages +3. **SSH connection**: VM may take 2-3 minutes to fully initialize + +### Debugging Commands + +```bash +# Check VM status +make vm-info + +# Access VM console +make vm-console + +# View test logs +make logs + +# Check libvirt status +sudo systemctl status libvirtd +``` + +## Next Steps + +After the VM is running: + +1. Deploy Torrust Tracker services +2. Run integration tests +3. Test monitoring and metrics +4. Validate backup/restore procedures + +## Contributing + +When modifying the infrastructure: + +1. Test locally with `make test` +2. Update documentation as needed +3. Follow the project's commit conventions +4. 
Ensure backward compatibility + +### Documentation Guidelines + +When adding infrastructure documentation: + +- **Infrastructure docs**: VMs, cloud-init, system setup, networking, OpenTofu/Terraform +- **Keep it actionable**: Guides should be step-by-step +- **Include troubleshooting**: Document common issues and solutions +- **Cross-reference**: Link to related infrastructure documentation +- **Test instructions**: Always include commands to verify setup + +### Infrastructure vs Application Separation + +Infrastructure = "Where and how the application runs" + +Infrastructure documentation should cover: + +- VM provisioning and configuration +- Operating system setup and networking +- System-level security and firewall implementation +- Infrastructure testing and validation +- Cloud provider specific configurations + +See [`../application/`](../application/) for application-specific documentation. diff --git a/infrastructure/cloud-init/meta-data.yaml b/infrastructure/cloud-init/meta-data.yaml new file mode 100644 index 0000000..a7e20c9 --- /dev/null +++ b/infrastructure/cloud-init/meta-data.yaml @@ -0,0 +1,3 @@ +hostname: ${hostname} +instance-id: ${hostname}-001 +local-hostname: ${hostname} diff --git a/infrastructure/cloud-init/network-config.yaml b/infrastructure/cloud-init/network-config.yaml new file mode 100644 index 0000000..420a8ff --- /dev/null +++ b/infrastructure/cloud-init/network-config.yaml @@ -0,0 +1,5 @@ +version: 2 +ethernets: + ens3: + dhcp4: true + dhcp-identifier: mac diff --git a/infrastructure/cloud-init/user-data-minimal.yaml.tpl b/infrastructure/cloud-init/user-data-minimal.yaml.tpl new file mode 100644 index 0000000..7c62a54 --- /dev/null +++ b/infrastructure/cloud-init/user-data-minimal.yaml.tpl @@ -0,0 +1,13 @@ +#cloud-config +users: + - name: testuser + lock_passwd: false + ssh_pwauth: true + sudo: ALL=(ALL) NOPASSWD:ALL +ssh_pwauth: true +chpasswd: + expire: false + users: + - name: testuser + password: testpass123 + type: text diff 
--git a/infrastructure/cloud-init/user-data.yaml.tpl b/infrastructure/cloud-init/user-data.yaml.tpl new file mode 100644 index 0000000..13ea0b1 --- /dev/null +++ b/infrastructure/cloud-init/user-data.yaml.tpl @@ -0,0 +1,224 @@ +#cloud-config +# cloud-config +# Optimized cloud-init configuration based on manual testing + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + # plain_text_passwd: torrust123 # Commented out - enable only for debugging/recovery + ssh_authorized_keys: + - ${ssh_public_key} + +# Enable SSH password authentication for debugging +# ssh_pwauth: true # Commented out - enable only for debugging/recovery + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages (verified working order) +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ca-certificates + - gnupg + - lsb-release + - ufw + - fail2ban + - unattended-upgrades + # NOTE: Rust build dependencies commented out since we're using Docker for all services (see ADR-002) + # Uncomment the following packages if you need to compile Rust applications (like Torrust Tracker) from source: + # - pkg-config + # - libssl-dev + # - make + # - build-essential + # - libsqlite3-dev + # - sqlite3 + +# System configuration files +write_files: + # SSH configuration to enable password authentication + # Commented out - enable only for debugging/recovery + # - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + # content: | + # PasswordAuthentication yes + # PubkeyAuthentication yes + # permissions: "0644" + # owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + 
"max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # Sysctl optimizations for network performance + - path: /etc/sysctl.d/99-torrust.conf + content: | + # Network optimizations for BitTorrent tracker + net.core.rmem_max = 268435456 + net.core.wmem_max = 268435456 + net.core.netdev_max_backlog = 5000 + net.ipv4.tcp_rmem = 4096 65536 16777216 + net.ipv4.tcp_wmem = 4096 65536 16777216 + net.ipv4.tcp_congestion_control = bbr + net.ipv4.ip_local_port_range = 1024 65535 + net.core.somaxconn = 1024 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # Install Docker using official method + # Remove any old Docker packages + - > + for pkg in docker.io docker-doc docker-compose docker-compose-v2 podman-docker containerd runc; do + apt-get remove -y $pkg || true; done + + # Add Docker's official GPG key + - mkdir -p /etc/apt/keyrings + - > + curl -fsSL https://download.docker.com/linux/ubuntu/gpg + -o /etc/apt/keyrings/docker.asc + - chmod a+r /etc/apt/keyrings/docker.asc + # Add Docker repository + - > + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null + + # Update package index and install Docker + - apt-get update + - > + apt-get install -y docker-ce docker-ce-cli containerd.io + docker-buildx-plugin docker-compose-plugin + + # Configure Docker + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Verify Docker installation + - docker --version + - docker compose version + + # NOTE: Rust installation commented out since we're using Docker for all services (see ADR-002) + # Uncomment the following section if you need to compile Rust applications from source: + # # Install Rust using rustup (official method) + # # Install as torrust user to ensure proper ownership + # - > + # sudo -u torrust bash -c 'curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y' + # + # # Add Rust to PATH for torrust user + # - > + # echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> /home/torrust/.bashrc + # - > + # echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> /home/torrust/.profile + # + # # Verify Rust installation + # - sudo -u torrust bash -c 'source ~/.cargo/env && rustc --version' + # - sudo -u torrust bash -c 'source ~/.cargo/env && cargo --version' + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw allow 80/tcp + - ufw allow 443/tcp + - ufw allow 6868/udp + - ufw allow 6969/udp + - ufw allow 7070/tcp + - ufw allow 1212/tcp + - ufw --force enable + + # Apply sysctl settings + - sysctl -p /etc/sysctl.d/99-torrust.conf + + # Configure automatic security updates + - > + echo 'Unattended-Upgrade::Automatic-Reboot "false";' >> + /etc/apt/apt.conf.d/50unattended-upgrades + - systemctl enable unattended-upgrades + # Restart Docker so the log-rotation settings in /etc/docker/daemon.json take effect + - systemctl restart docker + +# Final message +final_message: | + Torrust Tracker 
Demo VM setup completed! + + System Information: + - OS: Ubuntu 24.04 LTS + - User: torrust (with sudo privileges and SSH key access only) + - Docker: Installed and configured + - Firewall: UFW enabled with proper SSH rules + - Security: Automatic updates enabled + - Note: All Torrust Tracker services run in Docker containers (see ADR-002) + Rust build dependencies are commented out but can be enabled if needed + + SSH Access: + - SSH Key: ssh torrust@VM_IP + - Password: Disabled for security (can be re-enabled in cloud-init config if needed) + + Next steps: + 1. SSH into the VM as user 'torrust' + 2. Clone the torrust-tracker-demo repository + 3. Run the deployment scripts using Docker Compose + + The VM is ready for Torrust Tracker deployment! + +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/README.md b/infrastructure/docs/bugs/001-ssh-authentication-failure/README.md new file mode 100644 index 0000000..e6b98a5 --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/README.md @@ -0,0 +1,104 @@ +# SSH Authentication Failure Bug - #001 + +**Date Resolved:** July 4, 2025 +**Status:** ✅ Resolved +**Impact:** High - Blocked VM access completely +**Root Cause:** YAML document start marker (`---`) breaking cloud-init parsing + +## Problem Summary + +The full cloud-init configuration (`user-data.yaml.tpl`) for the Torrust Tracker +Demo VM was causing SSH authentication failures for both SSH key and password +authentication, preventing users from accessing deployed VMs. + +## Root Cause + +The issue was caused by using the YAML document start marker (`---`) at the +beginning of the cloud-init configuration file instead of the required +`#cloud-config` header. 
This caused cloud-init to misprocess the entire +configuration, resulting in: + +- Empty SSH authorized_keys (SSH key variable not templated) +- Broken password authentication setup +- Schema validation errors in cloud-init + +## The Fix + +**Simple but Critical Change:** + +```yaml +# BEFORE (BROKEN): +--- +# cloud-config + +# AFTER (FIXED): +#cloud-config +``` + +**File Changed:** `infrastructure/cloud-init/user-data.yaml.tpl` + +## Investigation Process + +This bug was resolved through systematic incremental testing: + +1. **Incremental Testing**: Created 15+ test configurations, adding features one by one +2. **Root Cause Isolation**: Compared working vs. broken configurations using diff analysis +3. **Hypothesis Formation**: Identified YAML header as the key difference +4. **Validation**: Deployed fresh VM with corrected header and confirmed fix + +## Validation Results + +After applying the fix: + +- ✅ SSH Key Authentication: Works perfectly +- ✅ Password Authentication: Works perfectly +- ✅ All Cloud-Init Features: Docker, UFW, packages, etc. - ALL WORKING +- ✅ Integration Tests: Complete test suite passes +- ✅ Make Commands: Standard workflow (`make init`, `make plan`, `make apply`) works + +## Files in This Directory + +### Core Documentation + +- `SSH_BUG_ANALYSIS.md` - Initial analysis and hypothesis formation +- `SSH_BUG_SUMMARY.md` - Complete investigation summary with detailed timeline + +### Test Artifacts + +- `test-configs/` - All 16 test configurations used during incremental testing + - `user-data-test-1.1.yaml.tpl` through `user-data-test-15.1.yaml.tpl` + - `user-data-test-header.yaml.tpl` - Final test that confirmed the fix + +### Validation + +- `validation/` - (Currently empty, reserved for future validation scripts) + +## Lessons Learned + +1. **Cloud-init requires specific headers**: `#cloud-config` is mandatory, not `---` +2. **Incremental testing is powerful**: Systematic approach isolated the issue effectively +3. 
**Template variable validation**: Always verify that template variables are being substituted correctly +4. **Integration testing is crucial**: End-to-end testing revealed the full scope of the issue + +## Prevention + +To prevent similar issues: + +- Always use `#cloud-config` as the first line in cloud-init files +- Test template variable substitution in terraform plans +- Run integration tests after any cloud-init configuration changes +- Use the documented make workflow for deployments + +## Related Issues + +This fix resolves SSH access problems that were preventing users from following +the integration testing guide and deploying the Torrust Tracker Demo +successfully. + +## Technical Details + +For complete technical details, debugging methodology, and step-by-step +investigation process, see: + +- [SSH_BUG_ANALYSIS.md](SSH_BUG_ANALYSIS.md) - Initial investigation +- [SSH_BUG_SUMMARY.md](SSH_BUG_SUMMARY.md) - Comprehensive analysis with timeline diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/SSH_BUG_ANALYSIS.md b/infrastructure/docs/bugs/001-ssh-authentication-failure/SSH_BUG_ANALYSIS.md new file mode 100644 index 0000000..2c584a0 --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/SSH_BUG_ANALYSIS.md @@ -0,0 +1,132 @@ + + +# SSH Authentication Bug Analysis - Cloud-Init Configuration + +## Problem Summary + +The full cloud-init configuration (`user-data.yaml.tpl`) for the Torrust Tracker Demo VM causes SSH authentication failures. Both SSH key and password authentication are denied, preventing access to the deployed VM. 
+ +## Current Status + +- **Baseline**: Minimal config works perfectly (SSH key + password auth) +- **Problem**: Full config breaks SSH completely (connection refused/denied) +- **Goal**: Identify the exact component causing SSH failure + +## Test Results Summary + +### ✅ Working Configurations (SSH Access Confirmed) + +| Test | Description | Config File | SSH Key | SSH Password | Notes | +| ------------ | ---------------------- | --------------------------------- | ------- | ------------ | ------------------ | +| Baseline | Minimal config | `user-data-minimal.yaml.tpl` | ✅ | ✅ | Perfect baseline | +| Test 1.1 | Switch to torrust user | `user-data-test-1.1.yaml.tpl` | ✅ | ✅ | User config OK | +| Test 2.1 | Add basic packages | `user-data-test-2.1.yaml.tpl` | ✅ | ✅ | Package install OK | +| Test 3.1/3.2 | SSH config + restart | `user-data-test-3.1/3.2.yaml.tpl` | ✅ | ✅ | SSH config OK | +| Test 5.1 | Add UFW firewall | `user-data-test-5.1.yaml.tpl` | ✅ | ✅ | UFW rules OK | +| Test 7.1 | Add reboot | `user-data-test-7.1.yaml.tpl` | ✅ | ✅ | Reboot OK | + +### ❌ Failing Configuration + +| Test | Description | Config File | SSH Key | SSH Password | Notes | +| ---- | --------------- | -------------------- | ------- | ------------ | ---------------------- | +| Full | Complete config | `user-data.yaml.tpl` | ❌ | ❌ | Both auth methods fail | + +## Technical Analysis + +### Network Connectivity + +- VM gets IP address via DHCP (confirmed) +- SSH port 22 is open (nmap confirms) +- UFW is not blocking SSH (rules allow port 22) +- SSH daemon is running (telnet connects to port 22) + +### SSH Daemon Status + +- SSH service is active and running +- Port 22 is listening +- However, authentication is denied for both methods +- Error: "Permission denied (publickey,password)" + +### What We've Ruled Out + +1. **Network/Firewall**: UFW allows SSH, port is open +2. **SSH Service**: Daemon is running and accepting connections +3. 
**User Configuration**: torrust user exists with proper groups +4. **Basic Packages**: Standard package installation doesn't break SSH +5. **Reboot**: System reboot doesn't affect SSH access + +## Suspect Components (Not Yet Tested) + +Based on the difference between working Test 7.1 and failing full config: + +### 1. **fail2ban** (HIGH PRIORITY) + +- **Risk**: Could be blocking SSH attempts +- **Mechanism**: Might ban localhost/initial connections +- **Test needed**: Add fail2ban to working config + +### 2. **Docker Installation/Configuration** (HIGH PRIORITY) + +- **Risk**: Docker daemon.json or service conflicts +- **Mechanism**: Could affect networking or SSH service +- **Test needed**: Add Docker components separately + +### 3. **sysctl Network Tuning** (MEDIUM PRIORITY) + +- **Risk**: Network parameter changes could affect SSH +- **Mechanism**: TCP/networking tweaks might break SSH +- **Test needed**: Add sysctl configuration + +### 4. **unattended-upgrades** (LOW PRIORITY) + +- **Risk**: Could trigger system changes during boot +- **Mechanism**: Background updates might conflict +- **Test needed**: Add unattended-upgrades config + +### 5. **Service Restart Timing** (MEDIUM PRIORITY) + +- **Risk**: Docker restart might affect SSH +- **Mechanism**: Service interdependencies +- **Test needed**: Add Docker restart commands + +## Testing Strategy + +### Phase 1: Individual Component Testing + +1. Test 8.1: Add fail2ban to working config +2. Test 8.2: Add Docker daemon.json to working config +3. Test 8.3: Add sysctl settings to working config +4. Test 8.4: Add unattended-upgrades to working config +5. 
Test 8.5: Add Docker service restarts to working config + +### Phase 2: Combination Testing + +- If individual components work, test combinations +- Build up to full config systematically + +### Phase 3: Detailed Investigation + +- If issue persists, examine logs in detail +- Check cloud-init logs, SSH logs, system logs +- Use VM console access for debugging + +## Next Steps + +1. ✅ **Document findings** (this file) +2. 🔄 **Create incremental test configs** for suspect components +3. 🔄 **Test each component individually** +4. 🔄 **Identify the breaking component** +5. 🔄 **Fix or work around the issue** + +## Expected Outcome + +We expect to identify a single component (most likely fail2ban or Docker configuration) that breaks SSH authentication. Once identified, we can either: + +- Fix the component's configuration +- Reorder the installation/configuration steps +- Work around the issue with alternative approaches + +--- + +_Analysis Date: July 4, 2025_ +_Last Updated: Initial analysis_ diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/SSH_BUG_SUMMARY.md b/infrastructure/docs/bugs/001-ssh-authentication-failure/SSH_BUG_SUMMARY.md new file mode 100644 index 0000000..60da55a --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/SSH_BUG_SUMMARY.md @@ -0,0 +1,259 @@ + + +# SSH Authentication Bug Analysis Summary + +**Date:** July 4, 2025 +**Status:** ✅ RESOLVED - ROOT CAUSE CONFIRMED + +## Problem Description + +The full cloud-init configuration (`user-data.yaml.tpl`) for the Torrust Tracker +Demo VM causes SSH authentication failures for both SSH key and password +authentication. 
The issue manifests as: + +- SSH connection attempts time out or are rejected +- Both SSH key authentication and password authentication fail +- VM appears to be running normally (gets IP, port 22 is open, SSH daemon is + running) +- UFW firewall shows SSH is allowed + +## ROOT CAUSE IDENTIFIED AND CONFIRMED ✅ + +**CONFIRMED**: The YAML document start marker ("---") was causing cloud-init to +process the configuration incorrectly, leading to SSH authentication failures. + +**EVIDENCE**: + +- **user-data.yaml.tpl** (BROKEN): Uses "---" as the first line → SSH + authentication fails +- **user-data-test-header.yaml.tpl** (FIXED): Uses "#cloud-config" as the first + line → SSH authentication works perfectly + +**VALIDATION RESULTS**: + +- ✅ SSH Key Authentication: Works perfectly +- ✅ Password Authentication: Works perfectly (password: torrust123) +- ✅ All cloud-init features: Applied correctly (Docker, UFW, packages, etc.) + +**CONCLUSION**: The cloud-init parser requires "#cloud-config" as the first +line, not the YAML document start marker "---". Using "---" causes the entire +configuration to be misprocessed, breaking SSH setup while other features may +still work partially. + +## Current Knowledge + +### Working Components (Confirmed through incremental testing) + +1. **Basic user setup** (`user-data-minimal.yaml.tpl`) - SSH ✅ +2. **torrust user creation** (`user-data-test-1.1.yaml.tpl`) - SSH ✅ +3. **Basic packages installation** (`user-data-test-2.1.yaml.tpl`) - SSH ✅ +4. **SSH configuration and restart** (`user-data-test-3.1.yaml.tpl`, + `user-data-test-3.2.yaml.tpl`) - SSH ✅ +5. **UFW firewall configuration** (`user-data-test-5.1.yaml.tpl`) - SSH ✅ +6. **System reboot** (`user-data-test-7.1.yaml.tpl`) - SSH ✅ +7. **Fail2ban** (`user-data-test-8.1.yaml.tpl`) - SSH ✅ +8. **Docker installation and configuration** (`user-data-test-9.1.yaml.tpl`) - SSH ✅ +9. **Sysctl network optimizations** (`user-data-test-10.1.yaml.tpl`) - SSH ✅ +10. 
**Unattended-upgrades** (`user-data-test-11.1.yaml.tpl`) - SSH ✅ +11. **Torrust packages** (`user-data-test-12.1.yaml.tpl`) - SSH ✅ +12. **Docker Compose V2** (`user-data-test-13.1.yaml.tpl`) - SSH ✅ +13. **UFW additional rules** (`user-data-test-14.1.yaml.tpl`) - SSH ✅ +14. **Docker restart** (`user-data-test-15.1.yaml.tpl`) - SSH ✅ + +### Suspect Components (Not yet isolated) + +Based on the difference between the last working config +(`user-data-test-7.1.yaml.tpl`) and the full config (`user-data.yaml.tpl`), +the following components are suspects: + +1. **fail2ban** - Could be blocking SSH connections +2. **Docker installation and configuration** - Could interfere with networking +3. **sysctl network optimizations** - Could affect SSH connections +4. **unattended-upgrades** - Could interfere during setup +5. **Docker daemon restart** - Could cause timing issues + +## Testing Methodology + +Using incremental testing approach: + +- Start with last known working config (`user-data-test-7.1.yaml.tpl`) +- Add one suspect component at a time +- Test SSH after each addition +- Identify the exact component that breaks SSH + +## Test Results So Far + +| Config | Components Added | SSH Key | SSH Password | Status | +| ------------ | ------------------------- | ------- | ------------ | ---------- | +| minimal | ubuntu user only | ✅ | ✅ | Working | +| test-1.1 | + torrust user | ✅ | ✅ | Working | +| test-2.1 | + basic packages | ✅ | ✅ | Working | +| test-3.1/3.2 | + SSH config/restart | ✅ | ✅ | Working | +| test-5.1 | + UFW firewall | ✅ | ✅ | Working | +| test-7.1 | + reboot | ✅ | ✅ | Working | +| test-8.1 | + fail2ban | ✅ | ✅ | Working | +| test-9.1 | + Docker | ✅ | ✅ | Working | +| test-10.1 | + sysctl optimizations | ✅ | ✅ | Working | +| test-11.1 | + unattended-upgrades | ✅ | ✅ | Working | +| test-12.1 | + Torrust packages | ✅ | ✅ | Working | +| test-13.1 | + Docker Compose V2 | ✅ | ✅ | Working | +| test-14.1 | + UFW additional rules | ✅ | ✅ | Working | +| test-15.1 | + 
Docker restart | ✅ | ✅ | Working | +| **full** | + ALL COMPONENTS COMBINED | ❌ | ❌ | **BROKEN** | + +## CRITICAL DISCOVERY - CONFIRMED! + +🚨 **ALL INDIVIDUAL COMPONENTS WORK!** 🚨 +✅ **FULL CONFIGURATION FAILS!** ✅ + +**CONFIRMATION TEST RESULTS:** + +- **Full Config VM IP:** 192.168.122.6 +- **SSH Key Authentication:** ❌ Permission denied (publickey) +- **SSH Password Authentication:** ❌ Permission denied (publickey) +- **Port 22 Status:** ✅ Open and listening +- **SSH Daemon:** ✅ Running + +This **confirms our hypothesis** that the SSH failure is NOT caused by any +individual component, but rather by the combination of all components together. + +We have systematically tested **EVERY SINGLE COMPONENT** from the full configuration +individually, and they all work perfectly. This means the SSH failure is NOT caused by +any individual component, but rather by: + +1. **Component interactions** - Multiple components interfering with each other +2. **Timing issues** - Race conditions between services during startup +3. **Configuration ordering** - The sequence of operations matters +4. **Cumulative effects** - The combination of all components together + +## Next Steps + +1. **Test fail2ban** - Add fail2ban package and default config to test-7.1 ✅ **PASSED** +2. **Test Docker** - Add Docker installation and configuration ✅ **PASSED** +3. **Test sysctl** - Add network optimizations ✅ **PASSED** +4. **Test unattended-upgrades** - Add automatic updates configuration ✅ **PASSED** +5. **Test Torrust packages** - Add pkg-config, libssl-dev, make, build-essential, + libsqlite3-dev, sqlite3 ✅ **PASSED** +6. **Test Docker Compose installation** - Add Docker Compose V2 plugin installation ✅ **PASSED** +7. **Test additional UFW rules** - Add Torrust-specific firewall rules ✅ **PASSED** +8. **Test Docker restart** - Add Docker daemon restart command ✅ **PASSED** + +## NEW INVESTIGATION STRATEGY + +Since all individual components work, we need to investigate: + +1. 
**Test exact full configuration** - Deploy the exact full config and debug +2. **Compare configurations** - Find subtle differences between working incremental tests and full config +3. **Timing analysis** - Investigate service startup timing and dependencies +4. **Component interaction analysis** - Test combinations of components + +## Hypotheses - UPDATED AFTER DISCOVERY + +**ALL INDIVIDUAL COMPONENTS HAVE BEEN RULED OUT!** + +1. **fail2ban blocking SSH** - ❌ **RULED OUT** - Test 8.1 passed +2. **Docker network interference** - ❌ **RULED OUT** - Test 9.1 passed +3. **sysctl optimizations** - ❌ **RULED OUT** - Test 10.1 passed +4. **unattended-upgrades** - ❌ **RULED OUT** - Test 11.1 passed +5. **Additional Torrust packages** - ❌ **RULED OUT** - Test 12.1 passed +6. **Docker Compose installation** - ❌ **RULED OUT** - Test 13.1 passed +7. **Additional UFW rules** - ❌ **RULED OUT** - Test 14.1 passed +8. **Docker restart command** - ❌ **RULED OUT** - Test 15.1 passed + +**NEW HYPOTHESES - ROOT CAUSE ANALYSIS:** + +1. **Component interactions** - ⚠️ **LIKELY** - Multiple components interfering +2. **Timing issues** - ⚠️ **LIKELY** - Race conditions during startup +3. **Service dependencies** - ⚠️ **LIKELY** - Services starting in wrong order +4. **Cumulative resource usage** - ⚠️ **POSSIBLE** - Memory/CPU constraints +5. **Configuration file conflicts** - ⚠️ **POSSIBLE** - Overlapping configs +6. 
**SSH service restart timing** - ⚠️ **POSSIBLE** - SSH restart conflicts with other services + +## Technical Details + +- **VM Environment**: libvirt/KVM with Ubuntu 24.04 cloud image +- **SSH Configuration**: Both key and password authentication enabled +- **Network**: UFW firewall with SSH explicitly allowed +- **Testing Tools**: ssh, sshpass, nc, virsh net-dhcp-leases + +## Files Created + +- `user-data-minimal.yaml.tpl` - Baseline working config +- `user-data-test-1.1.yaml.tpl` - + torrust user +- `user-data-test-2.1.yaml.tpl` - + basic packages +- `user-data-test-3.1.yaml.tpl` - + SSH config +- `user-data-test-3.2.yaml.tpl` - + SSH restart +- `user-data-test-5.1.yaml.tpl` - + UFW firewall +- `user-data-test-7.1.yaml.tpl` - + reboot +- `user-data.yaml.tpl` - Full config (broken) + +## Current Action + +Creating incremental tests to isolate the exact component causing SSH failure. + +## 🎉 FINAL RESOLUTION AND SUCCESS ✅ + +**DATE:** July 4, 2025 +**STATUS:** ✅ COMPLETELY RESOLVED + +### Root Cause Confirmed + +The SSH authentication failure in the Torrust Tracker Demo VM was caused by **the YAML document start marker (`---`) at the beginning of the cloud-init configuration file**. + +### The Fix + +**Simple but Critical Change:** + +```yaml +# BEFORE (BROKEN): +--- +# cloud-config + +# AFTER (FIXED): +#cloud-config +``` + +### Validation Results + +**Fresh deployment using make commands:** + +1. `make destroy` - Clean slate +2. `make init` - Initialize OpenTofu +3. `make plan` - Verified SSH key templating is correct +4. `make apply` - Deployed fresh VM + +**Authentication Test Results:** + +- ✅ **SSH Key Authentication**: `ssh torrust@192.168.122.172` - SUCCESS +- ✅ **Password Authentication**: `sshpass -p 'torrust123' ssh torrust@192.168.122.172` - SUCCESS +- ✅ **All Cloud-Init Features**: Docker, UFW, packages, etc. 
- ALL WORKING + +### Technical Details + +**The Problem:** + +- Cloud-init parser expects `#cloud-config` as the first line +- Starting the file with the YAML document start marker `---` instead means cloud-init does not recognize it as cloud-config, so the configuration is not processed as intended +- This breaks SSH key templating (`${ssh_public_key}` becomes `None`) +- This results in an empty `ssh_authorized_keys` list and authentication failures + +**The Solution:** + +- Replace `---` with `#cloud-config` at the beginning of `user-data.yaml.tpl` +- This ensures proper cloud-init parsing and SSH key templating +- All other cloud-init features continue to work correctly + +### Impact + +This fix resolves the SSH authentication issue that was preventing users from accessing the Torrust Tracker Demo VM. The infrastructure is now working as designed with both SSH key and password authentication enabled. + +**Files Fixed:** + +- `infrastructure/cloud-init/user-data.yaml.tpl` - Header changed from `---` to `#cloud-config` + +**Deployment Method:** + +- Standard make commands work perfectly: `make init`, `make plan`, `make apply` +- Integration testing workflow is fully operational + +## ROOT CAUSE IDENTIFIED AND CONFIRMED ✅ diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-1.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-1.1.yaml.tpl new file mode 100644 index 0000000..ffa61bb --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-1.1.yaml.tpl @@ -0,0 +1,40 @@ +#cloud-config +# Test 1.1: Change from testuser to torrust user +# Based on minimal config + torrust user configuration + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration - CHANGED: torrust user instead of testuser +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + 
shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-10.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-10.1.yaml.tpl new file mode 100644 index 0000000..20ea273 --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-10.1.yaml.tpl @@ -0,0 +1,182 @@ +#cloud-config +# Test 10.1: Add sysctl network optimizations (SUSPECT TEST) +# Based on Test 9.1 + sysctl network optimizations + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW, fail2ban, and Docker +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ufw + - fail2ban + - docker.io + - ca-certificates + - gnupg + - lsb-release + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + 
ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # fail2ban configuration + - path: /etc/fail2ban/jail.local + content: | + [DEFAULT] + # Default ban time (10 minutes) + bantime = 600 + # Find time window (10 minutes) + findtime = 600 + # Max retries before ban + maxretry = 5 + # Backend to use + backend = systemd + + [sshd] + enabled = true + port = ssh + filter = sshd + logpath = /var/log/auth.log + maxretry = 5 + bantime = 600 + findtime = 600 + permissions: "0644" + owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + + # Sysctl optimizations for network performance (NEW - SUSPECT) + - path: /etc/sysctl.d/99-torrust.conf + content: | + # Network optimizations for BitTorrent tracker + net.core.rmem_max = 268435456 + net.core.wmem_max = 268435456 + net.core.netdev_max_backlog = 5000 + net.ipv4.tcp_rmem = 4096 65536 16777216 + net.ipv4.tcp_wmem = 4096 65536 16777216 + net.ipv4.tcp_congestion_control = bbr + net.ipv4.ip_local_port_range = 1024 65535 + net.core.somaxconn = 1024 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw --force enable + + # Configure and start fail2ban service + - systemctl enable fail2ban + - systemctl start fail2ban + + # Configure Docker + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Install Docker Compose V2 
plugin + - mkdir -p /usr/local/lib/docker/cli-plugins + - > + curl -SL + "https://github.com/docker/compose/releases/download/v2.21.0/docker-compose-linux-x86_64" + -o /usr/local/lib/docker/cli-plugins/docker-compose + - chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + - > + ln -sf /usr/local/lib/docker/cli-plugins/docker-compose + /usr/local/bin/docker-compose + + # Apply sysctl settings (NEW - SUSPECT) + - sysctl -p /etc/sysctl.d/99-torrust.conf + +# Final message +final_message: | + Test 10.1 VM setup completed! + SSH Access: ssh torrust@VM_IP or sshpass -p 'torrust123' ssh torrust@VM_IP + +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-11.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-11.1.yaml.tpl new file mode 100644 index 0000000..a10c5eb --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-11.1.yaml.tpl @@ -0,0 +1,189 @@ +#cloud-config +# Test 11.1: Add unattended-upgrades configuration (SUSPECT TEST) +# Based on Test 10.1 + unattended-upgrades package and configuration + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW, fail2ban, 
Docker, and unattended-upgrades +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ufw + - fail2ban + - docker.io + - ca-certificates + - gnupg + - lsb-release + - unattended-upgrades + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # fail2ban configuration + - path: /etc/fail2ban/jail.local + content: | + [DEFAULT] + # Default ban time (10 minutes) + bantime = 600 + # Find time window (10 minutes) + findtime = 600 + # Max retries before ban + maxretry = 5 + # Backend to use + backend = systemd + + [sshd] + enabled = true + port = ssh + filter = sshd + logpath = /var/log/auth.log + maxretry = 5 + bantime = 600 + findtime = 600 + permissions: "0644" + owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + + # Sysctl optimizations for network performance + - path: /etc/sysctl.d/99-torrust.conf + content: | + # Network optimizations for BitTorrent tracker + net.core.rmem_max = 268435456 + net.core.wmem_max = 268435456 + net.core.netdev_max_backlog = 5000 + net.ipv4.tcp_rmem = 4096 65536 16777216 + net.ipv4.tcp_wmem = 4096 65536 16777216 + net.ipv4.tcp_congestion_control = bbr + net.ipv4.ip_local_port_range = 1024 65535 + net.core.somaxconn = 1024 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new 
config) + - systemctl restart sshd + - systemctl enable ssh + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw --force enable + + # Configure and start fail2ban service + - systemctl enable fail2ban + - systemctl start fail2ban + + # Configure Docker + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Install Docker Compose V2 plugin + - mkdir -p /usr/local/lib/docker/cli-plugins + - > + curl -SL + "https://github.com/docker/compose/releases/download/v2.21.0/docker-compose-linux-x86_64" + -o /usr/local/lib/docker/cli-plugins/docker-compose + - chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + - > + ln -sf /usr/local/lib/docker/cli-plugins/docker-compose + /usr/local/bin/docker-compose + + # Apply sysctl settings + - sysctl -p /etc/sysctl.d/99-torrust.conf + + # Configure automatic security updates (NEW - SUSPECT) + - > + echo 'Unattended-Upgrade::Automatic-Reboot "false";' >> + /etc/apt/apt.conf.d/50unattended-upgrades + - systemctl enable unattended-upgrades + +# Final message +final_message: | + Test 11.1 VM setup completed! 
+ SSH Access: ssh torrust@VM_IP or sshpass -p 'torrust123' ssh torrust@VM_IP + +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-12.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-12.1.yaml.tpl new file mode 100644 index 0000000..cfcbd3a --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-12.1.yaml.tpl @@ -0,0 +1,167 @@ +#cloud-config +# Test 12.1: Add additional Torrust-specific packages +# Based on Test 11.1 + additional packages (pkg-config, libssl-dev, make, build-essential, libsqlite3-dev, sqlite3) + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW, fail2ban, Docker, unattended-upgrades, and Torrust-specific packages +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ca-certificates + - gnupg + - lsb-release + - ufw + - fail2ban + - unattended-upgrades + - docker.io + # Torrust Tracker dependencies for future source compilation + # Currently using Docker, but planning to compile from source for better performance + - pkg-config + - libssl-dev + - make + - build-essential + - libsqlite3-dev + - sqlite3 + +# System configuration 
files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # Sysctl optimizations for network performance + - path: /etc/sysctl.d/99-torrust.conf + content: | + # Network optimizations for BitTorrent tracker + net.core.rmem_max = 268435456 + net.core.wmem_max = 268435456 + net.core.netdev_max_backlog = 5000 + net.ipv4.tcp_rmem = 4096 65536 16777216 + net.ipv4.tcp_wmem = 4096 65536 16777216 + net.ipv4.tcp_congestion_control = bbr + net.ipv4.ip_local_port_range = 1024 65535 + net.core.somaxconn = 1024 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # Configure Docker + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw allow 80/tcp + - ufw allow 443/tcp + - ufw --force enable + + # Apply sysctl settings + - sysctl -p /etc/sysctl.d/99-torrust.conf + + # Configure automatic security updates + - > + echo 'Unattended-Upgrade::Automatic-Reboot "false";' >> + /etc/apt/apt.conf.d/50unattended-upgrades + - systemctl enable 
unattended-upgrades + +# Final message +final_message: | + Test 12.1: Torrust Tracker Demo VM with additional packages + + Added packages: pkg-config, libssl-dev, make, build-essential, libsqlite3-dev, sqlite3 + + SSH Access: + - SSH Key: ssh torrust@VM_IP + - Password: sshpass -p 'torrust123' ssh torrust@VM_IP + + Testing additional Torrust dependencies... + +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-13.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-13.1.yaml.tpl new file mode 100644 index 0000000..c3a0be1 --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-13.1.yaml.tpl @@ -0,0 +1,178 @@ +#cloud-config +# Test 13.1: Add Docker Compose V2 plugin installation +# Based on Test 12.1 + Docker Compose V2 plugin installation + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW, fail2ban, Docker, unattended-upgrades, and Torrust-specific packages +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ca-certificates + - gnupg + - lsb-release + - ufw + - fail2ban + - unattended-upgrades + - docker.io + # Torrust Tracker dependencies for 
future source compilation + # Currently using Docker, but planning to compile from source for better performance + - pkg-config + - libssl-dev + - make + - build-essential + - libsqlite3-dev + - sqlite3 + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # Sysctl optimizations for network performance + - path: /etc/sysctl.d/99-torrust.conf + content: | + # Network optimizations for BitTorrent tracker + net.core.rmem_max = 268435456 + net.core.wmem_max = 268435456 + net.core.netdev_max_backlog = 5000 + net.ipv4.tcp_rmem = 4096 65536 16777216 + net.ipv4.tcp_wmem = 4096 65536 16777216 + net.ipv4.tcp_congestion_control = bbr + net.ipv4.ip_local_port_range = 1024 65535 + net.core.somaxconn = 1024 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # Configure Docker + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Install Docker Compose V2 plugin (compatible with compose.yaml format) + - mkdir -p /usr/local/lib/docker/cli-plugins + - > + curl -SL + "https://github.com/docker/compose/releases/download/v2.21.0/docker-compose-linux-x86_64" + -o 
/usr/local/lib/docker/cli-plugins/docker-compose + - chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + - > + ln -sf /usr/local/lib/docker/cli-plugins/docker-compose + /usr/local/bin/docker-compose + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw allow 80/tcp + - ufw allow 443/tcp + - ufw --force enable + + # Apply sysctl settings + - sysctl -p /etc/sysctl.d/99-torrust.conf + + # Configure automatic security updates + - > + echo 'Unattended-Upgrade::Automatic-Reboot "false";' >> + /etc/apt/apt.conf.d/50unattended-upgrades + - systemctl enable unattended-upgrades + +# Final message +final_message: | + Test 13.1: Torrust Tracker Demo VM with Docker Compose V2 plugin + + Added: Docker Compose V2 plugin installation + + SSH Access: + - SSH Key: ssh torrust@VM_IP + - Password: sshpass -p 'torrust123' ssh torrust@VM_IP + + Testing Docker Compose V2 plugin installation... 
+ +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-14.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-14.1.yaml.tpl new file mode 100644 index 0000000..f3695de --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-14.1.yaml.tpl @@ -0,0 +1,183 @@ +#cloud-config +# Test 14.1: Add additional UFW rules for Torrust-specific ports +# Based on Test 13.1 + additional UFW rules (6868/udp, 6969/udp, 7070/tcp, 1212/tcp) + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW, fail2ban, Docker, unattended-upgrades, and Torrust-specific packages +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ca-certificates + - gnupg + - lsb-release + - ufw + - fail2ban + - unattended-upgrades + - docker.io + # Torrust Tracker dependencies for future source compilation + # Currently using Docker, but planning to compile from source for better performance + - pkg-config + - libssl-dev + - make + - build-essential + - libsqlite3-dev + - sqlite3 + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: 
/etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # Sysctl optimizations for network performance + - path: /etc/sysctl.d/99-torrust.conf + content: | + # Network optimizations for BitTorrent tracker + net.core.rmem_max = 268435456 + net.core.wmem_max = 268435456 + net.core.netdev_max_backlog = 5000 + net.ipv4.tcp_rmem = 4096 65536 16777216 + net.ipv4.tcp_wmem = 4096 65536 16777216 + net.ipv4.tcp_congestion_control = bbr + net.ipv4.ip_local_port_range = 1024 65535 + net.core.somaxconn = 1024 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # Configure Docker + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Install Docker Compose V2 plugin (compatible with compose.yaml format) + - mkdir -p /usr/local/lib/docker/cli-plugins + - > + curl -SL + "https://github.com/docker/compose/releases/download/v2.21.0/docker-compose-linux-x86_64" + -o /usr/local/lib/docker/cli-plugins/docker-compose + - chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + - > + ln -sf /usr/local/lib/docker/cli-plugins/docker-compose + /usr/local/bin/docker-compose + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow 
outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw allow 80/tcp + - ufw allow 443/tcp + # Additional Torrust-specific ports + - ufw allow 6868/udp + - ufw allow 6969/udp + - ufw allow 7070/tcp + - ufw allow 1212/tcp + - ufw --force enable + + # Apply sysctl settings + - sysctl -p /etc/sysctl.d/99-torrust.conf + + # Configure automatic security updates + - > + echo 'Unattended-Upgrade::Automatic-Reboot "false";' >> + /etc/apt/apt.conf.d/50unattended-upgrades + - systemctl enable unattended-upgrades + +# Final message +final_message: | + Test 14.1: Torrust Tracker Demo VM with additional UFW rules + + Added: Additional UFW rules for Torrust ports (6868/udp, 6969/udp, 7070/tcp, 1212/tcp) + + SSH Access: + - SSH Key: ssh torrust@VM_IP + - Password: sshpass -p 'torrust123' ssh torrust@VM_IP + + Testing additional UFW firewall rules... + +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-15.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-15.1.yaml.tpl new file mode 100644 index 0000000..65d179e --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-15.1.yaml.tpl @@ -0,0 +1,187 @@ +#cloud-config +# Test 15.1: Add Docker daemon restart command +# Based on Test 14.1 + Docker daemon restart command (systemctl restart docker) + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + 
torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW, fail2ban, Docker, unattended-upgrades, and Torrust-specific packages +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ca-certificates + - gnupg + - lsb-release + - ufw + - fail2ban + - unattended-upgrades + - docker.io + # Torrust Tracker dependencies for future source compilation + # Currently using Docker, but planning to compile from source for better performance + - pkg-config + - libssl-dev + - make + - build-essential + - libsqlite3-dev + - sqlite3 + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # Sysctl optimizations for network performance + - path: /etc/sysctl.d/99-torrust.conf + content: | + # Network optimizations for BitTorrent tracker + net.core.rmem_max = 268435456 + net.core.wmem_max = 268435456 + net.core.netdev_max_backlog = 5000 + net.ipv4.tcp_rmem = 4096 65536 16777216 + net.ipv4.tcp_wmem = 4096 65536 16777216 + net.ipv4.tcp_congestion_control = bbr + net.ipv4.ip_local_port_range = 1024 65535 + net.core.somaxconn = 1024 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust 
/home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # Configure Docker + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Install Docker Compose V2 plugin (compatible with compose.yaml format) + - mkdir -p /usr/local/lib/docker/cli-plugins + - > + curl -SL + "https://github.com/docker/compose/releases/download/v2.21.0/docker-compose-linux-x86_64" + -o /usr/local/lib/docker/cli-plugins/docker-compose + - chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + - > + ln -sf /usr/local/lib/docker/cli-plugins/docker-compose + /usr/local/bin/docker-compose + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw allow 80/tcp + - ufw allow 443/tcp + # Additional Torrust-specific ports + - ufw allow 6868/udp + - ufw allow 6969/udp + - ufw allow 7070/tcp + - ufw allow 1212/tcp + - ufw --force enable + + # Apply sysctl settings + - sysctl -p /etc/sysctl.d/99-torrust.conf + + # Configure automatic security updates + - > + echo 'Unattended-Upgrade::Automatic-Reboot "false";' >> + /etc/apt/apt.conf.d/50unattended-upgrades + - systemctl enable unattended-upgrades + + # Set up log rotation for Docker - RESTART DOCKER DAEMON + # THIS IS THE SUSPECTED COMPONENT THAT BREAKS SSH! + - systemctl restart docker + +# Final message +final_message: | + Test 15.1: Torrust Tracker Demo VM with Docker daemon restart + + Added: Docker daemon restart command (systemctl restart docker) + + SSH Access: + - SSH Key: ssh torrust@VM_IP + - Password: sshpass -p 'torrust123' ssh torrust@VM_IP + + Testing Docker daemon restart command - THE SUSPECTED SSH BREAKER! 
+ +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-2.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-2.1.yaml.tpl new file mode 100644 index 0000000..a468356 --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-2.1.yaml.tpl @@ -0,0 +1,53 @@ +#cloud-config +# Test 2.1: Add basic packages +# Based on Test 1.1 + basic package installation + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations - ADDED +package_update: true +package_upgrade: true + +# Install basic packages - ADDED +packages: + - curl + - wget + - git + - htop + - vim + - net-tools diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-3.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-3.1.yaml.tpl new file mode 100644 index 0000000..57366b6 --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-3.1.yaml.tpl @@ -0,0 +1,63 @@ +#cloud-config +# Test 3.1: Add SSH configuration file +# Based on Test 2.1 + SSH daemon configuration + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User 
configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install basic packages +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + +# System configuration files - ADDED +write_files: + # SSH configuration to enable password authentication - ADDED + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-3.2.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-3.2.yaml.tpl new file mode 100644 index 0000000..76ead7a --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-3.2.yaml.tpl @@ -0,0 +1,73 @@ +#cloud-config +# Test 3.2: Add SSH service restart commands +# Based on Test 3.1 + SSH service restart + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package 
updates and installations +package_update: true +package_upgrade: true + +# Install basic packages +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + +# Commands to run after package installation - ADDED +runcmd: + # Create torrust user directories - ADDED + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) - ADDED + - systemctl restart sshd + - systemctl enable ssh diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-5.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-5.1.yaml.tpl new file mode 100644 index 0000000..03d3d6c --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-5.1.yaml.tpl @@ -0,0 +1,90 @@ +#cloud-config +# Test 5.1: Add UFW firewall configuration (CRITICAL TEST) +# Based on Test 3.2 + UFW firewall setup + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW - ADDED +packages: + - curl + - wget + - git + - htop 
+ - vim + - net-tools + - ufw + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # UFW basic configuration - ADDED + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) - ADDED + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw --force enable diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-7.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-7.1.yaml.tpl new file mode 100644 index 0000000..276d8c0 --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-7.1.yaml.tpl @@ -0,0 +1,102 @@ +#cloud-config +# Test 7.1: Add reboot configuration (CRITICAL TEST) +# Based on Test 5.1 + reboot after setup + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password 
authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ufw + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw --force enable + +# Final message +final_message: | + Test 7.1 VM setup completed! 
+ SSH Access: ssh torrust@VM_IP or sshpass -p 'torrust123' ssh torrust@VM_IP + +# Power state - reboot after setup - ADDED (CRITICAL TEST) +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-8.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-8.1.yaml.tpl new file mode 100644 index 0000000..9debb9e --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-8.1.yaml.tpl @@ -0,0 +1,131 @@ +#cloud-config +# Test 8.1: Add fail2ban configuration (SUSPECT TEST) +# Based on Test 7.1 + fail2ban package and basic configuration + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW and fail2ban +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ufw + - fail2ban + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # fail2ban configuration + - 
path: /etc/fail2ban/jail.local + content: | + [DEFAULT] + # Default ban time (10 minutes) + bantime = 600 + # Find time window (10 minutes) + findtime = 600 + # Max retries before ban + maxretry = 5 + # Backend to use + backend = systemd + + [sshd] + enabled = true + port = ssh + filter = sshd + logpath = /var/log/auth.log + maxretry = 5 + bantime = 600 + findtime = 600 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw --force enable + + # Configure and start fail2ban service + - systemctl enable fail2ban + - systemctl start fail2ban + +# Final message +final_message: | + Test 8.1 VM setup completed! 
+ SSH Access: ssh torrust@VM_IP or sshpass -p 'torrust123' ssh torrust@VM_IP + +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-9.1.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-9.1.yaml.tpl new file mode 100644 index 0000000..46cbdae --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-9.1.yaml.tpl @@ -0,0 +1,164 @@ +#cloud-config +# Test 9.1: Add Docker installation and configuration (SUSPECT TEST) +# Based on Test 8.1 + Docker package and basic configuration + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages including UFW, fail2ban, and Docker +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ufw + - fail2ban + - docker.io + - ca-certificates + - gnupg + - lsb-release + +# System configuration files +write_files: + # SSH configuration to enable password authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: 
"0644" + owner: root:root + + # fail2ban configuration + - path: /etc/fail2ban/jail.local + content: | + [DEFAULT] + # Default ban time (10 minutes) + bantime = 600 + # Find time window (10 minutes) + findtime = 600 + # Max retries before ban + maxretry = 5 + # Backend to use + backend = systemd + + [sshd] + enabled = true + port = ssh + filter = sshd + logpath = /var/log/auth.log + maxretry = 5 + bantime = 600 + findtime = 600 + permissions: "0644" + owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw --force enable + + # Configure and start fail2ban service + - systemctl enable fail2ban + - systemctl start fail2ban + + # Configure Docker (NEW - SUSPECT) + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Install Docker Compose V2 plugin (NEW - SUSPECT) + - mkdir -p /usr/local/lib/docker/cli-plugins + - > + curl -SL + "https://github.com/docker/compose/releases/download/v2.21.0/docker-compose-linux-x86_64" + -o /usr/local/lib/docker/cli-plugins/docker-compose + - chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + - > + ln -sf /usr/local/lib/docker/cli-plugins/docker-compose + /usr/local/bin/docker-compose + +# Final message +final_message: | + Test 9.1 VM setup completed! 
+ SSH Access: ssh torrust@VM_IP or sshpass -p 'torrust123' ssh torrust@VM_IP + +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-header.yaml.tpl b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-header.yaml.tpl new file mode 100644 index 0000000..eadb877 --- /dev/null +++ b/infrastructure/docs/bugs/001-ssh-authentication-failure/test-configs/user-data-test-header.yaml.tpl @@ -0,0 +1,184 @@ +#cloud-config +# Test: Full config with corrected header format (remove YAML document start marker) +# Testing if the "---" YAML document start marker is causing the SSH failure + +# Basic system configuration +hostname: torrust-tracker-demo +locale: en_US.UTF-8 +timezone: UTC + +# User configuration +users: + - name: torrust + groups: + [ + adm, + audio, + cdrom, + dialout, + dip, + floppy, + lxd, + netdev, + plugdev, + sudo, + video, + ] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + lock_passwd: false + ssh_authorized_keys: + - ${ssh_public_key} + +# Set password using chpasswd (most reliable method) +chpasswd: + list: | + torrust:torrust123 + expire: false + +# Enable SSH password authentication for debugging +ssh_pwauth: true + +# Package updates and installations +package_update: true +package_upgrade: true + +# Install packages (verified working order) +packages: + - curl + - wget + - git + - htop + - vim + - net-tools + - ca-certificates + - gnupg + - lsb-release + - ufw + - fail2ban + - unattended-upgrades + - docker.io + # Torrust Tracker dependencies for future source compilation + # Currently using Docker, but planning to compile from source for better performance + - pkg-config + - libssl-dev + - make + - build-essential + - libsqlite3-dev + - sqlite3 + +# System configuration files +write_files: + # SSH configuration to enable password 
authentication + - path: /etc/ssh/sshd_config.d/50-cloud-init.conf + content: | + PasswordAuthentication yes + PubkeyAuthentication yes + permissions: "0644" + owner: root:root + + # Docker daemon configuration + - path: /etc/docker/daemon.json + content: | + { + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } + } + permissions: "0644" + owner: root:root + + # UFW basic configuration + - path: /etc/ufw/ufw.conf + content: | + ENABLED=yes + LOGLEVEL=low + permissions: "0644" + owner: root:root + + # Sysctl optimizations for network performance + - path: /etc/sysctl.d/99-torrust.conf + content: | + # Network optimizations for BitTorrent tracker + net.core.rmem_max = 268435456 + net.core.wmem_max = 268435456 + net.core.netdev_max_backlog = 5000 + net.ipv4.tcp_rmem = 4096 65536 16777216 + net.ipv4.tcp_wmem = 4096 65536 16777216 + net.ipv4.tcp_congestion_control = bbr + net.ipv4.ip_local_port_range = 1024 65535 + net.core.somaxconn = 1024 + permissions: "0644" + owner: root:root + +# Commands to run after package installation +runcmd: + # Create torrust user directories + - mkdir -p /home/torrust/github/torrust + - chown -R torrust:torrust /home/torrust/github + + # Configure SSH first (restart sshd with new config) + - systemctl restart sshd + - systemctl enable ssh + + # Configure Docker + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Install Docker Compose V2 plugin (compatible with compose.yaml format) + - mkdir -p /usr/local/lib/docker/cli-plugins + - > + curl -SL + "https://github.com/docker/compose/releases/download/v2.21.0/docker-compose-linux-x86_64" + -o /usr/local/lib/docker/cli-plugins/docker-compose + - chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + - > + ln -sf /usr/local/lib/docker/cli-plugins/docker-compose + /usr/local/bin/docker-compose + + # CRITICAL: Configure UFW firewall SAFELY (allow SSH BEFORE enabling) + - ufw --force reset + - ufw default deny 
incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 22/tcp + - ufw allow 80/tcp + - ufw allow 443/tcp + - ufw allow 6868/udp + - ufw allow 6969/udp + - ufw allow 7070/tcp + - ufw allow 1212/tcp + - ufw --force enable + + # Apply sysctl settings + - sysctl -p /etc/sysctl.d/99-torrust.conf + + # Configure automatic security updates + - > + echo 'Unattended-Upgrade::Automatic-Reboot "false";' >> + /etc/apt/apt.conf.d/50unattended-upgrades + - systemctl enable unattended-upgrades + # Set up log rotation for Docker + - systemctl restart docker + +# Final message +final_message: | + Test: Full config with corrected header format + + Testing if the YAML document start marker (---) was causing SSH failure + + SSH Access: + - SSH Key: ssh torrust@VM_IP + - Password: sshpass -p 'torrust123' ssh torrust@VM_IP + + If this works, the root cause is the YAML document start marker! + +# Power state - reboot after setup +power_state: + mode: reboot + message: "Rebooting after initial setup" + timeout: 60 + condition: true diff --git a/infrastructure/docs/bugs/README.md b/infrastructure/docs/bugs/README.md new file mode 100644 index 0000000..dadc058 --- /dev/null +++ b/infrastructure/docs/bugs/README.md @@ -0,0 +1,133 @@ +# Bug Documentation Archive + +This directory contains comprehensive documentation for bugs that have been +investigated and resolved in the Torrust Tracker Demo infrastructure project. 
+ +## Purpose + +The purpose of this archive is to: + +- **Preserve Investigation Process**: Document the complete debugging methodology + and thought process used to identify and resolve infrastructure issues +- **Enable Knowledge Transfer**: Provide detailed reference material for future + contributors who encounter similar problems +- **Improve Debugging Skills**: Demonstrate systematic approaches to + infrastructure troubleshooting +- **Prevent Regression**: Maintain test cases and validation procedures to + ensure fixes remain effective + +## Structure + +Each bug is documented in its own numbered directory following this convention: + +```text +infrastructure/docs/bugs/ +├── README.md # This file +├── 001-ssh-authentication-failure/ # First documented bug +│ ├── README.md # Bug overview and summary +│ ├── SSH_BUG_ANALYSIS.md # Initial analysis and hypothesis +│ ├── SSH_BUG_SUMMARY.md # Complete investigation summary +│ ├── test-configs/ # Test configurations used +│ │ ├── user-data-test-1.1.yaml.tpl +│ │ ├── user-data-test-2.1.yaml.tpl +│ │ └── ... +│ └── validation/ # Final validation artifacts +└── 002-next-bug/ # Future bug documentation + └── ... +``` + +## Documentation Standards + +When documenting a new bug, create a new numbered directory and include: + +### Required Files + +1. **README.md** - Bug overview with: + + - Problem description + - Root cause summary + - Fix applied + - Validation results + - References to related files + +2. **Analysis Documentation** - Detailed investigation process: + + - Initial symptoms and error messages + - Hypothesis formation and testing + - Step-by-step debugging methodology + - Dead ends and lessons learned + +3. 
**Test Artifacts** - Evidence and test cases: + - Configuration files used during testing + - Test scripts and validation procedures + - Before/after comparisons + - Reproducible test cases + +### Naming Conventions + +- **Directories**: Use format `NNN-short-description` (e.g., `001-ssh-authentication-failure`) +- **Files**: Use descriptive names with consistent prefixes: + - `ANALYSIS_` for investigation documentation + - `SUMMARY_` for comprehensive overviews + - `test-` for test configurations + - `validation-` for final verification artifacts + +### Content Guidelines + +- **Be Comprehensive**: Include all relevant information, even failed attempts +- **Document Process**: Explain the reasoning behind each debugging step +- **Include Context**: Provide enough background for newcomers to understand +- **Show Evidence**: Include relevant log outputs, error messages, and test results +- **Explain the Fix**: Detail exactly what was changed and why it works +- **Provide Validation**: Include steps to verify the fix and prevent regression + +## Usage Examples + +### For Contributors Encountering Similar Issues + +1. **Search by Symptoms**: Look through bug directories for similar error messages + or behavior patterns +2. **Review Methodology**: Study the debugging approach used in similar cases +3. **Adapt Test Procedures**: Use existing test configurations as templates +4. **Apply Lessons Learned**: Benefit from documented pitfalls and solutions + +### For Maintainers + +1. **Validate Fixes**: Use documented test cases to ensure fixes remain effective +2. **Onboard New Contributors**: Point to relevant bug documentation for learning +3. **Improve Infrastructure**: Identify patterns in bugs to prevent future issues +4. 
**Review Process**: Use documented methodologies to improve debugging practices + +## Quality Standards + +All bug documentation should: + +- ✅ Be reproducible by following the documented steps +- ✅ Include complete context and background information +- ✅ Demonstrate systematic debugging methodology +- ✅ Provide clear validation procedures +- ✅ Explain both what worked and what didn't work +- ✅ Include timing information and performance impacts +- ✅ Reference related infrastructure components + +## Contributing + +When adding new bug documentation: + +1. **Create New Directory**: Use next available number with descriptive name +2. **Follow Standards**: Use the structure and naming conventions above +3. **Include All Artifacts**: Don't leave out "failed" attempts or test files +4. **Write for Others**: Assume the reader is unfamiliar with the specific issue +5. **Validate Documentation**: Ensure someone else can follow your steps +6. **Update This README**: Add any new patterns or insights to these guidelines + +## Index of Documented Bugs + +| Bug ID | Description | Status | Impact | Date Resolved | +| ------ | -------------------------- | ----------- | ------------------------ | ------------- | +| 001 | SSH Authentication Failure | ✅ Resolved | High - Blocked VM access | 2025-07-04 | + +--- + +_This archive serves as a knowledge base for infrastructure debugging and should +be maintained as a valuable resource for the Torrust community._ diff --git a/infrastructure/docs/infrastructure-overview.md b/infrastructure/docs/infrastructure-overview.md new file mode 100644 index 0000000..8c74dff --- /dev/null +++ b/infrastructure/docs/infrastructure-overview.md @@ -0,0 +1,250 @@ +# Infrastructure Setup Summary + +This document summarizes the local testing infrastructure setup for the Torrust +Tracker Demo project. 
+ +## 📁 What Was Created + +### Core Infrastructure Files + +```output +infrastructure/ +├── terraform/ +│ ├── main.tf # OpenTofu configuration for KVM/libvirt +│ ├── terraform.tfvars.example # Example configuration variables +│ └── .gitignore # Ignore generated files +├── cloud-init/ +│ ├── user-data.yaml # Main VM configuration +│ ├── meta-data.yaml # VM metadata template +│ └── network-config.yaml # Network configuration +└── README.md # Infrastructure overview +``` + +### Documentation + +```output +docs/infrastructure/ +├── quick-start.md # 5-minute setup guide +└── local-testing-setup.md # Complete setup documentation +``` + +### Testing Framework + +```output +tests/ +├── test-local-setup.sh # Infrastructure deployment tests +└── test-integration.sh # Torrust Tracker integration tests +``` + +### Automation + +```output +Makefile # Build automation and shortcuts +.github/workflows/infrastructure.yml # CI/CD validation +``` + +## 🎯 Capabilities + +### Local VM Testing + +- **KVM/libvirt virtualization** for local testing +- **Ubuntu 24.04 LTS** base image with cloud-init +- **Automated VM provisioning** with OpenTofu +- **Reproducible environments** identical to production + +### System Configuration + +- **Docker and Docker Compose** pre-installed +- **UFW firewall** configured with tracker ports +- **Performance optimizations** for BitTorrent traffic +- **Security hardening** with SSH keys and automatic updates + +### Testing Suite + +- **Prerequisites validation** - Check if tools are installed +- **Configuration syntax validation** - Validate OpenTofu and cloud-init +- **Infrastructure deployment tests** - Deploy, test, cleanup +- **Integration tests** - Full Torrust Tracker deployment validation + +### Developer Experience + +- **One-command setup** with `make dev-setup` +- **Simple deployment** with `make apply` +- **Easy SSH access** with `make ssh` +- **Comprehensive testing** with `make test` + +## 🚀 Getting Started + +### Quick Start (5 minutes) + 
+```bash +# 1. Install everything +make dev-setup + +# 2. Log out and back in for permissions + +# 3. Add your SSH key to infrastructure/cloud-init/user-data.yaml + +# 4. Test and deploy +make test-prereq +make apply +make ssh +``` + +### Full Test Suite + +```bash +# Run all tests (includes VM deployment) +make test + +# Or run integration tests on existing VM +make apply +make test-integration +make destroy +``` + +## 🔧 VM Specifications + +### Default Configuration + +- **OS**: Ubuntu 24.04 LTS +- **RAM**: 2GB (configurable) +- **CPU**: 2 cores (configurable) +- **Disk**: 20GB (configurable) +- **Network**: DHCP with port forwarding + +### Pre-installed Software + +- Docker 24.x with Docker Compose +- Git, curl, vim, htop, net-tools +- UFW firewall with fail2ban +- Automatic security updates + +### Network Ports (Pre-configured) + +- `22/tcp` - SSH access +- `80/tcp`, `443/tcp` - HTTP/HTTPS for proxy +- `6868/udp`, `6969/udp` - Torrust Tracker UDP (see [detailed port docs](../../application/docs/firewall-requirements.md#torrust-tracker-ports)) +- `7070/tcp` - Tracker HTTP API (see [detailed port docs](../../application/docs/firewall-requirements.md#torrust-tracker-ports)) +- `1212/tcp` - Metrics endpoint (see [detailed port docs](../../application/docs/firewall-requirements.md#torrust-tracker-ports)) +- `9090/tcp` - Prometheus (internal) +- `3100/tcp` - Grafana (internal) + +## 🧪 Test Coverage + +### Infrastructure Tests (`test-local-setup.sh`) + +✅ Prerequisites validation (OpenTofu, KVM, libvirt) +✅ Configuration syntax validation +✅ VM deployment and connectivity +✅ Docker and system services +✅ Network and firewall configuration + +### Integration Tests (`test-integration.sh`) + +✅ Torrust Tracker repository cloning +✅ Docker Compose service startup +✅ HTTP API endpoint testing +✅ Metrics endpoint validation +✅ Prometheus and Grafana health checks +✅ UDP tracker port verification + +### CI/CD Validation + +✅ OpenTofu configuration validation +✅ Cloud-init 
YAML syntax checking +✅ Documentation link validation +✅ Script permission verification + +## 🎉 Benefits + +### For Development + +- **Faster feedback** - Test changes locally before cloud deployment +- **Cost effective** - No cloud resources needed for development +- **Consistent environments** - Same config as production +- **Easy debugging** - Direct VM access and logs + +### For Operations + +- **Infrastructure as Code** - All configuration in version control +- **Automated testing** - Catch issues before deployment +- **Documentation** - Clear setup and troubleshooting guides +- **Reproducible** - Anyone can spin up identical environment + +### For CI/CD + +- **Validation pipeline** - Syntax and configuration checking +- **Test automation** - Automated deployment verification +- **Change confidence** - Know changes work before merging + +## 📈 Next Steps + +### Immediate Enhancements + +- [ ] Add SSL/TLS certificate testing +- [ ] Implement log aggregation testing +- [ ] Add backup/restore testing +- [ ] Create performance benchmarking + +### Advanced Features + +- [ ] Multi-VM testing (load balancer + multiple trackers) +- [ ] Network failure simulation +- [ ] Database migration testing +- [ ] Security vulnerability scanning + +### Production Readiness + +- [ ] Hetzner Cloud adaptation +- [ ] Terraform Cloud integration +- [ ] Monitoring and alerting setup +- [ ] Disaster recovery testing + +## 🤝 Usage Examples + +### Development Workflow + +```bash +# Make infrastructure changes +vim infrastructure/terraform/main.tf + +# Test locally +make test-syntax +make apply +make test-integration + +# Iterate +make destroy +# Repeat +``` + +### Testing Changes + +```bash +# Test specific components +make test-prereq # Check prerequisites +make test-syntax # Validate configs only +make deploy-test # Deploy without cleanup +make test-integration # Test Torrust Tracker +``` + +### Debugging Issues + +```bash +# Access VM directly +make ssh + +# Check VM console +make 
vm-console + +# View logs +make logs + +# Get VM info +make vm-info +``` + +This infrastructure setup provides a solid foundation for testing Torrust +Tracker deployments locally before moving to production environments like +Hetzner Cloud. diff --git a/infrastructure/docs/local-testing-setup.md b/infrastructure/docs/local-testing-setup.md new file mode 100644 index 0000000..27e5f1f --- /dev/null +++ b/infrastructure/docs/local-testing-setup.md @@ -0,0 +1,375 @@ +# Local Testing Infrastructure Setup + +This document describes how to set up a local testing environment for the +Torrust Tracker using OpenTofu and cloud-init with KVM/libvirt virtualization. + +## Prerequisites + +### System Requirements + +- Linux system (Ubuntu 20.04+ or similar) +- At least 4GB RAM (2GB will be allocated to the VM) +- 30GB free disk space +- Virtualization support enabled in BIOS/UEFI + +### Required Tools Installation + +#### 1. Install KVM/libvirt + +**Quick installation:** + +```bash +# Ubuntu/Debian +sudo apt update +sudo apt install -y qemu-kvm libvirt-daemon-system libvirt-clients bridge-utils virt-manager + +# Add your user to libvirt group +sudo usermod -aG libvirt $USER +sudo usermod -aG kvm $USER + +# Start and enable libvirt service +sudo systemctl enable libvirtd +sudo systemctl start libvirtd + +# Log out and log back in for group changes to take effect +``` + +**For detailed instructions, troubleshooting, and other distributions, see:** +[libvirt Setup Guide](libvirt-setup.md) + +#### 2. Install OpenTofu + +```bash +# Download and install OpenTofu +curl -fsSL https://get.opentofu.org/install-opentofu.sh -o install-opentofu.sh +chmod +x install-opentofu.sh +sudo ./install-opentofu.sh --install-method deb + +# Verify installation +tofu version +``` + +#### 3. 
Verify KVM Setup + +```bash +# Check if KVM is working +sudo kvm-ok + +# Check libvirt status +sudo systemctl status libvirtd + +# Verify user permissions (should work without sudo after group setup) +virsh list --all + +# If the above fails, try with sudo to verify libvirt is working +sudo virsh list --all + +# Check if default network exists and is active +virsh net-list --all + +# If default network is not active, start it +virsh net-start default +virsh net-autostart default + +# Verify KVM module is loaded +lsmod | grep kvm + +# Check virtualization support +egrep -c '(vmx|svm)' /proc/cpuinfo # Should return > 0 + +# Test libvirt connection +virsh uri +``` + +**Troubleshooting libvirt group permissions:** + +If `virsh list` fails with permission errors: + +```bash +# Check current groups +groups + +# If libvirt group is not listed, re-add user and refresh session +sudo usermod -aG libvirt $USER + +# Option 1: Log out and log back in +# Option 2: Start new shell with libvirt group +newgrp libvirt + +# Option 3: Restart session +exec su -l $USER +``` + +## Configuration + +### 1. SSH Key Setup + +Before deploying, you need to add your SSH public key to the cloud-init configuration: + +```bash +# Generate SSH key if you don't have one +ssh-keygen -t rsa -b 4096 -C "your-email@example.com" + +# Copy your public key +cat ~/.ssh/id_rsa.pub +``` + +Edit `infrastructure/cloud-init/user-data.yaml` and replace the placeholder SSH +key with your actual public key. + +### 2. Customize VM Configuration (Optional) + +You can customize the VM specifications by editing variables in +`infrastructure/terraform/main.tf` or creating a `terraform.tfvars` file: + +```hcl +# infrastructure/terraform/terraform.tfvars +vm_name = "torrust-tracker-test" +vm_memory = 4096 # 4GB RAM +vm_vcpus = 4 # 4 CPU cores +vm_disk_size = 30 # 30GB disk +``` + +## Deployment + +### 1. Initialize OpenTofu + +```bash +cd infrastructure/terraform +tofu init +``` + +### 2. 
Plan the Deployment + +```bash +tofu plan +``` + +### 3. Deploy the VM + +```bash +tofu apply +``` + +The deployment will: + +- Download Ubuntu 24.04 cloud image +- Create a VM with specified resources +- Apply cloud-init configuration +- Set up basic system requirements +- Install Docker and configure firewall + +### 4. Connect to the VM + +After deployment completes, OpenTofu will output the VM's IP address: + +```bash +# SSH to the VM (replace <VM_IP> with the actual IP from the output) +ssh torrust@<VM_IP> +``` + +## VM Features + +The deployed VM includes: + +### System Configuration + +- Ubuntu 24.04 LTS +- User `torrust` with sudo privileges +- SSH key authentication +- Automatic security updates enabled + +### Software Installed + +- Docker and Docker Compose +- Basic development tools (git, curl, vim, htop) +- Network utilities +- UFW firewall (configured) +- Fail2ban for SSH protection + +### Network Configuration + +- UFW firewall enabled with rules for: + - SSH (22/tcp) + - HTTP (80/tcp) and HTTPS (443/tcp) + - Torrust Tracker ports (6868/udp, 6969/udp, 7070/tcp, 1212/tcp) + - See [detailed port documentation](../../application/docs/firewall-requirements.md#torrust-tracker-ports) + +### Performance Optimizations + +- Network tuning for BitTorrent traffic +- Docker logging configured +- BBR congestion control enabled + +## Post-Deployment Steps + +### 1. Clone the Repository + +```bash +ssh torrust@<VM_IP> +cd /home/torrust/github/torrust +git clone https://github.com/torrust/torrust-tracker-demo.git +cd torrust-tracker-demo +``` + +### 2. Set up Environment + +```bash +# Copy and configure environment file +cp .env.production .env +# Edit .env as needed +``` + +### 3.
Deploy Torrust Tracker + +```bash +# Start the services +docker compose up -d +``` + +## Management Commands + +### VM Lifecycle + +```bash +# Start VM +virsh start torrust-tracker-demo + +# Stop VM +virsh shutdown torrust-tracker-demo + +# Force stop VM +virsh destroy torrust-tracker-demo + +# Check VM status +virsh list --all + +# Get VM info +virsh dominfo torrust-tracker-demo +``` + +### OpenTofu Management + +```bash +# Show current state +tofu show + +# Destroy infrastructure +tofu destroy + +# Refresh state +tofu refresh +``` + +## Troubleshooting + +### Common Issues + +1. **libvirt permission errors** + + - Ensure your user is in the `libvirt` and `kvm` groups + - Log out and log back in after adding groups + +2. **AppArmor blocking libvirt-qemu (Permission denied errors)** + + - **Symptoms**: `Could not open '/path/to/file.qcow2': Permission denied` + - **Root cause**: AppArmor security policies restrict libvirt-qemu access + to storage directories + - **Solution**: Create AppArmor override (automatically done by our setup scripts) + + ```bash + # Manual fix if needed: + sudo mkdir -p /etc/apparmor.d/abstractions/libvirt-qemu.d + sudo tee /etc/apparmor.d/abstractions/libvirt-qemu.d/override << 'EOF' + # AppArmor override for libvirt-qemu storage access + # Fixes terraform-provider-libvirt permission issues + /var/lib/libvirt/images/** rwk, + /home/*/libvirt/images/** rwk, + EOF + sudo systemctl restart apparmor + + # Ensure parent directories have execute permissions + chmod o+x /home/$USER + chmod o+x /home/$USER/libvirt + ``` + +3. **VM fails to start** + + - Check libvirt logs: `journalctl -u libvirtd` + - Verify KVM support: `sudo kvm-ok` + +4. **Cloud-init not working** + + - Check cloud-init logs in VM: `sudo cloud-init status --long` + - Verify cloud-init files syntax + +5. 
**SSH connection refused** + + - VM might still be booting/configuring + - Check VM console: `virsh console torrust-tracker-demo` or `virt-viewer spice://127.0.0.1:5900` + - Verify firewall rules + +6. **VM deployment timeout (can't get IP address)** + + - **Symptoms**: VM starts but times out waiting for DHCP lease + - **Cause**: Cloud-init setup takes time (package installation, system + configuration, reboot) + - **Solution**: This is normal; VM will get IP after cloud-init completes + (~5-10 minutes) + - **Check**: Use `virsh console torrust-tracker-demo` or + `virt-viewer spice://127.0.0.1:5900` to monitor boot progress + +7. **Terraform/OpenTofu shows "No IP assigned yet" but VM has IP** + + - **Symptoms**: `make status` shows: + + ```text + connection_info = "VM created, waiting for IP address..." + vm_ip = "No IP assigned yet" + ``` + + But `virsh domifaddr torrust-tracker-demo` shows an IP address. + + - **Root Cause**: Terraform libvirt provider state is not synchronized with + the actual VM network state. 
This happens because: + + - DHCP lease assignment timing varies + - Terraform state becomes stale after cloud-init completes + - The provider doesn't automatically refresh network interface information + + - **Solution**: Refresh the Terraform state to synchronize with libvirt: + + ```bash + # Method 1: Refresh Terraform state + cd infrastructure/terraform + tofu refresh + + # Method 2: Use the make command (if available) + make refresh-state + + # Verify the fix + tofu output + ``` + + - **Prevention**: The IP detection issue can be minimized by: + + - Waiting 2-3 minutes after `make apply` before checking IP + - Using `virsh domifaddr VM_NAME` to get IP directly from libvirt + - Adding automatic refresh to the Makefile status command + + - **Alternative**: Get the IP directly from libvirt without Terraform: + + ```bash + # Get IP address directly + virsh domifaddr torrust-tracker-demo + + # Or use in scripts + VM_IP=$(virsh domifaddr torrust-tracker-demo | grep ipv4 | awk '{print $4}' | cut -d'/' -f1) + echo "VM IP: $VM_IP" + ``` + + - **Why This Happens**: The libvirt provider checks `network_interface[0].addresses` + which is populated asynchronously. The VM gets its IP from DHCP, but Terraform's + cached state doesn't reflect this until explicitly refreshed. + +### Logs and Debugging diff --git a/infrastructure/docs/quick-start.md b/infrastructure/docs/quick-start.md new file mode 100644 index 0000000..72a923b --- /dev/null +++ b/infrastructure/docs/quick-start.md @@ -0,0 +1,217 @@ +# Quick Start Guide - Local Testing Infrastructure + +This guide will get you up and running with the Torrust Tracker local testing +environment in minutes. + +## 🚀 Quick Setup + +### 1. Prerequisites Check + +Ensure you have a Linux system (Ubuntu 20.04+ recommended) with: + +- 4GB+ RAM available +- 30GB+ free disk space +- Virtualization enabled in BIOS + +### 2. 
One-Command Setup + +```bash +make dev-setup +``` + +This will install all required tools: + +- KVM/libvirt for virtualization +- OpenTofu for infrastructure management +- Configure user permissions + +**Important:** After this command completes, log out and log back in for +group permissions to take effect. + +### 3. Configure SSH Access + +Add your SSH public key to the cloud-init configuration: + +```bash +# Generate SSH key if needed +ssh-keygen -t rsa -b 4096 -C "your-email@example.com" + +# Copy your public key +cat ~/.ssh/id_rsa.pub + +# Edit the cloud-init file and replace the placeholder SSH key +vim infrastructure/cloud-init/user-data.yaml +``` + +### 4. Verify Setup + +```bash +make test-prereq +``` + +The output should be something like: + +```console +Testing prerequisites... +infrastructure/tests/test-local-setup.sh prerequisites +[INFO] Testing prerequisites... +[SUCCESS] OpenTofu is installed: OpenTofu v1.10.1 +[SUCCESS] libvirtd service is running +[SUCCESS] User has libvirt access +[SUCCESS] Default libvirt network is active +[SUCCESS] KVM support available +``` + +**If you get libvirt permission errors:** + +```bash +# Check if you're in the libvirt group +groups | grep libvirt + +# If not, re-add yourself and refresh session +sudo usermod -aG libvirt $USER + +# Then either: +# Option 1: Log out and back in +# Option 2: Use newgrp to activate the group +newgrp libvirt + +# Option 3: Start a new login shell +exec su -l $USER + +# Verify libvirt access +virsh list --all +``` + +## 🏃 Deploy and Test + +### Deploy VM + +```bash +# Initialize OpenTofu (first time only) +make init + +# Review what will be created +make plan + +# Deploy the VM +make apply +``` + +### Connect to VM + +```bash +# SSH into the VM +make ssh +``` + +### Test Torrust Tracker + +Inside the VM: + +```bash +# Clone the repository +cd /home/torrust/github/torrust +git clone https://github.com/torrust/torrust-tracker-demo.git +cd torrust-tracker-demo + +# Set up
environment +cp .env.production .env + +# Start services +docker compose up -d + +# Check status +docker compose ps +``` + +### Cleanup + +```bash +# Destroy the VM when done +make destroy +``` + +## 📋 Available Commands + +| Command | Description | +| -------------------- | -------------------------------------------- | +| `make help` | Show all available commands | +| `make test` | Run complete test suite | +| `make apply` | Deploy VM | +| `make ssh` | Connect to VM | +| `make destroy` | Remove VM | +| `make status` | Show infrastructure status | +| `make refresh-state` | Refresh Terraform state to detect IP changes | + +## 🔧 Troubleshooting + +### Common Issues + +1. **Permission errors**: Make sure you logged out/in after `make dev-setup` +2. **VM won't start**: Check with `sudo kvm-ok` that virtualization is enabled +3. **SSH connection fails**: VM might still be booting, wait 2-3 minutes +4. **libvirt file ownership errors**: Run `make fix-libvirt` to fix permissions +5. **"No IP assigned yet" issue**: If `make status` shows no IP but VM is running: + + ```bash + # Check if VM actually has an IP + virsh domifaddr torrust-tracker-demo + + # If IP is shown, refresh Terraform state + make refresh-state + ``` + + **Why this happens**: Terraform's state can become stale after cloud-init completes. + The VM gets its IP from DHCP, but Terraform doesn't automatically detect this change. + See [detailed troubleshooting](local-testing-setup.md#troubleshooting) for more info. + +### Getting Help + +```bash +# Fix libvirt permissions automatically +make fix-libvirt + +# Check test logs +make logs + +# Access VM console directly +make vm-console + +# Show detailed workflow help +make workflow-help +``` + +## 🎯 What's Next? + +Once your VM is running: + +1. **Deploy Torrust Tracker** - Follow the steps above to get the tracker running +2. **Test functionality** - Try accessing the tracker endpoints +3. **Monitor services** - Check Grafana dashboards +4. 
**Iterate** - Make changes and redeploy quickly + +## 📚 Full Documentation + +For detailed information, see: + +- [Complete Setup Guide](local-testing-setup.md) +- [Test Documentation](../tests/test-local-setup.sh) + +## 🧪 Test Everything + +Run the full automated test suite: + +```bash +make test +``` + +This will: + +- Verify all prerequisites +- Validate configurations +- Deploy a VM +- Test connectivity and services +- Clean up automatically + +Perfect for CI/CD or validating changes! diff --git a/infrastructure/docs/refactoring/twelve-factor-refactor/README.md b/infrastructure/docs/refactoring/twelve-factor-refactor/README.md new file mode 100644 index 0000000..8e3c0ab --- /dev/null +++ b/infrastructure/docs/refactoring/twelve-factor-refactor/README.md @@ -0,0 +1,543 @@ +# Twelve-Factor App Refactoring Plan for Torrust Tracker Demo + +## Executive Summary + +This document outlines a comprehensive plan to refactor the Torrust Tracker +Demo repository to follow +[The Twelve-Factor App](https://12factor.net/) methodology while maintaining +the current local testing environment and preparing for multi-cloud production +deployments (starting with Hetzner). 
+ +## Current State Analysis + +### Current Architecture + +- **VM Provisioning**: Cloud-init + OpenTofu/Terraform (local KVM/libvirt) +- **Application Deployment**: Manual post-provisioning via `test-integration.sh` +- **Configuration**: Mixed approach with Docker containers and environment + variables +- **Services**: Tracker, Prometheus, Grafana via Docker Compose +- **Environment Management**: Basic `.env.production` file + +### Torrust Tracker Specific Considerations + +From the official Torrust Tracker documentation, we need to account for: + +#### Configuration Requirements + +- **Multiple Database Drivers**: SQLite (development) and MySQL (production) +- **Service Components**: HTTP tracker, UDP tracker, and REST API +- **Port Configuration**: UDP (6868, 6969), HTTP (7070), API (1212) +- **Authentication**: Time-bound keys and access tokens +- **Performance Optimization**: Network tuning for BitTorrent traffic + +#### Deployment Modes + +- **Private Mode**: Requires authentication keys for tracker access +- **Public Mode**: Open tracker without authentication +- **Whitelisted Mode**: Only specific torrents allowed + +#### Docker vs Source Compilation + +- **Current**: Using Docker images (torrust/tracker:develop) +- **Future Plans**: Considering source compilation for production performance optimization +- **Dependencies**: pkg-config, libssl-dev, make, build-essential, libsqlite3-dev +- **Demo Repository Decision**: Uses Docker for all services to prioritize simplicity, + consistency, and frequent updates over peak performance + (see [ADR-002](../../../docs/adr/002-docker-for-all-services.md)) + +### Twelve-Factor Violations Identified + + + +| Factor | Current Issue | Impact | +| ------------------------ | --------------------------------------------------------------- | ------ | +| **I. Codebase** | ✅ Good - Single repo with multiple environments | None | +| **II. Dependencies** | ⚠️ Partial - Dependencies in cloud-init, not isolated | Medium | +| **III. 
Config** | ❌ Config mixed in files and env vars, not environment-specific | High | +| **IV. Backing Services** | ✅ Good - Services are attachable resources | None | +| **V. Build/Release/Run** | ❌ No clear separation, deployment mixed with infrastructure | High | +| **VI. Processes** | ✅ Good - Stateless application processes | None | +| **VII. Port Binding** | ✅ Good - Services export via port binding | None | +| **VIII. Concurrency** | ✅ Good - Can scale via process model | None | +| **IX. Disposability** | ⚠️ Partial - VMs not quickly disposable due to app coupling | Medium | +| **X. Dev/Prod Parity** | ❌ Local and production have different deployment paths | High | +| **XI. Logs** | ✅ Good - Docker logging configured | None | +| **XII. Admin Processes** | ⚠️ Partial - No clear admin process separation | Low | + + + +## Target Architecture + +### Core Principles + +1. **Infrastructure ≠ Application**: Clean separation of concerns +2. **Environment Parity**: Same deployment process for local/staging/production +3. **Configuration as Environment**: All config via environment variables +4. **Immutable Infrastructure**: VMs are cattle, not pets +5. **Deployment Pipeline**: Clear build → release → run stages + +### High-Level Architecture + +The refactored architecture will separate infrastructure provisioning from +application deployment, ensuring twelve-factor compliance while maintaining +the flexibility to deploy to multiple cloud providers. 
+ +## Refactoring Plan + +### Phase 1: Foundation & Configuration (Weeks 1-2) + +**Objective**: Establish twelve-factor configuration and deployment foundation + +#### 1.1 Configuration Management Refactor + +- Create environment-specific configuration structure +- Implement strict environment variable configuration +- Remove hardcoded configuration from cloud-init + +#### 1.2 Deployment Separation + +- Extract application deployment from infrastructure provisioning +- Create dedicated deployment scripts +- Implement configuration injection mechanism + +#### 1.3 Environment Standardization + +- Standardize local, staging, and production environments +- Create environment-specific variable files +- Implement configuration validation + +### Phase 2: Build/Release/Run Separation (Weeks 3-4) + +**Objective**: Implement clear separation of build, release, and run stages + +#### 2.1 Build Stage + +- Infrastructure provisioning only +- Base system preparation +- Dependency installation + +#### 2.2 Release Stage + +- Application deployment +- Configuration injection +- Service orchestration + +#### 2.3 Run Stage + +- Service startup +- Health checking +- Monitoring setup + +### Phase 3: Multi-Cloud Preparation (Weeks 5-6) + +**Objective**: Prepare for Hetzner and future cloud provider support + +#### 3.1 Cloud Abstraction + +- Provider-agnostic configuration +- Modular infrastructure components +- Environment-specific provider configs + +#### 3.2 Deployment Orchestration + +- Unified deployment interface +- Provider-specific implementations +- Configuration templating + +### Phase 4: Operational Excellence (Weeks 7-8) + +**Objective**: Implement production-ready operational practices + +#### 4.1 Monitoring & Observability + +- Health check standardization +- Logging standardization +- Metrics collection + +#### 4.2 Maintenance & Updates + +- Rolling deployment capability +- Backup procedures +- Disaster recovery + +## Implementation Details + +### Directory Structure 
Changes + +```text +torrust-tracker-demo/ +├── infrastructure/ +│ ├── cloud-init/ +│ │ ├── base-system.yaml.tpl # Base system only +│ │ └── providers/ # Provider-specific templates +│ │ ├── local/ +│ │ ├── hetzner/ +│ │ └── aws/ # Future +│ ├── terraform/ +│ │ ├── modules/ # Reusable modules +│ │ │ ├── base-vm/ +│ │ │ ├── networking/ +│ │ │ └── security/ +│ │ └── providers/ # Provider configurations +│ │ ├── local/ +│ │ ├── hetzner/ +│ │ └── aws/ # Future +│ ├── scripts/ +│ │ ├── deploy-app.sh # Application deployment +│ │ ├── configure-env.sh # Environment configuration +│ │ ├── validate-deployment.sh # Deployment validation +│ │ └── health-check.sh # Health checking +│ └── config/ # Configuration templates +│ ├── environments/ +│ │ ├── local.env +│ │ ├── staging.env +│ │ └── production.env +│ └── templates/ +│ ├── tracker.toml.tpl +│ └── prometheus.yml.tpl +├── application/ +│ ├── compose/ # Environment-specific compose files +│ │ ├── base.yaml # Base services +│ │ ├── local.yaml # Local overrides +│ │ ├── staging.yaml # Staging overrides +│ │ └── production.yaml # Production overrides +│ ├── config/ # Application configurations +│ │ └── templates/ # Configuration templates +│ └── scripts/ # Application-specific scripts +└── docs/ + └── deployment/ # Deployment documentation + ├── local.md + ├── staging.md + └── production.md +``` + +### Configuration Strategy + +#### Environment Variables Hierarchy + +```text +1. System Environment Variables (highest priority) +2. .env.{environment} files +3. 
Default values in configuration templates +``` + +#### Configuration Categories + +```yaml +# Infrastructure Configuration +INFRASTRUCTURE_PROVIDER: "hetzner|local|aws" +INFRASTRUCTURE_REGION: "fsn1" +INFRASTRUCTURE_INSTANCE_TYPE: "cx11" + +# Application Configuration +TORRUST_TRACKER_MODE: "private|public|whitelisted" +TORRUST_TRACKER_DATABASE_URL: "sqlite:///var/lib/torrust/tracker.db" +TORRUST_TRACKER_LOG_LEVEL: "info|debug|trace" +TORRUST_TRACKER_API_TOKEN: "${TORRUST_API_TOKEN}" + +# Service Configuration +PROMETHEUS_RETENTION_TIME: "15d" +GRAFANA_ADMIN_PASSWORD: "${GRAFANA_PASSWORD}" + +# Security Configuration +SSH_PUBLIC_KEY: "${SSH_PUBLIC_KEY}" +SSL_EMAIL: "${SSL_EMAIL}" +DOMAIN_NAME: "${DOMAIN_NAME}" +``` + +### Deployment Workflow + +#### Current Workflow (Manual) + +```bash +1. make apply # Infrastructure + app deployment +2. SSH and manual configuration +3. Manual service startup +``` + +#### Target Workflow (Twelve-Factor) + +```bash +# Infrastructure +1. make infra-apply ENVIRONMENT=local +2. make app-deploy ENVIRONMENT=local +3. make health-check ENVIRONMENT=local + +# Application Updates (without infrastructure changes) +1. make app-deploy ENVIRONMENT=local +2. make health-check ENVIRONMENT=local +``` + +## Testing Strategy + +### Test Categories + +#### 1. Infrastructure Tests + +```bash +# Syntax validation +make test-syntax # YAML, HCL, shell syntax + +# Infrastructure deployment +make test-infrastructure # VM provisioning only + +# Environment validation +make test-environment # Configuration validation +``` + +#### 2. Application Tests + +```bash +# Application deployment +make test-app-deployment # Application deployment only + +# End-to-end testing +make test-e2e # Full deployment pipeline + +# Service validation +make test-services # Health checks, endpoints +``` + +#### 3. 
Integration Tests + +```bash +# Multi-environment testing +make test-local # Local environment +make test-staging # Staging environment +make test-production # Production environment (dry-run) +``` + +## Migration Strategy + +### Phase 1: Backward Compatibility (Weeks 1-2) + +#### Maintain Current Functionality + +- Current `make apply` still works +- Existing test scripts remain functional +- No breaking changes to user workflow + +#### Introduce New Structure + +- Add new configuration structure alongside existing +- Implement new deployment scripts +- Create environment-specific configurations + +#### Validation + +- All existing tests pass +- New structure tests pass +- Documentation updated + +### Phase 2: Gradual Migration (Weeks 3-4) + +#### Deprecate Old Patterns + +- Mark old configuration patterns as deprecated +- Provide migration warnings and guidance +- Implement migration helpers + +#### Promote New Patterns + +- Make new deployment method the default +- Update documentation to favor new approach +- Provide clear migration examples + +#### Parallel Support + +- Both old and new methods work +- Clear migration path documented +- User choice for migration timing + +### Phase 3: New Default (Weeks 5-6) + +#### Switch Defaults + +- New twelve-factor approach becomes default +- Old approach requires explicit flags +- Comprehensive migration documentation + +#### Remove Deprecated Code + +- Clean up old configuration patterns +- Simplify codebase +- Update all documentation + +#### Production Readiness + +- Full Hetzner support implemented +- Multi-cloud foundation ready +- Operational procedures documented + +## Success Metrics + +### Configuration Compliance + +- ✅ 100% configuration via environment variables +- ✅ No hardcoded configuration in deployment files +- ✅ Environment-specific configuration isolation + +### Deployment Reliability + +- ✅ < 5 minute VM provisioning time +- ✅ < 2 minute application deployment time +- ✅ 100% deployment success rate 
in testing + +### Environment Parity + +- ✅ Identical deployment process across environments +- ✅ Configuration-only differences between environments +- ✅ Zero manual configuration steps + +### Operational Excellence + +- ✅ Automated health checking +- ✅ Comprehensive logging and monitoring +- ✅ Clear rollback procedures + +## Risk Assessment & Mitigation + +### Technical Risks + +#### Risk: Configuration Complexity + +- **Impact**: High - Could make deployment more complex +- **Probability**: Medium +- **Mitigation**: + - Provide clear examples and documentation + - Implement configuration validation + - Create migration helpers + +#### Risk: Environment Inconsistencies + +- **Impact**: High - Could cause production issues +- **Probability**: Low +- **Mitigation**: + - Strict environment variable validation + - Automated testing across environments + - Configuration templates with validation + +#### Risk: Deployment Failures + +- **Impact**: Medium - Could disrupt testing workflow +- **Probability**: Low +- **Mitigation**: + - Comprehensive testing strategy + - Rollback procedures + - Gradual migration approach + +### Operational Risks + +#### Risk: User Adoption + +- **Impact**: Medium - Users might resist change +- **Probability**: Medium +- **Mitigation**: + - Maintain backward compatibility during transition + - Clear migration documentation + - Demonstrable benefits + +#### Risk: Documentation Lag + +- **Impact**: Medium - Could cause confusion +- **Probability**: Medium +- **Mitigation**: + - Documentation-first approach + - Automated documentation testing + - Community feedback integration + +## Dependencies & Prerequisites + +### Technical Dependencies + +- OpenTofu/Terraform ≥ 1.0 +- Docker ≥ 20.0 +- Docker Compose ≥ 2.0 +- KVM/libvirt (local testing) +- Cloud provider SDKs (production) + +### Knowledge Prerequisites + +- Understanding of twelve-factor methodology +- Experience with infrastructure as code +- Familiarity with environment variable 
configuration +- Knowledge of container orchestration + +### Resource Requirements + +- Development time: 8 weeks (1 person) +- Testing infrastructure: Local KVM environment +- Documentation effort: 20% of development time +- Community coordination: 10% of development time + +## Deliverables + +### Week 1-2: Foundation + +- [ ] Environment-specific configuration structure +- [ ] Configuration validation scripts +- [ ] Deployment separation implementation +- [ ] Updated documentation + +### Week 3-4: Build/Release/Run + +- [ ] Infrastructure provisioning scripts +- [ ] Application deployment scripts +- [ ] Health checking implementation +- [ ] Integration testing framework + +### Week 5-6: Multi-Cloud Preparation + +- [ ] Provider abstraction layer +- [ ] Hetzner cloud integration +- [ ] Configuration templating system +- [ ] Multi-environment testing + +### Week 7-8: Operational Excellence + +- [ ] Monitoring standardization +- [ ] Backup procedures +- [ ] Disaster recovery documentation +- [ ] Production deployment guides + +## Related Documents + +- [Twelve-Factor App Methodology](https://12factor.net/) +- [Torrust Tracker Documentation](https://docs.rs/torrust-tracker/latest/torrust_tracker/) +- [Production Deployment Guide](https://torrust.com/blog/deploying-torrust-to-production) +- [Current Local Testing Setup](../local-testing-setup.md) +- [Infrastructure Overview](../infrastructure-overview.md) + +## Support & Communication + +### Implementation Team + +- **Lead**: Project maintainer +- **Review**: Core team members +- **Testing**: Community contributors + +### Communication Channels + +- **GitHub Issues**: Technical discussions and questions +- **Pull Requests**: Code review and implementation +- **Documentation**: Continuous updates and improvements + +### Feedback Collection + +- **Weekly Progress Reports**: Implementation status +- **Community Feedback**: User experience and suggestions +- **Technical Reviews**: Architecture and implementation 
validation + +--- + +**Next Steps**: + +1. Review and approve this plan +2. Create detailed implementation tickets +3. Begin Phase 1 implementation +4. Establish regular progress reviews + +**Estimated Completion**: 8 weeks from start date +**Risk Level**: Medium (well-defined scope, clear requirements) +**Impact**: High (enables production deployment and multi-cloud support) diff --git a/infrastructure/docs/refactoring/twelve-factor-refactor/migration-guide.md b/infrastructure/docs/refactoring/twelve-factor-refactor/migration-guide.md new file mode 100644 index 0000000..6370106 --- /dev/null +++ b/infrastructure/docs/refactoring/twelve-factor-refactor/migration-guide.md @@ -0,0 +1,550 @@ +# Migration Guide: From Current State to Twelve-Factor + +## Overview + +This guide provides step-by-step instructions for migrating from the current +setup to the twelve-factor compliant architecture while maintaining backward +compatibility and minimizing disruption. + +## Current vs Target Comparison + +### Current Setup + +```bash +# Current workflow +make apply # Does everything: infrastructure + app +./infrastructure/tests/test-integration.sh setup # Manual app setup +``` + +### Target Setup + +```bash +# New twelve-factor workflow +make infra-apply ENVIRONMENT=local # Infrastructure only +make app-deploy ENVIRONMENT=local # Application only +make health-check ENVIRONMENT=local # Validation +``` + +## Migration Strategy + +### Step 1: Create New Structure (Week 1) + +#### 1.1 Create Configuration Structure + +```bash +# Create directory structure +mkdir -p infrastructure/config/environments +mkdir -p infrastructure/config/templates +mkdir -p application/config/templates + +# Create environment files +cat > infrastructure/config/environments/local.env << 'EOF' +# Infrastructure Configuration +INFRASTRUCTURE_PROVIDER=local +INFRASTRUCTURE_VM_NAME=torrust-tracker-demo +INFRASTRUCTURE_VM_MEMORY=2048 +INFRASTRUCTURE_VM_CPUS=2 + +# Application Configuration 
+TORRUST_TRACKER_MODE=public +TORRUST_TRACKER_LOG_LEVEL=debug +TORRUST_TRACKER_DATABASE_DRIVER=sqlite3 +TORRUST_TRACKER_API_TOKEN=MyAccessToken + +# Service Configuration +GRAFANA_ADMIN_PASSWORD=admin +PROMETHEUS_RETENTION_TIME=7d + +# Network Configuration +TORRUST_TRACKER_UDP_PORT_6868=6868 +TORRUST_TRACKER_UDP_PORT_6969=6969 +TORRUST_TRACKER_HTTP_PORT=7070 +TORRUST_TRACKER_API_PORT=1212 +EOF +``` + +#### 1.2 Extract Configuration from Cloud-Init + +Current `user-data.yaml.tpl` has hardcoded application configuration. +We need to separate this into: + +1. **Base system configuration** (stays in cloud-init) +2. **Application configuration** (moves to environment variables) + +**New base cloud-init template** (`base-system.yaml.tpl`): + +```yaml +#cloud-config +hostname: ${hostname} +locale: en_US.UTF-8 +timezone: UTC + +users: + - name: torrust + groups: [adm, sudo, docker] + sudo: ["ALL=(ALL) NOPASSWD:ALL"] + shell: /bin/bash + ssh_authorized_keys: + - ${ssh_public_key} + +packages: + - curl + - git + - docker.io + - htop + - vim + - ufw + +runcmd: + # System setup only - NO application deployment + - systemctl enable docker + - systemctl start docker + - usermod -aG docker torrust + + # Basic firewall setup + - ufw --force reset + - ufw default deny incoming + - ufw default allow outgoing + - ufw allow ssh + - ufw allow 80/tcp + - ufw allow 443/tcp + - ufw allow 6868/udp + - ufw allow 6969/udp + - ufw allow 7070/tcp + - ufw allow 1212/tcp + - ufw --force enable + +final_message: | + Base system ready for application deployment. + VM is ready for Torrust Tracker deployment! 
+``` + +#### 1.3 Create Configuration Templates + +**Tracker configuration template** (`infrastructure/config/templates/tracker.toml.tpl`): + +```toml +[logging] +threshold = "${TORRUST_TRACKER_LOG_LEVEL}" + +[core] +inactive_peer_cleanup_interval = 600 +listed = false +private = ${TORRUST_TRACKER_PRIVATE:-false} +tracker_usage_statistics = true + +[core.announce_policy] +interval = ${TORRUST_TRACKER_ANNOUNCE_INTERVAL:-120} +interval_min = ${TORRUST_TRACKER_ANNOUNCE_INTERVAL_MIN:-120} + +[core.database] +driver = "${TORRUST_TRACKER_DATABASE_DRIVER}" +path = "${TORRUST_TRACKER_DATABASE_PATH:-./storage/tracker/lib/database/sqlite3.db}" + +[core.net] +external_ip = "0.0.0.0" +on_reverse_proxy = false + +[health_check_api] +bind_address = "0.0.0.0:${TORRUST_TRACKER_HEALTH_CHECK_API_PORT:-1313}" + +[http_api] +bind_address = "0.0.0.0:${TORRUST_TRACKER_API_PORT}" + +[http_api.access_tokens] +admin = "${TORRUST_TRACKER_API_TOKEN}" + +[[udp_trackers]] +bind_address = "0.0.0.0:${TORRUST_TRACKER_UDP_PORT_6868}" + +[[udp_trackers]] +bind_address = "0.0.0.0:${TORRUST_TRACKER_UDP_PORT_6969}" + +[[http_trackers]] +bind_address = "0.0.0.0:${TORRUST_TRACKER_HTTP_PORT}" +``` + +### Step 2: Adapt Current Scripts (Week 1-2) + +#### 2.1 Modify test-integration.sh + +Instead of completely replacing `test-integration.sh`, we'll adapt it to use +the new configuration system while maintaining backward compatibility. + +**Enhanced setup_torrust_tracker function:** + +```bash +# Enhanced setup function in test-integration.sh +setup_torrust_tracker() { + log_info "Setting up Torrust Tracker Demo..."
+ + local vm_ip + vm_ip=$(get_vm_ip) + + # Check if new configuration system is available + if [ -f "${PROJECT_ROOT}/infrastructure/scripts/deploy-app.sh" ]; then + log_info "Using new twelve-factor deployment system" + + # Use new deployment script + "${PROJECT_ROOT}/infrastructure/scripts/deploy-app.sh" local "${vm_ip}" + + else + log_info "Using legacy deployment system" + + # Original deployment logic (preserved for backward compatibility) + setup_legacy_deployment "${vm_ip}" + fi + + log_success "Torrust Tracker Demo setup completed" + return 0 +} + +# Legacy deployment function (preserved) +setup_legacy_deployment() { + local vm_ip="$1" + + # Check if already cloned + if vm_exec "${vm_ip}" "test -d /home/torrust/github/torrust/torrust-tracker-demo" \ + "Checking if repo exists"; then + log_info "Repository already exists, updating..." + vm_exec "${vm_ip}" \ + "cd /home/torrust/github/torrust/torrust-tracker-demo && git pull" \ + "Updating repository" + else + log_info "Cloning repository..." + vm_exec "${vm_ip}" "mkdir -p /home/torrust/github/torrust" \ + "Creating directory structure" + vm_exec "${vm_ip}" \ + "cd /home/torrust/github/torrust && git clone \ +https://github.com/torrust/torrust-tracker-demo.git" \ + "Cloning repository" + fi + + # Setup environment file + vm_exec "${vm_ip}" \ + "cd /home/torrust/github/torrust/torrust-tracker-demo && cp .env.production .env" \ + "Setting up environment file" +} +``` + +#### 2.2 Update Makefile + +Add new targets while keeping existing ones: + +```makefile +# New twelve-factor targets +infra-apply: ## Deploy infrastructure only + @echo "Deploying infrastructure for environment: $(ENVIRONMENT)" + @if [ -z "$(ENVIRONMENT)" ]; then \ + echo "ERROR: ENVIRONMENT not specified. 
Use: make infra-apply ENVIRONMENT=local"; \ + exit 1; \ + fi + ./infrastructure/scripts/provision-infrastructure.sh $(ENVIRONMENT) apply + +app-deploy: ## Deploy application only + @echo "Deploying application for environment: $(ENVIRONMENT)" + @if [ -z "$(ENVIRONMENT)" ]; then \ + echo "ERROR: ENVIRONMENT not specified. Use: make app-deploy ENVIRONMENT=local"; \ + exit 1; \ + fi + ./infrastructure/scripts/deploy-app.sh $(ENVIRONMENT) + +health-check: ## Check deployment health + @echo "Checking deployment health for environment: $(ENVIRONMENT)" + @if [ -z "$(ENVIRONMENT)" ]; then \ + echo "ERROR: ENVIRONMENT not specified. Use: make health-check ENVIRONMENT=local"; \ + exit 1; \ + fi + ./infrastructure/scripts/health-check.sh $(ENVIRONMENT) + +# Enhanced existing targets +apply: ## Deploy VM with application (legacy method, maintained for compatibility) + @echo "Deploying VM with full application stack..." + @echo "NOTE: Consider using 'make infra-apply ENVIRONMENT=local && \ +make app-deploy ENVIRONMENT=local' for better separation" + cd $(TERRAFORM_DIR) && tofu apply -var-file="local.tfvars" + @echo "Deployment completed. Testing application deployment..." + $(TESTS_DIR)/test-integration.sh setup + +# Configuration management +configure-env: ## Process environment configuration + @echo "Processing configuration for environment: $(ENVIRONMENT)" + @if [ -z "$(ENVIRONMENT)" ]; then \ + echo "ERROR: ENVIRONMENT not specified. Use: make configure-env ENVIRONMENT=local"; \ + exit 1; \ + fi + ./infrastructure/scripts/configure-env.sh $(ENVIRONMENT) + +validate-config: ## Validate configuration files + @echo "Validating configuration files..." + ./infrastructure/scripts/validate-config.sh +``` + +> **Note**: In actual Makefile implementation, replace the 4-space indentation +> with tabs as required by Make syntax. 
+ +### Step 3: Gradual Migration (Week 2-3) + +#### 3.1 Update Documentation + +**Enhanced README.md section:** + +````markdown +## Deployment Options + +### Option 1: Twelve-Factor Deployment (Recommended) + +```bash +# 1. Deploy infrastructure +make infra-apply ENVIRONMENT=local + +# 2. Deploy application +make app-deploy ENVIRONMENT=local + +# 3. Validate deployment +make health-check ENVIRONMENT=local +``` + +### Option 2: Legacy Single-Command Deployment + +```bash +# Deploy everything at once (legacy method) +make apply +``` + +### Configuration Management + +The new system uses environment-specific configuration: + +- `infrastructure/config/environments/local.env` - Local development +- `infrastructure/config/environments/staging.env` - Staging environment +- `infrastructure/config/environments/production.env` - Production environment + +Process configuration before deployment: + +```bash +make configure-env ENVIRONMENT=local +make validate-config +``` +```` + +#### 3.2 Migration Testing + +**Test both deployment methods work:** + +```bash +# Test new method +make infra-apply ENVIRONMENT=local +make app-deploy ENVIRONMENT=local +make health-check ENVIRONMENT=local +make destroy + +# Test legacy method still works +make apply +make destroy +``` + +### Step 4: Environment-Specific Configurations (Week 3-4) + +#### 4.1 Create Environment Variations + +**Staging configuration** (`infrastructure/config/environments/staging.env`): + +```bash +# Infrastructure Configuration +INFRASTRUCTURE_PROVIDER=hetzner +INFRASTRUCTURE_REGION=fsn1 +INFRASTRUCTURE_INSTANCE_TYPE=cx11 + +# Application Configuration +TORRUST_TRACKER_MODE=private +TORRUST_TRACKER_LOG_LEVEL=info +TORRUST_TRACKER_DATABASE_DRIVER=sqlite3 +TORRUST_TRACKER_API_TOKEN=${TORRUST_STAGING_API_TOKEN} + +# Service Configuration +GRAFANA_ADMIN_PASSWORD=${GRAFANA_STAGING_PASSWORD} +PROMETHEUS_RETENTION_TIME=15d + +# Security Configuration +SSH_PUBLIC_KEY=${SSH_PUBLIC_KEY} +DOMAIN_NAME=staging.torrust-demo.com
+SSL_EMAIL=${SSL_EMAIL} +``` + +**Production configuration** (`infrastructure/config/environments/production.env`): + +```bash +# Infrastructure Configuration +INFRASTRUCTURE_PROVIDER=hetzner +INFRASTRUCTURE_REGION=fsn1 +INFRASTRUCTURE_INSTANCE_TYPE=cx21 + +# Application Configuration +TORRUST_TRACKER_MODE=private +TORRUST_TRACKER_LOG_LEVEL=info +TORRUST_TRACKER_DATABASE_DRIVER=mysql +TORRUST_TRACKER_DATABASE_URL=${TORRUST_PROD_DATABASE_URL} +TORRUST_TRACKER_API_TOKEN=${TORRUST_PROD_API_TOKEN} + +# Service Configuration +GRAFANA_ADMIN_PASSWORD=${GRAFANA_PROD_PASSWORD} +PROMETHEUS_RETENTION_TIME=30d + +# Security Configuration +SSH_PUBLIC_KEY=${SSH_PUBLIC_KEY} +DOMAIN_NAME=torrust-demo.com +SSL_EMAIL=${SSL_EMAIL} +``` + +#### 4.2 Provider-Specific Configurations + +Create provider-specific Terraform configurations: + +```text +infrastructure/ +├── terraform/ +│ ├── providers/ +│ │ ├── local/ +│ │ │ ├── main.tf +│ │ │ └── variables.tf +│ │ ├── hetzner/ +│ │ │ ├── main.tf +│ │ │ ├── variables.tf +│ │ │ └── hetzner.tf +│ │ └── aws/ # Future +│ │ ├── main.tf +│ │ ├── variables.tf +│ │ └── aws.tf +│ └── modules/ # Shared modules +│ ├── base-vm/ +│ ├── networking/ +│ └── security/ +``` + +### Step 5: Production Readiness (Week 4-5) + +#### 5.1 Hetzner Cloud Integration + +**Hetzner provider configuration** (`infrastructure/terraform/providers/hetzner/main.tf`): + +```hcl +terraform { + required_providers { + hcloud = { + source = "hetznercloud/hcloud" + version = "~> 1.45" + } + } +} + +provider "hcloud" { + token = var.hcloud_token +} + +# Use shared base-vm module +module "tracker_vm" { + source = "../../modules/base-vm" + + # Provider-specific values + provider_type = "hetzner" + instance_type = var.instance_type + region = var.region + + # Common values + vm_name = var.vm_name + ssh_public_key = var.ssh_public_key + environment = var.environment +} +``` + +#### 5.2 Environment Variable Management + +For production, use secure environment variable management: + 
+```bash +# Example using direnv for local development +cat > .envrc << 'EOF' +# Load environment-specific configuration +export ENVIRONMENT=local +source infrastructure/config/environments/${ENVIRONMENT}.env + +# Sensitive variables (not committed to git) +export SSH_PUBLIC_KEY="$(cat ~/.ssh/id_rsa.pub)" +export TORRUST_PROD_API_TOKEN="your-production-token" +export GRAFANA_PROD_PASSWORD="your-production-password" +EOF + +# Allow direnv +direnv allow +``` + +## Migration Checklist + +### Week 1: Foundation + +- [ ] Create new directory structure +- [ ] Create environment configuration files +- [ ] Create configuration templates +- [ ] Implement configuration processing scripts +- [ ] Test configuration processing locally + +### Week 2: Integration + +- [ ] Modify existing scripts for backward compatibility +- [ ] Update Makefile with new targets +- [ ] Update documentation +- [ ] Test both old and new deployment methods + +### Week 3: Environment Support + +- [ ] Create staging and production configurations +- [ ] Implement environment-specific logic +- [ ] Test multi-environment deployment +- [ ] Validate configuration for all environments + +### Week 4: Provider Abstraction + +- [ ] Create provider-specific Terraform modules +- [ ] Implement Hetzner cloud support +- [ ] Test cloud provider deployment +- [ ] Document cloud-specific requirements + +### Week 5: Production Readiness + +- [ ] Implement secure secret management +- [ ] Create production deployment procedures +- [ ] Implement monitoring and health checks +- [ ] Create disaster recovery procedures + +## Rollback Plan + +If issues arise during migration, you can always roll back to the previous system: + +```bash +# Rollback to legacy deployment +git checkout HEAD~1 # Or specific commit before migration +make apply # Use old deployment method +``` + +The migration maintains backward compatibility, so the old `make apply` command +will continue to work throughout the transition period.
+ +## Benefits After Migration + +1. **Environment Parity**: Same deployment process for all environments +2. **Configuration Management**: All configuration via environment variables +3. **Deployment Speed**: Faster application updates (no infrastructure changes) +4. **Cloud Flexibility**: Easy to add new cloud providers +5. **Testing**: Better isolation between infrastructure and application testing +6. **Monitoring**: Clearer deployment validation and health checking + +## Next Steps + +Once this migration is complete: + +1. Add support for additional cloud providers (AWS, GCP) +2. Implement rolling deployments +3. Add automated backup and disaster recovery +4. Implement configuration drift detection +5. Add performance monitoring and alerting diff --git a/infrastructure/docs/refactoring/twelve-factor-refactor/phase-1-implementation.md b/infrastructure/docs/refactoring/twelve-factor-refactor/phase-1-implementation.md new file mode 100644 index 0000000..edc0d62 --- /dev/null +++ b/infrastructure/docs/refactoring/twelve-factor-refactor/phase-1-implementation.md @@ -0,0 +1,856 @@ +# Implementation Checklist: Phase 1 - Foundation & Configuration + +## Overview + +This checklist provides detailed implementation steps for Phase 1 of the +Twelve-Factor App refactoring plan. This phase focuses on establishing the +foundation for configuration management and deployment separation. 
+ +## Week 1: Configuration Management Refactor + +### 1.1 Environment Configuration Structure + +#### Task 1.1.1: Create Environment Directory Structure + +```bash +mkdir -p infrastructure/config/environments +mkdir -p infrastructure/config/templates +mkdir -p application/config/templates +``` + +**Files to create:** + +- [ ] `infrastructure/config/environments/local.env` +- [ ] `infrastructure/config/environments/staging.env` +- [ ] `infrastructure/config/environments/production.env` +- [ ] `infrastructure/config/templates/tracker.toml.tpl` +- [ ] `infrastructure/config/templates/prometheus.yml.tpl` + +#### Task 1.1.2: Environment Variable Definition + +**Local Environment (`local.env`):** + +```bash +# Infrastructure Configuration +INFRASTRUCTURE_PROVIDER=local +INFRASTRUCTURE_VM_NAME=torrust-tracker-demo +INFRASTRUCTURE_VM_MEMORY=2048 +INFRASTRUCTURE_VM_CPUS=2 + +# Torrust Tracker Core Configuration +TORRUST_TRACKER_MODE=public +TORRUST_TRACKER_LOG_LEVEL=debug +TORRUST_TRACKER_LISTED=false +TORRUST_TRACKER_PRIVATE=false +TORRUST_TRACKER_STATS=true + +# Database Configuration +TORRUST_TRACKER_DATABASE_DRIVER=sqlite3 +TORRUST_TRACKER_DATABASE_PATH=./storage/tracker/lib/database/sqlite3.db + +# Network Configuration +TORRUST_TRACKER_EXTERNAL_IP=0.0.0.0 +TORRUST_TRACKER_ON_REVERSE_PROXY=false + +# Tracker Policy +TORRUST_TRACKER_CLEANUP_INTERVAL=600 +TORRUST_TRACKER_MAX_PEER_TIMEOUT=900 +TORRUST_TRACKER_PERSISTENT_COMPLETED_STAT=false +TORRUST_TRACKER_REMOVE_PEERLESS=true + +# Announce Policy +TORRUST_TRACKER_ANNOUNCE_INTERVAL=120 +TORRUST_TRACKER_ANNOUNCE_INTERVAL_MIN=120 + +# Port Configuration +TORRUST_TRACKER_UDP_6868_ENABLED=true +TORRUST_TRACKER_UDP_6969_ENABLED=true +TORRUST_TRACKER_HTTP_ENABLED=true +TORRUST_TRACKER_HTTP_PORT=7070 +TORRUST_TRACKER_API_PORT=1212 +TORRUST_TRACKER_HEALTH_CHECK_PORT=1313 + +# API Authentication +TORRUST_TRACKER_API_TOKEN=local-dev-token + +# Service Configuration +GRAFANA_ADMIN_PASSWORD=admin +PROMETHEUS_RETENTION_TIME=7d + +# 
Docker Configuration +USER_ID=1000 +``` + +**Staging Environment (`staging.env`):** + +```bash +# Infrastructure +INFRASTRUCTURE_PROVIDER=hetzner +INFRASTRUCTURE_REGION=fsn1 +INFRASTRUCTURE_INSTANCE_TYPE=cx11 + +# Application +TORRUST_TRACKER_MODE=private +TORRUST_TRACKER_LOG_LEVEL=info +TORRUST_TRACKER_DATABASE_DRIVER=sqlite3 +TORRUST_TRACKER_API_TOKEN=${TORRUST_STAGING_API_TOKEN} + +# Services +GRAFANA_ADMIN_PASSWORD=${GRAFANA_STAGING_PASSWORD} +PROMETHEUS_RETENTION_TIME=15d + +# Security +SSH_PUBLIC_KEY=${SSH_PUBLIC_KEY} +DOMAIN_NAME=staging.torrust-demo.com +SSL_EMAIL=${SSL_EMAIL} +``` + +**Production Environment (`production.env`):** + +```bash +# Infrastructure Configuration +INFRASTRUCTURE_PROVIDER=hetzner +INFRASTRUCTURE_REGION=fsn1 +INFRASTRUCTURE_INSTANCE_TYPE=cx21 + +# Torrust Tracker Core Configuration +TORRUST_TRACKER_MODE=private +TORRUST_TRACKER_LOG_LEVEL=info +TORRUST_TRACKER_LISTED=false +TORRUST_TRACKER_PRIVATE=true +TORRUST_TRACKER_STATS=true + +# Database Configuration (MySQL for production) +TORRUST_TRACKER_DATABASE_DRIVER=mysql +TORRUST_TRACKER_DATABASE_URL=${TORRUST_PROD_DATABASE_URL} + +# Network Configuration +TORRUST_TRACKER_EXTERNAL_IP=${PRODUCTION_EXTERNAL_IP} +TORRUST_TRACKER_ON_REVERSE_PROXY=true + +# Tracker Policy (production optimized) +TORRUST_TRACKER_CLEANUP_INTERVAL=300 +TORRUST_TRACKER_MAX_PEER_TIMEOUT=1800 +TORRUST_TRACKER_PERSISTENT_COMPLETED_STAT=true +TORRUST_TRACKER_REMOVE_PEERLESS=false + +# Announce Policy (production optimized) +TORRUST_TRACKER_ANNOUNCE_INTERVAL=600 +TORRUST_TRACKER_ANNOUNCE_INTERVAL_MIN=300 + +# Port Configuration +TORRUST_TRACKER_UDP_6868_ENABLED=true +TORRUST_TRACKER_UDP_6969_ENABLED=true +TORRUST_TRACKER_HTTP_ENABLED=true +TORRUST_TRACKER_HTTP_PORT=7070 +TORRUST_TRACKER_API_PORT=1212 +TORRUST_TRACKER_HEALTH_CHECK_PORT=1313 + +# API Authentication (from secrets) +TORRUST_TRACKER_API_TOKEN=${TORRUST_PROD_API_TOKEN} + +# Service Configuration +GRAFANA_ADMIN_PASSWORD=${GRAFANA_PROD_PASSWORD} 
+PROMETHEUS_RETENTION_TIME=30d + +# Security Configuration +SSH_PUBLIC_KEY=${SSH_PUBLIC_KEY} +DOMAIN_NAME=torrust-demo.com +SSL_EMAIL=${SSL_EMAIL} + +# Docker Configuration +USER_ID=1000 +``` + +#### Task 1.1.3: Configuration Template Creation + +**Tracker Configuration Template (`tracker.toml.tpl`):** + +```toml +[logging] +threshold = "${TORRUST_TRACKER_LOG_LEVEL}" + +[core] +inactive_peer_cleanup_interval = ${TORRUST_TRACKER_CLEANUP_INTERVAL:-600} +listed = ${TORRUST_TRACKER_LISTED:-false} +private = ${TORRUST_TRACKER_PRIVATE:-false} +tracker_usage_statistics = ${TORRUST_TRACKER_STATS:-true} + +[core.announce_policy] +interval = ${TORRUST_TRACKER_ANNOUNCE_INTERVAL:-120} +interval_min = ${TORRUST_TRACKER_ANNOUNCE_INTERVAL_MIN:-120} + +[core.database] +driver = "${TORRUST_TRACKER_DATABASE_DRIVER}" +{{#if (eq TORRUST_TRACKER_DATABASE_DRIVER "sqlite3")}} +path = "${TORRUST_TRACKER_DATABASE_PATH:-./storage/tracker/lib/database/sqlite3.db}" +{{else}} +url = "${TORRUST_TRACKER_DATABASE_URL}" +{{/if}} + +[core.net] +external_ip = "${TORRUST_TRACKER_EXTERNAL_IP:-0.0.0.0}" +on_reverse_proxy = ${TORRUST_TRACKER_ON_REVERSE_PROXY:-false} + +[core.tracker_policy] +max_peer_timeout = ${TORRUST_TRACKER_MAX_PEER_TIMEOUT:-900} +persistent_torrent_completed_stat = ${TORRUST_TRACKER_PERSISTENT_COMPLETED_STAT:-false} +remove_peerless_torrents = ${TORRUST_TRACKER_REMOVE_PEERLESS:-true} + +# Health check API (separate from main API) +[health_check_api] +bind_address = "127.0.0.1:${TORRUST_TRACKER_HEALTH_CHECK_PORT:-1313}" + +# Main HTTP API +[http_api] +bind_address = "0.0.0.0:${TORRUST_TRACKER_API_PORT:-1212}" + +[http_api.access_tokens] +admin = "${TORRUST_TRACKER_API_TOKEN}" + +# UDP Trackers (multiple instances supported) +{{#if TORRUST_TRACKER_UDP_6868_ENABLED}} +[[udp_trackers]] +bind_address = "0.0.0.0:6868" +{{/if}} + +{{#if TORRUST_TRACKER_UDP_6969_ENABLED}} +[[udp_trackers]] +bind_address = "0.0.0.0:6969" +{{/if}} + +# HTTP Trackers (multiple instances supported) +{{#if 
TORRUST_TRACKER_HTTP_ENABLED}} +[[http_trackers]] +bind_address = "0.0.0.0:${TORRUST_TRACKER_HTTP_PORT:-7070}" +{{/if}} +``` + +#### Task 1.1.4: Torrust Tracker Configuration Strategy + +Based on the official Torrust Tracker documentation, the tracker supports +multiple configuration methods with the following priority order: + +1. **Environment Variable TORRUST_TRACKER_CONFIG_TOML** (highest priority) +2. **tracker.toml file** (medium priority) +3. **Default configuration** (lowest priority) + +For twelve-factor compliance, we'll use method #1 (environment variables) with +the following approach: + +**Configuration Generation Script (`generate-tracker-config.sh`):** + +```bash +#!/bin/bash +# Generate tracker configuration from environment variables + +set -euo pipefail + +# Generate tracker.toml from template +envsubst < "${CONFIG_DIR}/templates/tracker.toml.tpl" > "/tmp/tracker.toml" + +# Set the TORRUST_TRACKER_CONFIG_TOML environment variable +export TORRUST_TRACKER_CONFIG_TOML="$(cat /tmp/tracker.toml)" + +# Clean up temporary file +rm -f "/tmp/tracker.toml" + +echo "Tracker configuration generated from environment variables" +``` + +#### Alternative: Direct Environment Variable Configuration + +For even better twelve-factor compliance, we can use the tracker's support +for environment variable overrides: + +```bash +# Core configuration +export TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__DRIVER="${TORRUST_TRACKER_DATABASE_DRIVER}" +export TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__PATH="${TORRUST_TRACKER_DATABASE_PATH}" +export TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__NET__EXTERNAL_IP="${TORRUST_TRACKER_EXTERNAL_IP}" + +# HTTP API configuration +export TORRUST_TRACKER_CONFIG_OVERRIDE_HTTP_API__ACCESS_TOKENS__ADMIN="${TORRUST_TRACKER_API_TOKEN}" + +# Logging configuration +export TORRUST_TRACKER_CONFIG_OVERRIDE_LOGGING__THRESHOLD="${TORRUST_TRACKER_LOG_LEVEL}" +``` + +### 1.2 Configuration Processing Scripts + +#### Task 1.2.1: Create Configuration 
Processing Script + +**File:** `infrastructure/scripts/configure-env.sh` + +```bash +#!/bin/bash +# Configuration processing script for Torrust Tracker Demo +# Processes environment variables and generates configuration files + +set -euo pipefail + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +CONFIG_DIR="${PROJECT_ROOT}/infrastructure/config" + +# Default values +ENVIRONMENT="${1:-local}" +VERBOSE="${VERBOSE:-false}" + +# Logging functions +log_info() { + echo "[INFO] $1" +} + +log_error() { + echo "[ERROR] $1" >&2 +} + +# Load environment configuration +load_environment() { + local env_file="${CONFIG_DIR}/environments/${ENVIRONMENT}.env" + + if [[ ! -f "${env_file}" ]]; then + log_error "Environment file not found: ${env_file}" + exit 1 + fi + + log_info "Loading environment: ${ENVIRONMENT}" + # shellcheck source=/dev/null + source "${env_file}" +} + +# Validate required environment variables +validate_environment() { + local required_vars=( + "INFRASTRUCTURE_PROVIDER" + "TORRUST_TRACKER_MODE" + "TORRUST_TRACKER_LOG_LEVEL" + "TORRUST_TRACKER_API_TOKEN" + ) + + for var in "${required_vars[@]}"; do + if [[ -z "${!var:-}" ]]; then + log_error "Required environment variable not set: ${var}" + exit 1 + fi + done + + log_info "Environment validation passed" +} + +# Process configuration templates +process_templates() { + local templates_dir="${CONFIG_DIR}/templates" + local output_dir="${PROJECT_ROOT}/application/storage/tracker/etc" + + # Ensure output directory exists + mkdir -p "${output_dir}" + + # Process tracker configuration template + if [[ -f "${templates_dir}/tracker.toml.tpl" ]]; then + log_info "Processing tracker configuration template" + envsubst < "${templates_dir}/tracker.toml.tpl" > "${output_dir}/tracker.toml" + fi + + log_info "Configuration templates processed" +} + +# Main execution +main() { + log_info "Starting configuration processing for environment: 
${ENVIRONMENT}" + + load_environment + validate_environment + process_templates + + log_info "Configuration processing completed successfully" +} + +# Show help +show_help() { + cat </dev/null 2>&1; then + # Create temporary file with sample values for validation + local temp_file + temp_file=$(mktemp) + + # Set sample environment variables + export TORRUST_TRACKER_LOG_LEVEL="info" + export TORRUST_TRACKER_DATABASE_DRIVER="sqlite3" + export TORRUST_TRACKER_API_TOKEN="sample-token" + export TORRUST_TRACKER_API_PORT="1212" + + # Process template and validate + envsubst < "${tracker_template}" > "${temp_file}" + + if taplo fmt --check "${temp_file}" >/dev/null 2>&1; then + echo "[SUCCESS] Tracker template TOML syntax validation passed" + else + echo "[ERROR] Tracker template TOML syntax validation failed" + rm -f "${temp_file}" + return 1 + fi + + rm -f "${temp_file}" + else + echo "[WARNING] taplo not available, skipping TOML syntax validation" + fi + + echo "[SUCCESS] Template validation passed" + return 0 +} + +# Main validation +main() { + echo "[INFO] Starting configuration validation" + + local failed=0 + + # Validate environment files + for env in local staging production; do + env_file="${CONFIG_DIR}/environments/${env}.env" + if ! validate_env_file "${env_file}" "${env}"; then + failed=1 + fi + done + + # Validate templates + if ! 
validate_templates; then + failed=1 + fi + + if [[ ${failed} -eq 0 ]]; then + echo "[SUCCESS] All configuration validation passed" + return 0 + else + echo "[ERROR] Configuration validation failed" + return 1 + fi +} + +# Run validation +main "$@" +``` + +## Week 2: Deployment Separation + +### 2.1 Infrastructure Provisioning Scripts + +#### Task 2.1.1: Create Infrastructure Provisioning Script + +**File:** `infrastructure/scripts/provision-infrastructure.sh` + +```bash +#!/bin/bash +# Infrastructure provisioning script for Torrust Tracker Demo +# Provisions base infrastructure without application deployment + +set -euo pipefail + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +TERRAFORM_DIR="${PROJECT_ROOT}/infrastructure/terraform" + +# Default values +ENVIRONMENT="${1:-local}" +ACTION="${2:-apply}" + +# Logging functions +log_info() { + echo "[INFO] $1" +} + +log_error() { + echo "[ERROR] $1" >&2 +} + +# Load environment configuration +load_environment() { + local config_script="${SCRIPT_DIR}/configure-env.sh" + + if [[ -f "${config_script}" ]]; then + log_info "Loading environment configuration: ${ENVIRONMENT}" + "${config_script}" "${ENVIRONMENT}" + else + log_error "Configuration script not found: ${config_script}" + exit 1 + fi +} + +# Provision infrastructure +provision_infrastructure() { + log_info "Provisioning infrastructure for environment: ${ENVIRONMENT}" + + cd "${TERRAFORM_DIR}" + + case "${ACTION}" in + "init") + log_info "Initializing Terraform" + tofu init + ;; + "plan") + log_info "Planning infrastructure changes" + tofu plan -var="environment=${ENVIRONMENT}" + ;; + "apply") + log_info "Applying infrastructure changes" + tofu apply -var="environment=${ENVIRONMENT}" -auto-approve + ;; + "destroy") + log_info "Destroying infrastructure" + tofu destroy -var="environment=${ENVIRONMENT}" -auto-approve + ;; + *) + log_error "Unknown action: ${ACTION}" + exit 1 + ;; + esac 
+} + +# Main execution +main() { + log_info "Starting infrastructure provisioning" + + load_environment + provision_infrastructure + + log_info "Infrastructure provisioning completed" +} + +# Show help +show_help() { + cat <&2 +} + +# Get VM IP from Terraform output +get_vm_ip() { + if [[ -n "${VM_IP}" ]]; then + echo "${VM_IP}" + return 0 + fi + + cd "${TERRAFORM_DIR}" + local vm_ip + vm_ip=$(tofu output -raw vm_ip 2>/dev/null || echo "") + + if [[ -z "${vm_ip}" ]]; then + log_error "Could not get VM IP from Terraform output" + return 1 + fi + + echo "${vm_ip}" +} + +# Execute command on VM via SSH +vm_exec() { + local vm_ip="$1" + local command="$2" + local description="${3:-}" + + if [[ -n "${description}" ]]; then + log_info "${description}" + fi + + ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 torrust@"${vm_ip}" "${command}" +} + +# Deploy application +deploy_application() { + local vm_ip="$1" + + log_info "Deploying application to ${vm_ip}" + + # Clone/update repository + vm_exec "${vm_ip}" " + mkdir -p /home/torrust/github/torrust + cd /home/torrust/github/torrust + + if [ -d torrust-tracker-demo ]; then + cd torrust-tracker-demo && git pull + else + git clone https://github.com/torrust/torrust-tracker-demo.git + fi + " "Setting up application repository" + + # Process configuration + vm_exec "${vm_ip}" " + cd /home/torrust/github/torrust/torrust-tracker-demo + infrastructure/scripts/configure-env.sh ${ENVIRONMENT} + " "Processing configuration for environment: ${ENVIRONMENT}" + + # Start services + vm_exec "${vm_ip}" " + cd /home/torrust/github/torrust/torrust-tracker-demo/application + docker compose up -d + " "Starting application services" + + log_info "Application deployment completed" +} + +# Validate deployment +validate_deployment() { + local vm_ip="$1" + + log_info "Validating deployment" + + # Wait for services to be ready + sleep 30 + + # Check service health + vm_exec "${vm_ip}" " + cd 
/home/torrust/github/torrust/torrust-tracker-demo/application + docker compose ps + " "Checking service status" + + # Test endpoints + vm_exec "${vm_ip}" " + curl -f -s http://localhost:7070/health_check || exit 1 + curl -f -s http://localhost:1212/api/v1/stats || exit 1 + " "Testing application endpoints" + + log_info "Deployment validation completed successfully" +} + +# Main execution +main() { + log_info "Starting application deployment for environment: ${ENVIRONMENT}" + + local vm_ip + vm_ip=$(get_vm_ip) + + deploy_application "${vm_ip}" + validate_deployment "${vm_ip}" + + log_info "Application deployment completed successfully" +} + +# Show help +show_help() { + cat < 0 + +# 7. Test libvirt connection +virsh uri +``` + +## 🔧 Common Issues and Fixes + +### Issue 1: Permission Denied Errors + +**Symptoms:** + +```text +error: Failed to connect to socket /var/run/libvirt/libvirt-sock: Permission denied +``` + +**Solutions:** + +```bash +# Check current groups +groups + +# Add user to libvirt group if missing +sudo usermod -aG libvirt $USER + +# Refresh group membership (choose one): +# Option A: Log out and log back in +# Option B: Start new shell with group +newgrp libvirt +# Option C: Restart login shell +exec su -l $USER + +# Verify fix +virsh list --all +``` + +### Issue 2: libvirtd Service Not Running + +**Symptoms:** + +```text +error: failed to connect to the hypervisor +``` + +**Solutions:** + +```bash +# Check service status +sudo systemctl status libvirtd + +# Start the service +sudo systemctl start libvirtd + +# Enable automatic startup +sudo systemctl enable libvirtd + +# If it fails to start, check logs +sudo journalctl -u libvirtd -f +``` + +### Issue 3: Default Network Not Available + +**Symptoms:** + +```text +error: Network 'default' is not active +``` + +**Solutions:** + +```bash +# Check network status +virsh net-list --all + +# Start default network +virsh net-start default + +# Enable automatic startup +virsh net-autostart default + +# If 
default network doesn't exist, create it +sudo virsh net-define /etc/libvirt/qemu/networks/default.xml +sudo virsh net-start default +sudo virsh net-autostart default +``` + +### Issue 4: Missing Default Storage Pool + +**Symptoms:** + +```text +Error: can't find storage pool 'default' +``` + +**Solutions:** + +```bash +# Check current storage pools +virsh pool-list --all + +# If no default pool exists, create it +sudo virsh pool-define-as default dir \ + --target /var/lib/libvirt/images +sudo virsh pool-autostart default +sudo virsh pool-start default + +# Verify the pool is active +virsh pool-list --all +``` + +### Issue 5: Missing mkisofs Command + +**Symptoms:** + +```text +error while starting the creation of CloudInit's ISO image: +exec: "mkisofs": executable file not found in $PATH +``` + +**Solutions:** + +```bash +# Install genisoimage package (which provides mkisofs) +sudo apt update +sudo apt install -y genisoimage + +# Verify mkisofs is available +which mkisofs +mkisofs --version +``` + +### Issue 6: KVM Not Available + +**Symptoms:** + +```text +error: KVM is not available +``` + +**Solutions:** + +```bash +# Check if KVM modules are loaded +lsmod | grep kvm + +# Load KVM modules manually +sudo modprobe kvm +sudo modprobe kvm_intel # For Intel CPUs +# OR +sudo modprobe kvm_amd # For AMD CPUs + +# Check CPU virtualization support +egrep -c '(vmx|svm)' /proc/cpuinfo + +# If output is 0, virtualization is not supported or not enabled in BIOS +``` + +### Issue 7: BIOS Virtualization Disabled + +**Symptoms:** + +- KVM modules won't load +- `/dev/kvm` doesn't exist +- `kvm-ok` reports virtualization disabled + +**Solutions:** + +1. Reboot and enter BIOS/UEFI settings +2. Look for virtualization options: + - Intel: "Intel VT-x" or "Virtualization Technology" + - AMD: "AMD-V" or "SVM Mode" +3. Enable the option +4. 
Save and reboot + +### Issue 8: Nested Virtualization Issues + +**Symptoms:** + +- Running inside a VM and can't create VMs +- Poor performance in nested VMs + +**Solutions:** + +```bash +# Check if nested virtualization is enabled +cat /sys/module/kvm_intel/parameters/nested # Intel +cat /sys/module/kvm_amd/parameters/nested # AMD + +# Enable nested virtualization (Intel) +echo 'options kvm_intel nested=1' | sudo tee /etc/modprobe.d/kvm.conf + +# Enable nested virtualization (AMD) +echo 'options kvm_amd nested=1' | sudo tee /etc/modprobe.d/kvm.conf + +# Reload modules +sudo modprobe -r kvm_intel && sudo modprobe kvm_intel # Intel +sudo modprobe -r kvm_amd && sudo modprobe kvm_amd # AMD +``` + +### Issue 9: File Ownership Problems in libvirt Images Directory + +**Symptoms:** + +```text +Error: virError(Code=1, Domain=10, Message='internal error: +process exited while connecting to monitor: ... Permission denied') +``` + +**Cause:** +Sometimes libvirt downloads or creates VM images with incorrect ownership +(root:root instead of libvirt-qemu:libvirt), causing permission errors +when trying to start VMs. + +**Solutions:** + +```bash +# Quick fix using Makefile +make fix-libvirt + +# Manual fix +sudo chown -R libvirt-qemu:libvirt /var/lib/libvirt/images/ +sudo chmod -R 755 /var/lib/libvirt/images/ +sudo systemctl restart libvirtd + +# Verify ownership is correct +ls -la /var/lib/libvirt/images/ +``` + +**Prevention:** +The `make apply` command now automatically fixes these permissions before +deploying VMs. + +### Issue 10: AppArmor Permission Denied Errors + +**Symptoms:** + +```text +Could not open '/path/to/file.qcow2': Permission denied +error creating libvirt domain: internal error: process exited while +connecting to monitor +``` + +**Cause:** + +AppArmor security policies restrict libvirt-qemu access to storage +directories. This is a common issue with the terraform-provider-libvirt +when using custom storage locations. 
+ +**Reference:** [terraform-provider-libvirt Issue #1163](https://github.com/dmacvicar/terraform-provider-libvirt/issues/1163) + +**Solutions:** + +```bash +# Quick fix using our automated script +make fix-libvirt + +# Manual fix - Create AppArmor override +sudo mkdir -p /etc/apparmor.d/abstractions/libvirt-qemu.d + +sudo tee /etc/apparmor.d/abstractions/libvirt-qemu.d/override << 'EOF' +# AppArmor override for libvirt-qemu storage access +# Fixes terraform-provider-libvirt permission issues + +# Allow access to default libvirt images directory +/var/lib/libvirt/images/** rwk, + +# Allow access to user-specific libvirt storage +/home/*/libvirt/images/** rwk, +EOF + +# Restart AppArmor to apply changes +sudo systemctl restart apparmor + +# Ensure parent directories have execute permissions +chmod o+x /home/$USER +chmod o+x /home/$USER/libvirt +``` + +**Prevention:** + +Our setup scripts automatically create this override to prevent the issue. + +## 🚀 Quick Fix Commands + +### Automated Fix + +```bash +# Use our automated fixer +make fix-libvirt + +# Then logout/login or use: +newgrp libvirt + +# Verify the fix +make check-libvirt +``` + +### Manual Fix Script + +```bash +#!/bin/bash +# Quick libvirt fix script + +echo "Fixing libvirt setup..." + +# Install packages (Ubuntu/Debian) +sudo apt update +sudo apt install -y qemu-kvm libvirt-daemon-system libvirt-clients bridge-utils + +# Add user to groups +sudo usermod -aG libvirt $USER +sudo usermod -aG kvm $USER + +# Start services +sudo systemctl enable libvirtd +sudo systemctl start libvirtd + +# Start default network +sudo virsh net-start default 2>/dev/null || true +sudo virsh net-autostart default 2>/dev/null || true + +echo "Fix completed! Please log out and log back in." 
+``` + +## 📋 Diagnostic Commands + +```bash +# System information +uname -a +lscpu | grep Virtualization + +# Service status +sudo systemctl status libvirtd +sudo systemctl is-enabled libvirtd + +# User groups +groups +id $USER + +# libvirt version and connection +virsh version +virsh uri +virsh capabilities | head -20 + +# Network information +virsh net-list --all +ip link show virbr0 + +# VM information +virsh list --all +virsh pool-list --all + +# DHCP lease information +virsh net-dhcp-leases default + +# Log analysis +sudo journalctl -u libvirtd --since "1 hour ago" +``` + +## 🌐 Understanding DHCP Lease Behavior + +### Why Multiple DHCP Leases Appear + +When you run `virsh net-dhcp-leases default`, you might see multiple entries +even if you're only using one VM. This is **normal behavior** for the +following reasons: + +1. **Lease Persistence**: DHCP leases don't immediately disappear when VMs are + destroyed - they persist until their natural expiry time +2. **VM Lifecycle**: Each time you run `make apply` and `make destroy`, a new + VM is created with a different MAC address +3. 
**Unique MAC Addresses**: Each VM deployment gets a fresh MAC address, + creating a new DHCP lease entry + +### Example Output Explanation + +```console +$ virsh net-dhcp-leases default + Expiry Time MAC address Protocol IP address +-------------------------------------------------------------------------------- + 2025-07-04 18:03:22 52:54:00:02:c2:8b ipv4 192.168.122.51/24 + 2025-07-04 18:12:03 52:54:00:76:41:06 ipv4 192.168.122.172/24 + 2025-07-04 17:57:29 52:54:00:ea:19:a4 ipv4 192.168.122.161/24 +``` + +In this example: + +- **192.168.122.172** - Current active VM (has hostname "torrust-tracker-demo") +- **192.168.122.51** and **192.168.122.161** - Previous VM deployments + (expired/inactive) + +### Verifying Active VMs + +To see which VMs are actually running: + +```bash +# Check running VMs +virsh list --all + +# Check if any VMs are consuming the leased IPs +ping -c 1 192.168.122.172 # Should respond if VM is active +ping -c 1 192.168.122.51 # Should timeout if VM is destroyed +``` + +### DHCP Lease Cleanup + +- **Automatic**: Leases automatically expire based on their expiry time +- **Manual**: You can restart the libvirt network to clear expired leases: + +```bash +# Restart default network to clean up expired leases +virsh net-destroy default +virsh net-start default +``` + +**Note**: This will interrupt network connectivity for running VMs, so only do +this when no VMs are active. + +### Impact on Development + +This behavior **does not affect** your development workflow: + +- New VM deployments get fresh IP addresses +- Old leases don't conflict with new deployments +- The infrastructure works correctly regardless of lease history + +## 🆘 Emergency Reset + +If nothing else works, completely reset libvirt: + +```bash +# WARNING: This will destroy all VMs and networks! + +# Stop all VMs +for vm in $(virsh list --name); do virsh destroy "$vm"; done + +# Stop libvirt +sudo systemctl stop libvirtd + +# Remove configuration (backup first!) 
+sudo cp -r /etc/libvirt /etc/libvirt.backup +sudo rm -rf /var/lib/libvirt/images/* +sudo rm -rf /var/lib/libvirt/qemu/* + +# Reinstall packages +sudo apt remove --purge libvirt-daemon-system libvirt-clients +sudo apt install libvirt-daemon-system libvirt-clients + +# Restart service +sudo systemctl start libvirtd +sudo systemctl enable libvirtd + +# Recreate default network +sudo virsh net-start default +sudo virsh net-autostart default +``` + +## 📞 Getting Help + +If you're still having issues: + +1. Check system logs: `sudo journalctl -u libvirtd` +2. Verify hardware: `sudo kvm-ok` +3. Check our test output: `make test-prereq` +4. Review libvirt documentation: `man libvirtd` +5. Check Ubuntu/Debian wiki: [KVM/Installation](https://help.ubuntu.com/community/KVM/Installation) + +## 🔍 Related Files + +- `/etc/libvirt/qemu.conf` - QEMU configuration +- `/etc/libvirt/libvirtd.conf` - libvirt daemon configuration +- `/var/log/libvirt/` - libvirt logs +- `/var/lib/libvirt/` - libvirt data directory diff --git a/infrastructure/scripts/comprehensive-libvirt-fix.sh b/infrastructure/scripts/comprehensive-libvirt-fix.sh new file mode 100755 index 0000000..3019822 --- /dev/null +++ b/infrastructure/scripts/comprehensive-libvirt-fix.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Comprehensive libvirt permission fix script +# This script addresses all common libvirt permission issues + +set -euo pipefail + +echo "🔧 Comprehensive libvirt permission fix..." + +# 1. Ensure correct ownership of libvirt directories +echo "1. Fixing libvirt directory ownership..." +sudo chown -R libvirt-qemu:libvirt /var/lib/libvirt/images/ || true +sudo chmod -R 755 /var/lib/libvirt/images/ || true + +# 2. Set proper permissions on qemu directory +echo "2. Fixing qemu configuration directory..." +sudo chown -R libvirt-qemu:kvm /var/lib/libvirt/qemu/ || true +sudo chmod -R 755 /var/lib/libvirt/qemu/ || true + +# 3. Create udev rule to automatically fix ownership for new files +echo "3. 
Creating udev rule for automatic ownership fix..." +sudo tee /etc/udev/rules.d/99-libvirt-qemu.rules >/dev/null <<'EOF' +# Automatically set correct ownership for libvirt files +ACTION=="add", SUBSYSTEM=="block", KERNEL=="loop*", OWNER="libvirt-qemu", GROUP="libvirt" +# NOTE: udev only handles device nodes and has no PATH match key, so image files +# under /var/lib/libvirt/images are re-owned by the chown in step 7 instead. +EOF + +# 4. Update libvirt configuration to use correct user/group +echo "4. Updating libvirt configuration..." +sudo sed -i 's/^#user = "libvirt-qemu"/user = "libvirt-qemu"/' /etc/libvirt/qemu.conf || true +sudo sed -i 's/^#group = "kvm"/group = "kvm"/' /etc/libvirt/qemu.conf || true + +# 5. Update AppArmor profile with proper override (fixes terraform-provider-libvirt issue #1163) +echo "5. Updating AppArmor profile for libvirt..." +# Create AppArmor override directory +sudo mkdir -p /etc/apparmor.d/abstractions/libvirt-qemu.d + +# Create override file with proper permissions for storage directories +sudo tee /etc/apparmor.d/abstractions/libvirt-qemu.d/override >/dev/null <<'EOF' +# AppArmor override for libvirt-qemu to access custom storage locations +# This fixes permission denied errors with terraform-provider-libvirt +# See: https://github.com/dmacvicar/terraform-provider-libvirt/issues/1163 + +# Allow access to default libvirt images directory +/var/lib/libvirt/images/** rwk, + +# Allow access to user-specific libvirt storage +/home/*/libvirt/images/** rwk, +EOF + +# Ensure parent directories have execute permissions for libvirt-qemu user +# Only the invoking user's directories are opened up; other users' homes are left untouched +chmod o+x "$HOME/libvirt" 2>/dev/null || true +chmod o+x "$HOME" 2>/dev/null || true + +# 6. Restart services +echo "6. Restarting services..." +sudo systemctl reload udev || true +sudo systemctl restart libvirtd || true +sudo systemctl reload apparmor || true + +# 7. Fix any existing files +echo "7. Final ownership fix..." +sudo chown -R libvirt-qemu:libvirt /var/lib/libvirt/images/ || true + +echo "✅ Libvirt permission fix complete!" 
diff --git a/infrastructure/scripts/fix-volume-permissions.sh b/infrastructure/scripts/fix-volume-permissions.sh new file mode 100755 index 0000000..a515110 --- /dev/null +++ b/infrastructure/scripts/fix-volume-permissions.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Fix libvirt volume permissions after creation +# This script is called by OpenTofu after creating volumes + +set -euo pipefail + +echo "Fixing libvirt volume permissions..." + +# Fix ownership of all files in libvirt images directory +sudo chown -R libvirt-qemu:libvirt /var/lib/libvirt/images/ 2>/dev/null || true +sudo chmod -R 755 /var/lib/libvirt/images/ 2>/dev/null || true + +# Also fix qemu directory +sudo chown -R libvirt-qemu:kvm /var/lib/libvirt/qemu/ 2>/dev/null || true + +echo "✓ Volume permissions fixed" diff --git a/infrastructure/scripts/monitor-cloud-init.sh b/infrastructure/scripts/monitor-cloud-init.sh new file mode 100755 index 0000000..ec53e7e --- /dev/null +++ b/infrastructure/scripts/monitor-cloud-init.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Monitor cloud-init progress for Torrust Tracker Demo VM + +VM_NAME="torrust-tracker-demo" +SSH_KEY_PATH="$HOME/.ssh/torrust_rsa" +echo "🔍 Monitoring cloud-init progress for $VM_NAME" +echo "Press Ctrl+C to stop monitoring" +echo "" + +# Function to try SSH connection and get cloud-init status +check_cloud_init() { + local ip=$1 + timeout 5 ssh -o ConnectTimeout=2 -o StrictHostKeyChecking=no -o BatchMode=yes \ + -i "$SSH_KEY_PATH" torrust@"$ip" \ + "sudo cloud-init status --long" 2>/dev/null +} + +# Function to try SSH connection and get cloud-init logs +get_cloud_init_logs() { + local ip=$1 + timeout 10 ssh -o ConnectTimeout=2 -o StrictHostKeyChecking=no -o BatchMode=yes \ + -i "$SSH_KEY_PATH" torrust@"$ip" \ + "sudo tail -f /var/log/cloud-init-output.log" 2>/dev/null +} + +counter=0 +while true; do + counter=$((counter + 1)) + echo "--- Check #$counter at $(date) ---" + + # Check VM state + vm_state=$(virsh domstate $VM_NAME 2>/dev/null) + echo "VM State: 
$vm_state" + + # Try to get IP address + ip=$(virsh domifaddr $VM_NAME 2>/dev/null | grep -E "192\.168\.122\.[0-9]+" | awk '{print $4}' | cut -d'/' -f1) + + if [ -n "$ip" ]; then + echo "VM IP: $ip" + echo "Checking cloud-init status..." + + if cloud_init_status=$(check_cloud_init "$ip"); then + echo "$cloud_init_status" + + if echo "$cloud_init_status" | grep -q "status: done"; then + echo "🎉 Cloud-init completed!" + echo "You can now connect: ssh -i $SSH_KEY_PATH torrust@$ip" + break + elif echo "$cloud_init_status" | grep -q "status: running"; then + echo "📦 Cloud-init is running... Getting live logs:" + get_cloud_init_logs "$ip" + fi + else + echo "⏳ SSH not ready yet, cloud-init may still be running..." + fi + else + echo "⏳ No IP address yet..." + # Check DHCP leases + virsh net-dhcp-leases default 2>/dev/null | grep -v "Expiry Time" | grep -v "^$" | head -5 + fi + + echo "" + sleep 10 +done diff --git a/infrastructure/scripts/setup-user-libvirt.sh b/infrastructure/scripts/setup-user-libvirt.sh new file mode 100755 index 0000000..d45b320 --- /dev/null +++ b/infrastructure/scripts/setup-user-libvirt.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Alternative libvirt setup that avoids permission issues +# This script configures libvirt to work with root permissions temporarily + +set -euo pipefail + +echo "🔧 Setting up alternative libvirt configuration..." + +# 1. Create alternative storage pool in user directory +STORAGE_DIR="/home/$USER/libvirt/images" +echo "1. Creating alternative storage directory: $STORAGE_DIR" +mkdir -p "$STORAGE_DIR" +chmod 755 "$STORAGE_DIR" + +# 2. Define alternative storage pool +echo "2. Setting up alternative storage pool..." +if ! 
virsh pool-list --all | grep -q "user-default"; then + cat >/tmp/user-pool.xml < + user-default + + $STORAGE_DIR + + 755 + $(id -u) + $(id -g) + + + +EOF + + virsh pool-define /tmp/user-pool.xml + virsh pool-autostart user-default + virsh pool-start user-default + rm /tmp/user-pool.xml + echo " ✓ User storage pool created" +else + echo " ✓ User storage pool already exists" +fi + +# 3. Update libvirt to run as current user for local testing +echo "3. Updating libvirt configuration for local testing..." +sudo sed -i "s/^user = \"libvirt-qemu\"/user = \"$USER\"/" /etc/libvirt/qemu.conf || true +sudo sed -i "s/^group = \"kvm\"/group = \"$(id -gn)\"/" /etc/libvirt/qemu.conf || true + +# 4. Restart libvirt +echo "4. Restarting libvirt..." +sudo systemctl restart libvirtd + +echo "✅ Alternative libvirt configuration complete!" +echo "Storage pool 'user-default' is available at: $STORAGE_DIR" diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf new file mode 100644 index 0000000..8aee128 --- /dev/null +++ b/infrastructure/terraform/main.tf @@ -0,0 +1,153 @@ +# Torrust Tracker Demo - Local Testing Infrastructure +# OpenTofu configuration for KVM/libvirt local testing + +terraform { + required_version = ">= 1.0" + required_providers { + libvirt = { + source = "dmacvicar/libvirt" + version = "~> 0.7" + } + } +} + +# Configure the libvirt provider +provider "libvirt" { + uri = "qemu:///system" +} + +# Variables +variable "use_minimal_config" { + description = "Use minimal cloud-init configuration for debugging" + type = bool + default = false +} + +variable "ssh_public_key" { + description = "SSH public key for VM access" + type = string + default = "" +} + +variable "vm_name" { + description = "Name of the virtual machine" + type = string + default = "torrust-tracker-demo" +} + +variable "vm_memory" { + description = "Memory allocation for VM in MB" + type = number + default = 2048 +} + +variable "vm_vcpus" { + description = "Number of vCPUs for 
the VM" + type = number + default = 2 +} + +variable "vm_disk_size" { + description = "Disk size in GB" + type = number + default = 20 +} + +variable "base_image_url" { + description = "URL for the base Ubuntu cloud image" + type = string + default = "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img" +} + +# Download Ubuntu cloud image +resource "libvirt_volume" "base_image" { + name = "ubuntu-24.04-base.qcow2" + source = var.base_image_url + format = "qcow2" + pool = "user-default" + + # Fix permissions after creation + provisioner "local-exec" { + command = "${path.module}/../scripts/fix-volume-permissions.sh" + } +} + +# Create a volume for the VM based on the base image +resource "libvirt_volume" "vm_disk" { + name = "${var.vm_name}.qcow2" + base_volume_id = libvirt_volume.base_image.id + size = var.vm_disk_size * 1024 * 1024 * 1024 # Convert GB to bytes + pool = "user-default" + + # Fix permissions after creation + provisioner "local-exec" { + command = "${path.module}/../scripts/fix-volume-permissions.sh" + } +} + +# Create cloud-init disk +resource "libvirt_cloudinit_disk" "commoninit" { + name = "${var.vm_name}-cloudinit.iso" + user_data = templatefile("${path.module}/../cloud-init/${var.use_minimal_config ? 
"user-data-minimal.yaml.tpl" : "user-data.yaml.tpl"}", { + ssh_public_key = var.ssh_public_key + }) + meta_data = templatefile("${path.module}/../cloud-init/meta-data.yaml", { + hostname = var.vm_name + }) + network_config = file("${path.module}/../cloud-init/network-config.yaml") + pool = "user-default" +} + +# Create the VM +resource "libvirt_domain" "vm" { + name = var.vm_name + memory = var.vm_memory + vcpu = var.vm_vcpus + + cloudinit = libvirt_cloudinit_disk.commoninit.id + + + + disk { + volume_id = libvirt_volume.vm_disk.id + } + + network_interface { + network_name = "default" + wait_for_lease = false + } + + # Console for debugging + console { + type = "pty" + target_port = "0" + target_type = "serial" + } + + graphics { + type = "spice" + listen_type = "address" + autoport = true + } + + # Boot configuration + boot_device { + dev = ["hd", "network"] + } +} + +# Output the VM's IP address +output "vm_ip" { + value = length(libvirt_domain.vm.network_interface[0].addresses) > 0 ? libvirt_domain.vm.network_interface[0].addresses[0] : "No IP assigned yet" + description = "IP address of the created VM" +} + +output "vm_name" { + value = libvirt_domain.vm.name + description = "Name of the created VM" +} + +output "connection_info" { + value = length(libvirt_domain.vm.network_interface[0].addresses) > 0 ? "SSH to VM: ssh torrust@${libvirt_domain.vm.network_interface[0].addresses[0]}" : "VM created, waiting for IP address..." 
+ description = "SSH connection command" +} diff --git a/infrastructure/terraform/terraform.tfvars.example b/infrastructure/terraform/terraform.tfvars.example new file mode 100644 index 0000000..bd83659 --- /dev/null +++ b/infrastructure/terraform/terraform.tfvars.example @@ -0,0 +1,17 @@ +# Example OpenTofu variables for Torrust Tracker Demo +# Copy this file to terraform.tfvars and customize as needed + +# VM Configuration +vm_name = "torrust-tracker-demo" +vm_memory = 2048 # MB - Minimum 2GB recommended +vm_vcpus = 2 # Number of CPU cores +vm_disk_size = 20 # GB - Disk size + +# Alternative configuration for more resources +# vm_memory = 4096 # 4GB RAM for better performance +# vm_vcpus = 4 # 4 CPU cores +# vm_disk_size = 30 # 30GB disk + +# Base image (Ubuntu 24.04 LTS) +# You can override this to use a different Ubuntu version +# base_image_url = "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img" diff --git a/infrastructure/tests/test-integration.sh b/infrastructure/tests/test-integration.sh new file mode 100755 index 0000000..b53299b --- /dev/null +++ b/infrastructure/tests/test-integration.sh @@ -0,0 +1,399 @@ +#!/bin/bash +# Integration test script for Torrust Tracker deployment +# Tests the complete deployment workflow in the VM + +set -euo pipefail + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" +TERRAFORM_DIR="${PROJECT_ROOT}/infrastructure/terraform" +TEST_LOG_FILE="/tmp/torrust-integration-test.log" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Logging function +log() { + echo -e "$1" | tee -a "${TEST_LOG_FILE}" +} + +log_info() { + log "${BLUE}[INFO]${NC} $1" +} + +log_success() { + log "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + log "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + log "${RED}[ERROR]${NC} $1" +} + +# Get VM IP from Terraform output +get_vm_ip() { + cd "${TERRAFORM_DIR}" + local vm_ip + vm_ip=$(tofu output -raw vm_ip 2>/dev/null || echo "") + + if [ -z "${vm_ip}" ]; then + log_error "Could not get VM IP from OpenTofu output" + return 1 + fi + + echo "${vm_ip}" +} + +# Execute command on VM via SSH +vm_exec() { + local vm_ip="$1" + local command="$2" + local description="${3:-}" + + if [ -n "${description}" ]; then + log_info "${description}" + fi + + ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 torrust@"${vm_ip}" "${command}" +} + +# Detect which Docker Compose command is available +get_docker_compose_cmd() { + local vm_ip="$1" + + if vm_exec "${vm_ip}" "docker compose version >/dev/null 2>&1" ""; then + echo "docker compose" + elif vm_exec "${vm_ip}" "docker-compose --version >/dev/null 2>&1" ""; then + echo "docker-compose" + else + echo "" + fi +} + +# Test VM is accessible +test_vm_access() { + log_info "Testing VM access..." + + local vm_ip + vm_ip=$(get_vm_ip) + + if vm_exec "${vm_ip}" "echo 'VM is accessible'" "Checking SSH connectivity"; then + log_success "VM is accessible at ${vm_ip}" + return 0 + else + log_error "Cannot access VM" + return 1 + fi +} + +# Test Docker is working +test_docker() { + log_info "Testing Docker installation..." 
+ + local vm_ip + vm_ip=$(get_vm_ip) + + if vm_exec "${vm_ip}" "docker --version" "Checking Docker version"; then + log_success "Docker is installed and working" + else + log_error "Docker is not working" + return 1 + fi + + # Check Docker Compose (try V2 plugin first, then fallback to standalone) + if vm_exec "${vm_ip}" "docker compose version" "Checking Docker Compose V2 plugin"; then + log_success "Docker Compose V2 plugin is available" + elif vm_exec "${vm_ip}" "docker-compose --version" "Checking Docker Compose standalone"; then + log_success "Docker Compose standalone is available" + log_warning "Using standalone docker-compose. Consider upgrading to Docker Compose V2 plugin for full compatibility." + else + log_error "Docker Compose is not working" + return 1 + fi + + return 0 +} + +# Clone and setup Torrust Tracker Demo +setup_torrust_tracker() { + log_info "Setting up Torrust Tracker Demo..." + + local vm_ip + vm_ip=$(get_vm_ip) + + # Check if already cloned + if vm_exec "${vm_ip}" "test -d /home/torrust/github/torrust/torrust-tracker-demo" "Checking if repo exists"; then + log_info "Repository already exists, updating..." + vm_exec "${vm_ip}" "cd /home/torrust/github/torrust/torrust-tracker-demo && git pull" "Updating repository" + else + log_info "Cloning repository..." + vm_exec "${vm_ip}" "mkdir -p /home/torrust/github/torrust" "Creating directory structure" + vm_exec "${vm_ip}" "cd /home/torrust/github/torrust && git clone https://github.com/torrust/torrust-tracker-demo.git" "Cloning repository" + fi + + # Setup environment file + vm_exec "${vm_ip}" "cd /home/torrust/github/torrust/torrust-tracker-demo && cp .env.production .env" "Setting up environment file" + + log_success "Torrust Tracker Demo setup completed" + return 0 +} + +# Start Torrust Tracker services +start_tracker_services() { + log_info "Starting Torrust Tracker services..." 
+ + local vm_ip + vm_ip=$(get_vm_ip) + + # Detect which Docker Compose command to use + local compose_cmd + compose_cmd=$(get_docker_compose_cmd "${vm_ip}") + + if [ -z "${compose_cmd}" ]; then + log_error "Docker Compose is not available" + return 1 + fi + + log_info "Using Docker Compose command: ${compose_cmd}" + + # Pull latest images + vm_exec "${vm_ip}" "cd /home/torrust/github/torrust/torrust-tracker-demo && ${compose_cmd} pull" "Pulling Docker images" + + # Start services + vm_exec "${vm_ip}" "cd /home/torrust/github/torrust/torrust-tracker-demo && ${compose_cmd} up -d" "Starting services" + + # Wait for services to be ready + log_info "Waiting for services to be ready..." + sleep 30 + + # Check service status + if vm_exec "${vm_ip}" "cd /home/torrust/github/torrust/torrust-tracker-demo && ${compose_cmd} ps" "Checking service status"; then + log_success "Services started successfully" + else + log_error "Services failed to start properly" + return 1 + fi + + return 0 +} + +# Test Torrust Tracker endpoints +test_tracker_endpoints() { + log_info "Testing Torrust Tracker endpoints..." + + local vm_ip + vm_ip=$(get_vm_ip) + + # Test HTTP API endpoint + log_info "Testing HTTP API endpoint..." + if vm_exec "${vm_ip}" "curl -f -s http://localhost:7070/api/v1/stats" "Checking HTTP API"; then + log_success "HTTP API is responding" + else + log_error "HTTP API is not responding" + return 1 + fi + + # Test metrics endpoint + log_info "Testing metrics endpoint..." + if vm_exec "${vm_ip}" "curl -f -s http://localhost:1212/metrics" "Checking metrics endpoint"; then + log_success "Metrics endpoint is responding" + else + log_error "Metrics endpoint is not responding" + return 1 + fi + + # Test if UDP ports are listening + log_info "Testing UDP tracker ports..." 
+ # -n keeps ports numeric so the grep below cannot miss ports shown as service names + if vm_exec "${vm_ip}" "ss -uln | grep -E ':6868|:6969'" "Checking UDP ports"; then + log_success "UDP tracker ports are listening" + else + log_warning "UDP tracker ports might not be listening (this is expected if no peers are connected)" + fi + + return 0 +} + +# Test monitoring services +test_monitoring() { + log_info "Testing monitoring services..." + + local vm_ip + vm_ip=$(get_vm_ip) + + # Test Prometheus + log_info "Testing Prometheus..." + if vm_exec "${vm_ip}" "curl -f -s http://localhost:9090/-/healthy" "Checking Prometheus health"; then + log_success "Prometheus is healthy" + else + log_error "Prometheus is not healthy" + return 1 + fi + + # Test Grafana + log_info "Testing Grafana..." + if vm_exec "${vm_ip}" "curl -f -s http://localhost:3100/api/health" "Checking Grafana health"; then + log_success "Grafana is healthy" + else + log_error "Grafana is not healthy" + return 1 + fi + + return 0 +} + +# Collect logs for debugging +collect_logs() { + log_info "Collecting logs for debugging..." + + local vm_ip + vm_ip=$(get_vm_ip) + + # Use the same Compose flavour the other steps detected; fall back to the V2 plugin + local compose_cmd + compose_cmd=$(get_docker_compose_cmd "${vm_ip}") + + # Docker logs + vm_exec "${vm_ip}" "cd /home/torrust/github/torrust/torrust-tracker-demo && ${compose_cmd:-docker compose} logs --tail=50" "Collecting Docker logs" + + # System logs + vm_exec "${vm_ip}" "sudo journalctl --since='1 hour ago' --no-pager | tail -50" "Collecting system logs" + + return 0 +} + +# Stop services +# Returns 1 when the Compose "down" command fails, so callers can warn about an unclean stop +stop_services() { + log_info "Stopping Torrust Tracker services..." + + local vm_ip + vm_ip=$(get_vm_ip) + + # Detect which Docker Compose command to use + local compose_cmd + compose_cmd=$(get_docker_compose_cmd "${vm_ip}") + + if [ -z "${compose_cmd}" ]; then + log_warning "Docker Compose not available, cannot stop services" + return 0 + fi + + # Only report success when the services were actually stopped + vm_exec "${vm_ip}" "cd /home/torrust/github/torrust/torrust-tracker-demo && ${compose_cmd} down" "Stopping services" || return 1 + + log_success "Services stopped" + return 0 +} + +# Run full integration test +run_integration_test() { + log_info "Starting Torrust Tracker integration test..." 
+ echo "Test started at: $(date)" >"${TEST_LOG_FILE}" + + local failed=0 + + test_vm_access || failed=1 + + if [ ${failed} -eq 0 ]; then + test_docker || failed=1 + setup_torrust_tracker || failed=1 + start_tracker_services || failed=1 + test_tracker_endpoints || failed=1 + test_monitoring || failed=1 + fi + + # Always collect logs if there were failures + if [ ${failed} -ne 0 ]; then + log_warning "Test failed, collecting logs for debugging..." + collect_logs || true + fi + + # Always try to stop services + stop_services || log_warning "Failed to stop services cleanly" + + if [ ${failed} -eq 0 ]; then + log_success "All integration tests passed!" + return 0 + else + log_error "Integration tests failed. Check ${TEST_LOG_FILE} for details." + return 1 + fi +} + +# Help function +show_help() { + cat </dev/null 2>&1; then + log_success "OpenTofu is installed: $(tofu version | head -n1)" + else + log_error "OpenTofu is not installed" + return 1 + fi + + # Check if libvirt is installed and running + if systemctl is-active --quiet libvirtd; then + log_success "libvirtd service is running" + else + log_error "libvirtd service is not running. Run: sudo systemctl start libvirtd" + return 1 + fi + + # Check if user can access libvirt + if virsh list >/dev/null 2>&1; then + log_success "User has libvirt access" + elif sudo virsh list >/dev/null 2>&1; then + log_warning "User can access libvirt with sudo (group membership may need refresh)" + log_info "To fix this, run one of the following:" + log_info " 1. Log out and log back in" + log_info " 2. Run: newgrp libvirt" + log_info " 3. Run: exec su -l \$USER" + log_info "For now, we'll continue with sudo access..." 
+ export LIBVIRT_NEEDS_SUDO=1 + else + log_error "User cannot access libvirt even with sudo" + log_error "Please check if libvirt is properly installed:" + log_error " sudo systemctl status libvirtd" + log_error " sudo apt install qemu-kvm libvirt-daemon-system libvirt-clients" + return 1 + fi + + # Check if default network exists and is active + local net_check_cmd="virsh net-list --all" + if [ "${LIBVIRT_NEEDS_SUDO:-}" = "1" ]; then + net_check_cmd="sudo $net_check_cmd" + fi + + # Match the name column exactly: a loose "default.*active" also matches "inactive" + if $net_check_cmd | grep -Eq "^[[:space:]]*default[[:space:]]+active"; then + log_success "Default libvirt network is active" + elif $net_check_cmd | grep -Eq "^[[:space:]]*default[[:space:]]"; then + log_warning "Default network exists but is not active, attempting to start..." + local start_cmd="virsh net-start default && virsh net-autostart default" + if [ "${LIBVIRT_NEEDS_SUDO:-}" = "1" ]; then + # Wrap the whole chain so every command after && also runs under sudo + start_cmd="sudo sh -c '$start_cmd'" + fi + if eval "$start_cmd"; then + log_success "Default network started successfully" + else + log_error "Failed to start default network" + return 1 + fi + else + log_error "Default libvirt network does not exist" + log_error "This is unusual and may indicate a problem with libvirt installation" + return 1 + fi + + # Check KVM support + if [ -r /dev/kvm ]; then + log_success "KVM support available" + else + log_error "KVM support not available" + return 1 + fi + + # Check if default storage pool exists and is active + local pool_check_cmd="virsh pool-list --all" + if [ "${LIBVIRT_NEEDS_SUDO:-}" = "1" ]; then + pool_check_cmd="sudo $pool_check_cmd" + fi + + # Anchor on the pool name so "user-default" and "inactive" rows are not mistaken for an active default pool + if $pool_check_cmd | grep -Eq "^[[:space:]]*default[[:space:]]+active"; then + log_success "Default storage pool is active" + elif $pool_check_cmd | grep -Eq "^[[:space:]]*default[[:space:]]"; then + log_warning "Default storage pool exists but is not active, attempting to start..." 
+ local start_pool_cmd="virsh pool-start default" + if [ "${LIBVIRT_NEEDS_SUDO:-}" = "1" ]; then + start_pool_cmd="sudo $start_pool_cmd" + fi + if eval "$start_pool_cmd"; then + log_success "Default storage pool started successfully" + else + log_error "Failed to start default storage pool" + return 1 + fi + else + log_warning "Default storage pool does not exist, creating it..." + local create_pool_cmd="virsh pool-define-as default dir --target /var/lib/libvirt/images && virsh pool-autostart default && virsh pool-start default" + if [ "${LIBVIRT_NEEDS_SUDO:-}" = "1" ]; then + # Wrap the whole chain so every command after && also runs under sudo + create_pool_cmd="sudo sh -c '$create_pool_cmd'" + fi + if eval "$create_pool_cmd"; then + log_success "Default storage pool created successfully" + else + log_error "Failed to create default storage pool" + return 1 + fi + fi + + # Check libvirt images directory permissions + if [ -d "/var/lib/libvirt/images" ]; then + local images_owner + images_owner=$(stat -c "%U:%G" /var/lib/libvirt/images 2>/dev/null || echo "unknown:unknown") + if [ "$images_owner" = "libvirt-qemu:libvirt" ]; then + log_success "libvirt images directory has correct ownership" + else + log_warning "libvirt images directory ownership needs fixing (currently: $images_owner)" + log_info "Run 'make fix-libvirt' to fix this automatically" + fi + fi + + return 0 +} + +test_terraform_syntax() { + log_info "Testing OpenTofu configuration syntax..." + + cd "${TERRAFORM_DIR}" + + # Initialize if needed + if [ ! -d ".terraform" ]; then + log_info "Initializing OpenTofu..." 
+ if tofu init; then + log_success "OpenTofu initialization successful" + else + log_error "OpenTofu initialization failed" + return 1 + fi + fi + + # Validate configuration + if tofu validate; then + log_success "OpenTofu configuration is valid" + else + log_error "OpenTofu configuration validation failed" + return 1 + fi + + # Plan (dry run) - only if libvirt is available and not in CI + if [ "${CI:-}" = "true" ]; then + log_info "CI environment detected, skipping OpenTofu plan (requires libvirt)" + log_success "OpenTofu syntax validation completed for CI" + elif [ -S "/var/run/libvirt/libvirt-sock" ]; then + if tofu plan -out=test.tfplan >/dev/null 2>&1; then + log_success "OpenTofu plan successful" + rm -f test.tfplan + else + log_error "OpenTofu plan failed" + return 1 + fi + else + log_warning "libvirt not available, skipping OpenTofu plan" + log_success "OpenTofu syntax validation completed" + fi + + return 0 +} + +test_cloud_init_syntax() { + log_info "Testing cloud-init configuration syntax..." 
+ + local cloud_init_dir="${PROJECT_ROOT}/infrastructure/cloud-init" + + # Check if cloud-init files exist + local required_files=("user-data.yaml.tpl" "user-data-minimal.yaml.tpl" "meta-data.yaml" "network-config.yaml") + for file in "${required_files[@]}"; do + if [ -f "${cloud_init_dir}/${file}" ]; then + log_success "Found ${file}" + else + log_error "Missing ${file}" + return 1 + fi + done + + # Validate YAML syntax (if yamllint is available) + if command -v yamllint >/dev/null 2>&1; then + # Test static YAML files + for file in meta-data.yaml network-config.yaml; do + if yamllint -c "${PROJECT_ROOT}/.yamllint-ci.yml" "${cloud_init_dir}/${file}" >/dev/null 2>&1; then + log_success "${file} YAML syntax is valid" + else + log_warning "${file} YAML syntax check failed (continuing anyway)" + fi + done + + # Test template files by substituting variables + local temp_dir="/tmp/torrust-cloud-init-test" + mkdir -p "${temp_dir}" + + for template in user-data.yaml.tpl user-data-minimal.yaml.tpl; do + local test_file="${temp_dir}/${template%.tpl}" + # Substitute template variables with dummy values for syntax testing + sed "s/\\\${ssh_public_key}/ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC/" "${cloud_init_dir}/${template}" >"${test_file}" + + if yamllint -c "${PROJECT_ROOT}/.yamllint-ci.yml" "${test_file}" >/dev/null 2>&1; then + log_success "${template} YAML syntax is valid (after variable substitution)" + else + log_warning "${template} YAML syntax check failed (continuing anyway)" + fi + done + + # Cleanup + rm -rf "${temp_dir}" + else + log_warning "yamllint not available, skipping YAML syntax validation" + fi + + return 0 +} + +deploy_vm() { + log_info "Deploying test VM..." + + cd "${TERRAFORM_DIR}" + + # Apply configuration + if tofu apply -auto-approve; then + log_success "VM deployment successful" + return 0 + else + log_error "VM deployment failed" + return 1 + fi +} + +test_vm_connectivity() { + log_info "Testing VM connectivity..." 
+ + cd "${TERRAFORM_DIR}" + + # Get VM IP from Terraform output + local vm_ip + vm_ip=$(tofu output -raw vm_ip 2>/dev/null || echo "") + + if [ -z "${vm_ip}" ]; then + log_error "Could not get VM IP from OpenTofu output" + return 1 + fi + + log_info "VM IP: ${vm_ip}" + + # Wait for VM to be ready (cloud-init can take time) + log_info "Waiting for VM to be ready (this may take a few minutes)..." + local max_attempts=30 + local attempt=1 + + while [ ${attempt} -le ${max_attempts} ]; do + if ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o BatchMode=yes torrust@"${vm_ip}" "echo 'VM is ready'" >/dev/null 2>&1; then + log_success "VM is accessible via SSH" + break + fi + + log_info "Attempt ${attempt}/${max_attempts}: VM not ready yet, waiting..." + sleep 20 + ((attempt++)) + done + + if [ ${attempt} -gt ${max_attempts} ]; then + log_error "VM did not become accessible within expected time" + return 1 + fi + + return 0 +} + +test_vm_services() { + log_info "Testing VM services..." + + cd "${TERRAFORM_DIR}" + local vm_ip + vm_ip=$(tofu output -raw vm_ip) + + # Test Docker installation + if ssh -o StrictHostKeyChecking=no torrust@"${vm_ip}" "docker --version" >/dev/null 2>&1; then + log_success "Docker is installed and accessible" + else + log_error "Docker is not working" + return 1 + fi + + # Test UFW status + if ssh -o StrictHostKeyChecking=no torrust@"${vm_ip}" "sudo ufw status" | grep -q "Status: active"; then + log_success "UFW firewall is active" + else + log_error "UFW firewall is not active" + return 1 + fi + + # Test if required ports are open + local required_ports=("22" "80" "443" "6868" "6969" "7070" "1212") + for port in "${required_ports[@]}"; do + if ssh -o StrictHostKeyChecking=no torrust@"${vm_ip}" "sudo ufw status numbered" | grep -q "${port}"; then + log_success "Port ${port} is configured in UFW" + else + log_warning "Port ${port} might not be configured in UFW" + fi + done + + return 0 +} + +cleanup_vm() { + log_info "Cleaning up test VM..." 
+ + cd "${TERRAFORM_DIR}" + + if tofu destroy -auto-approve; then + log_success "VM cleanup successful" + else + log_error "VM cleanup failed" + return 1 + fi + + return 0 +} + +run_full_test() { + log_info "Starting full infrastructure test..." + echo "Test started at: $(date)" >"${TEST_LOG_FILE}" + + local failed=0 + + test_prerequisites || failed=1 + test_terraform_syntax || failed=1 + test_cloud_init_syntax || failed=1 + + if [ ${failed} -eq 0 ]; then + deploy_vm || failed=1 + + if [ ${failed} -eq 0 ]; then + test_vm_connectivity || failed=1 + test_vm_services || failed=1 + fi + + # Always try to cleanup + cleanup_vm || log_warning "Cleanup failed, manual cleanup may be required" + fi + + if [ ${failed} -eq 0 ]; then + log_success "All tests passed!" + return 0 + else + log_error "Some tests failed. Check ${TEST_LOG_FILE} for details." + return 1 + fi +} + +# Help function +show_help() { + cat </dev/null 2>&1 +} + +# Function to run yamllint +run_yamllint() { + print_status "INFO" "Running yamllint on YAML files..." + + if ! command_exists yamllint; then + print_status "ERROR" "yamllint not found. Install with: sudo apt-get install yamllint" + return 1 + fi + + # Use yamllint config if it exists + if [ -f ".yamllint-ci.yml" ]; then + if yamllint -c .yamllint-ci.yml .; then + print_status "SUCCESS" "yamllint passed" + return 0 + else + print_status "ERROR" "yamllint failed" + return 1 + fi + else + if yamllint .; then + print_status "SUCCESS" "yamllint passed" + return 0 + else + print_status "ERROR" "yamllint failed" + return 1 + fi + fi +} + +# Function to run ShellCheck +run_shellcheck() { + print_status "INFO" "Running ShellCheck on shell scripts..." + + if ! command_exists shellcheck; then + print_status "ERROR" "shellcheck not found. 
Install with: sudo apt-get install shellcheck" + return 1 + fi + + # Use glob pattern to find shell scripts, excluding .git and .terraform directories + # Enable globstar for ** patterns + shopt -s globstar nullglob + + # Find shell scripts with common extensions + shell_files=() + for pattern in "**/*.sh" "**/*.bash"; do + for file in $pattern; do + # Skip files in .git and .terraform directories + if [[ "$file" != *".git"* && "$file" != *".terraform"* ]]; then + shell_files+=("$file") + fi + done + done + + if [ ${#shell_files[@]} -eq 0 ]; then + print_status "WARNING" "No shell scripts found" + return 0 + fi + + if shellcheck "${shell_files[@]}"; then + print_status "SUCCESS" "shellcheck passed" + return 0 + else + print_status "ERROR" "shellcheck failed" + return 1 + fi +} + +# Function to run markdownlint +run_markdownlint() { + print_status "INFO" "Running markdownlint on Markdown files..." + + if ! command_exists markdownlint; then + print_status "ERROR" "markdownlint not found. Install with: npm install -g markdownlint-cli" + return 1 + fi + + # Use markdownlint with glob pattern to find markdown files + # markdownlint can handle glob patterns and will exclude .git directories by default + if markdownlint "**/*.md"; then + print_status "SUCCESS" "markdownlint passed" + return 0 + else + print_status "ERROR" "markdownlint failed" + return 1 + fi +} + +# Main function +main() { + print_status "INFO" "Starting linting process..." + + local exit_code=0 + + # Run yamllint + if ! run_yamllint; then + exit_code=1 + fi + + echo "" + + # Run ShellCheck + if ! run_shellcheck; then + exit_code=1 + fi + + echo "" + + # Run markdownlint + if ! run_markdownlint; then + exit_code=1 + fi + + echo "" + + if [ $exit_code -eq 0 ]; then + print_status "SUCCESS" "All linting checks passed!" + else + print_status "ERROR" "Some linting checks failed!" + fi + + return $exit_code +} + +# Show help +show_help() { + cat <