# Terraform Infrastructure as Code Best Practices
## 1. Project Structure & Organization
Organize Terraform code with clear module structure and separation of concerns.
```hcl
# main.tf
# Pin the CLI and each provider so runs are reproducible across machines.
terraform {
  required_version = ">= 1.0"

  # Remote state in S3 with encryption enabled and a DynamoDB lock table.
  backend "s3" {
    bucket         = "my-terraform-state"
    key            = "production/terraform.tfstate"
    region         = "us-west-2"
    dynamodb_table = "terraform-locks"
    encrypt        = true
  }

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "~> 2.23"
    }
  }
}
# AWS provider: the region and the default tag set come from input variables.
provider "aws" {
  region = var.aws_region

  # Tags declared here are merged into every taggable resource this provider manages.
  default_tags {
    tags = {
      Environment = var.environment
      Project     = var.project_name
      ManagedBy   = "terraform"
    }
  }
}
# Variables
variable "environment" {
  description = "Environment name"
  type        = string

  # Fail early (at plan time) on anything other than the supported environments.
  validation {
    condition     = contains(["dev", "staging", "prod"], var.environment)
    error_message = "Environment must be dev, staging, or prod."
  }
}

variable "aws_region" {
  description = "AWS region"
  type        = string
  default     = "us-west-2"
}

# No default: callers must name the project explicitly.
variable "project_name" {
  description = "Name of the project"
  type        = string
}
```
## 2. Module Development
Create reusable modules with proper inputs, outputs, and documentation.
```hcl
# modules/vpc/main.tf
# The VPC itself; DNS support and DNS hostnames are both switched on.
resource "aws_vpc" "main" {
  cidr_block           = var.cidr_block
  enable_dns_hostnames = true
  enable_dns_support   = true

  # Caller-supplied tags plus a Name derived from the module's name prefix.
  tags = merge(var.tags, {
    Name = "${var.name}-vpc"
  })
}
# One public subnet per entry in var.public_subnets.
resource "aws_subnet" "public" {
  count = length(var.public_subnets)

  vpc_id     = aws_vpc.main.id
  cidr_block = var.public_subnets[count.index]

  # Subnet N is placed in the Nth available AZ, so the list must not be
  # longer than the number of AZs returned by the data source.
  availability_zone       = data.aws_availability_zones.available.names[count.index]
  map_public_ip_on_launch = true

  tags = merge(var.tags, {
    Name = "${var.name}-public-${count.index + 1}"
    Type = "public"
  })
}
# One private subnet per entry in var.private_subnets.
resource "aws_subnet" "private" {
  count = length(var.private_subnets)

  vpc_id     = aws_vpc.main.id
  cidr_block = var.private_subnets[count.index]

  # Subnet N is placed in the Nth available AZ, so the list must not be
  # longer than the number of AZs returned by the data source.
  availability_zone = data.aws_availability_zones.available.names[count.index]

  tags = merge(var.tags, {
    Name = "${var.name}-private-${count.index + 1}"
    Type = "private"
  })
}
# Internet gateway attached to the module's VPC.
resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = merge(var.tags, {
    Name = "${var.name}-igw"
  })
}
# One NAT gateway per public subnet when var.enable_nat_gateway is true,
# otherwise none.
resource "aws_nat_gateway" "main" {
  count = var.enable_nat_gateway ? length(var.public_subnets) : 0

  # Gateway N uses Elastic IP N and lives in public subnet N.
  allocation_id = aws_eip.nat[count.index].id
  subnet_id     = aws_subnet.public[count.index].id

  tags = merge(var.tags, {
    Name = "${var.name}-nat-${count.index + 1}"
  })

  # Explicit ordering: create the internet gateway before any NAT gateway.
  depends_on = [aws_internet_gateway.main]
}

# Elastic IPs for the NAT gateways; the count expression mirrors the one above
# so the two lists always have the same length.
resource "aws_eip" "nat" {
  count = var.enable_nat_gateway ? length(var.public_subnets) : 0

  domain = "vpc"

  tags = merge(var.tags, {
    Name = "${var.name}-eip-${count.index + 1}"
  })
}
# Availability Zones in the "available" state; the subnets index into this list.
data "aws_availability_zones" "available" {
  state = "available"
}
# modules/vpc/variables.tf
variable "name" {
  description = "Name prefix for VPC resources"
  type        = string
}

variable "cidr_block" {
  description = "CIDR block for VPC"
  type        = string

  # cidrhost() errors on malformed CIDR input; can() turns that error into false.
  validation {
    condition     = can(cidrhost(var.cidr_block, 0))
    error_message = "CIDR block must be valid."
  }
}

variable "public_subnets" {
  description = "List of public subnet CIDR blocks"
  type        = list(string)
  default     = []
}

variable "private_subnets" {
  description = "List of private subnet CIDR blocks"
  type        = list(string)
  default     = []
}

variable "enable_nat_gateway" {
  description = "Enable NAT Gateway for private subnets"
  type        = bool
  default     = true
}

variable "tags" {
  description = "Additional tags for resources"
  type        = map(string)
  default     = {}
}
# modules/vpc/outputs.tf
# Identifiers exported for callers of this module.
output "vpc_id" {
  value       = aws_vpc.main.id
  description = "ID of the VPC"
}

output "vpc_cidr_block" {
  value       = aws_vpc.main.cidr_block
  description = "CIDR block of the VPC"
}

# Subnet ID lists keep the same order as the input CIDR lists.
output "public_subnet_ids" {
  value       = [for subnet in aws_subnet.public : subnet.id]
  description = "IDs of the public subnets"
}

output "private_subnet_ids" {
  value       = [for subnet in aws_subnet.private : subnet.id]
  description = "IDs of the private subnets"
}

output "internet_gateway_id" {
  value       = aws_internet_gateway.main.id
  description = "ID of the Internet Gateway"
}
```
## 3. State Management
Use remote state and implement state locking for team collaboration.
```hcl
# backend.tf
# Remote state configuration. Backend blocks cannot reference variables, so
# every value here is a literal.
terraform {
  backend "s3" {
    region         = "us-west-2"
    bucket         = "company-terraform-state"
    key            = "environments/production/terraform.tfstate"
    dynamodb_table = "terraform-state-locks"
    encrypt        = true

    # Optional: Use role assumption for cross-account access
    role_arn = "arn:aws:iam::123456789012:role/TerraformRole"
  }
}
# Create S3 bucket for state storage
# Bucket that holds the remote state objects.
resource "aws_s3_bucket" "terraform_state" {
  bucket = "company-terraform-state"
}

# Versioning keeps earlier revisions of the state file in the bucket.
resource "aws_s3_bucket_versioning" "terraform_state" {
  versioning_configuration {
    status = "Enabled"
  }

  bucket = aws_s3_bucket.terraform_state.id
}
# Default server-side encryption (SSE-S3 / AES256) for the state bucket.
# The AWS provider has no "aws_s3_bucket_encryption" resource type; the
# standalone resource is aws_s3_bucket_server_side_encryption_configuration.
resource "aws_s3_bucket_server_side_encryption_configuration" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}
# All four public-access guards are enabled for the state bucket.
resource "aws_s3_bucket_public_access_block" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  # ACL-based exposure
  block_public_acls  = true
  ignore_public_acls = true

  # Policy-based exposure
  block_public_policy     = true
  restrict_public_buckets = true
}
# DynamoDB table for state locking
# Lock table referenced by the backend's dynamodb_table setting.
resource "aws_dynamodb_table" "terraform_locks" {
  name         = "terraform-state-locks"
  billing_mode = "PAY_PER_REQUEST"

  # Single string partition key named "LockID".
  hash_key = "LockID"
  attribute {
    name = "LockID"
    type = "S"
  }

  tags = {
    Name = "Terraform State Locks"
  }
}
```
## 4. Variable Management
Use locals, data sources, and proper variable handling.
```hcl
# variables.tf
variable "environment" {
  description = "Environment name"
  type        = string

  validation {
    condition     = contains(["dev", "staging", "prod"], var.environment)
    error_message = "Environment must be one of: dev, staging, prod."
  }
}

# Lookup table keyed by environment name; see local.instance_type for the lookup.
variable "instance_types" {
  description = "Map of instance types by environment"
  type        = map(string)
  default = {
    dev     = "t3.micro"
    staging = "t3.small"
    prod    = "t3.medium"
  }
}

# Structured input: all five attributes are required because the object type
# declares no optional() members.
variable "database_config" {
  description = "Database configuration"
  type = object({
    engine                  = string
    engine_version          = string
    instance_class          = string
    allocated_storage       = number
    backup_retention_period = number
  })

  validation {
    condition     = var.database_config.allocated_storage >= 20
    error_message = "Database storage must be at least 20 GB."
  }
}
# locals.tf
# Values derived once and reused across the configuration.
locals {
  # Tags shared by every resource. A timestamp() based tag is deliberately
  # not included: timestamp() returns a new value on every run, which would
  # make every tagged resource show a change on every plan.
  common_tags = {
    Environment = var.environment
    Project     = var.project_name
    ManagedBy   = "terraform"
  }

  # Instance size for the current environment.
  instance_type = var.instance_types[var.environment]

  # First three available AZs (requires a region with at least three).
  azs = slice(data.aws_availability_zones.available.names, 0, 3)

  # Non-overlapping VPC ranges per environment.
  vpc_cidr = {
    dev     = "10.0.0.0/16"
    staging = "10.1.0.0/16"
    prod    = "10.2.0.0/16"
  }
}
# data.tf
# Account and region the provider is currently operating in.
data "aws_caller_identity" "current" {}
data "aws_region" "current" {}

# Availability Zones in the "available" state.
data "aws_availability_zones" "available" {
  state = "available"
}

# Newest Amazon-owned image whose name matches the Amazon Linux 2 HVM
# x86_64 gp2 pattern.
data "aws_ami" "amazon_linux" {
  owners      = ["amazon"]
  most_recent = true

  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-*-x86_64-gp2"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }
}
```
## 5. Resource Configuration
Follow best practices for resource configuration and naming.
```hcl
# ec2.tf
# Launch template for the application instances.
resource "aws_launch_template" "app" {
  # name_prefix (rather than name) lets a replacement template coexist with
  # the old one, which create_before_destroy below relies on.
  name_prefix   = "${var.project_name}-${var.environment}-"
  image_id      = data.aws_ami.amazon_linux.id
  instance_type = local.instance_type

  vpc_security_group_ids = [aws_security_group.app.id]

  iam_instance_profile {
    name = aws_iam_instance_profile.app.name
  }

  # Render userdata.sh with the two template variables, then base64-encode it.
  user_data = base64encode(templatefile("${path.module}/userdata.sh", {
    environment = var.environment
    app_version = var.app_version
  }))

  # Tags applied to instances launched from this template.
  tag_specifications {
    resource_type = "instance"
    tags = merge(local.common_tags, {
      Name = "${var.project_name}-${var.environment}-app"
    })
  }

  lifecycle {
    create_before_destroy = true
  }
}
# Auto Scaling group running the application behind the load balancer.
resource "aws_autoscaling_group" "app" {
  name                = "${var.project_name}-${var.environment}-asg"
  vpc_zone_identifier = var.private_subnet_ids
  target_group_arns   = [aws_lb_target_group.app.arn]
  health_check_type   = "ELB"

  min_size         = var.asg_config.min_size
  max_size         = var.asg_config.max_size
  desired_capacity = var.asg_config.desired_capacity

  launch_template {
    id = aws_launch_template.app.id

    # Track the template's concrete latest version number instead of the
    # "$Latest" alias: the alias never changes in this block, so a new
    # template version would not start the instance refresh below.
    version = aws_launch_template.app.latest_version
  }

  # Replace instances gradually, keeping at least half of them healthy.
  instance_refresh {
    strategy = "Rolling"
    preferences {
      min_healthy_percentage = 50
    }
  }

  # Name tag for the group itself; not copied onto instances.
  tag {
    key                 = "Name"
    value               = "${var.project_name}-${var.environment}-asg"
    propagate_at_launch = false
  }

  # Each common tag is copied onto the instances the group launches.
  dynamic "tag" {
    for_each = local.common_tags
    content {
      key                 = tag.key
      value               = tag.value
      propagate_at_launch = true
    }
  }
}
# Security Groups
# Security group for the application instances.
resource "aws_security_group" "app" {
  name_prefix = "${var.project_name}-${var.environment}-app-"
  vpc_id      = var.vpc_id

  # HTTP is accepted only from members of the ALB security group.
  ingress {
    from_port       = 80
    to_port         = 80
    protocol        = "tcp"
    security_groups = [aws_security_group.alb.id]
  }

  # Unrestricted outbound traffic (protocol "-1" means all protocols).
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(local.common_tags, {
    Name = "${var.project_name}-${var.environment}-app-sg"
  })

  lifecycle {
    create_before_destroy = true
  }
}
```
## 6. Output Values
Define comprehensive outputs for module composition.
```hcl
# outputs.tf
# Re-export of the VPC module's ID.
output "vpc_id" {
  value       = module.vpc.vpc_id
  description = "ID of the VPC"
}

output "load_balancer_dns" {
  value       = aws_lb.main.dns_name
  description = "DNS name of the load balancer"
}

# Marked sensitive so the value is redacted in CLI output.
output "database_endpoint" {
  value       = aws_db_instance.main.endpoint
  description = "RDS instance endpoint"
  sensitive   = true
}

# Grouped outputs: one map per resource family instead of one output per resource.
output "security_group_ids" {
  description = "Map of security group IDs"
  value = {
    alb      = aws_security_group.alb.id
    app      = aws_security_group.app.id
    database = aws_security_group.database.id
  }
}

output "resource_arns" {
  description = "ARNs of created resources"
  value = {
    iam_role     = aws_iam_role.app.arn
    kms_key      = aws_kms_key.app.arn
    s3_bucket    = aws_s3_bucket.app_data.arn
    target_group = aws_lb_target_group.app.arn
  }
}
```
## 7. Environment Management
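Keep one `terraform.tfvars` file per environment (optionally combined with workspaces or separate state keys) so the same code is applied with environment-specific values.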
```hcl
# environments/dev/terraform.tfvars
project_name = "myapp"
environment  = "dev"

# Network layout: one /16 with two public and two private /24 subnets.
vpc_cidr        = "10.0.0.0/16"
public_subnets  = ["10.0.1.0/24", "10.0.2.0/24"]
private_subnets = ["10.0.10.0/24", "10.0.20.0/24"]

# Auto Scaling group sizing
asg_config = {
  min_size         = 1
  desired_capacity = 1
  max_size         = 3
}

# Database settings
database_config = {
  engine                  = "postgres"
  engine_version          = "15.4"
  instance_class          = "db.t3.micro"
  allocated_storage       = 20
  backup_retention_period = 7
}
# environments/prod/terraform.tfvars
project_name = "myapp"
environment  = "prod"

# Network layout: one /16 with three public and three private /24 subnets.
vpc_cidr        = "10.2.0.0/16"
public_subnets  = ["10.2.1.0/24", "10.2.2.0/24", "10.2.3.0/24"]
private_subnets = ["10.2.10.0/24", "10.2.20.0/24", "10.2.30.0/24"]

# Auto Scaling group sizing
asg_config = {
  min_size         = 3
  desired_capacity = 5
  max_size         = 10
}

# Database settings
database_config = {
  engine                  = "postgres"
  engine_version          = "15.4"
  instance_class          = "db.r6g.large"
  allocated_storage       = 100
  backup_retention_period = 30
}
```
## 8. Security & Compliance
Implement security best practices and compliance requirements.
```hcl
# security.tf
# Customer-managed KMS key for the application, with automatic rotation on.
resource "aws_kms_key" "app" {
  description             = "KMS key for ${var.project_name}-${var.environment}"
  deletion_window_in_days = 7
  enable_key_rotation     = true

  # Key policy: the account root principal gets kms:* on this key.
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid    = "Enable IAM User Permissions"
        Effect = "Allow"
        Principal = {
          AWS = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"
        }
        Action   = "kms:*"
        Resource = "*"
      }
    ]
  })

  tags = local.common_tags
}

# Human-readable alias pointing at the key above.
resource "aws_kms_alias" "app" {
  name          = "alias/${var.project_name}-${var.environment}"
  target_key_id = aws_kms_key.app.key_id
}
# S3 Bucket with encryption and versioning
# Application data bucket. The random hex suffix keeps the name from
# colliding with an existing bucket of the same project/environment name.
resource "aws_s3_bucket" "app_data" {
  bucket = "${var.project_name}-${var.environment}-data-${random_id.bucket_suffix.hex}"
}

resource "aws_s3_bucket_versioning" "app_data" {
  bucket = aws_s3_bucket.app_data.id

  versioning_configuration {
    status = "Enabled"
  }
}
# Default encryption for the data bucket using the application's KMS key.
# The AWS provider has no "aws_s3_bucket_encryption" resource type; the
# standalone resource is aws_s3_bucket_server_side_encryption_configuration.
resource "aws_s3_bucket_server_side_encryption_configuration" "app_data" {
  bucket = aws_s3_bucket.app_data.id

  rule {
    apply_server_side_encryption_by_default {
      kms_master_key_id = aws_kms_key.app.arn
      sse_algorithm     = "aws:kms"
    }

    # Enables an S3 Bucket Key for SSE-KMS on this bucket.
    bucket_key_enabled = true
  }
}
# All four public-access guards are enabled for the data bucket.
resource "aws_s3_bucket_public_access_block" "app_data" {
  bucket = aws_s3_bucket.app_data.id

  # ACL-based exposure
  block_public_acls  = true
  ignore_public_acls = true

  # Policy-based exposure
  block_public_policy     = true
  restrict_public_buckets = true
}

# 8 random bytes; the bucket name uses their hex form as a suffix.
resource "random_id" "bucket_suffix" {
  byte_length = 8
}
# IAM Role with least privilege
# Role for the application; the trust policy lets the EC2 service assume it.
resource "aws_iam_role" "app" {
  name = "${var.project_name}-${var.environment}-app-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
      }
    ]
  })

  tags = local.common_tags
}
# Permissions for the application role, scoped to its own bucket and key.
resource "aws_iam_policy" "app" {
  name        = "${var.project_name}-${var.environment}-app-policy"
  description = "Policy for ${var.project_name} application"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        # Object-level access inside the data bucket only.
        Effect = "Allow"
        Action = [
          "s3:GetObject",
          "s3:PutObject",
          "s3:DeleteObject"
        ]
        Resource = "${aws_s3_bucket.app_data.arn}/*"
      },
      {
        # The bucket defaults to SSE-KMS with this key: reads need
        # kms:Decrypt, and writes (s3:PutObject) need kms:GenerateDataKey.
        Effect = "Allow"
        Action = [
          "kms:Decrypt",
          "kms:GenerateDataKey",
          "kms:DescribeKey"
        ]
        Resource = aws_kms_key.app.arn
      }
    ]
  })
}

# Bind the policy to the application role.
resource "aws_iam_role_policy_attachment" "app" {
  role       = aws_iam_role.app.name
  policy_arn = aws_iam_policy.app.arn
}
```
## 9. Testing & Validation
Implement testing strategies for infrastructure code.
```go
// tests/integration_test.go
package test
import (
"testing"
"github.com/gruntwork-io/terratest/modules/terraform"
"github.com/gruntwork-io/terratest/modules/aws"
"github.com/stretchr/testify/assert"
)
// TestTerraformVPCModule applies the VPC module with a small fixture, checks
// that a non-empty vpc_id output is produced, and looks that VPC up in AWS.
func TestTerraformVPCModule(t *testing.T) {
	t.Parallel()

	// Input variables handed to the module under test.
	moduleVars := map[string]interface{}{
		"name":            "test-vpc",
		"cidr_block":      "10.0.0.0/16",
		"public_subnets":  []string{"10.0.1.0/24", "10.0.2.0/24"},
		"private_subnets": []string{"10.0.10.0/24", "10.0.20.0/24"},
	}
	opts := terraform.WithDefaultRetryableErrors(t, &terraform.Options{
		TerraformDir: "../modules/vpc",
		Vars:         moduleVars,
	})

	// Deferred before the apply so teardown is scheduled no matter how the
	// rest of the test goes.
	defer terraform.Destroy(t, opts)

	terraform.InitAndApply(t, opts)

	// The module must export an ID for the VPC it created.
	vpcID := terraform.Output(t, opts, "vpc_id")
	assert.NotEmpty(t, vpcID)

	// Look the VPC up in AWS (region us-west-2) by that ID.
	aws.GetVpcById(t, vpcID, "us-west-2")
}
```
```makefile
# Makefile for common operations
# Usage: make <target> [ENV=dev|staging|prod]
# plan, apply and destroy read environments/$(ENV)/terraform.tfvars and
# therefore require ENV. Recipe lines must start with a tab character.
.PHONY: init plan apply destroy validate format test check-env

init:
	terraform init

# Static checks: Terraform's own validation, then tflint and checkov.
validate:
	terraform validate
	tflint
	checkov -d .

format:
	terraform fmt -recursive

# Stop with a clear message instead of passing Terraform the path
# "environments//terraform.tfvars" when ENV is missing.
check-env:
	@test -n "$(ENV)" || { echo "ENV is required, e.g. make plan ENV=dev" >&2; exit 1; }

plan: check-env
	terraform plan -var-file="environments/$(ENV)/terraform.tfvars"

apply: check-env
	terraform apply -var-file="environments/$(ENV)/terraform.tfvars"

destroy: check-env
	terraform destroy -var-file="environments/$(ENV)/terraform.tfvars"

# Integration tests create real infrastructure, hence the long timeout.
test:
	cd tests && go test -v -timeout 30m
```
## 10. CI/CD Integration
Integrate Terraform with CI/CD pipelines for automated deployments.
```yaml
# .github/workflows/terraform.yml
name: Terraform CI/CD

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]

env:
  TF_VERSION: 1.6.0

jobs:
  # Static checks only; this job has no cloud credentials.
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}
      - name: Terraform Format
        # -recursive so files under modules/ and environments/ are checked too.
        run: terraform fmt -check -recursive
      - name: Terraform Init
        # No AWS credentials are configured in this job, so skip the S3
        # backend; providers and modules are still installed for validate.
        run: terraform init -backend=false
      - name: Terraform Validate
        run: terraform validate
      - name: Run TFLint
        uses: terraform-linters/setup-tflint@v4
        with:
          tflint_version: latest
      - name: TFLint
        run: |
          tflint --init
          tflint

  # One plan per non-production environment, uploaded as an artifact.
  plan:
    runs-on: ubuntu-latest
    needs: validate
    strategy:
      matrix:
        environment: [dev, staging]
    steps:
      - uses: actions/checkout@v4
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-west-2
      - name: Terraform Init
        run: terraform init
      - name: Terraform Plan
        run: |
          terraform plan \
            -var-file="environments/${{ matrix.environment }}/terraform.tfvars" \
            -out="${{ matrix.environment }}.tfplan"
      - name: Upload Plan
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.environment }}-plan
          path: ${{ matrix.environment }}.tfplan

  # Runs only for the main branch. Note: this applies the prod var-file
  # directly with -auto-approve; it does not consume a saved plan artifact.
  apply:
    runs-on: ubuntu-latest
    needs: plan
    if: github.ref == 'refs/heads/main'
    environment:
      name: production
      url: ${{ steps.apply.outputs.app_url }}
    steps:
      - uses: actions/checkout@v4
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}
          # The wrapper script decorates stdout, which would corrupt the
          # value captured from `terraform output` below.
          terraform_wrapper: false
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-west-2
      - name: Terraform Init
        run: terraform init
      - name: Terraform Apply
        id: apply
        run: |
          terraform apply \
            -var-file="environments/prod/terraform.tfvars" \
            -auto-approve
          # Capture outputs
          echo "app_url=$(terraform output -raw load_balancer_dns)" >> "$GITHUB_OUTPUT"
```
## Checklist
- [ ] Use consistent naming conventions and resource tagging
- [ ] Implement proper module structure with inputs, outputs, and documentation
- [ ] Configure remote state storage with encryption and locking
- [ ] Use data sources and locals to avoid hardcoded values
- [ ] Implement validation rules for variables and resources
- [ ] Follow security best practices with KMS encryption and IAM policies
- [ ] Use lifecycle rules to prevent accidental resource destruction
- [ ] Implement comprehensive testing with Terratest
- [ ] Set up CI/CD pipelines for automated validation and deployment
- [ ] Use Terraform workspaces or separate state files for environments
- [ ] Keep Terraform and provider versions pinned
- [ ] Implement drift detection and remediation processes
- [ ] Use Terraform Cloud or similar for team collaboration
- [ ] Document infrastructure architecture and deployment procedures
- [ ] Monitor infrastructure costs and implement cost controls