Why AWS WAF is Hard to Tune
AWS WAF is powerful but opaque. You enable the AWS Managed Rules for OWASP Top 10 and immediately start getting "403 Forbidden" complaints from customers. Your Shopify webhook breaks. Your Jenkins CI/CD pipeline cannot POST to your API. Welcome to the false positive problem.
Managed rule groups are written conservatively — they would rather block some legitimate traffic than miss a real attack. For your specific application, you need to tune them deliberately to find the balance between security and availability.
WAF Architecture Overview
Internet --> CloudFront / ALB / API Gateway
                        |
                 AWS WAF WebACL
                        |
        +---------------+---------------+
        |               |               |
 IP Rule Groups    OWASP Rules     Rate Rules
        |               |               |
        +---------------+---------------+
                        |
        Allow / Block / Count / CAPTCHA
                        |
                Your Application
Start with Count Mode — Never Deploy Blocking Cold
# terraform/waf.tf
# Phase 1 web ACL: attaches the AWS managed rule groups in COUNT mode so that
# matches are recorded (metrics + sampled requests) but nothing is blocked yet.
resource "aws_wafv2_web_acl" "main" {
  name  = "main-waf"
  scope = "REGIONAL"

  # Requests that no rule terminates fall through to allow.
  default_action {
    allow {}
  }

  # Phase 1: Add managed rules in COUNT mode
  rule {
    name     = "AWSManagedRulesCommonRuleSet"
    priority = 10

    # Group-level override: observe before enforcing.
    override_action {
      count {}
    }

    statement {
      managed_rule_group_statement {
        vendor_name = "AWS"
        name        = "AWSManagedRulesCommonRuleSet"

        # Per-rule overrides. action_to_use must use the multi-line block
        # form: HCL's one-line block syntax cannot contain a nested block.
        rule_action_override {
          name = "SizeRestrictions_BODY" # Triggers on file uploads
          action_to_use {
            count {}
          }
        }

        rule_action_override {
          name = "GenericRFI_BODY"
          action_to_use {
            count {}
          }
        }
      }
    }

    visibility_config {
      cloudwatch_metrics_enabled = true
      metric_name                = "AWSManagedRulesCommonRuleSet"
      sampled_requests_enabled   = true
    }
  }

  rule {
    name     = "SQLiProtection"
    priority = 20

    # Analyze logs first; switch to none {} once false positives are handled.
    override_action {
      count {}
    }

    statement {
      managed_rule_group_statement {
        vendor_name = "AWS"
        name        = "AWSManagedRulesSQLiRuleSet"
        version     = "Version_2.0" # pinned static version of the rule group
      }
    }

    visibility_config {
      cloudwatch_metrics_enabled = true
      metric_name                = "SQLiRuleSet"
      sampled_requests_enabled   = true
    }
  }

  # ACL-wide metrics and request sampling.
  visibility_config {
    cloudwatch_metrics_enabled = true
    metric_name                = "MainWebACL"
    sampled_requests_enabled   = true
  }
}
# Ships WAF logs for the main web ACL to the Kinesis Data Firehose stream.
#
# Filtering: the goal is to discard plain "allowed, nothing matched" noise while
# keeping every record needed for count-mode tuning. A filter that drops on
# action ALLOW would also discard requests that matched a COUNT rule and were
# then allowed by the default action, so instead everything is dropped by
# default and records are kept when ANY listed action was applied.
resource "aws_wafv2_web_acl_logging_configuration" "main" {
  resource_arn            = aws_wafv2_web_acl.main.arn
  log_destination_configs = [aws_kinesis_firehose_delivery_stream.waf.arn]

  logging_filter {
    default_behavior = "DROP"

    filter {
      behavior    = "KEEP"
      requirement = "MEETS_ANY"

      condition {
        action_condition {
          action = "BLOCK"
        }
      }

      # Rules or rule groups evaluated with a count action.
      condition {
        action_condition {
          action = "COUNT"
        }
      }

      # Rules inside a group whose action was overridden to count
      # (rule_action_override).
      condition {
        action_condition {
          action = "EXCLUDED_AS_COUNT"
        }
      }

      condition {
        action_condition {
          action = "CAPTCHA"
        }
      }

      condition {
        action_condition {
          action = "CHALLENGE"
        }
      }
    }
  }
}
Analyzing WAF Logs with Athena
-- Create Athena table over WAF logs in S3.
-- Only the fields used by the queries below are declared; log keys without a
-- matching column are ignored by the JSON SerDe.
-- `timestamp` is back-quoted because TIMESTAMP is a reserved word in Athena DDL;
-- the value is epoch milliseconds (the queries divide by 1000).
-- The top-level action column holds the TERMINATING action (ALLOW, BLOCK,
-- CAPTCHA, CHALLENGE) and is never 'COUNT'. Count matches are recorded in the
-- top-level nonterminatingmatchingrules array and inside rulegrouplist.
CREATE EXTERNAL TABLE IF NOT EXISTS waf_logs (
    `timestamp` BIGINT,
    formatversion INT,
    webaclid STRING,
    terminatingruleid STRING,
    terminatingruletype STRING,
    action STRING,
    httprequest STRUCT<
        clientip: STRING,
        country: STRING,
        uri: STRING,
        args: STRING,
        httpmethod: STRING,
        headers: ARRAY<STRUCT<name: STRING, value: STRING>>
    >,
    rulegrouplist ARRAY<STRUCT<
        rulegroupid: STRING,
        terminatingrule: STRUCT<ruleid: STRING, action: STRING>,
        nonterminatingmatchingrules: ARRAY<STRUCT<ruleid: STRING, action: STRING>>
    >>,
    nonterminatingmatchingrules ARRAY<STRUCT<ruleid: STRING, action: STRING>>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION 's3://your-bucket/waf-logs/'
TBLPROPERTIES ('has_encrypted_data' = 'false');

-- Find top rules triggering COUNT actions (false positive candidates).
-- Each web ACL rule (or rule group running with override_action count) that
-- matched without terminating the request appears in nonterminatingmatchingrules.
SELECT
    counted_rule.ruleid AS counted_rule_id,
    httprequest.uri AS uri,
    httprequest.clientip AS client_ip,
    COUNT(*) AS hit_count
FROM waf_logs
CROSS JOIN UNNEST(nonterminatingmatchingrules) AS m(counted_rule)
WHERE counted_rule.action = 'COUNT'
    AND from_unixtime("timestamp" / 1000) > now() - interval '24' hour
GROUP BY
    counted_rule.ruleid,
    httprequest.uri,
    httprequest.clientip
ORDER BY hit_count DESC
LIMIT 50;

-- Find legitimate IPs being blocked (compare with app access logs)
SELECT
    httprequest.clientip AS client_ip,
    COUNT(*) AS blocked_count,
    array_agg(DISTINCT httprequest.uri) AS blocked_uris
FROM waf_logs
WHERE action = 'BLOCK'
    AND from_unixtime("timestamp" / 1000) > now() - interval '7' day
GROUP BY httprequest.clientip
ORDER BY blocked_count DESC
LIMIT 20;

-- Identify which rule inside a counted rule group is causing the most false
-- positives. For a group running in count mode, the rule that would have
-- terminated the request is still logged as that group's terminatingrule, so
-- look at requests that were NOT ultimately blocked but had such a rule.
SELECT
    rg.rulegroupid AS rule_group_id,
    rg.terminatingrule.ruleid AS rule_id,
    COUNT(*) AS hit_count
FROM waf_logs
CROSS JOIN UNNEST(rulegrouplist) AS t(rg)
WHERE action <> 'BLOCK'
    AND rg.terminatingrule.ruleid IS NOT NULL
GROUP BY
    rg.rulegroupid,
    rg.terminatingrule.ruleid
ORDER BY hit_count DESC;
Writing Custom Rules for Your Application
# Custom rule: Rate limit by IP (anti-brute-force)
resource "aws_wafv2_web_acl" "main" {
  # ... existing config ...

  # ---------------------------------------------------------------------------
  # Priority 1 — per-IP rate limit, scoped to URIs under /api/.
  # Lowest priority number is evaluated first.
  # ---------------------------------------------------------------------------
  rule {
    priority = 1
    name     = "RateLimitPerIP"

    action {
      block {}
    }

    visibility_config {
      metric_name                = "RateLimitPerIP"
      cloudwatch_metrics_enabled = true
      sampled_requests_enabled   = true
    }

    statement {
      rate_based_statement {
        aggregate_key_type = "IP"
        limit              = 2000 # requests per 5-minute window

        # Only requests whose lower-cased path starts with /api/ are counted.
        scope_down_statement {
          byte_match_statement {
            search_string         = "/api/"
            positional_constraint = "STARTS_WITH"

            field_to_match {
              uri_path {}
            }

            text_transformation {
              type     = "LOWERCASE"
              priority = 0
            }
          }
        }
      }
    }
  }

  # ---------------------------------------------------------------------------
  # Priority 2 — block requests whose User-Agent contains a known scanner name.
  # ---------------------------------------------------------------------------
  rule {
    priority = 2
    name     = "BlockBadBots"

    action {
      block {}
    }

    visibility_config {
      metric_name                = "BlockBadBots"
      cloudwatch_metrics_enabled = true
      sampled_requests_enabled   = true
    }

    statement {
      or_statement {
        # User-Agent contains "zgrab" (compared after lower-casing).
        statement {
          byte_match_statement {
            search_string         = "zgrab"
            positional_constraint = "CONTAINS"

            field_to_match {
              single_header {
                name = "user-agent"
              }
            }

            text_transformation {
              type     = "LOWERCASE"
              priority = 0
            }
          }
        }

        # User-Agent contains "masscan" (compared after lower-casing).
        statement {
          byte_match_statement {
            search_string         = "masscan"
            positional_constraint = "CONTAINS"

            field_to_match {
              single_header {
                name = "user-agent"
              }
            }

            text_transformation {
              type     = "LOWERCASE"
              priority = 0
            }
          }
        }
      }
    }
  }

  # ---------------------------------------------------------------------------
  # Priority 3 — allow-list for trusted sources (office, CI/CD, monitoring).
  # Request sampling is switched off for this rule.
  # ---------------------------------------------------------------------------
  rule {
    priority = 3
    name     = "AllowTrustedIPs"

    action {
      allow {}
    }

    visibility_config {
      metric_name                = "AllowTrustedIPs"
      cloudwatch_metrics_enabled = true
      sampled_requests_enabled   = false
    }

    statement {
      ip_set_reference_statement {
        arn = aws_wafv2_ip_set.trusted_ips.arn
      }
    }
  }
}
# IPv4 allow-list referenced by the trusted-source rules.
resource "aws_wafv2_ip_set" "trusted_ips" {
  scope              = "REGIONAL"
  name               = "trusted-ips"
  ip_address_version = "IPV4"

  addresses = [
    # Office IP
    "203.0.113.10/32",
    # CI/CD NAT range
    "198.51.100.0/24",
    # VPC internal
    "10.0.0.0/8",
  ]
}
Handling Common False Positive Scenarios
# Scenario 1: Webhook payloads triggering SQLi rules
# (Stripe, GitHub, Shopify webhooks contain JSON that looks like SQL)
# Allows any request whose lower-cased URI path begins with /webhooks/.
# Priority 5 places it ahead of the managed SQLi group (lower number = earlier),
# and because allow is a terminating action, later rules are not evaluated for
# these requests.
rule {
  priority = 5
  name     = "ExcludeWebhookPaths"

  action {
    allow {}
  }

  visibility_config {
    metric_name                = "AllowWebhooks"
    cloudwatch_metrics_enabled = true
    sampled_requests_enabled   = true
  }

  statement {
    byte_match_statement {
      search_string         = "/webhooks/"
      positional_constraint = "STARTS_WITH"

      field_to_match {
        uri_path {}
      }

      text_transformation {
        type     = "LOWERCASE"
        priority = 0
      }
    }
  }
}
# Scenario 2: Admin panel triggering XSS rules
# (HTML editors, rich text fields)
# Regex set matching the admin editor paths: /admin/content/, /admin/pages/
# and /admin/posts/ followed by anything.
resource "aws_wafv2_regex_pattern_set" "admin_paths" {
  scope = "REGIONAL"
  name  = "admin-paths"

  regular_expression {
    regex_string = "^/admin/(content|pages|posts)/.*"
  }
}
# Allows admin-editor requests, but only when BOTH conditions hold:
#   1. the lower-cased URI path matches the admin_paths regex set, and
#   2. the source address is in the trusted_ips IP set.
rule {
  priority = 6
  name     = "RelaxAdminEditor"

  action {
    allow {}
  }

  visibility_config {
    metric_name                = "RelaxAdminEditor"
    cloudwatch_metrics_enabled = true
    sampled_requests_enabled   = true
  }

  statement {
    and_statement {
      # Condition 1: path is one of the admin editor routes.
      statement {
        regex_pattern_set_reference_statement {
          arn = aws_wafv2_regex_pattern_set.admin_paths.arn

          field_to_match {
            uri_path {}
          }

          text_transformation {
            type     = "LOWERCASE"
            priority = 0
          }
        }
      }

      # Condition 2: restrict the relaxed handling to admin (trusted) IPs.
      statement {
        ip_set_reference_statement {
          arn = aws_wafv2_ip_set.trusted_ips.arn
        }
      }
    }
  }
}
Geo-Blocking and Bot Control
# Geo-block countries you don't serve (reduces attack surface)
# Blocks every request whose country is NOT in the served-countries list
# (the geo match is wrapped in not_statement).
rule {
  priority = 8
  name     = "GeoBlock"

  action {
    block {}
  }

  visibility_config {
    metric_name                = "GeoBlock"
    cloudwatch_metrics_enabled = true
    sampled_requests_enabled   = true
  }

  statement {
    not_statement {
      statement {
        geo_match_statement {
          # Countries that are served; everything else is blocked.
          country_codes = [
            "US",
            "CA",
            "GB",
            "AU",
            "DE",
            "FR",
            "NL",
            "SG",
            "AE",
            "SA",
            "PK",
            "IN",
          ]
        }
      }
    }
  }
}
# AWS Bot Control (paid add-on — worth it for high-traffic apps)
# AWS Bot Control managed rule group, enforcing the group's own rule actions.
rule {
  name     = "BotControl"
  priority = 9

  override_action {
    none {} # Use rule group's own actions
  }

  statement {
    managed_rule_group_statement {
      vendor_name = "AWS"
      name        = "AWSManagedRulesBotControlRuleSet"

      managed_rule_group_configs {
        aws_managed_rules_bot_control_rule_set {
          inspection_level = "TARGETED" # vs COMMON — more thorough
        }
      }

      # Verified bots (Google, Bing, etc.): no rule_action_override is set.
      # NOTE(review): the Bot Control category rules (e.g. CategorySearchEngine,
      # CategorySocialMedia) are documented as acting only on UNVERIFIED bots,
      # so verified crawlers should not need an allow override — confirm
      # against the current rule list. "CategoryVerifiedSearchEngine" and
      # "CategoryVerifiedSocialMedia" are not rule names in this group, and an
      # allow override on the real category rules would also let through
      # unverified clients that merely claim to be a search engine.
      # If an override is ever needed, use the multi-line form:
      #   rule_action_override {
      #     name = "<rule name>"
      #     action_to_use {
      #       count {}
      #     }
      #   }
    }
  }

  visibility_config {
    cloudwatch_metrics_enabled = true
    metric_name                = "BotControl"
    sampled_requests_enabled   = true
  }
}
Monitoring and Alerting
# CloudWatch alarms for WAF events
# Fires when the web ACL blocks more than 1000 requests per 5-minute period
# for two consecutive periods, summed across all rules.
resource "aws_cloudwatch_metric_alarm" "waf_block_spike" {
  alarm_name        = "WAF-Block-Spike"
  alarm_description = "WAF blocked requests spike — possible attack or false positive wave"
  alarm_actions     = [aws_sns_topic.alerts.arn]

  # Metric selection
  namespace   = "AWS/WAFV2"
  metric_name = "BlockedRequests"
  dimensions = {
    WebACL = aws_wafv2_web_acl.main.name
    Rule   = "ALL"
    Region = "us-east-1"
  }

  # Evaluation: Sum over 300 s periods, 2 periods in a row above threshold
  statistic           = "Sum"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = 1000
}
# Fires when the RateLimitPerIP rule blocks more than 100 requests within a
# single 60-second period.
resource "aws_cloudwatch_metric_alarm" "waf_rate_limit_triggers" {
  alarm_name        = "WAF-RateLimit-Triggers"
  alarm_description = "Many IPs hitting rate limit — possible distributed attack"
  alarm_actions     = [aws_sns_topic.alerts.arn]

  namespace = "AWS/WAFV2"
  # The CloudWatch metric is BlockedRequests; the individual rule is selected
  # through the Rule dimension below. Using the rule's name as metric_name
  # points the alarm at a metric that does not exist, so it would never fire.
  metric_name = "BlockedRequests"
  dimensions = {
    Region = "us-east-1" # NOTE(review): must be the region the REGIONAL web ACL is deployed in
    Rule   = "RateLimitPerIP"
    WebACL = aws_wafv2_web_acl.main.name
  }

  statistic           = "Sum"
  period              = 60
  evaluation_periods  = 1
  comparison_operator = "GreaterThanThreshold"
  threshold           = 100
}
Transition from Count to Block Mode
# After 1-2 weeks of count mode, analyze and switch
# Query Athena to confirm low false positive rate:
# If less than 0.1% of COUNT hits are legitimate traffic, switch to BLOCK
# Step 1: Identify false positive IPs and add to allowlist
# Step 2: Add path exclusions for webhook endpoints
# Step 3: Override noisy rules (SizeRestrictions_BODY, etc.) to COUNT
# Step 4: Change rule group override_action from count {} to none {}
# Step 5: Monitor for 24 hours with alarms active
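# Rollback plan: keep Terraform state, and drive override_action from a variable
# (e.g. waf_mode = "count" | "block" — you must define and wire this variable
# yourself) so that one command reverts to count mode:
# terraform apply -var="waf_mode=count"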
Conclusion
A well-tuned AWS WAF follows the traffic: start in Count mode, analyze with Athena, build exclusions for your legitimate traffic patterns, then gradually enable blocking. The goal is a WAF that is invisible to real users but a wall to attackers.
The most common mistakes are deploying managed rules cold without analysis, not logging all traffic initially, and failing to maintain an IP allowlist for trusted sources like monitoring services, CI/CD pipelines, and partner integrations. Invest 2 weeks in count mode analysis and you will have a WAF configuration that runs cleanly for years.
Sarah Chen
Senior Cybersecurity Engineer with 12+ years of experience in penetration testing and security architecture.