Daily Spend Alert

In an AWS environment, cost control is very important. It's all too easy to start something, forget about it and be confronted with a huge bill at the end of the month.

Of course there are several standard tricks you can employ. At a very minimum, you can set a "Billing Alert" on the "Total Estimated Cost" of your organization. But that alarm is only triggered once the monthly maximum has been exceeded, and by then it might be too late. You can also set up budget reports, but these are sent every day, week or month even when nothing is wrong, so they tend to get ignored.

What you really want is a check on a daily basis, with granular resolution. That's what this script is about.

This script uses the Cost Explorer API to make a list of all Unblended Costs that were incurred yesterday, grouped by account. It then enriches this information with the account names taken from AWS Organizations, and it compares each account's Unblended Cost with a maximum for that account, which is taken from an environment variable. If any single account reached or exceeded its maximum, or if the total reached or exceeded the overall maximum, an alert is mailed (through SNS) to the administrator, who can then take action.

First, here is the Lambda script (NodeJS 12.x)

var AWS = require('aws-sdk');
var costexplorer = new AWS.CostExplorer();
var sns = new AWS.SNS();
var organizations = new AWS.Organizations();

// Module-level state shared by the functions below.
//
// today / yesterday: date strings (the part of an ISO timestamp before the 'T').
// They start out empty and are (re-)initialized by the main handler at the start
// of every invocation.
var today="";
var yesterday="";

// Spending limits, parsed from the MAX_SPEND environment variable. Unlike the dates
// above, this is evaluated once, when the module is loaded, and not on every
// invocation. JSON.parse throws if the variable is unset or does not contain valid JSON.
var max_spend = JSON.parse( process.env.MAX_SPEND );

exports.handler = async (event, context) => {
    console.log( "Main: started" );
    
    // (Re-)initialize global variables
    today = new Date().toISOString().split('T')[0];
    yesterday = new Date((new Date()).valueOf() - 1000*60*60*24).toISOString().split('T')[0];
    //console.log( "Today: " + today );
    //console.log( "Yesterday: " + yesterday );
    
    // Get cost data and list of all accounts, in parallel
    var costDataPromise = getCostData();
    var accountDataPromise = getAccountData();
    var r = await Promise.all( [costDataPromise, accountDataPromise] );
    var costData = r[0];
    var accountData = r[1];
    //console.log( "Main: costData: ", costData );
    //console.log( "Main: accountData: ", accountData );
    
    console.log( "Main: All promises fulfilled" );

    // Enrich costData with accountData and maximum
    var max_exceeded = false;
    for( var i=0; i<costData.Accounts.length; i++ ) {
        if( accountData[costData.Accounts[i].Account] ) {
            costData.Accounts[i].Name = accountData[costData.Accounts[i].Account];
        }
        if( max_spend[costData.Accounts[i].Account] ) {
            // There is a max for this account
            costData.Accounts[i].Max = max_spend[costData.Accounts[i].Account];
            if( costData.Accounts[i].UnblendedCost >= costData.Accounts[i].Max ) {
                max_exceeded = true;
            }
        }
    }
    costData.Max = max_spend.Total;
    console.log( "Main: costData after enrichment:", costData );
    
    // If cost exceeded, send email
    if( costData.UnblendedCost >= parseFloat( max_spend.Total ) || max_exceeded ) {
        console.log( "ALERT! Maximum daily cost exceeded." );
        await sendAlert( costData );
    }
   
    // Return costData
    // As this function is normally called from CW Events, these results will probably be ignored
    return(costData);
};

/**
 * Retrieves the unblended cost per linked account for the period from `yesterday`
 * (inclusive) to `today` (exclusive), with credits and refunds filtered out.
 *
 * Reads the module-level `yesterday` and `today` strings and uses the module-level
 * `costexplorer` client. All result pages are followed, so accounts that Cost
 * Explorer returns on a later page are counted as well.
 *
 * @returns {Promise<{UnblendedCost: number, Accounts: Array<{Account: string, UnblendedCost: number}>}>}
 *   The summed cost and one entry per returned account group. When Cost Explorer
 *   returns no data, the sum is 0 and the list is empty.
 */
const getCostData = async () => {
    console.log( "getCostData: Started" );

    const params = {
        TimePeriod: {
            Start: yesterday,
            End: today
        },
        Granularity: 'DAILY',
        GroupBy: [
            {
                Type: 'DIMENSION',
                Key: 'LINKED_ACCOUNT',
            }
        ],
        Metrics: [
            'UNBLENDED_COST',
        ],
        Filter: {
            Not: {
                Dimensions: {
                    Key: 'RECORD_TYPE',
                    Values: ['Credit', 'Refund']
                }
            }
        }
    };

    const result = {
        UnblendedCost: 0,
        Accounts: []
    };

    // Follow NextPageToken until the last page; the first request carries no token.
    let nextPageToken;
    do {
        const request = nextPageToken
            ? Object.assign( {}, params, { NextPageToken: nextPageToken } )
            : params;
        const page = await costexplorer.getCostAndUsage( request ).promise();

        // A one-day window with DAILY granularity is expected to give a single
        // period per page; tolerate a missing or empty list instead of throwing.
        for( const period of page.ResultsByTime || [] ) {
            for( const group of period.Groups || [] ) {
                const amount = parseFloat( group.Metrics.UnblendedCost.Amount );

                result.UnblendedCost += amount;
                result.Accounts.push( {
                    Account: group.Keys[0],
                    UnblendedCost: amount
                } );
            }
        }
        nextPageToken = page.NextPageToken;
    } while( nextPageToken );
    console.log( "getCostData: getCostAndUsage is finished." );

    console.log( "getCostData: returning" );
    // An async function already wraps its return value in a promise.
    return result;
};

/**
 * Builds a lookup table from account id to account name using the module-level
 * `organizations` client.
 *
 * listAccounts returns its results in pages, so the NextToken of each response is
 * followed until it is absent. Without that, only the accounts on the first page
 * would get a name.
 *
 * @returns {Promise<Object<string, string>>} Map of account id -> account name.
 */
const getAccountData = async () => {
    console.log( "getAccountData: Started" );

    const result = {};

    let nextToken;
    do {
        // The first request carries no token.
        const params = nextToken ? { NextToken: nextToken } : {};
        const page = await organizations.listAccounts( params ).promise();

        // A page may legitimately be empty while more pages follow.
        for( const account of page.Accounts || [] ) {
            result[account.Id] = account.Name;
        }
        nextToken = page.NextToken;
    } while( nextToken );
    console.log( "getAccountData: listAccounts finished." );

    console.log( "getAccountData: returning results" );
    // An async function already wraps its return value in a promise.
    return result;
};

/**
 * Publishes the spending alert to the SNS topic named by the TopicARN environment
 * variable, using the module-level `sns` client and the module-level `yesterday`
 * date string.
 *
 * The message lists the organization total (when it reached costData.Max) and every
 * account whose cost reached its own Max, followed by the full costData as JSON.
 *
 * @param {object} costData - Enriched cost data:
 *   { UnblendedCost, Max, Accounts: [ { Account, UnblendedCost, Name?, Max? } ] }.
 *   It is not modified.
 * @returns {Promise<object>} The response of the SNS publish call.
 */
const sendAlert = async ( costData ) => {
    console.log( "sendAlert: Started" );

    // Create summary text. This goes into the email body.
    let summary_text = "The following spending limits were exceeded on " + yesterday + ":\n\n";

    if( costData.UnblendedCost >= costData.Max ) {
        // Report the same value that the condition above tested.
        summary_text += "Organization max: " + parseFloat( costData.Max ).toFixed(2) + " actual: " + costData.UnblendedCost.toFixed(2) + "\n";
    }
    for( const account of costData.Accounts ) {
        // Max comes from hand-written JSON and may be a numeric string; convert it
        // once so that .toFixed() cannot throw. A missing Max becomes NaN, which
        // makes the comparison false, exactly like comparing against undefined.
        const limit = Number( account.Max );
        if( account.UnblendedCost >= limit ) {
            // Accounts that were not found in the account list have no Name.
            const name = account.Name || "unknown";
            summary_text += "Account: " + account.Account + " (" + name + ") max: " + limit.toFixed(2) + " actual: " + account.UnblendedCost.toFixed(2) + "\n";
        }
    }
    summary_text += "\nDetails:\n";
    summary_text += JSON.stringify( costData, null, '\t' );

    const params = {
        TopicArn: process.env.TopicARN,
        Subject: "AWS Daily Spending limit exceeded on " + yesterday + " (" + costData.UnblendedCost.toFixed(2) + " USD)",
        Message: summary_text
    };

    // Wait for the publish to complete before reporting that we are done.
    const response = await sns.publish( params ).promise();
    console.log( "sendAlert: finished." );
    return response;
};

To implement this properly, you need to do a few more things:

  • The script is designed to run in your root account. If you want to run the script from a non-root account, you need to set up the appropriate role switch to the root account inside the script, and support this with the required IAM policies.
  • The script requires two environment variables to be set up:
    • TopicARN is the ARN of the SNS topic where alerts need to be sent to.
    • MAX_SPEND is a JSON structure with maximums overall and per account. It will look like this:
      {
        "Total": 60,
        "024440473875": 12.5,
        "071615624272": 50
      }
      

      Obviously you will need to remove the tabs and newline characters as all of this needs to fit on one line. The Lambda uses JSON.parse() to interpret the string, so you need to make sure you're compatible with that.

      The list does not need to be account-complete: If account information is missing then the script simply does not take that maximum into account.

  • The script requires a role with the right IAM policy to allow these three API calls: organizations:ListAccounts, ce:GetCostAndUsage and sns:Publish. That policy would look like this:
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": "organizations:ListAccounts",
                "Resource": "*"
            },
    	{
                "Effect": "Allow",
                "Action": "ce:GetCostAndUsage",
                "Resource": "*"
            },
    	{
                "Effect": "Allow",
                "Action": "sns:Publish",
                "Resource": "---arn of your topic---"
            }
        ]
    }
    

    Of course you can split this across multiple policies if you want to.

  • You will need to set up a schedule in CloudWatch Events so that this Lambda runs after the billing information is complete.

    The billing cycle of AWS seems to work in eight-hour increments. So the information for things that cost money immediately, such as API calls, is gathered up per region and sent to North Virginia once every eight hours. Costs "per day" are calculated in the morning of the next day, and the information seems to be complete by about noon UTC. So I have set up my script to run daily at 12:00 UTC.

Feel free to use this script, or to modify it to your own needs.