The CI/CD pipeline consists of CodeCommit as the repository, CodePipeline as the orchestrator and Amazon S3 as the destination. When you git-push the project code to CodeCommit, it triggers the pipeline. If you are only committing the HTML site directly to the repo, the workflow is fairly straightforward:
- Grab the source code from CodeCommit
- Deploy to the S3 destination (unzipped)
- Invalidate CloudFront distribution (using a Lambda)
The problem is that in my case, the static site was part of a CDK project, so the actual HTML files lived in a subdirectory called “src”. The above workflow resulted in the CDK files and folders being deployed to S3 and the website ending up in a subdirectory. Neither the CodeCommit source action nor the S3 deploy action supports selecting a subdirectory, so I considered three options to solve this:
- Point the CloudFront distribution at the /src path (see the sketch after this list). This seemed hacky and it would mean that the CDK files would still all be in the S3 bucket, which could be a security concern.
- Add a build step to grab the contents of the src folder and pass it to the next step in the pipeline. This seemed overkill for a site that did not need building and would add unnecessary latency.
- Add a step after CodeCommit and trigger a Lambda to update the Artifact zip with only the files from the src subdirectory. This is the option I picked.
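For reference, option 1 would roughly have meant adding an originPath to the distribution's origin configuration, as in the sketch below. This is not what I deployed; it reuses the webstore bucket and cloudfront module that are set up further down, and the variable name is just illustrative.
// NOT the approach used in this project - a sketch of option 1, pointing the origin at /src
const cdnOption1 = new cloudfront.CloudFrontWebDistribution(this, "my-project-cdn", {
    originConfigs: [{
        customOriginSource: {
            domainName: webstore.bucketWebsiteDomainName,
            originProtocolPolicy: cloudfront.OriginProtocolPolicy.HTTP_ONLY
        },
        originPath: "/src", // serve everything from the src subdirectory of the bucket
        behaviors: [{ isDefaultBehavior: true }]
    }]
});
Everything from here on implements option 3.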
The following is the code, written in plain JavaScript directly in the CDK lib file for simplicity.
We will need the following libraries:
const cdk = require('@aws-cdk/core');
const codecommit = require("@aws-cdk/aws-codecommit");
const iam = require("@aws-cdk/aws-iam");
const s3 = require("@aws-cdk/aws-s3");
const lambda = require("@aws-cdk/aws-lambda");
const logs = require("@aws-cdk/aws-logs");
const cloudfront = require("@aws-cdk/aws-cloudfront");
const codepipeline = require("@aws-cdk/aws-codepipeline");
const codepipelineactions = require("@aws-cdk/aws-codepipeline-actions");
First we create the CodeCommit repo.
// codecommit repo
const repo = new codecommit.Repository(this, "my-project-repo", {
    repositoryName: "my-project"
});
After deploying this for the first time, we add the remote URL to our local Git repo with the following commands:
git remote add project_name codecommit_url
git add .
git commit -m "first commit"
git push project_name master
Then we create an S3 bucket for the website and an S3 bucket for deployments.
// s3 - website
const webstore = new s3.Bucket(this, "my-project-webstore", {
    websiteIndexDocument: "index.html",
    websiteErrorDocument: "index.html",
    publicReadAccess: true
});
const deploystore = new s3.Bucket(this, "my-project-deploystore", {
    publicReadAccess: false
});
Our CloudFront distribution for the website
// cloudfront
const cdn = new cloudfront.CloudFrontWebDistribution(this, "my-project-cdn", {
    originConfigs: [{
        customOriginSource: { // we use this instead of s3 origin as this will make subfolders automatically load their index.html
            domainName: webstore.bucketWebsiteDomainName, // check the configuration after deploying to make sure the url has "website" in it
            originProtocolPolicy: cloudfront.OriginProtocolPolicy.HTTP_ONLY
        },
        behaviors: [{
            allowedMethods: cloudfront.CloudFrontAllowedMethods.GET_HEAD,
            compress: true,
            isDefaultBehavior: true
        }]
    }],
    aliasConfiguration: {
        acmCertRef: "arn:aws:acm:us-east-1:ACCOUNT:CERTID",
        names: ["www.example.com"]
    },
    defaultRootObject: "index.html",
    priceClass: cloudfront.PriceClass.PRICE_CLASS_ALL,
    viewerProtocolPolicy: cloudfront.ViewerProtocolPolicy.REDIRECT_TO_HTTPS
});
Permissions for our deployment src extractor Lambda function
let lambdapolicy = new iam.PolicyStatement({
    effect: iam.Effect.ALLOW,
    resources: [
        "arn:aws:logs:*",
        deploystore.bucketArn,
        deploystore.bucketArn + "/*",
        "arn:aws:codepipeline:REGION:ACCOUNT:myproject*"
    ],
    actions: [
        "logs:CreateLogStream",
        "logs:PutLogEvents",
        "logs:CreateLogGroup",
        "s3:PutObject",
        "s3:GetObject",
        "codepipeline:PutJobFailureResult",
        "codepipeline:PutJobSuccessResult"
    ]
});
let lambdarole = new iam.Role(this, "my-project-role", { assumedBy: new iam.ServicePrincipal("lambda.amazonaws.com") });
lambdarole.addToPolicy(lambdapolicy);
The deployment src extractor Lambda function config (source code for this Lambda will be after all of the CDK config)
const deploylambda = new lambda.Function(this, "myproject-deploysrc", {
    description: "Microservice to extract site code for deployment",
    runtime: lambda.Runtime.NODEJS_12_X,
    handler: "index.handler",
    code: lambda.Code.fromAsset("lambda/"),
    role: lambdarole,
    memorySize: 512,
    timeout: cdk.Duration.seconds(600),
    logRetention: logs.RetentionDays.THREE_DAYS,
});
CodePipeline configuration
const sourceOutput = new codepipeline.Artifact();
const pipeline = new codepipeline.Pipeline(this, 'my-project-pipeline', {
    pipelineName: "myproject",
    artifactBucket: deploystore,
    stages: [
        {
            stageName: 'Source',
            actions: [
                new codepipelineactions.CodeCommitSourceAction({
                    actionName: 'CodeCommit',
                    repository: repo,
                    branch: "master",
                    output: sourceOutput
                })
            ],
        },
        {
            stageName: 'DeployPrep',
            actions: [
                new codepipelineactions.LambdaInvokeAction({
                    actionName: 'InvokeLambda',
                    lambda: deploylambda,
                    inputs: [sourceOutput] // no output artifact needed - the Lambda overwrites the source artifact in place
                })
            ],
        },
        {
            stageName: 'Deploy',
            actions: [
                new codepipelineactions.S3DeployAction({
                    actionName: 'S3Deploy',
                    input: sourceOutput,
                    bucket: webstore,
                    extract: true
                })
            ],
        },
        {
            stageName: 'InvalidateCloudfront',
            actions: [
                new codepipelineactions.LambdaInvokeAction({
                    actionName: 'InvokeLambda',
                    lambda: lambda.Function.fromFunctionArn(this, "cloudfrontInvalidator", "ARN FOR AN EXISTING LAMBDA IN MY AWS ACCOUNT"),
                    userParameters: cdn.distributionId
                })
            ],
        },
    ],
});
The website HTML files go into a “src” subdirectory and the src extractor Lambda goes into a “lambda” subdirectory.
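To make that concrete, the project directory ends up looking roughly like this (file and folder names other than “src” and “lambda” are just illustrative of a standard CDK layout):
my-project/
├── bin/                 // CDK app entry point
├── lib/                 // the CDK stack code shown above
├── lambda/
│   └── index.js         // the src extractor Lambda below
└── src/
    ├── index.html       // the actual website
    └── ...
The following is the source code for the Lambda function.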
We will need the following libraries. It is best practice to install the AWS SDK into your project rather than depend on the SDK available by default on Lambda.
// aws libs
const AWS = require('aws-sdk'); // npm install
const s3 = new AWS.S3({apiVersion: "2006-03-01"});
const codepipeline = new AWS.CodePipeline({apiVersion: '2015-07-09'});
// built-in libs
const stream = require('stream');
const crypto = require('crypto');
// included libs
const yauzl = require("yauzl"); // npm install
const yazl = require("yazl"); // npm install
The main handler grabs a few parameters that we need from the CodePipeline event, stores them where the other workflow functions can reach them, and then runs the workflow, which is a series of promise functions.
// shared with the other workflow functions below
let bucket, key, id;

exports.handler = async function(event){
    // bucket and key where we can find the input artifact
    bucket = event["CodePipeline.job"].data.inputArtifacts[0].location.s3Location.bucketName;
    key = event["CodePipeline.job"].data.inputArtifacts[0].location.s3Location.objectKey;
    // job ID, which we need to update the status at the end
    id = event["CodePipeline.job"].id;
    // run workflow
    return getArtifact(bucket, key)
        .then(readZip)
        .then(getSrcZip)
        .then(storeSrcArtifact)
        .then(jobSuccess)
        .catch(jobFailed);
};
Next, we will go through these functions one by one. The first one simply retrieves the artifact from S3 using the bucket and key provided in the event.
// retrieve artifact from S3
function getArtifact(bucket, key){
    return s3.getObject({Bucket: bucket, Key: key}).promise().then(res => {
        return res.Body;
    });
}
We then use yauzl to unzip the file. Unlike gzip, zip files cannot be streamed due to how they are structured, so this function will only work if your application package fits into the configured Lambda memory (512MB in this case, as per the CDK config above).
// read the zipfile from the data
function readZip(zipdata){
    return new Promise(function(resolve, reject){
        // read the zip from buffer (entire zip, this cannot be streamed)
        yauzl.fromBuffer(zipdata, {lazyEntries: true}, (err, zip) => {
            if(err) {
                console.log("Error accessing artifact: ", err);
                return reject("Error accessing artifact");
            }
            resolve(zip);
        });
    });
}
Now for the tricky part. We loop through each file in the zip, skipping those that do not start with “src”. We extract each file and add it to a new zip created with yazl. It does seem a bit inefficient to decompress from the old zip and then compress again into the new zip; if someone knows how to copy across the already-compressed entries, please let me know! Once all “src” files have been added, the function returns the new zip object.
// get all files from the src/ subdir and place them into a new zip
function getSrcZip(zip) {
    return new Promise(function(resolve, reject){
        // new zip that will contain only src
        const newzip = new yazl.ZipFile();
        // read each item in the zip
        zip.readEntry();
        zip.on("entry", function(entry){
            // we only want files in the src dir, skip others
            if(entry.fileName.substr(0, 3) !== "src") return zip.readEntry();
            // extract file
            zip.openReadStream(entry, {decompress: entry.isCompressed() ? true : null}, function (err, readStream) {
                if(err){
                    zip.close();
                    console.log("Failed to read file in artifact: ", err);
                    return reject("Failed to read file in artifact");
                }
                // collect data into buffer
                let buffer = null;
                readStream.on('data', function(d) {
                    if(!buffer){
                        buffer = d;
                    }else{
                        buffer = Buffer.concat([buffer, d]);
                    }
                });
                // file data collection completed
                readStream.on('end', function () {
                    // here we could inject some date or other info into a file
                    // or implement some security or other controls
                    // add it to the new zip (without the src dir in the path)
                    newzip.addBuffer(buffer, entry.fileName.substr(4));
                    // continue to next entry
                    zip.readEntry();
                });
                // fail on error
                readStream.on('error', function (err) {
                    zip.close();
                    console.log("Failed to extract file from artifact: ", err);
                    return reject("Failed to extract file from artifact");
                });
            });
        });
        // all items processed
        zip.on("end", function(){
            // all files added
            newzip.end();
            // resolve
            resolve(newzip);
        });
        // any error
        zip.on("error", function(err){
            reject(err);
        });
    });
}
Yazl outputs a stream, so we use the stream library to wrap the S3 upload and stream the file to S3. We overwrite the original artifact file with our new version so the pipeline can simply treat it as a new revision.
// store artifact
function storeSrcArtifact(newzip){
    // create stream destination
    const { writeStream, promise } = uploadArtifactSrc(bucket, key);
    // stream to s3
    newzip.outputStream.pipe(writeStream);
    // function for creating s3 stream destination
    function uploadArtifactSrc(Bucket, Key){
        const pass = new stream.PassThrough();
        return {
            writeStream: pass,
            promise: s3.upload({ Bucket, Key, Body: pass }).promise(),
        };
    }
    // return the s3 promise
    return promise;
}
Assuming all went well above, we can now update CodePipeline and let it know that the step was completed successfully.
// completed pipeline job
function jobSuccess(res){
    return codepipeline.putJobSuccessResult({
        jobId: id, // set at the start of the handler
        currentRevision: {
            changeIdentifier: crypto.randomBytes(20).toString('hex'), // random hash
            revision: res.ETag, // etag from the updated artifact file
            created: new Date().getTime(),
            revisionSummary: 'Extracted website src files from artifact'
        }
    }).promise().then(res => {
        return "success";
    });
}
If anything went wrong in any of the steps above, then we want to let CodePipeline know so it can show the error (which could trigger an SNS notification to let the developer know; a sketch of wiring that up follows the code below).
// failed pipeline job
function jobFailed(err){
    console.log("Pipeline job 'Extract Src' has failed: ", err);
    // err may be an Error object or one of the plain string rejections used above
    const msg = (err && err.message) ? err.message : err;
    return codepipeline.putJobFailureResult({
        failureDetails: {
            message: "Pipeline job 'Extract Src' has failed with error: " + msg,
            type: "JobFailed"
        },
        jobId: id,
    }).promise().then(() => { return "Pipeline job 'Extract Src' has failed with error: " + msg; });
}
You can see how this extractor function is quite generic and could easily be reused across multiple projects. I have done exactly that with a CloudFront invalidation function that I share between projects. Above, in the CDK config for CodePipeline, you can see that the distribution ID is passed as a user parameter in the fourth stage; that parameter tells the shared invalidator Lambda which distribution to invalidate, which is what makes it reusable. The following is the source code for that Lambda function.
const AWS = require("aws-sdk");
const cloudfront = new AWS.CloudFront({apiVersion: '2020-05-31'});
const codepipeline = new AWS.CodePipeline({apiVersion: '2015-07-09'});

exports.handler = async (event, context) => {
    // get the jobId (needed to report success/failure)
    let jobId = event["CodePipeline.job"].id;
    // get & validate distribution id, filter out quotes that CDK adds
    let cid = "";
    try {
        cid = event["CodePipeline.job"].data.actionConfiguration.configuration.UserParameters.replace(/"/g, "");
    } catch(err){
        return jobFailed("invalid distribution id");
    }
    if(!cid) return jobFailed("invalid distribution id");
    // create cloudfront invalidation
    return cloudfront.createInvalidation({
        DistributionId: cid,
        InvalidationBatch: {
            CallerReference: 'invalidateAll' + new Date().getTime(), // needs to be unique each time
            Paths: {
                Quantity: 1,
                Items: [
                    '/*' // you pay per path invalidated and a wildcard counts as a single path, so this is usually the easiest option for small static sites
                ]
            }
        }
    }).promise().then(res => {
        return jobSucceeded();
    }).catch(err => {
        return jobFailed("invalid distribution id"); // probably invalid distribution id
    });
    // job failed
    function jobFailed(msg){
        return codepipeline.putJobFailureResult({
            jobId: jobId,
            failureDetails: {
                message: JSON.stringify(msg),
                type: 'JobFailed'
            }
        }).promise().then(res => {
            return {statusCode: 400, body: msg};
        }).catch(err => {
            console.log(err);
            return {statusCode: 400, body: msg};
        });
    }
    // job succeeded
    function jobSucceeded(){
        return codepipeline.putJobSuccessResult({jobId: jobId}).promise().then(res => {
            return {statusCode: 200, body: "success"};
        }).catch(err => {
            console.log(err);
            return {statusCode: 400, body: "could not update job"};
        });
    }
};
And that's it. This was a fun thing to build and figure out. Next I will restructure it as a reusable CDK construct so I can easily add it to future projects. It could also be used for sites that you compile locally to avoid a build step, though if you are working within a team that would not be a best-practice approach.
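As a teaser for that, here is a minimal sketch of what such a construct might look like. The class name, props and defaults are all hypothetical, but the role and function simply mirror the CDK config above.
// hypothetical reusable construct wrapping the src extractor Lambda and its permissions
class SrcExtractorStep extends cdk.Construct {
    constructor(scope, id, props) {
        super(scope, id);
        // props.artifactBucket: the pipeline artifact bucket
        // props.pipelineArn: ARN (or ARN prefix) of the pipeline the Lambda reports back to
        const role = new iam.Role(this, "role", {
            assumedBy: new iam.ServicePrincipal("lambda.amazonaws.com")
        });
        role.addToPolicy(new iam.PolicyStatement({
            effect: iam.Effect.ALLOW,
            resources: [
                "arn:aws:logs:*",
                props.artifactBucket.bucketArn,
                props.artifactBucket.bucketArn + "/*",
                props.pipelineArn + "*"
            ],
            actions: [
                "logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents",
                "s3:GetObject", "s3:PutObject",
                "codepipeline:PutJobFailureResult", "codepipeline:PutJobSuccessResult"
            ]
        }));
        // expose the function so the stack can drop it into a LambdaInvokeAction
        this.function = new lambda.Function(this, "fn", {
            description: "Microservice to extract site code for deployment",
            runtime: lambda.Runtime.NODEJS_12_X,
            handler: "index.handler",
            code: lambda.Code.fromAsset(props.codePath || "lambda/"),
            role: role,
            memorySize: 512,
            timeout: cdk.Duration.seconds(600),
            logRetention: logs.RetentionDays.THREE_DAYS
        });
    }
}
module.exports = { SrcExtractorStep };
With something like that, adding the DeployPrep stage to a new pipeline becomes a couple of lines in any future stack.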