Skip to main content
aws in the trenches advanced cloud engineering for senior developers

Lambda Layers, Extensions, and Custom Runtimes

5 min read Chapter 11 of 21

Lambda Layers, Extensions, and Custom Runtimes

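Lambda’s 50 MB zipped / 250 MB unzipped deployment package limit is a hard constraint. Layers do not raise that ceiling — the unzipped function code plus all attached layers must still fit within 250 MB — but they let shared code (SDKs, libraries, utilities) be packaged, versioned, and deployed independently of your function code, which keeps each function’s own package small. Extensions hook into the execution environment lifecycle for observability, security, and configuration. Custom runtimes let you run any language that can speak HTTP to the Runtime API.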

Layer Architecture

A Layer is a .zip archive extracted into /opt/ in the execution environment. Up to 5 layers can be attached to a function, applied in order (later layers override earlier ones for file conflicts).

Execution Environment filesystem:
/var/task/           ← Your function code
/opt/                ← Layer content extracted here
/opt/python/         ← Python layer: automatically on sys.path
/opt/python/lib/python3.12/site-packages/
/opt/java/lib/       ← Java layer: automatically on classpath
/opt/bin/            ← Executable binaries
/opt/lib/            ← Shared libraries (LD_LIBRARY_PATH includes this)
# Building a Python layer with compiled dependencies
# This must be built on Amazon Linux 2023 (Lambda's runtime OS) or in Docker

# Dockerfile for building the layer
"""
FROM public.ecr.aws/lambda/python:3.12

RUN pip install \
    pandas==2.1.4 \
    numpy==1.26.3 \
    pyarrow==14.0.2 \
    -t /opt/python/

# Layer structure must be: python/lib/python3.12/site-packages/ OR just python/
# The simpler python/ prefix works for all Python versions
"""

import subprocess
import zipfile
import os

def build_layer(architecture='x86_64'):
    """Build, zip, and publish the data-science Lambda layer.

    The dependencies are installed inside the official Lambda Python 3.12
    image so that compiled wheels match Lambda's OS, then the ``layer/``
    directory is zipped and published as a new version of the
    ``data-science-deps`` layer.

    Args:
        architecture: Lambda architecture to build for, ``'x86_64'`` or
            ``'arm64'``. pandas and numpy ship native code, so a single
            build is only valid for one architecture; the layer is
            published as compatible with that architecture only.

    Returns:
        The ARN of the published layer version.

    Raises:
        ValueError: If ``architecture`` is not a supported value.
        subprocess.CalledProcessError: If the Docker build fails.
    """
    docker_platforms = {'x86_64': 'linux/amd64', 'arm64': 'linux/arm64'}
    if architecture not in docker_platforms:
        raise ValueError(
            f"Unsupported architecture {architecture!r}; "
            f"expected one of {sorted(docker_platforms)}"
        )

    # Build inside Docker to match Lambda's OS.
    # The Lambda base image's entrypoint expects a single handler name, so
    # the entrypoint is overridden to run pip directly.
    subprocess.run([
        'docker', 'run', '--rm',
        '--platform', docker_platforms[architecture],
        '--entrypoint', 'pip',
        '-v', f'{os.getcwd()}/layer:/opt/layer',
        'public.ecr.aws/lambda/python:3.12',
        'install',
        'pandas==2.1.4', 'numpy==1.26.3',
        '-t', '/opt/layer/python/'
    ], check=True)

    # Zip the layer. Archive names are taken relative to 'layer' so that
    # only the leading directory is dropped; a plain str.replace would also
    # mangle any nested path that happens to contain "layer/".
    with zipfile.ZipFile('data-science-layer.zip', 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _dirs, files in os.walk('layer'):
            for file in files:
                filepath = os.path.join(root, file)
                arcname = os.path.relpath(filepath, 'layer')
                zf.write(filepath, arcname)

    # Publish
    import boto3
    lambda_client = boto3.client('lambda')

    with open('data-science-layer.zip', 'rb') as f:
        response = lambda_client.publish_layer_version(
            LayerName='data-science-deps',
            Content={'ZipFile': f.read()},
            CompatibleRuntimes=['python3.12'],
            CompatibleArchitectures=[architecture],
            Description='pandas + numpy'
        )

    print(f"Published layer: {response['LayerVersionArn']}")
    return response['LayerVersionArn']
// Java layers: JAR files in java/lib/ are automatically added to classpath
// Build with Maven or Gradle, then package the dependencies

/*
Layer ZIP structure for Java:
java/
  lib/
    jackson-core-2.16.0.jar
    jackson-databind-2.16.0.jar
    aws-sdk-dynamodb-2.24.0.jar
    ... (all dependency JARs)
*/

// build.gradle for producing layer content:
/*
plugins {
    id 'java'
}

task buildLayer(type: Copy) {
    from configurations.runtimeClasspath
    into "${buildDir}/layer/java/lib"
}

task packageLayer(type: Zip, dependsOn: buildLayer) {
    from "${buildDir}/layer"
    archiveFileName = "java-deps-layer.zip"
    destinationDirectory = file("${buildDir}/distributions")
}
*/

Lambda Extensions

External extensions run as separate processes in the execution environment (internal extensions run inside the runtime process instead), receiving lifecycle events from the Lambda Extensions API:

# External Extension: Runs as a separate process alongside your function
# File: extensions/telemetry-extension (must be executable)

#!/usr/bin/env python3
"""
External extension for collecting and shipping telemetry.
This script runs as a separate process in the execution environment.
"""
import os
import sys
import signal
import requests
import json
from threading import Thread
from queue import Queue

# Base URL of the Extensions API (version 2020-01-01), built from the
# AWS_LAMBDA_RUNTIME_API environment variable. A missing variable raises
# KeyError as soon as this module is loaded.
LAMBDA_EXTENSION_API = f"http://{os.environ['AWS_LAMBDA_RUNTIME_API']}/2020-01-01/extension"

class TelemetryExtension:
    """External extension that batches telemetry records and ships them over HTTP.

    Lifecycle: ``register()`` once, then loop on ``next_event()``. Each
    INVOKE event triggers a flush of whatever is queued; SHUTDOWN triggers
    a final flush and exits the process.

    Attributes are plain instance state:
      * ``telemetry_queue`` - records waiting to be shipped.
      * ``extension_id`` - identifier returned by registration; sent with
        every subsequent Extensions API call.
    """

    # Upper bound for one telemetry POST. Lambda gives external extensions
    # only a short Shutdown window (2 s per the Extensions API docs), so the
    # final flush must give up well before that.
    FLUSH_TIMEOUT_SECONDS = 1.5

    def __init__(self):
        self.telemetry_queue = Queue()
        self.extension_id = None

    def register(self):
        """Register with the Extensions API for INVOKE and SHUTDOWN events.

        Returns:
            The extension identifier taken from the response headers.

        Raises:
            requests.HTTPError: If the Extensions API rejects the registration.
        """
        response = requests.post(
            f"{LAMBDA_EXTENSION_API}/register",
            headers={'Lambda-Extension-Name': 'telemetry-extension'},
            json={'events': ['INVOKE', 'SHUTDOWN']}
        )
        # Fail with the real HTTP error instead of a KeyError on the header.
        response.raise_for_status()
        self.extension_id = response.headers['Lambda-Extension-Identifier']
        return self.extension_id

    def next_event(self):
        """Long-poll for the next lifecycle event and return it as a dict.

        Raises:
            requests.HTTPError: If the Extensions API returns an error status.
        """
        response = requests.get(
            f"{LAMBDA_EXTENSION_API}/event/next",
            headers={'Lambda-Extension-Identifier': self.extension_id},
            timeout=None  # Blocks until next event
        )
        response.raise_for_status()
        return response.json()

    def run(self):
        """Register, then process lifecycle events until SHUTDOWN."""
        self.register()

        while True:
            event = self.next_event()
            event_type = event['eventType']

            if event_type == 'INVOKE':
                # Function is being invoked — ship whatever has been queued
                self.flush_telemetry_batch()
            elif event_type == 'SHUTDOWN':
                # Environment is shutting down — final flush
                self.flush_telemetry_batch(final=True)
                sys.exit(0)

    def flush_telemetry_batch(self, final=False):
        """Ship collected telemetry to your observability backend.

        Delivery is best-effort: a timeout or HTTP/connection error is
        logged to stderr and the batch is dropped, so a broken telemetry
        backend cannot hang or crash the extension process.

        Args:
            final: True for the last flush before shutdown; forwarded to the
                backend as ``final_flush``.
        """
        batch = []
        while not self.telemetry_queue.empty():
            batch.append(self.telemetry_queue.get_nowait())

        if not batch:
            return

        try:
            # Send to your telemetry endpoint (Datadog, Honeycomb, etc.)
            response = requests.post(
                os.environ.get('TELEMETRY_ENDPOINT', 'https://telemetry.example.com/v1/logs'),
                json={'records': batch, 'final_flush': final},
                timeout=self.FLUSH_TIMEOUT_SECONDS
            )
            response.raise_for_status()
        except requests.RequestException as exc:
            print(
                f"telemetry-extension: failed to ship {len(batch)} record(s): {exc}",
                file=sys.stderr
            )

if __name__ == '__main__':
    TelemetryExtension().run()

Custom Runtime: The Bootstrap Contract

A custom runtime is an executable named bootstrap, shipped either in the function's deployment package or in a Lambda layer. Lambda invokes it, and it must implement this HTTP contract (the Runtime API):

#!/usr/bin/env python3
"""
bootstrap - Custom Lambda runtime implementation
This demonstrates the Runtime Interface contract that all Lambda runtimes implement.
"""
import os
import sys
import json
import importlib
import traceback
import requests

# Both values are read from the environment at startup; a missing variable
# raises KeyError before the processing loop begins.
# RUNTIME_API is used as the host portion of every Runtime API URL below.
RUNTIME_API = os.environ['AWS_LAMBDA_RUNTIME_API']
HANDLER = os.environ['_HANDLER']  # e.g., "module.function_name"

def get_next_invocation():
    """Block until Lambda hands out the next invocation.

    Returns:
        A ``(request_id, event, context)`` tuple: the request id from the
        response headers, the JSON-decoded event body, and a dict of
        invocation metadata (values are None for headers that are absent).
    """
    url = f"http://{RUNTIME_API}/2018-06-01/runtime/invocation/next"
    # No timeout: the call is a long poll that returns when work arrives.
    reply = requests.get(url, timeout=None)
    headers = reply.headers

    request_id = headers['Lambda-Runtime-Aws-Request-Id']
    event = reply.json()

    # Invocation metadata travels in headers rather than in the body.
    context = dict(
        request_id=request_id,
        deadline_ms=headers.get('Lambda-Runtime-Deadline-Ms'),
        invoked_arn=headers.get('Lambda-Runtime-Invoked-Function-Arn'),
        trace_id=headers.get('Lambda-Runtime-Trace-Id'),
    )
    return request_id, event, context

def send_response(request_id, result):
    """Post the handler's result for ``request_id`` as a JSON body."""
    endpoint = f"http://{RUNTIME_API}/2018-06-01/runtime/invocation/{request_id}/response"
    requests.post(endpoint, json=result)

def send_error(request_id, error):
    """Report a failed invocation for ``request_id``.

    The JSON body carries the exception's message, its class name, and the
    formatted traceback lines.
    """
    endpoint = f"http://{RUNTIME_API}/2018-06-01/runtime/invocation/{request_id}/error"
    payload = {
        'errorMessage': str(error),
        'errorType': type(error).__name__,
        'stackTrace': traceback.format_exception(error),
    }
    requests.post(endpoint, json=payload)

def main():
    """Load the configured handler and serve invocations forever.

    HANDLER is split on its last dot into a module path and a function
    name; the module is imported once, then each invocation is fetched,
    passed to the handler, and answered with either a response or an error.
    """
    # Load handler function
    module_name, function_name = HANDLER.rsplit('.', 1)
    module = importlib.import_module(module_name)
    handler = getattr(module, function_name)

    # Processing loop
    while True:
        request_id, event, context = get_next_invocation()

        # Set trace ID for X-Ray.
        # The 'trace_id' key is always present but holds None when the
        # header was absent, so a .get() default would never apply; os.environ
        # only accepts strings, hence the explicit fallback to ''.
        os.environ['_X_AMZN_TRACE_ID'] = context.get('trace_id') or ''

        try:
            result = handler(event, context)
            send_response(request_id, result)
        except Exception as e:
            # Top-level boundary: any handler (or response-encoding) failure
            # is reported to Lambda and the loop continues.
            send_error(request_id, e)

if __name__ == '__main__':
    main()

Practical Layer Pattern: Configuration Injection

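# Layer module that injects configuration from Parameter Store during INIT
# Import it from your function module so it runs in the function's own process
# before the handler does (environment changes made by a separate extension
# process would not be visible to the function)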

import boto3
import json
import os

# Created once at import time so the client is reused by every call.
ssm = boto3.client('ssm')

def load_config_to_env():
    """
    Fetch all parameters under a path and inject as environment variables.

    The path comes from CONFIG_PATH (default '/myapp/prod/') and is searched
    recursively with decryption enabled. Each variable is named after the
    parameter's path relative to that prefix, with '/' turned into '_' and
    the result upper-cased:

        /myapp/prod/DATABASE_URL  → DATABASE_URL
        /myapp/prod/db/password   → DB_PASSWORD

    Using the relative path (rather than only the last segment) keeps nested
    parameters that share a leaf name from silently overwriting each other.

    This runs during INIT — your function code sees them as regular env vars,
    provided this module is loaded in the same process as the handler
    (os.environ changes are process-local).
    """
    path = os.environ.get('CONFIG_PATH', '/myapp/prod/')
    # Normalize so the prefix always ends in exactly one '/'.
    prefix = path.rstrip('/') + '/'

    paginator = ssm.get_paginator('get_parameters_by_path')
    for page in paginator.paginate(Path=path, WithDecryption=True, Recursive=True):
        for param in page['Parameters']:
            name = param['Name']
            if name.startswith(prefix):
                relative = name[len(prefix):]
            else:
                # Unexpected name shape: fall back to the last path segment.
                relative = name.split('/')[-1]
            env_name = relative.replace('/', '_').upper()
            os.environ[env_name] = param['Value']

# Execute during layer init
load_config_to_env()

# Your function code now just reads os.environ['DATABASE_URL']
# No Parameter Store (SSM) SDK calls in the hot path, no added latency per invocation

Layer versioning gotcha: Layers are immutable and versioned. When you publish a new version, existing functions continue using the old version until you explicitly update them. This is by design — it prevents a shared layer update from breaking 50 functions simultaneously. But it also means you need automation to roll layer updates across your fleet.