Skip to main content
ship it and sleep

Expand-Contract Pattern and Backward-Compatible Migrations

5 min read Chapter 26 of 66

Expand-Contract Pattern and Backward-Compatible Migrations

The Failure

The inventory team needed to change the quantity column from INTEGER to DECIMAL(10,2) to support fractional units (liquids measured in liters). They ran ALTER TABLE products ALTER COLUMN quantity TYPE DECIMAL(10,2). PostgreSQL rewrote the entire table (2.3 million rows). The table was locked for 47 seconds. During those 47 seconds, every inventory check, every reservation, and every stock update failed. The checkout service returned errors for 47 seconds during peak traffic.

An expand-contract approach avoids the table lock entirely. Add a new column, backfill in batches, migrate reads, drop the old column. Zero downtime.

The Mechanism

Three Common Schema Changes and Their Expand-Contract Patterns

ChangeNaive ApproachExpand-Contract ApproachDeployments
Column renameALTER TABLE RENAME COLUMNAdd new column, sync trigger, migrate code, drop old3
Type changeALTER TABLE ALTER COLUMN TYPEAdd new column with new type, backfill, migrate code, drop old3
Table splitCreate new tables, migrate data, drop oldAdd new tables, dual-write, migrate reads, drop old writes, drop old table4

The Implementation

Pattern 1: Column Type Change (INTEGER → DECIMAL)

Deployment 1: Expand

-- V001__expand_quantity_decimal.sql
-- Add new column with correct type
ALTER TABLE products ADD COLUMN quantity_decimal DECIMAL(10,2);

-- Backfill in batches to avoid long-running transactions
DO $$
DECLARE
  batch_size INT := 10000;
  total_rows INT;
  processed INT := 0;
BEGIN
  SELECT count(*) INTO total_rows FROM products WHERE quantity_decimal IS NULL;
  WHILE processed < total_rows LOOP
    UPDATE products
    SET quantity_decimal = quantity::DECIMAL(10,2)
    WHERE id IN (
      SELECT id FROM products
      WHERE quantity_decimal IS NULL
      LIMIT batch_size
    );
    processed := processed + batch_size;
    RAISE NOTICE 'Processed % of % rows', processed, total_rows;
    COMMIT;
  END LOOP;
END $$;

-- Sync trigger: any write to quantity also writes to quantity_decimal
CREATE OR REPLACE FUNCTION sync_quantity()
RETURNS TRIGGER AS $$
BEGIN
  IF NEW.quantity IS NOT NULL THEN
    NEW.quantity_decimal := NEW.quantity::DECIMAL(10,2);
  END IF;
  IF NEW.quantity_decimal IS NOT NULL THEN
    NEW.quantity := ROUND(NEW.quantity_decimal)::INTEGER;
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER trg_sync_quantity
  BEFORE INSERT OR UPDATE ON products
  FOR EACH ROW EXECUTE FUNCTION sync_quantity();

Deployment 1: Code change

// HARDENED: Write to both columns, read from old
// inventory-service/src/main/java/com/acme/inventory/ProductRepository.java
@Query("UPDATE products SET quantity = :qty, quantity_decimal = :qtyDecimal WHERE id = :id")
void updateQuantity(@Param("id") Long id,
                    @Param("qty") int qty,
                    @Param("qtyDecimal") BigDecimal qtyDecimal);

// Still reading from 'quantity' — old code compatibility
@Query("SELECT quantity FROM products WHERE id = :id")
int getQuantity(@Param("id") Long id);

Deployment 2: Migrate reads

// HARDENED: Read from new column, write to both
@Query("SELECT quantity_decimal FROM products WHERE id = :id")
BigDecimal getQuantity(@Param("id") Long id);

// Still writing to both — rollback safety
@Query("UPDATE products SET quantity = ROUND(:qtyDecimal), quantity_decimal = :qtyDecimal WHERE id = :id")
void updateQuantity(@Param("id") Long id,
                    @Param("qtyDecimal") BigDecimal qtyDecimal);

Deployment 3: Contract

-- V003__contract_quantity_decimal.sql
-- Remove sync trigger
DROP TRIGGER trg_sync_quantity ON products;
DROP FUNCTION sync_quantity();

-- Remove old column
ALTER TABLE products DROP COLUMN quantity;

-- Rename new column
ALTER TABLE products RENAME COLUMN quantity_decimal TO quantity;

-- Add NOT NULL constraint
ALTER TABLE products ALTER COLUMN quantity SET NOT NULL;

Pattern 2: Table Split (orders → orders + order_items)

Deployment 1: Expand

-- V010__expand_order_items.sql
CREATE TABLE order_items (
  id BIGSERIAL PRIMARY KEY,
  order_id BIGINT NOT NULL REFERENCES orders(id),
  product_id BIGINT NOT NULL,
  quantity DECIMAL(10,2) NOT NULL,
  unit_price DECIMAL(10,2) NOT NULL,
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_order_items_order_id ON order_items(order_id);

-- Backfill from JSON column in orders table
INSERT INTO order_items (order_id, product_id, quantity, unit_price)
SELECT
  o.id,
  (item->>'productId')::BIGINT,
  (item->>'quantity')::DECIMAL(10,2),
  (item->>'unitPrice')::DECIMAL(10,2)
FROM orders o,
  jsonb_array_elements(o.items) AS item
WHERE o.items IS NOT NULL;

Deployment 1: Code change — dual write

// HARDENED: Dual-write to both orders.items JSON and order_items table
func (r *OrderRepo) CreateOrder(ctx context.Context, order Order) error {
    tx, err := r.db.BeginTx(ctx, nil)
    if err != nil {
        return err
    }
    defer tx.Rollback()

    // Write to orders table (includes items JSON for backward compat)
    _, err = tx.ExecContext(ctx,
        `INSERT INTO orders (id, customer_id, total, items, status)
         VALUES ($1, $2, $3, $4, $5)`,
        order.ID, order.CustomerID, order.Total,
        order.ItemsJSON(), order.Status)
    if err != nil {
        return err
    }

    // Also write to order_items table (new normalized structure)
    for _, item := range order.Items {
        _, err = tx.ExecContext(ctx,
            `INSERT INTO order_items (order_id, product_id, quantity, unit_price)
             VALUES ($1, $2, $3, $4)`,
            order.ID, item.ProductID, item.Quantity, item.UnitPrice)
        if err != nil {
            return err
        }
    }

    return tx.Commit()
}

Deployment 2: Migrate reads

// HARDENED: Read from order_items, still dual-write
func (r *OrderRepo) GetOrderItems(ctx context.Context, orderID string) ([]OrderItem, error) {
    rows, err := r.db.QueryContext(ctx,
        `SELECT product_id, quantity, unit_price
         FROM order_items WHERE order_id = $1`, orderID)
    // ... read from normalized table
}

Deployment 3: Stop dual-write, drop old column

-- V012__contract_order_items.sql
ALTER TABLE orders DROP COLUMN items;

The Gate

Each expand migration is tested in CI with the backward-compatibility check described in CH9. The CI pipeline:

  1. Applies the expand migration to a test database
  2. Runs the current production code’s tests against the new schema (backward compat)
  3. Runs the new code’s tests against the new schema (forward compat)

Both must pass before the migration is promoted to staging.

The Recovery

Expand migration fails: The migration transaction rolls back. No schema changes applied. Fix the migration SQL and retry.

Code deployment after expand fails: Roll back the code. The old code still works because the expand migration only added new columns/tables. The old code ignores them.

Contract migration removes a column that is still needed: The contract migration should never run until all code reading the old column has been deployed and verified. Add a CI check that greps the codebase for references to the old column name before allowing the contract migration to run.