gRPC for Service-to-Service Communication

The main chapter showed gRPC delivering 3.7x faster serialization and 2.5x smaller payloads compared to REST+JSON. This section covers implementation: setting up gRPC in the content platform’s Spring Boot services, choosing between unary and streaming patterns, propagating deadlines to prevent cascade failures, and measuring end-to-end performance.

gRPC Spring Boot Setup

The content platform uses grpc-spring-boot-starter for seamless integration with Spring’s dependency injection and lifecycle management:

<!-- pom.xml: gRPC dependencies -->
<!--
  Three artifacts are declared: the Spring Boot starter plus the two io.grpc
  libraries the generated code is compiled against. Both io.grpc artifacts are
  pinned to 1.62.2, the same version used for the protoc-gen-grpc-java plugin
  in the protobuf-maven-plugin configuration; keep the three in step when upgrading.
-->
<dependencies>
    <!-- Spring Boot integration; the service classes in this section rely on its
         @GrpcService and @GrpcClient annotations -->
    <dependency>
        <groupId>net.devh</groupId>
        <artifactId>grpc-spring-boot-starter</artifactId>
        <version>3.1.0.RELEASE</version>
    </dependency>
    <!-- Protobuf message marshalling for gRPC -->
    <dependency>
        <groupId>io.grpc</groupId>
        <artifactId>grpc-protobuf</artifactId>
        <version>1.62.2</version>
    </dependency>
    <!-- Stub base classes (blocking / async stubs, StreamObserver) -->
    <dependency>
        <groupId>io.grpc</groupId>
        <artifactId>grpc-stub</artifactId>
        <version>1.62.2</version>
    </dependency>
</dependencies>

<!-- Protobuf compilation plugin -->
<!--
  Generates Java sources from the .proto files during the build.
  NOTE(review): ${os.detected.classifier} is not defined anywhere in this snippet.
  It is normally supplied by the kr.motd.maven:os-maven-plugin build extension;
  confirm that extension is registered under <build><extensions>.
-->
<plugin>
    <groupId>org.xolstice.maven.plugins</groupId>
    <artifactId>protobuf-maven-plugin</artifactId>
    <version>0.6.1</version>
    <configuration>
        <!-- Platform-specific protoc compiler binary -->
        <protocArtifact>com.google.protobuf:protoc:3.25.3:exe:${os.detected.classifier}</protocArtifact>
        <pluginId>grpc-java</pluginId>
        <!-- gRPC code generator; version matches the io.grpc runtime dependencies (1.62.2) -->
        <pluginArtifact>io.grpc:protoc-gen-grpc-java:1.62.2:exe:${os.detected.classifier}</pluginArtifact>
    </configuration>
    <executions>
        <execution>
            <goals>
                <!-- compile: message classes; compile-custom: runs the grpc-java plugin for service stubs -->
                <goal>compile</goal>
                <goal>compile-custom</goal>
            </goals>
        </execution>
    </executions>
</plugin>

Service definition for the article service:

// src/main/proto/article_service.proto
syntax = "proto3";

package content.platform.article;

// Emit one top-level Java class per message/service rather than nesting
// everything inside a single outer class.
option java_multiple_files = true;
// Java package of the generated classes (independent of the proto package above).
option java_package = "com.contentplatform.grpc.article";

// RPC surface of the article service: two unary calls, one server-streaming
// export, and one bidirectional stream for view-event ingestion.
service ArticleService {
  // Unary: single request, single response
  rpc GetArticle(GetArticleRequest) returns (ArticleDetail);

  // Unary: paginated list
  rpc ListArticles(ListArticlesRequest) returns (ListArticlesResponse);

  // Server streaming: large batch export
  rpc StreamArticleBatch(BatchExportRequest) returns (stream ArticleSummary);

  // Bidirectional streaming: real-time view event ingestion
  rpc StreamViewEvents(stream ViewEvent) returns (stream ViewEventAck);
}

// Request for GetArticle. The server rejects a blank article_id with INVALID_ARGUMENT.
message GetArticleRequest {
  string article_id = 1;
  repeated string fields = 2;  // Field mask for partial response
}

// Request for ListArticles: cursor-based pagination with optional category
// filter and sort order.
message ListArticlesRequest {
  int32 page_size = 1;
  string cursor = 2;  // Empty string requests the first page
  repeated string categories = 3;
  SortOrder sort = 4;

  // Sort order for the listing; value 0 is the proto3 default when the field is unset.
  enum SortOrder {
    SORT_UNSPECIFIED = 0;
    PUBLISHED_DESC = 1;
    VIEW_COUNT_DESC = 2;
    TRENDING = 3;
  }
}

// One page of results plus the cursor to pass back for the next page.
message ListArticlesResponse {
  repeated ArticleSummary articles = 1;
  string next_cursor = 2;
  int32 total_count = 3;
}

// Lightweight article representation used in list pages and in the batch export stream.
// NOTE(review): the unit of the *_epoch fields in this file (seconds vs. milliseconds)
// is not stated anywhere here; confirm against the producers.
message ArticleSummary {
  string id = 1;
  string title = 2;
  string excerpt = 3;
  int64 view_count = 4;
  int64 published_at_epoch = 5;
  repeated string categories = 6;
  string author = 7;
  string thumbnail_url = 8;
}

// Full article as returned by GetArticle, including the HTML body and related articles.
message ArticleDetail {
  string id = 1;
  string title = 2;
  string content_html = 3;
  string author = 4;
  int64 published_at_epoch = 5;
  int64 view_count = 6;
  repeated string categories = 7;
  repeated RelatedArticle related = 8;
}

// Reference to a related article together with its relevance score.
message RelatedArticle {
  string id = 1;
  string title = 2;
  float relevance_score = 3;
}

// A single page-view event sent by clients over the StreamViewEvents stream.
message ViewEvent {
  string article_id = 1;
  string user_id = 2;
  int64 timestamp_epoch = 3;
  int32 scroll_depth_percent = 4;
  int32 time_on_page_seconds = 5;
}

// Acknowledgement sent back on the StreamViewEvents stream. The server emits one
// every 100 processed events and a final one when the client completes the stream.
message ViewEventAck {
  int64 processed_count = 1;           // Events processed on this stream so far
  int64 last_processed_timestamp = 2;  // timestamp_epoch of the most recently processed event
}

// Request for StreamArticleBatch.
message BatchExportRequest {
  int64 since_epoch = 1;
  repeated string categories = 2;
  int32 batch_size = 3;  // Values <= 0 make the server fall back to 100
}

Server Implementation

// ArticleGrpcService.java
@GrpcService
public class ArticleGrpcService extends ArticleServiceGrpc.ArticleServiceImplBase {

    private final ArticleRepository articleRepository;
    private final ViewEventProcessor viewEventProcessor;

    public ArticleGrpcService(ArticleRepository articleRepository,
                              ViewEventProcessor viewEventProcessor) {
        this.articleRepository = articleRepository;
        this.viewEventProcessor = viewEventProcessor;
    }

    @Override
    public void listArticles(ListArticlesRequest request,
                             StreamObserver<ListArticlesResponse> responseObserver) {
        try {
            var articles = articleRepository.findByCriteria(
                request.getCategoriesList(),
                request.getSort(),
                request.getPageSize(),
                request.getCursor()
            );

            var response = ListArticlesResponse.newBuilder()
                .addAllArticles(articles.items())
                .setNextCursor(articles.nextCursor())
                .setTotalCount(articles.totalCount())
                .build();

            responseObserver.onNext(response);
            responseObserver.onCompleted();
        } catch (Exception e) {
            responseObserver.onError(Status.INTERNAL
                .withDescription("Failed to list articles: " + e.getMessage())
                .withCause(e)
                .asRuntimeException());
        }
    }

    // Server streaming: sends articles in chunks as they load from DB
    @Override
    public void streamArticleBatch(BatchExportRequest request,
                                   StreamObserver<ArticleSummary> responseObserver) {
        int batchSize = request.getBatchSize() > 0 ? request.getBatchSize() : 100;
        String cursor = "";

        try {
            while (true) {
                var batch = articleRepository.findBatchSince(
                    request.getSinceEpoch(),
                    request.getCategoriesList(),
                    batchSize,
                    cursor
                );

                for (ArticleSummary article : batch.items()) {
                    responseObserver.onNext(article);
                }

                if (batch.items().size() < batchSize) {
                    break;  // No more data
                }
                cursor = batch.nextCursor();
            }
            responseObserver.onCompleted();
        } catch (Exception e) {
            responseObserver.onError(Status.INTERNAL
                .withDescription("Batch export failed")
                .asRuntimeException());
        }
    }

    // Bidirectional streaming: ingest view events, periodically ack
    @Override
    public StreamObserver<ViewEvent> streamViewEvents(
            StreamObserver<ViewEventAck> responseObserver) {

        return new StreamObserver<>() {
            private long processedCount = 0;
            private long lastTimestamp = 0;

            @Override
            public void onNext(ViewEvent event) {
                viewEventProcessor.process(event);
                processedCount++;
                lastTimestamp = event.getTimestampEpoch();

                // Ack every 100 events (backpressure signal)
                if (processedCount % 100 == 0) {
                    responseObserver.onNext(ViewEventAck.newBuilder()
                        .setProcessedCount(processedCount)
                        .setLastProcessedTimestamp(lastTimestamp)
                        .build());
                }
            }

            @Override
            public void onError(Throwable t) {
                // Client disconnected or error; flush pending events
                viewEventProcessor.flush();
            }

            @Override
            public void onCompleted() {
                // Send final ack
                responseObserver.onNext(ViewEventAck.newBuilder()
                    .setProcessedCount(processedCount)
                    .setLastProcessedTimestamp(lastTimestamp)
                    .build());
                responseObserver.onCompleted();
            }
        };
    }
}

Deadline Propagation

Without deadlines, a slow downstream service causes cascade failures. The recommendation service calls the article service, which calls the search service. If the user sets a 500ms timeout, each service must propagate the remaining budget:

// SLOW: No deadline propagation
// User timeout: 500ms
// API Gateway -> Recommendation Service (200ms processing)
//   -> Article Service (200ms processing)
//     -> Search Service (200ms processing)
// Total: 600ms -> user sees timeout, but all 3 services did full work

// FAST: Deadline propagation
// User timeout: 500ms
// API Gateway -> Recommendation Service (deadline: 500ms)
//   -> Article Service (deadline: 300ms remaining)
//     -> Search Service (deadline: 100ms remaining)
// Search service knows it has 100ms: uses cached results if query would take longer

/**
 * Recommendation endpoint that forwards the caller's remaining deadline budget
 * to the article service and degrades to cached results when that call times out.
 */
@GrpcService
public class RecommendationGrpcService
        extends RecommendationServiceGrpc.RecommendationServiceImplBase {

    /** Budget assumed when the incoming call carries no deadline. */
    private static final long FALLBACK_BUDGET_MS = 5000;
    /** Slice of the budget kept back for local scoring and ranking. */
    private static final long LOCAL_RESERVE_MS = 50;
    /** Floor for the downstream deadline so it is never zero or negative. */
    private static final long MIN_DOWNSTREAM_BUDGET_MS = 10;

    private final ArticleServiceGrpc.ArticleServiceBlockingStub articleStub;

    public RecommendationGrpcService(
            @GrpcClient("article-service")
            ArticleServiceGrpc.ArticleServiceBlockingStub articleStub) {
        this.articleStub = articleStub;
    }

    /**
     * Fetches trending articles for the requested categories, ranks them, and
     * returns the result. A downstream {@code DEADLINE_EXCEEDED} is answered with
     * cached recommendations; any other gRPC failure is passed on to the caller.
     */
    @Override
    public void getRecommendations(RecommendationRequest request,
                                   StreamObserver<RecommendationResponse> responseObserver) {
        Deadline inbound = Context.current().getDeadline();
        boolean hasDeadline = inbound != null;

        // Fail fast: no point starting work the caller has already given up on.
        if (hasDeadline && inbound.isExpired()) {
            responseObserver.onError(Status.DEADLINE_EXCEEDED
                .withDescription("Deadline already expired on entry")
                .asRuntimeException());
            return;
        }

        long budgetMs;
        if (hasDeadline) {
            budgetMs = inbound.timeRemaining(TimeUnit.MILLISECONDS);
        } else {
            budgetMs = FALLBACK_BUDGET_MS;
        }

        // Downstream gets whatever is left after the local reserve, with a floor.
        long downstreamBudgetMs =
            Math.max(budgetMs - LOCAL_RESERVE_MS, MIN_DOWNSTREAM_BUDGET_MS);

        try {
            var boundedStub = articleStub.withDeadline(
                Deadline.after(downstreamBudgetMs, TimeUnit.MILLISECONDS));

            ListArticlesRequest trendingQuery = ListArticlesRequest.newBuilder()
                .setPageSize(20)
                .addAllCategories(request.getCategoriesList())
                .setSort(ListArticlesRequest.SortOrder.TRENDING)
                .build();

            ListArticlesResponse articles = boundedStub.listArticles(trendingQuery);

            // Ranking has to fit inside the locally reserved slice of the budget.
            responseObserver.onNext(rankArticles(articles.getArticlesList(), request));
            responseObserver.onCompleted();
        } catch (StatusRuntimeException e) {
            if (e.getStatus().getCode() != Status.Code.DEADLINE_EXCEEDED) {
                responseObserver.onError(e);
                return;
            }
            // Downstream ran out of time: serve the cached/degraded answer instead.
            responseObserver.onNext(getCachedRecommendations(request));
            responseObserver.onCompleted();
        }
    }
}

Client-side deadline configuration:

// application.yml for gRPC client
// grpc:
//   client:
//     article-service:
//       address: dns:///article-service:9090
//       defaultLoadBalancingPolicy: round_robin
//       negotiationType: plaintext  # Internal network, no TLS needed
//       deadline: 2000  # Default deadline 2s for all calls

// Override per-call when needed:
// Derive a stub carrying a 200 ms deadline, then issue the lookup through it.
var deadlineStub = articleStub.withDeadline(Deadline.after(200, TimeUnit.MILLISECONDS));
ArticleDetail article = deadlineStub.getArticle(
    GetArticleRequest.newBuilder().setArticleId(articleId).build());

Unary vs Streaming: When to Use Each

// Unary RPC: single request, single response
// Use when: response fits in memory, latency < 1s expected
// Content platform: GetArticle, ListArticles (paginated)
// Overhead: 1 HEADERS frame + N DATA frames + 1 HEADERS (trailers)

// Server streaming: single request, multiple response messages
// Use when: response is large/unbounded, client processes incrementally
// Content platform: BatchExport (millions of articles for analytics)
// Advantage: constant memory on server (stream, don't buffer)
//
// Client code:
// Export everything changed since the last run; the server pages in chunks of 500.
BatchExportRequest exportRequest = BatchExportRequest.newBuilder()
    .setSinceEpoch(lastExportEpoch)
    .setBatchSize(500)
    .build();
Iterator<ArticleSummary> articles = articleStub.streamArticleBatch(exportRequest);

// The blocking iterator yields messages as they arrive, so each article is
// indexed without holding the full export in memory.
long count = 0;
while (articles.hasNext()) {
    analyticsStore.index(articles.next());
    count++;
    boolean atProgressMark = count % 10_000 == 0;
    if (atProgressMark) {
        log.info("Exported {} articles", count);
    }
}

// Bidirectional streaming: both sides send multiple messages
// Use when: continuous data flow in both directions
// Content platform: ViewEvent ingestion with periodic acks
// Advantage: single connection handles thousands of events/sec
//            acks provide backpressure signal

Streaming performance comparison for the batch export use case (1 million articles):

Unary with pagination (50 per page, 20,000 requests):
  Total time:     48.2s
  Memory peak:    890MB (buffering pages for retry on failure)
  Network calls:  20,000

Server streaming (batches of 500, single stream):
  Total time:     12.4s
  Memory peak:    45MB (processing in flight)
  Network calls:  1
  Speedup:        3.9x

Error Handling with Status Codes

gRPC defines 17 status codes (numbered 0–16, from OK to UNAUTHENTICATED). Mapping them correctly prevents silent failures:

// Correct status code mapping for content platform:
@GrpcService
public class ArticleGrpcService extends ArticleServiceGrpc.ArticleServiceImplBase {

    /**
     * Unary lookup of a single article.
     *
     * <p>Status mapping: a blank {@code article_id} yields {@code INVALID_ARGUMENT},
     * an id with no matching article yields {@code NOT_FOUND}, otherwise the
     * article is returned and the call completes normally.
     */
    @Override
    public void getArticle(GetArticleRequest request,
                           StreamObserver<ArticleDetail> responseObserver) {
        String articleId = request.getArticleId();

        // Caller error: reject before touching the repository.
        if (articleId.isBlank()) {
            Status invalid = Status.INVALID_ARGUMENT
                .withDescription("article_id must not be empty");
            responseObserver.onError(invalid.asRuntimeException());
            return;
        }

        var found = articleRepository.findById(articleId);

        if (found != null) {
            responseObserver.onNext(toProto(found));
            responseObserver.onCompleted();
            return;
        }

        Status missing = Status.NOT_FOUND
            .withDescription("Article not found: " + articleId);
        responseObserver.onError(missing.asRuntimeException());
    }
}

// Status codes and retry behavior:
// OK (0):              Success
// INVALID_ARGUMENT (3): Do NOT retry (client bug)
// NOT_FOUND (5):       Do NOT retry (data does not exist)
// DEADLINE_EXCEEDED (4): Maybe retry with backoff (transient)
// UNAVAILABLE (14):    Retry with backoff (server overloaded)
// INTERNAL (13):       Maybe retry (bug or transient)
// RESOURCE_EXHAUSTED (8): Retry with longer backoff (rate limited)

gRPC Load Balancing

HTTP/1.1 load balancing works at the connection level: each new connection goes to a different backend. gRPC reuses a single HTTP/2 connection, so L4 load balancers send all requests to one backend:

// SLOW: L4 load balancer with gRPC
// Client -> L4 LB -> Backend A (ALL requests go here)
// Backend B, C idle
// Problem: single HTTP/2 connection pinned to one backend by L4 LB

// FAST: Client-side load balancing
// Client discovers backends via DNS or service registry
// Client round-robins requests across backends on separate connections

// application.yml:
// grpc:
//   client:
//     article-service:
//       address: dns:///article-service.internal:9090
//       defaultLoadBalancingPolicy: round_robin
//       # DNS returns multiple A records; client connects to all

// Alternative: L7 proxy (Envoy, Linkerd) that understands HTTP/2 frames
// Envoy inspects stream-level requests and distributes across backends
// Content platform uses Envoy sidecar in Kubernetes:
// Client -> Envoy sidecar -> Backend A (stream 1, 3, 5)
//                         -> Backend B (stream 2, 4, 6)
//                         -> Backend C (stream 7, 8, 9)

Benchmark: gRPC vs REST+JSON

Same workload: ListArticles returning 50 articles, 10,000 requests, 100 concurrent callers, internal network (0.5ms RTT):

// JMH benchmark setup for serialization comparison
/**
 * JMH micro-benchmark comparing Protobuf and Jackson (JSON) serialization and
 * deserialization of the same 50-article list response. Reports the average
 * time per operation in microseconds.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 10, time = 1)
public class SerializationBenchmark {

    private ListArticlesResponse protoResponse;
    private ArticleListDto jsonResponse;
    private ObjectMapper jackson;
    private byte[] protoBytes;
    private byte[] jsonBytes;

    /**
     * Builds the payloads once per benchmark run and pre-serializes them so the
     * deserialization benchmarks have input bytes to parse.
     *
     * @throws Exception if Jackson fails to serialize the JSON fixture
     *         ({@code writeValueAsBytes} throws a checked exception, so it must
     *         be declared here for the class to compile)
     */
    @Setup
    public void setup() throws Exception {
        jackson = new ObjectMapper();
        // 50 articles with realistic field sizes
        protoResponse = buildProtoResponse(50);
        jsonResponse = buildJsonResponse(50);
        protoBytes = protoResponse.toByteArray();
        jsonBytes = jackson.writeValueAsBytes(jsonResponse);
    }

    // Each benchmark returns its result so JMH consumes it and the JIT cannot
    // eliminate the work as dead code.

    /** Protobuf message -> bytes. */
    @Benchmark
    public byte[] serializeProtobuf() {
        return protoResponse.toByteArray();
    }

    /** DTO -> JSON bytes via Jackson. */
    @Benchmark
    public byte[] serializeJson() throws Exception {
        return jackson.writeValueAsBytes(jsonResponse);
    }

    /** Bytes -> Protobuf message. */
    @Benchmark
    public ListArticlesResponse deserializeProtobuf() throws Exception {
        return ListArticlesResponse.parseFrom(protoBytes);
    }

    /** JSON bytes -> DTO via Jackson. */
    @Benchmark
    public ArticleListDto deserializeJson() throws Exception {
        return jackson.readValue(jsonBytes, ArticleListDto.class);
    }
}

Results:

Serialization (50 articles):
  Protobuf serialize:      38.2 us  (14,820 bytes output)
  Jackson serialize:      142.5 us  (37,450 bytes output)
  Ratio: 3.73x faster, 2.53x smaller

Deserialization (50 articles):
  Protobuf deserialize:    52.1 us
  Jackson deserialize:    198.3 us
  Ratio: 3.81x faster

End-to-end RPC (internal network, 0.5ms RTT, 100 concurrent):
  gRPC ListArticles:
    P50:   1.8ms
    P99:   4.2ms
    Throughput: 48,200 req/s

  REST+JSON ListArticles:
    P50:   3.9ms
    P99:   9.8ms
    Throughput: 22,100 req/s

  Improvement:
    P50:  2.2x faster
    P99:  2.3x faster
    Throughput: 2.2x higher

The 2.2x end-to-end improvement is less than the 3.7x serialization improvement because network I/O and database access dominate total latency. On the internal network with 0.5ms RTT, the serialization step (142us for JSON vs 38us for Protobuf) accounts for only about 3.7% vs 2.1% of the P50 request time (3.9ms and 1.8ms respectively). The bandwidth savings matter more at scale: each response is about 22.6KB smaller (37,450 vs 14,820 bytes), which is roughly 22.6MB/s less network traffic for every 1,000 req/s served.

Trade-offs and Migration Strategy

// When to use gRPC vs REST:
//
// gRPC wins:
// - Internal services (both sides control the stack)
// - High-throughput batch processing
// - Streaming use cases (event ingestion, large exports)
// - Strict schema enforcement needed (protobuf contracts)
// - Bandwidth-sensitive (2.5x smaller payloads)
//
// REST wins:
// - Public APIs (browser compatibility, curl debugging)
// - Simple CRUD with infrequent calls
// - Team unfamiliar with protobuf
// - Need human-readable wire format for debugging

// Migration strategy for content platform:
// Phase 1: New internal services start with gRPC (recommendation-service)
// Phase 2: High-traffic internal calls migrate (article-service -> search-service)
// Phase 3: Event streaming migrates (view-events pipeline)
// Phase 4: Keep REST for public API gateway (browser-facing)
//
// Dual-protocol support during migration:
/**
 * Exposes the article listing over REST alongside the gRPC endpoint during the
 * migration. Uses the Spring WebMvc.fn functional API
 * ({@code ServerRequest.param} / {@code ServerResponse.ok().body(...)}).
 */
@Configuration
public class DualProtocolConfig {

    /**
     * Routes {@code GET /api/articles} to the shared service layer.
     *
     * <p>Query parameters: {@code page_size} (integer, default 50) and
     * {@code cursor} (default empty string). A non-numeric {@code page_size}
     * is answered with 400 Bad Request.
     */
    @Bean
    public RouterFunction<ServerResponse> restRoutes(ArticleService articleService) {
        return route(GET("/api/articles"), req -> {
            int pageSize;
            try {
                pageSize = req.param("page_size").map(Integer::parseInt).orElse(50);
            } catch (NumberFormatException e) {
                // Client-supplied value: report it as a client error, not a 500.
                return ServerResponse.badRequest().body("page_size must be an integer");
            }

            // REST endpoint calls same service layer
            var articles = articleService.listArticles(
                pageSize,
                req.param("cursor").orElse("")
            );
            return ServerResponse.ok().body(articles);
        });
    }
    // gRPC endpoint (ArticleGrpcService) calls same ArticleService
    // Both coexist until REST clients migrate
}