Commit 3e92b42

fix: skip streams over limits in dry-run mode
This commit fixes a bug where distributors running with IngestLimitsDryRunEnabled would still write streams that exceeded the max stream limit to the metadata topic. As a result, those streams were counted among the existing streams instead of being treated as over the max stream limit.
1 parent 364ba26 commit 3e92b42
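In short, whatever consumes the metadata topic treats every advertised stream hash as a known stream, so advertising over-limit streams in dry-run mode makes them look like they were accepted within the limit. A toy sketch of that accounting (the consumer below is hypothetical and much simpler than the real limits service):

package main

import "fmt"

func main() {
	// Hypothetical toy consumer of the metadata topic: every advertised
	// stream hash becomes a "known" stream for limit accounting.
	known := map[uint64]struct{}{}
	advertised := []uint64{1, 2, 3} // hashes written to the metadata topic
	for _, h := range advertised {
		known[h] = struct{}{}
	}
	// If over-limit hashes are advertised as well, they inflate this count,
	// and the limits view no longer matches what was actually rejected.
	fmt.Println("known streams:", len(known))
}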

1 file changed: pkg/distributor/distributor.go (+35, -23)
@@ -721,6 +721,7 @@ func (d *Distributor) PushWithResolver(ctx context.Context, req *logproto.PushRe
 		return &logproto.PushResponse{}, validationErr
 	}
 
+	var skipMetadataHashes map[uint64]struct{}
 	if d.cfg.IngestLimitsEnabled {
 		var reasonsForStreams map[uint64][]string
 		streams, reasonsForStreams, err = d.ingestLimits.enforceLimits(ctx, tenantID, streams)
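Note that skipMetadataHashes is declared up front but only meant to be populated in dry-run mode; otherwise it stays nil. Reading from a nil map is safe in Go and never finds a key, so a nil skip set handed down the Kafka write path simply means "skip nothing". A small illustrative snippet:

package main

import "fmt"

func main() {
	// A nil map behaves like an empty map for lookups, so a nil skip set
	// passed to the Kafka write path skips no streams.
	var skipMetadataHashes map[uint64]struct{}
	_, ok := skipMetadataHashes[42]
	fmt.Println(ok) // false: no stream is skipped when the map is nil
}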
@@ -737,7 +738,17 @@ func (d *Distributor) PushWithResolver(ctx context.Context, req *logproto.PushRe
 			}
 		}
 		newStreams := streamsForReasons[limits_frontend.ReasonExceedsMaxStreams]
-		if !d.cfg.IngestLimitsDryRunEnabled {
+		// When IngestLimitsDryRunEnabled is true, we need to stop stream hashes
+		// that exceed the stream limit from being written to the metadata topic.
+		// If we don't do this, the stream hashes that should have been rejected
+		// will instead being counted as a known stream, causing a disagreement
+		// in metrics between the limits service and ingesters.
+		if d.cfg.IngestLimitsDryRunEnabled {
+			skipMetadataHashes := make(map[uint64]struct{})
+			for _, streamHash := range newStreams {
+				skipMetadataHashes[streamHash] = struct{}{}
+			}
+		} else {
 			// TODO(grobinson): Return the reasons for each stream, instead of
 			// generic error messages.
 			if len(newStreams) == len(streams) {
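A condensed, self-contained sketch of what the dry-run branch is doing, assigning into the skipMetadataHashes declared earlier (the over-limit hashes are hard-coded here; the real code takes them from the per-stream limit reasons):

package main

import "fmt"

func main() {
	dryRunEnabled := true
	// Hashes of streams that exceeded the max stream limit.
	newStreams := []uint64{101, 202}

	var skipMetadataHashes map[uint64]struct{}
	if dryRunEnabled {
		// Remember the over-limit hashes so no metadata record is produced
		// for them further down the write path.
		skipMetadataHashes = make(map[uint64]struct{}, len(newStreams))
		for _, h := range newStreams {
			skipMetadataHashes[h] = struct{}{}
		}
	}
	fmt.Println(len(skipMetadataHashes)) // 2
}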
@@ -797,7 +808,7 @@ func (d *Distributor) PushWithResolver(ctx context.Context, req *logproto.PushRe
 			return nil, err
 		}
 		// We don't need to create a new context like the ingester writes, because we don't return unless all writes have succeeded.
-		d.sendStreamsToKafka(ctx, streams, tenantID, &tracker, subring)
+		d.sendStreamsToKafka(ctx, streams, skipMetadataHashes, tenantID, &tracker, subring)
 	}
 
 	if d.cfg.IngesterEnabled {
@@ -1232,10 +1243,10 @@ func (d *Distributor) sendStreamsErr(ctx context.Context, ingester ring.Instance
 	return err
 }
 
-func (d *Distributor) sendStreamsToKafka(ctx context.Context, streams []KeyedStream, tenant string, tracker *pushTracker, subring *ring.PartitionRing) {
+func (d *Distributor) sendStreamsToKafka(ctx context.Context, streams []KeyedStream, skipMetadataHashes map[uint64]struct{}, tenant string, tracker *pushTracker, subring *ring.PartitionRing) {
 	for _, s := range streams {
 		go func(s KeyedStream) {
-			err := d.sendStreamToKafka(ctx, s, tenant, subring)
+			err := d.sendStreamToKafka(ctx, s, skipMetadataHashes, tenant, subring)
 			if err != nil {
 				err = fmt.Errorf("failed to write stream to kafka: %w", err)
 			}
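The skip set is built once on the request path and afterwards only read by the per-stream goroutines spawned in sendStreamsToKafka; concurrent reads of a Go map are safe as long as nothing writes to it concurrently. A simplified sketch of the same fan-out shape, without the tracker and partition-ring plumbing:

package main

import (
	"fmt"
	"sync"
)

func main() {
	// Built once on the request path, read-only afterwards.
	skip := map[uint64]struct{}{7: {}}
	hashes := []uint64{7, 8, 9}

	var wg sync.WaitGroup
	for _, h := range hashes {
		wg.Add(1)
		go func(h uint64) {
			defer wg.Done()
			// Concurrent reads of the shared map are safe; each goroutine
			// decides independently whether to emit a metadata record.
			if _, ok := skip[h]; ok {
				fmt.Println(h, "skipped metadata record")
				return
			}
			fmt.Println(h, "would produce a metadata record")
		}(h)
	}
	wg.Wait()
}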
@@ -1244,7 +1255,7 @@ func (d *Distributor) sendStreamsToKafka(ctx context.Context, streams []KeyedStr
 	}
 }
 
-func (d *Distributor) sendStreamToKafka(ctx context.Context, stream KeyedStream, tenant string, subring *ring.PartitionRing) error {
+func (d *Distributor) sendStreamToKafka(ctx context.Context, stream KeyedStream, skipMetadataHashes map[uint64]struct{}, tenant string, subring *ring.PartitionRing) error {
 	if len(stream.Stream.Entries) == 0 {
 		return nil
 	}
@@ -1274,26 +1285,27 @@ func (d *Distributor) sendStreamToKafka(ctx context.Context, stream KeyedStream,
 
 	entriesSize, structuredMetadataSize := calculateStreamSizes(stream.Stream)
 
-	// However, unlike stream records, the distributor writes stream metadata
-	// records to one of a fixed number of partitions, the size of which is
-	// determined ahead of time. It does not use a ring. The reason for this
-	// is that we want to be able to scale components that consume metadata
-	// records independent of ingesters.
-	metadataPartitionID := int32(stream.HashKeyNoShard % uint64(d.numMetadataPartitions))
-	metadata, err := kafka.EncodeStreamMetadata(
-		metadataPartitionID,
-		d.cfg.KafkaConfig.Topic,
-		tenant,
-		stream.HashKeyNoShard,
-		entriesSize,
-		structuredMetadataSize,
-	)
-	if err != nil {
-		return fmt.Errorf("failed to marshal metadata: %w", err)
+	if _, ok := skipMetadataHashes[stream.HashKeyNoShard]; !ok {
+		// However, unlike stream records, the distributor writes stream metadata
+		// records to one of a fixed number of partitions, the size of which is
+		// determined ahead of time. It does not use a ring. The reason for this
+		// is that we want to be able to scale components that consume metadata
+		// records independent of ingesters.
+		metadataPartitionID := int32(stream.HashKeyNoShard % uint64(d.numMetadataPartitions))
+		metadata, err := kafka.EncodeStreamMetadata(
+			metadataPartitionID,
+			d.cfg.KafkaConfig.Topic,
+			tenant,
+			stream.HashKeyNoShard,
+			entriesSize,
+			structuredMetadataSize,
+		)
+		if err != nil {
+			return fmt.Errorf("failed to marshal metadata: %w", err)
+		}
+		records = append(records, metadata)
 	}
 
-	records = append(records, metadata)
-
 	d.kafkaRecordsPerRequest.Observe(float64(len(records)))
 
 	produceResults := d.kafkaWriter.ProduceSync(ctx, records)