Skip to content

Unknown service schedulerpb.SchedulerForQuerier #17113

Open
@ldiego73

Description

@ldiego73

Describe the bug
When deploying Loki with the SimpleScalable deployment mode, I'm seeing constant errors in the logs from loki-read pods:

k logs pod/loki-read-f5f67c585-8khjw -n observability

level=info ts=2025-04-10T19:35:58.650872453Z caller=http.go:160 org_id=fake msg="starting to tail logs" tenant=fake selectors="{stream=\"stdout\",pod=\"loki-canary-9ppvh\"} "
level=info ts=2025-04-10T19:35:58.651667691Z caller=http.go:163 org_id=fake msg="ended tailing logs" tenant=fake selectors="{stream=\"stdout\",pod=\"loki-canary-9ppvh\"} "
level=error ts=2025-04-10T19:35:59.008139779Z caller=scheduler_processor.go:111 component=querier msg="error processing requests from scheduler" err="rpc error: code = Unimplemented desc = unknown service schedulerpb.SchedulerForQuerier" addr=127.0.0.1:9096
level=error ts=2025-04-10T19:35:59.042137553Z caller=scheduler_processor.go:111 component=querier msg="error processing requests from scheduler" err="rpc error: code = Unimplemented desc = unknown service schedulerpb.SchedulerForQuerier" addr=127.0.0.1:9096
level=error ts=2025-04-10T19:35:59.048660032Z caller=scheduler_processor.go:111 component=querier msg="error processing requests from scheduler" err="rpc error: code = Unimplemented desc = unknown service schedulerpb.SchedulerForQuerier" addr=127.0.0.1:9096
level=error ts=2025-04-10T19:35:59.093817635Z caller=frontend_scheduler_worker.go:248 msg="error sending requests to scheduler" err="rpc error: code = Unimplemented desc = unknown service schedulerpb.SchedulerForFrontend" addr=127.0.0.1:9096

My config

# values-loki.yaml.gotmpl
deploymentMode: SimpleScalable

loki:
  auth_enabled: false
  commonConfig:
    path_prefix: /var/loki
    replication_factor: {{ eq $env "prod" | ternary "3" "2" }}
    ring:
      instance_addr: 127.0.0.1
      kvstore:
        store: memberlist
  compactor:
    working_directory: /var/loki/compactor
    compaction_interval: {{ eq $env "prod" | ternary "10m" "15m" }}
    {{ if eq $env "prod" }}
    retention_enabled: true
    retention_delete_delay: 2h
    {{ end }}
  ingester:
    lifecycler:
      ring:
        kvstore:
          store: memberlist
        replication_factor: {{ eq $env "prod" | ternary "3" "2" }}
    chunk_encoding: snappy
    chunk_idle_period: {{ eq $env "prod" | ternary "30m" "15m" }}
    chunk_retain_period: {{ eq $env "prod" | ternary "5m" "3m" }}
    max_chunk_age: {{ eq $env "prod" | ternary "2h" "1h" }}
    wal:
      enabled: true
      dir: /var/loki/wal
      {{ if eq $env "prod" }}
      replay_memory_ceiling: 512MB
      {{ end }}
    flush_op_timeout: {{ eq $env "prod" | ternary "10m" "5m" }}
  limits_config:
    allow_structured_metadata: true
    ingestion_rate_mb: {{ eq $env "prod" | ternary "15" "5" }}
    ingestion_burst_size_mb: {{ eq $env "prod" | ternary "30" "10" }}
    per_stream_rate_limit: {{ eq $env "prod" | ternary "5MB" "3MB" }}
    per_stream_rate_limit_burst: {{ eq $env "prod" | ternary "10MB" "5MB" }}

    # Query limits
    max_entries_limit_per_query: {{ eq $env "prod" | ternary "10000" "5000" }}
    max_query_length: {{ eq $env "prod" | ternary "2160h" "721h" }}
    max_query_parallelism: {{ eq $env "prod" | ternary "32" "16" }}
    max_streams_per_user: {{ eq $env "prod" | ternary "50000" "10000" }}
    max_query_series: {{ eq $env "prod" | ternary "1000" "500" }}

    # Data retention and management
    retention_period: {{ eq $env "prod" | ternary "8760h" "2190h" }} # 1 year (8760 hours) for prod, and 3 months (2160 hours) for staging
    reject_old_samples: true
    reject_old_samples_max_age: {{ eq $env "prod" | ternary "336h" "168h" }}
    creation_grace_period: 1h

    # Cache and memory limits
    max_cache_freshness_per_query: {{ eq $env "prod" | ternary "1h" "10m" }}
    split_queries_by_interval: {{ eq $env "prod" | ternary "1h" "30m" }}

    {{ if eq $env "prod" }}
    # Specific cardinality limits for production only
    cardinality_limit: 100000
    max_label_names_per_series: 30
    max_label_name_length: 1024
    max_label_value_length: 2048

    volume_enabled: true
    {{ end }}
  memberlistConfig:
    bind_port: 7946
    join_members:
      - loki-memberlist
  query_range:
    align_queries_with_step: true
    cache_results: true
    results_cache:
      cache:
        embedded_cache:
          enabled: true
          max_size_mb: {{ eq $env "prod" | ternary "1024" "512" }}
          ttl: {{ eq $env "prod" | ternary "24h" "12h" }}
      compression: snappy
  query_scheduler:
    use_scheduler_ring: true
  querier:
    max_concurrent: {{ eq $env "prod" | ternary "10" "5" }}
  pattern_ingester:
    enabled: true
  server:
    http_listen_port: 3100
    grpc_listen_port: 9096
    http_server_read_timeout: {{ eq $env "prod" | ternary "120s" "60s" }}
    http_server_write_timeout: {{ eq $env "prod" | ternary "120s" "60s" }}
    grpc_server_max_recv_msg_size: {{ eq $env "prod" | ternary "20971520" "10485760" }}
  schemaConfig:
    configs:
      - from: "2024-04-01"
        store: tsdb
        object_store: s3
        schema: v13
        index:
          prefix: index_
          period: 24h
  storage:
    type: s3
    bucketNames:
      chunks: {{ $bucket }}
      ruler: {{ $bucket }}
      admin: {{ $bucket }}
    s3:
      region: {{ $region }}
      s3forcepathstyle: false
      insecure: false
  storage_config:
    aws:
      bucketnames: {{ $bucket }}
      insecure: false
      region: {{ $region }}
      storage_class: STANDARD_IA
      s3forcepathstyle: false
    tsdb_shipper:
      active_index_directory: /var/loki/tsdb
      cache_location: /var/loki/cache
      cache_ttl: {{ eq $env "prod" | ternary "24h" "12h" }}

minio:
  enabled: false

serviceAccount:
  create: true
  name: loki
  annotations:
    eks.amazonaws.com/role-arn: arn:aws:iam::111111111:role/{{ $cluster }}-loki-{{ $region }}

gateway:
  affinity:
    nodeAffinity:
      {{ $shared.nodeAffinity | toYaml | nindent 6 }}
  tolerations:
    {{ $shared.tolerations | toYaml | nindent 4 }}

backend:
  replicas: {{ eq $env "prod" | ternary "3" "2" }}
  autoscaling:
    enabled: true
    minReplicas: {{ eq $env "prod" | ternary "3" "2" }}
    maxReplicas: {{ eq $env "prod" | ternary "10" "5" }}
    targetCPUUtilizationPercentage: 60
    targetMemoryUtilizationPercentage: 80
  affinity:
    nodeAffinity:
      {{ $shared.nodeAffinity | toYaml | nindent 6 }}
  tolerations:
    {{ $shared.tolerations | toYaml | nindent 4 }}

read:
  replicas: {{ eq $env "prod" | ternary "3" "2" }}
  autoscaling:
    enabled: true
    minReplicas: {{ eq $env "prod" | ternary "3" "2" }}
    maxReplicas: {{ eq $env "prod" | ternary "10" "5" }}
    targetCPUUtilizationPercentage: 60
    targetMemoryUtilizationPercentage: 80
  affinity:
    nodeAffinity:
      {{ $shared.nodeAffinity | toYaml | nindent 6 }}
  tolerations:
    {{ $shared.tolerations | toYaml | nindent 4 }}

write:
  replicas: {{ eq $env "prod" | ternary "3" "2" }}
  autoscaling:
    enabled: true
    minReplicas: {{ eq $env "prod" | ternary "3" "2" }}
    maxReplicas: {{ eq $env "prod" | ternary "10" "5" }}
    targetCPUUtilizationPercentage: 60
    targetMemoryUtilizationPercentage: 80
  affinity:
    nodeAffinity:
      {{ $shared.nodeAffinity | toYaml | nindent 6 }}
  tolerations:
    {{ $shared.tolerations | toYaml | nindent 4 }}

resultsCache:
  affinity:
    nodeAffinity:
      {{ $shared.nodeAffinity | toYaml | nindent 6 }}
  tolerations:
    {{ $shared.tolerations | toYaml | nindent 4 }}

chunksCache:
  affinity:
    nodeAffinity:
      {{ $shared.nodeAffinity | toYaml | nindent 6 }}
  tolerations:
    {{ $shared.tolerations | toYaml | nindent 4 }}

lokiCanary:
  affinity:
    nodeAffinity:
      {{ $shared.nodeAffinity | toYaml | nindent 6 }}
  tolerations:
    {{ $shared.tolerations | toYaml | nindent 4 }}

To Reproduce
Steps to reproduce the behavior:

  1. Deploy Loki using Helm chart with deploymentMode: SimpleScalable
  2. No explicit query-scheduler configuration in values.yaml
  3. Check the logs of loki-read pods

Expected behavior
The loki-read pods should either:

  1. Successfully connect to the query-scheduler service if it's part of the SimpleScalable deployment
  2. Not attempt to connect to a scheduler if it's not required

Environment:

  • Infrastructure: Kubernetes
  • Deployment tool: Helm
  • Kubernetes version: v1.30.9-eks-5d632ec
  • Loki version: 3.4.3
  • Helm chart version: latest

Screenshots, Promtail config, or terminal output

Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    area/ssdtype/bugSomehing is not working as expected

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions