Open
Description
Describe the bug
When deploying Loki with the SimpleScalable deployment mode, I'm seeing constant errors in the logs from loki-read pods:
k logs pod/loki-read-f5f67c585-8khjw -n observability
level=info ts=2025-04-10T19:35:58.650872453Z caller=http.go:160 org_id=fake msg="starting to tail logs" tenant=fake selectors="{stream=\"stdout\",pod=\"loki-canary-9ppvh\"} "
level=info ts=2025-04-10T19:35:58.651667691Z caller=http.go:163 org_id=fake msg="ended tailing logs" tenant=fake selectors="{stream=\"stdout\",pod=\"loki-canary-9ppvh\"} "
level=error ts=2025-04-10T19:35:59.008139779Z caller=scheduler_processor.go:111 component=querier msg="error processing requests from scheduler" err="rpc error: code = Unimplemented desc = unknown service schedulerpb.SchedulerForQuerier" addr=127.0.0.1:9096
level=error ts=2025-04-10T19:35:59.042137553Z caller=scheduler_processor.go:111 component=querier msg="error processing requests from scheduler" err="rpc error: code = Unimplemented desc = unknown service schedulerpb.SchedulerForQuerier" addr=127.0.0.1:9096
level=error ts=2025-04-10T19:35:59.048660032Z caller=scheduler_processor.go:111 component=querier msg="error processing requests from scheduler" err="rpc error: code = Unimplemented desc = unknown service schedulerpb.SchedulerForQuerier" addr=127.0.0.1:9096
level=error ts=2025-04-10T19:35:59.093817635Z caller=frontend_scheduler_worker.go:248 msg="error sending requests to scheduler" err="rpc error: code = Unimplemented desc = unknown service schedulerpb.SchedulerForFrontend" addr=127.0.0.1:9096
My config
# values-loki.yaml.gotmpl
# Values template for the Loki Helm chart. The template variable $env selects
# between "prod" settings and the settings used for every other environment.
deploymentMode: SimpleScalable
loki:
auth_enabled: false
# Settings placed under loki.commonConfig.
commonConfig:
path_prefix: /var/loki
# Renders 3 when $env is "prod", otherwise 2.
replication_factor: {{ eq $env "prod" | ternary "3" "2" }}
ring:
# NOTE(review): every scheduler error in this report is against
# addr=127.0.0.1:9096, i.e. this loopback address plus the
# server.grpc_listen_port configured below. Presumably each ring member
# advertises 127.0.0.1, so loki-read dials itself when it looks the
# scheduler up in the ring. Try removing this line and confirm.
instance_addr: 127.0.0.1
kvstore:
store: memberlist
# Compactor settings; retention is only rendered when $env is "prod".
compactor:
working_directory: /var/loki/compactor
# 10m in prod, 15m otherwise.
compaction_interval: {{ eq $env "prod" | ternary "10m" "15m" }}
# NOTE(review): Loki 3.x reportedly also requires
# compactor.delete_request_store when retention_enabled is true, and it is
# not set here. Confirm against the Loki configuration reference.
{{ if eq $env "prod" }}
retention_enabled: true
retention_delete_delay: 2h
{{ end }}
# Ingester settings. Indentation was lost in this paste, so the exact nesting
# of the keys below cannot be read from the text alone.
ingester:
lifecycler:
ring:
kvstore:
store: memberlist
# Same expression as commonConfig.replication_factor (3 in prod, else 2).
replication_factor: {{ eq $env "prod" | ternary "3" "2" }}
chunk_encoding: snappy
# Chunk timing: the prod values are longer than the non-prod values.
chunk_idle_period: {{ eq $env "prod" | ternary "30m" "15m" }}
chunk_retain_period: {{ eq $env "prod" | ternary "5m" "3m" }}
max_chunk_age: {{ eq $env "prod" | ternary "2h" "1h" }}
wal:
enabled: true
dir: /var/loki/wal
# replay_memory_ceiling is only rendered for prod.
{{ if eq $env "prod" }}
replay_memory_ceiling: 512MB
{{ end }}
# NOTE(review): cannot tell from this paste whether flush_op_timeout is nested
# under wal or directly under ingester. Verify against the original file.
flush_op_timeout: {{ eq $env "prod" | ternary "10m" "5m" }}
# Limits. In each ternary the first value is used for prod and the second for
# every other environment.
limits_config:
allow_structured_metadata: true
# Ingestion limits
ingestion_rate_mb: {{ eq $env "prod" | ternary "15" "5" }}
ingestion_burst_size_mb: {{ eq $env "prod" | ternary "30" "10" }}
per_stream_rate_limit: {{ eq $env "prod" | ternary "5MB" "3MB" }}
per_stream_rate_limit_burst: {{ eq $env "prod" | ternary "10MB" "5MB" }}
# Query limits
max_entries_limit_per_query: {{ eq $env "prod" | ternary "10000" "5000" }}
max_query_length: {{ eq $env "prod" | ternary "2160h" "721h" }}
max_query_parallelism: {{ eq $env "prod" | ternary "32" "16" }}
max_streams_per_user: {{ eq $env "prod" | ternary "50000" "10000" }}
max_query_series: {{ eq $env "prod" | ternary "1000" "500" }}
# Data retention and management
retention_period: {{ eq $env "prod" | ternary "8760h" "2190h" }} # 1 year (8760 hours) for prod; a quarter of that (2190 hours, about 3 months) otherwise, e.g. staging
reject_old_samples: true
reject_old_samples_max_age: {{ eq $env "prod" | ternary "336h" "168h" }}
creation_grace_period: 1h
# Query result freshness and query splitting
max_cache_freshness_per_query: {{ eq $env "prod" | ternary "1h" "10m" }}
split_queries_by_interval: {{ eq $env "prod" | ternary "1h" "30m" }}
{{ if eq $env "prod" }}
# Rendered for production only: cardinality and label limits, plus
# volume_enabled.
cardinality_limit: 100000
max_label_names_per_series: 30
max_label_name_length: 1024
max_label_value_length: 2048
volume_enabled: true
{{ end }}
# Memberlist settings: bind port and the single join target loki-memberlist.
memberlistConfig:
bind_port: 7946
join_members:
- loki-memberlist
# Query-range settings: result caching is on and uses the embedded cache.
query_range:
align_queries_with_step: true
cache_results: true
results_cache:
cache:
embedded_cache:
enabled: true
# Cache size and TTL: 1024 / 24h in prod, 512 / 12h otherwise.
max_size_mb: {{ eq $env "prod" | ternary "1024" "512" }}
ttl: {{ eq $env "prod" | ternary "24h" "12h" }}
compression: snappy
# NOTE(review): this is an explicit query-scheduler setting. Presumably it
# makes queriers and frontends find the scheduler through the ring, which
# would tie the reported scheduler errors to the ring's advertised address
# (see commonConfig.ring.instance_addr). Confirm.
query_scheduler:
use_scheduler_ring: true
querier:
# 10 in prod, 5 otherwise.
max_concurrent: {{ eq $env "prod" | ternary "10" "5" }}
pattern_ingester:
enabled: true
# Server listen ports, HTTP timeouts and gRPC receive size.
server:
http_listen_port: 3100
# 9096 is the port that appears in the reported errors (addr=127.0.0.1:9096).
# NOTE(review): presumably this differs from the chart's default gRPC port;
# confirm the chart's Services and probes expose 9096 as well.
grpc_listen_port: 9096
# Timeouts: 120s in prod, 60s otherwise.
http_server_read_timeout: {{ eq $env "prod" | ternary "120s" "60s" }}
http_server_write_timeout: {{ eq $env "prod" | ternary "120s" "60s" }}
# 20971520 = 20 * 1024 * 1024 and 10485760 = 10 * 1024 * 1024
# (20 MiB / 10 MiB, assuming the unit is bytes; TODO confirm).
grpc_server_max_recv_msg_size: {{ eq $env "prod" | ternary "20971520" "10485760" }}
# A single schema period starting 2024-04-01: tsdb index, s3 object store,
# schema v13. The date is quoted so it stays a string.
schemaConfig:
configs:
- from: "2024-04-01"
store: tsdb
object_store: s3
schema: v13
index:
prefix: index_
period: 24h
# Object storage of type s3. Chunks, ruler and admin all use the same
# $bucket, in $region.
storage:
type: s3
bucketNames:
chunks: {{ $bucket }}
ruler: {{ $bucket }}
admin: {{ $bucket }}
s3:
region: {{ $region }}
s3forcepathstyle: false
insecure: false
# storage_config repeats the bucket, region, insecure and s3forcepathstyle
# values from the storage section and adds a storage class and the
# tsdb_shipper paths.
# NOTE(review): the two sections presumably need to stay in sync. Verify
# which one takes precedence in the rendered Loki config.
storage_config:
aws:
bucketnames: {{ $bucket }}
insecure: false
region: {{ $region }}
storage_class: STANDARD_IA
s3forcepathstyle: false
tsdb_shipper:
active_index_directory: /var/loki/tsdb
cache_location: /var/loki/cache
# 24h in prod, 12h otherwise.
cache_ttl: {{ eq $env "prod" | ternary "24h" "12h" }}
# MinIO is disabled; the storage section above is of type s3.
minio:
enabled: false
# Service account named loki, annotated with an IAM role ARN built from
# $cluster and $region.
serviceAccount:
create: true
name: loki
annotations:
# NOTE(review): the account id 111111111 is presumably a redacted placeholder.
eks.amazonaws.com/role-arn: arn:aws:iam::111111111:role/{{ $cluster }}-loki-{{ $region }}
# Gateway scheduling: node affinity and tolerations are rendered from the
# $shared template value via toYaml and nindent.
gateway:
affinity:
nodeAffinity:
{{ $shared.nodeAffinity | toYaml | nindent 6 }}
tolerations:
{{ $shared.tolerations | toYaml | nindent 4 }}
# Backend target: replica count, autoscaling bounds and scheduling.
backend:
# replicas and minReplicas use the same expression (3 in prod, else 2).
replicas: {{ eq $env "prod" | ternary "3" "2" }}
autoscaling:
enabled: true
minReplicas: {{ eq $env "prod" | ternary "3" "2" }}
# Upper bound: 10 in prod, 5 otherwise.
maxReplicas: {{ eq $env "prod" | ternary "10" "5" }}
targetCPUUtilizationPercentage: 60
targetMemoryUtilizationPercentage: 80
# Node affinity and tolerations come from the $shared template value.
affinity:
nodeAffinity:
{{ $shared.nodeAffinity | toYaml | nindent 6 }}
tolerations:
{{ $shared.tolerations | toYaml | nindent 4 }}
# Read target (the loki-read pods whose logs are quoted in this report):
# replica count, autoscaling bounds and scheduling.
read:
# replicas and minReplicas use the same expression (3 in prod, else 2).
replicas: {{ eq $env "prod" | ternary "3" "2" }}
autoscaling:
enabled: true
minReplicas: {{ eq $env "prod" | ternary "3" "2" }}
# Upper bound: 10 in prod, 5 otherwise.
maxReplicas: {{ eq $env "prod" | ternary "10" "5" }}
targetCPUUtilizationPercentage: 60
targetMemoryUtilizationPercentage: 80
# Node affinity and tolerations come from the $shared template value.
affinity:
nodeAffinity:
{{ $shared.nodeAffinity | toYaml | nindent 6 }}
tolerations:
{{ $shared.tolerations | toYaml | nindent 4 }}
# Write target: replica count, autoscaling bounds and scheduling.
write:
# replicas and minReplicas use the same expression (3 in prod, else 2).
replicas: {{ eq $env "prod" | ternary "3" "2" }}
autoscaling:
enabled: true
minReplicas: {{ eq $env "prod" | ternary "3" "2" }}
# Upper bound: 10 in prod, 5 otherwise.
maxReplicas: {{ eq $env "prod" | ternary "10" "5" }}
targetCPUUtilizationPercentage: 60
targetMemoryUtilizationPercentage: 80
# Node affinity and tolerations come from the $shared template value.
affinity:
nodeAffinity:
{{ $shared.nodeAffinity | toYaml | nindent 6 }}
tolerations:
{{ $shared.tolerations | toYaml | nindent 4 }}
# resultsCache, chunksCache and lokiCanary carry the same scheduling stanza:
# node affinity and tolerations rendered from the $shared template value.
resultsCache:
affinity:
nodeAffinity:
{{ $shared.nodeAffinity | toYaml | nindent 6 }}
tolerations:
{{ $shared.tolerations | toYaml | nindent 4 }}
chunksCache:
affinity:
nodeAffinity:
{{ $shared.nodeAffinity | toYaml | nindent 6 }}
tolerations:
{{ $shared.tolerations | toYaml | nindent 4 }}
# The canary pod loki-canary-9ppvh appears in the tail-log lines quoted in
# this report.
lokiCanary:
affinity:
nodeAffinity:
{{ $shared.nodeAffinity | toYaml | nindent 6 }}
tolerations:
{{ $shared.tolerations | toYaml | nindent 4 }}
To Reproduce
Steps to reproduce the behavior:
- Deploy Loki using the Helm chart with `deploymentMode: SimpleScalable`
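- No explicit query-scheduler address configured in values.yaml (the only query-scheduler setting is `query_scheduler.use_scheduler_ring: true`, shown in the config above)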
- Check the logs of loki-read pods
Expected behavior
The loki-read pods should either:
- Successfully connect to the query-scheduler service if it's part of the SimpleScalable deployment
- Not attempt to connect to a scheduler if it's not required
Environment:
- Infrastructure: Kubernetes
- Deployment tool: Helm
- Kubernetes version: v1.30.9-eks-5d632ec
- Loki version: 3.4.3
- Helm chart version: latest
Screenshots, Promtail config, or terminal output