Name: rook-ceph-rgw-ceph-a-699b8bdb59-tn664 Namespace: openstack Priority: 0 Service Account: rook-ceph-rgw Node: instance/199.204.45.113 Start Time: Fri, 17 Apr 2026 04:53:18 +0000 Labels: app=rook-ceph-rgw app.kubernetes.io/component=cephobjectstores.ceph.rook.io app.kubernetes.io/created-by=rook-ceph-operator app.kubernetes.io/instance=ceph app.kubernetes.io/managed-by=rook-ceph-operator app.kubernetes.io/name=ceph-rgw app.kubernetes.io/part-of=ceph ceph_daemon_id=ceph ceph_daemon_type=rgw pod-template-hash=699b8bdb59 rgw=ceph rook.io/operator-namespace=rook-ceph rook_cluster=openstack rook_object_store=ceph Annotations: Status: Running IP: 10.0.0.74 IPs: IP: 10.0.0.74 Controlled By: ReplicaSet/rook-ceph-rgw-ceph-a-699b8bdb59 Init Containers: chown-container-data-dir: Container ID: containerd://b743900c19856e2186759a1379009f92bedc429d2f457f6c7857dc87d3ffc542 Image: harbor.atmosphere.dev/quay.io/ceph/ceph:v18.2.7 Image ID: harbor.atmosphere.dev/quay.io/ceph/ceph@sha256:1b9158ce28975f95def6a0ad459fa19f1336506074267a4b47c1bd914a00fec0 Port: Host Port: Command: chown Args: --verbose --recursive ceph:ceph /var/log/ceph /var/lib/ceph/crash /run/ceph /var/lib/ceph/rgw/ceph-ceph State: Terminated Reason: Completed Exit Code: 0 Started: Fri, 17 Apr 2026 04:53:19 +0000 Finished: Fri, 17 Apr 2026 04:53:19 +0000 Ready: True Restart Count: 0 Environment: Mounts: /etc/ceph from rook-config-override (ro) /etc/ceph/keyring-store/ from rook-ceph-rgw-ceph-a-keyring (ro) /run/ceph from ceph-daemons-sock-dir (rw) /var/lib/ceph/crash from rook-ceph-crash (rw) /var/lib/ceph/rgw/ceph-ceph from ceph-daemon-data (rw) /var/log/ceph from rook-ceph-log (rw) /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-6zc2m (ro) Containers: rgw: Container ID: containerd://ce9554cf23c174979c4c2363c9d2e0c7fc3ebb652b47f0b525a89c955d5bb072 Image: harbor.atmosphere.dev/quay.io/ceph/ceph:v18.2.7 Image ID: 
harbor.atmosphere.dev/quay.io/ceph/ceph@sha256:1b9158ce28975f95def6a0ad459fa19f1336506074267a4b47c1bd914a00fec0 Port: Host Port: Command: radosgw Args: --fsid=4837cbf8-4f90-4300-b3f6-726c9b9f89b4 --keyring=/etc/ceph/keyring-store/keyring --default-log-to-stderr=true --default-err-to-stderr=true --default-mon-cluster-log-to-stderr=true --default-log-stderr-prefix=debug --default-log-to-file=false --default-mon-cluster-log-to-file=false --mon-host=$(ROOK_CEPH_MON_HOST) --mon-initial-members=$(ROOK_CEPH_MON_INITIAL_MEMBERS) --id=rgw.ceph.a --setuser=ceph --setgroup=ceph --foreground --rgw-frontends=beast port=8080 --host=$(POD_NAME) --rgw-mime-types-file=/etc/ceph/rgw/mime.types --rgw-realm=ceph --rgw-zonegroup=ceph --rgw-zone=ceph State: Running Started: Fri, 17 Apr 2026 04:53:19 +0000 Ready: True Restart Count: 0 Readiness: exec [bash -c #!/usr/bin/env bash PROBE_TYPE="readiness" PROBE_PORT="8080" PROBE_PROTOCOL="HTTP" # standard bash codes start at 126 and progress upward. pick error codes from 125 downward for # script as to allow curl to output new error codes and still return a distinctive number. USAGE_ERR_CODE=125 PROBE_ERR_CODE=124 # curl error codes: 1-123 STARTUP_TYPE='startup' READINESS_TYPE='readiness' RGW_URL="$PROBE_PROTOCOL://0.0.0.0:$PROBE_PORT" function check() { local URL="$1" # --insecure - don't validate ssl if using secure port only # --silent - don't output progress info # --output /dev/stderr - output HTML header to stdout (good for debugging) # --write-out '%{response_code}' - print the HTTP response code to stdout curl --insecure --silent --output /dev/stderr --write-out '%{response_code}' "$URL" } http_response="$(check "$RGW_URL")" retcode=$? if [[ $retcode -ne 0 ]]; then # if this is the startup probe, always returning failure. if startup probe passes, all subsequent # probes can rely on the assumption that the health check was once succeeding without errors. 
# if this is the readiness probe, we know that curl was previously working correctly in the # startup probe, so curl error most likely means some new error with the RGW. echo "RGW health check failed with error code: $retcode. the RGW likely cannot be reached by clients" >/dev/stderr exit $retcode fi RGW_RATE_LIMITING_RESPONSE=503 RGW_MISCONFIGURATION_RESPONSE=500 if [[ $http_response -ge 200 ]] && [[ $http_response -lt 400 ]]; then # 200-399 are successful responses. same behavior as Kubernetes' HTTP probe exit 0 elif [[ $http_response -eq $RGW_RATE_LIMITING_RESPONSE ]]; then # S3's '503: slow down' code is not an error but an indication that RGW is throttling client # traffic. failing the readiness check here would only cause an increase in client connections on # other RGWs and likely cause those to fail also in a cascade. i.e., a special healthy response. echo "INFO: RGW is rate limiting" >/dev/stderr exit 0 elif [[ $http_response -eq $RGW_MISCONFIGURATION_RESPONSE ]]; then # can't specifically determine if the RGW is running or not. most likely a misconfiguration. case "$PROBE_TYPE" in "$STARTUP_TYPE") # fail until we can accurately get a valid healthy response when runtime starts. echo 'FAIL: HTTP code 500 suggests an RGW misconfiguration.' >/dev/stderr exit $PROBE_ERR_CODE ;; "$READINESS_TYPE") # config likely modified at runtime which could result in all RGWs failing this check. # occasional client failures are still better than total failure, so ignore this echo 'WARN: HTTP code 500 suggests an RGW misconfiguration' >/dev/stderr exit 0 ;; *) # prior arg validation means this path should never be activated, but keep to be safe echo "ERROR: probe type is unknown: $PROBE_TYPE" >/dev/stderr exit $USAGE_ERR_CODE ;; esac else # anything else is a failing response. 
same behavior as Kubernetes' HTTP probe echo "FAIL: received an HTTP error code: $http_response" exit $PROBE_ERR_CODE fi ] delay=10s timeout=5s period=10s #success=3 #failure=3 Startup: exec [bash -c #!/usr/bin/env bash PROBE_TYPE="startup" PROBE_PORT="8080" PROBE_PROTOCOL="HTTP" # standard bash codes start at 126 and progress upward. pick error codes from 125 downward for # script as to allow curl to output new error codes and still return a distinctive number. USAGE_ERR_CODE=125 PROBE_ERR_CODE=124 # curl error codes: 1-123 STARTUP_TYPE='startup' READINESS_TYPE='readiness' RGW_URL="$PROBE_PROTOCOL://0.0.0.0:$PROBE_PORT" function check() { local URL="$1" # --insecure - don't validate ssl if using secure port only # --silent - don't output progress info # --output /dev/stderr - output HTML header to stdout (good for debugging) # --write-out '%{response_code}' - print the HTTP response code to stdout curl --insecure --silent --output /dev/stderr --write-out '%{response_code}' "$URL" } http_response="$(check "$RGW_URL")" retcode=$? if [[ $retcode -ne 0 ]]; then # if this is the startup probe, always returning failure. if startup probe passes, all subsequent # probes can rely on the assumption that the health check was once succeeding without errors. # if this is the readiness probe, we know that curl was previously working correctly in the # startup probe, so curl error most likely means some new error with the RGW. echo "RGW health check failed with error code: $retcode. the RGW likely cannot be reached by clients" >/dev/stderr exit $retcode fi RGW_RATE_LIMITING_RESPONSE=503 RGW_MISCONFIGURATION_RESPONSE=500 if [[ $http_response -ge 200 ]] && [[ $http_response -lt 400 ]]; then # 200-399 are successful responses. same behavior as Kubernetes' HTTP probe exit 0 elif [[ $http_response -eq $RGW_RATE_LIMITING_RESPONSE ]]; then # S3's '503: slow down' code is not an error but an indication that RGW is throttling client # traffic. 
failing the readiness check here would only cause an increase in client connections on # other RGWs and likely cause those to fail also in a cascade. i.e., a special healthy response. echo "INFO: RGW is rate limiting" >/dev/stderr exit 0 elif [[ $http_response -eq $RGW_MISCONFIGURATION_RESPONSE ]]; then # can't specifically determine if the RGW is running or not. most likely a misconfiguration. case "$PROBE_TYPE" in "$STARTUP_TYPE") # fail until we can accurately get a valid healthy response when runtime starts. echo 'FAIL: HTTP code 500 suggests an RGW misconfiguration.' >/dev/stderr exit $PROBE_ERR_CODE ;; "$READINESS_TYPE") # config likely modified at runtime which could result in all RGWs failing this check. # occasional client failures are still better than total failure, so ignore this echo 'WARN: HTTP code 500 suggests an RGW misconfiguration' >/dev/stderr exit 0 ;; *) # prior arg validation means this path should never be activated, but keep to be safe echo "ERROR: probe type is unknown: $PROBE_TYPE" >/dev/stderr exit $USAGE_ERR_CODE ;; esac else # anything else is a failing response. 
same behavior as Kubernetes' HTTP probe echo "FAIL: received an HTTP error code: $http_response" exit $PROBE_ERR_CODE fi ] delay=10s timeout=5s period=10s #success=1 #failure=33 Environment: CONTAINER_IMAGE: harbor.atmosphere.dev/quay.io/ceph/ceph:v18.2.7 POD_NAME: rook-ceph-rgw-ceph-a-699b8bdb59-tn664 (v1:metadata.name) POD_NAMESPACE: openstack (v1:metadata.namespace) NODE_NAME: (v1:spec.nodeName) POD_MEMORY_LIMIT: node allocatable (limits.memory) POD_MEMORY_REQUEST: 0 (requests.memory) POD_CPU_LIMIT: node allocatable (limits.cpu) POD_CPU_REQUEST: 0 (requests.cpu) CEPH_USE_RANDOM_NONCE: true ROOK_MSGR2: msgr2_false_encryption_false_compression_false ROOK_CEPH_MON_HOST: Optional: false ROOK_CEPH_MON_INITIAL_MEMBERS: Optional: false Mounts: /etc/ceph from rook-config-override (ro) /etc/ceph/keyring-store/ from rook-ceph-rgw-ceph-a-keyring (ro) /etc/ceph/rgw from rook-ceph-rgw-ceph-mime-types (ro) /run/ceph from ceph-daemons-sock-dir (rw) /var/lib/ceph/crash from rook-ceph-crash (rw) /var/lib/ceph/rgw/ceph-ceph from ceph-daemon-data (rw) /var/log/ceph from rook-ceph-log (rw) /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-6zc2m (ro) log-collector: Container ID: containerd://c1b71284094a5d3952d4521a36ee5df181c55e17df276213c06a802c24d03263 Image: harbor.atmosphere.dev/quay.io/ceph/ceph:v18.2.7 Image ID: harbor.atmosphere.dev/quay.io/ceph/ceph@sha256:1b9158ce28975f95def6a0ad459fa19f1336506074267a4b47c1bd914a00fec0 Port: Host Port: Command: /bin/bash -x -e -m -c CEPH_CLIENT_ID=ceph-client.rgw.ceph.a PERIODICITY=daily LOG_ROTATE_CEPH_FILE=/etc/logrotate.d/ceph LOG_MAX_SIZE=500M ROTATE=7 # edit the logrotate file to only rotate a specific daemon log # otherwise we will logrotate log files without reloading certain daemons # this might happen when multiple daemons run on the same machine sed -i "s|*.log|$CEPH_CLIENT_ID.log|" "$LOG_ROTATE_CEPH_FILE" # replace default daily with given user input sed --in-place "s/daily/$PERIODICITY/g" 
"$LOG_ROTATE_CEPH_FILE" # replace rotate count, default 7 for all ceph daemons other than rbd-mirror sed --in-place "s/rotate 7/rotate $ROTATE/g" "$LOG_ROTATE_CEPH_FILE" if [ "$LOG_MAX_SIZE" != "0" ]; then # adding maxsize $LOG_MAX_SIZE at the 4th line of the logrotate config file with 4 spaces to maintain indentation sed --in-place "4i \ \ \ \ maxsize $LOG_MAX_SIZE" "$LOG_ROTATE_CEPH_FILE" fi while true; do # we don't force the logrotate but we let the logrotate binary handle the rotation based on user's input for periodicity and size logrotate --verbose "$LOG_ROTATE_CEPH_FILE" sleep 15m done State: Running Started: Fri, 17 Apr 2026 04:53:19 +0000 Ready: True Restart Count: 0 Limits: memory: 1Gi Requests: cpu: 100m memory: 100Mi Environment: Mounts: /etc/ceph from rook-config-override (ro) /run/ceph from ceph-daemons-sock-dir (rw) /var/lib/ceph/crash from rook-ceph-crash (rw) /var/log/ceph from rook-ceph-log (rw) /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-6zc2m (ro) Conditions: Type Status Initialized True Ready True ContainersReady True PodScheduled True Volumes: rook-config-override: Type: Projected (a volume that contains injected data from multiple sources) ConfigMapName: rook-config-override ConfigMapOptional: rook-ceph-rgw-ceph-a-keyring: Type: Secret (a volume populated by a Secret) SecretName: rook-ceph-rgw-ceph-a-keyring Optional: false ceph-daemons-sock-dir: Type: HostPath (bare host directory volume) Path: /var/lib/rook/exporter HostPathType: DirectoryOrCreate rook-ceph-log: Type: HostPath (bare host directory volume) Path: /var/lib/rook/openstack/log HostPathType: rook-ceph-crash: Type: HostPath (bare host directory volume) Path: /var/lib/rook/openstack/crash HostPathType: ceph-daemon-data: Type: EmptyDir (a temporary directory that shares a pod's lifetime) Medium: SizeLimit: rook-ceph-rgw-ceph-mime-types: Type: ConfigMap (a volume populated by a ConfigMap) Name: rook-ceph-rgw-ceph-mime-types Optional: false 
kube-api-access-6zc2m: Type: Projected (a volume that contains injected data from multiple sources) TokenExpirationSeconds: 3607 ConfigMapName: kube-root-ca.crt ConfigMapOptional: DownwardAPI: true QoS Class: Burstable Node-Selectors: Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s node.kubernetes.io/unreachable:NoExecute op=Exists for 5s Events: Type Reason Age From Message ---- ------ ---- ---- ------- Normal Scheduled 24m default-scheduler Successfully assigned openstack/rook-ceph-rgw-ceph-a-699b8bdb59-tn664 to instance Normal Pulled 24m kubelet Container image "harbor.atmosphere.dev/quay.io/ceph/ceph:v18.2.7" already present on machine Normal Created 24m kubelet Created container chown-container-data-dir Normal Started 24m kubelet Started container chown-container-data-dir Normal Pulled 24m kubelet Container image "harbor.atmosphere.dev/quay.io/ceph/ceph:v18.2.7" already present on machine Normal Created 24m kubelet Created container rgw Normal Started 24m kubelet Started container rgw Normal Pulled 24m kubelet Container image "harbor.atmosphere.dev/quay.io/ceph/ceph:v18.2.7" already present on machine Normal Created 24m kubelet Created container log-collector Normal Started 24m kubelet Started container log-collector