Skip to content

Kristjan/container #196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions examples/container/container-id.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/bin/bash
# this script is used to get the container id of the running container
# for reference, see https://github.com/DataDog/datadogpy/blob/master/datadog/dogstatsd/container.py
# include it and call get_container_id() to get the container id, or call it directly,
# e.g "export DD_CONTAINER_ID=$(./container-id.sh)"

set -u

# --- files inspected by the functions below ---------------------------------
# cgroup membership file of the current process; scanned by read_cgroup_path.
CGROUP_PATH="/proc/self/cgroup"
# CGROUP_MOUNT_PATH="/sys/fs/cgroup" # cgroup mount path.
CGROUP_NS_PATH="/proc/self/ns/cgroup" # path to the cgroup namespace file.
# CGROUPV1_BASE_CONTROLLER="memory" # controller used to identify the container-id in cgroup v1 (memory).
# CGROUPV2_BASE_CONTROLLER="" # controller used to identify the container-id in cgroup v2.
# is_host_cgroup_namespace compares the inode of $CGROUP_NS_PATH against this value.
HOST_CGROUP_NAMESPACE_INODE=4026531835 # 0xEFFFFFFB # inode of the host cgroup namespace.
MOUNTINFO_PATH="/proc/self/mountinfo" # path to the mountinfo file.

# --- regular expressions (bash ERE, used with [[ =~ ]]) ----------------------
# A cgroup line has three colon-separated fields: a numeric id, a second field
# that may be empty, and the remainder. read_cgroup_path uses capture group 3
# as the cgroup path.
LINE_RE="^([0-9]+):([^:]*):(.+)$" # regex to parse the cgroup file.

# Accepted id shapes (lowercase hex only):
#   UUID_SOURCE      - 8-4-4-4-12 hex groups separated by "-" or "_"
#   CONTAINER_SOURCE - exactly 64 hex characters
#   TASK_SOURCE      - 32 hex characters, a "-", then one or more digits
UUID_SOURCE="[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"
CONTAINER_SOURCE="[0-9a-f]{64}"
TASK_SOURCE="[0-9a-f]{32}-[0-9]+"
# Matches a path component ending in one of the shapes above, optionally
# preceded by an arbitrary prefix and followed by ".scope". The id itself is
# capture group 2. The pattern is anchored at the end only.
CONTAINER_RE="(.+)?($UUID_SOURCE|$CONTAINER_SOURCE|$TASK_SOURCE)(\.scope)?$" # regex to match the container id.

# Report whether the current process is in the host cgroup namespace.
# Returns 0 when $CGROUP_NS_PATH exists and its inode equals
# $HOST_CGROUP_NAMESPACE_INODE; returns 1 in every other case, including a
# missing namespace file or a failing stat.
is_host_cgroup_namespace()
{
    # `local` keeps the scratch variable out of the caller's shell when this
    # file is sourced.
    local ns_inode

    # without the namespace file there is nothing to compare against.
    [ -f "$CGROUP_NS_PATH" ] || return 1

    # -L dereferences symlinks, %i prints the inode number. Bail out early if
    # stat fails instead of feeding an empty string to the numeric test below.
    ns_inode=$(stat -Lc %i "$CGROUP_NS_PATH") || return 1

    [ "$ns_inode" -eq "$HOST_CGROUP_NAMESPACE_INODE" ] || return 1
    return 0
}

# Print the inode number of the cgroup namespace file ($CGROUP_NS_PATH) on
# stdout. The return status is that of stat, so a caller can tell a real inode
# from a failed lookup (nothing is printed on stdout in that case).
get_cgroup_inode()
{
    # -L dereferences symlinks, %i prints the inode number followed by a
    # newline. Calling stat directly avoids a scratch variable that would
    # otherwise leak into the caller's shell when this file is sourced.
    stat -Lc %i "$CGROUP_NS_PATH"
}

# Print the first container id found in the cgroup file ($CGROUP_PATH).
# Every line is parsed with $LINE_RE; its path field is split on "/" and each
# component is tested against $CONTAINER_RE.
# Returns 0 after printing the id, 1 when no line yields a match.
read_cgroup_path()
{
    # keep loop state local so sourcing this file does not clobber variables
    # of the same name in the caller's shell.
    local line path part
    local -a path_parts

    # `|| [[ -n "$line" ]]` also processes a final line that lacks a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # strip leading and trailing whitespace with parameter expansion
        # rather than forking `echo | sed` for every line.
        line="${line#"${line%%[![:space:]]*}"}"
        line="${line%"${line##*[![:space:]]}"}"

        # skip lines that are not "<id>:<controllers>:<path>".
        [[ "$line" =~ $LINE_RE ]] || continue

        # capture group 3 is the cgroup path; save it before the next
        # regex match overwrites BASH_REMATCH.
        path="${BASH_REMATCH[3]}"

        # split the path on "/" and look at each component in order.
        IFS='/' read -r -a path_parts <<< "$path"
        for part in "${path_parts[@]}"; do
            if [[ "$part" =~ $CONTAINER_RE ]]; then
                # group 2 is the id itself, without prefix or ".scope".
                echo "${BASH_REMATCH[2]}"
                return 0
            fi
        done
    done < "$CGROUP_PATH"
    return 1
}

# Print the first container id found in the mountinfo file ($MOUNTINFO_PATH).
# this is needed for cgroup v2, where the cgroup file is empty.
# the lines look like this:
#647 646 0:55 /docker/47e6bf8be66c1a5206309fffa130784a157d42bb4d8bc4151646430a437d22c8 /sys/fs/cgroup/cpuset ro,nosuid,nodev,noexec,relatime master:130 - cgroup cpuset rw,cpuset
# and we want to extract the container id from the path, which is the fourth
# field of each line.
# see https://stackoverflow.com/questions/68816329/how-to-get-docker-container-id-from-within-the-container-with-cgroup-v2
# Returns 0 after printing the id, 1 when no line yields a match.
read_mountinfo()
{
    # keep loop state local so sourcing this file does not clobber variables
    # of the same name in the caller's shell.
    local line path part
    local -a parts path_parts

    # `|| [[ -n "$line" ]]` also processes a final line that lacks a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # strip leading and trailing whitespace without forking sed.
        line="${line#"${line%%[![:space:]]*}"}"
        line="${line%"${line##*[![:space:]]}"}"

        # split the line on spaces; the fourth field is the path we want.
        IFS=' ' read -r -a parts <<< "$line"

        # `:-` keeps `set -u` from aborting the shell on a line that has
        # fewer than four fields; such lines are simply skipped.
        path="${parts[3]:-}"
        [[ -n "$path" ]] || continue

        # split the path on "/" and look at each component in order.
        IFS='/' read -r -a path_parts <<< "$path"
        for part in "${path_parts[@]}"; do
            if [[ "$part" =~ $CONTAINER_RE ]]; then
                # group 2 is the id itself, without prefix or ".scope".
                echo "${BASH_REMATCH[2]}"
                return 0
            fi
        done
    done < "$MOUNTINFO_PATH"

    # nothing matched: report failure, consistent with read_cgroup_path.
    return 1
}

# Print the identifier for this process on stdout.
# statsd uses either the proper container id (64 characters) or the cgroup
# inode number as an identifier, "ci-<container-id>" or "in-<cgroup-inode>"
# respectively:
#   - in the host cgroup namespace the id is read from the cgroup file and
#     printed as "ci-<container-id>";
#   - otherwise the inode of the cgroup namespace is printed as
#     "in-<cgroup-inode>".
# Returns 0 on success. On failure a message is written to stderr and a
# non-zero status is returned.
get_container_id()
{
    # locals keep scratch state out of the caller's shell when sourced.
    local container_id inode status

    if is_host_cgroup_namespace; then
        # assignment is kept separate from `local` so $? reflects the
        # command substitution, not the `local` builtin.
        container_id=$(read_cgroup_path)
        status=$?
        if [ "$status" -ne 0 ]; then
            echo "Failed reading container id from cgroup" >&2
            # `return`, not `exit`: this function is meant to be callable
            # from a shell that sourced this file, which `exit` would kill.
            return "$status"
        fi
        echo "ci-$container_id"
        return 0
    fi

    # never emit a bare "in-" when the inode could not be determined.
    if ! inode=$(get_cgroup_inode) || [ -z "$inode" ]; then
        echo "Failed reading cgroup namespace inode" >&2
        return 1
    fi
    echo "in-$inode"
    return 0
}

# When the script is executed directly (as opposed to being sourced), print
# the identifier so that `export DD_CONTAINER_ID=$(./container-id.sh)` works.
# The script's exit status is the status of get_container_id.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
    get_container_id
fi
3 changes: 3 additions & 0 deletions include/datadog/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ enum class ConfigName : char {
SPAN_SAMPLING_RULES,
TRACE_BAGGAGE_MAX_BYTES,
TRACE_BAGGAGE_MAX_ITEMS,
ENTITY_ID,
EXTERNAL_ENV,
CONTAINER_ID,
};

// Represents metadata for configuration parameters
Expand Down
13 changes: 13 additions & 0 deletions include/datadog/datadog_agent_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,18 @@ struct DatadogAgentConfig {
// How often, in seconds, to query the Datadog Agent for remote configuration
// updates.
Optional<double> remote_configuration_poll_interval_seconds;
// External environment, used to populate headers. Overridden by the
// DD_EXTERNAL_ENV variable, usually supplied by the Datadog Admission
// Controller.
Optional<std::string> external_env;
// Container ID, used to populate trace headers. Overridden by the
// `DD_CONTAINER_ID` environment variable. This is used to populate a
// header to help the datadog agent identify the container, from an
// external origin discovery script.
// Typically "ci-<container_id>" where container_id is the GUID container
// id of the container, or "in-<inode>" where inode is the inode of the
// container's cgroup.
Optional<std::string> container_id;

static Expected<HTTPClient::URL> parse(StringView);
};
Expand All @@ -89,6 +101,7 @@ class FinalizedDatadogAgentConfig {
std::chrono::steady_clock::duration shutdown_timeout;
std::chrono::steady_clock::duration remote_configuration_poll_interval;
std::unordered_map<ConfigName, ConfigMetadata> metadata;
std::unordered_map<std::string, std::string> extra_headers;
};

Expected<FinalizedDatadogAgentConfig> finalize_config(
Expand Down
3 changes: 3 additions & 0 deletions include/datadog/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ namespace environment {
// preprocessor is used so that the DD_* symbols are listed exactly once.
#define LIST_ENVIRONMENT_VARIABLES(MACRO) \
MACRO(DD_AGENT_HOST) \
MACRO(DD_CONTAINER_ID) \
MACRO(DD_ENV) \
MACRO(DD_ENTITY_ID) \
MACRO(DD_EXTERNAL_ENV) \
MACRO(DD_INSTRUMENTATION_TELEMETRY_ENABLED) \
MACRO(DD_PROPAGATION_STYLE_EXTRACT) \
MACRO(DD_PROPAGATION_STYLE_INJECT) \
Expand Down
4 changes: 4 additions & 0 deletions include/datadog/tracer_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ struct TracerConfig {
/// The maximum amount of bytes allowed to be written during tracing context
/// injection.
Optional<std::size_t> baggage_max_bytes;
// Entity ID to add as internal tag. Overridden by the `DD_ENTITY_ID`
// environment variable, usually supplied by Datadog's Admission Controller to
// help identify traces coming from kubernetes pods.
Optional<std::string> entity_id;
};

// `FinalizedTracerConfig` contains `Tracer` implementation details derived from
Expand Down
5 changes: 5 additions & 0 deletions src/datadog/datadog_agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ DatadogAgent::DatadogAgent(
config.remote_configuration_poll_interval,
[this] { get_and_apply_remote_configuration_updates(); }));
}

extra_headers = config.extra_headers;
}

DatadogAgent::~DatadogAgent() {
Expand Down Expand Up @@ -298,6 +300,9 @@ void DatadogAgent::flush() {
headers.set("Datadog-Meta-Tracer-Version",
tracer_signature_.library_version);
headers.set("X-Datadog-Trace-Count", std::to_string(trace_chunks.size()));
for (const auto& [key, value] : extra_headers) {
headers.set(key, value);
}
};

// This is the callback for the HTTP response. It's invoked
Expand Down
1 change: 1 addition & 0 deletions src/datadog/datadog_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class DatadogAgent : public Collector {
HTTPClient::ErrorHandler telemetry_on_error_;
std::chrono::steady_clock::duration request_timeout_;
std::chrono::steady_clock::duration shutdown_timeout_;
std::unordered_map<std::string, std::string> extra_headers;

remote_config::Manager remote_config_;
TracerSignature tracer_signature_;
Expand Down
23 changes: 23 additions & 0 deletions src/datadog/datadog_agent_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ Expected<DatadogAgentConfig> load_datadog_agent_env_config() {
env_config.url = std::move(configured_url);
}

if (auto external_env = lookup(environment::DD_EXTERNAL_ENV)) {
env_config.external_env = std::string{*external_env};
}
if (auto container_id = lookup(environment::DD_CONTAINER_ID)) {
env_config.container_id = std::string{*container_id};
}

return env_config;
}

Expand Down Expand Up @@ -144,6 +151,22 @@ Expected<FinalizedDatadogAgentConfig> finalize_config(
result.metadata[ConfigName::AGENT_URL] =
ConfigMetadata(ConfigName::AGENT_URL, url, origin);

const auto [env_origin, env] =
pick(env_config->external_env, user_config.external_env, std::string{});
result.metadata[ConfigName::EXTERNAL_ENV] =
ConfigMetadata(ConfigName::EXTERNAL_ENV, env, env_origin);
if (!env.empty()) {
result.extra_headers["Datadog-External-Env"] = env;
}

const auto [container_id_origin, container_id] =
pick(env_config->container_id, user_config.container_id, std::string{});
result.metadata[ConfigName::CONTAINER_ID] = ConfigMetadata(
ConfigName::CONTAINER_ID, container_id, container_id_origin);
if (!container_id.empty()) {
result.extra_headers["Datadog-Container-Id"] = container_id;
}

return result;
}

Expand Down
1 change: 1 addition & 0 deletions src/datadog/tags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const std::string span_type = "span.type";
const std::string operation_name = "operation";
const std::string resource_name = "resource.name";
const std::string version = "version";
const std::string entity_id = "dd.internal.entity_id";

namespace internal {

Expand Down
1 change: 1 addition & 0 deletions src/datadog/tags.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ extern const std::string span_type;
extern const std::string operation_name;
extern const std::string resource_name;
extern const std::string version;
extern const std::string entity_id;

namespace internal {
extern const std::string propagation_error;
Expand Down
16 changes: 16 additions & 0 deletions src/datadog/tracer_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "parse_util.h"
#include "platform_util.h"
#include "string_util.h"
#include "tags.h"

namespace datadog {
namespace tracing {
Expand Down Expand Up @@ -148,6 +149,10 @@ Expected<TracerConfig> load_tracer_env_config(Logger &logger) {
env_cfg.baggage_max_bytes = std::move(*maybe_value);
}

if (auto entity_id = lookup(environment::DD_ENTITY_ID)) {
env_cfg.entity_id = std::string{*entity_id};
}

// PropagationStyle
// Print a warning if a questionable combination of environment variables is
// defined.
Expand Down Expand Up @@ -304,6 +309,17 @@ Expected<FinalizedTracerConfig> finalize_config(const TracerConfig &user_config,
final_config.metadata[ConfigName::TAGS] = ConfigMetadata(
ConfigName::TAGS, join_tags(final_config.defaults.tags), origin);

// DD_ENTITY_ID
std::string entity_id;
std::tie(origin, entity_id) =
pick(env_config->entity_id, user_config.entity_id, "");
final_config.metadata[ConfigName::ENTITY_ID] = ConfigMetadata(
ConfigName::ENTITY_ID, entity_id, origin);
if (!entity_id.empty()) {
final_config.defaults.tags.insert_or_assign(
tags::entity_id, entity_id);
}

// Extraction Styles
const std::vector<PropagationStyle> default_propagation_styles{
PropagationStyle::DATADOG, PropagationStyle::W3C,
Expand Down
65 changes: 65 additions & 0 deletions test/test_datadog_agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
#include <chrono>
#include <iostream>

#include "common/environment.h"
#include "mocks/event_schedulers.h"
#include "mocks/http_clients.h"
#include "mocks/loggers.h"
#include "test.h"

using namespace datadog::tracing;
using namespace datadog::test;
using namespace std::chrono_literals;

TEST_CASE("CollectorResponse", "[datadog_agent]") {
Expand Down Expand Up @@ -170,6 +172,69 @@ TEST_CASE("CollectorResponse", "[datadog_agent]") {
}
}

// Verifies that `config.agent.external_env` and `config.agent.container_id`,
// as well as the `DD_EXTERNAL_ENV` and `DD_CONTAINER_ID` environment variables
// that take precedence over them, show up as the `Datadog-External-Env` and
// `Datadog-Container-Id` headers on the request captured by the mock HTTP
// client.
TEST_CASE("RequestHeaders", "[datadog_agent]") {
  TracerConfig config;
  config.service = "testsvc";
  const auto logger =
      std::make_shared<MockLogger>(std::cerr, MockLogger::ERRORS_ONLY);
  const auto event_scheduler = std::make_shared<MockEventScheduler>();
  const auto http_client = std::make_shared<MockHTTPClient>();
  config.logger = logger;
  config.agent.event_scheduler = event_scheduler;
  config.agent.http_client = http_client;
  // Tests currently only cover sending traces to the agent.
  // Submitting telemetry performs essentially the same steps, but may be added
  // in the future.
  config.telemetry.enabled = false;
  config.agent.external_env = "extenv1";
  config.agent.container_id = "container1";
  auto finalized = finalize_config(config);
  REQUIRE(finalized);

  // In each section the tracer and span live in an inner scope that ends
  // before the headers are inspected -- presumably their destruction is what
  // pushes the trace (and therefore the headers) to the mock client.

  SECTION("external_env") {
    {
      Tracer tracer{*finalized};
      auto span = tracer.create_span();
      (void)span;
    }
    // `external_env` is published as "Datadog-External-Env".
    REQUIRE(http_client->request_headers.items["Datadog-External-Env"] ==
            "extenv1");
  }
  SECTION("DD_EXTERNAL_ENV") {
    {
      // The environment variable wins over the value set in `config`.
      const EnvGuard guard{"DD_EXTERNAL_ENV", "extenv2"};
      auto finalized2 = finalize_config(config);
      // Fail the test cleanly instead of dereferencing an error value.
      REQUIRE(finalized2);
      Tracer tracer{*finalized2};
      auto span = tracer.create_span();
      (void)span;
    }
    REQUIRE(http_client->request_headers.items["Datadog-External-Env"] ==
            "extenv2");
  }

  SECTION("container_id") {
    {
      Tracer tracer{*finalized};
      auto span = tracer.create_span();
      (void)span;
    }
    REQUIRE(http_client->request_headers.items["Datadog-Container-Id"] ==
            "container1");
  }
  SECTION("DD_CONTAINER_ID") {
    {
      // The environment variable wins over the value set in `config`.
      const EnvGuard guard{"DD_CONTAINER_ID", "container2"};
      auto finalized2 = finalize_config(config);
      // Fail the test cleanly instead of dereferencing an error value.
      REQUIRE(finalized2);
      Tracer tracer{*finalized2};
      auto span = tracer.create_span();
      (void)span;
    }
    REQUIRE(http_client->request_headers.items["Datadog-Container-Id"] ==
            "container2");
  }
}

// NOTE: `report_telemetry` is too vague for now.
// Does it mean no telemetry at all or just metrics are not generated?
//
Expand Down
Loading