Kamil otel #6796

Draft · wants to merge 18 commits into main

217 changes: 217 additions & 0 deletions configs/gateway.config.ts
@@ -0,0 +1,217 @@
/**
 * Hive Gateway config with a Hive Console-specific span processor.
*
* ```sh
* docker run --name hive-gateway --rm -p 4000:4000 \
* -v $(pwd)/configs/gateway.config.ts:/gateway/gateway.config.ts \
 *   -e HIVE_ORGANIZATION_ACCESS_TOKEN="<organization_access_key>" \
 *   -e HIVE_TARGET_REF="<target_ref>" \
 *   -e DEBUG=1 \
* ghcr.io/graphql-hive/gateway supergraph \
* "http://host.docker.internal:3001/artifacts/v1/<target_id>" \
* --hive-cdn-key '<cdn_key>'
 * ```
*/
import { createOtlpHttpExporter, defineConfig, GatewayPlugin } from '@graphql-hive/gateway';
import type { MeshFetchRequestInit } from '@graphql-mesh/types';
import { Span, SpanProcessor } from '@opentelemetry/sdk-trace-base';

// The following plugin traces the fetch calls made by Mesh: it records the
// headers of every upstream request (served back at GET /upstream-fetch for
// debugging) and tags the active span with any GraphQL error codes.
const useOnFetchTracer = (): GatewayPlugin => {
const upstreamCallHeaders: Array<{
url: string;
headers: MeshFetchRequestInit['headers'];
}> = [];

return {
onFetch({ url, options }) {
upstreamCallHeaders.push({ url, headers: options.headers });
},
onRequest({ request, url, endResponse, fetchAPI }) {
if (url.pathname === '/upstream-fetch' && request.method === 'GET') {
endResponse(fetchAPI.Response.json(upstreamCallHeaders));
return;
}
},
onExecute({ context }) {
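      // NOTE: pulls the first value out of the active context's private
      // `_currentContext` map to get the current span; there is no public
      // API for enumerating context values here.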
const span: Span | undefined = context.opentelemetry
.activeContext()
._currentContext.values()
.next().value;

if (!span) {
return;
}
return {
onExecuteDone(ctx) {
const errors = new Set<string>();
if (ctx.result?.errors) {
for (const err of ctx.result.errors) {
if (err.extensions?.code && typeof err.extensions.code === 'string') {
errors.add(err.extensions.code);
}
}
}
if (errors.size) {
span.setAttribute('hive.graphql.error.codes', Array.from(errors).join(','));
}
},
};
},
};
};

class HiveTracingSpanProcessor implements SpanProcessor {
private activeSpans: Map<string, Map<string, Span>> = new Map();
private rootSpanIds: Map<string, string> = new Map();
private subgraphNames: Map<string, Set<string>> = new Map();

onStart(span: Span): void {
const spanContext = span.spanContext();
const traceId = spanContext.traceId;
const spanId = spanContext.spanId;

// Initialize trace data structures if needed
if (!this.activeSpans.has(traceId)) {
this.activeSpans.set(traceId, new Map());
}
if (!this.subgraphNames.has(traceId)) {
this.subgraphNames.set(traceId, new Set());
}

this.activeSpans.get(traceId)!.set(spanId, span);

// If this is a root span (no parent), mark it as the root span for this trace
if (!span.parentSpanId) {
this.rootSpanIds.set(traceId, spanId);
}

// Check if this is a subgraph execution span
if (span.name && span.name.startsWith('subgraph.execute')) {
const subgraphName = span.attributes['gateway.upstream.subgraph.name'];
if (subgraphName && typeof subgraphName === 'string') {
this.subgraphNames.get(traceId)!.add(subgraphName);
}
}
}

onEnd(span: Span): void {
const spanContext = span.spanContext();
const traceId = spanContext.traceId;
const spanId = spanContext.spanId;

// Skip if we don't have this trace
if (!this.activeSpans.has(traceId)) {
return;
}

const spansForTrace = this.activeSpans.get(traceId)!;
const rootSpanId = this.rootSpanIds.get(traceId);
const subgraphNamesForTrace = this.subgraphNames.get(traceId);

// Check if this is the GraphQL execute span we're interested in
// TODO: can we have this fully type safe?
if (span.name === 'graphql.execute') {
const operationType = span.attributes['graphql.operation.type'];
const operationName = span.attributes['graphql.operation.name'];
const errorCount = span.attributes['graphql.error.count'];
const document = span.attributes['graphql.document'];

if (rootSpanId) {
const rootSpan = spansForTrace.get(rootSpanId);
if (rootSpan && !rootSpan.ended) {
// Update the name of the root span
if (operationType && document) {
            rootSpan.updateName(`${operationType} ${operationName ?? 'anonymous'}`);

// Copy attributes to root span
rootSpan.setAttribute('hive.graphql.operation.type', operationType);
rootSpan.setAttribute('hive.graphql.operation.name', operationName ?? '');
rootSpan.setAttribute('hive.graphql.operation.document', document);

if (errorCount !== undefined)
rootSpan.setAttribute('hive.graphql.error.count', errorCount);

// Add the subgraph names as a comma-separated list
if (subgraphNamesForTrace && subgraphNamesForTrace.size > 0) {
rootSpan.setAttribute(
'hive.subgraph.names',
Array.from(subgraphNamesForTrace).join(','),
);
}
}
}
}
}

// For any subgraph span that's ending, make sure we capture its name
if (span.name && span.name.startsWith('subgraph.execute')) {
const subgraphName = span.attributes['gateway.upstream.subgraph.name'];
if (subgraphName && typeof subgraphName === 'string' && subgraphNamesForTrace) {
subgraphNamesForTrace.add(subgraphName);

// Update root span with current list of subgraph names
if (rootSpanId) {
const rootSpan = spansForTrace.get(rootSpanId);
if (rootSpan && !rootSpan.ended) {
rootSpan.setAttribute(
'hive.subgraph.names',
Array.from(subgraphNamesForTrace).join(','),
);
}
}
}
}

// Clean up the span reference
spansForTrace.delete(spanId);

// If this is the root span or if no spans remain, clean up the trace
if (rootSpanId === spanId || spansForTrace.size === 0) {
this.activeSpans.delete(traceId);
this.rootSpanIds.delete(traceId);
this.subgraphNames.delete(traceId);
if (process.env.DEBUG === '1') {
console.log('span attributes', span.attributes);
}
}
}

async forceFlush(): Promise<void> {
// Clear all processor state
this.activeSpans.clear();
this.rootSpanIds.clear();
this.subgraphNames.clear();
}

async shutdown(): Promise<void> {
// Clean up resources when shutting down
await this.forceFlush();
}
}
async function createHiveTracingSpanProcessor(): Promise<HiveTracingSpanProcessor> {
return new HiveTracingSpanProcessor();
}

export const gatewayConfig = defineConfig({
openTelemetry: {
exporters: [
createHiveTracingSpanProcessor(),
createOtlpHttpExporter(
{
url: 'http://host.docker.internal:4318/v1/traces',
headers: {
Authorization: `Bearer ${process.env.HIVE_ORGANIZATION_ACCESS_TOKEN}`,
'X-Hive-Target-Ref': process.env.HIVE_TARGET_REF,
},
},
// Batching config is set in order to make it easier to test.
{
scheduledDelayMillis: 1,
},
),
],
serviceName: 'hive-gateway',
},
plugins: () => [useOnFetchTracer()],
});
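
For a quick smoke test of the plugin above, the captured upstream headers can be read back from the debug endpoint it registers. A minimal sketch, assuming the gateway from the Docker command in the header comment is listening on port 4000:

```sh
# List the headers the gateway sent on each upstream fetch
# (GET /upstream-fetch is handled by useOnFetchTracer above).
curl http://localhost:4000/upstream-fetch
```
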
28 changes: 28 additions & 0 deletions docker/configs/otel-collector/builder-config.yaml
@@ -0,0 +1,28 @@
dist:
version: 0.122.0
name: otelcol-custom
description: Custom OTel Collector distribution
output_path: ./otelcol-custom

receivers:
- gomod: go.opentelemetry.io/collector/receiver/otlpreceiver v0.122.0

processors:
- gomod: go.opentelemetry.io/collector/processor/batchprocessor v0.122.0
- gomod:
github.com/open-telemetry/opentelemetry-collector-contrib/processor/attributesprocessor
v0.122.0
- gomod:
github.com/open-telemetry/opentelemetry-collector-contrib/processor/filterprocessor v0.122.0

exporters:
- gomod: go.opentelemetry.io/collector/exporter/debugexporter v0.122.0
- gomod:
github.com/open-telemetry/opentelemetry-collector-contrib/exporter/clickhouseexporter v0.122.0

extensions:
- gomod:
github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckextension
v0.122.0
- gomod:
github.com/graphql-hive/opentelemetry-collector-contrib/extension/hiveauthextension cd0c57cf22
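
This builder config is consumed by the OpenTelemetry Collector Builder, which generates and compiles the custom distribution. A minimal sketch of the manual equivalent of what otel-collector.dockerfile is expected to do, assuming the builder binary (ocb) is installed locally:

```sh
# Generate and compile the custom collector into ./otelcol-custom
# (the output_path from the dist section above).
ocb --config builder-config.yaml
```
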
86 changes: 86 additions & 0 deletions docker/configs/otel-collector/config.yaml
@@ -0,0 +1,86 @@
extensions:
hiveauth:
endpoint: ${HIVE_OTEL_AUTH_ENDPOINT}
receivers:
otlp:
protocols:
grpc:
include_metadata: true
endpoint: '0.0.0.0:4317'
auth:
authenticator: hiveauth
http:
cors:
allowed_origins: ['*']
allowed_headers: ['*']
include_metadata: true
endpoint: '0.0.0.0:4318'
auth:
authenticator: hiveauth
processors:
batch:
timeout: 5s
send_batch_size: 100000
attributes:
actions:
- key: hive.target_id
from_context: auth.targetId
action: insert
filter/drop_missing_attributes:
error_mode: ignore
traces:
span:
# prettier-ignore
- not (
attributes["http.status"] != nil and
attributes["http.host"] != nil and
attributes["http.method"] != nil and
attributes["http.route"] != nil and
attributes["http.url"] != nil and
(
attributes["hive.graphql.operation.type"] == "query" or
attributes["hive.graphql.operation.type"] == "mutation" or
attributes["hive.graphql.operation.type"] == "subscription"
)
)
exporters:
# debug:
# verbosity: detailed
# sampling_initial: 5
# sampling_thereafter: 200
clickhouse:
endpoint: tcp://clickhouse:9000?dial_timeout=10s&compress=lz4
database: default
async_insert: true
username: test
password: test
create_schema: false
ttl: 720h
compress: lz4
logs_table_name: otel_logs
traces_table_name: otel_traces
metrics_table_name: otel_metrics
timeout: 5s
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
service:
extensions:
- hiveauth
telemetry:
logs:
level: INFO
encoding: json
output_paths: ['stdout']
error_output_paths: ['stderr']
pipelines:
traces:
receivers: [otlp]
processors: [
attributes,
# filter/drop_missing_attributes,
batch,
]
exporters: [clickhouse]
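
Since both OTLP receivers sit behind the hiveauth authenticator, requests without valid credentials should be rejected. A minimal sketch for exercising the HTTP receiver, assuming the collector is up on port 4318 and that hiveauth accepts the same headers the gateway config sends:

```sh
# Post an empty-but-valid OTLP JSON payload to the traces endpoint.
curl -X POST http://localhost:4318/v1/traces \
  -H "Authorization: Bearer $HIVE_ORGANIZATION_ACCESS_TOKEN" \
  -H "X-Hive-Target-Ref: $HIVE_TARGET_REF" \
  -H 'Content-Type: application/json' \
  -d '{"resourceSpans":[]}'
```
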
19 changes: 19 additions & 0 deletions docker/docker-compose.dev.yml
@@ -16,6 +16,7 @@ services:
POSTGRES_PASSWORD: postgres
POSTGRES_DB: registry
PGDATA: /var/lib/postgresql/data
HIVE_OTEL_AUTH_ENDPOINT: 'http://host.docker.internal:3001/otel-auth'
volumes:
- ./.hive-dev/postgresql/db:/var/lib/postgresql/data
ports:
@@ -176,5 +177,23 @@ services:
networks:
- 'stack'

otel-collector:
depends_on:
clickhouse:
condition: service_healthy
build:
context: ./configs/otel-collector
dockerfile: ./../../otel-collector.dockerfile
environment:
HIVE_OTEL_AUTH_ENDPOINT: 'http://host.docker.internal:3001/otel-auth'
volumes:
- ./configs/otel-collector/builder-config.yaml:/builder-config.yaml
- ./configs/otel-collector/config.yaml:/etc/otel-config.yaml
ports:
- '4317:4317'
- '4318:4318'
networks:
- 'stack'

networks:
stack: {}
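
With these additions, the collector can be brought up alongside its ClickHouse dependency. A minimal sketch, assuming the command runs from the repository root:

```sh
# Build and start the custom collector; ClickHouse is started first
# via the service_healthy condition declared above.
docker compose -f docker/docker-compose.dev.yml up --build otel-collector
```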