Skip to content
7 changes: 7 additions & 0 deletions .changeset/brave-meals-judge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@hyperdx/common-utils": patch
"@hyperdx/api": patch
"@hyperdx/app": patch
---

feat: add support for alert auto-resolve
7 changes: 7 additions & 0 deletions .changeset/friendly-apricots-sell.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@hyperdx/common-utils": patch
"@hyperdx/api": patch
"@hyperdx/app": patch
---

feat: support incident.io integration
1 change: 1 addition & 0 deletions packages/api/src/controllers/alerts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import logger from '@/utils/logger';
import { alertSchema } from '@/utils/zod';

export type AlertInput = {
id?: string;
source?: AlertSource;
channel: AlertChannel;
interval: AlertInterval;
Expand Down
6 changes: 2 additions & 4 deletions packages/api/src/models/webhook.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import { WebhookService } from '@hyperdx/common-utils/dist/types';
import { ObjectId } from 'mongodb';
import mongoose, { Schema } from 'mongoose';

// Webhook service identifiers declared locally in this model file.
// NOTE(review): this same file also imports `WebhookService` from
// '@hyperdx/common-utils/dist/types' and re-exports it, so this local enum
// looks like the pre-change declaration that the import replaces — confirm
// that only one of the two remains.
export enum WebhookService {
Slack = 'slack',
Generic = 'generic',
}
export { WebhookService };

interface MongooseMap extends Map<string, string> {
// https://mongoosejs.com/docs/api/map.html#MongooseMap.prototype.toJSON()
Expand Down
5 changes: 5 additions & 0 deletions packages/api/src/tasks/__tests__/checkAlerts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ describe('checkAlerts', () => {
alertProvider,
clickhouseClient: {} as any,
metadata: {} as any,
state: 'ALERT',
template: 'Custom body @webhook-My_Web', // partial name should work
view: {
...defaultSearchView,
Expand Down Expand Up @@ -350,6 +351,7 @@ describe('checkAlerts', () => {
alertProvider,
clickhouseClient: {} as any,
metadata: {} as any,
state: 'ALERT',
template: 'Custom body @webhook-My_Web', // partial name should work
view: {
...defaultSearchView,
Expand Down Expand Up @@ -410,6 +412,7 @@ describe('checkAlerts', () => {
alertProvider,
clickhouseClient: {} as any,
metadata: {} as any,
state: 'ALERT',
template: 'Custom body @webhook-{{attributes.webhookName}}', // partial name should work
view: {
...defaultSearchView,
Expand Down Expand Up @@ -481,6 +484,7 @@ describe('checkAlerts', () => {
alertProvider,
clickhouseClient: {} as any,
metadata: {} as any,
state: 'ALERT',
template: `
{{#is_match "attributes.k8s.pod.name" "otel-collector-123"}}
Runbook URL: {{attributes.runbook.url}}
Expand Down Expand Up @@ -518,6 +522,7 @@ describe('checkAlerts', () => {
alertProvider,
clickhouseClient: {} as any,
metadata: {} as any,
state: 'ALERT',
template:
'{{#is_match "attributes.host" "web"}} @webhook-My_Web {{/is_match}}', // partial name should work
view: {
Expand Down
56 changes: 54 additions & 2 deletions packages/api/src/tasks/checkAlerts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ const fireChannelEvent = async ({
savedSearch,
source,
startTime,
state,
totalCount,
windowSizeInMins,
teamWebhooksById,
Expand All @@ -82,6 +83,7 @@ const fireChannelEvent = async ({
savedSearch?: ISavedSearch | null;
source?: ISource | null;
startTime: Date;
state: AlertState;
totalCount: number;
windowSizeInMins: number;
teamWebhooksById: Map<string, IWebhook>;
Expand All @@ -105,6 +107,7 @@ const fireChannelEvent = async ({
const attributesNested = unflattenObject(attributes);
const templateView: AlertMessageTemplateDefaultView = {
alert: {
id: alert.id,
channel: alert.channel,
dashboardId: dashboard?.id,
groupBy: alert.groupBy,
Expand Down Expand Up @@ -133,6 +136,7 @@ const fireChannelEvent = async ({
alertProvider,
clickhouseClient,
metadata,
state,
title: buildAlertMessageTemplateTitle({
template: alert.name,
view: templateView,
Expand Down Expand Up @@ -350,6 +354,7 @@ export const processAlert = async (
savedSearch: (details as any).savedSearch,
source,
startTime: bucketStart,
state: AlertState.ALERT,
totalCount: _value,
windowSizeInMins,
teamWebhooksById,
Expand All @@ -368,6 +373,47 @@ export const processAlert = async (
}
}

// Check if alert transitioned from ALERT to OK (resolved)
if (
previous?.state === AlertState.ALERT &&
history.state === AlertState.OK
) {
logger.info({
message: `Alert resolved, triggering ${alert.channel.type} notification`,
alertId: alert.id,
});

try {
const lastValue = history.lastValues[history.lastValues.length - 1];
await fireChannelEvent({
alert,
alertProvider,
attributes: {}, // FIXME: support attributes (logs + resources ?)
clickhouseClient,
dashboard: (details as any).dashboard,
endTime: fns.addMinutes(
lastValue?.startTime || nowInMinsRoundDown,
windowSizeInMins,
),
group: '',
metadata,
savedSearch: (details as any).savedSearch,
source,
startTime: lastValue?.startTime || nowInMinsRoundDown,
state: AlertState.OK,
totalCount: lastValue?.count || 0,
windowSizeInMins,
teamWebhooksById,
});
} catch (e) {
logger.error({
message: 'Failed to fire resolved channel event',
alertId: alert.id,
error: serializeError(e),
});
}
}

await alertProvider.updateAlertState(history);
} catch (e) {
// Uncomment this for better error messages locally
Expand All @@ -386,6 +432,7 @@ export { handleSendGenericWebhook };
/**
 * One aggregated alert-history entry.
 *
 * NOTE(review): the fields appear to mirror the output of the `$group` stage
 * in `getPreviousAlertHistories` (`_id`, `createdAt`, `state`) — confirm.
 */
export interface AggregatedAlertHistory {
// Grouping key; the `$group` stage sets `_id: '$alert'`.
_id: ObjectId;
// `createdAt` value selected for the group.
createdAt: Date;
// `state` value selected for the group (an `AlertState`).
state: AlertState;
}

/**
Expand Down Expand Up @@ -416,11 +463,16 @@ export const getPreviousAlertHistories = async (
createdAt: { $lte: now },
},
},
// Group by alert ID, taking the latest createdAt value for each group
// Sort by createdAt descending to get the latest first
{
$sort: { createdAt: -1 },
},
// Group by alert ID, taking the first (latest) values for each group
{
$group: {
_id: '$alert',
createdAt: { $max: '$createdAt' },
createdAt: { $first: '$createdAt' },
state: { $first: '$state' },
},
},
]),
Expand Down
38 changes: 36 additions & 2 deletions packages/api/src/tasks/template.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ import {
WebhookService,
zAlertChannelType,
} from '@hyperdx/common-utils/dist/types';
import { _useTry, formatDate } from '@hyperdx/common-utils/dist/utils';
import {
_useTry,
formatDate,
objectHash,
} from '@hyperdx/common-utils/dist/utils';
import { isValidSlackUrl } from '@hyperdx/common-utils/dist/validation';
import Handlebars, { HelperOptions } from 'handlebars';
import _ from 'lodash';
Expand Down Expand Up @@ -59,6 +63,10 @@ interface Message {
hdxLink: string;
title: string;
body: string;
state: string;
startTime: number;
endTime: number;
eventId: string;
}

export const notifyChannel = async ({
Expand All @@ -73,7 +81,10 @@ export const notifyChannel = async ({
const webhook = channel.channel;
if (webhook.service === WebhookService.Slack) {
await handleSendSlackWebhook(webhook, message);
} else if (webhook.service === 'generic') {
} else if (
webhook.service === WebhookService.Generic ||
webhook.service === WebhookService.IncidentIO
) {
await handleSendGenericWebhook(webhook, message);
}
break;
Expand Down Expand Up @@ -196,11 +207,18 @@ export const handleSendGenericWebhook = async (
let body = '';
try {
const handlebars = Handlebars.create();
// Register eq helper for conditional checks
handlebars.registerHelper('eq', (a, b) => a === b);

body = handlebars.compile(webhook.body, {
noEscape: true,
})({
body: escapeJsonString(message.body),
endTime: message.endTime,
eventId: message.eventId,
link: escapeJsonString(message.hdxLink),
startTime: message.startTime,
state: message.state,
title: escapeJsonString(message.title),
});
} catch (e) {
Expand Down Expand Up @@ -377,6 +395,7 @@ export const renderAlertTemplate = async ({
alertProvider,
clickhouseClient,
metadata,
state,
template,
title,
view,
Expand All @@ -385,6 +404,7 @@ export const renderAlertTemplate = async ({
alertProvider: AlertProvider;
clickhouseClient: ClickhouseClient;
metadata: Metadata;
state: string;
template?: string | null;
title: string;
view: AlertMessageTemplateDefaultView;
Expand Down Expand Up @@ -451,12 +471,26 @@ export const renderAlertTemplate = async ({
);

if (channel) {
const startTime = view.startTime.getTime();
const endTime = view.endTime.getTime();
const eventId = objectHash({
alertId: alert.id,
channel: {
type: channel.type,
id: channel.channel._id.toString(),
},
});

await notifyChannel({
channel,
message: {
hdxLink: buildAlertMessageTemplateHdxLink(alertProvider, view),
title,
body: renderedBody,
state,
startTime,
endTime,
eventId,
},
});
}
Expand Down
36 changes: 31 additions & 5 deletions packages/app/src/TeamPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,15 @@ import { useSources } from './source';
import { useConfirm } from './useConfirm';
import { capitalizeFirstLetter } from './utils';

const DEFAULT_GENERIC_WEBHOOK_BODY = ['{{title}}', '{{body}}', '{{link}}'];
// Template placeholders included in the default body of a generic webhook.
// NOTE(review): these names presumably need to match the variables the API
// passes when compiling the webhook body template — confirm against the sender.
const DEFAULT_GENERIC_WEBHOOK_BODY = [
'{{title}}',
'{{body}}',
'{{link}}',
'{{state}}',
'{{startTime}}',
'{{endTime}}',
'{{eventId}}',
];
// The placeholders above joined with ' | ' into a single string; it is embedded
// as the "text" value of the default generic webhook body in CreateWebhookForm.
const DEFAULT_GENERIC_WEBHOOK_BODY_TEMPLATE =
DEFAULT_GENERIC_WEBHOOK_BODY.join(' | ');

Expand Down Expand Up @@ -733,15 +741,27 @@ export function CreateWebhookForm({
}
}

let defaultBody = body;
if (!body) {
if (service === WebhookService.Generic) {
defaultBody = `{"text": "${DEFAULT_GENERIC_WEBHOOK_BODY_TEMPLATE}"}`;
} else if (service === WebhookService.IncidentIO) {
defaultBody = `{
"title": "{{title}}",
"description": "{{body}}",
"deduplication_key": "{{eventId}}",
"status": "{{#if (eq state "ALERT")}}firing{{else}}resolved{{/if}}",
"source_url": "{{link}}"
}`;
}
}

const response = await saveWebhook.mutateAsync({
service,
name,
url,
description: description || '',
body:
service === WebhookService.Generic && !body
? `{"text": "${DEFAULT_GENERIC_WEBHOOK_BODY_TEMPLATE}"}`
: body,
body: defaultBody,
headers: parsedHeaders,
});
notifications.show({
Expand Down Expand Up @@ -815,6 +835,11 @@ export function CreateWebhookForm({
label="Generic"
{...form.register('service', { required: true })}
/>
<Radio
value={WebhookService.IncidentIO}
label="Incident.io"
{...form.register('service', { required: true })}
/>
</Group>
</Radio.Group>
<TextInput
Expand Down Expand Up @@ -976,6 +1001,7 @@ function IntegrationsSection() {
const { data: webhookData, refetch: refetchWebhooks } = api.useWebhooks([
WebhookService.Slack,
WebhookService.Generic,
WebhookService.IncidentIO,
]);

const allWebhooks = useMemo(() => {
Expand Down
10 changes: 7 additions & 3 deletions packages/app/src/components/Alerts.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ import { useMemo } from 'react';
import { Control, useController } from 'react-hook-form';
import { Select, SelectProps } from 'react-hook-form-mantine';
import { Label, ReferenceArea, ReferenceLine } from 'recharts';
import type { AlertChannelType } from '@hyperdx/common-utils/dist/types';
import {
type AlertChannelType,
WebhookService,
} from '@hyperdx/common-utils/dist/types';
import { Button, ComboboxData, Group, Modal } from '@mantine/core';
import { useDisclosure } from '@mantine/hooks';

Expand All @@ -19,8 +22,9 @@ const WebhookChannelForm = <T extends object>(
props: Partial<SelectProps<T>>,
) => {
const { data: webhooks, refetch: refetchWebhooks } = api.useWebhooks([
'slack',
'generic',
WebhookService.Slack,
WebhookService.Generic,
WebhookService.IncidentIO,
]);
const [opened, { open, close }] = useDisclosure(false);

Expand Down
1 change: 1 addition & 0 deletions packages/common-utils/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ export type StacktraceBreadcrumb = {
// Identifies which service a webhook targets. Each member is a string enum
// value, so comparisons against the raw strings ('slack', 'generic',
// 'incidentio') behave the same as comparisons against the members.
export enum WebhookService {
Slack = 'slack',
Generic = 'generic',
// Incident.io integration.
IncidentIO = 'incidentio',
}

// -------------------------
Expand Down
2 changes: 2 additions & 0 deletions packages/common-utils/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import { add as fnsAdd, format as fnsFormat } from 'date-fns';
import { formatInTimeZone } from 'date-fns-tz';
import { z } from 'zod';

export { default as objectHash } from 'object-hash';

import {
ChartConfigWithDateRange,
ChartConfigWithOptTimestamp,
Expand Down
Loading