Prometheus告警媒介 钉钉

创建钉钉群

null

null

null

null

添加钉钉群机器人

null

null

null

null

null

null

null

null

secret: SECc95134129e043e4be06df4d5aa2afdef066a6d361ac73da97bc7220618cfa9da

null

null

Webhook: https://oapi.dingtalk.com/robot/send?access_token=e7f8aac8fc2705064b28ae1b6d1a6d0dfc53974e0dc98423384b637c9ebe4498

null

安装钉钉告警插件

null

null

null

[root@alertmanager ~]# wget https://github.com/timonwong/prometheus-webhook-dingtalk/releases/download/v2.1.0/prometheus-webhook-dingtalk-2.1.0.linux-amd64.tar.gz
[root@alertmanager ~]# ls
prometheus-webhook-dingtalk-2.1.0.linux-amd64.tar.gz
[root@alertmanager ~]# tar xf prometheus-webhook-dingtalk-2.1.0.linux-amd64.tar.gz
[root@alertmanager ~]# ls
prometheus-webhook-dingtalk-2.1.0.linux-amd64
[root@alertmanager ~]# mv prometheus-webhook-dingtalk-2.1.0.linux-amd64 /usr/local/src/prometheus-webhook-dingtalk
[root@alertmanager ~]# ls /usr/local/src/
alertmanager  prometheus-webhook-dingtalk
[root@alertmanager ~]# mv /usr/local/src/prometheus-webhook-dingtalk/config.example.yml /usr/local/src/prometheus-webhook-dingtalk/config.yml
[root@alertmanager ~]# ls /usr/local/src/prometheus-webhook-dingtalk/
config.yml  contrib  LICENSE  prometheus-webhook-dingtalk
注册为系统服务
[root@alertmanager prometheus-webhook-dingtalk]# vim /usr/lib/systemd/system/prometheus-webhook-dingtalk.service
[root@alertmanager prometheus-webhook-dingtalk]# cat > /usr/lib/systemd/system/prometheus-webhook-dingtalk.service << EOF
[Service]
ExecStart=/usr/local/src/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk --config.file=/usr/local/src/prometheus-webhook-dingtalk/config.yml

[Install]
WantedBy=multi-user.target

[Unit]
Description=prometheus-webhook-dingtalk
After=network.target
EOF
重载/开机自启/启动/查看状态
systemctl daemon-reload
systemctl enable prometheus-webhook-dingtalk
systemctl start prometheus-webhook-dingtalk
systemctl status prometheus-webhook-dingtalk

配置钉钉告警插件与钉钉机器人集成

[root@alertmanager ~]# cd /usr/local/src/prometheus-webhook-dingtalk/
[root@alertmanager prometheus-webhook-dingtalk]# pwd
/usr/local/src/prometheus-webhook-dingtalk
[root@alertmanager prometheus-webhook-dingtalk]# ls
config.yml  contrib  LICENSE  prometheus-webhook-dingtalk
[root@alertmanager prometheus-webhook-dingtalk]# vim config.yml
[root@alertmanager prometheus-webhook-dingtalk]# cat config.yml
## Request timeout
# timeout: 5s

## Uncomment following line in order to write template from scratch (be careful!)
#no_builtin_template: true

## Customizable templates path
#templates:
#  - contrib/templates/legacy/template.tmpl
templates:
  - /usr/local/src/alertmanager/dingtalk.tmpl

## You can also override default template using `default_message`
## The following example to use the 'legacy' template from v0.3.0
#default_message:
#  title: '{{ template "legacy.title" . }}'
#  text: '{{ template "legacy.content" . }}'

## Targets, previously was known as "profiles"
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=e7f8aac8fc2705064b28ae1b6d1a6d0dfc53974e0dc98423384b637c9ebe4498
    # secret for signature
    secret: SECc95134129e043e4be06df4d5aa2afdef066a6d361ac73da97bc7220618cfa9da
    # Customize template content
    message:
      # Use legacy template
      title: '{{ template "ops.title" . }}'
      text: '{{ template "ops.content" . }}'
说明:
## Request timeout
# timeout: 5s

## Uncomment following line in order to write template from scratch (be careful!)
#no_builtin_template: true

## Customizable templates path
#templates:
#  - contrib/templates/legacy/template.tmpl
templates:
  - /usr/local/src/alertmanager/dingtalk.tmpl # 自定义告警模板文件及位置

## You can also override default template using `default_message`
## The following example to use the 'legacy' template from v0.3.0
#default_message:
#  title: '{{ template "legacy.title" . }}'
#  text: '{{ template "legacy.content" . }}'

## Targets, previously was known as "profiles"
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=e7f8aac8fc2705064b28ae1b6d1a6d0dfc53974e0dc98423384b637c9ebe4498 # 配置钉钉机器人的Webhook地址
    # secret for signature
    secret: SECc95134129e043e4be06df4d5aa2afdef066a6d361ac73da97bc7220618cfa9da # 配置钉钉机器人的加签密钥(SECRET)
    # Customize template content
    message:
      # Use legacy template
      title: '{{ template "ops.title" . }}' # 消息标题,引用下面模板文件中定义的 ops.title
      text: '{{ template "ops.content" . }}' # 消息内容,引用下面模板文件中定义的 ops.content

为alertmanager配置告警模板文件

[root@alertmanager ~]# vim /usr/local/src/alertmanager/dingtalk.tmpl
[root@alertmanager ~]# cat /usr/local/src/alertmanager/dingtalk.tmpl
{{ define "__subject" }}
[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}]
{{ end }}


{{ define "__alert_list" }}{{ range . }}
---
    **zqf 告警类型**: {{ .Labels.alertname }}
    **zqf 告警级别**: {{ .Labels.level }}
    **zqf 故障主机**: {{ .Labels.instance }}
    **zqf 告警信息**: {{ .Annotations.description }}
    **zqf 触发时间**: {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
{{ end }}{{ end }}

{{ define "__resolved_list" }}{{ range . }}
---
    **zqf 告警类型**: {{ .Labels.alertname }}
    **zqf 告警级别**: {{ .Labels.level }}
    **zqf 故障主机**: {{ .Labels.instance }}
    **zqf 触发时间**: {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    **zqf 恢复时间**: {{ (.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
{{ end }}{{ end }}


{{ define "ops.title" }}
{{ template "__subject" . }}
{{ end }}

{{ define "ops.content" }}
{{ if gt (len .Alerts.Firing) 0 }}
**====zqf  侦测到{{ .Alerts.Firing | len  }}个故障====**
{{ template "__alert_list" .Alerts.Firing }}
---
{{ end }}

{{ if gt (len .Alerts.Resolved) 0 }}
**====zqf  恢复{{ .Alerts.Resolved | len  }}个故障====**
{{ template "__resolved_list" .Alerts.Resolved }}
{{ end }}
{{ end }}

{{ define "ops.link.title" }}{{ template "ops.title" . }}{{ end }}
{{ define "ops.link.content" }}{{ template "ops.content" . }}{{ end }}
{{ template "ops.title" . }}
{{ template "ops.content" . }}

修改Alertmanager配置文件添加钉钉告警渠道

[root@alertmanager ~]# ss -anput | grep ":8060"
tcp    LISTEN     0      4096   [::]:8060               [::]:*                   users:(("prometheus-webh",pid=9313,fd=3))
[root@alertmanager ~]# vim /usr/local/src/alertmanager/alertmanager.yml
[root@alertmanager ~]# cat /usr/local/src/alertmanager/alertmanager.yml
global:
  resolve_timeout: 3m

templates:
  - '/usr/local/src/alertmanager/dingtalk.tmpl'

route:
  group_by: ['env','instance','type','group','job','alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  receiver: dingtalk_webhook1
  routes:
    - receiver: dingtalk_webhook1
      group_wait: 30s
      match_re:
        severity: "critical|warning"
receivers:
- name: 'dingtalk_webhook1'
  webhook_configs:
  - url: 'http://192.168.10.173:8060/dingtalk/webhook1/send'
    send_resolved: true





说明如下:
global:
  resolve_timeout: 3m

templates:
  - '/usr/local/src/alertmanager/dingtalk.tmpl' # 告警模板位置

route:
  group_by: ['env','instance','type','group','job','alertname'] # 按这些标签对告警进行分组,标签值相同的告警会合并到同一条通知中
  group_wait: 30s # 分组内第一个告警的等待时间,30s内同组的其他告警会合并为一条通知发送
  group_interval: 5m # 同一分组已发送过通知后,组内出现新告警时再次发送通知的间隔时间
  repeat_interval: 1h # 重复告警的发送间隔,告警持续未恢复时每隔1h再次发送
  receiver: dingtalk_webhook1 # 默认告警接收器,需与receivers中的name一致

receivers:
- name: 'dingtalk_webhook1' # 告警接收器名称
  webhook_configs:
  - url: 'http://192.168.10.173:8060/dingtalk/webhook1/send' # prometheus-webhook-dingtalk的地址,路径中的webhook1对应其config.yml中targets下的webhook1
    send_resolved: true # 告警恢复后是否发送恢复通知
邮件告警的配置文件:
global:
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.126.com:25'
  smtp_from: 'nextgo@126.com'
  smtp_auth_username: 'nextgo@126.com'
  smtp_auth_password: 'RXGFEHFQCLXAMFTP'
  smtp_require_tls: false
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 5m
  repeat_interval: 1m
  receiver: 'mail'
receivers:
- name: 'mail'
  email_configs:
  - to: 'nextgo@126.com'

关于alertmanager.yml文件的解释如下:
global: #全局设置,配置解决告警时间间隔和邮件发送服务
  resolve_timeout: 5m # 定义持续多长时间未接收到告警标记后,就将告警状态标记为resolved
  smtp_smarthost: 'smtp.126.com:25' # 邮件服务器
  smtp_from: 'nextgo@126.com' # 告警发送邮箱
  smtp_auth_username: 'nextgo@126.com' # 邮箱名
  smtp_auth_password: 'RXGFEHFQCLXAMFTP' # 邮箱认证使用授权码
  smtp_require_tls: false # 是否启动tls
route: # 路由树,每个告警都会在配置的顶级路由中进入路由树,路由树匹配所有报警规则
  group_by: ['alertname'] # 用于告警分组的标签,标签值相同的告警归为一组
  group_wait: 10s # 分组内第一个告警的等待时间,期间同组的其他告警会合并为一条通知
  group_interval: 5m # 同一分组已发送过通知后,组内出现新告警时再次发送通知的间隔时间
  repeat_interval: 1m # 重复发送告警的间隔时间,默认为4h,现修改为1分钟
  receiver: 'mail' # 指定默认的告警接收器,需与receivers中的name一致
receivers: # 告警接收器,这里配置接收邮箱地址。
- name: 'mail' # 告警接收器的自定义名称
  email_configs:
  - to: 'nextgo@126.com' # 指定接收端email

重启prometheus-webhook-dingtalk及alertmanager

[root@alertmanager ~]# systemctl restart prometheus-webhook-dingtalk
[root@alertmanager ~]# systemctl restart alertmanager

alertmanager也可以不重启,使用 curl -v -X POST http://localhost:9093/-/reload 重新加载配置文件

告警测试

[root@prometheus-server ~]# df -h
文件系统                 容量  已用  可用 已用% 挂载点
devtmpfs                 1.9G     0  1.9G    0% /dev
tmpfs                    2.0G     0  2.0G    0% /dev/shm
tmpfs                    2.0G  9.8M  1.9G    1% /run
tmpfs                    2.0G     0  2.0G    0% /sys/fs/cgroup
/dev/mapper/centos-root   50G  7.5G   43G   15% /
/dev/sda1               1014M  293M  722M   29% /boot
/dev/mapper/centos-home  969G   33M  969G    1% /home
tmpfs                    391M   12K  391M    1% /run/user/42
tmpfs                    391M     0  391M    0% /run/user/0


[root@prometheus-server ~]# dd if=/dev/zero of=/test1 bs=1M count=10000
记录了10000+0 的读入
记录了10000+0 的写出
10485760000字节(10 GB)已复制,14.1928 秒,739 MB/秒


[root@prometheus-server ~]# df -h
文件系统                 容量  已用  可用 已用% 挂载点
devtmpfs                 1.9G     0  1.9G    0% /dev
tmpfs                    2.0G     0  2.0G    0% /dev/shm
tmpfs                    2.0G  9.8M  1.9G    1% /run
tmpfs                    2.0G     0  2.0G    0% /sys/fs/cgroup
/dev/mapper/centos-root   50G   18G   33G   35% /
/dev/sda1               1014M  293M  722M   29% /boot
/dev/mapper/centos-home  969G   33M  969G    1% /home
tmpfs                    391M   12K  391M    1% /run/user/42
tmpfs                    391M     0  391M    0% /run/user/0

null

null

null

null

null

results matching ""

    No results matching ""