prometheus 服务部署

本贴最后更新于 1609 天前,其中的信息可能已经天翻地覆

  • 主服务 prometheus:挂载了配置文件 prometheus.yml(详见下文“prometheus.yml”)、告警规则目录 ./rules 以及监控服务地址目录 ./targets
  • 告警服务 alertmanager:挂载了配置文件 alertmanager.yml(详见下文“alertmanager.yml”)
  • 钉钉告警服务 dingtalk(配合 alertmanager 发送钉钉告警):挂载了配置文件 ding.yml(详见下文“ding.yml”)和自定义告警模板 template.tmpl
  • 服务监控中转 pushgateway
  • prometheus 远程存储服务器 influxdb,用于数据持久化

docker-compose 文件

# Monitoring stack: Prometheus, Alertmanager, the DingTalk webhook bridge,
# Pushgateway, and InfluxDB 1.8 as Prometheus remote storage.
version: '3.6'

services:
  # Prometheus server. Rule files and file-SD target lists are bind-mounted
  # one by one into /prometheus, where prometheus.yml expects to find them.
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    environment:
      TZ: 'Asia/Shanghai'
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./rules/alert.rules:/prometheus/alert.rules
      - ./rules/containerAlert.rules:/prometheus/containerAlert.rules
      - ./rules/jvm.rules:/prometheus/jvm.rules
      - ./targets/convergence_targets.json:/prometheus/convergence_targets.json
      - ./targets/ac_targets.json:/prometheus/ac_targets.json
      - ./targets/other_targets.json:/prometheus/other_targets.json
      - ./targets/ex_targets.json:/prometheus/ex_targets.json
      # NOTE(review): no scrape job in the accompanying prometheus.yml reads
      # this file; confirm whether a job is missing or the mount is stale.
      - ./targets/ng_grouptask_targets.json:/prometheus/ng_grouptask_targets.json
    ports:
      - '19090:9090'

  # Alertmanager: receives alerts from Prometheus and routes them to webhooks.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    environment:
      TZ: 'Asia/Shanghai'
    volumes:
      - /etc/localtime:/etc/localtime
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
    restart: always
    ports:
      - '19093:9093'

  # DingTalk webhook bridge. The container name "dingtalk" is the hostname
  # used by the webhook URLs in alertmanager.yml.
  prometheusdingtalk:
    image: timonwong/prometheus-webhook-dingtalk
    container_name: dingtalk
    environment:
      TZ: 'Asia/Shanghai'
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime
      - ./ding.yml:/etc/prometheus-webhook-dingtalk/config.yml
      - ./template.tmpl:/etc/prometheus-webhook-dingtalk/templates/default.tmpl
    ports:
      - '18060:8060'

  # Pushgateway. The container name "pg" is the hostname scraped by the
  # 'pushgateway' job in prometheus.yml.
  prometheusgateway:
    image: prom/pushgateway
    container_name: pg
    environment:
      TZ: 'Asia/Shanghai'
    volumes:
      - /etc/localtime:/etc/localtime
    restart: always
    ports:
      - '9091:9091'

  # InfluxDB 1.8: remote read/write backend for Prometheus.
  influxdb:
    image: influxdb:1.8
    container_name: influxdb
    environment:
      TZ: 'Asia/Shanghai'
    ports:
      - '18086:8086'
    restart: always
    volumes:
      - /prometheus/influxdb/conf:/etc/influxdb
      - /data/influxdb/data:/var/lib/influxdb/data
      - /data/influxdb/meta:/var/lib/influxdb/meta
      # Must match wal-dir in the InfluxDB config (/var/lib/influxdb/wal);
      # the previous target ".../wale" left the WAL inside the container.
      - /data/influxdb/wal:/var/lib/influxdb/wal

启动命令

docker-compose up --detach  # create and start every service in the background

查看容器状态

docker ps --all  # list all containers, including stopped ones

配置文件参考

prometheus.yml

# my global config
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - /prometheus/alert.rules
  - /prometheus/containerAlert.rules
  - /prometheus/jvm.rules

# Scrape configuration: the Pushgateway plus several file-based
# service-discovery jobs whose target lists live under /prometheus.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'pushgateway'
    static_configs:
      - targets: ['pg:9091']
        labels:
          instance: pushgateway

  # Scraped every 3s from /actuator/prometheus instead of the default path.
  - job_name: 'convergency_sd'
    scrape_interval: 3s
    metrics_path: '/actuator/prometheus'
    file_sd_configs:
      - refresh_interval: 1m
        files: ['/prometheus/convergence_targets.json']

  - job_name: 'ac_sd'
    file_sd_configs:
      - refresh_interval: 1m
        files: ['/prometheus/ac_targets.json']

  - job_name: 'other_sd'
    file_sd_configs:
      - files: ['/prometheus/other_targets.json']

  - job_name: 'ex_sd'
    file_sd_configs:
      - files: ['/prometheus/ex_targets.json']

# Remote storage in InfluxDB, database "prometheus".
# NOTE(review): credentials are embedded in the URLs; consider moving them
# out of this file.
remote_write:
  - url: "http://influxdb:8086/api/v1/prom/write?db=prometheus&u=admin&p=admin"

remote_read:
  # The user was previously misspelled "amdin"; it must match remote_write.
  - url: "http://influxdb:8086/api/v1/prom/read?db=prometheus&u=admin&p=admin"

alertmanager.yml

# Alertmanager routing: every receiver forwards to the DingTalk webhook
# bridge at dingtalk:8060.
global:
  resolve_timeout: 5m

route:
  # Fallback receiver and timings for alerts that match no child route.
  receiver: webhook
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 5m
  # Child routes are listed in their original order.
  routes:
    - match_re:
        level: P0
      receiver: webhook
    - match:
        checkType: jvm
      receiver: webhookJvm
      repeat_interval: 20m
    - match:
        checkType: jvmNum
      receiver: webhookJvmNum
      repeat_interval: 20m
    - match_re:
        level: P3
      receiver: webhook
      repeat_interval: 20m
    - match_re:
        level: P2
      receiver: webhook
      repeat_interval: 20m
    - match_re:
        level: P1
      receiver: webhook

receivers:
  - name: webhook
    webhook_configs:
      - url: http://dingtalk:8060/dingtalk/webhook2/send
        send_resolved: true
  # NOTE(review): no route in this file uses webhookTest, and it points at
  # target "webhook3"; verify that target exists in the DingTalk config.
  - name: webhookTest
    webhook_configs:
      - url: http://dingtalk:8060/dingtalk/webhook3/send
        send_resolved: false
  - name: webhookJvm
    webhook_configs:
      - url: http://dingtalk:8060/dingtalk/webhook5/send
        send_resolved: false
  - name: webhookJvmNum
    webhook_configs:
      - url: http://dingtalk:8060/dingtalk/webhook6/send
        send_resolved: false

inhibit_rules:
  - source_match:  # source alerts (the ones that trigger inhibition)
      severity: 'critical'
    target_match:  # alerts that get inhibited
      severity: 'warning'
    # Both alerts must carry the same label and value, otherwise the
    # inhibition does not take effect.
    equal: ['checkType']

ding.yml

## Request timeout
# timeout: 5s

## Customizable templates path
templates:
  - /etc/prometheus-webhook-dingtalk/templates/default.tmpl

## You can also override default template using `default_message`
## The following example to use the 'legacy' template from v0.3.0
# default_message:
#   title: '{{ template "legacy.title" . }}'
#   text: '{{ template "legacy.content" . }}'

## Targets, previously was known as "profiles"
##
## SECURITY: real robot access tokens must not be published or committed.
## Every token below is a placeholder; substitute the real value at deploy
## time, and rotate any token that has already been exposed.
targets:
  webhook2:
    # Fenxiang server alert group.
    # webhook2, webhook5 and webhook6 originally shared one robot token.
    url: 'https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx'
    message:
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  webhook4:
    # Originally used a robot token different from webhook2/5/6.
    url: 'https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx'
    message:
      # Use legacy template
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  webhook5:
    # JVM template (same robot as webhook2).
    url: 'https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx'
    message:
      title: '{{ template "jvm.title" . }}'
      text: '{{ template "jvm.content" . }}'
  webhook6:
    # JVM template with unit conversion (same robot as webhook2).
    url: 'https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx'
    message:
      title: '{{ template "jvmNum.title" . }}'
      text: '{{ template "jvmNum.content" . }}'
  webhook_legacy:
    url: 'https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx'
    # Customize template content
    message:
      # Use legacy template
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'

influxdb.conf(InfluxDB 配置文件,放在挂载到容器 /etc/influxdb 的目录下)

# InfluxDB storage locations inside the container.

[meta]
  # Directory for the meta store.
  dir = "/var/lib/influxdb/meta"

[data]
  # Directory for the data files.
  dir = "/var/lib/influxdb/data"
  engine = "tsm1"
  # Directory for the write-ahead log.
  wal-dir = "/var/lib/influxdb/wal"

相关帖子

欢迎来到这里!

我们正在构建一个小众社区,大家在这里相互信任,以平等 • 自由 • 奔放的价值观进行分享交流。最终,希望大家能够找到与自己志同道合的伙伴,共同成长。

注册 关于
请输入回帖内容 ...