Различия

Показаны различия между двумя версиями страницы.

--- linux:metrics [2021/07/20 05:31]
admin
+++ linux:metrics [2023/11/05 06:05] (текущий)
admin [Мониторинг ZFS]
@@ Строка 1: / Строка 1: @@
-====== Сбор метрик и хранение в TSDB ======
+====== Сбор метрик и хранение в InfluxDB ======
 ===== telegraf =====
 Агент сервера с открытым исходным кодом, который поможет вам собирать метрики из ваших стеков, датчиков и систем.\\
+==== Установка ====
+Скачиваем дистр и ставим **dpkg -i**
+<code bash>wget https://repos.influxdata.com/debian/pool/stable/t/telegraf/telegraf_1.19.1-1_amd64.deb # Debian
+wget https://repos.influxdata.com/centos/9/amd64/stable/telegraf-1.22.4-1.x86_64.rpm # CentOS
+   # Либо есть в тех же репозиториях что и influxdb
+cat <<EOF | sudo tee /etc/yum.repos.d/influxdata.repo
+[influxdata]
+name = InfluxData Repository - Stable
+baseurl = https://repos.influxdata.com/stable/\$basearch/main
+enabled = 1
+gpgcheck = 1
+gpgkey = https://repos.influxdata.com/influxdata-archive_compat.key
+EOF
+dnf install telegraf
+</code>
+==== Настройка ====
+Конфиг находится в **/etc/telegraf/telegraf.conf**\\
+Командой можно генерировать конфигурацию:
+<code bash># telegraf -sample-config > telegraf.conf</code>
+В данном случае будет дефолтная конфа, со всеми доступными метриками (плагинами)\\
+Можно ограничивать аргументами:
+<code bash># telegraf -sample-config --input-filter <плагины сбора метрик через ":"> --output-filter <плагины передачи данных с метрик через ":"> > telegraf.conf</code>
+На странице https://docs.influxdata.com/telegraf/v1.19/plugins/ можно найти список всех доступных плагинов.
+Пример:
+<code bash># telegraf -sample-config --input-filter disk:diskio:hddtemp --output-filter influxdb > telegraf.conf</code>
+Проверка конфигурации:
+<code bash># telegraf --test && systemctl restart telegraf</code>
+Далее проверяем наличие данных в БД:
+<code bash>
+influx
+> show databases
+> use telegraf
+> show measurements
+> SELECT * FROM diskio ORDER BY time DESC LIMIT 15
+# и т.д.
+</code>
+<details>
+<summary> :!: Пример конфига </summary>
+telegraf -test - проверка конфигурации
+<code bash>
+[global_tags]
+[agent]
+  interval = "60s"
+  round_interval = true
+  metric_buffer_limit = 1000
+  flush_buffer_when_full = true
+  collection_jitter = "0s"
+  flush_interval = "10s"
+  flush_jitter = "0s"
+  ## Logging configuration:
+  debug = false
+  quiet = false
+  logfile = "C:/Program Files/Telegraf/telegraf.log"
+  hostname = ""
+###############################################################################
+#                                  OUTPUTS                                    #
+###############################################################################
+[[outputs.influxdb]]
+  urls = ["http://:8086"] # required
+  database = "telegraf" # required
+  precision = "s"
+  retention_policy = ""
+  timeout = "10s"
+  username = ""
+  password = ""
+###############################################################################
+#                                  INPUTS                                     #
+###############################################################################
+[[inputs.win_perf_counters]]
+  [[inputs.win_perf_counters.object]]
+    # Processor usage, alternative to native, reports on a per core.
+    ObjectName = "Processor"
+    Instances = ["*"]
+    Counters = [
+      "% Idle Time",
+      "% Interrupt Time",
+      "% Privileged Time",
+      "% User Time",
+      "% Processor Time",
+      "% DPC Time",
+    ]
+    Measurement = "win_cpu"
+    # Set to true to include _Total instance when querying for all (*).
+    IncludeTotal=true
+  [[inputs.win_perf_counters.object]]
+    # Disk times and queues
+    ObjectName = "LogicalDisk"
+    Instances = ["*"]
+    Counters = [
+      "% Idle Time",
+      "% Disk Time",
+      "% Disk Read Time",
+      "% Disk Write Time",
+      "Current Disk Queue Length",
+      "% Free Space",
+      "Free Megabytes",
+    ]
+    Measurement = "win_disk"
+    # Set to true to include _Total instance when querying for all (*).
+    #IncludeTotal=false
+  [[inputs.win_perf_counters.object]]
+    ObjectName = "PhysicalDisk"
+    Instances = ["*"]
+    Counters = [
+      "Disk Read Bytes/sec",
+      "Disk Write Bytes/sec",
+      "Current Disk Queue Length",
+      "Disk Reads/sec",
+      "Disk Writes/sec",
+      "% Disk Time",
+      "% Disk Read Time",
+      "% Disk Write Time",
+    ]
+    Measurement = "win_diskio"
+  [[inputs.win_perf_counters.object]]
+    ObjectName = "Network Interface"
+    Instances = ["*"]
+    Counters = [
+      "Bytes Received/sec",
+      "Bytes Sent/sec",
+      "Packets Received/sec",
+      "Packets Sent/sec",
+      "Packets Received Discarded",
+      "Packets Outbound Discarded",
+      "Packets Received Errors",
+      "Packets Outbound Errors",
+    ]
+    Measurement = "win_net"
+  [[inputs.win_perf_counters.object]]
+    ObjectName = "System"
+    Counters = [
+      "Context Switches/sec",
+      "System Calls/sec",
+      "Processor Queue Length",
+      "System Up Time",
+    ]
+    Instances = ["------"]
+    Measurement = "win_system"
+    # Set to true to include _Total instance when querying for all (*).
+    #IncludeTotal=false
+  [[inputs.win_perf_counters.object]]
+    # Example query where the Instance portion must be removed to get data back,
+    # such as from the Memory object.
+    ObjectName = "Memory"
+    Counters = [
+      "Available Bytes",
+      "Cache Faults/sec",
+      "Demand Zero Faults/sec",
+      "Page Faults/sec",
+      "Pages/sec",
+      "Transition Faults/sec",
+      "Pool Nonpaged Bytes",
+      "Pool Paged Bytes",
+      "Standby Cache Reserve Bytes",
+      "Standby Cache Normal Priority Bytes",
+      "Standby Cache Core Bytes",
+    ]
+    # Use 6 x - to remove the Instance bit from the query.
+    Instances = ["------"]
+    Measurement = "win_mem"
+    # Set to true to include _Total instance when querying for all (*).
+    #IncludeTotal=false
+  [[inputs.win_perf_counters.object]]
+    # Example query where the Instance portion must be removed to get data back,
+    # such as from the Paging File object.
+    ObjectName = "Paging File"
+    Counters = [
+      "% Usage",
+    ]
+    Instances = ["_Total"]
+    Measurement = "win_swap"
+  [[inputs.win_perf_counters.object]]
+    ObjectName = "Network Interface"
+    Instances = ["*"]
+    Counters = [
+      "Bytes Sent/sec",
+      "Bytes Received/sec",
+      "Packets Sent/sec",
+      "Packets Received/sec",
+      "Packets Received Discarded",
+      "Packets Received Errors",
+      "Packets Outbound Discarded",
+      "Packets Outbound Errors",
+    ]
+[[inputs.win_perf_counters.object]]
+    # Process metrics, in this case for the telegraf process only
+    ObjectName = "Process"
+    Counters = ["% Processor Time","Handle Count","Private Bytes","Thread Count","Virtual Bytes","Working Set","wmiApSrv"]
+    Instances = ["telegraf"]
+    Measurement = "win_proc"
+    #IncludeTotal=false #Set to true to include _Total instance when querying for all (*).
+#[[inputs.win_services]]
+#  ## Names of the services to monitor. Leave empty to monitor all the available services on the host
+#  service_names = ["wmiApSrv"]
+#[[inputs.ping]]
+#  urls = ["10.10.10.250"]
+# # Read metrics about cpu usage
+#[[inputs.cpu]]
+#   ## Whether to report per-cpu stats or not
+#   percpu = true
+#   ## Whether to report total system cpu stats or not
+#   totalcpu = true
+#   ## Comment this line if you want the raw CPU time metrics
+#   fielddrop = ["time_*"]
+# # Read metrics about disk usage by mount point
+[[inputs.disk]]
+#   ## By default, telegraf gather stats for all mountpoints.
+#   ## Setting mountpoints will restrict the stats to the specified mountpoints.
+#   ## mount_points=["/"]
+#
+#   ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
+#   ## present on /run, /var/run, /dev/shm or /dev).
+#   # ignore_fs = ["tmpfs", "devtmpfs"]
+# # Read metrics about disk IO by device
+[[inputs.diskio]]
+#   ## By default, telegraf will gather stats for all devices including
+#   ## disk partitions.
+#   ## Setting devices will restrict the stats to the specified devices.
+#   ## devices = ["sda", "sdb"]
+#   ## Uncomment the following line if you do not need disk serial numbers.
+#   ## skip_serial_number = true
+# # Read metrics about memory usage
+[[inputs.mem]]
+#   # no configuration
+# # Read metrics about swap memory usage
+[[inputs.swap]]
+#   # no configuration
+[[inputs.exec]]
+	commands = ["C:/test.bat"]
+  timeout = "30s"
+# name_suffix = "Win_Run"
+#  #name_override = "telegraf"
+	data_format = "influx"
+</code>
+</details>
+==== Мониторинг ZFS ====
+<details>
+<summary>:!: Плагин для ZFS</summary>
+[[https://github.com/influxdata/telegraf/blob/master/plugins/inputs/zfs/README.md|Doc]]\\
+Добавляем в конфиг; собственно, достаточно параметров по умолчанию\\
+Данные берутся из "/proc/spl/kstat/zfs"\\
+<code bash>
+[[inputs.zfs]]
+</code>
+</details>
+<details>
+<summary>:!: Публикация в Prometheus </summary>
+[[https://github.com/influxdata/telegraf/blob/master/plugins/outputs/prometheus_client/README.md|Doc]]\\
+По некоторым причинам удобнее метрики передавать (настраивать в графане) через прометеус\\
+Есть модуль, который публикует содержимое телеграфа в формате прометеуса на веб-странице\\
+Добавляем блок "output" в конфиг:\\
+<code bash>
+[[outputs.prometheus_client]]
+  listen = ":9273"
+  string_as_label = true
+  metric_version = 2
+</code>
+</details>
+<details>
+<summary> </summary>
+<code bash>
+</code>
+</details>
-===== InfluxDB =====
-Платформа для создания и работы с приложениями временных рядов. (https://www.influxdata.com/)\\

zxcx

Инструменты пользователя

Инструменты сайта

Различия

Инструменты страницы