Показаны различия между двумя версиями страницы.
Предыдущая версия справа и слева Предыдущая версия Следующая версия | Предыдущая версия | ||
linux:metrics [2021/07/20 06:47] admin |
linux:metrics [2023/11/05 06:05] (текущий) admin [Мониторинг ZFS] |
||
---|---|---|---|
Строка 1: | Строка 1: | ||
- | ====== Сбор метрик и хранение в TSDB ====== | + | ====== Сбор метрик и хранение в InfluxDB |
Строка 8: | Строка 8: | ||
==== Установка ==== | ==== Установка ==== | ||
Скачиваем дистр и ставим **dpkg -i** | Скачиваем дистр и ставим **dpkg -i** | ||
- | <code bash># wget https:// | + | <code bash> |
+ | wget https:// | ||
+ | |||
+ | # Либо есть в тех же репозиториях, что и influxdb | ||
+ | cat <<EOF | sudo tee / | ||
+ | [influxdata] | ||
+ | name = InfluxData Repository - Stable | ||
+ | baseurl = https:// | ||
+ | enabled = 1 | ||
+ | gpgcheck = 1 | ||
+ | gpgkey = https:// | ||
+ | EOF | ||
+ | |||
+ | dnf install telegraf | ||
+ | </ | ||
Строка 21: | Строка 35: | ||
на странице https:// | на странице https:// | ||
- | Пример | + | Пример: |
<code bash># telegraf -sample-config --input-filter disk: | <code bash># telegraf -sample-config --input-filter disk: | ||
- | Проверка конфигурации | + | Проверка конфигурации: |
<code bash># telegraf --test && systemctl restart telegraf</ | <code bash># telegraf --test && systemctl restart telegraf</ | ||
- | Далее проверяем наличие данных в БД | + | Далее проверяем наличие данных в БД: |
<code bash> | <code bash> | ||
influx | influx | ||
Строка 38: | Строка 52: | ||
- | ==== ==== | ||
+ | < | ||
+ | < | ||
+ | telegraf --test — проверка конфигурации | ||
- | ==== ==== | + | <code bash> |
+ | [global_tags] | ||
+ | [agent] | ||
+ | interval | ||
+ | round_interval | ||
+ | metric_buffer_limit | ||
+ | flush_buffer_when_full | ||
+ | collection_jitter | ||
+ | flush_interval | ||
+ | flush_jitter | ||
+ | |||
+ | ## Logging configuration: | ||
+ | debug = false | ||
+ | quiet = false | ||
+ | logfile = " | ||
+ | |||
+ | hostname = "" | ||
+ | |||
+ | |||
+ | ############################################################################### | ||
+ | # OUTPUTS | ||
+ | ############################################################################### | ||
+ | |||
+ | [[outputs.influxdb]] | ||
+ | urls = [" | ||
+ | database = " | ||
+ | precision = " | ||
+ | retention_policy = "" | ||
+ | timeout = " | ||
+ | username = "" | ||
+ | password = "" | ||
+ | ############################################################################### | ||
+ | # INPUTS | ||
+ | ############################################################################### | ||
- | <code bash># </ | + | [[inputs.win_perf_counters]] |
- | <code bash># </ | + | |
- | <code bash># </ | + | # Processor usage, alternative to native, reports on a per core. |
- | <code bash># </ | + | |
+ | Instances = [" | ||
+ | Counters = [ | ||
+ | "% Idle Time", | ||
+ | "% Interrupt Time", | ||
+ | "% Privileged Time", | ||
+ | "% User Time", | ||
+ | "% Processor Time", | ||
+ | "% DPC Time", | ||
+ | ] | ||
+ | Measurement = " | ||
+ | | ||
+ | IncludeTotal=true | ||
+ | [[inputs.win_perf_counters.object]] | ||
+ | # Disk times and queues | ||
+ | ObjectName = " | ||
+ | Instances = [" | ||
+ | Counters = [ | ||
+ | "% Idle Time", | ||
+ | "% Disk Time", | ||
+ | "% Disk Read Time", | ||
+ | "% Disk Write Time", | ||
+ | " | ||
+ | "% Free Space", | ||
+ | "Free Megabytes", | ||
+ | ] | ||
+ | Measurement = " | ||
+ | # Set to true to include _Total instance when querying for all (*). | ||
+ | # | ||
- | ===== InfluxDB ===== | + | [[inputs.win_perf_counters.object]] |
- | Платформа для создания и работы с приложениями временных рядов. (https://www.influxdata.com/)\\ | + | ObjectName |
+ | Instances | ||
+ | Counters | ||
+ | " | ||
+ | "Disk Write Bytes/sec", | ||
+ | " | ||
+ | "Disk Reads/sec", | ||
+ | "Disk Writes/ | ||
+ | "% Disk Time", | ||
+ | "% Disk Read Time", | ||
+ | "% Disk Write Time", | ||
+ | ] | ||
+ | Measurement = " | ||
- | ==== Подготовка сервера ==== | + | [[inputs.win_perf_counters.object]] |
- | Для БД временных рядов важна синхронизация времени, настраиваем временную зону и ставим службу синхронизации времени\\ | + | ObjectName |
- | *В каталоге | + | Instances |
- | <code bash># \cp /usr/ | + | Counters |
- | <code bash># apt install chrony && sudo systemctl enable chrony</code> | + | " |
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | ] | ||
+ | Measurement = " | ||
- | Открываем файервол | + | [[inputs.win_perf_counters.object]] |
- | <code bash># iptables | + | |
+ | Counters = [ | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | ] | ||
+ | Instances = ["------"] | ||
+ | Measurement = " | ||
+ | # Set to true to include _Total instance when querying for all (*). | ||
+ | # | ||
+ | [[inputs.win_perf_counters.object]] | ||
+ | # Example query where the Instance portion must be removed to get data back, | ||
+ | # such as from the Memory object. | ||
+ | ObjectName = " | ||
+ | Counters = [ | ||
+ | " | ||
+ | "Cache Faults/ | ||
+ | " | ||
+ | "Page Faults/ | ||
+ | " | ||
+ | " | ||
+ | "Pool Nonpaged Bytes", | ||
+ | "Pool Paged Bytes", | ||
+ | " | ||
+ | " | ||
+ | " | ||
- | ==== Установка | + | ] |
- | Импортируем ключ | + | # Use 6 x - to remove the Instance bit from the query. |
- | <code bash># wget -qO- https://repos.influxdata.com/influxdb.key | sudo apt-key add -</ | + | Instances |
+ | Measurement | ||
+ | # Set to true to include _Total instance when querying for all (*). | ||
+ | # | ||
+ | |||
+ | [[inputs.win_perf_counters.object]] | ||
+ | # Example query where the Instance portion must be removed to get data back, | ||
+ | # such as from the Paging File object. | ||
+ | ObjectName | ||
+ | Counters = [ | ||
+ | "% Usage", | ||
+ | ] | ||
+ | Instances = [" | ||
+ | Measurement = " | ||
+ | |||
+ | [[inputs.win_perf_counters.object]] | ||
+ | ObjectName = " | ||
+ | Instances = [" | ||
+ | Counters = [ | ||
+ | "Bytes Sent/ | ||
+ | "Bytes Received/ | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | ] | ||
+ | |||
+ | [[inputs.win_perf_counters.object]] | ||
+ | # Process metrics, in this case for IIS only | ||
+ | ObjectName = " | ||
+ | Counters = ["% Processor Time"," | ||
+ | Instances = [" | ||
+ | Measurement = " | ||
+ | # | ||
+ | |||
+ | # | ||
+ | # ## Names of the services to monitor. Leave empty to monitor all the available services on the host | ||
+ | # service_names = [" | ||
+ | |||
+ | # | ||
+ | # urls = [" | ||
+ | |||
+ | # # Read metrics about cpu usage | ||
+ | # | ||
+ | # ## Whether to report per-cpu stats or not | ||
+ | # | ||
+ | # ## Whether to report total system cpu stats or not | ||
+ | # | ||
+ | # ## Comment this line if you want the raw CPU time metrics | ||
+ | # | ||
+ | |||
+ | |||
+ | # # Read metrics about disk usage by mount point | ||
+ | [[inputs.disk]] | ||
+ | # ## By default, telegraf gather stats for all mountpoints. | ||
+ | # ## Setting mountpoints will restrict the stats to the specified mountpoints. | ||
+ | # ## mount_points=["/" | ||
+ | # | ||
+ | # ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually | ||
+ | # ## present on /run, /var/run, /dev/shm or /dev). | ||
+ | # # ignore_fs = [" | ||
+ | |||
+ | |||
+ | # # Read metrics about disk IO by device | ||
+ | [[inputs.diskio]] | ||
+ | # ## By default, telegraf will gather stats for all devices including | ||
+ | # ## disk partitions. | ||
+ | # ## Setting devices will restrict the stats to the specified devices. | ||
+ | # ## devices = [" | ||
+ | # ## Uncomment the following line if you do not need disk serial numbers. | ||
+ | # ## skip_serial_number = true | ||
+ | |||
+ | |||
+ | # # Read metrics about memory usage | ||
+ | [[inputs.mem]] | ||
+ | # # no configuration | ||
+ | |||
+ | |||
+ | # # Read metrics about swap memory usage | ||
+ | [[inputs.swap]] | ||
+ | # # no configuration | ||
+ | |||
+ | |||
+ | |||
+ | [[inputs.exec]] | ||
+ | commands = [" | ||
+ | timeout = " | ||
+ | # name_suffix = " | ||
+ | # # | ||
+ | data_format = " | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | |||
+ | |||
+ | ==== Мониторинг ZFS ==== | ||
+ | |||
+ | < | ||
+ | < | ||
+ | [[https:// | ||
+ | |||
+ | Добавляем в конфиг; собственно, достаточно параметров по умолчанию\\ | ||
+ | Данные берутся из "/ | ||
+ | <code bash> | ||
+ | [[inputs.zfs]] | ||
+ | |||
+ | </ | ||
+ | </ | ||
+ | |||
+ | |||
+ | < | ||
+ | < | ||
+ | [[https://github.com/influxdata/ | ||
+ | По некоторым причинам метрики удобнее передавать (и настраивать в Grafana) через Prometheus\\ | ||
+ | Есть модуль, который публикует содержимое Telegraf в формате Prometheus, | ||
+ | Добавляем блок " | ||
+ | <code bash> | ||
+ | [[outputs.prometheus_client]] | ||
+ | listen = ": | ||
+ | string_as_label = true | ||
+ | metric_version = 2 | ||
+ | </code> | ||
+ | </ | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
+ | < | ||
+ | < | ||
+ | |||
+ | <code bash> | ||
+ | |||
+ | </ | ||
+ | </ | ||
- | Добавляем репо | ||
- | <code bash># echo "deb https:// | ||
- | Ставим | ||
- | <code bash># apt update && sudo apt install influxdb</ | ||
- | ==== ==== | ||
- | ==== ==== | ||
- | ==== ==== | ||
- | <code bash># </ | ||
- | <code bash># </ | ||
- | <code bash># </ | ||
- | <code bash># </ | ||
- | <code bash># </ | ||
- | <code bash># </ | ||