====== Сбор метрик и хранение в InfluxDB ======


===== telegraf =====
Серверный агент с открытым исходным кодом, который собирает метрики из ваших стеков, датчиков и систем.\\


==== Установка ====
Скачиваем дистрибутив и ставим пакет: **dpkg -i** (Debian) или **rpm -i** (CentOS)
<code bash>wget https://repos.influxdata.com/debian/pool/stable/t/telegraf/telegraf_1.19.1-1_amd64.deb # Debian
wget https://repos.influxdata.com/centos/9/amd64/stable/telegraf-1.22.4-1.x86_64.rpm # CentOS

   # Либо есть в тех же репозиториях что и influxdb
# Register the InfluxData yum repository. "\$basearch" is escaped so that the
# literal "$basearch" reaches the repo file and is expanded by dnf, not by the shell.
cat <<EOF | sudo tee /etc/yum.repos.d/influxdata.repo
[influxdata]
name = InfluxData Repository - Stable
baseurl = https://repos.influxdata.com/stable/\$basearch/main
enabled = 1
gpgcheck = 1
gpgkey = https://repos.influxdata.com/influxdata-archive_compat.key
EOF

# Package installation needs root, same as writing the repo file above.
sudo dnf install telegraf
</code>


==== Настройка ====
Конфиг находится в **/etc/telegraf/telegraf.conf**

Командой можно генерировать конфигурацию:
<code bash># telegraf -sample-config > telegraf.conf</code>
В данном случае будет дефолтная конфа, со всеми доступными метриками (плагинами)\\
Можно ограничивать аргументами:
<code bash># telegraf -sample-config --input-filter <плагины сбора метрик через ":"> --output-filter <плагины передачи данных с метрик через ":"> > telegraf.conf</code>
На странице https://docs.influxdata.com/telegraf/v1.19/plugins/ можно найти список всех доступных плагинов.

Пример:
<code bash># telegraf -sample-config --input-filter disk:diskio:hddtemp --output-filter influxdb > telegraf.conf</code>

Проверка конфигурации:
<code bash># telegraf --test && systemctl restart telegraf</code>

Далее проверяем наличие данных в БД:
<code bash>
influx
> show databases
> use telegraf
> show measurements
> SELECT * FROM diskio ORDER BY time DESC LIMIT 15
# и т.д.
</code>


<details>
<summary> :!: Пример конфига </summary>
**telegraf --test** — проверка конфигурации

<code bash>
# Tags attached to every metric; none are defined in this example.
[global_tags]

# Agent-wide collection, flushing and logging settings.
[agent]
  # Collection: poll all inputs once a minute, with no random jitter.
  interval = "60s"
  round_interval = true
  collection_jitter = "0s"

  # Flushing: push buffered metrics to the outputs every 10 seconds.
  flush_interval = "10s"
  flush_jitter = "0s"
  metric_buffer_limit = 1000
  # The former "flush_buffer_when_full" key was dropped here: it is a
  # long-deprecated agent option that Telegraf ignores.

  ## Logging configuration:
  debug = false
  quiet = false
  logfile = "C:/Program Files/Telegraf/telegraf.log"

  # Left empty so the agent picks the host name itself
  # (presumably the OS host name; see the agent documentation).
  hostname = ""
 
 
###############################################################################
#                                  OUTPUTS                                    #
###############################################################################
 
# Send collected metrics to InfluxDB over HTTP.
[[outputs.influxdb]]
  # NOTE(review): the URL has no host part; put the InfluxDB server
  # address in front of ":8086" before using this config.
  urls = ["http://:8086"] # required
  # Target database; it is the one queried in the "show measurements" check on this page.
  database = "telegraf" # required
  precision = "s"
  retention_policy = ""
  timeout = "10s"
  # Empty credentials: presumably the server accepts unauthenticated writes; confirm.
  username = ""
  password = ""


###############################################################################
#                                  INPUTS                                     #
###############################################################################


# Windows Performance Counters input. Every "object" sub-table that follows
# describes one performance object and the counters to read from it.
[[inputs.win_perf_counters]]
  [[inputs.win_perf_counters.object]]
    # Per-core processor usage (alternative to the native cpu input).
    Measurement = "win_cpu"
    ObjectName = "Processor"
    Instances = ["*"]
    # Report the _Total instance in addition to the wildcard match.
    IncludeTotal = true
    Counters = [
      "% Idle Time",
      "% Interrupt Time",
      "% Privileged Time",
      "% User Time",
      "% Processor Time",
      "% DPC Time",
    ]

  [[inputs.win_perf_counters.object]]
    # Logical disk busy times, queue length and free space.
    Measurement = "win_disk"
    ObjectName = "LogicalDisk"
    Instances = ["*"]
    Counters = [
      "% Idle Time",
      "% Disk Time",
      "% Disk Read Time",
      "% Disk Write Time",
      "Current Disk Queue Length",
      "% Free Space",
      "Free Megabytes",
    ]
    # Uncomment to control whether the _Total instance is included for "*".
    #IncludeTotal=false

  [[inputs.win_perf_counters.object]]
    # Physical disk throughput, operation rates and busy times.
    Measurement = "win_diskio"
    ObjectName = "PhysicalDisk"
    Instances = ["*"]
    Counters = [
      "Disk Read Bytes/sec",
      "Disk Write Bytes/sec",
      "Current Disk Queue Length",
      "Disk Reads/sec",
      "Disk Writes/sec",
      "% Disk Time",
      "% Disk Read Time",
      "% Disk Write Time",
    ]

  [[inputs.win_perf_counters.object]]
    # Per-interface traffic, discard and error counters.
    Measurement = "win_net"
    ObjectName = "Network Interface"
    Instances = ["*"]
    Counters = [
      "Bytes Received/sec",
      "Bytes Sent/sec",
      "Packets Received/sec",
      "Packets Sent/sec",
      "Packets Received Discarded",
      "Packets Outbound Discarded",
      "Packets Received Errors",
      "Packets Outbound Errors",
    ]

  [[inputs.win_perf_counters.object]]
    # System-wide counters; six dashes drop the instance part of the query.
    Measurement = "win_system"
    ObjectName = "System"
    Instances = ["------"]
    Counters = [
      "Context Switches/sec",
      "System Calls/sec",
      "Processor Queue Length",
      "System Up Time",
    ]
    # Uncomment to control whether the _Total instance is included for "*".
    #IncludeTotal=false

  [[inputs.win_perf_counters.object]]
    # Memory counters. For this object the instance part of the query has to
    # be dropped to get data back, hence the six dashes in Instances.
    Measurement = "win_mem"
    ObjectName = "Memory"
    Instances = ["------"]
    Counters = [
      "Available Bytes",
      "Cache Faults/sec",
      "Demand Zero Faults/sec",
      "Page Faults/sec",
      "Pages/sec",
      "Transition Faults/sec",
      "Pool Nonpaged Bytes",
      "Pool Paged Bytes",
      "Standby Cache Reserve Bytes",
      "Standby Cache Normal Priority Bytes",
      "Standby Cache Core Bytes",
    ]
    # Uncomment to control whether the _Total instance is included for "*".
    #IncludeTotal=false

  [[inputs.win_perf_counters.object]]
    # Paging file usage, stored as the "win_swap" measurement.
    # Only the aggregated _Total instance is queried.
    ObjectName = "Paging File"
    Counters = [
      "% Usage",
    ]
    Instances = ["_Total"]
    Measurement = "win_swap"

  [[inputs.win_perf_counters.object]]
    # NOTE(review): this object lists the same "Network Interface" counters as
    # the object that writes to "win_net" earlier in this plugin section, and
    # it sets no Measurement, so its data goes to the plugin's default
    # measurement name. Confirm whether the duplicate collection is intended.
    ObjectName = "Network Interface"
    Instances = ["*"]
    Counters = [
      "Bytes Sent/sec",
      "Bytes Received/sec",
      "Packets Sent/sec",
      "Packets Received/sec",
      "Packets Received Discarded",
      "Packets Received Errors",
      "Packets Outbound Discarded",
      "Packets Outbound Errors",
    ]

[[inputs.win_perf_counters.object]]
    # Process metrics, restricted to the "telegraf" process instance.
    ObjectName = "Process"
    Instances = ["telegraf"]
    # "wmiApSrv" used to be listed here, but it is a Windows service name,
    # not a counter of the Process object. The commented-out
    # inputs.win_services section is the place to monitor services.
    Counters = [
      "% Processor Time",
      "Handle Count",
      "Private Bytes",
      "Thread Count",
      "Virtual Bytes",
      "Working Set",
    ]
    Measurement = "win_proc"
    # Uncomment to control whether the _Total instance is included for "*".
    #IncludeTotal=false

#[[inputs.win_services]]
#  ## Names of the services to monitor. Leave empty to monitor all the available services on the host
#  service_names = ["wmiApSrv"]	
	
#[[inputs.ping]]
#  urls = ["10.10.10.250"]

# # Read metrics about cpu usage
#[[inputs.cpu]]
#   ## Whether to report per-cpu stats or not
#   percpu = true
#   ## Whether to report total system cpu stats or not
#   totalcpu = true
#   ## Comment this line if you want the raw CPU time metrics
#   fielddrop = ["time_*"]


# Read metrics about disk usage by mount point.
[[inputs.disk]]
  ## By default, telegraf gathers stats for all mountpoints.
  ## Setting mount_points restricts the stats to the specified mountpoints.
  # mount_points = ["/"]
  #
  ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
  ## present on /run, /var/run, /dev/shm or /dev).
  # ignore_fs = ["tmpfs", "devtmpfs"]


# Read metrics about disk IO by device.
[[inputs.diskio]]
  ## By default, telegraf gathers stats for all devices, including
  ## disk partitions.
  ## Setting devices restricts the stats to the specified devices.
  # devices = ["sda", "sdb"]
  ## Uncomment the following line if you do not need disk serial numbers.
  # skip_serial_number = true


# Read metrics about memory usage.
[[inputs.mem]]
  # no configuration


# Read metrics about swap memory usage.
[[inputs.swap]]
  # no configuration

  
# Run an external script and ingest what it prints.
[[inputs.exec]]
  # The script output is parsed with the "influx" data format.
  data_format = "influx"
  commands = ["C:/test.bat"]
  timeout = "30s"
  # Optional naming overrides, currently disabled:
  # name_suffix = "Win_Run"
  # name_override = "telegraf"
</code>
</details>


==== Мониторинг ZFS ====

<details>
<summary>:!: Плагин для ZFS</summary>
[[https://github.com/influxdata/telegraf/blob/master/plugins/inputs/zfs/README.md|Doc]]\\

Добавляем в конфиг; собственно, достаточно параметров по умолчанию\\
Данные берутся из "/proc/spl/kstat/zfs"\\
<code bash>
# ZFS input with default settings; no options are set.
[[inputs.zfs]]

</code>
</details>


<details>
<summary>:!: Публикация в Prometheus </summary>
[[https://github.com/influxdata/telegraf/blob/master/plugins/outputs/prometheus_client/README.md|Doc]]\\
По некоторым причинам удобнее метрики передавать (настраивать в графане) через прометеус\\
Есть модуль который публикует содержимое телеграфа в формате прометеуса, в веб-страницу\\
Добавляем блок "output" в конфиг:\\
<code bash>
# Publish the collected metrics in Prometheus format on a built-in HTTP listener.
[[outputs.prometheus_client]]
  metric_version = 2
  string_as_label = true
  # No host given, only port 9273.
  listen = ":9273"
</code>
</details>


<details>
<summary> </summary>

<code bash>

</code>
</details>