Это старая версия документа!
Агент сервера с открытым исходным кодом, который поможет вам собирать метрики из ваших стеков, датчиков и систем.
Скачиваем дистр и ставим dpkg -i
wget https://repos.influxdata.com/debian/pool/stable/t/telegraf/telegraf_1.19.1-1_amd64.deb # Debian
wget https://repos.influxdata.com/centos/9/amd64/stable/telegraf-1.22.4-1.x86_64.rpm # CentOS
Конфиг находится в /etc/telegraf/telegraf.conf
Командой можно генерировать конфигурацию:
# telegraf -sample-config > telegraf.conf
В данном случае будет дефолтная конфа, со всеми доступными метриками (плагинами)
Можно ограничивать аргументами:
# telegraf -sample-config --input-filter <плагины сбора метрик через ":"> --output-filter <плагины передачи данных с метрик через ":"> > telegraf.conf
На странице https://docs.influxdata.com/telegraf/v1.19/plugins/ можно найти список всех доступных плагинов.
Пример:
# telegraf -sample-config --input-filter disk:diskio:hddtemp --output-filter influxdb > telegraf.conf
Проверка конфигурации:
# telegraf --test && systemctl restart telegraf
Далее проверяем наличие данных в БД:
influx
> show databases
> use telegraf
> show measurements
> SELECT * FROM diskio ORDER BY time DESC LIMIT 15 # и т.д.
telegraf -test - проверка конфигурации
# Telegraf agent configuration for a Windows host: collects Windows
# performance counters plus the disk, diskio, mem, swap and exec inputs,
# and writes the metrics to an InfluxDB database named "telegraf".
#
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.

# Global tags can be specified here in key="value" format.
[global_tags]
# dc = "us-east-1" # will tag all metrics with dc=us-east-1
# rack = "1a"

# Configuration for telegraf agent
[agent]
## Default data collection interval for all inputs
interval = "60s"

## Rounds collection interval to 'interval'
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true

## Telegraf will cache metric_buffer_limit metrics for each output, and will
## flush this buffer on a successful write.
metric_buffer_limit = 1000

## Flush the buffer whenever full, regardless of flush_interval.
flush_buffer_when_full = true

## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
## This can be used to avoid many plugins querying things like sysfs at the
## same time, which can have a measurable effect on the system.
collection_jitter = "0s"

## Default flushing interval for all outputs. You shouldn't set this below
## interval. Maximum flush_interval will be flush_interval + flush_jitter
flush_interval = "10s"

## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"

## Logging configuration:
## Run telegraf in debug mode
debug = false
## Run telegraf in quiet mode
quiet = false
## Specify the log file name. The empty string means to log to stdout.
logfile = "C:/Program Files/Telegraf/telegraf.log"

## Override default hostname, if empty use os.Hostname()
hostname = ""

###############################################################################
#                                  OUTPUTS                                    #
###############################################################################

# Configuration for influxdb server to send metrics to
[[outputs.influxdb]]
# The full HTTP or UDP endpoint URL for your InfluxDB instance.
# Multiple urls can be specified but it is assumed that they are part of the same
# cluster, this means that only ONE of the urls will be written to each interval.
# urls = ["udp://localhost:8089"] # UDP endpoint example
# NOTE(review): the host part of the URL below is empty — confirm the intended
# InfluxDB address before deploying.
urls = ["http://:8086"] # required

# The target database for metrics (telegraf will create it if not exists)
database = "telegraf" # required

# Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h".
# note: using second precision greatly helps InfluxDB compression
precision = "s"

retention_policy = ""

## Write timeout (for the InfluxDB client), formatted as a string.
## If not provided, will default to 5s. 0s means no timeout (not recommended).
timeout = "10s"
username = ""
password = ""

# Set the user agent for HTTP POSTs (can be useful for log differentiation)
# user_agent = "telegraf"

# Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes)
# udp_payload = 512

###############################################################################
#                                  INPUTS                                     #
###############################################################################

[[inputs.win_perf_counters]]

[[inputs.win_perf_counters.object]]
# Processor usage, alternative to native, reports on a per core.
ObjectName = "Processor"
Instances = ["*"]
Counters = [
    "% Idle Time",
    "% Interrupt Time",
    "% Privileged Time",
    "% User Time",
    "% Processor Time",
    "% DPC Time",
]
Measurement = "win_cpu"
# Set to true to include _Total instance when querying for all (*).
IncludeTotal = true

[[inputs.win_perf_counters.object]]
# Disk times and queues
ObjectName = "LogicalDisk"
Instances = ["*"]
Counters = [
    "% Idle Time",
    "% Disk Time",
    "% Disk Read Time",
    "% Disk Write Time",
    "Current Disk Queue Length",
    "% Free Space",
    "Free Megabytes",
]
Measurement = "win_disk"
# Set to true to include _Total instance when querying for all (*).
#IncludeTotal=false

[[inputs.win_perf_counters.object]]
ObjectName = "PhysicalDisk"
Instances = ["*"]
Counters = [
    "Disk Read Bytes/sec",
    "Disk Write Bytes/sec",
    "Current Disk Queue Length",
    "Disk Reads/sec",
    "Disk Writes/sec",
    "% Disk Time",
    "% Disk Read Time",
    "% Disk Write Time",
]
Measurement = "win_diskio"

[[inputs.win_perf_counters.object]]
ObjectName = "Network Interface"
Instances = ["*"]
Counters = [
    "Bytes Received/sec",
    "Bytes Sent/sec",
    "Packets Received/sec",
    "Packets Sent/sec",
    "Packets Received Discarded",
    "Packets Outbound Discarded",
    "Packets Received Errors",
    "Packets Outbound Errors",
]
Measurement = "win_net"

[[inputs.win_perf_counters.object]]
ObjectName = "System"
Counters = [
    "Context Switches/sec",
    "System Calls/sec",
    "Processor Queue Length",
    "System Up Time",
]
Instances = ["------"]
Measurement = "win_system"
# Set to true to include _Total instance when querying for all (*).
#IncludeTotal=false

[[inputs.win_perf_counters.object]]
# Example query where the Instance portion must be removed to get data back,
# such as from the Memory object.
ObjectName = "Memory"
Counters = [
    "Available Bytes",
    "Cache Faults/sec",
    "Demand Zero Faults/sec",
    "Page Faults/sec",
    "Pages/sec",
    "Transition Faults/sec",
    "Pool Nonpaged Bytes",
    "Pool Paged Bytes",
    "Standby Cache Reserve Bytes",
    "Standby Cache Normal Priority Bytes",
    "Standby Cache Core Bytes",
]
# Use 6 x - to remove the Instance bit from the query.
Instances = ["------"]
Measurement = "win_mem"
# Set to true to include _Total instance when querying for all (*).
#IncludeTotal=false

[[inputs.win_perf_counters.object]]
# Example query where the Instance portion must be removed to get data back,
# such as from the Paging File object.
ObjectName = "Paging File"
Counters = [
    "% Usage",
]
Instances = ["_Total"]
Measurement = "win_swap"

[[inputs.win_perf_counters.object]]
# NOTE(review): this object repeats the "Network Interface" counters already
# collected above into "win_net", but sets no Measurement — confirm whether
# the duplicate is intentional.
ObjectName = "Network Interface"
Instances = ["*"]
Counters = [
    "Bytes Sent/sec",
    "Bytes Received/sec",
    "Packets Sent/sec",
    "Packets Received/sec",
    "Packets Received Discarded",
    "Packets Received Errors",
    "Packets Outbound Discarded",
    "Packets Outbound Errors",
]

[[inputs.win_perf_counters.object]]
# Process metrics, restricted to the "telegraf" process instance.
# "wmiApSrv" was removed from Counters: it is a service name (see the
# commented-out win_services block below), not a Process counter.
ObjectName = "Process"
Counters = [
    "% Processor Time",
    "Handle Count",
    "Private Bytes",
    "Thread Count",
    "Virtual Bytes",
    "Working Set",
]
Instances = ["telegraf"]
Measurement = "win_proc"
#IncludeTotal=false #Set to true to include _Total instance when querying for all (*).

#[[inputs.win_services]]
#  ## Names of the services to monitor. Leave empty to monitor all the available services on the host
#  service_names = ["wmiApSrv"]

#[[inputs.ping]]
#  urls = ["10.10.10.250"]

# Read metrics about cpu usage
#[[inputs.cpu]]
#  ## Whether to report per-cpu stats or not
#  percpu = true
#  ## Whether to report total system cpu stats or not
#  totalcpu = true
#  ## Comment this line if you want the raw CPU time metrics
#  fielddrop = ["time_*"]

# Read metrics about disk usage by mount point
[[inputs.disk]]
#  ## By default, telegraf gather stats for all mountpoints.
#  ## Setting mountpoints will restrict the stats to the specified mountpoints.
#  ## mount_points=["/"]
#
#  ## Ignore some mountpoints by filesystem type. For example (dev)tmpfs (usually
#  ## present on /run, /var/run, /dev/shm or /dev).
#  # ignore_fs = ["tmpfs", "devtmpfs"]

# Read metrics about disk IO by device
[[inputs.diskio]]
#  ## By default, telegraf will gather stats for all devices including
#  ## disk partitions.
#  ## Setting devices will restrict the stats to the specified devices.
#  ## devices = ["sda", "sdb"]
#  ## Uncomment the following line if you do not need disk serial numbers.
#  ## skip_serial_number = true

# Read metrics about memory usage
[[inputs.mem]]
#  # no configuration

# Read metrics about swap memory usage
[[inputs.swap]]
#  # no configuration

[[inputs.exec]]
commands = ["C:/test.bat"]
timeout = "30s"
# name_suffix = "Win_Run"
# #name_override = "telegraf"
data_format = "influx"
Платформа для создания и работы с приложениями временных рядов. (https://www.influxdata.com/)
Для БД временных рядов важна синхронизация времени, настраиваем временную зону и ставим службу синхронизации времени
*В каталоге /usr/share/zoneinfo список всех возможных вариантов временных зон
# \cp /usr/share/zoneinfo/Europe/Moscow /etc/localtime
# apt install chrony && sudo systemctl enable chrony
Открываем файервол:
# iptables -I INPUT 1 -p tcp --dport 8086 -j ACCEPT
Импортируем ключ:
# wget -qO- https://repos.influxdata.com/influxdb.key | sudo apt-key add -
Добавляем репо:
# echo "deb https://repos.influxdata.com/debian buster stable" | sudo tee /etc/apt/sources.list.d/influxdb.list
Ставим:
# apt update && sudo apt install influxdb
По умолчанию аутентификация отключена (вход открыт для всех). Чтобы её включить, в конфиге в блоке «http» нужно установить параметр auth-enabled = true
# influx
> CREATE USER admin WITH PASSWORD '<password>' WITH ALL PRIVILEGES
> SHOW USERS
> GRANT [READ,WRITE,ALL] ON <database_name> TO <username>
> REVOKE [READ,WRITE,ALL] ON <database_name> FROM <username>
> GRANT ALL PRIVILEGES TO <username>
> REVOKE ALL PRIVILEGES FROM <username>
> SHOW GRANTS FOR <user_name>
> SET PASSWORD FOR <username> = '<password>'
> DROP USER <username>
>
> CREATE DATABASE
> DROP DATABASE
> DROP SERIES
> DROP MEASUREMENT
> CREATE RETENTION POLICY
> ALTER RETENTION POLICY
> DROP RETENTION POLICY
> CREATE CONTINUOUS QUERY
> DROP CONTINUOUS QUERY
>
#