最近在做车上流量的分析优化方案,需要对访问外网的进程流量进行治理。之前写过一个 Shell 脚本版本的监控(《Shell脚本调用iftop监控分析服务器进程外网流量》),本文是 Python 脚本版本,主要输出进程名、PID、上下行流量统计、时间戳、服务状态等相关信息,并将日志收集到 Kafka,为下一步开发平台分析做准备。脚本执行时的资源占用情况:CPU 0.1%–2%,MEM 0.1%(约 32M)。
#!/usr/bin/env python3
import datetime
import json
import logging
import os
import subprocess
import time

import requests
# Prerequisite: iftop, lsof and their dependencies must already be installed.

# Directory that receives the daily network_<date>.log files.
log_path = "/home/anzhihe/log/monitor/autocar_public_network_monitor"
# exist_ok=True replaces the separate "exists?" check, so there is no window
# in which another process can create the directory between check and create.
os.makedirs(log_path, exist_ok=True)

# Start-up time and the retention cutoff derived from it (7 days back).
current_date = datetime.datetime.now()
one_week_ago = current_date - datetime.timedelta(days=7)

# Endpoint, JSON payload and headers used when querying the pilot status.
url = 'http://192.168.1.110:8080/api/get_xxx_status'
data = {"src":"test_tool","context":3}
headers = {'Content-Type': 'application/json'}
# Query the autonomous-driving (pilot) status.
def get_autocar_status(url, params, headers):
    """Return the current pilot status reported by the status HTTP API.

    ``params`` is serialised to JSON and POSTed to ``url`` with a 5 second
    timeout; the value is read from ``data.result.current_pilot`` in the
    JSON response body.

    Args:
        url: Endpoint to POST to.
        params: JSON-serialisable request payload.
        headers: HTTP headers sent with the request.

    Returns:
        The ``current_pilot`` value when it is truthy, ``0`` when it is
        falsy, and ``500`` when the status could not be obtained (request
        error, non-200 response, or a response body without the expected
        keys).
    """
    try:
        response = requests.post(url, data=json.dumps(params), headers=headers, timeout=5)
        if response.status_code != 200:
            # Previously this path fell through and returned None; report
            # it with the same sentinel as every other failure.
            return 500
        current_pilot = response.json()['data']['result']['current_pilot']
    except Exception:
        # Deliberately broad: the status is best-effort and the caller
        # records 500 instead of losing the whole traffic sample.
        return 500  # 500 means the status could not be obtained
    return current_pilot or 0
def del_oneweekago_log(log_path):
    """Delete ``*.log`` files in ``log_path`` not modified for over 7 days.

    Age is judged by the file's modification time compared with the local
    time at the moment of the call. Cleanup is best-effort: a directory
    that cannot be listed is ignored, and a file that cannot be inspected
    or removed is skipped without stopping the cleanup of the others.

    Args:
        log_path: Directory to scan (not recursive).

    Returns:
        None.
    """
    cutoff = datetime.datetime.now() - datetime.timedelta(days=7)
    try:
        filenames = os.listdir(log_path)
    except OSError:
        # Missing or unreadable directory: nothing to clean up.
        return
    for filename in filenames:
        if not filename.endswith('.log'):
            continue
        file_path = os.path.join(log_path, filename)
        try:
            modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(file_path))
            if modified_at < cutoff:
                os.remove(file_path)
        except (OSError, OverflowError, ValueError):
            # Skip this entry only; keep processing the remaining files.
            continue
# Alternative approach (disabled): use a regular expression to match the files to delete
#pattern = r'.*\.log.*'
#for file_name in os.listdir(folder_path):
# file_path = os.path.join(folder_path, file_name)
# if re.match(pattern, file_name) and os.path.isfile(file_path):
# os.remove(file_path)
# print(f"已删除文件: {file_path}")
# On every start-up, remove *.log files older than 7 days.
del_oneweekago_log(log_path)


def _run_shell(command, stdin_text=None):
    """Run ``command`` through the shell and return its stdout as text.

    ``stdin_text``, when given, is fed to the command's standard input.
    The exit status and stderr are ignored.
    """
    completed = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        input=stdin_text,
    )
    return completed.stdout


def _lookup_process(port):
    """Return ``(name, pid)`` of the first process lsof lists for ``port``.

    Both values are ``"unknown"`` when lsof prints nothing.
    """
    owners = _run_shell(
        "timeout 5s lsof -i :\"%s\" | awk 'NR>1 {print $1\",\"$2}'" % port
    ).strip("\n")
    if not owners:
        owners = "unknown,unknown"
    # lsof may list several rows for one port; only the first is used.
    first_owner = owners.split('\n')[0]
    process_name, pid = first_owner.split(',')
    return process_name, pid


def _collect_sample():
    """Take one iftop measurement and return it as a JSON-serialisable dict.

    Raises:
        IndexError: if the iftop output contains fewer than two
            "Total ... rate" lines (for example when iftop failed).
    """
    network_rate = {}
    network_rate["current_pilot_status"] = get_autocar_status(url, data, headers)
    network_rate["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]

    # Run iftop once in text mode and capture its report. The filter is
    # iftop's own packet filter expression and is passed through verbatim.
    command_iftop = (
        'timeout 15s iftop -i eth0 -tnNP -f '
        '"not src net 192.168.0.0/16 and not port ssh and not port ntp and not port 53 '
        'or not dst net 192.168.0.0/16 and not port 53 and not port 123 '
        'and not host 114.114.114.114" -L 500 -s 3'
    )
    iftop_output = _run_shell(command_iftop)
    # NOTE(review): the purpose of this pause is not evident from the code;
    # kept exactly as in the original.
    time.sleep(2)

    # The next-to-last column of the two "Total ... rate" lines.
    command_total_rate = "egrep 'Total send rate:|Total receive rate:' | awk '{print $(NF-1)}'"
    total_rate_lines = _run_shell(command_total_rate, iftop_output).split('\n')
    network_rate['total_send_rate'] = total_rate_lines[0] + '/s'
    network_rate['total_receive_rate'] = total_rate_lines[1] + '/s'

    # Local "host:port" endpoints that appear in the report.
    command_get_ports = "grep 192.168 | awk -F'[ \t]+' '{print $3}'|grep ':'"
    local_ports = _run_shell(command_get_ports, iftop_output)
    # The same endpoint can be listed more than once; dict.fromkeys drops
    # repeats while keeping first-seen order, so each connection is
    # reported once instead of as identical duplicate rows.
    ports_list = list(dict.fromkeys(filter(None, local_ports.split('\n'))))

    process_rate = []
    for src in ports_list:
        port = src.split(':')[1]
        process_name, pid = _lookup_process(port)
        # Join the endpoint's line with the line after it and pick the
        # peer address and the send/receive rate columns.
        send_receive_rate = _run_shell(
            "grep -w \"%s\" -A 1 |xargs echo |awk '{print $2\" <=> \"$8\",\"$5\"/s,\"$11\"/s\"}'" % src,
            iftop_output,
        ).strip("\n")
        route, send_rate, receive_rate = send_receive_rate.split(',')
        process_rate.append({
            "name": process_name,
            "pid": pid,
            "route": route,
            "send_rate": send_rate,
            "receive_rate": receive_rate,
        })

    network_rate['data'] = process_rate
    network_rate["log_type"] = 'autocar_public_network_monitor'
    return network_rate


def _append_record(record):
    """Append ``record`` as one JSON line to today's log file in ``log_path``."""
    # Local date, matching the local-time "timestamp" field and the
    # local-time retention check, so files roll over at local midnight.
    day = datetime.datetime.now().strftime('%Y%m%d')
    file_name = log_path + '/network_' + day + '.log'
    with open(file_name, 'a') as output_file:
        output_file.write(json.dumps(record) + '\n')


while True:
    try:
        _append_record(_collect_sample())
    except Exception:
        # Keep the monitor alive, but leave a trace of what went wrong
        # instead of discarding the error silently.
        logging.exception("failed to collect or write a network traffic sample")
    # Pause between samples on success and on failure alike, so a
    # persistent error cannot turn into rapid-fire retries.
    time.sleep(15)
# Log output format (sample record below):
{
"timestamp": "2024-01-01 18:11:51.960",
"current_pilot_status": -1,
"total_send_rate": "25.8Kb/s",
"total_receive_rate": "2.25Kb/s",
"data": [
{
"name": "filebeat",
"pid": "11805",
"route": "192.168.1.102:45134 <=> 1.2.3.4:18004",
"send_rate": "9.53Kb/s",
"receive_rate": "692b/s"
},
{
"name": "filebeat",
"pid": "11805",
"route": "192.168.1.102:39998 <=> 1.2.3.4:18003",
"send_rate": "7.75Kb/s",
"receive_rate": "568b/s"
},
{
"name": "filebeat",
"pid": "11805",
"route": "192.168.1.102:45134 <=> 1.2.3.4:18004",
"send_rate": "9.53Kb/s",
"receive_rate": "692b/s"
},
{
"name": "todeskd",
"pid": "5573",
"route": "192.168.1.102:33812 <=> 1.2.3.4:443",
"send_rate": "786b/s",
"receive_rate": "474b/s"
}
],
"log_type": "autocar_public_network_monitor"
}
参考: