最近在整车上流量的分析优化方案,需要对访问外网的进程流量进行治理。之前整了个 shell 脚本做监控(《Shell脚本调用iftop监控分析服务器进程外网流量》),这个是 Python 脚本版本,主要输出进程名、pid、上下行流量统计、时间戳、服务状态等相关信息,并将日志收集到 Kafka,为下一步开发平台分析做准备。脚本执行时的资源占用情况:CPU 0.1-2%,MEM 0.1%(32M 左右)。
#!/usr/bin/env python3
"""Monitor per-process public-network traffic with iftop and lsof.

Each cycle the script queries the pilot status service, samples eth0 with
iftop, resolves every local port seen in the sample to a process name/pid via
lsof, and appends one JSON record to a date-stamped log file in ``log_path``.
"""
import subprocess
import json
import time
import datetime
import logging
import os

import requests

logger = logging.getLogger(__name__)

# iftop, lsof and their dependencies must be installed beforehand.
log_path = "/home/anzhihe/log/monitor/autocar_public_network_monitor"

url = 'http://192.168.1.110:8080/api/get_xxx_status'
data = {"src": "test_tool", "context": 3}
headers = {'Content-Type': 'application/json'}

# One text-mode iftop run on eth0, wrapped in a 15s `timeout`; the -f argument
# is the traffic filter expression.
IFTOP_COMMAND = 'timeout 15s iftop -i eth0 -tnNP -f "not src net 192.168.0.0/16 and not port ssh and not port ntp and not port 53 or not dst net 192.168.0.0/16 and not port 53 and not port 123 and not host 114.114.114.114" -L 500 -s 3'
# Prints the second-to-last column of the two "Total ... rate:" lines.
TOTAL_RATE_COMMAND = "egrep 'Total send rate:|Total receive rate:' | awk '{print $(NF-1)}'"
# Prints the third whitespace-separated field of every line mentioning
# 192.168 and keeps only values that contain a ':' (address:port).
LOCAL_PORTS_COMMAND = "grep 192.168 | awk -F'[ \t]+' '{print $3}'|grep ':'"
# Skips lsof's header row and prints fields 1 and 2 as "name,pid".
LSOF_COMMAND = "timeout 5s lsof -i :\"%s\" | awk 'NR>1 {print $1\",\"$2}'"
# Joins the matching iftop line with the line after it and prints
# "<field 2> <=> <field 8>,<field 5>/s,<field 11>/s".
CONNECTION_RATE_COMMAND = "grep -w \"%s\" -A 1 |xargs echo |awk '{print $2\" <=> \"$8\",\"$5\"/s,\"$11\"/s\"}'"


def get_autocar_status(url, params, headers):
    """Fetch the autonomous-driving (pilot) status from the status service.

    Args:
        url: Endpoint that receives the POST request.
        params: Payload; serialized to JSON for the request body.
        headers: HTTP headers sent with the request.

    Returns:
        The ``current_pilot`` value from the response, 0 when that value is
        falsy, or 500 when the status could not be obtained (request error,
        non-200 response, or an unexpected response body).
    """
    try:
        response = requests.post(url, data=json.dumps(params), headers=headers, timeout=5)
        if response.status_code != 200:
            # Previously this path fell through and returned None; report it
            # with the same sentinel as every other failure.
            return 500
        current_pilot = response.json()['data']['result']['current_pilot']
        return current_pilot if current_pilot else 0
    except Exception:
        return 500  # 500 means the status could not be obtained


def del_oneweekago_log(log_path):
    """Delete ``*.log`` files in ``log_path`` last modified over 7 days ago.

    Best-effort: any error is logged and swallowed so that a cleanup failure
    never prevents monitoring from starting.

    Args:
        log_path: Directory to scan (not recursive).
    """
    # Computed at call time so the cutoff is relative to when cleanup runs.
    cutoff = datetime.datetime.now() - datetime.timedelta(days=7)
    try:
        for filename in os.listdir(log_path):
            if not filename.endswith('.log'):
                continue
            file_path = os.path.join(log_path, filename)
            modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(file_path))
            if modified_at < cutoff:
                os.remove(file_path)
    except Exception:
        logger.exception("failed to clean up old log files in %s", log_path)


def _run_shell(command, input_text=None):
    """Run ``command`` through the shell and return its stdout as text."""
    completed = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        input=input_text,
    )
    return completed.stdout


def _lookup_process(port):
    """Return ``(name, pid)`` of the first lsof entry for ``port``.

    Both values are ``"unknown"`` when lsof reports nothing for the port.
    """
    lsof_output = _run_shell(LSOF_COMMAND % port).strip("\n")
    first_entry = lsof_output.split('\n')[0] if lsof_output else "unknown,unknown"
    # Split on the last comma so a comma inside the name cannot break unpacking.
    name, _, pid = first_entry.rpartition(',')
    return name, pid


def _connection_rates(src, iftop_output):
    """Return ``(route, send_rate, receive_rate)`` for ``src``, or ``None``.

    ``None`` is returned when the extracted text does not have exactly three
    comma-separated parts.
    """
    summary = _run_shell(CONNECTION_RATE_COMMAND % src, iftop_output).strip("\n")
    parts = summary.split(',')
    if len(parts) != 3:
        return None
    return tuple(parts)


def _collect_sample():
    """Take one iftop sample and build the record that gets logged.

    Raises:
        IndexError: If the iftop output contains no total-rate lines.
    """
    network_rate = {}
    network_rate["current_pilot_status"] = get_autocar_status(url, data, headers)
    # Local time, truncated from microseconds to milliseconds.
    network_rate["timestamp"] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]

    # Run iftop and capture its output.
    iftop_output = _run_shell(IFTOP_COMMAND)
    time.sleep(2)

    total_rate = _run_shell(TOTAL_RATE_COMMAND, iftop_output).split('\n')
    network_rate['total_send_rate'] = total_rate[0] + '/s'
    network_rate['total_receive_rate'] = total_rate[1] + '/s'

    local_ports = _run_shell(LOCAL_PORTS_COMMAND, iftop_output)
    process_rate = []
    for src in filter(None, local_ports.split('\n')):
        port = src.rsplit(':', 1)[-1]
        process_name, pid = _lookup_process(port)
        rates = _connection_rates(src, iftop_output)
        if rates is None:
            # Skip only this connection instead of discarding the whole sample.
            continue
        route, send_rate, receive_rate = rates
        process_rate.append({
            "name": process_name,
            "pid": pid,
            "route": route,
            "send_rate": send_rate,
            "receive_rate": receive_rate,
        })

    network_rate['data'] = process_rate
    network_rate["log_type"] = 'autocar_public_network_monitor'
    return network_rate


def main():
    """Create the log directory, prune old logs, then sample forever."""
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s')
    os.makedirs(log_path, exist_ok=True)

    # Delete *.log files older than 7 days each time the script starts.
    del_oneweekago_log(log_path)

    while True:
        try:
            network_rate = _collect_sample()
            # The file name uses the UTC date; the record timestamp above is
            # local time.
            file_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d')
            file_name = log_path + '/network_' + file_date + '.log'
            with open(file_name, 'a') as output_file:
                output_file.write(json.dumps(network_rate) + '\n')
        except Exception:
            # Keep the monitor alive, but leave a trace of what went wrong.
            logger.exception("failed to collect or write a network sample")
        # Always pause between cycles, including after a failure, so a
        # persistent error cannot turn into a rapid retry loop.
        time.sleep(15)


if __name__ == "__main__":
    main()
日志输出格式:
{ "timestamp": "2024-01-01 18:11:51.960", "current_pilot_status": -1, "total_send_rate": "25.8Kb/s", "total_receive_rate": "2.25Kb/s", "data": [ { "name": "filebeat", "pid": "11805", "route": "192.168.1.102:45134 <=> 1.2.3.4:18004", "send_rate": "9.53Kb/s", "receive_rate": "692b/s" }, { "name": "filebeat", "pid": "11805", "route": "192.168.1.102:39998 <=> 1.2.3.4:18003", "send_rate": "7.75Kb/s", "receive_rate": "568b/s" }, { "name": "filebeat", "pid": "11805", "route": "192.168.1.102:45134 <=> 1.2.3.4:18004", "send_rate": "9.53Kb/s", "receive_rate": "692b/s" }, { "name": "todeskd", "pid": "5573", "route": "192.168.1.102:33812 <=> 1.2.3.4:443", "send_rate": "786b/s", "receive_rate": "474b/s" } ], "log_type": "autocar_public_network_monitor" }
参考: