生产实践:
Python抓取页面指定元素下span标签,获取磁盘报警汇总信息
学习技巧:
Python BeautifulSoup 库的使用,了解 HTML、CSS 的基本格式
脚本内容:
使用 BeautifulSoup 库抓取 <span></span> 标签之间的内容,HTML 格式如下:
脚本内容如下:
#!/usr/bin/env python3
# coding: utf-8
# create by anzhihe 20210512
import requests
import sys
from bs4 import BeautifulSoup
#import prettytable as pt
def getHTMLText(url):
    """Download *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response text on success, or the sentinel string '产生异常'
        when the request fails (connection error, timeout, or a 4xx/5xx
        status reported by ``raise_for_status``).
    """
    try:
        # 6-second connect/read timeout so an unreachable host cannot hang us.
        res = requests.get(url, timeout=6)
        # Raises requests.HTTPError for 4xx/5xx responses.
        res.raise_for_status()
    except requests.RequestException:
        # Only request-level failures map to the sentinel; KeyboardInterrupt,
        # SystemExit and programming errors are no longer swallowed.
        return '产生异常'
    # Decode as UTF-8 explicitly instead of guessing via apparent_encoding.
    res.encoding = 'utf-8'
    return res.text
def main(argv):
    """Print the alarm summary for the category named in ``argv[1]``.

    Args:
        argv: Argument list in ``sys.argv`` form. ``argv[1]`` must be
            ``disk`` or ``mem``; ``-h``, any other value, or a missing
            argument prints the usage line and exits.

    Raises:
        SystemExit: After printing the usage line, or with status 1 when
            the alarm page could not be fetched.
    """
    # Maps each CLI category to the id of the <div> that holds its alarms.
    alarms_info = {'disk': 'df_bytes_free_percent', 'mem': 'mem_memfree_percent'}

    # '-h' is not a key of alarms_info, so the membership test covers it too;
    # the length check avoids an IndexError when no argument is supplied.
    if len(argv) < 2 or argv[1] not in alarms_info:
        print('Usage: alarm [disk/mem]')
        sys.exit()

    # Target page; point this at the monitoring page to scrape
    # (any URL works, e.g. 'https://chegva.com').
    # NOTE(review): 996 is not a valid IPv4 octet - presumably a redacted
    # placeholder; confirm the real host before running.
    url = 'http://10.110.112.996/?sortby=metric'
    html = getHTMLText(url)
    if html == '产生异常':
        # getHTMLText returns this sentinel on failure; report it instead of
        # parsing the sentinel as HTML and silently printing nothing.
        print('Failed to fetch alarm page: ' + url, file=sys.stderr)
        sys.exit(1)

    soup = BeautifulSoup(html, 'html.parser')
    check_value = alarms_info[argv[1]]
    for div in soup.find_all('div', attrs={'id': check_value}):
        for entry in div.select('.alarm'):
            # Report only the first three <span> elements matched under the entry.
            print([span.get_text() for span in entry.select(".alarm > span")[:3]])
            # NOTE(review): source indentation was lost; the separator is
            # assumed to follow each alarm entry - confirm against the original.
            print('-' * 80)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main(sys.argv)
# Usage: alarm [disk/mem]
◎查看效果
参考:

