生产实践:
使用 Python 抓取页面中指定元素下的 span 标签,获取磁盘/内存报警汇总信息
学习技巧:
Python BeautifulSoup 库的使用,以及对 HTML、CSS 基本格式的了解
脚本内容:
使用 BeautifulSoup 库提取 <span></span> 标签之间的文本内容。页面中以指标名为 id 的 div(例如 df_bytes_free_percent)下包含若干 class 为 alarm 的元素,每个元素内有多个 span,HTML 格式如下:
脚本内容如下:
#!/usr/bin/env python3
# coding: utf-8
# create by anzhihe 20210512
"""Print alarm entries scraped from a monitoring page.

The page is fetched over HTTP, the <div> whose id matches the requested
alarm type is located, and for every ``.alarm`` element inside it the text
of its first three child <span> tags is printed.

Usage: alarm [disk/mem]
"""

import sys

import requests
from bs4 import BeautifulSoup

# Sentinel returned by getHTMLText() when the page could not be fetched.
# Kept as a plain string so existing callers of getHTMLText() still work.
FETCH_FAILED = '产生异常'

# Command-line option -> id attribute of the <div> holding those alarms.
ALARM_DIV_IDS = {
    'disk': 'df_bytes_free_percent',
    'mem': 'mem_memfree_percent',
}


def getHTMLText(url):
    """Download *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.

    Returns:
        The HTML text of the page, or the sentinel string ``'产生异常'``
        when the request fails (connection problem, timeout, or an HTTP
        error status reported by ``raise_for_status``).
    """
    try:
        # Give up if the server does not answer within 6 seconds.
        res = requests.get(url, timeout=6)
        # Raises requests.HTTPError for 4xx/5xx responses.
        res.raise_for_status()
    except requests.RequestException as exc:
        # Only request-related failures are handled here, so things like
        # KeyboardInterrupt or programming errors are no longer swallowed.
        print('request failed:', url, exc, file=sys.stderr)
        return FETCH_FAILED
    # The page is decoded as UTF-8 rather than relying on auto-detection.
    res.encoding = 'utf-8'
    return res.text


def main(argv):
    """Print the alarms of the type named by ``argv[1]``.

    Args:
        argv: Command-line arguments including the program name; ``argv[1]``
            must be ``disk`` or ``mem`` (``-h`` prints the usage text).

    Exits with status 0 after ``-h``, and with status 1 when the option is
    missing or unknown, or when the page cannot be fetched.
    """
    # A missing argument is treated like an unknown one instead of crashing
    # with IndexError.
    option = argv[1] if len(argv) > 1 else None
    if option not in ALARM_DIV_IDS:
        print('Usage: alarm [disk/mem]')
        # Asking for help is a success; a missing/unknown option is not.
        sys.exit(0 if option == '-h' else 1)

    # Target page; replace with the real dashboard address
    # (e.g. 'https://chegva.com').
    # NOTE(review): 996 is not a valid IPv4 octet -- presumably a redacted
    # placeholder; confirm the real host before running.
    url = 'http://10.110.112.996/?sortby=metric'

    html = getHTMLText(url)
    if html == FETCH_FAILED:
        # Without this check the sentinel text would be parsed as HTML and
        # the script would silently print nothing.
        sys.exit(1)

    soup = BeautifulSoup(html, 'html.parser')
    for div in soup.find_all('div', attrs={'id': ALARM_DIV_IDS[option]}):
        for alarm_node in div.select('.alarm'):
            # Text of the first three <span> tags that are direct children
            # of an element with class "alarm".
            spans = alarm_node.select('.alarm > span')[:3]
            print([span.get_text() for span in spans])
            print('-' * 80)


if __name__ == "__main__":
    main(sys.argv)
◎查看效果
参考: