`
ejacky
  • 浏览: 30726 次
  • 性别: Icon_minigender_1
  • 来自: 抚顺
社区版块
存档分类
最新评论

阿里云监控服务器信息

阅读更多
#!/usr/bin/python
#########################################
# Function:    sample linux performance indices
# Usage:       python sampler.py
# Author:      CMS DEV TEAM
# Company:     Aliyun Inc.
# Version:     1.1
#########################################

import os
import os.path
import sys
import time
import operator
import httplib
import logging
import socket
import random
from shutil import copyfile
from subprocess import Popen, PIPE
from logging.handlers import RotatingFileHandler

# Module-level settings. All start as None and are assigned in the
# ``__main__`` block at the bottom of the script before collector() runs.
logger = None               # logger used by the sampling functions
REMOTE_HOST = None          # host the metric lines are POSTed to
REMOTE_PORT = None          # port on REMOTE_HOST
REMOTE_MONITOR_URI = None   # request path used for the POST
UUID = None                 # instance id added to every metric line when set

def get_mem_usage_percent():
    """Return (physical_percent, virtual_percent) computed from /proc/meminfo.

    Physical usage is MemTotal minus MemFree, Buffers and Cached, as a
    percentage of MemTotal.  Virtual usage is the used share of swap, or 0
    when no swap is configured (SwapTotal is 0 or absent).

    Returns None when /proc/meminfo cannot be read or parsed, or when one of
    the fields needed for the physical figure is missing.
    """
    wanted = ('MemTotal:', 'MemFree:', 'Buffers:', 'Cached:',
              'SwapTotal:', 'SwapFree:')
    values = {}
    try:
        f = open('/proc/meminfo', 'r')
        try:
            for line in f:
                parts = line.split()
                if len(parts) >= 2 and parts[0] in wanted:
                    values[parts[0]] = int(parts[1])
        finally:
            # Close the handle even when a line fails to parse.
            f.close()
    except (IOError, OSError, ValueError):
        return None

    # Without these four fields the physical figure cannot be computed.
    for name in ('MemTotal:', 'MemFree:', 'Buffers:', 'Cached:'):
        if name not in values:
            return None

    mem_total = values['MemTotal:']
    mem_reclaimable = values['MemFree:'] + values['Buffers:'] + values['Cached:']
    physical_percent = usage_percent(mem_total - mem_reclaimable, mem_total)

    virtual_percent = 0
    vmem_total = values.get('SwapTotal:', 0)
    if vmem_total > 0:
        vmem_free = values.get('SwapFree:', 0)
        virtual_percent = usage_percent(vmem_total - vmem_free, vmem_total)
    return physical_percent, virtual_percent

# Filesystem types that get_disk_partition() leaves out of its list of
# disk-backed filesystem types.
black_list = ('iso9660',)

def usage_percent(use, total):
    """Return *use* as a percentage of *total* (a float).

    Raises a plain Exception with a fixed message when *total* is zero.
    """
    try:
        ratio = float(use) / total
    except ZeroDivisionError:
        raise Exception("ERROR - zero division error")
    return ratio * 100

def get_disk_partition():
    """Return the mount points of disk-backed filesystems listed in /etc/mtab.

    The set of disk-backed filesystem types is taken from /proc/filesystems:
    every entry not flagged ``nodev`` and not named in ``black_list``.  An
    mtab entry is kept when its third field (the filesystem type) is in that
    set; entries starting with ``none`` and lines with fewer than three
    fields are skipped.

    Returns a list of mount point strings, or None when either file cannot
    be read.
    """
    try:
        disk_fs_types = []
        f = open("/proc/filesystems", "r")
        try:
            for line in f:
                if line.startswith("nodev"):
                    continue
                fs_type = line.strip()
                if fs_type not in black_list:
                    disk_fs_types.append(fs_type)
        finally:
            f.close()

        mount_points = []
        f = open('/etc/mtab', "r")
        try:
            for line in f:
                if line.startswith('none'):
                    continue
                fields = line.split()
                # A blank or truncated line must not discard the whole list.
                if len(fields) < 3:
                    continue
                if fields[2] in disk_fs_types:
                    mount_points.append(fields[1])
        finally:
            f.close()
    except (IOError, OSError):
        return None
    return mount_points

def check_disk():
    """Return disk usage per mount point.

    The result maps each mount point from get_disk_partition() to a tuple of
    two strings formatted with two decimals: the used percentage, and the
    total size in bytes divided by (1024 * 1000000).

    A mount point whose statvfs() call fails, or which reports zero blocks,
    is left out instead of discarding the results for every other mount.
    Returns None when the list of mount points is unavailable.
    """
    mount_points = get_disk_partition()
    if mount_points is None:
        return None

    usage = {}
    for mount_point in mount_points:
        try:
            st = os.statvfs(mount_point)
        except OSError:
            continue
        total_bytes = st.f_blocks * st.f_frsize
        if total_bytes <= 0:
            # usage_percent() would raise on a zero total.
            continue
        used_bytes = (st.f_blocks - st.f_bfree) * st.f_frsize
        usage[mount_point] = (
            '%.2f' % (usage_percent(used_bytes, total_bytes),),
            '%.2f' % (total_bytes * 1.0 / (1024 * 1000000)),
        )
    return usage

# Value of the SC_CLK_TCK sysconf setting, read once at import time.  It is
# not referenced anywhere else in the visible part of this file.
_CLOCK_TICKS = os.sysconf("SC_CLK_TCK")

def _read_cpu_times(path):
    """Return (total, idle, iowait) parsed from the first line of *path*.

    The line is expected in /proc/stat format: a label followed by integer
    counters.  ``total`` is the sum of all counters; idle and iowait are the
    4th and 5th counters.
    """
    f = open(path, 'r')
    try:
        values = f.readline().split()
    finally:
        f.close()
    total = 0
    for field in values[1:]:
        total += int(field)
    return total, int(values[4]), int(values[5])


def _busy_percent(total, idle, iowait):
    """Return the non-idle, non-iowait share of *total* as an int percent."""
    return int(100.0 * (total - idle - iowait) / total)


def get_cpu_time():
    """Return CPU utilisation as an integer percentage.

    The figure is the change in /proc/stat since the snapshot saved in
    /tmp/cpu_stat by the previous run.  When no usable snapshot exists, one
    is taken now and the function sleeps one second before sampling again.

    If the deltas are implausible (no elapsed ticks, a negative delta, or a
    result outside 0..100), the snapshot is treated as stale and the value
    is recomputed from two live samples taken one second apart.  The
    snapshot file is refreshed before returning.

    Raises IOError/OSError if the stat files cannot be read or copied, and
    ZeroDivisionError if the counters do not advance during the fallback
    measurement.
    """
    need_sleep = False
    if not os.path.isfile('/tmp/cpu_stat') or os.path.getsize('/tmp/cpu_stat') == 0:
        copyfile('/proc/stat', '/tmp/cpu_stat')
        need_sleep = True

    total_time1, idle_time1, iowait_time1 = _read_cpu_times('/tmp/cpu_stat')

    if need_sleep:
        time.sleep(1)

    total_time2, idle_time2, iowait_time2 = _read_cpu_times('/proc/stat')

    idle_time = idle_time2 - idle_time1
    iowait_time = iowait_time2 - iowait_time1
    total_time = total_time2 - total_time1

    # Only divide when the deltas are sane; a zero total would otherwise
    # raise before the fallback below gets a chance to run.
    cpu_percentage = None
    if total_time > 0 and idle_time >= 0 and iowait_time >= 0:
        cpu_percentage = _busy_percent(total_time, idle_time, iowait_time)

    # compensate logic
    if cpu_percentage is None or cpu_percentage < 0 or cpu_percentage > 100:
        time.sleep(1)
        total_time3, idle_time3, iowait_time3 = _read_cpu_times('/proc/stat')
        cpu_percentage = _busy_percent(total_time3 - total_time2,
                                       idle_time3 - idle_time2,
                                       iowait_time3 - iowait_time2)

    copyfile('/proc/stat', '/tmp/cpu_stat')
    return cpu_percentage

def _read_net_counters():
    """Return {interface: (received, sent)} parsed from /proc/net/dev.

    The first and ninth counters after the interface name are multiplied by
    0.0078125 (8 / 1024) and rounded to four decimals.  The two header
    lines, and any line without an interface name or with fewer than nine
    counters, are skipped.
    """
    f = open("/proc/net/dev", "r")
    try:
        lines = f.readlines()
    finally:
        f.close()

    counters = {}
    for line in lines[2:]:
        colon = line.find(':')
        if colon <= 0:
            continue
        fields = line[colon + 1:].split()
        if len(fields) < 9:
            continue
        name = line[:colon].strip()
        received = float('%.4f' % (float(fields[0]) * 0.0078125))
        sent = float('%.4f' % (float(fields[8]) * 0.0078125))
        counters[name] = (received, sent)
    return counters


def network_io_kbitps():
    """Return per-interface network throughput measured over one second.

    /proc/net/dev is sampled twice, one second apart, and the first sample
    is subtracted from the second with merge_with().  The result maps each
    interface name to a (received, sent) tuple.

    An interface present only in the first sample keeps that sample's raw
    values, because that is how merge_with() treats keys missing from its
    first argument.
    """
    first_sample = _read_net_counters()
    time.sleep(1)
    second_sample = _read_net_counters()
    return merge_with(second_sample, first_sample)

def disk_io_Kbps():
    """Return per-device disk throughput as {device: (read_kBps, write_kBps)}.

    Runs ``iostat -d -k 1 2`` through a shell, filters out header and blank
    lines with sed, and writes the rest to /tmp/disk_io, which is then
    parsed.  iostat prints two reports; later lines overwrite earlier ones
    for the same device, so the values from the second report are returned.

    Only the device name and the third and fourth columns are used, so
    extra trailing columns do not break parsing.  Lines with fewer than four
    fields or non-numeric rates are skipped.

    Returns None (after logging) when the command writes anything to stderr
    or when /tmp/disk_io cannot be opened.
    """
    iostat = Popen(r"iostat -d -k 1 2 | sed '/Device\|Linux\|^$/d' > /tmp/disk_io", shell=True, stdout=PIPE, stderr=PIPE)
    iostat_error = iostat.communicate()[1].strip()
    if iostat_error:
        logger.error("iostat not exists, %s" % iostat_error)
        return None

    # Open outside the try/finally so a failed open never reaches f.close().
    try:
        f = open('/tmp/disk_io', 'r')
    except (IOError, OSError):
        logger.error(sys.exc_info()[1])
        return None

    retdict = {}
    try:
        for line in f:
            fields = line.split()
            if len(fields) < 4:
                continue
            try:
                readkps = float(fields[2])
                writekps = float(fields[3])
            except ValueError:
                continue
            retdict[fields[0]] = (readkps, writekps)
    finally:
        f.close()
    return retdict

def merge_with(d1, d2, fn=lambda x, y: tuple(map(operator.sub, x, y))):
    """Return a new dict merging *d2* into a copy of *d1*.

    For a key present in both, the result holds ``fn(d1[key], d2[key])``;
    the default *fn* subtracts two sequences element by element and returns
    a tuple.  A key present only in one dict keeps that dict's value.
    Neither input is modified.

    Iterating the keys directly (instead of ``iteritems()``) keeps this
    helper usable on Python 3 as well, and the explicit membership test
    means a KeyError raised inside *fn* is no longer mistaken for a missing
    key.
    """
    res = d1.copy()
    for key in d2:
        val = d2[key]
        if key in res:
            res[key] = fn(res[key], val)
        else:
            res[key] = val
    return res

def get_load():
    """Return the first three fields of /proc/loadavg as a tuple of floats.

    The tuple is (lavg_1, lavg_5, lavg_15).  Returns None when the file
    cannot be read or does not start with three numeric fields.
    """
    try:
        f = open('/proc/loadavg', 'r')
        try:
            fields = f.readline().split()
        finally:
            # Close the handle even if reading fails.
            f.close()
        return float(fields[0]), float(fields[1]), float(fields[2])
    except (IOError, OSError, ValueError, IndexError):
        return None

def get_tcp_status():
    """Return TCP connection counts per state as one ``STATE=count`` string.

    A shell probe (``command -v ss``) decides which pipeline runs: the
    ``ss`` based one when the probe prints anything, otherwise the
    ``netstat`` based one.  Both pipelines use awk to print space-separated
    ``STATE=count`` tokens; the ``ss`` pipeline also rewrites some state
    names with sed.  The pipeline's stdout is returned with trailing
    newlines removed.
    """
    probe = Popen("command -v ss", shell=True, stdout=PIPE)
    ss_path = probe.communicate()[0].rstrip('\n')

    if not ss_path:
        pipeline = "netstat -anp | grep tcp | awk '{print $6}' | awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}' | tail -n 1"
    else:
        pipeline = "ss -ant | awk '{if(NR != 1) print $1}' | awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}' | sed 's/-/_/g' | sed 's/ESTAB=/ESTABLISHED=/g' | sed 's/FIN_WAIT_/FIN_WAIT/g'"

    counter = Popen(pipeline, shell=True, stdout=PIPE)
    raw_output = counter.communicate()[0]
    return raw_output.rstrip('\n')

def get_proc_number():
    """Return the line count of ``ps axu`` output as a string.

    The value is whatever ``wc -l`` prints for the ps listing, with
    trailing newlines removed.
    NOTE(review): the count presumably includes the ps header line; confirm
    before treating it as an exact process count.
    """
    counter = Popen("ps axu | wc -l | tail -n 1", shell=True, stdout=PIPE)
    raw_output = counter.communicate()[0]
    return raw_output.rstrip('\n')

def all_index():
    """Run every sampler once and return the results as a 9-tuple.

    Order: timestamp in milliseconds, CPU, memory, disk usage, disk I/O,
    network I/O, load average, TCP status, process count.  The samplers are
    called in that same order.
    """
    timestamp_ms = int(time.time() * 1000)
    cpu = get_cpu_time()
    memory = get_mem_usage_percent()
    disk_usage = check_disk()
    disk_io = disk_io_Kbps()
    network_io = network_io_kbitps()
    load_average = get_load()
    tcp_states = get_tcp_status()
    process_total = get_proc_number()
    return (timestamp_ms, cpu, memory, disk_usage, disk_io, network_io,
            load_average, tcp_states, process_total)

def _metric_line(metric, timestamp, value, unit, tag=''):
    """Format one newline-terminated metric line.

    Layout: ``<metric> <timestamp> <value> ns=ACS/ECS unit=<unit>``,
    followed by `` instanceId=<UUID>`` when the module-level UUID is set,
    followed by `` <tag>`` when *tag* is non-empty.
    """
    line = '%s %s %s ns=ACS/ECS unit=%s' % (metric, timestamp, value, unit)
    if UUID:
        line = line + ' instanceId=%s' % UUID
    if tag:
        line = line + ' ' + tag
    return line + '\n'


def collector():
    """Sample all metrics once, print them, and POST them to the server.

    Metric lines are emitted in a fixed order: CPU, memory, load averages,
    disk utilisation, disk reads, disk writes, network RX, network TX, TCP
    state counts, process count.  Optional samplers that returned nothing
    (memory, load, disk, disk I/O, TCP) are left out rather than aborting
    the whole report.

    Before sending, the function sleeps a random 0-5 seconds.  A non-200
    response is logged as a warning and a request failure as an error;
    neither is raised to the caller.
    """
    timestamp, cpu, mem, disk, disk_io, net, load, tcp_status, process_number = all_index()

    lines = [_metric_line('vm.CPUUtilization', timestamp, cpu, 'Percent')]

    # get_mem_usage_percent() returns None when /proc/meminfo is unusable.
    if mem is not None:
        lines.append(_metric_line('vm.MemoryUtilization', timestamp, mem[0], 'Percent'))

    if load:
        for value, period in zip(load, ('1min', '5min', '15min')):
            lines.append(_metric_line('vm.LoadAverage', timestamp, value, 'count',
                                      'period=%s' % period))

    if disk:
        for name, value in disk.items():
            lines.append(_metric_line('vm.DiskUtilization', timestamp, value[0], 'Percent',
                                      'mountpoint=%s' % name))

    if disk_io:
        # All read lines are emitted before all write lines.
        read_lines = []
        write_lines = []
        for name, value in disk_io.items():
            read_lines.append(_metric_line('vm.DiskIORead', timestamp, value[0],
                                           'Kilobytes/Second', 'diskname=%s' % name))
            write_lines.append(_metric_line('vm.DiskIOWrite', timestamp, value[1],
                                            'Kilobytes/Second', 'diskname=%s' % name))
        lines.extend(read_lines)
        lines.extend(write_lines)

    rx_lines = []
    tx_lines = []
    for name, value in net.items():
        rx_lines.append(_metric_line('vm.InternetNetworkRX', timestamp, value[0],
                                     'Kilobits/Second', 'netname=%s' % name))
        tx_lines.append(_metric_line('vm.InternetNetworkTX', timestamp, value[1],
                                     'Kilobits/Second', 'netname=%s' % name))
    lines.extend(rx_lines)
    lines.extend(tx_lines)

    if tcp_status:
        for element in tcp_status.split():
            key_value = element.split('=')
            lines.append(_metric_line('vm.TcpCount', timestamp, key_value[1], 'Count',
                                      'state=%s' % key_value[0]))

    lines.append(_metric_line('vm.ProcessCount', timestamp, process_number, 'Count'))

    data_post = ''.join(lines)
    print(data_post)

    # Random delay of up to five seconds before sending.
    interval = random.randint(0, 5000)
    time.sleep(interval / 1000.0)

    headers = {"Content-Type": "text/plain", "Accept": "text/plain"}
    http_client = None
    try:
        try:
            http_client = httplib.HTTPConnection(REMOTE_HOST, REMOTE_PORT)
            http_client.request(method="POST", url=REMOTE_MONITOR_URI, body=data_post, headers=headers)
            response = http_client.getresponse()
            if response.status != 200:
                logger.warn("response code %d" % response.status)
                logger.warn("response code %s" % response.read())
        except Exception:
            # sys.exc_info() avoids version-specific "except ... as" syntax.
            logger.error(sys.exc_info()[1])
    finally:
        if http_client:
            http_client.close()

if __name__ == '__main__':
    # Defaults; ../cmscfg may override them.
    REMOTE_HOST = 'open.cms.aliyun.com'
    REMOTE_PORT = 80

    # get report address
    if os.path.isfile("../cmscfg"):
        props = {}
        prop_file = open("../cmscfg", 'r')
        try:
            for line in prop_file:
                # Skip blank or malformed lines instead of failing on them.
                if '=' not in line:
                    continue
                # Split on the first '=' only so values may contain '='.
                key, value = line.split('=', 1)
                props[key.strip()] = value.strip()
        finally:
            prop_file.close()
        if props.get('report_domain'):
            REMOTE_HOST = props.get('report_domain')
        if props.get('report_port'):
            REMOTE_PORT = props.get('report_port')
            # Use an integer port when the configured value is numeric.
            if REMOTE_PORT.isdigit():
                REMOTE_PORT = int(REMOTE_PORT)

    # get uuid
    if os.path.isfile("../aegis_quartz/conf/uuid"):
        uuid_file = open("../aegis_quartz/conf/uuid", 'r')
        try:
            # Strip the line ending: UUID is embedded in the middle of each
            # metric line, so a trailing newline would split the line.
            UUID = uuid_file.readline().strip().lower()
        finally:
            uuid_file.close()

    REMOTE_MONITOR_URI = "/metrics/putLines"
    MONITOR_DATA_FILE_DIR = "/tmp"
    LOG_FILE = "/tmp/" + "vm.log"
    LOG_LEVEL = logging.INFO
    LOG_FILE_MAX_BYTES = 1024 * 1024
    LOG_FILE_MAX_COUNT = 3
    logger = logging.getLogger('sampler')
    logger.setLevel(LOG_LEVEL)
    handler = RotatingFileHandler(filename=LOG_FILE, mode='a', maxBytes=LOG_FILE_MAX_BYTES,
                                  backupCount=LOG_FILE_MAX_COUNT)
    formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # Applies to sockets created afterwards, including the HTTP connection.
    socket.setdefaulttimeout(10)

    try:
        collector()
    except Exception:
        logger.error(sys.exc_info()[1])
        sys.exit(1)
分享到:
评论

相关推荐

    阿里云服务器ECS介绍.pptx

    云服务器ECS介绍 阿里云服务器ECS介绍全文共12页,当前为第1页。 打造公共、开放的以数据为中心的云计算服务平台,借助技术创新,不断提升计算能力与规模效益,将云计算变成真正意义上的5A的公共服务 阿里云打造数据...

    阿里云负载均衡服务器集群架构图:云盾、云监控、SLB、ECS集群、主从数据库.zip

    阿里云均衡负载服务器集群架构图:云盾、云监控、SLB、ECS集群、主从数据库.zip 1.购买均衡负载 2.购买ECS云主机 3.管理后端服务器

    Python3编程实现获取阿里云ECS实例及监控的方法

    本文实例讲述了Python3编程实现获取阿里云ECS实例及监控的方法。分享给大家供大家参考,具体如下: #!/usr/bin/env python3.5 # -*- coding:utf8 -*- try: import httplib except ImportError: import ...

    关于阿里云监控(部署)

    阿里云服务器部署,图文档

    阿里云服务器用户管理

    云服务器是阿里云计算在基础设施应用上的重要组成部分,也是阿里云计算公司最基础的产品之一。通过云服务器控制面板您可以看到您的云服务器的配置信息,而且可以对您的云服务器执行重启、关闭、启动、重置、更改密码...

    阿里云服务器的远程机器人监控系统.pdf

    阿里云服务器的远程机器人监控系统.pdf

    利用MQTT协议+阿里云平台实现视频监控系统(Qt上位机源码).zip

    在阿里云服务器上创建2个设备,分别为设备A和设备B;设备A负责采集本地摄像头画面上传,设备B负责接收设备A上传的数据然后解析显示出来。在阿里云服务器上需要配置云产品流转,让设备A的数据上传后自动发送给设备B...

    阿里云原生实践15讲-178页.pdf

    坚持探索与落地并重,阿里云原生之路全景揭秘 如何快速恢复大规模容器故障 阿里利用K8S、Kata容器和裸金属服务器构建无服务器平台 CafeDeployment:为互联网金融关键任务场景扩展的Kubernetes资源 Serverless市场...

    基于阿里云MQTT物联网平台视频监控

    在阿里云服务器上创建2个设备,分别为设备A和设备B;设备A负责采集本地摄像头画面上传,设备B负责接收设备A上传的数据然后解析显示出来。在阿里云服务器上需要配置云产品流转,让设备A的数据上传后自动发送给设备B。...

    树莓派控制温湿度传感器,人体传感器,摄像头实现远程监控,并且数据备份云端服务器.zip

    ###功能 ####树莓派采集温湿度传感器,并且将数据上传到阿里云服务器 ####人体传感器监测有人到来时,控制USB摄像头拍照,是否有人经过也会上传到云端服务器 ####树莓派cpu温度也会监测并且上传云端服务器 ####多...

    app-利用MQTT协议+阿里云平台实现视频监控系统.zip

    在阿里云服务器上创建2个设备,分别为设备A和设备B;设备A负责采集本地摄像头画面上传,设备B负责接收设备A上传的数据然后解析显示出来。在阿里云服务器上需要配置云产品流转,让设备A的数据上传后自动发送给设备B...

    阿里云OSS附件通 商业白金版 PHP≥5.3

    阿里云OSS附件通允许将门户,论坛附件,图片,相册等上传到...7. 支持阿里云OSS的流量监控,防止超流量而使得产生费用。 8. 支持设置超过特定大小的附件才能被上传到阿里云OSS 9. 支持门户,论坛,相册的所有附件操作。

    国内各家云服务器对比.xlsx

    各云产品对比,,,,,,,, 产品,WO云,天翼云,腾讯云,阿里云,盛大云,新浪云,百度云, IAAS,云主机,云主机,云服务器,云服务器ECS,云主机,,, ,负载均衡,负载均衡,负载均衡,负载均衡,负载均衡,,, ,存储类,对象存储,云存储,...

    零基础教你配置阿里云服务器搭建网站

    后来换成云服务器,开始是自己手动部署环境和上传网站程序,使用过程中总是会出现一些小问题,然后就是不停的在网上找答案,解决这些问题。 后来继续做新站的时候,就懒的手动一步一步去搭建了。于是用了服务器管理...

    基于阿里云的Nodejs稳定性实践.pdf

    本次分享主要介绍下基于阿里云如何进行 Nodejs 应用的稳定性保障。 课程大纲 现状 应用开发 流程卡口 性能测试 运维监控 专家介绍 冬萌(花名) 阿里云高级开发工程师 兴趣领域:性能优化、稳定性、开发者服务 ...

    树莓派控制温湿度传感器,人体传感器,摄像头实现远程监控,并且数据备份云端服务器

    ####*树莓派采集温湿度传感器,并且将数据上传到阿里云服务器* ####*人体传感器监测有人到来时,控制USB摄像头拍照,是否有人经过也会上传到云端服务器* ####*树莓派cpu温度也会监测并且上传云端服务器* ####*多线程...

    服务器选择方案对比.docx

    阿里云云服务器 传统服务器 机房网络 绿色节能 自主研发的直流电服务器,绿色机房设计,PUE低 传统交流电服务器设计,PUE高 独享带宽 骨干机房,出口带宽大,BGP独享带宽 机房质量参差不齐,用户选择困难,以共享...

    esp32-cam远程监控拍照上传巴法云.rar

    应用esp32-cam 实现远程监控,抓拍图片上传到云平台,同时推送到微信详细实验步骤+完整代码代码。可以用于 远程监控,智能仓库,人脸识别等多种用途。 本资料可与“基于Arduino IDE的ESP32-CAM视频流人脸识别”配合...

    基于阿里云服务器+libevent+qt+mysql等实现仿qq聊天软件.zip

    mysql:存储MySQL自身的系统信息,如用户权限、服务器配置、事件调度等。 performance_schema:自MySQL 5.5版本引入,用于收集服务器性能数据,帮助诊断和优化系统性能。 test(非必要):默认提供的测试数据库,...

    某公司自建服务器租用服务器云服务器选用分析.docx

    3 服务器租用 此情况不作特别分析,例万网之前就是做服务器租用的,已被阿里收购,所 有服务器资源已被作为阿里云基建。 4.云服务 优点: 云服务器租用价格低于传统的物理服务器租用,且无需支付押金。 价格的低廉...

Global site tag (gtag.js) - Google Analytics