服务器cpu使用率不高,load比较高,所以要查看一下IO。硬盘IO可以通过命令vmstat或iostat获得(也可以用yum 安装dstat获得),网络IO可以用iftop命令获取。但是不知道哪个进程使用硬盘IO比较高,通过查找没有找到相关命令,只好自己写个脚本进行统计处理。
本脚本在CentOS6下(kernel2.6以上)python2.6测试通过。
直接运行脚本,默认情况下收集3秒钟数据,显示读写最高的前三个进程。如用参数可以使用命令“python fhip.py 4 5 3”,第一个数为每次收集读写数据的间隔秒数,第二个数是打印出读写最多的n个进程,第三个为运行脚本的次数。因为参数部分写得比较简单,所以用参数必须3个全写。
本脚本在CentOS6下(kernel2.6以上)python2.6测试通过。
直接运行脚本,默认情况下收集3秒钟数据,显示读写最高的前三个进程。如用参数可以使用命令“python fhip.py 4 5 3”,第一个数为每次收集读写数据的间隔秒数,第二个数是打印出读写最多的n个进程,第三个为运行脚本的次数。因为参数部分写得比较简单,所以用参数必须3个全写。
#!/bin/python
#-*- coding:utf-8 -*-
# Filename: find_high_io_process
# Revision: 1.0
# Date: 2013-3-8
# Author: simonzhang
# web: www.simonzhang.net
# Email: simon-zzm@163.com
### END INIT INFO
import os
import re
import sys
import time
from string import strip
####
sys_proc_path = '/proc/'
re_find_process_number = '^\d+$'
####
# 通过/proc/$pid/io获取读写信息
####
def collect_info(proc_path=None):
    """Snapshot the cumulative disk I/O counters of every process.

    Args:
        proc_path: Directory holding one sub-directory per pid.  When
            omitted, the module-level ``sys_proc_path`` is used.

    Returns:
        A dict mapping each pid (the directory name, as a string) to
        ``{"name": ..., "read_bytes": ..., "write_bytes": ...}``.  ``name``
        is the command field of ``<pid>/stat`` including its surrounding
        parentheses; the byte counts are the integers found in ``<pid>/io``
        (0 when the corresponding line is absent).

    Entries whose ``stat`` or ``io`` file cannot be opened or parsed are
    skipped instead of aborting the whole scan.
    """
    if proc_path is None:
        proc_path = sys_proc_path
    snapshot = {}
    for entry in os.listdir(proc_path):
        # Only directories named with digits alone describe a process.
        if not entry.isdigit():
            continue
        proc_dir = os.path.join(proc_path, entry)
        try:
            with open(os.path.join(proc_dir, "stat"), "r") as stat_file:
                stat_line = stat_file.read()
            with open(os.path.join(proc_dir, "io"), "r") as io_file:
                io_lines = io_file.readlines()
        except (IOError, OSError, ValueError):
            # The files could not be read (or decoded); skip this pid.
            continue
        # The command name is parenthesised and may itself contain spaces or
        # parentheses, so take everything from the first "(" to the last ")".
        name_start = stat_line.find("(")
        name_end = stat_line.rfind(")")
        if name_start == -1 or name_end < name_start:
            continue
        counters = {"read_bytes": 0, "write_bytes": 0}
        try:
            for line in io_lines:
                key, _, value = line.partition(":")
                key = key.strip()
                if key in counters:
                    counters[key] = int(value.strip())
        except ValueError:
            continue
        snapshot[entry] = {
            "name": stat_line[name_start:name_end + 1],
            "read_bytes": counters["read_bytes"],
            "write_bytes": counters["write_bytes"],
        }
    return snapshot
def main(_sleep_time, _list_num):
    """Sample per-process I/O twice and print the busiest readers and writers.

    Two snapshots are taken with ``collect_info()``, ``_sleep_time`` seconds
    apart.  For every pid in the second snapshot the growth of its
    ``read_bytes`` and ``write_bytes`` counters is computed; a pid missing
    from the first snapshot is measured against 0.  The pids are ranked
    separately by read growth and by write growth, and the two rankings are
    printed side by side.

    Args:
        _sleep_time: Seconds to wait between the two snapshots.
        _list_num: Maximum number of ranked rows to print.  Fewer rows are
            printed when fewer processes were sampled.
    """
    first_sample = collect_info()
    time.sleep(_sleep_time)
    second_sample = collect_info()

    # Growth of each counter between the two snapshots, keyed by pid.
    read_delta = {}
    write_delta = {}
    for pid, current in second_sample.items():
        previous = first_sample.get(pid, {})
        read_delta[pid] = current["read_bytes"] - previous.get("read_bytes", 0)
        write_delta[pid] = current["write_bytes"] - previous.get("write_bytes", 0)

    # Largest growth first.
    by_read = sorted(read_delta.items(), key=lambda pair: pair[1], reverse=True)
    by_write = sorted(write_delta.items(), key=lambda pair: pair[1], reverse=True)

    def pad(text, width):
        # Always leave at least one space so an over-long value cannot run
        # into the next column.
        return text + " " * max(1, width - len(text))

    print("pid process read(bytes) pid process write(btyes)")
    # Both rankings cover the same pids, so one length bounds both.
    for row in range(min(_list_num, len(by_read))):
        read_pid, read_amount = by_read[row]
        write_pid, write_amount = by_write[row]
        print(
            pad(read_pid, 8)
            + pad(second_sample[read_pid]["name"], 12)
            + pad("%s" % read_amount, 12)
            + pad(write_pid, 8)
            + pad(second_sample[write_pid]["name"], 12)
            + "%s" % write_amount
        )
    print("\n")
if __name__ == '__main__':
    # Optional positional arguments, each falling back to its default when
    # left off: seconds between samples (3), rows to print (3), rounds (1).
    def _int_arg(position, default):
        """Return sys.argv[position] as an int, or default when it is absent."""
        if len(sys.argv) > position:
            return int(sys.argv[position])
        return default

    try:
        _sleep_time = _int_arg(1, 3)
        _num = _int_arg(2, 3)
        loop = _int_arg(3, 1)
    except ValueError:
        # A non-integer argument: explain the expected form and stop.
        sys.exit("usage: %s [interval_seconds] [top_n] [rounds]" % sys.argv[0])
    for i in range(loop):
        main(_sleep_time, _num)
#-*- coding:utf-8 -*-
# Filename: find_high_io_process
# Revision: 1.0
# Date: 2013-3-8
# Author: simonzhang
# web: www.simonzhang.net
# Email: simon-zzm@163.com
### END INIT INFO
import os
import re
import sys
import time
from string import strip
####
sys_proc_path = '/proc/'
re_find_process_number = '^\d+$'
####
# 通过/proc/$pid/io获取读写信息
####
def collect_info(proc_path=None):
    """Snapshot the cumulative disk I/O counters of every process.

    Args:
        proc_path: Directory holding one sub-directory per pid.  When
            omitted, the module-level ``sys_proc_path`` is used.

    Returns:
        A dict mapping each pid (the directory name, as a string) to
        ``{"name": ..., "read_bytes": ..., "write_bytes": ...}``.  ``name``
        is the command field of ``<pid>/stat`` including its surrounding
        parentheses; the byte counts are the integers found in ``<pid>/io``
        (0 when the corresponding line is absent).

    Entries whose ``stat`` or ``io`` file cannot be opened or parsed are
    skipped instead of aborting the whole scan.
    """
    if proc_path is None:
        proc_path = sys_proc_path
    snapshot = {}
    for entry in os.listdir(proc_path):
        # Only directories named with digits alone describe a process.
        if not entry.isdigit():
            continue
        proc_dir = os.path.join(proc_path, entry)
        try:
            with open(os.path.join(proc_dir, "stat"), "r") as stat_file:
                stat_line = stat_file.read()
            with open(os.path.join(proc_dir, "io"), "r") as io_file:
                io_lines = io_file.readlines()
        except (IOError, OSError, ValueError):
            # The files could not be read (or decoded); skip this pid.
            continue
        # The command name is parenthesised and may itself contain spaces or
        # parentheses, so take everything from the first "(" to the last ")".
        name_start = stat_line.find("(")
        name_end = stat_line.rfind(")")
        if name_start == -1 or name_end < name_start:
            continue
        counters = {"read_bytes": 0, "write_bytes": 0}
        try:
            for line in io_lines:
                key, _, value = line.partition(":")
                key = key.strip()
                if key in counters:
                    counters[key] = int(value.strip())
        except ValueError:
            continue
        snapshot[entry] = {
            "name": stat_line[name_start:name_end + 1],
            "read_bytes": counters["read_bytes"],
            "write_bytes": counters["write_bytes"],
        }
    return snapshot
def main(_sleep_time, _list_num):
    """Sample per-process I/O twice and print the busiest readers and writers.

    Two snapshots are taken with ``collect_info()``, ``_sleep_time`` seconds
    apart.  For every pid in the second snapshot the growth of its
    ``read_bytes`` and ``write_bytes`` counters is computed; a pid missing
    from the first snapshot is measured against 0.  The pids are ranked
    separately by read growth and by write growth, and the two rankings are
    printed side by side.

    Args:
        _sleep_time: Seconds to wait between the two snapshots.
        _list_num: Maximum number of ranked rows to print.  Fewer rows are
            printed when fewer processes were sampled.
    """
    first_sample = collect_info()
    time.sleep(_sleep_time)
    second_sample = collect_info()

    # Growth of each counter between the two snapshots, keyed by pid.
    read_delta = {}
    write_delta = {}
    for pid, current in second_sample.items():
        previous = first_sample.get(pid, {})
        read_delta[pid] = current["read_bytes"] - previous.get("read_bytes", 0)
        write_delta[pid] = current["write_bytes"] - previous.get("write_bytes", 0)

    # Largest growth first.
    by_read = sorted(read_delta.items(), key=lambda pair: pair[1], reverse=True)
    by_write = sorted(write_delta.items(), key=lambda pair: pair[1], reverse=True)

    def pad(text, width):
        # Always leave at least one space so an over-long value cannot run
        # into the next column.
        return text + " " * max(1, width - len(text))

    print("pid process read(bytes) pid process write(btyes)")
    # Both rankings cover the same pids, so one length bounds both.
    for row in range(min(_list_num, len(by_read))):
        read_pid, read_amount = by_read[row]
        write_pid, write_amount = by_write[row]
        print(
            pad(read_pid, 8)
            + pad(second_sample[read_pid]["name"], 12)
            + pad("%s" % read_amount, 12)
            + pad(write_pid, 8)
            + pad(second_sample[write_pid]["name"], 12)
            + "%s" % write_amount
        )
    print("\n")
if __name__ == '__main__':
    # Optional positional arguments, each falling back to its default when
    # left off: seconds between samples (3), rows to print (3), rounds (1).
    def _int_arg(position, default):
        """Return sys.argv[position] as an int, or default when it is absent."""
        if len(sys.argv) > position:
            return int(sys.argv[position])
        return default

    try:
        _sleep_time = _int_arg(1, 3)
        _num = _int_arg(2, 3)
        loop = _int_arg(3, 1)
    except ValueError:
        # A non-integer argument: explain the expected form and stop.
        sys.exit("usage: %s [interval_seconds] [top_n] [rounds]" % sys.argv[0])
    for i in range(loop):
        main(_sleep_time, _num)
pid process read(bytes) pid process write(btyes)
57192 (mysqld) 8192 28870 (vsftpd) 397312
875 (udevd) 0 57192 (mysqld) 229376
16534 (php-fpm) 0 817 (kjournald) 163840
2960 (kondemand/7)0 2883 (syslogd) 20480
452 (usb-storage)0 43084 (nginx) 12288
16762 (php-fpm) 0 18833 (crond) 4096
2363 (iscsi_eh) 0 875 (udevd) 0
6811 (php-fpm) 0 16534 (php-fpm) 0
57192 (mysqld) 8192 28870 (vsftpd) 397312
875 (udevd) 0 57192 (mysqld) 229376
16534 (php-fpm) 0 817 (kjournald) 163840
2960 (kondemand/7)0 2883 (syslogd) 20480
452 (usb-storage)0 43084 (nginx) 12288
16762 (php-fpm) 0 18833 (crond) 4096
2363 (iscsi_eh) 0 875 (udevd) 0
6811 (php-fpm) 0 16534 (php-fpm) 0
你想随时知道你的服务器的运行状况吗? 比如CPU,内存,IO,网络的使用情况,该监听的端口是否正在监听,是否有进程占用了太多内存等等。 你是否想到了SNMP,Cacti,Nagios。
有个更简单的办法就是写一个shell脚本来收集系统运行的信息,然后用crontab定时去执行,然后用nginx把收集的系统信息暴露出来,这样你就可以随时用手机查看你的服务器运行信息了。
这里有一个shell脚本,可以收集系统的常规的运行信息。
https://github.com/onlytiancai/codesnip/blob/master/shell/sysinfo.sh
运行它,它会输出类似下面的信息,你可以修改脚本添加你关心的信息,或者删除你看不懂或不关心的信息。
https://github.com/onlytiancai/codesnip/blob/master/shell/sysinfo.txt
假设我们要把该脚本下载到/opt/scripts目录下,执行如下命令
运行crontab -e,增加如下定时任务,每5分钟收集系统信息并保存在/var/www/sysinfo目录下
*/5 * * * * /bin/sh /opt/scripts/sysinfo.sh >/var/www/sysinfo/sysinfo.txt 2>&1
修改nginx配置,把/var/www/sysinfo目录开放成可以通过web访问。
这样,你就可以在你的手机上输入 http://yourdomain.com/sysinfo/sysinfo.txt 来随时查看你的服务器状态了。
提示:
1:如果你觉得系统信息里有敏感信息,比如ps命令的输出,可以修改脚本把敏感信息去掉。
2:为了防止别人知道你的系统信息url,你可以在nginx配置的location节设置一个比较长的随机的地址,然后在手机浏览器的收藏夹里把该网址收藏一下。
有个更简单的办法就是写一个shell脚本来收集系统运行的信息,然后用crontab定时去执行,然后用nginx把收集的系统信息暴露出来,这样你就可以随时用手机查看你的服务器运行信息了。
这里有一个shell脚本,可以收集系统的常规的运行信息。
https://github.com/onlytiancai/codesnip/blob/master/shell/sysinfo.sh
#!/bin/sh
#requires the following
# free, hostname, grep, cut, awk, uname, sar, ps, netstat
# Load root's login environment when it is available.  The guard matters:
# a non-interactive POSIX shell aborts the script if the file given to "."
# cannot be read.
if [ -r /root/.profile ]; then
    . /root/.profile
fi
HOSTNAME=$(hostname -s)
# memory: second column of the "Mem" row printed by free
MEMORY=$(free | grep Mem | awk '{print $2}')
# cpu info: one "processor" line per logical CPU in /proc/cpuinfo
CPUS=$(grep -c '^processor' /proc/cpuinfo)
CPU_MHZ=$(grep MHz /proc/cpuinfo | tail -n 1 | awk '{print $4}')
CPU_TYPE=$(grep vendor_id /proc/cpuinfo | tail -n 1 | awk '{print $3}')
CPU_TYPE2=$(uname -m)
OS_NAME=$(uname -s)
OS_KERNEL=$(uname -r)
UPTIME=$(uptime)
# "-o pid=" prints one line per process with no header line, so the header
# is not counted as a process.
PROC_COUNT=$(ps -e -o pid= | wc -l)
# Copy the first line of stdin (the column header) to stdout untouched, then
# run the command given as arguments on whatever input remains.
body() {
    IFS= read -r first_line
    printf '%s\n' "${first_line}"
    "$@"
}
# Print the report: summary first, then one titled section per topic.
# %M is the minute; the previous %S printed hour:second.
echo "概要信息 $(date +'%Y-%m-%d %H:%M')"
echo "----------------------------------"
echo "主机名 : $HOSTNAME"
echo "内存大小 : $MEMORY"
echo "CPU核数 : $CPUS"
echo "CPU类型 : $CPU_TYPE $CPU_TYPE2 $CPU_MHZ MHz"
echo "操作系统 : $OS_NAME"
echo "内核版本 : $OS_KERNEL"
echo "进程总数 : $PROC_COUNT"
echo "启动时间及负载 : $UPTIME"
echo
echo "内存使用情况"
echo "----------------------------------"
free -m
echo
echo "磁盘使用情况"
echo "----------------------------------"
df -h
echo
echo "网络连接情况"
echo "----------------------------------"
# Count TCP connections per state, leaving out lines that mention 127.0.0.1
# (-F matches the address literally instead of treating "." as a wildcard).
netstat -n | grep -vF '127.0.0.1' | awk '/^tcp/ {++S[$NF]} END {for(a in S) print a, S[a]}'
echo
echo "网络监听情况"
echo "----------------------------------"
netstat -tnpl | awk 'NR>2 {printf "%-20s %-15s \n",$4,$7}'
echo
# body passes the ps header line through before sorting, so 11 lines are
# needed to show the header plus ten processes.
echo "内存占用Top 10"
echo "----------------------------------"
ps -eo rss,pmem,pcpu,vsize,args | body sort -k 1 -r -n | head -n 11
echo
echo "CPU占用Top 10"
echo "----------------------------------"
ps -eo rss,pmem,pcpu,vsize,args | body sort -k 3 -r -n | head -n 11
echo
# Start time for the sar sections, computed once so all five cover the same
# window.
# NOTE(review): during the first hour after midnight this start time is later
# than the current time of day - confirm how sar handles that.
SINCE=$(date -d "1 hour ago" +%H:%M:%S)
echo "最近1小时网络流量统计"
echo "----------------------------------"
sar -n DEV -s "$SINCE"
echo
echo "最近1小时cpu使用统计"
echo "----------------------------------"
sar -u -s "$SINCE"
echo
echo "最近1小时磁盘IO统计"
echo "----------------------------------"
sar -b -s "$SINCE"
echo
echo "最近1小时进程队列和平均负载统计"
echo "----------------------------------"
sar -q -s "$SINCE"
echo
echo "最近1小时内存和交换空间的统计统计"
echo "----------------------------------"
sar -r -s "$SINCE"
echo
# 参考链接:
# http://www.dslreports.com/forum/remark,2069987
# http://www.ctohome.com/FuWuQi/1b/688.html
#requires the following
# free, hostname, grep, cut, awk, uname, sar, ps, netstat
# Load root's login environment when it is available.  The guard matters:
# a non-interactive POSIX shell aborts the script if the file given to "."
# cannot be read.
if [ -r /root/.profile ]; then
    . /root/.profile
fi
HOSTNAME=$(hostname -s)
# memory: second column of the "Mem" row printed by free
MEMORY=$(free | grep Mem | awk '{print $2}')
# cpu info: one "processor" line per logical CPU in /proc/cpuinfo
CPUS=$(grep -c '^processor' /proc/cpuinfo)
CPU_MHZ=$(grep MHz /proc/cpuinfo | tail -n 1 | awk '{print $4}')
CPU_TYPE=$(grep vendor_id /proc/cpuinfo | tail -n 1 | awk '{print $3}')
CPU_TYPE2=$(uname -m)
OS_NAME=$(uname -s)
OS_KERNEL=$(uname -r)
UPTIME=$(uptime)
# "-o pid=" prints one line per process with no header line, so the header
# is not counted as a process.
PROC_COUNT=$(ps -e -o pid= | wc -l)
# Copy the first line of stdin (the column header) to stdout untouched, then
# run the command given as arguments on whatever input remains.
body() {
    IFS= read -r first_line
    printf '%s\n' "${first_line}"
    "$@"
}
# Print the report: summary first, then one titled section per topic.
# %M is the minute; the previous %S printed hour:second.
echo "概要信息 $(date +'%Y-%m-%d %H:%M')"
echo "----------------------------------"
echo "主机名 : $HOSTNAME"
echo "内存大小 : $MEMORY"
echo "CPU核数 : $CPUS"
echo "CPU类型 : $CPU_TYPE $CPU_TYPE2 $CPU_MHZ MHz"
echo "操作系统 : $OS_NAME"
echo "内核版本 : $OS_KERNEL"
echo "进程总数 : $PROC_COUNT"
echo "启动时间及负载 : $UPTIME"
echo
echo "内存使用情况"
echo "----------------------------------"
free -m
echo
echo "磁盘使用情况"
echo "----------------------------------"
df -h
echo
echo "网络连接情况"
echo "----------------------------------"
# Count TCP connections per state, leaving out lines that mention 127.0.0.1
# (-F matches the address literally instead of treating "." as a wildcard).
netstat -n | grep -vF '127.0.0.1' | awk '/^tcp/ {++S[$NF]} END {for(a in S) print a, S[a]}'
echo
echo "网络监听情况"
echo "----------------------------------"
netstat -tnpl | awk 'NR>2 {printf "%-20s %-15s \n",$4,$7}'
echo
# body passes the ps header line through before sorting, so 11 lines are
# needed to show the header plus ten processes.
echo "内存占用Top 10"
echo "----------------------------------"
ps -eo rss,pmem,pcpu,vsize,args | body sort -k 1 -r -n | head -n 11
echo
echo "CPU占用Top 10"
echo "----------------------------------"
ps -eo rss,pmem,pcpu,vsize,args | body sort -k 3 -r -n | head -n 11
echo
# Start time for the sar sections, computed once so all five cover the same
# window.
# NOTE(review): during the first hour after midnight this start time is later
# than the current time of day - confirm how sar handles that.
SINCE=$(date -d "1 hour ago" +%H:%M:%S)
echo "最近1小时网络流量统计"
echo "----------------------------------"
sar -n DEV -s "$SINCE"
echo
echo "最近1小时cpu使用统计"
echo "----------------------------------"
sar -u -s "$SINCE"
echo
echo "最近1小时磁盘IO统计"
echo "----------------------------------"
sar -b -s "$SINCE"
echo
echo "最近1小时进程队列和平均负载统计"
echo "----------------------------------"
sar -q -s "$SINCE"
echo
echo "最近1小时内存和交换空间的统计统计"
echo "----------------------------------"
sar -r -s "$SINCE"
echo
# 参考链接:
# http://www.dslreports.com/forum/remark,2069987
# http://www.ctohome.com/FuWuQi/1b/688.html
运行它,它会输出类似下面的信息,你可以修改脚本添加你关心的信息,或者删除你看不懂或不关心的信息。
https://github.com/onlytiancai/codesnip/blob/master/shell/sysinfo.txt
概要信息 2013-10-30 13:58
----------------------------------
主机名 : testing
内存大小 : 1026080
CPU核数 : 1
CPU类型 : GenuineIntel x86_64 2659.983 MHz
操作系统 : Linux
内核版本 : 2.6.18-308.13.1.el5
进程总数 : 106
启动时间及负载 : 13:26:58 up 4 days, 18:59, 2 users, load average: 0.00, 0.00, 0.00
内存使用情况
----------------------------------
total used free shared buffers cached
Mem: 1002 921 80 0 242 372
-/+ buffers/cache: 306 695
Swap: 2015 0 2015
磁盘使用情况
----------------------------------
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/VolGroup00-LogVol00
18G 12G 5.0G 71% /
/dev/sda1 99M 26M 68M 28% /boot
tmpfs 502M 0 502M 0% /dev/shm
网络连接情况
----------------------------------
ESTABLISHED 2
网络监听情况
----------------------------------
0.0.0.0:8803 26946/python
0.0.0.0:3306 26926/mysqld
0.0.0.0:8812 26777/python2.7
0.0.0.0:621 2559/rpc.statd
0.0.0.0:111 2518/portmap
0.0.0.0:80 2958/nginx
0.0.0.0:22 2862/sshd
127.0.0.1:631 2876/cupsd
0.0.0.0:8888 26782/python
127.0.0.1:25 2917/sendmail
0.0.0.0:7003 26781/python
0.0.0.0:443 2958/nginx
:::22 2862/sshd
内存占用Top 10
----------------------------------
RSS %MEM %CPU VSZ COMMAND
32788 3.1 0.0 38932 iscsiuio
25540 2.4 0.0 211040 python run.py
24984 2.4 0.0 190036 /usr/libexec/mysqld --basedir=/usr --datadir=/var/lib/mysql --user=mysql --pid-file=/var/run/mysqld/mysqld.pid --skip-external-locking --socket=/var/lib/mysql/mysql.sock
17164 1.6 0.0 258384 /usr/bin/python -tt /usr/sbin/yum-updatesd
13308 1.2 0.0 179208 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:wsgiapp -b 0.0.0.0:7003 -k gevent
13116 1.2 0.0 180808 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:app -b 0.0.0.0:8888 -w 4 -k gevent
13112 1.2 0.0 180820 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:app -b 0.0.0.0:8888 -w 4 -k gevent
13104 1.2 0.0 180800 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:app -b 0.0.0.0:8888 -w 4 -k gevent
13104 1.2 0.0 180792 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:app -b 0.0.0.0:8888 -w 4 -k gevent
CPU占用Top 10
----------------------------------
RSS %MEM %CPU VSZ COMMAND
924 0.0 0.0 12752 /sbin/udevd -d
908 0.0 0.0 21660 xinetd -stayalive -pidfile /var/run/xinetd.pid
868 0.0 0.0 63524 ps -eo rss,pmem,pcpu,vsize,args
860 0.0 0.0 12340 hald-addon-keyboard: listening on /dev/input/event0
856 0.0 0.0 12340 hald-addon-acpi: listening on acpid socket /var/run/acpid.socket
800 0.0 0.0 10176 rpc.statd
792 0.0 0.0 57252 rpc.idmapd
792 0.0 0.0 10448 /usr/sbin/hcid
最近1小时网络流量统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM IFACE rxpck/s txpck/s rxbyt/s txbyt/s rxcmp/s txcmp/s rxmcst/s
12:40:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:40:01 PM eth0 1.22 0.25 81.84 12.91 0.00 0.00 0.00
12:40:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:50:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:50:01 PM eth0 1.57 0.24 103.54 12.40 0.00 0.00 0.00
12:50:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:00:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:00:01 PM eth0 1.81 0.25 117.32 12.91 0.00 0.00 0.00
01:00:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:10:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:10:01 PM eth0 1.81 0.25 117.91 13.03 0.00 0.00 0.00
01:10:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:20:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:20:01 PM eth0 1.87 0.71 157.33 88.51 0.00 0.00 0.00
01:20:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
Average: lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
Average: eth0 1.65 0.34 115.58 27.95 0.00 0.00 0.00
Average: sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
最近1小时cpu使用统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM CPU %user %nice %system %iowait %steal %idle
12:40:01 PM all 0.05 0.00 0.19 0.02 0.00 99.74
12:50:01 PM all 0.05 0.00 0.19 0.04 0.00 99.72
01:00:01 PM all 0.05 0.00 0.18 0.02 0.00 99.75
01:10:01 PM all 0.04 0.00 0.18 0.06 0.00 99.72
01:20:01 PM all 0.35 0.00 0.25 0.04 0.00 99.36
Average: all 0.11 0.00 0.20 0.04 0.00 99.66
最近1小时磁盘IO统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM tps rtps wtps bread/s bwrtn/s
12:40:01 PM 4.94 0.00 4.94 0.00 65.77
12:50:01 PM 4.94 0.00 4.94 0.00 65.04
01:00:01 PM 5.01 0.00 5.01 0.00 66.63
01:10:01 PM 5.02 0.00 5.02 0.00 66.41
01:20:01 PM 7.61 0.00 7.61 0.00 104.93
Average: 5.50 0.00 5.50 0.00 73.75
最近1小时进程队列和平均负载统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM runq-sz plist-sz ldavg-1 ldavg-5 ldavg-15
12:40:01 PM 4 130 0.00 0.00 0.00
12:50:01 PM 2 130 0.00 0.00 0.00
01:00:01 PM 2 131 0.00 0.00 0.00
01:10:01 PM 6 132 0.00 0.00 0.00
01:20:01 PM 4 133 0.00 0.00 0.00
Average: 4 131 0.00 0.00 0.00
最近1小时内存和交换空间的统计统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM kbmemfree kbmemused %memused kbbuffers kbcached kbswpfree kbswpused %swpused kbswpcad
12:40:01 PM 88264 937816 91.40 247568 381400 2064376 0 0.00 0
12:50:01 PM 88016 938064 91.42 247632 381420 2064376 0 0.00 0
01:00:01 PM 87760 938320 91.45 247688 381444 2064376 0 0.00 0
01:10:01 PM 87380 938700 91.48 247772 381468 2064376 0 0.00 0
01:20:01 PM 87620 938460 91.46 247812 381488 2064376 0 0.00 0
Average: 87808 938272 91.44 247694 381444 2064376 0 0.00 0
----------------------------------
主机名 : testing
内存大小 : 1026080
CPU核数 : 1
CPU类型 : GenuineIntel x86_64 2659.983 MHz
操作系统 : Linux
内核版本 : 2.6.18-308.13.1.el5
进程总数 : 106
启动时间及负载 : 13:26:58 up 4 days, 18:59, 2 users, load average: 0.00, 0.00, 0.00
内存使用情况
----------------------------------
total used free shared buffers cached
Mem: 1002 921 80 0 242 372
-/+ buffers/cache: 306 695
Swap: 2015 0 2015
磁盘使用情况
----------------------------------
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/VolGroup00-LogVol00
18G 12G 5.0G 71% /
/dev/sda1 99M 26M 68M 28% /boot
tmpfs 502M 0 502M 0% /dev/shm
网络连接情况
----------------------------------
ESTABLISHED 2
网络监听情况
----------------------------------
0.0.0.0:8803 26946/python
0.0.0.0:3306 26926/mysqld
0.0.0.0:8812 26777/python2.7
0.0.0.0:621 2559/rpc.statd
0.0.0.0:111 2518/portmap
0.0.0.0:80 2958/nginx
0.0.0.0:22 2862/sshd
127.0.0.1:631 2876/cupsd
0.0.0.0:8888 26782/python
127.0.0.1:25 2917/sendmail
0.0.0.0:7003 26781/python
0.0.0.0:443 2958/nginx
:::22 2862/sshd
内存占用Top 10
----------------------------------
RSS %MEM %CPU VSZ COMMAND
32788 3.1 0.0 38932 iscsiuio
25540 2.4 0.0 211040 python run.py
24984 2.4 0.0 190036 /usr/libexec/mysqld --basedir=/usr --datadir=/var/lib/mysql --user=mysql --pid-file=/var/run/mysqld/mysqld.pid --skip-external-locking --socket=/var/lib/mysql/mysql.sock
17164 1.6 0.0 258384 /usr/bin/python -tt /usr/sbin/yum-updatesd
13308 1.2 0.0 179208 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:wsgiapp -b 0.0.0.0:7003 -k gevent
13116 1.2 0.0 180808 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:app -b 0.0.0.0:8888 -w 4 -k gevent
13112 1.2 0.0 180820 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:app -b 0.0.0.0:8888 -w 4 -k gevent
13104 1.2 0.0 180800 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:app -b 0.0.0.0:8888 -w 4 -k gevent
13104 1.2 0.0 180792 /root/.monitor/bin/python /root/.monitor/bin/gunicorn mainweb:app -b 0.0.0.0:8888 -w 4 -k gevent
CPU占用Top 10
----------------------------------
RSS %MEM %CPU VSZ COMMAND
924 0.0 0.0 12752 /sbin/udevd -d
908 0.0 0.0 21660 xinetd -stayalive -pidfile /var/run/xinetd.pid
868 0.0 0.0 63524 ps -eo rss,pmem,pcpu,vsize,args
860 0.0 0.0 12340 hald-addon-keyboard: listening on /dev/input/event0
856 0.0 0.0 12340 hald-addon-acpi: listening on acpid socket /var/run/acpid.socket
800 0.0 0.0 10176 rpc.statd
792 0.0 0.0 57252 rpc.idmapd
792 0.0 0.0 10448 /usr/sbin/hcid
最近1小时网络流量统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM IFACE rxpck/s txpck/s rxbyt/s txbyt/s rxcmp/s txcmp/s rxmcst/s
12:40:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:40:01 PM eth0 1.22 0.25 81.84 12.91 0.00 0.00 0.00
12:40:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:50:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12:50:01 PM eth0 1.57 0.24 103.54 12.40 0.00 0.00 0.00
12:50:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:00:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:00:01 PM eth0 1.81 0.25 117.32 12.91 0.00 0.00 0.00
01:00:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:10:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:10:01 PM eth0 1.81 0.25 117.91 13.03 0.00 0.00 0.00
01:10:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:20:01 PM lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
01:20:01 PM eth0 1.87 0.71 157.33 88.51 0.00 0.00 0.00
01:20:01 PM sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
Average: lo 0.00 0.00 0.00 0.00 0.00 0.00 0.00
Average: eth0 1.65 0.34 115.58 27.95 0.00 0.00 0.00
Average: sit0 0.00 0.00 0.00 0.00 0.00 0.00 0.00
最近1小时cpu使用统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM CPU %user %nice %system %iowait %steal %idle
12:40:01 PM all 0.05 0.00 0.19 0.02 0.00 99.74
12:50:01 PM all 0.05 0.00 0.19 0.04 0.00 99.72
01:00:01 PM all 0.05 0.00 0.18 0.02 0.00 99.75
01:10:01 PM all 0.04 0.00 0.18 0.06 0.00 99.72
01:20:01 PM all 0.35 0.00 0.25 0.04 0.00 99.36
Average: all 0.11 0.00 0.20 0.04 0.00 99.66
最近1小时磁盘IO统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM tps rtps wtps bread/s bwrtn/s
12:40:01 PM 4.94 0.00 4.94 0.00 65.77
12:50:01 PM 4.94 0.00 4.94 0.00 65.04
01:00:01 PM 5.01 0.00 5.01 0.00 66.63
01:10:01 PM 5.02 0.00 5.02 0.00 66.41
01:20:01 PM 7.61 0.00 7.61 0.00 104.93
Average: 5.50 0.00 5.50 0.00 73.75
最近1小时进程队列和平均负载统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM runq-sz plist-sz ldavg-1 ldavg-5 ldavg-15
12:40:01 PM 4 130 0.00 0.00 0.00
12:50:01 PM 2 130 0.00 0.00 0.00
01:00:01 PM 2 131 0.00 0.00 0.00
01:10:01 PM 6 132 0.00 0.00 0.00
01:20:01 PM 4 133 0.00 0.00 0.00
Average: 4 131 0.00 0.00 0.00
最近1小时内存和交换空间的统计统计
----------------------------------
Linux 2.6.18-308.13.1.el5 (monitor-testing) 10/30/2013
12:30:01 PM kbmemfree kbmemused %memused kbbuffers kbcached kbswpfree kbswpused %swpused kbswpcad
12:40:01 PM 88264 937816 91.40 247568 381400 2064376 0 0.00 0
12:50:01 PM 88016 938064 91.42 247632 381420 2064376 0 0.00 0
01:00:01 PM 87760 938320 91.45 247688 381444 2064376 0 0.00 0
01:10:01 PM 87380 938700 91.48 247772 381468 2064376 0 0.00 0
01:20:01 PM 87620 938460 91.46 247812 381488 2064376 0 0.00 0
Average: 87808 938272 91.44 247694 381444 2064376 0 0.00 0
假设我们要把该脚本下载到/opt/scripts目录下,执行如下命令
mkdir -p /opt/scripts
cd /opt/scripts
wget https://raw.github.com/onlytiancai/codesnip/master/shell/sysinfo.sh
mkdir -p /var/www/sysinfo/
cd /opt/scripts
wget https://raw.github.com/onlytiancai/codesnip/master/shell/sysinfo.sh
mkdir -p /var/www/sysinfo/
运行crontab -e,增加如下定时任务,每5分钟收集系统信息并保存在/var/www/sysinfo目录下
*/5 * * * * /bin/sh /opt/scripts/sysinfo.sh >/var/www/sysinfo/sysinfo.txt 2>&1
修改nginx配置,把/var/www/sysinfo目录开放成可以通过web访问。
location /sysinfo {
alias /var/www/sysinfo/;
}
alias /var/www/sysinfo/;
}
这样,你就可以在你的手机上输入 http://yourdomain.com/sysinfo/sysinfo.txt 来随时查看你的服务器状态了。
提示:
1:如果你觉得系统信息里有敏感信息,比如ps命令的输出,可以修改脚本把敏感信息去掉。
2:为了防止别人知道你的系统信息url,你可以在nginx配置的location节设置一个比较长的随机的地址,然后在手机浏览器的收藏夹里把该网址收藏一下。
服务器多了,有一个烦恼就是如何批量快速操作一堆服务器。这里我推荐一下经常使用的利器pssh。这个工具给我的工作带来了莫大的帮助。
简介
pssh是一款开源的软件,使用python实现。用于批量ssh操作大批量机器。
pssh的项目地址 https://code.google.com/p/parallel-ssh/
安装
在pssh的项目主页找到相应的版本,下载到我们的服务器上,解压后执行python setup.py安装。下面以pssh-2.3的安装为例
常用的方法
pssh使用帮助
pssh查看所有服务器的uptime;-h list 指定了执行命令的机器列表;-A表示提示输入密码(如果机器都是ssh key打通的则无需加-A)
使用pscp向一堆机器分发文件
从一堆机器中拷贝文件到中心机器
批量上传
批量上传本地文件 linux-3.14.3.tar.xz 到服务器上的 /tmp 目录:
批量下载
批量下载服务器上的某文件到本地,不用担心重名问题,因为 pssh 已经建立了 grid01, grid02, …, grid05 目录来存放下载的文件:
批量同步
有时候我们需要保持开发机上(某目录里)的数据和服务器上的数据一致:
常见问题
如果你遇到这样的错误:
IOError: [Errno 4] Interrupted system call
建议升级python版本到python2.7
简介
pssh是一款开源的软件,使用python实现。用于批量ssh操作大批量机器。
pssh的项目地址 https://code.google.com/p/parallel-ssh/
安装
在pssh的项目主页找到相应的版本,下载到我们的服务器上,解压后执行python setup.py安装。下面以pssh-2.3的安装为例
wget 'https://parallel-ssh.googlecode.com/files/pssh-2.3.tar.gz'
tar -xzvf pssh-2.3.tar.gz
cd pssh-2.3
python setup.py install
tar -xzvf pssh-2.3.tar.gz
cd pssh-2.3
python setup.py install
常用的方法
pssh使用帮助
pssh --help
pssh查看所有服务器的uptime;-h list 指定了执行命令的机器列表;-A表示提示输入密码(如果机器都是ssh key打通的则无需加-A)
pssh -i -A -h list 'uptime'
使用pscp向一堆机器分发文件
pscp -h list localfile remote_dir
从一堆机器中拷贝文件到中心机器
pslurp -h list /etc/hosts local_dir
批量上传
批量上传本地文件 linux-3.14.3.tar.xz 到服务器上的 /tmp 目录:
$ pscp -l root -A -h grids linux-3.14.3.tar.xz /tmp/
Warning: do not enter your password if anyone else has superuser
privileges or access to your account.
Password:
[1] 05:56:16 [SUCCESS] grid01
[2] 05:56:16 [SUCCESS] grid03
[3] 05:57:04 [SUCCESS] grid05
[4] 05:57:04 [SUCCESS] grid04
[5] 05:57:05 [SUCCESS] grid02
Warning: do not enter your password if anyone else has superuser
privileges or access to your account.
Password:
[1] 05:56:16 [SUCCESS] grid01
[2] 05:56:16 [SUCCESS] grid03
[3] 05:57:04 [SUCCESS] grid05
[4] 05:57:04 [SUCCESS] grid04
[5] 05:57:05 [SUCCESS] grid02
批量下载
批量下载服务器上的某文件到本地,不用担心重名问题,因为 pssh 已经建立了 grid01, grid02, …, grid05 目录来存放下载的文件:
$ pslurp -l root -h grids -A /tmp/linux-3.14.3.tar.xz .
Warning: do not enter your password if anyone else has superuser
privileges or access to your account.
Password:
[1] 06:06:01 [SUCCESS] grid01
[2] 06:06:01 [SUCCESS] grid03
[3] 06:06:06 [SUCCESS] grid04
[4] 06:06:06 [SUCCESS] grid02
[5] 06:06:06 [SUCCESS] grid05
$ ls
grid01 grid02 grid03 grid04 grid05 grids linux-3.14.3.tar.xz parallel-ssh
Warning: do not enter your password if anyone else has superuser
privileges or access to your account.
Password:
[1] 06:06:01 [SUCCESS] grid01
[2] 06:06:01 [SUCCESS] grid03
[3] 06:06:06 [SUCCESS] grid04
[4] 06:06:06 [SUCCESS] grid02
[5] 06:06:06 [SUCCESS] grid05
$ ls
grid01 grid02 grid03 grid04 grid05 grids linux-3.14.3.tar.xz parallel-ssh
批量同步
有时候我们需要保持开发机上(某目录里)的数据和服务器上的数据一致:
$ prsync -l root -h grids -A -r develop/ /tmp/production/
Warning: do not enter your password if anyone else has superuser
privileges or access to your account.
Password:
[1] 06:12:52 [SUCCESS] grid05
[2] 06:12:52 [SUCCESS] grid01
[3] 06:12:52 [SUCCESS] grid04
[4] 06:12:52 [SUCCESS] grid02
[5] 06:12:52 [SUCCESS] grid03
Warning: do not enter your password if anyone else has superuser
privileges or access to your account.
Password:
[1] 06:12:52 [SUCCESS] grid05
[2] 06:12:52 [SUCCESS] grid01
[3] 06:12:52 [SUCCESS] grid04
[4] 06:12:52 [SUCCESS] grid02
[5] 06:12:52 [SUCCESS] grid03
常见问题
如果你遇到这样的错误:
IOError: [Errno 4] Interrupted system call
建议升级python版本到python2.7






