# :-: 服务器健康检查
| 适用范围 | 适用版本 | 人员 | 发布时间 | 文档版本 |备注 |
| --- | --- | --- | --- | --- |--- |
| 服务器操作系统 | V10-SP1-0711-X86 | 钱海龙 |2022.3.1| V1.0|发布|
| 服务器操作系统 | V10-SP1-0711-X86 | 张志勇 |2022.3.1| V1.1|模板调整|
### 经测试,该方案有效可行
*****
脚本内容:
```
#!/bin/bash
##---------- Kylin OS 健康检查脚本 ----------------------------------------------------##
#------variables used------#
S="************************************"
D="-------------------------------------"
COLOR="y"
MOUNT=$(mount|egrep -iw "ext4|ext3|xfs|gfs|gfs2|btrfs"|grep -v "loop"|sort -u -t' ' -k1,2)
FS_USAGE=$(df -PThl -x tmpfs -x iso9660 -x devtmpfs -x squashfs|awk '!seen[$1]++'|sort -k6n|tail -n +2)
IUSAGE=$(df -iPThl -x tmpfs -x iso9660 -x devtmpfs -x squashfs|awk '!seen[$1]++'|sort -k6n|tail -n +2)
if [ $COLOR == y ]; then
{
GCOLOR="\e[47;32m ------ OK/HEALTHY \e[0m"
WCOLOR="\e[43;31m ------ WARNING \e[0m"
CCOLOR="\e[47;31m ------ CRITICAL \e[0m"
}
else
{
GCOLOR=" ------ OK/HEALTHY "
WCOLOR=" ------ WARNING "
CCOLOR=" ------ CRITICAL "
}
fi
echo -e "$S"
echo -e "\tSystem Health Status"
echo -e "$S"
#--------Print Operating System Details--------#
hostname -f &> /dev/null && printf "Hostname : $(hostname -f)" || printf "Hostname : $(hostname -s)"
echo -en "\nOperating System : "
[ -f /etc/os-release ] && echo $(egrep -w "NAME|VERSION" /etc/os-release|awk -F= '{ print $2 }'|sed 's/"//g') || cat /etc/system-release
echo -e "Kernel Version :" $(uname -r)
printf "OS Architecture :"$(arch | grep x86_64 &> /dev/null) && printf " 64 Bit OS\n" || printf " 32 Bit OS\n"
#--------Print system uptime-------#
UPTIME=$(uptime)
echo -en "System Uptime : "
echo $UPTIME|grep day &> /dev/null
if [ $? != 0 ]; then
echo $UPTIME|grep -w min &> /dev/null && echo -en "$(echo $UPTIME|awk '{print $2" by "$3}'|sed -e 's/,.*//g') minutes" \
|| echo -en "$(echo $UPTIME|awk '{print $2" by "$3" "$4}'|sed -e 's/,.*//g') hours"
else
echo -en $(echo $UPTIME|awk '{print $2" by "$3" "$4" "$5" hours"}'|sed -e 's/,//g')
fi
echo -e "\nCurrent System Date & Time : "$(date +%c)
#--------Check for any read-only file systems--------#
echo -e "\nChecking For Read-only File System[s]"
echo -e "$D"
echo "$MOUNT"|grep -w \(ro\) && echo -e "\n.....Read Only file system[s] found"|| echo -e ".....No read-only file system[s] found. "
#--------Check for currently mounted file systems--------#
echo -e "\n\nChecking For Currently Mounted File System[s]"
echo -e "$D$D"
echo "$MOUNT"|column -t
#--------Check disk usage on all mounted file systems--------#
echo -e "\n\nChecking For Disk Usage On Mounted File System[s]"
echo -e "$D$D"
echo -e "( 0-85% = OK/HEALTHY, 85-95% = WARNING, 95-100% = CRITICAL )"
echo -e "$D$D"
echo -e "Mounted File System[s] Utilization (Percentage Used):\n"
COL1=$(echo "$FS_USAGE"|awk '{print $1 " "$7}')
COL2=$(echo "$FS_USAGE"|awk '{print $6}'|sed -e 's/%//g')
for i in $(echo "$COL2"); do
{
if [ $i -ge 95 ]; then
COL3="$(echo -e $i"% $CCOLOR\n$COL3")"
elif [[ $i -ge 85 && $i -lt 95 ]]; then
COL3="$(echo -e $i"% $WCOLOR\n$COL3")"
else
COL3="$(echo -e $i"% $GCOLOR\n$COL3")"
fi
}
done
COL3=$(echo "$COL3"|sort -k1n)
echo "$COL1" |column -t
echo "$COL3" |column -t
#--------Check for any zombie processes--------#
echo -e "\n\nChecking For Zombie Processes"
echo -e "$D"
ps -eo stat|grep -w Z 1>&2 > /dev/null
if [ $? == 0 ]; then
echo -e "Number of zombie process on the system are :" $(ps -eo stat|grep -w Z|wc -l)
echo -e "\n Details of each zombie processes found "
echo -e " $D"
ZPROC=$(ps -eo stat,pid|grep -w Z|awk '{print $2}')
for i in $(echo "$ZPROC"); do
ps -o pid,ppid,user,stat,args -p $i
done
else
echo -e "No zombie processes found on the system."
fi
#--------Check Inode usage--------#
echo -e "\n\nChecking For INode Usage"
echo -e "$D$D"
echo -e "( 0-85% = OK/HEALTHY, 85-95% = WARNING, 95-100% = CRITICAL )"
echo -e "$D$D"
echo -e "INode Utilization (Percentage Used):\n"
COL11=$(echo "$IUSAGE"|awk '{print $1" "$7}')
COL22=$(echo "$IUSAGE"|awk '{print $6}'|sed -e 's/%//g')
for i in $(echo "$COL22"); do
{
if [[ $i = *[[:digit:]]* ]]; then
{
if [ $i -ge 95 ]; then
COL33="$(echo -e $i"% $CCOLOR\n$COL33")"
elif [[ $i -ge 85 && $i -lt 95 ]]; then
COL33="$(echo -e $i"% $WCOLOR\n$COL33")"
else
COL33="$(echo -e $i"% $GCOLOR\n$COL33")"
fi
}
else
COL33="$(echo -e $i"% (Inode Percentage details not available)\n$COL33")"
fi
}
done
COL33=$(echo "$COL33"|sort -k1n)
echo "$COL11" |column -t
echo "$COL33" |column -t
#--------Check for SWAP Utilization--------#
echo -e "\n\nChecking SWAP Details"
echo -e "$D"
echo -e "Total Swap Memory in MiB : "$(grep -w SwapTotal /proc/meminfo|awk '{print $2/1024}')", in GiB : "\
$(grep -w SwapTotal /proc/meminfo|awk '{print $2/1024/1024}')
echo -e "Swap Free Memory in MiB : "$(grep -w SwapFree /proc/meminfo|awk '{print $2/1024}')", in GiB : "\
$(grep -w SwapFree /proc/meminfo|awk '{print $2/1024/1024}')
#--------Check for Processor Utilization (current data)--------#
echo -e "\n\nChecking For Processor Utilization"
echo -e "$D"
echo -e "\nCurrent Processor Utilization Summary :\n"
mpstat|tail -2
#--------Check for load average (current data)--------#
echo -e "\n\nChecking For Load Average"
echo -e "$D"
echo -e "Current Load Average : $(uptime|grep -o "load average.*"|awk '{print $3" " $4" " $5}')"
#------Print most recent 3 reboot events if available----#
echo -e "\n\nMost Recent 3 Reboot Events"
echo -e "$D$D"
last -x 2> /dev/null|grep reboot 1> /dev/null && /usr/bin/last -x 2> /dev/null|grep reboot|head -3 || \
echo -e "No reboot events are recorded."
#------Print most recent 3 shutdown events if available-----#
echo -e "\n\nMost Recent 3 Shutdown Events"
echo -e "$D$D"
last -x 2> /dev/null|grep shutdown 1> /dev/null && /usr/bin/last -x 2> /dev/null|grep shutdown|head -3 || \
echo -e "No shutdown events are recorded."
#--------Print top 5 Memory & CPU consumed process threads---------#
#--------excludes current running program which is hwlist----------#
echo -e "\n\nTop 5 Memory Resource Hog Processes"
echo -e "$D$D"
ps -eo pmem,pid,ppid,user,stat,args --sort=-pmem|grep -v $$|head -6|sed 's/$/\n/'
echo -e "\nTop 5 CPU Resource Hog Processes"
echo -e "$D$D"
ps -eo pcpu,pid,ppid,user,stat,args --sort=-pcpu|grep -v $$|head -6|sed 's/$/\n/'
echo -e "NOTE:- If any of the above fields are marked as \"blank\" or \"NONE\" or \"UNKNOWN\" or \"Not Available\" or \"Not Specified\"
that means either there is no value present in the system for these fields, otherwise that value may not be available,
or suppressed since there was an error in fetching details."
echo -e "\n\t\t %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
echo -e "\t\t <>--------<> Health Check Complete <>--------<>"
echo -e "\t\t %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
```
- 常见问题
- 进入救援模式
- 进入单用户模式
- grub引导修复
- V10SP1-biosdevname解析
- 外设挂载和卸载
- audit关闭解决方案
- 终端无法切换
- 救援模式取数据
- 网卡更名操作
- 问题复现解决类
- xgboost复现解决步骤
- 激活类
- 麒麟服务器激活常用命令
- 服务器故障激活问题--须知
- 激活权限获得方式
- V10-SP系列服务器激活
- 银河麒麟高级服务器操作系统V7&V10激活手册
- shell脚本类
- 多网卡队列绑定CPU脚本
- 服务器健康检查脚本
- 服务安装配置类
- 银河麒麟高级服务器操作系统V10(sp1)安装手册
- LVM创建
- Chrony时间同步配置
- 搭建网络yum源
- PXE部署实施
- Man手册安装
- 部署VNC服务
- Kylin-Server-V10-SP1-0711-DNS服务单机部署文档
- 麒麟ks文件定制-封装iso文件
- iptables端口配置
- V10-SP1-aarch64安装jdk1.7
- 工具使用类
- cyclictest测试工具
- e2fsprogs工具介绍
- Logrotate工具说明
- nmon工具安装与使用
- 升、降级类
- rsyslog升级报告
- 性能优化类
- 日志轮转
- 大页内存与透明大页详解
- 优化磁盘IO调度方式
- core文件设置
- 分析报告类
- bond模式4协商不通排查
- audit内存泄露问题分析报告
- mate-indicators内核占用过高问题分析报告
- ansible问题-hostname以及lvol报错
- 关于Linux内存计算的说明
- 磁盘IO调度算法
- 硬件相关类
- 串口
- 网络相关类
- bond创建
- 在已配置好网络情况下添加路由
- 网卡配合网桥实现内网互通KVM虚拟机
- Bond模式4协商不通排查过程
- HA高可用
- kylin HA shell实践
- kylin HA概念性及shell使用
- 虚拟化
- KVM创建虚拟机(图形化操作)
- KVM创建虚拟机(命令操作)
- 容器类
- docker-runc升级
- docker基础镜像制作-服务器版
- 麒麟云平台
- USB3.0设备穿透方法(针对win10云主机穿透)
- 麒麟云平台开关机操作说明
- 技术演练
