#!/bin/bash
# (C) ken chase, alliance technologies 2005
# v0.02 2005sep27 - basic inception, gives status from /proc/mdstat
# v0.03 2005sep29 - check for component devices, if none, exit
# v0.04 2005sep30 - adapted for 2.6 kernels, which dont give err exit codes
#                   for mdadm - now bases action on text output
# v0.05 2005oct04 - updated so works with 2.4 (err exit code) or 2.6 (text)
#
# Purpose:
#   Stress-test ("thrash") a Linux md array: forever, for each component
#   device in turn, wait until the array is healthy, then fail, remove and
#   re-add that device so the array has to rebuild.
#
#   WARNING: this deliberately degrades the array over and over. It runs
#   until interrupted; interrupting between "Removing" and "Re-adding"
#   leaves the array degraded.
#
# Usage:
#   <script> <md device>        e.g. "md1" or "/dev/md1"
#
# Environment:
#   MDADM   path of the mdadm binary (default: /root/mdadm-1.12.0/mdadm)
#
# Exit status:
#   0  usage text was printed because no device was given
#   1  mdadm binary not usable, or the array reports no component devices

set -u

mdadm=${MDADM:-/root/mdadm-1.12.0/mdadm}

# Print the usage text. The here-doc delimiter is unquoted on purpose so
# that $0 and $mdadm are expanded in the message.
usage() {
  cat <<EOF
Usage: $0 [md device]

Will thrash /dev/[device] (an md device addressable with $mdadm)
by removing each of component drives/partitions in turn from the array,
and readding them, letting the array rebuild.
EOF
}

# Print the component device paths of array $1 (name without /dev/), one
# per line: the last field of every "mdadm --detail" row that starts with
# a number and whose last field contains /dev/.
list_components() {
  "$mdadm" --detail "/dev/$1" |
    awk '/^[ ]*[0-9]/ && $NF ~ /\/dev\// { print $NF }'
}

# Print the text after "State :" from "mdadm --detail" for array $1.
array_state() {
  local state
  state=$("$mdadm" --detail "/dev/$1" | grep "State :" | cut -d : -f2-)
  printf '%s\n' "${state# }"
}

# Succeed while array $1 must not be touched yet: either its state text
# mentions "degraded" (2.6 kernels, where only the text is reliable) or
# "mdadm --detail --test" exits non-zero (2.4 kernels).
array_busy() {
  "$mdadm" --detail "/dev/$1" | grep -q "State.*degraded" ||
    ! "$mdadm" --detail "/dev/$1" --test >/dev/null 2>&1
}

# Print a condensed form of the line two below array $1's own line in
# /proc/mdstat (the recovery progress line while rebuilding): progress
# bar, "name =" labels and the (done/total) block counts are removed.
# The pattern is anchored so that e.g. "md1" cannot match "md10".
rebuild_progress() {
  grep -A2 "^$1 :" /proc/mdstat |
    tail -1 |
    tr -s ' \t' ' ' |
    sed -e 's/\[.*\]//' \
        -e 's/[a-z]*[ ]*=//g' \
        -e 's/clean //' \
        -e 's/(.*)//' |
    tr -s ' '
}

# Fail, remove and re-add component $2 of array $1, pausing a second
# between steps. mdadm's own diagnostics are left visible.
cycle_component() {
  local array=$1 dev=$2

  printf 'Failing %s....' "$dev"
  "$mdadm" --manage "/dev/$array" --fail "$dev"
  sleep 1

  printf 'Removing %s...' "$dev"
  "$mdadm" --manage "/dev/$array" --remove "$dev"
  sleep 1

  printf 'Re-adding %s..' "$dev"
  "$mdadm" --manage "/dev/$array" --add "$dev"
  sleep 1
}

main() {
  if [ -z "${1:-}" ]; then
    usage
    # Status 0 is kept here so existing callers see no change.
    exit 0
  fi

  # Accept both "md1" and "/dev/md1".
  local array=${1#/dev/}

  if ! command -v "$mdadm" >/dev/null 2>&1; then
    echo "mdadm binary '$mdadm' not found or not executable (set MDADM to override)" >&2
    exit 1
  fi

  local components
  components=$(list_components "$array")
  if [ -z "$components" ]; then
    echo "Device /dev/$array has no devices... EXITING..." >&2
    exit 1
  fi

  # Device paths contain no whitespace, so word splitting is the intended
  # way to turn the newline-separated list into an array.
  local -a drives
  # shellcheck disable=SC2206
  drives=( $components )

  local iteration=0 dev
  while :; do
    iteration=$((iteration + 1))
    echo "*** ITERATION $iteration $(date) ***"
    echo
    echo "errorcode 1 = rebuilding. 2+ = more than 1 device failed. 3+ = failure"
    echo
    echo "Component devices:"
    echo "  ${drives[*]}"
    echo

    for dev in "${drives[@]}"; do
      echo "state is $("$mdadm" --detail "/dev/$array" | grep "State :" |
        sed 's/^[[:space:]]*//')"
      echo

      # Never pull a device while the array is still degraded/rebuilding;
      # redraw a one-line status every two seconds until it is healthy.
      while array_busy "$array"; do
        printf '\r%s /dev/%s: %s %s' \
          "$(date +%H:%M:%S)" "$array" \
          "$(array_state "$array")" "$(rebuild_progress "$array")"
        sleep 2
      done

      echo
      echo "===================================================================="
      cycle_component "$array" "$dev"
      echo
      echo "Waiting for rebuild of $array..."
      echo
      sleep 5
    done
  done
}

main "$@"

# Sample output kept for reference (presumably what the parsing above was
# developed against):
#
#
#root@nl6:/var/log>cat /proc/mdstat
#Personalities : [raid5]
#read_ahead 1024 sectors
#Event: 22
#md0 : active raid5 sdb1[11] sdj1[8] sdl1[10] sdk1[9] sdi1[7] sdh1[6] sdg1[5] sdf1[4] sde1[3] sdd1[2] sdc1[1]
#      48916480 blocks level 5, 64k chunk, algorithm 2 [11/10] [_UUUUUUUUUU]
#      [=========>...........]  recovery = 45.2% (2213648/4891648) finish=7.9min speed=5591K/sec
#md1 : active raid5 sdl2[10](F) sdk2[9] sdj2[8] sdi2[7] sdh2[6] sdg2[5] sdf2[4] sde2[3] sdd2[2] sdc2[1] sdb2[0]
#      9879040 blocks level 5, 64k chunk, algorithm 2 [11/10] [UUUUUUUUUU_]
#
#unused devices:
#
#/dev/md1:
#        Version : 00.90.00
#  Creation Time : Fri Sep 23 12:16:00 2005
#     Raid Level : raid5
#     Array Size : 9879040 (9.42 GiB 10.12 GB)
#    Device Size : 987904 (964.75 MiB 1011.61 MB)
#   Raid Devices : 11
#  Total Devices : 12
#Preferred Minor : 1
#    Persistence : Superblock is persistent
#
#    Update Time : Mon Sep 26 17:16:11 2005
#          State : dirty, no-errors
# Active Devices : 11
#Working Devices : 11
# Failed Devices : 1
#  Spare Devices : 0
#
#         Layout : left-symmetric
#     Chunk Size : 64K
#
#    Number   Major   Minor   RaidDevice State
#       0       8       18        0      active sync   /dev/sdb2
#       1       8       34        1      active sync   /dev/sdc2
#       2       8       50        2      active sync   /dev/sdd2
#       3       8       66        3      active sync   /dev/sde2
#       4       8       82        4      active sync   /dev/sdf2
#       5       8       98        5      active sync   /dev/sdg2
#       6       8      114        6      active sync   /dev/sdh2
#       7       8      130        7      active sync   /dev/sdi2
#       8       8      146        8      active sync   /dev/sdj2
#       9       8      162        9      active sync   /dev/sdk2
#      10       8      178       10      active sync   /dev/sdl2
#           UUID : a2e82a7f:c0920995:d9c94456:20737321
#         Events : 0.23
#::0::
#