ソフトウェアRAID故障時の対応

準備

あらかじめ下記のコマンドの結果を残しておく

yum install -y hdparm
cat /proc/mdstat
hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
mdadm -E /dev/sd*|egrep "(^/dev|Role)"

また、/etc/mdadm.conf のMAILADDRの項目を編集して、RAIDのステータスに変化があったらメールで通知されるようにする。(ただし、宛先のメールサーバによっては受信を拒否されることもあるので注意。2017年4月の時点ではGmailは大丈夫そうだが、Hotmailは無理そう。)

例えば

echo MAILADDR akyoshita@g.ecc.u-tokyo.ac.jp >> /etc/mdadm.conf
sudo systemctl restart mdmonitor

壊れたら・・・

mdadm --fail /dev/md127 /dev/sdi1
mdadm --remove /dev/md127 /dev/sdi1

とやってRAIDからディスクを削除。

そして、壊れたディスクを新しいディスクに付け替えたら、そのデバイス名(例えば/dev/sdiとする)を指定してパーティションを作成

i=sdi; parted --script /dev/$i 'mklabel gpt'; parted --script /dev/$i 'mkpart primary 2048s -1'; parted --script /dev/$i 'set 1 raid on'
#パーティションを作るけど、下記のようにディスク全体(/dev/sdi)をRAIDに追加する流れでは結局消してしまうので、パーティションの作成が必要なのかは不明。

そして、RAIDに組み込むため、下記のコマンドを入力してRAIDに追加 (下記の例ではRAIDのデバイスをmd127, 新しいディスクを/dev/sdiとしている。コメントアウトした例は、RAIDのデバイスをmd0, 新しいディスクのシリアル番号をZ1F2C7N9とした場合)

mdadm /dev/md127 --add /dev/sdi
# mdadm /dev/md0 --add /dev/disk/by-id/ata-ST3000DM001-1CH166_Z1F2C7N9-part1 #OS起動時に自動で認識されるRAIDではなく、自前で組んだRAIDであれば、こちらを実行して/etc/mdadm.confを編集

m16

[root@m16 yoshitake.kazutoshi]# cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4]
md127 : active raid5 sdd1[4] sde2[6] sdc1[2] sda1[0] sdb1[1] sdf1[3]
      9766912000 blocks super 1.2 level 5, 512k chunk, algorithm 2 [6/5] [UUUUU_]
      [===>.................]  recovery = 16.8% (329777412/1953382400) finish=263.5min speed=102670K/sec
      bitmap: 0/15 pages [0KB], 65536KB chunk

unused devices: <none>
[root@m16 yoshitake.kazutoshi]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       Hitachi HDS722020ALA330
        Serial Number:      JK1131YAHN7XKV
/dev/sdb:
        Model Number:       Hitachi HDS5C3020ALA632
        Serial Number:      ML0220F31GWKRN
/dev/sdc:
        Model Number:       WDC WD20EARX-00PASB0
        Serial Number:      WD-WCAZAC755908
/dev/sdd:
        Model Number:       ST2000DM001-1CH164
        Serial Number:      Z1E6F6FB
/dev/sde:
        Model Number:       TOSHIBA MD04ACA400
        Serial Number:      66NAKLELFSAA
/dev/sdf:
        Model Number:       TOSHIBA DT01ACA200
        Serial Number:      84C60UZGS
[root@m16 yoshitake.kazutoshi]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
/dev/sda:
/dev/sda1:
   Device Role : Active device 0
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 1
/dev/sdc:
/dev/sdc1:
   Device Role : Active device 2
/dev/sdd:
/dev/sdd1:
   Device Role : Active device 4
mdadm: No md superblock detected on /dev/sde1.
mdadm: No md superblock detected on /dev/sde3.
mdadm: No md superblock detected on /dev/sde4.
/dev/sde:
/dev/sde2:
   Device Role : Active device 5
/dev/sdf:
/dev/sdf1:
   Device Role : Active device 3

m24

[root@m24 yoshitake]# cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4]
md127 : active raid5 sda2[0] sdb1[1] sdc1[3]
      966146048 blocks super 1.2 level 5, 512k chunk, algorithm 2 [3/3] [UUU]
      bitmap: 4/4 pages [16KB], 65536KB chunk

unused devices: <none>
[root@m24 yoshitake]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       WDC WD5000AAKS-00A7B2
        Serial Number:      WD-WMASY7805526
/dev/sdb:
        Model Number:       ST3500418AS
        Serial Number:      5VM8934X
/dev/sdc:
        Model Number:       ST3500418AS
        Serial Number:      5VM80ZL5
/dev/sdd:
        Model Number:       SSI H/W RAID5
        Serial Number:      6ZYZHUH5NZX6CFIH5LST
[root@m24 yoshitake]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
mdadm: No md superblock detected on /dev/sda1.
mdadm: No md superblock detected on /dev/sda3.
mdadm: No md superblock detected on /dev/sdb2.
mdadm: No md superblock detected on /dev/sdc2.
mdadm: No md superblock detected on /dev/sdd1.
/dev/sda:
/dev/sda2:
   Device Role : Active device 0
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 1
/dev/sdc:
/dev/sdc1:
   Device Role : Active device 2
/dev/sdd:

m48

[root@m48 ~]# cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4]
md0 : active raid5 sdb1[0] sdd1[5] sdf1[1] sde1[2] sdc1[3]
      39065214976 blocks super 1.2 level 5, 512k chunk, algorithm 2 [5/5] [UUUUU]
      bitmap: 2/73 pages [8KB], 65536KB chunk

unused devices: <none>
[root@m48 ~]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       Hitachi HDS721050DLE630
        Serial Number:      MSKE215H0L663G
/dev/sdb:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA212CKK
/dev/sdc:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA214VVE
/dev/sdd:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA216SNT
/dev/sde:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA214MC5
/dev/sdf:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA214AAQ
[root@m48 ~]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
mdadm: No md superblock detected on /dev/sda1.
mdadm: No md superblock detected on /dev/sda2.
/dev/sda:
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 0
/dev/sdc:
/dev/sdc1:
   Device Role : Active device 3
/dev/sdd:
/dev/sdd1:
   Device Role : Active device 4
/dev/sde:
/dev/sde1:
   Device Role : Active device 2
/dev/sdf:
/dev/sdf1:
   Device Role : Active device 1

m64

[root@m64 yoshitake.kazutoshi]# cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4]
md127 : active raid5 sdc1[1] sdd1[2] sdb1[0] sde1[4]
      8790402048 blocks super 1.2 level 5, 512k chunk, algorithm 2 [4/4] [UUUU]
      bitmap: 0/22 pages [0KB], 65536KB chunk

unused devices: <none>
[root@m64 yoshitake.kazutoshi]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       INTEL SSDSC2CW480A3
        Serial Number:      CVCV233000KU480DGN
/dev/sdb:
        Model Number:       Hitachi HDS5C3030ALA630
        Serial Number:      MJ1313YNG4S9YC
/dev/sdc:
        Model Number:       Hitachi HDS5C3030ALA630
        Serial Number:      MJ1313YNG4TK0C
/dev/sdd:
        Model Number:       Hitachi HDS5C3030ALA630
        Serial Number:      MJ1313YNG4UTNC
/dev/sde:
        Model Number:       TOSHIBA DT01ACA300
        Serial Number:      Y3CB5NHGS
/dev/sdf:
        Model Number:       ST2000DM001-1ER164
        Serial Number:      Z4Z55J9L
/dev/sdg:
        Model Number:       ST2000DM001-1ER164
        Serial Number:      W4Z3H4LQ
[root@m64 yoshitake.kazutoshi]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
mdadm: No md superblock detected on /dev/sda1.
mdadm: No md superblock detected on /dev/sda2.
mdadm: No md superblock detected on /dev/sda3.
/dev/sda:
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 0
/dev/sdc:
/dev/sdc1:
   Device Role : Active device 1
/dev/sdd:
/dev/sdd1:
   Device Role : Active device 2
mdadm: No md superblock detected on /dev/sdf1.
mdadm: No md superblock detected on /dev/sdg1.
/dev/sde:
/dev/sde1:
   Device Role : Active device 3
/dev/sdf:
/dev/sdg:

m96

[root@m96 ~]# cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4]
md0 : active raid5 sdb1[1] sda1[5] sde1[2] sdf1[3] sdd1[0]
      39065214976 blocks super 1.2 level 5, 512k chunk, algorithm 2 [5/5] [UUUUU]
      bitmap: 2/73 pages [8KB], 65536KB chunk

unused devices: <none>
[root@m96 ~]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
SG_IO: bad/missing sense data, sb[]:  70 32 05 c0 01 00 00 08 22 22 22 22 20 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
/dev/sda:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA2175GK
/dev/sdb:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA214ZT7
/dev/sdc:
/dev/sdd:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA214XYP
/dev/sde:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA215RZT
/dev/sdf:
        Model Number:       ST10000VN0004-1ZD101
        Serial Number:      ZA215S0C
[root@m96 ~]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
mdadm: No md superblock detected on /dev/sdc1.
mdadm: No md superblock detected on /dev/sdc2.
/dev/sda:
/dev/sda1:
   Device Role : Active device 4
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 1
/dev/sdc:
/dev/sdd:
/dev/sdd1:
   Device Role : Active device 0
/dev/sde:
/dev/sde1:
   Device Role : Active device 2
/dev/sdf:
/dev/sdf1:
   Device Role : Active device 3

m128


[root@m128 ~]# cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4]
md127 : active raid6 sdc4[2] sde1[4] sdg1[6] sdf1[5] sda2[0] sdd1[3] sdb1[1] sdh1[7]
      23441307648 blocks super 1.2 level 6, 512k chunk, algorithm 2 [8/8] [UUUUUUUU]
      [========>............]  resync = 42.4% (1658926592/3906884608) finish=726.7min speed=51551K/sec
      bitmap: 19/30 pages [76KB], 65536KB chunk

unused devices: <none>
[root@m128 ~]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       Hitachi HDS724040ALE640
        Serial Number:      PK1361PAGD86RV
/dev/sdb:
        Model Number:       TOSHIBA MD04ACA400
        Serial Number:      64A2K44PFSAA
/dev/sdc:
        Model Number:       WDC WD60EFRX-68MYMN1
        Serial Number:      WD-WXC1H3438826
/dev/sdd:
        Model Number:       TOSHIBA MD04ACA400
        Serial Number:      54UHK2IPFSAA
/dev/sde:
        Model Number:       ST4000DM000-1F2168
        Serial Number:      Z300VV12
/dev/sdf:
        Model Number:       ST4000DM000-1F2168
        Serial Number:      Z300W1FN
/dev/sdg:
        Model Number:       WDC WD40EFRX-68WT0N0
        Serial Number:      WD-WCC4E5KC8S4J
/dev/sdh:
        Model Number:       TOSHIBA MD04ACA400
        Serial Number:      54U1K1R2FSAA
[root@m128 ~]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
mdadm: No md superblock detected on /dev/sda1.
mdadm: No md superblock detected on /dev/sdc1.
mdadm: No md superblock detected on /dev/sdc2.
mdadm: No md superblock detected on /dev/sdc3.
mdadm: No md superblock detected on /dev/sdc5.
/dev/sda:
/dev/sda2:
   Device Role : Active device 0
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 1
/dev/sdc:
/dev/sdc4:
   Device Role : Active device 2
/dev/sdd:
/dev/sdd1:
   Device Role : Active device 3
/dev/sde:
/dev/sde1:
   Device Role : Active device 4
/dev/sdf:
/dev/sdf1:
   Device Role : Active device 5
/dev/sdg:
/dev/sdg1:
   Device Role : Active device 6
/dev/sdh:
/dev/sdh1:
   Device Role : Active device 7

m256

[root@m256 yoshitake.kazutoshi]# cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4]
md0 : active raid6 sdc1[1] sdb1[0] sdg1[4] sde1[3] sdd1[2]
      8790400512 blocks super 1.2 level 6, 512k chunk, algorithm 2 [5/5] [UUUUU]
      bitmap: 2/22 pages [8KB], 65536KB chunk

unused devices: <none>
[root@m256 yoshitake.kazutoshi]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       SSI H/W RAID5
        Serial Number:      NK7OCPZRJIQYMW2ZC4CM
/dev/sdb:
        Model Number:       ST3000DM001-9YN166
        Serial Number:      S1F0KGPN
/dev/sdc:
        Model Number:       ST3000DM001-9YN166
        Serial Number:      W1F0QVCA
/dev/sdd:
        Model Number:       ST3000DM001-9YN166
        Serial Number:      S1F0NG91
/dev/sde:
        Model Number:       ST3000DM001-9YN166
        Serial Number:      S1F0MWR0
/dev/sdf:
        Model Number:       INTEL SSDSA2CW600G3
        Serial Number:      CVPR1216047B600FGN
/dev/sdg:
        Model Number:       ST3000DM001-9YN166
        Serial Number:      S1F0PFLT
[root@m256 yoshitake.kazutoshi]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
mdadm: No md superblock detected on /dev/sda.
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 0
/dev/sdc:
/dev/sdc1:
   Device Role : Active device 1
/dev/sdd:
/dev/sdd1:
   Device Role : Active device 2
mdadm: No md superblock detected on /dev/sdf1.
mdadm: No md superblock detected on /dev/sdf2.
/dev/sde:
/dev/sde1:
   Device Role : Active device 3
/dev/sdf:
/dev/sdg:
/dev/sdg1:
   Device Role : Active device 4

m256i

[root@m256i yoshitake.kazutoshi]# cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4]
md127 : active raid5 sdi[6] sdh3[4] sda1[0] sdc1[2] sde1[3] sdb1[1]
      14650670080 blocks super 1.2 level 5, 512k chunk, algorithm 2 [6/5] [UUUUU_]
      [>....................]  recovery =  3.7% (110939224/2930134016) finish=257.5min speed=182402K/sec
      bitmap: 1/22 pages [4KB], 65536KB chunk

unused devices: <none>
[root@m256i yoshitake.kazutoshi]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       TOSHIBA DT01ACA300
        Serial Number:      Y324U7VGS
/dev/sdb:
        Model Number:       TOSHIBA DT01ACA300
        Serial Number:      Y323SLEGS
/dev/sdc:
        Model Number:       TOSHIBA DT01ACA300
        Serial Number:      63KZ1VEGS
/dev/sdd:
        Model Number:       WDC WD40EZRX-22SPEB0
        Serial Number:      WD-WCC4E6KL00K4
/dev/sde:
        Model Number:       TOSHIBA DT01ACA300
        Serial Number:      63KZ2XKGS
/dev/sdf:
        Model Number:       ST2000DM001-1ER164
        Serial Number:      Z4Z16GEQ
/dev/sdg:
        Model Number:       ST2000DM001-9YN164
        Serial Number:      Z34021Y4
/dev/sdh:
        Model Number:       TOSHIBA MD04ACA500
        Serial Number:      345AK05MFS9A
/dev/sdi:
        Model Number:       TOSHIBA DT01ACA300
        Serial Number:      Y323SDBGS
[root@m256i yoshitake.kazutoshi]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
/dev/sda:
/dev/sda1:
   Device Role : Active device 0
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 1
/dev/sdc:
/dev/sdc1:
   Device Role : Active device 2
/dev/sdd:
/dev/sdd2:
mdadm: No md superblock detected on /dev/sdh1.
mdadm: No md superblock detected on /dev/sdh2.
mdadm: No md superblock detected on /dev/sdh4.
/dev/sde:
/dev/sde1:
   Device Role : Active device 3
/dev/sdf:
/dev/sdf1:
/dev/sdg:
/dev/sdg1:
/dev/sdh:
/dev/sdh3:
   Device Role : Active device 4
/dev/sdi:
   Device Role : Active device 5

m384

[root@m384 ~]# cat /proc/mdstat
Personalities : [raid1] [raid6] [raid5] [raid4]
md126 : active raid6 sda2[0] sdc1[2] sdg1[6] sde1[4] sdf1[5] sdh1[7] sdd1[3] sdb1[1]
      17571348480 blocks super 1.2 level 6, 512k chunk, algorithm 2 [8/8] [UUUUUUUU]
      bitmap: 5/22 pages [20KB], 65536KB chunk

md127 : active raid1 sda3[0] sdb2[1]
      1048512 blocks super 1.0 [2/2] [UU]
      bitmap: 0/1 pages [0KB], 65536KB chunk

unused devices: <none>
[root@m384 ~]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       ST3000DM001-1CH166
        Serial Number:      Z1F2C7N9
/dev/sdb:
        Model Number:       ST3000DM001-1ER166
        Serial Number:      Z503RR0Y
/dev/sdc:
        Model Number:       ST3000DM008-2DM166
        Serial Number:      Z504C0KH
/dev/sdd:
        Model Number:       ST3000DM008-2DM166
        Serial Number:      Z504C9WH
/dev/sde:
        Model Number:       ST3000DM008-2DM166
        Serial Number:      Z504ADPW
/dev/sdf:
        Model Number:       ST3000DM008-2DM166
        Serial Number:      Z504GXC5
/dev/sdg:
        Model Number:       ST3000DM008-2DM166
        Serial Number:      Z504GLNB
/dev/sdh:
        Model Number:       ST3000DM008-2DM166
        Serial Number:      Z504GLLE
/dev/sdi:
        Model Number:       TOSHIBA MD04ACA500
        Serial Number:      345AK05MFS9A
[root@m384 ~]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
mdadm: No md superblock detected on /dev/sda1.
mdadm: No md superblock detected on /dev/sda4.
mdadm: No md superblock detected on /dev/sdb3.
/dev/sda:
/dev/sda2:
   Device Role : Active device 0
/dev/sda3:
   Device Role : Active device 0
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 1
/dev/sdb2:
   Device Role : Active device 1
mdadm: No md superblock detected on /dev/sdc2.
mdadm: No md superblock detected on /dev/sdd2.
mdadm: No md superblock detected on /dev/sde2.
mdadm: No md superblock detected on /dev/sdf2.
/dev/sdc:
/dev/sdc1:
   Device Role : Active device 2
/dev/sdd:
/dev/sdd1:
   Device Role : Active device 3
/dev/sde:
/dev/sde1:
   Device Role : Active device 4
/dev/sdf:
/dev/sdf1:
   Device Role : Active device 5
mdadm: No md superblock detected on /dev/sdg2.
mdadm: No md superblock detected on /dev/sdh2.
mdadm: No md superblock detected on /dev/sdi.
/dev/sdg:
/dev/sdg1:
   Device Role : Active device 6
/dev/sdh:
/dev/sdh1:
   Device Role : Active device 7

m512

[root@m512 ~]# cat /proc/mdstat
Personalities : [raid1] [raid6] [raid5] [raid4]
md126 : active raid1 sda2[0] sdb2[1]
      1048512 blocks super 1.0 [2/2] [UU]
      bitmap: 0/1 pages [0KB], 65536KB chunk

md127 : active raid6 sda1[0] sdf1[5] sde1[4] sdd1[3] sdb1[1] sdc1[2]
      7806525440 blocks super 1.2 level 6, 512k chunk, algorithm 2 [6/6] [UUUUUU]
      bitmap: 7/15 pages [28KB], 65536KB chunk

unused devices: <none>

[root@m512 ~]# hdparm -I /dev/sd?|egrep "(^/dev|Model|Serial Number)"
/dev/sda:
        Model Number:       Hitachi HDS5C3020ALA632
        Serial Number:      ML0220F31H8Y1N
/dev/sdb:
        Model Number:       Hitachi HDS5C3020ALA632
        Serial Number:      ML0220F31JMDUN
/dev/sdc:
        Model Number:       Hitachi HDS5C3020ALA632
        Serial Number:      ML0220F31LTH1D
/dev/sdd:
        Model Number:       Hitachi HDS722020ALA330
        Serial Number:      JK1101B9JZ0T5F
/dev/sde:
        Model Number:       Hitachi HDS722020ALA330
        Serial Number:      JK1101B9JZ0MMF
/dev/sdf:
        Model Number:       Hitachi HDS722020ALA330
        Serial Number:      JK11D1B8HX4N6Z

[root@m512 ~]# mdadm -E /dev/sd*|egrep "(^/dev|Role)"
mdadm: No md superblock detected on /dev/sda3.
mdadm: No md superblock detected on /dev/sdb3.
/dev/sda:
/dev/sda1:
   Device Role : Active device 0
/dev/sda2:
   Device Role : Active device 0
/dev/sdb:
/dev/sdb1:
   Device Role : Active device 1
/dev/sdb2:
   Device Role : Active device 1
mdadm: No md superblock detected on /dev/sdc2.
mdadm: No md superblock detected on /dev/sdd2.
mdadm: No md superblock detected on /dev/sde2.
/dev/sdc:
/dev/sdc1:
   Device Role : Active device 2
/dev/sdd:
/dev/sdd1:
   Device Role : Active device 3
/dev/sde:
/dev/sde1:
   Device Role : Active device 4
mdadm: No md superblock detected on /dev/sdf2.
/dev/sdf:
/dev/sdf1:
   Device Role : Active device 5