1. Preparation before configuration
1.1 Add the storage hosts to the trusted storage pool
# Running the probes on any other host (probing every host except itself) works just as well
[root@mystorage1 ~]# gluster peer probe mystorage2
peer probe: success.
[root@mystorage1 ~]# gluster peer probe mystorage3
peer probe: success.
[root@mystorage1 ~]# gluster peer probe mystorage4
peer probe: success.
1.2 Check the peer status
[root@mystorage1 ~]# gluster peer status
Number of Peers: 3

Hostname: mystorage2
Uuid: 290fef84-b90a-4ab8-9662-07928cacd1df
State: Peer in Cluster (Connected)

Hostname: mystorage3
Uuid: a16771b7-415d-4b5f-9bd2-26c4822432c0
State: Peer in Cluster (Connected)

Hostname: mystorage4
Uuid: 6ff63c5b-bd0a-4aea-bc31-c71cdde8ef80
State: Peer in Cluster (Connected)
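On reasonably recent GlusterFS releases there is also a more compact view of pool membership; the command below is just a convenience and its output (one line per peer, including the local node) will differ per cluster, so none is shown here:

# Compact one-line-per-peer view of the trusted pool
gluster pool list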
1.3 Disk partitioning (using mystorage1 as an example)
Install the XFS userland tools: yum -y install xfsprogs
Run fdisk -l to list the block devices; you should see output similar to the following:

[root@mystorage1 ~]# fdisk -l

Disk /dev/sda: 42.9 GB, 42949672960 bytes
255 heads, 63 sectors/track, 5221 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk identifier: 0x0006a2fd

   Device Boot      Start         End      Blocks   Id  System
/dev/sda1   *           1          39      307200   83  Linux
Partition 1 does not end on cylinder boundary.
/dev/sda2              39        4969    39603200   83  Linux
/dev/sda3            4969        5222     2031616   82  Linux swap / Solaris

Disk /dev/sdb: 53.7 GB, 53687091200 bytes
255 heads, 63 sectors/track, 6527 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk identifier: 0x00000000
Note: the tooling here has size limits (mkfs supports only a limited set of layouts, up to about 16 TB). For partitions larger than roughly 4 TB, fdisk with its legacy MBR labels is no longer suitable; parted is recommended for partitioning instead.
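For reference only, a minimal parted sketch for a large disk. It assumes /dev/sdb is the target device and that a single partition spanning the whole disk is wanted; the device name and layout are illustrative, not part of the original walkthrough:

# Create a GPT label so the disk is not constrained by MBR limits
parted /dev/sdb mklabel gpt
# Create one optimally aligned partition covering the whole disk
parted -a optimal /dev/sdb mkpart primary xfs 0% 100%
# The new partition (/dev/sdb1) can then be formatted as in section 1.4
mkfs.xfs -f /dev/sdb1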
# fdisk partitioning workflow:
# fdisk /dev/sdb => n => p => 1 => Enter => w  (w = write and save, m = help)
[root@mystorage1 ~]# fdisk /dev/sdb
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel with disk identifier 0x5a64e65e.
Changes will remain in memory only, until you decide to write them.
After that, of course, the previous content won't be recoverable.

Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)

WARNING: DOS-compatible mode is deprecated. It's strongly recommended to
         switch off the mode (command 'c') and change display units to
         sectors (command 'u').

Command (m for help): m
Command action
   a   toggle a bootable flag
   b   edit bsd disklabel
   c   toggle the dos compatibility flag
   d   delete a partition
   l   list known partition types
   m   print this menu
   n   add a new partition
   o   create a new empty DOS partition table
   p   print the partition table
   q   quit without saving changes
   s   create a new empty Sun disklabel
   t   change a partition's system id
   u   change display/entry units
   v   verify the partition table
   w   write table to disk and exit
   x   extra functionality (experts only)

Command (m for help): n
Command action
   e   extended
   p   primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-2610, default 1):
Using default value 1
Last cylinder, +cylinders or +size{K,M,G} (1-2610, default 2610):
Using default value 2610
Note: in production you can skip partitioning and format the disk directly; a disk with a single partition behaves essentially the same as an unpartitioned one.
1.4 Format the disk
# Format the disk; run this on every machine
[root@mystorage1 ~]# mkfs.xfs -f /dev/sdb
meta-data=/dev/sdb               isize=256    agcount=4, agsize=3276800 blks
         =                       sectsz=512   attr=2, projid32bit=0
data     =                       bsize=4096   blocks=13107200, imaxpct=25
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0
log      =internal log           bsize=4096   blocks=6400, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0

# On all four machines, create the directory the block device will be mounted on
[root@mystorage1 ~]# mkdir -p /storage/brick1

# Mount the disk on that directory
[root@mystorage1 ~]# mount /dev/sdb /storage/brick1
[root@mystorage1 ~]# df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/sda2        38G   15G   21G  42% /
tmpfs           935M     0  935M   0% /dev/shm
/dev/sda1       283M   85M  183M  32% /boot
/dev/sdb         50G   33M   50G   1% /storage/brick1

# To mount at boot, edit /etc/fstab and add the following line:
/dev/sdb /storage/brick1 xfs defaults 0 0

# Method 2: append the line with echo and then mount everything listed in fstab
echo "/dev/sdb /storage/brick1 xfs defaults 0 0" >> /etc/fstab
mount -a
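As a sketch only, a slightly more robust fstab variant references the filesystem by UUID so the entry survives device renaming; the UUID below is a placeholder and must be replaced with the value blkid reports on your host:

# Look up the filesystem UUID of the brick device
blkid /dev/sdb
# Example fstab entry using a placeholder UUID
UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx /storage/brick1 xfs defaults 0 0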
If your servers have a RAID controller, you can build a RAID 5 layer underneath to improve IO performance and run GlusterFS on top of it. Running GlusterFS directly on the disks without a RAID card is also perfectly fine.
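If there is no hardware RAID card but a RAID 5 layer is still wanted, software RAID with mdadm is one possible substitute. The sketch below is illustrative only and assumes three spare disks /dev/sdb, /dev/sdc and /dev/sdd, which are not part of the original setup:

# Build a software RAID 5 array from three disks
mdadm --create /dev/md0 --level=5 --raid-devices=3 /dev/sdb /dev/sdc /dev/sdd
# Format the array and use it as the brick device instead of a raw disk
mkfs.xfs -f /dev/md0
mount /dev/md0 /storage/brick1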
2. Creating volumes and other operations
2.1 Volume type overview
Distributed: distributed volume; files are spread across the bricks of the volume by a hash algorithm.
Replicated: replicated volume, similar to RAID 1; the replica count must equal the number of bricks (storage servers) in the volume; high availability.
Striped: striped volume, similar to RAID 0; the stripe count must equal the number of bricks in the volume; files are split into chunks stored round-robin across the bricks; concurrency is per chunk, so large files perform well.
Distributed Striped: distributed striped volume; the number of bricks in the volume must be a multiple (>= 2x) of the stripe count; combines distribution and striping.
Distributed Replicated: distributed replicated volume; the number of bricks in the volume must be a multiple (>= 2x) of the replica count; combines distribution and replication.
Example create commands for each type are sketched after this list.
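For reference, hedged example create commands for each type. They assume the four hosts mystorage1-4 from this walkthrough, and the volume names (gv_dist, gv_rep, gv_stripe, gv_dr) and the /storage/brick2 path are illustrative only; a given brick directory can belong to only one volume, so treat these as alternatives, not a script to run as-is. Note also that stripe-type volumes have been deprecated and removed in newer GlusterFS releases, and that force is only needed when a brick sits at the root of a mount point, as in the original commands:

# Distributed (default type): files are hashed across the bricks
gluster volume create gv_dist mystorage1:/storage/brick2 mystorage2:/storage/brick2
# Replicated: every file is kept on both bricks
gluster volume create gv_rep replica 2 mystorage1:/storage/brick2 mystorage2:/storage/brick2
# Striped: large files are split in chunks across the bricks
gluster volume create gv_stripe stripe 2 mystorage3:/storage/brick2 mystorage4:/storage/brick2
# Distributed replicated: 4 bricks with replica 2 => 2 replica pairs, files distributed across the pairs
gluster volume create gv_dr replica 2 mystorage1:/storage/brick2 mystorage2:/storage/brick2 mystorage3:/storage/brick2 mystorage4:/storage/brick2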
2.2 Create a distributed volume
# gv1 is the volume name; the bricks on mystorage1 and mystorage2 are added. The command can be run on any one host and takes effect across the whole pool
[root@mystorage1 ~]# gluster volume create gv1 mystorage1:/storage/brick1 mystorage2:/storage/brick1 force
volume create: gv1: success: please start the volume to access data
2.3 Start the created volume
[root@mystorage1 ~]# gluster volume start gv1
volume start: gv1: success

# Check the volume info from mystorage4
[root@mystorage4 ~]# gluster volume info

Volume Name: gv1
Type: Distribute                 # volume type
Volume ID: d721ad47-3bfb-45fe-bc47-67ce11d19af9
Status: Started
Number of Bricks: 2              # number of bricks
Transport-type: tcp              # transport
Bricks:
Brick1: mystorage1:/storage/brick1
Brick2: mystorage2:/storage/brick1
Options Reconfigured:
performance.readdir-ahead: on
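Besides gluster volume info, the runtime state can be checked as well; the command below simply confirms the brick processes and their ports are online, and its output will vary per environment, so none is shown here:

# Show the runtime state (brick processes, ports, auxiliary daemons) of gv1
gluster volume status gv1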
2.4 Mount the volume to a directory
[root@mystorage1 ~]# mount -t glusterfs 127.0.0.1:/gv1 /mnt

# Check the mount; the gv1 volume has a capacity of 100G
[root@mystorage1 ~]# df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/sda2        38G   15G   21G  42% /
tmpfs           935M     0  935M   0% /dev/shm
/dev/sda1       283M   85M  183M  32% /boot
/dev/sdb         50G   33M   50G   1% /storage/brick1
127.0.0.1:/gv1  100G   65M  100G   1% /mnt

# Run the same mount command on mystorage2 and check there too
[root@mystorage2 ~]# df -h
Filesystem      Size  Used Avail Use% Mounted on
tmpfs           931M     0  931M   0% /dev/shm
/dev/sda1       477M   40M  412M   9% /boot
/dev/sdb         50G   33M   50G   1% /storage/brick1
127.0.0.1:/gv1  100G   65M  100G   1% /mnt

# Create a few files to test
[root@mystorage1 ~]# cd /mnt
[root@mystorage1 mnt]# touch aa bb cc dd

# Files created under /mnt on mystorage1 are also visible in the directory mounted on mystorage2
[root@mystorage2 ~]# ll /mnt
total 0
-rw-r--r-- 1 root root 0 Mar 20 14:05 aa
-rw-r--r-- 1 root root 0 Mar 20 14:05 bb
-rw-r--r-- 1 root root 0 Mar 20 14:05 cc
-rw-r--r-- 1 root root 0 Mar 20 14:05 dd

# Mounting on mystorage4 shows them as well
[root@mystorage4 ~]# mount -t glusterfs 127.0.0.1:/gv1 /mnt
[root@mystorage4 ~]# ll /mnt
total 0
-rw-r--r-- 1 root root 0 Mar 20 14:05 aa
-rw-r--r-- 1 root root 0 Mar 20 14:05 bb
-rw-r--r-- 1 root root 0 Mar 20 14:05 cc
-rw-r--r-- 1 root root 0 Mar 20 14:05 dd
After a distributed volume is created, the same files are visible from whichever server mounts it.
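If the client mount should survive a reboot, the GlusterFS FUSE mount can also be put into /etc/fstab; a minimal sketch assuming the same 127.0.0.1:/gv1 source and /mnt mount point used above (the _netdev option just delays the mount until networking is up):

# Mount the gv1 volume at boot, after the network is available
127.0.0.1:/gv1 /mnt glusterfs defaults,_netdev 0 0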
2.5 Mount via NFS
[root@mystorage4 ~]# umount /mnt
[root@mystorage4 ~]# mount -o mountproto=tcp -t nfs mystorage1:/gv1 /mnt/
[root@mystorage4 ~]# df -TH
Filesystem      Type   Size  Used Avail Use% Mounted on
                ext4    19G  2.6G   15G  15% /
tmpfs           tmpfs  977M     0  977M   0% /dev/shm
/dev/sda1       ext4   500M   42M  432M   9% /boot
/dev/sdb        xfs     54G   34M   54G   1% /storage/brick1
mystorage1:/gv1 nfs    108G   68M  108G   1% /mnt
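One caveat worth noting: Gluster's built-in NFS server speaks NFSv3 only, while some clients default to NFSv4. If the mount above fails with a protocol or version error, requesting version 3 explicitly is a reasonable thing to try (same hosts and paths as above, offered as a sketch rather than a guaranteed fix):

# Explicitly request NFSv3 over TCP
mount -t nfs -o vers=3,mountproto=tcp mystorage1:/gv1 /mnt/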
2.6 Create a replicated volume
# The number after 'replica' is how many copies of each file to keep
[root@mystorage1 mnt]# gluster volume create gv2 replica 2 mystorage3:/storage/brick1 mystorage4:/storage/brick1 force
volume create: gv2: success: please start the volume to access data

# Start the gv2 volume
[root@mystorage1 mnt]# gluster volume start gv2
volume start: gv2: success

# Mount gv2; the replicated volume shows a capacity of 50G. Like RAID 1, every file is stored twice
[root@mystorage1 mnt]# mount -t glusterfs 127.0.0.1:/gv2 /opt
[root@mystorage1 mnt]# df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/sda2        38G   15G   21G  42% /
tmpfs           935M     0  935M   0% /dev/shm
/dev/sda1       283M   85M  183M  32% /boot
/dev/sdb         50G   33M   50G   1% /storage/brick1
127.0.0.1:/gv1  100G   65M  100G   1% /mnt
127.0.0.1:/gv2   50G   33M   50G   1% /opt

[root@mystorage1 mnt]# gluster volume info gv2

Volume Name: gv2
Type: Replicate                  # replicated volume
Volume ID: 228f63c4-0219-4c39-8e87-f3ae237ff6d9
Status: Started
Number of Bricks: 1 x 2 = 2      # two bricks combined into one volume
Transport-type: tcp
Bricks:
Brick1: mystorage3:/storage/brick1
Brick2: mystorage4:/storage/brick1
Options Reconfigured:
performance.readdir-ahead: on
2.7 Test the result
# gv2 is a replicated volume built on the bricks of mystorage3 and mystorage4, so the new files do not land on mystorage1/mystorage2; mystorage3 and mystorage4 each hold a copy
[root@mystorage1 mnt]# cd /opt
[root@mystorage1 opt]# touch haha hehe
[root@mystorage1 opt]# ls /storage/brick1/
aa
[root@mystorage2 ~]# ls /storage/brick1/
bb  cc  dd
[root@mystorage3 ~]# ls /storage/brick1/
haha  hehe
[root@mystorage4 ~]# ls /storage/brick1/
haha  hehe
The result also shows that a distributed volume has no redundancy, while a replicated volume offers higher reliability.