一、背景
二、环境信息
三、实施前准备
四、扩容实施
-- root用户
scp -r ~/.ssh 192.168.200.12:~/
-- gbase用户
scp -r ~/.ssh 192.168.200.12:~/
-- root用户
vi /etc/c3.conf 添加
cluster new {
192.168.200.11:redhat1
192.168.200.12
}
-- 操作系统
cexec new_data: new: 'cat /etc/redhat-release'
-- 内核
cexec new_data: new: 'uname -a'
-- 防火墙
cexec new_data: new: 'service iptables status'
cexec new_data: new: 'service ip6tables status'
cexec new_data: new: 'chkconfig | grep iptables'
cexec new_data: new: 'chkconfig | grep ip6tables'
-- selinux
cexec new_data: new: 'sestatus'
cexec new_data: new: 'grep ^SELINUX= /etc/selinux/config'
-- 内存参数
cexec new_data: new: 'ulimit -H'
cexec new_data: new: 'ulimit -S'
cexec new_data: new: 'ulimit -m'
-- vi /etc/security/limits.conf 添加
-- * soft as unlimited
-- * hard as unlimited
-- * rss as unlimited
-- 透明大页
cexec new_data: new: 'cat /sys/kernel/mm/redhat_transparent_hugepage/enabled'
-- echo "never" > /sys/kernel/mm/redhat_transparent_hugepage/enabled
-- 主机名检查
cexec new_data: new: 'hostname'
-- 修改/etc/hosts
vi /etc/hosts 添加新增节点信息并同步到新增节点
-- 确认集群正常
gcadmin
gcadmin showddlevent
gcadmin showdmlevent
gcadmin showdmlstorageevent
-- 设置集群readonly
gcadmin switchmode readonly
-- scn tableid 备份
cexec 'python -c "import gcware;print gcware.getscn()"'
cexec 'python -c "import gcware;print gcware.gettableid()"'
-- 版本信息查看
cexec "/opt/gcluster/server/bin/gclusterd -V"
cexec "gcadmin -V"
cexec data: "/opt/gnode/server/bin/gbased -V"
cexec 'gccli -ugbase -pgbase20110531 -Nse "select @@version"'
-- 数据库信息备份
sh backup_database.sh
ls -l /home/gbase/gbase_expand/201811
-- nodedatamap备份
gccli -ugbase -pgbase20110531 -vvv -e"rmt:select * from gbase.nodedatamap into outfile '/home/gbase/gbase_expand/201811/nodedatamap.dat' fields terminated by '|'"
wc -l /home/gbase/gbase_expand/201811/nodedatamap.dat
-- 备份集群配置文件
cexec "mkdir -p /home/gbase/gbase_expand/201811/gcluster"
cexec "cp -r /opt/gcluster/config/ /home/gbase/gbase_expand/201811/gcluster/"
cexec "ls /home/gbase/gbase_expand/201811/gcluster/config"
cexec data: "mkdir -p /home/gbase/gbase_expand/201811/gnode"
cexec data: "cp -r /opt/gnode/config/ /home/gbase/gbase_expand/201811/gnode/"
cexec coor: "ls /home/gbase/gbase_expand/201811/gnode/config"
-- 备份corosync配置文件
cexec "cp -r /etc/corosync /home/gbase/gbase_expand/201811/"
cexec "ls /home/gbase/gbase_expand/201811/corosync | wc -l"
-- 备份gcware配置文件
cexec "cp -r /var/lib/gcware /home/gbase/gbase_expand/201811/"
cexec 'ls /home/gbase/gbase_expand/201811/gcware | wc -l'
-- 停止集群
cexec "service gcware stop"
-- 找到原来的升级包所在的目录
-- 修改demo.options
cd gcinstall/
vi demo.options
installPrefix= /opt
coordinateHost =
dataHost = 134.32.48.8,134.32.48.11,134.32.48.13,134.32.48.14,134.32.48.46,134.32.48.47,134.32.48.48,134.32.48.50
existCoordinateHost = 134.32.48.208,134.32.48.209,134.32.48.210,134.32.48.211,134.32.48.212,134.32.48.213,134.32.48.214,134.32.48.215,134.32.48.216,134.32.48.217,134.32.48.218,134.32.48.219,134.32.48.220,134.32.48.221,134.32.48.222,134.32.48.223,134.32.48.224,134.32.48.225,134.32.48.226,134.32.48.227
existDataHost =134.32.48.208,134.32.48.209,134.32.48.210,134.32.48.211,134.32.48.212,134.32.48.213,134.32.48.214,134.32.48.215,134.32.48.216,134.32.48.217,134.32.48.218,134.32.48.219,134.32.48.220,134.32.48.221,134.32.48.222,134.32.48.223,134.32.48.224,134.32.48.225,134.32.48.226,134.32.48.227
loginUser= root
loginUserPwd = ' Huawei#123'
#loginUserPwdFile = loginUserPwd.json
dbaUser = gbase
dbaGroup = gbase
dbaPwd = gbase
rootPwd = ' Huawei#123'
#rootPwdFile = rootPwd.json
dbRootPwd = 'Huawei@123'
#mcastAddr = 226.94.1.39
mcastPort = 5493
-- 执行扩容
./gcinstall.py --silent=demo.options
-- 配置文件对比
diff /opt/gcluster/config/gbase_8a_gcluster.cnf /home/gbase/gbase_expand/201811/gcluster/config/gbase_8a_gcluster.cnf
diff /opt/gnode/config/gbase_8a_gbase.cnf /home/gbase/gbase_expand/201811/gnode/config/gbase_8a_gbase.cnf
cexec data: md5sum /opt/gnode/config/gbase_8a_gbase.cnf
-- 生成新的distribution(备份方式)
gcadmin distribution gcChangeInfo.xml p 1 d 1
-- 生成新的hashmap
gccli -ugbase -pgbase20110531 -vvv -e"initnodedatamap"
-- 增删改查测试
create database db_test;
create table db_test.t1(c1 int,c2 int) distributed by ('c1');
insert into db_test.t1 values (1,1),(2,2),(3,3);
update db_test.t1 set c2=10 where c1=1;
select * from db_test.t1;
delete from db_test.t1 where c1>=3;
select * from db_test.t1;
truncate table db_test.t1;
-- 数据加载测试
load data infile 'sftp://gbase:gbase@192.168.200.11/tmp/t1.txt' into table db_test.t1 fields terminated by ':';
select count(1) from db_test.t1;
drop table db_test.t1;
drop database db_test;
五、数据重分布
-- 重分布
-- 设置重分布并发度为0
gccli -ugbase -pgbase20110531 -vvv -e"set global gcluster_rebalancing_concurrent_count=0"
gccli -ugbase -pgbase20110531 -Ns -e"select @@gcluster_rebalancing_concurrent_count"
-- 重分布整个实例
gccli -ugbase -pgbase20110531 -vvv -e"rebalance instance"
gccli -ugbase -pgbase20110531 -Ns -e"select count(1) from gclusterdb.rebalancing_status"
-- 调整优先级
create table test.reb_tab(db_name varchar(64),table_name varchar(64),priority int) replicated;
-- 插入优先级高的表
insert into test.reb_tab values ('test','t1',1),('test','t2',2);
update gclusterdb.rebalancing_status a, test.reb_tab b set a.priority=b.priority where a.db_name=b.db_name and a.table_name=b.table_name ;
select count(1) from gclusterdb.rebalancing_status where priority<5;
-- 调整重分布并发度
gccli -ugbase -pgbase20110531 -vvv -e"set global gcluster_rebalancing_concurrent_count=1"
gccli -ugbase -pgbase20110531 -Ns -e"select @@gcluster_rebalancing_concurrent_count"
-- 暂停重分布
gccli -ugbase -pgbase20110531 -vvv -e"pause rebalance instance"
gccli -ugbase -pgbase20110531 -Ns -e"select status,count(1) from gclusterdb.rebalancing_status group by 1"
-- 继续重分布
gccli -ugbase -pgbase20110531 -vvv -e"continue rebalance instance"
gccli -ugbase -pgbase20110531 -Ns -e"select status,count(1) from gclusterdb.rebalancing_status group by 1"
-- 等待重分布完成
-- 恢复业务
六、效率分析
注:因为有一张分布极不平均的表,全部数据落在一个节点,70个字段,75亿记录,13压缩,单个分片350GB。仅这一张表重分布就用了12小时。除掉这一张表以外,8801张表实际用时27小时(24日20:25~25日23:25),达到118MB/s,重分布速度远远超出预期。
七、经验总结