CentOS 7 建置 PostgreSQL複寫與故障轉移 Repmgr
17 min readOct 16, 2020
使用:repmgr 為 postgresql 配置複寫與故障轉移
參考:https://bbs.huaweicloud.com/blogs/183475
各Node安裝PostgreSQL 12 & repmgr12
# 安裝 PostgreSQL 12// 參考:https://computingforgeeks.com/how-to-install-postgresql-12-on-centos-7/sudo yum -y install https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpmsudo yum -y install epel-release yum-utilssudo yum-config-manager --enable pgdg12sudo yum install postgresql12-server postgresql12sudo yum install repmgr12# 關閉防火牆sudo firewall-cmd --state // 檢查狀態sudo systemctl stop firewalldsudo systemctl disable firewalld
Primary Node 配置
# 資料庫初始化
/usr/pgsql-12/bin/postgresql-12-setup initdb # 配置複寫
vi /var/lib/pgsql/12/data/postgresql.conf
# 新增以下配置
listen_addresses = '*'
max_wal_senders = 10
max_replication_slots = 10
wal_level = 'replica'
wal_log_hints = on
hot_standby = on
archive_mode = on
archive_command = '/bin/true'# 重啓服務
systemctl enable postgresql-12.service
systemctl restart postgresql-12.service# 創建repmgr資料庫與用戶
su - postgres
createuser --superuser repmgr
createdb --owner=repmgr repmgr
psql -c "ALTER USER repmgr SET search_path TO repmgr, public;"# postgres 加載 repmgr 套件
vi /var/lib/pgsql/12/data/postgresql.conf
shared_preload_libraries = 'repmgr' // 修改
repmgr 服務配置(Node 1 ~ 3)
# 配置文件
vi /etc/repmgr/12/repmgr.conf# Node1 -primarynode_id=1
node_name='node1'
conninfo='host=192.168.10.61 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/var/lib/pgsql/12/data'# Node2 -standynode_id=2
node_name='node2'
conninfo='host=192.168.10.62 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/var/lib/pgsql/12/data'# Node3 -standynode_id=3
node_name='node3'
conninfo='host=192.168.10.63 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/var/lib/pgsql/12/data'
配置Primary Node的pg_hba.conf
vi /var/lib/pgsql/12/data/pg_hba.conf#For Access Database
host all postgres 192.168.0.0/16 md5#For Replication
local replication repmgr trusthost replication repmgr 127.0.0.1/32 trusthost replication repmgr 192.168.10.0/24 trustlocal repmgr repmgr trusthost repmgr repmgr 127.0.0.1/32 trusthost repmgr repmgr 192.168.10.0/24 trust# 重啓服務
systemctl restart postgresql-12.service
Standby Node驗證能否訪問Primary Node
su - postgres -c "psql 'host=192.168.10.61 user=repmgr dbname=repmgr connect_timeout=2'"
向repmgr註冊Primary Node
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf primary register"
Clone Standby Node
# 預演
# Node 2 & Node 3
su - postgres -c"/usr/pgsql-12/bin/repmgr -h 192.168.10.61 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf standby clone --dry-run"<<<輸出Log>>>NOTICE: destination directory "/var/lib/pgsql/12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.10.61 user=repmgr dbname=repmgr
DETAIL: current installation size is 32 MB
INFO: "repmgr" extension is installed in database "repmgr"
INFO: parameter "max_wal_senders" set to 10
NOTICE: checking for available walsenders on the source node (2 required)
INFO: sufficient walsenders available on the source node
DETAIL: 2 required, 10 available
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: required number of replication connections could be made to the source server
DETAIL: 2 replication connections required
NOTICE: standby will attach to upstream node 1
HINT: consider using the -c/--fast-checkpoint option
INFO: all prerequisites for "standby clone" are met# 正式
# Node 2 & Node 3
su - postgres -c"/usr/pgsql-12/bin/repmgr -h 192.168.10.61 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf standby clone"<<<輸出Log>>>NOTICE: destination directory "/var/lib/pgsql/12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.10.61 user=repmgr dbname=repmgr
DETAIL: current installation size is 32 MB
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: checking and correcting permissions on existing directory "/var/lib/pgsql/12/data"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
/usr/pgsql-12/bin/pg_basebackup -l "repmgr base backup" -D /var/lib/pgsql/12/data -h 192.168.10.61 -p 5432 -U repmgr -X stream
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: pg_ctl -D /var/lib/pgsql/12/data start
HINT: after starting the server, you need to register this standby with "repmgr standby register"# Node2 & Node3 啟動PostgreSQL
systemctl enable postgresql-12.service
systemctl restart postgresql-12.service
向repmgr註冊Standby Node
# Node 2 & Node 3
su - postgres -c"/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf standby register"
檢查群集狀態
su - postgres -c"/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf cluster show"su - postgres -c"/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf cluster show --compact"
配置automatic failover服務 Node-witness
# 初始化
/usr/pgsql-12/bin/postgresql-12-setup initdb# 修改
vi /var/lib/pgsql/12/data/postgresql.conflisten_addresses = '*'
shared_preload_libraries = 'repmgr'# 修改
vi /var/lib/pgsql/12/data/pg_hba.conf
#For Access Database
host all postgres 192.168.0.0/16 md5#For Replication
local replication repmgr trusthost replication repmgr 127.0.0.1/32 trusthost replication repmgr 192.168.10.0/24 trustlocal repmgr repmgr trusthost repmgr repmgr 127.0.0.1/32 trusthost repmgr repmgr 192.168.10.0/24 trust# 重啓服務
systemctl enable postgresql-12.service
systemctl restart postgresql-12.service# 創建repmgr資料庫與用戶
su - postgres
createuser --superuser repmgr
createdb --owner=repmgr repmgr
psql -c "ALTER USER repmgr SET search_path TO repmgr, public;"# Primary連結witness節點測試
su - postgres -c "psql 'host=192.168.10.70 user=repmgr dbname=repmgr connect_timeout=2'"# 編輯
vi /etc/repmgr/12/repmgr.confnode_id=4
node_name='node4-witness'
conninfo='host=192.168.10.70 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/var/lib/pgsql/12/data'# 向repmgr註冊witness Node
su - postgres -c"/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf witness register -h 192.168.10.61"
所有Node編輯sudoers
vi /etc/sudoer # 新增
Defaults:postgres !requiretty postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-12.service, /usr/bin/systemctl start postgresql-12.service, /usr/bin/systemctl restart postgresql-12.service, /usr/bin/systemctl reload postgresql-12.service, /usr/bin/systemctl start repmgr12.service, /usr/bin/systemctl stop repmgr12.service
所有Node配置repmgr參數
failover='automatic'priority=100 ///////// node1=100, node2=60, node3=40, witness不需要connection_check_type=pingreconnect_attempts=6reconnect_interval=10promote_command='/usr/pgsql-12/bin/repmgr standby promote -f /etc/repmgr/12/repmgr.conf --log-to-file'follow_command='/usr/pgsql-12/bin/repmgr standby follow -f /etc/repmgr/12/repmgr.conf --log-to-file --upstream-node-id=%n'monitoring_history=yesmonitor_interval_secs=2standby_disconnect_on_failover=trueprimary_visibility_consensus=truelog_status_interval=60service_start_command = 'sudo /usr/bin/systemctl start postgresql-12.service'service_stop_command = 'sudo /usr/bin/systemctl stop postgresql-12.service'service_restart_command = 'sudo /usr/bin/systemctl restart postgresql-12.service'service_reload_command = 'sudo /usr/bin/systemctl reload postgresql-12.service'repmgrd_service_start_command = 'sudo /usr/bin/systemctl start repmgr12.service'repmgrd_service_stop_command = 'sudo /usr/bin/systemctl stop repmgr12.service'
啟動各Node的repmgr服務
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf daemon start"# 開啟常駐
sudo /usr/bin/systemctl enable repmgr12.service# 檢查 repmgr 服務
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf service status"# 這邊可能會遇到無法啟動的狀況,參考以下做法,然後再試一次
mkdir /run/repmgr
chown postgres:postgres /run/repmgr
測試failover功能
# 停用primary
systemctl stop postgresql-12# 查看服務
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf service status"
舊Primary故障恢復後,並不會自動切換為Standby,而是以Primary角色獨自運行,需要手動將舊Primary強制轉換為Standby
# 這時 postgreSQL狀態應該還是關閉的# 轉為Standby,向新的Primary註冊
su - postgres -c "/usr/pgsql-12/bin/repmgr -h 192.168.10.62 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf standby clone -F"# 啟動PostgreSQL
sudo systemctl start postgresql-12.service# 註冊
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf standby register -F"
— 完成 —