CentOS 7 建置 PostgreSQL複寫與故障轉移 Repmgr

hsuan-ming Yang
17 min readOct 16, 2020

使用:repmgr 為 postgresql 配置複寫與故障轉移

參考:https://bbs.huaweicloud.com/blogs/183475

各Node安裝PostgreSQL 12 & repmgr12

# 安裝 PostgreSQL 12// 參考:https://computingforgeeks.com/how-to-install-postgresql-12-on-centos-7/sudo yum -y install https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpmsudo yum -y install epel-release yum-utilssudo yum-config-manager --enable pgdg12sudo yum install postgresql12-server postgresql12sudo yum install repmgr12# 關閉防火牆sudo firewall-cmd --state  // 檢查狀態sudo systemctl stop firewalldsudo systemctl disable firewalld

Primary Node 配置

# 資料庫初始化
/usr/pgsql-12/bin/postgresql-12-setup initdb
# 配置複寫
vi /var/lib/pgsql/12/data/postgresql.conf
# 新增以下配置
listen_addresses = '*'
max_wal_senders = 10
max_replication_slots = 10
wal_level = 'replica'
wal_log_hints = on
hot_standby = on
archive_mode = on
archive_command = '/bin/true'
# 重啓服務
systemctl enable postgresql-12.service
systemctl restart postgresql-12.service
# 創建repmgr資料庫與用戶
su - postgres
createuser --superuser repmgr
createdb --owner=repmgr repmgr
psql -c "ALTER USER repmgr SET search_path TO repmgr, public;"
# postgres 加載 repmgr 套件
vi /var/lib/pgsql/12/data/postgresql.conf
shared_preload_libraries = 'repmgr' // 修改

repmgr 服務配置(Node 1 ~ 3)

# 配置文件
vi /etc/repmgr/12/repmgr.conf
# Node1 -primarynode_id=1
node_name='node1'
conninfo='host=192.168.10.61 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/var/lib/pgsql/12/data'
# Node2 -standynode_id=2
node_name='node2'
conninfo='host=192.168.10.62 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/var/lib/pgsql/12/data'
# Node3 -standynode_id=3
node_name='node3'
conninfo='host=192.168.10.63 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/var/lib/pgsql/12/data'

配置Primary Node的pg_hba.conf

vi /var/lib/pgsql/12/data/pg_hba.conf#For Access Database
host all postgres 192.168.0.0/16 md5
#For Replication
local replication repmgr trust
host replication repmgr 127.0.0.1/32 trusthost replication repmgr 192.168.10.0/24 trustlocal repmgr repmgr trusthost repmgr repmgr 127.0.0.1/32 trusthost repmgr repmgr 192.168.10.0/24 trust# 重啓服務
systemctl restart postgresql-12.service

Standby Node驗證能否訪問Primary Node

su - postgres -c "psql 'host=192.168.10.61 user=repmgr dbname=repmgr connect_timeout=2'"

向repmgr註冊Primary Node

su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf primary register"

Clone Standby Node

# 預演
# Node 2 & Node 3
su - postgres -c"/usr/pgsql-12/bin/repmgr -h 192.168.10.61 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf standby clone --dry-run"
<<<輸出Log>>>NOTICE: destination directory "/var/lib/pgsql/12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.10.61 user=repmgr dbname=repmgr
DETAIL: current installation size is 32 MB
INFO: "repmgr" extension is installed in database "repmgr"
INFO: parameter "max_wal_senders" set to 10
NOTICE: checking for available walsenders on the source node (2 required)
INFO: sufficient walsenders available on the source node
DETAIL: 2 required, 10 available
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: required number of replication connections could be made to the source server
DETAIL: 2 replication connections required
NOTICE: standby will attach to upstream node 1
HINT: consider using the -c/--fast-checkpoint option
INFO: all prerequisites for "standby clone" are met
# 正式
# Node 2 & Node 3
su - postgres -c"/usr/pgsql-12/bin/repmgr -h 192.168.10.61 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf standby clone"
<<<輸出Log>>>NOTICE: destination directory "/var/lib/pgsql/12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=192.168.10.61 user=repmgr dbname=repmgr
DETAIL: current installation size is 32 MB
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: checking and correcting permissions on existing directory "/var/lib/pgsql/12/data"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
/usr/pgsql-12/bin/pg_basebackup -l "repmgr base backup" -D /var/lib/pgsql/12/data -h 192.168.10.61 -p 5432 -U repmgr -X stream
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: pg_ctl -D /var/lib/pgsql/12/data start
HINT: after starting the server, you need to register this standby with "repmgr standby register"
# Node2 & Node3 啟動PostgreSQL
systemctl enable postgresql-12.service
systemctl restart postgresql-12.service

向repmgr註冊Standby Node

# Node 2 & Node 3
su - postgres -c"/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf standby register"

檢查群集狀態

su - postgres -c"/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf cluster show"su - postgres -c"/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf cluster show --compact"

配置automatic failover服務 Node-witness

# 初始化
/usr/pgsql-12/bin/postgresql-12-setup initdb
# 修改
vi /var/lib/pgsql/12/data/postgresql.conf
listen_addresses = '*'
shared_preload_libraries = 'repmgr'
# 修改
vi /var/lib/pgsql/12/data/pg_hba.conf
#For Access Database
host all postgres 192.168.0.0/16 md5
#For Replication
local replication repmgr trust
host replication repmgr 127.0.0.1/32 trusthost replication repmgr 192.168.10.0/24 trustlocal repmgr repmgr trusthost repmgr repmgr 127.0.0.1/32 trusthost repmgr repmgr 192.168.10.0/24 trust# 重啓服務
systemctl enable postgresql-12.service
systemctl restart postgresql-12.service
# 創建repmgr資料庫與用戶
su - postgres
createuser --superuser repmgr
createdb --owner=repmgr repmgr
psql -c "ALTER USER repmgr SET search_path TO repmgr, public;"
# Primary連結witness節點測試
su - postgres -c "psql 'host=192.168.10.70 user=repmgr dbname=repmgr connect_timeout=2'"
# 編輯
vi /etc/repmgr/12/repmgr.conf
node_id=4
node_name='node4-witness'
conninfo='host=192.168.10.70 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/var/lib/pgsql/12/data'
# 向repmgr註冊witness Node
su - postgres -c"/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf witness register -h 192.168.10.61"

所有Node編輯sudoers

vi /etc/sudoer # 新增
Defaults:postgres !requiretty postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-12.service, /usr/bin/systemctl start postgresql-12.service, /usr/bin/systemctl restart postgresql-12.service, /usr/bin/systemctl reload postgresql-12.service, /usr/bin/systemctl start repmgr12.service, /usr/bin/systemctl stop repmgr12.service

所有Node配置repmgr參數

failover='automatic'priority=100 ///////// node1=100, node2=60, node3=40, witness不需要connection_check_type=pingreconnect_attempts=6reconnect_interval=10promote_command='/usr/pgsql-12/bin/repmgr standby promote -f /etc/repmgr/12/repmgr.conf --log-to-file'follow_command='/usr/pgsql-12/bin/repmgr standby follow -f /etc/repmgr/12/repmgr.conf --log-to-file --upstream-node-id=%n'monitoring_history=yesmonitor_interval_secs=2standby_disconnect_on_failover=trueprimary_visibility_consensus=truelog_status_interval=60service_start_command = 'sudo /usr/bin/systemctl start postgresql-12.service'service_stop_command = 'sudo /usr/bin/systemctl stop postgresql-12.service'service_restart_command = 'sudo /usr/bin/systemctl restart postgresql-12.service'service_reload_command = 'sudo /usr/bin/systemctl reload postgresql-12.service'repmgrd_service_start_command = 'sudo /usr/bin/systemctl start repmgr12.service'repmgrd_service_stop_command = 'sudo /usr/bin/systemctl stop repmgr12.service'

啟動各Node的repmgr服務

su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf daemon start"# 開啟常駐
sudo /usr/bin/systemctl enable repmgr12.service
# 檢查 repmgr 服務
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf service status"
# 這邊可能會遇到無法啟動的狀況,參考以下做法,然後再試一次
mkdir /run/repmgr
chown postgres:postgres /run/repmgr
repmgrd 狀態

測試failover功能

# 停用primary 
systemctl stop postgresql-12
# 查看服務
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf service status"
過一會primary轉移到node2

舊Primary故障恢復後,並不會自動切換為Standby,而是以Primary角色獨自運行,需要手動將舊Primary強制轉換為Standby

# 這時 postgreSQL狀態應該還是關閉的# 轉為Standby,向新的Primary註冊
su - postgres -c "/usr/pgsql-12/bin/repmgr -h 192.168.10.62 -U repmgr -d repmgr -f /etc/repmgr/12/repmgr.conf standby clone -F"
# 啟動PostgreSQL
sudo systemctl start postgresql-12.service
# 註冊
su - postgres -c "/usr/pgsql-12/bin/repmgr -f /etc/repmgr/12/repmgr.conf standby register -F"

— 完成 —

--

--