[草稿]基于Patroni搭建PostgreSQL高可用环境
[TOC]
准备工作
- 准备3台机器
主机名 | ip地址 | os版本 | pg版本 | pg端口 | etcd版本 | patroni版本 | patroni角色 |
---|---|---|---|---|---|---|---|
pg_node1 | 192.168.31.101 | CentOS 7.9 | 16.6 | 1921 | 3.5.9 | 4.0.4 | Leader |
pg_node2 | 192.168.31.102 | CentOS 7.9 | 16.6 | 1921 | 3.5.9 | 4.0.4 | standby1 |
pg_node3 | 192.168.31.103 | CentOS 7.9 | 16.6 | 1921 | 3.5.9 | 4.0.4 | standby2 |
- 在root用户下,根据实际情况修改安装过程要用到的环境参数
#.示例.定义3个node节点
cat > /opt/.pgha.config <<EOF
workdir=/data
hostname_node1=pg_node1
hostname_node2=pg_node2
hostname_node3=pg_node3
ip_node1=192.168.31.101
ip_node2=192.168.31.102
ip_node3=192.168.31.103
pg_port1=1921
pg_port2=1921
pg_port3=1921
pg_user=postgres
pg_pass=Admin_147
EOF
- 所有节点:系统初始化(可重复执行)
#.获取配置文件中的参数
source /opt/.pgha.config
#.1.配置dns并安装基础软件
sed -i 's/^nameserver.*/nameserver 223.5.5.5/g' /etc/resolv.conf
cat /etc/resolv.conf | grep "^nameserver" > /dev/null || echo "nameserver 223.5.5.5" > /etc/resolv.conf
yum install -y -q curl wget ntp ntpdate lrzsz telnet zip unzip net-tools
#.2.修改时区并同步时间
timedatectl set-timezone Asia/Shanghai
/usr/sbin/ntpdate -u ntp.aliyun.com
#.3.建议禁用firewalld否则重启后需要iptable -F清除防火墙策略
/usr/sbin/iptables -F
systemctl stop firewalld.service
systemctl disable firewalld.service
systemctl status firewalld.service
#.4.建议关闭selinux否则会限制服务
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
setenforce 0
#.5.修改机器名
if [ `ip a|grep $ip_node1|wc -l` -eq 1 ];then hostnamectl set-hostname $hostname_node1; fi
if [ `ip a|grep $ip_node2|wc -l` -eq 1 ];then hostnamectl set-hostname $hostname_node2; fi
if [ `ip a|grep $ip_node3|wc -l` -eq 1 ];then hostnamectl set-hostname $hostname_node3; fi
#.6.修改hosts
cat /etc/hosts | grep $ip_node1 || echo "$ip_node1 $hostname_node1" >> /etc/hosts
cat /etc/hosts | grep $ip_node2 || echo "$ip_node2 $hostname_node2" >> /etc/hosts
cat /etc/hosts | grep $ip_node3 || echo "$ip_node3 $hostname_node3" >> /etc/hosts
- 所有节点:安装相关依赖包,【待测】
yum install -y readline readline-devel zlib zlib-devel gettext gettext-devel openssl openssl-devel pam pam-devel \
libxml2 libxml2-devel libxslt libxslt-devel perl perl-devel tcl-devel uuid-devel gcc gcc-c++ make flex bison perl-ExtUtils*
部署过程
安装 postgresql
- 所有节点:安装 postgresql-16.6
#.安装依赖
yum install -y gcc libicu-devel zlib-devel readline-devel perl-ExtUtils-Embed openssl openssl-devel tcl tcl-devel
#.编译安装
cd /opt/
wget -c http://iso.sqlfans.cn/postgresql/postgresql-16.6.tar.gz
tar xvf postgresql-16.6.tar.gz
cd postgresql-16.6
./configure --prefix=/opt/pgsql --with-perl --with-tcl --with-openssl --without-readline
make && make install
#.创建启动用户
groupadd postgres
useradd -g postgres postgres
echo "Admin_147" | passwd postgres --stdin
#.创建数据目录
mkdir -p /data/pgsql_1921
chown -R postgres:postgres /data/pgsql_1921
chmod -R 775 /data/pgsql_1921
#.初始化及启动
su - postgres -c "/opt/pgsql/bin/initdb -E UNICODE -D /data/pgsql_1921"
su - postgres -c '/opt/pgsql/bin/pg_ctl -D /data/pgsql_1921 -l logfile start'
#.优化默认参数-先取消注释
cp /data/pgsql_1921/postgresql.conf /data/pgsql_1921/postgresql.conf.$(date +%Y%m%d%H%M%S)
sed -i "s/^#port/port/" /data/pgsql_1921/postgresql.conf
sed -i "s/^#listen_addresses/listen_addresses/" /data/pgsql_1921/postgresql.conf
sed -i "s/^#max_connections/max_connections/" /data/pgsql_1921/postgresql.conf
sed -i "s/^#shared_buffers/shared_buffers/" /data/pgsql_1921/postgresql.conf
#.优化默认参数-再修改数值,示例端口改为 1921
sed -i "s/^port.*$/port=1921/g" /data/pgsql_1921/postgresql.conf
sed -i "s/^listen_addresses.*$/listen_addresses='*'/g" /data/pgsql_1921/postgresql.conf
sed -i "s/^max_connections.*$/max_connections=3000/g" /data/pgsql_1921/postgresql.conf
sed -i "s/^shared_buffers.*$/shared_buffers=512MB/g" /data/pgsql_1921/postgresql.conf
cat /data/pgsql_1921/postgresql.conf | egrep "(^port|^listen_addresses|^max_connections|^shared_buffers)"
#.允许所有ip访问
cp /data/pgsql_1921/pg_hba.conf /data/pgsql_1921/pg_hba.conf.$(date +%Y%m%d%H%M%S)
echo "host all all 0.0.0.0/0 md5" >> /data/pgsql_1921/pg_hba.conf
#.重启服务生效
su - postgres -c '/opt/pgsql/bin/pg_ctl -D /data/pgsql_1921 -l logfile stop'
su - postgres -c '/opt/pgsql/bin/pg_ctl -D /data/pgsql_1921 -l logfile start'
#.登陆测试
/opt/pgsql/bin/psql -h 127.0.0.1 -p 1921 -U postgres -d postgres -c "SELECT * FROM pg_shadow;"
搭建 pg 主从流复制
- 参考《pg主从流复制搭建》
安装 etcd 集群
- Etcd是⼀款基于Raft算法和协议开发的分布式 key-value 数据库,Patroni监控本地的PostgreSQL状态,并将相关信息写⼊Etcd,每个Patroni都能读写Etcd上的key,从⽽获取外地PostgreSQL数据库信息。当Etcd的leader节点不可⽤时,Etcd会⼀致性的选择⼀个合适的节点作为主节点,新的Etcd主节点将获取leader key
- 所有节点:安装 etcd-3.5.9
#.安装etcd-3.5.9
curl -sL http://iso.sqlfans.cn/linux/etcd-3.5.9-1.rhel7.x86_64.rpm -o /opt/etcd-3.5.9-1.rhel7.x86_64.rpm
yum localinstall -y /opt/etcd-3.5.9-1.rhel7.x86_64.rpm
- 节点1:修改 etcd 配置
#.获取配置文件中的参数
source /opt/.pgha.config
#.修改配置
cat > /etc/etcd/etcd.conf <<EOF
#leader
ETCD_NAME="$hostname_node1"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_CLIENT_URLS="http://127.0.0.1:2379,http://$ip_node1:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://127.0.0.1:2379,http://$ip_node1:2379"
ETCD_LISTEN_PEER_URLS="http://$ip_node1:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://$ip_node1:2380"
ETCD_INITIAL_CLUSTER="$hostname_node1=http://$ip_node1:2380,$hostname_node2=http://$ip_node2:2380,$hostname_node3=http://$ip_node3:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
- 节点2:修改 etcd 配置
#.获取配置文件中的参数
source /opt/.pgha.config
#.修改配置
cat > /etc/etcd/etcd.conf <<EOF
#standby1
ETCD_NAME="$hostname_node2"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_CLIENT_URLS="http://127.0.0.1:2379,http://$ip_node2:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://127.0.0.1:2379,http://$ip_node2:2379"
ETCD_LISTEN_PEER_URLS="http://$ip_node2:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://$ip_node2:2380"
ETCD_INITIAL_CLUSTER="$hostname_node1=http://$ip_node1:2380,$hostname_node2=http://$ip_node2:2380,$hostname_node3=http://$ip_node3:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
- 节点3:修改 etcd 配置
#.获取配置文件中的参数
source /opt/.pgha.config
#.修改配置
cat > /etc/etcd/etcd.conf <<EOF
#standby2
ETCD_NAME="$hostname_node3"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_CLIENT_URLS="http://127.0.0.1:2379,http://$ip_node3:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://127.0.0.1:2379,http://$ip_node3:2379"
ETCD_LISTEN_PEER_URLS="http://$ip_node3:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://$ip_node3:2380"
ETCD_INITIAL_CLUSTER="$hostname_node1=http://$ip_node1:2380,$hostname_node2=http://$ip_node2:2380,$hostname_node3=http://$ip_node3:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
- 所有节点:启动 etcd 服务
systemctl daemon-reload
systemctl start etcd.service
systemctl status etcd.service
- 任意节点:确认etcd集群状态
[root@pg_node1 ~]# #.查看集群成员信息
[root@pg_node1 ~]# etcdctl member list --write-out=table
+------------------+---------+----------+----------------------------+--------------------------------------------------+------------+
| ID | STATUS | NAME | PEER ADDRS | CLIENT ADDRS | IS LEARNER |
+------------------+---------+----------+----------------------------+--------------------------------------------------+------------+
| 2d248eb39654ed7 | started | pg_node1 | http://192.168.31.101:2380 | http://127.0.0.1:2379,http://192.168.31.101:2379 | false |
| 1e19931c2af12589 | started | pg_node2 | http://192.168.31.102:2380 | http://127.0.0.1:2379,http://192.168.31.102:2379 | false |
| 291b0bcd9cf9a651 | started | pg_node3 | http://192.168.31.103:2380 | http://127.0.0.1:2379,http://192.168.31.103:2379 | false |
+------------------+---------+----------+----------------------------+--------------------------------------------------+------------+
[root@pg_node1 ~]# #.查看集群状态
[root@pg_node1 ~]# etcdctl --endpoints=http://$ip_node1:2379,http://$ip_node2:2379,http://$ip_node3:2379 endpoint status --write-out=table
+----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| http://192.168.31.101:2379 | 2d248eb39654ed7 | 3.5.9 | 20 kB | true | false | 4 | 10 | 10 | |
| http://192.168.31.102:2379 | 1e19931c2af12589 | 3.5.9 | 25 kB | false | false | 4 | 10 | 10 | |
| http://192.168.31.103:2379 | 291b0bcd9cf9a651 | 3.5.9 | 20 kB | false | false | 4 | 10 | 10 | |
+----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
安装 watchdog 防止脑裂
yum install -y watchdog
sed -ri 's/#watchdog-device/watchdog-device/' /etc/watchdog.conf
mknod /dev/watchdog c 10 130
modprobe softdog
chown postgres /dev/watchdog
安装 patroni
- 所有节点:安装 patroni-4.0.4
yum install -y python3 python3-devel python3-pip gcc libpq-devel
export PGPORT=1921
export PGUSER=postgres
export PGHOME=/opt/pgsql
export PGDATA=/opt/pgsql_1921
export LD_LIBRARY_PATH=$PGHOME/lib
export PATH=$PGHOME/bin:$PATH
pip3 install --upgrade pip
pip3 install psycopg2-binary
pip3 install patroni[etcd]
pip3 install patroni
pip3 list|grep "psycopg2-binary"
#.确认版本
/usr/local/bin/patroni --version
配置 patroni
- 节点1:修改 patroni 配置
#.获取配置文件中的参数
source /opt/.pgha.config
#.修改配置
mkdir -p /etc/patroni
cat > /etc/patroni/patroni.yml << EOF
#leader
scope: pg_cluster
namespace: /service/
name: pg1
restapi:
listen: 0.0.0.0:8008
connect_address: $ip_node1:8008
etcd:
hosts: $ip_node1:2379,$ip_node2:2379,$ip_node3:2379
bootstrap:
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
master_start_timeout: 300
synchronous_mode: false
postgresql:
use_pg_rewind: true
use_slots: true
parameters:
users:
admin:
password: admin
options:
- createrole
- createdb
postgresql:
listen: 0.0.0.0:1921
connect_address: $ip_node1:1921
data_dir: /opt/pgsql_1921
bin_dir: /opt/pgsql/bin
pgpass: /tmp/pgpass
authentication:
replication:
username: repuser
password: "jw8s0F4"
superuser:
username: postgres
password: "$pg_pass"
watchdog:
mode: required
device: /dev/watchdog
safety_margin: 5
tags:
nofailover: false
noloadbalance: false
clonefrom: false
nosync: false
EOF
- 节点2:修改 patroni 配置
#.获取配置文件中的参数
source /opt/.pgha.config
#.修改配置
mkdir -p /etc/patroni
cat > /etc/patroni/patroni.yml << EOF
#standby1
scope: pg_cluster
namespace: /service/
name: pg1
restapi:
listen: 0.0.0.0:8008
connect_address: $ip_node2:8008
etcd:
hosts: $ip_node1:2379,$ip_node2:2379,$ip_node3:2379
bootstrap:
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
master_start_timeout: 300
synchronous_mode: false
postgresql:
use_pg_rewind: true
use_slots: true
parameters:
users:
admin:
password: admin
options:
- createrole
- createdb
postgresql:
listen: 0.0.0.0:1921
connect_address: $ip_node2:1921
data_dir: /opt/pgsql_1921
bin_dir: /opt/pgsql/bin
pgpass: /tmp/pgpass
authentication:
replication:
username: repuser
password: "jw8s0F4"
superuser:
username: postgres
password: "$pg_pass"
watchdog:
mode: required
device: /dev/watchdog
safety_margin: 5
tags:
nofailover: false
noloadbalance: false
clonefrom: false
nosync: false
EOF
- 节点3:修改 patroni 配置
#.获取配置文件中的参数
source /opt/.pgha.config
#.修改配置
mkdir -p /etc/patroni
cat > /etc/patroni/patroni.yml << EOF
#standby2
scope: pg_cluster
namespace: /service/
name: pg1
restapi:
listen: 0.0.0.0:8008
connect_address: $ip_node3:8008
etcd:
hosts: $ip_node1:2379,$ip_node2:2379,$ip_node3:2379
bootstrap:
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
master_start_timeout: 300
synchronous_mode: false
postgresql:
use_pg_rewind: true
use_slots: true
parameters:
users:
admin:
password: admin
options:
- createrole
- createdb
postgresql:
listen: 0.0.0.0:1921
connect_address: $ip_node3:1921
data_dir: /opt/pgsql_1921
bin_dir: /opt/pgsql/bin
pgpass: /tmp/pgpass
authentication:
replication:
username: repuser
password: "jw8s0F4"
superuser:
username: postgres
password: "$pg_pass"
watchdog:
mode: required
device: /dev/watchdog
safety_margin: 5
tags:
nofailover: false
noloadbalance: false
clonefrom: false
nosync: false
EOF
- 所有节点:修改 patroni 服务配置
cat > /etc/systemd/system/patroni.service << EOF
[Unit]
Description=patroni - a high-availability PostgreSQL
Documentation=https://patroni.readthedocs.io/en/latest/index.html
After=syslog.target network.target etcd.target
Wants=network-online.target
[Service]
Type=simple
User=postgres
Group=postgres
PermissionsStartOnly=true
Environment="LD_LIBRARY_PATH=/opt/pgsql/lib"
ExecStart=/usr/local/bin/patroni /etc/patroni/patroni.yml
ExecReload=/bin/kill -HUP $MAINPID
LimitNOFILE=65536
KillMode=process
KillSignal=SIGINT
Restart=on-abnormal
RestartSec=30s
TimeoutSec=0
[Install]
WantedBy=multi-user.target
EOF
- 所有节点:启动 patroni 服务,启动顺序必须是:主库 > 从库1 > 从库2
#.启动服务
systemctl daemon-reload
systemctl start patroni.service
systemctl status patroni.service
- 任意节点:查看patroni集群状态
[root@pg_node2 ~]# patronictl -c /etc/patroni/patroni.yml list
+----------+---------------------+--------------+-----------+----+-----------+
| Member | Host | Role | State | TL | Lag in MB |
+----------+---------------------+--------------+-----------+----+-----------+
| pg_node1 | 192.168.31.101:1921 | Leader | running | 7 | |
| pg_node2 | 192.168.31.102:1921 | Sync Standby | streaming | 7 | 0 |
| pg_node3 | 192.168.31.103:1921 | Replica | streaming | 7 | 0 |
+--------+---------------------+--------------+-----------+----+-----------+
- 主节点上查看复制状态
postgres=# select pid,usesysid,usename,application_name,client_addr,backend_start,state,sync_priority,sync_state from pg_stat_replication;
pid | usesysid | usename | application_name | client_addr | backend_start | state | sync_priority | sync_state
-------+----------+---------+------------------+----------------+-------------------------------+-----------+---------------+-----------
27254 | 16385 | repuser | pg16 | 192.168.31.101 | 2024-12-13 19:33:29.367051+08 | streaming | 1 | sync
27309 | 16385 | repuser | pg16 | 192.168.31.102 | 2024-12-13 19:38:30.09852+08 | streaming | 0 | async
测试
手动切换测试
- 在守护者集群中,当领导节点因意外原因不可用时,会自动执行故障转移。如果要手动测试守护者集群故障转移,可以使用以下命令启动到副本节点的故障转移
export PGPORT=1921
export PGUSER=postgres
export PGHOME=/opt/pgsql
export PGDATA=/opt/pgsql_1921
export LD_LIBRARY_PATH=$PGHOME/lib
export PATH=$PGHOME/bin:$PATH
patronictl -c /etc/patroni/patroni.yml list #.查询当前集群状态
patronictl -c /etc/patroni/patroni.yml failover #.手动切换后选举新的领导节点
patronictl -c /etc/patroni/patroni.yml list #.再次查询当前集群状态
patronictl -c /etc/patroni/patroni.yml history #.查询切换记录