基于公司现有最常用业务OLTP数据库MySQL, 8.4 做多场景数据恢复
基础测试环境:
PVE虚拟机
RockyLinux 9.4
MySQL 8.4
操作系统安装
yum install -y vim wget tar
关闭selinux
查看状态
getenforce
Enforcing:强制模式,代表SELinux运行中,且已经正确开始限制domain/type
Permissive:宽容模式,代表SELinux运行中,不过仅会有警告信息并不会实际限制domain/type的读写(这种模式可以用来作为SELinux的debuug之用)
Disabled:关闭模式,SELinux并没有实际运行
临时关闭
setenforce 0
永久关闭
vim /etc/selinux/config
SELINUX=disabled
reboot
关闭swap
swapon --show
临时关闭
swapoff /dev/dm-1
永久关闭
vim /etc/fstab
注释掉
UUID=xxxx-xxxx none swap sw 0 0 或者 /dev/sdXN none swap sw 0 0
更新 grub
grub2-mkconfig -o /boot/grub2/grub.cfg
重启系统
reboot
关闭防火墙
systemctl status firewalld
systemctl stop firewalld
systemctl disable firewalld
修改内核参数
vim /etc/sysctl.conf
fs.aio-max-nr= 1048576
fs.file-max= 6815744
kernel.shmall= 2097152
kernel.shmmax= 4294967295
kernel.shmmni= 4096
kernel.sem= 250 32000 100 128
net.ipv4.ip_local_port_range= 9000 65500
net.core.rmem_default= 262144
net.core.rmem_max= 4194304
net.core.wmem_default= 262144
net.core.wmem_max= 1048576
vm.swappiness = 0
生效
/sbin/sysctl -p
/sbin/sysctl -a
修改资源使用参数
vim /etc/security/limits.conf
# *表示所有用户,可只设置root和kingbase用户
* soft nofile 65536
# 注意:设置nofile的hard limit不能大于/proc/sys/fs/nr_open,否则注销后将无法正常登陆
* hard nofile 65535
* soft nproc 65536
* hard nproc 65535
# unlimited表示无限制
* soft core unlimited
* hard core unlimited
RemoveIPC参数
systemctl daemon-reload
systemctl restart systemd-logind.service
4.时区设置
export TZ=CST-8
yum install ntpdate -y
ntpdate cn.ntp.org.cn
5.关闭透明大页
echo never > /sys/kernel/mm/transparent_hugepage/enabled
数据盘挂载
lsblk
fdisk /dev/sdb
n
p
#1
t
#1
8e
w
fdisk -l
pvcreate /dev/sdb1
pvs
vgcreate vgmysql /dev/sdb1
vgs
lvcreate -L 99.9G -n lvmysql vgmysql
lvs
mkfs.ext4 /dev/vgmysql/lvmysql
mkdir /mysqldata
mount /dev/mapper/vgmysql-lvmysql /mysqldata
vim /etc/fstab
/dev/mapper/vgmysql-lvmysql /mysqldata ext4 defaults 0 0
重启操作系统
reboot
数据库安装
wget https://dev.mysql.com/get/mysql84-community-release-el9-1.noarch.rpm
yum localinstall -y mysql84-community-release-el9-1.noarch.rpm
yum repolist enabled | grep mysql.*-community
yum repolist all | grep mysql
dnf config-manager --enable mysql-8.4-lts-community
yum install -y mysql-community-server
mkdir -p /mysqldata/{data,logs}
touch /mysqldata/logs/slow.log
touch /mysqldata/logs/mysqld.log
touch /mysqldata/logs/error.log
chmod 640 /mysqldata/logs/error.log
chmod 640 /mysqldata/logs/slow.log
chown -R mysql:mysql /mysqldata
chmod 750 -R /mysqldata/
编译配置文件
# chmod 644 /etc/my.cnf
vim /etc/my.cnf
datadir=/mysqldata/data
port=3306
socket=/var/lib/mysql/mysql.sock
pid-file=/var/run/mysqld/mysqld.pid
server_id=1
autocommit=1
log_error=/mysqldata/logs/error.log
lower_case_table_names=1
slow_query_log=1
slow_query_log_file=/mysqldata/logs/slow.log
long_query_time=1
innodb_buffer_pool_size=6G
max_connections=512
wait_timeout=1800
performance_schema=1
local_infile=1
default-time_zone = '+8:00'
# event_scheduler=ON
# sql_mode=''
启动数据库
systemctl start mysqld
#获取临时密码
cat /mysqldata/logs/error.log|grep pass|awk '{print $NF}'
#更新root密码
ALTER USER 'root'@'localhost' IDENTIFIED BY 'Lunz@2017';
# 允许root用户远程连接
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION;
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'Lunz@2017' WITH GRANT OPTION;
use mysql;
update user set Host = '%' where User='root';
FLUSH PRIVILEGES;
#创建数据库
CREATE DATABASE whcenter;
#创建用户
CREATE USER 'user_wh'@'%' IDENTIFIED BY 'Lunz@2017';
CREATE USER 'user_backup'@'%' IDENTIFIED BY 'Lunz@2017';
#分配权限
GRANT ALL ON whcenter.* TO 'user_wh'@'%';
GRANT SELECT,PROCESS,LOCK TABLES ON *.* TO 'user_backup'@'%';
FLUSH PRIVILEGES;
#迁移测试数据至MySQL
本步骤使用navicat
#查看数据目录磁盘占用空间大小
SELECT CONCAT(TABLE_SCHEMA, '.', TABLE_NAME) AS `Table`,
TABLE_ROWS AS `Rows`,
ROUND(((data_length + index_length) / 1024 / 1024), 2) AS `Size (MB)`
FROM information_schema.TABLES
WHERE TABLE_SCHEMA='whcenter';
+---------------------------------+-------+-----------+
| Table | Rows | Size (MB) |
+---------------------------------+-------+-----------+
| whcenter.basic_datadictionary | 68 | 0.05 |
| whcenter.basic_district | 3592 | 0.81 |
| whcenter.tb_instockdetail | 8413 | 4.89 |
| whcenter.tb_instockdetailextra | 4824 | 2.17 |
| whcenter.tb_instockinfo | 6599 | 5.38 |
| whcenter.tb_outstockdetail | 32628 | 18.13 |
| whcenter.tb_outstockdetailextra | 12287 | 5.92 |
| whcenter.tb_outstockinfo | 23996 | 27.70 |
| whcenter.tb_stockinfo | 3829 | 1.91 |
| whcenter.tb_supplier | 785 | 0.17 |
| whcenter.tb_warehouse | 7775 | 3.70 |
+---------------------------------+-------+-----------+
df -hT
Filesystem Type Size Used Avail Use% Mounted on
devtmpfs devtmpfs 4.0M 0 4.0M 0% /dev
tmpfs tmpfs 3.8G 0 3.8G 0% /dev/shm
tmpfs tmpfs 1.5G 8.5M 1.5G 1% /run
/dev/mapper/rl-root xfs 44G 1.9G 43G 5% /
/dev/sda1 xfs 960M 225M 736M 24% /boot
/dev/mapper/vgmysql-lvmysql ext4 98G 359M 93G 1% /mysqldata
tmpfs tmpfs 759M 0 759M 0% /run/user/0
du -h --max-depth=1 *
116K data/sys
820K data/#innodb_temp
1.7M data/performance_schema
101M data/#innodb_redo
36K data/mysql
132M data/whcenter
359M data
12K logs
16K lost+found
#部署备份脚本(这里重点强调表级备份,方便按表还原,可大大缩短还原时间)
#!/bin/bash
# mysqlbackup
# author: wangcw
# Generated: 2022-08-19 17:30:46
# 1:00 am every full backup crontab
# 0 01 * * * /mysqldata/mysqlbak/mysqlbak_all.sh
#
# 备份数据库账号需有SELECT,PROCESS,LOCK TABLES权限
# GRANT SELECT,PROCESS,LOCK TABLES ON *.* TO '$user_backup'@'%';
# FLUSH PRIVILEGES;
# 安装必要工具
# mysql/mysqldump/jq
# dnf -y install mysql #注意mysql客户端版本
# dnf -y install holland-mysqldump.noarch
# dnf -y install jq
#
# 备份目标可选
# 1,rsync到另一台linux;
# 2,samba挂载到备份机
# 3,上传至云服务
#备份服务器配置
db_host="172.16.1.10"
db_user="user_backup"
db_pass="Lunz@2017"
db_port="3306"
db_except="information_schema|performance_schema|sys|mysql"
bak_dir="/mysqldata/mysqlbak"
bak_date="$(date +%Y%m%d)"
bak_day=7
bak_log=${bak_dir}/mysqlbak.log
#文件夹创建
if [ ! -d "$bak_dir" ]; then
mkdir -p $bak_dir
fi
if [ ! -f "$bak_log" ]; then
touch $bak_log
fi
fun_bak() {
mysql -h$db_host -P$db_port -u$db_user -p$db_pass -e "SHOW DATABASES;" 2>/dev/null -N | egrep -v $db_except > dbname
while read db
do
[ -d ${bak_dir}/$db ] || mkdir -p ${bak_dir}/$db
file_db_name=${db}-${bak_date}
dump_db_file=${bak_dir}/$db/${file_db_name}
echo $(date +'%Y-%m-%d %T')" ==> Start Dumping database: ${db}..." >>${bak_log}
mysqldump -h$db_host -P$db_port -u$db_user -p$db_pass -B $db > ${dump_db_file}.sql 2>>${bak_log} 2>&1
echo $(date +'%Y-%m-%d %T')" ==> Start zip file: ${dump_db_file}.sql..." >>${bak_log}
cd ${bak_dir}/$db
tar zcf "$file_db_name".tar.gz "$file_db_name".sql --transform='s/^.*\// /' --remove & > /dev/null
mysql -h$db_host -P$db_port -u$db_user -p$db_pass -N -e "show tables from $db" 2>/dev/null > tbname
while read tb
do
file_tb_name=${db}-${tb}-${bak_date}
dump_tb_file=${bak_dir}/$db/${file_tb_name}
echo $(date +'%Y-%m-%d %T')" ==> Start Dumping table: ${db}.${tb}..." >> ${bak_log}
mysqldump -h$db_host -P$db_port -u$db_user -p$db_pass $db $tb > ${dump_tb_file}.sql 2>>${bak_log} 2>&1
echo $(date +'%Y-%m-%d %T')" ==> Start zip file: ${file_tb_name}.sql..." >>${bak_log}
cd ${bak_dir}/$db/
tar zcf "$file_tb_name".tar.gz "$file_tb_name".sql --transform='s/^.*\// /' --remove & > /dev/null
echo $(date +'%Y-%m-%d %T')" ==> Success backup table: ${db}.${tb}..." >> ${bak_log}
done < tbname
echo $(date +'%Y-%m-%d %T')" ==> Success backup database: ${db} with file name: ${file_db_name}.tar.gz!" >> ${bak_log}
done < dbname
rm -rf tbname ../dbname
}
fun_rm() {
echo $(date +'%Y-%m-%d %T')" ==> Start delete backup file." >>${bak_log}
for db_dir in ${bak_dir}/*/ ; do
db_name=$(basename $db_dir)
find $db_dir -type f -name "*.tar.gz" -mtime +${bak_day} | while read file; do
echo "Deleting ${file} ..."
rm -f "$file"
done
done
echo $(date +'%Y-%m-%d %T')" ==> Finished delete backup file." >>${bak_log}
}
cd $bak_dir
fun_bak
fun_rm
数据库服务问题排查要点
- 1.日志查看
- 2.时间观念
- 3.确认备份文件是否存在
- 4.初步预估恢复时间
模拟场景
1 数据库表空间文件丢失
删除 数据库文件
mv /mysqldata/data/whcenter/basic_district.ibd /mysqldata/mysqltest/
查看数据库服务正常
[root@mysql84 ~]# systemctl status mysqld
● mysqld.service - MySQL Server
Loaded: loaded (/usr/lib/systemd/system/mysqld.service; enabled; preset>
Active: active (running) since Thu 2024-05-30 15:27:11 CST; 20h ago
Docs: man:mysqld(8)
http://dev.mysql.com/doc/refman/en/using-systemd.html
Main PID: 864 (mysqld)
Status: "Server is operational"
Tasks: 42 (limit: 48308)
Memory: 1.2G
CPU: 7min 25.971s
CGroup: /system.slice/mysqld.service
└─864 /usr/sbin/mysqld
May 30 15:27:10 mysql84 systemd[1]: Starting MySQL Server…
May 30 15:27:11 mysql84 systemd[1]: Started MySQL Server.
因表数据太小还没刷盘,可以手动触发刷盘操作:
ALTER TABLE basic_district ENGINE=InnoDB; # 修改表结构触发
OPTIMIZE TABLE basic_district; # 整理表触发
查看日志文件报错
2024-05-31T05:10:13.006319Z 33 [ERROR] [MY-012118] [InnoDB] Cannot rename ‘./whcenter/basic_district.ibd’ to ‘./whcenter/#sql-ib1084-1562079979.ibd’ for space ID 7 because the source file does not exist.
2024-05-31T05:10:25.113295Z 33 [ERROR] [MY-012118] [InnoDB] Cannot rename ‘./whcenter/basic_district.ibd’ to ‘./whcenter/#sql-ib1085-1562079981.ibd’ for space ID 7 because the source file does not exist.
2024-05-31T05:16:04.363899Z 33 [ERROR] [MY-012118] [InnoDB] Cannot rename ‘./whcenter/basic_district.ibd’ to ‘./whcenter/#sql-ib1088-1562079987.ibd’ for space ID 25 because the source file does not exist.
2024-05-31T05:16:15.195845Z 33 [ERROR] [MY-012118] [InnoDB] Cannot rename ‘./whcenter/basic_district.ibd’ to ‘./whcenter/#sql-ib1089-1562079989.ibd’ for space ID 25 because the source file does not exist.
重新复制回数据文件,数据库恢复正常
操作数据量比较大的表
mv /mysqldata/data/whcenter/tb_outstockinfo.ibd /mysqldata/mysqltest/
重启数据库
systemctl restart mysql
数据库正常启动
日志会有Warning级别提醒
2024-05-31T07:53:30.474784Z 1 [Warning] [MY-012351] [InnoDB] Tablespace 38, name ‘whcenter/tb_outstockinfo’, file ‘./whcenter/tb_outstockinfo.ibd’ is missing!
查询表数据
SELECT * FROM tb_outstockinfo;
数据库返回报错:
ERROR 1812 (HY000): Tablespace is missing for table whcenter
.tb_outstockinfo
.
数据库日志:
2024-05-31T05:22:32.505685Z 9 [ERROR] [MY-012592] [InnoDB] Operating system error number 2 in a file operation.
2024-05-31T05:22:32.505817Z 9 [ERROR] [MY-012593] [InnoDB] The error means the system cannot find the path specified.
2024-05-31T05:22:32.505841Z 9 [ERROR] [MY-012216] [InnoDB] Cannot open datafile for read-only: ‘./whcenter/tb_outstockinfo.ibd’ OS error: 71
2024-05-31T05:22:32.506520Z 9 [Warning] [MY-012049] [InnoDB] Cannot calculate statistics for table whcenter
.tb_outstockinfo
because the .ibd file is missing. Please refer to http://dev.mysql.com/doc/refman/8.4/en/innodb-troubleshooting.html for how to resolve the issue.
2024-05-31T05:22:32.648136Z 9 [Warning] [MY-012049] [InnoDB] Cannot calculate statistics for table whcenter
.tb_outstockinfo
because the .ibd file is missing. Please refer to http://dev.mysql.com/doc/refman/8.4/en/innodb-troubleshooting.html for how to resolve the issue.
数据库重启并没有启动失败,报错也可以直接显示出表名,可以看出MySQL逐渐健壮;
尝试恢复:
1.备份集中数据恢复,上面每日备份,定时每天1点整,可以恢复至今日1点的数据;
cd /mysqldata/mysqlbak/whcenter
tar -zxvf tar -zxvf whcenter-tb_outstockinfo-20240531.tar.gz
whcenter-tb_outstockinfo-20240531.sql
如数据库备份文件较大,单表数据恢复需编辑备份文件不现实,先新建一个备份库再抽取需还原表回来
使用数据库root用户登录
CREATE DATABASE whcenterbak;
GRANT ALL ON whcenterbak.* TO ‘user_wh’@’%’;
FLUSH PRIVILEGES;
使用user_wh登录数据库
use whcenterbak;
source /mysqldata/mysqlbak/whcenter/whcenter-tb_outstockinfo-20240531.sql
或命令行执行
cat /mysqldata/mysqlbak/whcenter/whcenter-tb_outstockinfo-20240531.sql | mysql -u user_wh -D whcenterbak -p
确认标文件是否完全恢复
SELECT COUNT(1) FROM tb_outstockinfo;
场景2: 数据磁盘空间不足
场景描述:
lvm磁盘缩小lv为5G
在数据文件目录创建空文件占用大小
cd /mysqldata
dd if=/dev/zero of=4gfile bs=1M count=3600
写入数据直至MySQL报磁盘空间不足
[root@mysql84 mysqldata]# dd if=/dev/zero of=4gfile bs=1M count=3600
3600+0 records in
3600+0 records out
3774873600 bytes (3.8 GB, 3.5 GiB) copied, 5.99516 s, 630 MB/s
[root@mysql84 mysqldata]# df -hT
Filesystem Type Size Used Avail Use% Mounted on
devtmpfs devtmpfs 4.0M 0 4.0M 0% /dev
tmpfs tmpfs 3.8G 0 3.8G 0% /dev/shm
tmpfs tmpfs 1.5G 8.5M 1.5G 1% /run
/dev/mapper/rl-root xfs 44G 1.9G 43G 5% /
/dev/sda1 xfs 960M 225M 736M 24% /boot
tmpfs tmpfs 759M 0 759M 0% /run/user/0
/dev/mapper/vgmysql-lvmysql ext4 4.4G 4.0G 200M 96% /mysqldata
使用navicat写入数据库(表数据量大一些)
实时监控次哦按剩余量
watch -n 1 df -hT
Every 1.0s: df -hT mysql84: Fri May 31 17:10:25 2024
Filesystem Type Size Used Avail Use% Mounted on
devtmpfs devtmpfs 4.0M 0 4.0M 0% /dev
tmpfs tmpfs 3.8G 0 3.8G 0% /dev/shm
tmpfs tmpfs 1.5G 8.6M 1.5G 1% /run
/dev/mapper/rl-root xfs 44G 2.0G 42G 5% /
/dev/sda1 xfs 960M 225M 736M 24% /boot
tmpfs tmpfs 759M 0 759M 0% /run/user/
0
/dev/mapper/vgmysql-lvmysql ext4 4.4G 4.2G 0 100% /mysqldata
mysql日志报错
2024-05-31T09:10:03.911223Z 9 [ERROR] [MY-012144] [InnoDB] posix_fallocate(): Failed to preallocate data for file ./whcenter/tb_materialstock.ibd, desired size 4194304 bytes. Operating system error number 28. Check that the disk is not full or a disk quota exceeded. Make sure the file system supports this function. Refer to your operating system documentation for operating system error code information.
2024-05-31T09:10:03.918589Z 9 [Warning] [MY-012638] [InnoDB] Retry attempts for writing partial data failed.
2024-05-31T09:10:03.918686Z 9 [ERROR] [MY-012639] [InnoDB] Write to file ./whcenter/tb_materialstock.ibd failed at offset 195035136, 1048576 bytes should have been written, only 0 were written. Operating system error number 28. Check that your OS and file system support files of this size. Check also that the disk is not full or a disk quota exceeded.
2024-05-31T09:10:03.918723Z 9 [ERROR] [MY-012640] [InnoDB] Error number 28 means ‘No space left on device’
2024-05-31T09:10:03.918785Z 9 [Warning] [MY-012145] [InnoDB] Error while writing 4194304 zeroes to ./whcenter/tb_materialstock.ibd starting at offset 192937984
2024-05-31T09:10:03.922078Z 9 [ERROR] [MY-013132] [Server] The table ‘tb_materialstock’ is full!
navicat数据写入报错
[ERR] 1> 1114 - The table ‘tb_materialstock’ is full
此时数据库依然没有宕机,还可以继续运行,再次尝试写入数据
2024-05-31T09:17:18.809432Z 9 [ERROR] [MY-012144] [InnoDB] posix_fallocate(): Failed to preallocate data for file ./whcenter/basic_district.ibd, desired size 16384 bytes. Operating system error number 28. Check that the disk is not full or a disk quota exceeded. Make sure the file system supports this function. Refer to your operating system documentation for operating system error code information.
2024-05-31T09:17:18.809538Z 9 [Warning] [MY-012638] [InnoDB] Retry attempts for writing partial data failed.
2024-05-31T09:17:18.809561Z 9 [Warning] [MY-012145] [InnoDB] Error while writing 16384 zeroes to ./whcenter/basic_district.ibd starting at offset 933888
2024-05-31T09:17:18.810164Z 9 [ERROR] [MY-013132] [Server] The table ‘basic_district’ is full!
2024-05-31T09:17:18.811277Z 9 [ERROR] [MY-012144] [InnoDB] posix_fallocate(): Failed to preallocate data for file ./whcenter/basic_district.ibd, desired size 16384 bytes. Operating system error number 28. Check that the disk is not full or a disk quota exceeded. Make sure the file system supports this function. Refer to your operating system documentation for operating system error code information.
2024-05-31T09:17:18.811351Z 9 [Warning] [MY-012638] [InnoDB] Retry attempts for writing partial data failed.
2024-05-31T09:17:18.811377Z 92024-05-31T09:17:27.155821Z 1 [Warning] [MY-012638] [InnoDB] Retry attempts for writing partial data failed.
2024-05-31T09:17:27.155905Z 1 [ERROR] [MY-012267] [InnoDB] Could not set the file size of ‘./ibtmp1’. Probably out of disk space
2024-05-31T09:17:27.155928Z 1 [ERROR] [MY-012926] [InnoDB] Unable to create the shared innodb_temporary.
2024-05-31T09:17:27.156113Z 1 [ERROR] [MY-012930] [InnoDB] Plugin initialization aborted with error Generic error.
2024-05-31T09:17:27.418128Z 1 [ERROR] [MY-010334] [Server] Failed to initialize DD Storage Engine
2024-05-31T09:17:27.418648Z 0 [ERROR] [MY-010020] [Server] Data Dictionary initialization failed.
2024-05-31T09:17:27.418902Z 0 [ERROR] [MY-010119] [Server] Aborting
2024-05-31T09:17:27.421203Z 0 [System] [MY-010910] [Server] /usr/sbin/mysqld: Shutdown complete (mysqld 8.4.0) MySQL Community Server - GPL.
2024-05-31T09:17:27.421253Z 0 [System] [MY-015016] [Server] MySQL Server - end.
此时数据库服务停止运行
新增lv磁盘空间
umount /dev/vgmysql/lvmysql
e2fsck -f /dev/vgmysql/lvmysql
resize2fs /dev/vgmysql/lvmysql 99G (不能用小数点)
lvextend -L +94.5G /dev/vgmysql/lvmysql
Size of logical volume vgmysql/lvmysql changed from 5.00 GiB (1280 extents) to 99.50 GiB (25472 extents).
Logical volume vgmysql/lvmysql successfully resized.
mount /dev/vgmysql/lvmysql /mysqldata
验证磁盘空间大小
df- hT
启动后MySQL服务正常
题外资料:
对于日志文件空间不足
报错关键字:
ER_IB_MSG_RECOVERY_NO_SPACE_IN_REDO_LOG__SKIP_IBUF_MERGES
一般网络教程
一、增加 redo log 缓冲区
MySQL 用户可以通过调整系统参数来增加 redo log 缓冲区,如增加 innodb_log_buffer_size 参数值。
二、扩展 redo log 文件大小
此外,用户还可以增加 redo log 文件的内存空间,从而扩展 redo log 环境的总容量:
1、停止 MySQL 服务器
2、使用 mv 命令备份日志文件
3、更改 redo log 文件大小
4、启动 MySQL 服务器
三、增加 redo log 环境容量
迅速补充现有 redo log 环境容量的另一种方法是创建新的 redo log 文件,使其成为新的变量组成部分,以增加环境的容量,例如:
1、打开 my.cnf 文件,增加 redo log 文件的个数
2、重启 MySQL 服务器
3、查看系统状态及重要参数配置,确保正常
4、启动新的 redo log 文件
场景3 数据写入中直接停掉虚拟机,模拟机房断电情况 (增量恢复)
使用navicat写入tb_outstockinfohis表
当数据开始写入但未结束时断掉虚拟机电源,接入电源,启动虚拟机宿主机,启动虚拟服务器,查看MySQL错误日志
系统自动启动后,数据库服务配置的自启,可以启动成功,日志文件中并没有记录到异常
查看tb_outstockinfohis表无数据,开始尝试使用binlog恢复表数据
查看binlog列表
SHOW BINARY LOGS;
查看当前使用的binlog文件
show master status;
8.4改为:
SHOW BINARY LOG STATUS;
查看特定binlog文件的内容
SHOW BINLOG EVENTS IN 'binlog.000006';
语法:
SHOW BINLOG EVENTS
[IN ‘log_name’]
[FROM pos]
[LIMIT [offset,] row_count]
使用mysqlbinlog工具导出特定binlog文件中的数据
mysqlbinlog --start-datetime="2024-06-20 00:00:00" --stop-datetime="2024-06-20 08:00:00" \
--database=dbname binlog.000006 > binlog_output.sql
mysqlbinlog 的执行格式
mysqlbinlog [options] log_file ...
查看bin-log二进制文件(shell方式)
mysqlbinlog -v --base64-output=decode-rows /mysqldata/data/binlog.000006
查看bin-log二进制文件(带查询条件)
mysqlbinlog -v --base64-output=decode-rows /mysqldata/data/binlog.000006 \
--start-datetime="2024-06-20 00:00:00" \
--stop-datetime="2024-06-20 08:00:00" \
--start-position="5000" \
--stop-position="20000"
应用binlog中的SQL语句恢复数据
mysql -u username -p dbname < binlog_output.sql
生产环境不建议在原库直接进行恢复。因为同一个库中,同一个GTID号只能对应一个事务,因此直接执行下面的语句什么也不会发生:
指定skip-gtids=true参数可以把binlog日志中已经执行过的语句当成一个新事务来执行才行,但是这样会把两个时间点中间的所有事务都重新执行一遍,可能会导致某些数据被重复插入,造成数据不一致。
删除binlog
语法:
PURGE { BINARY | MASTER } LOGS { TO ‘log_name’ | BEFORE datetime_expr }
场景4 数据误删除–使用binlog恢复场景(delete操作)
模拟删除表数据:
SELECT COUNT(1) FROM tb_outstockdetail WHERE CreatedAt >= '2024-06-01';
48
DELETE FROM tb_outstockdetail WHERE CreatedAt >= '2024-06-01';
Query 1 OK: 48 rows affected
恢复数据:
定位删除时间点
查看当前BINLOG
mysql> SHOW BINARY LOG STATUS;
+---------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+---------------+----------+--------------+------------------+-------------------+
| binlog.000006 | 10577 | | | |
+---------------+----------+--------------+------------------+-------------------+
1 row in set (0.00 sec)
# 检查当天binlog文件中的事件
mysql> SHOW BINLOG EVENTS IN 'binlog.000006';
+---------------+-------+----------------+-----------+-------------+--------------------------------------------+
| Log_name | Pos | Event_type | Server_id | End_log_pos | Info |
+---------------+-------+----------------+-----------+-------------+--------------------------------------------+
| binlog.000006 | 4 | Format_desc | 1 | 127 | Server ver: 8.4.0, Binlog ver: 4 |
| binlog.000006 | 127 | Previous_gtids | 1 | 158 | |
| binlog.000006 | 158 | Anonymous_Gtid | 1 | 237 | SET @@SESSION.GTID_NEXT= 'ANONYMOUS' |
| binlog.000006 | 237 | Query | 1 | 324 | BEGIN |
| binlog.000006 | 324 | Table_map | 1 | 453 | table_id: 116 (whcenter.tb_outstockdetail) |
| binlog.000006 | 453 | Delete_rows | 1 | 8610 | table_id: 116 |
| binlog.000006 | 8610 | Delete_rows | 1 | 10546 | table_id: 116 flags: STMT_END_F |
| binlog.000006 | 10546 | Xid | 1 | 10577 | COMMIT /* xid=147 */ |
+---------------+-------+----------------+-----------+-------------+--------------------------------------------+
8 rows in set (0.00 sec)
# 其中,Delete_rows对应删除事件,事务开始的位置(Pos)为237,结束的位置(End_log_pos)为10577 。该事务对应GTID生成的SET语句的开始位置为2298。
# 手动触发日志切换,生成新的binlog:
mysql> FLUSH LOGS;
Query OK, 0 rows affected (0.45 sec)
mysql> SHOW BINARY LOG STATUS;
+---------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+---------------+----------+--------------+------------------+-------------------+
| binlog.000007 | 158 | | | |
+---------------+----------+--------------+------------------+-------------------+
1 row in set (0.00 sec)
查找删除语句,更换实际删除动作的时间点
mysqlbinlog -vv --base64-output=decode-rows --start-datetime='2024-06-19 21:00:00' --stop-datetime='2024-06-19 22:00:00' -d whcenter /mysqldata/data/binlog.000006 | grep -i delete
mysqlbinlog -vv --base64-output=decode-rows --start-datetime='2024-06-19 21:00:00' --stop-datetime='2024-06-19 22:00:00' -d whcenter /mysqldata/data/binlog.000006 | less
#240619 21:34:44 server id 1 end_log_pos 10577 CRC32 0xfc08a7b7 Xid = 147
最终结束时间:
21:34:44
全量+binglog增量
首先恢复全量数据,时间点为 2024-06-19 00:00:00
查看 全量恢复到数据删除时间点的BIGLOG:
mysqlbinlog -vv –base64-output=decode-rows –start-datetime=’2024-06-19 00:00:00’ –stop-datetime=’2024-06-19 22:00:00’ -d whcenter /mysqldata/data/binlog.000006 | less
mysqlbinlog –start-position=127 –stop-position=158 -d whcenter /mysqldata/data/binlog.000006
在新还原的实例恢复binlog数据
mysqlbinlog –start-position=127 –stop-position=158 -d whcenter /mysqldata/data/binlog.000006 | mysql -uroot -h127.0.0.1 -p
更改删除语句,在源库直接INSERT已删除数据
由于事务与GTID的唯一对应性,如果想直接在原库进行数据恢复,需要找到删除动作对应的DELETE语句,并将其统一替换为INSERT语句来重新插入数据。该方法需要数据库开启了binlog_rows_query_log_events参数。
SHOW BINLOG EVENTS IN 'binlog.000006' FROM 158;
将删除数据保存到指定文件
mysqlbinlog -vv –base64-output=decode-rows –start-position=158 -d whcenter /mysqldata/data/binlog.000006 > /mysqldata/data/binlog_58.log
sed -n '/^###/p' /mysqldata/data/binlog_58.log > /mysqldata/data/sourcedata_58.sql # 获取删除前原始数据
sed -i 's/### //g' /mysqldata/data/sourcedata_58.sql
sed -i 's/DELETE FROM/INSERT INTO/g' /mysqldata/data/sourcedata_58.sql # 将DELETE语句替换为INSERT语句:
sed -i 's/WHERE/SELECT/g' /mysqldata/data/sourcedata_58.sql
sed -i 's# /.*#,#g' /mysqldata/data/sourcedata_58.sql # 去掉行末的注释/*...*/
sed -ri 's#(@23=.*)(,)#\1;#g' /mysqldata/data/sourcedata_58.sql # 将每条INSERT语句的最后一个逗号替换为分号,特别注意这里sed语法,@23为第23列(最后一列),如果表最后一个字段为@30则改为@30视实际情况而定
sed -ri 's#(@.*=)(.*)#\2#g' /mysqldata/data/sourcedata_58.sql # 去掉@数字=
sed -i '$a commit;' /mysqldata/data/sourcedata_58.sql # 添加COMMIT语句
tail -200 /mysqldata/data/sourcedata_58.sql # 查看最终结果
mysql -uroot -D whcenter -p < /mysqldata/data/sourcedata_58.sql # 执行INSERT 脚本导入数据
查看所有数据均已还原
mysql> SELECT COUNT(1) FROM tb_outstockdetail WHERE CreatedAt >= '2024-06-01';
+----------+
| COUNT(1) |
+----------+
| 48 |
+----------+
1 row in set (0.01 sec)
题外资料:
mysqlbinlog_flashback 阿里已将binlong闪回做成小工具,可直接加 -B 参数将binlog日志直接转换为恢复语句,在数据库内执行。
binlog恢复update语句前提条件:表有主键(所以说建表必须有主键很重要)
binlog format 必须为row模式
场景5 断电、磁盘损坏造成的数据库启动失败
error.log内报错数据库不能启动,需要启动数据库安全模式,先将数据库忽略错误启动。
实际操作就是修改配置文件my.cnf中innodb_force_recovery,MySQL在转储文件或者导入数据的过程中,出现中断、失败或者异常,造成数据无法回滚,可以通过innodb_force_recovery强力迫使InnoDB存储引擎运行,同时阻止后台操作运行,以便转储表数据。
innodb_force_recovery的值,mysql官方提供了6个等级,Innodb事务型存储引擎,通过redo,undo,double write这些特性保证数据的完整,针对硬件故障,内核bug,突然断电的事件,需要手动对Innodb进行恢复;可以将Innodb page 损坏分为几类,data page 损坏,secondary_index page 损坏, root index 损坏,data dictionary 损坏,恢复的难度依次增加;
innodb_force_recovery 在使用的时候,能尽量从1-6依次递增,高一些的值包含比其低值的设置:
- 1-3的时候,数据的完整性相对来说还是可以保证的(除了已经损坏的部分);
- 大于等于4的时候可能造成 page处于一种相对“过时”(obsolete state),重建损坏的表可以避免数据过时,也可能造成B-trees and other database structures 的损坏;
-
大于0的时候,INSERT,UPDATE,DELETE这些操作都是禁止的;
各个参数的具体含义:
1 (SRV_FORCE_IGNORE_CORRUPT):
强制忽略corrupt page并自动跳过,期间可以dump table;
2 (SRV_FORCE_NO_BACKGROUND):
在前置忽略corrupt page 的基础上(包含=1的作用),阻塞 master thread 和 任何的 purge thread 运行(有效防止在purge的时候发生MySQL crash)
3 (SRV_FORCE_NO_TRX_UNDO):
在忽略 corrupt page,阻塞 purge thread的基础上,不进行 transaction rollback;
4 (SRV_FORCE_NO_IBUF_MERGE):
在忽略 corrupt page,阻塞 purge thread,禁止 transaction rollback 基础上,禁止 merge insert buffer,对 table statistics 不进行更新;(这样会损坏 data file,等恢复后最好重建所有的secondary index);
5 (SRV_FORCE_NO_UNDO_LOG_SCAN):
在忽略 corrupt page ,阻塞purge thread,禁止 transaction rollback,禁止merge insert buffer,停止 table statistic 的基础上,在启动 MySQL的时候,不在扫描 undo logs,对待incomplete transactions as committed;
6 (SRV_FORCE_NO_LOG_REDO):
在以上所有的基础上,redo log 不进行前滚(roll-forward)
这里再次提醒下,对Innodb_force_recovery的赋值最好是依次递增(除非自己做过严格测试)
修改完配置文件,从1逐一增加innodb_force_recovery的值,直至数据库正常启动,
导出备份语句(或者整库导出),删表重建、删库使用备份数据重新恢复,修改应用连接至新数据库服务器。vim /etc/my.cnf innodb_force_recovery = 1
mysqldump -u root -p –single-transaction –no-create-info whcenter tb_materialstock > data_dump.sql
场景6 数据库数据目录恢复
场景描述:
客户正式环境上线,需要迁移数据到无外网环境机房,数据量比较大,指定数据库。
常用方法:
- 如客户有中转机,搭建datax或其他数据抽取etl类工具,直接抽取数据至客户正式环境; # 非常推荐
- mysqldump导出的tar压缩包,传至客户部署机器,给搭建好mysql的环境还原数据; # 上传包时间较长
- 原文件夹直接压缩tar包,传至客户部署机器,直接使用此数据目录初始化mysql; # 也能用
此场景即模拟第三种方案,将数据目录备份,在新机器上初始化新mysql
原机器上执行
cd /mysqldata
tar -zcvf mysqldatabak20240620.tar.gz data
新系统中上传备份包并解压
tar -zxvf mysqldatabak20240620.tar.gz -C /mysqldata/
修改配置文件
注意:修改配置文件内 server_id 参数,与主库不一致
修改 /mysqdata/data/auto.conf 中server-uuid,与主库不一致
vim /etc/my.cnf
datadir=/mysqldata/data
port=3306
socket=/var/lib/mysql/mysql.sock
pid-file=/var/run/mysqld/mysqld.pid
server_id=2
autocommit=1
log_error=/mysqldata/logs/error.log
lower_case_table_names=1
slow_query_log=1
slow_query_log_file=/mysqldata/logs/slow.log
long_query_time=1
innodb_buffer_pool_size=6G
max_connections=512
wait_timeout=1800
performance_schema=1
local_infile=1
default-time_zone = '+8:00'
event_scheduler=ON
sql_mode=''
注意此台机器不关闭Selinux
复制文件至/mysqldata
scp mysqldatabak20240620.tar.gz root@172.16.1.11:~/
启动mysql服务
systemctl start mysqld
[root@mysql842 mysqldata]# systemctl start mysqld
Job for mysqld.service failed because the control process exited with error code.
See "systemctl status mysqld.service" and "journalctl -xeu mysqld.service" for details.
直接报错,查看日志
systemctl status mysqld
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.677151Z 0 [System] [MY-015015] [Server] MySQL Server - start.
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.861294Z 0 [Warning] [MY-010091] [Server] Can't create test file /mysqldata/data/mysqld_tmp_file_case_inse>
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.861332Z 0 [System] [MY-010116] [Server] /usr/sbin/mysqld (mysqld 8.4.0) starting as process 5036
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.862996Z 0 [Warning] [MY-010091] [Server] Can't create test file /mysqldata/data/mysqld_tmp_file_case_inse>
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.863008Z 0 [Warning] [MY-010091] [Server] Can't create test file /mysqldata/data/mysqld_tmp_file_case_inse>
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.863338Z 0 [ERROR] [MY-010187] [Server] Could not open file '/mysqldata/logs/error.log' for error logging:>
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.863426Z 0 [ERROR] [MY-010119] [Server] Aborting
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.864495Z 0 [System] [MY-010910] [Server] /usr/sbin/mysqld: Shutdown complete (mysqld 8.4.0) MySQL Communi>
Jun 21 14:52:22 mysql842 mysqld[5036]: 2024-06-21T06:52:22.864498Z 0 [System] [MY-015016] [Server] MySQL Server - end.
Jun 21 14:52:22 mysql842 systemd[1]: mysqld.service: Main process exited, code=exited, status=1/FAILURE
这里重点是
Can’t create test file /mysqldata/data/mysqld_tmp_file_case_inse>
Could not open file ‘/mysqldata/logs/error.log’ for error logging:>
然而上面已经将文件夹目录归属设置为mysql,权限改为750
这里跟Selinux关系很大
解释:为了提高 Linux 系统的安全性,在 Linux 上通常会使用 SELinux 或 AppArmor 实现强制访问控制(Mandatory Access Control MAC)。对于 MySQL 数据库的强制访问控制策略通常是激活的,如果用户采用默认的配置,并不会感到强制访问控制策略对 MySQL 数据库的影响,一旦用户修改了 MySQL 数据库的默认配置,例如默认的数据目录或监听端口,MySQL 数据库的活动就会被 SELinux 或 AppArmor 阻止,数据库无法启动。
ps -Z 查看 mysqld 进程的 SELinux 的上下文:
[root@mysql84 logs]# ps -eZ | grep mysqld
system_u:system_r:mysqld_t:s0 863 ? 00:01:08 mysqld
ls -eZ 查看 MySQL 数据目录的 SELinux 的上下文:
[root@mysql84 logs]# ls -dZ /mysqldata/data
unconfined_u:object_r:default_t:s0 /mysqldata/data
参数说明:
system_u 是系统进程和对象的 SELinux 用户标识。
system_r 是用于系统进程的 SELinux 角色。
objects_r 是用于系统对象的 SELinux 角色。
mysqld_t 是与 mysqld 进程相关的 SELinux 类型。
mysqld_db_t 是与 MySQL 数据目录相关的 SELinux 类型。
如果我们把 MySQL 数据目录从默认的 /var/lib/mysql 改成其他目录,SELinux 将会阻止 mysqld 进程访问 MySQL 数据目录,从而造成 MySQL 无法启动,相关拒绝访问的信息记录在 /var/log/audit/audit.log 文件中:
grep mysql /var/log/audit/audit.log | grep denied
这里介绍下不关闭Selinux的操作方法,把新的 MySQL 数据目录增加到mysqld_db_t 这个 SELinux 类型中,例如使用 semanage fcontext 命令的 -a 选项增加一个目录为 /mysqldata/data 的 MySQL 数据目录,然后使用命令 restorecon 恢复这个数据目录对应的 SELinux 上下文
#安装命令
dnf install policycoreutils-python-utils
#把新的 MySQL 数据目录增加到mysqld_db_t 这个 SELinux 类型中
semanage fcontext -a -t mysqld_db_t "/mysqldata(/.*)?"
#使用命令 restorecon 恢复这个数据目录对应的 SELinux 上下文
restorecon -Rv /mysqldata/data
restorecon -Rv /mysqldata/logs
[root@mysql842 mysqldata]# restorecon -Rv /mysqldata/logs
Relabeled /mysqldata/logs from unconfined_u:object_r:unlabeled_t:s0 to unconfined_u:object_r:mysqld_db_t:s0
Relabeled /mysqldata/logs/slow.log from unconfined_u:object_r:unlabeled_t:s0 to unconfined_u:object_r:mysqld_db_t:s0
Relabeled /mysqldata/logs/mysqld.log from unconfined_u:object_r:unlabeled_t:s0 to unconfined_u:object_r:mysqld_db_t:s0
#用 semanage fcontext 命令的 -l 选项进行检查
semanage fcontext -l | grep mysqld_db_t
[root@mysql842 mysqldata]# semanage fcontext -l | grep mysqld_db_t
/mysqldata(/.*)? all files system_u:object_r:mysqld_db_t:s0
/var/lib/mysql(-files|-keyring)?(/.*)? all files system_u:object_r:mysqld_db_t:s0
启动数据库服务
systemctl start mysqld
查看服务启动日志
tail -200f /mysqldata/logs/error.log
设置开机自动启动
systemctl enable mysqld
至此mysql数据目录打包迁移数据迁移完毕,常用公共库建议做好mysqldump打包备份,以备客户上线随时还原,如baidumap数据库。
注意:这里有些linux发行版不用selinux,名称叫做AppArmor
场景7 mysql主从数据恢复
首先搭建主从,将上面恢复好的一台机器作为从机
主:172.16.1.10
从:172.16.1.11
确保两服务器配置文件中server-id不一致。
主从配置
#创建复制用户:
CREATE USER 'replica'@'%' IDENTIFIED BY 'Lunz@2017';
#授予权限
GRANT REPLICATION SLAVE ON *.* TO 'replica'@'%';
FLUSH PRIVILEGES;
#查看主库状态
SHOW MASTER STATUS;
MySQL8.4改为:
SHOW BINARY LOG STATUS;
#从机配置,备份用来升主使用
#创建复制用户:
CREATE USER 'replica'@'%' IDENTIFIED BY 'Lunz@2017';
#授予权限
GRANT REPLICATION SLAVE ON *.* TO 'replica'@'%';
FLUSH PRIVILEGES;
#查看主库状态
SHOW MASTER STATUS;
MySQL8.4改为:
SHOW BINARY LOG STATUS;
#主从同步
CHANGE MASTER TO MASTER_HOST='172.16.1.10',MASTER_USER='replica',MASTER_PASSWORD='Lunz@2017',MASTER_LOG_FILE='binlog.000015',MASTER_LOG_POS=767;
MySQL8.4指令有所变更:
CHANGE REPLICATION SOURCE TO SOURCE_HOST='172.16.1.10', SOURCE_USER='replica', SOURCE_PASSWORD='Lunz@2017', SOURCE_PORT=3306, SOURCE_LOG_FILE='binlog.000015', SOURCE_LOG_POS=767;
#启动复值进程
START REPLICA;
#备用指令
#清空复制信息
RESET REPLICA;
RESET REPLICA FOR CHANNEL "source_1";
#验证
SHOW REPLICA STATUS \G
SHOW PROCESSLIST \G
之前版本的 MySQL 中已弃用的与 MySQL 复制相关的许多功能的语法现已删除,具体清单如下。
Removed Replacements
START SLAVE START REPLICA
STOP SLAVE STOP REPLICA
SHOW SLAVE STATUS SHOW REPLICA STATUS
SHOW SLAVE HOSTS SHOW REPLICAS
RESET SLAVE RESET REPLICA
CHANGE MASTER TO CHANGE REPLICATION SOURCE TO
RESET MASTER RESET BINARY LOGS AND GTIDS
SHOW MASTER STATUS SHOW BINARY LOG STATUS
PURGE MASTER LOGS PURGE BINARY LOGS
SHOW MASTER LOGS SHOW BINARY LOGS
启用GTID
主库
SET @@GLOBAL.ENFORCE_GTID_CONSISTENCY = WARN;
SET @@GLOBAL.ENFORCE_GTID_CONSISTENCY = ON;
SET @@GLOBAL.GTID_MODE = OFF_PERMISSIVE;
SET @@GLOBAL.GTID_MODE = ON_PERMISSIVE;
SHOW STATUS LIKE 'ONGOING_ANONYMOUS_TRANSACTION_COUNT';
SET @@GLOBAL.GTID_MODE = ON;
从库
SET @@GLOBAL.ENFORCE_GTID_CONSISTENCY = WARN;
SET @@GLOBAL.ENFORCE_GTID_CONSISTENCY = ON;
SET @@GLOBAL.GTID_MODE = OFF_PERMISSIVE;
SET @@GLOBAL.GTID_MODE = ON_PERMISSIVE;
SHOW STATUS LIKE 'ONGOING_ANONYMOUS_TRANSACTION_COUNT';
SET @@GLOBAL.GTID_MODE = ON;
stop replica;
CHANGE REPLICATION SOURCE TO SOURCE_AUTO_POSITION = 1;
START replica;
修改配置文件
gtid_mode = ON
enforce_gtid_consistency = ON
恢复场景1 主库完好,从库同步链路发生问题
主库不动,设置主库只读,备份主库,按照搭建主从库方法,重新还原从库、配置主从连接。
恢复场景2 主库宕机,从库正常启动,从库数据无问题
从库:172.16.1.10
主库:172.16.1.11
从库取消复值
STOP REPLICA;
RESET REPLICA ALL;
从库关闭只读(如果有)
SET GLOBAL READ_ONLY=OFF;
SET GLOBAL SUPER_READ_ONLY=OFF;
从库只读状态备份
主库还原
主库执行:
CHANGE REPLICATION SOURCE TO SOURCE_HOST='172.16.1.11',SOURCE_PORT=3306,SOURCE_USER='replica', SOURCE_PASSWORD='Lunz@2017',SOURCE_auto_position=1;
START REPLICA;
从库恢复读写。