admin:infrastructure:backup
Différences
Ci-dessous, les différences entre deux révisions de la page.
| Prochaine révision | Révision précédente | ||
| admin:infrastructure:backup [2018/11/26 10:54] – créée fpoulain | admin:infrastructure:backup [2024/12/29 09:19] (Version actuelle) – [Script de backup] correction d'une typo dans l'URL du remote (suppression ":") pilou | ||
|---|---|---|---|
| Ligne 1: | Ligne 1: | ||
| - | Le backup de chapril est déployé via un paquet Debian sur un repo privé. On décrit ici les points essentiels. | ||
| - | |||
| ====== Paquet Debian ====== | ====== Paquet Debian ====== | ||
| - | ===== Script de backup | + | Le backup |
| - | <code bash src/backup.sh> | + | ===== Aspects |
| - | #! /bin/bash | + | |
| - | sleep $[$RANDOM % 30]m | + | ==== Script de backup ==== |
| - | logger=" | + | C'est fournis par [[https://torsion.org/borgmatic/ | borgmatic]]. |
| - | borg_bin=" | + | |
| - | backup_name=`date +%Y-%m-%d` | + | |
| - | backup_dest=" | + | |
| - | export BORG_RSH=" | + | On y adjoint une configuration dans ''/ |
| - | echo ======================================================================== >> $logger | + | <code yaml / |
| - | echo " | + | location: |
| - | echo ======================================================================== >> $logger | + | |
| - | date >> | + | - / |
| - | echo "" | + | |
| + | - '/ | ||
| + | - '/ | ||
| + | - '/ | ||
| + | - '/ | ||
| + | - '/ | ||
| + | - '/ | ||
| + | - '/ | ||
| + | - '/ | ||
| + | - '/ | ||
| + | - '/ | ||
| + | repositories: | ||
| + | - ' | ||
| - | echo "== Executing package selection: " >> $logger | + | storage: |
| - | date >> | + | |
| - | echo "" | + | archive_name_format: |
| - | dpkg --get-selections > /root/package-selections | + | # pour bullseye : borg_cache_directory: |
| - | echo "== Backup pre-hook" | + | consistency: |
| - | date >> | + | |
| - | echo "" | + | |
| - | for file in / | + | retention: |
| - | echo " | + | |
| - | $file | + | |
| - | done | + | |
| - | echo "== Backup launch" >> | + | hooks: |
| - | date >> | + | before_backup: |
| - | echo "" | + | - echo "Launching root backup at $(date -Iseconds)" |
| + | - for file in / | ||
| + | | ||
| + | - for file in / | ||
| + | - echo " | ||
| + | - borgmatic info --archive latest --json | ||
| + | on_error: | ||
| + | - echo " | ||
| + | # pour bullseye : | ||
| + | # after_check: | ||
| + | # - echo "Succeeded root checks at $(date -Iseconds)" | ||
| + | # after_prune: | ||
| + | # - echo " | ||
| + | </ | ||
| - | $borg_bin create --stats $backup_dest:: | + | ==== Entrée Systemd ==== |
| - | --exclude /proc \ | + | |
| - | --exclude /dev \ | + | |
| - | --exclude /sys \ | + | |
| - | --exclude / | + | |
| - | --exclude / | + | |
| - | --exclude / | + | |
| - | --exclude '/ | + | |
| - | | + | |
| - | rc=$? | + | On déclenche le backup avec un timer systemd qui retarde le démarrage d'un délai aléatoire pour éviter le DDoS de [[admin: |
| - | if [[ $rc != 0 ]]; then exit $rc; fi | + | |
| - | echo "== Backup info" | + | <code conf / |
| - | date >> | + | [Unit] |
| - | echo "" | + | Description=Run borgmatic backup |
| - | $borg_bin info $backup_dest::$backup_name 2>&1 >> $logger | + | [Timer] |
| + | # Will trigger at 01:00 each day | ||
| + | # + 0-60 random minutes | ||
| + | # + 30 minutes delay from borgmatic.service | ||
| + | OnCalendar=*-*-* 01:00:00 | ||
| + | Persistent=true | ||
| + | RandomizedDelaySec=60 minutes | ||
| - | echo "== Backup pruning" | + | [Install] |
| - | date >> | + | WantedBy=timers.target |
| - | echo "" | + | </code> |
| - | $borg_bin prune -v --keep-daily=7 --keep-weekly=5 $backup_dest 2>&1 >> $logger | + | <code conf / |
| + | [Unit] | ||
| + | Description=borgmatic backup | ||
| + | Wants=network-online.target | ||
| + | After=network-online.target | ||
| + | ConditionACPower=true | ||
| - | echo "== Backup post-hook" | + | [Service] |
| - | date >> | + | Type=oneshot |
| - | echo "" | + | |
| - | for file in /etc/ | + | ## Lower CPU and I/O priority. |
| - | [[ -f " | + | Nice=19 |
| - | echo " | + | CPUSchedulingPolicy=batch |
| - | $file | + | IOSchedulingClass=best-effort |
| - | done | + | IOSchedulingPriority=7 |
| - | + | IOWeight=100 | |
| - | echo "" | + | |
| - | date >> | + | |
| - | echo Returned $rc >> | + | |
| - | echo ======================================================================== >> $logger | + | |
| - | + | ||
| - | exit $rc | + | |
| - | </ | + | |
| - | ===== Entrée Cron ===== | + | ## Logs |
| + | StandardOutput=syslog | ||
| + | StandardError=syslog | ||
| + | SyslogIdentifier=borgmatic | ||
| + | # Prevent rate limiting of borgmatic log events. | ||
| + | LogRateLimitIntervalSec=0 | ||
| - | <code cron src/ | + | ## Launcher |
| - | 00 2 * * * root bash / | + | # Delay start to prevent backups immediately upon system startup |
| + | ExecStartPre=sleep 30m | ||
| + | ExecStart=borgmatic -v1 | ||
| + | Restart=no | ||
| </ | </ | ||
| - | ===== Scripts de pre hooks ===== | + | ==== Scripts de pre hooks ==== |
| <code bash scripts/ | <code bash scripts/ | ||
| Ligne 124: | Ligne 145: | ||
| su - postgres -c " | su - postgres -c " | ||
| done | done | ||
| + | </ | ||
| + | |||
| + | <code bash scripts/ | ||
| + | #!/bin/bash | ||
| + | |||
| + | if test -x / | ||
| + | backup_dir=/ | ||
| + | db=icinga2 | ||
| + | |||
| + | # Prepare. | ||
| + | mkdir -p $backup_dir | ||
| + | chmod 700 $backup_dir | ||
| + | |||
| + | # Backup. | ||
| + | influxd backup -portable -database $db -host localhost: | ||
| + | |||
| + | # Prune. | ||
| + | find $backup_dir/ | ||
| + | fi | ||
| </ | </ | ||
| Ligne 134: | Ligne 174: | ||
| </ | </ | ||
| - | ===== Script de post install | + | ==== Script de post install ==== |
| <code bash debian/ | <code bash debian/ | ||
| Ligne 223: | Ligne 263: | ||
| </ | </ | ||
| - | ===== Log rotate ===== | + | ==== Rsyslog |
| - | <code conf debian/logrotate> | + | <code conf / |
| - | /var/log/backup.log { | + | if $programname == ' |
| - | | + | & stop |
| - | | + | </ |
| - | compress | + | |
| - | | + | ==== Log rotate ==== |
| - | | + | |
| - | notifempty | + | <code conf debian/borgmatic> |
| - | create 644 root root | + | /var/log/borgmatic.log |
| + | { | ||
| + | rotate | ||
| + | | ||
| + | | ||
| + | missingok | ||
| + | notifempty | ||
| } | } | ||
| </ | </ | ||
| - | ====== Configuration de l' | + | ===== Configuration de l'hôte ===== |
| C'est surtout du ssh. | C'est surtout du ssh. | ||
| - | <code ssh / | + | <code ssh __felicette__/ |
| command=" | command=" | ||
| command=" | command=" | ||
| Ligne 247: | Ligne 293: | ||
| </ | </ | ||
| - | ====== Configuration du monitoring | + | ===== Configuration du monitoring ===== |
| On a un script qui parse sur chaque machine le log de backup et qui est déployé par le paquet monitoring-plugins-chapril : | On a un script qui parse sur chaque machine le log de backup et qui est déployé par le paquet monitoring-plugins-chapril : | ||
| - | <code python / | + | <code python / |
| + | # | ||
| + | |||
| + | import datetime, itertools, os, re | ||
| + | |||
| + | now = datetime.datetime.now(datetime.timezone.utc) | ||
| + | max_backup_delay = datetime.timedelta(1, | ||
| + | |||
| + | def get_name(match): | ||
| + | return match.group(' | ||
| + | |||
| + | def check_backup(filename): | ||
| + | with open(filename) as f: | ||
| + | logs = f.read() | ||
| + | mixed_statuses = list(re.finditer(r' | ||
| + | for name, statuses in itertools.groupby(sorted(mixed_statuses, | ||
| + | last = sorted(statuses, | ||
| + | print(' | ||
| + | last_date = datetime.datetime.fromisoformat(last.group(' | ||
| + | last_status = last.group(' | ||
| + | if last_status != ' | ||
| + | failure.append(name) | ||
| + | |||
| + | failure = [] | ||
| + | try: | ||
| + | check_backup ("/ | ||
| + | except Exception: | ||
| + | check_backup ("/ | ||
| + | |||
| + | if failure: | ||
| + | exit (1) | ||
| + | else: | ||
| + | exit (0) | ||
| + | </ | ||
| + | |||
| + | Et la conf icinga2 : | ||
| + | <code conf __admin__/ | ||
| + | object CheckCommand " | ||
| + | command = [ " | ||
| + | } | ||
| + | |||
| + | apply Service " | ||
| + | import " | ||
| + | |||
| + | check_command = " | ||
| + | command_endpoint = host.vars.client_endpoint | ||
| + | |||
| + | assign where host.address && !host.vars.external | ||
| + | } | ||
| + | </ | ||
| + | ===== Aspects contrôle d' | ||
| + | |||
| + | On contrôle directement chaque nuit sur la machine où les backups sont stockés ([[admin: | ||
| + | |||
| + | ==== Script de contrôle ==== | ||
| + | |||
| + | <code bash __felicette__/ | ||
| + | #! /bin/bash | ||
| + | |||
| + | logger="/ | ||
| + | borg_bin="/ | ||
| + | backup_dest="/ | ||
| + | |||
| + | |||
| + | echo ======================================================================== >> $logger | ||
| + | echo " | ||
| + | echo ======================================================================== >> $logger | ||
| + | date >> | ||
| + | echo "" | ||
| + | |||
| + | cd $backup_dest | ||
| + | |||
| + | for repository in $(ls -d $backup_dest/ | ||
| + | do | ||
| + | |||
| + | echo " | ||
| + | date | ||
| + | echo "" | ||
| + | |||
| + | $borg_bin check $repository | ||
| + | rc=$? | ||
| + | if [[ $rc != 0 ]]; then exit $rc; fi | ||
| + | done | ||
| + | |||
| + | echo "" | ||
| + | date >> | ||
| + | echo Returned $rc >> | ||
| + | echo ======================================================================== | ||
| + | |||
| + | exit $rc | ||
| + | </ | ||
| + | |||
| + | ==== Entrée Cron ==== | ||
| + | |||
| + | <code cron __felicette__/ | ||
| + | 00 4 * * * root bash / | ||
| + | </ | ||
| + | |||
| + | ==== Log rotate ==== | ||
| + | |||
| + | <code conf __felicette__/ | ||
| + | / | ||
| + | weekly | ||
| + | rotate 52 | ||
| + | compress | ||
| + | delaycompress | ||
| + | missingok | ||
| + | notifempty | ||
| + | create 644 backup backup | ||
| + | } | ||
| + | </ | ||
| + | |||
| + | ==== Configuration du monitoring ==== | ||
| + | |||
| + | On a un script qui parse sur la machine le log de check_backup : | ||
| + | <code python __felicette__/ | ||
| # | # | ||
| # -*- encoding: | # -*- encoding: | ||
| Ligne 262: | Ligne 423: | ||
| with open(log_file) as s: | with open(log_file) as s: | ||
| logs_ok = re.findall (r'^([ a-zéûA-Z:, | logs_ok = re.findall (r'^([ a-zéûA-Z:, | ||
| - | print "Last backup : " + logs_ok | + | print "Last backup |
| try: | try: | ||
| return datetime.datetime.strptime (logs_ok, '%a %b %d %X %Z %Y') | return datetime.datetime.strptime (logs_ok, '%a %b %d %X %Z %Y') | ||
| Ligne 270: | Ligne 431: | ||
| try: | try: | ||
| - | last_backup_date= last_backup ("/ | + | last_backup_date= last_backup ("/ |
| except: | except: | ||
| - | last_backup_date= last_backup ("/ | + | last_backup_date= last_backup ("/ |
| if today - last_backup_date < max_backup_delay: | if today - last_backup_date < max_backup_delay: | ||
| Ligne 278: | Ligne 439: | ||
| else: | else: | ||
| exit (1) | exit (1) | ||
| + | </ | ||
| + | |||
| + | Et la conf icinga2 : | ||
| + | <code conf __admin__/ | ||
| + | object CheckCommand " | ||
| + | command = [ "/ | ||
| + | } | ||
| + | </ | ||
| + | <code conf __admin__/ | ||
| + | /* Backup checks */ | ||
| + | apply Service "Check Backup " { | ||
| + | import " | ||
| + | |||
| + | check_command = " | ||
| + | command_endpoint = host.vars.client_endpoint | ||
| + | |||
| + | assign where host.name == " | ||
| + | } | ||
| </ | </ | ||
admin/infrastructure/backup.1543229651.txt.gz · Dernière modification : 2018/11/26 10:54 de fpoulain
