-
Notifications
You must be signed in to change notification settings - Fork 0
/
healbitrot
executable file
·295 lines (254 loc) · 9.05 KB
/
healbitrot
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
#!/usr/bin/env bash
set +vx
# healbitrot - Automatic check and self-healing for bitrot
# (adapted from heal-bitrots: https://github.com/liloman/heal-bitrots)
# Required: bitrot (https://github.com/liloman/bitrot), getconf (libc-bin),
# par2, grep, find, coreutils (rm mv cp mkdir cd du readlink)
# Environment variables (optional): BITROT_DATA BITROT_DIRS
Help(){
  # Print the usage text on stdout. The current values of $self,
  # $BITROT_DIRS and $BITROT_DATA are interpolated when the function runs;
  # the escaped \$NAME occurrences are printed literally.
  printf '%s\n' \
    "healbitrot - Automatic check and self-healing for bitrot" \
    "USAGE: $self [-h|--help] [<dir>...]" \
    "<dir>: directory to check; only if none given, the file in \$BITROT_DIRS" \
    "is read, one directory per line. Bitrot data is stored in \$BITROT_DATA." \
    "-h/--help: display this help text" \
    "BITROT_DIRS: $BITROT_DIRS" \
    "BITROT_DATA: $BITROT_DATA"
}
# Location of recovery data (bitrot databases and par2 files)
BITROT_DATA=${BITROT_DATA:-~/.bitrot_backups}
# File with directories to check
BITROT_DIRS=${BITROT_DIRS:-$BITROT_DATA/bitrot_dirs}
self=$(basename "$0")

# Nothing to do: no directory arguments and no directory list file.
if (($# == 0)) && [[ ! -f $BITROT_DIRS ]]; then
  Help
  echo "Either pass directories to check as commandline arguments, or in \$BITROT_DIRS"
  exit 1
fi
if [[ $1 = -h || $1 = --help ]]; then
  Help
  exit 0
fi

# Both external tools are mandatory.
if ! type -P bitrot >/dev/null; then
  echo "Install bitrot first: https://github.com/liloman/bitrot"
  exit 2
fi
if ! type -P par2 >/dev/null; then
  echo "Install par2 package first"
  exit 3
fi
[[ -d $BITROT_DATA ]] || mkdir -p "$BITROT_DATA"

# par2:  version banner as printed by "par2 -V" (used in the warning below).
# par2b: minor version number of a 0.x release; Main enables the -t option
#        only when par2b > 6. Releases with a major version >= 1 are mapped
#        to 100+minor so they always compare as newer than any 0.x release.
#        If no "major.minor" can be found, par2b is 0 (treated as old).
par2=$(par2 -V)
par2b=0
if [[ $par2 =~ ([0-9]+)\.([0-9]+) ]]; then
  # 10# forces base 10 so a minor like "08" is not read as octal.
  par2b=$((10#${BASH_REMATCH[2]}))
  if ((10#${BASH_REMATCH[1]} >= 1)); then
    par2b=$((par2b + 100))
  fi
fi
if ((par2b < 6)); then
  echo "WARNING: Old version of $par2 might choke on unicode filenames"
  echo "Install newer version: https://github.com/Parchive/par2cmdline"
fi

# Shared state read by Generate_par2files, Split_dir and Main.
files_par2=files.par2   # base name of the par2 recovery set
tmp=$(mktemp -d)        # holds the log files; kept so they can be inspected
pwd=$PWD                # start directory; Split_dir returns here first
default_options=        # par2 options, filled in by Main
createlog=$tmp/.generate_par2_create.log
repairlog=$tmp/.generate_par2_repair.log
bitrotlog=$tmp/.generate_bitrot_db.log
# Limits used when creating par2 files, and the recovery percentage that the
# block-size computation is based on.
max_files=32768 max_block_count=2000 redundancy=5
# Private helper of Generate_par2files: print the message, list the par2 log
# files that exist, and abort the script with exit status 4.
# It has its own name so that defining it does not replace an Err function
# defined elsewhere (function definitions in bash are always global).
_generate_par2files_err() {
  echo -e "$*\nFor errors, see:"
  [[ -f $createlog ]] && echo "$createlog"
  [[ -f $repairlog ]] && echo "$repairlog"
  exit 4
}

Generate_par2files(){ # $1:target_dir
  # Create par2 recovery files for the non-empty regular files directly
  # inside $1 (no recursion) and move them to $BITROT_DATA/<absolute path>.
  # Reading: BITROT_DATA files_par2 createlog repairlog default_options
  #          max_files max_block_count redundancy
  # Returns 0 when $1 is missing (it may have been deleted/renamed);
  # exits 4 on errors. The working directory is left at $1 afterwards.
  local -a target_files=() extra_options=()
  local target_dir source_dir entry
  local total_files size recovery_bytes block_size

  target_dir=$(readlink -e -- "$1")
  source_dir=$BITROT_DATA/${target_dir:1}

  echo "----------------------------------"
  echo "Generating par2 files for ${target_dir:-$1}"
  # $target_dir could have been deleted/renamed, so no abort
  if [[ ! -d $target_dir ]]; then
    echo "${target_dir:-$1} not a dir or not found"
    return 0
  fi

  mkdir -p -- "$source_dir" ||
    _generate_par2files_err "Couldn't create $source_dir"
  echo "Change local directory to $target_dir"
  cd "$target_dir" ||
    _generate_par2files_err "Couldn't cd into $target_dir "

  # Count regular files; one byte per file keeps the count correct even for
  # names that contain newlines.
  total_files=$(find . -maxdepth 1 -type f -printf '.' | wc -c)
  if ((total_files > max_files)); then
    _generate_par2files_err \
      "Number of files in $target_dir: $total_files > $max_files"
  fi

  # With more files than $max_block_count, pass an explicit block size.
  if ((total_files > max_block_count)); then
    size=$(du -bs -- "$target_dir" | cut -f1)
    # Bytes corresponding to the redundancy percentage
    recovery_bytes=$((size * redundancy / 100))
    # par2 requires the block size to be a multiple of 4: round down, but
    # never below 4.
    block_size=$((recovery_bytes * 4 / max_block_count / 4 * 4))
    ((block_size >= 4)) || block_size=4
    echo "Increasing the default block size for $target_dir to $block_size bytes"
    extra_options+=("-s$block_size")
  fi

  # NUL-delimited so names with spaces or glob characters stay intact.
  while IFS= read -r -d '' entry; do
    target_files+=("$entry")
  done < <(find . -maxdepth 1 -type f ! -size 0 -print0)

  if ((${#target_files[@]} > 0)); then
    # Generate new par2 files
    echo "Launching par2create"
    # $default_options is a space-separated option string: split on purpose.
    # shellcheck disable=SC2086
    if ! par2create $default_options "${extra_options[@]}" -v \
      "$files_par2" "${target_files[@]}" &>"$createlog"; then
      rm -f -- *.par2
      _generate_par2files_err "Couldn't generate par2 files for $target_dir"
    fi
    echo "Moving par2 files to $source_dir"
    if ! mv -- *.par2 "$source_dir"; then
      rm -f -- *.par2
      _generate_par2files_err \
        "Couldn't copy par2 files and bitrot database to $source_dir"
    fi
  fi
  echo "Done!"
}
# Private helper of Split_dir: print the message, list the log files that
# exist, and abort the script with exit status 5.
# It has its own name because Generate_par2files defines a global Err of its
# own, which would otherwise replace this one after the first call.
_split_dir_err() {
  echo -e "$*\nFor errors, see:"
  [[ -f $bitrotlog ]] && echo "$bitrotlog"
  [[ -f $createlog ]] && echo "$createlog"
  [[ -f $repairlog ]] && echo "$repairlog"
  exit 5
}

# Private helper of Split_dir: if the current directory has a .bitrot.db,
# move all .bitrot.* files into $1 (created if needed); abort on failure.
_split_dir_store_db() { # $1:source_dir
  [[ -e .bitrot.db ]] || return 0
  [[ -d $1 ]] || mkdir -p "$1"
  echo "Moving bitrot db files to $1"
  if ! mv -- .bitrot.* "$1"; then
    rm -f -- .bitrot.*
    _split_dir_err "Couldn't move bitrot files to $1"
  fi
}

Split_dir() { # $1:target_dir, Reading: pwd bitrotlog createlog repairlog
  # Check one directory tree with "bitrot -v", repair files reported as
  # corrupted with par2repair, and regenerate par2 files for directories with
  # new/updated/missing/renamed entries. The bitrot database is copied in from
  # $BITROT_DATA/<absolute path> before the check and moved back afterwards.
  # Also reading: BITROT_DATA files_par2. Exits 5 on errors.
  local line dir changed corrupted orig dest par2_files full_target
  local target_dir source_dir
  local regex_bitrot='error: SHA.* mismatch for (.*): expected .*'
  local regex_changes='([[:digit:]]* entries in the database. )?([[:digit:]]*) entries (updated|new|missing):'
  local regex_moved='([[:digit:]]* entries in the database. )?([[:digit:]]*) entries renamed:'
  local regex_dir_changes='(.*)' regex_dir_moved='from (.*) to (.*)'
  # Associative arrays are used as sets of directories (unique keys).
  local -A dirs_bitrot=() dirs_change=() dirs_moved=()
  local count=0 count_moved=0

  # Relative arguments are resolved against the start directory.
  cd "$pwd" || _split_dir_err "Couldn't cd into $pwd"
  target_dir=$(readlink -e -- "$1" 2>/dev/null)
  source_dir=$BITROT_DATA/${target_dir:1}
  # Kept from the original: this shell option stays enabled after the call.
  shopt -s extglob

  echo "----------------------------------"
  echo "Checking ${target_dir:-$1} for bitrot"
  [[ -d $target_dir ]] ||
    _split_dir_err "${target_dir:-$1} not a dir or not found"

  # Never check the backups dir itself or anything below it. The right-hand
  # side is quoted so that it is compared literally, not as a glob pattern.
  full_target=$target_dir/
  if [[ ${full_target::$((${#BITROT_DATA}+1))} == "$BITROT_DATA/" ]]; then
    echo "Not checking: $target_dir is a subdir of $BITROT_DATA"
    return 0
  fi

  cd "$target_dir" || _split_dir_err "couldn't cd to $target_dir"
  if [[ -f $source_dir/.bitrot.db ]]; then
    echo "Copying bitrot db files to $PWD"
    cp -- "$source_dir"/.bitrot.* . ||
      _split_dir_err "Couldn't copy $source_dir/.bitrot.* files to $PWD"
  fi

  echo "Launching bitrot -v"
  bitrot -v &>"$bitrotlog"

  # Parse the log. A "<n> entries ...:" header switches to a mode in which
  # the following lines are consumed as entries until the counter is zero.
  while IFS= read -r line; do
    if ((count == 0 && count_moved == 0)); then
      # Not inside an entry list: look for bitrot errors and list headers
      if [[ $line =~ $regex_bitrot ]]; then
        # Drop the leading "./" of the reported path
        corrupted=$target_dir/${BASH_REMATCH[1]:2}
        echo "Bitrot detected in file:$corrupted"
        dirs_bitrot["${corrupted%/*}/"]=$corrupted
      fi
      # Detect new/updated/missing files
      if [[ $line =~ $regex_changes ]]; then
        count=${BASH_REMATCH[2]}
      fi
      # Detect renamed files
      if [[ $line =~ $regex_moved ]]; then
        count_moved=${BASH_REMATCH[2]}
      fi
    elif ((count)); then
      if [[ $line =~ $regex_dir_changes ]]; then
        changed=${BASH_REMATCH[1]}
        dirs_change["${changed%/*}/"]=$changed
        echo "Change detected in:$changed"
        ((count--))
      fi
    elif ((count_moved)); then
      if [[ $line =~ $regex_dir_moved ]]; then
        orig=${BASH_REMATCH[1]} dest=${BASH_REMATCH[2]}
        echo "Move detected from:$orig to:$dest"
        dirs_moved["${orig%/*}"]=1
        dirs_moved["${dest%/*}"]=1
        ((count_moved--))
      fi
    fi
  done <"$bitrotlog"

  # Repair every directory that contains a corrupted file (absolute paths)
  for dir in "${!dirs_bitrot[@]}"; do
    cd "$dir" || _split_dir_err "Couldn't cd into $dir "
    echo "Recovering from bitrot with par2 files in $dir"
    par2_files=$BITROT_DATA/${dir:1}/$files_par2
    # If par2 files generated already: copy them
    if [[ -f $par2_files ]]; then
      echo "Copying $par2_files to $PWD"
      if ! cp -- "${par2_files%/*}"/*.par2 .; then
        rm -f -- *.par2
        _split_dir_err "Couldn't copy par2 files to $target_dir"
      fi
    fi
    # -p: purge recovery par2 files if successful
    echo "Repairing bitrot files with par2repair"
    if ! par2repair -p "$files_par2" &>"$repairlog"; then
      rm -f -- *.par2
      cd "$target_dir" || _split_dir_err "couldn't cd to $target_dir"
      # Save the database before giving up
      _split_dir_store_db "$source_dir"
      _split_dir_err "Couldn't repair $dir with par2 files"
    fi
    # Update the db for the new changes
    cd "$target_dir" || _split_dir_err "couldn't cd to $target_dir"
    echo "Launching bitrot -v after bitrot"
    bitrot -v &>"$bitrotlog"
  done

  # Regenerate dir changes. Keys are relative to $target_dir and
  # Generate_par2files changes the working directory, so cd back each time.
  for dir in "${!dirs_change[@]}"; do
    cd "$target_dir" || _split_dir_err "couldn't cd to $target_dir"
    echo "Launching generate_par2files in:$dir"
    Generate_par2files "$dir"
  done

  # Regenerate dir moved
  for dir in "${!dirs_moved[@]}"; do
    # cd first: the existence test below uses a path relative to $target_dir
    cd "$target_dir" || _split_dir_err "couldn't cd to $target_dir"
    # Not present: just normal in this case
    [[ -d $dir ]] || continue
    echo "launching generate_par2files in:$dir"
    Generate_par2files "$dir"
  done

  if ((${#dirs_bitrot[@]} == 0 && ${#dirs_change[@]} == 0 &&
    ${#dirs_moved[@]} == 0)); then
    echo "No changes detected"
  fi

  cd "$target_dir" || _split_dir_err "couldn't cd to $target_dir"
  _split_dir_store_db "$source_dir"
  echo "Done!"
}
Main(){ # Reading: par2 par2b BITROT_DIRS, Changing: default_options
  # Append memory/thread options to default_options, then run Split_dir for
  # every directory given as an argument or, when there are no arguments,
  # for every non-empty line of the file $BITROT_DIRS.
  local key value mem_kb=0 cpus path

  # par2 option: use half the available memory (kB in /proc/meminfo, MB for
  # par2). The option is left out when the value is unknown or would be 0.
  if [[ -r /proc/meminfo ]]; then
    while read -r key value _; do
      if [[ $key == MemAvailable: && $value =~ ^[0-9]+$ ]]; then
        mem_kb=$value
        break
      fi
    done </proc/meminfo
  fi
  if ((mem_kb >= 2048)); then
    default_options+=" -m$((mem_kb/2048))"
  fi

  if ((par2b > 6)); then
    # par2 option: set number of threads (more than 2 processors: leave 2)
    cpus=$(getconf _NPROCESSORS_ONLN)
    if ((cpus == 2)); then
      default_options+=" -t1"
    elif ((cpus > 2)); then
      default_options+=" -t$((cpus-2))"
    fi
  fi

  if (($#)); then
    # At least one argument: for each path
    for path; do
      Split_dir "$path"
    done
  else
    # File(s) in $BITROT_DIRS, one directory per line. The list is read on
    # fd 3 so commands started by Split_dir cannot consume it via stdin; the
    # "|| [[ -n ... ]]" keeps a last line that has no trailing newline.
    while IFS= read -r -u 3 path || [[ -n $path ]]; do
      [[ -n $path ]] || continue # skip blank lines
      Split_dir "$path"
    done 3<"$BITROT_DIRS"
  fi
}
# Entry point: hand all command line arguments to Main, then exit with
# status 0 once it returns.
Main "$@"
exit 0