我想从各种 PDF 文件中提取页面,然后从这些页面创建新的 PDF 文件。我应该使用什么工具?
答案1
基本做法是使用 pdftk:
pdftk B=<input-file-1.pdf> C=<input-file-2.pdf> ... cat B<pagenum1> C<pagenum2> ... output <outputfile.pdf>;
下面是一个脚本,它接受一个输入文件作为参数;该文件的每一行先是一个 PDF 文件名(文件名中不能包含空格!),然后是一个空格和要提取的页码:
<input-file-1.pdf> <pagenum1>
<input-file-2.pdf> <pagenum2>
...
#!/bin/bash
# Shared state used by the rest of the script:
#   input_lines   - "HANDLE=file.pdf" arguments collected for pdftk
#   page_spec     - "HANDLE<page>" arguments placed after pdftk's "cat"
#   character_set - the letters that handles are built from
#   handle        - the most recently issued handle; it is advanced before
#                   every use, so the starting value "A" is never handed out
declare -a input_lines page_spec
character_set=({A..Z})
handle="A"
#######################################
# Print the letter that follows the given one in character_set.
# Globals:   character_set (read)
# Arguments: $1 - the letter to look up
# Outputs:   the following letter on stdout; an empty line when $1 is the
#            last entry of character_set; the word "error" when $1 is not
#            in character_set at all
# Returns:   0 when $1 was found, 1 otherwise
#######################################
increment_character() {
  local i # keep the loop index out of the caller's scope
  for ((i = 0; i < ${#character_set[@]}; ++i)); do
    if [[ "$1" == "${character_set[i]}" ]]; then
      echo "${character_set[i + 1]}"
      return 0
    fi
  done
  # Still printed on stdout for existing callers that capture the output,
  # but the status now lets callers detect the failure as well.
  echo "error"
  return 1
}
#######################################
# Advance the global handle to its successor and print the result.
# Handles count upward like spreadsheet column names:
#   A, B, ... Z, AA, AB, ... AZ, BA, ...
# Globals:   handle (read and written), character_set (read)
# Arguments: none
# Outputs:   the new handle on stdout
# Note:      when called inside $(...), the assignment to handle happens in
#            a subshell, so the caller must store the printed value itself.
#######################################
increase_handle() {
  local pos letter prefix suffix new_letter
  local rolled_over=n
  # Spelled out instead of character_set[-1]: negative subscripts are not
  # available in old bash releases such as 3.2.
  local last_letter="${character_set[${#character_set[@]} - 1]}"

  # Walk from the rightmost letter to the left, carrying while letters wrap.
  for ((pos = ${#handle}; pos > 0; --pos)); do
    letter="${handle:pos-1:1}"
    prefix="${handle:0:pos-1}"
    suffix="${handle:pos}"
    if [[ "$letter" == "$last_letter" ]]; then
      # This position wraps around to "A"; the carry moves one place left.
      handle="${prefix}A${suffix}"
      rolled_over=y
    else
      new_letter=$(increment_character "$letter")
      handle="${prefix}${new_letter}${suffix}"
      rolled_over=n
      break
    fi
  done

  # Every position wrapped, so the handle grows by one letter.
  if [[ "$rolled_over" == 'y' ]]; then
    handle="A${handle}"
  fi
  echo "$handle"
}
# Driver: read "<pdf-file> <page>" pairs from the list file given as $1 and
# have pdftk concatenate the requested pages into "$1".pdf.
if [[ $# -lt 1 || ! -r "$1" ]]; then
  printf 'usage: %s <list-file>\n' "${0##*/}" >&2
  exit 1
fi

echo "extracting..."
# The `|| [[ -n "$file" ]]` part keeps a final line that has no trailing
# newline: read returns non-zero there even though it filled the variables.
while IFS=' ' read -r file page || [[ -n "$file" ]]; do
  # Skip comment lines (first field starts with "#") and blank lines.
  if [[ "$file" == "#"* || -z "$file" ]]; then
    continue
  fi
  # increase_handle runs in a subshell here, so store its output explicitly.
  handle="$(increase_handle)"
  input_lines+=("$handle=$file")
  page_spec+=("$handle$page")
  echo " $file"
  echo " page $page"
done < "$1"

# Without any entries pdftk would be invoked with no inputs at all.
if ((${#input_lines[@]} == 0)); then
  printf 'no input files listed in %s\n' "$1" >&2
  exit 1
fi

pdftk "${input_lines[@]}" cat "${page_spec[@]}" output "$1".pdf