我有一个国际会议视频,里面包含两种语言,也就是视频里混杂着英文和中文的句子。我想用命令行删除中文部分。
首先,使用whisper
whisper myvideo --model large --language en
字幕文件包含两种语言和时间
1
00:00:00,000 --> 00:00:04,220
if you are not concerned and doing the work of the Lord.
2
00:00:09,120 --> 00:00:13,880
如果你不愿意去遵行耶稣基督的话语的话,
3
00:00:14,220 --> 00:00:18,220
就没有必要昼夜去默想神的话。
4
00:00:18,220 --> 00:00:22,200
Take more of me, give me more of you.
....
问题是:如何使用命令行和 ffmpeg,根据字幕的时间戳删除视频中所有的中文部分?视频很长,所以希望用命令行自动完成,而不是手动剪辑。
步骤 1:首先我需要识别字幕中每一行的语言:
#!/bin/bash
# Print every line of the file given as $1, followed by the language code
# that `trans -id` reports for that line.
# usage: bash <this script> subtitles.srt
while IFS= read -r line; do
  echo "text: $line"
  lan=
  if [[ -n "$line" ]]; then
    # Command substitution is required to capture the output; a bare
    # `lan= cmd` only runs cmd with an empty lan in its environment.
    # stdin is pointed at /dev/null so trans cannot consume the loop's input.
    lan=$(trans -id "$line" < /dev/null | awk '/^Code/ {print $2}')
  fi
  echo "lan: $lan"
done < "$1"
但是上面的 bash 脚本还不能正常工作,该怎么办?
答案1
我写了一个bash脚本,勉强把视频里的中文部分去掉。
1. 使用 openai-whisper 生成带时间戳的中英文字幕文件:
whisper myvideo --model large-v3 --language Chinese
2. 使用 bash 提取所需的英文片段的时间。
3. 使用 ffmpeg 提取并合并英文视频片段。
第 2 步和第 3 步的完整脚本如下:
#!/bin/bash
# Date: 20240214
# [email protected]
#
# Cut the Chinese-language parts out of a video, driven by the subtitle file
# <name>.srt that must sit in the current directory next to the outputs.
#
# usage: bash video_rm_chi.sh myvideo.*
# output: *_eng.srt
# output: *_t.txt
# output: *_list.txt
# output: *_list.*
# output: *_eng.*

if [[ $# -lt 1 ]]; then
  echo "usage: bash video_rm_chi.sh myvideo.*" >&2
  exit 1
fi

# true: the final concat step re-encodes instead of stream-copying.
re_encode=false
# true: delete the intermediate files once the final video is written.
clean_final=false

fullfile="$1"
filename=$(basename -- "$fullfile")
extension="${filename##*.}"
filename="${filename%.*}"

if [[ ! -r "$filename".srt ]]; then
  echo "subtitle file not readable: $filename.srt" >&2
  exit 1
fi

# Drop leftovers from an earlier run; -f keeps a first run free of errors.
rm -f -- "$filename"_t.txt
rm -f -- "$filename"_list*
rm -f -- "$filename"_eng*

# Number of lines in the subtitle file (awk also counts an unterminated
# last line, which wc -l would miss).
N=$(awk 'END {print NR}' "$filename".srt)
echo "N= $N"
# Each cue takes four lines (index, timing, text, blank). Rounding with +1
# still counts the last cue when its trailing blank line is missing.
N4=$(( (N + 1) / 4 ))
echo "N/4= $N4"

# State for the cue scan: current cue number, count of consecutive Chinese
# cues, count of consecutive non-Chinese cues.
n=1
jump=1
eng=0
tstart=0
tend=0
# Scan the subtitle file cue by cue and record every run of consecutive
# non-Chinese cues as one "start end" line in <name>_t.txt, with the SRT
# decimal comma already turned into a dot.
# Layout assumption: each cue is exactly four lines (index, timing, a single
# text line, blank), so cue n has its timing on line 4n-2 and text on 4n-1.

# Load the file once instead of re-reading it for every cue.
srt_lines=()
while IFS= read -r srt_line || [[ -n "$srt_line" ]]; do
  srt_lines+=("$srt_line")
done < "$filename".srt

# Start from an empty list so later steps always find the file.
: > "$filename"_t.txt

while (( n <= N4 )); do
  n4=$(( n * 4 - 1 ))   # 1-based line number of this cue's text
  echo "n = $n , n4 = $n4"
  # ugrep echoes the matching text line, which doubles as progress output.
  if printf '%s\n' "${srt_lines[n4 - 1]}" | ugrep "[\x{4e00}-\x{9fcc}]"; then
    echo "line $n4 contains Chinese, reject, and jump to next (x4) lyrics line"
    if (( eng > 0 )); then
      # t2 already holds the end time of the preceding non-Chinese cue.
      echo "t2 $t2"
    fi
    jump=$(( jump + 1 ))
    eng=0
  else
    # Timing line looks like "00:00:00,000 --> 00:00:04,220".
    timing=${srt_lines[n4 - 2]}
    timing=${timing%$'\r'}   # tolerate CRLF subtitle files
    read -r cue_start _ cue_end _ <<< "$timing"
    if (( jump > 0 && eng == 0 )); then
      # First non-Chinese cue after a Chinese one (or at the very start).
      echo '======================'
      t1=$cue_start
    fi
    # Extend the run on every cue so a run that reaches the end of the file
    # is closed at its last cue, not at its first.
    t2=$cue_end
    jump=0
    eng=$(( eng + 1 ))
  fi
  # A run has just been closed by the first Chinese cue that follows it.
  if (( jump == 1 && eng == 0 )); then
    echo "write1 $t1 $t2"
    echo "${t1//,/.} ${t2//,/.}" >> "$filename"_t.txt
  fi
  # The file ends while a run is still open.
  if (( jump == 0 && n == N4 )); then
    echo "write2 $t1 $t2"
    echo "${t1//,/.} ${t2//,/.}" >> "$filename"_t.txt
  fi
  n=$(( n + 1 ))
done
# Convert a timestamp to milliseconds.
# Arguments: $1 - timestamp as HH:MM:SS.mmm (a comma before the
#                 milliseconds is accepted as well)
# Outputs:   the number of milliseconds on stdout
ts_get_msec()
{
  local h m s ms
  # Turn every separator into a blank so read can split the four fields.
  IFS=' ' read -r h m s ms <<< "${1//[.,:]/ }"
  # 10# forces base 10: fields such as "08" or "009" would otherwise be
  # rejected as invalid octal numbers. Missing fields count as 0.
  echo $(( 10#${h:-0} * 3600000 + 10#${m:-0} * 60000 + 10#${s:-0} * 1000 + 10#${ms:-0} ))
}
# Cut one clip per "start end" pair listed in <name>_t.txt and add up the
# total length of the kept material in milliseconds.
duration=0
i=1
# The list is read through fd 3 because ffmpeg reads from stdin and would
# otherwise swallow the remaining lines
# (https://stackoverflow.com/a/55682555/5845212).
while read -r start_ts stop_ts <&3; do
  # Skip blank or incomplete lines.
  [[ -n "$start_ts" && -n "$stop_ts" ]] || continue
  printf -v i_formatted '%04d' "$i"
  segment="$filename"_list"$i_formatted"."$extension"
  if [ "$re_encode" = true ]; then
    # No -c copy here, so ffmpeg re-encodes the clip.
    ffmpeg -nostdin -i "$fullfile" -ss "$start_ts" -to "$stop_ts" "$segment"
  else
    # Stream copy: fast, no quality loss.
    ffmpeg -nostdin -i "$fullfile" -ss "$start_ts" -to "$stop_ts" -c copy "$segment"
  fi
  i=$(( i + 1 ))
  START=$(ts_get_msec "$start_ts")
  STOP=$(ts_get_msec "$stop_ts")
  DIFF=$(( STOP - START ))
  duration=$(( duration + DIFF ))
  echo "start $START"
  echo "stop $STOP"
  echo "diff $DIFF"
  echo "duration $duration"
done 3< "$filename"_t.txt
echo "f duration $duration"
# Write the concat list (one "file '<clip>'" line per clip, in glob order),
# join the clips into <name>_eng.<ext>, print the total length and
# optionally remove the intermediate files.
for f in "$filename"_list*."$extension"; do
  # An unmatched glob stays literal; do not list a file that is not there.
  [[ -e "$f" ]] || continue
  echo "file '$f'"
done > "$filename"_list.txt

if [ "$re_encode" = true ]; then
  ffmpeg -f concat -safe 0 -i "$filename"_list.txt "$filename"_eng."$extension"
else
  ffmpeg -f concat -safe 0 -i "$filename"_list.txt -c copy "$filename"_eng."$extension"
fi

# duration is in milliseconds; zero-pad so that 7 ms prints as .007.
min=$(( duration / 60000 ))
sec=$(( duration % 60000 / 1000 ))
ms=$(( duration % 1000 ))
printf 'video duration %d:%02d.%03d\n' "$min" "$sec" "$ms"

if [ "$clean_final" = true ]; then
  rm -f -- "$filename"_eng.srt
  rm -f -- "$filename"_t.txt
  # Remove the concat list and the numbered clips it refers to.
  rm -f -- "$filename"_list.txt "$filename"_list[0-9]*."$extension"
fi