ffmpeg：删除静音部分：有效删除视频的几个部分

Question 1

https://github.com/carykh/jumpcutter（MIT 许可证）会自动删除视频中没有音频或音频很少的部分。它依赖于 ffmpeg，管道使用 Python 3 编码（可在 Ubuntu、Microsoft Windows 和 Mac OS X 上运行）。

解释：https://www.youtube.com/watch?v=DQ8orIurGxw

脚本（MIT 许可证，作者：carykh）：

from contextlib import closing
from PIL import Image
import subprocess
from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter
from scipy.io import wavfile
import numpy as np
import re
import math
from shutil import copyfile, rmtree
import os
import argparse
from pytube import YouTube

def downloadFile(url):
    """Download a YouTube video and return the path of the saved file.

    The first stream reported by pytube is downloaded, then the file is
    renamed so that every space in its path becomes an underscore. The
    renamed path is returned.
    """
    downloadedPath = YouTube(url).streams.first().download()
    sanitizedPath = "_".join(downloadedPath.split(' '))
    os.rename(downloadedPath, sanitizedPath)
    return sanitizedPath

def getMaxVolume(s):
    """Return the largest absolute sample value in *s* as a Python float.

    The maximum and the minimum are converted to float before the minimum
    is negated, so the most negative value of an integer array does not
    overflow.
    """
    highest = float(np.max(s))
    lowestNegated = -float(np.min(s))
    if lowestNegated > highest:
        return lowestNegated
    return highest

def copyFrame(inputFrame,outputFrame):
    """Copy one extracted input frame into the output frame sequence.

    Both indices are zero-based; the JPEG files on disk are numbered from 1,
    hence the ``+1`` when building the file names.

    Args:
        inputFrame: index of the source frame, or None when no usable source
            frame is known yet.
        outputFrame: index of the destination frame.

    Returns:
        True if the source frame existed and was copied, False otherwise.
    """
    # The caller falls back to its "last existing frame", which is still None
    # until a first frame has been copied successfully. Report "nothing
    # copied" in that case instead of crashing on ``None + 1``.
    if inputFrame is None:
        return False
    src = TEMP_FOLDER+"/frame{:06d}".format(inputFrame+1)+".jpg"
    dst = TEMP_FOLDER+"/newFrame{:06d}".format(outputFrame+1)+".jpg"
    if not os.path.isfile(src):
        return False
    copyfile(src, dst)
    # Progress message every 20 output frames.
    if outputFrame%20 == 19:
        print(str(outputFrame+1)+" time-altered frames saved.")
    return True

def inputToOutputFilename(filename):
    """Build the default output path by inserting "_ALTERED" before the extension.

    ``os.path.splitext`` is used so that a name without an extension simply
    gets "_ALTERED" appended, and dots in directory names are not mistaken
    for the start of an extension.
    """
    root, ext = os.path.splitext(filename)
    return root+"_ALTERED"+ext

def createPath(s):
    """Create the directory *s*, aborting if it cannot be created.

    Raises:
        AssertionError: if ``os.mkdir`` fails, for example because the
            directory already exists. The exception is raised explicitly
            (not via an ``assert`` statement) so the check still runs under
            ``python -O``, and the message names the offending path.
    """
    try:
        os.mkdir(s)
    except OSError as err:
        raise AssertionError(
            "Creation of the directory %s failed. (The TEMP folder may already exist. Delete or rename it, and try again.)" % s
        ) from err

def deletePath(s): # Dangerous! Watch out!
    """Recursively delete the directory tree at *s*.

    A failure is reported on stdout instead of being raised: first a line
    naming the directory, then the details of the caught exception.
    """
    try:
        rmtree(s,ignore_errors=False)
    except OSError as err:
        print ("Deletion of the directory %s failed" % s)
        # Print the caught exception itself; printing the OSError class
        # carries no information about what went wrong.
        print(err)

# Command-line interface of the script. Every option is optional as far as
# argparse is concerned.
parser = argparse.ArgumentParser(description='Modifies a video file to play at different speeds when there is sound vs. silence.')
parser.add_argument('--input_file', type=str,  help='the video file you want modified')
parser.add_argument('--url', type=str, help='A youtube url to download and process')
parser.add_argument('--output_file', type=str, default="", help="the output file. (optional. if not included, it'll just modify the input file name)")
parser.add_argument('--silent_threshold', type=float, default=0.03, help="the volume amount that frames' audio needs to surpass to be consider \"sounded\". It ranges from 0 (silence) to 1 (max volume)")
parser.add_argument('--sounded_speed', type=float, default=1.00, help="the speed that sounded (spoken) frames should be played at. Typically 1.")
parser.add_argument('--silent_speed', type=float, default=5.00, help="the speed that silent frames should be played at. 999999 for jumpcutting.")
parser.add_argument('--frame_margin', type=float, default=1, help="some silent frames adjacent to sounded frames are included to provide context. How many frames on either the side of speech should be included? That's this variable.")
# The sample rate is parsed as an int: it is handed to scipy's WAV writer,
# whose `rate` parameter is documented as an int. With type=float a
# user-supplied value such as 48000 became 48000.0, while the default stayed
# the int 44100.
parser.add_argument('--sample_rate', type=int, default=44100, help="sample rate of the input and output videos")
parser.add_argument('--frame_rate', type=float, default=30, help="frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work.")
parser.add_argument('--frame_quality', type=int, default=3, help="quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default.")

args = parser.parse_args()



# Copy the parsed options into the module-level settings used by the rest of
# the script.
frameRate = args.frame_rate
SAMPLE_RATE = args.sample_rate
SILENT_THRESHOLD = args.silent_threshold
FRAME_SPREADAGE = args.frame_margin
# Indexed by a chunk's 0/1 "sounded" flag: [speed when silent, speed when sounded].
NEW_SPEED = [args.silent_speed, args.sounded_speed]
# A URL takes precedence over --input_file: the video is downloaded first and
# the downloaded file becomes the input.
if args.url is not None:
    INPUT_FILE = downloadFile(args.url)
else:
    INPUT_FILE = args.input_file
URL = args.url
FRAME_QUALITY = args.frame_quality

# Validate explicitly: an `assert` statement is stripped under `python -O`,
# which would let a missing input slip through to a confusing crash later on.
if INPUT_FILE is None:
    parser.error("why u put no input file, that dum")

# Fall back to "<input>_ALTERED<ext>" when no output file was given.
if args.output_file:
    OUTPUT_FILE = args.output_file
else:
    OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

TEMP_FOLDER = "TEMP"
AUDIO_FADE_ENVELOPE_SIZE = 400 # smooth out transition's audio by quickly fading in/out (arbitrary magic number whatever)

createPath(TEMP_FOLDER)

# The ffmpeg commands are passed as argument lists (no shell), so input paths
# containing spaces or shell metacharacters are handed to ffmpeg unchanged.

# Dump every video frame into TEMP_FOLDER as numbered JPEG files.
command = ["ffmpeg", "-i", INPUT_FILE, "-qscale:v", str(FRAME_QUALITY), TEMP_FOLDER+"/frame%06d.jpg", "-hide_banner"]
subprocess.call(command)

# Extract the audio track as a stereo WAV file at the requested sample rate.
command = ["ffmpeg", "-i", INPUT_FILE, "-ab", "160k", "-ac", "2", "-ar", str(SAMPLE_RATE), "-vn", TEMP_FOLDER+"/audio.wav"]

subprocess.call(command)

# Save ffmpeg's description of the input (printed on stderr, merged into
# stdout here) so the frame rate can be parsed from it later. The probe must
# target the real input file: TEMP_FOLDER/input.mp4 is never created, so
# probing it only ever recorded an error message.
command = ["ffmpeg", "-i", INPUT_FILE]
with open(TEMP_FOLDER+"/params.txt", "w") as f:
    subprocess.call(command, stdout=f, stderr=subprocess.STDOUT)



# Load the extracted audio and remember its overall peak level, which is used
# to normalise per-frame loudness.
sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
audioSampleCount = audioData.shape[0]
maxAudioVolume = getMaxVolume(audioData)

# Look for the video stream's frame rate in the saved ffmpeg output. If no
# line matches, frameRate keeps the value it already had.
with open(TEMP_FOLDER+"/params.txt", 'r') as f:
    pre_params = f.read()
params = pre_params.split('\n')
for line in params:
    # Accept fractional rates such as "29.97 fps" as well as integer ones;
    # a digits-only pattern cannot match them at all.
    m = re.search(r'Stream #.*Video.* ([0-9]+(?:\.[0-9]+)?) fps',line)
    if m is not None:
        frameRate = float(m.group(1))

# Number of audio samples that correspond to one video frame.
samplesPerFrame = sampleRate/frameRate

audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))

# One 0/1 flag per frame, filled in by the loudness scan.
hasLoudAudio = np.zeros((audioFrameCount))



# Flag every frame whose peak audio level, relative to the peak of the whole
# recording, reaches the silence threshold.
for i in range(audioFrameCount):
    start = int(i*samplesPerFrame)
    end = min(int((i+1)*samplesPerFrame),audioSampleCount)
    audiochunks = audioData[start:end]
    maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
    if maxchunksVolume >= SILENT_THRESHOLD:
        hasLoudAudio[i] = 1

# Group consecutive frames with the same state into chunks of the form
# [firstFrame, endFrameExclusive, state]. A frame counts as "included" when
# any frame within FRAME_SPREADAGE frames of it is loud. The leading
# [0,0,0] entry is a seed that is dropped again below.
chunks = [[0,0,0]]
shouldIncludeFrame = np.zeros((audioFrameCount))
for i in range(audioFrameCount):
    start = int(max(0,i-FRAME_SPREADAGE))
    end = int(min(audioFrameCount,i+1+FRAME_SPREADAGE))
    shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
    if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]): # Did we flip?
        chunks.append([chunks[-1][1],i,shouldIncludeFrame[i-1]])

# Close the final chunk with the state of the LAST frame. Using the
# second-to-last frame's state mislabels the chunk whenever the state flips
# exactly on the last frame.
chunks.append([chunks[-1][1],audioFrameCount,shouldIncludeFrame[-1]])
chunks = chunks[1:]

# Re-time every chunk: its audio is run through audiotsm's phase vocoder at
# the chunk's speed and appended to the output audio, and the matching video
# frames are copied so that the output frames keep pace with the new audio.

# Output audio starts empty, with the same number of channels as the input.
outputAudioData = np.zeros((0,audioData.shape[1]))
# Number of output samples written so far.
outputPointer = 0

# Most recent input frame that was copied successfully; used as a stand-in
# when a requested input frame does not exist on disk.
lastExistingFrame = None
for chunk in chunks:
    # chunk is [firstFrame, endFrameExclusive, state]; slice out its samples.
    audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]

    # Round-trip through two temporary WAV files: write the chunk, let the
    # phase vocoder read it and write the re-timed version, then read that back.
    sFile = TEMP_FOLDER+"/tempStart.wav"
    eFile = TEMP_FOLDER+"/tempEnd.wav"
    wavfile.write(sFile,SAMPLE_RATE,audioChunk)
    with WavReader(sFile) as reader:
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            # chunk[2] is the 0/1 state and selects the entry of NEW_SPEED.
            tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
            tsm.run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer+leng
    # Append the re-timed audio, scaled by the input's peak level.
    outputAudioData = np.concatenate((outputAudioData,alteredAudioData/maxAudioVolume))

    # (disabled alternative to the concatenate above)
    #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

    # smooth out transition's audio by quickly fading in/out

    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        outputAudioData[outputPointer:endPointer] = 0 # audio is less than 0.01 sec, let's just remove it.
    else:
        # Linear ramp from 0 up to just below 1 over AUDIO_FADE_ENVELOPE_SIZE samples.
        premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE
        # NOTE(review): the hard-coded 2 assumes two-channel audio.
        mask = np.repeat(premask[:, np.newaxis],2,axis=1) # make the fade-envelope mask stereo
        # Fade in at the start of the chunk and fade out at its end.
        outputAudioData[outputPointer:outputPointer+AUDIO_FADE_ENVELOPE_SIZE] *= mask
        outputAudioData[endPointer-AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1-mask

    # Output frame range covered by this chunk's re-timed audio.
    startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
    endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
    for outputFrame in range(startOutputFrame, endOutputFrame):
        # Step through the input frames at the chunk's speed.
        inputFrame = int(chunk[0]+NEW_SPEED[int(chunk[2])]*(outputFrame-startOutputFrame))
        didItWork = copyFrame(inputFrame,outputFrame)
        if didItWork:
            lastExistingFrame = inputFrame
        else:
            # Requested frame is missing: reuse the last frame that existed.
            # NOTE(review): lastExistingFrame is still None here if no frame
            # has been copied successfully yet.
            copyFrame(lastExistingFrame,outputFrame)

    outputPointer = endPointer

# Write the complete re-timed audio track.
wavfile.write(TEMP_FOLDER+"/audioNew.wav",SAMPLE_RATE,outputAudioData)

# The block below is a bare string literal, i.e. disabled code that is never executed.
'''
outputFrame = math.ceil(outputPointer/samplesPerFrame)
for endGap in range(outputFrame,audioFrameCount):
    copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
'''

# Combine the re-timed frames and the re-timed audio into the output video.
# The command is passed as an argument list (no shell), so an output path
# containing spaces or shell metacharacters reaches ffmpeg unchanged.
command = ["ffmpeg", "-framerate", str(frameRate), "-i", TEMP_FOLDER+"/newFrame%06d.jpg", "-i", TEMP_FOLDER+"/audioNew.wav", "-strict", "-2", OUTPUT_FILE]
subprocess.call(command)

# Remove the temporary working directory and everything in it.
deletePath(TEMP_FOLDER)

Answer

https://github.com/carykh/jumpcutter（MIT 许可证）会自动删除视频中没有音频或音频很少的部分。它依赖于 ffmpeg，管道使用 Python 3 编码（可在 Ubuntu、Microsoft Windows 和 Mac OS X 上运行）。

解释：https://www.youtube.com/watch?v=DQ8orIurGxw

脚本（MIT 许可证，作者：carykh）：

from contextlib import closing
from PIL import Image
import subprocess
from audiotsm import phasevocoder
from audiotsm.io.wav import WavReader, WavWriter
from scipy.io import wavfile
import numpy as np
import re
import math
from shutil import copyfile, rmtree
import os
import argparse
from pytube import YouTube

def downloadFile(url):
    """Download a YouTube video and return the path of the saved file.

    The first stream reported by pytube is downloaded, then the file is
    renamed so that every space in its path becomes an underscore. The
    renamed path is returned.
    """
    downloadedPath = YouTube(url).streams.first().download()
    sanitizedPath = "_".join(downloadedPath.split(' '))
    os.rename(downloadedPath, sanitizedPath)
    return sanitizedPath

def getMaxVolume(s):
    """Return the largest absolute sample value in *s* as a Python float.

    The maximum and the minimum are converted to float before the minimum
    is negated, so the most negative value of an integer array does not
    overflow.
    """
    highest = float(np.max(s))
    lowestNegated = -float(np.min(s))
    if lowestNegated > highest:
        return lowestNegated
    return highest

def copyFrame(inputFrame,outputFrame):
    """Copy one extracted input frame into the output frame sequence.

    Both indices are zero-based; the JPEG files on disk are numbered from 1,
    hence the ``+1`` when building the file names.

    Args:
        inputFrame: index of the source frame, or None when no usable source
            frame is known yet.
        outputFrame: index of the destination frame.

    Returns:
        True if the source frame existed and was copied, False otherwise.
    """
    # The caller falls back to its "last existing frame", which is still None
    # until a first frame has been copied successfully. Report "nothing
    # copied" in that case instead of crashing on ``None + 1``.
    if inputFrame is None:
        return False
    src = TEMP_FOLDER+"/frame{:06d}".format(inputFrame+1)+".jpg"
    dst = TEMP_FOLDER+"/newFrame{:06d}".format(outputFrame+1)+".jpg"
    if not os.path.isfile(src):
        return False
    copyfile(src, dst)
    # Progress message every 20 output frames.
    if outputFrame%20 == 19:
        print(str(outputFrame+1)+" time-altered frames saved.")
    return True

def inputToOutputFilename(filename):
    """Build the default output path by inserting "_ALTERED" before the extension.

    ``os.path.splitext`` is used so that a name without an extension simply
    gets "_ALTERED" appended, and dots in directory names are not mistaken
    for the start of an extension.
    """
    root, ext = os.path.splitext(filename)
    return root+"_ALTERED"+ext

def createPath(s):
    """Create the directory *s*, aborting if it cannot be created.

    Raises:
        AssertionError: if ``os.mkdir`` fails, for example because the
            directory already exists. The exception is raised explicitly
            (not via an ``assert`` statement) so the check still runs under
            ``python -O``, and the message names the offending path.
    """
    try:
        os.mkdir(s)
    except OSError as err:
        raise AssertionError(
            "Creation of the directory %s failed. (The TEMP folder may already exist. Delete or rename it, and try again.)" % s
        ) from err

def deletePath(s): # Dangerous! Watch out!
    """Recursively delete the directory tree at *s*.

    A failure is reported on stdout instead of being raised: first a line
    naming the directory, then the details of the caught exception.
    """
    try:
        rmtree(s,ignore_errors=False)
    except OSError as err:
        print ("Deletion of the directory %s failed" % s)
        # Print the caught exception itself; printing the OSError class
        # carries no information about what went wrong.
        print(err)

# Command-line interface of the script. Every option is optional as far as
# argparse is concerned.
parser = argparse.ArgumentParser(description='Modifies a video file to play at different speeds when there is sound vs. silence.')
parser.add_argument('--input_file', type=str,  help='the video file you want modified')
parser.add_argument('--url', type=str, help='A youtube url to download and process')
parser.add_argument('--output_file', type=str, default="", help="the output file. (optional. if not included, it'll just modify the input file name)")
parser.add_argument('--silent_threshold', type=float, default=0.03, help="the volume amount that frames' audio needs to surpass to be consider \"sounded\". It ranges from 0 (silence) to 1 (max volume)")
parser.add_argument('--sounded_speed', type=float, default=1.00, help="the speed that sounded (spoken) frames should be played at. Typically 1.")
parser.add_argument('--silent_speed', type=float, default=5.00, help="the speed that silent frames should be played at. 999999 for jumpcutting.")
parser.add_argument('--frame_margin', type=float, default=1, help="some silent frames adjacent to sounded frames are included to provide context. How many frames on either the side of speech should be included? That's this variable.")
# The sample rate is parsed as an int: it is handed to scipy's WAV writer,
# whose `rate` parameter is documented as an int. With type=float a
# user-supplied value such as 48000 became 48000.0, while the default stayed
# the int 44100.
parser.add_argument('--sample_rate', type=int, default=44100, help="sample rate of the input and output videos")
parser.add_argument('--frame_rate', type=float, default=30, help="frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work.")
parser.add_argument('--frame_quality', type=int, default=3, help="quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default.")

args = parser.parse_args()



# Copy the parsed options into the module-level settings used by the rest of
# the script.
frameRate = args.frame_rate
SAMPLE_RATE = args.sample_rate
SILENT_THRESHOLD = args.silent_threshold
FRAME_SPREADAGE = args.frame_margin
# Indexed by a chunk's 0/1 "sounded" flag: [speed when silent, speed when sounded].
NEW_SPEED = [args.silent_speed, args.sounded_speed]
# A URL takes precedence over --input_file: the video is downloaded first and
# the downloaded file becomes the input.
if args.url is not None:
    INPUT_FILE = downloadFile(args.url)
else:
    INPUT_FILE = args.input_file
URL = args.url
FRAME_QUALITY = args.frame_quality

# Validate explicitly: an `assert` statement is stripped under `python -O`,
# which would let a missing input slip through to a confusing crash later on.
if INPUT_FILE is None:
    parser.error("why u put no input file, that dum")

# Fall back to "<input>_ALTERED<ext>" when no output file was given.
if args.output_file:
    OUTPUT_FILE = args.output_file
else:
    OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

TEMP_FOLDER = "TEMP"
AUDIO_FADE_ENVELOPE_SIZE = 400 # smooth out transition's audio by quickly fading in/out (arbitrary magic number whatever)

createPath(TEMP_FOLDER)

# The ffmpeg commands are passed as argument lists (no shell), so input paths
# containing spaces or shell metacharacters are handed to ffmpeg unchanged.

# Dump every video frame into TEMP_FOLDER as numbered JPEG files.
command = ["ffmpeg", "-i", INPUT_FILE, "-qscale:v", str(FRAME_QUALITY), TEMP_FOLDER+"/frame%06d.jpg", "-hide_banner"]
subprocess.call(command)

# Extract the audio track as a stereo WAV file at the requested sample rate.
command = ["ffmpeg", "-i", INPUT_FILE, "-ab", "160k", "-ac", "2", "-ar", str(SAMPLE_RATE), "-vn", TEMP_FOLDER+"/audio.wav"]

subprocess.call(command)

# Save ffmpeg's description of the input (printed on stderr, merged into
# stdout here) so the frame rate can be parsed from it later. The probe must
# target the real input file: TEMP_FOLDER/input.mp4 is never created, so
# probing it only ever recorded an error message.
command = ["ffmpeg", "-i", INPUT_FILE]
with open(TEMP_FOLDER+"/params.txt", "w") as f:
    subprocess.call(command, stdout=f, stderr=subprocess.STDOUT)



# Load the extracted audio and remember its overall peak level, which is used
# to normalise per-frame loudness.
sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
audioSampleCount = audioData.shape[0]
maxAudioVolume = getMaxVolume(audioData)

# Look for the video stream's frame rate in the saved ffmpeg output. If no
# line matches, frameRate keeps the value it already had.
with open(TEMP_FOLDER+"/params.txt", 'r') as f:
    pre_params = f.read()
params = pre_params.split('\n')
for line in params:
    # Accept fractional rates such as "29.97 fps" as well as integer ones;
    # a digits-only pattern cannot match them at all.
    m = re.search(r'Stream #.*Video.* ([0-9]+(?:\.[0-9]+)?) fps',line)
    if m is not None:
        frameRate = float(m.group(1))

# Number of audio samples that correspond to one video frame.
samplesPerFrame = sampleRate/frameRate

audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))

# One 0/1 flag per frame, filled in by the loudness scan.
hasLoudAudio = np.zeros((audioFrameCount))



# Flag every frame whose peak audio level, relative to the peak of the whole
# recording, reaches the silence threshold.
for i in range(audioFrameCount):
    start = int(i*samplesPerFrame)
    end = min(int((i+1)*samplesPerFrame),audioSampleCount)
    audiochunks = audioData[start:end]
    maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
    if maxchunksVolume >= SILENT_THRESHOLD:
        hasLoudAudio[i] = 1

# Group consecutive frames with the same state into chunks of the form
# [firstFrame, endFrameExclusive, state]. A frame counts as "included" when
# any frame within FRAME_SPREADAGE frames of it is loud. The leading
# [0,0,0] entry is a seed that is dropped again below.
chunks = [[0,0,0]]
shouldIncludeFrame = np.zeros((audioFrameCount))
for i in range(audioFrameCount):
    start = int(max(0,i-FRAME_SPREADAGE))
    end = int(min(audioFrameCount,i+1+FRAME_SPREADAGE))
    shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
    if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]): # Did we flip?
        chunks.append([chunks[-1][1],i,shouldIncludeFrame[i-1]])

# Close the final chunk with the state of the LAST frame. Using the
# second-to-last frame's state mislabels the chunk whenever the state flips
# exactly on the last frame.
chunks.append([chunks[-1][1],audioFrameCount,shouldIncludeFrame[-1]])
chunks = chunks[1:]

# Re-time every chunk: its audio is run through audiotsm's phase vocoder at
# the chunk's speed and appended to the output audio, and the matching video
# frames are copied so that the output frames keep pace with the new audio.

# Output audio starts empty, with the same number of channels as the input.
outputAudioData = np.zeros((0,audioData.shape[1]))
# Number of output samples written so far.
outputPointer = 0

# Most recent input frame that was copied successfully; used as a stand-in
# when a requested input frame does not exist on disk.
lastExistingFrame = None
for chunk in chunks:
    # chunk is [firstFrame, endFrameExclusive, state]; slice out its samples.
    audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]

    # Round-trip through two temporary WAV files: write the chunk, let the
    # phase vocoder read it and write the re-timed version, then read that back.
    sFile = TEMP_FOLDER+"/tempStart.wav"
    eFile = TEMP_FOLDER+"/tempEnd.wav"
    wavfile.write(sFile,SAMPLE_RATE,audioChunk)
    with WavReader(sFile) as reader:
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            # chunk[2] is the 0/1 state and selects the entry of NEW_SPEED.
            tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
            tsm.run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer+leng
    # Append the re-timed audio, scaled by the input's peak level.
    outputAudioData = np.concatenate((outputAudioData,alteredAudioData/maxAudioVolume))

    # (disabled alternative to the concatenate above)
    #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

    # smooth out transition's audio by quickly fading in/out

    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        outputAudioData[outputPointer:endPointer] = 0 # audio is less than 0.01 sec, let's just remove it.
    else:
        # Linear ramp from 0 up to just below 1 over AUDIO_FADE_ENVELOPE_SIZE samples.
        premask = np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE
        # NOTE(review): the hard-coded 2 assumes two-channel audio.
        mask = np.repeat(premask[:, np.newaxis],2,axis=1) # make the fade-envelope mask stereo
        # Fade in at the start of the chunk and fade out at its end.
        outputAudioData[outputPointer:outputPointer+AUDIO_FADE_ENVELOPE_SIZE] *= mask
        outputAudioData[endPointer-AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1-mask

    # Output frame range covered by this chunk's re-timed audio.
    startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
    endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
    for outputFrame in range(startOutputFrame, endOutputFrame):
        # Step through the input frames at the chunk's speed.
        inputFrame = int(chunk[0]+NEW_SPEED[int(chunk[2])]*(outputFrame-startOutputFrame))
        didItWork = copyFrame(inputFrame,outputFrame)
        if didItWork:
            lastExistingFrame = inputFrame
        else:
            # Requested frame is missing: reuse the last frame that existed.
            # NOTE(review): lastExistingFrame is still None here if no frame
            # has been copied successfully yet.
            copyFrame(lastExistingFrame,outputFrame)

    outputPointer = endPointer

# Write the complete re-timed audio track.
wavfile.write(TEMP_FOLDER+"/audioNew.wav",SAMPLE_RATE,outputAudioData)

# The block below is a bare string literal, i.e. disabled code that is never executed.
'''
outputFrame = math.ceil(outputPointer/samplesPerFrame)
for endGap in range(outputFrame,audioFrameCount):
    copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
'''

# Combine the re-timed frames and the re-timed audio into the output video.
# The command is passed as an argument list (no shell), so an output path
# containing spaces or shell metacharacters reaches ffmpeg unchanged.
command = ["ffmpeg", "-framerate", str(frameRate), "-i", TEMP_FOLDER+"/newFrame%06d.jpg", "-i", TEMP_FOLDER+"/audioNew.wav", "-strict", "-2", OUTPUT_FILE]
subprocess.call(command)

# Remove the temporary working directory and everything in it.
deletePath(TEMP_FOLDER)

Question 2

扩展 Franck 的回答，Jumpcutter 还有更好的替代方案：

https://github.com/carykh/jumpcutter/issues/180

编辑：不幸的是，这个问题已经在 Github 上被删除了，但是你可以在 Web Archive 上看到它： https://web.archive.org/web/20201203163529/https://github.com/carykh/jumpcutter/issues/180

使用自动编辑器：

https://github.com/WyattBlue/auto-editor

Answer

扩展 Franck 的回答，Jumpcutter 还有更好的替代方案：

https://github.com/carykh/jumpcutter/issues/180

编辑：不幸的是，这个问题已经在 Github 上被删除了，但是你可以在 Web Archive 上看到它： https://web.archive.org/web/20201203163529/https://github.com/carykh/jumpcutter/issues/180

使用自动编辑器：

https://github.com/WyattBlue/auto-editor

Question 3

Github 上的项目

我创建了自己的脚本，对我来说效果很好：
https://github.com/DarkTrick/python-video-silence-cutter

使用 ffmpeg 剪切静音的逻辑

使用 ffmpeg 查找所有静音部分：
ffmpeg -i ./example.mp4 -af silencedetect=n=-35dB:d=1 -f null -
[这将输出到标准输出（显然无法存储文件）。]
准备两个文件：一个包含视频过滤器，一个包含音频过滤器。这些文件告诉 ffmpeg 要保留（而不是剪掉）视频的哪些部分。
- 视频滤镜文件内容（此处video_filter.txt）：
  select='between(t,0,2.94075)+between(t,5.6015,5.60152)', setpts=N/FRAME_RATE/TB
- 音频过滤文件内容（此处audio_filter.txt）：
  aselect='between(t,0,2.94075)+between(t,5.6015,5.60152)', asetpts=N/SR/TB
- [你可以看到它们的内容几乎完全相同]
运行 ffmpeg 创建新视频：
ffmpeg -i ./example.mp4 -filter_script:v ./video_filter.txt -filter_script:a ./audio_filter.txt ./example_cut.mp4

Answer

Github 上的项目

我创建了自己的脚本，对我来说效果很好：
https://github.com/DarkTrick/python-video-silence-cutter

使用 ffmpeg 剪切静音的逻辑

使用 ffmpeg 查找所有静音部分：
ffmpeg -i ./example.mp4 -af silencedetect=n=-35dB:d=1 -f null -
[这将输出到标准输出（显然无法存储文件）。]
准备两个文件：一个包含视频过滤器，一个包含音频过滤器。这些文件告诉 ffmpeg 要保留（而不是剪掉）视频的哪些部分。
- 视频滤镜文件内容（此处video_filter.txt）：
  select='between(t,0,2.94075)+between(t,5.6015,5.60152)', setpts=N/FRAME_RATE/TB
- 音频过滤文件内容（此处audio_filter.txt）：
  aselect='between(t,0,2.94075)+between(t,5.6015,5.60152)', asetpts=N/SR/TB
- [你可以看到它们的内容几乎完全相同]
运行 ffmpeg 创建新视频：
ffmpeg -i ./example.mp4 -filter_script:v ./video_filter.txt -filter_script:a ./audio_filter.txt ./example_cut.mp4

Question 4

我现在使用的解决方案是扫描输入的标签列表文件，并根据这些信息创建包含“无静音”片段的小文件。所有片段文件都按正确的顺序写入名为 temp 的列表文件。完成后，各部分将被合并，临时文件将被清理。这并不是很快：在 i7 处理器、10GB RAM 的机器上，一段 90 分钟的视频共约 1000 个片段，需要大约 5 个小时。

我猜使用 ffmpeg 过滤器的解决方案会快得多。但目前来说太复杂了。另一个解决方案可能是代碼範例。两者可能都更有效率，因为它们不创建临时文件（带有标题、文件写入等）。

无论如何，这是当前状态下的工作代码：

#!/bin/bash
# Start with a fresh list file named "temp" (it initially holds one empty
# line); it is later filled with one "file ..." entry per cut segment and
# passed to ffmpeg's concat input.
echo "">temp

# Prompt the user for a file path.
#   $1 - unused by this function (callers pass the working directory)
#   $2 - prompt text
# The answer is stored in the global variable "fileselection".
# -r keeps backslashes in the typed path literal instead of treating them
# as escape characters.
function fileDialog {
    read -r -p "$2: " fileselection
    echo "You've selected $fileselection"
}


# Main flow: ask for a video and a label list, cut one clip per label line,
# then concatenate the clips into a single shortened video.

# Segment files are written to, and referenced from, the current directory.
WORKPATH=$(pwd)

echo "Arbeitsverzeichnis: $WORKPATH"

# Ask for the source video; fileDialog leaves the answer in $fileselection.
fileDialog "$WORKPATH" "Select original Video-File"
INFILE=$fileselection
# Capture ffmpeg's description of the input (stderr merged into stdout) and
# keep the text between "Duration: " and the next comma as the video length.
ff=$(ffmpeg -i "$INFILE" 2>&1)
d="${ff#*Duration: }"
LENGTH="${d%%,*}"
echo "Arbeitsverzeichnis: $WORKPATH"
echo "Videodatei: $INFILE"
echo "Gesamtvideolänge $LENGTH"

# Ask for the label list; each of its lines describes one segment to keep.
fileDialog "$WORKPATH" "Select Label-List-File (exported from Audacity)"
LABELFILE=$fileselection
# Number of lines in the label file = number of segments.
NUMLINES=$(awk 'END { print NR }' "$LABELFILE")
clear
echo "Arbeitsverzeichnis: $WORKPATH"
echo "Videodatei: $INFILE"
echo "Gesamtvideolänge $LENGTH"
echo "Sequence-Label-Datei: $LABELFILE"
echo $NUMLINES lines/sequences found in \'$LABELFILE\'
echo ""
echo "Start processing..."
# For every line of the label file (field 1 = start time, field 2 = end time,
# field 3 = label name) awk runs three shell commands:
#   d: print a progress message,
#   s: have ffmpeg write the span from field 1 to field 2 to tmp-<line number>.mp4,
#   p: rewrite "temp" as its previous contents plus a "file <WORKPATH>/tmp-<n>.mp4" line.
# $NUMLINES, $INFILE and $WORKPATH are spliced into the awk program text by the
# shell before awk runs.
awk '{d=sprintf("echo Processing Part "NR"/'"$NUMLINES"' named "$3" from "$1"s to "$2"s..."); system(d); s=sprintf("ffmpeg -hide_banner -loglevel panic -accurate_seek -y -i '"$INFILE"' -strict -2 -ss "$1" -to "$2" tmp-"NR".mp4>/dev/null"); system(s); p=sprintf("echo \"$(cat temp)\nfile '"$WORKPATH"'/tmp-"NR".mp4\">temp"); system(p);}' "$LABELFILE"

# Join all segments listed in "temp"; "-c copy" copies the streams as they are.
echo "Merging all "$NUMLINES" parts."
ffmpeg -f concat -i temp -c copy "short-$INFILE.mp4"

# Remove the segment files and the list file.
echo "Cleaning up..."
rm tmp-*.mp4
rm temp
# NOTE(review): this deletes the ORIGINAL input video, not a temporary file —
# confirm that this is intended.
rm "$INFILE"

还有两件小事要做：目前这会将您的视频转换为 mp4。这其实不是必需的，而且可能只是白白消耗大量 CPU 时间。

Answer

我现在使用的解决方案是扫描输入的标签列表文件，并根据这些信息创建包含“无静音”片段的小文件。所有片段文件都按正确的顺序写入名为 temp 的列表文件。完成后，各部分将被合并，临时文件将被清理。这并不是很快：在 i7 处理器、10GB RAM 的机器上，一段 90 分钟的视频共约 1000 个片段，需要大约 5 个小时。

我猜使用 ffmpeg 过滤器的解决方案会快得多。但目前来说太复杂了。另一个解决方案可能是代碼範例。两者可能都更有效率，因为它们不创建临时文件（带有标题、文件写入等）。

无论如何，这是当前状态下的工作代码：

#!/bin/bash
# Start with a fresh list file named "temp" (it initially holds one empty
# line); it is later filled with one "file ..." entry per cut segment and
# passed to ffmpeg's concat input.
echo "">temp

# Prompt the user for a file path.
#   $1 - unused by this function (callers pass the working directory)
#   $2 - prompt text
# The answer is stored in the global variable "fileselection".
# -r keeps backslashes in the typed path literal instead of treating them
# as escape characters.
function fileDialog {
    read -r -p "$2: " fileselection
    echo "You've selected $fileselection"
}


# Main flow: ask for a video and a label list, cut one clip per label line,
# then concatenate the clips into a single shortened video.

# Segment files are written to, and referenced from, the current directory.
WORKPATH=$(pwd)

echo "Arbeitsverzeichnis: $WORKPATH"

# Ask for the source video; fileDialog leaves the answer in $fileselection.
fileDialog "$WORKPATH" "Select original Video-File"
INFILE=$fileselection
# Capture ffmpeg's description of the input (stderr merged into stdout) and
# keep the text between "Duration: " and the next comma as the video length.
ff=$(ffmpeg -i "$INFILE" 2>&1)
d="${ff#*Duration: }"
LENGTH="${d%%,*}"
echo "Arbeitsverzeichnis: $WORKPATH"
echo "Videodatei: $INFILE"
echo "Gesamtvideolänge $LENGTH"

# Ask for the label list; each of its lines describes one segment to keep.
fileDialog "$WORKPATH" "Select Label-List-File (exported from Audacity)"
LABELFILE=$fileselection
# Number of lines in the label file = number of segments.
NUMLINES=$(awk 'END { print NR }' "$LABELFILE")
clear
echo "Arbeitsverzeichnis: $WORKPATH"
echo "Videodatei: $INFILE"
echo "Gesamtvideolänge $LENGTH"
echo "Sequence-Label-Datei: $LABELFILE"
echo $NUMLINES lines/sequences found in \'$LABELFILE\'
echo ""
echo "Start processing..."
# For every line of the label file (field 1 = start time, field 2 = end time,
# field 3 = label name) awk runs three shell commands:
#   d: print a progress message,
#   s: have ffmpeg write the span from field 1 to field 2 to tmp-<line number>.mp4,
#   p: rewrite "temp" as its previous contents plus a "file <WORKPATH>/tmp-<n>.mp4" line.
# $NUMLINES, $INFILE and $WORKPATH are spliced into the awk program text by the
# shell before awk runs.
awk '{d=sprintf("echo Processing Part "NR"/'"$NUMLINES"' named "$3" from "$1"s to "$2"s..."); system(d); s=sprintf("ffmpeg -hide_banner -loglevel panic -accurate_seek -y -i '"$INFILE"' -strict -2 -ss "$1" -to "$2" tmp-"NR".mp4>/dev/null"); system(s); p=sprintf("echo \"$(cat temp)\nfile '"$WORKPATH"'/tmp-"NR".mp4\">temp"); system(p);}' "$LABELFILE"

# Join all segments listed in "temp"; "-c copy" copies the streams as they are.
echo "Merging all "$NUMLINES" parts."
ffmpeg -f concat -i temp -c copy "short-$INFILE.mp4"

# Remove the segment files and the list file.
echo "Cleaning up..."
rm tmp-*.mp4
rm temp
# NOTE(review): this deletes the ORIGINAL input video, not a temporary file —
# confirm that this is intended.
rm "$INFILE"

还有两件小事要做：目前这会将您的视频转换为 mp4。这其实不是必需的，而且可能只是白白消耗大量 CPU 时间。

ffmpeg：删除静音部分：有效删除视频的几个部分

答案1

答案2

答案3

Github 上的项目

使用 ffmpeg 剪切静音的逻辑

答案4

相关内容