使用 youtube-dl 将自动生成的字幕转换为 srt 格式

使用 youtube-dl 将自动生成的字幕转换为 srt 格式

我想从 YouTube 下载自动生成的字幕,并将其转换为 srt 格式。我使用的命令是:`youtube-dl --write-auto-sub --sub-lang en --convert-subs=srt --skip-download <URL>`,但它只输出 .vtt 文件,而不会将其转换为 srt。

但是去掉 `--skip-download` 之后,它会下载视频和 vtt 文件,最后再将 vtt 转换为 srt。

有没有办法只下载 vtt 文件,然后将其转换为 srt 而不下载视频?

答案1

原因可能是在此过程中后台使用的 ffmpeg 不直接处理“ffmpeg.exe -i path.vtt path.vtt.srt”之类的重新编码请求。它需要一个文件流,为此它需要一个视频文件。

至少,这样的请求对我来说不起作用,并且报告没有流,尽管互联网上有类似的例子。

答案2

我制作了一个非常简单的命令行工具来将 vtt 转换为 srt,它仅通过非常简单的文本处理(不使用词汇分析或标记)工作,并且它仅适用于 Youtube 的自动转录字幕,对于这样的文本:

WEBVTT
Kind: captions
Language: en

00:00:01.740 --> 00:00:05.030 align:start position:0%
没有什么<00:00:02.700>是<00:00:03.179>不可能的......

您可以在此处下载 ZIP 文件:http://pececko.szm.com/@ VTT to SRT。我不打算更新它,所以请将此代码视为公共领域。我希望这对某些人有用。

// C program to convert subtitles from VTT file to SRT file
// public domain
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXLINELEN 32767
// Offset from the start of " --> " to the first character after the end
// timestamp: 5 separator characters plus 12 for "HH:MM:SS.mmm".
#define CUE_SETTINGS_OFFSET 17u

// Returns nonzero when `line` contains the " --> " separator, i.e. is treated
// as a cue timing line.
static int is_timeline(const char *line)
{
    return strstr(line, " --> ") != NULL;
}

// Reads one line from stdin into `dst` (capacity `size`) and cuts it at the
// first line break. On EOF or read error `dst` becomes the empty string.
static void read_line(char *dst, size_t size)
{
    if (fgets(dst, (int)size, stdin) == NULL) {
        dst[0] = '\0';
        return;
    }
    dst[strcspn(dst, "\r\n")] = '\0';
}

// Copies `src` into `dst` (capacity `size`). Returns 0 on success, -1 when
// `src` does not fit; `dst` is left untouched in that case.
static int copy_name(char *dst, size_t size, const char *src)
{
    size_t len = strlen(src);

    if (len >= size) {
        return -1;
    }
    memcpy(dst, src, len + 1);
    return 0;
}

// Appends `ext` to `name` unless `name` already ends with it (case-sensitive).
// Returns 0 on success, -1 when the extension does not fit in `size` bytes.
static int ensure_extension(char *name, size_t size, const char *ext)
{
    size_t len = strlen(name);
    size_t extlen = strlen(ext);

    if (len >= extlen && strcmp(name + len - extlen, ext) == 0) {
        return 0;
    }
    if (len + extlen >= size) {
        return -1;
    }
    memcpy(name + len, ext, extlen + 1);
    return 0;
}

// Removes every "<...>" span from `line` in place. An unclosed '<' discards
// the rest of the line. The result is always NUL-terminated.
static void strip_tags(char *line)
{
    char *out = line;
    int inside = 0; // boolean: currently between '<' and '>'

    for (const char *in = line; *in != '\0'; in++) {
        if (*in == '<') {
            inside = 1;
        } else if (*in == '>') {
            inside = 0;
        } else if (!inside) {
            *out++ = *in;
        }
    }
    *out = '\0';
}

// Normalizes a timing line in place: cuts everything after the end timestamp
// (e.g. "align:start position:0%") and replaces every '.' with ','.
// `arrow` points at the " --> " separator inside `line`.
static void clean_timeline(char *line, char *arrow)
{
    // Only truncate when the line really extends past the end timestamp;
    // writing arrow[17] on a shorter line would land outside the string.
    if (strlen(arrow) > CUE_SETTINGS_OFFSET) {
        arrow[CUE_SETTINGS_OFFSET] = '\0';
    }
    for (char *p = line; *p != '\0'; p++) {
        if (*p == '.') {
            *p = ',';
        }
    }
}

// Converts a YouTube auto-generated .vtt subtitle file into an .srt file.
//
// argv[1] (optional): input file name; prompted on stdin when absent.
// argv[2] (optional, used only when argc == 3): output file name; otherwise
//         prompted, with the input name carrying an "srt" suffix as default.
// Missing ".vtt"/".srt" extensions are appended.
//
// Returns 0 on success or when an empty input name is given, 1 on any error
// (file not found, read/write failure, out of memory, file name too long).
int main(int argc, char **argv)
{
    char filein[MAXLINELEN];  // = "test.vtt"
    char fileout[MAXLINELEN]; // = "test.srt"
    char *buffer = NULL;      // whole input file, split into lines in place
    char **arr = NULL;        // pointers to the lines kept so far
    char *line, *ptr;
    FILE *fh;
    long numbytes = -1;
    size_t nread, nlines, count, kept, first, len, i;
    int previousline, timing, cue; // previousline/timing are booleans
    int rc = 1;

    fputs("Convert Youtube's Autotranscribed VTT to SRT [build 2023-06-22]\nConverts .vtt files from youtube to .srt files.\nUsage: vtttosrt.exe [sourcesubtitles.vtt] [targetsubtitles.srt]\nPress Ctrl+C to abort.\n", stdout);

    if (argc > 1) {
        if (copy_name(filein, sizeof filein, argv[1]) != 0) {
            printf("Filename too long.\n");
            return 1;
        }
    } else {
        printf("Enter input filename (*.vtt) :");
        read_line(filein, sizeof filein);
    }
    if (filein[0] == '\0') {
        return 0;
    }
    if (ensure_extension(filein, sizeof filein, ".vtt") != 0) {
        printf("Filename too long.\n");
        return 1;
    }

    fh = fopen(filein, "r"); // Opening file in reading mode
    if (fh == NULL) {
        printf("File %s not found.\n", filein);
        return 1;
    }

    // Determine the file size, then load the whole file into memory.
    if (fseek(fh, 0L, SEEK_END) == 0) {
        numbytes = ftell(fh);
    }
    if (numbytes < 0 || fseek(fh, 0L, SEEK_SET) != 0) {
        printf("Cannot read file %s.\n", filein);
        fclose(fh);
        return 1;
    }
    // One extra zeroed byte guarantees the last line is NUL-terminated even
    // when the file does not end with a newline.
    buffer = calloc((size_t)numbytes + 1, sizeof *buffer);
    if (buffer == NULL) {
        fclose(fh);
        return 1;
    }
    // In text mode fread may legitimately return fewer bytes than the size
    // reported by ftell, so only a stream error is treated as a failure.
    nread = fread(buffer, sizeof *buffer, (size_t)numbytes, fh);
    if (ferror(fh)) {
        printf("Cannot read file %s.\n", filein);
        fclose(fh);
        goto cleanup;
    }
    fclose(fh);

    // Size the line table exactly: one entry per newline plus one.
    nlines = 1;
    for (i = 0; i < nread; i++) {
        if (buffer[i] == '\n') {
            nlines++;
        }
    }
    arr = calloc(nlines, sizeof *arr);
    if (arr == NULL) {
        goto cleanup;
    }

    // Split the buffer into lines in place.
    count = 1;
    arr[0] = buffer;
    for (i = 0; i < nread; i++) {
        if (buffer[i] == '\n') {
            buffer[i] = '\0';
            arr[count++] = &buffer[i + 1];
        }
    }

    // Pass 1: drop lines of at most one character, strip cue settings from
    // timing lines and <...> tags from text lines.
    kept = 0;
    for (i = 0; i < count; i++) {
        line = arr[i];
        if (strlen(line) <= 1) {
            continue; // skip empty (or single-character) line
        }
        arr[kept++] = line;
        ptr = strstr(line, " --> ");
        if (ptr != NULL) {
            clean_timeline(line, ptr);
        } else {
            strip_tags(line);
        }
    }
    count = kept;

    // Pass 2: discard everything before the first timing line, then remove
    // each text line that repeats the most recently kept text line.
    first = 0;
    while (first < count && !is_timeline(arr[first])) {
        first++;
    }
    if (first == count) {
        count = 0; // no timing line at all: nothing to export
    } else {
        line = arr[0] = arr[first]; // comparison reference starts at the first timing line
        kept = 1;
        for (i = first + 1; i < count; i++) {
            timing = is_timeline(arr[i]);
            if (!timing && strcmp(arr[i], line) == 0) {
                continue; // duplicate of the previous text line
            }
            if (!timing) {
                line = arr[i]; // new text line becomes the reference
            }
            arr[kept++] = arr[i];
        }
        count = kept;
    }

    // Pass 3: thin out timing lines. A timing line is kept only when the
    // line examined just before it was also a timing line; text lines are
    // always kept. arr[0] is kept unconditionally.
    // NOTE(review): this drops the first timing line after each text line,
    // which presumably targets the short transitional cues in YouTube
    // auto-captions - confirm before using on other VTT sources.
    if (count > 0) {
        previousline = 0; // previous line was 0=textline, 1=timeline
        kept = 1;
        for (i = 1; i < count; i++) {
            timing = is_timeline(arr[i]);
            if (previousline || !timing) {
                arr[kept++] = arr[i];
            }
            previousline = timing;
        }
        count = kept;
    }
    printf("Subtitles from file %s loaded.\n", filein);

    // Default output name: the input name with "vtt" replaced by "srt".
    // filein is known to end in ".vtt" here, so len >= 4.
    len = strlen(filein);
    memcpy(filein + len - 3, "srt", 3);

    if (argc == 3) {
        if (copy_name(fileout, sizeof fileout, argv[2]) != 0) {
            printf("Filename too long.\n");
            goto cleanup;
        }
    } else {
        printf("Enter output filename (default: %s) : ", filein);
        read_line(fileout, sizeof fileout);
    }
    if (fileout[0] == '\0') {
        strcpy(fileout, filein); // same capacity, always fits
    }
    if (ensure_extension(fileout, sizeof fileout, ".srt") != 0) {
        printf("Filename too long.\n");
        goto cleanup;
    }

    fh = fopen(fileout, "w"); // create new file
    if (fh == NULL) {
        printf("File creating error. Sending the result to stdout.\n");
        fh = stdout;
    }

    // Emit SRT: a running cue number before each timing line, and a blank
    // line after each text line.
    cue = 1;
    for (i = 0; i < count; i++) {
        if (is_timeline(arr[i])) {
            fprintf(fh, "%d\n%s\n", cue, arr[i]);
            cue++;
        } else {
            fprintf(fh, "%s\n\n", arr[i]);
        }
    }

    if (fh != stdout) {
        // fclose flushes; a failure here means the file is incomplete.
        if (fclose(fh) != 0) {
            printf("Error writing file %s.\n", fileout);
            goto cleanup;
        }
        printf("Subtitles exported to %s.\n", fileout);
    }
    rc = 0;

cleanup:
    free(arr);
    free(buffer);
    return rc;
}

相关内容