我想从 YouTube 下载自动生成的字幕,并使用以下命令将其转换为 srt 格式:youtube-dl --write-auto-sub --sub-lang en --convert-subs=srt --skip-download <URL>
它只输出 .vtt 文件而不会将其转换为 srt。
但是当去掉 --skip-download 参数时,它会下载视频和 vtt 文件,最后再将 vtt 转换为 srt。
有没有办法只下载 vtt 文件,然后将其转换为 srt 而不下载视频?
答案1
原因可能是在此过程中后台使用的 ffmpeg 不直接处理“ffmpeg.exe -i path.vtt path.vtt.srt”之类的重新编码请求。它需要一个文件流,为此它需要一个视频文件。
至少,这样的请求对我来说不起作用,并且报告没有流,尽管互联网上有类似的例子。
答案2
我制作了一个非常简单的命令行工具来将 vtt 转换为 srt。它只做非常简单的文本处理(不使用词法分析或记号化),并且仅适用于 YouTube 自动转录的字幕,例如下面这样的文本:
WEBVTT
Kind: captions
Language: en
00:00:01.740 --> 00:00:05.030 align:start position:0%
没有什么<00:00:02.700>是<00:00:03.179>不可能的......
您可以在此处下载 ZIP 文件:http://pececko.szm.com/@ VTT to SRT。
我不打算更新它,所以请将此代码视为公共领域。我希望这对某些人有用。
// C program to convert subtitles from VTT file to SRT file
// public domain
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXLINELEN 32767

// Substring that identifies a cue timing line ("start --> end ...").
static const char timing_arrow[] = " --> ";

// Number of characters kept from the arrow onwards on a timing line:
// the 5 characters of " --> " plus a 12-character "hh:mm:ss.mmm" end time.
enum { TIMING_TAIL_LEN = 17 };

// Returns nonzero when s contains " --> ", i.e. is a cue timing line.
static int is_timing_line(const char *s)
{
    return strstr(s, timing_arrow) != NULL;
}

// Copies src into dst (capacity cap, terminator included).
// Returns 0 on success, -1 (dst untouched) when src does not fit.
static int copy_name(char *dst, size_t cap, const char *src)
{
    size_t len = strlen(src);

    if (len >= cap) {
        return -1;
    }
    memcpy(dst, src, len + 1);
    return 0;
}

// Reads one line from stdin into dst and removes the trailing newline.
// On end-of-file or read error dst becomes the empty string.
static void read_name(char *dst, size_t cap)
{
    if (fgets(dst, (int)cap, stdin) == NULL) {
        dst[0] = '\0';
        return;
    }
    dst[strcspn(dst, "\n")] = '\0';
}

// Appends ext to name unless name already ends with ext.
// Returns 0 on success, -1 when the extended name would not fit in cap bytes.
static int ensure_extension(char *name, size_t cap, const char *ext)
{
    size_t len = strlen(name);
    size_t ext_len = strlen(ext);

    if (len >= ext_len && strcmp(name + len - ext_len, ext) == 0) {
        return 0;
    }
    if (len + ext_len + 1 > cap) {
        return -1;
    }
    memcpy(name + len, ext, ext_len + 1);
    return 0;
}

// Turns a VTT timing line into an SRT one, in place: cuts everything after
// the end timestamp (e.g. "align:start position:0%") and replaces every '.'
// with ','. arrow points at the " --> " inside line.
static void clean_timing_line(char *line, char *arrow)
{
    char *p;

    // Only truncate when the line really is longer than the timestamp;
    // a shorter line must not be written past its terminator.
    if (strlen(arrow) > TIMING_TAIL_LEN) {
        arrow[TIMING_TAIL_LEN] = '\0';
    }
    for (p = line; *p != '\0'; p++) {
        if (*p == '.') {
            *p = ',';
        }
    }
}

// Removes every "<...>" span from a subtitle text line, in place.
// An unclosed '<' discards the rest of the line.
static void strip_tags(char *line)
{
    size_t in, out = 0;
    int inside_tag = 0;

    for (in = 0; line[in] != '\0'; in++) {
        if (line[in] == '<') {
            inside_tag = 1;
        } else if (line[in] == '>') {
            inside_tag = 0;
        } else if (!inside_tag) {
            line[out++] = line[in];
        }
    }
    line[out] = '\0'; // always terminate, even when a tag was left open
}

// Pass 1: drops lines of length 0 or 1 (treated as empty, e.g. a lone space
// or carriage return) and cleans the remaining timing and text lines.
// Compacts arr in place and returns the new line count.
static size_t clean_lines(char **arr, size_t count)
{
    size_t in, out = 0;

    for (in = 0; in < count; in++) {
        char *line = arr[in];
        char *arrow;

        if (strlen(line) <= 1) {
            continue;
        }
        arrow = strstr(line, timing_arrow);
        if (arrow != NULL) {
            clean_timing_line(line, arrow);
        } else {
            strip_tags(line);
        }
        arr[out++] = line;
    }
    return out;
}

// Pass 2: discards everything before the first timing line (the file header)
// and drops each text line that equals the most recently kept text line.
// Compacts arr in place; returns the new count, or 0 when there is no timing
// line at all.
static size_t drop_repeated_text(char **arr, size_t count)
{
    size_t first = 0, in, out = 1;
    const char *last_text;

    while (first < count && !is_timing_line(arr[first])) {
        first++;
    }
    if (first == count) {
        return 0;
    }
    // Seeded with the timing line itself, which no text line can equal.
    last_text = arr[0] = arr[first];
    for (in = first + 1; in < count; in++) {
        if (!is_timing_line(arr[in])) {
            if (strcmp(arr[in], last_text) == 0) {
                continue;
            }
            last_text = arr[in];
        }
        arr[out++] = arr[in];
    }
    return out;
}

// Pass 3: after repeated text is gone, timing lines can sit back to back.
// A timing line is kept only when the line before it was also a timing line;
// a timing line that directly follows a text line is dropped. arr[0] is
// always kept. Requires count >= 1. Returns the new count.
static size_t drop_superseded_timings(char **arr, size_t count)
{
    size_t in, out = 1;
    int prev_was_timing = 0; // deliberately 0 for arr[0], as the filter expects

    for (in = 1; in < count; in++) {
        int timing = is_timing_line(arr[in]);

        if (prev_was_timing || !timing) {
            arr[out++] = arr[in];
        }
        prev_was_timing = timing;
    }
    return out;
}

// Converts a YouTube auto-transcribed .vtt subtitle file to .srt.
//
// argv[1] (optional) is the input file and argv[2] (optional, only honoured
// when exactly two arguments are given) is the output file; missing names are
// asked for on stdin. ".vtt" / ".srt" are appended when absent. The whole
// input is loaded into memory, split into lines, filtered by the three passes
// above and written out as numbered SRT cues. If the output file cannot be
// created the result goes to stdout instead.
//
// Returns 0 on success or when no input name was given, 1 on any error.
int main(int argc,char** argv)
{
    char filein[MAXLINELEN];  // e.g. "test.vtt"
    char fileout[MAXLINELEN]; // e.g. "test.srt"
    FILE *fh;
    char *buffer;  // whole input file, newlines replaced by terminators
    char **arr;    // pointers to the start of each line inside buffer
    long numbytes;
    size_t nread, nlines, pos, i;
    int cue;

    fputs("Convert Youtube's Autotranscribed VTT to SRT [build 2023-06-22]\nConverts .vtt files from youtube to .srt files.\nUsage: vtttosrt.exe [sourcesubtitles.vtt] [targetsubtitles.srt]\nPress Ctrl+C to abort.\n", stdout);

    if (argc > 1) {
        if (copy_name(filein, sizeof filein, argv[1]) != 0) {
            printf("Filename is too long.\n");
            return 1;
        }
    } else {
        printf("Enter input filename (*.vtt) :");
        read_name(filein, sizeof filein);
    }
    if (filein[0] == '\0') {
        return 0;
    }
    if (ensure_extension(filein, sizeof filein, ".vtt") != 0) {
        printf("Filename is too long.\n");
        return 1;
    }

    fh = fopen(filein, "r"); // Opening file in reading mode
    if (fh == NULL) {
        printf("File %s not found.\n", filein);
        return 1;
    }
    numbytes = -1;
    if (fseek(fh, 0L, SEEK_END) == 0) {
        numbytes = ftell(fh); // Get the number of bytes
    }
    if (numbytes < 0 || fseek(fh, 0L, SEEK_SET) != 0) {
        fclose(fh);
        printf("File %s cannot be read.\n", filein);
        return 1;
    }
    // One extra byte so the last line is terminated even without a final
    // newline.
    buffer = calloc((size_t)numbytes + 1, sizeof *buffer);
    if (buffer == NULL) {
        fclose(fh);
        printf("Out of memory.\n");
        return 1;
    }
    // In text mode fewer bytes than numbytes may be delivered; use what came.
    nread = fread(buffer, sizeof *buffer, (size_t)numbytes, fh);
    fclose(fh);
    buffer[nread] = '\0';

    // Size the pointer array from the real line count.
    nlines = 1;
    for (i = 0; i < nread; i++) {
        if (buffer[i] == '\n') {
            nlines++;
        }
    }
    arr = calloc(nlines, sizeof *arr);
    if (arr == NULL) {
        free(buffer);
        printf("Out of memory.\n");
        return 1;
    }
    pos = 0;
    arr[pos++] = buffer;
    for (i = 0; i < nread; i++) {
        if (buffer[i] == '\n') {
            buffer[i] = '\0';
            arr[pos++] = &buffer[i + 1];
        }
    }

    pos = clean_lines(arr, pos);
    pos = drop_repeated_text(arr, pos);
    if (pos == 0) {
        printf("No subtitle cues found in %s.\n", filein);
        free(arr);
        free(buffer);
        return 1;
    }
    pos = drop_superseded_timings(arr, pos);
    printf("Subtitles from file %s loaded.\n", filein);

    // Choose the .srt file name.
    if (argc == 3) {
        if (copy_name(fileout, sizeof fileout, argv[2]) != 0) {
            printf("Filename is too long.\n");
            free(arr);
            free(buffer);
            return 1;
        }
    } else {
        // filein ends in ".vtt" here, so its last three characters can be
        // rewritten to form the default output name.
        memcpy(filein + strlen(filein) - 3, "srt", 3);
        printf("Enter output filename (default: %s) : ", filein);
        read_name(fileout, sizeof fileout);
    }
    if (fileout[0] == '\0') {
        // Use the default name; both buffers have the same capacity.
        (void)copy_name(fileout, sizeof fileout, filein);
    }
    if (ensure_extension(fileout, sizeof fileout, ".srt") != 0) {
        printf("Filename is too long.\n");
        free(arr);
        free(buffer);
        return 1;
    }

    fh = fopen(fileout, "w"); // create new file
    if (fh == NULL) {
        printf("File creating error. Sending the result to stdout.\n");
        fh = stdout;
    }

    // arr[0] is a timing line; every timing line opens a new numbered cue and
    // every text line is followed by a blank line.
    cue = 1;
    for (i = 0; i < pos; i++) {
        if (is_timing_line(arr[i])) {
            fprintf(fh, "%d\n%s\n", cue, arr[i]);
            cue++;
        } else {
            fprintf(fh, "%s\n\n", arr[i]);
        }
    }

    free(arr);
    free(buffer);

    if (fh != stdout) {
        if (fclose(fh) != 0) {
            printf("Error writing file %s.\n", fileout);
            return 1;
        }
        printf("Subtitles exported to %s.\n", fileout);
    }
    return 0;
}