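# Convert VTT subtitle files to LRC lyric files with Python.
#
# I need this conversion regularly, so I wrote my own script for it.
# Updated 2025.12.15: fixes a batch of bugs from the earlier version.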
import os
import re
def path_to_original(vtt_file_path=""):
    """Return the path with every trailing media/subtitle extension removed.

    Extensions are peeled off the file name from the right for as long as the
    outermost one is ``.mp3``, ``.wav`` or ``.vtt`` (compared
    case-insensitively). Any other extension, and any dots in the directory
    part of the path, are left untouched.

    :param vtt_file_path: e.g. ``/home/direct.ory/file.wav.vtt``
    :return: the stripped path, e.g. ``/home/direct.ory/file``
    """
    dir_name, file_name = os.path.split(vtt_file_path)
    removable_exts = ('.mp3', '.wav', '.vtt')

    while True:
        stem, ext = os.path.splitext(file_name)
        # Stop at the first extension that is not ours to remove (this also
        # covers "no extension left"), so "notes.txt" and "a.b.vtt" keep
        # their meaningful suffix.
        if ext.lower() not in removable_exts:
            break
        file_name = stem

    return os.path.join(dir_name, file_name)
# Convert a WebVTT cue timestamp into an LRC time tag.
def deal_time(timeStamp=''):
    """Turn the start time of a WebVTT timing line into an LRC ``[mm:ss.xx]`` tag.

    Accepts either a bare timestamp or a full ``start --> end`` line; only the
    part before ``-->`` is used. Both ``hh:mm:ss.ttt`` and the hour-less
    ``mm:ss.ttt`` form are recognised.

    LRC tags have no hours field, so hours are folded into the minutes
    (``01:02:03.500`` becomes ``[62:03.50]``). Milliseconds are rounded to
    centiseconds, carrying into seconds/minutes when needed.

    :param timeStamp: timestamp text, e.g. ``00:01:02.340 --> 00:01:05.000``
    :return: the LRC tag string, or ``None`` if the input is empty or does not
        start with a recognisable timestamp
    """
    if not timeStamp:
        return None

    start_time = timeStamp.split("-->")[0].strip()
    time_match = re.match(r'(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{3})', start_time)
    if not time_match:
        return None

    # The hours group is None when the hour-less form was used.
    hours, minutes, seconds, millis = (
        int(part or 0) for part in time_match.groups()
    )

    # Work in whole centiseconds so that rounding 995-999 ms carries into the
    # next second instead of producing a three-digit fraction like ".100".
    total_cs = ((hours * 60 + minutes) * 60 + seconds) * 100 + (millis + 5) // 10
    total_minutes, remainder = divmod(total_cs, 6000)
    whole_seconds, centiseconds = divmod(remainder, 100)
    return f"[{total_minutes:02d}:{whole_seconds:02d}.{centiseconds:02d}]"
# Convert a single VTT file into an LRC file.
def _iter_vtt_cues(lines):
    """Yield ``(timing_line, text_lines)`` for every cue found in *lines*.

    The input is split into blocks separated by blank lines. A block is a cue
    only if one of its lines contains ``-->``; lines before that one (the
    optional cue identifier) are dropped and the lines after it are the cue
    text. Blocks without ``-->`` (the ``WEBVTT`` header, ``NOTE``/``STYLE``
    blocks) are skipped.
    """
    block = []
    # The trailing "" acts as a final blank line so the last block is flushed.
    for raw_line in list(lines) + [""]:
        stripped = raw_line.strip()
        if stripped:
            block.append(stripped)
            continue
        for index, entry in enumerate(block):
            if "-->" in entry:
                yield entry, block[index + 1:]
                break
        block = []


def convert_vtt_to_lrc(vtt_file_path):
    """Convert one WebVTT file into an LRC file written next to it.

    The output path is ``path_to_original(vtt_file_path) + '.lrc'`` and is
    printed before conversion. Each cue becomes one LRC line: the time tag
    produced by ``deal_time`` for the cue's start time, followed by the cue
    text (the lines of a multi-line cue are joined with a single space so the
    whole cue keeps its time tag). Header lines, cue identifiers and blank
    lines are not copied to the output.

    :param vtt_file_path: path of the ``.vtt`` file to read (UTF-8, with or
        without BOM)
    :return: None
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM if present.
    with open(vtt_file_path, 'r', encoding='utf-8-sig') as vtt_file:
        lines = vtt_file.readlines()

    lrc_file_path = path_to_original(vtt_file_path) + '.lrc'
    print(lrc_file_path)

    contents = []
    for timing_line, text_lines in _iter_vtt_cues(lines):
        if not text_lines:
            # A cue with no text has nothing to show in an LRC file.
            continue
        # deal_time returns None for a timestamp it cannot parse; keep the
        # text untagged rather than failing on None + str.
        time_tag = deal_time(timing_line) or ""
        contents.append(time_tag + " ".join(text_lines) + "\n")

    with open(lrc_file_path, 'w', encoding='utf-8') as lrc_file:
        lrc_file.writelines(contents)
def convert_vtt_files_to_lrc(directory):
    """Convert every ``.vtt`` file under *directory* (recursively) to LRC.

    Walks the directory tree with ``os.walk`` and calls
    ``convert_vtt_to_lrc`` on each file whose name ends in ``.vtt``
    (case-insensitive). A file that cannot be read, decoded or written is
    reported and skipped so the remaining files are still converted.

    :param directory: root directory to search
    :return: None
    """
    for root, _dirs, files in os.walk(directory):
        for file_name in files:
            # Compare in lower case so "SONG.VTT" is not silently skipped.
            if not file_name.lower().endswith('.vtt'):
                continue
            file_path = os.path.join(root, file_name)
            print(f"Converting {file_name} to LRC...")
            try:
                convert_vtt_to_lrc(file_path)
            except (OSError, UnicodeDecodeError) as err:
                # One unreadable or mis-encoded file should not abort the batch.
                print(f"Failed to convert {file_path}: {err}")
# Script entry point: ask for a directory and convert every VTT file in it.
if __name__ == "__main__":
    # Pasted or drag-and-dropped paths often carry surrounding whitespace or
    # quotes; remove them before using the path.
    directory_path = input("请输入要处理的目录路径:").strip().strip('"\'')
    # os.walk yields nothing for a missing directory, which would otherwise
    # end in a misleading "Conversion completed!".
    if not os.path.isdir(directory_path):
        raise SystemExit(f"Not a directory: {directory_path!r}")
    convert_vtt_files_to_lrc(directory_path)
    print("Conversion completed!")
# (Web page footer residue, not part of the script:
#  "More recommendations" / "All comments (0)")