Administrator
发布于 2025-02-01 / 3 阅读 / 0 评论 / 0 点赞

mac 语音转文字

import os
import platform
import queue
import ssl
import subprocess
import sys
import tempfile
import threading
import tkinter as tk
import webbrowser
from datetime import timedelta
from tkinter import ttk, filedialog, messagebox, scrolledtext

import torch
import whisper
from pydub import AudioSegment


# 创建一个自定义的 StreamToWidget 类来重定向输出
class StreamToWidget:
    """Minimal write-only text stream that appends its output to a widget.

    Intended as a stand-in for ``sys.stdout`` / ``sys.stderr``: anything
    written is inserted at the end of ``widget`` and the view is scrolled so
    the newest text stays visible.  The widget only needs ``insert(index,
    text)`` and ``see(index)`` methods.
    """

    def __init__(self, widget):
        # The target object; kept public because callers may inspect it.
        self.widget = widget

    def write(self, text):
        """Append ``text`` to the widget and return the number of characters.

        Returning the character count follows the contract of text-stream
        ``write`` methods, which some callers rely on.  Empty writes are
        skipped so they do not touch the widget at all.
        """
        if not text:
            return 0
        self.widget.insert('end', text)
        self.widget.see('end')  # keep the most recent output in view
        return len(text)

    def flush(self):
        """Do nothing; text is delivered to the widget on every write."""
        return None


class AudioTranscriber:
    """Tkinter front end that transcribes an audio/video file with Whisper.

    The window lets the user pick an input file, an output directory, an
    output format (plain text or timestamped segments) and a Whisper model
    size.  Transcription runs in a background thread; every widget update
    requested by that thread is queued and executed on the thread that
    built the UI, so Tk is only ever touched from one thread.
    """

    # Video containers whose audio track is extracted with ffmpeg directly.
    _VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.wmv')
    # Delay (ms) between two drains of the cross-thread UI work queue.
    _UI_POLL_INTERVAL_MS = 50

    class _UiTextProxy:
        """Expose ``insert``/``see`` of a text widget through a dispatcher.

        Used as the target of ``StreamToWidget`` so that ``print`` output
        produced by the worker thread is applied on the UI thread.
        """

        def __init__(self, widget, dispatch):
            self._widget = widget
            self._dispatch = dispatch

        def insert(self, index, text):
            self._dispatch(self._widget.insert, index, text)

        def see(self, index):
            self._dispatch(self._widget.see, index)

    def __init__(self, root):
        """Build the whole UI inside ``root``.

        If ffmpeg cannot be found, ``check_requirements`` destroys ``root``
        and construction stops early, before any widget is created.
        """
        self.root = root
        self.root.title("音频转文字工具")
        self.root.geometry("800x700")

        # The thread constructing the widgets is the only one allowed to
        # touch them; other threads hand their UI work over via this queue.
        self._ui_thread = threading.current_thread()
        self._ui_queue = queue.Queue()

        # SECURITY NOTE(review): this disables TLS certificate verification
        # for every default HTTPS context in the process (it affects the
        # Whisper model download).  Kept because removing it may break
        # installations without a configured CA bundle, but a proper
        # certificate setup should replace it.
        ssl._create_default_https_context = ssl._create_unverified_context

        # Check system requirements (ffmpeg) before building anything.
        if not self.check_requirements():
            return

        # Main container frame.
        main_frame = ttk.Frame(root, padding="10")
        main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Processor status line.
        cuda_available = torch.cuda.is_available()
        gpu_status = "GPU可用" if cuda_available else "使用CPU处理"
        gpu_frame = ttk.Frame(main_frame)
        gpu_frame.grid(row=0, column=0, columnspan=2, sticky=tk.W)
        ttk.Label(gpu_frame, text=f"处理器状态: {gpu_status}").pack(side=tk.LEFT)
        if not cuda_available:
            ttk.Label(gpu_frame, text=" (处理速度可能较慢)", foreground="red").pack(side=tk.LEFT)

        # Input file row.
        ttk.Label(main_frame, text="输入文件:").grid(row=1, column=0, sticky=tk.W, pady=5)
        self.input_path = tk.StringVar()
        input_entry = ttk.Entry(main_frame, textvariable=self.input_path, width=50)
        input_entry.grid(row=2, column=0, padx=5)
        ttk.Button(main_frame, text="选择文件", command=self.browse_input).grid(row=2, column=1)

        # Output directory row; defaults to the user's Desktop.
        ttk.Label(main_frame, text="输出目录:").grid(row=3, column=0, sticky=tk.W, pady=5)
        self.output_path = tk.StringVar()
        self.output_path.set(os.path.expanduser("~/Desktop"))
        output_entry = ttk.Entry(main_frame, textvariable=self.output_path, width=50)
        output_entry.grid(row=4, column=0, padx=5)
        ttk.Button(main_frame, text="选择目录", command=self.browse_output).grid(row=4, column=1)

        # Output format selection.
        ttk.Label(main_frame, text="输出格式:").grid(row=5, column=0, sticky=tk.W, pady=5)
        format_frame = ttk.Frame(main_frame)
        format_frame.grid(row=6, column=0, sticky=tk.W, padx=5)
        self.output_format = tk.StringVar(value="text_only")
        ttk.Radiobutton(format_frame, text="仅文本内容",
                        value="text_only", variable=self.output_format).pack(side=tk.LEFT)
        ttk.Radiobutton(format_frame, text="包含时间戳",
                        value="with_timestamp", variable=self.output_format).pack(side=tk.LEFT)

        # Whisper model selection.
        ttk.Label(main_frame, text="识别模型:").grid(row=7, column=0, sticky=tk.W, pady=5)
        self.model_var = tk.StringVar(value="medium")
        models = [
            ("tiny - 最快,精度最低", "tiny"),
            ("base - 较快,精度一般", "base"),
            ("small - 平衡,推荐", "small"),
            ("medium - 较慢,精度较高", "medium"),
            ("large - 最慢,精度最高", "large"),
        ]
        model_frame = ttk.Frame(main_frame)
        model_frame.grid(row=8, column=0, sticky=tk.W, padx=5)
        for i, (text, value) in enumerate(models):
            ttk.Radiobutton(model_frame, text=text, value=value,
                            variable=self.model_var).grid(row=i, column=0, sticky=tk.W)

        # Log output area.
        ttk.Label(main_frame, text="处理日志:").grid(row=9, column=0, sticky=tk.W, pady=5)
        self.log_widget = scrolledtext.ScrolledText(main_frame, height=10, width=80)
        self.log_widget.grid(row=10, column=0, columnspan=2, pady=5)

        # Redirect stdout/stderr into the log area.  The proxy routes writes
        # coming from the worker thread through the UI queue.
        log_target = self._UiTextProxy(self.log_widget, self._run_on_ui_thread)
        sys.stdout = StreamToWidget(log_target)
        sys.stderr = StreamToWidget(log_target)

        # Progress bar (indeterminate: only shows that work is ongoing).
        self.progress_var = tk.DoubleVar()
        self.progress = ttk.Progressbar(main_frame, length=400, mode='indeterminate',
                                        variable=self.progress_var)
        self.progress.grid(row=11, column=0, columnspan=2, pady=10)

        # Status label.
        self.status_var = tk.StringVar(value="就绪")
        self.status_label = ttk.Label(main_frame, textvariable=self.status_var)
        self.status_label.grid(row=12, column=0, columnspan=2)

        # Transcribe button.
        self.transcribe_btn = ttk.Button(main_frame, text="开始转录",
                                         command=self.start_transcription)
        self.transcribe_btn.grid(row=13, column=0, columnspan=2, pady=10)

        # Start polling for UI work queued by background threads.
        self.root.after(self._UI_POLL_INTERVAL_MS, self._drain_ui_queue)

    def _run_on_ui_thread(self, func, *args, **kwargs):
        """Run ``func`` on the UI thread: now if already there, else queued."""
        if threading.current_thread() is self._ui_thread:
            func(*args, **kwargs)
        else:
            self._ui_queue.put((func, args, kwargs))

    def _drain_ui_queue(self):
        """Execute all queued UI callbacks in order, then schedule the next poll."""
        try:
            while True:
                try:
                    func, args, kwargs = self._ui_queue.get_nowait()
                except queue.Empty:
                    break
                func(*args, **kwargs)
        finally:
            # Rescheduled last so a callback that opens a modal dialog
            # cannot cause this method to be re-entered.
            self.root.after(self._UI_POLL_INTERVAL_MS, self._drain_ui_queue)

    def _set_status(self, text):
        """Update the status label from any thread."""
        self._run_on_ui_thread(self.status_var.set, text)

    def _append_log(self, message):
        """Append one line to the log widget; must run on the UI thread."""
        self.log_widget.insert('end', message + '\n')
        self.log_widget.see('end')
        # Redraw without dispatching user events (avoids re-entrant callbacks).
        self.root.update_idletasks()

    def _finish_transcription(self):
        """Stop the progress bar and re-enable the button (UI thread)."""
        self.progress.stop()
        self.transcribe_btn.state(['!disabled'])

    def log(self, message):
        """Append ``message`` as a new line in the log window.

        Safe to call from any thread: off the UI thread the update is queued
        and applied on the next poll.
        """
        self._run_on_ui_thread(self._append_log, message)

    def format_timestamp(self, seconds):
        """Return ``seconds`` rounded to whole seconds as ``H:MM:SS``.

        Hours are not capped at 24, so a position 25 hours in is rendered as
        ``25:00:00``.  Negative input is clamped to zero.
        """
        total_seconds = max(0, int(round(seconds)))
        hours, remainder = divmod(total_seconds, 3600)
        minutes, secs = divmod(remainder, 60)
        return f"{hours}:{minutes:02d}:{secs:02d}"

    def save_transcription(self, result, output_file, output_format=None):
        """Write a transcription result to ``output_file`` as UTF-8 text.

        Args:
            result: mapping with a ``"text"`` entry and a ``"segments"`` list
                whose items have ``"start"``, ``"end"`` and ``"text"``.
            output_file: path of the file to create or overwrite.
            output_format: ``"with_timestamp"`` writes one line per segment,
                any other value writes only the full text.  When ``None`` the
                value is read from the format radio buttons.
        """
        if output_format is None:
            output_format = self.output_format.get()

        with open(output_file, 'w', encoding='utf-8') as f:
            if output_format == "with_timestamp":
                # One "[start --> end] text" line per segment.
                for segment in result["segments"]:
                    start_time = self.format_timestamp(segment["start"])
                    end_time = self.format_timestamp(segment["end"])
                    f.write(f'[{start_time} --> {end_time}] {segment["text"]}\n')
            else:
                # Plain text only.
                f.write(result["text"])

    def check_requirements(self):
        """Verify that ffmpeg can be launched; return ``True`` when it can.

        ``ffmpeg`` is tried via PATH first, then at two fixed Homebrew
        locations; when one of those works its directory is prepended to
        PATH.  If none works, the user is offered installation instructions,
        the root window is destroyed and ``False`` is returned.
        """
        ffmpeg_paths = [
            'ffmpeg',
            '/opt/homebrew/bin/ffmpeg',
            '/usr/local/bin/ffmpeg',
        ]

        for ffmpeg_path in ffmpeg_paths:
            try:
                subprocess.run([ffmpeg_path, '-version'], capture_output=True)
            except OSError:
                # Missing file, missing execute permission, etc.: try the next one.
                continue
            if ffmpeg_path != 'ffmpeg':
                ffmpeg_dir = os.path.dirname(ffmpeg_path)
                current_path = os.environ.get('PATH')
                os.environ['PATH'] = (
                    f"{ffmpeg_dir}{os.pathsep}{current_path}" if current_path else ffmpeg_dir
                )
            return True

        response = messagebox.askquestion("系统要求",
                                          "需要安装 FFmpeg 才能继续。\n\n"
                                          "您想现在安装 FFmpeg 吗?\n"
                                          "1. 首先需要安装 Homebrew\n"
                                          "2. 然后用 Homebrew 安装 FFmpeg",
                                          icon='warning')
        if response == 'yes':
            # Open the Homebrew site and explain the remaining steps.
            webbrowser.open('https://brew.sh/index_zh-cn')
            messagebox.showinfo("安装说明",
                                "1. 请先安装 Homebrew\n"
                                "2. 安装完成后,打开终端运行:\n"
                                "   brew install ffmpeg\n"
                                "3. 安装完成后重启本程序")

        # quit() only stops a running mainloop; this method runs during
        # construction, before mainloop starts, so the window has to be
        # destroyed for the application to actually exit.
        self.root.destroy()
        return False

    def browse_input(self):
        """Let the user choose the input file and store its path."""
        filetypes = (
            ('音频/视频文件', '*.mp3 *.wav *.m4a *.mp4 *.avi *.mov *.wmv'),
            ('所有文件', '*.*'),
        )
        filename = filedialog.askopenfilename(filetypes=filetypes)
        if filename:
            self.input_path.set(filename)

    def browse_output(self):
        """Let the user choose the output directory and store its path."""
        directory = filedialog.askdirectory()
        if directory:
            self.output_path.set(directory)

    def convert_to_mp3(self, input_file, output_dir=None):
        """Convert ``input_file`` to MP3 and return the path of the new file.

        Files with a known video extension are converted by calling ffmpeg
        directly; everything else goes through pydub.

        Args:
            input_file: path of the file to convert.
            output_dir: directory for ``<name>_converted.mp3``.  Defaults to
                the directory of ``input_file``.

        Raises:
            RuntimeError: if the conversion fails; the original error is
                attached as ``__cause__``.
        """
        self._set_status("正在转换为MP3格式...")
        self.log(f"开始转换文件: {input_file}")

        file_name = os.path.splitext(os.path.basename(input_file))[0]
        if output_dir is None:
            output_dir = os.path.dirname(input_file)
        output_mp3 = os.path.join(output_dir, f"{file_name}_converted.mp3")

        # Video file: extract the audio track with ffmpeg.
        if input_file.lower().endswith(self._VIDEO_EXTENSIONS):
            self.log("检测到视频文件,正在提取音频...")
            command = [
                'ffmpeg', '-i', input_file,
                '-vn',
                '-acodec', 'libmp3lame',
                '-ab', '192k',
                '-ar', '44100',
                '-y',
                output_mp3,
            ]
            try:
                subprocess.run(command, check=True, capture_output=True)
            except subprocess.CalledProcessError as e:
                # ffmpeg output is not guaranteed to be valid UTF-8.
                detail = e.stderr.decode(errors='replace') if e.stderr else str(e)
                raise RuntimeError(f"视频转换MP3出错: {detail}") from e
            self.log("音频提取完成")
            return output_mp3

        # Audio file: convert with pydub.
        try:
            self.log("正在转换音频格式...")
            audio = AudioSegment.from_file(input_file)
            exported = audio.export(output_mp3, format="mp3")
            # export() hands back the output file object; close it so the
            # handle is not left open.
            if hasattr(exported, "close"):
                exported.close()
        except Exception as e:
            raise RuntimeError(f"音频转换MP3出错: {str(e)}") from e
        self.log("格式转换完成")
        return output_mp3

    def start_transcription(self):
        """Validate the form and start transcription in a background thread.

        All Tk variables are read here, on the UI thread, and passed to the
        worker as plain strings.
        """
        input_file = self.input_path.get()
        output_dir = self.output_path.get()
        if not input_file:
            messagebox.showerror("错误", "请选择输入文件")
            return
        if not output_dir:
            messagebox.showerror("错误", "请选择输出目录")
            return
        if not os.path.isfile(input_file):
            messagebox.showerror("错误", f"输入文件不存在: {input_file}")
            return

        self.transcribe_btn.state(['disabled'])
        self.progress.start()
        self.log_widget.delete("1.0", tk.END)  # clear the previous log

        # Run the transcription in its own thread so the UI stays responsive.
        worker = threading.Thread(
            target=self.transcribe,
            kwargs={
                "input_file": input_file,
                "output_dir": output_dir,
                "model_name": self.model_var.get(),
                "output_format": self.output_format.get(),
            },
            daemon=True,
        )
        worker.start()

    def transcribe(self, input_file=None, output_dir=None, model_name=None,
                   output_format=None):
        """Convert (if needed), transcribe and save the result.

        Any argument left as ``None`` is read from the corresponding form
        field.  Non-MP3 input is converted into a temporary directory that is
        removed afterwards, and the result file is named after the original
        input: ``<input name>_转录结果.txt`` inside ``output_dir``.

        Errors are not raised; they are logged and shown in a dialog.  The
        progress bar and button are always reset at the end.
        """
        try:
            if input_file is None:
                input_file = self.input_path.get()
            if output_dir is None:
                output_dir = self.output_path.get()
            if model_name is None:
                model_name = self.model_var.get()
            if output_format is None:
                output_format = self.output_format.get()

            source_stem = os.path.splitext(os.path.basename(input_file))[0]

            with tempfile.TemporaryDirectory(prefix="audio_transcriber_") as work_dir:
                # Convert to MP3 when the input is anything else.
                audio_file = input_file
                if not input_file.lower().endswith('.mp3'):
                    self._set_status("正在转换文件为MP3格式...")
                    audio_file = self.convert_to_mp3(input_file, output_dir=work_dir)

                # Load the Whisper model.
                self._set_status("正在加载识别模型...")
                self.log(f"正在加载 {model_name} 模型...")
                model = whisper.load_model(model_name)

                # Transcribe while the (possibly temporary) audio still exists.
                self._set_status("正在转录音频...")
                self.log("开始转录音频...")
                result = model.transcribe(audio_file)

            # Save the result.
            os.makedirs(output_dir, exist_ok=True)
            output_file = os.path.join(output_dir, f"{source_stem}_转录结果.txt")
            self.log("正在保存转录结果...")
            self.save_transcription(result, output_file, output_format)

            self._set_status("转录完成!")
            self.log("转录完成!")
            self._run_on_ui_thread(messagebox.showinfo, "成功",
                                   f"转录完成!\n结果保存在:{output_file}")

        except Exception as e:
            # Top-level boundary of the worker: report instead of crashing.
            self._set_status("发生错误")
            self.log(f"错误: {str(e)}")
            self._run_on_ui_thread(messagebox.showerror, "错误", str(e))

        finally:
            self._run_on_ui_thread(self._finish_transcription)


if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the transcriber
    # UI to it, then block in the Tk event loop until it ends.
    main_window = tk.Tk()
    transcriber_app = AudioTranscriber(main_window)
    main_window.mainloop()

评论