import { writeFile, rm, mkdtemp } from "node:fs/promises"
import { join } from "node:path"
import { tmpdir } from "node:os"
import { execFile } from "node:child_process"
import { promisify } from "node:util"

const execFileAsync = promisify(execFile)

/** Optional overrides for the external tools used by the transcription handler. */
export interface TranscribeConfig {
  /** Path to the ffmpeg executable. Defaults to "/opt/homebrew/bin/ffmpeg". */
  ffmpeg?: string
  /** Path to the whisper-cli executable. Defaults to "/opt/homebrew/bin/whisper-cli". */
  whisperCli?: string
  /** Path to the model file passed to whisper-cli via `-m`. */
  model?: string
  /** Thread count passed to whisper-cli via `-t`. Defaults to 4. */
  threads?: number
}

/**
 * Builds a request handler that transcribes an uploaded audio file.
 *
 * The handler reads the `audio` field of the request's form data, writes it to
 * a fresh temporary directory, converts it with ffmpeg to a 16 kHz mono WAV
 * file, runs whisper-cli on that file and responds with `{ text }` as JSON.
 * The temporary directory is removed afterwards regardless of the outcome.
 *
 * @param config Optional tool paths, model path and thread count.
 * @returns An async handler that always resolves to a JSON `Response`:
 *   200 with `{ text }` on success, 400 with `{ error }` when the `audio`
 *   field is missing or is not a file (or the body cannot be read as form
 *   data because of its Content-Type), and 500 with `{ error }` otherwise.
 */
export function createTranscribeHandler(
  config?: TranscribeConfig,
): (request: Request) => Promise<Response> {
  const ffmpeg = config?.ffmpeg ?? "/opt/homebrew/bin/ffmpeg"
  const whisperCli = config?.whisperCli ?? "/opt/homebrew/bin/whisper-cli"
  const model = config?.model ?? "/opt/homebrew/share/whisper-cpp/models/ggml-base.en.bin"
  const threads = config?.threads ?? 4

  return async (request: Request): Promise<Response> => {
    const headers = { "Content-Type": "application/json" }
    // Stays empty until the temp directory exists, so `finally` knows
    // whether there is anything to clean up.
    let tmpDir = ""

    try {
      const formData = await request.formData()
      const entry = formData.get("audio")

      // A form field can also be a plain string; only an uploaded file has
      // `arrayBuffer()`, so a text field is rejected the same way as a
      // missing one instead of failing later with a 500.
      if (entry === null || typeof entry === "string") {
        return new Response(
          JSON.stringify({ error: "audio file required" }),
          { status: 400, headers },
        )
      }

      tmpDir = await mkdtemp(join(tmpdir(), "whisper-"))
      const inputPath = join(tmpDir, "input.webm")
      const wavPath = join(tmpDir, "input.wav")

      await writeFile(inputPath, Buffer.from(await entry.arrayBuffer()))

      // Convert the upload to 16 kHz, single-channel WAV.
      await execFileAsync(
        ffmpeg,
        ["-i", inputPath, "-ar", "16000", "-ac", "1", "-f", "wav", wavPath],
        { timeout: 15000 },
      )

      const { stdout } = await execFileAsync(
        whisperCli,
        ["-m", model, "-f", wavPath, "--no-timestamps", "--no-prints", "-t", String(threads)],
        { timeout: 30000 },
      )

      return new Response(JSON.stringify({ text: stdout.trim() }), { headers })
    } catch (err: unknown) {
      // Anything can be thrown; narrow before reading `.message`.
      const message = err instanceof Error ? err.message : String(err)
      // Errors whose message mentions the request's Content-Type or the audio
      // file are treated as client errors; everything else is a server error.
      const is4xx = message.includes("Content-Type") || message.includes("audio file")
      return new Response(
        JSON.stringify({ error: message }),
        { status: is4xx ? 400 : 500, headers },
      )
    } finally {
      if (tmpDir) {
        try {
          await rm(tmpDir, { recursive: true, force: true })
        } catch {
          // Best-effort cleanup: a failure to delete the temp directory must
          // not replace the response that was already chosen.
        }
      }
    }
  }
}