Use language from "/input/video_language.txt"

180c5dbc · Jan Hartig · 6b9f1ab2 · 180c5dbc
Commit 180c5dbc authored 1 year ago by Jan Hartig
--- a/main.py
+++ b/main.py
@@ -29,13 +29,19 @@ print("Using {} {} capable GPUs".format(len(capableGPUs), compute_type))
 tStart = perf_counter()

 # Run on GPU with FP32
-model = WhisperModel(model_size, device="cuda", device_index=capableGPUs, compute_type="float32", local_files_only=True)
+model = WhisperModel(
+    model_size,
+    device="cuda",
+    device_index=capableGPUs,
+    compute_type="float32",
+    local_files_only=True,
+)
 print("Model initialized")

-# Run on CPU
-# model = WhisperModel(model_size, device="cpu", compute_type="float32")
+with open("/input/video_language.txt", encoding="utf-8") as f:
+    language = f.read().strip()  # drop trailing newline/whitespace: transcribe() needs a bare code such as "de"

-segments, info = model.transcribe("/input/audiofile", language="de", beam_size=5)
+segments, info = model.transcribe("/input/audio.mkv", language=language, beam_size=5)

 with open("/output/subtitles.vtt", "w", encoding="utf-8") as f:
    f.write("WEBVTT\n\nNOTE This transcript was automatically generated.")
@@ -52,7 +58,11 @@ with open("/output/subtitles.vtt", "w", encoding="utf-8") as f:
            endH, endM = divmod(endM, 60)

            # write cue & text
-            f.write("\n\n{:02.0f}:{:02.0f}:{:06.3f} --> {:02.0f}:{:02.0f}:{:06.3f}\n".format(startH, startM, startS, endH, endM, endS))
+            f.write(
+                "\n\n{:02.0f}:{:02.0f}:{:06.3f} --> {:02.0f}:{:02.0f}:{:06.3f}\n".format(
+                    startH, startM, startS, endH, endM, endS
+                )
+            )
            f.write(segment.text[1:])

            # update progressbar
@@ -66,4 +76,8 @@ tDeltaM, tDeltaS = divmod(tDelta, 60)

 durationM, durationS = divmod(info.duration, 60)

-print("Processed {:02.0f}m {:02.0f}s audio in {:02.0f}m {:02.0f}s".format(durationM, durationS, tDeltaM, tDeltaS))
+print(
+    "Processed {:02.0f}m {:02.0f}s audio in {:02.0f}m {:02.0f}s".format(
+        durationM, durationS, tDeltaM, tDeltaS
+    )
+)