Initial commit

1c021a2e · Jan Hartig
Commit 1c021a2e authored 2 years ago by Jan Hartig
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
--- a/Dockerfile
+++ b/Dockerfile
+# Base image: CTranslate2 (tag: latest, Ubuntu 20.04, CUDA 11.2).
+FROM ghcr.io/opennmt/ctranslate2:latest-ubuntu20.04-cuda11.2
+
+# Install faster-whisper without keeping pip's download cache in the layer.
+RUN python3 -m pip install --no-cache-dir faster-whisper
+
+# Download the 'large-v2' model at build time so it is already in the image.
+RUN python3 -c "import faster_whisper; faster_whisper.download_model('large-v2')"
+
+# Copy the transcription script into the image's working directory.
+COPY main.py .
+
+# Mount points: transcript output directory and audio input.
+VOLUME ["/output/", "/input"]
+
+# Run the transcription script when the container starts.
+ENTRYPOINT ["python3", "main.py"]
\ No newline at end of file
--- a/README.md
+++ b/README.md
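+Usage:
+
+Build the image, then run it with the audio file mounted at `/input/audiofile`
+and a writable directory mounted at `/output`. The audio is transcribed as
+German and the result is written to `<output_dir>/transcript.vtt`.
+
+`main.py` loads the model on CUDA, so the container needs GPU access
+(for example, add `--gpus all` to `docker run`).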
+
+```shell
+docker build -t whisper-transcriber .
+
+docker run --rm -it \
+           -v <inputfile>:/input/audiofile:ro \
+           -v <output_dir>:/output whisper-transcriber
+```
\ No newline at end of file
--- a/main.py
+++ b/main.py
+from faster_whisper import WhisperModel
+from datetime import timedelta
+
+model_size = "large-v2"
+
+# Run on GPU with FP32
+model = WhisperModel(model_size, device="cuda", compute_type="float32")
+
+# Run on CPU
+# model = WhisperModel(model_size, device="cpu", compute_type="float32")
+
+segments, info = model.transcribe("/input/audiofile", language="de", beam_size=5)
+
+with open("/output/transcript.vtt", "w", encoding="utf-8") as f:
+    f.write("WEBVTT\n\nNOTE This transcript was automatically generated.")
+
+    for segment in segments:
+        start = timedelta(seconds=segment.start)
+        end = timedelta(seconds=segment.end)
+        text = segment.text[1:]
+
+        entry = "{} --> {}\n{}".format(start, end, text)
+
+        print(entry)
+
+        f.write("\n\n")
+        f.write(entry)