Skip to content
Snippets Groups Projects
Commit 1c021a2e authored by Jan Hartig's avatar Jan Hartig
Browse files

Initial commit

parents
Branches
No related tags found
No related merge requests found
# Default ignored files
# Both patterns start with "/", so they match only relative to the directory
# that contains this ignore file, not at any depth below it.
# NOTE(review): presumably IDE-local state (shelved changes, per-user workspace
# layout) — confirm against the directory this file lives in.
/shelf/
/workspace.xml
# Image that transcribes one audio file with faster-whisper and writes a WebVTT
# transcript.
#
# The base image tag can be overridden at build time:
#   docker build --build-arg CT2_TAG=<fixed-tag> -t whisper-transcriber .
# The default is a floating "latest-*" tag, so a rebuild may silently pick up a
# different CTranslate2 release; pass a fixed tag for reproducible images.
ARG CT2_TAG=latest-ubuntu20.04-cuda11.2
FROM ghcr.io/opennmt/ctranslate2:${CT2_TAG}

# install faster-whisper
# NOTE(review): the package version is not pinned; consider "faster-whisper==<version>"
# once a version known to work with the chosen base image has been confirmed.
RUN python3 -m pip --no-cache-dir install faster-whisper

# preload model at build time so it is already present in the image
RUN python3 -c "import faster_whisper; faster_whisper.download_model('large-v2')"

# Keep the application in its own directory instead of relying on whatever
# working directory the base image happens to define.
WORKDIR /app
COPY main.py /app/main.py

# Mount points used by main.py: it reads /input/audiofile and writes
# /output/transcript.vtt.
VOLUME ["/input", "/output"]

# Exec form with an absolute script path, so the entry point does not depend on
# the working directory.
ENTRYPOINT ["python3", "/app/main.py"]
\ No newline at end of file
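Usage (replace `<inputfile>` with the absolute path of the audio file to transcribe and `<output_dir>` with the absolute path of the directory that should receive `transcript.vtt`; the script runs the model on CUDA, so the container also needs GPU access, e.g. by adding `--gpus all` to `docker run` on a host with the NVIDIA Container Toolkit):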
```shell
docker build -t whisper-transcriber .
docker run --rm -it \
-v <inputfile>:/input/audiofile:ro \
-v <output_dir>:/output whisper-transcriber
```
\ No newline at end of file
main.py 0 → 100644
from faster_whisper import WhisperModel
from datetime import timedelta
model_size = "large-v2"


def format_vtt_timestamp(seconds):
    """Format a time offset in seconds as a WebVTT timestamp (``hh:mm:ss.ttt``).

    ``str(timedelta)`` is not usable here: it prints a single-digit hour, six
    fractional digits, and drops the fraction entirely for whole seconds, none
    of which is valid in a WebVTT cue timing line.

    Args:
        seconds: Offset from the start of the audio, as an int or float.
            Negative values are clamped to zero.

    Returns:
        A string with at least two hour digits, two minute digits, two second
        digits and exactly three millisecond digits.
    """
    # Work in whole milliseconds so rounding can carry into seconds/minutes.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return "{:02d}:{:02d}:{:02d}.{:03d}".format(hours, minutes, secs, millis)


def main():
    """Transcribe /input/audiofile and write the cues to /output/transcript.vtt.

    Each cue is also printed to stdout as it is written.
    """
    # Run on GPU with FP32
    model = WhisperModel(model_size, device="cuda", compute_type="float32")
    # Run on CPU
    # model = WhisperModel(model_size, device="cpu", compute_type="float32")

    # The second return value (transcription info) is not needed here.
    segments, _info = model.transcribe("/input/audiofile", language="de", beam_size=5)

    with open("/output/transcript.vtt", "w", encoding="utf-8") as f:
        f.write("WEBVTT\n\nNOTE This transcript was automatically generated.")
        for segment in segments:
            start = format_vtt_timestamp(segment.start)
            end = format_vtt_timestamp(segment.end)
            # Strip surrounding whitespace instead of blindly dropping the first
            # character, which would eat a real character whenever the segment
            # text does not begin with a space.
            text = segment.text.strip()
            entry = "{} --> {}\n{}".format(start, end, text)
            print(entry)
            # A blank line separates this cue from the header / previous cue.
            f.write("\n\n")
            f.write(entry)


# The container runs this file as a script (python3 main.py); the guard keeps a
# plain import of the module from loading the model.
if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment