diff --git a/architecture.md b/architecture.md index 78269a9ac9a4110ccc5780962e8994234cde2d40..3cb6c9933570e1a5036451b5273c2baea13b9078 100644 --- a/architecture.md +++ b/architecture.md @@ -17,7 +17,7 @@ job_uuid: - audio.mkv - video_language.txt - metadata.json - - statefile (new/done/error) + - statefile (new/new_transcribe/done/error) ``` ### audio.mkv @@ -31,6 +31,7 @@ Is used by the cronjob script (step 3). Used by mailservice (step 4). ```json { + "job_type": "subtitle", "email": "example@example.com", "language": "de", "video_language": "de", @@ -40,7 +41,8 @@ Used by mailservice (step 4). ### statefile State is tracked through the following files in the jobs folder: - - new: Job has been submitted by user + - new: Subtitle job has been submitted by user + - new_transcribe: Transcript job has been submitted by user - submitted: Job has been scheduled on gpu cluster - done: Job has been processed without errors - error: Job has been processed with errors \ No newline at end of file diff --git a/forms.py b/forms.py index 8b251e7191fd7066ab4e48ac385106c16225982d..2b80c66a1a6bdc7521bfc065a62ef4b561481e39 100644 --- a/forms.py +++ b/forms.py @@ -2,8 +2,8 @@ import av from flask import current_app from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired, FileSize -from wtforms import StringField, SelectField -from wtforms.validators import InputRequired, Email +from wtforms import StringField, SelectField, IntegerField +from wtforms.validators import InputRequired, Email, Optional from wtforms.validators import ValidationError @@ -18,9 +18,9 @@ def validate_audio(_, field): if not has_audio: raise ValidationError("noaudiotrack") - except av.AVError as e: + except av.FFmpegError as e: current_app.logger.info( - "Error while checking audio of file '{}': {}".format(file.filename, str(e)) + "Error while checking audio of file '{}': {}".format(file.filename, str(e.strerror)) ) raise ValidationError("brokenfile") @@ -41,4 +41,4 @@ class 
UploadForm(FlaskForm): FileSize(current_app.config["MAX_CONTENT_LENGTH"]), validate_audio, ], - ) + ) \ No newline at end of file diff --git a/localisations.toml b/localisations.toml index 856ea0ef65b46465c97a7baf0d99d4ce9ea91738..8874bf561a102e34e35ee7060a2b2788eff102fd 100644 --- a/localisations.toml +++ b/localisations.toml @@ -109,4 +109,36 @@ en = "Your subtitles" [ mail.content ] de = "Die Untertitel für Ihre Datei '{}' wurden erstellt.\nSie finden das Resultat im Anhang dieser E-Mail." -en = "The subtitles for your file '{}' have been created.\nThe result is attached to this email." \ No newline at end of file +en = "The subtitles for your file '{}' have been created.\nThe result is attached to this email." + +[ transcribe_title ] +de = "PTB Transkriptionsdienst" +en = "PTB Transcription Service" + +[ transcribe_leadtext ] +de = "Dieser Dienst erstellt automatisch Transkripte für Video- oder Audioaufnahmen.<br>Die KI-Verarbeitung läuft im HPC-Cluster der PTB." +en = "This service automatically creates transcripts for video or audio recordings.<br>The AI processing runs in PTB's HPC cluster." + +[ transcribe_successtext] +de = "Daten erfolgreich übermittelt. Ihr Transkript wird demnächst generiert.<br>Sobald der Prozess abgeschlossen ist, erhalten Sie eine E-Mail." +en = "Data transmitted successfully. Your transcript will be generated shortly.<br>Once the process has completed an email will be sent to you." + +[ transcribe_submit ] +de = "Transkript erstellen" +en = "Create transcript" + +[ footer_subtitle ] +de = "Stattdessen Transkript erstellen?" +en = "Create transcript instead?" + +[ footer_transcript ] +de = "Stattdessen Untertitel erstellen?" +en = "Create subtitles instead?" + +[ mail.transcribe_subject ] +de = "Ihr Transkript" +en = "Your transcript" + +[ mail.transcribe_content ] +de = "Das Transkript für Ihre Datei '{}' wurde erstellt.\nSie finden das Resultat im Anhang dieser E-Mail."
+en = "The transcript for your file '{}' has been created.\nThe result is attached to this email." \ No newline at end of file diff --git a/mailservice.py b/mailservice.py index bfe1e67bdc429959194df5ba11e66206406b6258..7627a780fafa081b7a296fd9036064905cc849c5 100644 --- a/mailservice.py +++ b/mailservice.py @@ -1,3 +1,4 @@ +import io import json import shutil import smtplib @@ -5,6 +6,7 @@ import tomllib from email.message import EmailMessage from os import scandir from pathlib import Path +import re import requests from requests.auth import HTTPBasicAuth @@ -67,26 +69,66 @@ def main(end): language = metadata["language"] msg = EmailMessage() - msg["Subject"] = localisations["mail"]["subject"][language] - msg["From"] = config["MAIL"]["FROM"] - msg["To"] = metadata["email"] - msg.set_content( - localisations["mail"]["content"][language].format(metadata["filename"]) - ) + if metadata["job_type"] == "subtitle": + msg["Subject"] = localisations["mail"]["subject"][language] + msg["From"] = config["MAIL"]["FROM"] + msg["To"] = metadata["email"] - # filename.language.vtt - if metadata["video_language"] == "None": - filename = Path(metadata["filename"]).with_suffix(".vtt").name - else: - filename = ( - Path(metadata["filename"]) - .with_suffix(".{}.vtt".format(metadata["video_language"])) - .name + msg.set_content( + localisations["mail"]["content"][language].format(metadata["filename"]) ) - with open(Path(job).joinpath("audio.vtt")) as f: - msg.add_attachment(f.read(), filename=filename) + # filename.language.vtt + if metadata["video_language"] == "None": + filename = Path(metadata["filename"]).with_suffix(".vtt").name + else: + filename = ( + Path(metadata["filename"]) + .with_suffix(".{}.vtt".format(metadata["video_language"])) + .name + ) + + with open(Path(job).joinpath("audio.vtt")) as f: + msg.add_attachment(f.read(), filename=filename) + + elif metadata["job_type"] == "transcript": + msg["Subject"] = localisations["mail"]["transcribe_subject"][language] +
msg["From"] = config["MAIL"]["FROM"] + msg["To"] = metadata["email"] + + msg.set_content( + localisations["mail"]["transcribe_content"][language].format( + metadata["filename"] + ) + ) + + # filename.txt + filename = Path(metadata["filename"]).with_suffix(".txt").name + + with open(Path(job).joinpath("audio.txt"), encoding="utf-8") as f: + transcript = f.read() + + # reformat transcript + lines = [] + for line in transcript.split("\n"): + match = re.fullmatch(transcript_pattern, line) + + if match: + lines.append(match.groups()) + + transcript = "" + first_line = True + previous_speaker = None + for speaker, line in lines: + if previous_speaker == speaker: + transcript += f" {line}" + else: + previous_speaker = speaker + transcript += f"{'\n' if not first_line else ''}{speaker}: {line}" + first_line = False + + msg.add_attachment(transcript, filename=filename) s.send_message(msg) @@ -146,6 +188,8 @@ if __name__ == "__main__": end = Event() + transcript_pattern = re.compile(r"^\[(\w+)]: (.+)$") + def handler(signum, frame): global end print(signum) diff --git a/requirements.txt b/requirements.txt index 440415088a50fbf522ea853655eb68af3a26dfac..f3b0f22286246c305073b8e9ff514e85beaaadee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -av~=14.0.0 +pyav~=14.0.0 Flask~=3.1.0 Flask-WTF~=1.2.2 wtforms[email]~=3.2.1 diff --git a/routes.py b/routes.py index 472e8804bfd2ecfe7ad7c388067de67bd32ce272..ca38c5a20feb391756eb5afb8f6d08a6607044e8 100644 --- a/routes.py +++ b/routes.py @@ -16,87 +16,79 @@ from flask import ( from forms import UploadForm -@current_app.route("/upload/<string:language>", methods=["GET", "POST"]) -def upload(language: str): +@current_app.route("/subtitle/<string:language>", methods=["GET", "POST"]) +def subtitle(language: str): set_language(language) + request.type = "subtitle" - form = UploadForm() - form.language.choices = [ - (k, v[language]) for k, v in current_app.config["LOCALISATIONS"]["language"]["options"].items() - ] + form 
= get_form(language) if form.is_submitted(): if not form.email.data.endswith(current_app.config["MAIL_DOMAIN"]): form.email.data += current_app.config["MAIL_DOMAIN"] if form.validate(): + file = form.file.data + video_language = "None" if form.language.data == "auto" else form.language.data + metadata = { + "job_type": "subtitle", + "email": form.email.data, + "language": language, + "video_language": video_language, + "filename": file.filename, + } + return stream_with_context(process_file(file, video_language, metadata, request.type)) + else: + if form.email.data: + form.email.data = form.email.data[: -len(current_app.config["MAIL_DOMAIN"])] - @stream_with_context - def process_file(): - # provide immediate user feedback - yield render_template("success.html") - - # do all processing - file = form.file.data - - folder_name = str(uuid4()) - folder_path = path.join(current_app.config["UPLOAD_FOLDER"], folder_name) - makedirs(folder_path) - - # Only save first audio stream - with av.open(file.stream) as container: - audio_stream = [stream for stream in container.streams if stream.type == "audio"][0] - - with av.open(path.join(folder_path, "audio.mkv"), "w") as out: - out_stream = out.add_stream_from_template(audio_stream) - - for packet in container.demux(audio_stream): - # Skip the "flushing" packets that `demux` generates. - if packet.dts is None: - continue - - # Assign the packet to the new stream. 
- packet.stream = out_stream - - out.mux(packet) - - video_language = "None" if form.language.data == "auto" else form.language.data + if language != current_app.config["DEFAULT_LANGUAGE"]: + form = language_fix_form(form, language) - metadata = { - "email": form.email.data, - "language": language, - "video_language": video_language, - "filename": file.filename, - } + return render_template("upload.html", form=form) - with open(path.join(folder_path, "metadata.json"), "w") as f: - json.dump(metadata, f) - with open(path.join(folder_path, "video_language.txt"), "w") as f: - f.write(video_language) +@current_app.route("/transcribe/<string:language>", methods=["GET", "POST"]) +def transcribe(language: str): + set_language(language) + request.type = "transcript" - open(path.join(folder_path, "new"), "wb").close() + form = get_form(language) - # yield empty string to close response - yield "" + if form.is_submitted(): + if not form.email.data.endswith(current_app.config["MAIL_DOMAIN"]): + form.email.data += current_app.config["MAIL_DOMAIN"] - return process_file() + if form.validate(): + file = form.file.data + video_language = "None" if form.language.data == "auto" else form.language.data + metadata = { + "job_type": "transcript", + "email": form.email.data, + "language": language, + "video_language": video_language, + "filename": file.filename, + } + return stream_with_context(process_file(file, video_language, metadata, request.type)) else: if form.email.data: form.email.data = form.email.data[: -len(current_app.config["MAIL_DOMAIN"])] if language != current_app.config["DEFAULT_LANGUAGE"]: - form.email.label.text = current_app.config["LOCALISATIONS"]["email"]["label"][language] - form.language.label.text = current_app.config["LOCALISATIONS"]["language"]["label"][language] - form.file.label.text = current_app.config["LOCALISATIONS"]["file"]["label"][language] + form = language_fix_form(form, language) return render_template("upload.html", form=form) 
@current_app.route("/") -@current_app.route("/upload/") +@current_app.route("/subtitle/") def default(): - return redirect(url_for("upload", language=current_app.config["DEFAULT_LANGUAGE"])) + return redirect(url_for("subtitle", language=current_app.config["DEFAULT_LANGUAGE"])) + + +@current_app.route("/transcribe/") +def default_transcribe(): + return redirect(url_for("transcribe", language=current_app.config["DEFAULT_LANGUAGE"])) def set_language(language): @@ -106,6 +98,65 @@ def set_language(language): request.language = language +def get_form(language): + form = UploadForm() + form.language.choices = [ + (k, v[language]) for k, v in current_app.config["LOCALISATIONS"]["language"]["options"].items() + ] + + return form + + +def language_fix_form(form, language): + form.email.label.text = current_app.config["LOCALISATIONS"]["email"]["label"][language] + form.language.label.text = current_app.config["LOCALISATIONS"]["language"]["label"][language] + form.file.label.text = current_app.config["LOCALISATIONS"]["file"]["label"][language] + + return form + + +def process_file(file, video_language, metadata, type): + # provide immediate user feedback + yield render_template("success.html") + + folder_name = str(uuid4()) + folder_path = path.join(current_app.config["UPLOAD_FOLDER"], folder_name) + makedirs(folder_path) + + # Only save first audio stream + with av.open(file.stream) as container: + audio_stream = [stream for stream in container.streams if stream.type == "audio"][0] + + with av.open(path.join(folder_path, "audio.mkv"), "w") as out: + out_stream = out.add_stream_from_template(audio_stream) + + for packet in container.demux(audio_stream): + # Skip the "flushing" packets that `demux` generates. + if packet.dts is None: + continue + + # Assign the packet to the new stream. 
+ packet.stream = out_stream + + out.mux(packet) + + with open(path.join(folder_path, "metadata.json"), "w") as f: + json.dump(metadata, f) + + with open(path.join(folder_path, "video_language.txt"), "w") as f: + f.write(video_language) + + if type == "subtitle": + open(path.join(folder_path, "new"), "wb").close() + elif type == "transcript": + open(path.join(folder_path, "new_transcribe"), "wb").close() + else: + raise NotImplementedError + + # yield empty string to close response + yield "" + + @current_app.route("/health") def health(): return "", 204 diff --git a/templates/base.html b/templates/base.html index 6d648beb33606a424104ea1a0c5d91e9c1eefd4d..55b97132cff7fbb82c595d3336515959bd7b665b 100644 --- a/templates/base.html +++ b/templates/base.html @@ -44,8 +44,8 @@ <div class="py-5 text-center"> <img class="d-block mx-auto mb-4" src="{{ url_for('static', filename='img/logo.svg') }}" alt="PTB Logo" width="96" height="96"> - <h1 class="h2">{{ config["LOCALISATIONS"]["title"][request.language] }}</h1> - <p class="lead">{% autoescape false %}{{ config["LOCALISATIONS"]["leadtext"][request.language] }}{% endautoescape %}</p> + <h1 class="h2">{% block title %}{% endblock %}</h1> + <p class="lead">{% block leadtext %}{% endblock %}</p> </div> {% block content %}{% endblock %} </main> @@ -61,6 +61,16 @@ {% endfor %} </p> <br> + <p class="mb-1"> + {% if request.type == "subtitle" -%} + <a class="link-secondary" href="{{ url_for("transcribe", language=request.language) }}">{{ config["LOCALISATIONS"]["footer_subtitle"][request.language] }}</a> + {% endif %} + {% if request.type == "transcript" -%} + <a class="link-secondary" href="{{ url_for("subtitle", language=request.language) }}">{{ config["LOCALISATIONS"]["footer_transcript"][request.language] }}</a> + {% endif %} + + </p> + <br> <p class="mb-1">{{ config["LOCALISATIONS"]["contact"]["text"][request.language] }}:</p> <p class="mb-1">{{ config["CONTACT"]["ORG"] }} <a class="link-secondary" href="mailto:{{ 
config["CONTACT"]["MAIL"] }}">{{ config["CONTACT"]["NAME"] }}</a></p> <p class="font-monospace"><a class="link-secondary" target="_blank" referrerpolicy="no-referrer" href="https://github.com/m-bain/whisperX">WhisperX</a><br> diff --git a/templates/success.html b/templates/success.html index 9d58ebe877e670e0f84eb70ce3859a7f4c9758af..4c2b4d64568f0197db034ee04f8786fac7eca636 100644 --- a/templates/success.html +++ b/templates/success.html @@ -5,8 +5,22 @@ <img class="mx-auto mb-4 spinner processing transition-opacity" src="{{ url_for('static', filename='img/arrow-repeat.svg') }}" alt="Spinning arrow" height="136" aria-hidden="true"> <img class="mx-auto mb-4 success hidden transition-opacity" src="{{ url_for('static', filename='img/cloud-check.svg') }}" alt="Cloud with checkmark" height="136" aria-hidden="true" hidden> <p class="pt-4 lead processing transition-opacity">{% autoescape false %}{{ config["LOCALISATIONS"]["processingtext"][request.language] }}{% endautoescape %}</p> - <p class="pt-4 lead success hidden transition-opacity" hidden>{% autoescape false %}{{ config["LOCALISATIONS"]["successtext"][request.language] }}{% endautoescape %}</p> - <a class="pt-4 fs-1 lead success hidden transition-opacity icon-link icon-link-hover link-offset-2" style="--bs-icon-link-transform: translate3d(-.125rem, 0, 0);" href="{{ url_for('upload', language=request.language) }}"> + <p class="pt-4 lead success hidden transition-opacity" hidden> + {% if request.type == "subtitle" -%} + {% autoescape false %}{{ config["LOCALISATIONS"]["successtext"][request.language] }}{% endautoescape %} + {% endif %} + {% if request.type == "transcript" -%} + {% autoescape false %}{{ config["LOCALISATIONS"]["transcribe_successtext"][request.language] }}{% endautoescape %} + {% endif %} + + </p> + <a class="pt-4 fs-1 lead success hidden transition-opacity icon-link icon-link-hover link-offset-2" style="--bs-icon-link-transform: translate3d(-.125rem, 0, 0);" href=" + {% if request.type == "subtitle" 
-%} + {{ url_for('subtitle', language=request.language) }} + {% endif %} + {% if request.type == "transcript" -%} + {{ url_for('transcribe', language=request.language) }} + {% endif %}"> <img class="bi" aria-label='{{ config["LOCALISATIONS"]["return"][request.language] }}' src="{{ url_for('static', filename='img/arrow-left.svg') }}" alt="Return arrow" height="4em"> </a> </div> diff --git a/templates/upload.html b/templates/upload.html index f123f9de3266dcd9bed99694e887b47b38c07d6e..68bc5a65398d13365df9ea7f9bd356a034cfc345 100644 --- a/templates/upload.html +++ b/templates/upload.html @@ -1,5 +1,23 @@ {% extends "base.html" %} +{% block title %} + {% if request.type == "subtitle" -%} + {{ config["LOCALISATIONS"]["title"][request.language] }} + {% endif %} + {% if request.type == "transcript" -%} + {{ config["LOCALISATIONS"]["transcribe_title"][request.language] }} + {% endif %} +{% endblock %} + +{% block leadtext %} + {% if request.type == "subtitle" -%} + {% autoescape false %}{{ config["LOCALISATIONS"]["leadtext"][request.language] }}{% endautoescape %} + {% endif %} + {% if request.type == "transcript" -%} + {% autoescape false %}{{ config["LOCALISATIONS"]["transcribe_leadtext"][request.language] }}{% endautoescape %} + {% endif %} +{% endblock %} + {% block content %} <div> <form id="form" class="transition-opacity" method="post" enctype="multipart/form-data"> @@ -81,7 +99,14 @@ </div> <div class="row justify-content-center"> <div class="col-sm-9 col-md-7 pt-4"> - <button class="w-100 btn btn-dark btn-lg" type="submit">{{ config["LOCALISATIONS"]["submit"][request.language] }}</button> + <button class="w-100 btn btn-dark btn-lg" type="submit"> + {% if request.type == "subtitle" -%} + {{ config["LOCALISATIONS"]["submit"][request.language] }} + {% endif %} + {% if request.type == "transcript" -%} + {{ config["LOCALISATIONS"]["transcribe_submit"][request.language] }} + {% endif %} + </button> </div> </div> </form>