From d14f0200d954e16b4a09b861baa053ec6822abfe Mon Sep 17 00:00:00 2001 From: Jan Hartig <jan.hartig@ptb.de> Date: Tue, 15 Apr 2025 17:54:54 +0200 Subject: [PATCH] mailservice: Add transcript support --- fix_transcript.py | 29 ------------------ localisations.toml | 8 +++++ mailservice.py | 76 ++++++++++++++++++++++++++++++++++++---------- 3 files changed, 68 insertions(+), 45 deletions(-) delete mode 100644 fix_transcript.py diff --git a/fix_transcript.py b/fix_transcript.py deleted file mode 100644 index 1848391..0000000 --- a/fix_transcript.py +++ /dev/null @@ -1,29 +0,0 @@ -import re - -transcript_pattern = re.compile(r"^\[(\w+)]: (.+)$") - -with open(r"transcript.txt", encoding="utf-8") as f: - transcript = f.read() - -lines = [] -for line in transcript.split("\n"): - match = re.fullmatch(transcript_pattern, line) - - if match: - lines.append(match.groups()) - -output = "" -first_line = True -previous_speaker = None -for speaker, line in lines: - if previous_speaker == speaker: - output += f" {line}" - else: - previous_speaker = speaker - output += f"{'\n' if not first_line else ''}{speaker}: {line}" - first_line = False - -print(output) - -with open(r"transcript_fixed.txt", "w", encoding="utf-8") as f: - f.write(output) diff --git a/localisations.toml b/localisations.toml index e9f1787..8874bf5 100644 --- a/localisations.toml +++ b/localisations.toml @@ -134,3 +134,11 @@ en = "Create transcript instead?" [ footer_transcript ] de = "Stattdessen Untertitel erstellen?" en = "Create subtitles instead?" + +[ mail.transcribe_subject ] +de = "Ihr Transkript" +en = "Your transcript" + +[ mail.transcribe_content ] +de = "Das Transkript für Ihre Datei '{}' wurde erstellt.\nSie finden das Resultat im Anhang dieser E-Mail." +en = "The transcript for your file '{}' has been created.\nThe result is attached to this email." 
\ No newline at end of file diff --git a/mailservice.py b/mailservice.py index bfe1e67..7627a78 100644 --- a/mailservice.py +++ b/mailservice.py @@ -1,3 +1,4 @@ +import io import json import shutil import smtplib @@ -5,6 +6,7 @@ import tomllib from email.message import EmailMessage from os import scandir from pathlib import Path +import re import requests from requests.auth import HTTPBasicAuth @@ -67,26 +69,66 @@ def main(end): language = metadata["language"] msg = EmailMessage() - msg["Subject"] = localisations["mail"]["subject"][language] - msg["From"] = config["MAIL"]["FROM"] - msg["To"] = metadata["email"] - msg.set_content( - localisations["mail"]["content"][language].format(metadata["filename"]) - ) + if metadata["job_type"] == "subtitle": + msg["Subject"] = localisations["mail"]["subject"][language] + msg["From"] = config["MAIL"]["FROM"] + msg["To"] = metadata["email"] - # filename.language.vtt - if metadata["video_language"] == "None": - filename = Path(metadata["filename"]).with_suffix(".vtt").name - else: - filename = ( - Path(metadata["filename"]) - .with_suffix(".{}.vtt".format(metadata["video_language"])) - .name + msg.set_content( + localisations["mail"]["content"][language].format(metadata["filename"]) ) - with open(Path(job).joinpath("audio.vtt")) as f: - msg.add_attachment(f.read(), filename=filename) + # filename.language.vtt + if metadata["video_language"] == "None": + filename = Path(metadata["filename"]).with_suffix(".vtt").name + else: + filename = ( + Path(metadata["filename"]) + .with_suffix(".{}.vtt".format(metadata["video_language"])) + .name + ) + + with open(Path(job).joinpath("audio.vtt")) as f: + msg.add_attachment(f.read(), filename=filename) + + elif metadata["job_type"] == "transcript": + msg["Subject"] = localisations["mail"]["transcribe_subject"][language] + msg["From"] = config["MAIL"]["FROM"] + msg["To"] = metadata["email"] + + msg.set_content( + localisations["mail"]["transcribe_content"][language].format( + 
metadata["filename"] + ) + ) + + # filename.txt + filename = Path(metadata["filename"]).with_suffix(".txt").name + + with open(Path(job).joinpath("audio.txt"), encoding="utf-8") as f: + transcript = f.read() + + # reformat transcript + lines = [] + for line in transcript.split("\n"): + match = re.fullmatch(transcript_pattern, line) + + if match: + lines.append(match.groups()) + + transcript = "" + first_line = True + previous_speaker = None + for speaker, line in lines: + if previous_speaker == speaker: + transcript += f" {line}" + else: + previous_speaker = speaker + transcript += f"{'\n' if not first_line else ''}{speaker}: {line}" + first_line = False + + msg.add_attachment(transcript, filename=filename) s.send_message(msg) @@ -146,6 +188,8 @@ if __name__ == "__main__": end = Event() + transcript_pattern = re.compile(r"^\[(\w+)]: (.+)$") + def handler(signum, frame): global end print(signum) -- GitLab