From d14f0200d954e16b4a09b861baa053ec6822abfe Mon Sep 17 00:00:00 2001
From: Jan Hartig <jan.hartig@ptb.de>
Date: Tue, 15 Apr 2025 17:54:54 +0200
Subject: [PATCH] mailservice: Add transcript support

---
 fix_transcript.py  | 29 ------------------
 localisations.toml |  8 +++++
 mailservice.py     | 76 ++++++++++++++++++++++++++++++++++++----------
 3 files changed, 68 insertions(+), 45 deletions(-)
 delete mode 100644 fix_transcript.py

diff --git a/fix_transcript.py b/fix_transcript.py
deleted file mode 100644
index 1848391..0000000
--- a/fix_transcript.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import re
-
-transcript_pattern = re.compile(r"^\[(\w+)]: (.+)$")
-
-with open(r"transcript.txt", encoding="utf-8") as f:
-    transcript = f.read()
-
-lines = []
-for line in transcript.split("\n"):
-    match = re.fullmatch(transcript_pattern, line)
-
-    if match:
-        lines.append(match.groups())
-
-output = ""
-first_line = True
-previous_speaker = None
-for speaker, line in lines:
-    if previous_speaker == speaker:
-        output += f" {line}"
-    else:
-        previous_speaker = speaker
-        output += f"{'\n' if not first_line else ''}{speaker}: {line}"
-    first_line = False
-
-print(output)
-
-with open(r"transcript_fixed.txt", "w", encoding="utf-8") as f:
-    f.write(output)
diff --git a/localisations.toml b/localisations.toml
index e9f1787..8874bf5 100644
--- a/localisations.toml
+++ b/localisations.toml
@@ -134,3 +134,11 @@ en = "Create transcript instead?"
 [ footer_transcript ]
 de = "Stattdessen Untertitel erstellen?"
 en = "Create subtitles instead?"
+
+[ mail.transcribe_subject ]
+de = "Ihr Transkript"
+en = "Your transcript"
+
+[ mail.transcribe_content ]
+de = "Das Transkript für Ihre Datei '{}' wurde erstellt.\nSie finden das Resultat im Anhang dieser E-Mail."
+en = "The transcript for your file '{}' has been created.\nThe result is attached to this email."
\ No newline at end of file
diff --git a/mailservice.py b/mailservice.py
index bfe1e67..7627a78 100644
--- a/mailservice.py
+++ b/mailservice.py
@@ -1,3 +1,4 @@
+import io
 import json
 import shutil
 import smtplib
@@ -5,6 +6,7 @@ import tomllib
 from email.message import EmailMessage
 from os import scandir
 from pathlib import Path
+import re
 
 import requests
 from requests.auth import HTTPBasicAuth
@@ -67,26 +69,66 @@ def main(end):
                     language = metadata["language"]
 
                     msg = EmailMessage()
-                    msg["Subject"] = localisations["mail"]["subject"][language]
-                    msg["From"] = config["MAIL"]["FROM"]
-                    msg["To"] = metadata["email"]
 
-                    msg.set_content(
-                        localisations["mail"]["content"][language].format(metadata["filename"])
-                    )
+                    if metadata["job_type"] == "subtitle":
+                        msg["Subject"] = localisations["mail"]["subject"][language]
+                        msg["From"] = config["MAIL"]["FROM"]
+                        msg["To"] = metadata["email"]
 
-                    # filename.language.vtt
-                    if metadata["video_language"] == "None":
-                        filename = Path(metadata["filename"]).with_suffix(".vtt").name
-                    else:
-                        filename = (
-                            Path(metadata["filename"])
-                            .with_suffix(".{}.vtt".format(metadata["video_language"]))
-                            .name
+                        msg.set_content(
+                            localisations["mail"]["content"][language].format(metadata["filename"])
                         )
 
-                    with open(Path(job).joinpath("audio.vtt")) as f:
-                        msg.add_attachment(f.read(), filename=filename)
+                        # filename.language.vtt
+                        if metadata["video_language"] == "None":
+                            filename = Path(metadata["filename"]).with_suffix(".vtt").name
+                        else:
+                            filename = (
+                                Path(metadata["filename"])
+                                .with_suffix(".{}.vtt".format(metadata["video_language"]))
+                                .name
+                            )
+
+                        with open(Path(job).joinpath("audio.vtt")) as f:
+                            msg.add_attachment(f.read(), filename=filename)
+
+                    elif metadata["job_type"] == "transcript":
+                        msg["Subject"] = localisations["mail"]["transcribe_subject"][language]
+                        msg["From"] = config["MAIL"]["FROM"]
+                        msg["To"] = metadata["email"]
+
+                        msg.set_content(
+                            localisations["mail"]["transcribe_content"][language].format(
+                                metadata["filename"]
+                            )
+                        )
+
+                        # filename.txt
+                        filename = Path(metadata["filename"]).with_suffix(".txt").name
+
+                        with open(Path(job).joinpath("audio.txt"), encoding="utf-8") as f:
+                            transcript = f.read()
+
+                        # reformat transcript
+                        lines = []
+                        for line in transcript.split("\n"):
+                            match = re.fullmatch(transcript_pattern, line)
+
+                            if match:
+                                lines.append(match.groups())
+
+                        transcript = ""
+                        first_line = True
+                        previous_speaker = None
+                        for speaker, line in lines:
+                            if previous_speaker == speaker:
+                                transcript += f" {line}"
+                            else:
+                                previous_speaker = speaker
+                                transcript += f"{'\n' if not first_line else ''}{speaker}: {line}"
+                            first_line = False
+
+                        msg.add_attachment(transcript, filename=filename)
 
                     s.send_message(msg)
 
@@ -146,6 +188,8 @@ if __name__ == "__main__":
 
     end = Event()
 
+    transcript_pattern = re.compile(r"^\[(\w+)]: (.+)$")
+
     def handler(signum, frame):
         global end
         print(signum)
-- 
GitLab