Skip to content
Snippets Groups Projects
Commit d14f0200 authored by Jan Hartig's avatar Jan Hartig
Browse files

mailservice: Add transcript support

parent a95bb274
No related branches found
No related tags found
1 merge request!16Feature: transcription
Pipeline #54338 passed
# Reformat a "[speaker]: text" transcript so that consecutive lines spoken by
# the same speaker are merged into one "speaker: text text ..." line.
# Reads transcript.txt and writes the result to transcript_fixed.txt.
import re
from itertools import groupby
from operator import itemgetter

# One transcript line: group 1 is the speaker tag, group 2 the spoken text.
transcript_pattern = re.compile(r"^\[(\w+)]: (.+)$")


def merge_speaker_turns(transcript):
    """Return *transcript* with consecutive same-speaker lines merged.

    Each input line that fully matches ``transcript_pattern`` contributes a
    (speaker, text) pair; every other line is dropped. Runs of pairs with the
    same speaker are joined with single spaces and emitted as one
    ``speaker: text`` line. Output lines are separated by ``\\n`` and there is
    no trailing newline. An input without any matching line yields ``""``.
    """
    pairs = []
    for raw_line in transcript.split("\n"):
        match = transcript_pattern.fullmatch(raw_line)
        if match:
            pairs.append(match.groups())

    # groupby only groups *adjacent* equal keys, which is exactly the
    # "same speaker as the previous line" rule.
    turns = []
    for speaker, run in groupby(pairs, key=itemgetter(0)):
        spoken = " ".join(text for _, text in run)
        turns.append(f"{speaker}: {spoken}")

    # Joining outside an f-string avoids a backslash inside a replacement
    # field, which is a SyntaxError on Python versions before 3.12.
    return "\n".join(turns)


def main():
    """Read transcript.txt, print the merged transcript and save it."""
    with open(r"transcript.txt", encoding="utf-8") as f:
        transcript = f.read()

    output = merge_speaker_turns(transcript)
    print(output)

    with open(r"transcript_fixed.txt", "w", encoding="utf-8") as f:
        f.write(output)


if __name__ == "__main__":
    main()
...@@ -134,3 +134,11 @@ en = "Create transcript instead?" ...@@ -134,3 +134,11 @@ en = "Create transcript instead?"
[ footer_transcript ] [ footer_transcript ]
de = "Stattdessen Untertitel erstellen?" de = "Stattdessen Untertitel erstellen?"
en = "Create subtitles instead?" en = "Create subtitles instead?"
[ mail.transcribe_subject ]
de = "Ihr Transkript"
en = "Your transcript"
[ mail.transcribe_content ]
de = "Das Transkript für Ihre Datei '{}' wurde erstellt.\nSie finden das Resultat im Anhang dieser E-Mail."
en = "The transcript for your file '{}' has been created.\nThe result is attached to this email."
\ No newline at end of file
import io
import json import json
import shutil import shutil
import smtplib import smtplib
...@@ -5,6 +6,7 @@ import tomllib ...@@ -5,6 +6,7 @@ import tomllib
from email.message import EmailMessage from email.message import EmailMessage
from os import scandir from os import scandir
from pathlib import Path from pathlib import Path
import re
import requests import requests
from requests.auth import HTTPBasicAuth from requests.auth import HTTPBasicAuth
...@@ -67,26 +69,66 @@ def main(end): ...@@ -67,26 +69,66 @@ def main(end):
language = metadata["language"] language = metadata["language"]
msg = EmailMessage() msg = EmailMessage()
msg["Subject"] = localisations["mail"]["subject"][language]
msg["From"] = config["MAIL"]["FROM"]
msg["To"] = metadata["email"]
msg.set_content( if metadata["job_type"] == "subtitle":
localisations["mail"]["content"][language].format(metadata["filename"]) msg["Subject"] = localisations["mail"]["subject"][language]
) msg["From"] = config["MAIL"]["FROM"]
msg["To"] = metadata["email"]
# filename.language.vtt msg.set_content(
if metadata["video_language"] == "None": localisations["mail"]["content"][language].format(metadata["filename"])
filename = Path(metadata["filename"]).with_suffix(".vtt").name
else:
filename = (
Path(metadata["filename"])
.with_suffix(".{}.vtt".format(metadata["video_language"]))
.name
) )
with open(Path(job).joinpath("audio.vtt")) as f: # filename.language.vtt
msg.add_attachment(f.read(), filename=filename) if metadata["video_language"] == "None":
filename = Path(metadata["filename"]).with_suffix(".vtt").name
else:
filename = (
Path(metadata["filename"])
.with_suffix(".{}.vtt".format(metadata["video_language"]))
.name
)
with open(Path(job).joinpath("audio.vtt")) as f:
msg.add_attachment(f.read(), filename=filename)
elif metadata["job_type"] == "transcript":
msg["Subject"] = localisations["mail"]["transcribe_subject"][language]
msg["From"] = config["MAIL"]["FROM"]
msg["To"] = metadata["email"]
msg.set_content(
localisations["mail"]["transcribe_content"][language].format(
metadata["filename"]
)
)
# filename.txt
filename = Path(metadata["filename"]).with_suffix(".txt").name
with open(Path(job).joinpath("audio.txt"), encoding="utf-8") as f:
transcript = f.read()
# reformat transcript
lines = []
for line in transcript.split("\n"):
match = re.fullmatch(transcript_pattern, line)
if match:
lines.append(match.groups())
transcript = ""
first_line = True
previous_speaker = None
for speaker, line in lines:
if previous_speaker == speaker:
transcript += f" {line}"
else:
previous_speaker = speaker
transcript += f"{'\n' if not first_line else ''}{speaker}: {line}"
first_line = False
msg.add_attachment(transcript, filename=filename)
s.send_message(msg) s.send_message(msg)
...@@ -146,6 +188,8 @@ if __name__ == "__main__": ...@@ -146,6 +188,8 @@ if __name__ == "__main__":
end = Event() end = Event()
transcript_pattern = re.compile(r"^\[(\w+)]: (.+)$")
def handler(signum, frame): def handler(signum, frame):
global end global end
print(signum) print(signum)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment