Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
ptb-subtitle-service
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Container registry
Model registry
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jan Hartig
ptb-subtitle-service
Commits
d14f0200
Commit
d14f0200
authored
1 month ago
by
Jan Hartig
Browse files
Options
Downloads
Patches
Plain Diff
mailservice: Add transcript support
parent
a95bb274
No related branches found
No related tags found
1 merge request
!16
Feature: transcription
Pipeline
#54338
passed
1 month ago
Stage: build
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
fix_transcript.py
+0
-29
0 additions, 29 deletions
fix_transcript.py
localisations.toml
+8
-0
8 additions, 0 deletions
localisations.toml
mailservice.py
+60
-16
60 additions, 16 deletions
mailservice.py
with
68 additions
and
45 deletions
fix_transcript.py
deleted
100644 → 0
+
0
−
29
View file @
a95bb274
import
re
transcript_pattern
=
re
.
compile
(
r
"
^\[(\w+)]: (.+)$
"
)
with
open
(
r
"
transcript.txt
"
,
encoding
=
"
utf-8
"
)
as
f
:
transcript
=
f
.
read
()
lines
=
[]
for
line
in
transcript
.
split
(
"
\n
"
):
match
=
re
.
fullmatch
(
transcript_pattern
,
line
)
if
match
:
lines
.
append
(
match
.
groups
())
output
=
""
first_line
=
True
previous_speaker
=
None
for
speaker
,
line
in
lines
:
if
previous_speaker
==
speaker
:
output
+=
f
"
{
line
}
"
else
:
previous_speaker
=
speaker
output
+=
f
"
{
'
\n
'
if
not
first_line
else
''
}{
speaker
}
:
{
line
}
"
first_line
=
False
print
(
output
)
with
open
(
r
"
transcript_fixed.txt
"
,
"
w
"
,
encoding
=
"
utf-8
"
)
as
f
:
f
.
write
(
output
)
This diff is collapsed.
Click to expand it.
localisations.toml
+
8
−
0
View file @
d14f0200
...
@@ -134,3 +134,11 @@ en = "Create transcript instead?"
...
@@ -134,3 +134,11 @@ en = "Create transcript instead?"
[ footer_transcript ]
[ footer_transcript ]
de
=
"Stattdessen Untertitel erstellen?"
de
=
"Stattdessen Untertitel erstellen?"
en
=
"Create subtitles instead?"
en
=
"Create subtitles instead?"
[ mail.transcribe_subject ]
de
=
"Ihr Transkript"
en
=
"Your transcript"
[ mail.transcribe_content ]
de
=
"Das Transkript für Ihre Datei '{}' wurde erstellt.
\n
Sie finden das Resultat im Anhang dieser E-Mail."
en
=
"The transcript for your file '{}' has been created.
\n
The result is attached to this email."
\ No newline at end of file
This diff is collapsed.
Click to expand it.
mailservice.py
+
60
−
16
View file @
d14f0200
import
io
import
json
import
json
import
shutil
import
shutil
import
smtplib
import
smtplib
...
@@ -5,6 +6,7 @@ import tomllib
...
@@ -5,6 +6,7 @@ import tomllib
from
email.message
import
EmailMessage
from
email.message
import
EmailMessage
from
os
import
scandir
from
os
import
scandir
from
pathlib
import
Path
from
pathlib
import
Path
import
re
import
requests
import
requests
from
requests.auth
import
HTTPBasicAuth
from
requests.auth
import
HTTPBasicAuth
...
@@ -67,26 +69,66 @@ def main(end):
...
@@ -67,26 +69,66 @@ def main(end):
language
=
metadata
[
"
language
"
]
language
=
metadata
[
"
language
"
]
msg
=
EmailMessage
()
msg
=
EmailMessage
()
msg
[
"
Subject
"
]
=
localisations
[
"
mail
"
][
"
subject
"
][
language
]
msg
[
"
From
"
]
=
config
[
"
MAIL
"
][
"
FROM
"
]
msg
[
"
To
"
]
=
metadata
[
"
email
"
]
msg
.
set_content
(
if
metadata
[
"
job_type
"
]
==
"
subtitle
"
:
localisations
[
"
mail
"
][
"
content
"
][
language
].
format
(
metadata
[
"
filename
"
])
msg
[
"
Subject
"
]
=
localisations
[
"
mail
"
][
"
subject
"
][
language
]
)
msg
[
"
From
"
]
=
config
[
"
MAIL
"
][
"
FROM
"
]
msg
[
"
To
"
]
=
metadata
[
"
email
"
]
# filename.language.vtt
msg
.
set_content
(
if
metadata
[
"
video_language
"
]
==
"
None
"
:
localisations
[
"
mail
"
][
"
content
"
][
language
].
format
(
metadata
[
"
filename
"
])
filename
=
Path
(
metadata
[
"
filename
"
]).
with_suffix
(
"
.vtt
"
).
name
else
:
filename
=
(
Path
(
metadata
[
"
filename
"
])
.
with_suffix
(
"
.{}.vtt
"
.
format
(
metadata
[
"
video_language
"
]))
.
name
)
)
with
open
(
Path
(
job
).
joinpath
(
"
audio.vtt
"
))
as
f
:
# filename.language.vtt
msg
.
add_attachment
(
f
.
read
(),
filename
=
filename
)
if
metadata
[
"
video_language
"
]
==
"
None
"
:
filename
=
Path
(
metadata
[
"
filename
"
]).
with_suffix
(
"
.vtt
"
).
name
else
:
filename
=
(
Path
(
metadata
[
"
filename
"
])
.
with_suffix
(
"
.{}.vtt
"
.
format
(
metadata
[
"
video_language
"
]))
.
name
)
with
open
(
Path
(
job
).
joinpath
(
"
audio.vtt
"
))
as
f
:
msg
.
add_attachment
(
f
.
read
(),
filename
=
filename
)
elif
metadata
[
"
job_type
"
]
==
"
transcript
"
:
msg
[
"
Subject
"
]
=
localisations
[
"
mail
"
][
"
transcribe_subject
"
][
language
]
msg
[
"
From
"
]
=
config
[
"
MAIL
"
][
"
FROM
"
]
msg
[
"
To
"
]
=
metadata
[
"
email
"
]
msg
.
set_content
(
localisations
[
"
mail
"
][
"
transcribe_content
"
][
language
].
format
(
metadata
[
"
filename
"
]
)
)
# filename.txt
filename
=
Path
(
metadata
[
"
filename
"
]).
with_suffix
(
"
.txt
"
).
name
with
open
(
Path
(
job
).
joinpath
(
"
audio.txt
"
),
encoding
=
"
utf-8
"
)
as
f
:
transcript
=
f
.
read
()
# reformat transcript
lines
=
[]
for
line
in
transcript
.
split
(
"
\n
"
):
match
=
re
.
fullmatch
(
transcript_pattern
,
line
)
if
match
:
lines
.
append
(
match
.
groups
())
transcript
=
""
first_line
=
True
previous_speaker
=
None
for
speaker
,
line
in
lines
:
if
previous_speaker
==
speaker
:
transcript
+=
f
"
{
line
}
"
else
:
previous_speaker
=
speaker
transcript
+=
f
"
{
'
\n
'
if
not
first_line
else
''
}{
speaker
}
:
{
line
}
"
first_line
=
False
msg
.
add_attachment
(
transcript
,
filename
=
filename
)
s
.
send_message
(
msg
)
s
.
send_message
(
msg
)
...
@@ -146,6 +188,8 @@ if __name__ == "__main__":
...
@@ -146,6 +188,8 @@ if __name__ == "__main__":
end
=
Event
()
end
=
Event
()
transcript_pattern
=
re
.
compile
(
r
"
^\[(\w+)]: (.+)$
"
)
def
handler
(
signum
,
frame
):
def
handler
(
signum
,
frame
):
global
end
global
end
print
(
signum
)
print
(
signum
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment