Pages : 1
#1 Le 03/12/2011, à 17:20
- Crazyfaboo
[RÉSOLU] srt to ass
Bonjour,
Je voudrais convertir en ligne de commande des centaines de sous-titres SRT vers ASS pour pouvoir y ajouter des styles.
Existe-t-il un logiciel en ligne de commande qui le fasse ? Je sais le faire manuellement avec subtitle-editor… mais c'est clair que je ne vais pas le faire à la main dans ce cas-là.
Dernière modification par Crazyfaboo (Le 04/12/2011, à 18:35)
Hors ligne
#2 Le 03/12/2011, à 19:16
- Levi59
Re : [RÉSOLU] srt to ass
c'est quoi la différence entre un srt et un ass ? (un bout de code du fichier serait pratique )
Hors ligne
#3 Le 03/12/2011, à 22:12
- Crazyfaboo
Re : [RÉSOLU] srt to ass
La différence principale, c'est que le Advanced Sub Station Alpha (ass) contient des informations sur les polices, tailles, couleurs, transformations et plus encore à appliquer sur le texte. L'intérêt principal que j'en tire, en plus d'afficher mon sous titre dans une jolie police, c'est d'avoir une bordure autour du texte : finis les textes blancs sur fond blanc
Substantiellement ça ressemble à ça :
Dialogue: 0,0:03:54.70,0:03:59.23,Italic,,0000,0000,0000,,From the Dardanelles\NTo the mountains of Peru
Alors qu'un élément de sous titre SubRip (srt) va être :
2
00:03:54,705 --> 00:03:59,233
<i>From the Dardanelles
To the mountains of Peru</i>
…
Mais finalement j'ai écrit mon propre script python pour gérer ça… C'est pas compliqué à faire… et je cherchais justement à savoir si ça existait déjà ou pas.
Le script python, très simple d'utilisation. Supporte l'utf-8, 16 et 32 et les BOM Unicode et ptèt aussi l'ISO-8859-1. Ça corrige un peu le sous-titre parsé et applique une police "normale" pour le texte normal et une police italique pour le texte italique.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import codecs
import argparse
class SubElement:
    """A single subtitle cue: start time, end time and displayed text.

    The class only stores what it is given; the reader of this script
    fills the two times with integer milliseconds.
    """

    def __init__(self, start, end, text):
        self.timeStart = start
        self.timeEnd = end
        self.text = text

    def appendLine(self, text):
        """Add one more line to the cue, skipping lines that are blank.

        Surrounding whitespace is removed and lines are joined with a
        newline character.
        """
        stripped = text.strip()
        if stripped == '':
            return
        if self.text != '':
            self.text += "\n"
        self.text += stripped

    def __repr__(self):
        # The representation is simply the cue text
        return self.text
class AdvancedSubStationAlphaWriter:
    """Write subtitle cues to an Advanced Sub Station Alpha (.ass) file.

    The output file is written as soon as the object is constructed.

    filename    -- path of the file to create (overwritten if it exists)
    subElements -- iterable of cues exposing ``timeStart`` / ``timeEnd``
                   (integer milliseconds) and ``text``
    """

    def __init__(self, filename, subElements):
        self.filename = filename
        self.elements = subElements
        self.write()

    def write(self):
        """Write the UTF-8 BOM, the header and one Dialogue line per cue."""
        # Binary mode: the BOM is a byte string, and encoding explicitly
        # makes this work on Python 3 as well as Python 2. The ``with``
        # block closes the file even when a cue fails to format.
        with open(self.filename, 'wb') as f:
            f.write(codecs.BOM_UTF8)
            f.write(self._toBytes(self.getHeader()))
            for e in self.elements:
                f.write(self._toBytes(self.getLine(e)))

    @staticmethod
    def _toBytes(text):
        """Return *text* as UTF-8 bytes; byte strings pass through as is."""
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    def getLine(self, element):
        """Return the ``Dialogue:`` line (newline-terminated) for a cue.

        A cue containing an ``<i>`` tag is rendered entirely with the
        "Italic" style and has its ``<i>`` / ``</i>`` tags removed; any
        other cue uses the "Regular" style.
        """
        start = self.getTime(element.timeStart)
        end = self.getTime(element.timeEnd)
        # ASS encodes a line break inside a cue as the two characters \N
        text = element.text.replace("\n", "\\N")
        font = "Regular"
        if '<i>' in text:
            font = "Italic"
            text = text.replace("<i>", "").replace("</i>", "")
        prefix = "Dialogue: 0,{0},{1},{2},,0000,0000,0000,,".format(start, end, font)
        return prefix + text + "\n"

    def getTime(self, t):
        """Format *t* milliseconds as ``H:MM:SS.cc``.

        The hundredths are truncated, not rounded.
        """
        # divmod keeps everything an integer on Python 3, where ``/``
        # would produce floats and corrupt the formatted time.
        t, ms = divmod(t, 1000)
        t, s = divmod(t, 60)
        h, m = divmod(t, 60)
        return '{0:01}:{1:02}:{2:02}.{3:02}'.format(h, m, s, ms // 10)

    def getHeader(self):
        """Return the script header: info, style definitions, event format."""
        return (
            # The [Script Info] section title must open the file
            "[Script Info]\n"
            "ScriptType: V4.00+\n"
            "\n"
            "[V4+ Styles]\n"
            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n"
            "Style: Regular,Cronos Pro,24,&H00FFFFFF,&H0000FFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,1,1,2,20,20,20,0\n"
            "Style: Italic,Cronos Pro,24,&H00FFFFFF,&H0000FFFF,&H00000000,&H00000000,-1,-1,0,0,100,100,0,0,1,1,1,2,20,20,20,0\n"
            "\n"
            "[Events]\n"
            "Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text\n"
        )
class SubRipReader:
    """Read a SubRip (.srt) file and parse it into a list of cues.

    Constructing the object detects the encoding from the byte order mark
    (UTF-8 is assumed when there is none), reads the file and parses it.
    The resulting ``SubElement`` objects are available in ``self.elements``.
    """

    # (byte order mark, codec) pairs. The UTF-32 marks are tested before the
    # UTF-16 ones because the UTF-32 LE mark (FF FE 00 00) begins with the
    # UTF-16 LE mark (FF FE) and would otherwise never be recognised.
    _BOMS = (
        (codecs.BOM_UTF8, 'utf-8'),
        (codecs.BOM_UTF32_LE, 'utf-32-le'),
        (codecs.BOM_UTF32_BE, 'utf-32-be'),
        (codecs.BOM_UTF16_LE, 'utf-16-le'),
        (codecs.BOM_UTF16_BE, 'utf-16-be'),
    )

    # Typographic clean-up applied, in this order, to every text line.
    # NOTE(review): the first four search strings were invisible characters
    # in the published script; they are presumably the C1 controls obtained
    # when Windows-1252 text is decoded as ISO-8859-1 -- confirm.
    _REPLACEMENTS = (
        (u'\u009c', u'\u0153'),        # oe ligature
        (u'\u0092', u'\u2019'),        # right single quotation mark
        (u'\u0093', u'\u201c'),        # left double quotation mark
        (u'\u0094', u'\u201d'),        # right double quotation mark
        (u'\u00b4\u00b4', u'"'),       # two acute accents used as a quote
        (u'\u00b4', u"'"),             # acute accent used as an apostrophe
        (u'....', u'\u2026'),          # dots -> horizontal ellipsis
        (u'...', u'\u2026'),
        (u'. . .', u'\u2026'),
    )

    def __init__(self, filename):
        self.filename = filename
        self.bomLength = 0
        self.encoding = 'utf-8'
        self.lines = []
        self.elements = []
        self.detectBomAndEncoding()
        self.read()
        self.parse()

    @classmethod
    def _sniffBom(cls, head):
        """Return ``(codec, bom length)`` for the first bytes of a file.

        Returns ``(None, 0)`` when *head* does not start with a known BOM.
        """
        for bom, encoding in cls._BOMS:
            if head.startswith(bom):
                return encoding, len(bom)
        return None, 0

    def detectBomAndEncoding(self):
        """Set ``encoding`` and ``bomLength`` from the file's BOM, if any."""
        # Binary mode so the raw bytes can be compared with the BOM constants
        with open(self.filename, 'rb') as f:
            begin = f.read(4)
        encoding, length = self._sniffBom(begin)
        if encoding is not None:
            self.encoding = encoding
            self.bomLength = length

    def read(self):
        """Load the decoded lines of the file into ``self.lines``."""
        with codecs.open(self.filename, 'r', self.encoding) as f:
            self.lines = f.readlines()
        # The codecs used here keep the BOM as a leading U+FEFF character
        if self.lines and self.bomLength > 0 and self.lines[0].startswith(u'\ufeff'):
            self.lines[0] = self.lines[0][1:]

    @staticmethod
    def _isCounter(text):
        """Tell whether *text* is a cue number (ASCII digits only)."""
        return text != u'' and all(c in u'0123456789' for c in text)

    @classmethod
    def _cleanText(cls, text):
        """Return *text* with the typographic replacements applied."""
        for old, new in cls._REPLACEMENTS:
            text = text.replace(old, new)
        return text

    @staticmethod
    def _toNative(text):
        """Return *text* as the interpreter's native ``str`` type.

        On Python 2 this is a UTF-8 byte string, which is what the rest of
        the script concatenates and writes; on Python 3 text is unchanged.
        """
        if str is bytes:
            return text.encode('utf-8')
        return text

    def parse(self):
        """Turn ``self.lines`` into cues appended to ``self.elements``."""
        el = None
        isJustNew = False       # True when the next line is the timing line
        nextCountReady = True   # False right after a timing line
        for line in self.lines:
            l = line.replace(u"\n", u"").replace(u"\r", u"")
            if isJustNew:
                isJustNew = False
                nextCountReady = False
                times = l.split(u' --> ')
                el.timeStart = self.parseTime(times[0])
                el.timeEnd = self.parseTime(times[1])
            elif nextCountReady and self._isCounter(l):
                if el is not None:
                    self.elements.append(el)
                el = SubElement(0, 0, '')
                isJustNew = True
            else:
                # The first line after a timing line is always text, even
                # when it consists of digits only; later ones may be numbers.
                nextCountReady = True
                if el is None:
                    # Text before the first cue number: nothing to attach to
                    continue
                l = self._toNative(self._cleanText(l))
                l = self.applyLanguageOnLine(l)
                el.appendLine(l)
        if el is not None:
            self.elements.append(el)

    def parseTime(self, time):
        """Convert an ``HH:MM:SS,mmm`` timestamp to integer milliseconds."""
        r1 = time.split(":")
        h = int(r1[0])
        m = int(r1[1])
        r2 = r1[2].split(',')
        s = int(r2[0])
        ms = int(r2[1])
        return ((h * 60 + m) * 60 + s) * 1000 + ms

    def applyLanguageOnLine(self, line):
        """Hook for language-specific fixes; returns the line unchanged."""
        return line
def getAssFilename(srt):
    """Return the output path for the SubRip file *srt*.

    A trailing ``.srt`` extension (in any letter case) is swapped for
    ``.ass``; otherwise ``.ass`` is appended. The result therefore never
    equals the input path, so the source file cannot be overwritten.
    """
    if srt.lower().endswith('.srt'):
        return srt[:-len('.srt')] + '.ass'
    return srt + '.ass'


def main():
    """Convert the SubRip file named on the command line to ASS."""
    parser = argparse.ArgumentParser(description='Convert a SubRip (srt) file to an Advanced Sub Station Alpha (ass) file.')
    parser.add_argument('file', metavar='file', nargs=1,
                        help='the SubRip (srt) file')
    args = parser.parse_args()
    srt = args.file[0]
    subs = SubRipReader(srt)
    AdvancedSubStationAlphaWriter(getAssFilename(srt), subs.elements)


# Arguments are parsed only when run as a script, so importing the module
# neither reads sys.argv nor exits.
if __name__ == "__main__":
    main()
Je viens juste de le pondre, donc y'a encore quelques petites améliorations à apporter, mais ça marche bien.
Hors ligne
#4 Le 04/12/2011, à 01:32
- Levi59
Re : [RÉSOLU] srt to ass
Bravo! Ma question était justement destinée à vérifier si l'écriture d'un script était envisageable mais tu as répondu à ma question! Si le script marche bien, tu devrais le mettre dans "Trucs, astuces et scripts utiles".
Si tu as résolu ton problème, ajoute un Résolu au titre en éditant ton premier message.
Hors ligne
#5 Le 11/12/2011, à 20:17
- Crazyfaboo
Re : [RÉSOLU] srt to ass
J'ai attendu un peu avant de poster une version définitive.
Testé et éprouvé avec de nombreux sous-titres (une cinquantaine je dirais)… UTF-8 : OK, ISO-8859-1 : OK et plus d'options dans la ligne de commande.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import codecs
import argparse
import os, glob
class Console:
    """Minimal console output helper used through its class methods.

    Set ``Console.verbose`` to True to enable the ``v`` and ``w`` messages.
    """

    verbose = False

    @classmethod
    def l(cls, message):
        """Print *message* unconditionally."""
        # Function-call form: valid on Python 3 and, with a single
        # argument, prints exactly the same thing on Python 2.
        print(message)

    @classmethod
    def v(cls, message):
        """Print *message* only when verbose mode is on."""
        if cls.verbose:
            cls.l(message)

    @classmethod
    def w(cls, message):
        """Print a warning; like ``v`` it is shown only in verbose mode."""
        cls.v("## Warning ## " + message)
class SubElement:
    """One subtitle entry holding its start time, end time and text.

    Nothing is validated here; in this script the reader stores the two
    times as integer milliseconds.
    """

    def __init__(self, start, end, text):
        self.timeStart = start
        self.timeEnd = end
        self.text = text

    def appendLine(self, text):
        """Append a text line to the entry; blank lines are ignored.

        The line is stripped of surrounding whitespace first, and a
        newline separates it from any text already present.
        """
        cleaned = text.strip()
        if cleaned != '':
            separator = "\n" if self.text != '' else ''
            self.text = self.text + separator + cleaned

    def __repr__(self):
        # An entry is represented by its text alone
        return self.text
class AdvancedSubStationAlphaWriter:
    """Write subtitle cues to an Advanced Sub Station Alpha (.ass) file.

    The output file is written as soon as the object is constructed.

    filename    -- path of the file to create (overwritten if it exists)
    subElements -- iterable of cues exposing ``timeStart`` / ``timeEnd``
                   (integer milliseconds) and ``text``
    regular     -- font name of the "Regular" style
    italic      -- font name of the "Italic" style
    size        -- font size of both styles (string or number)
    """

    # Everything that follows the font size in both Style lines, up to the
    # Italic flag, and what comes after that flag.
    _STYLE_COLOURS = ",&H00FFFFFF,&H0000FFFF,&H00000000,&H00000000,-1,"
    _STYLE_TAIL = ",0,0,100,100,0,0,1,1,1,2,20,20,20,0\n"

    def __init__(self, filename, subElements, regular, italic, size):
        Console.v('Writing "' + filename + '"')
        self.fontRegular = regular
        self.fontItalic = italic
        self.fontSize = size
        self.filename = filename
        self.elements = subElements
        self.write()

    def write(self):
        """Write the UTF-8 BOM, the header and one Dialogue line per cue."""
        # Binary mode: the BOM is a byte string, and encoding explicitly
        # makes this work on Python 3 as well as Python 2. The ``with``
        # block closes the file even when a cue fails to format.
        with open(self.filename, 'wb') as f:
            f.write(codecs.BOM_UTF8)
            f.write(self._toBytes(self.getHeader()))
            for e in self.elements:
                f.write(self._toBytes(self.getLine(e)))

    @staticmethod
    def _toBytes(text):
        """Return *text* as UTF-8 bytes; byte strings pass through as is."""
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    def getLine(self, element):
        """Return the ``Dialogue:`` line (newline-terminated) for a cue.

        A cue containing an ``<i>`` tag is rendered entirely with the
        "Italic" style and has its ``<i>`` / ``</i>`` tags removed; any
        other cue uses the "Regular" style.
        """
        start = self.getTime(element.timeStart)
        end = self.getTime(element.timeEnd)
        # ASS encodes a line break inside a cue as the two characters \N
        text = element.text.replace("\n", "\\N")
        font = "Regular"
        if '<i>' in text:
            font = "Italic"
            text = text.replace("<i>", "").replace("</i>", "")
        prefix = "Dialogue: 0,{0},{1},{2},,0000,0000,0000,,".format(start, end, font)
        return prefix + text + "\n"

    def getTime(self, t):
        """Format *t* milliseconds as ``H:MM:SS.cc``.

        The hundredths are truncated, not rounded.
        """
        # divmod keeps everything an integer on Python 3, where ``/``
        # would produce floats and corrupt the formatted time.
        t, ms = divmod(t, 1000)
        t, s = divmod(t, 60)
        h, m = divmod(t, 60)
        return '{0:01}:{1:02}:{2:02}.{3:02}'.format(h, m, s, ms // 10)

    def getHeader(self):
        """Return the script header: info, style definitions, event format."""
        # str() lets callers pass the size as a number as well as a string
        size = str(self.fontSize)
        return "[Script Info]\n" \
            + "ScriptType: V4.00+\n" \
            + "\n" \
            + "[V4+ Styles]\n" \
            + "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n" \
            + "Style: Regular," + self.fontRegular + "," + size + self._STYLE_COLOURS + "0" + self._STYLE_TAIL \
            + "Style: Italic," + self.fontItalic + "," + size + self._STYLE_COLOURS + "-1" + self._STYLE_TAIL \
            + "\n" \
            + "[Events]\n" \
            + "Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text\n"
class SubRipReader:
    """Read a SubRip (.srt) file and parse it into a list of cues.

    Constructing the object detects the encoding from the byte order mark
    (UTF-8 is assumed when there is none, with ISO-8859-1 as a fallback
    when decoding fails), reads the file and parses it. The resulting
    ``SubElement`` objects are available in ``self.elements``.
    """

    # (byte order mark, codec) pairs. The UTF-32 marks are tested before the
    # UTF-16 ones because the UTF-32 LE mark (FF FE 00 00) begins with the
    # UTF-16 LE mark (FF FE) and would otherwise never be recognised.
    _BOMS = (
        (codecs.BOM_UTF8, 'utf-8'),
        (codecs.BOM_UTF32_LE, 'utf-32-le'),
        (codecs.BOM_UTF32_BE, 'utf-32-be'),
        (codecs.BOM_UTF16_LE, 'utf-16-le'),
        (codecs.BOM_UTF16_BE, 'utf-16-be'),
    )

    # Typographic clean-up applied, in this order, to every text line.
    # NOTE(review): the first four search strings were invisible characters
    # in the published script; they are presumably the C1 controls obtained
    # when Windows-1252 text is decoded as ISO-8859-1 -- confirm.
    _REPLACEMENTS = (
        (u'\u009c', u'\u0153'),        # oe ligature
        (u'\u0092', u'\u2019'),        # right single quotation mark
        (u'\u0093', u'\u201c'),        # left double quotation mark
        (u'\u0094', u'\u201d'),        # right double quotation mark
        (u'\u00b4\u00b4', u'"'),       # two acute accents used as a quote
        (u'\u00b4', u"'"),             # acute accent used as an apostrophe
        (u'....', u'\u2026'),          # dots -> horizontal ellipsis
        (u'...', u'\u2026'),
        (u'. . .', u'\u2026'),
    )

    def __init__(self, filename):
        Console.v('Reading "' + filename + '"')
        self.filename = filename
        self.bomLength = 0
        self.encoding = 'utf-8'
        self.lines = []
        self.elements = []
        self.detectBomAndEncoding()
        self.read()
        self.parse()

    @classmethod
    def _sniffBom(cls, head):
        """Return ``(codec, bom length)`` for the first bytes of a file.

        Returns ``(None, 0)`` when *head* does not start with a known BOM.
        """
        for bom, encoding in cls._BOMS:
            if head.startswith(bom):
                return encoding, len(bom)
        return None, 0

    def detectBomAndEncoding(self):
        """Set ``encoding`` and ``bomLength`` from the file's BOM, if any."""
        # Binary mode so the raw bytes can be compared with the BOM constants
        with open(self.filename, 'rb') as f:
            begin = f.read(4)
        encoding, length = self._sniffBom(begin)
        if encoding is not None:
            self.encoding = encoding
            self.bomLength = length
        if self.bomLength > 0:
            Console.v('BOM detected! Encoding: ' + self.encoding)
        else:
            Console.v('No BOM found. Using utf-8 as default encoding.')

    def read(self):
        """Load the file, retrying as ISO-8859-1 if decoding fails."""
        if not self.read2():
            Console.v('Fail to read the file with ' + self.encoding + '. Trying iso-8859-1.')
            self.encoding = 'iso-8859-1'
            self.read2()

    def read2(self):
        """Try to load the file with ``self.encoding``.

        Returns True on success and False when the content cannot be
        decoded with that encoding. I/O errors are not swallowed.
        """
        try:
            with codecs.open(self.filename, 'r', self.encoding) as f:
                lines = f.readlines()
        except UnicodeError:
            return False
        # The codecs used here keep the BOM as a leading U+FEFF character
        if lines and self.bomLength > 0 and lines[0].startswith(u'\ufeff'):
            lines[0] = lines[0][1:]
        self.lines = lines
        return True

    @staticmethod
    def _isCounter(text):
        """Tell whether *text* is a cue number (ASCII digits only)."""
        return text != u'' and all(c in u'0123456789' for c in text)

    @classmethod
    def _cleanText(cls, text):
        """Return *text* with the typographic replacements applied."""
        for old, new in cls._REPLACEMENTS:
            text = text.replace(old, new)
        return text

    @staticmethod
    def _toNative(text):
        """Return *text* as the interpreter's native ``str`` type.

        On Python 2 this is a UTF-8 byte string, which is what the rest of
        the script concatenates and writes; on Python 3 text is unchanged.
        """
        if str is bytes:
            return text.encode('utf-8')
        return text

    def _store(self, el, position, emptySub):
        """Keep the cue *el*, or record *position* if it has no text."""
        if el.text == '':
            emptySub.append(position)
        else:
            self.elements.append(el)

    def parse(self):
        """Turn ``self.lines`` into cues appended to ``self.elements``.

        Cues without text are dropped and reported in verbose mode.
        """
        el = None
        isJustNew = False       # True when the next line is the timing line
        nextCountReady = True   # False right after a timing line
        count = 0               # how many cue numbers have been seen
        emptySub = []           # positions of the cues that had no text
        for line in self.lines:
            l = line.replace(u"\n", u"").replace(u"\r", u"")
            if isJustNew:
                isJustNew = False
                nextCountReady = False
                times = l.split(u' --> ')
                el.timeStart = self.parseTime(times[0])
                el.timeEnd = self.parseTime(times[1])
            elif nextCountReady and self._isCounter(l):
                count += 1
                number = str(l)     # ASCII digits only, safe on Python 2
                Console.v("Reading SRT # " + number + "\r")
                if int(number) != count:
                    Console.w("Bad SRT number! Found: #" + number + ", but should be #" + str(count))
                if el is not None:
                    self._store(el, count - 1, emptySub)
                el = SubElement(0, 0, '')
                isJustNew = True
            else:
                # The first line after a timing line is always text, even
                # when it consists of digits only; later ones may be numbers.
                nextCountReady = True
                if el is None:
                    # Text before the first cue number: nothing to attach to
                    continue
                l = self._toNative(self._cleanText(l))
                l = self.applyLanguageOnLine(l)
                el.appendLine(l)
        if el is not None:
            # The last cue follows the same empty-text rule as the others
            self._store(el, count, emptySub)
        Console.v('Parsing complete: ' + str(len(self.elements)) + ' subtitles found!')
        Console.v(str(len(emptySub)) + ' subtitles were empty: ' + str(emptySub))

    def parseTime(self, time):
        """Convert an ``HH:MM:SS,mmm`` timestamp to integer milliseconds."""
        r1 = time.split(":")
        h = int(r1[0])
        m = int(r1[1])
        r2 = r1[2].split(',')
        s = int(r2[0])
        ms = int(r2[1])
        return ((h * 60 + m) * 60 + s) * 1000 + ms

    def applyLanguageOnLine(self, line):
        """Hook for language-specific fixes; returns the line unchanged."""
        return line
class Srt2Ass:
    """Convert one SubRip file to an ASS file next to it.

    The conversion runs in the constructor.

    srt      -- path of the SubRip file to convert
    fregular -- font name for regular text
    fitalic  -- font name for italic text
    fsize    -- font size used for both fonts
    delete   -- when true, remove the SubRip file once converted
    """

    def __init__(self, srt, fregular, fitalic, fsize, delete):
        ass = self.assFilename(srt)
        subs = SubRipReader(srt)
        AdvancedSubStationAlphaWriter(ass, subs.elements, fregular, fitalic, fsize)
        if delete:
            Console.l("Removing " + srt + "…")
            os.remove(srt)

    @staticmethod
    def assFilename(srt):
        """Return the output path for the SubRip file *srt*.

        Only a trailing ``.srt`` extension (any letter case) is replaced;
        any other name gets ``.ass`` appended. The result is never the
        input path itself, so converting -- and then deleting the source --
        cannot destroy the freshly written output.
        """
        base, extension = os.path.splitext(srt)
        if extension.lower() == '.srt':
            return base + '.ass'
        return srt + '.ass'
def collectSrtFiles(arguments):
    """Return the list of SubRip files designated by the command line.

    arguments -- the positional command-line values; when empty, every
                 ``*.srt`` file of the current directory is returned.

    An argument naming an existing file is taken literally, so file names
    containing glob characters such as ``[`` work; any other argument is
    expanded as a glob pattern. A file is listed only once.
    """
    if not arguments:
        return glob.glob("*.srt")
    srtfiles = []
    for argument in arguments:
        if os.path.isfile(argument):
            matches = [argument]
        else:
            matches = glob.glob(argument)
            if not matches:
                Console.l('No file matches "' + argument + '"')
        for path in matches:
            # Avoid converting (and possibly deleting) the same file twice
            if path not in srtfiles:
                srtfiles.append(path)
    return srtfiles


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Convert SubRip (srt) files to Advanced Sub Station Alpha (ass) files.')
    parser.add_argument('file', metavar='file', nargs='*', help='the SubRip (srt) file')
    parser.add_argument('--font-regular', metavar='fontr', nargs='?', help='Name of the font to use for regular text. Default is "Cronos Pro".', default="Cronos Pro")
    parser.add_argument('--font-italic', metavar='fonti', nargs='?', help='Name of the font to use for italic text. Default is "Cronos Pro".', default="Cronos Pro")
    parser.add_argument('--font-size', metavar='fonts', nargs='?', help='Font\'s size for both regular and italic fonts. Default is 24.', default="24")
    parser.add_argument('--delete', '-d', action='store_true', help='delete the original SubRip (srt) file')
    parser.add_argument('--verbose', '-v', action='store_true', help='print information about the process')
    args = parser.parse_args()
    Console.verbose = args.verbose
    for srt in collectSrtFiles(args.file):
        Srt2Ass(srt, args.font_regular, args.font_italic, args.font_size, args.delete)
Je l'ai mis dans le wiki aussi :
http://doc.ubuntu-fr.org/sous-titrage
http://doc.ubuntu-fr.org/srt2ass
Hors ligne
Pages : 1