Here is the initial implementation of SubRip subtitle input, CLI only for now, and this first diff is macosx CLI without xcode only.
I need help from KonaBlend to get this working for other platform/OS combinations which need the iconv library.
The file reader is implemented as a work object - which is a little hacky since it is a source and not a processor, so I feed a NULL buffer into its input fifo to keep it running until it runs out of subs to read, then it finishes.
http://pastebin.ca/1469328
Code: Select all
Index: test/test.c
===================================================================
--- test/test.c (revision 2594)
+++ test/test.c (working copy)
@@ -72,6 +72,10 @@
static char ** subforce = NULL;
static char * subburn = NULL;
static char * subdefault = NULL;
+static char ** srtfile = NULL;
+static char ** srtcodeset = NULL;
+static char ** srtoffset = NULL;
+static char ** srtlang = NULL;
static int subtitle_scan = 0;
static int width = 0;
static int height = 0;
@@ -1790,6 +1794,39 @@
}
}
+ if( srtfile )
+ {
+ char * token;
+ int i, pos;
+ char *codeset = "L1";
+ int64_t offset = 0;
+ char *lang = "und";
+ hb_subtitle_config_t sub_config;
+
+ pos = 0;
+ for( i=0; srtfile[i] != NULL; i++ )
+ {
+ pos++;
+ token = srtfile[i];
+ if( srtcodeset && srtcodeset[i] )
+ {
+ codeset = srtcodeset[i];
+ }
+ if( srtoffset && srtoffset[i] )
+ {
+ offset = strtoll( srtoffset[i], &srtoffset[i], 0 );
+ }
+ if ( srtlang && srtlang[i] )
+ {
+ lang = srtlang[i];
+ }
+ sub_config.force = 0;
+ sub_config.default_track = 0;
+ hb_srt_add( job, &sub_config, srtfile[i], codeset, lang,
+ offset );
+ }
+ }
+
if( native_language )
{
char audio_lang[4];
@@ -2319,7 +2356,17 @@
" that matches the --native-language. If there are no\n"
" matching audio tracks then the first matching\n"
" subtitle track is used instead.\n"
-
+ " --srt-file <string> SubRip SRT filename(s), separated by commas.\n"
+ " --srt-codeset Character codeset(s) that the SRT file(s) are\n"
+ " <string> encoded in, separted by commas.\n"
+ " Use 'iconv -l' for a list of valid\n"
+ " codesets. If not specified latin1 is assumed\n"
+ " --srt-offset Offset in milli-seconds to apply to the SRT file(s)\n"
+ " <string> separted by commas. If not specified zero is assumed.\n"
+ " Offsets may be negative.\n"
+ " --srt-lang <string> Language as an iso639-2 code fra, eng, spa et cetera)\n"
+ " for the SRT file(s) separated by commas. If not specified\n"
+ " then 'und' is used.\n"
"\n"
@@ -2421,6 +2468,10 @@
#define SUB_BURNED 266
#define SUB_DEFAULT 267
#define NATIVE_DUB 268
+ #define SRT_FILE 269
+ #define SRT_CODESET 270
+ #define SRT_OFFSET 271
+ #define SRT_LANG 272
for( ;; )
{
@@ -2451,9 +2502,12 @@
{ "subtitle-forced", optional_argument, NULL, 'F' },
{ "subtitle-burned", optional_argument, NULL, SUB_BURNED },
{ "subtitle-default", optional_argument, NULL, SUB_DEFAULT },
+ { "srt-file", required_argument, NULL, SRT_FILE },
+ { "srt-codeset", required_argument, NULL, SRT_CODESET },
+ { "srt-offset", required_argument, NULL, SRT_OFFSET },
+ { "srt-lang", required_argument, NULL, SRT_LANG },
{ "native-language", required_argument, NULL,'N' },
{ "native-dub", no_argument, NULL, NATIVE_DUB },
-
{ "encoder", required_argument, NULL, 'e' },
{ "aencoder", required_argument, NULL, 'E' },
{ "two-pass", no_argument, NULL, '2' },
@@ -2671,6 +2725,18 @@
case NATIVE_DUB:
native_dub = 1;
break;
+ case SRT_FILE:
+ srtfile = str_split( optarg, "," );
+ break;
+ case SRT_CODESET:
+ srtcodeset = str_split( optarg, "," );
+ break;
+ case SRT_OFFSET:
+ srtoffset = str_split( optarg, "," );
+ break;
+ case SRT_LANG:
+ srtlang = str_split( optarg, "," );
+ break;
case '2':
twoPass = 1;
break;
Index: test/module.defs
===================================================================
--- test/module.defs (revision 2594)
+++ test/module.defs (working copy)
@@ -37,7 +37,7 @@
ifeq ($(BUILD.system),darwin)
TEST.GCC.f += IOKit CoreServices AudioToolbox
- TEST.GCC.l += bz2 z
+ TEST.GCC.l += iconv bz2 z
else ifeq ($(BUILD.system),linux)
TEST.GCC.l += bz2 z pthread dl m
else ifeq (1-mingw,$(BUILD.cross)-$(BUILD.system))
Index: libhb/decsrtsub.c
===================================================================
--- libhb/decsrtsub.c (revision 0)
+++ libhb/decsrtsub.c (revision 0)
@@ -0,0 +1,284 @@
+/*
+ This file is part of the HandBrake source code.
+ Homepage: <http://handbrake.fr/>.
+ It may be used under the terms of the GNU General Public License. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <iconv.h>
+#include <errno.h>
+#include "hb.h"
+
+struct start_and_end {
+ unsigned long start, end;
+};
+
+enum
+{
+ k_state_inEntry,
+ k_state_potential_new_entry,
+ k_state_timecode,
+};
+
+typedef struct srt_entry_s {
+ long offset, duration;
+ long start, stop;
+ char text[1024];
+} srt_entry_t;
+
+/*
+ * Store all context in the work private struct,
+ */
+struct hb_work_private_s
+{
+ hb_job_t *job;
+ FILE *file;
+ unsigned long current_time;
+ unsigned long number_of_entries;
+ unsigned long current_state;
+ srt_entry_t current_entry;
+ iconv_t *iconv_context;
+ hb_subtitle_t *subtitle;
+};
+
+static struct start_and_end read_time_from_string( const char* timeString )
+{
+ // for ex. 00:00:15,248 --> 00:00:16,545
+
+ long houres1, minutes1, seconds1, milliseconds1,
+ houres2, minutes2, seconds2, milliseconds2;
+
+ sscanf(timeString, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n", &houres1, &minutes1, &seconds1, &milliseconds1,
+ &houres2, &minutes2, &seconds2, &milliseconds2);
+
+ struct start_and_end result = {
+ milliseconds1 + seconds1*1000 + minutes1*60*1000 + houres1*60*60*1000,
+ milliseconds2 + seconds2*1000 + minutes2*60*1000 + houres2*60*60*1000};
+ return result;
+}
+
+/*
+ * Read the SRT file and put the entries into the subtitle fifo for all to read
+ */
+static hb_buffer_t *srt_read( hb_work_private_t *pv )
+{
+
+ char line_buffer[1024];
+
+ if( !pv->file )
+ {
+ return NULL;
+ }
+
+ while( fgets( line_buffer, sizeof( line_buffer ), pv->file ) )
+ {
+ switch (pv->current_state)
+ {
+ case k_state_timecode:
+ {
+ struct start_and_end timing = read_time_from_string( line_buffer );
+ pv->current_entry.duration = timing.end - timing.start;
+ pv->current_entry.offset = timing.start - pv->current_time;
+
+ pv->current_time = timing.end;
+
+ pv->current_entry.start = timing.start;
+ pv->current_entry.stop = timing.end;
+
+ pv->current_state = k_state_inEntry;
+ continue;
+ }
+
+ case k_state_inEntry:
+ {
+ char *p, *q;
+ size_t in_size;
+ size_t out_size;
+ size_t retval;
+
+ // If the current line is empty, we assume this is the
+ // seperation betwene two entries. In case we are wrong,
+ // the mistake is corrected in the next state.
+ if (strcmp(line_buffer, "\n") == 0 || strcmp(line_buffer, "\r\n") == 0) {
+ pv->current_state = k_state_potential_new_entry;
+ continue;
+ }
+
+
+ for( q = pv->current_entry.text; (q < pv->current_entry.text+1024) && *q; q++);
+
+ p = line_buffer;
+
+ in_size = strlen(line_buffer);
+ out_size = (pv->current_entry.text+1024) - q;
+
+ retval = iconv( pv->iconv_context, &p, &in_size, &q, &out_size);
+ *q = '\0';
+
+ if( ( retval == -1 ) && ( errno == EINVAL ) )
+ {
+ hb_error( "Invalid shift sequence" );
+ } else if ( ( retval == -1 ) && ( errno == EILSEQ ) )
+ {
+ hb_error( "Invalid byte for codeset in input, %d bytes discarded",
+ in_size);
+ } else if ( ( retval == -1 ) && ( errno == E2BIG ) )
+ {
+ hb_error( "Not enough space in output buffer");
+ }
+
+ break;
+ }
+
+ case k_state_potential_new_entry:
+ {
+ const char endpoint[] = "\0";
+ const unsigned long potential_entry_number = strtol(line_buffer, (char**)&endpoint, 10);
+ hb_buffer_t *buffer = NULL;
+ /*
+ * Is this really new next entry begin?
+ */
+ if (potential_entry_number == pv->number_of_entries + 1) {
+ /*
+ * We found the next entry - or a really rare error condition
+ */
+ if( *pv->current_entry.text )
+ {
+ long length;
+ char *p;
+
+ length = strlen( pv->current_entry.text );
+
+ for( p = pv->current_entry.text; *p; p++)
+ {
+ if( *p == '\n' || *p == '\r' )
+ {
+ *p = ' ';
+ }
+ }
+
+ buffer = hb_buffer_init( length + 1 );
+
+ if( buffer )
+ {
+ buffer->start = ( pv->current_entry.start + pv->subtitle->offset ) * 90;
+ buffer->stop = ( pv->current_entry.stop + pv->subtitle->offset ) * 90;
+
+ memcpy( buffer->data, pv->current_entry.text, length + 1 );
+ }
+ }
+ memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
+ ++(pv->number_of_entries);
+ pv->current_state = k_state_timecode;
+ if( buffer )
+ {
+ return buffer;
+ }
+ continue;
+ } else {
+ /*
+ * Well.. looks like we are in the wrong mode.. lets add the
+ * newline we misinterpreted...
+ */
+ strncat(pv->current_entry.text, " ", 1024);
+ pv->current_state = k_state_inEntry;
+ }
+
+ break;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static int decsrtInit( hb_work_object_t * w, hb_job_t * job )
+{
+ int retval = 1;
+ hb_work_private_t * pv;
+ hb_buffer_t *buffer;
+
+ pv = calloc( 1, sizeof( hb_work_private_t ) );
+ if( pv )
+ {
+ w->private_data = pv;
+
+ pv->job = job;
+
+ buffer = hb_buffer_init( 0 );
+ hb_fifo_push( w->fifo_in, buffer);
+
+ pv->file = fopen( w->subtitle->src_filename, "r" );
+
+ pv->current_state = k_state_potential_new_entry;
+ pv->number_of_entries = 0;
+ pv->current_time = 0;
+ pv->subtitle = w->subtitle;
+
+ pv->iconv_context = iconv_open( "utf8", pv->subtitle->src_codeset );
+
+
+ if( pv->iconv_context == (iconv_t) -1 )
+ {
+ hb_error("Could not open the iconv library with those file formats\n");
+
+ } else {
+ memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
+
+ pv->file = fopen( w->subtitle->src_filename, "r" );
+
+ if( !pv->file )
+ {
+ hb_error("Could not open the SRT subtitle file '%s'\n", w->subtitle->src_filename);
+ } else {
+ retval = 0;
+ }
+ }
+ }
+
+ return retval;
+}
+
+static int decsrtWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
+ hb_buffer_t ** buf_out )
+{
+ hb_work_private_t * pv = w->private_data;
+ hb_buffer_t * in = *buf_in;
+ hb_buffer_t * out = NULL;
+
+ out = srt_read( pv );
+
+ if( out )
+ {
+ /*
+ * Keep a buffer in our input fifo so that we get run.
+ */
+ hb_fifo_push( w->fifo_in, in);
+ *buf_in = NULL;
+ *buf_out = out;
+ } else {
+ printf("\nSRT Done\n");
+ *buf_out = NULL;
+ return HB_WORK_OK;
+ }
+
+ return HB_WORK_OK;
+}
+
+static void decsrtClose( hb_work_object_t * w )
+{
+ hb_work_private_t * pv = w->private_data;
+ fclose( pv->file );
+ iconv_close(pv->iconv_context);
+ free( w->private_data );
+}
+
+hb_work_object_t hb_decsrtsub =
+{
+ WORK_DECSRTSUB,
+ "SRT Subtitle Decoder",
+ decsrtInit,
+ decsrtWork,
+ decsrtClose
+};
Index: libhb/hb.c
===================================================================
--- libhb/hb.c (revision 2594)
+++ libhb/hb.c (working copy)
@@ -162,6 +162,7 @@
hb_register( &hb_decvobsub );
hb_register( &hb_encvobsub );
hb_register( &hb_deccc608 );
+ hb_register( &hb_decsrtsub );
hb_register( &hb_render );
hb_register( &hb_encavcodec );
hb_register( &hb_encx264 );
@@ -258,6 +259,7 @@
hb_register( &hb_decvobsub );
hb_register( &hb_encvobsub );
hb_register( &hb_deccc608 );
+ hb_register( &hb_decsrtsub );
hb_register( &hb_render );
hb_register( &hb_encavcodec );
hb_register( &hb_encx264 );
Index: libhb/sync.c
===================================================================
--- libhb/sync.c (revision 2594)
+++ libhb/sync.c (working copy)
@@ -446,7 +446,8 @@
* Rewrite timestamps on subtitles that need it (on raw queue).
*/
if( subtitle->source == CC608SUB ||
- subtitle->source == CC708SUB )
+ subtitle->source == CC708SUB ||
+ subtitle->source == SRTSUB )
{
/*
* Rewrite timestamps on subtitles that came from Closed Captions
Index: libhb/internal.h
===================================================================
--- libhb/internal.h (revision 2594)
+++ libhb/internal.h (working copy)
@@ -256,6 +256,7 @@
WORK_DECMPEG2,
WORK_DECCC608,
WORK_DECVOBSUB,
+ WORK_DECSRTSUB,
WORK_ENCVOBSUB,
WORK_RENDER,
WORK_ENCAVCODEC,
Index: libhb/work.c
===================================================================
--- libhb/work.c (revision 2594)
+++ libhb/work.c (working copy)
@@ -589,6 +589,15 @@
hb_list_add( job->list_work, w );
}
+ if( !job->indepth_scan && subtitle->source == SRTSUB )
+ {
+ w = hb_get_work( WORK_DECSRTSUB );
+ w->fifo_in = subtitle->fifo_in;
+ w->fifo_out = subtitle->fifo_raw;
+ w->subtitle = subtitle;
+ hb_list_add( job->list_work, w );
+ }
+
if( !job->indepth_scan &&
subtitle->format == PICTURESUB
&& subtitle->config.dest == PASSTHRUSUB )
Index: libhb/common.c
===================================================================
--- libhb/common.c (revision 2594)
+++ libhb/common.c (working copy)
@@ -9,6 +9,7 @@
#include <sys/time.h>
#include "common.h"
+#include "lang.h"
#include "hb.h"
/**********************************************************************
@@ -864,8 +865,39 @@
/* We fail! */
return 0;
}
- subtitle->config = *subtitlecfg;
+ subtitle->config = *subtitlecfg;
hb_list_add(job->list_subtitle, subtitle);
return 1;
}
+int hb_srt_add(const hb_job_t * job, const hb_subtitle_config_t * subtitlecfg,
+ const char * filename, const char *codeset, const char *lang,
+ int64_t offset)
+{
+ hb_subtitle_t *subtitle;
+ iso639_lang_t *language = NULL;
+ int retval = 0;
+
+ subtitle = calloc(1, sizeof(*subtitle));
+
+ subtitle->format = TEXTSUB;
+ subtitle->source = SRTSUB;
+
+ language = lang_for_code2(lang);
+
+ if( language )
+ {
+
+ strcpy( subtitle->lang, language->eng_name );
+ strncpy( subtitle->iso639_2, lang, 4 );
+ strncpy( subtitle->src_filename, filename, 1024);
+ strncpy( subtitle->src_codeset, codeset, 40);
+ subtitle->offset = offset;
+
+ subtitle->config = *subtitlecfg;
+ subtitle->config.dest = PASSTHRUSUB;
+ hb_list_add(job->list_subtitle, subtitle);
+ retval = 1;
+ }
+ return retval;
+}
Index: libhb/common.h
===================================================================
--- libhb/common.h (revision 2594)
+++ libhb/common.h (working copy)
@@ -96,7 +96,11 @@
hb_audio_config_t * hb_list_audio_config_item(hb_list_t * list, int i);
int hb_subtitle_add(const hb_job_t * job, const hb_subtitle_config_t * subtitlecfg, int track);
+int hb_srt_add(const hb_job_t * job, const hb_subtitle_config_t * subtitlecfg,
+ const char * filename, const char *codeset, const char *lang,
+ int64_t offset);
+
struct hb_rate_s
{
char * string;
@@ -463,6 +467,10 @@
int hits; /* How many hits/occurrences of this subtitle */
int forced_hits; /* How many forced hits in this subtitle */
+ char src_filename[1024];
+ char src_codeset[40];
+ int64_t offset;
+
#ifdef __LIBHB__
/* Internal data */
hb_fifo_t * fifo_in; /* SPU ES */
@@ -671,6 +679,7 @@
extern hb_work_object_t hb_decvobsub;
extern hb_work_object_t hb_encvobsub;
extern hb_work_object_t hb_deccc608;
+extern hb_work_object_t hb_decsrtsub;
extern hb_work_object_t hb_render;
extern hb_work_object_t hb_encavcodec;
extern hb_work_object_t hb_encx264;