HB uses liba52 for AC3 decoding. By default, liba52 applies the dynamic range compression hints that are embedded in the AC3 track. These hints tell liba52 how much to boost soft sounds and dampen loud sounds. They're embedded when the audio track is mastered. The idea is to normalize the sound to one listening level, but the hints are tuned for playback on a true home theater setup.
Perian uses liba52 as well, and gbooker's added an interesting feature. You'll note the Perian preferences pane has an option for how to "Play Soft Sounds", with a slider from 0.0 to 4.0. This tweaks the embedded normalization hints, to sound better on small speakers or in noisy environments.
The compression hint in the track is a number, let's call it c.
The tweaked normalization float is another number, let's call it n.
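When liba52 encounters a sample that's hinted to be made softer (c less than 1), it just applies the compression in c.
But when liba52 encounters a sample that's hinted to be made louder (c greater than 1), it applies c to the power of n -- making it much louder. (This split only applies when n is greater than 1; for n of 1 or less, c to the power of n is used in both cases.)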
Anyway, I've ported this function over to libhb and implemented it in the CLI.
http://pastebin.ca/803835
Code: Select all
Index: test/test.c
===================================================================
--- test/test.c (revision 1091)
+++ test/test.c (working copy)
@@ -39,6 +39,7 @@
static int h264_30 = 0;
static char * audios = NULL;
static int audio_mixdown = HB_AMIXDOWN_DOLBYPLII;
+static float dynamic_range_compression = 0;
static int sub = 0;
static int width = 0;
static int height = 0;
@@ -823,7 +824,11 @@
{
job->acodec = acodec;
}
-
+ if ( dynamic_range_compression )
+ {
+ job->dynamic_range_compression = dynamic_range_compression;
+ }
+
if( size )
{
job->vbitrate = hb_calc_bitrate( job, size );
@@ -1181,6 +1186,10 @@
fprintf( stderr, "/" );
}
fprintf( stderr, " kHz)\n"
+ " -D, --normalize <float> Normalize the source's audio by applying dynamic\n"
+ " range compression, so soft sounds are louder.\n"
+ " Range is 0.0 (off) to 4.0 (too loud), with\n"
+ " 1.0 - 2.5 being a good range for... normal use.\n"
@@ -1267,6 +1276,7 @@
{ "markers", optional_argument, NULL, 'm' },
{ "audio", required_argument, NULL, 'a' },
{ "mixdown", required_argument, NULL, '6' },
+ { "normalize", required_argument, NULL, 'D' },
{ "subtitle", required_argument, NULL, 's' },
{ "subtitle-scan", no_argument, NULL, 'U' },
{ "subtitle-forced", no_argument, NULL, 'F' },
@@ -1308,7 +1318,7 @@
int c;
c = getopt_long( argc, argv,
- "hvuC:f:4i:Io:t:Lc:ma:6:s:UFN:e:E:2d789gpOP::w:l:n:b:q:S:B:r:R:Qx:TY:X:VZ:z",
+ "hvuC:f:4i:Io:t:Lc:ma:6:s:UFN:e:E:2dD:789gpOP::w:l:n:b:q:S:B:r:R:Qx:TY:X:VZ:z",
long_options, &option_index );
if( c < 0 )
{
@@ -1416,6 +1426,9 @@
audio_mixdown = HB_AMIXDOWN_6CH;
}
break;
+ case 'D':
+ dynamic_range_compression = atof( optarg );
+ break;
case 's':
sub = atoi( optarg );
break;
Index: libhb/deca52.c
===================================================================
--- libhb/deca52.c (revision 1091)
+++ libhb/deca52.c (working copy)
@@ -56,6 +56,30 @@
static hb_buffer_t * Decode( hb_work_object_t * w );
/***********************************************************************
+ * dynrng_call
+ ***********************************************************************
+ * Boosts soft audio -- taken from gbooker's work in A52Decoder, comment and all..
+ * Two cases
+ * 1) The user requested a compression of 1 or less, return the typical power rule
+ * 2) The user requested a compression of more than 1 (decompression):
+ * If the stream's requested compression is less than 1.0 (loud sound), return the normal compression
+ * If the stream's requested compression is more than 1.0 (soft sound), use power rule (which will make
+ * it louder in this case).
+ *
+ **********************************************************************/
+static sample_t dynrng_call (sample_t c, void *data)
+{
+    /* data is the address of job->dynamic_range_compression, which is a
+     * float. Reading it through a double pointer would run past the end
+     * of the field and yield a garbage level. */
+    const float *level = (const float *)data;
+    float levelToUse = *level;
+    if(c > 1.0 || levelToUse <= 1.0)
+    {
+        /* Soft sound, or a user level of 1.0 or less: apply the power rule. */
+        return powf(c, levelToUse);
+    }
+    else
+    {
+        /* Loud sound with a user level above 1.0: keep the stream's own hint. */
+        return c;
+    }
+}
+
+/***********************************************************************
* hb_work_deca52_init
***********************************************************************
* Allocate the work object, initialize liba52
@@ -200,7 +224,13 @@
/* Feed liba52 */
a52_frame( pv->state, pv->frame, &pv->flags_out, &pv->level, 0 );
-
+
+ if ( pv->job->dynamic_range_compression )
+ {
+ float dynamic_compression_level = pv->job->dynamic_range_compression;;
+ a52_dynrng( pv->state, dynrng_call, &pv->job->dynamic_range_compression);
+ }
+
/* 6 blocks per frame, 256 samples per block, channelsused channels */
buf = hb_buffer_init( 6 * 256 * pv->out_discrete_channels * sizeof( float ) );
if (pts == -1)
Index: libhb/work.c
===================================================================
--- libhb/work.c (revision 1091)
+++ libhb/work.c (working copy)
@@ -159,7 +159,7 @@
title->width, title->height, job->width, job->height,
job->crop[0], job->crop[1], job->crop[2], job->crop[3] );
hb_log( " + grayscale %s", job->grayscale ? "on" : "off" );
-
+
if ( job->vfr )
{
job->vrate_base = 900900;
@@ -324,6 +324,9 @@
"faac" : ( ( job->acodec & HB_ACODEC_LAME ) ? "lame" :
"vorbis" ) );
}
+
+ if ( job->dynamic_range_compression )
+ hb_log(" + normalizing audio at strength: %f", job->dynamic_range_compression);
/* if we are doing AC3 passthru, then remove any non-AC3 audios from the job */
/* otherwise, Bad Things will happen */
Index: libhb/common.h
===================================================================
--- libhb/common.h (revision 1091)
+++ libhb/common.h (working copy)
@@ -236,6 +236,7 @@
int acodec;
int abitrate;
int arate;
+ float dynamic_range_compression;
/* Subtitle settings:
subtitle: index in hb_title_t's subtitles list, starting
However, I'm doubtful it solves all of HB's audio level issues.
It's been suggested we just apply a hammer and let the user set the level range directly. However, after reading through the A52Decoder source, it really seems that for 16-bit PCM (which I believe is what we output from liba52) you should always use a level range of -1 to 1 and a bias of 384. We instead send liba52 a level range of -32768.0 to 32768.0 and a bias of 0, which -- through some float->integer->float conversion I don't quite understand -- I think ends up being equivalent.
I'm noticing that when downmixing a 5.1 track, I end up with a lower volume than if I directly encode a 2.0 track. This is making me wonder if the problem is with the downmixing code. Last spring, gbooker patched his copy of liba52 to fix how bias works with downmixing: http://trac.cod3r.com/a52codec/changeset/49
The only other thing I can think of is that it's because we don't send the A52_ADJUST_LEVEL flag, which supposedly corrects things so the given level range is the one used after downmixing, instead of before...but when I tried adding that, I didn't really hear a difference.
Links:
* doom9 guide to ac3:
http://forum.doom9.org/showthread.php?s=&threadid=56020
* Using the liba52 API:
http://trac.cod3r.com/a52codec/browser/ ... liba52.txt
* Implementation of dynamic range compression in a52decoder:
http://trac.cod3r.com/a52codec/changeset/40
* doom9 discussion of dynamic range compression:
http://forum.doom9.org/showthread.php?t=104686&page=7
* sample liba52 interface code:
http://trac.cod3r.com/a52codec/browser/ ... ecoder.cpp