From 2d2db3b2088d21ba13e13625478ccbfbf00445b8 Mon Sep 17 00:00:00 2001
From: hrimfaxi <outmatch@gmail.com>
Date: Sat, 18 Jul 2020 20:42:29 +0800
Subject: [PATCH] Use libswresample to convert 32-bit and float audio into s16

Since libao+pulseaudio cannot play 32-bit or flt/fltp/dbl/dblp audio,
the following sample formats are now passed through libswresample and
converted to AV_SAMPLE_FMT_S16, which libao accepts:

* AV_SAMPLE_FMT_S32
* AV_SAMPLE_FMT_S32P
* AV_SAMPLE_FMT_FLT
* AV_SAMPLE_FMT_FLTP
* AV_SAMPLE_FMT_DBL
* AV_SAMPLE_FMT_DBLP

This fixes issue #949 and issue #1014. The internal FFmpeg+libao player
can now play audio when the pulseaudio backend is enabled in
/etc/libao.conf.

Signed-off-by: hrimfaxi <outmatch@gmail.com>
---
 ffmpegaudio.cc | 111 ++++++++++++++++++++++---------------------------
 goldendict.pro |   5 ++-
 2 files changed, 53 insertions(+), 63 deletions(-)
diff --git a/ffmpegaudio.cc b/ffmpegaudio.cc
index 415fc79c..d550f4a7 100644
--- a/ffmpegaudio.cc
+++ b/ffmpegaudio.cc
@@ -19,6 +19,7 @@ extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 #include <libavutil/avutil.h>
+#include "libswresample/swresample.h"
 }
 
 #include <QString>
@@ -100,6 +101,8 @@ struct DecoderContext
   ao_device * aoDevice_;
   bool avformatOpened_;
 
+  SwrContext *swr_;
+
   DecoderContext( QByteArray const & audioData, QAtomicInt & isCancelled );
   ~DecoderContext();
 
@@ -122,7 +125,8 @@ DecoderContext::DecoderContext( QByteArray const & audioData, QAtomicInt & isCan
   avioContext_( NULL ),
   audioStream_( NULL ),
   aoDevice_( NULL ),
-  avformatOpened_( false )
+  avformatOpened_( false ),
+  swr_( NULL )
 {
 }
 
@@ -243,11 +247,36 @@ bool DecoderContext::openCodec( QString & errorString )
 
   av_log( NULL, AV_LOG_INFO, "Codec open: %s: channels: %d, rate: %d, format: %s\n", codec_->long_name,
           codecContext_->channels, codecContext_->sample_rate, av_get_sample_fmt_name( codecContext_->sample_fmt ) );
+
+  if ( codecContext_->sample_fmt == AV_SAMPLE_FMT_S32  ||
+       codecContext_->sample_fmt == AV_SAMPLE_FMT_S32P ||
+       codecContext_->sample_fmt == AV_SAMPLE_FMT_FLT  ||
+       codecContext_->sample_fmt == AV_SAMPLE_FMT_FLTP ||
+       codecContext_->sample_fmt == AV_SAMPLE_FMT_DBL  ||
+       codecContext_->sample_fmt == AV_SAMPLE_FMT_DBLP )
+  {
+    swr_ = swr_alloc_set_opts( NULL,
+        codecContext_->channel_layout,
+        AV_SAMPLE_FMT_S16,
+        codecContext_->sample_rate,
+        codecContext_->channel_layout,
+        codecContext_->sample_fmt,
+        codecContext_->sample_rate,
+        0,
+        NULL );
+    swr_init( swr_ );
+  }
+
   return true;
 }
 
 void DecoderContext::closeCodec()
 {
+  if ( swr_ )
+  {
+    swr_free( &swr_ );
+  }
+
   if ( !formatContext_ )
   {
     if ( avioContext_ )
@@ -306,11 +335,12 @@ bool DecoderContext::openOutputDevice( QString & errorString )
   }
 
   ao_sample_format aoSampleFormat;
+  memset (&aoSampleFormat, 0, sizeof(aoSampleFormat) );
   aoSampleFormat.channels = codecContext_->channels;
   aoSampleFormat.rate = codecContext_->sample_rate;
   aoSampleFormat.byte_format = AO_FMT_NATIVE;
   aoSampleFormat.matrix = 0;
-  aoSampleFormat.bits = qMin( 32, av_get_bytes_per_sample( codecContext_->sample_fmt ) << 3 );
+  aoSampleFormat.bits = qMin( 16, av_get_bytes_per_sample( codecContext_->sample_fmt ) << 3 );
 
   if ( aoSampleFormat.bits == 0 )
   {
@@ -484,34 +514,11 @@ bool DecoderContext::normalizeAudio( AVFrame * frame, vector<char> & samples )
   {
     case AV_SAMPLE_FMT_U8:
     case AV_SAMPLE_FMT_S16:
-    case AV_SAMPLE_FMT_S32:
     {
       samples.resize( dataSize );
       memcpy( &samples.front(), frame->data[0], lineSize );
     }
     break;
-    case AV_SAMPLE_FMT_FLT:
-    {
-      samples.resize( dataSize );
-
-      int32_t * out = ( int32_t * )&samples.front();
-      for ( int i = 0; i < dataSize; i += sizeof( float ) )
-      {
-        *out++ = toInt32( *( float * )frame->data[i] );
-      }
-    }
-    break;
-    case AV_SAMPLE_FMT_DBL:
-    {
-      samples.resize( dataSize / 2 );
-
-      int32_t * out = ( int32_t * )&samples.front();
-      for ( int i = 0; i < dataSize; i += sizeof( double ) )
-      {
-        *out++ = toInt32( *( double * )frame->data[i] );
-      }
-    }
-    break;
     // Planar
     case AV_SAMPLE_FMT_U8P:
     {
@@ -541,48 +548,28 @@ bool DecoderContext::normalizeAudio( AVFrame * frame, vector<char> & samples )
       }
     }
     break;
+    case AV_SAMPLE_FMT_S32:
+    /* Pass through */
     case AV_SAMPLE_FMT_S32P:
-    {
-      samples.resize( dataSize );
-
-      int32_t * out = ( int32_t * )&samples.front();
-      for ( int i = 0; i < frame->nb_samples; i++ )
-      {
-        for ( int ch = 0; ch < codecContext_->channels; ch++ )
-        {
-          *out++ = ( ( int32_t * )frame->extended_data[ch] )[i];
-        }
-      }
-    }
-    break;
+    /* Pass through */
+    case AV_SAMPLE_FMT_FLT:
+    /* Pass through */
     case AV_SAMPLE_FMT_FLTP:
-    {
-      samples.resize( dataSize );
-
-      float ** data = ( float ** )frame->extended_data;
-      int32_t * out = ( int32_t * )&samples.front();
-      for ( int i = 0; i < frame->nb_samples; i++ )
-      {
-        for ( int ch = 0; ch < codecContext_->channels; ch++ )
-        {
-          *out++ = toInt32( data[ch][i] );
-        }
-      }
-    }
-    break;
-    case AV_SAMPLE_FMT_DBLP:
+    /* Pass through */
     {
       samples.resize( dataSize / 2 );
 
-      double ** data = ( double ** )frame->extended_data;
-      int32_t * out = ( int32_t * )&samples.front();
-      for ( int i = 0; i < frame->nb_samples; i++ )
-      {
-        for ( int ch = 0; ch < codecContext_->channels; ch++ )
-        {
-          *out++ = toInt32( data[ch][i] );
-        }
-      }
+      uint8_t *out = ( uint8_t * )&samples.front();
+      swr_convert( swr_, &out, frame->nb_samples, (const uint8_t**)frame->extended_data, frame->nb_samples );
+    }
+    break;
+    case AV_SAMPLE_FMT_DBL:
+    case AV_SAMPLE_FMT_DBLP:
+    {
+      samples.resize( dataSize / 4 );
+
+      uint8_t *out = ( uint8_t * )&samples.front();
+      swr_convert( swr_, &out, frame->nb_samples, (const uint8_t**)frame->extended_data, frame->nb_samples );
     }
     break;
     default:
diff --git a/goldendict.pro b/goldendict.pro
index 5d304e76..f3f43fc9 100644
--- a/goldendict.pro
+++ b/goldendict.pro
@@ -108,6 +108,7 @@ win32 {
         -logg
     !CONFIG( no_ffmpeg_player ) {
         LIBS += -lao \
+            -lswresample-gd \
             -lavutil-gd \
             -lavformat-gd \
             -lavcodec-gd
@@ -156,7 +157,8 @@ unix:!mac {
         PKGCONFIG += ao \
             libavutil \
             libavformat \
-            libavcodec
+            libavcodec \
+            libswresample \
     }
     arm {
         LIBS += -liconv
@@ -210,6 +212,7 @@ mac {
         -llzo2
     !CONFIG( no_ffmpeg_player ) {
         LIBS += -lao \
+            -lswresample-gd \
             -lavutil-gd \
             -lavformat-gd \
             -lavcodec-gd