'[Datatype-cvs] mp4/payload latmpacketizer.cpp,1.1.8.3,1.1.8.4'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       helix-datatype-cvs
Subject:    [Datatype-cvs] mp4/payload latmpacketizer.cpp,1.1.8.3,1.1.8.4
From:       jgordon () helixcommunity ! org
Date:       2007-10-30 10:48:03
Message-ID: 200710301048.l9UAme4m006755 () mailer ! progressive-comp ! com
[Download RAW message or body]

Update of /cvsroot/datatype/mp4/payload
In directory cvs01.internal.helixcommunity.org:/tmp/cvs-serv29127/payload

Modified Files:
      Tag: SERVER_BORABORA
	latmpacketizer.cpp 
Log Message:
Synopsis
========
Fixes PRs 206360 and 206364 - incorrect rate used in rtpmap and poor
playback on some clients

Branches: SERVER_BORABORA, HEAD (SERVER_CURRENT)
Reviewer: milko


Description
===========
The mp4a-latm packetization spec requires that the RTP time stamp scale
either match the audio sampling rate or else be 90 kHz. When SBR
data is present, the time scale needs to match the base sampling rate
only.

In the mp4a-latm packetizer, we were simply using the sample rate
supplied by the file format as the time scale. This sample rate however
is not the audio sampling rate, but is the mp4 sample rate (the time
scale used within the mp4 container samples). While most (maybe all)
encoders use the audio sampling rate for this purpose, when SBR data
is present they use the full sampling rate *with* SBR data. The
time scale for RTP time stamps however needs to match only the base
sampling rate, without SBR. Some clients cannot handle this properly
and assume that the RTP time scale will be the base audio sampling rate.

We cannot simply convert from the supplied MP4 sampling rate assuming
that it is the AAC + SBR rate, as there is no such guarantee - it could
really be anything.

So the packetizer must parse the AudioSpecificConfig in order to
determine the base audio sampling rate and convert the RTP time stamps
to the new rate.

This change updates the packetizer to use CAudioSpecificConfig (the full
parser), since we now need to parse the full config in order to determine
all of these values. It also updates CAudioSpecificConfig to add one
missing method. The packetizer determines the core AudioSamplingRate
from this structure, sets that as the RTP time scale (SamplesPerSecond),
and converts input time stamps to the appropriate rate to determine the
output time stamp.

Files Affected
==============
datatype/mp4/common/gaConfig.cpp
datatype/mp4/common/pub/gaConfig.h
datatype/mp4/payload/latmpacketizer.cpp
datatype/mp4/payload/pub/latmpacketizer.h


Testing Performed
=================
Unit Tests:

Integration Tests:
- Verified core sampling rate advertised in the rtpmap as the time scale
- Verified time stamping matches this rate
- Verified proper playback with RealPlayer w/ sdpgen
- Verified proper playback with RealPlayer w/ pure RTSP
- Verified proper playback with QuickTime player w/ sdpgen

Leak Tests:

Performance Tests:

Platforms Tested: win32-i386-vc7
Build verified: sunos-5.10-sparc-server, win32-i386-vc7


QA Hints
===============
I do not currently have access to the clients that have playback issues,
so I cannot verify that 206364 is truly fixed with this change.



Index: latmpacketizer.cpp
===================================================================
RCS file: /cvsroot/datatype/mp4/payload/latmpacketizer.cpp,v
retrieving revision 1.1.8.3
retrieving revision 1.1.8.4
diff -u -d -r1.1.8.3 -r1.1.8.4

--- latmpacketizer.cpp	15 Aug 2007 19:51:00 -0000	1.1.8.3
+++ latmpacketizer.cpp	30 Oct 2007 10:48:00 -0000	1.1.8.4
@@ -46,9 +46,11 @@
 #include "sdptools.h"
 #include "mp4desc.h"
 #include "rtptypes.h"
+#include "tsconvrt.h"
 
 /* config parsing */
-#include "mp4a-mux-cfg.h"
+#include "bitstream.h"
+#include "gaConfig.h"
 
 #include "latmpacketizer.h"
 
@@ -58,7 +60,7 @@
 const char* MP4A_LICENSE_ERROR_STR = 
     "Packetizer: This Server is not licenced to use audio/MP4A-LATM Packetizer.";
 
-LATMPacketizer::LATMPacketizer(UINT32 ulChannels, UINT32 ulSampleRate) :
+LATMPacketizer::LATMPacketizer(UINT32 ulChannels, UINT32 ulTimeScale) :
     m_lRefCount(0),
     m_pClassFactory(NULL),
     m_pStreamHeader(NULL),
@@ -66,7 +68,9 @@
     m_bRTPPacketTested(FALSE),
     m_bFlushed(FALSE),
     m_ulChannels(ulChannels),
-    m_ulSampleRate(ulSampleRate)
+    m_ulInputTimeScale(ulTimeScale),
+    m_ulOutTimeScale(ulTimeScale),
+    m_pTSConverter(NULL)
 {
 }
 
@@ -80,6 +84,7 @@
 
     HX_RELEASE(m_pClassFactory);
     HX_RELEASE(m_pStreamHeader);
+    HX_DELETE(m_pTSConverter);
 }
 
 STDMETHODIMP 
@@ -157,6 +162,7 @@
 
     HX_RELEASE(m_pClassFactory);
     HX_RELEASE(m_pStreamHeader);
+    HX_DELETE(m_pTSConverter);
 
     m_bFlushed = FALSE;
     m_bUsesRTPPackets = FALSE;
@@ -188,13 +194,23 @@
     m_pStreamHeader = pHeader;
     m_pStreamHeader->AddRef();
 
-    HX_RESULT res;
-    res = AddHeaderMimeType();
-    if(SUCCEEDED(res))
+    HX_RESULT res = GetHeaderProperties();
+    
+    if (SUCCEEDED(res))
+    {
+        res = AddHeaderMimeType();
+    }
+
+    if (SUCCEEDED(res))
     {
         res = AddHeaderSDPData();
     }
 
+    if (SUCCEEDED(res))
+    {
+        res = InitializeResampling();
+    }
+
     return res;
 }
 
@@ -227,6 +243,8 @@
             (void**) &pRTPPacket)))
         {
             m_bUsesRTPPackets = TRUE;
+            m_pTSConverter->SetOffset(pRTPPacket->GetRTPTime(), FALSE);
+
             pRTPPacket->Release();
         }
 
@@ -293,8 +311,12 @@
                 (void**)&pPacket);
         if(SUCCEEDED(res))
         {
+            UINT32 ulRTPTimeIn = ((IHXRTPPacket*)pInPacket)->GetRTPTime();
+            UINT32 ulRTPTimeOut = m_pTSConverter ? 
+                m_pTSConverter->Convert(ulRTPTimeIn) : ulRTPTimeIn;
+
             res = ((IHXRTPPacket*)pPacket)->SetRTP(pPayloadBuf,
-                    ulTime, ((IHXRTPPacket*)pInPacket)->GetRTPTime(), 
+                    ulTime, ulRTPTimeOut,
                     unStreamNumber, unASMFlags, unASMRuleNumber);
         }
     }
@@ -341,6 +363,28 @@
     *pHeader = (UINT8) ulSampleSize;
 }
 
+HX_RESULT
+LATMPacketizer::GetHeaderProperties()
+{
+    // If we weren't passed the number of channels and sample rate,
+    // see if they are in the header
+    if (m_ulInputTimeScale == 0)
+    {
+        m_pStreamHeader->GetPropertyULONG32("SamplesPerSecond", m_ulInputTimeScale);
+    }
+    
+    if (FAILED(m_pStreamHeader->GetPropertyULONG32("Channels", m_ulChannels)))
+    {
+        // Add Channels if it was passed to us but is not in the header
+        if(m_ulChannels != 0)
+        {
+            m_pStreamHeader->SetPropertyULONG32("Channels", m_ulChannels);
+        }
+    }
+
+    return HXR_OK;
+}
+
 HX_RESULT 
 LATMPacketizer::AddHeaderMimeType()
 {
@@ -378,29 +422,7 @@
     UINT32 ulRTPPayloadType;
     const char* pOldSDPBuf = NULL;
     UINT32 ulOldSDPLen;
-    ULONG32 ulChannels = 0;
-
-    // If we weren't passed the number of channels and sample rate,
-    // see if they are in the header
-    if (FAILED(m_pStreamHeader->GetPropertyULONG32("Channels", 
-        ulChannels)))
-    {
-        if(m_ulChannels != 0)
-        {
-            m_pStreamHeader->SetPropertyULONG32("Channels", m_ulChannels);
-        }
-    }
-    else
-    {
-        m_ulChannels = ulChannels;
-    }
-
-    if (m_ulSampleRate == 0 && 
-        FAILED(m_pStreamHeader->GetPropertyULONG32("SamplesPerSecond", 
-        m_ulSampleRate)))
-    {
-        m_ulSampleRate = 0;
-    }
+    CAudioSpecificConfig audioConfig;
 
     // Get the decoder config info
     res = m_pStreamHeader->GetPropertyBuffer("OpaqueData", pOpaque);
@@ -412,7 +434,7 @@
         res = ESDesc.Unpack(pESDescData, ulESDescSize);
         pOpaque->Release();
     }
-    if(SUCCEEDED(res))
+    if (SUCCEEDED(res))
     {
         DecoderConfigDescriptor* pDCDesc = ESDesc.m_pDecConfigDescr;
         if (pDCDesc && pDCDesc->m_pDecSpecificInfo)
@@ -425,9 +447,39 @@
             res = HXR_INVALID_PARAMETER;
         }
     }
+    if (SUCCEEDED(res))
+    {
+        // parse the audio specific config
+        BITSTREAM* pBS;
+        int nRes = newBitstream(&pBS, ulConfigSize*8);
+        if (nRes == 0)
+        {
+            nRes = feedBitstream(pBS, pConfig, ulConfigSize*8);
+        }
+        if (nRes == 0)
+        {
+            nRes = setAtBitstream(pBS, 0, 1);
+        }
+        if (nRes == 0)
+        {
+            res = audioConfig.Read(*pBS);
+        }
 
-    if(SUCCEEDED(res))
+        if (pBS)
+        {
+            deleteBitstream(pBS);
+        }
+        if (nRes != 0)
+        {
+            res = HXR_FAIL;
+        }
+    }
+    if (SUCCEEDED(res))
     {
+        // Set the output time scale to the core audio sample rate 
+        // (not the extended rate!)
+        m_ulOutTimeScale = audioConfig.GetCoreSampleRate();
+
         // Look for any existing SDP data
         // I don't think this should ever really happen
         if(SUCCEEDED(m_pStreamHeader->GetPropertyCString("SDPData", pOldSDP)))
@@ -436,18 +488,15 @@
             ulOldSDPLen = pOldSDP->GetSize();
             ulSDPBufSize += ulOldSDPLen;
         }
-    }
 
-    // Create SDP data Buffer
-    if(SUCCEEDED(res))
-    {
+        // Create SDP data Buffer
         res = m_pClassFactory->CreateInstance(IID_IHXBuffer, 
                 (void**)(&pSDPData));
-        if(SUCCEEDED(res))
-        {
-            res = pSDPData->SetSize(ulSDPBufSize);
-            pSDPBuf = (char*)pSDPData->GetBuffer();
-        }
+    }
+    if (SUCCEEDED(res))
+    {
+        res = pSDPData->SetSize(ulSDPBufSize);
+        pSDPBuf = (char*)pSDPData->GetBuffer();
     }
 
     if(SUCCEEDED(res))
@@ -467,11 +516,13 @@
         {
             ulRTPPayloadType = RTP_PAYLOAD_RTSP;
         }
+    }
 
+    if (SUCCEEDED(res))
+    {
         // Write the a=fmtp data to the sdp
-        int nSize = WriteFMTP(pSDPBuf, ulSDPBufSize, pConfig, 
-                              ulConfigSize, ulRTPPayloadType, 
-                              m_ulChannels, m_ulSampleRate);
+        int nSize = WriteFMTP(pSDPBuf, ulSDPBufSize, pConfig, ulConfigSize, 
+                               ulRTPPayloadType, audioConfig);
 
         // Set the buffer size to the actual size
         if (nSize > 0)
@@ -483,10 +534,10 @@
             res = HXR_FAIL;
         }
     }
-
-    // Add the SDPData to the stream header
+ 
     if(SUCCEEDED(res))
     {
+        // Add the SDPData to the stream header
         res = m_pStreamHeader->SetPropertyCString("SDPData", pSDPData);
     }
 
@@ -496,23 +547,36 @@
     return res;
 }
 
-int
-LATMPacketizer::WriteFMTP(char* pBuf, UINT32 ulSize, UINT8* pConfig, 
-                          UINT32 ulConfigSize, UINT32 ulRTPPayloadType,
-                          UINT32 ulChannels, UINT32 ulSampleRate)
+HX_RESULT
+LATMPacketizer::InitializeResampling()
 {
-    if(ulConfigSize == 0)
+    // If the output RTP time scale differs from the input scale, initialize
+    // a timestamp converter
+    if (m_ulInputTimeScale != m_ulOutTimeScale)
     {
-        return -1;
+        m_pTSConverter = new CTSConverter(m_ulInputTimeScale, m_ulOutTimeScale);
     }
 
-    UINT8 uAudioObjectType = ((pConfig[0] >> 3) & 0x1F);
+    // Make sure the rate is set in the stream header
+    return m_pStreamHeader->SetPropertyULONG32("SamplesPerSecond", m_ulOutTimeScale);
+}
+
+int
+LATMPacketizer::WriteFMTP(char* pBuf, UINT32 ulSize, UINT8* pConfig, 
+                          UINT32 ulConfigSize, UINT32 ulRTPPayloadType,
+                          CAudioSpecificConfig& audioConfig)
+{
+    UINT32 ulAudioObjectType = audioConfig.GetObjectType();
+    UINT32 ulSampleRate = audioConfig.GetSampleRate();
+    UINT32 ulChannels = audioConfig.GetNChannels();
+    UINT32 ulBaseConfigSize = audioConfig.GetCoreConfigSize() >> 3;
+    HXBOOL bSBR = audioConfig.GetIsSBR();
 
     // 3GPP requires profile-level-id 15 for AAC-LC (object 2)
     // and AAC-LTP (object 4) streams fitting level 15 criteria
     // so check if it fits and set the level
     UINT32 ulProfileLevelId = 
-            (uAudioObjectType == 2 || uAudioObjectType == 4) &&
+            (ulAudioObjectType == 2 || ulAudioObjectType == 4) &&
             (ulChannels == 1 || ulChannels == 2) && 
             (ulSampleRate > 0 && ulSampleRate <= 48000) 
             ? 15 : 0;
@@ -520,7 +584,7 @@
     char* pWriter = pBuf;
     int nWritten = SafeSprintf(pWriter, ulSize, 
         "a=fmtp:%u object=%u; cpresent=0;",
-        ulRTPPayloadType, uAudioObjectType);
+        ulRTPPayloadType, ulAudioObjectType);
 
     // Write the profile-level-id if known and start the config
     if (nWritten >= 0)
@@ -540,11 +604,6 @@
         }
     }
 
-    // check for extensionAudioObjectType(if present then convey SBR present flag)
-    HXBOOL bSBR = FALSE;
-    MP4AAudioSpec mp4AAudioSpec;
-    mp4AAudioSpec.SBRPresent(pConfig, ulConfigSize, bSBR);
-    ULONG32 ulBaseConfigSize = mp4AAudioSpec.GetBaseConfigSize();
     // calculate and write the config string
     if (nWritten >= 0)
     {
@@ -554,12 +613,15 @@
         nWritten = FormatStreamMuxConfig(pWriter, ulSize, pConfig,
             ulBaseConfigSize, 1);
     }
-    if(bSBR)
+
+    // Add SBR-enabled flag
+    if(nWritten >= 0 && audioConfig.GetIsSBR())
     {
         pWriter += nWritten;
         ulSize -= nWritten;
         nWritten = SafeSprintf(pWriter, ulSize, "; SBR-enabled=1");
     }
+
     // terminate the line
     if (nWritten >= 0)
     {


_______________________________________________
Datatype-cvs mailing list
Datatype-cvs@helixcommunity.org
http://lists.helixcommunity.org/mailman/listinfo/datatype-cvs
[prev in list] [next in list] [prev in thread] [next in thread]