1. decoder is isolated from aah_decoder_pump to serve both audio playback and audio analyzer 2. In transmitter, aah_audio_processor reads audio decoder output, downsamples to 8 bits mono pcm, calculates fft and feeds to aah_audio_algorithm. Beat detection is based on selecting significant amplitude change on sub bands of frequency domain. Beat sequence number is increased if there is a significant change, all visualizers will switch to same scene based on the beat sequence number. 3. A new TRTPMetaDataPacket is added for carrying general metadata information from transmitter to receiver. The packet is sent every 250ms for beat information. 4. Audio/Video synchronization: every beat information from transmitter carries a media timestamp, receiver performs media time to java elapsedRealtime transformation then send to java process. 5. created binder service IAAHMetaDataService, it broadcast beat information to all IAAHMetaDataClient. Modified service_manager.c "allow table" to allow mediaserver register IAAHMetaDataService. 6. Added a java static library to access IAAHMetaDataService. Jni part wraps access to native IAAHMetaDataClient. Robustness: both IAAHMetaDataService side and IAAHMetaDataClient sides can detect binder death of the other side, and perform cleanup and recovery step for a new connection. Change-Id: Iaad2a9d9d3aa3990fb796efe59a93bf4efc81b32
292 lines
9.7 KiB
C++
292 lines
9.7 KiB
C++
/*
|
|
* Copyright (C) 2012 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#define LOG_TAG "LibAAH_RTP"
|
|
//#define LOG_NDEBUG 0
|
|
#include <utils/Log.h>
|
|
|
|
#include <math.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "aah_audio_algorithm.h"
|
|
|
|
// #define DEBUG_BEAT_VALUE
|
|
|
|
namespace android {
|
|
|
|
// magic number, the bar should set higher if kBands is bigger
|
|
const float BeatDetectionAlgorithm::kThreshHold = 8;
|
|
const float BeatDetectionAlgorithm::kSumThreshold = 250;
|
|
|
|
|
|
// back trace time 1s
|
|
const float BeatDetectionAlgorithm::kBacktraceTime = 1;
|
|
|
|
// we must wait 1 second before switch generate a new sequence number, this is
|
|
// to prevent visualizer switches too much
|
|
const int64_t BeatDetectionAlgorithm::kBeatInterval = 1000000;
|
|
|
|
const float BeatDetectionAlgorithm::kMaxBeatValue = 100000;
|
|
|
|
// how many beat information will be cached before send out? We group beats
|
|
// in one packet to reduce the cost of sending too much packets. The time
|
|
// should be shorter than kAAHBufferTimeUs defined in TxPlayer
|
|
// The extra latency is introduced by fft, beat algorithm, time transform,
|
|
// binder service latency, jni latency, etc. If all these extra latency
|
|
// add up too much, then kAAHBufferTimeUs must be increased
|
|
const int32_t BeatDetectionAlgorithm::kAAHBeatInfoBufferTimeMS = 250;
|
|
|
|
// each thread holds a random data structure
|
|
static __thread unsigned short sRandData[3];
|
|
static __thread bool sRandDataInitialized = false;
|
|
|
|
static inline float normalizeBeatValue(float scale, float threshold) {
|
|
if (scale < 1) {
|
|
return 1;
|
|
} else if (scale > threshold) {
|
|
return threshold;
|
|
}
|
|
return scale;
|
|
}
|
|
|
|
BeatDetectionAlgorithm::BeatDetectionAlgorithm()
|
|
: mSamplesPerSegment(0),
|
|
mSegments(0),
|
|
mEnergyTrain(NULL),
|
|
mBeatTrain(NULL) {
|
|
if (!sRandDataInitialized) {
|
|
seed48(sRandData);
|
|
sRandDataInitialized = true;
|
|
}
|
|
mBeatSequenceNumber = nrand48(sRandData);
|
|
}
|
|
|
|
BeatDetectionAlgorithm::~BeatDetectionAlgorithm() {
|
|
cleanup();
|
|
}
|
|
|
|
bool BeatDetectionAlgorithm::initialize(uint32_t samples_per_seg,
|
|
uint32_t sample_rates) {
|
|
LOGV("initialize algorithm samples_per_seg %d sample_rates %d",
|
|
samples_per_seg, sample_rates);
|
|
uint32_t segments = (uint32_t)(
|
|
sample_rates / samples_per_seg * kBacktraceTime);
|
|
if (mSamplesPerSegment == samples_per_seg && mSegments == segments) {
|
|
return true;
|
|
}
|
|
uint32_t samplesPerBand = samples_per_seg / kBands;
|
|
if (samplesPerBand * kBands != samples_per_seg) {
|
|
LOGE("%s samples per segment not divided evenly by bands",
|
|
__PRETTY_FUNCTION__);
|
|
return false;
|
|
}
|
|
if (samplesPerBand & 1) {
|
|
LOGE("%s each band must contain even number of samples",
|
|
__PRETTY_FUNCTION__);
|
|
return false;
|
|
}
|
|
cleanup();
|
|
mSamplesPerSegment = samples_per_seg;
|
|
mSegments = segments;
|
|
mSamplesPerBand = samplesPerBand;
|
|
mTrainMatrixSize = kSearchBands * mSegments;
|
|
mEnergyTrain = new uint64_t[mTrainMatrixSize];
|
|
mBeatTrain = new float[mTrainMatrixSize];
|
|
if (!mEnergyTrain || !mBeatTrain) {
|
|
LOGE("%s failed allocating memory", __PRETTY_FUNCTION__);
|
|
return false;
|
|
}
|
|
flush();
|
|
return true;
|
|
}
|
|
|
|
void BeatDetectionAlgorithm::process(int64_t ts, int32_t* fft,
|
|
uint32_t samples_per_seg) {
|
|
CHECK(samples_per_seg == mSamplesPerSegment);
|
|
if (mSegments == 0) {
|
|
return;
|
|
}
|
|
// access fft array as 16bits
|
|
int16_t* segmentFt = (int16_t*)fft;
|
|
float maxNewEnergyScale = 0;
|
|
int maxBeatIdx = -1;
|
|
float sum = 0;
|
|
for (int i = 0, trainIndexForBand = 0;
|
|
i < kBandEnd - kBandStart;
|
|
i++, trainIndexForBand += mSegments) {
|
|
uint64_t newEnergy = 0;
|
|
// mSamplesPerBand is already validated to be even in initialize()
|
|
uint32_t startSample = (kBandStart + i) * mSamplesPerBand;
|
|
for (uint32_t li = startSample;
|
|
li < startSample + mSamplesPerBand;
|
|
li += 2) {
|
|
uint64_t amplitude = (int32_t)segmentFt[li] * (int32_t)segmentFt[li]
|
|
+ (int32_t)segmentFt[li + 1] * (int32_t)segmentFt[li + 1];
|
|
newEnergy += amplitude;
|
|
}
|
|
newEnergy = newEnergy / (mSamplesPerBand >> 1);
|
|
if (mEnergyTrainFilled) {
|
|
// update beat train
|
|
float newEnergyScale = (float) newEnergy
|
|
/ ((double) mEnergyTrainSum[i] / (double) mSegments);
|
|
mBeatTrain[trainIndexForBand + mBeatTrainIdx] = newEnergyScale;
|
|
if (isnan(newEnergyScale) || isinf(newEnergyScale)
|
|
|| newEnergyScale > maxNewEnergyScale) {
|
|
maxNewEnergyScale = newEnergyScale;
|
|
maxBeatIdx = i;
|
|
}
|
|
if (newEnergyScale > kThreshHold) {
|
|
sum += newEnergyScale;
|
|
}
|
|
}
|
|
|
|
// Update the energy train and energy sum
|
|
mEnergyTrainSum[i] -= mEnergyTrain[trainIndexForBand + mEnergyTrainIdx];
|
|
mEnergyTrain[trainIndexForBand + mEnergyTrainIdx] = newEnergy;
|
|
mEnergyTrainSum[i] += mEnergyTrain[trainIndexForBand + mEnergyTrainIdx];
|
|
|
|
}
|
|
if (isnan(maxNewEnergyScale) || isinf(maxNewEnergyScale)
|
|
|| maxNewEnergyScale > kMaxBeatValue) {
|
|
maxNewEnergyScale = kMaxBeatValue;
|
|
}
|
|
bool beat = false;
|
|
if (sum >= kSumThreshold /*&& maxNewEnergyScale > kThreshHold*/
|
|
&& (mBeatLastTs == -1 || (ts - mBeatLastTs) > kBeatInterval)) {
|
|
mBeatLastTs = ts;
|
|
mBeatSequenceNumber++;
|
|
beat = true;
|
|
LOGV("BEAT!!!! %d %f", mBeatSequenceNumber, maxNewEnergyScale);
|
|
}
|
|
mBeatValue = maxNewEnergyScale;
|
|
mBeatValueSmoothed = mBeatValueSmoothed * 0.7
|
|
+ normalizeBeatValue(mBeatValue, 30) * 0.3;
|
|
AudioBeatInfo beatInfo(ts, mBeatValue, mBeatValueSmoothed,
|
|
mBeatSequenceNumber);
|
|
// allowing overwrite existing item in the queue if we didn't send out
|
|
// data in time: lost beats is very unlikely to happen
|
|
mBeatInfoQueue.writeAllowOverflow(beatInfo);
|
|
|
|
#ifdef DEBUG_BEAT_VALUE
|
|
char debugstr[256];
|
|
uint32_t i;
|
|
for (i = 0; i < mBeatValue && i < sizeof(debugstr) - 1; i++) {
|
|
debugstr[i] = beat ? 'B' : '*';
|
|
}
|
|
debugstr[i] = 0;
|
|
LOGD("%lld %lld %f %f %s", mBeatLastTs, ts, mBeatValue, sum, debugstr);
|
|
#endif
|
|
|
|
mEnergyTrainIdx = mEnergyTrainIdx + 1;
|
|
if (mEnergyTrainIdx == mSegments) {
|
|
mEnergyTrainIdx = 0;
|
|
mEnergyTrainFilled = true;
|
|
}
|
|
|
|
if (mEnergyTrainFilled) {
|
|
mBeatTrainIdx = mBeatTrainIdx + 1;
|
|
if (mBeatTrainIdx == mSegments) {
|
|
mBeatTrainIdx = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
void BeatDetectionAlgorithm::cleanup() {
|
|
if (mEnergyTrain) {
|
|
delete mEnergyTrain;
|
|
mEnergyTrain = NULL;
|
|
}
|
|
if (mBeatTrain) {
|
|
delete mBeatTrain;
|
|
mBeatTrain = NULL;
|
|
}
|
|
}
|
|
|
|
class TRTPMetaDataBeat : public TRTPMetaDataBlock {
|
|
public:
|
|
TRTPMetaDataBeat()
|
|
: TRTPMetaDataBlock(kMetaDataBeat, 0) {}
|
|
TRTPMetaDataBeat(uint16_t beats,
|
|
AudioBeatInfo* beatInfo)
|
|
: TRTPMetaDataBlock(kMetaDataBeat, calculateItemLength(beats))
|
|
, mCount(beats)
|
|
{
|
|
memcpy(&beatInfos, beatInfo, beats * sizeof(AudioBeatInfo) );
|
|
}
|
|
static inline uint32_t calculateItemLength(uint16_t beats) {
|
|
return 2 + BeatDetectionAlgorithm::kItemLength * beats;
|
|
}
|
|
virtual ~TRTPMetaDataBeat() {}
|
|
virtual void write(uint8_t*& buf) const;
|
|
uint16_t mCount;
|
|
struct AudioBeatInfo beatInfos[BeatDetectionAlgorithm::kBeatQueueLen];
|
|
};
|
|
|
|
void TRTPMetaDataBeat::write(uint8_t*& buf) const {
|
|
writeBlockHead(buf);
|
|
TRTPPacket::writeU16(buf, mCount);
|
|
for (uint16_t i = 0; i < mCount; i++) {
|
|
TRTPPacket::writeU64(buf, beatInfos[i].ts);
|
|
TRTPPacket::writeFloat(buf, beatInfos[i].beatValue);
|
|
TRTPPacket::writeFloat(buf, beatInfos[i].smoothedBeatValue);
|
|
TRTPPacket::writeU32(buf, beatInfos[i].sequenceNumber);
|
|
}
|
|
}
|
|
|
|
TRTPMetaDataBlock* BeatDetectionAlgorithm::collectMetaData(bool flushOut) {
|
|
AudioBeatInfo beatInfo[kBeatQueueLen];
|
|
uint32_t min_read;
|
|
if (flushOut) {
|
|
min_read = 0;
|
|
} else {
|
|
min_read = mSegments * kAAHBeatInfoBufferTimeMS / 1000;
|
|
if (min_read > kBeatQueueLen) {
|
|
min_read = kBeatQueueLen;
|
|
}
|
|
}
|
|
int beats = mBeatInfoQueue.readBulk(beatInfo, min_read,
|
|
kBeatQueueLen);
|
|
if (beats > 0) {
|
|
uint32_t privateSize = TRTPMetaDataBeat::calculateItemLength(beats);
|
|
if (privateSize > 0xffff) {
|
|
LOGE("metadata packet too big");
|
|
return NULL;
|
|
}
|
|
return new TRTPMetaDataBeat(beats, beatInfo);
|
|
} else {
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
void BeatDetectionAlgorithm::flush() {
|
|
if (mEnergyTrain == NULL || mBeatTrain == NULL) {
|
|
return;
|
|
}
|
|
mEnergyTrainIdx = 0;
|
|
mBeatTrainIdx = 0;
|
|
mEnergyTrainFilled = false;
|
|
mBeatValue = 0;
|
|
mBeatValueSmoothed = 0;
|
|
mBeatLastTs = -1;
|
|
|
|
memset(mEnergyTrain, 0, mTrainMatrixSize * sizeof(uint64_t));
|
|
// IEEE745: all zero bytes generates 0.0f
|
|
memset(mBeatTrain, 0, mTrainMatrixSize * sizeof(float));
|
|
memset(&mEnergyTrainSum, 0, sizeof(mEnergyTrainSum));
|
|
}
|
|
|
|
} // namespace android
|