Merge "Add pullTimeoutDurations to statsdstats" into rvc-dev

This commit is contained in:
Jeffrey Huang
2020-06-18 19:11:46 +00:00
committed by Android (Google) Code Review
7 changed files with 80 additions and 9 deletions

View File

@@ -44,7 +44,8 @@ StatsPuller::StatsPuller(const int tagId, const int64_t coolDownNs, const int64_
bool StatsPuller::Pull(const int64_t eventTimeNs, std::vector<std::shared_ptr<LogEvent>>* data) {
lock_guard<std::mutex> lock(mLock);
int64_t elapsedTimeNs = getElapsedRealtimeNs();
const int64_t elapsedTimeNs = getElapsedRealtimeNs();
const int64_t systemUptimeMillis = getSystemUptimeMillis();
StatsdStats::getInstance().notePull(mTagId);
const bool shouldUseCache =
(mLastEventTimeNs == eventTimeNs) || (elapsedTimeNs - mLastPullTimeNs < mCoolDownNs);
@@ -67,16 +68,18 @@ bool StatsPuller::Pull(const int64_t eventTimeNs, std::vector<std::shared_ptr<Lo
if (!mHasGoodData) {
return mHasGoodData;
}
const int64_t pullDurationNs = getElapsedRealtimeNs() - elapsedTimeNs;
StatsdStats::getInstance().notePullTime(mTagId, pullDurationNs);
const bool pullTimeOut = pullDurationNs > mPullTimeoutNs;
const int64_t pullElapsedDurationNs = getElapsedRealtimeNs() - elapsedTimeNs;
const int64_t pullSystemUptimeDurationMillis = getSystemUptimeMillis() - systemUptimeMillis;
StatsdStats::getInstance().notePullTime(mTagId, pullElapsedDurationNs);
const bool pullTimeOut = pullElapsedDurationNs > mPullTimeoutNs;
if (pullTimeOut) {
// Something went wrong. Discard the data.
mCachedData.clear();
mHasGoodData = false;
StatsdStats::getInstance().notePullTimeout(mTagId);
StatsdStats::getInstance().notePullTimeout(
mTagId, pullSystemUptimeDurationMillis, NanoToMillis(pullElapsedDurationNs));
ALOGW("Pull for atom %d exceeds timeout %lld nano seconds.", mTagId,
(long long)pullDurationNs);
(long long)pullElapsedDurationNs);
return mHasGoodData;
}

View File

@@ -38,6 +38,7 @@ using android::util::ProtoOutputStream;
using std::lock_guard;
using std::shared_ptr;
using std::string;
using std::to_string;
using std::vector;
const int FIELD_ID_BEGIN_TIME = 1;
@@ -436,9 +437,18 @@ void StatsdStats::notePullDataError(int pullAtomId) {
mPulledAtomStats[pullAtomId].dataError++;
}
void StatsdStats::notePullTimeout(int pullAtomId) {
void StatsdStats::notePullTimeout(int pullAtomId,
int64_t pullUptimeMillis,
int64_t pullElapsedMillis) {
lock_guard<std::mutex> lock(mLock);
mPulledAtomStats[pullAtomId].pullTimeout++;
PulledAtomStats& pulledAtomStats = mPulledAtomStats[pullAtomId];
pulledAtomStats.pullTimeout++;
if (pulledAtomStats.pullTimeoutMetadata.size() == kMaxTimestampCount) {
pulledAtomStats.pullTimeoutMetadata.pop_front();
}
pulledAtomStats.pullTimeoutMetadata.emplace_back(pullUptimeMillis, pullElapsedMillis);
}
void StatsdStats::notePullExceedMaxDelay(int pullAtomId) {
@@ -630,6 +640,7 @@ void StatsdStats::resetInternalLocked() {
pullStats.second.unregisteredCount = 0;
pullStats.second.atomErrorCount = 0;
pullStats.second.binderCallFailCount = 0;
pullStats.second.pullTimeoutMetadata.clear();
}
mAtomMetricStats.clear();
mActivationBroadcastGuardrailStats.clear();
@@ -786,6 +797,20 @@ void StatsdStats::dumpStats(int out) const {
pair.second.pullUidProviderNotFound, pair.second.pullerNotFound,
pair.second.registeredCount, pair.second.unregisteredCount,
pair.second.atomErrorCount);
if (pair.second.pullTimeoutMetadata.size() > 0) {
string uptimeMillis = "(pull timeout system uptime millis) ";
string pullTimeoutMillis = "(pull timeout elapsed time millis) ";
for (const auto& stats : pair.second.pullTimeoutMetadata) {
uptimeMillis.append(to_string(stats.pullTimeoutUptimeMillis)).append(",");;
pullTimeoutMillis.append(to_string(stats.pullTimeoutElapsedMillis)).append(",");
}
uptimeMillis.pop_back();
uptimeMillis.push_back('\n');
pullTimeoutMillis.pop_back();
pullTimeoutMillis.push_back('\n');
dprintf(out, "%s", uptimeMillis.c_str());
dprintf(out, "%s", pullTimeoutMillis.c_str());
}
}
if (mAnomalyAlarmRegisteredStats > 0) {

View File

@@ -352,7 +352,7 @@ public:
/*
* Records pull exceeds timeout for the puller.
*/
void notePullTimeout(int pullAtomId);
void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis);
/*
* Records pull exceeds max delay for a metric.
@@ -498,6 +498,14 @@ public:
*/
void dumpStats(int outFd) const;
typedef struct PullTimeoutMetadata {
int64_t pullTimeoutUptimeMillis;
int64_t pullTimeoutElapsedMillis;
PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis) :
pullTimeoutUptimeMillis(uptimeMillis),
pullTimeoutElapsedMillis(elapsedMillis) {/* do nothing */}
} PullTimeoutMetadata;
typedef struct {
long totalPull = 0;
long totalPullFromCache = 0;
@@ -519,6 +527,7 @@ public:
long unregisteredCount = 0;
int32_t atomErrorCount = 0;
long binderCallFailCount = 0;
std::list<PullTimeoutMetadata> pullTimeoutMetadata;
} PulledAtomStats;
typedef struct {

View File

@@ -467,6 +467,11 @@ message StatsdStatsReport {
optional int64 binder_call_failed = 19;
optional int64 failed_uid_provider_not_found = 20;
optional int64 puller_not_found = 21;
message PullTimeoutMetadata {
optional int64 pull_timeout_uptime_millis = 1;
optional int64 pull_timeout_elapsed_millis = 2;
}
repeated PullTimeoutMetadata pull_atom_metadata = 22;
}
repeated PulledAtomStats pulled_atom_stats = 10;

View File

@@ -81,6 +81,9 @@ const int FIELD_ID_ATOM_ERROR_COUNT = 18;
const int FIELD_ID_BINDER_CALL_FAIL_COUNT = 19;
const int FIELD_ID_PULL_UID_PROVIDER_NOT_FOUND = 20;
const int FIELD_ID_PULLER_NOT_FOUND = 21;
const int FIELD_ID_PULL_TIMEOUT_METADATA = 22;
const int FIELD_ID_PULL_TIMEOUT_METADATA_UPTIME_MILLIS = 1;
const int FIELD_ID_PULL_TIMEOUT_METADATA_ELAPSED_MILLIS = 2;
// for AtomMetricStats proto
const int FIELD_ID_ATOM_METRIC_STATS = 17;
@@ -497,6 +500,16 @@ void writePullerStatsToStream(const std::pair<int, StatsdStats::PulledAtomStats>
(long long)pair.second.pullUidProviderNotFound);
protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULLER_NOT_FOUND,
(long long)pair.second.pullerNotFound);
for (const auto& pullTimeoutMetadata : pair.second.pullTimeoutMetadata) {
uint64_t timeoutMetadataToken = protoOutput->start(FIELD_TYPE_MESSAGE |
FIELD_ID_PULL_TIMEOUT_METADATA |
FIELD_COUNT_REPEATED);
protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULL_TIMEOUT_METADATA_UPTIME_MILLIS,
pullTimeoutMetadata.pullTimeoutUptimeMillis);
protoOutput->write(FIELD_TYPE_INT64 | FIELD_ID_PULL_TIMEOUT_METADATA_ELAPSED_MILLIS,
pullTimeoutMetadata.pullTimeoutElapsedMillis);
protoOutput->end(timeoutMetadataToken);
}
protoOutput->end(token);
}
@@ -542,6 +555,10 @@ int64_t getElapsedRealtimeMillis() {
return ::android::elapsedRealtime();
}
int64_t getSystemUptimeMillis() {
return ::android::uptimeMillis();
}
int64_t getWallClockNs() {
return time(nullptr) * NS_PER_SEC;
}

View File

@@ -61,6 +61,9 @@ int64_t getElapsedRealtimeMillis();
// Gets the elapsed timestamp in seconds.
int64_t getElapsedRealtimeSec();
// Gets the system uptime in millis.
int64_t getSystemUptimeMillis();
// Gets the wall clock timestamp in ns.
int64_t getWallClockNs();

View File

@@ -306,6 +306,8 @@ TEST(StatsdStatsTest, TestPullAtomStats) {
stats.notePullUidProviderNotFound(util::DISK_SPACE);
stats.notePullerNotFound(util::DISK_SPACE);
stats.notePullerNotFound(util::DISK_SPACE);
stats.notePullTimeout(util::DISK_SPACE, 3000L, 6000L);
stats.notePullTimeout(util::DISK_SPACE, 4000L, 7000L);
vector<uint8_t> output;
stats.dumpStats(&output, false);
@@ -328,6 +330,13 @@ TEST(StatsdStatsTest, TestPullAtomStats) {
EXPECT_EQ(1L, report.pulled_atom_stats(0).binder_call_failed());
EXPECT_EQ(1L, report.pulled_atom_stats(0).failed_uid_provider_not_found());
EXPECT_EQ(2L, report.pulled_atom_stats(0).puller_not_found());
ASSERT_EQ(2, report.pulled_atom_stats(0).pull_atom_metadata_size());
EXPECT_EQ(3000L, report.pulled_atom_stats(0).pull_atom_metadata(0).pull_timeout_uptime_millis());
EXPECT_EQ(4000L, report.pulled_atom_stats(0).pull_atom_metadata(1).pull_timeout_uptime_millis());
EXPECT_EQ(6000L, report.pulled_atom_stats(0).pull_atom_metadata(0)
.pull_timeout_elapsed_millis());
EXPECT_EQ(7000L, report.pulled_atom_stats(0).pull_atom_metadata(1)
.pull_timeout_elapsed_millis());
}
TEST(StatsdStatsTest, TestAtomMetricsStats) {