Merge "Reset statsd and correctly record the dump reason when system server restarts/crashes." into pi-dev am: ec41a069fd

am: c1d985cf8b

Change-Id: I4df0cd841757ed13a9f7bf0f5bbf4d2df8ff0fb2
This commit is contained in:
android-build-team Robot
2018-05-03 20:15:56 -07:00
committed by android-build-merger
10 changed files with 105 additions and 25 deletions

View File

@@ -162,6 +162,19 @@ void StatsLogProcessor::OnLogEvent(LogEvent* event) {
OnLogEvent(event, false);
}
void StatsLogProcessor::resetConfigs() {
std::lock_guard<std::mutex> lock(mMetricsMutex);
resetConfigsLocked(getElapsedRealtimeNs());
}
void StatsLogProcessor::resetConfigsLocked(const int64_t timestampNs) {
std::vector<ConfigKey> configKeys;
for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) {
configKeys.push_back(it->first);
}
resetConfigsLocked(timestampNs, configKeys);
}
void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
const int64_t currentTimestampNs = event->GetElapsedTimestampNs();
@@ -188,11 +201,7 @@ void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) {
WriteDataToDiskLocked(CONFIG_RESET);
// We see fresher event before we see the checkpoint. We might have lost data.
// The best we can do is to reset.
std::vector<ConfigKey> configKeys;
for (auto it = mMetricsManagers.begin(); it != mMetricsManagers.end(); it++) {
configKeys.push_back(it->first);
}
resetConfigsLocked(currentTimestampNs, configKeys);
resetConfigsLocked(currentTimestampNs);
} else {
// Still in search of the CP. Keep going.
return;
@@ -242,6 +251,7 @@ void StatsLogProcessor::OnLogEvent(LogEvent* event, bool reconnected) {
void StatsLogProcessor::OnConfigUpdated(const int64_t timestampNs, const ConfigKey& key,
const StatsdConfig& config) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
WriteDataToDiskLocked(key, timestampNs, CONFIG_UPDATED);
OnConfigUpdatedLocked(timestampNs, key, config);
}
@@ -251,10 +261,6 @@ void StatsLogProcessor::OnConfigUpdatedLocked(
sp<MetricsManager> newMetricsManager =
new MetricsManager(key, config, mTimeBaseNs, timestampNs, mUidMap,
mAnomalyAlarmMonitor, mPeriodicAlarmMonitor);
auto it = mMetricsManagers.find(key);
if (it != mMetricsManagers.end()) {
WriteDataToDiskLocked(it->first, CONFIG_UPDATED);
}
if (newMetricsManager->isConfigValid()) {
mUidMap->OnConfigUpdated(key);
if (newMetricsManager->shouldAddUidMapListener()) {
@@ -419,6 +425,7 @@ void StatsLogProcessor::resetIfConfigTtlExpiredLocked(const int64_t timestampNs)
}
}
if (configKeysTtlExpired.size() > 0) {
WriteDataToDiskLocked(CONFIG_RESET);
resetConfigsLocked(timestampNs, configKeysTtlExpired);
}
}
@@ -427,7 +434,7 @@ void StatsLogProcessor::OnConfigRemoved(const ConfigKey& key) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
auto it = mMetricsManagers.find(key);
if (it != mMetricsManagers.end()) {
WriteDataToDiskLocked(key, CONFIG_REMOVED);
WriteDataToDiskLocked(key, getElapsedRealtimeNs(), CONFIG_REMOVED);
mMetricsManagers.erase(it);
mUidMap->OnConfigRemoved(key);
}
@@ -474,9 +481,13 @@ void StatsLogProcessor::flushIfNecessaryLocked(
}
void StatsLogProcessor::WriteDataToDiskLocked(const ConfigKey& key,
const int64_t timestampNs,
const DumpReportReason dumpReportReason) {
if (mMetricsManagers.find(key) == mMetricsManagers.end()) {
return;
}
ProtoOutputStream proto;
onConfigMetricsReportLocked(key, getElapsedRealtimeNs(),
onConfigMetricsReportLocked(key, timestampNs,
true /* include_current_partial_bucket*/,
false /* include strings */, dumpReportReason, &proto);
string file_name = StringPrintf("%s/%ld_%d_%lld", STATS_DATA_DIR,
@@ -491,14 +502,15 @@ void StatsLogProcessor::WriteDataToDiskLocked(const ConfigKey& key,
}
void StatsLogProcessor::WriteDataToDiskLocked(const DumpReportReason dumpReportReason) {
const int64_t timeNs = getElapsedRealtimeNs();
for (auto& pair : mMetricsManagers) {
WriteDataToDiskLocked(pair.first, dumpReportReason);
WriteDataToDiskLocked(pair.first, timeNs, dumpReportReason);
}
}
void StatsLogProcessor::WriteDataToDisk(bool isShutdown) {
void StatsLogProcessor::WriteDataToDisk(const DumpReportReason dumpReportReason) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
WriteDataToDiskLocked(DEVICE_SHUTDOWN);
WriteDataToDiskLocked(dumpReportReason);
}
void StatsLogProcessor::informPullAlarmFired(const int64_t timestampNs) {

View File

@@ -77,7 +77,10 @@ public:
unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>> alarmSet);
/* Flushes data to disk. Data on memory will be gone after written to disk. */
void WriteDataToDisk(bool shutdown);
void WriteDataToDisk(const DumpReportReason dumpReportReason);
// Reset all configs.
void resetConfigs();
inline sp<UidMap> getUidMap() {
return mUidMap;
@@ -121,8 +124,9 @@ private:
void OnConfigUpdatedLocked(
const int64_t currentTimestampNs, const ConfigKey& key, const StatsdConfig& config);
void WriteDataToDiskLocked(DumpReportReason dumpReportReason);
void WriteDataToDiskLocked(const ConfigKey& key, DumpReportReason dumpReportReason);
void WriteDataToDiskLocked(const DumpReportReason dumpReportReason);
void WriteDataToDiskLocked(const ConfigKey& key, const int64_t timestampNs,
const DumpReportReason dumpReportReason);
void onConfigMetricsReportLocked(const ConfigKey& key, const int64_t dumpTimeStampNs,
const bool include_current_partial_bucket,
@@ -141,6 +145,9 @@ private:
// Handler over the isolated uid change event.
void onIsolatedUidChangedEventLocked(const LogEvent& event);
// Reset all configs.
void resetConfigsLocked(const int64_t timestampNs);
// Reset the specified configs.
void resetConfigsLocked(const int64_t timestampNs, const std::vector<ConfigKey>& configs);
// Function used to send a broadcast so that receiver for the config key can call getData

View File

@@ -659,7 +659,7 @@ status_t StatsService::cmd_print_uid_map(FILE* out, const Vector<String8>& args)
status_t StatsService::cmd_write_data_to_disk(FILE* out) {
fprintf(out, "Writing data to disk\n");
mProcessor->WriteDataToDisk(false);
mProcessor->WriteDataToDisk(ADB_DUMP);
return NO_ERROR;
}
@@ -816,10 +816,10 @@ Status StatsService::systemRunning() {
return Status::ok();
}
Status StatsService::informDeviceShutdown(bool isShutdown) {
Status StatsService::informDeviceShutdown() {
ENFORCE_UID(AID_SYSTEM);
VLOG("StatsService::informDeviceShutdown");
mProcessor->WriteDataToDisk(isShutdown);
mProcessor->WriteDataToDisk(DEVICE_SHUTDOWN);
return Status::ok();
}
@@ -967,7 +967,12 @@ Status StatsService::unsetBroadcastSubscriber(int64_t configId,
void StatsService::binderDied(const wp <IBinder>& who) {
ALOGW("statscompanion service died");
mProcessor->WriteDataToDisk(STATSCOMPANION_DIED);
StatsdStats::getInstance().noteSystemServerRestart(getWallClockSec());
if (mProcessor != nullptr) {
ALOGW("Reset statsd upon system server restars.");
mProcessor->WriteDataToDisk(STATSCOMPANION_DIED);
mProcessor->resetConfigs();
}
mAnomalyAlarmMonitor->setStatsCompanionService(nullptr);
mPeriodicAlarmMonitor->setStatsCompanionService(nullptr);
SubscriberReporter::getInstance().setStatsCompanionService(nullptr);

View File

@@ -66,7 +66,7 @@ public:
const vector<String16>& app);
virtual Status informOnePackage(const String16& app, int32_t uid, int64_t version);
virtual Status informOnePackageRemoved(const String16& app, int32_t uid);
virtual Status informDeviceShutdown(bool isShutdown);
virtual Status informDeviceShutdown();
/**
* Called right before we start processing events.

View File

@@ -51,6 +51,7 @@ const int FIELD_ID_ANOMALY_ALARM_STATS = 9;
const int FIELD_ID_LOGGER_ERROR_STATS = 11;
const int FIELD_ID_PERIODIC_ALARM_STATS = 12;
const int FIELD_ID_LOG_LOSS_STATS = 14;
const int FIELD_ID_SYSTEM_SERVER_RESTART = 15;
const int FIELD_ID_ATOM_STATS_TAG = 1;
const int FIELD_ID_ATOM_STATS_COUNT = 2;
@@ -355,6 +356,15 @@ void StatsdStats::noteAtomLogged(int atomId, int32_t timeSec) {
mPushedAtomStats[atomId]++;
}
void StatsdStats::noteSystemServerRestart(int32_t timeSec) {
lock_guard<std::mutex> lock(mLock);
if (mSystemServerRestartSec.size() == kMaxSystemServerRestarts) {
mSystemServerRestartSec.pop_front();
}
mSystemServerRestartSec.push_back(timeSec);
}
void StatsdStats::noteLoggerError(int error) {
lock_guard<std::mutex> lock(mLock);
// grows strictly one at a time. so it won't > kMaxLoggerErrors
@@ -377,6 +387,7 @@ void StatsdStats::resetInternalLocked() {
mAnomalyAlarmRegisteredStats = 0;
mPeriodicAlarmRegisteredStats = 0;
mLoggerErrors.clear();
mSystemServerRestartSec.clear();
mLogLossTimestampNs.clear();
for (auto& config : mConfigStats) {
config.second->broadcast_sent_time_sec.clear();
@@ -395,7 +406,7 @@ string buildTimeString(int64_t timeSec) {
time_t t = timeSec;
struct tm* tm = localtime(&t);
char timeBuffer[80];
strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %I:%M%p\n", tm);
strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %I:%M%p", tm);
return string(timeBuffer);
}
@@ -511,6 +522,12 @@ void StatsdStats::dumpStats(FILE* out) const {
strftime(buffer, sizeof(buffer), "%Y-%m-%d %I:%M%p\n", error_tm);
fprintf(out, "Logger error %d at %s\n", error.second, buffer);
}
for (const auto& restart : mSystemServerRestartSec) {
fprintf(out, "System server restarts at %s(%lld)\n",
buildTimeString(restart).c_str(), (long long)restart);
}
for (const auto& loss : mLogLossTimestampNs) {
fprintf(out, "Log loss detected at %lld (elapsedRealtimeNs)\n", (long long)loss);
}
@@ -673,6 +690,11 @@ void StatsdStats::dumpStats(std::vector<uint8_t>* output, bool reset) {
(long long)loss);
}
for (const auto& restart : mSystemServerRestartSec) {
proto.write(FIELD_TYPE_INT32 | FIELD_ID_SYSTEM_SERVER_RESTART | FIELD_COUNT_REPEATED,
restart);
}
output->clear();
size_t bufferSize = proto.size();
output->resize(bufferSize);

View File

@@ -104,6 +104,8 @@ public:
const static int kMaxLoggerErrors = 20;
const static int kMaxSystemServerRestarts = 20;
const static int kMaxTimestampCount = 20;
const static int kMaxLogSourceCount = 50;
@@ -275,6 +277,11 @@ public:
*/
void noteLoggerError(int error);
/*
* Records when system server restarts.
*/
void noteSystemServerRestart(int32_t timeSec);
/**
* Records statsd skipped an event.
*/
@@ -338,6 +345,8 @@ private:
// Timestamps when we detect log loss after logd reconnect.
std::list<int64_t> mLogLossTimestampNs;
std::list<int32_t> mSystemServerRestartSec;
// Stores the number of times statsd modified the anomaly alarm registered with
// StatsCompanionService.
int mAnomalyAlarmRegisteredStats = 0;
@@ -366,6 +375,7 @@ private:
FRIEND_TEST(StatsdStatsTest, TestAtomLog);
FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold);
FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor);
FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash);
};
} // namespace statsd

View File

@@ -383,4 +383,6 @@ message StatsdStatsReport {
repeated SkippedLogEventStats skipped_log_event_stats = 13;
repeated int64 log_loss_stats = 14;
repeated int32 system_restart_sec = 15;
}

View File

@@ -298,6 +298,28 @@ TEST(StatsdStatsTest, TestTimestampThreshold) {
EXPECT_EQ(newTimestamp, configStats->dump_report_stats.back().first);
}
TEST(StatsdStatsTest, TestSystemServerCrash) {
StatsdStats stats;
vector<int32_t> timestamps;
for (int i = 0; i < StatsdStats::kMaxSystemServerRestarts; i++) {
timestamps.push_back(i);
stats.noteSystemServerRestart(timestamps[i]);
}
vector<uint8_t> output;
stats.dumpStats(&output, false);
StatsdStatsReport report;
EXPECT_TRUE(report.ParseFromArray(&output[0], output.size()));
const int maxCount = StatsdStats::kMaxSystemServerRestarts;
EXPECT_EQ(maxCount, (int)report.system_restart_sec_size());
stats.noteSystemServerRestart(StatsdStats::kMaxSystemServerRestarts + 1);
output.clear();
stats.dumpStats(&output, false);
EXPECT_TRUE(report.ParseFromArray(&output[0], output.size()));
EXPECT_EQ(maxCount, (int)report.system_restart_sec_size());
EXPECT_EQ(StatsdStats::kMaxSystemServerRestarts + 1, report.system_restart_sec(maxCount - 1));
}
} // namespace statsd
} // namespace os
} // namespace android

View File

@@ -56,7 +56,7 @@ interface IStatsManager {
/**
* Tells statsd that the device is about to shutdown.
*/
void informDeviceShutdown(boolean isShutdown);
void informDeviceShutdown();
/**
* Inform statsd what the version and package are for each uid. Note that each array should

View File

@@ -426,7 +426,7 @@ public class StatsCompanionService extends IStatsCompanionService.Stub {
return;
}
try {
sStatsd.informDeviceShutdown(true);
sStatsd.informDeviceShutdown();
} catch (Exception e) {
Slog.w(TAG, "Failed to inform statsd of a shutdown event.", e);
}