Merge "Dumping stack traces to proto." into pi-dev

This commit is contained in:
TreeHugger Robot
2018-03-08 01:55:13 +00:00
committed by Android (Google) Code Review
13 changed files with 278 additions and 9 deletions

View File

@@ -813,6 +813,7 @@ gensrcs {
],
srcs: [
"core/proto/android/os/backtrace.proto",
"core/proto/android/os/batterytype.proto",
"core/proto/android/os/cpufreq.proto",
"core/proto/android/os/cpuinfo.proto",

View File

@@ -15,8 +15,10 @@
LOCAL_PATH:= $(call my-dir)
# proto files used in incidentd to generate cppstream proto headers.
PROTO_FILES:= frameworks/base/core/proto/android/util/log.proto \
frameworks/base/core/proto/android/os/data.proto
PROTO_FILES:= \
frameworks/base/core/proto/android/os/backtrace.proto \
frameworks/base/core/proto/android/os/data.proto \
frameworks/base/core/proto/android/util/log.proto
# ========= #
# incidentd #
@@ -46,6 +48,8 @@ LOCAL_SHARED_LIBRARIES := \
libbase \
libbinder \
libcutils \
libdebuggerd_client \
libdumputils \
libincident \
liblog \
libprotobuf-cpp-lite \
@@ -119,6 +123,8 @@ LOCAL_SHARED_LIBRARIES := \
libbase \
libbinder \
libcutils \
libdebuggerd_client \
libdumputils \
libincident \
liblog \
libprotobuf-cpp-lite \

View File

@@ -16,6 +16,7 @@ service incidentd /system/bin/incidentd
class main
user incidentd
group incidentd log readproc
capabilities KILL SYS_PTRACE
on post-fs-data
# Create directory for incidentd

View File

@@ -87,6 +87,35 @@ status_t FdBuffer::read(int fd, int64_t timeout) {
return NO_ERROR;
}
/**
 * Reads from fd until EOF, appending everything to the internal buffer.
 *
 * Unlike read(), this call has no timeout. Reading stops early, with
 * mTruncated set, once the buffer holds MAX_BUFFER_COUNT * BUFFER_SIZE bytes.
 *
 * Returns NO_ERROR on EOF or truncation, NO_MEMORY if no write buffer is
 * available, or -errno if the underlying ::read() fails.
 */
status_t FdBuffer::readFully(int fd) {
    mStartTime = uptimeMillis();

    while (true) {
        if (mBuffer.size() >= MAX_BUFFER_COUNT * BUFFER_SIZE) {
            // Don't let it get too big.
            mTruncated = true;
            VLOG("Truncating data");
            break;
        }
        if (mBuffer.writeBuffer() == NULL) return NO_MEMORY;

        ssize_t amt =
                TEMP_FAILURE_RETRY(::read(fd, mBuffer.writeBuffer(), mBuffer.currentToWrite()));
        if (amt < 0) {
            // Capture errno before logging: the logging call (and strerror) may
            // overwrite it, which would make us return the wrong status.
            const int savedErrno = errno;
            VLOG("Fail to read %d: %s", fd, strerror(savedErrno));
            return -savedErrno;
        } else if (amt == 0) {
            VLOG("Done reading %zu bytes", mBuffer.size());
            // We're done.
            break;
        }
        mBuffer.wp()->move(amt);
    }

    mFinishTime = uptimeMillis();
    return NO_ERROR;
}
status_t FdBuffer::readProcessedDataInStream(int fd, int toFd, int fromFd, int64_t timeoutMs,
const bool isSysfs) {
struct pollfd pfds[] = {

View File

@@ -40,6 +40,12 @@ public:
*/
status_t read(int fd, int64_t timeoutMs);
/**
* Read the data until we hit eof.
* Returns NO_ERROR if there were no errors.
*/
status_t readFully(int fd);
/**
* Read processed results by streaming data to a parsing process, e.g. incident helper.
* The parsing process provides IO fds which are 'toFd' and 'fromFd'. The function

View File

@@ -18,13 +18,19 @@
#include "Section.h"
#include <dirent.h>
#include <errno.h>
#include <wait.h>
#include <mutex>
#include <set>
#include <android-base/file.h>
#include <android-base/stringprintf.h>
#include <android/util/protobuf.h>
#include <binder/IServiceManager.h>
#include <debuggerd/client.h>
#include <dumputils/dump_utils.h>
#include <log/log_event_list.h>
#include <log/log_read.h>
#include <log/logprint.h>
@@ -33,6 +39,7 @@
#include "FdBuffer.h"
#include "Privacy.h"
#include "PrivacyBuffer.h"
#include "frameworks/base/core/proto/android/os/backtrace.proto.h"
#include "frameworks/base/core/proto/android/os/data.proto.h"
#include "frameworks/base/core/proto/android/util/log.proto.h"
#include "incidentd_util.h"
@@ -95,6 +102,7 @@ static status_t write_section_header(int fd, int sectionId, size_t size) {
return WriteFully(fd, buf, p - buf) ? NO_ERROR : -errno;
}
// Reads data from FdBuffer and writes it to the requests file descriptor.
static status_t write_report_requests(const int id, const FdBuffer& buffer,
ReportRequestSet* requests) {
status_t err = -EBADF;
@@ -387,6 +395,7 @@ status_t GZipSection::Execute(ReportRequestSet* requests) const {
return NO_ERROR;
}
// ================================================================================
struct WorkerThreadData : public virtual RefBase {
const WorkerThreadSection* section;
@@ -413,7 +422,8 @@ WorkerThreadData::WorkerThreadData(const WorkerThreadSection* sec)
WorkerThreadData::~WorkerThreadData() {}
// ================================================================================
WorkerThreadSection::WorkerThreadSection(int id) : Section(id) {}
WorkerThreadSection::WorkerThreadSection(int id, const int64_t timeoutMs)
: Section(id, timeoutMs) {}
WorkerThreadSection::~WorkerThreadSection() {}
@@ -594,7 +604,7 @@ status_t CommandSection::Execute(ReportRequestSet* requests) const {
return readStatus;
}
// TODO: wait for command here has one trade-off: the failed status of command won't be detected
// Waiting for command here has one trade-off: the failed status of command won't be detected
// until buffer timeout, but it has the advantage of starting the data stream earlier.
status_t cmdStatus = wait_child(cmdPid);
status_t ihStatus = wait_child(ihPid);
@@ -694,7 +704,6 @@ static inline int32_t get4LE(uint8_t const* src) {
}
status_t LogSection::BlockingCall(int pipeWriteFd) const {
status_t err = NO_ERROR;
// Open log buffer and getting logs since last retrieved time if any.
unique_ptr<logger_list, void (*)(logger_list*)> loggers(
gLastLogsRetrieved.find(mLogID) == gLastLogsRetrieved.end()
@@ -705,15 +714,16 @@ status_t LogSection::BlockingCall(int pipeWriteFd) const {
if (android_logger_open(loggers.get(), mLogID) == NULL) {
ALOGW("LogSection %s: Can't get logger.", this->name.string());
return err;
return NO_ERROR;
}
log_msg msg;
log_time lastTimestamp(0);
status_t err = NO_ERROR;
ProtoOutputStream proto;
while (true) { // keeps reading until logd buffer is fully read.
status_t err = android_logger_list_read(loggers.get(), &msg);
err = android_logger_list_read(loggers.get(), &msg);
// err = 0 - no content, unexpected connection drop or EOF.
// err = +ive number - size of retrieved data from logger
// err = -ive number, OS supplied error _except_ for -EAGAIN
@@ -814,3 +824,133 @@ status_t LogSection::BlockingCall(int pipeWriteFd) const {
proto.flush(pipeWriteFd);
return err;
}
// ================================================================================
// Creates a section that dumps stack traces for processes of the given type.
// The type string is remembered in mType and appended to the section name
// after the "tombstone " prefix.
TombstoneSection::TombstoneSection(int id, const char* type, const int64_t timeoutMs)
    : WorkerThreadSection(id, timeoutMs), mType(type) {
    // mType was initialized from |type| above, so it carries the same text.
    name += "tombstone ";
    name += mType.c_str();
}
// Nothing to release explicitly; members clean up after themselves.
TombstoneSection::~TombstoneSection() = default;
/**
 * Walks /proc and, for every process matching this section's type, forks a
 * child that dumps the process's backtrace into a pipe. The captured text is
 * appended to a BackTraceProto, which is flushed to pipeWriteFd at the end.
 *
 * Process classification:
 *   - "java":   exe is /system/bin/app_process32 or app_process64 (zygote skipped)
 *   - "native": should_dump_native_traces() accepts the exe path
 *   - "hal":    pid is in get_interesting_hal_pids()
 *
 * Returns NO_ERROR on success. On the first pipe/fork/read/close failure the
 * scan stops, whatever was collected so far is still flushed, and a negative
 * errno-style status is returned.
 */
status_t TombstoneSection::BlockingCall(int pipeWriteFd) const {
    std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir);
    if (proc.get() == nullptr) {
        // Save errno before logging; the log call may overwrite it.
        const int savedErrno = errno;
        ALOGE("opendir /proc failed: %s\n", strerror(savedErrno));
        return -savedErrno;
    }

    const std::set<int> hal_pids = get_interesting_hal_pids();

    ProtoOutputStream proto;
    struct dirent* d;
    status_t err = NO_ERROR;
    while ((d = readdir(proc.get()))) {
        // Non-numeric entries (".", "self", "meminfo", ...) parse to 0 and are skipped.
        int pid = atoi(d->d_name);
        if (pid <= 0) {
            continue;
        }

        const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
        std::string exe;
        if (!android::base::Readlink(link_name, &exe)) {
            ALOGE("Can't read '%s': %s\n", link_name.c_str(), strerror(errno));
            continue;
        }

        bool is_java_process;
        if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") {
            if (mType != "java") continue;
            // Don't bother dumping backtraces for the zygote.
            if (IsZygote(pid)) {
                VLOG("Skipping Zygote");
                continue;
            }

            is_java_process = true;
        } else if (should_dump_native_traces(exe.c_str())) {
            if (mType != "native") continue;
            is_java_process = false;
        } else if (hal_pids.find(pid) != hal_pids.end()) {
            if (mType != "hal") continue;
            is_java_process = false;
        } else {
            // Probably a native process we don't care about, continue.
            VLOG("Skipping %d", pid);
            continue;
        }

        Fpipe dumpPipe;
        if (!dumpPipe.init()) {
            const int savedErrno = errno;
            ALOGW("TombstoneSection '%s' failed to setup dump pipe", this->name.string());
            err = -savedErrno;
            break;
        }

        const uint64_t start = Nanotime();
        pid_t child = fork();
        if (child < 0) {
            // Report the failure to the caller instead of silently returning NO_ERROR.
            const int savedErrno = errno;
            ALOGE("Failed to fork child process");
            err = -savedErrno;
            break;
        } else if (child == 0) {
            // This is the child process.
            close(dumpPipe.readFd());
            // Timeout is 5 for java and 20 for native/hal processes
            // (presumably seconds -- confirm against dump_utils).
            const int ret = dump_backtrace_to_file_timeout(
                    pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace,
                    is_java_process ? 5 : 20, dumpPipe.writeFd());
            if (ret == -1) {
                if (errno == 0) {
                    ALOGW("Dumping failed for pid '%d', likely due to a timeout\n", pid);
                } else {
                    ALOGE("Dumping failed for pid '%d': %s\n", pid, strerror(errno));
                }
            }
            if (close(dumpPipe.writeFd()) != 0) {
                ALOGW("TombstoneSection '%s' failed to close dump pipe writeFd: %d",
                      this->name.string(), errno);
                _exit(EXIT_FAILURE);
            }
            _exit(EXIT_SUCCESS);
        }
        // Parent process.
        // Drop our copy of the write end so that EOF is seen once the child is done.
        close(dumpPipe.writeFd());

        // Read from the pipe concurrently to avoid blocking the child.
        FdBuffer buffer;
        err = buffer.readFully(dumpPipe.readFd());
        // Measure before reaping so the wait below is not counted as dump time.
        const long long dumpDurationNs = static_cast<long long>(Nanotime() - start);

        // Close the read end before waiting: if we stopped reading early (error or
        // truncation) a writer still blocked on the pipe must fail rather than hang.
        const int closeErrno = (close(dumpPipe.readFd()) != 0) ? errno : 0;

        // Reap the child so it does not linger as a zombie process.
        int childStatus = 0;
        pid_t reaped;
        do {
            reaped = waitpid(child, &childStatus, 0);
        } while (reaped == -1 && errno == EINTR);
        if (reaped == -1) {
            ALOGW("TombstoneSection '%s' failed to wait for dump child %d: %s",
                  this->name.string(), child, strerror(errno));
        }

        if (err != NO_ERROR) {
            ALOGW("TombstoneSection '%s' failed to read stack dump: %d", this->name.string(), err);
            if (closeErrno != 0) {
                ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %s",
                      this->name.string(), strerror(closeErrno));
            }
            break;
        }

        // Flatten the (possibly chunked) buffer into one contiguous array for the proto.
        auto dump = std::make_unique<char[]>(buffer.size());
        auto iterator = buffer.data();
        size_t dumpSize = 0;
        while (iterator.hasNext()) {
            dump[dumpSize] = iterator.next();
            dumpSize++;
        }
        long long token = proto.start(android::os::BackTraceProto::TRACES);
        proto.write(android::os::BackTraceProto::Stack::PID, pid);
        proto.write(android::os::BackTraceProto::Stack::DUMP, dump.get(), dumpSize);
        proto.write(android::os::BackTraceProto::Stack::DUMP_DURATION_NS, dumpDurationNs);
        proto.end(token);

        if (closeErrno != 0) {
            ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %d", this->name.string(),
                  closeErrno);
            err = -closeErrno;
            break;
        }
    }

    proto.flush(pipeWriteFd);
    return err;
}

View File

@@ -103,7 +103,7 @@ private:
*/
class WorkerThreadSection : public Section {
public:
WorkerThreadSection(int id);
WorkerThreadSection(int id, const int64_t timeoutMs = REMOTE_CALL_TIMEOUT_MS);
virtual ~WorkerThreadSection();
virtual status_t Execute(ReportRequestSet* requests) const;
@@ -161,4 +161,18 @@ private:
bool mBinary;
};
/**
 * Section that collects stack dumps of running processes.
 *
 * The type string selects which processes are dumped; the implementation
 * compares it against "java", "native" and "hal".
 */
class TombstoneSection : public WorkerThreadSection {
public:
// id: section (field) id. type: which group of processes to dump.
// timeoutMs: forwarded to WorkerThreadSection; defaults to 30 seconds.
TombstoneSection(int id, const char* type, const int64_t timeoutMs = 30000 /* 30 seconds */);
virtual ~TombstoneSection();
// Collects the dumps and flushes them, as a proto, to pipeWriteFd.
virtual status_t BlockingCall(int pipeWriteFd) const;
private:
// Copy of the type string passed to the constructor.
std::string mType;
};
#endif // SECTIONS_H

View File

@@ -80,6 +80,7 @@ pid_t fork_execute_cmd(const char* cmd, char* const argv[], Fpipe* input, Fpipe*
close(output->writeFd());
return pid;
}
// ================================================================================
const char** varargs(const char* first, va_list rest) {
va_list copied_rest;
@@ -101,3 +102,11 @@ const char** varargs(const char* first, va_list rest) {
ret[numOfArgs] = NULL;
return ret;
}
// ================================================================================
// Number of nanoseconds in one second.
const uint64_t NANOS_PER_SEC = 1000000000;

// Returns the current CLOCK_MONOTONIC reading expressed in nanoseconds.
uint64_t Nanotime() {
    timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    const uint64_t wholeSeconds = static_cast<uint64_t>(now.tv_sec);
    const uint64_t leftoverNanos = static_cast<uint64_t>(now.tv_nsec);
    return wholeSeconds * NANOS_PER_SEC + leftoverNanos;
}

View File

@@ -60,4 +60,9 @@ pid_t fork_execute_cmd(const char* cmd, char* const argv[], Fpipe* input, Fpipe*
*/
const char** varargs(const char* first, va_list rest);
#endif // INCIDENTD_UTIL_H
/**
* Returns the current monotonic clock time in nanoseconds.
*/
uint64_t Nanotime();
#endif // INCIDENTD_UTIL_H

View File

@@ -0,0 +1,36 @@
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto2";
package android.os;
option java_multiple_files = true;
import "frameworks/base/libs/incident/proto/android/privacy.proto";
// Collection of stack dumps, one Stack entry per dumped process.
message BackTraceProto {
option (android.msg_privacy).dest = DEST_AUTOMATIC;
// Stack dump of a single process.
message Stack {
option (android.msg_privacy).dest = DEST_AUTOMATIC;
// Id of the process that was dumped.
optional int32 pid = 1;
// Text captured from the dump of that process.
optional string dump = 2;
// Time it took to dump the stacktrace.
optional int64 dump_duration_ns = 3;
}
// All dumps collected for one section.
repeated Stack traces = 1;
}

View File

@@ -17,6 +17,7 @@
syntax = "proto2";
option java_multiple_files = true;
import "frameworks/base/core/proto/android/os/backtrace.proto";
import "frameworks/base/core/proto/android/os/batterytype.proto";
import "frameworks/base/core/proto/android/os/cpufreq.proto";
import "frameworks/base/core/proto/android/os/cpuinfo.proto";
@@ -115,6 +116,22 @@ message IncidentProto {
(section).args = "LOG_ID_KERNEL"
];
// Stack dumps
optional android.os.BackTraceProto native_traces = 1200 [
(section).type = SECTION_TOMBSTONE,
(section).args = "native"
];
optional android.os.BackTraceProto hal_traces = 1201 [
(section).type = SECTION_TOMBSTONE,
(section).args = "hal"
];
optional android.os.BackTraceProto java_traces = 1202 [
(section).type = SECTION_TOMBSTONE,
(section).args = "java"
];
// Linux services
optional ProcrankProto procrank = 2000 [
(section).type = SECTION_NONE, // disable procrank until figure out permission

View File

@@ -43,6 +43,9 @@ enum SectionType {
// incidentd read file and gzip the data in bytes field
SECTION_GZIP = 5;
// incidentd calls tombstoned for annotated field
SECTION_TOMBSTONE = 6;
}
message SectionFlags {

View File

@@ -427,6 +427,8 @@ static bool generateSectionListCpp(Descriptor const* descriptor) {
printf(" new GZipSection(%d,", field->number());
splitAndPrint(s.args());
printf(" NULL),\n");
case SECTION_TOMBSTONE:
printf(" new TombstoneSection(%d, \"%s\"),\n", field->number(), s.args().c_str());
break;
}
}