Merge "Dumping stack traces to proto." into pi-dev
This commit is contained in:
committed by
Android (Google) Code Review
commit
09ed26a046
@@ -813,6 +813,7 @@ gensrcs {
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"core/proto/android/os/backtrace.proto",
|
||||
"core/proto/android/os/batterytype.proto",
|
||||
"core/proto/android/os/cpufreq.proto",
|
||||
"core/proto/android/os/cpuinfo.proto",
|
||||
|
||||
@@ -15,8 +15,10 @@
|
||||
LOCAL_PATH:= $(call my-dir)
|
||||
|
||||
# proto files used in incidentd to generate cppstream proto headers.
|
||||
PROTO_FILES:= frameworks/base/core/proto/android/util/log.proto \
|
||||
frameworks/base/core/proto/android/os/data.proto
|
||||
PROTO_FILES:= \
|
||||
frameworks/base/core/proto/android/os/backtrace.proto \
|
||||
frameworks/base/core/proto/android/os/data.proto \
|
||||
frameworks/base/core/proto/android/util/log.proto
|
||||
|
||||
# ========= #
|
||||
# incidentd #
|
||||
@@ -46,6 +48,8 @@ LOCAL_SHARED_LIBRARIES := \
|
||||
libbase \
|
||||
libbinder \
|
||||
libcutils \
|
||||
libdebuggerd_client \
|
||||
libdumputils \
|
||||
libincident \
|
||||
liblog \
|
||||
libprotobuf-cpp-lite \
|
||||
@@ -119,6 +123,8 @@ LOCAL_SHARED_LIBRARIES := \
|
||||
libbase \
|
||||
libbinder \
|
||||
libcutils \
|
||||
libdebuggerd_client \
|
||||
libdumputils \
|
||||
libincident \
|
||||
liblog \
|
||||
libprotobuf-cpp-lite \
|
||||
|
||||
@@ -16,6 +16,7 @@ service incidentd /system/bin/incidentd
|
||||
class main
|
||||
user incidentd
|
||||
group incidentd log readproc
|
||||
capabilities KILL SYS_PTRACE
|
||||
|
||||
on post-fs-data
|
||||
# Create directory for incidentd
|
||||
|
||||
@@ -87,6 +87,35 @@ status_t FdBuffer::read(int fd, int64_t timeout) {
|
||||
return NO_ERROR;
|
||||
}
|
||||
|
||||
/**
 * Reads from fd into mBuffer until ::read() reports end-of-file.
 *
 * This variant takes no timeout: it keeps calling ::read() (retrying on
 * interruption through TEMP_FAILURE_RETRY) until one of the following happens:
 *   - ::read() returns 0 (EOF)                      -> NO_ERROR
 *   - the buffer holds MAX_BUFFER_COUNT * BUFFER_SIZE bytes or more;
 *     mTruncated is set and reading stops           -> NO_ERROR
 *   - mBuffer.writeBuffer() returns NULL            -> NO_MEMORY
 *   - ::read() fails                                -> -errno
 *
 * mStartTime is recorded on entry; mFinishTime is recorded only on the
 * NO_ERROR paths.
 */
status_t FdBuffer::readFully(int fd) {
    mStartTime = uptimeMillis();

    while (true) {
        if (mBuffer.size() >= MAX_BUFFER_COUNT * BUFFER_SIZE) {
            // Don't let it get too big.
            mTruncated = true;
            VLOG("Truncating data");
            break;
        }
        if (mBuffer.writeBuffer() == NULL) return NO_MEMORY;

        ssize_t amt =
                TEMP_FAILURE_RETRY(::read(fd, mBuffer.writeBuffer(), mBuffer.currentToWrite()));
        if (amt < 0) {
            // Capture errno before logging: the logging call may overwrite it,
            // and the caller must receive the error reported by ::read().
            const int readErrno = errno;
            VLOG("Fail to read %d: %s", fd, strerror(readErrno));
            return -readErrno;
        } else if (amt == 0) {
            VLOG("Done reading %zu bytes", mBuffer.size());
            // We're done.
            break;
        }
        // Advance the write pointer past the bytes that were just read.
        mBuffer.wp()->move(amt);
    }

    mFinishTime = uptimeMillis();
    return NO_ERROR;
}
|
||||
|
||||
status_t FdBuffer::readProcessedDataInStream(int fd, int toFd, int fromFd, int64_t timeoutMs,
|
||||
const bool isSysfs) {
|
||||
struct pollfd pfds[] = {
|
||||
|
||||
@@ -40,6 +40,12 @@ public:
|
||||
*/
|
||||
status_t read(int fd, int64_t timeoutMs);
|
||||
|
||||
/**
|
||||
* Read the data until we hit eof.
|
||||
* Returns NO_ERROR if there were no errors.
|
||||
*/
|
||||
status_t readFully(int fd);
|
||||
|
||||
/**
|
||||
* Read processed results by streaming data to a parsing process, e.g. incident helper.
|
||||
* The parsing process provides IO fds which are 'toFd' and 'fromFd'. The function
|
||||
|
||||
@@ -18,13 +18,19 @@
|
||||
|
||||
#include "Section.h"
|
||||
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include <wait.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
|
||||
#include <android-base/file.h>
|
||||
#include <android-base/stringprintf.h>
|
||||
#include <android/util/protobuf.h>
|
||||
#include <binder/IServiceManager.h>
|
||||
#include <debuggerd/client.h>
|
||||
#include <dumputils/dump_utils.h>
|
||||
#include <log/log_event_list.h>
|
||||
#include <log/log_read.h>
|
||||
#include <log/logprint.h>
|
||||
@@ -33,6 +39,7 @@
|
||||
#include "FdBuffer.h"
|
||||
#include "Privacy.h"
|
||||
#include "PrivacyBuffer.h"
|
||||
#include "frameworks/base/core/proto/android/os/backtrace.proto.h"
|
||||
#include "frameworks/base/core/proto/android/os/data.proto.h"
|
||||
#include "frameworks/base/core/proto/android/util/log.proto.h"
|
||||
#include "incidentd_util.h"
|
||||
@@ -95,6 +102,7 @@ static status_t write_section_header(int fd, int sectionId, size_t size) {
|
||||
return WriteFully(fd, buf, p - buf) ? NO_ERROR : -errno;
|
||||
}
|
||||
|
||||
// Reads data from FdBuffer and writes it to the requests file descriptor.
|
||||
static status_t write_report_requests(const int id, const FdBuffer& buffer,
|
||||
ReportRequestSet* requests) {
|
||||
status_t err = -EBADF;
|
||||
@@ -387,6 +395,7 @@ status_t GZipSection::Execute(ReportRequestSet* requests) const {
|
||||
|
||||
return NO_ERROR;
|
||||
}
|
||||
|
||||
// ================================================================================
|
||||
struct WorkerThreadData : public virtual RefBase {
|
||||
const WorkerThreadSection* section;
|
||||
@@ -413,7 +422,8 @@ WorkerThreadData::WorkerThreadData(const WorkerThreadSection* sec)
|
||||
WorkerThreadData::~WorkerThreadData() {}
|
||||
|
||||
// ================================================================================
|
||||
WorkerThreadSection::WorkerThreadSection(int id) : Section(id) {}
|
||||
WorkerThreadSection::WorkerThreadSection(int id, const int64_t timeoutMs)
|
||||
: Section(id, timeoutMs) {}
|
||||
|
||||
WorkerThreadSection::~WorkerThreadSection() {}
|
||||
|
||||
@@ -594,7 +604,7 @@ status_t CommandSection::Execute(ReportRequestSet* requests) const {
|
||||
return readStatus;
|
||||
}
|
||||
|
||||
// TODO: wait for command here has one trade-off: the failed status of command won't be detected
|
||||
// Waiting for command here has one trade-off: the failed status of command won't be detected
|
||||
// until buffer timeout, but it has the advantage of starting the data stream earlier.
|
||||
status_t cmdStatus = wait_child(cmdPid);
|
||||
status_t ihStatus = wait_child(ihPid);
|
||||
@@ -694,7 +704,6 @@ static inline int32_t get4LE(uint8_t const* src) {
|
||||
}
|
||||
|
||||
status_t LogSection::BlockingCall(int pipeWriteFd) const {
|
||||
status_t err = NO_ERROR;
|
||||
// Open log buffer and getting logs since last retrieved time if any.
|
||||
unique_ptr<logger_list, void (*)(logger_list*)> loggers(
|
||||
gLastLogsRetrieved.find(mLogID) == gLastLogsRetrieved.end()
|
||||
@@ -705,15 +714,16 @@ status_t LogSection::BlockingCall(int pipeWriteFd) const {
|
||||
|
||||
if (android_logger_open(loggers.get(), mLogID) == NULL) {
|
||||
ALOGW("LogSection %s: Can't get logger.", this->name.string());
|
||||
return err;
|
||||
return NO_ERROR;
|
||||
}
|
||||
|
||||
log_msg msg;
|
||||
log_time lastTimestamp(0);
|
||||
|
||||
status_t err = NO_ERROR;
|
||||
ProtoOutputStream proto;
|
||||
while (true) { // keeps reading until logd buffer is fully read.
|
||||
status_t err = android_logger_list_read(loggers.get(), &msg);
|
||||
err = android_logger_list_read(loggers.get(), &msg);
|
||||
// err = 0 - no content, unexpected connection drop or EOF.
|
||||
// err = +ive number - size of retrieved data from logger
|
||||
// err = -ive number, OS supplied error _except_ for -EAGAIN
|
||||
@@ -814,3 +824,133 @@ status_t LogSection::BlockingCall(int pipeWriteFd) const {
|
||||
proto.flush(pipeWriteFd);
|
||||
return err;
|
||||
}
|
||||
|
||||
// ================================================================================
|
||||
|
||||
// Creates a worker-thread section for section `id` with the given timeout.
// `type` is stored in mType, and "tombstone <type>" is appended to the
// section's name.
TombstoneSection::TombstoneSection(int id, const char* type, const int64_t timeoutMs)
    : WorkerThreadSection(id, timeoutMs), mType(type) {
    name += "tombstone ";
    // mType was initialised from `type` above, so this appends the same text.
    name += mType.c_str();
}

// Nothing to release beyond what the members and base class clean up.
TombstoneSection::~TombstoneSection() = default;
|
||||
|
||||
/**
 * Dumps stack traces of running processes into a BackTraceProto and flushes
 * the result to pipeWriteFd.
 *
 * Every numeric entry of /proc is classified through its /proc/<pid>/exe link:
 *   - app_process32/app_process64 (zygote excluded)  -> "java"
 *   - executables accepted by should_dump_native_traces() -> "native"
 *   - pids returned by get_interesting_hal_pids()    -> "hal"
 * Only processes whose class equals mType are dumped. Each dump runs in a
 * forked child that writes the trace into a pipe; the parent reads the pipe to
 * EOF, reaps the child, and records pid, dump text and elapsed time as one
 * BackTraceProto.Stack entry.
 *
 * Returns NO_ERROR on success. Returns a negative errno value if /proc cannot
 * be opened (nothing is flushed in that case), or if setting up the pipe,
 * forking, reading the dump or closing the read end fails; in those later
 * cases the loop stops and whatever was collected so far is still flushed.
 */
status_t TombstoneSection::BlockingCall(int pipeWriteFd) const {
    std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir);
    if (proc.get() == nullptr) {
        // Save errno before logging; the logging call may overwrite it.
        const int openErrno = errno;
        ALOGE("opendir /proc failed: %s\n", strerror(openErrno));
        return -openErrno;
    }

    const std::set<int> hal_pids = get_interesting_hal_pids();

    ProtoOutputStream proto;
    struct dirent* d;
    status_t err = NO_ERROR;
    while ((d = readdir(proc.get()))) {
        // Non-numeric /proc entries parse to 0 and are skipped.
        int pid = atoi(d->d_name);
        if (pid <= 0) {
            continue;
        }

        const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
        std::string exe;
        if (!android::base::Readlink(link_name, &exe)) {
            ALOGE("Can't read '%s': %s\n", link_name.c_str(), strerror(errno));
            continue;
        }

        bool is_java_process;
        if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") {
            if (mType != "java") continue;
            // Don't bother dumping backtraces for the zygote.
            if (IsZygote(pid)) {
                VLOG("Skipping Zygote");
                continue;
            }

            is_java_process = true;
        } else if (should_dump_native_traces(exe.c_str())) {
            if (mType != "native") continue;
            is_java_process = false;
        } else if (hal_pids.find(pid) != hal_pids.end()) {
            if (mType != "hal") continue;
            is_java_process = false;
        } else {
            // Probably a native process we don't care about, continue.
            VLOG("Skipping %d", pid);
            continue;
        }

        Fpipe dumpPipe;
        if (!dumpPipe.init()) {
            const int pipeErrno = errno;
            ALOGW("TombstoneSection '%s' failed to setup dump pipe", this->name.string());
            err = -pipeErrno;
            break;
        }

        const uint64_t start = Nanotime();
        pid_t child = fork();
        if (child < 0) {
            // Report the failure to the caller instead of returning NO_ERROR.
            // NOTE(review): the pipe descriptors are not closed explicitly on
            // this path; presumably Fpipe releases them itself - confirm.
            const int forkErrno = errno;
            ALOGE("Failed to fork child process");
            err = -forkErrno;
            break;
        } else if (child == 0) {
            // This is the child process.
            close(dumpPipe.readFd());
            // The third argument is the dump timeout (5 for java, 20 otherwise);
            // presumably seconds - confirm against debuggerd/client.h.
            const int ret = dump_backtrace_to_file_timeout(
                    pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace,
                    is_java_process ? 5 : 20, dumpPipe.writeFd());
            if (ret == -1) {
                if (errno == 0) {
                    ALOGW("Dumping failed for pid '%d', likely due to a timeout\n", pid);
                } else {
                    ALOGE("Dumping failed for pid '%d': %s\n", pid, strerror(errno));
                }
            }
            if (close(dumpPipe.writeFd()) != 0) {
                ALOGW("TombstoneSection '%s' failed to close dump pipe writeFd: %d",
                      this->name.string(), errno);
                _exit(EXIT_FAILURE);
            }

            _exit(EXIT_SUCCESS);
        }
        // Parent process: drop our copy of the write end so that EOF is seen
        // once the child closes its copy.
        close(dumpPipe.writeFd());
        // Read from the pipe concurrently to avoid blocking the child.
        FdBuffer buffer;
        err = buffer.readFully(dumpPipe.readFd());
        // Reap the child so it does not linger as a zombie. This is done
        // whether or not the read succeeded, before any early exit below.
        const status_t childStatus = wait_child(child);
        if (err != NO_ERROR) {
            ALOGW("TombstoneSection '%s' failed to read stack dump: %d", this->name.string(), err);
            if (close(dumpPipe.readFd()) != 0) {
                ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %s",
                      this->name.string(), strerror(errno));
            }
            break;
        }
        if (childStatus != NO_ERROR) {
            // The dump that was read is still recorded; only note the problem.
            ALOGW("TombstoneSection '%s' dump child for pid '%d' returned status: %d",
                  this->name.string(), pid, childStatus);
        }

        // Copy the buffered dump into one contiguous array for the proto writer.
        auto dump = std::make_unique<char[]>(buffer.size());
        auto iterator = buffer.data();
        int i = 0;
        while (iterator.hasNext()) {
            dump[i] = iterator.next();
            i++;
        }
        long long token = proto.start(android::os::BackTraceProto::TRACES);
        proto.write(android::os::BackTraceProto::Stack::PID, pid);
        proto.write(android::os::BackTraceProto::Stack::DUMP, dump.get(), i);
        proto.write(android::os::BackTraceProto::Stack::DUMP_DURATION_NS,
                    static_cast<long long>(Nanotime() - start));
        proto.end(token);

        // NOTE(review): if Fpipe also closes its descriptors on destruction,
        // this explicit close would be repeated there - confirm.
        if (close(dumpPipe.readFd()) != 0) {
            const int closeErrno = errno;
            ALOGW("TombstoneSection '%s' failed to close dump pipe readFd: %d", this->name.string(),
                  closeErrno);
            err = -closeErrno;
            break;
        }
    }

    proto.flush(pipeWriteFd);
    return err;
}
|
||||
|
||||
@@ -103,7 +103,7 @@ private:
|
||||
*/
|
||||
class WorkerThreadSection : public Section {
|
||||
public:
|
||||
WorkerThreadSection(int id);
|
||||
WorkerThreadSection(int id, const int64_t timeoutMs = REMOTE_CALL_TIMEOUT_MS);
|
||||
virtual ~WorkerThreadSection();
|
||||
|
||||
virtual status_t Execute(ReportRequestSet* requests) const;
|
||||
@@ -161,4 +161,18 @@ private:
|
||||
bool mBinary;
|
||||
};
|
||||
|
||||
/**
|
||||
* Section that gets data from tombstoned: stack traces of the running
* processes that match the given type ("java", "native" or "hal").
|
||||
*/
|
||||
class TombstoneSection : public WorkerThreadSection {
|
||||
public:
|
||||
TombstoneSection(int id, const char* type, const int64_t timeoutMs = 30000 /* 30 seconds */);
|
||||
virtual ~TombstoneSection();
|
||||
|
||||
virtual status_t BlockingCall(int pipeWriteFd) const;
|
||||
|
||||
private:
|
||||
std::string mType;
|
||||
};
|
||||
|
||||
#endif // SECTIONS_H
|
||||
|
||||
@@ -80,6 +80,7 @@ pid_t fork_execute_cmd(const char* cmd, char* const argv[], Fpipe* input, Fpipe*
|
||||
close(output->writeFd());
|
||||
return pid;
|
||||
}
|
||||
|
||||
// ================================================================================
|
||||
const char** varargs(const char* first, va_list rest) {
|
||||
va_list copied_rest;
|
||||
@@ -101,3 +102,11 @@ const char** varargs(const char* first, va_list rest) {
|
||||
ret[numOfArgs] = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// ================================================================================
|
||||
// Number of nanoseconds in one second.
const uint64_t NANOS_PER_SEC = 1000000000;

// Returns the current CLOCK_MONOTONIC time in nanoseconds, or 0 if the clock
// cannot be read.
uint64_t Nanotime() {
    timespec ts;
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        // ts is not filled in on failure; don't derive a time from it.
        return 0;
    }
    // Widen each field before the arithmetic so the multiplication is always
    // carried out in 64 bits, whatever the width of time_t.
    return static_cast<uint64_t>(ts.tv_sec) * NANOS_PER_SEC + static_cast<uint64_t>(ts.tv_nsec);
}
|
||||
|
||||
@@ -60,4 +60,9 @@ pid_t fork_execute_cmd(const char* cmd, char* const argv[], Fpipe* input, Fpipe*
|
||||
*/
|
||||
const char** varargs(const char* first, va_list rest);
|
||||
|
||||
#endif // INCIDENTD_UTIL_H
|
||||
/**
|
||||
* Returns the current monotonic clock time in nanoseconds.
|
||||
*/
|
||||
uint64_t Nanotime();
|
||||
|
||||
#endif // INCIDENTD_UTIL_H
|
||||
|
||||
36
core/proto/android/os/backtrace.proto
Normal file
36
core/proto/android/os/backtrace.proto
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto2";
|
||||
package android.os;
|
||||
|
||||
option java_multiple_files = true;
|
||||
|
||||
import "frameworks/base/libs/incident/proto/android/privacy.proto";
|
||||
|
||||
message BackTraceProto {
|
||||
option (android.msg_privacy).dest = DEST_AUTOMATIC;
|
||||
|
||||
message Stack {
|
||||
option (android.msg_privacy).dest = DEST_AUTOMATIC;
|
||||
|
||||
optional int32 pid = 1;
|
||||
optional string dump = 2;
|
||||
// Time it took to dump the stacktrace.
|
||||
optional int64 dump_duration_ns = 3;
|
||||
}
|
||||
repeated Stack traces = 1;
|
||||
}
|
||||
@@ -17,6 +17,7 @@
|
||||
syntax = "proto2";
|
||||
option java_multiple_files = true;
|
||||
|
||||
import "frameworks/base/core/proto/android/os/backtrace.proto";
|
||||
import "frameworks/base/core/proto/android/os/batterytype.proto";
|
||||
import "frameworks/base/core/proto/android/os/cpufreq.proto";
|
||||
import "frameworks/base/core/proto/android/os/cpuinfo.proto";
|
||||
@@ -115,6 +116,22 @@ message IncidentProto {
|
||||
(section).args = "LOG_ID_KERNEL"
|
||||
];
|
||||
|
||||
// Stack dumps
|
||||
optional android.os.BackTraceProto native_traces = 1200 [
|
||||
(section).type = SECTION_TOMBSTONE,
|
||||
(section).args = "native"
|
||||
];
|
||||
|
||||
optional android.os.BackTraceProto hal_traces = 1201 [
|
||||
(section).type = SECTION_TOMBSTONE,
|
||||
(section).args = "hal"
|
||||
];
|
||||
|
||||
optional android.os.BackTraceProto java_traces = 1202 [
|
||||
(section).type = SECTION_TOMBSTONE,
|
||||
(section).args = "java"
|
||||
];
|
||||
|
||||
// Linux services
|
||||
optional ProcrankProto procrank = 2000 [
|
||||
(section).type = SECTION_NONE, // disable procrank until figure out permission
|
||||
|
||||
@@ -43,6 +43,9 @@ enum SectionType {
|
||||
|
||||
// incidentd read file and gzip the data in bytes field
|
||||
SECTION_GZIP = 5;
|
||||
|
||||
// incidentd calls tombstoned for annotated field
|
||||
SECTION_TOMBSTONE = 6;
|
||||
}
|
||||
|
||||
message SectionFlags {
|
||||
|
||||
@@ -427,6 +427,8 @@ static bool generateSectionListCpp(Descriptor const* descriptor) {
|
||||
printf(" new GZipSection(%d,", field->number());
|
||||
splitAndPrint(s.args());
|
||||
printf(" NULL),\n");
|
||||
case SECTION_TOMBSTONE:
|
||||
printf(" new TombstoneSection(%d, \"%s\"),\n", field->number(), s.args().c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user