Implement PII stripper in incidentd, part 1

1. automatically parse privacy options and generate lookup table
2. create FdBuffer iterator API in order to remove dependency on Reporter.h

Bug: 64687253
Test: Unit test for iterator API, and manually tested lookup table
Change-Id: I1ea376a4481fc4afc7bdf447936f767b63690fd3
This commit is contained in:
Yi Jin
2017-08-18 14:51:20 -07:00
parent 6e250517d6
commit 0ed9b68a3f
9 changed files with 271 additions and 22 deletions

View File

@@ -174,7 +174,7 @@ FdBuffer::readProcessedDataInStream(int fd, int toFd, int fromFd, int64_t timeou
if (rpos >= wpos) {
amt = ::read(fd, cirBuf + rpos, BUFFER_SIZE - rpos);
} else {
amt = :: read(fd, cirBuf + rpos, wpos - rpos);
amt = ::read(fd, cirBuf + rpos, wpos - rpos);
}
if (amt < 0) {
if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
@@ -241,6 +241,7 @@ FdBuffer::readProcessedDataInStream(int fd, int toFd, int fromFd, int64_t timeou
/**
 * Number of bytes currently held: every buffer but the last counts as a
 * full BUFFER_SIZE chunk, and the last one contributes mCurrentWritten.
 * An FdBuffer with no buffers reports 0.
 */
size_t
FdBuffer::size()
{
    const size_t chunkCount = mBuffers.size();
    if (chunkCount == 0) {
        return 0;
    }
    const size_t fullChunks = chunkCount - 1;
    return (fullChunks * BUFFER_SIZE) + mCurrentWritten;
}
@@ -255,4 +256,30 @@ FdBuffer::write(ReportRequestSet* reporter)
return NO_ERROR;
}
/**
 * Returns the iterator that marks the end of the stored data.
 * When nothing is stored (no buffers, or a negative write count) this is
 * the same position as begin().
 */
FdBuffer::iterator
FdBuffer::end()
{
    const bool nothingStored = mBuffers.empty() || mCurrentWritten < 0;
    if (nothingStored) {
        return begin();
    }
    if (mCurrentWritten != BUFFER_SIZE) {
        // Common case: the end sits inside the last buffer.
        return FdBuffer::iterator(*this, mBuffers.size() - 1, mCurrentWritten);
    }
    // The last buffer is completely full and FdBuffer doesn't allocate another
    // buf since no more bytes to read, so the end is the start of the
    // (non-existent) next buffer.
    return FdBuffer::iterator(*this, mBuffers.size(), 0);
}
/**
 * Moves this iterator forward by `offset` bytes and returns a reference to
 * itself. Note that, unlike a conventional operator+, the iterator is
 * modified in place; whole BUFFER_SIZE chunks are carried into mIndex.
 */
FdBuffer::iterator&
FdBuffer::iterator::operator+(size_t offset)
{
    const size_t advanced = mOffset + offset;
    mIndex += advanced / BUFFER_SIZE;
    mOffset = advanced % BUFFER_SIZE;
    return *this;
}
/**
 * Absolute position of this iterator, counted in bytes from the start of
 * the first buffer.
 */
size_t
FdBuffer::iterator::bytesRead()
{
    const size_t bytesInPreviousBuffers = mIndex * BUFFER_SIZE;
    return bytesInPreviousBuffers + mOffset;
}

View File

@@ -21,7 +21,6 @@
#include <utils/Errors.h>
#include <set>
#include <vector>
using namespace android;
@@ -74,7 +73,8 @@ public:
size_t size();
/**
* Write the data that we recorded to the fd given.
* [Deprecated] Write the data that we recorded to the fd given.
* TODO: remove it once the iterator api is working
*/
status_t write(ReportRequestSet* requests);
@@ -83,6 +83,37 @@ public:
*/
int64_t durationMs() { return mFinishTime - mStartTime; }
/**
* Read data stored in FdBuffer
*/
class iterator;
friend class iterator;
/**
 * Forward cursor over the bytes stored in an FdBuffer.
 *
 * A position is the pair (mIndex, mOffset): the index of the buffer inside
 * FdBuffer::mBuffers and the byte offset inside that buffer.
 */
class iterator : public std::iterator<std::random_access_iterator_tag, uint8_t> {
private:
    FdBuffer& mFdBuffer;
    size_t mIndex;
    size_t mOffset;
public:
    explicit iterator(FdBuffer& buffer, ssize_t index, ssize_t offset)
            : mFdBuffer(buffer), mIndex(index), mOffset(offset) {}
    iterator(const iterator& other) = default;

    // Copies the position of `other` into this iterator. The FdBuffer
    // reference cannot be re-seated, so only iterators over the same
    // FdBuffer should be assigned to each other.
    iterator& operator=(const iterator& other) {
        mIndex = other.mIndex;
        mOffset = other.mOffset;
        return *this;
    }

    // Advances this iterator in place and returns itself.
    iterator& operator+(size_t offset); // this is implemented in .cpp
    iterator& operator+=(size_t offset) { return *this + offset; }
    iterator& operator++() { return *this + 1; }

    // Post-increment: advances this iterator but hands back the position
    // it had before the increment.
    iterator operator++(int) {
        iterator previous(*this);
        operator+(1);
        return previous;
    }

    // Two iterators are equal when they point at the same position; the
    // owning FdBuffer is not part of the comparison.
    bool operator==(iterator other) const {
        return mIndex == other.mIndex && mOffset == other.mOffset;
    }
    bool operator!=(iterator other) const { return !(*this == other); }

    // No bounds checking is done here; use outOfBound() after random access.
    reference operator*() const { return mFdBuffer.mBuffers[mIndex][mOffset]; }

    // random access could make the iterator out of bound
    size_t bytesRead();
    bool outOfBound() { return bytesRead() > mFdBuffer.size(); }
};
// Iterator positioned at the very first byte: buffer index 0, offset 0.
iterator begin() { return iterator(*this, 0, 0); }
// Iterator marking the end of the stored data; defined out of line.
iterator end();
private:
vector<uint8_t*> mBuffers;
int64_t mStartTime;

View File

@@ -22,9 +22,6 @@
#include <private/android_filesystem_config.h>
#include <binder/IServiceManager.h>
#include <mutex>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wait.h>
#include <unistd.h>
@@ -37,7 +34,7 @@ const char* INCIDENT_HELPER = "/system/bin/incident_helper";
static pid_t
forkAndExecuteIncidentHelper(const int id, const char* name, Fpipe& p2cPipe, Fpipe& c2pPipe)
{
const char* ihArgs[] { INCIDENT_HELPER, "-s", to_string(id).c_str(), NULL };
const char* ihArgs[] { INCIDENT_HELPER, "-s", String8::format("%d", id).string(), NULL };
// fork used in multithreaded environment, avoid adding unnecessary code in child process
pid_t pid = fork();

View File

@@ -16,6 +16,7 @@
#include "protobuf.h"
uint8_t*
write_raw_varint(uint8_t* buf, uint32_t val)
{

View File

@@ -21,8 +21,46 @@
/**
* This is the mapping of section IDs to the commands that are run to get those commands.
* The section IDs are guaranteed in ascending order
*/
extern const Section* SECTION_LIST[];
/*
* In order not to use libprotobuf-cpp-full nor libplatformprotos in incidentd,
* the privacy options' data structures are explicitly redefined in this file.
*/
// DESTINATION enum
extern const uint8_t DEST_LOCAL;
extern const uint8_t DEST_EXPLICIT;
extern const uint8_t DEST_AUTOMATIC;
// This is the default value of the DEST enum.
// Fields with this value don't generate a Privacy entry, to avoid emitting too much generated code
extern const uint8_t DEST_DEFAULT_VALUE;
// type of the field, identical to protobuf definition
extern const uint8_t TYPE_STRING;
extern const uint8_t TYPE_MESSAGE;
/**
 * Privacy policy of one proto field. Instances are emitted by the code
 * generator using positional aggregate initialization, so the order of the
 * members below must not change.
 */
struct Privacy {
// proto field number
int field_id;
// proto field type, see TYPE_STRING / TYPE_MESSAGE
uint8_t type;
// the following two fields are identical to
// frameworks/base/libs/incident/proto/android/privacy.proto
uint8_t dest;
// NULL-terminated list of patterns; left unset for non-string fields
const char** patterns;
// ignore parent's privacy flags if children are set, NULL-terminated
const Privacy** children;
};
/**
* This is the mapping of section IDs to each section's privacy policy.
* The section IDs are guaranteed in ascending order
*/
extern const Privacy* PRIVACY_POLICY_LIST[];
#endif // SECTION_LIST_H

View File

@@ -92,6 +92,30 @@ TEST_F(FdBufferTest, ReadAndWrite) {
AssertBufferContent(testdata.c_str());
}
// An untouched FdBuffer has begin() == end(); stepping one byte forward
// must be reported as out of bound.
TEST_F(FdBufferTest, IterateEmpty) {
    auto cursor = buffer.begin();
    EXPECT_EQ(cursor, buffer.end());

    cursor += 1;
    EXPECT_TRUE(cursor.outOfBound());
}
// Reads a short string into the buffer, then checks that walking the
// iterator yields the same bytes and that jumping by size() lands on end().
TEST_F(FdBufferTest, ReadAndIterate) {
    std::string testdata = "FdBuffer test string";
    ASSERT_TRUE(WriteStringToFile(testdata, tf.path, false));
    ASSERT_EQ(NO_ERROR, buffer.read(tf.fd, READ_TIMEOUT));

    // byte-by-byte walk
    int pos = 0;
    FdBuffer::iterator walker = buffer.begin();
    while (walker != buffer.end()) {
        EXPECT_EQ(*walker, (uint8_t)testdata[pos++]);
        ++walker;
    }

    // single jump to the end
    FdBuffer::iterator jumper = buffer.begin();
    jumper += buffer.size();
    EXPECT_EQ(jumper, buffer.end());
    EXPECT_EQ(jumper.bytesRead(), testdata.size());
    EXPECT_FALSE(jumper.outOfBound());
}
TEST_F(FdBufferTest, ReadTimeout) {
int pid = fork();
ASSERT_TRUE(pid != -1);
@@ -202,6 +226,7 @@ TEST_F(FdBufferTest, ReadInStreamEmpty) {
TEST_F(FdBufferTest, ReadInStreamMoreThan4MB) {
const std::string testFile = kTestDataPath + "morethan4MB.txt";
size_t fourMB = (size_t) 4 * 1024 * 1024;
int fd = open(testFile.c_str(), O_RDONLY);
ASSERT_NE(fd, -1);
int pid = fork();
@@ -220,10 +245,18 @@ TEST_F(FdBufferTest, ReadInStreamMoreThan4MB) {
ASSERT_EQ(NO_ERROR, buffer.readProcessedDataInStream(fd,
p2cPipe.writeFd(), c2pPipe.readFd(), READ_TIMEOUT));
EXPECT_EQ(buffer.size(), (size_t) (4 * 1024 * 1024));
EXPECT_EQ(buffer.size(), fourMB);
EXPECT_FALSE(buffer.timedOut());
EXPECT_TRUE(buffer.truncated());
wait(&pid);
FdBuffer::iterator it = buffer.begin();
it += fourMB;
EXPECT_EQ(it.bytesRead(), fourMB);
EXPECT_EQ(it, buffer.end());
for (FdBuffer::iterator it = buffer.begin(); it != buffer.end(); it++) {
char c = 'A' + (it.bytesRead() % 64 / 8);
ASSERT_TRUE(*it == c);
}
}
}

View File

@@ -47,6 +47,9 @@ message IncidentHeaderProto {
Cause cause = 1;
}
// privacy field options must not be set at this level because all
// the sections are able to be controlled and configured by section ids.
// Instead privacy field options need to be configured in each section proto message.
message IncidentProto {
// Incident header
repeated IncidentHeaderProto header = 1;

View File

@@ -36,7 +36,7 @@ enum Destination {
// off the device with an explicit user action.
DEST_EXPLICIT = 1;
// Fields or messages annotated with DEST_LOCAL can be sent by
// Fields or messages annotated with DEST_AUTOMATIC can be sent by
// automatic means, without per-sending user consent. The user
// still must have previously accepted a consent to share this
// information.
@@ -47,8 +47,11 @@ enum Destination {
message PrivacyFlags {
optional Destination dest = 1 [
default = DEST_LOCAL
default = DEST_EXPLICIT
];
// regex to filter pii sensitive info from a string field type
repeated string patterns = 2;
}
extend google.protobuf.FieldOptions {

View File

@@ -27,21 +27,24 @@ using namespace google::protobuf::io;
using namespace google::protobuf::internal;
using namespace std;
static void generateHead(const char* header) {
printf("// Auto generated file. Do not modify\n");
printf("\n");
printf("#include \"%s.h\"\n", header);
// Emits one blank line into the generated output on stdout.
static inline void emptyline() {
    putchar('\n');
}
// Prints the banner of a generated file: the "do not modify" notice followed
// by an #include of "<header>.h", each followed by a blank line.
static void generateHead(const char* header) {
    fputs("// Auto generated file. Do not modify\n", stdout);
    emptyline();

    printf("#include \"%s.h\"\n", header);
    emptyline();
}
// ================================================================================
static bool generateIncidentSectionsCpp()
static bool generateIncidentSectionsCpp(Descriptor const* descriptor)
{
generateHead("incident_sections");
map<string,FieldDescriptor const*> sections;
int N;
Descriptor const* descriptor = IncidentProto::descriptor();
N = descriptor->field_count();
for (int i=0; i<N; i++) {
const FieldDescriptor* field = descriptor->field(i);
@@ -85,11 +88,100 @@ static void splitAndPrint(const string& args) {
}
}
static bool generateSectionListCpp() {
/**
 * Returns a C string equal to field_name with every occurrence of the
 * character oldC replaced by the string newS.
 *
 * Ownership: when oldC does not occur, the returned pointer is simply
 * field_name.c_str() and is only valid while field_name is alive. Otherwise
 * a fresh array is allocated with new[]; this function never frees it, so
 * the caller owns it.
 */
static const char* replaceAll(const std::string& field_name, const char oldC, const std::string& newS) {
    if (field_name.find_first_of(oldC) == field_name.npos) return field_name.c_str();

    // Count the occurrences first so the buffer can be sized exactly. Sizing
    // by field_name.size() * newS.size() is too small when newS is empty.
    size_t hits = 0;
    for (size_t i = 0; i < field_name.size(); i++) {
        if (field_name[i] == oldC) hits++;
    }
    const size_t outLen = field_name.size() - hits + hits * newS.size();

    char* res = new char[outLen + 1];
    size_t idx = 0;
    for (size_t pos = 0; pos < field_name.size(); pos++) {
        const char cur = field_name[pos];
        if (cur != oldC) {
            res[idx++] = cur;
            continue;
        }
        newS.copy(res + idx, newS.size());
        idx += newS.size();
    }
    res[idx] = '\0';
    return res;
}
static inline bool isDefaultDest(const FieldDescriptor* field) {
return field->options().GetExtension(privacy).dest() == PrivacyFlags::default_instance().dest();
}
// Returns true if the descriptor doesn't have any non default privacy flags set, including its submessages
static bool generatePrivacyFlags(const Descriptor* descriptor, const char* alias, map<string, bool> &msgNames) {
bool hasDefaultFlags[descriptor->field_count()];
// iterate though its field and generate sub flags first
for (int i=0; i<descriptor->field_count(); i++) {
hasDefaultFlags[i] = true; // set default to true
const FieldDescriptor* field = descriptor->field(i);
const char* field_name = replaceAll(field->full_name(), '.', "__");
// check if the same name is already defined
if (msgNames.find(field_name) != msgNames.end()) {
hasDefaultFlags[i] = msgNames[field_name];
continue;
};
PrivacyFlags p = field->options().GetExtension(privacy);
switch (field->type()) {
case FieldDescriptor::TYPE_MESSAGE:
if (generatePrivacyFlags(field->message_type(), field_name, msgNames) &&
isDefaultDest(field)) break;
printf("static Privacy %s = { %d, %d, %d, NULL, %s_LIST };\n", field_name, field->number(),
(int) field->type(), p.dest(), field_name);
hasDefaultFlags[i] = false;
break;
case FieldDescriptor::TYPE_STRING:
if (isDefaultDest(field) && p.patterns_size() == 0) break;
printf("static const char* %s_patterns[] = {\n", field_name);
for (int i=0; i<p.patterns_size(); i++) {
// the generated string need to escape backslash as well, need to dup it here
printf(" \"%s\",\n", replaceAll(p.patterns(i), '\\', "\\\\"));
}
printf(" NULL };\n");
printf("static Privacy %s = { %d, %d, %d, %s_patterns };\n", field_name, field->number(),
(int) field->type(), p.dest(), field_name);
hasDefaultFlags[i] = false;
break;
default:
if (isDefaultDest(field)) break;
printf("static Privacy %s = { %d, %d, %d };\n", field_name, field->number(),
(int) field->type(), p.dest());
hasDefaultFlags[i] = false;
}
// add the field name to message map, true means it has default flags
msgNames[field_name] = hasDefaultFlags[i];
}
bool allDefaults = true;
for (int i=0; i<descriptor->field_count(); i++) {
allDefaults &= hasDefaultFlags[i];
}
if (allDefaults) return true;
emptyline();
printf("const Privacy* %s_LIST[] = {\n", alias);
for (int i=0; i<descriptor->field_count(); i++) {
const FieldDescriptor* field = descriptor->field(i);
if (hasDefaultFlags[i]) continue;
printf(" &%s,\n", replaceAll(field->full_name(), '.', "__"));
}
printf(" NULL };\n");
emptyline();
return false;
}
static bool generateSectionListCpp(Descriptor const* descriptor) {
generateHead("section_list");
// generates SECTION_LIST
printf("const Section* SECTION_LIST[] = {\n");
Descriptor const* descriptor = IncidentProto::descriptor();
for (int i=0; i<descriptor->field_count(); i++) {
const FieldDescriptor* field = descriptor->field(i);
@@ -115,8 +207,30 @@ static bool generateSectionListCpp() {
break;
}
}
printf(" NULL\n");
printf("};\n");
printf(" NULL };\n");
emptyline();
// generates DESTINATION enum values
EnumDescriptor const* destination = Destination_descriptor();
for (int i=0; i<destination->value_count(); i++) {
EnumValueDescriptor const* val = destination->value(i);
printf("const uint8_t %s = %d;\n", val->name().c_str(), val->number());
}
emptyline();
printf("const uint8_t DEST_DEFAULT_VALUE = %d;\n", PrivacyFlags::default_instance().dest());
emptyline();
// populates string type and message type values
printf("const uint8_t TYPE_STRING = %d;\n", (int) FieldDescriptor::TYPE_STRING);
printf("const uint8_t TYPE_MESSAGE = %d;\n", (int) FieldDescriptor::TYPE_MESSAGE);
emptyline();
// generates PRIVACY_POLICY
map<string, bool> messageNames;
if (generatePrivacyFlags(descriptor, "PRIVACY_POLICY", messageNames)) {
// if no privacy options set at all, define an empty list
printf("const Privacy* PRIVACY_POLICY_LIST[] = { NULL };\n");
}
return true;
}
@@ -126,11 +240,13 @@ int main(int argc, char const *argv[])
if (argc != 2) return 1;
const char* module = argv[1];
Descriptor const* descriptor = IncidentProto::descriptor();
if (strcmp(module, "incident") == 0) {
return !generateIncidentSectionsCpp();
return !generateIncidentSectionsCpp(descriptor);
}
if (strcmp(module, "incidentd") == 0 ) {
return !generateSectionListCpp();
return !generateSectionListCpp(descriptor);
}
// return failure if not called by the whitelisted modules