From bbdd67d19f4912fbec00220b22e44c68eff5ab3f Mon Sep 17 00:00:00 2001 From: Yao Chen Date: Wed, 24 Oct 2018 12:15:56 -0700 Subject: [PATCH] Allow atoms to log fields in bytes format. There are an increasing number of requests to log data in complex format to statsd, while the data is not expected to be parsed or aggregated by statsd and only to be uploaded as events. Instead of making an exception for each of these cases in a hard coded way, this CL add a feature to annotate these field in atoms.proto and the stats-log-api-gen tool will produce byte array interfaces for them. Note that log_msg does not have byte array type, and only has string type, when statsd receives the log, these fields are in string type. Only when the atom is written to proto, we will check if this field should be bytes field and write it to protobuf in message format. Change-Id: If53dd95c5826710c76d7fe982bf951a435dfc738 Fix: 118386797 Test: unit test & manual test --- cmds/statsd/src/atom_field_options.proto | 12 ++++ cmds/statsd/src/stats_log_util.cpp | 40 +++++++++--- tools/stats_log_api_gen/Collation.cpp | 47 +++++++++---- tools/stats_log_api_gen/Collation.h | 2 + tools/stats_log_api_gen/main.cpp | 76 +++++++++++++++++++++- tools/stats_log_api_gen/test.proto | 22 +++++++ tools/stats_log_api_gen/test_collation.cpp | 14 ++++ 7 files changed, 190 insertions(+), 23 deletions(-) diff --git a/cmds/statsd/src/atom_field_options.proto b/cmds/statsd/src/atom_field_options.proto index 453bf7e126cde..e33bd8c97a079 100644 --- a/cmds/statsd/src/atom_field_options.proto +++ b/cmds/statsd/src/atom_field_options.proto @@ -64,10 +64,22 @@ message StateAtomFieldOption { optional StateField option = 1 [default = STATE_FIELD_UNSET]; } +// Used to generate StatsLog.write APIs. +enum LogMode { + MODE_UNSET = 0; + // Log fields as their actual types e.g., all primary data types. + // Or fields that are hardcoded in stats_log_api_gen tool e.g., AttributionNode + MODE_AUTOMATIC = 1; + // Log fields in their proto binary format. These fields will not be parsed in statsd + MODE_BYTES = 2; +} + extend google.protobuf.FieldOptions { // Flags to decorate an atom that presents a state change. optional StateAtomFieldOption state_field_option = 50000; // Flags to decorate the uid fields in an atom. optional bool is_uid = 50001 [default = false]; + + optional LogMode log_mode = 50002 [default = MODE_AUTOMATIC]; } \ No newline at end of file diff --git a/cmds/statsd/src/stats_log_util.cpp b/cmds/statsd/src/stats_log_util.cpp index 805e5833c1ebd..2498d9f32d06d 100644 --- a/cmds/statsd/src/stats_log_util.cpp +++ b/cmds/statsd/src/stats_log_util.cpp @@ -25,15 +25,16 @@ #include #include +using android::util::AtomsInfo; using android::util::FIELD_COUNT_REPEATED; using android::util::FIELD_TYPE_BOOL; +using android::util::FIELD_TYPE_FIXED64; using android::util::FIELD_TYPE_FLOAT; using android::util::FIELD_TYPE_INT32; using android::util::FIELD_TYPE_INT64; -using android::util::FIELD_TYPE_UINT64; -using android::util::FIELD_TYPE_FIXED64; using android::util::FIELD_TYPE_MESSAGE; using android::util::FIELD_TYPE_STRING; +using android::util::FIELD_TYPE_UINT64; using android::util::ProtoOutputStream; namespace android { @@ -294,8 +295,9 @@ void writeDimensionPathToProto(const std::vector& fieldMatchers, // } // // -void writeFieldValueTreeToStreamHelper(const std::vector& dims, size_t* index, - int depth, int prefix, ProtoOutputStream* protoOutput) { +void writeFieldValueTreeToStreamHelper(int tagId, const std::vector& dims, + size_t* index, int depth, int prefix, + ProtoOutputStream* protoOutput) { size_t count = dims.size(); while (*index < count) { const auto& dim = dims[*index]; @@ -319,9 +321,31 @@ void writeFieldValueTreeToStreamHelper(const std::vector& dims, size case FLOAT: protoOutput->write(FIELD_TYPE_FLOAT | fieldNum, dim.mValue.float_value); break; - case STRING: - protoOutput->write(FIELD_TYPE_STRING | fieldNum, dim.mValue.str_value); + case STRING: { + bool isBytesField = false; + // Bytes field is logged via string format in log_msg format. So here we check + // if this string field is a byte field. + std::map>::const_iterator itr; + if (depth == 0 && (itr = AtomsInfo::kBytesFieldAtoms.find(tagId)) != + AtomsInfo::kBytesFieldAtoms.end()) { + const std::vector& bytesFields = itr->second; + for (int bytesField : bytesFields) { + if (bytesField == fieldNum) { + // This is a bytes field + isBytesField = true; + break; + } + } + } + if (isBytesField) { + protoOutput->write(FIELD_TYPE_MESSAGE | fieldNum, + (const char*)dim.mValue.str_value.c_str(), + dim.mValue.str_value.length()); + } else { + protoOutput->write(FIELD_TYPE_STRING | fieldNum, dim.mValue.str_value); + } break; + } case STORAGE: protoOutput->write(FIELD_TYPE_MESSAGE | fieldNum, (const char*)dim.mValue.storage_value.data(), @@ -342,7 +366,7 @@ void writeFieldValueTreeToStreamHelper(const std::vector& dims, size } // Directly jump to the leaf value because the repeated position field is implied // by the position of the sub msg in the parent field. - writeFieldValueTreeToStreamHelper(dims, index, valueDepth, + writeFieldValueTreeToStreamHelper(tagId, dims, index, valueDepth, dim.mField.getPrefix(valueDepth), protoOutput); if (msg_token != 0) { protoOutput->end(msg_token); @@ -359,7 +383,7 @@ void writeFieldValueTreeToStream(int tagId, const std::vector& value uint64_t atomToken = protoOutput->start(FIELD_TYPE_MESSAGE | tagId); size_t index = 0; - writeFieldValueTreeToStreamHelper(values, &index, 0, 0, protoOutput); + writeFieldValueTreeToStreamHelper(tagId, values, &index, 0, 0, protoOutput); protoOutput->end(atomToken); } diff --git a/tools/stats_log_api_gen/Collation.cpp b/tools/stats_log_api_gen/Collation.cpp index d1f42f8b398e7..257043b307049 100644 --- a/tools/stats_log_api_gen/Collation.cpp +++ b/tools/stats_log_api_gen/Collation.cpp @@ -47,7 +47,8 @@ AtomDecl::AtomDecl(const AtomDecl& that) fields(that.fields), primaryFields(that.primaryFields), exclusiveField(that.exclusiveField), - uidField(that.uidField) {} + uidField(that.uidField), + binaryFields(that.binaryFields) {} AtomDecl::AtomDecl(int c, const string& n, const string& m) :code(c), @@ -119,6 +120,9 @@ java_type(const FieldDescriptor* field) } else if (field->message_type()->full_name() == "android.os.statsd.KeyValuePair") { return JAVA_TYPE_KEY_VALUE_PAIR; + } else if (field->options().GetExtension(os::statsd::log_mode) == + os::statsd::LogMode::MODE_BYTES) { + return JAVA_TYPE_BYTE_ARRAY; } else { return JAVA_TYPE_OBJECT; } @@ -188,6 +192,8 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl, for (map::const_iterator it = fields.begin(); it != fields.end(); it++) { const FieldDescriptor *field = it->second; + bool isBinaryField = field->options().GetExtension(os::statsd::log_mode) == + os::statsd::LogMode::MODE_BYTES; java_type_t javaType = java_type(field); @@ -197,17 +203,24 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl, continue; } else if (javaType == JAVA_TYPE_OBJECT && atomDecl->code < PULL_ATOM_START_ID) { - // Allow attribution chain, but only at position 1. - print_error(field, - "Message type not allowed for field in pushed atoms: %s\n", - field->name().c_str()); - errorCount++; - continue; - } else if (javaType == JAVA_TYPE_BYTE_ARRAY) { - print_error(field, "Raw bytes type not allowed for field: %s\n", - field->name().c_str()); - errorCount++; - continue; + // Allow attribution chain, but only at position 1. + print_error(field, + "Message type not allowed for field in pushed atoms: %s\n", + field->name().c_str()); + errorCount++; + continue; + } else if (javaType == JAVA_TYPE_BYTE_ARRAY && !isBinaryField) { + print_error(field, "Raw bytes type not allowed for field: %s\n", + field->name().c_str()); + errorCount++; + continue; + } + + if (isBinaryField && javaType != JAVA_TYPE_BYTE_ARRAY) { + print_error(field, "Cannot mark field %s as bytes.\n", + field->name().c_str()); + errorCount++; + continue; } } @@ -233,6 +246,8 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl, it != fields.end(); it++) { const FieldDescriptor *field = it->second; java_type_t javaType = java_type(field); + bool isBinaryField = field->options().GetExtension(os::statsd::log_mode) == + os::statsd::LogMode::MODE_BYTES; AtomField atField(field->name(), javaType); // Generate signature for pushed atoms @@ -241,8 +256,10 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl, // All enums are treated as ints when it comes to function signatures. signature->push_back(JAVA_TYPE_INT); collate_enums(*field->enum_type(), &atField); + } else if (javaType == JAVA_TYPE_OBJECT && isBinaryField) { + signature->push_back(JAVA_TYPE_BYTE_ARRAY); } else { - signature->push_back(javaType); + signature->push_back(javaType); } } if (javaType == JAVA_TYPE_ENUM) { @@ -287,6 +304,10 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl, errorCount++; } } + // Binary field validity is already checked above. + if (isBinaryField) { + atomDecl->binaryFields.push_back(it->first); + } } return errorCount; diff --git a/tools/stats_log_api_gen/Collation.h b/tools/stats_log_api_gen/Collation.h index 31b8b07472ccf..450b30547c21f 100644 --- a/tools/stats_log_api_gen/Collation.h +++ b/tools/stats_log_api_gen/Collation.h @@ -89,6 +89,8 @@ struct AtomDecl { int uidField = 0; + vector binaryFields; + AtomDecl(); AtomDecl(const AtomDecl& that); AtomDecl(int code, const string& name, const string& message); diff --git a/tools/stats_log_api_gen/main.cpp b/tools/stats_log_api_gen/main.cpp index 56c8428051909..1ef34b9c22ebf 100644 --- a/tools/stats_log_api_gen/main.cpp +++ b/tools/stats_log_api_gen/main.cpp @@ -66,6 +66,8 @@ cpp_type_name(java_type_t type) return "double"; case JAVA_TYPE_STRING: return "char const*"; + case JAVA_TYPE_BYTE_ARRAY: + return "char const*"; default: return "UNKNOWN"; } @@ -88,6 +90,8 @@ java_type_name(java_type_t type) return "double"; case JAVA_TYPE_STRING: return "java.lang.String"; + case JAVA_TYPE_BYTE_ARRAY: + return "byte[]"; default: return "UNKNOWN"; } @@ -198,13 +202,40 @@ static int write_stats_log_cpp(FILE *out, const Atoms &atoms, } fprintf(out, " return options;\n"); - fprintf(out, " }\n"); + fprintf(out, "}\n"); fprintf(out, "const std::map " "AtomsInfo::kStateAtomsFieldOptions = " "getStateAtomFieldOptions();\n"); + fprintf(out, + "static std::map> " + "getBinaryFieldAtoms() {\n"); + fprintf(out, " std::map> options;\n"); + for (set::const_iterator atom = atoms.decls.begin(); + atom != atoms.decls.end(); atom++) { + if (atom->binaryFields.size() == 0) { + continue; + } + fprintf(out, + "\n // Adding binary fields for atom " + "(%d)%s\n", + atom->code, atom->name.c_str()); + + for (const auto& field : atom->binaryFields) { + fprintf(out, " options[static_cast(%s)].push_back(%d);\n", + make_constant_name(atom->name).c_str(), field); + } + } + + fprintf(out, " return options;\n"); + fprintf(out, "}\n"); + + fprintf(out, + "const std::map> " + "AtomsInfo::kBytesFieldAtoms = " + "getBinaryFieldAtoms();\n"); fprintf(out, "int64_t lastRetryTimestampNs = -1;\n"); fprintf(out, "const int64_t kMinRetryIntervalNs = NS_PER_SEC * 60 * 20; // 20 minutes\n"); @@ -664,6 +695,9 @@ write_stats_log_header(FILE* out, const Atoms& atoms, const AtomDecl &attributio fprintf(out, " const static std::map " "kStateAtomsFieldOptions;\n"); + fprintf(out, + " const static std::map> " + "kBytesFieldAtoms;"); fprintf(out, "};\n"); fprintf(out, "const static int kMaxPushedAtomId = %d;\n\n", @@ -698,6 +732,8 @@ static void write_java_usage(FILE* out, const string& method_name, const string& fprintf(out, ", android.os.WorkSource workSource"); } else if (field->javaType == JAVA_TYPE_KEY_VALUE_PAIR) { fprintf(out, ", SparseArray value_map"); + } else if (field->javaType == JAVA_TYPE_BYTE_ARRAY) { + fprintf(out, ", byte[] %s", field->name.c_str()); } else { fprintf(out, ", %s %s", java_type_name(field->javaType), field->name.c_str()); } @@ -890,6 +926,8 @@ jni_type_name(java_type_t type) return "jdouble"; case JAVA_TYPE_STRING: return "jstring"; + case JAVA_TYPE_BYTE_ARRAY: + return "jbyteArray"; default: return "UNKNOWN"; } @@ -942,6 +980,9 @@ jni_function_name(const string& method_name, const vector& signatur case JAVA_TYPE_KEY_VALUE_PAIR: result += "_KeyValuePairs"; break; + case JAVA_TYPE_BYTE_ARRAY: + result += "_bytes"; + break; default: result += "_UNKNOWN"; break; @@ -967,6 +1008,8 @@ java_type_signature(java_type_t type) return "D"; case JAVA_TYPE_STRING: return "Ljava/lang/String;"; + case JAVA_TYPE_BYTE_ARRAY: + return "[B"; default: return "UNKNOWN"; } @@ -1081,6 +1124,25 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp fprintf(out, " } else {\n"); fprintf(out, " str%d = NULL;\n", argIndex); fprintf(out, " }\n"); + } else if (*arg == JAVA_TYPE_BYTE_ARRAY) { + hadStringOrChain = true; + fprintf(out, " jbyte* jbyte_array%d;\n", argIndex); + fprintf(out, " const char* str%d;\n", argIndex); + fprintf(out, " if (arg%d != NULL) {\n", argIndex); + fprintf(out, + " jbyte_array%d = " + "env->GetByteArrayElements(arg%d, NULL);\n", + argIndex, argIndex); + fprintf(out, + " str%d = " + "reinterpret_cast(env->GetByteArrayElements(arg%" + "d, NULL));\n", + argIndex, argIndex); + fprintf(out, " } else {\n"); + fprintf(out, " jbyte_array%d = NULL;\n", argIndex); + fprintf(out, " str%d = NULL;\n", argIndex); + fprintf(out, " }\n"); + } else if (*arg == JAVA_TYPE_ATTRIBUTION_CHAIN) { hadStringOrChain = true; for (auto chainField : attributionDecl.fields) { @@ -1154,7 +1216,10 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp } else if (*arg == JAVA_TYPE_KEY_VALUE_PAIR) { fprintf(out, ", int32_t_map, int64_t_map, string_map, float_map"); } else { - const char *argName = (*arg == JAVA_TYPE_STRING) ? "str" : "arg"; + const char* argName = (*arg == JAVA_TYPE_STRING || + *arg == JAVA_TYPE_BYTE_ARRAY) + ? "str" + : "arg"; fprintf(out, ", (%s)%s%d", cpp_type_name(*arg), argName, argIndex); } argIndex++; @@ -1171,6 +1236,13 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp fprintf(out, " env->ReleaseStringUTFChars(arg%d, str%d);\n", argIndex, argIndex); fprintf(out, " }\n"); + } else if (*arg == JAVA_TYPE_BYTE_ARRAY) { + fprintf(out, " if (str%d != NULL) { \n", argIndex); + fprintf(out, + " env->ReleaseByteArrayElements(arg%d, " + "jbyte_array%d, 0);\n", + argIndex, argIndex); + fprintf(out, " }\n"); } else if (*arg == JAVA_TYPE_ATTRIBUTION_CHAIN) { for (auto chainField : attributionDecl.fields) { if (chainField.javaType == JAVA_TYPE_INT) { diff --git a/tools/stats_log_api_gen/test.proto b/tools/stats_log_api_gen/test.proto index f6359748a27ea..3be87d95df159 100644 --- a/tools/stats_log_api_gen/test.proto +++ b/tools/stats_log_api_gen/test.proto @@ -109,6 +109,28 @@ message BadAttributionNodePosition { oneof event { BadAttributionNodePositionAtom bad = 1; } } +message GoodEventWithBinaryFieldAtom { + oneof event { GoodBinaryFieldAtom field1 = 1; } +} + +message ComplexField { + optional string str = 1; +} + +message GoodBinaryFieldAtom { + optional int32 field1 = 1; + optional ComplexField bf = 2 [(android.os.statsd.log_mode) = MODE_BYTES]; +} + +message BadEventWithBinaryFieldAtom { + oneof event { BadBinaryFieldAtom field1 = 1; } +} + +message BadBinaryFieldAtom { + optional int32 field1 = 1; + optional ComplexField bf = 2; +} + message BadStateAtoms { oneof event { BadStateAtom1 bad1 = 1; diff --git a/tools/stats_log_api_gen/test_collation.cpp b/tools/stats_log_api_gen/test_collation.cpp index 1936d9667948b..ad3bffacd4426 100644 --- a/tools/stats_log_api_gen/test_collation.cpp +++ b/tools/stats_log_api_gen/test_collation.cpp @@ -212,5 +212,19 @@ TEST(CollationTest, PassOnGoodStateAtomOptions) { EXPECT_EQ(0, errorCount); } +TEST(CollationTest, PassOnGoodBinaryFieldAtom) { + Atoms atoms; + int errorCount = + collate_atoms(GoodEventWithBinaryFieldAtom::descriptor(), &atoms); + EXPECT_EQ(0, errorCount); +} + +TEST(CollationTest, FailOnBadBinaryFieldAtom) { + Atoms atoms; + int errorCount = + collate_atoms(BadEventWithBinaryFieldAtom::descriptor(), &atoms); + EXPECT_TRUE(errorCount > 0); +} + } // namespace stats_log_api_gen } // namespace android \ No newline at end of file