Allow atoms to log fields in bytes format.

There is an increasing number of requests to log data in complex formats to statsd, where the data
is not expected to be parsed or aggregated by statsd but only to be uploaded as events.

Instead of making an exception for each of these cases in a hard-coded way, this CL adds a feature
to annotate these fields in atoms.proto, and the stats-log-api-gen tool will produce byte array
interfaces for them.

Note that log_msg does not have a byte array type, only a string type, so when statsd receives the
log these fields are of string type. Only when the atom is written to proto do we check whether a
field should be a bytes field and, if so, write it to protobuf in message format.

Change-Id: If53dd95c5826710c76d7fe982bf951a435dfc738
Fix: 118386797
Test: unit test & manual test
This commit is contained in:
Yao Chen
2018-10-24 12:15:56 -07:00
parent 7a1b30e9db
commit bbdd67d19f
7 changed files with 190 additions and 23 deletions

View File

@@ -64,10 +64,22 @@ message StateAtomFieldOption {
optional StateField option = 1 [default = STATE_FIELD_UNSET];
}
// Selects how a field is exposed when generating the StatsLog.write APIs.
enum LogMode {
// Zero value. Note that the log_mode field option defaults to MODE_AUTOMATIC, not to this.
MODE_UNSET = 0;
// Log fields as their actual types, e.g., all primitive data types, or as one of the
// types that are hardcoded in the stats_log_api_gen tool, e.g., AttributionNode.
MODE_AUTOMATIC = 1;
// Log fields in their proto binary format. These fields will not be parsed in statsd.
MODE_BYTES = 2;
}
// Custom field options used to annotate the fields of an atom.
extend google.protobuf.FieldOptions {
// Flags to decorate an atom that presents a state change.
optional StateAtomFieldOption state_field_option = 50000;
// Flags to decorate the uid fields in an atom.
optional bool is_uid = 50001 [default = false];
// Controls how the field is logged. Set to MODE_BYTES to log the field in its proto
// binary format instead of as its declared type; defaults to MODE_AUTOMATIC.
optional LogMode log_mode = 50002 [default = MODE_AUTOMATIC];
}

View File

@@ -25,15 +25,16 @@
#include <utils/Log.h>
#include <utils/SystemClock.h>
using android::util::AtomsInfo;
using android::util::FIELD_COUNT_REPEATED;
using android::util::FIELD_TYPE_BOOL;
using android::util::FIELD_TYPE_FIXED64;
using android::util::FIELD_TYPE_FLOAT;
using android::util::FIELD_TYPE_INT32;
using android::util::FIELD_TYPE_INT64;
using android::util::FIELD_TYPE_UINT64;
using android::util::FIELD_TYPE_FIXED64;
using android::util::FIELD_TYPE_MESSAGE;
using android::util::FIELD_TYPE_STRING;
using android::util::FIELD_TYPE_UINT64;
using android::util::ProtoOutputStream;
namespace android {
@@ -294,8 +295,9 @@ void writeDimensionPathToProto(const std::vector<Matcher>& fieldMatchers,
// }
//
//
void writeFieldValueTreeToStreamHelper(const std::vector<FieldValue>& dims, size_t* index,
int depth, int prefix, ProtoOutputStream* protoOutput) {
void writeFieldValueTreeToStreamHelper(int tagId, const std::vector<FieldValue>& dims,
size_t* index, int depth, int prefix,
ProtoOutputStream* protoOutput) {
size_t count = dims.size();
while (*index < count) {
const auto& dim = dims[*index];
@@ -319,9 +321,31 @@ void writeFieldValueTreeToStreamHelper(const std::vector<FieldValue>& dims, size
case FLOAT:
protoOutput->write(FIELD_TYPE_FLOAT | fieldNum, dim.mValue.float_value);
break;
case STRING:
protoOutput->write(FIELD_TYPE_STRING | fieldNum, dim.mValue.str_value);
case STRING: {
bool isBytesField = false;
// Bytes field is logged via string format in log_msg format. So here we check
// if this string field is a byte field.
std::map<int, std::vector<int>>::const_iterator itr;
if (depth == 0 && (itr = AtomsInfo::kBytesFieldAtoms.find(tagId)) !=
AtomsInfo::kBytesFieldAtoms.end()) {
const std::vector<int>& bytesFields = itr->second;
for (int bytesField : bytesFields) {
if (bytesField == fieldNum) {
// This is a bytes field
isBytesField = true;
break;
}
}
}
if (isBytesField) {
protoOutput->write(FIELD_TYPE_MESSAGE | fieldNum,
(const char*)dim.mValue.str_value.c_str(),
dim.mValue.str_value.length());
} else {
protoOutput->write(FIELD_TYPE_STRING | fieldNum, dim.mValue.str_value);
}
break;
}
case STORAGE:
protoOutput->write(FIELD_TYPE_MESSAGE | fieldNum,
(const char*)dim.mValue.storage_value.data(),
@@ -342,7 +366,7 @@ void writeFieldValueTreeToStreamHelper(const std::vector<FieldValue>& dims, size
}
// Directly jump to the leaf value because the repeated position field is implied
// by the position of the sub msg in the parent field.
writeFieldValueTreeToStreamHelper(dims, index, valueDepth,
writeFieldValueTreeToStreamHelper(tagId, dims, index, valueDepth,
dim.mField.getPrefix(valueDepth), protoOutput);
if (msg_token != 0) {
protoOutput->end(msg_token);
@@ -359,7 +383,7 @@ void writeFieldValueTreeToStream(int tagId, const std::vector<FieldValue>& value
uint64_t atomToken = protoOutput->start(FIELD_TYPE_MESSAGE | tagId);
size_t index = 0;
writeFieldValueTreeToStreamHelper(values, &index, 0, 0, protoOutput);
writeFieldValueTreeToStreamHelper(tagId, values, &index, 0, 0, protoOutput);
protoOutput->end(atomToken);
}

View File

@@ -47,7 +47,8 @@ AtomDecl::AtomDecl(const AtomDecl& that)
fields(that.fields),
primaryFields(that.primaryFields),
exclusiveField(that.exclusiveField),
uidField(that.uidField) {}
uidField(that.uidField),
binaryFields(that.binaryFields) {}
AtomDecl::AtomDecl(int c, const string& n, const string& m)
:code(c),
@@ -119,6 +120,9 @@ java_type(const FieldDescriptor* field)
} else if (field->message_type()->full_name() ==
"android.os.statsd.KeyValuePair") {
return JAVA_TYPE_KEY_VALUE_PAIR;
} else if (field->options().GetExtension(os::statsd::log_mode) ==
os::statsd::LogMode::MODE_BYTES) {
return JAVA_TYPE_BYTE_ARRAY;
} else {
return JAVA_TYPE_OBJECT;
}
@@ -188,6 +192,8 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
for (map<int, const FieldDescriptor *>::const_iterator it = fields.begin();
it != fields.end(); it++) {
const FieldDescriptor *field = it->second;
bool isBinaryField = field->options().GetExtension(os::statsd::log_mode) ==
os::statsd::LogMode::MODE_BYTES;
java_type_t javaType = java_type(field);
@@ -197,17 +203,24 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
continue;
} else if (javaType == JAVA_TYPE_OBJECT &&
atomDecl->code < PULL_ATOM_START_ID) {
// Allow attribution chain, but only at position 1.
print_error(field,
"Message type not allowed for field in pushed atoms: %s\n",
field->name().c_str());
errorCount++;
continue;
} else if (javaType == JAVA_TYPE_BYTE_ARRAY) {
print_error(field, "Raw bytes type not allowed for field: %s\n",
field->name().c_str());
errorCount++;
continue;
// Allow attribution chain, but only at position 1.
print_error(field,
"Message type not allowed for field in pushed atoms: %s\n",
field->name().c_str());
errorCount++;
continue;
} else if (javaType == JAVA_TYPE_BYTE_ARRAY && !isBinaryField) {
print_error(field, "Raw bytes type not allowed for field: %s\n",
field->name().c_str());
errorCount++;
continue;
}
if (isBinaryField && javaType != JAVA_TYPE_BYTE_ARRAY) {
print_error(field, "Cannot mark field %s as bytes.\n",
field->name().c_str());
errorCount++;
continue;
}
}
@@ -233,6 +246,8 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
it != fields.end(); it++) {
const FieldDescriptor *field = it->second;
java_type_t javaType = java_type(field);
bool isBinaryField = field->options().GetExtension(os::statsd::log_mode) ==
os::statsd::LogMode::MODE_BYTES;
AtomField atField(field->name(), javaType);
// Generate signature for pushed atoms
@@ -241,8 +256,10 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
// All enums are treated as ints when it comes to function signatures.
signature->push_back(JAVA_TYPE_INT);
collate_enums(*field->enum_type(), &atField);
} else if (javaType == JAVA_TYPE_OBJECT && isBinaryField) {
signature->push_back(JAVA_TYPE_BYTE_ARRAY);
} else {
signature->push_back(javaType);
signature->push_back(javaType);
}
}
if (javaType == JAVA_TYPE_ENUM) {
@@ -287,6 +304,10 @@ int collate_atom(const Descriptor *atom, AtomDecl *atomDecl,
errorCount++;
}
}
// Binary field validity is already checked above.
if (isBinaryField) {
atomDecl->binaryFields.push_back(it->first);
}
}
return errorCount;

View File

@@ -89,6 +89,8 @@ struct AtomDecl {
int uidField = 0;
vector<int> binaryFields;
AtomDecl();
AtomDecl(const AtomDecl& that);
AtomDecl(int code, const string& name, const string& message);

View File

@@ -66,6 +66,8 @@ cpp_type_name(java_type_t type)
return "double";
case JAVA_TYPE_STRING:
return "char const*";
case JAVA_TYPE_BYTE_ARRAY:
return "char const*";
default:
return "UNKNOWN";
}
@@ -88,6 +90,8 @@ java_type_name(java_type_t type)
return "double";
case JAVA_TYPE_STRING:
return "java.lang.String";
case JAVA_TYPE_BYTE_ARRAY:
return "byte[]";
default:
return "UNKNOWN";
}
@@ -198,13 +202,40 @@ static int write_stats_log_cpp(FILE *out, const Atoms &atoms,
}
fprintf(out, " return options;\n");
fprintf(out, " }\n");
fprintf(out, "}\n");
fprintf(out,
"const std::map<int, StateAtomFieldOptions> "
"AtomsInfo::kStateAtomsFieldOptions = "
"getStateAtomFieldOptions();\n");
fprintf(out,
"static std::map<int, std::vector<int>> "
"getBinaryFieldAtoms() {\n");
fprintf(out, " std::map<int, std::vector<int>> options;\n");
for (set<AtomDecl>::const_iterator atom = atoms.decls.begin();
atom != atoms.decls.end(); atom++) {
if (atom->binaryFields.size() == 0) {
continue;
}
fprintf(out,
"\n // Adding binary fields for atom "
"(%d)%s\n",
atom->code, atom->name.c_str());
for (const auto& field : atom->binaryFields) {
fprintf(out, " options[static_cast<int>(%s)].push_back(%d);\n",
make_constant_name(atom->name).c_str(), field);
}
}
fprintf(out, " return options;\n");
fprintf(out, "}\n");
fprintf(out,
"const std::map<int, std::vector<int>> "
"AtomsInfo::kBytesFieldAtoms = "
"getBinaryFieldAtoms();\n");
fprintf(out, "int64_t lastRetryTimestampNs = -1;\n");
fprintf(out, "const int64_t kMinRetryIntervalNs = NS_PER_SEC * 60 * 20; // 20 minutes\n");
@@ -664,6 +695,9 @@ write_stats_log_header(FILE* out, const Atoms& atoms, const AtomDecl &attributio
fprintf(out,
" const static std::map<int, StateAtomFieldOptions> "
"kStateAtomsFieldOptions;\n");
fprintf(out,
" const static std::map<int, std::vector<int>> "
"kBytesFieldAtoms;");
fprintf(out, "};\n");
fprintf(out, "const static int kMaxPushedAtomId = %d;\n\n",
@@ -698,6 +732,8 @@ static void write_java_usage(FILE* out, const string& method_name, const string&
fprintf(out, ", android.os.WorkSource workSource");
} else if (field->javaType == JAVA_TYPE_KEY_VALUE_PAIR) {
fprintf(out, ", SparseArray<Object> value_map");
} else if (field->javaType == JAVA_TYPE_BYTE_ARRAY) {
fprintf(out, ", byte[] %s", field->name.c_str());
} else {
fprintf(out, ", %s %s", java_type_name(field->javaType), field->name.c_str());
}
@@ -890,6 +926,8 @@ jni_type_name(java_type_t type)
return "jdouble";
case JAVA_TYPE_STRING:
return "jstring";
case JAVA_TYPE_BYTE_ARRAY:
return "jbyteArray";
default:
return "UNKNOWN";
}
@@ -942,6 +980,9 @@ jni_function_name(const string& method_name, const vector<java_type_t>& signatur
case JAVA_TYPE_KEY_VALUE_PAIR:
result += "_KeyValuePairs";
break;
case JAVA_TYPE_BYTE_ARRAY:
result += "_bytes";
break;
default:
result += "_UNKNOWN";
break;
@@ -967,6 +1008,8 @@ java_type_signature(java_type_t type)
return "D";
case JAVA_TYPE_STRING:
return "Ljava/lang/String;";
case JAVA_TYPE_BYTE_ARRAY:
return "[B";
default:
return "UNKNOWN";
}
@@ -1081,6 +1124,25 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp
fprintf(out, " } else {\n");
fprintf(out, " str%d = NULL;\n", argIndex);
fprintf(out, " }\n");
} else if (*arg == JAVA_TYPE_BYTE_ARRAY) {
hadStringOrChain = true;
fprintf(out, " jbyte* jbyte_array%d;\n", argIndex);
fprintf(out, " const char* str%d;\n", argIndex);
fprintf(out, " if (arg%d != NULL) {\n", argIndex);
fprintf(out,
" jbyte_array%d = "
"env->GetByteArrayElements(arg%d, NULL);\n",
argIndex, argIndex);
fprintf(out,
" str%d = "
"reinterpret_cast<char*>(env->GetByteArrayElements(arg%"
"d, NULL));\n",
argIndex, argIndex);
fprintf(out, " } else {\n");
fprintf(out, " jbyte_array%d = NULL;\n", argIndex);
fprintf(out, " str%d = NULL;\n", argIndex);
fprintf(out, " }\n");
} else if (*arg == JAVA_TYPE_ATTRIBUTION_CHAIN) {
hadStringOrChain = true;
for (auto chainField : attributionDecl.fields) {
@@ -1154,7 +1216,10 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp
} else if (*arg == JAVA_TYPE_KEY_VALUE_PAIR) {
fprintf(out, ", int32_t_map, int64_t_map, string_map, float_map");
} else {
const char *argName = (*arg == JAVA_TYPE_STRING) ? "str" : "arg";
const char* argName = (*arg == JAVA_TYPE_STRING ||
*arg == JAVA_TYPE_BYTE_ARRAY)
? "str"
: "arg";
fprintf(out, ", (%s)%s%d", cpp_type_name(*arg), argName, argIndex);
}
argIndex++;
@@ -1171,6 +1236,13 @@ write_stats_log_jni(FILE* out, const string& java_method_name, const string& cpp
fprintf(out, " env->ReleaseStringUTFChars(arg%d, str%d);\n",
argIndex, argIndex);
fprintf(out, " }\n");
} else if (*arg == JAVA_TYPE_BYTE_ARRAY) {
fprintf(out, " if (str%d != NULL) { \n", argIndex);
fprintf(out,
" env->ReleaseByteArrayElements(arg%d, "
"jbyte_array%d, 0);\n",
argIndex, argIndex);
fprintf(out, " }\n");
} else if (*arg == JAVA_TYPE_ATTRIBUTION_CHAIN) {
for (auto chainField : attributionDecl.fields) {
if (chainField.javaType == JAVA_TYPE_INT) {

View File

@@ -109,6 +109,28 @@ message BadAttributionNodePosition {
oneof event { BadAttributionNodePositionAtom bad = 1; }
}
// Test event wrapper whose only atom is GoodBinaryFieldAtom.
message GoodEventWithBinaryFieldAtom {
oneof event { GoodBinaryFieldAtom field1 = 1; }
}
// Minimal nested message used as the type of the `bf` field in the binary-field test atoms.
message ComplexField {
optional string str = 1;
}
// Test atom whose message-typed field `bf` is annotated with MODE_BYTES.
// The PassOnGoodBinaryFieldAtom collation test expects this atom to produce no errors.
message GoodBinaryFieldAtom {
optional int32 field1 = 1;
optional ComplexField bf = 2 [(android.os.statsd.log_mode) = MODE_BYTES];
}
// Test event wrapper whose only atom is BadBinaryFieldAtom.
message BadEventWithBinaryFieldAtom {
oneof event { BadBinaryFieldAtom field1 = 1; }
}
// Test atom whose message-typed field `bf` is NOT annotated with MODE_BYTES.
// The FailOnBadBinaryFieldAtom collation test expects this atom to produce at least one error.
message BadBinaryFieldAtom {
optional int32 field1 = 1;
optional ComplexField bf = 2;
}
message BadStateAtoms {
oneof event {
BadStateAtom1 bad1 = 1;

View File

@@ -212,5 +212,19 @@ TEST(CollationTest, PassOnGoodStateAtomOptions) {
EXPECT_EQ(0, errorCount);
}
// Collating an atom whose message-typed field is annotated with MODE_BYTES
// must not report any error.
TEST(CollationTest, PassOnGoodBinaryFieldAtom) {
    Atoms collated;
    const int errors = collate_atoms(GoodEventWithBinaryFieldAtom::descriptor(), &collated);
    EXPECT_EQ(0, errors);
}
// Collating an atom whose message-typed field lacks the MODE_BYTES annotation
// must report at least one error.
TEST(CollationTest, FailOnBadBinaryFieldAtom) {
    Atoms atoms;
    const int errorCount = collate_atoms(BadEventWithBinaryFieldAtom::descriptor(), &atoms);
    // EXPECT_GT prints the actual error count on failure, which
    // EXPECT_TRUE(errorCount > 0) would hide behind a bare "false".
    EXPECT_GT(errorCount, 0);
}
} // namespace stats_log_api_gen
} // namespace android