Merge "NetdEventListener: add rolling log of connect and dns stats"

am: 64ef2a4d28

Change-Id: I7f25f4ed42f0aecea7a1bb09b8c65a5d2e4a84cd
This commit is contained in:
Hugo Benichi
2017-10-24 00:14:48 +00:00
committed by android-build-merger
4 changed files with 307 additions and 64 deletions

View File

@@ -20,6 +20,7 @@ import android.net.NetworkCapabilities;
import android.system.OsConstants;
import android.util.IntArray;
import android.util.SparseIntArray;
import com.android.internal.util.BitUtils;
import com.android.internal.util.TokenBucket;
@@ -43,6 +44,8 @@ public class ConnectStats {
public final TokenBucket mLatencyTb;
/** Maximum number of latency values recorded. */
public final int mMaxLatencyRecords;
/** Total count of events */
public int eventCount = 0;
/** Total count of successful connects. */
public int connectCount = 0;
/** Total count of successful connects done in blocking mode. */
@@ -57,12 +60,15 @@ public class ConnectStats {
mMaxLatencyRecords = maxLatencyRecords;
}
public void addEvent(int errno, int latencyMs, String ipAddr) {
boolean addEvent(int errno, int latencyMs, String ipAddr) {
eventCount++;
if (isSuccess(errno)) {
countConnect(errno, ipAddr);
countLatency(errno, latencyMs);
return true;
} else {
countError(errno);
return false;
}
}
@@ -101,7 +107,7 @@ public class ConnectStats {
return (errno == 0) || isNonBlocking(errno);
}
private static boolean isNonBlocking(int errno) {
static boolean isNonBlocking(int errno) {
// On non-blocking TCP sockets, connect() immediately returns EINPROGRESS.
// On non-blocking TCP sockets that are connecting, connect() immediately returns EALREADY.
return (errno == EINPROGRESS) || (errno == EALREADY);
@@ -117,6 +123,7 @@ public class ConnectStats {
for (int t : BitUtils.unpackBits(transports)) {
builder.append(NetworkCapabilities.transportNameOf(t)).append(", ");
}
builder.append(String.format("%d events, ", eventCount));
builder.append(String.format("%d success, ", connectCount));
builder.append(String.format("%d blocking, ", connectBlockingCount));
builder.append(String.format("%d IPv6 dst", ipv6ConnectCount));

View File

@@ -17,11 +17,13 @@
package android.net.metrics;
import android.net.NetworkCapabilities;
import java.util.Arrays;
import com.android.internal.util.BitUtils;
import java.util.Arrays;
/**
* A DNS event recorded by NetdEventListenerService.
* A batch of DNS events recorded by NetdEventListenerService for a specific network.
* {@hide}
*/
final public class DnsEvent {
@@ -38,6 +40,8 @@ final public class DnsEvent {
// the eventTypes, returnCodes, and latenciesMs arrays have the same length and the i-th event
// is spread across the three array at position i.
public int eventCount;
// The number of successful DNS queries recorded.
public int successCount;
// The types of DNS queries as defined in INetdEventListener.
public byte[] eventTypes;
// Current getaddrinfo codes go from 1 to EAI_MAX = 15. gethostbyname returns errno, but there
@@ -54,10 +58,11 @@ final public class DnsEvent {
latenciesMs = new int[initialCapacity];
}
public void addResult(byte eventType, byte returnCode, int latencyMs) {
boolean addResult(byte eventType, byte returnCode, int latencyMs) {
boolean isSuccess = (returnCode == 0);
if (eventCount >= SIZE_LIMIT) {
// TODO: implement better rate limiting that does not biases metrics.
return;
return isSuccess;
}
if (eventCount == eventTypes.length) {
resize((int) (1.4 * eventCount));
@@ -66,6 +71,10 @@ final public class DnsEvent {
returnCodes[eventCount] = returnCode;
latenciesMs[eventCount] = latencyMs;
eventCount++;
if (isSuccess) {
successCount++;
}
return isSuccess;
}
public void resize(int newLength) {
@@ -80,6 +89,8 @@ final public class DnsEvent {
for (int t : BitUtils.unpackBits(transports)) {
builder.append(NetworkCapabilities.transportNameOf(t)).append(", ");
}
return builder.append(eventCount).append(" events)").toString();
builder.append(String.format("%d events, ", eventCount));
builder.append(String.format("%d success)", successCount));
return builder.toString();
}
}

View File

@@ -0,0 +1,168 @@
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.net.metrics;
import android.net.NetworkCapabilities;
import com.android.internal.util.BitUtils;
import com.android.internal.util.TokenBucket;
import java.util.StringJoiner;
/**
* A class accumulating network metrics received from Netd regarding dns queries and
* connect() calls on a given network.
*
* This class also accumulates running sums of dns and connect latency stats and
* error counts for bug report logging.
*
* @hide
*/
public class NetworkMetrics {
private static final int INITIAL_DNS_BATCH_SIZE = 100;
private static final int CONNECT_LATENCY_MAXIMUM_RECORDS = 20000;
// The network id of the Android Network.
public final int netId;
// The transport types bitmap of the Android Network, as defined in NetworkCapabilities.java.
public final long transports;
// Accumulated metrics for connect events.
public final ConnectStats connectMetrics;
// Accumulated metrics for dns events.
public final DnsEvent dnsMetrics;
// Running sums of latencies and error counts for connect and dns events.
public final Summary summary;
// Running sums of the most recent latencies and error counts for connect and dns events.
// Starts null until some events are accumulated.
// Allows to collect periodic snapshot of the running summaries for a given network.
public Summary pendingSummary;
public NetworkMetrics(int netId, long transports, TokenBucket tb) {
this.netId = netId;
this.transports = transports;
this.connectMetrics =
new ConnectStats(netId, transports, tb, CONNECT_LATENCY_MAXIMUM_RECORDS);
this.dnsMetrics = new DnsEvent(netId, transports, INITIAL_DNS_BATCH_SIZE);
this.summary = new Summary(netId, transports);
}
/**
* Get currently pending Summary statistics, if any, for this NetworkMetrics, merge them
* into the long running Summary statistics of this NetworkMetrics, and also clear them.
*/
public Summary getPendingStats() {
Summary s = pendingSummary;
pendingSummary = null;
if (s != null) {
summary.merge(s);
}
return s;
}
/** Accumulate a dns query result reported by netd. */
public void addDnsResult(int eventType, int returnCode, int latencyMs) {
if (pendingSummary == null) {
pendingSummary = new Summary(netId, transports);
}
boolean isSuccess = dnsMetrics.addResult((byte) eventType, (byte) returnCode, latencyMs);
pendingSummary.dnsLatencies.count(latencyMs);
pendingSummary.dnsErrorRate.count(isSuccess ? 0 : 1);
}
/** Accumulate a connect query result reported by netd. */
public void addConnectResult(int error, int latencyMs, String ipAddr) {
if (pendingSummary == null) {
pendingSummary = new Summary(netId, transports);
}
boolean isSuccess = connectMetrics.addEvent(error, latencyMs, ipAddr);
pendingSummary.connectErrorRate.count(isSuccess ? 0 : 1);
if (ConnectStats.isNonBlocking(error)) {
pendingSummary.connectLatencies.count(latencyMs);
}
}
/** Represents running sums for dns and connect average error counts and average latencies. */
public static class Summary {
public final int netId;
public final long transports;
// DNS latencies measured in milliseconds.
public final Metrics dnsLatencies = new Metrics();
// DNS error rate measured in percentage points.
public final Metrics dnsErrorRate = new Metrics();
// Blocking connect latencies measured in milliseconds.
public final Metrics connectLatencies = new Metrics();
// Blocking and non blocking connect error rate measured in percentage points.
public final Metrics connectErrorRate = new Metrics();
public Summary(int netId, long transports) {
this.netId = netId;
this.transports = transports;
}
void merge(Summary that) {
dnsLatencies.merge(that.dnsLatencies);
dnsErrorRate.merge(that.dnsErrorRate);
connectLatencies.merge(that.connectLatencies);
connectErrorRate.merge(that.connectErrorRate);
}
@Override
public String toString() {
StringJoiner j = new StringJoiner(", ", "{", "}");
j.add("netId=" + netId);
for (int t : BitUtils.unpackBits(transports)) {
j.add(NetworkCapabilities.transportNameOf(t));
}
j.add(String.format("dns avg=%dms max=%dms err=%.1f%% tot=%d",
(int) dnsLatencies.average(), (int) dnsLatencies.max,
100 * dnsErrorRate.average(), dnsErrorRate.count));
j.add(String.format("connect avg=%dms max=%dms err=%.1f%% tot=%d",
(int) connectLatencies.average(), (int) connectLatencies.max,
100 * connectErrorRate.average(), connectErrorRate.count));
return j.toString();
}
}
/** Tracks a running sum and returns the average of a metric. */
static class Metrics {
public double sum;
public double max = Double.MIN_VALUE;
public int count;
void merge(Metrics that) {
this.count += that.count;
this.sum += that.sum;
this.max = Math.max(this.max, that.max);
}
void count(double value) {
count++;
sum += value;
max = Math.max(max, value);
}
double average() {
double a = sum / (double) count;
if (Double.isNaN(a)) {
a = 0;
}
return a;
}
}
}

View File

@@ -27,6 +27,7 @@ import android.net.metrics.ConnectStats;
import android.net.metrics.DnsEvent;
import android.net.metrics.INetdEventListener;
import android.net.metrics.IpConnectivityLog;
import android.net.metrics.NetworkMetrics;
import android.net.metrics.WakeupEvent;
import android.net.metrics.WakeupStats;
import android.os.RemoteException;
@@ -34,6 +35,7 @@ import android.text.format.DateUtils;
import android.util.Log;
import android.util.ArrayMap;
import android.util.SparseArray;
import com.android.internal.annotations.GuardedBy;
import com.android.internal.annotations.VisibleForTesting;
import com.android.internal.util.BitUtils;
@@ -41,10 +43,11 @@ import com.android.internal.util.IndentingPrintWriter;
import com.android.internal.util.RingBuffer;
import com.android.internal.util.TokenBucket;
import com.android.server.connectivity.metrics.nano.IpConnectivityLogClass.IpConnectivityEvent;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.StringJoiner;
/**
* Implementation of the INetdEventListener interface.
@@ -57,13 +60,13 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
private static final boolean DBG = false;
private static final boolean VDBG = false;
private static final int INITIAL_DNS_BATCH_SIZE = 100;
// Rate limit connect latency logging to 1 measurement per 15 seconds (5760 / day) with maximum
// bursts of 5000 measurements.
private static final int CONNECT_LATENCY_BURST_LIMIT = 5000;
private static final int CONNECT_LATENCY_FILL_RATE = 15 * (int) DateUtils.SECOND_IN_MILLIS;
private static final int CONNECT_LATENCY_MAXIMUM_RECORDS = 20000;
private static final long METRICS_SNAPSHOT_SPAN_MS = 5 * DateUtils.MINUTE_IN_MILLIS;
private static final int METRICS_SNAPSHOT_BUFFER_SIZE = 48; // 4 hours
@VisibleForTesting
static final int WAKEUP_EVENT_BUFFER_LENGTH = 1024;
@@ -72,11 +75,15 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
@VisibleForTesting
static final String WAKEUP_EVENT_IFACE_PREFIX = "iface:";
// Sparse arrays of DNS and connect events, grouped by net id.
// Array of aggregated DNS and connect events sent by netd, grouped by net id.
@GuardedBy("this")
private final SparseArray<DnsEvent> mDnsEvents = new SparseArray<>();
private final SparseArray<NetworkMetrics> mNetworkMetrics = new SparseArray<>();
@GuardedBy("this")
private final SparseArray<ConnectStats> mConnectEvents = new SparseArray<>();
private final RingBuffer<NetworkMetricsSnapshot> mNetworkMetricsSnapshots =
new RingBuffer<>(NetworkMetricsSnapshot.class, METRICS_SNAPSHOT_BUFFER_SIZE);
@GuardedBy("this")
private long mLastSnapshot = 0;
// Array of aggregated wakeup event stats, grouped by interface name.
@GuardedBy("this")
@@ -84,7 +91,7 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
// Ring buffer array for storing packet wake up events sent by Netd.
@GuardedBy("this")
private final RingBuffer<WakeupEvent> mWakeupEvents =
new RingBuffer(WakeupEvent.class, WAKEUP_EVENT_BUFFER_LENGTH);
new RingBuffer<>(WakeupEvent.class, WAKEUP_EVENT_BUFFER_LENGTH);
private final ConnectivityManager mCm;
@@ -116,6 +123,41 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
mCm = cm;
}
private static long projectSnapshotTime(long timeMs) {
return (timeMs / METRICS_SNAPSHOT_SPAN_MS) * METRICS_SNAPSHOT_SPAN_MS;
}
private NetworkMetrics getMetricsForNetwork(long timeMs, int netId) {
collectPendingMetricsSnapshot(timeMs);
NetworkMetrics metrics = mNetworkMetrics.get(netId);
if (metrics == null) {
// TODO: allow to change transport for a given netid.
metrics = new NetworkMetrics(netId, getTransports(netId), mConnectTb);
mNetworkMetrics.put(netId, metrics);
}
return metrics;
}
private NetworkMetricsSnapshot[] getNetworkMetricsSnapshots() {
collectPendingMetricsSnapshot(System.currentTimeMillis());
return mNetworkMetricsSnapshots.toArray();
}
private void collectPendingMetricsSnapshot(long timeMs) {
// Detects time differences larger than the snapshot collection period.
// This is robust against clock jumps and long inactivity periods.
if (Math.abs(timeMs - mLastSnapshot) <= METRICS_SNAPSHOT_SPAN_MS) {
return;
}
mLastSnapshot = projectSnapshotTime(timeMs);
NetworkMetricsSnapshot snapshot =
NetworkMetricsSnapshot.collect(mLastSnapshot, mNetworkMetrics);
if (snapshot.stats.isEmpty()) {
return;
}
mNetworkMetricsSnapshots.append(snapshot);
}
@Override
// Called concurrently by multiple binder threads.
// This method must not block or perform long-running operations.
@@ -124,15 +166,10 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
throws RemoteException {
maybeVerboseLog("onDnsEvent(%d, %d, %d, %dms)", netId, eventType, returnCode, latencyMs);
DnsEvent dnsEvent = mDnsEvents.get(netId);
if (dnsEvent == null) {
dnsEvent = makeDnsEvent(netId);
mDnsEvents.put(netId, dnsEvent);
}
dnsEvent.addResult((byte) eventType, (byte) returnCode, latencyMs);
long timestamp = System.currentTimeMillis();
getMetricsForNetwork(timestamp, netId).addDnsResult(eventType, returnCode, latencyMs);
if (mNetdEventCallback != null) {
long timestamp = System.currentTimeMillis();
mNetdEventCallback.onDnsEvent(hostname, ipAddresses, ipAddressesCount, timestamp, uid);
}
}
@@ -144,15 +181,11 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
int port, int uid) throws RemoteException {
maybeVerboseLog("onConnectEvent(%d, %d, %dms)", netId, error, latencyMs);
ConnectStats connectStats = mConnectEvents.get(netId);
if (connectStats == null) {
connectStats = makeConnectStats(netId);
mConnectEvents.put(netId, connectStats);
}
connectStats.addEvent(error, latencyMs, ipAddr);
long timestamp = System.currentTimeMillis();
getMetricsForNetwork(timestamp, netId).addConnectResult(error, latencyMs, ipAddr);
if (mNetdEventCallback != null) {
mNetdEventCallback.onConnectEvent(ipAddr, port, System.currentTimeMillis(), uid);
mNetdEventCallback.onConnectEvent(ipAddr, port, timestamp, uid);
}
}
@@ -189,11 +222,24 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
}
public synchronized void flushStatistics(List<IpConnectivityEvent> events) {
flushProtos(events, mConnectEvents, IpConnectivityEventBuilder::toProto);
flushProtos(events, mDnsEvents, IpConnectivityEventBuilder::toProto);
for (int i = 0; i < mNetworkMetrics.size(); i++) {
ConnectStats stats = mNetworkMetrics.valueAt(i).connectMetrics;
if (stats.eventCount == 0) {
continue;
}
events.add(IpConnectivityEventBuilder.toProto(stats));
}
for (int i = 0; i < mNetworkMetrics.size(); i++) {
DnsEvent ev = mNetworkMetrics.valueAt(i).dnsMetrics;
if (ev.eventCount == 0) {
continue;
}
events.add(IpConnectivityEventBuilder.toProto(ev));
}
for (int i = 0; i < mWakeupStats.size(); i++) {
events.add(IpConnectivityEventBuilder.toProto(mWakeupStats.valueAt(i)));
}
mNetworkMetrics.clear();
mWakeupStats.clear();
}
@@ -206,8 +252,15 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
}
public synchronized void list(PrintWriter pw) {
listEvents(pw, mConnectEvents, (x) -> x, "\n");
listEvents(pw, mDnsEvents, (x) -> x, "\n");
for (int i = 0; i < mNetworkMetrics.size(); i++) {
pw.println(mNetworkMetrics.valueAt(i).connectMetrics);
}
for (int i = 0; i < mNetworkMetrics.size(); i++) {
pw.println(mNetworkMetrics.valueAt(i).dnsMetrics);
}
for (NetworkMetricsSnapshot s : getNetworkMetricsSnapshots()) {
pw.println(s);
}
for (int i = 0; i < mWakeupStats.size(); i++) {
pw.println(mWakeupStats.valueAt(i));
}
@@ -217,41 +270,17 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
}
public synchronized void listAsProtos(PrintWriter pw) {
listEvents(pw, mConnectEvents, IpConnectivityEventBuilder::toProto, "");
listEvents(pw, mDnsEvents, IpConnectivityEventBuilder::toProto, "");
for (int i = 0; i < mNetworkMetrics.size(); i++) {
pw.print(IpConnectivityEventBuilder.toProto(mNetworkMetrics.valueAt(i).connectMetrics));
}
for (int i = 0; i < mNetworkMetrics.size(); i++) {
pw.print(IpConnectivityEventBuilder.toProto(mNetworkMetrics.valueAt(i).dnsMetrics));
}
for (int i = 0; i < mWakeupStats.size(); i++) {
pw.print(IpConnectivityEventBuilder.toProto(mWakeupStats.valueAt(i)));
}
}
private static <T> void flushProtos(List<IpConnectivityEvent> out, SparseArray<T> in,
Function<T, IpConnectivityEvent> mapper) {
for (int i = 0; i < in.size(); i++) {
out.add(mapper.apply(in.valueAt(i)));
}
in.clear();
}
private static <T> void listEvents(
PrintWriter pw, SparseArray<T> events, Function<T, Object> mapper, String separator) {
// Proto derived Classes have toString method that adds a \n at the end.
// Let the caller control that by passing in the line separator explicitly.
for (int i = 0; i < events.size(); i++) {
pw.print(mapper.apply(events.valueAt(i)));
pw.print(separator);
}
}
private ConnectStats makeConnectStats(int netId) {
long transports = getTransports(netId);
return new ConnectStats(netId, transports, mConnectTb, CONNECT_LATENCY_MAXIMUM_RECORDS);
}
private DnsEvent makeDnsEvent(int netId) {
long transports = getTransports(netId);
return new DnsEvent(netId, transports, INITIAL_DNS_BATCH_SIZE);
}
private long getTransports(int netId) {
// TODO: directly query ConnectivityService instead of going through Binder interface.
NetworkCapabilities nc = mCm.getNetworkCapabilities(new Network(netId));
@@ -268,4 +297,32 @@ public class NetdEventListenerService extends INetdEventListener.Stub {
private static void maybeVerboseLog(String s, Object... args) {
if (VDBG) Log.d(TAG, String.format(s, args));
}
/** Helper class for buffering summaries of NetworkMetrics at regular time intervals */
static class NetworkMetricsSnapshot {
public long timeMs;
public List<NetworkMetrics.Summary> stats = new ArrayList<>();
static NetworkMetricsSnapshot collect(long timeMs, SparseArray<NetworkMetrics> networkMetrics) {
NetworkMetricsSnapshot snapshot = new NetworkMetricsSnapshot();
snapshot.timeMs = timeMs;
for (int i = 0; i < networkMetrics.size(); i++) {
NetworkMetrics.Summary s = networkMetrics.valueAt(i).getPendingStats();
if (s != null) {
snapshot.stats.add(s);
}
}
return snapshot;
}
@Override
public String toString() {
StringJoiner j = new StringJoiner(", ");
for (NetworkMetrics.Summary s : stats) {
j.add(s.toString());
}
return String.format("%tT.%tL: %s", timeMs, timeMs, j.toString());
}
}
}