From 4d1ad408736f28229c3f625de6873e3862adf6bf Mon Sep 17 00:00:00 2001 From: Karthik Ravi Shankar Date: Fri, 13 Oct 2017 15:42:32 -0700 Subject: [PATCH] Add crash recovery logic to HardwarePropertiesManagerService The HardwarePropertiesManagerService-JNI communicates with the thermal HAL to expose temperature and other thermal data to Java services. When Thermal HAL died, there was no recovery mechanism in place. This change adds that recovery mechanism. Bug: 67769672 Test: 1) Rebooted the device multiple times and checked that VrCore is able to get the thermal information correctly. 2) Enter and exit VR mode multiple times killing thermalHal by design and ensure that we're getting thermal temperatures correctly. 10-16 10:05:19.605 1099 1600 E HardwarePropertiesManagerService-JNI: ThermalHAL just died ... 10-16 10:05:29.761 3459 3459 D ThermalWarningManager: THERMAL: ThermalInfo: [current temp=33.0, VR throttling temp (soft exit)=52.0, VR hard exit temp=54.0, VR warning temp=51.0, last throttling warning wall time=n/a, last soft 'exit VR' flow wall time=n/a, last hard 'exit VR' flow wall time=n/a] Change-Id: I55d8eae31526e1bd1a232afea5bd02cb0afca142 Signed-off-by: Karthik Ravi Shankar --- ...erver_HardwarePropertiesManagerService.cpp | 71 +++++++++++++++---- 1 file changed, 58 insertions(+), 13 deletions(-) diff --git a/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp b/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp index 5f67ac1dfe8f2..ed79352bba21d 100644 --- a/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp +++ b/services/core/jni/com_android_server_HardwarePropertiesManagerService.cpp @@ -30,6 +30,8 @@ namespace android { +using android::hidl::base::V1_0::IBase; +using hardware::hidl_death_recipient; using hardware::hidl_vec; using hardware::thermal::V1_0::CoolingDevice; using hardware::thermal::V1_0::CpuUsage; @@ -58,7 +60,22 @@ static struct { jfloat gUndefinedTemperature; -static sp gThermalModule; +static void getThermalHalLocked(); +static std::mutex gThermalHalMutex; +static sp gThermalHal = nullptr; + +// struct ThermalHalDeathRecipient; +struct ThermalHalDeathRecipient : virtual public hidl_death_recipient { + // hidl_death_recipient interface + virtual void serviceDied(uint64_t cookie, const wp& who) override { + std::lock_guard lock(gThermalHalMutex); + ALOGE("ThermalHAL just died"); + gThermalHal = nullptr; + getThermalHalLocked(); + } +}; + +sp gThermalHalDeathRecipient = nullptr; // ---------------------------------------------------------------------------- @@ -66,25 +83,50 @@ float finalizeTemperature(float temperature) { return isnan(temperature) ? gUndefinedTemperature : temperature; } -static void nativeInit(JNIEnv* env, jobject obj) { - // TODO(b/31632518) - if (gThermalModule == nullptr) { - gThermalModule = IThermal::getService(); +// The caller must be holding gThermalHalMutex. +static void getThermalHalLocked() { + if (gThermalHal != nullptr) { + return; } - if (gThermalModule == nullptr) { + gThermalHal = IThermal::getService(); + + if (gThermalHal == nullptr) { ALOGE("Unable to get Thermal service."); + } else { + if (gThermalHalDeathRecipient == nullptr) { + gThermalHalDeathRecipient = new ThermalHalDeathRecipient(); + } + hardware::Return linked = gThermalHal->linkToDeath( + gThermalHalDeathRecipient, 0x451F /* cookie */); + if (!linked.isOk()) { + ALOGE("Transaction error in linking to ThermalHAL death: %s", + linked.description().c_str()); + gThermalHal = nullptr; + } else if (!linked) { + ALOGW("Unable to link to ThermalHal death notifications"); + gThermalHal = nullptr; + } else { + ALOGD("Link to death notification successful"); + } } } +static void nativeInit(JNIEnv* env, jobject obj) { + std::lock_guard lock(gThermalHalMutex); + getThermalHalLocked(); +} + static jfloatArray nativeGetFanSpeeds(JNIEnv *env, jclass /* clazz */) { - if (gThermalModule == nullptr) { + std::lock_guard lock(gThermalHalMutex); + getThermalHalLocked(); + if (gThermalHal == nullptr) { ALOGE("Couldn't get fan speeds because of HAL error."); return env->NewFloatArray(0); } hidl_vec list; - Return ret = gThermalModule->getCoolingDevices( + Return ret = gThermalHal->getCoolingDevices( [&list](ThermalStatus status, hidl_vec devices) { if (status.code == ThermalStatusCode::SUCCESS) { list = std::move(devices); @@ -109,12 +151,14 @@ static jfloatArray nativeGetFanSpeeds(JNIEnv *env, jclass /* clazz */) { static jfloatArray nativeGetDeviceTemperatures(JNIEnv *env, jclass /* clazz */, int type, int source) { - if (gThermalModule == nullptr) { + std::lock_guard lock(gThermalHalMutex); + getThermalHalLocked(); + if (gThermalHal == nullptr) { ALOGE("Couldn't get device temperatures because of HAL error."); return env->NewFloatArray(0); } hidl_vec list; - Return ret = gThermalModule->getTemperatures( + Return ret = gThermalHal->getTemperatures( [&list](ThermalStatus status, hidl_vec temperatures) { if (status.code == ThermalStatusCode::SUCCESS) { list = std::move(temperatures); @@ -154,12 +198,14 @@ static jfloatArray nativeGetDeviceTemperatures(JNIEnv *env, jclass /* clazz */, } static jobjectArray nativeGetCpuUsages(JNIEnv *env, jclass /* clazz */) { - if (gThermalModule == nullptr || !gCpuUsageInfoClassInfo.initMethod) { + std::lock_guard lock(gThermalHalMutex); + getThermalHalLocked(); + if (gThermalHal == nullptr || !gCpuUsageInfoClassInfo.initMethod) { ALOGE("Couldn't get CPU usages because of HAL error."); return env->NewObjectArray(0, gCpuUsageInfoClassInfo.clazz, nullptr); } hidl_vec list; - Return ret = gThermalModule->getCpuUsages( + Return ret = gThermalHal->getCpuUsages( [&list](ThermalStatus status, hidl_vec cpuUsages) { if (status.code == ThermalStatusCode::SUCCESS) { list = std::move(cpuUsages); @@ -202,7 +248,6 @@ static const JNINativeMethod gHardwarePropertiesManagerServiceMethods[] = { }; int register_android_server_HardwarePropertiesManagerService(JNIEnv* env) { - gThermalModule = nullptr; int res = jniRegisterNativeMethods(env, "com/android/server/HardwarePropertiesManagerService", gHardwarePropertiesManagerServiceMethods, NELEM(gHardwarePropertiesManagerServiceMethods));