Commit da1f537d, authored by dtu and committed by Commit bot

[telemetry] Detect and correct overflow in Intel energy MSRs.

Failure seen here:
http://build.chromium.org/p/chromium.perf/builders/Win%207%20Perf%20%281%29/builds/52
AssertionError: Negative energy consumption. (Starting energy was 65522.6123962.)

BUG=336556, 336558
TEST=None.
R=tonyg

Review URL: https://codereview.chromium.org/607733002

Cr-Commit-Position: refs/heads/master@{#296880}
parent a7ae5053
...@@ -103,7 +103,7 @@ class LinuxPlatformBackend( ...@@ -103,7 +103,7 @@ class LinuxPlatformBackend(
def StopMonitoringPower(self): def StopMonitoringPower(self):
return self._power_monitor.StopMonitoringPower() return self._power_monitor.StopMonitoringPower()
def ReadMsr(self, msr_number): def ReadMsr(self, msr_number, start=0, length=64):
cmd = ['/usr/sbin/rdmsr', '-d', str(msr_number)] cmd = ['/usr/sbin/rdmsr', '-d', str(msr_number)]
(out, err) = subprocess.Popen(cmd, (out, err) = subprocess.Popen(cmd,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
...@@ -114,7 +114,7 @@ class LinuxPlatformBackend( ...@@ -114,7 +114,7 @@ class LinuxPlatformBackend(
result = int(out) result = int(out)
except ValueError: except ValueError:
raise OSError('Cannot interpret rdmsr output: %s' % out) raise OSError('Cannot interpret rdmsr output: %s' % out)
return result return result >> start & ((1 << length) - 1)
def _IsIpfwKernelModuleInstalled(self): def _IsIpfwKernelModuleInstalled(self):
return 'ipfw_mod' in subprocess.Popen( return 'ipfw_mod' in subprocess.Popen(
......
...@@ -226,10 +226,16 @@ class PlatformBackend(object): ...@@ -226,10 +226,16 @@ class PlatformBackend(object):
def StopMonitoringPower(self): def StopMonitoringPower(self):
raise NotImplementedError() raise NotImplementedError()
def ReadMsr(self, msr_number): def ReadMsr(self, msr_number, start=0, length=64):
"""Read a CPU model-specific register (MSR). """Read a CPU model-specific register (MSR).
Which MSRs are available depends on the CPU model. Which MSRs are available depends on the CPU model.
On systems with multiple CPUs, this function may run on any CPU. On systems with multiple CPUs, this function may run on any CPU.
Args:
msr_number: The number of the register to read.
start: The least significant bit to read, zero-indexed.
(Said another way, the number of bits to right-shift the MSR value.)
length: The number of bits to read. MSRs are 64 bits, even on 32-bit CPUs.
""" """
raise NotImplementedError() raise NotImplementedError()
...@@ -78,8 +78,9 @@ class MsrPowerMonitor(power_monitor.PowerMonitor): ...@@ -78,8 +78,9 @@ class MsrPowerMonitor(power_monitor.PowerMonitor):
energy_consumption_j = self._PackageEnergyJoules() - self._start_energy_j energy_consumption_j = self._PackageEnergyJoules() - self._start_energy_j
average_temp_c = (self._TemperatureCelsius() + self._start_temp_c) / 2. average_temp_c = (self._TemperatureCelsius() + self._start_temp_c) / 2.
assert energy_consumption_j >= 0, ('Negative energy consumption. (Starting ' if energy_consumption_j < 0: # Correct overflow.
'energy was %s.)' % self._start_energy_j) # The energy portion of the MSR is 4 bytes.
energy_consumption_j += 2 ** 32 * self._EnergyMultiplier()
self._start_energy_j = None self._start_energy_j = None
self._start_temp_c = None self._start_temp_c = None
...@@ -96,19 +97,18 @@ class MsrPowerMonitor(power_monitor.PowerMonitor): ...@@ -96,19 +97,18 @@ class MsrPowerMonitor(power_monitor.PowerMonitor):
@decorators.Cache @decorators.Cache
def _EnergyMultiplier(self): def _EnergyMultiplier(self):
return 0.5 ** ((self._backend.ReadMsr(MSR_RAPL_POWER_UNIT) >> 8) & 0x1f) return 0.5 ** self._backend.ReadMsr(MSR_RAPL_POWER_UNIT, 8, 5)
def _PackageEnergyJoules(self): def _PackageEnergyJoules(self):
return (self._backend.ReadMsr(MSR_PKG_ENERGY_STATUS) * return (self._backend.ReadMsr(MSR_PKG_ENERGY_STATUS, 0, 32) *
self._EnergyMultiplier()) self._EnergyMultiplier())
def _TemperatureCelsius(self): def _TemperatureCelsius(self):
tcc_activation_temp = ( tcc_activation_temp = self._backend.ReadMsr(IA32_TEMPERATURE_TARGET, 16, 7)
self._backend.ReadMsr(IA32_TEMPERATURE_TARGET) >> 16 & 0x7f)
if tcc_activation_temp <= 0: if tcc_activation_temp <= 0:
tcc_activation_temp = 105 tcc_activation_temp = 105
package_temp_headroom = ( package_temp_headroom = self._backend.ReadMsr(
self._backend.ReadMsr(IA32_PACKAGE_THERM_STATUS) >> 16 & 0x7f) IA32_PACKAGE_THERM_STATUS, 16, 7)
return tcc_activation_temp - package_temp_headroom return tcc_activation_temp - package_temp_headroom
def _CheckMSRs(self): def _CheckMSRs(self):
......
...@@ -346,7 +346,7 @@ class WinPlatformBackend(desktop_platform_backend.DesktopPlatformBackend): ...@@ -346,7 +346,7 @@ class WinPlatformBackend(desktop_platform_backend.DesktopPlatformBackend):
self.CloseMsrServer() self.CloseMsrServer()
atexit.register(TerminateProcess, self._msr_server_handle) atexit.register(TerminateProcess, self._msr_server_handle)
def ReadMsr(self, msr_number): def ReadMsr(self, msr_number, start=0, length=64):
self._StartMsrServerIfNeeded() self._StartMsrServerIfNeeded()
if not self._msr_server_handle: if not self._msr_server_handle:
raise OSError('Unable to start MSR server.') raise OSError('Unable to start MSR server.')
...@@ -357,4 +357,4 @@ class WinPlatformBackend(desktop_platform_backend.DesktopPlatformBackend): ...@@ -357,4 +357,4 @@ class WinPlatformBackend(desktop_platform_backend.DesktopPlatformBackend):
response = sock.recv(8) response = sock.recv(8)
finally: finally:
sock.close() sock.close()
return struct.unpack('Q', response)[0] return struct.unpack('Q', response)[0] >> start & ((1 << length) - 1)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment