Commit da1f537d authored by dtu, committed by Commit bot

[telemetry] Detect and correct overflow in Intel energy MSRs.

Failure seen here:
http://build.chromium.org/p/chromium.perf/builders/Win%207%20Perf%20%281%29/builds/52
AssertionError: Negative energy consumption. (Starting energy was 65522.6123962.)

BUG=336556, 336558
TEST=None.
R=tonyg

Review URL: https://codereview.chromium.org/607733002

Cr-Commit-Position: refs/heads/master@{#296880}
parent a7ae5053
......@@ -103,7 +103,7 @@ class LinuxPlatformBackend(
def StopMonitoringPower(self):
return self._power_monitor.StopMonitoringPower()
def ReadMsr(self, msr_number):
def ReadMsr(self, msr_number, start=0, length=64):
cmd = ['/usr/sbin/rdmsr', '-d', str(msr_number)]
(out, err) = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
......@@ -114,7 +114,7 @@ class LinuxPlatformBackend(
result = int(out)
except ValueError:
raise OSError('Cannot interpret rdmsr output: %s' % out)
return result
return result >> start & ((1 << length) - 1)
def _IsIpfwKernelModuleInstalled(self):
return 'ipfw_mod' in subprocess.Popen(
......
......@@ -226,10 +226,16 @@ class PlatformBackend(object):
def StopMonitoringPower(self):
raise NotImplementedError()
def ReadMsr(self, msr_number):
def ReadMsr(self, msr_number, start=0, length=64):
"""Read a CPU model-specific register (MSR).
Which MSRs are available depends on the CPU model.
On systems with multiple CPUs, this function may run on any CPU.
Args:
msr_number: The number of the register to read.
start: The least significant bit to read, zero-indexed.
(Said another way, the number of bits to right-shift the MSR value.)
length: The number of bits to read. MSRs are 64 bits, even on 32-bit CPUs.
"""
raise NotImplementedError()
......@@ -78,8 +78,9 @@ class MsrPowerMonitor(power_monitor.PowerMonitor):
energy_consumption_j = self._PackageEnergyJoules() - self._start_energy_j
average_temp_c = (self._TemperatureCelsius() + self._start_temp_c) / 2.
assert energy_consumption_j >= 0, ('Negative energy consumption. (Starting '
'energy was %s.)' % self._start_energy_j)
if energy_consumption_j < 0: # Correct overflow.
# The energy portion of the MSR is 4 bytes.
energy_consumption_j += 2 ** 32 * self._EnergyMultiplier()
self._start_energy_j = None
self._start_temp_c = None
......@@ -96,19 +97,18 @@ class MsrPowerMonitor(power_monitor.PowerMonitor):
@decorators.Cache
def _EnergyMultiplier(self):
return 0.5 ** ((self._backend.ReadMsr(MSR_RAPL_POWER_UNIT) >> 8) & 0x1f)
return 0.5 ** self._backend.ReadMsr(MSR_RAPL_POWER_UNIT, 8, 5)
def _PackageEnergyJoules(self):
return (self._backend.ReadMsr(MSR_PKG_ENERGY_STATUS) *
return (self._backend.ReadMsr(MSR_PKG_ENERGY_STATUS, 0, 32) *
self._EnergyMultiplier())
def _TemperatureCelsius(self):
tcc_activation_temp = (
self._backend.ReadMsr(IA32_TEMPERATURE_TARGET) >> 16 & 0x7f)
tcc_activation_temp = self._backend.ReadMsr(IA32_TEMPERATURE_TARGET, 16, 7)
if tcc_activation_temp <= 0:
tcc_activation_temp = 105
package_temp_headroom = (
self._backend.ReadMsr(IA32_PACKAGE_THERM_STATUS) >> 16 & 0x7f)
package_temp_headroom = self._backend.ReadMsr(
IA32_PACKAGE_THERM_STATUS, 16, 7)
return tcc_activation_temp - package_temp_headroom
def _CheckMSRs(self):
......
......@@ -346,7 +346,7 @@ class WinPlatformBackend(desktop_platform_backend.DesktopPlatformBackend):
self.CloseMsrServer()
atexit.register(TerminateProcess, self._msr_server_handle)
def ReadMsr(self, msr_number):
def ReadMsr(self, msr_number, start=0, length=64):
self._StartMsrServerIfNeeded()
if not self._msr_server_handle:
raise OSError('Unable to start MSR server.')
......@@ -357,4 +357,4 @@ class WinPlatformBackend(desktop_platform_backend.DesktopPlatformBackend):
response = sock.recv(8)
finally:
sock.close()
return struct.unpack('Q', response)[0]
return struct.unpack('Q', response)[0] >> start & ((1 << length) - 1)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment