Commit 40e8ee4f authored by erikchen's avatar erikchen Committed by Commit bot

Telemetry: Create new profile creator large_profile_creator.

This CL creates two subclasses of FastNavigationProfileExtender:
CookieProfileExtender and HistoryProfileExtender. The former performs up to
500 navigations, with the goal of filling up the Cookie Database but not
overfilling it (which is possible, unfortunately). The latter performs a large
number (~20,000) of navigations to URIs pointing at the local file system to
fill up the History Database.

To run these profile extenders, this CL adds the class LargeProfileCreator.
This new class intentionally contains minimal logic, since the existing
profile_creator.py file is pretty hacky, and the goal is to eventually move
away from it entirely.

BUG=442546

Review URL: https://codereview.chromium.org/914253005

Cr-Commit-Position: refs/heads/master@{#317112}
parent bd132952
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import multiprocessing
import os
import sqlite3
from profile_creators import fast_navigation_profile_extender
from profile_creators import profile_safe_url_list
class CookieProfileExtender(
    fast_navigation_profile_extender.FastNavigationProfileExtender):
  """This extender performs a large number of navigations (up to 500), with the
  goal of filling out the cookie database.

  By default, Chrome purges the cookie DB down to 3300 cookies. However, it
  won't purge cookies accessed in the last month. This means the extender needs
  to be careful not to create an artificially high number of cookies.
  """
  # The extender stops navigating once the cookie DB holds more than this many
  # cookies.
  _COOKIE_DB_EXPECTED_SIZE = 3300

  def __init__(self):
    # The rate limiting factors are fetching network resources and executing
    # javascript. There's not much to be done about the former, and having one
    # tab per logical core appears close to optimum for the latter.
    maximum_batch_size = multiprocessing.cpu_count()
    super(CookieProfileExtender, self).__init__(maximum_batch_size)

    # A list of urls that have not yet been navigated to. This list will shrink
    # over time. Each navigation will add a diminishing number of new cookies,
    # since there's a high probability that the cookie is already present. If
    # the cookie DB isn't full by 500 navigations, just give up.
    self._navigation_urls = profile_safe_url_list.GetShuffledSafeUrls()[0:500]

  def GetUrlIterator(self):
    """Superclass override."""
    return iter(self._navigation_urls)

  def ShouldExitAfterBatchNavigation(self):
    """Superclass override."""
    return self._IsCookieDBFull()

  @staticmethod
  def _CookieCountInDB(db_path):
    """Returns the number of rows in the cookies table of the db at |db_path|.

    Args:
      db_path: Path of the SQLite database file to query.

    Errors raised by sqlite3 (for example sqlite3.OperationalError) propagate
    to the caller. The connection is closed whether or not the query succeeds.
    """
    connection = sqlite3.connect(db_path)
    try:
      cursor = connection.cursor()
      cursor.execute("select count(*) from cookies")
      return cursor.fetchone()[0]
    finally:
      connection.close()

  def _IsCookieDBFull(self):
    """Returns whether the cookie DB holds more than the expected cookie count.

    Chrome does not immediately flush cookies to its database. It's possible
    that this method will return a false negative.
    """
    cookie_db_path = os.path.join(self.profile_path, "Default", "Cookies")
    try:
      cookie_count = CookieProfileExtender._CookieCountInDB(cookie_db_path)
    except sqlite3.OperationalError:
      # There will occasionally be contention for the SQLite database. This
      # shouldn't happen often, so ignore the errors.
      return False

    return cookie_count > CookieProfileExtender._COOKIE_DB_EXPECTED_SIZE
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import sqlite3
import tempfile
import unittest
from profile_creators.cookie_profile_extender import CookieProfileExtender
# Testing private method.
# pylint: disable=protected-access
class CookieProfileExtenderTest(unittest.TestCase):
  """Tests for CookieProfileExtender._CookieCountInDB()."""

  def _CreateCookieTable(self, path):
    """Creates an empty, single-column cookies table in the db at |path|."""
    connection = sqlite3.connect(path)
    try:
      cursor = connection.cursor()
      cursor.execute("CREATE TABLE cookies (url text)")
      connection.commit()
    finally:
      # Always release the file, otherwise a failure here would keep the
      # temporary database open and prevent the test from removing it.
      connection.close()

  def _AddCookiesToTable(self, path, count):
    """Inserts |count| rows into the cookies table of the db at |path|.

    Each row's single column holds the decimal string of its index.
    """
    connection = sqlite3.connect(path)
    try:
      cursor = connection.cursor()
      cursor.executemany("INSERT INTO cookies VALUES (?)",
                         [(str(i),) for i in range(count)])
      connection.commit()
    finally:
      connection.close()

  def testCookieCount(self):
    """The reported count matches the number of rows in the cookies table."""
    # Neither tempfile.TemporaryFile() nor tempfile.NamedTemporaryFile() work
    # well here. The former doesn't work at all, since it doesn't guarantee a
    # file-system visible path. The latter doesn't work well, since the
    # returned file cannot be opened a second time on Windows. The returned
    # file would have to be closed, and the method would need to be called with
    # delete=False, which makes its functionality no simpler than
    # tempfile.mkstemp().
    handle, path = tempfile.mkstemp()
    try:
      os.close(handle)

      self._CreateCookieTable(path)
      self.assertEqual(CookieProfileExtender._CookieCountInDB(path), 0)

      self._AddCookiesToTable(path, 100)
      self.assertEqual(CookieProfileExtender._CookieCountInDB(path), 100)
    finally:
      os.remove(path)
...@@ -6,6 +6,7 @@ import time ...@@ -6,6 +6,7 @@ import time
from telemetry.core import browser_finder from telemetry.core import browser_finder
from telemetry.core import browser_finder_exceptions from telemetry.core import browser_finder_exceptions
from telemetry.core import exceptions from telemetry.core import exceptions
from telemetry.core import platform
from telemetry.core import util from telemetry.core import util
...@@ -21,24 +22,26 @@ class FastNavigationProfileExtender(object): ...@@ -21,24 +22,26 @@ class FastNavigationProfileExtender(object):
with the number of batches, but does not scale with the size of the with the number of batches, but does not scale with the size of the
batch. batch.
""" """
def __init__(self): def __init__(self, maximum_batch_size):
"""Initializer.
Args:
maximum_batch_size: A positive integer indicating the number of tabs to
simultaneously perform navigations.
"""
super(FastNavigationProfileExtender, self).__init__() super(FastNavigationProfileExtender, self).__init__()
# The path of the profile that the browser will use while it's running.
# This member is initialized during SetUp().
self._profile_path = None
# A reference to the browser that will be performing all of the tab # A reference to the browser that will be performing all of the tab
# navigations. # navigations.
# This member is initialized during SetUp().
self._browser = None self._browser = None
# A static copy of the urls that this class is going to navigate to.
self._navigation_urls = None
# The number of tabs to use. # The number of tabs to use.
self._NUM_TABS = 15 self._NUM_TABS = maximum_batch_size
# The number of pages to load in parallel.
self._NUM_PARALLEL_PAGES = 15
assert self._NUM_PARALLEL_PAGES <= self._NUM_TABS, (' the batch size can\'t'
' be larger than the number of available tabs')
# The amount of time to wait for a batch of pages to finish loading. # The amount of time to wait for a batch of pages to finish loading.
self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10
...@@ -55,19 +58,68 @@ class FastNavigationProfileExtender(object): ...@@ -55,19 +58,68 @@ class FastNavigationProfileExtender(object):
profile, and sufficient information to choose a specific browser binary. profile, and sufficient information to choose a specific browser binary.
""" """
try: try:
self._navigation_urls = self.GetUrlsToNavigate() self.SetUp(finder_options)
self._SetUpBrowser(finder_options)
self._PerformNavigations() self._PerformNavigations()
finally: finally:
self._TearDownBrowser() self.TearDown()
def GetUrlsToNavigate(self): def GetUrlIterator(self):
"""Returns a list of urls to be navigated to. """Gets URLs for the browser to navigate to.
Intended for subclass override. Intended for subclass override.
Returns:
An iterator whose elements are urls to be navigated to.
""" """
raise NotImplementedError() raise NotImplementedError()
def ShouldExitAfterBatchNavigation(self):
  """Returns a boolean indicating whether profile extension is finished.

  Intended for subclass override. This base implementation does not provide
  an answer; it always raises NotImplementedError.
  """
  raise NotImplementedError()
def SetUp(self, finder_options):
  """Locates the browser, launches it, and opens tabs until the browser has
  at least self._NUM_TABS of them.

  Can be overridden by subclasses. Subclasses must call the super class
  implementation.
  """
  # Remember where the profile lives so that it is reachable through the
  # profile_path property while the browser is running.
  self._profile_path = finder_options.output_profile_path

  candidate_browser = self._GetPossibleBrowser(finder_options)
  assert candidate_browser.supports_tab_control

  host_os_name = platform.GetHostPlatform().GetOSName()
  assert host_os_name in ["win", "mac", "linux"]

  self._browser = candidate_browser.Create(finder_options)

  # The tab count is re-read on every pass, so only the missing tabs are
  # opened.
  while len(self._browser.tabs) < self._NUM_TABS:
    self._browser.tabs.New()
def TearDown(self):
  """Closes the browser, if one is held, and drops the reference to it.

  Can be overridden by subclasses. Subclasses must call the super class
  implementation.
  """
  # Nothing to release when no browser is held.
  if not self._browser:
    return

  self._browser.Close()
  self._browser = None
def CleanUpAfterBatchNavigation(self):
  """A hook for subclasses to perform cleanup after each batch of
  navigations.

  Can be overridden by subclasses. This base implementation does nothing.
  """
  pass
@property
def profile_path(self):
  """The path stored in self._profile_path (assigned from
  finder_options.output_profile_path during SetUp())."""
  return self._profile_path
def _GetPossibleBrowser(self, finder_options): def _GetPossibleBrowser(self, finder_options):
"""Return a possible_browser with the given options.""" """Return a possible_browser with the given options."""
...@@ -162,40 +214,38 @@ class FastNavigationProfileExtender(object): ...@@ -162,40 +214,38 @@ class FastNavigationProfileExtender(object):
# Ignore time outs and web page crashes. # Ignore time outs and web page crashes.
pass pass
def _SetUpBrowser(self, finder_options): def _GetUrlsToNavigate(self, url_iterator):
"""Finds the browser, starts the browser, and opens the requisite number of """Returns an array of urls to navigate to, given a url_iterator."""
tabs.""" urls = []
possible_browser = self._GetPossibleBrowser(finder_options) for _ in xrange(self._NUM_TABS):
self._browser = possible_browser.Create(finder_options) try:
urls.append(url_iterator.next())
for _ in range(self._NUM_TABS): except StopIteration:
self._browser.tabs.New() break
return urls
def _PerformNavigations(self): def _PerformNavigations(self):
"""Performs the navigations specified by |_navigation_urls| in large """Repeatedly fetches a batch of urls, and navigates to those urls. This
batches.""" will run until an empty batch is returned, or
# The index of the first url that has not yet been navigated to. ShouldExitAfterBatchNavigation() returns True.
navigation_url_index = 0 """
url_iterator = self.GetUrlIterator()
while True: while True:
# Generate the next batch of navigations. urls = self._GetUrlsToNavigate(url_iterator)
if len(urls) == 0:
break
batch = [] batch = []
max_index = min(navigation_url_index + self._NUM_PARALLEL_PAGES, for i in range(len(urls)):
len(self._navigation_urls)) url = urls[i]
for i in range(navigation_url_index, max_index): tab = self._browser.tabs[i]
url = self._navigation_urls[i]
tab = self._browser.tabs[i % self._NUM_TABS]
batch.append((tab, url)) batch.append((tab, url))
navigation_url_index = max_index
queued_tabs = self._BatchNavigateTabs(batch) queued_tabs = self._BatchNavigateTabs(batch)
self._WaitForQueuedTabsToLoad(queued_tabs) self._WaitForQueuedTabsToLoad(queued_tabs)
if navigation_url_index == len(self._navigation_urls): self.CleanUpAfterBatchNavigation()
break
def _TearDownBrowser(self): if self.ShouldExitAfterBatchNavigation():
"""Teardown that is guaranteed to be executed before the instance is break
destroyed."""
if self._browser:
self._browser.Close()
self._browser = None
...@@ -26,45 +26,53 @@ class FakeBrowser(object): ...@@ -26,45 +26,53 @@ class FakeBrowser(object):
# pylint: disable=protected-access # pylint: disable=protected-access
class FastNavigationProfileExtenderTest(unittest.TestCase): class FastNavigationProfileExtenderTest(unittest.TestCase):
def testPerformNavigations(self): def testPerformNavigations(self):
extender = FastNavigationProfileExtender() maximum_batch_size = 15
num_urls = extender._NUM_PARALLEL_PAGES * 3 + 4 extender = FastNavigationProfileExtender(maximum_batch_size)
num_tabs = extender._NUM_TABS
navigation_urls = [] navigation_urls = []
for i in range(num_urls): for i in range(extender._NUM_TABS):
navigation_urls.append('http://test%s.com' % i) navigation_urls.append('http://test%s.com' % i)
batch_size = 5
navigation_urls_batch = navigation_urls[3:3 + batch_size]
extender._navigation_urls = navigation_urls extender.GetUrlIterator = mock.MagicMock(
extender._browser = FakeBrowser(num_tabs) return_value=iter(navigation_urls_batch))
extender.ShouldExitAfterBatchNavigation = mock.MagicMock(return_value=True)
extender._WaitForQueuedTabsToLoad = mock.MagicMock() extender._WaitForQueuedTabsToLoad = mock.MagicMock()
extender._browser = FakeBrowser(extender._NUM_TABS)
extender._BatchNavigateTabs = mock.MagicMock() extender._BatchNavigateTabs = mock.MagicMock()
# Set up a callback to record the tabs and urls in each navigation. # Set up a callback to record the tabs and urls in each navigation.
batch_callback_tabs = [] callback_tabs_batch = []
batch_callback_urls = [] callback_urls_batch = []
def SideEffect(*args, **_): def SideEffect(*args, **_):
batch = args[0] batch = args[0]
for tab, url in batch: for tab, url in batch:
batch_callback_tabs.append(tab) callback_tabs_batch.append(tab)
batch_callback_urls.append(url) callback_urls_batch.append(url)
extender._BatchNavigateTabs.side_effect = SideEffect extender._BatchNavigateTabs.side_effect = SideEffect
# Perform the navigations. # Perform the navigations.
extender._PerformNavigations() extender._PerformNavigations()
# Each url should have been navigated to exactly once. # Each url in the batch should have been navigated to exactly once.
self.assertEqual(set(batch_callback_urls), set(navigation_urls)) self.assertEqual(set(callback_urls_batch), set(navigation_urls_batch))
# The other urls should not have been navigated to.
navigation_urls_remaining = (set(navigation_urls) -
set(navigation_urls_batch))
self.assertFalse(navigation_urls_remaining & set(callback_urls_batch))
# The first 4 tabs should have been navigated 4 times. The remaining tabs # The first couple of tabs should have been navigated once. The remaining
# should have been navigated 3 times. # tabs should not have been navigated.
num_navigations_per_tab = 3
num_tabs_with_one_extra_navigation = 4
for i in range(len(extender._browser.tabs)): for i in range(len(extender._browser.tabs)):
tab = extender._browser.tabs[i] tab = extender._browser.tabs[i]
expected_tab_navigation_count = num_navigations_per_tab if i < batch_size:
if i < num_tabs_with_one_extra_navigation: expected_tab_navigation_count = 1
expected_tab_navigation_count += 1 else:
expected_tab_navigation_count = 0
count = batch_callback_tabs.count(tab) count = callback_tabs_batch.count(tab)
self.assertEqual(count, expected_tab_navigation_count) self.assertEqual(count, expected_tab_navigation_count)
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import multiprocessing
import tempfile
import os
from profile_creators import fast_navigation_profile_extender
class HistoryProfileExtender(
    fast_navigation_profile_extender.FastNavigationProfileExtender):
  """This extender navigates Chrome to a large number of URIs pointing to local
  files. It continues running until the history DB becomes full."""
  # The size at which the history DB is considered full.
  _HISTORY_DB_MAX_SIZE_IN_MB = 10

  def __init__(self):
    # The rate limiting factors are the speed of page navigation, and the speed
    # of python bindings. The former is larger than the latter, so having a
    # large batch size skews the amortized average time per page load towards
    # the latter.
    maximum_batch_size = multiprocessing.cpu_count() * 2
    super(HistoryProfileExtender, self).__init__(maximum_batch_size)

    # A list of paths of temporary files. The instance is responsible for
    # making sure that the files are deleted before they are removed from this
    # list.
    self._generated_temp_files = []

  def _MakeTemporaryFile(self):
    """Makes a temporary file and returns a URI to the file.

    This method has the side effect of appending the temporary file to
    self._generated_temp_files. The instance is responsible for deleting the
    file at a later point in time.
    """
    # Adding a long suffix to the name of the file fills up the history
    # database faster. The suffix can't be too long, since some file systems
    # have a 256 character limit on the length of the path. While we could
    # dynamically vary the length of the path, that would generate different
    # profiles on different OSes, which is not ideal.
    suffix = "reallylongsuffixintendedtoartificiallyincreasethelengthoftheurl"

    # Neither tempfile.TemporaryFile() nor tempfile.NamedTemporaryFile() work
    # well here. The former doesn't work at all, since it doesn't guarantee a
    # file-system visible path. The latter doesn't work well, since the
    # returned file cannot be opened a second time on Windows. The returned
    # file would have to be closed, and the method would need to be called with
    # delete=False, which makes its functionality no simpler than
    # tempfile.mkstemp().
    handle, path = tempfile.mkstemp(suffix=suffix)
    # Track the file before touching the handle, so that it still gets cleaned
    # up if closing the handle fails.
    self._generated_temp_files.append(path)
    os.close(handle)

    file_url = "file://" + path
    return file_url

  def _DeleteTemporaryFiles(self):
    """Deletes every file listed in self._generated_temp_files.

    A path is only dropped from the list once its file has been removed, so
    the list stays accurate if os.remove() raises part way through.
    """
    while self._generated_temp_files:
      os.remove(self._generated_temp_files[-1])
      self._generated_temp_files.pop()

  def GetUrlIterator(self):
    """Superclass override.

    Yields an endless stream of URIs, each pointing at a newly created
    temporary file.
    """
    while True:
      yield self._MakeTemporaryFile()

  def ShouldExitAfterBatchNavigation(self):
    """Superclass override."""
    return self._IsHistoryDBAtMaxSize()

  def TearDown(self):
    """Superclass override."""
    try:
      super(HistoryProfileExtender, self).TearDown()
    finally:
      # Remove the temporary files even when the superclass teardown fails.
      self._DeleteTemporaryFiles()

  def CleanUpAfterBatchNavigation(self):
    """Superclass override."""
    self._DeleteTemporaryFiles()

  def _IsHistoryDBAtMaxSize(self):
    """Whether the history DB has reached its maximum size.

    The DB counts as full once its file is larger than 95% of
    _HISTORY_DB_MAX_SIZE_IN_MB. os.stat() raises if the file does not exist.
    """
    history_db_path = os.path.join(self.profile_path, "Default", "History")
    size = os.stat(history_db_path).st_size

    max_size_threshold = 0.95
    # 2**20 bytes per megabyte. (2**10 would be the size of a kilobyte and
    # would make the extender stop at roughly 9.7 KB instead of 9.5 MB.)
    bytes_in_megabyte = 2**20
    max_size = (bytes_in_megabyte *
                HistoryProfileExtender._HISTORY_DB_MAX_SIZE_IN_MB *
                max_size_threshold)
    return size > max_size
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import shutil
import tempfile
import unittest
from profile_creators.history_profile_extender import HistoryProfileExtender
from telemetry import decorators
from telemetry.core import util
from telemetry.unittest_util import options_for_unittests
util.AddDirToPythonPath(util.GetTelemetryDir(), 'third_party', 'mock')
import mock
# Testing private method.
# pylint: disable=protected-access
class HistoryProfileExtenderTest(unittest.TestCase):
  """Exercises HistoryProfileExtender.Run() end to end."""

  # The profile extender does not work on Android or ChromeOS.
  @decorators.Disabled('android', 'chromeos')
  def testFullFunctionality(self):
    """Runs the extender for one batch and checks that a History DB of more
    than 1000 bytes exists in the output profile."""
    extender = HistoryProfileExtender()

    # Stop the extender at the earliest possible opportunity.
    extender.ShouldExitAfterBatchNavigation = mock.MagicMock(return_value=True)
    # Normally, the number of tabs depends on the number of cores. Use a
    # static, small number to increase the speed of the test.
    extender._NUM_TABS = 3

    options = options_for_unittests.GetCopy()
    options.output_profile_path = tempfile.mkdtemp()

    try:
      extender.Run(options)
      self.assertEqual(extender.profile_path, options.output_profile_path)
      self.assertTrue(os.path.exists(extender.profile_path))
      history_db_path = os.path.join(extender.profile_path, "Default",
                                     "History")
      stat_info = os.stat(history_db_path)
      self.assertGreater(stat_info.st_size, 1000)
    finally:
      # Remove the temporary profile directory whether or not the test passed.
      shutil.rmtree(options.output_profile_path)
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from profile_creators import cookie_profile_extender
from profile_creators import history_profile_extender
from telemetry.page import profile_creator
class LargeProfileCreator(profile_creator.ProfileCreator):
  """Builds a large profile by running the history extender and then the
  cookie extender."""

  def Run(self, options):
    """Runs both profile extenders, one after the other, with |options|."""
    # The history extender runs to completion before the cookie extender is
    # created.
    history_extender = history_profile_extender.HistoryProfileExtender()
    history_extender.Run(options)

    cookie_extender = cookie_profile_extender.CookieProfileExtender()
    cookie_extender.Run(options)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment