Commit e95c9e29 authored by ahernandez.miralles's avatar ahernandez.miralles Committed by Commit bot

Docserver: Override Walk in CachingFileSystem

NOTRY=True

Review URL: https://codereview.chromium.org/521453003

Cr-Commit-Position: refs/heads/master@{#292708}
parent 14cb9047
application: chrome-apps-doc
version: 3-40-1
version: 3-41-0
runtime: python27
api_version: 1
threadsafe: false
......
......@@ -6,8 +6,8 @@ import posixpath
import sys
from file_system import FileSystem, StatInfo, FileNotFoundError
from future import Future
from path_util import IsDirectory, ToDirectory
from future import All, Future
from path_util import AssertIsDirectory, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize
......@@ -23,12 +23,13 @@ class CachingFileSystem(FileSystem):
CachingFileSystem,
category='%s/%s' % (file_system.GetIdentity(), category),
**optargs)
self._stat_object_store = create_object_store('stat')
self._stat_cache = create_object_store('stat')
# The read caches can start populated (start_empty=False) because file
# updates are picked up by the stat, so it doesn't need the force-refresh
# which starting empty is designed for. Without this optimisation, cron
# runs are extra slow.
self._read_object_store = create_object_store('read', start_empty=False)
self._read_cache = create_object_store('read', start_empty=False)
self._walk_cache = create_object_store('walk', start_empty=False)
def Refresh(self):
# Delegates straight to the wrapped file system; the caches are not
# invalidated here — staleness is detected per-path via stat versions.
return self._file_system.Refresh()
......@@ -55,14 +56,14 @@ class CachingFileSystem(FileSystem):
(path, dir_path, dir_stat.child_versions))
return StatInfo(file_version)
dir_stat = self._stat_object_store.Get(dir_path).Get()
dir_stat = self._stat_cache.Get(dir_path).Get()
if dir_stat is not None:
return Future(callback=lambda: make_stat_info(dir_stat))
def next(dir_stat):
assert dir_stat is not None # should have raised a FileNotFoundError
# We only ever need to cache the dir stat.
self._stat_object_store.Set(dir_path, dir_stat)
self._stat_cache.Set(dir_path, dir_stat)
return make_stat_info(dir_stat)
return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next)
......@@ -82,8 +83,8 @@ class CachingFileSystem(FileSystem):
# Files which aren't found are cached in the read object store as
# (path, None, None). This is to prevent re-reads of files we know
# do not exist.
cached_read_values = self._read_object_store.GetMulti(paths).Get()
cached_stat_values = self._stat_object_store.GetMulti(paths).Get()
cached_read_values = self._read_cache.GetMulti(paths).Get()
cached_stat_values = self._stat_cache.GetMulti(paths).Get()
# Populate a map of paths to Futures to their stat. They may have already
# been cached in which case their Future will already have been constructed
......@@ -126,12 +127,12 @@ class CachingFileSystem(FileSystem):
def next(new_results):
# Update the cache. This is a path -> (data, version) mapping.
self._read_object_store.SetMulti(
self._read_cache.SetMulti(
dict((path, (new_result, stat_futures[path].Get().version))
for path, new_result in new_results.iteritems()))
# Update the read cache to include files that weren't found, to prevent
# constantly trying to read a file we now know doesn't exist.
self._read_object_store.SetMulti(
self._read_cache.SetMulti(
dict((path, (None, None)) for path in paths
if stat_futures[path].Get() is None))
new_results.update(up_to_date_data)
......@@ -140,6 +141,28 @@ class CachingFileSystem(FileSystem):
return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()),
skip_not_found=skip_not_found).Then(next)
def Walk(self, root, depth=-1):
  '''Overrides FileSystem.Walk() to serve directory listings from
  |self._walk_cache| when the cached entry's version still matches the
  directory's current stat, falling back to a real read otherwise.
  '''
  def cached_lister(dir_path):
    # Fetch the cached (dirs, files, version) entry and the live stat
    # together so a single round-trip decides cache validity.
    cache_entry, stat_info = All((self._walk_cache.Get(dir_path),
                                  self.StatAsync(dir_path))).Get()
    if cache_entry and cache_entry[2] == stat_info.version:
      return cache_entry[0], cache_entry[1]
    # Cache miss or stale entry: list the directory from the file system.
    entries = self.ReadSingle(dir_path).Get()
    subdirs = [name for name in entries if IsDirectory(name)]
    filenames = [name for name in entries if not IsDirectory(name)]
    # Update the cache. This is a root -> (dirs, files, version) mapping.
    self._walk_cache.Set(dir_path, (subdirs, filenames, stat_info.version))
    return subdirs, filenames
  return self._file_system.Walk(root, depth=depth, file_lister=cached_lister)
def GetIdentity(self):
# Identity is that of the wrapped file system, so cache keys derived from
# it stay consistent across caching and non-caching wrappers.
return self._file_system.GetIdentity()
......
......@@ -58,13 +58,13 @@ class CachingFileSystemTest(unittest.TestCase):
file_system = self._CreateCachingFileSystem(
_CreateLocalFs(), start_empty=False)
expected = ['dir/'] + ['file%d.html' % i for i in range(7)]
file_system._read_object_store.Set(
file_system._read_cache.Set(
'list/',
(expected, file_system.Stat('list/').version))
self.assertEqual(expected, sorted(file_system.ReadSingle('list/').Get()))
expected.remove('file0.html')
file_system._read_object_store.Set(
file_system._read_cache.Set(
'list/',
(expected, file_system.Stat('list/').version))
self.assertEqual(expected, sorted(file_system.ReadSingle('list/').Get()))
......@@ -112,9 +112,9 @@ class CachingFileSystemTest(unittest.TestCase):
# Test directory and subdirectory stats are cached.
file_system = create_empty_caching_fs()
file_system._stat_object_store.Del('bob/bob0')
file_system._read_object_store.Del('bob/bob0')
file_system._stat_object_store.Del('bob/bob1')
file_system._stat_cache.Del('bob/bob0')
file_system._read_cache.Del('bob/bob0')
file_system._stat_cache.Del('bob/bob1')
test_fs.IncrementStat();
futures = (file_system.ReadSingle('bob/bob1'),
file_system.ReadSingle('bob/bob0'))
......@@ -128,8 +128,8 @@ class CachingFileSystemTest(unittest.TestCase):
# Test a more recent parent directory doesn't force a refetch of children.
file_system = create_empty_caching_fs()
file_system._read_object_store.Del('bob/bob0')
file_system._read_object_store.Del('bob/bob1')
file_system._read_cache.Del('bob/bob0')
file_system._read_cache.Del('bob/bob1')
futures = (file_system.ReadSingle('bob/bob1'),
file_system.ReadSingle('bob/bob2'),
file_system.ReadSingle('bob/bob3'))
......@@ -150,7 +150,7 @@ class CachingFileSystemTest(unittest.TestCase):
self.assertTrue(*mock_fs.CheckAndReset(stat_count=1))
file_system = create_empty_caching_fs()
file_system._stat_object_store.Del('bob/bob0')
file_system._stat_cache.Del('bob/bob0')
future = file_system.ReadSingle('bob/bob0')
self.assertTrue(*mock_fs.CheckAndReset(read_count=1))
self.assertEqual('bob/bob0 contents', future.Get())
......@@ -251,6 +251,46 @@ class CachingFileSystemTest(unittest.TestCase):
'bob/bob0': 'bob/bob0 contents',
}, read_skip_not_found(('bob/bob0', 'bob/bob2')))
def testWalkCaching(self):
  '''Walking a CachingFileSystem should hit the underlying file system only
  on the first pass; subsequent walks — even from a deeper root — must be
  served entirely from the walk cache.
  '''
  fs_data = TestFileSystem({
    'root': {
      'file1': 'file1',
      'file2': 'file2',
      'dir1': {
        'dir1_file1': 'dir1_file1',
        'dir2': {},
        'dir3': {
          'dir3_file1': 'dir3_file1',
          'dir3_file2': 'dir3_file2'
        }
      }
    }
  })
  tracking_fs = MockFileSystem(fs_data)
  caching_fs = self._CreateCachingFileSystem(tracking_fs, start_empty=True)
  # First walk populates the cache: one read/stat per directory level.
  for _ in caching_fs.Walk(''):
    pass
  self.assertTrue(*tracking_fs.CheckAndReset(
      read_resolve_count=5, read_count=5, stat_count=5))
  seen_dirs = []
  seen_files = []
  for _, dirs, files in caching_fs.Walk(''):
    seen_dirs += dirs
    seen_files += files
  self.assertEqual(sorted(['root/', 'dir1/', 'dir2/', 'dir3/']),
                   sorted(seen_dirs))
  self.assertEqual(
      sorted(['file1', 'file2', 'dir1_file1', 'dir3_file1', 'dir3_file2']),
      sorted(seen_files))
  # All data should be cached.
  self.assertTrue(*tracking_fs.CheckAndReset())
  # Starting from a different root should still pull cached data.
  for _ in caching_fs.Walk('root/dir1/'):
    pass
  self.assertTrue(*tracking_fs.CheckAndReset())
  # TODO(ahernandez): Test with a new instance CachingFileSystem so a
  # different object store is utilized.
# Allow running this test file directly.
if __name__ == '__main__':
unittest.main()
......@@ -2,4 +2,4 @@ cron:
- description: Repopulates all cached data.
url: /_cron
schedule: every 5 minutes
target: 3-40-1
target: 3-41-0
......@@ -155,7 +155,7 @@ class FileSystem(object):
'''
raise NotImplementedError(self.__class__)
def Walk(self, root, depth=-1):
def Walk(self, root, depth=-1, file_lister=None):
'''Recursively walk the directories in a file system, starting with root.
Behaviour is very similar to os.walk from the standard os module, yielding
......@@ -164,6 +164,15 @@ class FileSystem(object):
|base| respectively. If |depth| is specified and greater than 0, Walk will
only recurse |depth| times.
|file_lister|, if specified, should be a callback of signature
def my_file_lister(root):,
which returns a tuple (dirs, files), where |dirs| is a list of directory
names under |root|, and |files| is a list of file names under |root|. Note
that the listing of files and directories should be for a *single* level
only, i.e. it should not recursively list anything.
Note that directories will always end with a '/', files never will.
If |root| cannot be found, raises a FileNotFoundError.
......@@ -176,13 +185,16 @@ class FileSystem(object):
if depth == 0:
return
AssertIsDirectory(root)
dirs, files = [], []
for f in self.ReadSingle(root).Get():
if IsDirectory(f):
dirs.append(f)
else:
files.append(f)
if file_lister:
dirs, files = file_lister(root)
else:
dirs, files = [], []
for f in self.ReadSingle(root).Get():
if IsDirectory(f):
dirs.append(f)
else:
files.append(f)
yield root[len(basepath):].rstrip('/'), dirs, files
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment