Commit e95c9e29 authored by ahernandez.miralles, committed by Commit bot

Docserver: Override Walk in CachingFileSystem

NOTRY=True

Review URL: https://codereview.chromium.org/521453003

Cr-Commit-Position: refs/heads/master@{#292708}
parent 14cb9047
 application: chrome-apps-doc
-version: 3-40-1
+version: 3-41-0
 runtime: python27
 api_version: 1
 threadsafe: false
......
@@ -6,8 +6,8 @@ import posixpath
 import sys
 
 from file_system import FileSystem, StatInfo, FileNotFoundError
-from future import Future
-from path_util import IsDirectory, ToDirectory
+from future import All, Future
+from path_util import AssertIsDirectory, IsDirectory, ToDirectory
 from third_party.json_schema_compiler.memoize import memoize
@@ -23,12 +23,13 @@ class CachingFileSystem(FileSystem):
           CachingFileSystem,
           category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
-    self._stat_object_store = create_object_store('stat')
+    self._stat_cache = create_object_store('stat')
     # The read caches can start populated (start_empty=False) because file
     # updates are picked up by the stat, so it doesn't need the force-refresh
     # which starting empty is designed for. Without this optimisation, cron
     # runs are extra slow.
-    self._read_object_store = create_object_store('read', start_empty=False)
+    self._read_cache = create_object_store('read', start_empty=False)
+    self._walk_cache = create_object_store('walk', start_empty=False)
 
   def Refresh(self):
     return self._file_system.Refresh()
@@ -55,14 +56,14 @@ class CachingFileSystem(FileSystem):
             (path, dir_path, dir_stat.child_versions))
       return StatInfo(file_version)
 
-    dir_stat = self._stat_object_store.Get(dir_path).Get()
+    dir_stat = self._stat_cache.Get(dir_path).Get()
     if dir_stat is not None:
       return Future(callback=lambda: make_stat_info(dir_stat))
 
    def next(dir_stat):
       assert dir_stat is not None  # should have raised a FileNotFoundError
       # We only ever need to cache the dir stat.
-      self._stat_object_store.Set(dir_path, dir_stat)
+      self._stat_cache.Set(dir_path, dir_stat)
       return make_stat_info(dir_stat)
     return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(next)
@@ -82,8 +83,8 @@ class CachingFileSystem(FileSystem):
     # Files which aren't found are cached in the read object store as
     # (path, None, None). This is to prevent re-reads of files we know
     # do not exist.
-    cached_read_values = self._read_object_store.GetMulti(paths).Get()
-    cached_stat_values = self._stat_object_store.GetMulti(paths).Get()
+    cached_read_values = self._read_cache.GetMulti(paths).Get()
+    cached_stat_values = self._stat_cache.GetMulti(paths).Get()
 
     # Populate a map of paths to Futures to their stat. They may have already
     # been cached in which case their Future will already have been constructed
@@ -126,12 +127,12 @@ class CachingFileSystem(FileSystem):
     def next(new_results):
       # Update the cache. This is a path -> (data, version) mapping.
-      self._read_object_store.SetMulti(
+      self._read_cache.SetMulti(
           dict((path, (new_result, stat_futures[path].Get().version))
               for path, new_result in new_results.iteritems()))
       # Update the read cache to include files that weren't found, to prevent
       # constantly trying to read a file we now know doesn't exist.
-      self._read_object_store.SetMulti(
+      self._read_cache.SetMulti(
           dict((path, (None, None)) for path in paths
               if stat_futures[path].Get() is None))
       new_results.update(up_to_date_data)
@@ -140,6 +141,28 @@ class CachingFileSystem(FileSystem):
     return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()),
                                   skip_not_found=skip_not_found).Then(next)
 
+  def Walk(self, root, depth=-1):
+    '''Overrides FileSystem.Walk() to provide caching functionality.
+    '''
+    def file_lister(root):
+      res, root_stat = All((self._walk_cache.Get(root),
+                            self.StatAsync(root))).Get()
+      if res and res[2] == root_stat.version:
+        dirs, files = res[0], res[1]
+      else:
+        # Wasn't cached, or not up to date.
+        dirs, files = [], []
+        for f in self.ReadSingle(root).Get():
+          if IsDirectory(f):
+            dirs.append(f)
+          else:
+            files.append(f)
+        # Update the cache. This is a root -> (dirs, files, version) mapping.
+        self._walk_cache.Set(root, (dirs, files, root_stat.version))
+      return dirs, files
+    return self._file_system.Walk(root, depth=depth, file_lister=file_lister)
+
   def GetIdentity(self):
     return self._file_system.GetIdentity()
......
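The new file_lister above caches each directory's listing as a (dirs, files, version) tuple and only re-lists a directory when its stat version has moved on. As a minimal, self-contained sketch of that pattern (a plain dict stands in for the object store, and stat_version/list_single_level are stub helpers for illustration, not the real Future-based docserver APIs):

# Sketch only: walk_cache is a plain dict here; stat_version() and
# list_single_level() stand in for StatAsync().Get().version and
# ReadSingle().Get() in the real CachingFileSystem.
walk_cache = {}  # root -> (dirs, files, version)

def stat_version(root):
  return 42  # stand-in for the directory's current stat version

def list_single_level(root):
  # Directories end with '/', files do not; a single level only.
  return ['dir1/', 'file1', 'file2']

def cached_file_lister(root):
  cached = walk_cache.get(root)
  version = stat_version(root)
  if cached is not None and cached[2] == version:
    # Up-to-date cache hit: skip the read entirely.
    return cached[0], cached[1]
  # Cache miss or stale entry: list one level, split dirs from files, recache.
  dirs, files = [], []
  for name in list_single_level(root):
    (dirs if name.endswith('/') else files).append(name)
  walk_cache[root] = (dirs, files, version)
  return dirs, files

Because the version is checked on every call, a stale entry costs one extra listing but never serves an outdated result.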
@@ -58,13 +58,13 @@ class CachingFileSystemTest(unittest.TestCase):
     file_system = self._CreateCachingFileSystem(
         _CreateLocalFs(), start_empty=False)
     expected = ['dir/'] + ['file%d.html' % i for i in range(7)]
-    file_system._read_object_store.Set(
+    file_system._read_cache.Set(
         'list/',
         (expected, file_system.Stat('list/').version))
     self.assertEqual(expected, sorted(file_system.ReadSingle('list/').Get()))
     expected.remove('file0.html')
-    file_system._read_object_store.Set(
+    file_system._read_cache.Set(
         'list/',
         (expected, file_system.Stat('list/').version))
     self.assertEqual(expected, sorted(file_system.ReadSingle('list/').Get()))
@@ -112,9 +112,9 @@ class CachingFileSystemTest(unittest.TestCase):
     # Test directory and subdirectory stats are cached.
     file_system = create_empty_caching_fs()
-    file_system._stat_object_store.Del('bob/bob0')
-    file_system._read_object_store.Del('bob/bob0')
-    file_system._stat_object_store.Del('bob/bob1')
+    file_system._stat_cache.Del('bob/bob0')
+    file_system._read_cache.Del('bob/bob0')
+    file_system._stat_cache.Del('bob/bob1')
     test_fs.IncrementStat();
     futures = (file_system.ReadSingle('bob/bob1'),
                file_system.ReadSingle('bob/bob0'))
@@ -128,8 +128,8 @@ class CachingFileSystemTest(unittest.TestCase):
     # Test a more recent parent directory doesn't force a refetch of children.
     file_system = create_empty_caching_fs()
-    file_system._read_object_store.Del('bob/bob0')
-    file_system._read_object_store.Del('bob/bob1')
+    file_system._read_cache.Del('bob/bob0')
+    file_system._read_cache.Del('bob/bob1')
     futures = (file_system.ReadSingle('bob/bob1'),
                file_system.ReadSingle('bob/bob2'),
                file_system.ReadSingle('bob/bob3'))
@@ -150,7 +150,7 @@ class CachingFileSystemTest(unittest.TestCase):
     self.assertTrue(*mock_fs.CheckAndReset(stat_count=1))
 
     file_system = create_empty_caching_fs()
-    file_system._stat_object_store.Del('bob/bob0')
+    file_system._stat_cache.Del('bob/bob0')
     future = file_system.ReadSingle('bob/bob0')
     self.assertTrue(*mock_fs.CheckAndReset(read_count=1))
     self.assertEqual('bob/bob0 contents', future.Get())
@@ -251,6 +251,46 @@ class CachingFileSystemTest(unittest.TestCase):
         'bob/bob0': 'bob/bob0 contents',
     }, read_skip_not_found(('bob/bob0', 'bob/bob2')))
 
+  def testWalkCaching(self):
+    test_fs = TestFileSystem({
+      'root': {
+        'file1': 'file1',
+        'file2': 'file2',
+        'dir1': {
+          'dir1_file1': 'dir1_file1',
+          'dir2': {},
+          'dir3': {
+            'dir3_file1': 'dir3_file1',
+            'dir3_file2': 'dir3_file2'
+          }
+        }
+      }
+    })
+    mock_fs = MockFileSystem(test_fs)
+    file_system = self._CreateCachingFileSystem(mock_fs, start_empty=True)
+    for walkinfo in file_system.Walk(''):
+      pass
+    self.assertTrue(*mock_fs.CheckAndReset(
+        read_resolve_count=5, read_count=5, stat_count=5))
+
+    all_dirs, all_files = [], []
+    for root, dirs, files in file_system.Walk(''):
+      all_dirs.extend(dirs)
+      all_files.extend(files)
+
+    self.assertEqual(sorted(['root/', 'dir1/', 'dir2/', 'dir3/']),
+                     sorted(all_dirs))
+    self.assertEqual(
+        sorted(['file1', 'file2', 'dir1_file1', 'dir3_file1', 'dir3_file2']),
+        sorted(all_files))
+
+    # All data should be cached.
+    self.assertTrue(*mock_fs.CheckAndReset())
+
+    # Starting from a different root should still pull cached data.
+    for walkinfo in file_system.Walk('root/dir1/'):
+      pass
+    self.assertTrue(*mock_fs.CheckAndReset())
+    # TODO(ahernandez): Test with a new instance of CachingFileSystem so a
+    # different object store is utilized.
 if __name__ == '__main__':
   unittest.main()
@@ -2,4 +2,4 @@ cron:
 - description: Repopulates all cached data.
   url: /_cron
   schedule: every 5 minutes
-  target: 3-40-1
+  target: 3-41-0
@@ -155,7 +155,7 @@ class FileSystem(object):
     '''
     raise NotImplementedError(self.__class__)
 
-  def Walk(self, root, depth=-1):
+  def Walk(self, root, depth=-1, file_lister=None):
     '''Recursively walk the directories in a file system, starting with root.
 
     Behaviour is very similar to os.walk from the standard os module, yielding
@@ -164,6 +164,15 @@ class FileSystem(object):
     |base| respectively. If |depth| is specified and greater than 0, Walk will
     only recurse |depth| times.
 
+    |file_lister|, if specified, should be a callback of signature
+      def my_file_lister(root):,
+    which returns a tuple (dirs, files), where |dirs| is a list of directory
+    names under |root|, and |files| is a list of file names under |root|. Note
+    that the listing of files and directories should be for a *single* level
+    only, i.e. it should not recursively list anything.
+
     Note that directories will always end with a '/', files never will.
 
     If |root| cannot be found, raises a FileNotFoundError.
@@ -176,13 +185,16 @@ class FileSystem(object):
       if depth == 0:
         return
       AssertIsDirectory(root)
 
-      dirs, files = [], []
-      for f in self.ReadSingle(root).Get():
-        if IsDirectory(f):
-          dirs.append(f)
-        else:
-          files.append(f)
+      if file_lister:
+        dirs, files = file_lister(root)
+      else:
+        dirs, files = [], []
+        for f in self.ReadSingle(root).Get():
+          if IsDirectory(f):
+            dirs.append(f)
+          else:
+            files.append(f)
 
       yield root[len(basepath):].rstrip('/'), dirs, files
......
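To make the |file_lister| contract concrete, here is a hedged example of a caller-supplied lister backed by the standard os module. Only the single-level (dirs, files) contract comes from the docstring above; the mapping of Walk paths onto local filesystem paths is an assumption made for illustration.

import os

def my_file_lister(root):
  # Single-level listing per the Walk() docstring: directory names end
  # with '/', file names do not, and nothing is listed recursively.
  dirs, files = [], []
  for name in sorted(os.listdir(root or '.')):
    if os.path.isdir(os.path.join(root, name)):
      dirs.append(name + '/')
    else:
      files.append(name)
  return dirs, files

# Assumed usage with some concrete FileSystem subclass:
#   for path, dirs, files in fs.Walk('docs/', file_lister=my_file_lister):
#     ...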