Commit e8fc5eb6 authored by ahernandez.miralles's avatar ahernandez.miralles Committed by Commit bot

Docserver: Add more skip_not_found support and cache "not found"s

BUG=305280
NOTRY=True

Review URL: https://codereview.chromium.org/512453002

Cr-Commit-Position: refs/heads/master@{#292212}
parent 7465b713
application: chrome-apps-doc application: chrome-apps-doc
version: 3-39-4 version: 3-40-0
runtime: python27 runtime: python27
api_version: 1 api_version: 1
threadsafe: false threadsafe: false
......
...@@ -57,7 +57,7 @@ class CachingFileSystem(FileSystem): ...@@ -57,7 +57,7 @@ class CachingFileSystem(FileSystem):
dir_stat = self._stat_object_store.Get(dir_path).Get() dir_stat = self._stat_object_store.Get(dir_path).Get()
if dir_stat is not None: if dir_stat is not None:
return Future(value=make_stat_info(dir_stat)) return Future(callback=lambda: make_stat_info(dir_stat))
def next(dir_stat): def next(dir_stat):
assert dir_stat is not None # should have raised a FileNotFoundError assert dir_stat is not None # should have raised a FileNotFoundError
...@@ -76,9 +76,12 @@ class CachingFileSystem(FileSystem): ...@@ -76,9 +76,12 @@ class CachingFileSystem(FileSystem):
return self._file_system.StatAsync(dir_path) return self._file_system.StatAsync(dir_path)
def Read(self, paths, skip_not_found=False): def Read(self, paths, skip_not_found=False):
'''Reads a list of files. If a file is in memcache and it is not out of '''Reads a list of files. If a file is cached and it is not out of
date, it is returned. Otherwise, the file is retrieved from the file system. date, it is returned. Otherwise, the file is retrieved from the file system.
''' '''
# Files which aren't found are cached in the read object store as
# (path, None, None). This is to prevent re-reads of files we know
# do not exist.
cached_read_values = self._read_object_store.GetMulti(paths).Get() cached_read_values = self._read_object_store.GetMulti(paths).Get()
cached_stat_values = self._stat_object_store.GetMulti(paths).Get() cached_stat_values = self._stat_object_store.GetMulti(paths).Get()
...@@ -102,26 +105,39 @@ class CachingFileSystem(FileSystem): ...@@ -102,26 +105,39 @@ class CachingFileSystem(FileSystem):
stat_future = Future(value=stat_value) stat_future = Future(value=stat_value)
stat_futures[path] = stat_future stat_futures[path] = stat_future
# Filter only the cached data which is fresh by comparing to the latest # Filter only the cached data which is up to date by comparing to the latest
# stat. The cached read data includes the cached version. Remove it for # stat. The cached read data includes the cached version. Remove it for
# the result returned to callers. # the result returned to callers. |version| == None implies a non-existent
fresh_data = dict( # file, so skip it.
up_to_date_data = dict(
(path, data) for path, (data, version) in cached_read_values.iteritems() (path, data) for path, (data, version) in cached_read_values.iteritems()
if stat_futures[path].Get().version == version) if version is not None and stat_futures[path].Get().version == version)
if len(fresh_data) == len(paths): if skip_not_found:
# Filter out paths which we know do not exist, i.e. if |path| is in
# |cached_read_values| *and* has a None version, then it doesn't exist.
# See the above declaration of |cached_read_values| for more information.
paths = [path for path in paths
if cached_read_values.get(path, (None, True))[1]]
if len(up_to_date_data) == len(paths):
# Everything was cached and up-to-date. # Everything was cached and up-to-date.
return Future(value=fresh_data) return Future(value=up_to_date_data)
def next(new_results): def next(new_results):
# Update the cache. This is a path -> (data, version) mapping. # Update the cache. This is a path -> (data, version) mapping.
self._read_object_store.SetMulti( self._read_object_store.SetMulti(
dict((path, (new_result, stat_futures[path].Get().version)) dict((path, (new_result, stat_futures[path].Get().version))
for path, new_result in new_results.iteritems())) for path, new_result in new_results.iteritems()))
new_results.update(fresh_data) # Update the read cache to include files that weren't found, to prevent
# constantly trying to read a file we now know doesn't exist.
self._read_object_store.SetMulti(
dict((path, (None, None)) for path in paths
if stat_futures[path].Get() is None))
new_results.update(up_to_date_data)
return new_results return new_results
# Read in the values that were uncached or old. # Read in the values that were uncached or old.
return self._file_system.Read(set(paths) - set(fresh_data.iterkeys()), return self._file_system.Read(set(paths) - set(up_to_date_data.iterkeys()),
skip_not_found=skip_not_found).Then(next) skip_not_found=skip_not_found).Then(next)
def GetIdentity(self): def GetIdentity(self):
......
...@@ -9,7 +9,7 @@ import unittest ...@@ -9,7 +9,7 @@ import unittest
from caching_file_system import CachingFileSystem from caching_file_system import CachingFileSystem
from extensions_paths import SERVER2 from extensions_paths import SERVER2
from file_system import StatInfo from file_system import FileNotFoundError, StatInfo
from local_file_system import LocalFileSystem from local_file_system import LocalFileSystem
from mock_file_system import MockFileSystem from mock_file_system import MockFileSystem
from object_store_creator import ObjectStoreCreator from object_store_creator import ObjectStoreCreator
...@@ -159,6 +159,23 @@ class CachingFileSystemTest(unittest.TestCase): ...@@ -159,6 +159,23 @@ class CachingFileSystemTest(unittest.TestCase):
file_system.ReadSingle('bob/bob0').Get()) file_system.ReadSingle('bob/bob0').Get())
self.assertTrue(*mock_fs.CheckAndReset()) self.assertTrue(*mock_fs.CheckAndReset())
# Test skip_not_found caching behavior.
file_system = create_empty_caching_fs()
future = file_system.ReadSingle('bob/no_file', skip_not_found=True)
self.assertTrue(*mock_fs.CheckAndReset(read_count=1))
self.assertEqual(None, future.Get())
self.assertTrue(*mock_fs.CheckAndReset(read_resolve_count=1, stat_count=1))
future = file_system.ReadSingle('bob/no_file', skip_not_found=True)
# There shouldn't be another read/stat from the file system;
# we know the file is not there.
self.assertTrue(*mock_fs.CheckAndReset())
future = file_system.ReadSingle('bob/no_file')
self.assertTrue(*mock_fs.CheckAndReset(read_count=1))
# Even though we cached information about non-existent files,
# trying to read one without specifiying skip_not_found should
# still raise an error.
self.assertRaises(FileNotFoundError, future.Get)
def testCachedStat(self): def testCachedStat(self):
test_fs = TestFileSystem({ test_fs = TestFileSystem({
'bob': { 'bob': {
...@@ -219,20 +236,20 @@ class CachingFileSystemTest(unittest.TestCase): ...@@ -219,20 +236,20 @@ class CachingFileSystemTest(unittest.TestCase):
test_fs.IncrementStat() test_fs.IncrementStat()
run_expecting_stat('1') run_expecting_stat('1')
def testSkipNotFound(self): def testSkipNotFound(self):
caching_fs = self._CreateCachingFileSystem(TestFileSystem({ caching_fs = self._CreateCachingFileSystem(TestFileSystem({
'bob': { 'bob': {
'bob0': 'bob/bob0 contents', 'bob0': 'bob/bob0 contents',
'bob1': 'bob/bob1 contents' 'bob1': 'bob/bob1 contents'
} }
})) }))
def read_skip_not_found(paths): def read_skip_not_found(paths):
return caching_fs.Read(paths, skip_not_found=True).Get() return caching_fs.Read(paths, skip_not_found=True).Get()
self.assertEqual({}, read_skip_not_found(('grub',))) self.assertEqual({}, read_skip_not_found(('grub',)))
self.assertEqual({}, read_skip_not_found(('bob/bob2',))) self.assertEqual({}, read_skip_not_found(('bob/bob2',)))
self.assertEqual({ self.assertEqual({
'bob/bob0': 'bob/bob0 contents', 'bob/bob0': 'bob/bob0 contents',
}, read_skip_not_found(('bob/bob0', 'bob/bob2'))) }, read_skip_not_found(('bob/bob0', 'bob/bob2')))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -46,11 +46,11 @@ class ChainedCompiledFileSystem(object): ...@@ -46,11 +46,11 @@ class ChainedCompiledFileSystem(object):
self._compiled_fs_chain = compiled_fs_chain self._compiled_fs_chain = compiled_fs_chain
self._identity = identity self._identity = identity
def GetFromFile(self, path): def GetFromFile(self, path, skip_not_found=False):
return self._GetImpl( return self._GetImpl(
path, path,
lambda compiled_fs: compiled_fs.GetFromFile(path), lambda cfs: cfs.GetFromFile(path, skip_not_found=skip_not_found),
lambda compiled_fs: compiled_fs.GetFileVersion(path)) lambda cfs: cfs.GetFileVersion(path))
def GetFromFileListing(self, path): def GetFromFileListing(self, path):
path = ToDirectory(path) path = ToDirectory(path)
......
...@@ -185,28 +185,30 @@ class CompiledFileSystem(object): ...@@ -185,28 +185,30 @@ class CompiledFileSystem(object):
return self._file_system.Read(add_prefix(path, first_layer_dirs)).Then( return self._file_system.Read(add_prefix(path, first_layer_dirs)).Then(
lambda results: first_layer_files + get_from_future_listing(results)) lambda results: first_layer_files + get_from_future_listing(results))
def GetFromFile(self, path): def GetFromFile(self, path, skip_not_found=False):
'''Calls |compilation_function| on the contents of the file at |path|. If '''Calls |compilation_function| on the contents of the file at |path|.
|binary| is True then the file will be read as binary - but this will only If |skip_not_found| is True, then None is passed to |compilation_function|.
apply for the first time the file is fetched; if already cached, |binary|
will be ignored.
''' '''
AssertIsFile(path) AssertIsFile(path)
try: try:
version = self._file_system.Stat(path).version version = self._file_system.Stat(path).version
except FileNotFoundError: except FileNotFoundError:
return Future(exc_info=sys.exc_info()) if skip_not_found:
version = None
else:
return Future(exc_info=sys.exc_info())
cache_entry = self._file_object_store.Get(path).Get() cache_entry = self._file_object_store.Get(path).Get()
if (cache_entry is not None) and (version == cache_entry.version): if (cache_entry is not None) and (version == cache_entry.version):
return Future(value=cache_entry._cache_data) return Future(value=cache_entry._cache_data)
def next(files): def compile_(files):
cache_data = self._compilation_function(path, files) cache_data = self._compilation_function(path, files)
self._file_object_store.Set(path, _CacheEntry(cache_data, version)) self._file_object_store.Set(path, _CacheEntry(cache_data, version))
return cache_data return cache_data
return self._file_system.ReadSingle(path).Then(next) return self._file_system.ReadSingle(
path, skip_not_found=skip_not_found).Then(compile_)
def GetFromFileListing(self, path): def GetFromFileListing(self, path):
'''Calls |compilation_function| on the listing of the files at |path|. '''Calls |compilation_function| on the listing of the files at |path|.
......
...@@ -200,6 +200,24 @@ class CompiledFileSystemTest(unittest.TestCase): ...@@ -200,6 +200,24 @@ class CompiledFileSystemTest(unittest.TestCase):
future.Get() future.Get()
self.assertTrue(*mock_fs.CheckAndReset(read_count=2, read_resolve_count=3)) self.assertTrue(*mock_fs.CheckAndReset(read_count=2, read_resolve_count=3))
def testSkipNotFound(self):
mock_fs = MockFileSystem(TestFileSystem(_TEST_DATA))
compiled_fs = CompiledFileSystem.Factory(
ObjectStoreCreator.ForTest()).Create(
mock_fs, lambda path, contents: contents, type(self))
future = compiled_fs.GetFromFile('no_file', skip_not_found=True)
# If the file doesn't exist, then the file system is not read.
self.assertTrue(*mock_fs.CheckAndReset(read_count=1, stat_count=1))
self.assertEqual(None, future.Get())
self.assertTrue(*mock_fs.CheckAndReset(read_resolve_count=1))
future = compiled_fs.GetFromFile('no_file', skip_not_found=True)
self.assertTrue(*mock_fs.CheckAndReset(stat_count=1))
self.assertEqual(None, future.Get())
# The result for a non-existent file should still be cached.
self.assertTrue(*mock_fs.CheckAndReset())
future = compiled_fs.GetFromFile('no_file')
self.assertRaises(FileNotFoundError, future.Get)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -2,4 +2,4 @@ cron: ...@@ -2,4 +2,4 @@ cron:
- description: Repopulates all cached data. - description: Repopulates all cached data.
url: /_cron url: /_cron
schedule: every 5 minutes schedule: every 5 minutes
target: 3-39-4 target: 3-40-0
...@@ -84,13 +84,13 @@ class FileSystem(object): ...@@ -84,13 +84,13 @@ class FileSystem(object):
''' '''
raise NotImplementedError(self.__class__) raise NotImplementedError(self.__class__)
def ReadSingle(self, path): def ReadSingle(self, path, skip_not_found=False):
'''Reads a single file from the FileSystem. Returns a Future with the same '''Reads a single file from the FileSystem. Returns a Future with the same
rules as Read(). If |path| is not found raise a FileNotFoundError on Get(). rules as Read(). If |path| is not found raise a FileNotFoundError on Get().
''' '''
AssertIsValid(path) AssertIsValid(path)
read_single = self.Read([path]) read_single = self.Read([path], skip_not_found=skip_not_found)
return Future(callback=lambda: read_single.Get()[path]) return Future(callback=lambda: read_single.Get().get(path, None))
def Exists(self, path): def Exists(self, path):
'''Returns a Future to the existence of |path|; True if |path| exists, '''Returns a Future to the existence of |path|; True if |path| exists,
......
...@@ -88,7 +88,12 @@ class LocalFileSystem(FileSystem): ...@@ -88,7 +88,12 @@ class LocalFileSystem(FileSystem):
if path == '' or path.endswith('/'): if path == '' or path.endswith('/'):
result[path] = _ListDir(full_path) result[path] = _ListDir(full_path)
else: else:
result[path] = _ReadFile(full_path) try:
result[path] = _ReadFile(full_path)
except FileNotFoundError:
if skip_not_found:
continue
return Future(exc_info=sys.exc_info())
return result return result
return Future(callback=resolve) return Future(callback=resolve)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment