Commit bf7b1fec authored by kalman@chromium.org's avatar kalman@chromium.org

Docserver: Fix invalid path usage in CloudStorageFileSystem.

R=mangini@chromium.org
TBR=yoz@chromium.org

Review URL: https://codereview.chromium.org/165353004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@251237 0039d316-1c4b-4281-b951-d872f2087c98
parent 9487b39e
application: chrome-apps-doc application: chrome-apps-doc
version: 3-9-0 version: 3-10-0
runtime: python27 runtime: python27
api_version: 1 api_version: 1
threadsafe: false threadsafe: false
......
...@@ -11,7 +11,7 @@ from docs_server_utils import ToUnicode ...@@ -11,7 +11,7 @@ from docs_server_utils import ToUnicode
from file_system import FileNotFoundError from file_system import FileNotFoundError
from future import Gettable, Future from future import Gettable, Future
from path_canonicalizer import PathCanonicalizer from path_canonicalizer import PathCanonicalizer
from path_util import AssertIsValid, ToDirectory from path_util import AssertIsValid, Join, ToDirectory
from special_paths import SITE_VERIFICATION_FILE from special_paths import SITE_VERIFICATION_FILE
from third_party.handlebar import Handlebar from third_party.handlebar import Handlebar
from third_party.markdown import markdown from third_party.markdown import markdown
...@@ -153,11 +153,11 @@ class ContentProvider(object): ...@@ -153,11 +153,11 @@ class ContentProvider(object):
futures = [self._path_canonicalizer.Cron()] futures = [self._path_canonicalizer.Cron()]
for root, _, files in self.file_system.Walk(''): for root, _, files in self.file_system.Walk(''):
for f in files: for f in files:
futures.append(self.GetContentAndType(posixpath.join(root, f))) futures.append(self.GetContentAndType(Join(root, f)))
# Also cache the extension-less version of the file if needed. # Also cache the extension-less version of the file if needed.
base, ext = posixpath.splitext(f) base, ext = posixpath.splitext(f)
if f != SITE_VERIFICATION_FILE and ext in self._default_extensions: if f != SITE_VERIFICATION_FILE and ext in self._default_extensions:
futures.append(self.GetContentAndType(posixpath.join(root, base))) futures.append(self.GetContentAndType(Join(root, base)))
# TODO(kalman): Cache .zip files for each directory (if supported). # TODO(kalman): Cache .zip files for each directory (if supported).
return Future(delegate=Gettable(lambda: [f.Get() for f in futures])) return Future(delegate=Gettable(lambda: [f.Get() for f in futures]))
......
...@@ -2,4 +2,4 @@ cron: ...@@ -2,4 +2,4 @@ cron:
- description: Repopulates all cached data. - description: Repopulates all cached data.
url: /_cron url: /_cron
schedule: every 5 minutes schedule: every 5 minutes
target: 3-9-0 target: 3-10-0
...@@ -6,7 +6,9 @@ import posixpath ...@@ -6,7 +6,9 @@ import posixpath
import traceback import traceback
from future import Gettable, Future from future import Gettable, Future
from path_util import AssertIsDirectory, AssertIsValid, SplitParent, ToDirectory from path_util import (
AssertIsDirectory, AssertIsValid, IsDirectory, IsValid, SplitParent,
ToDirectory)
class _BaseFileSystemException(Exception): class _BaseFileSystemException(Exception):
...@@ -39,6 +41,9 @@ class StatInfo(object): ...@@ -39,6 +41,9 @@ class StatInfo(object):
'''The result of calling Stat on a FileSystem. '''The result of calling Stat on a FileSystem.
''' '''
def __init__(self, version, child_versions=None): def __init__(self, version, child_versions=None):
if child_versions:
assert all(IsValid(path) for path in child_versions.iterkeys()), \
child_versions
self.version = version self.version = version
self.child_versions = child_versions self.child_versions = child_versions
...@@ -152,7 +157,7 @@ class FileSystem(object): ...@@ -152,7 +157,7 @@ class FileSystem(object):
dirs, files = [], [] dirs, files = [], []
for f in self.ReadSingle(root).Get(): for f in self.ReadSingle(root).Get():
if f.endswith('/'): if IsDirectory(f):
dirs.append(f) dirs.append(f)
else: else:
files.append(f) files.append(f)
......
...@@ -9,47 +9,55 @@ from third_party.cloudstorage import errors ...@@ -9,47 +9,55 @@ from third_party.cloudstorage import errors
from docs_server_utils import StringIdentity from docs_server_utils import StringIdentity
from file_system import FileSystem, FileNotFoundError, StatInfo from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Gettable, Future from future import Gettable, Future
from path_util import (
AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)
import logging import logging
import traceback import traceback
# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS are different for the docserver;
# GCS requires that paths start with a /, we require that they don't.
# Name of the file containing the Git hash of the latest commit sync'ed # Name of the file containing the Git hash of the latest commit sync'ed
# to Cloud Storage. This file is generated by the Github->GCS sync script # to Cloud Storage. This file is generated by the Github->GCS sync script
LAST_COMMIT_HASH_FILENAME='.__lastcommit.txt' LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'
'''See gcs_file_system_provider.py for documentation on using Google Cloud
Storage as a filesystem.
'''
def _ReadFile(filename): def _ReadFile(filename):
AssertIsFile(filename)
try: try:
with cloudstorage_api.open(filename, 'r') as f: with cloudstorage_api.open('/' + filename, 'r') as f:
return f.read() return f.read()
except errors.Error: except errors.Error:
raise FileNotFoundError('Read failed for %s: %s' % (filename, raise FileNotFoundError('Read failed for %s: %s' % (filename,
traceback.format_exc())) traceback.format_exc()))
def _ListDir(dir_name): def _ListDir(dir_name):
AssertIsDirectory(dir_name)
try: try:
files = cloudstorage_api.listbucket(dir_name) files = cloudstorage_api.listbucket('/' + dir_name)
return [os_path.filename for os_path in files] return [os_path.filename.lstrip('/') for os_path in files]
except errors.Error: except errors.Error:
raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' % raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' %
(dir_name, traceback.format_exc())) (dir_name, traceback.format_exc()))
def _CreateStatInfo(bucket, path): def _CreateStatInfo(bucket, path):
bucket = '/%s' % bucket full_path = Join(bucket, path)
full_path = '/'.join( (bucket, path.lstrip('/')) ) last_commit_file = Join(bucket, LAST_COMMIT_HASH_FILENAME)
last_commit_file = '%s/%s' % (bucket, LAST_COMMIT_HASH_FILENAME)
try: try:
last_commit = _ReadFile(last_commit_file) last_commit = _ReadFile(last_commit_file)
if full_path.endswith('/'): if IsDirectory(full_path):
child_versions = dict() child_versions = dict()
# Fetching stats for all files under full_path, recursively. The # Fetching stats for all files under full_path, recursively. The
# listbucket method uses a prefix approach to simulate hierarchy, # listbucket method uses a prefix approach to simulate hierarchy,
# but calling it without the "delimiter" argument searches for prefix, # but calling it without the "delimiter" argument searches for prefix,
# which means, for directories, everything beneath it. # which means, for directories, everything beneath it.
for _file in cloudstorage_api.listbucket(full_path): for _file in cloudstorage_api.listbucket('/' + full_path):
filename = _file.filename[len(full_path):] filename = _file.filename.lstrip('/')[len(full_path):]
child_versions[filename] = last_commit child_versions[filename] = last_commit
else: else:
child_versions = None child_versions = None
...@@ -71,16 +79,17 @@ class CloudStorageFileSystem(FileSystem): ...@@ -71,16 +79,17 @@ class CloudStorageFileSystem(FileSystem):
logging.debug('gcs: prefixing all bucket names with %s' % logging.debug('gcs: prefixing all bucket names with %s' %
debug_bucket_prefix) debug_bucket_prefix)
self._bucket = debug_bucket_prefix + self._bucket self._bucket = debug_bucket_prefix + self._bucket
AssertIsValid(self._bucket)
def Read(self, paths): def Read(self, paths):
def resolve(): def resolve():
try: try:
result = {} result = {}
for path in paths: for path in paths:
full_path = '/%s/%s' % (self._bucket, path.lstrip('/')) full_path = Join(self._bucket, path)
logging.debug('gcs: requested path %s, reading %s' % logging.debug('gcs: requested path "%s", reading "%s"' %
(path, full_path)) (path, full_path))
if path == '' or path.endswith('/'): if IsDirectory(path):
result[path] = _ListDir(full_path) result[path] = _ListDir(full_path)
else: else:
result[path] = _ReadFile(full_path) result[path] = _ReadFile(full_path)
...@@ -95,6 +104,7 @@ class CloudStorageFileSystem(FileSystem): ...@@ -95,6 +104,7 @@ class CloudStorageFileSystem(FileSystem):
return Future(value=()) return Future(value=())
def Stat(self, path): def Stat(self, path):
AssertIsValid(path)
try: try:
return _CreateStatInfo(self._bucket, path) return _CreateStatInfo(self._bucket, path)
except errors.AuthorizationError: except errors.AuthorizationError:
......
...@@ -29,6 +29,11 @@ def AssertIsValid(path): ...@@ -29,6 +29,11 @@ def AssertIsValid(path):
assert IsValid(path), 'Path "%s" is invalid' % path assert IsValid(path), 'Path "%s" is invalid' % path
def Join(*paths):
assert all(IsValid(path) for path in paths), paths
return posixpath.join(*paths)
def SplitParent(path): def SplitParent(path):
'''Returns the parent directory and base name of |path| in a tuple. '''Returns the parent directory and base name of |path| in a tuple.
Any trailing slash of |path| is preserved, such that the parent of Any trailing slash of |path| is preserved, such that the parent of
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment