Commit bf7b1fec authored by kalman@chromium.org's avatar kalman@chromium.org

Docserver: Fix invalid path usage in CloudStorageFileSystem.

R=mangini@chromium.org
TBR=yoz@chromium.org

Review URL: https://codereview.chromium.org/165353004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@251237 0039d316-1c4b-4281-b951-d872f2087c98
parent 9487b39e
application: chrome-apps-doc
version: 3-9-0
version: 3-10-0
runtime: python27
api_version: 1
threadsafe: false
......
......@@ -11,7 +11,7 @@ from docs_server_utils import ToUnicode
from file_system import FileNotFoundError
from future import Gettable, Future
from path_canonicalizer import PathCanonicalizer
from path_util import AssertIsValid, ToDirectory
from path_util import AssertIsValid, Join, ToDirectory
from special_paths import SITE_VERIFICATION_FILE
from third_party.handlebar import Handlebar
from third_party.markdown import markdown
......@@ -153,11 +153,11 @@ class ContentProvider(object):
futures = [self._path_canonicalizer.Cron()]
for root, _, files in self.file_system.Walk(''):
for f in files:
futures.append(self.GetContentAndType(posixpath.join(root, f)))
futures.append(self.GetContentAndType(Join(root, f)))
# Also cache the extension-less version of the file if needed.
base, ext = posixpath.splitext(f)
if f != SITE_VERIFICATION_FILE and ext in self._default_extensions:
futures.append(self.GetContentAndType(posixpath.join(root, base)))
futures.append(self.GetContentAndType(Join(root, base)))
# TODO(kalman): Cache .zip files for each directory (if supported).
return Future(delegate=Gettable(lambda: [f.Get() for f in futures]))
......
......@@ -2,4 +2,4 @@ cron:
- description: Repopulates all cached data.
url: /_cron
schedule: every 5 minutes
target: 3-9-0
target: 3-10-0
......@@ -6,7 +6,9 @@ import posixpath
import traceback
from future import Gettable, Future
from path_util import AssertIsDirectory, AssertIsValid, SplitParent, ToDirectory
from path_util import (
AssertIsDirectory, AssertIsValid, IsDirectory, IsValid, SplitParent,
ToDirectory)
class _BaseFileSystemException(Exception):
......@@ -39,6 +41,9 @@ class StatInfo(object):
'''The result of calling Stat on a FileSystem.
'''
def __init__(self, version, child_versions=None):
# |version|: opaque version token for the stat'ed path.
# |child_versions|: optional dict mapping child path -> version for each
# entry under a directory; every key must satisfy path_util.IsValid
# (asserted below). None when the stat'ed path is a single file.
# NOTE: iterkeys() is Python 2 only.
if child_versions:
assert all(IsValid(path) for path in child_versions.iterkeys()), \
child_versions
self.version = version
self.child_versions = child_versions
......@@ -152,7 +157,7 @@ class FileSystem(object):
dirs, files = [], []
for f in self.ReadSingle(root).Get():
if f.endswith('/'):
if IsDirectory(f):
dirs.append(f)
else:
files.append(f)
......
......@@ -9,47 +9,55 @@ from third_party.cloudstorage import errors
from docs_server_utils import StringIdentity
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Gettable, Future
from path_util import (
AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)
import logging
import traceback
# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS are different for the docserver;
# GCS requires that paths start with a /, we require that they don't.
# Name of the file containing the Git hash of the latest commit sync'ed
# to Cloud Storage. This file is generated by the Github->GCS sync script
LAST_COMMIT_HASH_FILENAME='.__lastcommit.txt'
LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'
'''See gcs_file_system_provider.py for documentation on using Google Cloud
Storage as a filesystem.
'''
def _ReadFile(filename):
  '''Returns the contents of |filename| read from Google Cloud Storage.

  |filename| is a docserver-style file path (no leading slash); the leading
  '/' required by the cloudstorage API is added here.

  Raises FileNotFoundError if the cloudstorage read fails for any reason.
  '''
  AssertIsFile(filename)
  try:
    # The GCS API requires absolute paths; docserver paths have no leading '/'.
    with cloudstorage_api.open('/' + filename, 'r') as f:
      return f.read()
  except errors.Error:
    raise FileNotFoundError('Read failed for %s: %s' % (filename,
                                                        traceback.format_exc()))
def _ListDir(dir_name):
  '''Returns the filenames under the Cloud Storage directory |dir_name|.

  |dir_name| is a docserver-style directory path (no leading slash); the
  leading '/' needed by the cloudstorage API is added here and stripped
  again from each returned filename.

  Raises FileNotFoundError if the listbucket call fails.
  '''
  AssertIsDirectory(dir_name)
  try:
    # The GCS API requires absolute paths; docserver paths have no leading '/'.
    files = cloudstorage_api.listbucket('/' + dir_name)
    return [os_path.filename.lstrip('/') for os_path in files]
  except errors.Error:
    raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' %
                            (dir_name, traceback.format_exc()))
def _CreateStatInfo(bucket, path):
bucket = '/%s' % bucket
full_path = '/'.join( (bucket, path.lstrip('/')) )
last_commit_file = '%s/%s' % (bucket, LAST_COMMIT_HASH_FILENAME)
full_path = Join(bucket, path)
last_commit_file = Join(bucket, LAST_COMMIT_HASH_FILENAME)
try:
last_commit = _ReadFile(last_commit_file)
if full_path.endswith('/'):
if IsDirectory(full_path):
child_versions = dict()
# Fetching stats for all files under full_path, recursively. The
# listbucket method uses a prefix approach to simulate hierarchy,
# but calling it without the "delimiter" argument searches for prefix,
# which means, for directories, everything beneath it.
for _file in cloudstorage_api.listbucket(full_path):
filename = _file.filename[len(full_path):]
for _file in cloudstorage_api.listbucket('/' + full_path):
filename = _file.filename.lstrip('/')[len(full_path):]
child_versions[filename] = last_commit
else:
child_versions = None
......@@ -71,16 +79,17 @@ class CloudStorageFileSystem(FileSystem):
logging.debug('gcs: prefixing all bucket names with %s' %
debug_bucket_prefix)
self._bucket = debug_bucket_prefix + self._bucket
AssertIsValid(self._bucket)
def Read(self, paths):
def resolve():
try:
result = {}
for path in paths:
full_path = '/%s/%s' % (self._bucket, path.lstrip('/'))
logging.debug('gcs: requested path %s, reading %s' %
full_path = Join(self._bucket, path)
logging.debug('gcs: requested path "%s", reading "%s"' %
(path, full_path))
if path == '' or path.endswith('/'):
if IsDirectory(path):
result[path] = _ListDir(full_path)
else:
result[path] = _ReadFile(full_path)
......@@ -95,6 +104,7 @@ class CloudStorageFileSystem(FileSystem):
return Future(value=())
def Stat(self, path):
AssertIsValid(path)
try:
return _CreateStatInfo(self._bucket, path)
except errors.AuthorizationError:
......
......@@ -29,6 +29,11 @@ def AssertIsValid(path):
assert IsValid(path), 'Path "%s" is invalid' % path
def Join(*paths):
  '''posixpath.join that first checks every component with path_util.IsValid.

  Asserts (with the offending tuple as the message) if any component is not
  a valid docserver path.
  '''
  all_valid = all(IsValid(component) for component in paths)
  assert all_valid, paths
  return posixpath.join(*paths)
def SplitParent(path):
'''Returns the parent directory and base name of |path| in a tuple.
Any trailing slash of |path| is preserved, such that the parent of
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment