Commit bf7b1fec authored by kalman@chromium.org's avatar kalman@chromium.org

Docserver: Fix invalid path usage in CloudStorageFileSystem.

R=mangini@chromium.org
TBR=yoz@chromium.org

Review URL: https://codereview.chromium.org/165353004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@251237 0039d316-1c4b-4281-b951-d872f2087c98
parent 9487b39e
application: chrome-apps-doc
version: 3-9-0
version: 3-10-0
runtime: python27
api_version: 1
threadsafe: false
......
......@@ -11,7 +11,7 @@ from docs_server_utils import ToUnicode
from file_system import FileNotFoundError
from future import Gettable, Future
from path_canonicalizer import PathCanonicalizer
from path_util import AssertIsValid, ToDirectory
from path_util import AssertIsValid, Join, ToDirectory
from special_paths import SITE_VERIFICATION_FILE
from third_party.handlebar import Handlebar
from third_party.markdown import markdown
......@@ -153,11 +153,11 @@ class ContentProvider(object):
futures = [self._path_canonicalizer.Cron()]
for root, _, files in self.file_system.Walk(''):
for f in files:
futures.append(self.GetContentAndType(posixpath.join(root, f)))
futures.append(self.GetContentAndType(Join(root, f)))
# Also cache the extension-less version of the file if needed.
base, ext = posixpath.splitext(f)
if f != SITE_VERIFICATION_FILE and ext in self._default_extensions:
futures.append(self.GetContentAndType(posixpath.join(root, base)))
futures.append(self.GetContentAndType(Join(root, base)))
# TODO(kalman): Cache .zip files for each directory (if supported).
return Future(delegate=Gettable(lambda: [f.Get() for f in futures]))
......
......@@ -2,4 +2,4 @@ cron:
- description: Repopulates all cached data.
url: /_cron
schedule: every 5 minutes
target: 3-9-0
target: 3-10-0
......@@ -6,7 +6,9 @@ import posixpath
import traceback
from future import Gettable, Future
from path_util import AssertIsDirectory, AssertIsValid, SplitParent, ToDirectory
from path_util import (
AssertIsDirectory, AssertIsValid, IsDirectory, IsValid, SplitParent,
ToDirectory)
class _BaseFileSystemException(Exception):
......@@ -39,6 +41,9 @@ class StatInfo(object):
'''The result of calling Stat on a FileSystem.
'''
def __init__(self, version, child_versions=None):
# |version|: opaque version token for the stat'ed path.
# |child_versions|: optional dict mapping child path -> version for each
# entry under a directory; every key must satisfy path_util.IsValid
# (asserted below). None when the stat'ed path is a single file.
# NOTE: iterkeys() is Python 2 only.
if child_versions:
assert all(IsValid(path) for path in child_versions.iterkeys()), \
child_versions
self.version = version
self.child_versions = child_versions
......@@ -152,7 +157,7 @@ class FileSystem(object):
dirs, files = [], []
for f in self.ReadSingle(root).Get():
if f.endswith('/'):
if IsDirectory(f):
dirs.append(f)
else:
files.append(f)
......
......@@ -9,47 +9,55 @@ from third_party.cloudstorage import errors
from docs_server_utils import StringIdentity
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Gettable, Future
from path_util import (
AssertIsDirectory, AssertIsFile, AssertIsValid, IsDirectory, Join)
import logging
import traceback
# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS are different for the docserver;
# GCS requires that paths start with a /, we require that they don't.
# Name of the file containing the Git hash of the latest commit sync'ed
# to Cloud Storage. This file is generated by the Github->GCS sync script
LAST_COMMIT_HASH_FILENAME='.__lastcommit.txt'
LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'
'''See gcs_file_system_provider.py for documentation on using Google Cloud
Storage as a filesystem.
'''
def _ReadFile(filename):
  '''Returns the contents of |filename| read from Google Cloud Storage.

  |filename| is a docserver-style file path (no leading slash); the leading
  '/' required by the cloudstorage API is added here.

  Raises FileNotFoundError if the cloudstorage read fails for any reason.
  '''
  AssertIsFile(filename)
  try:
    # The GCS API requires absolute paths; docserver paths have no leading '/'.
    with cloudstorage_api.open('/' + filename, 'r') as f:
      return f.read()
  except errors.Error:
    raise FileNotFoundError('Read failed for %s: %s' % (filename,
                                                        traceback.format_exc()))
def _ListDir(dir_name):
  '''Returns the filenames under the Cloud Storage directory |dir_name|.

  |dir_name| is a docserver-style directory path (no leading slash); the
  leading '/' needed by the cloudstorage API is added here and stripped
  again from each returned filename.

  Raises FileNotFoundError if the listbucket call fails.
  '''
  AssertIsDirectory(dir_name)
  try:
    # The GCS API requires absolute paths; docserver paths have no leading '/'.
    files = cloudstorage_api.listbucket('/' + dir_name)
    return [os_path.filename.lstrip('/') for os_path in files]
  except errors.Error:
    raise FileNotFoundError('cloudstorage.listbucket failed for %s: %s' %
                            (dir_name, traceback.format_exc()))
def _CreateStatInfo(bucket, path):
bucket = '/%s' % bucket
full_path = '/'.join( (bucket, path.lstrip('/')) )
last_commit_file = '%s/%s' % (bucket, LAST_COMMIT_HASH_FILENAME)
full_path = Join(bucket, path)
last_commit_file = Join(bucket, LAST_COMMIT_HASH_FILENAME)
try:
last_commit = _ReadFile(last_commit_file)
if full_path.endswith('/'):
if IsDirectory(full_path):
child_versions = dict()
# Fetching stats for all files under full_path, recursively. The
# listbucket method uses a prefix approach to simulate hierarchy,
# but calling it without the "delimiter" argument searches for prefix,
# which means, for directories, everything beneath it.
for _file in cloudstorage_api.listbucket(full_path):
filename = _file.filename[len(full_path):]
for _file in cloudstorage_api.listbucket('/' + full_path):
filename = _file.filename.lstrip('/')[len(full_path):]
child_versions[filename] = last_commit
else:
child_versions = None
......@@ -71,16 +79,17 @@ class CloudStorageFileSystem(FileSystem):
logging.debug('gcs: prefixing all bucket names with %s' %
debug_bucket_prefix)
self._bucket = debug_bucket_prefix + self._bucket
AssertIsValid(self._bucket)
def Read(self, paths):
def resolve():
try:
result = {}
for path in paths:
full_path = '/%s/%s' % (self._bucket, path.lstrip('/'))
logging.debug('gcs: requested path %s, reading %s' %
full_path = Join(self._bucket, path)
logging.debug('gcs: requested path "%s", reading "%s"' %
(path, full_path))
if path == '' or path.endswith('/'):
if IsDirectory(path):
result[path] = _ListDir(full_path)
else:
result[path] = _ReadFile(full_path)
......@@ -95,6 +104,7 @@ class CloudStorageFileSystem(FileSystem):
return Future(value=())
def Stat(self, path):
AssertIsValid(path)
try:
return _CreateStatInfo(self._bucket, path)
except errors.AuthorizationError:
......
......@@ -29,6 +29,11 @@ def AssertIsValid(path):
assert IsValid(path), 'Path "%s" is invalid' % path
def Join(*paths):
  '''posixpath.join that first checks every component with path_util.IsValid.

  Asserts (with the offending tuple as the message) if any component is not
  a valid docserver path.
  '''
  all_valid = all(IsValid(component) for component in paths)
  assert all_valid, paths
  return posixpath.join(*paths)
def SplitParent(path):
'''Returns the parent directory and base name of |path| in a tuple.
Any trailing slash of |path| is preserved, such that the parent of
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment