Commit 7a2cd85b authored by Jan Wilken Dörrie's avatar Jan Wilken Dörrie Committed by Chromium LUCI CQ

[clang] Add StringPiece Rewriter Tool

This change adds a clang tool to rewrite deprecated base::StringPiece
APIs. To do so, it makes use Clang Transformer [1], significantly
reducing the required boilerplate. For now, it only removes usages of
the `as_string()` API, replacing it with explicit std::string
constructions or omitting it entirely, if possible.

The edits generated by the tool compile cleanly, except when used
in complicated macro expressions. In these cases commas might not be
enclosed in parentheses correctly. While it'd be possible to modify
the tool to handle these cases properly, the required effort likely
outweighs a small manual pass over the result to fix these issues.

[1] http://lists.llvm.org/pipermail/cfe-dev/2018-November/060184.html

Bug: 1049498
Change-Id: I77a0945d6cb59bde6f2ad65f079a399f67bbd054
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2601742
Commit-Queue: Jan Wilken Dörrie <jdoerrie@chromium.org>
Reviewed-by: default avatarDaniel Cheng <dcheng@chromium.org>
Reviewed-by: default avatarŁukasz Anforowicz <lukasza@chromium.org>
Cr-Commit-Position: refs/heads/master@{#841008}
parent abfa914a
set(LLVM_LINK_COMPONENTS
BitReader
MCParser
Option
Support
X86AsmParser
X86CodeGen
X86Desc
X86Info
)
add_llvm_executable(string_piece_rewriters
StringPieceRewriters.cpp
)
target_link_libraries(string_piece_rewriters
clangAST
clangASTMatchers
clangAnalysis
clangBasic
clangDriver
clangEdit
clangFrontend
clangLex
clangParse
clangSema
clangSerialization
clangTooling
clangToolingCore
clangTransformer
)
cr_install(TARGETS string_piece_rewriters RUNTIME DESTINATION bin)
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <assert.h>
#include <stdlib.h>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "clang/AST/ASTContext.h"
#include "clang/AST/ParentMap.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/ASTMatchers/ASTMatchersMacros.h"
#include "clang/Analysis/CFG.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Refactoring.h"
#include "clang/Tooling/Refactoring/AtomicChange.h"
#include "clang/Tooling/Tooling.h"
#include "clang/Tooling/Transformer/RewriteRule.h"
#include "clang/Tooling/Transformer/SourceCodeBuilders.h"
#include "clang/Tooling/Transformer/Stencil.h"
#include "clang/Tooling/Transformer/Transformer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetSelect.h"
using clang::tooling::AtomicChange;
using clang::tooling::AtomicChanges;
using clang::tooling::Transformer;
using namespace ::clang::ast_matchers;
using namespace ::clang::transformer;
namespace {
// Custom matcher to differentiate variable initializations based on the syntax.
AST_MATCHER_P(clang::VarDecl,
hasInitStyle,
clang::VarDecl::InitializationStyle,
InitStyle) {
return Node.getInitStyle() == InitStyle;
}
// Like `maybeDeref`, but with support for smart pointers. Assumes that any type
// that overloads `->` also overloads `*`.
Stencil maybeDerefSmart(std::string ID) {
return run([ID = std::move(ID)](const MatchFinder::MatchResult& result)
-> llvm::Expected<std::string> {
if (const auto* op_call =
result.Nodes.getNodeAs<clang::CXXOperatorCallExpr>(ID)) {
if (op_call->getOperator() == clang::OO_Arrow &&
op_call->getNumArgs() == 1) {
llvm::Optional<std::string> text = clang::tooling::buildDereference(
*op_call->getArg(0), *result.Context);
if (!text) {
return llvm::make_error<llvm::StringError>(
llvm::errc::invalid_argument,
"ID has no corresponding source: " + ID);
}
return *text;
}
}
return maybeDeref(std::move(ID))->eval(result);
});
}
// A matcher that matches the `as_string()` member function call on a
// StringPiece. Binds both the call to `as_string()`, as well as the
// StringPiece.
auto GetAsStringMatcher() {
return materializeTemporaryExpr(
has(ignoringParenImpCasts(
cxxBindTemporaryExpr(has(cxxMemberCallExpr(
on(expr().bind("piece")),
callee(cxxMethodDecl(
ofClass(hasName("::base::BasicStringPiece")),
hasName("as_string")))))))))
.bind("as_string");
}
// Replaces calls of `piece.as_string()` and `piece_ptr->as_string()` with
// `std::string(piece)` and `std::string(*piece_ptr)` respectively.
RewriteRule ReplaceAsStringWithExplicitStringConversionRule() {
return makeRule(GetAsStringMatcher(),
changeTo(cat("std::string(", maybeDerefSmart("piece"), ")")));
}
// A rule that rewrites expressions like `std::string str = piece.as_string();`
// to `std::string str(foo);`, making use of the explicit conversion from
// base::StringPiece to std::string.
RewriteRule RewriteImplicitToExplicitStringConstructionRule() {
auto matcher = materializeTemporaryExpr(
GetAsStringMatcher(), hasParent(cxxConstructExpr(
hasDeclaration(cxxConstructorDecl(
ofClass(hasName("::std::basic_string")))),
hasParent(exprWithCleanups(hasParent(
varDecl(hasInitStyle(clang::VarDecl::CInit))
.bind("varDecl")))))));
return makeRule(
matcher,
// Remove the existing initialization via assignment and insert a new
// making use of explicit construction.
editList({
remove(between(name("varDecl"), after(node("as_string")))),
insertAfter(name("varDecl"), cat("(", maybeDerefSmart("piece"), ")")),
}));
}
// A rule that removes redundant calls to `as_string`. This can happen if:
//
// (1) the resulting string is converted to another string piece,
// (2) the resulting string is involved in a call to a member function (2a) or
// operator (2b) StringPiece also supports, or
// (3) the as_string call is part of the explicit construction of a std::string.
// This can either be a local variable that is explicitly constructed (3a),
// or a class member initialized by the constructor list (3b).
//
// The resulting rewrite rule will replace expressions like `piece.as_string()`
// simply with `piece`, and expressions like `piece_ptr->as_string()` with
// either `*piece_ptr` or `piece_ptr->`, depending on whether or not it is
// followed by a member expression.
RewriteRule RemoveAsStringRule() {
// List of std::string members that are also supported by base::StringPiece.
// Note: `data()` is absent from this list, because std::string::data is
// guaranteed to return a null-terminated string, while
// base::StringPiece::data is not. Furthermore, `substr()` is missing as well,
// due to the possibly breaking change in return type (std::string vs
// base::StringPiece).
static constexpr llvm::StringRef kMatchingStringMembers[] = {
"begin",
"cbegin",
"end",
"cend",
"rbegin",
"crbegin",
"rend",
"crend",
"at",
"front",
"back",
"size",
"length",
"max_size",
"empty",
"copy",
"compare",
"find",
"rfind",
"find_first_of",
"find_last_of",
"find_first_not_of",
"find_last_not_of",
"npos",
};
// List of std::string operators that are also supported by base::StringPiece.
// Note: `operator[]` is absent from this list, because string::operator[idx]
// is valid for idx == size(), while base::StringPiece::operator[] is not.
static constexpr llvm::StringRef kMatchingStringOperators[] = {
"==", "!=", "<", ">", "<=", ">=", "<<",
};
auto string_piece_construct_expr = cxxConstructExpr(hasDeclaration(
cxxConstructorDecl(ofClass(hasName("::base::BasicStringPiece")))));
auto matching_string_member_expr =
memberExpr(member(hasAnyName(kMatchingStringMembers))).bind("member");
auto matching_string_operator_call_expr = cxxOperatorCallExpr(
hasAnyOverloadedOperatorName(kMatchingStringOperators));
auto string_construct_expr = cxxConstructExpr(hasDeclaration(
cxxConstructorDecl(ofClass(hasName("::std::basic_string")))));
// Matches the explicit construction of a string variable, i.e. not making use
// of C-style assignment syntax.
auto explicit_string_var_construct_expr = cxxConstructExpr(
string_construct_expr,
hasParent(exprWithCleanups(
hasParent(varDecl(unless(hasInitStyle(clang::VarDecl::CInit)))))));
auto string_class_member_construct_expr = cxxConstructExpr(
string_construct_expr,
hasParent(exprWithCleanups(hasParent(cxxConstructorDecl()))));
auto matcher = materializeTemporaryExpr(
GetAsStringMatcher(),
anyOf(
// Case (1)
hasParent(string_piece_construct_expr),
// Case (2a)
hasParent(matching_string_member_expr),
// Const APIs like `size()` or `find()` add an extra implicit cast
// to const std::string here, that we need to ignore.
hasParent(implicitCastExpr(hasParent(matching_string_member_expr))),
// Case (2b)
hasParent(matching_string_operator_call_expr),
// Case (3a)
hasParent(explicit_string_var_construct_expr),
// Case (3b)
hasParent(string_class_member_construct_expr)));
return makeRule(
matcher,
// In case there is a bound member expression, construct an access
// expression into the string piece. This is required to handle
// expressions like `piece_ptr->as_string().some_member()` correctly.
ifBound("member",
changeTo(node("member"), access("piece", cat(member("member")))),
changeTo(maybeDerefSmart("piece"))));
}
// Returns a consumer that adds `change` to `changes` if present.
Transformer::ChangeConsumer GetConsumer(AtomicChanges& changes) {
return [&changes](llvm::Expected<AtomicChange> change) {
if (change)
changes.push_back(*change);
};
}
} // namespace
int main(int argc, const char* argv[]) {
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmParser();
static llvm::cl::OptionCategory tool_options("Tool options");
clang::tooling::CommonOptionsParser options(argc, argv, tool_options);
clang::tooling::ClangTool tool(options.getCompilations(),
options.getSourcePathList());
// Combine the above rules into a single one and add an include for the right
// header.
RewriteRule as_string_rule = applyFirst({
RemoveAsStringRule(),
RewriteImplicitToExplicitStringConstructionRule(),
ReplaceAsStringWithExplicitStringConversionRule(),
});
addInclude(as_string_rule, "base/strings/string_piece.h");
AtomicChanges changes;
Transformer transformer(as_string_rule, GetConsumer(changes));
MatchFinder match_finder;
transformer.registerMatchers(&match_finder);
auto factory = clang::tooling::newFrontendActionFactory(&match_finder);
int result = tool.run(factory.get());
if (result != 0)
return result;
if (changes.empty())
return 0;
// Serialization format is documented in tools/clang/scripts/run_tool.py
llvm::outs() << "==== BEGIN EDITS ====\n";
for (const auto& change : changes) {
for (const auto& r : change.getReplacements()) {
std::string replacement(r.getReplacementText());
std::replace(replacement.begin(), replacement.end(), '\n', '\0');
llvm::outs() << "r:::" << r.getFilePath() << ":::" << r.getOffset()
<< ":::" << r.getLength() << ":::" << replacement << "\n";
}
for (const auto& header : change.getInsertedHeaders()) {
llvm::outs() << "include-user-header:::" << change.getFilePath()
<< ":::-1:::-1:::" << header << "\n";
}
}
llvm::outs() << "==== END EDITS ====\n";
return 0;
}
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <iostream>
#include <memory>
#include "base/strings/string_piece.h"
void RemoveAsStringInExplicitStringConstruction() {
base::StringPiece piece = "Hello";
std::string str{piece};
}
void RemoveAsStringWhenConstructingStringPiece() {
auto* piece_ptr = new base::StringPiece("Hello");
const base::StringPiece& piece_ref = *piece_ptr;
}
void RemoveAsStringForMembers() {
base::StringPiece piece = "Hello";
piece.begin();
piece.cbegin();
piece.end();
piece.cend();
piece.rbegin();
piece.crbegin();
piece.rend();
piece.crend();
piece.at(0);
piece.front();
piece.back();
piece.size();
auto* piece_ptr = &piece;
piece_ptr->length();
piece_ptr->max_size();
piece_ptr->empty();
piece_ptr->copy(nullptr, 0);
piece_ptr->compare(std::string(*piece_ptr));
piece_ptr->find('\0');
piece_ptr->rfind('\0');
piece_ptr->find_first_of('\0');
piece_ptr->find_last_of('\0');
piece_ptr->find_first_not_of('\0');
piece_ptr->find_last_not_of('\0');
piece_ptr->npos;
// Negative tests, where simply removing as_string() is incorrect. It should
// rather be replaced by an explicit std::string construction.
std::string(piece).data();
std::string(*piece_ptr).substr(0);
}
void RemoveAsStringForOperators() {
base::StringPiece piece = "Hello";
std::cout << piece;
piece == "Hello";
piece != "Hello";
piece < "Hello";
piece > "Hello";
piece <= "Hello";
piece >= "Hello";
// Negative tests, where simply removing as_string() is incorrect. It should
// rather be replaced by an explicit std::string construction.
std::string(piece) += "Hello";
std::string(piece) + "Hello";
std::string(piece) = "Hello";
std::string(piece)[0];
}
void RemoveAsStringWhenConstructingStringMember() {
class S {
public:
explicit S(base::StringPiece piece) : str_(piece) {}
private:
std::string str_;
};
}
void RewriteCStyleStringInitialization() {
auto piece_ptr = std::make_unique<base::StringPiece>("Hello");
const std::string str(*piece_ptr);
}
void ReplaceAsStringWithStringConstructor() {
auto piece_ptr = std::make_unique<base::StringPiece>("Hello");
std::string str = std::string(*piece_ptr).append(" World");
}
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <iostream>
#include <memory>
#include "base/strings/string_piece.h"
void RemoveAsStringInExplicitStringConstruction() {
base::StringPiece piece = "Hello";
std::string str{piece.as_string()};
}
void RemoveAsStringWhenConstructingStringPiece() {
auto* piece_ptr = new base::StringPiece("Hello");
const base::StringPiece& piece_ref = piece_ptr->as_string();
}
void RemoveAsStringForMembers() {
base::StringPiece piece = "Hello";
piece.as_string().begin();
piece.as_string().cbegin();
piece.as_string().end();
piece.as_string().cend();
piece.as_string().rbegin();
piece.as_string().crbegin();
piece.as_string().rend();
piece.as_string().crend();
piece.as_string().at(0);
piece.as_string().front();
piece.as_string().back();
piece.as_string().size();
auto* piece_ptr = &piece;
piece_ptr->as_string().length();
piece_ptr->as_string().max_size();
piece_ptr->as_string().empty();
piece_ptr->as_string().copy(nullptr, 0);
piece_ptr->as_string().compare(piece_ptr->as_string());
piece_ptr->as_string().find('\0');
piece_ptr->as_string().rfind('\0');
piece_ptr->as_string().find_first_of('\0');
piece_ptr->as_string().find_last_of('\0');
piece_ptr->as_string().find_first_not_of('\0');
piece_ptr->as_string().find_last_not_of('\0');
piece_ptr->as_string().npos;
// Negative tests, where simply removing as_string() is incorrect. It should
// rather be replaced by an explicit std::string construction.
piece.as_string().data();
piece_ptr->as_string().substr(0);
}
void RemoveAsStringForOperators() {
base::StringPiece piece = "Hello";
std::cout << piece.as_string();
piece.as_string() == "Hello";
piece.as_string() != "Hello";
piece.as_string() < "Hello";
piece.as_string() > "Hello";
piece.as_string() <= "Hello";
piece.as_string() >= "Hello";
// Negative tests, where simply removing as_string() is incorrect. It should
// rather be replaced by an explicit std::string construction.
piece.as_string() += "Hello";
piece.as_string() + "Hello";
piece.as_string() = "Hello";
piece.as_string()[0];
}
void RemoveAsStringWhenConstructingStringMember() {
class S {
public:
explicit S(base::StringPiece piece) : str_(piece.as_string()) {}
private:
std::string str_;
};
}
void RewriteCStyleStringInitialization() {
auto piece_ptr = std::make_unique<base::StringPiece>("Hello");
const std::string str = piece_ptr->as_string();
}
void ReplaceAsStringWithStringConstructor() {
auto piece_ptr = std::make_unique<base::StringPiece>("Hello");
std::string str = piece_ptr->as_string().append(" World");
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment