diff --git a/llvm/include/llvm/Support/SimpleTable.h b/llvm/include/llvm/Support/SimpleTable.h
new file mode 100644
index 0000000000000..85ce21beb53ca
--- /dev/null
+++ b/llvm/include/llvm/Support/SimpleTable.h
@@ -0,0 +1,181 @@
+//==-- SimpleTable.h -- tabular data simple transforms and I/O -------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Defines a simple model for a tabular data container with simple operations
+// over rows and columns. Columns are named and referenced by name.
+// Major use case is to model dynamically-sized "2D" sets of output files by
+// tools like post-link and being able to manipulate columns - for example
+// replace a column listing files with bitcode with a column of .spv files.
+//
+// TODO May make sense to make the interface SQL-like in future if evolves.
+// TODO Use YAML as serialization format.
+// TODO Today cells are strings, but can be extended to other commonly used
+// types such as integers.
+//
+// Example of a table:
+// [Code|Symbols|Properties]
+// a_0.bc|a_0.sym|a_0.props
+// a_1.bc|a_1.sym|a_1.props
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SIMPLETABLE_H
+#define LLVM_SUPPORT_SIMPLETABLE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cassert>
+#include <list>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace util {
+
+// The tabular data abstraction.
+// TODO Supports cells of string type only for now.
+//
+// NOTE: rows keep a pointer to their table, and the table keeps iterators and
+// string views into its own list of column names, so a copied or moved
+// SimpleTable still refers to the object it came from. Use UPtrTy handles.
+class SimpleTable {
+public:
+  using UPtrTy = std::unique_ptr<SimpleTable>;
+
+  // A single row in the table. Basically a vector of string data cells.
+  class Row {
+  public:
+    Row() = default;
+
+    // Creates a row of NCols empty cells that belongs to the table Parent.
+    Row(SimpleTable *Parent, int NCols) : Parent(Parent) {
+      Cells.resize(NCols);
+    }
+
+    // Returns the cell in the column named ColName. The column must exist;
+    // this is only checked by an assertion.
+    StringRef getCell(StringRef ColName) const;
+
+    // Returns the cell in the column named ColName, or DefaultVal if the
+    // parent table has no such column.
+    StringRef getCell(StringRef ColName, StringRef DefaultVal) const;
+
+    // Attaches a row that has no parent yet to the table P.
+    void setParent(SimpleTable *P) {
+      assert(Parent == nullptr && "parent already set");
+      Parent = P;
+    }
+
+  private:
+    friend class SimpleTable;
+
+    explicit Row(SimpleTable *Parent) : Parent(Parent) {}
+
+    // Creates a row whose cells are copies of the strings in R.
+    Row(SimpleTable *Parent, ArrayRef<StringRef> R) : Row(Parent) {
+      Cells.reserve(R.size());
+      for (StringRef Cell : R)
+        Cells.emplace_back(Cell.str());
+    }
+
+    // Cell access by column index (unchecked).
+    std::string &operator[](int I) { return Cells[I]; }
+    const std::string &operator[](int I) const { return Cells[I]; }
+
+    std::vector<std::string> Cells;
+    // Table used to translate column names into cell indices. Starts out null
+    // so that the assertion in setParent() never reads an indeterminate value.
+    SimpleTable *Parent = nullptr;
+  };
+
+public:
+  SimpleTable() = default;
+
+  // Creates an empty table with the given column names. Fails if a name is
+  // repeated.
+  static Expected<UPtrTy> create(ArrayRef<StringRef> ColNames);
+
+  // Creates an empty table with NColumns columns named "0", "1", ...
+  static Expected<UPtrTy> create(int NColumns);
+
+  int getNumColumns() const { return static_cast<int>(ColumnNames.size()); }
+  int getNumRows() const { return static_cast<int>(rows().size()); }
+
+  // Add a column with given title and assign cells to given values. The table
+  // must be empty or the number of the input cells must match column size.
+  Error addColumn(const Twine &Title, ArrayRef<std::string> Cells);
+  Error addColumn(const Twine &Title, ArrayRef<StringRef> Cells);
+
+  // Replaces a column in this table with another column of the same size from
+  // another table. Columns are identified by their names. If source column name
+  // is empty, it is assumed to match the destination's name. The name is not
+  // consulted for a table that has exactly one column.
+  Error replaceColumn(StringRef Name, const SimpleTable &Src,
+                      StringRef SrcName = "");
+
+  // Renames a column. Fails if OldName is unknown or NewName is taken.
+  Error renameColumn(StringRef OldName, StringRef NewName);
+
+  // Removes all columns except those with given names.
+  Error peelColumns(ArrayRef<StringRef> ColNames);
+
+  // Iterates all cells top-down left-right and adds their values to given
+  // container.
+  void linearize(std::vector<std::string> &Res) const;
+
+  // Serializes the table to a stream. Cells of a row are separated by ColSep;
+  // if WriteTitles is set, a "[...]" line with the column names comes first.
+  void write(raw_ostream &Out, bool WriteTitles = true,
+             char ColSep = '|') const;
+
+  // De-serializes a table from a memory buffer.
+  static Expected<UPtrTy> read(MemoryBuffer *Buf, char ColSep = '|');
+
+  // De-serializes a table from a file.
+  static Expected<UPtrTy> read(const Twine &FileName, char ColSep = '|');
+
+  const SmallVectorImpl<Row> &rows() const { return Rows; }
+
+  // Appends a row; R must provide exactly one cell per column.
+  void addRow(ArrayRef<StringRef> R) {
+    assert((R.size() == ColumnNames.size()) && "column number mismatch");
+    Rows.emplace_back(Row(this, R));
+  }
+
+  // Returns the index of the column named ColName, or -1 if there is none.
+  int getColumnId(StringRef ColName) const;
+
+  // Row access by index.
+  Row &operator[](int I) { return Rows[I]; }
+  const Row &operator[](int I) const { return Rows[I]; }
+
+private:
+  // Appends a new column name; fails if the name is already in use.
+  Error addColumnName(StringRef ColName);
+  // Recomputes ColumnName2Num and ColumnNum2Name from ColumnNames.
+  void rebuildName2NumMapping();
+
+  // Keys are views of the strings owned by ColumnNames.
+  std::map<StringRef, int> ColumnName2Num;
+  // Use list as the holder of string objects as modification never invalidate
+  // element addresses and iterators, unlike vector.
+  std::list<std::string> ColumnNames;
+  SmallVector<std::list<std::string>::iterator, 4> ColumnNum2Name;
+  SmallVector<Row, 4> Rows;
+};
+
+} // namespace util
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_SIMPLETABLE_H
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 75a62f45da366..158cd92c23b53 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -125,6 +125,7 @@ add_llvm_component_library(LLVMSupport
   ScopedPrinter.cpp
   SHA1.cpp
   Signposts.cpp
+  SimpleTable.cpp
   SmallPtrSet.cpp
   SmallVector.cpp
   SourceMgr.cpp
diff --git a/llvm/lib/Support/SimpleTable.cpp b/llvm/lib/Support/SimpleTable.cpp
new file mode 100644
index 0000000000000..0f65686a61f23
--- /dev/null
+++ b/llvm/lib/Support/SimpleTable.cpp
@@ -0,0 +1,300 @@
+//==-- SimpleTable.cpp -- tabular data simple transforms and I/O -----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SimpleTable.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/LineIterator.h"
+
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <set>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+// Wraps Msg into an "invalid argument" string error.
+static Error makeError(const Twine &Msg) {
+  return createStringError(errc::invalid_argument, Msg);
+}
+
+namespace llvm {
+namespace util {
+
+// Returns the cell in column ColName. The column must exist in the parent
+// table; this is only checked by the assertion.
+StringRef SimpleTable::Row::getCell(StringRef ColName) const {
+  int I = Parent->getColumnId(ColName);
+  assert(I >= 0 && "column name not found");
+  return Cells[I];
+}
+
+// Returns the cell in column ColName, or DefaultVal if no such column exists.
+StringRef SimpleTable::Row::getCell(StringRef ColName,
+                                    StringRef DefaultVal) const {
+  int I = Parent->getColumnId(ColName);
+  return (I >= 0) ? Cells[I] : DefaultVal;
+}
+
+// Creates an empty table whose columns are named after ColNames, in order.
+Expected<SimpleTable::UPtrTy>
+SimpleTable::create(ArrayRef<StringRef> ColNames) {
+  auto Res = std::make_unique<SimpleTable>();
+
+  for (StringRef N : ColNames)
+    if (Error Err = Res->addColumnName(N))
+      return std::move(Err);
+  return std::move(Res);
+}
+
+// Creates an empty table with NColumns columns named "0", "1", ...
+Expected<SimpleTable::UPtrTy> SimpleTable::create(int NColumns) {
+  auto Res = std::make_unique<SimpleTable>();
+
+  for (int I = 0; I < NColumns; I++)
+    if (Error Err = Res->addColumnName(Twine(I).str()))
+      return std::move(Err);
+  return std::move(Res);
+}
+
+// Returns the index of column ColName, or -1 if there is no such column.
+int SimpleTable::getColumnId(StringRef ColName) const {
+  auto It = ColumnName2Num.find(ColName);
+  return (It != ColumnName2Num.end()) ? It->second : -1;
+}
+
+// Appends ColName to the column list and records it in both lookup tables.
+Error SimpleTable::addColumnName(StringRef ColName) {
+  if (ColumnName2Num.find(ColName) != ColumnName2Num.end())
+    return makeError("column already exists: " + ColName);
+  ColumnNames.emplace_back(ColName.str());
+  // The map key is a view of the string that now lives in the list.
+  ColumnName2Num[ColumnNames.back()] = static_cast<int>(ColumnNames.size()) - 1;
+  ColumnNum2Name.push_back(std::prev(ColumnNames.end()));
+  return Error::success();
+}
+
+// Appends a column named Title holding Cells. If the table has no rows yet,
+// they are created here with empty cells for the columns that already exist,
+// so that every row keeps exactly one cell per column.
+Error SimpleTable::addColumn(const Twine &Title, ArrayRef<std::string> Cells) {
+  const size_t N = Cells.size();
+  if (!Rows.empty() && (Rows.size() != N))
+    return makeError("column size mismatch for " + Title);
+  const int NumOldColumns = getNumColumns();
+  if (Error Err = addColumnName(Title.str()))
+    return Err;
+  if (Rows.empty()) {
+    Rows.reserve(N);
+    for (size_t I = 0; I < N; ++I)
+      Rows.emplace_back(Row(this, NumOldColumns));
+  }
+  for (size_t I = 0; I < N; ++I)
+    Rows[I].Cells.push_back(Cells[I]);
+  return Error::success();
+}
+
+// Same as above for cells given as string views; they are copied first.
+Error SimpleTable::addColumn(const Twine &Title, ArrayRef<StringRef> Cells) {
+  std::vector<std::string> CellsVec(Cells.begin(), Cells.end());
+  return addColumn(Title, CellsVec);
+}
+
+// Copies column SrcName of Src over column Name of this table.
+Error SimpleTable::replaceColumn(StringRef Name, const SimpleTable &Src,
+                                 StringRef SrcName) {
+  // An empty source name means "same name as the destination column".
+  if (SrcName.empty())
+    SrcName = Name;
+  if (Rows.size() != Src.rows().size())
+    return makeError("column length mismatch for '" + Name + "' and '" +
+                     SrcName + "'");
+  // If either table has no columns there is nothing to copy from or into.
+  if ((getNumColumns() == 0) || (Src.getNumColumns() == 0))
+    return makeError("empty table");
+  // A single-column table is matched without consulting the name, so that
+  // e.g. a title-less one-column table can serve as the source.
+  const int Cdst = getNumColumns() > 1 ? getColumnId(Name) : 0;
+  const int Csrc = Src.getNumColumns() > 1 ? Src.getColumnId(SrcName) : 0;
+  // getColumnId() yields -1 for unknown names; never use that as an index.
+  if (Cdst < 0)
+    return makeError("column not found: " + Name);
+  if (Csrc < 0)
+    return makeError("column not found: " + SrcName);
+
+  for (unsigned R = 0; R < Rows.size(); ++R)
+    Rows[R][Cdst] = Src[R][Csrc];
+  return Error::success();
+}
+
+// Renames column OldName to NewName, keeping its position and cells.
+Error SimpleTable::renameColumn(StringRef OldName, StringRef NewName) {
+  const int I = getColumnId(OldName);
+
+  if (I < 0)
+    return makeError("column not found: " + OldName);
+  if (OldName == NewName)
+    return Error::success();
+  if (getColumnId(NewName) >= 0)
+    return makeError("column already exists: " + NewName);
+  // Copy the new name first: the arguments may be views of the very string
+  // that is overwritten below.
+  std::string NewNameStr = NewName.str();
+  // The map key is a view of that string too, so the entry must be removed
+  // while the key still holds the old name; changing a key in place would
+  // break the ordering of the map.
+  ColumnName2Num.erase(OldName);
+  *ColumnNum2Name[I] = std::move(NewNameStr);
+  ColumnName2Num[StringRef(*ColumnNum2Name[I])] = I;
+  return Error::success();
+}
+
+// Recomputes both lookup tables from the current contents of ColumnNames.
+void SimpleTable::rebuildName2NumMapping() {
+  int Ind = 0;
+  ColumnName2Num.clear();
+  ColumnNum2Name.resize(ColumnNames.size());
+
+  for (auto It = ColumnNames.begin(); It != ColumnNames.end(); ++It, ++Ind) {
+    ColumnNum2Name[Ind] = It;
+    ColumnName2Num[*It] = Ind;
+  }
+}
+
+// Drops every column whose name is not listed in ColNames.
+Error SimpleTable::peelColumns(ArrayRef<StringRef> ColNames) {
+  std::set<StringRef> Names(ColNames.begin(), ColNames.end());
+
+  if (Names.size() != ColNames.size())
+    return makeError("duplicated column names found");
+  // Check all names up front so that a failing call leaves the table intact.
+  for (StringRef N : Names)
+    if (getColumnId(N) < 0)
+      return makeError("column not found " + N);
+
+  // go backwards not to affect prior column numbers
+  for (int Col = getNumColumns() - 1; Col >= 0; --Col) {
+    std::list<std::string>::iterator Iter = ColumnNum2Name[Col];
+    // see if current column is among those which will stay
+    if (Names.count(StringRef(*Iter)) > 0)
+      continue; // yes
+    // no - remove from titles (ColumnNum2Name will be updated in rebuild below)
+    // The map entry goes first: its key is a view of the list element.
+    ColumnName2Num.erase(*Iter);
+    ColumnNames.erase(Iter);
+    // ... and from data
+    for (Row &R : Rows)
+      R.Cells.erase(R.Cells.begin() + Col);
+  }
+  rebuildName2NumMapping();
+  return Error::success();
+}
+
+// Appends all cells to Res, row by row.
+void SimpleTable::linearize(std::vector<std::string> &Res) const {
+  for (const Row &R : Rows)
+    Res.insert(Res.end(), R.Cells.begin(), R.Cells.end());
+}
+
+static constexpr char COL_TITLE_LINE_OPEN[] = "[";
+static constexpr char COL_TITLE_LINE_CLOSE[] = "]";
+static constexpr char ROW_SEP[] = "\n";
+
+// Writes an optional "[title|title|...]" line followed by one line per row.
+void SimpleTable::write(raw_ostream &Out, bool WriteTitles, char ColSep) const {
+  if (WriteTitles) {
+    Out << COL_TITLE_LINE_OPEN;
+
+    for (unsigned I = 0; I < ColumnNames.size(); ++I) {
+      if (I != 0)
+        Out << ColSep;
+      Out << *ColumnNum2Name[I];
+    }
+    Out << COL_TITLE_LINE_CLOSE << ROW_SEP;
+  }
+  const unsigned N = ColumnNames.size();
+
+  for (const Row &R : Rows) {
+    for (unsigned J = 0; J < N; ++J) {
+      if (J != 0)
+        Out << ColSep;
+      Out << R.Cells[J];
+    }
+    Out << ROW_SEP;
+  }
+}
+
+// Parses a table from Buf. A first line of the form "[a|b|...]" provides the
+// column titles; without it the columns are named "0", "1", ... after the
+// number of cells in the first row. All rows must have the same size.
+Expected<SimpleTable::UPtrTy> SimpleTable::read(MemoryBuffer *Buf,
+                                                char ColSep) {
+  if (!Buf)
+    return makeError("no buffer to read the table from");
+  line_iterator LI(*Buf);
+
+  if (LI.is_at_end() || LI->empty()) // empty table
+    return std::make_unique<SimpleTable>();
+  UPtrTy Res;
+
+  if (LI->startswith(COL_TITLE_LINE_OPEN)) {
+    if (!LI->endswith(COL_TITLE_LINE_CLOSE))
+      return createStringError(errc::invalid_argument, "malformed title line");
+    // column titles present
+    StringRef L = LI->substr(1, LI->size() - 2); // trim '[' and ']'
+    SmallVector<StringRef, 4> Titles;
+    L.split(Titles, ColSep);
+    auto Table = SimpleTable::create(Titles);
+    if (!Table)
+      return Table.takeError();
+    Res = std::move(Table.get());
+    ++LI;
+  }
+  // parse rows
+  while (!LI.is_at_end()) {
+    SmallVector<StringRef, 4> Vals;
+    LI->split(Vals, ColSep);
+
+    if (!Res) {
+      auto Table = SimpleTable::create(static_cast<int>(Vals.size()));
+      if (!Table)
+        return Table.takeError();
+      Res = std::move(Table.get());
+    }
+    if (static_cast<int>(Vals.size()) != Res->getNumColumns())
+      return createStringError(errc::invalid_argument,
+                               "row size mismatch at line " +
+                                   Twine(LI.line_number()));
+    Res->addRow(Vals);
+    ++LI;
+  }
+  return std::move(Res);
+}
+
+// Reads FileName into memory and parses it as a table.
+Expected<SimpleTable::UPtrTy> SimpleTable::read(const Twine &FileName,
+                                                char ColSep) {
+  llvm::ErrorOr<std::unique_ptr<MemoryBuffer>> MemBuf =
+      MemoryBuffer::getFileAsStream(FileName);
+  if (std::error_code EC = MemBuf.getError())
+    return createFileError(Twine("can't read ") + FileName, EC);
+  // No buffer and no error code either: report a plain string error, since a
+  // file error is not meant to be built from a 'success' code.
+  if (!MemBuf->get())
+    return makeError("can't read " + FileName);
+  return read(MemBuf->get(), ColSep);
+}
+
+} // namespace util
+} // namespace llvm
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index ebb7aaa3ca753..0fcfdb1ae8326 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -59,6 +59,7 @@ add_llvm_unittest(SupportTests
   ReverseIterationTest.cpp
   ReplaceFileTest.cpp
   ScaledNumberTest.cpp
+  SimpleTableTest.cpp
   SourceMgrTest.cpp
   SpecialCaseListTest.cpp
   StringPool.cpp
diff --git a/llvm/unittests/Support/SimpleTableTest.cpp b/llvm/unittests/Support/SimpleTableTest.cpp
new file mode 100644
index 0000000000000..c17ea89ddfe28
--- /dev/null
+++ b/llvm/unittests/Support/SimpleTableTest.cpp
@@ -0,0 +1,85 @@
+//===- llvm/unittest/Support/SimpleTableTest.cpp -- Simple table tests ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SimpleTable.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::util;
+
+namespace {
+
+// Round trip: a table parsed from text serializes back to the same text.
+TEST(SimpleTable, IO) {
+  auto Content = "[Code|Symbols|Properties]\n"
+                 "a_0.bc|a_0.sym|a_0.props\n"
+                 "a_1.bc|a_1.sym|a_1.props\n";
+  auto MemBuf = MemoryBuffer::getMemBuffer(Content);
+  // Parse the table; toString() consumes the error so gtest can report it.
+  auto Table = SimpleTable::read(MemBuf.get());
+  if (!Table)
+    FAIL() << "SimpleTable::read failed: " << toString(Table.takeError());
+
+  std::string Serialized;
+  {
+    llvm::raw_string_ostream OS(Serialized);
+    // Serialize
+    Table->get()->write(OS);
+  }
+  // Check that the original and the serialized version are equal
+  ASSERT_EQ(Serialized, Content);
+}
+
+// Replaces, adds and peels columns, then checks the serialized result.
+TEST(SimpleTable, Operations) {
+  auto Content = "[Code|Symbols|Properties]\n"
+                 "a_0.bc|a_0.sym|a_0.props\n"
+                 "a_1.bc|a_1.sym|a_1.props\n";
+
+  auto ReplaceCodeWith = "a_0.spv\n"
+                         "a_1.spv\n";
+  auto MemBuf = MemoryBuffer::getMemBuffer(Content);
+  auto MemBufRepl = MemoryBuffer::getMemBuffer(ReplaceCodeWith);
+  // Create the tables; each result is checked so that its error is consumed.
+  auto Table = SimpleTable::read(MemBuf.get());
+  if (!Table)
+    FAIL() << "SimpleTable::read failed: " << toString(Table.takeError());
+  auto TableRepl = SimpleTable::read(MemBufRepl.get());
+  if (!TableRepl)
+    FAIL() << "SimpleTable::read failed: " << toString(TableRepl.takeError());
+
+  // -- Replace (the source has a single column, so its name is not consulted)
+  if (Error Err = Table->get()->replaceColumn("Code", *TableRepl->get(), ""))
+    FAIL() << "SimpleTable::replaceColumn failed: " << toString(std::move(Err));
+
+  // -- Add
+  SmallVector<std::string, 2> NewCol = {"a_0.mnf", "a_1.mnf"};
+  if (Error Err = Table->get()->addColumn("Manifest", NewCol))
+    FAIL() << "SimpleTable::addColumn failed: " << toString(std::move(Err));
+
+  // -- Peel
+  if (Error Err = Table->get()->peelColumns({"Code", "Properties", "Manifest"}))
+    FAIL() << "SimpleTable::peelColumns failed: " << toString(std::move(Err));
+
+  // Check the result
+  std::string Result;
+  {
+    llvm::raw_string_ostream OS(Result);
+    // Serialize
+    Table->get()->write(OS);
+  }
+  auto Expected = "[Code|Properties|Manifest]\n"
+                  "a_0.spv|a_0.props|a_0.mnf\n"
+                  "a_1.spv|a_1.props|a_1.mnf\n";
+  ASSERT_EQ(Result, Expected);
+}
+
+} // namespace