LLVM Pass转储类或结构的内存布局

一、背景介绍

有次因故需要了解std::string类型内存布局,简单折腾一番,分享了一篇

《GDB查看结构或类的内存布局及分离终端》
https://scz.617.cn/unix/202411151604.txt

bluerust随即让我看下面这篇

STL容器逆向与实战 – [2023-02-07]
https://mp.weixin.qq.com/s/bfzeGbieYWaPS3_iB-gSeg

他的原话是,主要看"llvm pass dump data type"。看了这篇,于我而言,属于"每个字都认识"系列,大概明白其基本原理是啥,但完全不了解所涉及的"LLVM Pass"技术。我不会C++编程,基本未碰上过C++ STL容器逆向需求,不在意上文中那些具体容器的实现细节。我感兴趣的是,如何转储类或结构的内存布局,也就是上文第一部分的内容。本文面向"LLVM Pass"小白提供完整可操作示例,聚焦"转储内存布局"。

二、dumpclass.cpp

参看

————————————————————————–
Writing an LLVM Pass (legacy PM version)
https://llvm.org/docs/WritingAnLLVMPass.html

Writing an LLVM Pass
https://llvm.org/docs/WritingAnLLVMNewPMPass.html
————————————————————————–

看雪那篇是Legacy格式的"LLVM Pass",此处dumpclass.cpp改写成New格式。支持两个命令行参数,允许成员名中包含相对偏移或绝对偏移,允许过滤类或结构名。

————————————————————————–
#include <string>
#include <utility>

#include "llvm/ADT/SetVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

/* Sentinel default of the "substr" option; while the option still holds this value no name filtering is applied. */
#define DEFAULTSUBSTR "<default>"

using namespace llvm;

namespace {

/*
 * -passmode=<int>
 *
 * Selects how member offsets are printed by DumpClass: a value > 0 prints
 * offsets accumulated from the outermost struct, any other value (default 0)
 * prints offsets relative to the directly enclosing struct.
 */
static cl::opt<int> passmode
(
    "passmode",
    cl::desc( "absolute offset or not" ),
    cl::value_desc( "int" ),
    cl::init( 0 )
);

/*
 * -substr=<std::string>
 *
 * Optional filter: when set to anything other than DEFAULTSUBSTR, DumpClass
 * only dumps structs whose name contains this text.
 */
static cl::opt<std::string> substr
(
    "substr",
    cl::desc( "part of struct name" ),
    cl::value_desc( "std::string" ),
    cl::init( DEFAULTSUBSTR )
);

/*
 * New-pass-manager function pass that prints, to stderr, the memory layout of
 * every non-opaque struct/class type allocated on the stack of main().
 *
 * Output is controlled by the two command-line options defined in this file:
 *   passmode   > 0 prints offsets accumulated from the outermost struct,
 *              otherwise offsets are relative to the enclosing struct
 *   substr     when it differs from DEFAULTSUBSTR, only structs whose name
 *              contains it are dumped
 *
 * The pass never modifies the IR.
 */
struct DumpClass : PassInfoMixin<DumpClass>
{
    /*
     * Mark the pass as required so the pass manager does not skip it on
     * functions carrying the optnone attribute.
     */
    static bool isRequired ()
    {
        return true;
    }

    /*
     * Name of a struct type, or `fallback` when the type has no name.
     *
     * StructType::getName() must not be called on literal struct types, so the
     * name is only fetched after hasName() confirmed there is one.
     */
    static std::string structName ( const StructType *stc, const char *fallback = "" )
    {
        return stc->hasName() ? stc->getName().str() : std::string( fallback );
    }

    /*
     * Render a C-like spelling for an LLVM type.
     *
     * type   type to describe
     * data   data layout used to size types that have no dedicated spelling
     *
     * Integers become uint<bits>_t, pointers "<pointee>*" (or "ptr" for opaque
     * pointers), arrays "<element>[<count>]", structs their name. Anything
     * else is reported as "unknown_<alloc size in bits>".
     */
    std::string getTypeName ( Type *type, const DataLayout &data )
    {
        if ( auto *intTy = dyn_cast<IntegerType>( type ) )
        {
            return "uint" + std::to_string( intTy->getBitWidth() ) + "_t";
        }
        if ( auto *ptrTy = dyn_cast<PointerType>( type ) )
        {
            /* An opaque pointer has no pointee type to recurse into. */
            if ( ptrTy->isOpaque() )
            {
                return "ptr";
            }
            return getTypeName( ptrTy->getPointerElementType(), data ) + "*";
        }
        if ( auto *arrTy = dyn_cast<ArrayType>( type ) )
        {
            return getTypeName( arrTy->getElementType(), data )
                 + "[" + std::to_string( arrTy->getNumElements() ) + "]";
        }
        if ( type->isFloatTy() )
        {
            return "float";
        }
        if ( type->isDoubleTy() )
        {
            return "double";
        }
        if ( auto *stc = dyn_cast<StructType>( type ) )
        {
            return structName( stc, "<unnamed>" );
        }
        /*
         * Reached through pointers such as the vtable slot "i32 (...)**".
         * Function (and other unsized) types have no alloc size, so they must
         * not be handed to the DataLayout size query below.
         */
        if ( type->isFunctionTy() )
        {
            return "function";
        }
        if ( !type->isSized() )
        {
            return "unsized";
        }
        return "unknown_" + std::to_string( data.getTypeAllocSizeInBits( type ) );
    }

    /*
     * Recursively print one type as a C-like declaration.
     *
     * depth    nesting level, 4 spaces of indentation per level
     * type     type to print; structs are expanded member by member
     * suffix   text appended to "field_" to form the member name
     * data     data layout providing member offsets and sizes
     * base     offset of `type` from the outermost struct (used when mode > 0)
     * mode     > 0: member names carry base + offset; otherwise only the
     *          offset inside the directly enclosing struct
     *
     * Each member is named field_<offset>_<alloc size in bytes>.
     */
    void dumpType ( int depth, Type *type, const std::string &suffix, const DataLayout *data, unsigned base, int mode )
    {
        const std::string   blank( depth * 4, ' ' );
        auto               *stc = dyn_cast<StructType>( type );

        /* Leaf member; opaque structs have no layout to expand either. */
        if ( !stc || stc->isOpaque() )
        {
            errs() << blank << getTypeName( type, *data ) << " field_" << suffix << ";\n";
            return;
        }

        const StructLayout *sl       = data->getStructLayout( stc );
        const bool          absolute = ( mode > 0 );

        errs() << blank << structName( stc, "<unnamed>" ) << "\n" << blank << "{\n";
        for ( unsigned i = 0, n = stc->getNumElements(); i < n; ++i )
        {
            Type           *subType = stc->getElementType( i );
            const unsigned  size    = data->getTypeAllocSize( subType );
            unsigned        offset  = sl->getElementOffset( i );

            if ( absolute )
            {
                offset += base;
            }
            dumpType
            (
                depth + 1,
                subType,
                std::to_string( offset ) + "_" + std::to_string( size ),
                data,
                absolute ? offset : 0,
                mode
            );
        }
        errs() << blank << "} field_" << suffix << ";\n";
    }

    /*
     * Collect the struct types allocated by alloca instructions in main() and
     * dump each of them once, in order of first appearance.
     *
     * Functions other than main() are ignored.
     */
    void visitor ( Function &F )
    {
        if ( F.getName() != "main" )
        {
            return;
        }

        const DataLayout       &data      = F.getParent()->getDataLayout();
        const std::string      &filter    = substr.getValue();
        const bool              filtering = ( filter != DEFAULTSUBSTR );
        /* SetVector de-duplicates while keeping insertion order, so the
         * field_<index> numbering does not depend on pointer values. */
        SetVector<StructType *> types;

        for ( auto &B : F )
        {
            for ( auto &I : B )
            {
                auto *A = dyn_cast<AllocaInst>( &I );
                if ( !A )
                {
                    continue;
                }

                auto *stc = dyn_cast<StructType>( A->getAllocatedType() );
                if ( !stc || stc->isOpaque() )
                {
                    continue;
                }
                if ( filtering && structName( stc ).find( filter ) == std::string::npos )
                {
                    continue;
                }
                types.insert( stc );
            }
        }

        int index = 0;

        for ( StructType *type : types )
        {
            dumpType( 0, type, std::to_string( index++ ), &data, 0, passmode );
        }
    }

    /*
     * Pass entry point: dump layouts for F and report that no analysis was
     * invalidated.
     */
    PreservedAnalyses run ( Function &F, FunctionAnalysisManager & )
    {
        visitor( F );
        return PreservedAnalyses::all();
    }

};

}

/*
 * Build the plugin descriptor for DumpClass.
 *
 * The registration callback hooks the pass into a PassBuilder in two ways:
 *   - as the pipeline element named "DumpClass", e.g. opt -passes=DumpClass
 *   - at the pipeline-start extension point, wrapped in a
 *     module-to-function adaptor
 */
PassPluginLibraryInfo getDumpClassPluginInfo ()
{
    const auto callback = []( PassBuilder &PB )
    {
        PB.registerPipelineParsingCallback
        (
            [](
                StringRef Name,
                FunctionPassManager &FPM,
                ArrayRef<PassBuilder::PipelineElement>
            )
            {
                /* Returning false leaves unknown names to other parsers. */
                if ( Name != "DumpClass" )
                {
                    return false;
                }
                FPM.addPass( DumpClass() );
                return true;
            }
        );
        PB.registerPipelineStartEPCallback
        (
            /* The second argument (optimization level) is not needed. */
            []( ModulePassManager &MPM, auto )
            {
                FunctionPassManager FPM;

                FPM.addPass( DumpClass() );
                MPM.addPass( createModuleToFunctionPassAdaptor( std::move( FPM ) ) );
            }
        );
    };

    return { LLVM_PLUGIN_API_VERSION, "DumpClass", LLVM_VERSION_STRING, callback };
}

extern “C” LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo ()
{
return getDumpClassPluginInfo();
}
————————————————————————–

从dumpclass.cpp生成dumpclass.so

clang-14 \
-I"/usr/include/llvm-14" \
-I"/usr/include/llvm-c-14" \
-Wall -pipe \
-fPIC -shared -Wl,-soname,dumpclass.so \
-O3 -s \
-o dumpclass.so dumpclass.cpp

后面会演示如何将dumpclass.so用作"LLVM Pass"来转储类或结构的内存布局。

三、dumptarget.cpp

dumptarget.cpp是假想的目标程序,将来根据dumptarget.cpp转储其中的类或结构。

————————————————————————–
#include <deque>
#include <map>
#include <unordered_map>
#include <string>
#include <iostream>

/*
 * Sample class whose memory layout is dumped later on. It combines one
 * private std::string with two public nested-container members.
 */
class TargetClass
{
private:
/* Never accessed by the program. */
std::string unused;
public:
/* Deque of maps; main() pushes a copy of myMap into it. */
std::deque<std::map<int, std::unordered_map<std::string, int>>> myDeque;
/* int -> (string -> int) map filled by main(). */
std::map<int, std::unordered_map<std::string, int>> myMap;
};

/*
 * Fill a local TargetClass object and print its contents.
 *
 * Two entries are stored in obj.myMap, a copy of the map is pushed into
 * obj.myDeque, and every key/value of every map in the deque is written to
 * stdout. Always returns 0.
 */
int main ( int argc, char * argv[] )
{
    /* Local object, so its storage belongs to main(). */
    TargetClass obj;

    obj.myMap[1]["one"] = 1;
    obj.myMap[2]["two"] = 2;

    obj.myDeque.push_back( obj.myMap );

    for ( const auto &d : obj.myDeque )
    {
        for ( const auto &pair : d )
        {
            std::cout << "Key : " << pair.first << " -> Value : ";
            for ( const auto &innerpair : pair.second )
            {
                std::cout << innerpair.first << " -> " << innerpair.second;
            }
            std::cout << std::endl;
        }
    }

    return 0;
}
————————————————————————–

四、用dumpclass.so处理dumptarget.cpp

有多种办法加载dumpclass.so,此处演示其中之一,依次执行这两条命令

clang-14 \
-Wall -pipe -S -emit-llvm \
-Xclang -disable-O0-optnone \
-o dumptarget.ll dumptarget.cpp

opt-14 \
-disable-output \
-load ./dumpclass.so -load-pass-plugin ./dumpclass.so \
-passes=DumpClass -passmode=1 -substr="::basic_string" \
dumptarget.ll 2>&1 | less

先从dumptarget.cpp生成dumptarget.ll,再用dumpclass.so处理dumptarget.ll。正常情况下会得到

————————————————————————–
class.std::__cxx11::basic_string
{
struct.std::__cxx11::basic_string<char>::_Alloc_hider
{
uint8_t* field_0_8;
} field_0_8;
uint64_t field_8_8;
union.anon
{
uint64_t field_16_8;
uint8_t[8] field_24_8;
} field_16_16;
} field_0;
————————————————————————–

尝试不给opt指定passmode、substr参数,观察输出,加强理解。

五、pahole

pahole也能转储类或结构的内存布局,不如dumpclass.cpp,出于完备性写在此处。

g++ -Wall -pipe -std=c++11 -O0 -g -o dumptarget_dbg dumptarget.cpp

pahole --hex -E -M -C TargetClass dumptarget_dbg | grep -A 25 "class basic_string"

正常情况下会得到

————————————————————————–
/* typedef string */ class basic_string<char, std::char_traits<char>, std::allocator<char> > {
struct _Alloc_hider : allocator<char> {
/* class allocator<char> : public new_allocator<char> {
public:

/* class new_allocator<char> {
public:

}<ancestor>; */ /* 0 0 */

/* XXX last struct has 1 byte of padding */
}<ancestor>; */ /* 0 0x1 */

/* XXX last struct has 1 byte of padding */
/* XXX 65535 bytes hole, try to pack */

/* typedef pointer -> pointer -> pointer */ char * _M_p; /* 0 0x8 */
}_M_dataplus; /* 0 0x8 */
/* typedef size_type -> size_type -> size_type -> size_t */ long unsigned int _M_string_length; /* 0x8 0x8 */
union {
char _M_local_buf[16]; /* 0x10 0x10 */
/* typedef size_type -> size_type -> size_type -> size_t */ long unsigned int _M_allocated_capacity; /* 0x10 0x8 */
}; /* 0x10 0x10 */
public:

} unused; /* 0 0x20 */
————————————————————————–

六、clang -Xclang -fdump-record-layouts

clang -Xclang -fdump-record-layouts dumptarget.cpp 2> /dev/null | grep -A 10 "0 | class std::basic_string" | less

正常情况下会得到

————————————————————————–
0 | class std::basic_string<char>
0 | struct std::basic_string<char>::_Alloc_hider _M_dataplus
0 | class std::allocator<char> (base) (empty)
0 | class __gnu_cxx::new_allocator<char> (base) (empty)
0 | std::basic_string<char>::pointer _M_p
8 | std::basic_string<char>::size_type _M_string_length
16 | union std::basic_string<char>::(anonymous at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:179:7)
16 | char[16] _M_local_buf
16 | std::basic_string<char>::size_type _M_allocated_capacity
| [sizeof=32, dsize=32, align=8,
| nvsize=32, nvalign=8]

————————————————————————–

七、VC有隐藏选项

假设VirtualBaseClass.cpp如下

————————————————————————–
#include <stdio.h>
#include <windows.h>

/* Common base class with a single public int member; Derived1 and Derived2 inherit it virtually. */
class Base
{
public:
int x;
};

/* First class deriving virtually from Base; adds the public int member y. */
class Derived1 : virtual public Base
{
public:
int y;
};

/* Second class deriving virtually from Base; adds the public int member z. */
class Derived2 : virtual public Base
{
public:
int z;
};

/*
 * Derives from both Derived1 and Derived2 and adds the public int member w.
 * Because both bases inherit Base virtually, Multiple contains one shared
 * Base subobject, so the member x is unambiguous.
 */
class Multiple : public Derived1, public Derived2
{
public:
int w;
};

/* Creates one Multiple object, writes the x member inherited from Base, and returns 0. */
int __cdecl main ( int argc, char * argv[] )
{
Multiple m;

/* x lives in the virtually inherited Base subobject. */
m.x = 10;
return 0;
}
————————————————————————–

VC编译时有隐藏选项,查看C++类的内存布局

cl /d1reportSingleClassLayoutBase VirtualBaseClass.cpp
cl /d1reportSingleClassLayoutDerived1 VirtualBaseClass.cpp
cl /d1reportSingleClassLayoutDerived2 VirtualBaseClass.cpp
cl /d1reportSingleClassLayoutDerived VirtualBaseClass.cpp (子串匹配)
cl /d1reportSingleClassLayoutMultiple VirtualBaseClass.cpp
cl /d1reportAllClassLayout VirtualBaseClass.cpp (输出太多,慎用)

用ASCII图显示内存布局,向stdout输出,不影响其他编译选项。

Spread the word. Share this post!

Meet The Author