从ELF抠代码

标题: Unix系列(19)–从ELF抠代码

创建: 2025-09-13 09:30
修改: 2025-09-28 11:55

————————————————————————–

目录:

☆ 原始需求
☆ 交叉编译环境
1) gcc
☆ 自制测试用例
1) hello.c
☆ 手工构造ELF
1) hello_arm64.json
3) skeleton.c
4) smallelf_arm64_1.py
☆ 参考资源

————————————————————————–

☆ 原始需求

有个ARM64的ELF,并不打算真地执行它,只是Angr符号执行时需要处理其中某个目标
函数,额外涉及strlen()这种库函数。这个ELF很大,想用某种手段切掉大量无关函
数,只保留目标函数及必要的ELF信息,将切割剩下的ELF留作测试样本。这个需求有
什么好的满足方式?不想留一个24MB的so做测试样本。

计划在x64中交叉编译ARM64版hello.c,其中含有若干目标函数,作为后续研究对象。
摸索出各种解决方案后,再实测初始样本。

☆ 交叉编译环境

1) gcc

参[1],下载交叉编译工具链。

mkdir /home/scz/src/aarch64-none-linux-gnu
cd /home/scz/src/aarch64-none-linux-gnu
tar xfJ /tmp/arm-gnu-toolchain-14.3.rel1-x86_64-aarch64-none-linux-gnu.tar.xz --strip-components=1

"--strip-components=N"必须放在尾部,解包时将路径的前N层目录去掉。

☆ 自制测试用例

1) hello.c

————————————————————————–
#if 0

export PATH=/home/scz/src/aarch64-none-linux-gnu/bin:$PATH
aarch64-none-linux-gnu-gcc -Wall -pipe -O3 -s -o hello_arm64 hello.c

#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * XOR each byte of the NUL-terminated string `in` with `key` and store the
 * result at the same index of `out`. No terminator is written to `out`.
 * Always returns the marker value 0x5120.
 *
 * The attribute keeps the body unoptimized, un-inlined and emitted even when
 * unreferenced, so that the function survives in the ELF as a test target.
 *
 * NOTE: strlen( in ) is evaluated on every iteration. If `out` aliases `in`
 * (as the caller in main() does), a byte equal to `key` becomes NUL and ends
 * the loop early.
 */
__attribute__((optimize("O0"), noinline, used))
static unsigned int bar ( char *in, char *out, char key )
{
    int i;

    for ( i = 0; i < strlen( in ); i++ )
    {
        out[i] = in[i] ^ key;
    }
    return 0x5120LL;
}

/*
 * Same transformation as bar(): XOR each byte of the NUL-terminated string
 * `in` with `key` into `out` (no terminator written), but return the marker
 * value 0x1314 instead.
 *
 * The attribute keeps the body unoptimized, un-inlined and emitted even when
 * unreferenced.
 *
 * NOTE: strlen( in ) is evaluated on every iteration. If `out` aliases `in`,
 * a byte equal to `key` becomes NUL and ends the loop early.
 */
__attribute__((optimize("O0"), noinline, used))
static unsigned int baz ( char *in, char *out, char key )
{
    int i;

    for ( i = 0; i < strlen( in ); i++ )
    {
        out[i] = in[i] ^ key;
    }
    return 0x1314LL;
}

/*
 * Derive a 32-bit value from `n`. The formula is selected by n % 4; each
 * branch combines `n` with the constant 0xbaaad0bf and a small integer using
 * a different pair of operators. Arithmetic is unsigned, so the product
 * wraps modulo 2^32.
 *
 * The attribute keeps the body unoptimized, un-inlined and emitted even when
 * unreferenced.
 */
__attribute__((optimize("O0"), noinline, used))
static unsigned int foo ( unsigned int n )
{
    unsigned int mod = n % 4;
    unsigned int ret = 0;

    if ( mod == 0 )
    {
        ret = ( n | 0xbaaad0bf ) * ( 2 ^ n );
    }
    else if ( mod == 1 )
    {
        ret = ( n & 0xbaaad0bf ) * ( 3 + n );
    }
    else if ( mod == 2 )
    {
        ret = ( n ^ 0xbaaad0bf ) * ( 4 | n );
    }
    else
    {
        ret = ( n + 0xbaaad0bf ) * ( 5 & n );
    }
    return ret;
}

/*
 * Test driver: parse argv[1] as an unsigned number (base auto-detected by
 * strtoul), derive a key with foo(), then run bar() and baz() in place over
 * argv[0], printing each intermediate value to stdout.
 *
 * Returns -1 after printing a usage line to stderr when no argument is
 * given, 0 otherwise.
 */
__attribute__((optimize("O0")))
int main ( int argc, char * argv[] )
{
    unsigned int    n,
                    key;

    if ( argc < 2 )
    {
        fprintf( stderr, "Usage: %s <num>\n", argv[0] );
        return -1;
    }
    n   = (unsigned int)strtoul( argv[1], NULL, 0 );
    key = foo( n );
    fprintf( stdout, "n=%#x key=%#x\n", n, key );
    /* key is narrowed to char by the bar()/baz() parameter type */
    n   = bar( argv[0], argv[0], key );
    fprintf( stdout, "n=%#x\n", n );
    n   = baz( argv[0], argv[0], key );
    fprintf( stdout, "n=%#x\n", n );
    return 0;
}
————————————————————————–

hello.c用了些技巧,确保foo()、bar()、baz()函数体保留在ELF中。初版main()并
未调用bar()、baz(),必须使用attribute达成目的。终版main()实际调用了它们,
attribute显得冗余,出于演示目的,保留之。

☆ 手工构造ELF

可用某些Python模块,手工构造ELF。抠取foo、bar、baz函数体,置于.text。为便
于IDA分析,将foo、bar、baz加入.symtab、.dynsym节。这些函数体中可能含有bl指
令,调用其他函数。需修改bl目标,全部跳转到stub函数,这是只含ret指令的空函
数。假设多条bl指令的目标相同,修改后跳转到同一stub函数。不同bl目标对应不同
stub函数,将来给不同的stub函数赋不同的名字,便于IDA分析。大致布局如下

foo + pad + bar + pad + baz + pad + stub + pad + stub + pad

手工构造所得ELF并不打算真地执行,仅用于IDA静态分析、Angr模拟执行。

1) hello_arm64.json

————————————————————————–
{
    "foo": {
        "off": "0x8a0",
        "size": "0xd8"
    },
    "bar": {
        "off": "0x7a0",
        "size": "0x7c",
        "bl": [
            {
                "off": "0x800",
                "size": 4,
                "target": "strlen"
            }
        ]
    },
    "baz": {
        "off": "0x820",
        "size": "0x7c",
        "bl": [
            {
                "off": "0x880",
                "size": 4,
                "target": "strlen"
            }
        ]
    }
}
————————————————————————–

下面所说的偏移,全部是相对于文件首字节的偏移。

foo在偏移0x8a0处,函数体0xd8字节,不含bl指令。

bar在偏移0x7a0处,函数体0x7c字节。偏移0x800处有4字节"bl strlen"。

baz在偏移0x820处,函数体0x7c字节。偏移0x880处有4字节"bl strlen"。

这个json提供的信息,理论上可用反汇编引擎自动获取,尤其bl指令的偏移,但我无
此刚需,不欲增加后续编码复杂度。

3) skeleton.c

————————————————————————–
#if 0

aarch64-none-linux-gnu-gcc -Wall -pipe -O0 -fPIC -shared -s -o libskeleton.so skeleton.c

#endif

/*
 * Placeholder function for the skeleton shared object; it only returns the
 * constant 0x51201314. The attribute keeps it unoptimized, un-inlined and
 * emitted even when unreferenced.
 */
__attribute__((optimize("O0"), noinline, used))
unsigned int dummy ( void )
{
    return 0x51201314LL;
}
————————————————————————–

由skeleton.c正常编译得到libskeleton.so,在其基础上添加内容。

4) smallelf_arm64_1.py

————————————————————————–
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

#
# python3 smallelf_arm64_1.py <oldelf> <json> <newelf>
#

import sys, json, struct
import collections
import lief
import IPython

def to_int ( x ) :
    """Convert a value loaded from JSON to an int.

    Ints are returned unchanged. Strings are parsed with base auto-detection
    (``int(x, 0)``), so "0x8a0", "0o17", "0b101" and "42" are all accepted.

    Raises:
        TypeError: if x is neither int nor str (previously this case fell
            through and returned None, deferring the failure to the caller).
        ValueError: if x is a str that is not a valid integer literal.
    """
    if isinstance( x, int ) :
        return x
    if isinstance( x, str ) :
        return int( x, 0 )
    raise TypeError( f"cannot convert {type( x ).__name__} to int" )

def loadjson ( filename ) :
    """Load the function-description JSON and normalize its numbers.

    The top-level object maps a function name to a dict with "off" and
    "size", plus an optional "bl" list whose entries also carry "off" and
    "size". Each of those four fields may be an int or a string such as
    "0x8a0"; they are converted in place with to_int().

    Returns:
        The parsed dict with every "off"/"size" converted to int.
    """
    #
    # Name the encoding explicitly so the result does not depend on the
    # locale of the machine running the script
    #
    with open( filename, "r", encoding="utf-8" ) as f :
        data    = json.load( f )
    for meta in data.values() :
        meta["off"]     = to_int( meta["off"] )
        meta["size"]    = to_int( meta["size"] )
        for bl in meta.get( "bl", [] ) :
            bl["off"]   = to_int( bl["off"] )
            bl["size"]  = to_int( bl["size"] )
    return data

#
# With pad=True the returned buf is a multiple of 16 bytes, zero-filled at
# the tail
#
def getbuf ( filename, off, size, pad=True ) :
    """Read `size` bytes at file offset `off` from `filename`.

    Args:
        filename: path of the file to read.
        off: offset from the first byte of the file.
        size: number of bytes to read.
        pad: when true, append zero bytes until the length is a multiple
            of 16 (nothing is appended if it already is).

    Returns:
        A bytearray holding the data, optionally padded.

    Raises:
        ValueError: if fewer than `size` bytes could be read. A truncated
            function body would otherwise be embedded silently.
    """
    with open( filename, "rb" ) as f :
        f.seek( off )
        buf     = bytearray( f.read( size ) )
    if len( buf ) != size :
        raise ValueError(
            f"{filename}: wanted {size} bytes at offset {off:#x}, got {len( buf )}"
        )
    if pad :
        #
        # Evaluates to 0 when the length is already a multiple of 16
        #
        padsize = -len( buf ) % 16
        if padsize :
            buf.extend( b"\x00" * padsize )
    return buf

#
# Returns an OrderedDict
#
# func_0 -> ( buf, size, 1 )
# func_1 -> ( buf, size, 1 )
# func_2 -> ( buf, size, 1 )
# stub_0 -> ( ret, 4, 0 )
# stub_1 -> ( ret, 4, 0 )
#
def prepare_info ( filename, info ) :
    """Extract function bodies and redirect their bl calls to stubs.

    Each function in `info` is read from `filename` and padded to 16 bytes.
    The bodies are laid out back to back in `info` order, followed by one
    16-byte stub (a single AArch64 `ret`) per distinct bl target. Every
    listed bl instruction is rewritten to branch to the stub of its target.
    Addresses are relative to the start of this combined layout.

    Args:
        filename: ELF file the function bodies are read from.
        info: dict as returned by loadjson().

    Returns:
        OrderedDict mapping name -> ( buf, size, kind ), real functions
        first (kind 1, size = unpadded size), then stubs (kind 0, size 4).

    Raises:
        ValueError: if a bl target has the same name as an extracted
            function, or a bl offset is outside its function or not
            4-byte aligned.
    """
    #
    # AArch64 `ret` instruction, zero-padded to 16 bytes
    #
    STUB        = bytes( b'\xc0\x03\x5f\xd6' + b'\0' * 12 )
    out         = collections.OrderedDict()
    #
    # Collect the distinct targets, keeping first-seen order
    #
    targets     = list( dict.fromkeys(
        bl["target"]
        for meta in info.values()
        for bl in meta.get( "bl", [] )
    ) )
    #
    # A stub named like a real function would overwrite that function's
    # entry in `out` and corrupt the layout
    #
    clash       = [ t for t in targets if t in info ]
    if clash :
        raise ValueError( f"bl target(s) collide with function names: {clash}" )
    #
    # Lay out the real functions; fname -> base within the combined layout
    #
    func_bases  = {}
    next_base   = 0
    for fname, meta in info.items() :
        #
        # The returned buf is already padded to a 16-byte boundary
        #
        buf                 = getbuf( filename, meta["off"], meta["size"], True )
        out[fname]          = ( buf, meta["size"], 1 )
        func_bases[fname]   = next_base
        next_base          += len( buf )
    #
    # Stubs follow the last function; target -> base
    #
    stub_bases  = {}
    for t in targets :
        stub_bases[t]       = next_base
        next_base          += len( STUB )
    #
    # Patch the bl instructions inside the real functions
    #
    for fname, meta in info.items() :
        buf     = out[fname][0]
        base    = func_bases[fname]
        for bl in meta.get( "bl", [] ) :
            #
            # File offset -> offset inside the function body
            #
            rel     = bl["off"] - meta["off"]
            if rel < 0 or rel + 4 > meta["size"] or rel % 4 :
                raise ValueError(
                    f"{fname}: bl offset {bl['off']:#x} is outside the "
                    f"function or not 4-byte aligned"
                )
            delta   = stub_bases[bl["target"]] - ( base + rel )
            #
            # BL encoding: opcode 0x94000000 | imm26, imm26 = byte delta / 4
            #
            imm26   = ( delta >> 2 ) & 0x3ffffff
            buf[rel:rel+4] \
                    = ( 0x94000000 | imm26 ).to_bytes( 4, "little" )
    #
    # Append the stubs; the symbol size covers only the 4-byte ret. Each
    # target gets its own buffer object.
    #
    for t in targets :
        out[t]  = ( bytearray( STUB ), 4, 0 )
    return out

def build_elf ( filename, funcs, skeleton='libskeleton.so' ) :
    """Write a new ELF made of a skeleton plus an .extcode section.

    The buffers in `funcs` are concatenated into a new allocatable,
    executable PROGBITS section named ".extcode" (alignment 16) that is
    added to the skeleton. One FUNC symbol is created per entry, located at
    the entry's running offset inside that section:

      * kind true  -> GLOBAL/DEFAULT, added with add_dynamic_symbol()
      * kind false -> LOCAL/HIDDEN, added with add_symtab_symbol()

    An empty-named NOTYPE/LOCAL symbol is added to .symtab before the
    function symbols.

    Args:
        filename: output path of the new ELF.
        funcs: ordered mapping name -> ( buf, size, kind ) as returned by
            prepare_info().
        skeleton: path of the ELF used as the base. Defaults to
            'libskeleton.so' in the current directory, the path that was
            previously hard-coded.

    Raises:
        ValueError: if the skeleton cannot be parsed.
    """
    binary      = lief.ELF.parse( skeleton )
    if binary is None :
        raise ValueError( f"cannot parse skeleton ELF: {skeleton}" )
    #
    # .extcode
    #
    extcode_sec = lief.ELF.Section( ".extcode", lief.ELF.Section.TYPE.PROGBITS )
    extcode_sec.add( lief.ELF.Section.FLAGS.ALLOC | lief.ELF.Section.FLAGS.EXECINSTR )
    extcode_sec.alignment \
                = 16
    extcode_sec.content \
                = list( b''.join( buf for buf, _, _ in funcs.values() ) )
    binary.add( extcode_sec )

    sym         = lief.ELF.Symbol()
    sym.name    = ''
    sym.value   = 0
    sym.type    = lief.ELF.Symbol.TYPE.NOTYPE
    sym.binding = lief.ELF.Symbol.BINDING.LOCAL
    sym.visibility \
                = lief.ELF.Symbol.VISIBILITY.DEFAULT
    sym.shndx   = 0
    #
    # Add the entry to .symtab
    #
    binary.add_symtab_symbol( sym )
    #
    # Re-fetch the section from the binary: the local object was only the
    # template handed to binary.add()
    #
    extcode_sec = binary.get_section( ".extcode" )
    off         = 0
    for name, ( buf, size, is_func ) in funcs.items() :
        sym         = lief.ELF.Symbol()
        sym.name    = name
        sym.value   = extcode_sec.virtual_address + off
        sym.size    = size
        sym.type    = lief.ELF.Symbol.TYPE.FUNC
        if is_func :
            sym.binding = lief.ELF.Symbol.BINDING.GLOBAL
            sym.visibility \
                        = lief.ELF.Symbol.VISIBILITY.DEFAULT
        else :
            sym.binding = lief.ELF.Symbol.BINDING.LOCAL
            sym.visibility \
                        = lief.ELF.Symbol.VISIBILITY.HIDDEN
        sym.shndx   = binary.get_section_idx( ".extcode" )
        if is_func :
            binary.add_dynamic_symbol( sym )
        else :
            binary.add_symtab_symbol( sym )
        #
        # Advance by the padded length, not the symbol size
        #
        off        += len( buf )

    builder     = lief.ELF.Builder( binary )
    builder.build()
    builder.write( filename )

def main ( argv ) :
    """Command-line entry point.

    Expects argv to be [prog, oldelf, json, newelf]: reads the function
    descriptions from `json`, extracts and patches the bodies from `oldelf`
    and writes the resulting ELF to `newelf`.

    Returns:
        True on success; False (after printing a usage line) when the
        argument count is wrong.
    """
    if len( argv ) != 4 :
        print( f"Usage: python3 {argv[0]} <oldelf> <json> <newelf>" )
        return False
    oldelf, jsonf, newelf \
            = argv[1:4]
    info    = loadjson( jsonf )
    funcs   = prepare_info( oldelf, info )
    build_elf( newelf, funcs )
    return True

if "__main__" == __name__ :
    ret     = main( sys.argv )
    #
    # Exit status 0 on success, -1 otherwise
    #
    sys.exit( 0 if ret else -1 )
————————————————————————–

python3 smallelf_arm64_1.py hello_arm64 hello_arm64.json hello_arm64_small_1
aarch64-none-linux-gnu-readelf -Wa hello_arm64_small_1

☆ 参考资源

[1] Arm GNU Toolchain Downloads
https://developer.arm.com/downloads/-/arm-gnu-toolchain-downloads

x86_64 Linux hosted cross toolchains: AArch64 GNU/Linux target (aarch64-none-linux-gnu)
https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/binrel/arm-gnu-toolchain-14.3.rel1-x86_64-aarch64-none-linux-gnu.tar.xz

Windows (mingw-w64-x86_64) hosted cross toolchains: AArch64 GNU/Linux target (aarch64-none-linux-gnu)
https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/binrel/arm-gnu-toolchain-14.3.rel1-mingw-w64-x86_64-aarch64-none-linux-gnu.zip

[2] https://github.com/doronz88/simpleelf

[3] LIEF – Library to Instrument Executable Formats
https://github.com/lief-project/LIEF
https://lief.re/doc/latest/index.html
https://lief.re/doc/latest/api/binary_abstraction/index.html
https://lief.re/doc/latest/api/binary_abstraction/python.html
https://lief.re/doc/latest/formats/elf/index.html
https://lief.re/doc/latest/formats/elf/python.html

New ELF Builder – Romain Thomas [2022-01-23]
https://lief.re/blog/2022-01-23-new-elf-builder/

lief-patchelf
https://lief.re/blog/2025-07-13-patchelf/
https://lief.re/doc/latest/tools/lief-patchelf/index.html
https://github.com/lief-project/LIEF/tree/main/tools/lief-patchelf

[4] A tool to edit .dynsym symbols in ELF files – [2022-09-27]
https://softwarerecs.stackexchange.com/questions/84123/a-tool-to-edit-dynsym-symbols-in-elf-files

使用LIEF库为ELF文件添加新Section – headedit [2025-09-22]
https://www.52pojie.cn/thread-2061926-1-1.html

Spread the word. Share this post!

Meet The Author