objc_msgSend 快速查找

631 阅读3分钟

系统是怎么调用方法的

实验代码

main.m

#import <Foundation/Foundation.h>
#import <objc/runtime.h>

/// Base class of the experiment. Supplies the method that BClass inherits.
@interface AClass : NSObject

/// Empty instance method; exists so a call to an inherited method can be observed.
- (void)superMethod1;

@end

@implementation AClass

- (void)superMethod1 {
    // Intentionally empty: only the call site matters for the experiment.
}

@end

/// Subclass of AClass used by the experiment; adds one method of its own.
@interface BClass : AClass

/// Empty instance method declared and implemented by BClass itself.
- (void)method1;

@end

@implementation BClass

// The stray ';' that followed the method body was removed so this definition
// matches the form used for -[AClass superMethod1].
- (void)method1 {
    // Intentionally empty: only the call site matters for the experiment.
}

@end

// Experiment entry point: allocates a BClass instance and sends it two
// messages, one implemented by BClass and one inherited from AClass, so that
// both call sites can be compared after rewriting.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // NOTE(review): the object is only alloc'ed and never sent -init;
        // presumably kept minimal for the experiment — confirm.
        BClass *b = [BClass alloc];
        // Call an instance method defined by the object's own class.
        [b method1];
        // Call an instance method inherited from the superclass.
        [b superMethod1];
    }
    return 0;
}

编译 main.m

clang -rewrite-objc -c main.m

输出文件

// Rewritten form of main(): each bracketed message send has become a call
// through objc_msgSend, cast to a function type matching that send.
int main(int argc, const char * argv[]) {
    /* @autoreleasepool */ { __AtAutoreleasePool __autoreleasepool; 

        // The allocation is itself a message send: the receiver is the class
        // object returned by objc_getClass and the selector is "alloc".
        BClass *b = ((BClass *(*)(id, SEL))(void *)objc_msgSend)((id)objc_getClass("BClass"), sel_registerName("alloc"));
        // Call the method defined by the object's own class.
        ((void (*)(id, SEL))(void *)objc_msgSend)((id)b, sel_registerName("method1"));
        // Call the method defined by the superclass. The receiver is the same
        // object b; only the selector differs from the call above.
        ((void (*)(id, SEL))(void *)objc_msgSend)((id)b, sel_registerName("superMethod1"));
    }
    return 0;
}

objc_msgSend

从上面的实验可以发现:无论是类方法(alloc)还是实例方法(method1、superMethod1),调用都会被改写成对 objc_msgSend 的调用,也就是说方法调用的本质就是通过 objc_msgSend 发送消息。

查找源代码,我们发现 objc_msgSend 是使用汇编实现的。

一些需要了解的预编译宏

// SUPPORT_INDEXED_ISA: 1 only when building for armv7k (ABI version >= 2) or
// for arm64 without LP64 (arm64_32); 0 everywhere else.
#if __ARM_ARCH_7K__ >= 2  ||  (__arm64__ && !__LP64__)
// armv7k or arm64_32
#   define SUPPORT_INDEXED_ISA 1
#else
// every other architecture
#   define SUPPORT_INDEXED_ISA 0
#endif
// SUPPORT_PACKED_ISA: 0 when the target is not LP64, is Windows, or is a
// simulator build that is not iOSMac; 1 for all remaining targets.
#if (!__LP64__  ||  TARGET_OS_WIN32  ||  \
     (TARGET_OS_SIMULATOR && !TARGET_OS_IOSMAC))
// pointers are not 64-bit, or one of the other excluded targets
#   define SUPPORT_PACKED_ISA 0
#else
// all remaining targets (the common case)
#   define SUPPORT_PACKED_ISA 1
#endif

SUPPORT_PACKED_ISA 和 SUPPORT_INDEXED_ISA 直接影响 isa 的存储结构
我们在现在的 64位真机上,编译条件都是:
SUPPORT_PACKED_ISA=1 SUPPORT_INDEXED_ISA=0

因此 isa 采用如下的位域结构(以下为 arm64 架构下的定义):

// Layout of the packed isa word.
// Field widths in ISA_BITFIELD sum to 64 bits: 1+1+1+33+6+1+1+1+19.
// ISA_MASK keeps bits 3..35, which is exactly the 33-bit shiftcls field that
// follows the three 1-bit flags; masking with it yields the class pointer bits.
// ISA_MAGIC_MASK covers bit 0 (nonpointer) plus bits 36..41 (the 6-bit magic
// field); ISA_MAGIC_VALUE is the value expected under that mask.
#   define ISA_MASK        0x0000000ffffffff8ULL
#   define ISA_MAGIC_MASK  0x000003f000000001ULL
#   define ISA_MAGIC_VALUE 0x000001a000000001ULL
#   define ISA_BITFIELD                                                      \
      uintptr_t nonpointer        : 1;                                       \
      uintptr_t has_assoc         : 1;                                       \
      uintptr_t has_cxx_dtor      : 1;                                       \
      uintptr_t shiftcls          : 33; /*MACH_VM_MAX_ADDRESS 0x1000000000*/ \
      uintptr_t magic             : 6;                                       \
      uintptr_t weakly_referenced : 1;                                       \
      uintptr_t deallocating      : 1;                                       \
      uintptr_t has_sidetable_rc  : 1;                                       \
      uintptr_t extra_rc          : 19
// RC_ONE is bit 45, the lowest bit of extra_rc (45 bits precede that field).
// RC_HALF is bit 18, i.e. half the range of the 19-bit extra_rc field.
#   define RC_ONE   (1ULL<<45)
#   define RC_HALF  (1ULL<<18)

从 objc-msg-arm64.s 文件获取汇编代码片段。

_objc_msgSend

/********************************************************************
 *
 * id objc_msgSend(id self, SEL _cmd, ...);
 * IMP objc_msgLookup(id self, SEL _cmd, ...);
 * 
 * objc_msgLookup ABI:
 * IMP returned in x17
 * x16 reserved for our use but not used
 *
 ********************************************************************/

    /* ...omitted... */

    /* Entry point of _objc_msgSend. */
	ENTRY _objc_msgSend
	UNWIND _objc_msgSend, NoFrame

    /* Compare the first argument p0 (the receiver, self) with 0. */
	cmp	p0, #0			// nil check and tagged pointer check
    
    /* Tagged-pointer support is selected at build time. */
#if SUPPORT_TAGGED_POINTERS 
    /* b.le: branch to LNilOrTagged when the receiver is <= 0, i.e. it is nil
       (== 0) or a tagged pointer (top bit set, so it compares as negative).
       Otherwise fall through. */
	b.le	LNilOrTagged		//  (MSB tagged pointer looks negative)
#else
    /* Without tagged pointers only nil needs special handling:
       branch to LReturnZero when the receiver == 0. */
	b.eq	LReturnZero
#endif

    /* Load the pointer-sized word stored at the address held in x0 (the first
       word of the object) into p13; that word is the raw isa. */
	ldr	p13, [x0]		// p13 = isa

    /* Turn the raw isa into a class pointer and leave it in p16.
       The macro is explained in the next section. */
	GetClassFromIsa_p16 p13		// p16 = class
LGetIsaDone:
	// calls imp or objc_msgSend_uncached
    /* Look the selector up in the class's method cache in NORMAL mode.
       The macro is explained in a later section. */
	CacheLookup NORMAL, _objc_msgSend

#if SUPPORT_TAGGED_POINTERS
LNilOrTagged:
	// Flags are still those of the cmp above: equal means the receiver is nil.
	b.eq	LReturnZero		// nil check

	// tagged
	// x10 = address of the _objc_debug_taggedpointer_classes table.
	adrp	x10, _objc_debug_taggedpointer_classes@PAGE
	add	x10, x10, _objc_debug_taggedpointer_classes@PAGEOFF
	// x11 = bits 60..63 of the receiver, used as the table index.
	ubfx	x11, x0, #60, #4
	// x16 = table[x11] (index scaled by 8).
	ldr	x16, [x10, x11, LSL #3]
	// If the loaded class is not __NSUnrecognizedTaggedPointer, x16 is the
	// class to search; otherwise fall through to the extended table.
	adrp	x10, _OBJC_CLASS_$___NSUnrecognizedTaggedPointer@PAGE
	add	x10, x10, _OBJC_CLASS_$___NSUnrecognizedTaggedPointer@PAGEOFF
	cmp	x10, x16
	b.ne	LGetIsaDone

	// ext tagged
	// Same lookup in _objc_debug_taggedpointer_ext_classes, indexed by
	// bits 52..59 of the receiver.
	adrp	x10, _objc_debug_taggedpointer_ext_classes@PAGE
	add	x10, x10, _objc_debug_taggedpointer_ext_classes@PAGEOFF
	ubfx	x11, x0, #52, #8
	ldr	x16, [x10, x11, LSL #3]
	b	LGetIsaDone
// SUPPORT_TAGGED_POINTERS
#endif

LReturnZero:
	// Message to nil: zero x1 and d0..d3 and return, so the caller sees a
	// zero result.
	// x0 is already zero
	mov	x1, #0
	movi	d0, #0
	movi	d1, #0
	movi	d2, #0
	movi	d3, #0
	ret

	END_ENTRY _objc_msgSend


	// _objc_msgLookup: same receiver/class resolution as _objc_msgSend, but the
	// cache lookup runs in LOOKUP mode, which returns the imp rather than
	// calling it.
	ENTRY _objc_msgLookup
	UNWIND _objc_msgLookup, NoFrame
	cmp	p0, #0			// nil check and tagged pointer check
#if SUPPORT_TAGGED_POINTERS
	b.le	LLookup_NilOrTagged	//  (MSB tagged pointer looks negative)
#else
	b.eq	LLookup_Nil
#endif
	ldr	p13, [x0]		// p13 = isa
	GetClassFromIsa_p16 p13		// p16 = class
LLookup_GetIsaDone:
	// returns imp
	CacheLookup LOOKUP, _objc_msgLookup

#if SUPPORT_TAGGED_POINTERS
LLookup_NilOrTagged:
	// Flags are still those of the cmp above: equal means the receiver is nil.
	b.eq	LLookup_Nil	// nil check

	// tagged
	// x16 = _objc_debug_taggedpointer_classes[bits 60..63 of the receiver].
	adrp	x10, _objc_debug_taggedpointer_classes@PAGE
	add	x10, x10, _objc_debug_taggedpointer_classes@PAGEOFF
	ubfx	x11, x0, #60, #4
	ldr	x16, [x10, x11, LSL #3]
	// Use x16 unless it is __NSUnrecognizedTaggedPointer; in that case fall
	// through to the extended table.
	adrp	x10, _OBJC_CLASS_$___NSUnrecognizedTaggedPointer@PAGE
	add	x10, x10, _OBJC_CLASS_$___NSUnrecognizedTaggedPointer@PAGEOFF
	cmp	x10, x16
	b.ne	LLookup_GetIsaDone

LLookup_ExtTag:	
	// x16 = _objc_debug_taggedpointer_ext_classes[bits 52..59 of the receiver].
	adrp	x10, _objc_debug_taggedpointer_ext_classes@PAGE
	add	x10, x10, _objc_debug_taggedpointer_ext_classes@PAGEOFF
	ubfx	x11, x0, #52, #8
	ldr	x16, [x10, x11, LSL #3]
	b	LLookup_GetIsaDone
// SUPPORT_TAGGED_POINTERS
#endif

LLookup_Nil:
	// nil receiver: hand back the address of __objc_msgNil in x17.
	adrp	x17, __objc_msgNil@PAGE
	add	x17, x17, __objc_msgNil@PAGEOFF
	ret

	END_ENTRY _objc_msgLookup

	
	// __objc_msgNil: the routine whose address _objc_msgLookup returns for a
	// nil receiver. It zeroes x1 and d0..d3 and returns.
	STATIC_ENTRY __objc_msgNil

	// x0 is already zero
	mov	x1, #0
	movi	d0, #0
	movi	d1, #0
	movi	d2, #0
	movi	d3, #0
	ret
	
	END_ENTRY __objc_msgNil


	// _objc_msgSendSuper: x0 points at a two-word record. Its first word
	// becomes the receiver (p0) and its second word is the class whose cache
	// is searched (p16).
	ENTRY _objc_msgSendSuper
	UNWIND _objc_msgSendSuper, NoFrame

	ldp	p0, p16, [x0]		// p0 = real receiver, p16 = class
	// calls imp or objc_msgSend_uncached
	CacheLookup NORMAL, _objc_msgSendSuper

	END_ENTRY _objc_msgSendSuper

	// no _objc_msgLookupSuper

	// _objc_msgSendSuper2: like _objc_msgSendSuper, except the class loaded
	// from the record is replaced by its superclass before the cache lookup.
	ENTRY _objc_msgSendSuper2
	UNWIND _objc_msgSendSuper2, NoFrame

	ldp	p0, p16, [x0]		// p0 = real receiver, p16 = class
	ldr	p16, [x16, #SUPERCLASS]	// p16 = class->superclass
	CacheLookup NORMAL, _objc_msgSendSuper2

	END_ENTRY _objc_msgSendSuper2

	
	// _objc_msgLookupSuper2: LOOKUP-mode counterpart of _objc_msgSendSuper2.
	// It searches the cache of the superclass of the class stored in the
	// record at x0.
	ENTRY _objc_msgLookupSuper2
	UNWIND _objc_msgLookupSuper2, NoFrame

	ldp	p0, p16, [x0]		// p0 = real receiver, p16 = class
	ldr	p16, [x16, #SUPERCLASS]	// p16 = class->superclass
	CacheLookup LOOKUP, _objc_msgLookupSuper2

	END_ENTRY _objc_msgLookupSuper2


// MethodTableLookup: slow-path helper. Saves the argument registers, calls
// _lookUpImpOrForward(receiver, selector, class in x16, 3), leaves the
// returned IMP in x17 and restores the argument registers.
.macro MethodTableLookup
	
	// push frame
	// NOTE(review): SignLR / AuthenticateLR are defined elsewhere; presumably
	// they sign and authenticate lr where pointer authentication is used.
	SignLR
	stp	fp, lr, [sp, #-16]!
	mov	fp, sp

	// save parameter registers: x0..x8, q0..q7
	// Reserves 10*8 + 8*16 bytes. Only nine general registers are stored, so
	// one 8-byte slot stays unused (presumably to keep sp 16-byte aligned).
	sub	sp, sp, #(10*8 + 8*16)
	stp	q0, q1, [sp, #(0*16)]
	stp	q2, q3, [sp, #(2*16)]
	stp	q4, q5, [sp, #(4*16)]
	stp	q6, q7, [sp, #(6*16)]
	stp	x0, x1, [sp, #(8*16+0*8)]
	stp	x2, x3, [sp, #(8*16+2*8)]
	stp	x4, x5, [sp, #(8*16+4*8)]
	stp	x6, x7, [sp, #(8*16+6*8)]
	str	x8,     [sp, #(8*16+8*8)]

	// lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)
	// receiver and selector already in x0 and x1
	// Third argument: the class to search, passed in from x16.
	mov	x2, x16
	// Fourth argument: the behavior flags named in the comment above.
	mov	x3, #3
	bl	_lookUpImpOrForward

	// IMP in x0
	// Move it to x17 before x0 is overwritten by the restore below.
	mov	x17, x0
	
	// restore registers and return
	ldp	q0, q1, [sp, #(0*16)]
	ldp	q2, q3, [sp, #(2*16)]
	ldp	q4, q5, [sp, #(4*16)]
	ldp	q6, q7, [sp, #(6*16)]
	ldp	x0, x1, [sp, #(8*16+0*8)]
	ldp	x2, x3, [sp, #(8*16+2*8)]
	ldp	x4, x5, [sp, #(8*16+4*8)]
	ldp	x6, x7, [sp, #(8*16+6*8)]
	ldr	x8,     [sp, #(8*16+8*8)]

	// Pop the frame pushed at the top of the macro.
	mov	sp, fp
	ldp	fp, lr, [sp], #16
	AuthenticateLR

.endmacro

	// __objc_msgSend_uncached: runs MethodTableLookup for the class in p16,
	// then transfers control to the IMP it left in x17.
	STATIC_ENTRY __objc_msgSend_uncached
	UNWIND __objc_msgSend_uncached, FrameWithNoSaves

	// THIS IS NOT A CALLABLE C FUNCTION
	// Out-of-band p16 is the class to search
	
	MethodTableLookup
	// NOTE(review): TailCallFunctionPointer is defined elsewhere; presumably
	// it branches to x17 without returning here.
	TailCallFunctionPointer x17

	END_ENTRY __objc_msgSend_uncached


	// __objc_msgLookup_uncached: runs MethodTableLookup for the class in p16
	// and returns, leaving the IMP in x17 instead of calling it.
	STATIC_ENTRY __objc_msgLookup_uncached
	UNWIND __objc_msgLookup_uncached, FrameWithNoSaves

	// THIS IS NOT A CALLABLE C FUNCTION
	// Out-of-band p16 is the class to search
	
	MethodTableLookup
	ret

	END_ENTRY __objc_msgLookup_uncached


	// _cache_getImp: cache-only lookup. p0 is passed to GetClassFromIsa_p16
	// as the raw isa value, then the cache is searched in GETIMP mode.
	STATIC_ENTRY _cache_getImp

	GetClassFromIsa_p16 p0
	CacheLookup GETIMP, _cache_getImp

LGetImpMiss:
	// Miss: return 0 in p0.
	mov	p0, #0
	ret

	END_ENTRY _cache_getImp

GetClassFromIsa_p16

/********************************************************************
 * GetClassFromIsa_p16 src
 * src is a raw isa field. Sets p16 to the corresponding class pointer.
 * The raw isa might be an indexed isa to be decoded, or a
 * packed isa that needs to be masked.
 *
 * On exit:
 *   $0 is unchanged
 *   p16 is a class pointer
 *   x10 is clobbered
 ********************************************************************/

 /* ...omitted... */

.macro GetClassFromIsa_p16 /* src */

#if SUPPORT_INDEXED_ISA
	// Indexed isa
    /* Copy the raw isa argument $0 into p16. */
	mov	p16, $0			// optimistically set dst = src

    /* Test bit ISA_INDEX_IS_NPI_BIT of the isa. If it is 0 the isa is a plain
       class pointer and p16 is already correct, so skip to label 1. */
	tbz	p16, #ISA_INDEX_IS_NPI_BIT, 1f	// done if not non-pointer isa

	// isa in p16 is indexed
    /* x10 = base address of the page that contains _objc_indexed_classes. */
	adrp	x10, _objc_indexed_classes@PAGE

    /* Add the offset within the page: x10 = address of _objc_indexed_classes. */
	add	x10, x10, _objc_indexed_classes@PAGEOFF

    /* Extract the ISA_INDEX_BITS-wide field starting at bit ISA_INDEX_SHIFT:
       the class index stored in the isa. */
	ubfx	p16, p16, #ISA_INDEX_SHIFT, #ISA_INDEX_BITS  // extract index

    /* One indexed load, not a loop: p16 = _objc_indexed_classes[index], with
       the index scaled by PTRSHIFT. */
	ldr	p16, [x10, p16, UXTP #PTRSHIFT]	// load class from array
1:

#elif __LP64__
	// 64-bit packed isa
	// Keep only the bits selected by ISA_MASK; the result is the class pointer.
	and	p16, $0, #ISA_MASK

#else
	// 32-bit raw isa
	// The isa is used unchanged as the class pointer.
	mov	p16, $0

#endif

.endmacro

CacheLookup

// CacheLookup mode, function
// Searches the method cache of the class in x16 for the selector in p1.
// Starting at bucket (_cmd & mask) it scans toward lower addresses, wraps once
// to the last bucket, and takes the miss path on an empty bucket or a second wrap.
.macro CacheLookup
	//
	// Restart protocol:
	//
	//   As soon as we're past the LLookupStart$1 label we may have loaded
	//   an invalid cache pointer or mask.
	//
	//   When task_restartable_ranges_synchronize() is called,
	//   (or when a signal hits us) before we're past LLookupEnd$1,
	//   then our PC will be reset to LLookupRecover$1 which forcefully
	//   jumps to the cache-miss codepath which have the following
	//   requirements:
	//
	//   GETIMP:
	//     The cache-miss is just returning NULL (setting x0 to 0)
	//
	//   NORMAL and LOOKUP:
	//   - x0 contains the receiver
	//   - x1 contains the selector
	//   - x16 contains the isa
	//   - other registers are set as per calling conventions
	//
    /* Lookup starts here. From objc_msgSend the macro is invoked with
       $0 = NORMAL and $1 = _objc_msgSend. */
LLookupStart$1:

/*
CACHE is defined as (2 * __SIZEOF_POINTER__).
Load the word located two pointers past the class address in x16 into p11.
That word belongs to the class's cache (cache_t in the runtime) and packs
the mask together with the buckets pointer.
*/
	// p1 = SEL, p16 = isa
	ldr	p11, [x16, #CACHE]				// p11 = mask|buckets

#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 
    /* arm64 with 64-bit pointers */

    /* Keep the low 48 bits of p11: the buckets pointer. */
	and	p10, p11, #0x0000ffffffffffff	// p10 = buckets
    /*
    Shift p11 right by 48 to obtain the mask and AND it with p1 (_cmd).
    p12 = _cmd & mask is the index of the bucket where the scan starts.
    NOTE(review): presumably the same index cache_hash produces when the
    entry is inserted — confirm against the cache insertion code.
    */
	and	p12, p1, p11, LSR #48		// x12 = _cmd & mask

#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4 
    /* arm64_32 */

	// The low 4 bits hold a shift count, the remaining bits the buckets pointer.
	and	p10, p11, #~0xf			// p10 = buckets
	and	p11, p11, #0xf			// p11 = maskShift
	mov	p12, #0xffff
	lsr	p11, p12, p11				// p11 = mask = 0xffff >> p11
	and	p12, p1, p11				// x12 = _cmd & mask

#else
#error Unsupported cache mask storage for ARM64.
#endif

    /*
    Scale the index by shifting it left (1+PTRSHIFT) bits, i.e. by the size of
    one {imp, sel} pair, and add it to the buckets base.
    p12 = address of the starting bucket.
    */
	add	p12, p10, p12, LSL #(1+PTRSHIFT)
		             // p12 = buckets + ((_cmd & mask) << (1+PTRSHIFT))

    /* Load the two words of that bucket: p17 = imp, p9 = sel. */
	ldp	p17, p9, [x12]		// {imp, sel} = *bucket

    /* Compare the bucket's sel with the selector being looked up. */
1:	cmp	p9, p1			// if (bucket->sel != _cmd)

    /* a. Different: go to 2f and keep scanning. */
	b.ne	2f			//     scan more

    /* b. Equal: cache hit, handled by CacheHit according to the mode. */
	CacheHit $0			// call or return imp
	
2:	// not hit: p12 = not-hit bucket
    /* CheckMiss takes the miss path when the bucket is empty (sel == 0).
       It does not create a cache. */
	CheckMiss $0			// miss if bucket->sel == 0

    /* Has the scan reached the first bucket (p12 == buckets base)? */
	cmp	p12, p10		// wrap if bucket == buckets

    /* If so, go to 3f to wrap around. */
	b.eq	3f

    /*
    Pre-decrement x12 by BUCKET_SIZE (with write-back) and load {imp, sel}
    from the new address. This steps to the previous bucket, so the scan
    moves from higher addresses toward lower ones.
    */
	ldp	p17, p9, [x12, #-BUCKET_SIZE]!	// {imp, sel} = *--bucket

    /* Back to 1b to compare the newly loaded sel with p1. */
	b	1b			// loop

3:	// wrap: p12 = first bucket, w11 = mask
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16

    /*
    The mask sits in the top 16 bits of p11. Shifting right by
    48 - (1+PTRSHIFT) gives mask << (1+PTRSHIFT), the byte offset of the
    last bucket. Adding it to p12 (the first bucket) moves p12 to the last bucket.
    */
	add	p12, p12, p11, LSR #(48 - (1+PTRSHIFT))
					// p12 = buckets + (mask << 1+PTRSHIFT)
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
	add	p12, p12, p11, LSL #(1+PTRSHIFT)
					// p12 = buckets + (mask << 1+PTRSHIFT)
#else
#error Unsupported cache mask storage for ARM64.
#endif

	// Clone scanning loop to miss instead of hang when cache is corrupt.
	// The slow path may detect any corruption and halt later.

    /* Load the two words of the last bucket: p17 = imp, p9 = sel. */
	ldp	p17, p9, [x12]		// {imp, sel} = *bucket

    /* Compare the bucket's sel with the selector being looked up. */
1:	cmp	p9, p1			// if (bucket->sel != _cmd)

    /* Different: go to 2f. */
	b.ne	2f			//     scan more

    /* Equal: cache hit. */
	CacheHit $0			// call or return imp
	
2:	// not hit: p12 = not-hit bucket
	CheckMiss $0			// miss if bucket->sel == 0
    /* Has the scan reached the first bucket again? */
	cmp	p12, p10		// wrap if bucket == buckets

    /* If so, go to 3f (second wrap). */
	b.eq	3f

    /* Otherwise step to the previous bucket and continue the backward scan. */
	ldp	p17, p9, [x12, #-BUCKET_SIZE]!	// {imp, sel} = *--bucket
	b	1b			// loop

LLookupEnd$1:
LLookupRecover$1:
3:	// double wrap
    /* The selector was not found in the cache: take the miss path. */
	JumpMiss $0

.endmacro

参考文章:


  1. iOS-底层原理 12:objc_msgSend流程分析之快速查找
  2. ARM指令集
  3. 汇编七、ADRP指令