iOS-13.方法查找流程之快速查找流程

ios底层文章汇总

1 Runtime

1.1 编译时

编译器将代码编译成机器能识别的代码的时候，实际可能只翻译成某个中间状态的语言.

编译的主要功能：

编译时类型检查，也叫静态类型检查：检测关键字错误，词法分析，语法分析---> errors & warning

1.2 运行时

代码跑起来，被装载到内存中去了的时候。会在内存中做一些操作和判断.

1.3 Runtime的两个版本

Legacy版本(早期版本) ：对应的编程接口是Objective-C 1.0，32位的Mac OS X的平台上

Modern版本(现行版本) ：对应的编程接口是Objective-C 2.0，iPhone程序和Mac OS X v10.5 及以后的系统中的64位程序

1.4 Objective-C Runtime Programming Guide

Objective-C Runtime Programming Guide 参考官方文档

1.5 runtime的使用有以下三种方式

通过OC代码，eg: [person sayNB]

通过Framework&Service, eg:isKindOfClass

通过Runtime API，eg:class_getInstanceSize

其三种实现方法与编译层和底层的关系如图所示:

2 方法的本质

2.1 终端使用clang编译main.m生成main.cpp

clang -rewrite-objc main.m -o main.cpp

clang -rewrite-objc -fobjc-arc -fobjc-runtime=ios-13.0.0 -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator14.2.sdk main.m

从.cpp代码中可以看到,OC代码

LGPerson *person = [LGPerson alloc];

编译器编译后可用C++代码替换为:

LGPerson *person = ((LGPerson *(*)(id, SEL))(void *)objc_msgSend)((id)objc_getClass("LGPerson"), sel_registerName("alloc"));
//LGPerson *person = objc_msgSend(objc_getClass("LGPerson"), sel_registerName("alloc"));

objc_msgSend需要导入头文件#import <objc/message.h>

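若直接调用objc_msgSend时报错 Too many arguments to function call, expected 0, have 2，需要关闭编译器对objc_msgSend调用的严格参数检查：在 Build Settings 中将 Enable Strict Checking of objc_msgSend Calls 设置为 NO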

给父类发送消息

objc_msgSendSuper源码：

// Entry point used to send a message starting the method search at a superclass.
// The prototype is declared with a `void` parameter list; the intended arguments
// (struct objc_super *super, SEL op, ...) are only listed in the inline comment,
// so call sites presumably have to cast it to a concrete function-pointer type.
OBJC_EXPORT void
objc_msgSendSuper(void /* struct objc_super *super, SEL op, ... */ )
    OBJC_AVAILABLE(10.0, 2.0, 9.0, 1.0, 2.0);
    
// Argument bundle for objc_msgSendSuper: the receiving instance plus the class
// at which the method search begins.
struct objc_super {
    /// Specifies an instance of a class.
    __unsafe_unretained _Nonnull id receiver;

    /// Specifies the particular superclass of the instance to message. 
#if !defined(__cplusplus)  &&  !__OBJC2__
    /* For compatibility with old objc-runtime.h header */
    __unsafe_unretained _Nonnull Class class;
#else
    // Field name used for C++ and for Objective-C 2.0 (__OBJC2__) builds.
    __unsafe_unretained _Nonnull Class super_class;
#endif
    /* super_class is the first class to search */
};

2.2 方法的本质：`objc_msgSend`

具体参考：iOS-OC对象的本质之clang编译main.m

3 objc_msgSend 汇编代码流程逻辑

3.1 使用汇编的原因

C语言无法编写这样一个函数：既保留未知的参数，又能跳转到任意的函数指针；C语言不具备实现这件事所必需的特性
objc_msgSend必须够快，汇编执行很快
objc_msgSend 参数具有动态性(不确定性)

3.2 objc_msgSend汇编源码定位

在objc4-781源码中全局搜索objc_msgSend，可搜索到很多处。
按住 command + control + 点击搜索到的文件的下三角箭头，将文件收缩起来
找到objc-msg-arm64.s文件点击 ENTRY _objc_msgSend

3.3 _objc_msgSend汇编源码

	ENTRY _objc_msgSend
	UNWIND _objc_msgSend, NoFrame
    // Example call: objc_msgSend(person,sel_registerName("sayHello"));
    // Compare the first argument p0 (the id-typed receiver) with 0. The flags set here
    // drive both the nil check and the tagged-pointer check below.
	cmp	p0, #0			
#if SUPPORT_TAGGED_POINTERS // build supports tagged pointers
	b.le	LNilOrTagged		//  receiver <= 0 (signed compare): nil, or presumably a tagged pointer (top bit set) -> LNilOrTagged
#else // build does not support tagged pointers
	b.eq	LReturnZero //  receiver == 0 (nil) -> LReturnZero
#endif

	ldr	p13, [x0]    	// load isa : p13 = isa
	GetClassFromIsa_p16 p13		// derive class : p16 = class
/*
 What GetClassFromIsa_p16 does (macro body not shown in this excerpt):
    and    p16, $0, #ISA_MASK
    $0  : the first macro argument, i.e. p13, which currently holds the isa
    ISA_MASK: 0x0000000ffffffff8ULL
 In effect GetClassFromIsa_p16 stores (isa & ISA_MASK) in p16.
 */
LGetIsaDone:
	// Cache lookup: the fast path. Expects the class to search in p16.
	CacheLookup NORMAL, _objc_msgSend

#if SUPPORT_TAGGED_POINTERS  // build supports tagged pointers
LNilOrTagged:
/*
    Example call: objc_msgSend(person,sel_registerName("sayHello"));
    The flags are still those of `cmp p0, #0` above: if the receiver is nil (equal to 0),
    branch to LReturnZero; otherwise fall through and treat it as a tagged pointer.
 */
	b.eq	LReturnZero		// nil check

	// tagged
	// x16 = _objc_debug_taggedpointer_classes[bits 60..63 of the receiver]
	adrp	x10, _objc_debug_taggedpointer_classes@PAGE
	add	x10, x10, _objc_debug_taggedpointer_classes@PAGEOFF
	ubfx	x11, x0, #60, #4
	ldr	x16, [x10, x11, LSL #3]
	// Compare the class found with the __NSUnrecognizedTaggedPointer placeholder class.
	adrp	x10, _OBJC_CLASS_$___NSUnrecognizedTaggedPointer@PAGE
	add	x10, x10, _OBJC_CLASS_$___NSUnrecognizedTaggedPointer@PAGEOFF
	cmp	x10, x16
	b.ne	LGetIsaDone // b.ne: taken when x10 != x16, i.e. the basic tag resolved to a real class

	// ext tagged
	// x16 = _objc_debug_taggedpointer_ext_classes[bits 52..59 of the receiver]
	adrp	x10, _objc_debug_taggedpointer_ext_classes@PAGE
	add	x10, x10, _objc_debug_taggedpointer_ext_classes@PAGEOFF
	ubfx	x11, x0, #52, #8
	ldr	x16, [x10, x11, LSL #3]
	b	LGetIsaDone
// SUPPORT_TAGGED_POINTERS
#endif

LReturnZero:
	// Messaging nil: zero the remaining integer and floating-point return registers, then return.
	// x0 is already zero
	mov	x1, #0
	movi	d0, #0
	movi	d1, #0
	movi	d2, #0
	movi	d3, #0
	ret

	END_ENTRY _objc_msgSend

3.4 _objc_msgSend汇编逻辑

objc_msgSend至少有两个入参：第一个是id类型的receiver(消息的接收者)，第二个参数是sel
objc_msgSend需要判断消息的接收者是否为空,为空时直接跳转到LReturnZero，将寄存器归零后退出
- 支持tagged pointer的情况: receiver<=0 时跳转到LNilOrTagged，其中receiver==0(nil)再跳转到LReturnZero，否则按tagged pointer查表取得class后跳转到LGetIsaDone
- 不支持tagged pointer的情况: receiver==0 时直接跳转到LReturnZero
从receiver中取出isa存入p13寄存器
通过 GetClassFromIsa_p16 :isa & ISA_MASK获取shiftcls位域的类信息(class)存入寄存器p16，
进入快速查找缓存方法 CacheLookup NORMAL, _objc_msgSend

3.5 CacheLookup汇编源码

.macro CacheLookup
	// Fast-path method cache lookup.
	//   $0 = lookup mode (NORMAL / GETIMP / LOOKUP), forwarded to CacheHit / CheckMiss / JumpMiss
	//   $1 = name of the enclosing function, used to build the LLookup* labels
	//
	// Restart protocol:
	//
	//   As soon as we're past the LLookupStart$1 label we may have loaded
	//   an invalid cache pointer or mask.
	//
	
	//   GETIMP:
	//     The cache-miss is just returning NULL (setting x0 to 0)
	//
	//   NORMAL and LOOKUP:
	//   - x0  receiver
	//   - x1 selector
	//   - x16 class to search (derived from the receiver's isa)
	//   - other registers are set as per calling conventions
	//
LLookupStart$1:

    //  p16 = class. Per the article, #define CACHE (2 * __SIZEOF_POINTER__), where __SIZEOF_POINTER__ is the
    //  pointer size, i.e. 2*8 = 16 on LP64.
    //  Load the cache word at offset CACHE from the class base address (skipping isa (8 bytes) and superclass (8 bytes)) into p11.
    //  p11 = mask|buckets (mask in the high 16 bits + buckets in the low 48 bits, for the HIGH_16 layout)
	ldr	p11, [x16, #CACHE]				// p11 = mask|buckets

// Mask stored in the high 16 bits (the article labels this "64-bit device")
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
	and	p10, p11, #0x0000ffffffffffff	// p10 = buckets
	and	p12, p1, p11, LSR #48		// x12 = _cmd & mask

// Mask shift stored in the low 4 bits (the article labels this "32-bit device";
// presumably arm64 with 32-bit pointers - TODO confirm against objc-config.h)
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
	and	p10, p11, #~0xf			// p10 = buckets
	and	p11, p11, #0xf			// p11 = maskShift
	mov	p12, #0xffff
	lsr	p11, p12, p11			// p11 = mask = 0xffff >> p11
	and	p12, p1, p11			// x12 = _cmd & mask
#else
#error Unsupported cache mask storage for ARM64.
#endif

    // p12 holds the bucket index and p10 the base address of the buckets array. index * (1<<4) (i.e. 16) is the
    // byte offset of that bucket; adding it to the base address leaves the bucket's address in p12.
    // LSL #(1+PTRSHIFT) is a left shift by 4 when PTRSHIFT is 3 (value not shown here), so index<<4 multiplies by 16:
    // the offset of bucket[index] relative to the start of the buckets array.
	add	p12, p10, p12, LSL #(1+PTRSHIFT)
	// p12 = buckets + ((_cmd & mask) << (1+PTRSHIFT)), i.e. the address of the bucket at that index


    // Load the bucket that x12 (p12) points to: imp goes to p17, sel goes to p9
	ldp	p17, p9, [x12]		// {imp, sel} = *bucket

// Compare the bucket's sel with p1 (the _cmd argument that was passed in)
1:	cmp	p9, p1			// if (bucket->sel != _cmd)
    // bucket->sel != _cmd: not found in this bucket, jump forward to 2
	b.ne	2f			//     scan more
    // Equal: cache hit, call or return the imp depending on the mode
	CacheHit $0			// call or return imp
	
2:	// not hit: p12 = not-hit bucket
    // If bucket->sel == 0 the slot is empty: take the miss path (__objc_msgSend_uncached in NORMAL mode)
	CheckMiss $0			// miss if bucket->sel == 0

    // Compare p12 (the current bucket) with p10 (the first element of the buckets array)
	cmp	p12, p10
    // If bucket == buckets, jump forward to 3 to wrap around and continue from the end of the buckets array
	b.eq	3f
    // Otherwise keep scanning backwards: move x12 back by BUCKET_SIZE (pre-indexed with writeback) to the
    // preceding bucket and load its imp and sel into p17 and p9
	ldp	p17, p9, [x12, #-BUCKET_SIZE]!	// {imp, sel} = *--bucket
	b	1b			// loop back to 1

3:	// wrap: p12 = first bucket, w11 = mask
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
// HIGH_16 layout: p11 = mask|buckets. Shifting right by (48 - 4) is equivalent to shifting right by 48
// and then left by 4, which yields (mask<<4).
// (mask<<4) is the offset of the last bucket relative to the start of the buckets array.
// p12 = buckets + (mask << 1+PTRSHIFT) is the address of the last bucket in the array.
	add	p12, p12, p11, LSR #(48 - (1+PTRSHIFT))

#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
// LOW_4 layout, same idea: p11 already holds the mask, so p12 = buckets + (mask << 1+PTRSHIFT)
	add	p12, p12, p11, LSL #(1+PTRSHIFT)

#else
#error Unsupported cache mask storage for ARM64.
#endif

	// Clone scanning loop to miss instead of hang when cache is corrupt.
	// The slow path may detect any corruption and halt later.

// Load the bucket that x12 (p12) points to: imp goes to p17, sel goes to p9
	ldp	p17, p9, [x12]		// {imp, sel} = *bucket

1:	cmp	p9, p1			// if (bucket->sel != _cmd)
    // bucket->sel != _cmd: not found in this bucket, jump forward to 2
	b.ne	2f			//     scan more
  // Equal: cache hit, call or return the imp depending on the mode
	CacheHit $0			// call or return imp
	
2:	// not hit: p12 = not-hit bucket
  // If bucket->sel == 0 the slot is empty: take the miss path (__objc_msgSend_uncached in NORMAL mode)
	CheckMiss $0			// miss if bucket->sel == 0

// Compare p12 (the current bucket) with p10 (the first element of the buckets array)
	cmp	p12, p10		// wrap if bucket == buckets

 // If bucket == buckets a second time, jump forward to 3 below ("double wrap"): the whole array
 // has been scanned, so give up via JumpMiss instead of wrapping again
	b.eq	3f

 // Otherwise keep scanning backwards: move x12 back by BUCKET_SIZE (pre-indexed with writeback) to the
 // preceding bucket and load its imp and sel into p17 and p9
	ldp	p17, p9, [x12, #-BUCKET_SIZE]!	// {imp, sel} = *--bucket
	b	1b			// loop

LLookupEnd$1:
LLookupRecover$1:
3:	// double wrap
	// JumpMiss is defined outside this excerpt; presumably it branches to the mode-specific miss handler.
	JumpMiss $0

.endmacro

// Lookup modes passed as the first argument ($0) to CacheLookup / CacheHit / CheckMiss.
#define NORMAL 0
#define GETIMP 1
#define LOOKUP 2

// CacheHit: x17 = cached IMP, x12 = address of bucket, x1 = SEL, x16 = isa
// Handles a cache hit according to the mode in $0; any other mode value aborts assembly.
.macro CacheHit
.if $0 == NORMAL
	// NORMAL: tail-call the cached imp directly.
	TailCallCachedImp x17, x12, x1, x16	// authenticate and call imp
.elseif $0 == GETIMP
	// GETIMP: hand the imp back in p0 (x0); a nil imp skips the ptrauth step.
	mov	p0, p17
	cbz	p0, 9f			// don't ptrauth a nil imp
	AuthAndResignAsIMP x0, x12, x1, x16	// authenticate imp and re-sign as IMP
9:	ret				// return IMP
.elseif $0 == LOOKUP
	// No nil check for ptrauth: the caller would crash anyway when they
	// jump to a nil IMP. We don't care if that jump also fails ptrauth.
	AuthAndResignAsIMP x17, x12, x1, x16	// authenticate imp and re-sign as IMP
	ret				// return imp via x17
.else
.abort oops
.endif
.endmacro

// CheckMiss: p9 = sel loaded from the current bucket, $0 = lookup mode.
// An empty bucket (sel == 0) means a miss; control goes to the mode-specific miss target.
// A non-zero sel falls through so the caller keeps scanning.
.macro CheckMiss
	// miss if bucket->sel == 0
    // cbz branches when the register is zero
.if $0 == GETIMP
	cbz	p9, LGetImpMiss
.elseif $0 == NORMAL
	cbz	p9, __objc_msgSend_uncached
.elseif $0 == LOOKUP
	cbz	p9, __objc_msgLookup_uncached
.else
.abort oops
.endif
.endmacro

3.6 CacheLookup汇编逻辑

进入CacheLookup NORMAL, _objc_msgSend

x0 : receiver

x1 : selector

x16 : isa，指向类 , define CACHE (2 * __SIZEOF_POINTER__)，其中 __SIZEOF_POINTER__ 表示pointer的大小，即 2*8 = 16

p16=isa，指向类的首地址偏移16个字节[isa（8字节）superClass（8字节)]后取得cache存入p11
ldr p11, [x16, #CACHE] p11 = mask|buckets（mask高16位 + buckets低48位）
and p10, p11, #0x0000ffffffffffff p10 = buckets
and p12, p1, p11, LSR #48 x12 = _cmd & mask
p12是存储bucket的下标index

index * (1<<4) (即16) 得到实际内存的偏移量，通过buckets的首地址偏移，获取bucket存入p12寄存器

LSL #(1+PTRSHIFT)-左移4位，其中PTRSHIFT是3， 1+PTRSHIFT = 4 ，index<<4相当于乘以16，也就是下标为index的bucket相对于buckets的首地址的偏移地址

p12 = buckets + ((_cmd & mask) << (1+PTRSHIFT)) ，p12中存储的是下标为index的bucket的地址
从x12（即p12）中取出 bucket 分别将imp和sel 存入 p17（存储imp） 和 p9（存储sel） . {imp, sel} = *bucket
循环比较 sel 与 p1（传入的参数cmd）

//伪代码
while{
	if (bucket->sel != _cmd) 
    	if （bucket->sel == 0）则跳转至__objc_msgSend_uncached,跳出循环
        else 
        	if( bucket == buckets)
            	跳转到从buckets数组的末尾开始查找
            else 
            	向前遍历buckets数组: 将x12（即p12，当前bucket）向前平移BUCKET_SIZE大小的内存，得到下一个待比较的bucket（即内存中的前一个bucket）的地址
    else 如果相等 即cacheHit 缓存命中，直接返回imp，跳出循环
   }