Skip to content
  • Peter Zijlstra's avatar
    locking/mutex: Optimize __mutex_trylock_fast() · c427f695
    Peter Zijlstra authored
    
    
    Use try_cmpxchg() to avoid the pointless TEST instruction.
    And add the (missing) atomic_long_try_cmpxchg*() wrappers.
    
    On x86_64 this gives (before on the left, after on the right):
    
    0000000000000710 <mutex_lock>:						0000000000000710 <mutex_lock>:
     710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx                      710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx
     717:   00 00                                                            717:   00 00
                            715: R_X86_64_32S       current_task                                    715: R_X86_64_32S       current_task
     719:   31 c0                   xor    %eax,%eax                         719:   31 c0                   xor    %eax,%eax
     71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)                 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)
     720:   48 85 c0                test   %rax,%rax                         720:   75 02                   jne    724 <mutex_lock+0x14>
     723:   75 02                   jne    727 <mutex_lock+0x17>             722:   f3 c3                   repz retq
     725:   f3 c3                   repz retq                                724:   eb da                   jmp    700 <__mutex_lock_slowpath>
     727:   eb d7                   jmp    700 <__mutex_lock_slowpath>       726:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
     729:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)                         72d:   00 00 00
    
    On ARM64 this gives (before on the left, after on the right):
    
    0000000000000638 <mutex_lock>:						0000000000000638 <mutex_lock>:
         638:       d5384101        mrs     x1, sp_el0                           638:       d5384101        mrs     x1, sp_el0
         63c:       d2800002        mov     x2, #0x0                             63c:       d2800002        mov     x2, #0x0
         640:       f9800011        prfm    pstl1strm, [x0]                      640:       f9800011        prfm    pstl1strm, [x0]
         644:       c85ffc03        ldaxr   x3, [x0]                             644:       c85ffc03        ldaxr   x3, [x0]
         648:       ca020064        eor     x4, x3, x2                           648:       ca020064        eor     x4, x3, x2
         64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>            64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>
         650:       c8047c01        stxr    w4, x1, [x0]                         650:       c8047c01        stxr    w4, x1, [x0]
         654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>             654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>
         658:       b40000c3        cbz     x3, 670 <mutex_lock+0x38>            658:       b5000043        cbnz    x3, 660 <mutex_lock+0x28>
         65c:       a9bf7bfd        stp     x29, x30, [sp,#-16]!                 65c:       d65f03c0        ret
         660:       910003fd        mov     x29, sp                              660:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
         664:       97ffffef        bl      620 <__mutex_lock_slowpath>          664:       910003fd        mov     x29, sp
         668:       a8c17bfd        ldp     x29, x30, [sp],#16                   668:       97ffffee        bl      620 <__mutex_lock_slowpath>
         66c:       d65f03c0        ret                                          66c:       a8c17bfd        ldp     x29, x30, [sp],#16
         670:       d65f03c0        ret                                          670:       d65f03c0        ret
    
    Reported-by: Matthew Wilcox <mawilcox@microsoft.com>
    Acked-by: Will Deacon <will.deacon@arm.com>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
    Cc: Linus Torvalds <torvalds@linux-foundation.org>
    Cc: Peter Zijlstra <peterz@infradead.org>
    Cc: Thomas Gleixner <tglx@linutronix.de>
    Signed-off-by: Ingo Molnar <mingo@kernel.org>
    c427f695