#!/usr/bin/python
#-*- encoding: UTF-8
"""
Compute the real file offset inside a Mach-O binary so that an instruction
can be located easily. by @Reflejo
"""

import sys
import struct

# Byte-order tags: stored next to each architecture name below and passed
# as the last argument of unpack().
LITTLE_ENDIAN = 0
BIG_ENDIAN = 1

# Mach-O cputype value -> (architecture name, byte order of its images).
# The name is what gets compared against the optional `cputype` command-line
# argument.  64-bit ids are the 32-bit id OR-ed with 0x1000000.
ARQUITECTURES = {
    7: ('x86', LITTLE_ENDIAN),
    8: ('MIPS', LITTLE_ENDIAN),
    12: ('ARM', LITTLE_ENDIAN),
    14: ('SPARC', BIG_ENDIAN),
    18: ('PowerPC', BIG_ENDIAN),
    0x1000007: ('x86_64', LITTLE_ENDIAN),
    0x100000c: ('ARM64', LITTLE_ENDIAN),
    # 64-bit PowerPC images are big-endian, like the 32-bit ones.
    0x1000012: ('PowerPC 64-bit', BIG_ENDIAN),
}

def unpack(template, data, endianess):
    """Decode `data` with the struct format `template` in a given byte order.

    `template` is a struct format string without a byte-order prefix and
    `endianess` is LITTLE_ENDIAN or BIG_ENDIAN.  The prefix is always spelled
    out ('<' or '>'): a template without one is decoded in the byte order and
    with the alignment padding of the machine running the script, which is
    unrelated to the byte order of the file being parsed.

    Returns the tuple produced by struct.unpack; raises struct.error when
    `data` does not have the size the template requires.
    """
    prefix = '<' if endianess == LITTLE_ENDIAN else '>'
    return struct.unpack(prefix + template, data)

# Command line: <binary> <initial offset in hex> [cputype]
if len(sys.argv) < 3:
    print("""
    Calculador de offsets para archivos Mach-O by @Reflejo
    -------------------------------------------------------

    Uso: %s <binario> <offset inicial en hexa> (cputype)

    cputype puede ser: x86/x86_64/PowerPC/PowerPC 64-bit
    """ % sys.argv[0])
    sys.exit(1)

# Architecture name to look for in a universal binary; x86 when omitted.
cputype = sys.argv[3] if len(sys.argv) > 3 else 'x86'

# The offset is given in hexadecimal (an optional 0x prefix is accepted by
# int()).  Report a bad value instead of dying with a traceback.
try:
    initial_offset = int(sys.argv[2], 16)
except ValueError:
    sys.stderr.write("Offset inicial incorrecto (debe estar en hexa): %s\n"
                     % sys.argv[2])
    sys.exit(1)

# First four bytes of a thin (single-architecture) Mach-O, as they look when
# read as a big-endian integer -> (byte order of the image, message to show).
_THIN_MAGICS = {
    0xfeedface: (BIG_ENDIAN, "Es un Mach-O PPC"),
    0xcefaedfe: (LITTLE_ENDIAN, "Es un Mach-O exclusivamente i386"),
    0xfeedfacf: (BIG_ENDIAN, "Es un Mach-O de 64 bits (big endian)"),
    0xcffaedfe: (LITTLE_ENDIAN, "Es un Mach-O de 64 bits (little endian)"),
}

# Value of the header's magic field once decoded in the image's byte order.
_MH_MAGIC = 0xfeedface
_MH_MAGIC_64 = 0xfeedfacf

# Segment load command id -> (segment template, bytes it decodes,
#                             section template, bytes it decodes,
#                             full size of one section header).
# The segment template starts right after the 8-byte (cmd, cmdsize) pair.
_SEGMENT_LAYOUTS = {
    0x1: ('16sIIIIIIII', 48, '16s16sIII', 44, 68),      # LC_SEGMENT
    0x19: ('16sQQQQIIII', 64, '16s16sQQI', 52, 80),     # LC_SEGMENT_64
}


def _c_string(raw):
    """Return `raw` up to its first NUL byte, or all of it if it has none."""
    return raw.split(b'\x00', 1)[0]


def _find_text_section(stream, nsects, layout, endianess):
    """Scan `nsects` section headers for the __text section of __TEXT.

    `stream` must be positioned at the first section header and `layout` is
    an entry of _SEGMENT_LAYOUTS.  Returns (file offset, address) of the
    section, or None when it is not among the headers read.  If several
    headers match, the last one wins.
    """
    sect_template, sect_used, sect_size = layout[2:]
    found = None
    while nsects > 0:
        nsects -= 1
        raw = stream.read(sect_size)
        if len(raw) < sect_size:
            break  # truncated file: nothing more to read
        # Only the leading fields are needed; the rest of the header
        # (alignment, relocations, flags, reserved words) is skipped.
        secname, segname, addr, size, offset = unpack(
            sect_template, raw[:sect_used], endianess)
        if _c_string(secname) == b'__text' and _c_string(segname) == b'__TEXT':
            found = (offset, addr)
    return found


# (offset of the image inside the file, byte order of the image)
target = ()
found_offset = found_vmaddr = None

with open(sys.argv[1], 'rb') as content:
    raw = content.read(8)
    if len(raw) == 8:
        magicnumber, count = struct.unpack('>II', raw)
    else:
        magicnumber, count = None, 0  # too short to hold any header

    if magicnumber == 0xcafebabe:
        # Universal binary: a big-endian table with one 20-byte entry per
        # architecture.  Keep the entry whose name matches `cputype`.
        print("Es un Mach-O con %d arquitecturas" % count)
        remaining = count
        while remaining > 0:
            remaining -= 1
            raw = content.read(20)
            if len(raw) < 20:
                break
            cpu, subtype, offset, size, align = struct.unpack('>IIIII', raw)
            arch = ARQUITECTURES.get(cpu)  # unknown cpu types are skipped
            if arch is not None and arch[0] == cputype:
                target = (offset, arch[1])
                print("Arquitectura encontrada! %s" % (target,))

    elif magicnumber in _THIN_MAGICS:
        # Thin binary: the only image starts at offset 0, so the `cputype`
        # argument is not consulted.
        order, message = _THIN_MAGICS[magicnumber]
        print(message)
        target = (0, order)

    if not target:
        print("Arquitectura no encontrada :-(")
        sys.exit(1)

    # Read the Mach-O header of the selected image
    target, endianess = target
    content.seek(target)
    raw = content.read(28)
    if len(raw) == 28:
        information = unpack("IIIIIII", raw, endianess)
    else:
        information = (None,) * 7
    magic, cpu, subtype, filetype, ncmds, sizeofcmds, flags = information

    if magic not in (_MH_MAGIC, _MH_MAGIC_64):
        print("Cabecera Mach-O incorrecta en el offset 0x%x" % target)
        sys.exit(1)
    if magic == _MH_MAGIC_64:
        # The 64-bit header carries one extra reserved 32-bit word.
        content.seek(4, 1)

    # Walk the load commands looking for the section __TEXT,__text
    remaining = ncmds
    while remaining > 0:
        remaining -= 1
        pos = content.tell()
        raw = content.read(8)
        if len(raw) < 8:
            break
        cmd, size = unpack("II", raw, endianess)

        layout = _SEGMENT_LAYOUTS.get(cmd)
        if layout is not None:
            raw = content.read(layout[1])
            if len(raw) == layout[1]:
                # Fields: segname, vmaddr, vmsize, fileoff, filesize,
                # maxprot, initprot, nsects, flags
                nsects = unpack(layout[0], raw, endianess)[7]
                hit = _find_text_section(content, nsects, layout, endianess)
                if hit is not None:
                    # Needed to compute the final offset
                    found_offset, found_vmaddr = hit

        if size < 8:
            break  # a command cannot be smaller than its own (cmd, size) pair
        content.seek(pos + size)

if found_offset is None:
    print("El binario no tiene __TEXT,__text")
    sys.exit(1)

print("El offset real es: %x" % (target + found_offset + initial_offset - found_vmaddr))

