"""
name: Qakbot 5.0
category: config extractors
author: malcat

Decrypt strings and extract CnC information from a (plain-text) Qakbot 5.0 sample
"""

import malcat
import struct
import itertools
import hashlib
import json
import datetime
import re
import math
import collections

malcat.setup()  # Add Malcat's data directories to sys.path when called in headless mode
from transforms.binary import CircularXor
from transforms.block import AesDecrypt



############################ utility functions

def decrypt_aes_iv_prefix(data:bytes, aes_password: bytes):
    """
    Decrypt an AES-CBC blob that carries its IV as a 16-byte prefix.

    The AES key is the SHA-256 digest of ``aes_password``. The first 16 bytes
    of ``data`` are used as the IV and everything after them as the
    ciphertext, which is run through the AesDecrypt transform with unpadding
    enabled.

    Returns:
        Whatever ``AesDecrypt().run`` returns for the decrypted payload.
    """
    derived_key = hashlib.sha256(aes_password).digest()
    init_vector, ciphertext = data[:16], data[16:]
    return AesDecrypt().run(ciphertext, mode="cbc", iv=init_vector, key=derived_key, unpad=True)


def get_all_referencing_functions(a:malcat.Analysis, address:int):
    """
    Collect the functions that contain a cross-reference to ``address``.

    Each incoming reference listed in ``a.xrefs[address]`` is resolved to its
    enclosing function through ``a.fns.find``; references for which no
    function is found are ignored.

    Returns:
        A set of the function objects found (empty when there is none).
    """
    referencing = set()
    for incoming in a.xrefs[address]:
        owner = a.fns.find(incoming.address)
        if owner is None:
            # the reference does not originate from a known function
            continue
        referencing.add(owner)
    return referencing



def entropy(data:str, base=2):
    """
    Compute the Shannon entropy of the symbols in ``data``.

    Args:
        data: sequence of symbols (characters of a string here).
        base: logarithm base; the default of 2 gives a result in bits.

    Returns:
        0 for inputs of at most one symbol, otherwise the entropy as a float.
    """
    total = len(data)
    if total <= 1:
        return 0

    result = 0
    for occurrences in collections.Counter(data).values():
        probability = float(occurrences) / total
        if probability > 0.:
            result -= probability * math.log(probability, base)
    return result


############################ interesting buffer heuristics

def enumerate_interesting_buffers(a:malcat.Analysis, section_name:str, prefixed_buffer:bool = False):
    """
    Yield ``(address, size)`` candidates for data buffers found in a section.

    Every cross-referenced address inside the section is considered to be the
    start of a buffer. The size is then guessed in one of two ways:

    * ``prefixed_buffer=True``: a buffer is reported only when another
      referenced address sits exactly 2 bytes before it; these 2 bytes are
      read as a little-endian 16-bit size.
    * ``prefixed_buffer=False``: every operand value found in the functions
      referencing the buffer is tried as a size. It is accepted when it is
      greater than 0x10 and the buffer would end at most at the next
      referenced address, leaving fewer than 0x20 unused bytes before it.
      The same candidate may be yielded several times.

    Args:
        a: Malcat analysis of the sample.
        section_name: key of the section in ``a.map`` (e.g. ".data").
        prefixed_buffer: selects the size heuristic, see above.

    Yields:
        ``(address, size)`` tuples.
    """
    section = a.map[section_name]

    # get all incoming xrefs in the section: each target denotes the start of a buffer
    # NOTE(review): this reads a.xref whereas get_all_referencing_functions()
    # reads a.xrefs -- confirm that both attributes exist in the Malcat bindings
    data_xrefs = [x.target for x in a.xref[section.start:section.end]]

    # let's assume the first and last xrefs will never be interesting
    for i in range(1, len(data_xrefs) - 1):
        prev_addr, cur, next_addr = data_xrefs[i-1:i+2]

        if prefixed_buffer:
            # is it a size-prefixed buffer ? (i.e. there is a referenced word 2 bytes before)
            if cur - prev_addr == 2:
                # file offsets are only needed here, to read the 2-byte size prefix
                prev_off = a.a2p(prev_addr)
                cur_off = a.a2p(cur)
                size, = struct.unpack("<H", a.file[prev_off:cur_off])
                yield cur, size
            continue

        # we'll look for all immediate constants in referencing functions and see which one could be a size
        for fn in get_all_referencing_functions(a, cur):
            for basic_block in fn:
                if not basic_block.code:
                    continue
                for instruction in basic_block:
                    for operand in instruction:
                        value = operand.value
                        if value and value > 0x10 and cur + value <= next_addr and next_addr - (cur + value) < 0x20:
                            yield cur, value


############################ strings decryption


def get_potential_strings_triples(a:malcat.Analysis):
    """
    Yield candidate (strings, xor key, aes password) buffer triples.

    We look for 3 buffers of the ".data" section referenced from the same
    function: one is the strings, one the xor key, one the aes password.
    Since the role of each buffer is unknown, every ordering of every 3
    distinct candidate buffers of a function is produced.

    Yields:
        Tuples of three ``(address, size)`` pairs; each distinct triple is
        yielded only once, even if several functions would produce it.
    """
    function_to_refs = {}
    done = set()

    # group all interesting buffers by referencing functions
    for address, size in enumerate_interesting_buffers(a, ".data", prefixed_buffer=False):
        if size < 0x20:
            continue
        candidate = (address, size)
        # find all reference coming from functions
        for fn in get_all_referencing_functions(a, address):
            # A dict is used as an insertion-ordered set: the same candidate can
            # be reported many times, and keeping duplicates would only create
            # triples where one buffer plays two roles.
            function_to_refs.setdefault(fn.address, {})[candidate] = None

    # now try to find a function referencing 3 interesting buffers
    for by_function in function_to_refs.values():
        if len(by_function) < 3:
            # there should be at least 3 distinct candidate buffers inside one function
            continue
        # we don't know which is one is the data, xor key or aes password: try all permutations of triples
        for candidate_triple in itertools.permutations(by_function, r=3):
            if candidate_triple not in done:
                done.add(candidate_triple)
                yield candidate_triple
    
def get_strings_arrays(a:malcat.Analysis):
    """
    Try to decrypt every candidate (strings, xor key, aes password) triple.

    For each triple yielded by get_potential_strings_triples(), the xor-key
    buffer is AES-decrypted using the password buffer, and the result is used
    as a circular XOR key over the strings buffer. A candidate is kept only
    if every step succeeds and the output decodes as UTF-8; failures are
    printed and the next candidate is tried.

    Returns:
        A list with one entry per successful triple; each entry is the
        decrypted text split on NUL characters.
    """
    res = []
    # tries to decrypt all string arrays candidates
    for strings, xor, aes_password in get_potential_strings_triples(a):

        print(f"Trying strings=({a.ppa(strings[0])}, {hex(strings[1])}), xor=({a.ppa(xor[0])}, {hex(xor[1])}), aes_password=({a.ppa(aes_password[0])}, {hex(aes_password[1])}) ... ", end="")

        try:
            # decrypt XOR key using AES
            xor_address, xor_size = xor
            xor_offset = a.a2p(xor_address)
            xor_buffer = a.file[xor_offset: xor_offset + xor_size]

            aes_address, aes_size = aes_password
            aes_offset = a.a2p(aes_address)
            aes_buffer = a.file[aes_offset: aes_offset + aes_size]

            xor_key = decrypt_aes_iv_prefix(xor_buffer, aes_buffer)

            # decrypt strings using XOR key
            strings_address, strings_size = strings
            strings_offset = a.a2p(strings_address)
            strings_buffer = a.file[strings_offset: strings_offset + strings_size]

            strings_decrypted = CircularXor().run(strings_buffer, key=xor_key).decode("utf8")
            all_strings = strings_decrypted.split("\x00")

            res.append(all_strings)
            print(f"Found {len(all_strings)} strings !")

        except Exception as e:
            # Wrong candidates are expected to fail. Only Exception is caught so
            # that Ctrl-C (KeyboardInterrupt) and SystemExit still stop the script.
            print(f"{e} :(")

    return res



############################ config extraction

def qakbot_config_extraction(a:malcat.Analysis):
    """
    Extract the C&C list and campaign options from a Qakbot 5.0 analysis.

    Steps:
      1. Decrypt the string arrays and keep the one containing "ipconfig /all".
      2. From these strings, pick the HTTP endpoint (first string matching a
         short "/path" pattern) and the config password candidates.
      3. Try every password candidate on every size-prefixed buffer of the
         ".data" section. A decrypted blob is accepted when its first 32 bytes
         are the SHA-256 of the rest, and is then parsed either as
         "key=value" lines or as a list of 8-byte C&C records.

    Progress and failures are printed to stdout.

    Returns:
        A dict with "cncs" (list of ``(ip, port)`` tuples) and "options"
        (dict of string keys to string values).
    """
    print("Running heuristic to find string arrays ...")
    strings_1 = []

    # find string arrays
    for string_array in get_strings_arrays(a):
        print(f"\nFound one string array of {len(string_array)} strings:")
        print("\n".join(string_array))
        if "ipconfig /all" in string_array:
            strings_1 = string_array
        print()

    ips = []
    options = {}
    config_passwords = []

    # try to find endpoint
    for s in strings_1:
        if re.match(r"^/[a-zA-Z0-9_%?=&-]{2,16}$", s):
            options["http_endpoint"] = s
            break

    # try to find password candidates: high-entropy, good length, not a lot of space or backslaches
    for s in strings_1:
        if len(s) > 30 and len(s) < 60 and entropy(s) > 4 and s.count(" ") < 2 and s.count("\\") < 2:
            config_passwords.append(s)
    print(f"Found {len(config_passwords)} password candidates: {', '.join(config_passwords)}")

    # ok now try to look for prefixed buffers:
    for address, size in enumerate_interesting_buffers(a, ".data", prefixed_buffer=True):

        # and try to decrypt using our password candidates
        for config_password in config_passwords:
            print(f"Trying config decryption for {a.ppa(address)}, {hex(size)}) with password {config_password} ... ", end="")
            try:
                offset = a.a2p(address)
                buffer = a.file[offset:offset+size]

                # AES decrypt (the first byte of the blob is skipped, the IV comes right after it)
                decrypted = decrypt_aes_iv_prefix(buffer[1:], config_password.encode("ascii"))

                # verify checksum
                checksum = decrypted[:32]
                data = decrypted[32:]
                if hashlib.sha256(data).digest() != checksum:
                    raise ValueError("Invalid blob checksum")

                # looks like campaign info?
                if data.count(b"=") >= 2:
                    text = data.decode("ascii").replace("\r", "")
                    d = {}
                    for line in text.split("\n"):
                        if not line.strip():
                            continue
                        # Split on the first "=" only, so that values containing
                        # "=" are kept whole; lines without any "=" are skipped
                        # instead of invalidating the whole dictionary.
                        key, separator, value = line.partition("=")
                        if separator:
                            d[key] = value
                    print(f"Found config dictionnary with  {len(d)} entries!")
                    for k, v in d.items():
                        if k == "10":
                            k = "campaign_id"
                        elif k == "3":
                            k = "date"
                            # NOTE(review): fromtimestamp() without tz gives naive local
                            # time, so the output depends on the machine's timezone
                            v = datetime.datetime.fromtimestamp(int(v)).isoformat()
                        options[k] = v

                # looks like campaign IPs list?
                elif data.startswith(b"\x01"):
                    # records are 8 bytes: type (1), IPv4 (4), big-endian port (2), 1 ignored byte;
                    # only complete records are read, trailing bytes are ignored
                    for i in range(0, len(data) - 7, 8):
                        entry_type, raw_ip, port, _ = struct.unpack_from(">B4sHB", data, i)
                        if entry_type != 1:
                            raise ValueError(f"Unknown CNC format {entry_type}")
                        ip = ".".join(str(octet) for octet in raw_ip)
                        ips.append((ip, port))
                    print ("Found IPs !")

                else:
                    print("Unknwon config data")

            except Exception as e:
                print(f"{e} :(")

    return {
        "cncs": ips,
        "options": options,
    }

################################ MAIN


if __name__ == "__main__":

    if "analysis" in globals():
        # GUI mode: Malcat has already put the analysis of the current file in the script's globals
        configs = [qakbot_config_extraction(analysis)]
    else:
        # headless mode: every file given on the command line has to be analysed first
        import optparse
        parser = optparse.OptionParser(
            usage="usage: %prog <file1> [file2] ... [fileN]",
            description="""Extract config for (unpacked) Qakbot 5.0 samples""",
        )
        options, args = parser.parse_args()
        if not args:
            parser.error("Please give path to a file")

        configs = []
        for sample_path in args:
            sample_analysis = malcat.analyse(sample_path)
            configs.append(qakbot_config_extraction(sample_analysis))

    # dump every extracted configuration as indented JSON
    for extracted in configs:
        print("\nQAKBOT_CONFIG = ", end="")
        print(json.dumps(extracted, indent=4))
